]>
Commit | Line | Data |
---|---|---|
2cb7cef9 BS |
1 | From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org> |
2 | Subject: [PATCH] Linux: Update to 2.6.27 | |
3 | Patch-mainline: 2.6.27 | |
4 | ||
5 | This patch contains the differences between Linux 2.6.26 and 2.6.27. | |
6 | ||
7 | Acked-by: Jeff Mahoney <jeffm@suse.com> | |
8 | Automatically created from "patches.kernel.org/patch-2.6.27" by xen-port-patches.py | |
9 | ||
82094b55 AF |
10 | --- sle11-2009-10-16.orig/arch/x86/Kconfig 2009-03-16 16:38:05.000000000 +0100 |
11 | +++ sle11-2009-10-16/arch/x86/Kconfig 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
12 | @@ -594,7 +594,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT |
13 | config AMD_IOMMU | |
14 | bool "AMD IOMMU support" | |
15 | select SWIOTLB | |
16 | - depends on X86_64 && PCI && ACPI | |
17 | + depends on X86_64 && PCI && ACPI && !X86_64_XEN | |
18 | help | |
19 | With this option you can enable support for AMD IOMMU hardware in | |
20 | your system. An IOMMU is a hardware component which provides | |
21 | @@ -629,8 +629,10 @@ config MAXSMP | |
22 | ||
23 | config NR_CPUS | |
24 | int "Maximum number of CPUs (2-4096)" | |
25 | + range 2 32 if XEN | |
26 | range 2 4096 | |
27 | depends on SMP | |
28 | + default "32" if MAXSMP && XEN | |
29 | default "4096" if MAXSMP | |
30 | default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 | |
31 | default "16" if X86_64_XEN | |
32 | @@ -1227,7 +1229,7 @@ config MTRR | |
33 | config MTRR_SANITIZER | |
34 | bool | |
35 | prompt "MTRR cleanup support" | |
36 | - depends on MTRR | |
37 | + depends on MTRR && !XEN | |
38 | help | |
39 | Convert MTRR layout from continuous to discrete, so X drivers can | |
40 | add writeback entries. | |
82094b55 AF |
41 | --- sle11-2009-10-16.orig/arch/x86/Kconfig.debug 2009-03-16 16:33:40.000000000 +0100 |
42 | +++ sle11-2009-10-16/arch/x86/Kconfig.debug 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
43 | @@ -25,6 +25,7 @@ config STRICT_DEVMEM |
44 | config X86_VERBOSE_BOOTUP | |
45 | bool "Enable verbose x86 bootup info messages" | |
46 | default y | |
47 | + depends on !XEN | |
48 | help | |
49 | Enables the informational output from the decompression stage | |
50 | (e.g. bzImage) of the boot. If you disable this you will still | |
51 | @@ -179,7 +180,7 @@ config MMIOTRACE_HOOKS | |
52 | ||
53 | config MMIOTRACE | |
54 | bool "Memory mapped IO tracing" | |
55 | - depends on DEBUG_KERNEL && PCI | |
56 | + depends on DEBUG_KERNEL && PCI && !XEN | |
57 | select TRACING | |
58 | select MMIOTRACE_HOOKS | |
59 | help | |
82094b55 AF |
60 | --- sle11-2009-10-16.orig/arch/x86/Makefile 2009-02-16 16:18:36.000000000 +0100 |
61 | +++ sle11-2009-10-16/arch/x86/Makefile 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
62 | @@ -116,8 +116,8 @@ mflags-$(CONFIG_X86_VOYAGER) := -Iinclud |
63 | mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/ | |
64 | ||
65 | # Xen subarch support | |
66 | -mflags-$(CONFIG_X86_XEN) := -Iinclude/asm-x86/mach-xen | |
67 | -mcore-$(CONFIG_X86_XEN) := arch/x86/mach-xen/ | |
68 | +mflags-$(CONFIG_XEN) := -Iinclude/asm-x86/mach-xen | |
69 | +mcore-$(CONFIG_XEN) := arch/x86/mach-xen/ | |
70 | ||
71 | # generic subarchitecture | |
72 | mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic | |
73 | @@ -128,8 +128,10 @@ mcore-$(CONFIG_X86_GENERICARCH) := arch/ | |
74 | mflags-y += -Iinclude/asm-x86/mach-default | |
75 | ||
76 | # 64 bit does not support subarch support - clear sub arch variables | |
77 | +ifneq ($(CONFIG_XEN),y) | |
78 | fcore-$(CONFIG_X86_64) := | |
79 | mcore-$(CONFIG_X86_64) := | |
80 | +endif | |
81 | ||
82 | KBUILD_CFLAGS += $(mflags-y) | |
83 | KBUILD_AFLAGS += $(mflags-y) | |
82094b55 AF |
84 | --- sle11-2009-10-16.orig/arch/x86/ia32/ia32entry-xen.S 2009-03-16 16:38:05.000000000 +0100 |
85 | +++ sle11-2009-10-16/arch/x86/ia32/ia32entry-xen.S 2009-10-16 14:51:56.000000000 +0200 | |
2cb7cef9 BS |
86 | @@ -15,6 +15,16 @@ |
87 | #include <asm/irqflags.h> | |
88 | #include <linux/linkage.h> | |
89 | ||
90 | +/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | |
91 | +#include <linux/elf-em.h> | |
92 | +#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) | |
93 | +#define __AUDIT_ARCH_LE 0x40000000 | |
94 | + | |
95 | +#ifndef CONFIG_AUDITSYSCALL | |
96 | +#define sysexit_audit int_ret_from_sys_call | |
97 | +#define sysretl_audit int_ret_from_sys_call | |
98 | +#endif | |
99 | + | |
100 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) | |
101 | ||
102 | .macro IA32_ARG_FIXUP noebp=0 | |
103 | @@ -37,6 +47,11 @@ | |
104 | movq %rax,R8(%rsp) | |
105 | .endm | |
106 | ||
107 | + /* | |
108 | + * Reload arg registers from stack in case ptrace changed them. | |
109 | + * We don't reload %eax because syscall_trace_enter() returned | |
110 | + * the value it wants us to use in the table lookup. | |
111 | + */ | |
112 | .macro LOAD_ARGS32 offset | |
113 | movl \offset(%rsp),%r11d | |
114 | movl \offset+8(%rsp),%r10d | |
115 | @@ -46,7 +61,6 @@ | |
116 | movl \offset+48(%rsp),%edx | |
117 | movl \offset+56(%rsp),%esi | |
118 | movl \offset+64(%rsp),%edi | |
119 | - movl \offset+72(%rsp),%eax | |
120 | .endm | |
121 | ||
122 | .macro CFI_STARTPROC32 simple | |
123 | @@ -61,6 +75,19 @@ | |
124 | CFI_UNDEFINED r15 | |
125 | .endm | |
126 | ||
127 | +#ifdef CONFIG_PARAVIRT | |
128 | +ENTRY(native_usergs_sysret32) | |
129 | + swapgs | |
130 | + sysretl | |
131 | +ENDPROC(native_usergs_sysret32) | |
132 | + | |
133 | +ENTRY(native_irq_enable_sysexit) | |
134 | + swapgs | |
135 | + sti | |
136 | + sysexit | |
137 | +ENDPROC(native_irq_enable_sysexit) | |
138 | +#endif | |
139 | + | |
140 | /* | |
141 | * 32bit SYSENTER instruction entry. | |
142 | * | |
143 | @@ -98,7 +125,7 @@ ENTRY(ia32_sysenter_target) | |
144 | CFI_RESTORE rcx | |
145 | movl %ebp,%ebp /* zero extension */ | |
146 | movl %eax,%eax | |
147 | - movl 48-THREAD_SIZE+threadinfo_sysenter_return(%rsp),%r10d | |
148 | + movl 48-THREAD_SIZE+TI_sysenter_return(%rsp),%r10d | |
149 | movl $__USER32_DS,40(%rsp) | |
150 | movq %rbp,32(%rsp) | |
151 | movl $__USER32_CS,16(%rsp) | |
82094b55 | 152 | @@ -113,19 +140,75 @@ ENTRY(ia32_sysenter_target) |
2cb7cef9 BS |
153 | .quad 1b,ia32_badarg |
154 | .previous | |
155 | GET_THREAD_INFO(%r10) | |
156 | - orl $TS_COMPAT,threadinfo_status(%r10) | |
157 | - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) | |
158 | + orl $TS_COMPAT,TI_status(%r10) | |
159 | + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) | |
160 | jnz sysenter_tracesys | |
161 | -sysenter_do_call: | |
162 | cmpl $(IA32_NR_syscalls-1),%eax | |
163 | ja ia32_badsys | |
164 | +sysenter_do_call: | |
165 | IA32_ARG_FIXUP 1 | |
166 | +sysenter_dispatch: | |
167 | call *ia32_sys_call_table(,%rax,8) | |
168 | movq %rax,RAX-ARGOFFSET(%rsp) | |
169 | + GET_THREAD_INFO(%r10) | |
170 | + DISABLE_INTERRUPTS(CLBR_NONE) | |
171 | + TRACE_IRQS_OFF | |
172 | + testl $_TIF_ALLWORK_MASK,TI_flags(%r10) | |
173 | + jnz sysexit_audit | |
82094b55 AF |
174 | jmp int_ret_from_sys_call |
175 | ||
2cb7cef9 BS |
176 | +#ifdef CONFIG_AUDITSYSCALL |
177 | + .macro auditsys_entry_common | |
178 | + movl %esi,%r9d /* 6th arg: 4th syscall arg */ | |
179 | + movl %edx,%r8d /* 5th arg: 3rd syscall arg */ | |
180 | + /* (already in %ecx) 4th arg: 2nd syscall arg */ | |
181 | + movl %ebx,%edx /* 3rd arg: 1st syscall arg */ | |
182 | + movl %eax,%esi /* 2nd arg: syscall number */ | |
183 | + movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ | |
184 | + call audit_syscall_entry | |
185 | + movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ | |
186 | + cmpl $(IA32_NR_syscalls-1),%eax | |
187 | + ja ia32_badsys | |
188 | + movl %ebx,%edi /* reload 1st syscall arg */ | |
189 | + movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ | |
190 | + movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */ | |
191 | + movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */ | |
192 | + movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ | |
193 | + .endm | |
194 | + | |
195 | + .macro auditsys_exit exit,ebpsave=RBP | |
196 | + testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) | |
197 | + jnz int_ret_from_sys_call | |
198 | + TRACE_IRQS_ON | |
199 | + ENABLE_INTERRUPTS(CLBR_NONE) | |
200 | + movl %eax,%esi /* second arg, syscall return value */ | |
201 | + cmpl $0,%eax /* is it < 0? */ | |
202 | + setl %al /* 1 if so, 0 if not */ | |
203 | + movzbl %al,%edi /* zero-extend that into %edi */ | |
204 | + inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | |
205 | + call audit_syscall_exit | |
2cb7cef9 BS |
206 | + movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */ |
207 | + movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | |
208 | + DISABLE_INTERRUPTS(CLBR_NONE) | |
209 | + TRACE_IRQS_OFF | |
82094b55 | 210 | + jmp int_with_check |
2cb7cef9 BS |
211 | + .endm |
212 | + | |
213 | +sysenter_auditsys: | |
214 | + auditsys_entry_common | |
215 | + movl %ebp,%r9d /* reload 6th syscall arg */ | |
216 | + jmp sysenter_dispatch | |
217 | + | |
218 | +sysexit_audit: | |
219 | + auditsys_exit sysexit_from_sys_call | |
220 | +#endif | |
82094b55 | 221 | + |
2cb7cef9 BS |
222 | sysenter_tracesys: |
223 | xchgl %r9d,%ebp | |
224 | +#ifdef CONFIG_AUDITSYSCALL | |
225 | + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) | |
226 | + jz sysenter_auditsys | |
227 | +#endif | |
228 | SAVE_REST | |
229 | CLEAR_RREGS | |
230 | movq %r9,R9(%rsp) | |
82094b55 | 231 | @@ -186,18 +269,38 @@ ENTRY(ia32_cstar_target) |
2cb7cef9 BS |
232 | .quad 1b,ia32_badarg |
233 | .previous | |
234 | GET_THREAD_INFO(%r10) | |
235 | - orl $TS_COMPAT,threadinfo_status(%r10) | |
236 | - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) | |
237 | + orl $TS_COMPAT,TI_status(%r10) | |
238 | + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) | |
239 | jnz cstar_tracesys | |
240 | cstar_do_call: | |
241 | cmpl $IA32_NR_syscalls-1,%eax | |
242 | ja ia32_badsys | |
243 | IA32_ARG_FIXUP 1 | |
244 | +cstar_dispatch: | |
245 | call *ia32_sys_call_table(,%rax,8) | |
246 | movq %rax,RAX-ARGOFFSET(%rsp) | |
247 | + GET_THREAD_INFO(%r10) | |
248 | + DISABLE_INTERRUPTS(CLBR_NONE) | |
249 | + testl $_TIF_ALLWORK_MASK,TI_flags(%r10) | |
250 | + jnz sysretl_audit | |
251 | jmp int_ret_from_sys_call | |
252 | ||
253 | -cstar_tracesys: | |
254 | +#ifdef CONFIG_AUDITSYSCALL | |
255 | +cstar_auditsys: | |
256 | + movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */ | |
257 | + auditsys_entry_common | |
258 | + movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */ | |
259 | + jmp cstar_dispatch | |
260 | + | |
261 | +sysretl_audit: | |
262 | + auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */ | |
263 | +#endif | |
264 | + | |
265 | +cstar_tracesys: | |
266 | +#ifdef CONFIG_AUDITSYSCALL | |
267 | + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) | |
268 | + jz cstar_auditsys | |
269 | +#endif | |
270 | xchgl %r9d,%ebp | |
271 | SAVE_REST | |
272 | CLEAR_RREGS | |
82094b55 | 273 | @@ -263,8 +366,8 @@ ENTRY(ia32_syscall) |
2cb7cef9 BS |
274 | this could be a problem. */ |
275 | SAVE_ARGS 0,0,1 | |
276 | GET_THREAD_INFO(%r10) | |
277 | - orl $TS_COMPAT,threadinfo_status(%r10) | |
278 | - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) | |
279 | + orl $TS_COMPAT,TI_status(%r10) | |
280 | + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) | |
281 | jnz ia32_tracesys | |
282 | ia32_do_syscall: | |
283 | cmpl $(IA32_NR_syscalls-1),%eax | |
82094b55 | 284 | @@ -309,13 +412,11 @@ quiet_ni_syscall: |
2cb7cef9 BS |
285 | PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi |
286 | PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi | |
287 | PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx | |
288 | - PTREGSCALL stub32_sigsuspend, sys32_sigsuspend, %rcx | |
289 | PTREGSCALL stub32_execve, sys32_execve, %rcx | |
290 | PTREGSCALL stub32_fork, sys_fork, %rdi | |
291 | PTREGSCALL stub32_clone, sys32_clone, %rdx | |
292 | PTREGSCALL stub32_vfork, sys_vfork, %rdi | |
293 | PTREGSCALL stub32_iopl, sys_iopl, %rsi | |
294 | - PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx | |
295 | ||
296 | ENTRY(ia32_ptregs_common) | |
297 | popq %r11 | |
82094b55 | 298 | @@ -415,7 +516,7 @@ ia32_sys_call_table: |
2cb7cef9 BS |
299 | .quad sys_ssetmask |
300 | .quad sys_setreuid16 /* 70 */ | |
301 | .quad sys_setregid16 | |
302 | - .quad stub32_sigsuspend | |
303 | + .quad sys32_sigsuspend | |
304 | .quad compat_sys_sigpending | |
305 | .quad sys_sethostname | |
306 | .quad compat_sys_setrlimit /* 75 */ | |
82094b55 | 307 | @@ -522,7 +623,7 @@ ia32_sys_call_table: |
2cb7cef9 BS |
308 | .quad sys32_rt_sigpending |
309 | .quad compat_sys_rt_sigtimedwait | |
310 | .quad sys32_rt_sigqueueinfo | |
311 | - .quad stub32_rt_sigsuspend | |
312 | + .quad sys_rt_sigsuspend | |
313 | .quad sys32_pread /* 180 */ | |
314 | .quad sys32_pwrite | |
315 | .quad sys_chown16 | |
82094b55 | 316 | @@ -670,4 +771,10 @@ ia32_sys_call_table: |
2cb7cef9 BS |
317 | .quad sys32_fallocate |
318 | .quad compat_sys_timerfd_settime /* 325 */ | |
319 | .quad compat_sys_timerfd_gettime | |
320 | + .quad compat_sys_signalfd4 | |
321 | + .quad sys_eventfd2 | |
322 | + .quad sys_epoll_create1 | |
323 | + .quad sys_dup3 /* 330 */ | |
324 | + .quad sys_pipe2 | |
325 | + .quad sys_inotify_init1 | |
326 | ia32_syscall_end: | |
82094b55 AF |
327 | --- sle11-2009-10-16.orig/arch/x86/kernel/Makefile 2009-03-16 16:38:05.000000000 +0100 |
328 | +++ sle11-2009-10-16/arch/x86/kernel/Makefile 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
329 | @@ -120,9 +120,10 @@ ifeq ($(CONFIG_X86_64),y) |
330 | ||
331 | obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o | |
332 | ||
333 | - obj-$(CONFIG_XEN) += nmi_64.o | |
334 | + obj-$(CONFIG_XEN) += nmi.o | |
335 | time_64-$(CONFIG_XEN) += time_32.o | |
336 | endif | |
337 | ||
338 | -disabled-obj-$(CONFIG_XEN) := crash.o early-quirks.o hpet.o i8253.o i8259_$(BITS).o \ | |
339 | - pci-swiotlb_64.o reboot.o smpboot.o tlb_$(BITS).o tsc_$(BITS).o tsc_sync.o vsmp_64.o | |
340 | +disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o hpet.o i8253.o \ | |
341 | + i8259.o irqinit_$(BITS).o pci-swiotlb_64.o reboot.o smpboot.o \ | |
342 | + tlb_$(BITS).o tsc.o tsc_sync.o vsmp_64.o | |
82094b55 AF |
343 | --- sle11-2009-10-16.orig/arch/x86/kernel/acpi/boot.c 2009-08-26 11:55:26.000000000 +0200 |
344 | +++ sle11-2009-10-16/arch/x86/kernel/acpi/boot.c 2009-08-26 12:03:49.000000000 +0200 | |
345 | @@ -949,7 +949,9 @@ void __init mp_register_ioapic(int id, u | |
2cb7cef9 BS |
346 | mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; |
347 | mp_ioapics[idx].mp_apicaddr = address; | |
348 | ||
349 | +#ifndef CONFIG_XEN | |
350 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); | |
351 | +#endif | |
352 | mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); | |
353 | #ifdef CONFIG_X86_32 | |
354 | mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); | |
82094b55 | 355 | @@ -1106,7 +1108,7 @@ int mp_register_gsi(u32 gsi, int trigger |
2cb7cef9 BS |
356 | { |
357 | int ioapic; | |
358 | int ioapic_pin; | |
359 | -#ifdef CONFIG_X86_32 | |
360 | +#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) | |
361 | #define MAX_GSI_NUM 4096 | |
362 | #define IRQ_COMPRESSION_START 64 | |
363 | ||
82094b55 | 364 | @@ -1154,7 +1156,7 @@ int mp_register_gsi(u32 gsi, int trigger |
2cb7cef9 BS |
365 | if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { |
366 | pr_debug(KERN_DEBUG "Pin %d-%d already programmed\n", | |
367 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); | |
368 | -#ifdef CONFIG_X86_32 | |
369 | +#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) | |
370 | return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); | |
371 | #else | |
372 | return gsi; | |
82094b55 | 373 | @@ -1162,7 +1164,7 @@ int mp_register_gsi(u32 gsi, int trigger |
2cb7cef9 BS |
374 | } |
375 | ||
376 | set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); | |
377 | -#ifdef CONFIG_X86_32 | |
378 | +#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) | |
379 | /* | |
380 | * For GSI >= 64, use IRQ compression | |
381 | */ | |
82094b55 AF |
382 | --- sle11-2009-10-16.orig/arch/x86/kernel/acpi/sleep-xen.c 2009-03-16 16:38:05.000000000 +0100 |
383 | +++ sle11-2009-10-16/arch/x86/kernel/acpi/sleep-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
384 | @@ -9,6 +9,7 @@ |
385 | #include <linux/bootmem.h> | |
386 | #include <linux/dmi.h> | |
387 | #include <linux/cpumask.h> | |
388 | +#include <asm/segment.h> | |
389 | ||
390 | #include "realmode/wakeup.h" | |
391 | #include "sleep.h" | |
392 | @@ -20,7 +21,7 @@ unsigned long acpi_realmode_flags; | |
393 | /* address in low memory of the wakeup routine. */ | |
394 | static unsigned long acpi_realmode; | |
395 | ||
396 | -#ifdef CONFIG_64BIT | |
397 | +#if defined(CONFIG_SMP) && defined(CONFIG_64BIT) | |
398 | static char temp_stack[10240]; | |
399 | #endif | |
400 | #endif | |
401 | @@ -54,18 +55,27 @@ int acpi_save_state_mem(void) | |
402 | header->video_mode = saved_video_mode; | |
403 | ||
404 | header->wakeup_jmp_seg = acpi_wakeup_address >> 4; | |
405 | + | |
406 | + /* | |
407 | + * Set up the wakeup GDT. We set these up as Big Real Mode, | |
408 | + * that is, with limits set to 4 GB. At least the Lenovo | |
409 | + * Thinkpad X61 is known to need this for the video BIOS | |
410 | + * initialization quirk to work; this is likely to also | |
411 | + * be the case for other laptops or integrated video devices. | |
412 | + */ | |
413 | + | |
414 | /* GDT[0]: GDT self-pointer */ | |
415 | header->wakeup_gdt[0] = | |
416 | (u64)(sizeof(header->wakeup_gdt) - 1) + | |
417 | ((u64)(acpi_wakeup_address + | |
418 | ((char *)&header->wakeup_gdt - (char *)acpi_realmode)) | |
419 | << 16); | |
420 | - /* GDT[1]: real-mode-like code segment */ | |
421 | - header->wakeup_gdt[1] = (0x009bULL << 40) + | |
422 | - ((u64)acpi_wakeup_address << 16) + 0xffff; | |
423 | - /* GDT[2]: real-mode-like data segment */ | |
424 | - header->wakeup_gdt[2] = (0x0093ULL << 40) + | |
425 | - ((u64)acpi_wakeup_address << 16) + 0xffff; | |
426 | + /* GDT[1]: big real mode-like code segment */ | |
427 | + header->wakeup_gdt[1] = | |
428 | + GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff); | |
429 | + /* GDT[2]: big real mode-like data segment */ | |
430 | + header->wakeup_gdt[2] = | |
431 | + GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff); | |
432 | ||
433 | #ifndef CONFIG_64BIT | |
434 | store_gdt((struct desc_ptr *)&header->pmode_gdt); | |
435 | @@ -79,7 +89,7 @@ int acpi_save_state_mem(void) | |
436 | #endif /* !CONFIG_64BIT */ | |
437 | ||
438 | header->pmode_cr0 = read_cr0(); | |
439 | - header->pmode_cr4 = read_cr4(); | |
440 | + header->pmode_cr4 = read_cr4_safe(); | |
441 | header->realmode_flags = acpi_realmode_flags; | |
442 | header->real_magic = 0x12345678; | |
443 | ||
444 | @@ -89,7 +99,9 @@ int acpi_save_state_mem(void) | |
445 | saved_magic = 0x12345678; | |
446 | #else /* CONFIG_64BIT */ | |
447 | header->trampoline_segment = setup_trampoline() >> 4; | |
448 | - init_rsp = (unsigned long)temp_stack + 4096; | |
449 | +#ifdef CONFIG_SMP | |
450 | + stack_start.sp = temp_stack + 4096; | |
451 | +#endif | |
452 | initial_code = (unsigned long)wakeup_long64; | |
453 | saved_magic = 0x123456789abcdef0; | |
454 | #endif /* CONFIG_64BIT */ | |
455 | @@ -145,6 +157,12 @@ static int __init acpi_sleep_setup(char | |
456 | acpi_realmode_flags |= 2; | |
457 | if (strncmp(str, "s3_beep", 7) == 0) | |
458 | acpi_realmode_flags |= 4; | |
459 | +#ifdef CONFIG_HIBERNATION | |
460 | + if (strncmp(str, "s4_nohwsig", 10) == 0) | |
461 | + acpi_no_s4_hw_signature(); | |
462 | +#endif | |
463 | + if (strncmp(str, "old_ordering", 12) == 0) | |
464 | + acpi_old_suspend_ordering(); | |
465 | str = strchr(str, ','); | |
466 | if (str != NULL) | |
467 | str += strspn(str, ", \t"); | |
82094b55 AF |
468 | --- sle11-2009-10-16.orig/arch/x86/kernel/apic_32-xen.c 2009-03-16 16:33:40.000000000 +0100 |
469 | +++ sle11-2009-10-16/arch/x86/kernel/apic_32-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
470 | @@ -59,7 +59,10 @@ static cpumask_t timer_bcast_ipi; |
471 | /* | |
472 | * Debug level, exported for io_apic.c | |
473 | */ | |
474 | -int apic_verbosity; | |
475 | +unsigned int apic_verbosity; | |
476 | + | |
477 | +/* Have we found an MP table */ | |
478 | +int smp_found_config; | |
479 | ||
480 | #ifndef CONFIG_XEN | |
481 | static int modern_apic(void) | |
82094b55 AF |
482 | --- sle11-2009-10-16.orig/arch/x86/kernel/apic_64-xen.c 2009-03-16 16:33:40.000000000 +0100 |
483 | +++ sle11-2009-10-16/arch/x86/kernel/apic_64-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
484 | @@ -39,7 +39,10 @@ int disable_apic; |
485 | /* | |
486 | * Debug level, exported for io_apic.c | |
487 | */ | |
488 | -int apic_verbosity; | |
489 | +unsigned int apic_verbosity; | |
490 | + | |
491 | +/* Have we found an MP table */ | |
492 | +int smp_found_config; | |
493 | ||
494 | /* | |
495 | * The guts of the apic timer interrupt | |
82094b55 AF |
496 | --- sle11-2009-10-16.orig/arch/x86/kernel/asm-offsets_64.c 2008-11-25 12:35:54.000000000 +0100 |
497 | +++ sle11-2009-10-16/arch/x86/kernel/asm-offsets_64.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
498 | @@ -138,7 +138,7 @@ int main(void) |
499 | ||
500 | BLANK(); | |
501 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | |
502 | -#ifdef CONFIG_XEN | |
503 | +#ifdef CONFIG_PARAVIRT_XEN | |
504 | BLANK(); | |
505 | OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); | |
506 | OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); | |
82094b55 AF |
507 | --- sle11-2009-10-16.orig/arch/x86/kernel/cpu/amd_64.c 2009-10-28 14:55:02.000000000 +0100 |
508 | +++ sle11-2009-10-16/arch/x86/kernel/cpu/amd_64.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
509 | @@ -193,6 +193,7 @@ static void __cpuinit init_amd(struct cp |
510 | fam10h_check_enable_mmcfg(); | |
511 | } | |
512 | ||
513 | +#ifndef CONFIG_XEN | |
514 | if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { | |
515 | unsigned long long tseg; | |
516 | ||
517 | @@ -211,6 +212,7 @@ static void __cpuinit init_amd(struct cp | |
518 | set_memory_4k((unsigned long)__va(tseg), 1); | |
519 | } | |
520 | } | |
521 | +#endif | |
522 | } | |
523 | ||
524 | static struct cpu_dev amd_cpu_dev __cpuinitdata = { | |
82094b55 AF |
525 | --- sle11-2009-10-16.orig/arch/x86/kernel/cpu/bugs_64.c 2009-10-28 14:55:02.000000000 +0100 |
526 | +++ sle11-2009-10-16/arch/x86/kernel/cpu/bugs_64.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
527 | @@ -20,6 +20,7 @@ void __init check_bugs(void) |
528 | #endif | |
529 | alternative_instructions(); | |
530 | ||
531 | +#ifndef CONFIG_XEN | |
532 | /* | |
533 | * Make sure the first 2MB area is not mapped by huge pages | |
534 | * There are typically fixed size MTRRs in there and overlapping | |
535 | @@ -30,4 +31,5 @@ void __init check_bugs(void) | |
536 | */ | |
537 | if (!direct_gbpages) | |
538 | set_memory_4k((unsigned long)__va(0), 1); | |
539 | +#endif | |
540 | } | |
82094b55 AF |
541 | --- sle11-2009-10-16.orig/arch/x86/kernel/cpu/common-xen.c 2009-03-16 16:38:05.000000000 +0100 |
542 | +++ sle11-2009-10-16/arch/x86/kernel/cpu/common-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
543 | @@ -13,6 +13,7 @@ |
544 | #include <asm/mtrr.h> | |
545 | #include <asm/mce.h> | |
546 | #include <asm/pat.h> | |
547 | +#include <asm/asm.h> | |
548 | #ifdef CONFIG_X86_LOCAL_APIC | |
549 | #include <asm/mpspec.h> | |
550 | #include <asm/apic.h> | |
551 | @@ -341,11 +342,24 @@ static void __init early_cpu_detect(void | |
552 | ||
553 | get_cpu_vendor(c, 1); | |
554 | ||
555 | + early_get_cap(c); | |
556 | + | |
557 | if (c->x86_vendor != X86_VENDOR_UNKNOWN && | |
558 | cpu_devs[c->x86_vendor]->c_early_init) | |
559 | cpu_devs[c->x86_vendor]->c_early_init(c); | |
560 | +} | |
561 | ||
562 | - early_get_cap(c); | |
563 | +/* | |
564 | + * The NOPL instruction is supposed to exist on all CPUs with | |
565 | + * family >= 6; unfortunately, that's not true in practice because | |
566 | + * of early VIA chips and (more importantly) broken virtualizers that | |
567 | + * are not easy to detect. In the latter case it doesn't even *fail* | |
568 | + * reliably, so probing for it doesn't even work. Disable it completely | |
569 | + * unless we can find a reliable way to detect all the broken cases. | |
570 | + */ | |
571 | +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) | |
572 | +{ | |
573 | + clear_cpu_cap(c, X86_FEATURE_NOPL); | |
574 | } | |
575 | ||
576 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | |
577 | @@ -402,8 +416,8 @@ static void __cpuinit generic_identify(s | |
578 | } | |
579 | ||
580 | init_scattered_cpuid_features(c); | |
581 | + detect_nopl(c); | |
582 | } | |
583 | - | |
584 | } | |
585 | ||
586 | static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | |
587 | @@ -434,7 +448,7 @@ __setup("serialnumber", x86_serial_nr_se | |
588 | /* | |
589 | * This does the hard work of actually picking apart the CPU stuff... | |
590 | */ | |
591 | -void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |
592 | +static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |
593 | { | |
594 | int i; | |
595 | ||
596 | @@ -448,6 +462,8 @@ void __cpuinit identify_cpu(struct cpuin | |
597 | c->x86_max_cores = 1; | |
598 | c->x86_clflush_size = 32; | |
599 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | |
600 | + if (boot_cpu_has(X86_FEATURE_SYSCALL32)) | |
601 | + set_cpu_cap(c, X86_FEATURE_SYSCALL32); | |
602 | ||
603 | if (!have_cpuid_p()) { | |
604 | /* | |
605 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
82094b55 | 606 | +++ sle11-2009-10-16/arch/x86/kernel/cpu/common_64-xen.c 2009-06-04 10:21:39.000000000 +0200 |
2cb7cef9 BS |
607 | @@ -0,0 +1,771 @@ |
608 | +#include <linux/init.h> | |
609 | +#include <linux/kernel.h> | |
610 | +#include <linux/sched.h> | |
611 | +#include <linux/string.h> | |
612 | +#include <linux/bootmem.h> | |
613 | +#include <linux/bitops.h> | |
614 | +#include <linux/module.h> | |
615 | +#include <linux/kgdb.h> | |
616 | +#include <linux/topology.h> | |
617 | +#include <linux/delay.h> | |
618 | +#include <linux/smp.h> | |
619 | +#include <linux/percpu.h> | |
620 | +#include <asm/i387.h> | |
621 | +#include <asm/msr.h> | |
622 | +#include <asm/io.h> | |
623 | +#include <asm/linkage.h> | |
624 | +#include <asm/mmu_context.h> | |
625 | +#include <asm/mtrr.h> | |
626 | +#include <asm/mce.h> | |
627 | +#include <asm/pat.h> | |
628 | +#include <asm/asm.h> | |
629 | +#include <asm/numa.h> | |
630 | +#ifdef CONFIG_X86_LOCAL_APIC | |
631 | +#include <asm/mpspec.h> | |
632 | +#include <asm/apic.h> | |
633 | +#include <mach_apic.h> | |
634 | +#elif defined(CONFIG_XEN) | |
635 | +#include <mach_apic.h> | |
636 | +#endif | |
637 | +#include <asm/pda.h> | |
638 | +#include <asm/pgtable.h> | |
639 | +#include <asm/processor.h> | |
640 | +#include <asm/desc.h> | |
641 | +#include <asm/atomic.h> | |
642 | +#include <asm/proto.h> | |
643 | +#include <asm/sections.h> | |
644 | +#include <asm/setup.h> | |
645 | +#include <asm/genapic.h> | |
646 | + | |
647 | +#include "cpu.h" | |
648 | + | |
649 | +/* We need valid kernel segments for data and code in long mode too | |
650 | + * IRET will check the segment types kkeil 2000/10/28 | |
651 | + * Also sysret mandates a special GDT layout | |
652 | + */ | |
653 | +/* The TLS descriptors are currently at a different place compared to i386. | |
654 | + Hopefully nobody expects them at a fixed place (Wine?) */ | |
655 | +DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { | |
656 | + [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, | |
657 | + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, | |
658 | + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, | |
659 | + [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, | |
660 | + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, | |
661 | + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, | |
662 | +} }; | |
663 | +EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | |
664 | + | |
665 | +__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; | |
666 | + | |
667 | +/* Current gdt points %fs at the "master" per-cpu area: after this, | |
668 | + * it's on the real one. */ | |
669 | +void switch_to_new_gdt(void) | |
670 | +{ | |
671 | +#ifndef CONFIG_XEN | |
672 | + struct desc_ptr gdt_descr; | |
673 | + | |
674 | + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); | |
675 | + gdt_descr.size = GDT_SIZE - 1; | |
676 | + load_gdt(&gdt_descr); | |
677 | +#else | |
678 | + void *va, *gdt_addr = get_cpu_gdt_table(smp_processor_id()); | |
679 | + unsigned long frames[16]; | |
680 | + unsigned int f = 0; | |
681 | + | |
682 | + for (va = gdt_addr; va < gdt_addr + GDT_SIZE; va += PAGE_SIZE) { | |
683 | + frames[f++] = virt_to_mfn(va); | |
684 | + make_page_readonly(va, XENFEAT_writable_descriptor_tables); | |
685 | + } | |
686 | + if (HYPERVISOR_set_gdt(frames, GDT_SIZE / sizeof(struct desc_struct))) | |
687 | + BUG(); | |
688 | +#endif | |
689 | +} | |
690 | + | |
691 | +struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; | |
692 | + | |
693 | +static void __cpuinit default_init(struct cpuinfo_x86 *c) | |
694 | +{ | |
695 | + display_cacheinfo(c); | |
696 | +} | |
697 | + | |
698 | +static struct cpu_dev __cpuinitdata default_cpu = { | |
699 | + .c_init = default_init, | |
700 | + .c_vendor = "Unknown", | |
701 | +}; | |
702 | +static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; | |
703 | + | |
704 | +int __cpuinit get_model_name(struct cpuinfo_x86 *c) | |
705 | +{ | |
706 | + unsigned int *v; | |
707 | + | |
708 | + if (c->extended_cpuid_level < 0x80000004) | |
709 | + return 0; | |
710 | + | |
711 | + v = (unsigned int *) c->x86_model_id; | |
712 | + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); | |
713 | + cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); | |
714 | + cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); | |
715 | + c->x86_model_id[48] = 0; | |
716 | + return 1; | |
717 | +} | |
718 | + | |
719 | + | |
720 | +void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |
721 | +{ | |
722 | + unsigned int n, dummy, ebx, ecx, edx; | |
723 | + | |
724 | + n = c->extended_cpuid_level; | |
725 | + | |
726 | + if (n >= 0x80000005) { | |
727 | + cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); | |
728 | + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " | |
729 | + "D cache %dK (%d bytes/line)\n", | |
730 | + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); | |
731 | + c->x86_cache_size = (ecx>>24) + (edx>>24); | |
732 | + /* On K8 L1 TLB is inclusive, so don't count it */ | |
733 | + c->x86_tlbsize = 0; | |
734 | + } | |
735 | + | |
736 | + if (n >= 0x80000006) { | |
737 | + cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); | |
738 | + ecx = cpuid_ecx(0x80000006); | |
739 | + c->x86_cache_size = ecx >> 16; | |
740 | + c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); | |
741 | + | |
742 | + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", | |
743 | + c->x86_cache_size, ecx & 0xFF); | |
744 | + } | |
745 | +} | |
746 | + | |
747 | +void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |
748 | +{ | |
749 | +#ifdef CONFIG_SMP | |
750 | + u32 eax, ebx, ecx, edx; | |
751 | + int index_msb, core_bits; | |
752 | + | |
753 | + cpuid(1, &eax, &ebx, &ecx, &edx); | |
754 | + | |
755 | + | |
756 | + if (!cpu_has(c, X86_FEATURE_HT)) | |
757 | + return; | |
758 | + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) | |
759 | + goto out; | |
760 | + | |
761 | + smp_num_siblings = (ebx & 0xff0000) >> 16; | |
762 | + | |
763 | + if (smp_num_siblings == 1) { | |
764 | + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | |
765 | + } else if (smp_num_siblings > 1) { | |
766 | + | |
767 | + if (smp_num_siblings > NR_CPUS) { | |
768 | + printk(KERN_WARNING "CPU: Unsupported number of " | |
769 | + "siblings %d", smp_num_siblings); | |
770 | + smp_num_siblings = 1; | |
771 | + return; | |
772 | + } | |
773 | + | |
774 | + index_msb = get_count_order(smp_num_siblings); | |
775 | + c->phys_proc_id = phys_pkg_id(index_msb); | |
776 | + | |
777 | + smp_num_siblings = smp_num_siblings / c->x86_max_cores; | |
778 | + | |
779 | + index_msb = get_count_order(smp_num_siblings); | |
780 | + | |
781 | + core_bits = get_count_order(c->x86_max_cores); | |
782 | + | |
783 | + c->cpu_core_id = phys_pkg_id(index_msb) & | |
784 | + ((1 << core_bits) - 1); | |
785 | + } | |
786 | +out: | |
787 | + if ((c->x86_max_cores * smp_num_siblings) > 1) { | |
788 | + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | |
789 | + c->phys_proc_id); | |
790 | + printk(KERN_INFO "CPU: Processor Core ID: %d\n", | |
791 | + c->cpu_core_id); | |
792 | + } | |
793 | + | |
794 | +#endif | |
795 | +} | |
796 | + | |
797 | +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) | |
798 | +{ | |
799 | + char *v = c->x86_vendor_id; | |
800 | + int i; | |
801 | + static int printed; | |
802 | + | |
803 | + for (i = 0; i < X86_VENDOR_NUM; i++) { | |
804 | + if (cpu_devs[i]) { | |
805 | + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || | |
806 | + (cpu_devs[i]->c_ident[1] && | |
807 | + !strcmp(v, cpu_devs[i]->c_ident[1]))) { | |
808 | + c->x86_vendor = i; | |
809 | + this_cpu = cpu_devs[i]; | |
810 | + return; | |
811 | + } | |
812 | + } | |
813 | + } | |
814 | + if (!printed) { | |
815 | + printed++; | |
816 | + printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); | |
817 | + printk(KERN_ERR "CPU: Your system may be unstable.\n"); | |
818 | + } | |
819 | + c->x86_vendor = X86_VENDOR_UNKNOWN; | |
820 | +} | |
821 | + | |
822 | +static void __init early_cpu_support_print(void) | |
823 | +{ | |
824 | + int i,j; | |
825 | + struct cpu_dev *cpu_devx; | |
826 | + | |
827 | + printk("KERNEL supported cpus:\n"); | |
828 | + for (i = 0; i < X86_VENDOR_NUM; i++) { | |
829 | + cpu_devx = cpu_devs[i]; | |
830 | + if (!cpu_devx) | |
831 | + continue; | |
832 | + for (j = 0; j < 2; j++) { | |
833 | + if (!cpu_devx->c_ident[j]) | |
834 | + continue; | |
835 | + printk(" %s %s\n", cpu_devx->c_vendor, | |
836 | + cpu_devx->c_ident[j]); | |
837 | + } | |
838 | + } | |
839 | +} | |
840 | + | |
841 | +/* | |
842 | + * The NOPL instruction is supposed to exist on all CPUs with | |
843 | + * family >= 6, unfortunately, that's not true in practice because | |
844 | + * of early VIA chips and (more importantly) broken virtualizers that | |
845 | + * are not easy to detect. Hence, probe for it based on first | |
846 | + * principles. | |
847 | + * | |
848 | + * Note: no 64-bit chip is known to lack these, but put the code here | |
849 | + * for consistency with 32 bits, and to make it utterly trivial to | |
850 | + * diagnose the problem should it ever surface. | |
851 | + */ | |
852 | +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) | |
853 | +{ | |
854 | + const u32 nopl_signature = 0x888c53b1; /* Random number */ | |
855 | + u32 has_nopl = nopl_signature; | |
856 | + | |
857 | + clear_cpu_cap(c, X86_FEATURE_NOPL); | |
858 | + if (c->x86 >= 6) { | |
859 | + asm volatile("\n" | |
860 | + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ | |
861 | + "2:\n" | |
862 | + " .section .fixup,\"ax\"\n" | |
863 | + "3: xor %0,%0\n" | |
864 | + " jmp 2b\n" | |
865 | + " .previous\n" | |
866 | + _ASM_EXTABLE(1b,3b) | |
867 | + : "+a" (has_nopl)); | |
868 | + | |
869 | + if (has_nopl == nopl_signature) | |
870 | + set_cpu_cap(c, X86_FEATURE_NOPL); | |
871 | + } | |
872 | +} | |
873 | + | |
874 | +static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); | |
875 | + | |
876 | +void __init early_cpu_init(void) | |
877 | +{ | |
878 | + struct cpu_vendor_dev *cvdev; | |
879 | + | |
880 | + for (cvdev = __x86cpuvendor_start ; | |
881 | + cvdev < __x86cpuvendor_end ; | |
882 | + cvdev++) | |
883 | + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; | |
884 | + early_cpu_support_print(); | |
885 | + early_identify_cpu(&boot_cpu_data); | |
886 | +} | |
887 | + | |
888 | +/* Do some early cpuid on the boot CPU to get some parameters that are | |
889 | + needed before check_bugs. Everything advanced is in identify_cpu | |
890 | + below. */ | |
891 | +static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |
892 | +{ | |
893 | + u32 tfms, xlvl; | |
894 | + | |
895 | + c->loops_per_jiffy = loops_per_jiffy; | |
896 | + c->x86_cache_size = -1; | |
897 | + c->x86_vendor = X86_VENDOR_UNKNOWN; | |
898 | + c->x86_model = c->x86_mask = 0; /* So far unknown... */ | |
899 | + c->x86_vendor_id[0] = '\0'; /* Unset */ | |
900 | + c->x86_model_id[0] = '\0'; /* Unset */ | |
901 | + c->x86_clflush_size = 64; | |
902 | + c->x86_cache_alignment = c->x86_clflush_size; | |
903 | + c->x86_max_cores = 1; | |
904 | + c->x86_coreid_bits = 0; | |
905 | + c->extended_cpuid_level = 0; | |
906 | + memset(&c->x86_capability, 0, sizeof c->x86_capability); | |
907 | + | |
908 | + /* Get vendor name */ | |
909 | + cpuid(0x00000000, (unsigned int *)&c->cpuid_level, | |
910 | + (unsigned int *)&c->x86_vendor_id[0], | |
911 | + (unsigned int *)&c->x86_vendor_id[8], | |
912 | + (unsigned int *)&c->x86_vendor_id[4]); | |
913 | + | |
914 | + get_cpu_vendor(c); | |
915 | + | |
916 | + /* Initialize the standard set of capabilities */ | |
917 | + /* Note that the vendor-specific code below might override */ | |
918 | + | |
919 | + /* Intel-defined flags: level 0x00000001 */ | |
920 | + if (c->cpuid_level >= 0x00000001) { | |
921 | + __u32 misc; | |
922 | + cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4], | |
923 | + &c->x86_capability[0]); | |
924 | + c->x86 = (tfms >> 8) & 0xf; | |
925 | + c->x86_model = (tfms >> 4) & 0xf; | |
926 | + c->x86_mask = tfms & 0xf; | |
927 | + if (c->x86 == 0xf) | |
928 | + c->x86 += (tfms >> 20) & 0xff; | |
929 | + if (c->x86 >= 0x6) | |
930 | + c->x86_model += ((tfms >> 16) & 0xF) << 4; | |
931 | + if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) | |
932 | + c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; | |
933 | + } else { | |
934 | + /* Have CPUID level 0 only - unheard of */ | |
935 | + c->x86 = 4; | |
936 | + } | |
937 | + | |
938 | + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; | |
939 | +#ifdef CONFIG_SMP | |
940 | + c->phys_proc_id = c->initial_apicid; | |
941 | +#endif | |
942 | + /* AMD-defined flags: level 0x80000001 */ | |
943 | + xlvl = cpuid_eax(0x80000000); | |
944 | + c->extended_cpuid_level = xlvl; | |
945 | + if ((xlvl & 0xffff0000) == 0x80000000) { | |
946 | + if (xlvl >= 0x80000001) { | |
947 | + c->x86_capability[1] = cpuid_edx(0x80000001); | |
948 | + c->x86_capability[6] = cpuid_ecx(0x80000001); | |
949 | + } | |
950 | + if (xlvl >= 0x80000004) | |
951 | + get_model_name(c); /* Default name */ | |
952 | + } | |
953 | + | |
954 | + /* Transmeta-defined flags: level 0x80860001 */ | |
955 | + xlvl = cpuid_eax(0x80860000); | |
956 | + if ((xlvl & 0xffff0000) == 0x80860000) { | |
957 | + /* Don't set x86_cpuid_level here for now to not confuse. */ | |
958 | + if (xlvl >= 0x80860001) | |
959 | + c->x86_capability[2] = cpuid_edx(0x80860001); | |
960 | + } | |
961 | + | |
962 | + if (c->extended_cpuid_level >= 0x80000007) | |
963 | + c->x86_power = cpuid_edx(0x80000007); | |
964 | + | |
965 | + if (c->extended_cpuid_level >= 0x80000008) { | |
966 | + u32 eax = cpuid_eax(0x80000008); | |
967 | + | |
968 | + c->x86_virt_bits = (eax >> 8) & 0xff; | |
969 | + c->x86_phys_bits = eax & 0xff; | |
970 | + } | |
971 | + | |
972 | + detect_nopl(c); | |
973 | + | |
974 | + if (c->x86_vendor != X86_VENDOR_UNKNOWN && | |
975 | + cpu_devs[c->x86_vendor]->c_early_init) | |
976 | + cpu_devs[c->x86_vendor]->c_early_init(c); | |
977 | + | |
978 | + validate_pat_support(c); | |
979 | +} | |
980 | + | |
981 | +/* | |
982 | + * This does the hard work of actually picking apart the CPU stuff... | |
983 | + */ | |
984 | +static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |
985 | +{ | |
986 | + int i; | |
987 | + | |
988 | + early_identify_cpu(c); | |
989 | + | |
990 | + init_scattered_cpuid_features(c); | |
991 | + | |
992 | + c->apicid = phys_pkg_id(0); | |
993 | + | |
994 | + /* | |
995 | + * Vendor-specific initialization. In this section we | |
996 | + * canonicalize the feature flags, meaning if there are | |
997 | + * features a certain CPU supports which CPUID doesn't | |
998 | + * tell us, CPUID claiming incorrect flags, or other bugs, | |
999 | + * we handle them here. | |
1000 | + * | |
1001 | + * At the end of this section, c->x86_capability better | |
1002 | + * indicate the features this CPU genuinely supports! | |
1003 | + */ | |
1004 | + if (this_cpu->c_init) | |
1005 | + this_cpu->c_init(c); | |
1006 | + | |
1007 | + detect_ht(c); | |
1008 | + | |
1009 | + /* | |
1010 | + * On SMP, boot_cpu_data holds the common feature set between | |
1011 | + * all CPUs; so make sure that we indicate which features are | |
1012 | + * common between the CPUs. The first time this routine gets | |
1013 | + * executed, c == &boot_cpu_data. | |
1014 | + */ | |
1015 | + if (c != &boot_cpu_data) { | |
1016 | + /* AND the already accumulated flags with these */ | |
1017 | + for (i = 0; i < NCAPINTS; i++) | |
1018 | + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | |
1019 | + } | |
1020 | + | |
1021 | + /* Clear all flags overridden by options */ | |
1022 | + for (i = 0; i < NCAPINTS; i++) | |
1023 | + c->x86_capability[i] &= ~cleared_cpu_caps[i]; | |
1024 | + | |
1025 | +#ifdef CONFIG_X86_MCE | |
1026 | + mcheck_init(c); | |
1027 | +#endif | |
1028 | + select_idle_routine(c); | |
1029 | + | |
1030 | +#ifdef CONFIG_NUMA | |
1031 | + numa_add_cpu(smp_processor_id()); | |
1032 | +#endif | |
1033 | + | |
1034 | +} | |
1035 | + | |
1036 | +void __cpuinit identify_boot_cpu(void) | |
1037 | +{ | |
1038 | + identify_cpu(&boot_cpu_data); | |
1039 | +} | |
1040 | + | |
1041 | +void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | |
1042 | +{ | |
1043 | + BUG_ON(c == &boot_cpu_data); | |
1044 | + identify_cpu(c); | |
1045 | + mtrr_ap_init(); | |
1046 | +} | |
1047 | + | |
1048 | +static __init int setup_noclflush(char *arg) | |
1049 | +{ | |
1050 | + setup_clear_cpu_cap(X86_FEATURE_CLFLSH); | |
1051 | + return 1; | |
1052 | +} | |
1053 | +__setup("noclflush", setup_noclflush); | |
1054 | + | |
1055 | +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | |
1056 | +{ | |
1057 | + if (c->x86_model_id[0]) | |
1058 | + printk(KERN_CONT "%s", c->x86_model_id); | |
1059 | + | |
1060 | + if (c->x86_mask || c->cpuid_level >= 0) | |
1061 | + printk(KERN_CONT " stepping %02x\n", c->x86_mask); | |
1062 | + else | |
1063 | + printk(KERN_CONT "\n"); | |
1064 | +} | |
1065 | + | |
1066 | +static __init int setup_disablecpuid(char *arg) | |
1067 | +{ | |
1068 | + int bit; | |
1069 | + if (get_option(&arg, &bit) && bit < NCAPINTS*32) | |
1070 | + setup_clear_cpu_cap(bit); | |
1071 | + else | |
1072 | + return 0; | |
1073 | + return 1; | |
1074 | +} | |
1075 | +__setup("clearcpuid=", setup_disablecpuid); | |
1076 | + | |
1077 | +cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | |
1078 | + | |
1079 | +struct x8664_pda **_cpu_pda __read_mostly; | |
1080 | +EXPORT_SYMBOL(_cpu_pda); | |
1081 | + | |
1082 | +#ifndef CONFIG_X86_NO_IDT | |
1083 | +struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | |
1084 | +#endif | |
1085 | + | |
1086 | +char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; | |
1087 | + | |
1088 | +unsigned long __supported_pte_mask __read_mostly = ~0UL; | |
1089 | +EXPORT_SYMBOL_GPL(__supported_pte_mask); | |
1090 | + | |
1091 | +static int do_not_nx __cpuinitdata; | |
1092 | + | |
1093 | +/* noexec=on|off | |
1094 | +Control non executable mappings for 64bit processes. | |
1095 | + | |
1096 | +on Enable(default) | |
1097 | +off Disable | |
1098 | +*/ | |
1099 | +static int __init nonx_setup(char *str) | |
1100 | +{ | |
1101 | + if (!str) | |
1102 | + return -EINVAL; | |
1103 | + if (!strncmp(str, "on", 2)) { | |
1104 | + __supported_pte_mask |= _PAGE_NX; | |
1105 | + do_not_nx = 0; | |
1106 | + } else if (!strncmp(str, "off", 3)) { | |
1107 | + do_not_nx = 1; | |
1108 | + __supported_pte_mask &= ~_PAGE_NX; | |
1109 | + } | |
1110 | + return 0; | |
1111 | +} | |
1112 | +early_param("noexec", nonx_setup); | |
1113 | + | |
1114 | +int force_personality32; | |
1115 | + | |
1116 | +/* noexec32=on|off | |
1117 | +Control non executable heap for 32bit processes. | |
1118 | +To control the stack too use noexec=off | |
1119 | + | |
1120 | +on PROT_READ does not imply PROT_EXEC for 32bit processes (default) | |
1121 | +off PROT_READ implies PROT_EXEC | |
1122 | +*/ | |
1123 | +static int __init nonx32_setup(char *str) | |
1124 | +{ | |
1125 | + if (!strcmp(str, "on")) | |
1126 | + force_personality32 &= ~READ_IMPLIES_EXEC; | |
1127 | + else if (!strcmp(str, "off")) | |
1128 | + force_personality32 |= READ_IMPLIES_EXEC; | |
1129 | + return 1; | |
1130 | +} | |
1131 | +__setup("noexec32=", nonx32_setup); | |
1132 | + | |
1133 | +static void __init_refok switch_pt(int cpu) | |
1134 | +{ | |
1135 | +#ifdef CONFIG_XEN | |
1136 | + if (cpu == 0) | |
1137 | + xen_init_pt(); | |
1138 | + xen_pt_switch(__pa_symbol(init_level4_pgt)); | |
1139 | + xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt))); | |
1140 | +#endif | |
1141 | +} | |
1142 | + | |
1143 | +void pda_init(int cpu) | |
1144 | +{ | |
1145 | + struct x8664_pda *pda = cpu_pda(cpu); | |
1146 | + | |
1147 | + /* Set up data that may be needed in __get_free_pages early */ | |
1148 | + loadsegment(fs, 0); | |
1149 | + loadsegment(gs, 0); | |
1150 | +#ifndef CONFIG_XEN | |
1151 | + /* Memory clobbers used to order PDA accesses */ | |
1152 | + mb(); | |
1153 | + wrmsrl(MSR_GS_BASE, pda); | |
1154 | + mb(); | |
1155 | +#else | |
1156 | + if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, | |
1157 | + (unsigned long)pda)) | |
1158 | + BUG(); | |
1159 | +#endif | |
1160 | + | |
1161 | + pda->cpunumber = cpu; | |
1162 | + pda->irqcount = -1; | |
1163 | + pda->kernelstack = (unsigned long)stack_thread_info() - | |
1164 | + PDA_STACKOFFSET + THREAD_SIZE; | |
1165 | + pda->active_mm = &init_mm; | |
1166 | + pda->mmu_state = 0; | |
1167 | + | |
1168 | + if (cpu == 0) { | |
1169 | + /* others are initialized in smpboot.c */ | |
1170 | + pda->pcurrent = &init_task; | |
1171 | + pda->irqstackptr = boot_cpu_stack; | |
1172 | + pda->irqstackptr += IRQSTACKSIZE - 64; | |
1173 | + } else { | |
1174 | + if (!pda->irqstackptr) { | |
1175 | + pda->irqstackptr = (char *) | |
1176 | + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); | |
1177 | + if (!pda->irqstackptr) | |
1178 | + panic("cannot allocate irqstack for cpu %d", | |
1179 | + cpu); | |
1180 | + pda->irqstackptr += IRQSTACKSIZE - 64; | |
1181 | + } | |
1182 | + | |
1183 | + if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) | |
1184 | + pda->nodenumber = cpu_to_node(cpu); | |
1185 | + } | |
1186 | + | |
1187 | + switch_pt(cpu); | |
1188 | +} | |
1189 | + | |
1190 | +#ifndef CONFIG_X86_NO_TSS | |
1191 | +char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + | |
1192 | + DEBUG_STKSZ] __page_aligned_bss; | |
1193 | +#endif | |
1194 | + | |
1195 | +extern asmlinkage void ignore_sysret(void); | |
1196 | + | |
1197 | +void __cpuinit syscall_init(void) | |
1198 | +{ | |
1199 | +#ifndef CONFIG_XEN | |
1200 | + /* | |
1201 | + * LSTAR and STAR live in a bit strange symbiosis. | |
1202 | + * They both write to the same internal register. STAR allows to | |
1203 | + * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. | |
1204 | + */ | |
1205 | + wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); | |
1206 | + wrmsrl(MSR_LSTAR, system_call); | |
1207 | + wrmsrl(MSR_CSTAR, ignore_sysret); | |
1208 | + | |
1209 | + /* Flags to clear on syscall */ | |
1210 | + wrmsrl(MSR_SYSCALL_MASK, | |
1211 | + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); | |
1212 | +#endif | |
1213 | +#ifdef CONFIG_IA32_EMULATION | |
1214 | + syscall32_cpu_init(); | |
1215 | +#else | |
1216 | + static const struct callback_register __cpuinitconst cstar = { | |
1217 | + .type = CALLBACKTYPE_syscall32, | |
1218 | + .address = (unsigned long)ignore_sysret | |
1219 | + }; | |
1220 | + | |
1221 | + if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar)) | |
1222 | + printk(KERN_WARNING "Unable to register CSTAR callback\n"); | |
1223 | +#endif | |
1224 | +} | |
1225 | + | |
1226 | +void __cpuinit check_efer(void) | |
1227 | +{ | |
1228 | + unsigned long efer; | |
1229 | + | |
1230 | + rdmsrl(MSR_EFER, efer); | |
1231 | + if (!(efer & EFER_NX) || do_not_nx) | |
1232 | + __supported_pte_mask &= ~_PAGE_NX; | |
1233 | +} | |
1234 | + | |
1235 | +unsigned long kernel_eflags; | |
1236 | + | |
1237 | +#ifndef CONFIG_X86_NO_TSS | |
1238 | +/* | |
1239 | + * Copies of the original ist values from the tss are only accessed during | |
1240 | + * debugging, no special alignment required. | |
1241 | + */ | |
1242 | +DEFINE_PER_CPU(struct orig_ist, orig_ist); | |
1243 | +#endif | |
1244 | + | |
1245 | +/* | |
1246 | + * cpu_init() initializes state that is per-CPU. Some data is already | |
1247 | + * initialized (naturally) in the bootstrap process, such as the GDT | |
1248 | + * and IDT. We reload them nevertheless, this function acts as a | |
1249 | + * 'CPU state barrier', nothing should get across. | |
1250 | + * A lot of state is already set up in PDA init. | |
1251 | + */ | |
1252 | +void __cpuinit cpu_init(void) | |
1253 | +{ | |
1254 | + int cpu = stack_smp_processor_id(); | |
1255 | +#ifndef CONFIG_X86_NO_TSS | |
1256 | + struct tss_struct *t = &per_cpu(init_tss, cpu); | |
1257 | + struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); | |
1258 | + unsigned long v; | |
1259 | + char *estacks = NULL; | |
1260 | + int i; | |
1261 | +#endif | |
1262 | + struct task_struct *me; | |
1263 | + | |
1264 | + /* CPU 0 is initialised in head64.c */ | |
1265 | + if (cpu != 0) | |
1266 | + pda_init(cpu); | |
1267 | +#ifndef CONFIG_X86_NO_TSS | |
1268 | + else | |
1269 | + estacks = boot_exception_stacks; | |
1270 | +#endif | |
1271 | + | |
1272 | + me = current; | |
1273 | + | |
1274 | + if (cpu_test_and_set(cpu, cpu_initialized)) | |
1275 | + panic("CPU#%d already initialized!\n", cpu); | |
1276 | + | |
1277 | + printk(KERN_INFO "Initializing CPU#%d\n", cpu); | |
1278 | + | |
1279 | + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | |
1280 | + | |
1281 | + /* | |
1282 | + * Initialize the per-CPU GDT with the boot GDT, | |
1283 | + * and set up the GDT descriptor: | |
1284 | + */ | |
1285 | + | |
1286 | + switch_to_new_gdt(); | |
1287 | +#ifndef CONFIG_X86_NO_IDT | |
1288 | + load_idt((const struct desc_ptr *)&idt_descr); | |
1289 | +#endif | |
1290 | + | |
1291 | + memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); | |
1292 | + syscall_init(); | |
1293 | + | |
1294 | + wrmsrl(MSR_FS_BASE, 0); | |
1295 | + wrmsrl(MSR_KERNEL_GS_BASE, 0); | |
1296 | + barrier(); | |
1297 | + | |
1298 | + check_efer(); | |
1299 | + | |
1300 | +#ifndef CONFIG_X86_NO_TSS | |
1301 | + /* | |
1302 | + * set up and load the per-CPU TSS | |
1303 | + */ | |
1304 | + if (!orig_ist->ist[0]) { | |
1305 | + static const unsigned int order[N_EXCEPTION_STACKS] = { | |
1306 | + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, | |
1307 | + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER | |
1308 | + }; | |
1309 | + for (v = 0; v < N_EXCEPTION_STACKS; v++) { | |
1310 | + if (cpu) { | |
1311 | + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); | |
1312 | + if (!estacks) | |
1313 | + panic("Cannot allocate exception " | |
1314 | + "stack %ld %d\n", v, cpu); | |
1315 | + } | |
1316 | + estacks += PAGE_SIZE << order[v]; | |
1317 | + orig_ist->ist[v] = t->x86_tss.ist[v] = | |
1318 | + (unsigned long)estacks; | |
1319 | + } | |
1320 | + } | |
1321 | + | |
1322 | + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | |
1323 | + /* | |
1324 | + * <= is required because the CPU will access up to | |
1325 | + * 8 bits beyond the end of the IO permission bitmap. | |
1326 | + */ | |
1327 | + for (i = 0; i <= IO_BITMAP_LONGS; i++) | |
1328 | + t->io_bitmap[i] = ~0UL; | |
1329 | +#endif | |
1330 | + | |
1331 | + atomic_inc(&init_mm.mm_count); | |
1332 | + me->active_mm = &init_mm; | |
1333 | + if (me->mm) | |
1334 | + BUG(); | |
1335 | + enter_lazy_tlb(&init_mm, me); | |
1336 | + | |
1337 | + load_sp0(t, &current->thread); | |
1338 | +#ifndef CONFIG_X86_NO_TSS | |
1339 | + set_tss_desc(cpu, t); | |
1340 | + load_TR_desc(); | |
1341 | +#endif | |
1342 | + load_LDT(&init_mm.context); | |
1343 | + | |
1344 | +#ifdef CONFIG_KGDB | |
1345 | + /* | |
1346 | + * If the kgdb is connected no debug regs should be altered. This | |
1347 | + * is only applicable when KGDB and a KGDB I/O module are built | |
1348 | + * into the kernel and you are using early debugging with | |
1349 | + * kgdbwait. KGDB will control the kernel HW breakpoint registers. | |
1350 | + */ | |
1351 | + if (kgdb_connected && arch_kgdb_ops.correct_hw_break) | |
1352 | + arch_kgdb_ops.correct_hw_break(); | |
1353 | + else { | |
1354 | +#endif | |
1355 | + /* | |
1356 | + * Clear all 6 debug registers: | |
1357 | + */ | |
1358 | + | |
1359 | + set_debugreg(0UL, 0); | |
1360 | + set_debugreg(0UL, 1); | |
1361 | + set_debugreg(0UL, 2); | |
1362 | + set_debugreg(0UL, 3); | |
1363 | + set_debugreg(0UL, 6); | |
1364 | + set_debugreg(0UL, 7); | |
1365 | +#ifdef CONFIG_KGDB | |
1366 | + /* If the kgdb is connected no debug regs should be altered. */ | |
1367 | + } | |
1368 | +#endif | |
1369 | + | |
1370 | + fpu_init(); | |
1371 | + | |
1372 | + asm ("pushfq; popq %0" : "=rm" (kernel_eflags)); | |
1373 | + if (raw_irqs_disabled()) | |
1374 | + kernel_eflags &= ~X86_EFLAGS_IF; | |
1375 | + | |
1376 | + if (is_uv_system()) | |
1377 | + uv_cpu_init(); | |
1378 | +} | |
1379 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
82094b55 | 1380 | +++ sle11-2009-10-16/arch/x86/kernel/e820-xen.c 2009-06-04 10:21:39.000000000 +0200 |
2cb7cef9 BS |
1381 | @@ -0,0 +1,1545 @@ |
1382 | +/* | |
1383 | + * Handle the memory map. | |
1384 | + * The functions here do the job until bootmem takes over. | |
1385 | + * | |
1386 | + * Getting sanitize_e820_map() in sync with i386 version by applying change: | |
1387 | + * - Provisions for empty E820 memory regions (reported by certain BIOSes). | |
1388 | + * Alex Achenbach <xela@slit.de>, December 2002. | |
1389 | + * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> | |
1390 | + * | |
1391 | + */ | |
1392 | +#include <linux/kernel.h> | |
1393 | +#include <linux/types.h> | |
1394 | +#include <linux/init.h> | |
1395 | +#include <linux/bootmem.h> | |
1396 | +#include <linux/ioport.h> | |
1397 | +#include <linux/string.h> | |
1398 | +#include <linux/kexec.h> | |
1399 | +#include <linux/module.h> | |
1400 | +#include <linux/mm.h> | |
1401 | +#include <linux/pfn.h> | |
1402 | +#include <linux/suspend.h> | |
1403 | +#include <linux/firmware-map.h> | |
1404 | + | |
1405 | +#include <asm/pgtable.h> | |
1406 | +#include <asm/page.h> | |
1407 | +#include <asm/e820.h> | |
1408 | +#include <asm/proto.h> | |
1409 | +#include <asm/setup.h> | |
1410 | +#include <xen/interface/memory.h> | |
1411 | + | |
1412 | +/* | |
1413 | + * The e820 map is the map that gets modified e.g. with command line parameters | |
1414 | + * and that is also registered with modifications in the kernel resource tree | |
1415 | + * with the iomem_resource as parent. | |
1416 | + * | |
1417 | + * The e820_saved is directly saved after the BIOS-provided memory map is | |
1418 | + * copied. It doesn't get modified afterwards. It's registered for the | |
1419 | + * /sys/firmware/memmap interface. | |
1420 | + * | |
1421 | + * That memory map is not modified and is used as base for kexec. The kexec'd | |
1422 | + * kernel should get the same memory map as the firmware provides. Then the | |
1423 | + * user can e.g. boot the original kernel with mem=1G while still booting the | |
1424 | + * next kernel with full memory. | |
1425 | + */ | |
1426 | +struct e820map e820; | |
1427 | +#ifndef CONFIG_XEN | |
1428 | +struct e820map e820_saved; | |
1429 | +#else | |
1430 | +static struct e820map machine_e820; | |
1431 | +#define e820_saved machine_e820 | |
1432 | +#endif | |
1433 | + | |
1434 | +/* For PCI or other memory-mapped resources */ | |
1435 | +unsigned long pci_mem_start = 0xaeedbabe; | |
1436 | +#ifdef CONFIG_PCI | |
1437 | +EXPORT_SYMBOL(pci_mem_start); | |
1438 | +#endif | |
1439 | + | |
1440 | +/* | |
1441 | + * This function checks if any part of the range <start,end> is mapped | |
1442 | + * with type. | |
1443 | + */ | |
1444 | +int | |
1445 | +e820_any_mapped(u64 start, u64 end, unsigned type) | |
1446 | +{ | |
1447 | + int i; | |
1448 | + | |
1449 | +#ifndef CONFIG_XEN | |
1450 | + for (i = 0; i < e820.nr_map; i++) { | |
1451 | + struct e820entry *ei = &e820.map[i]; | |
1452 | +#else | |
1453 | + if (!is_initial_xendomain()) | |
1454 | + return 0; | |
1455 | + for (i = 0; i < machine_e820.nr_map; ++i) { | |
1456 | + const struct e820entry *ei = &machine_e820.map[i]; | |
1457 | +#endif | |
1458 | + | |
1459 | + if (type && ei->type != type) | |
1460 | + continue; | |
1461 | + if (ei->addr >= end || ei->addr + ei->size <= start) | |
1462 | + continue; | |
1463 | + return 1; | |
1464 | + } | |
1465 | + return 0; | |
1466 | +} | |
1467 | +EXPORT_SYMBOL_GPL(e820_any_mapped); | |
1468 | + | |
1469 | +/* | |
1470 | + * This function checks if the entire range <start,end> is mapped with type. | |
1471 | + * | |
1472 | + * Note: this function only works correctly if the e820 table is sorted and | |
1473 | + * non-overlapping, which is the case. | |
1474 | + */ | |
1475 | +int __init e820_all_mapped(u64 start, u64 end, unsigned type) | |
1476 | +{ | |
1477 | + int i; | |
1478 | + | |
1479 | +#ifndef CONFIG_XEN | |
1480 | + for (i = 0; i < e820.nr_map; i++) { | |
1481 | + struct e820entry *ei = &e820.map[i]; | |
1482 | +#else | |
1483 | + if (!is_initial_xendomain()) | |
1484 | + return 0; | |
1485 | + for (i = 0; i < machine_e820.nr_map; ++i) { | |
1486 | + const struct e820entry *ei = &machine_e820.map[i]; | |
1487 | +#endif | |
1488 | + | |
1489 | + if (type && ei->type != type) | |
1490 | + continue; | |
1491 | + /* does this entry overlap any part of <start,end>? if not, skip it */ | |
1492 | + if (ei->addr >= end || ei->addr + ei->size <= start) | |
1493 | + continue; | |
1494 | + | |
1495 | + /* if the region is at the beginning of <start,end> we move | |
1496 | + * start to the end of the region since it's ok until there | |
1497 | + */ | |
1498 | + if (ei->addr <= start) | |
1499 | + start = ei->addr + ei->size; | |
1500 | + /* | |
1501 | + * if start is now at or beyond end, we're done, full | |
1502 | + * coverage | |
1503 | + */ | |
1504 | + if (start >= end) | |
1505 | + return 1; | |
1506 | + } | |
1507 | + return 0; | |
1508 | +} | |
1509 | + | |
1510 | +/* | |
1511 | + * Add a memory region to the kernel e820 map. | |
1512 | + */ | |
1513 | +void __init e820_add_region(u64 start, u64 size, int type) | |
1514 | +{ | |
1515 | + int x = e820.nr_map; | |
1516 | + | |
1517 | + if (x == ARRAY_SIZE(e820.map)) { | |
1518 | + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | |
1519 | + return; | |
1520 | + } | |
1521 | + | |
1522 | + e820.map[x].addr = start; | |
1523 | + e820.map[x].size = size; | |
1524 | + e820.map[x].type = type; | |
1525 | + e820.nr_map++; | |
1526 | +} | |
1527 | + | |
1528 | +void __init e820_print_map(char *who) | |
1529 | +{ | |
1530 | + int i; | |
1531 | + | |
1532 | + for (i = 0; i < e820.nr_map; i++) { | |
1533 | + printk(KERN_INFO " %s: %016Lx - %016Lx ", who, | |
1534 | + (unsigned long long) e820.map[i].addr, | |
1535 | + (unsigned long long) | |
1536 | + (e820.map[i].addr + e820.map[i].size)); | |
1537 | + switch (e820.map[i].type) { | |
1538 | + case E820_RAM: | |
1539 | + case E820_RESERVED_KERN: | |
1540 | + printk(KERN_CONT "(usable)\n"); | |
1541 | + break; | |
1542 | + case E820_RESERVED: | |
1543 | + printk(KERN_CONT "(reserved)\n"); | |
1544 | + break; | |
1545 | + case E820_ACPI: | |
1546 | + printk(KERN_CONT "(ACPI data)\n"); | |
1547 | + break; | |
1548 | + case E820_NVS: | |
1549 | + printk(KERN_CONT "(ACPI NVS)\n"); | |
1550 | + break; | |
1551 | + default: | |
1552 | + printk(KERN_CONT "type %u\n", e820.map[i].type); | |
1553 | + break; | |
1554 | + } | |
1555 | + } | |
1556 | +} | |
1557 | + | |
1558 | +/* | |
1559 | + * Sanitize the BIOS e820 map. | |
1560 | + * | |
1561 | + * Some e820 responses include overlapping entries. The following | |
1562 | + * replaces the original e820 map with a new one, removing overlaps, | |
1563 | + * and resolving conflicting memory types in favor of highest | |
1564 | + * numbered type. | |
1565 | + * | |
1566 | + * The input parameter biosmap points to an array of 'struct | |
1567 | + * e820entry' which on entry has elements in the range [0, *pnr_map) | |
1568 | + * valid, and which has space for up to max_nr_map entries. | |
1569 | + * On return, the resulting sanitized e820 map entries will be | |
1570 | + * overwritten in the same location, starting at biosmap. | |
1571 | + * | |
1572 | + * The integer pointed to by pnr_map must be valid on entry (the | |
1573 | + * current number of valid entries located at biosmap) and will | |
1574 | + * be updated on return, with the new number of valid entries | |
1575 | + * (something no more than max_nr_map.) | |
1576 | + * | |
1577 | + * The return value from sanitize_e820_map() is zero if it | |
1578 | + * successfully 'sanitized' the map entries passed in, and is -1 | |
1579 | + * if it did nothing, which can happen if either of (1) it was | |
1580 | + * only passed one map entry, or (2) any of the input map entries | |
1581 | + * were invalid (start + size < start, meaning that the size was | |
1582 | + * so big the described memory range wrapped around through zero.) | |
1583 | + * | |
1584 | + * Visually we're performing the following | |
1585 | + * (1,2,3,4 = memory types)... | |
1586 | + * | |
1587 | + * Sample memory map (w/overlaps): | |
1588 | + * ____22__________________ | |
1589 | + * ______________________4_ | |
1590 | + * ____1111________________ | |
1591 | + * _44_____________________ | |
1592 | + * 11111111________________ | |
1593 | + * ____________________33__ | |
1594 | + * ___________44___________ | |
1595 | + * __________33333_________ | |
1596 | + * ______________22________ | |
1597 | + * ___________________2222_ | |
1598 | + * _________111111111______ | |
1599 | + * _____________________11_ | |
1600 | + * _________________4______ | |
1601 | + * | |
1602 | + * Sanitized equivalent (no overlap): | |
1603 | + * 1_______________________ | |
1604 | + * _44_____________________ | |
1605 | + * ___1____________________ | |
1606 | + * ____22__________________ | |
1607 | + * ______11________________ | |
1608 | + * _________1______________ | |
1609 | + * __________3_____________ | |
1610 | + * ___________44___________ | |
1611 | + * _____________33_________ | |
1612 | + * _______________2________ | |
1613 | + * ________________1_______ | |
1614 | + * _________________4______ | |
1615 | + * ___________________2____ | |
1616 | + * ____________________33__ | |
1617 | + * ______________________4_ | |
1618 | + */ | |
1619 | + | |
1620 | +int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, | |
1621 | + int *pnr_map) | |
1622 | +{ | |
1623 | + struct change_member { | |
1624 | + struct e820entry *pbios; /* pointer to original bios entry */ | |
1625 | + unsigned long long addr; /* address for this change point */ | |
1626 | + }; | |
1627 | + static struct change_member change_point_list[2*E820_X_MAX] __initdata; | |
1628 | + static struct change_member *change_point[2*E820_X_MAX] __initdata; | |
1629 | + static struct e820entry *overlap_list[E820_X_MAX] __initdata; | |
1630 | + static struct e820entry new_bios[E820_X_MAX] __initdata; | |
1631 | + struct change_member *change_tmp; | |
1632 | + unsigned long current_type, last_type; | |
1633 | + unsigned long long last_addr; | |
1634 | + int chgidx, still_changing; | |
1635 | + int overlap_entries; | |
1636 | + int new_bios_entry; | |
1637 | + int old_nr, new_nr, chg_nr; | |
1638 | + int i; | |
1639 | + | |
1640 | + /* if there's only one memory region, don't bother */ | |
1641 | +#ifdef CONFIG_XEN | |
1642 | + if (*pnr_map == 1) | |
1643 | + return 0; | |
1644 | +#endif | |
1645 | + if (*pnr_map < 2) | |
1646 | + return -1; | |
1647 | + | |
1648 | + old_nr = *pnr_map; | |
1649 | + BUG_ON(old_nr > max_nr_map); | |
1650 | + | |
1651 | + /* bail out if we find any unreasonable addresses in bios map */ | |
1652 | + for (i = 0; i < old_nr; i++) | |
1653 | + if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) | |
1654 | + return -1; | |
1655 | + | |
1656 | + /* create pointers for initial change-point information (for sorting) */ | |
1657 | + for (i = 0; i < 2 * old_nr; i++) | |
1658 | + change_point[i] = &change_point_list[i]; | |
1659 | + | |
1660 | + /* record all known change-points (starting and ending addresses), | |
1661 | + omitting those that are for empty memory regions */ | |
1662 | + chgidx = 0; | |
1663 | + for (i = 0; i < old_nr; i++) { | |
1664 | + if (biosmap[i].size != 0) { | |
1665 | + change_point[chgidx]->addr = biosmap[i].addr; | |
1666 | + change_point[chgidx++]->pbios = &biosmap[i]; | |
1667 | + change_point[chgidx]->addr = biosmap[i].addr + | |
1668 | + biosmap[i].size; | |
1669 | + change_point[chgidx++]->pbios = &biosmap[i]; | |
1670 | + } | |
1671 | + } | |
1672 | + chg_nr = chgidx; | |
1673 | + | |
1674 | + /* sort change-point list by memory addresses (low -> high) */ | |
1675 | + still_changing = 1; | |
1676 | + while (still_changing) { | |
1677 | + still_changing = 0; | |
1678 | + for (i = 1; i < chg_nr; i++) { | |
1679 | + unsigned long long curaddr, lastaddr; | |
1680 | + unsigned long long curpbaddr, lastpbaddr; | |
1681 | + | |
1682 | + curaddr = change_point[i]->addr; | |
1683 | + lastaddr = change_point[i - 1]->addr; | |
1684 | + curpbaddr = change_point[i]->pbios->addr; | |
1685 | + lastpbaddr = change_point[i - 1]->pbios->addr; | |
1686 | + | |
1687 | + /* | |
1688 | + * swap entries, when: | |
1689 | + * | |
1690 | + * curaddr < lastaddr or | |
1691 | + * curaddr == lastaddr and curaddr == curpbaddr and | |
1692 | + * lastaddr != lastpbaddr | |
1693 | + */ | |
1694 | + if (curaddr < lastaddr || | |
1695 | + (curaddr == lastaddr && curaddr == curpbaddr && | |
1696 | + lastaddr != lastpbaddr)) { | |
1697 | + change_tmp = change_point[i]; | |
1698 | + change_point[i] = change_point[i-1]; | |
1699 | + change_point[i-1] = change_tmp; | |
1700 | + still_changing = 1; | |
1701 | + } | |
1702 | + } | |
1703 | + } | |
1704 | + | |
1705 | + /* create a new bios memory map, removing overlaps */ | |
1706 | + overlap_entries = 0; /* number of entries in the overlap table */ | |
1707 | + new_bios_entry = 0; /* index for creating new bios map entries */ | |
1708 | + last_type = 0; /* start with undefined memory type */ | |
1709 | + last_addr = 0; /* start with 0 as last starting address */ | |
1710 | + | |
1711 | + /* loop through change-points, determining effect on the new bios map */ | |
1712 | + for (chgidx = 0; chgidx < chg_nr; chgidx++) { | |
1713 | + /* keep track of all overlapping bios entries */ | |
1714 | + if (change_point[chgidx]->addr == | |
1715 | + change_point[chgidx]->pbios->addr) { | |
1716 | + /* | |
1717 | + * add map entry to overlap list (> 1 entry | |
1718 | + * implies an overlap) | |
1719 | + */ | |
1720 | + overlap_list[overlap_entries++] = | |
1721 | + change_point[chgidx]->pbios; | |
1722 | + } else { | |
1723 | + /* | |
1724 | + * remove entry from list (order independent, | |
1725 | + * so swap with last) | |
1726 | + */ | |
1727 | + for (i = 0; i < overlap_entries; i++) { | |
1728 | + if (overlap_list[i] == | |
1729 | + change_point[chgidx]->pbios) | |
1730 | + overlap_list[i] = | |
1731 | + overlap_list[overlap_entries-1]; | |
1732 | + } | |
1733 | + overlap_entries--; | |
1734 | + } | |
1735 | + /* | |
1736 | + * if there are overlapping entries, decide which | |
1737 | + * "type" to use (larger value takes precedence -- | |
1738 | + * 1=usable, 2,3,4,4+=unusable) | |
1739 | + */ | |
1740 | + current_type = 0; | |
1741 | + for (i = 0; i < overlap_entries; i++) | |
1742 | + if (overlap_list[i]->type > current_type) | |
1743 | + current_type = overlap_list[i]->type; | |
1744 | + /* | |
1745 | + * continue building up new bios map based on this | |
1746 | + * information | |
1747 | + */ | |
1748 | + if (current_type != last_type) { | |
1749 | + if (last_type != 0) { | |
1750 | + new_bios[new_bios_entry].size = | |
1751 | + change_point[chgidx]->addr - last_addr; | |
1752 | + /* | |
1753 | + * move forward only if the new size | |
1754 | + * was non-zero | |
1755 | + */ | |
1756 | + if (new_bios[new_bios_entry].size != 0) | |
1757 | + /* | |
1758 | + * no more space left for new | |
1759 | + * bios entries ? | |
1760 | + */ | |
1761 | + if (++new_bios_entry >= max_nr_map) | |
1762 | + break; | |
1763 | + } | |
1764 | + if (current_type != 0) { | |
1765 | + new_bios[new_bios_entry].addr = | |
1766 | + change_point[chgidx]->addr; | |
1767 | + new_bios[new_bios_entry].type = current_type; | |
1768 | + last_addr = change_point[chgidx]->addr; | |
1769 | + } | |
1770 | + last_type = current_type; | |
1771 | + } | |
1772 | + } | |
1773 | + /* retain count for new bios entries */ | |
1774 | + new_nr = new_bios_entry; | |
1775 | + | |
1776 | + /* copy new bios mapping into original location */ | |
1777 | + memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); | |
1778 | + *pnr_map = new_nr; | |
1779 | + | |
1780 | + return 0; | |
1781 | +} | |
1782 | + | |
1783 | +static int __init __append_e820_map(struct e820entry *biosmap, int nr_map) | |
1784 | +{ | |
1785 | + while (nr_map) { | |
1786 | + u64 start = biosmap->addr; | |
1787 | + u64 size = biosmap->size; | |
1788 | + u64 end = start + size; | |
1789 | + u32 type = biosmap->type; | |
1790 | + | |
1791 | + /* Overflow in 64 bits? Ignore the memory map. */ | |
1792 | + if (start > end) | |
1793 | + return -1; | |
1794 | + | |
1795 | + e820_add_region(start, size, type); | |
1796 | + | |
1797 | + biosmap++; | |
1798 | + nr_map--; | |
1799 | + } | |
1800 | + return 0; | |
1801 | +} | |
1802 | + | |
1803 | +/* | |
1804 | + * Copy the BIOS e820 map into a safe place. | |
1805 | + * | |
1806 | + * Sanity-check it while we're at it.. | |
1807 | + * | |
1808 | + * If we're lucky and live on a modern system, the setup code | |
1809 | + * will have given us a memory map that we can use to properly | |
1810 | + * set up memory. If we aren't, we'll fake a memory map. | |
1811 | + */ | |
1812 | +static int __init append_e820_map(struct e820entry *biosmap, int nr_map) | |
1813 | +{ | |
1814 | +#ifndef CONFIG_XEN | |
1815 | + /* Only one memory region (or negative)? Ignore it */ | |
1816 | + if (nr_map < 2) | |
1817 | + return -1; | |
1818 | +#else | |
1819 | + BUG_ON(nr_map < 1); | |
1820 | +#endif | |
1821 | + | |
1822 | + return __append_e820_map(biosmap, nr_map); | |
1823 | +} | |
1824 | + | |
1825 | +static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, | |
1826 | + u64 size, unsigned old_type, | |
1827 | + unsigned new_type) | |
1828 | +{ | |
1829 | + unsigned int i, x; | |
1830 | + u64 real_updated_size = 0; | |
1831 | + | |
1832 | + BUG_ON(old_type == new_type); | |
1833 | + | |
1834 | + if (size > (ULLONG_MAX - start)) | |
1835 | + size = ULLONG_MAX - start; | |
1836 | + | |
1837 | + for (i = 0; i < e820x->nr_map; i++) { | |
1838 | + struct e820entry *ei = &e820x->map[i]; | |
1839 | + u64 final_start, final_end; | |
1840 | + if (ei->type != old_type) | |
1841 | + continue; | |
1842 | + /* totally covered? */ | |
1843 | + if (ei->addr >= start && | |
1844 | + (ei->addr + ei->size) <= (start + size)) { | |
1845 | + ei->type = new_type; | |
1846 | + real_updated_size += ei->size; | |
1847 | + continue; | |
1848 | + } | |
1849 | + /* partially covered */ | |
1850 | + final_start = max(start, ei->addr); | |
1851 | + final_end = min(start + size, ei->addr + ei->size); | |
1852 | + if (final_start >= final_end) | |
1853 | + continue; | |
1854 | + | |
1855 | + x = e820x->nr_map; | |
1856 | + if (x == ARRAY_SIZE(e820x->map)) { | |
1857 | + printk(KERN_ERR "Too many memory map entries!\n"); | |
1858 | + break; | |
1859 | + } | |
1860 | + e820x->map[x].addr = final_start; | |
1861 | + e820x->map[x].size = final_end - final_start; | |
1862 | + e820x->map[x].type = new_type; | |
1863 | + e820x->nr_map++; | |
1864 | + | |
1865 | + real_updated_size += final_end - final_start; | |
1866 | + | |
1867 | + if (ei->addr < final_start) | |
1868 | + continue; | |
1869 | + ei->addr = final_end; | |
1870 | + ei->size -= final_end - final_start; | |
1871 | + } | |
1872 | + return real_updated_size; | |
1873 | +} | |
1874 | + | |
1875 | +u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, | |
1876 | + unsigned new_type) | |
1877 | +{ | |
1878 | + return e820_update_range_map(&e820, start, size, old_type, new_type); | |
1879 | +} | |
1880 | + | |
1881 | +static u64 __init e820_update_range_saved(u64 start, u64 size, | |
1882 | + unsigned old_type, unsigned new_type) | |
1883 | +{ | |
1884 | +#ifdef CONFIG_XEN | |
1885 | + if (is_initial_xendomain()) | |
1886 | + return e820_update_range_map(&machine_e820, | |
1887 | + phys_to_machine(start), size, | |
1888 | + old_type, new_type); | |
1889 | +#endif | |
1890 | + return e820_update_range_map(&e820_saved, start, size, old_type, | |
1891 | + new_type); | |
1892 | +} | |
1893 | + | |
1894 | +/* make e820 not cover the range */ | |
1895 | +u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, | |
1896 | + int checktype) | |
1897 | +{ | |
1898 | + int i; | |
1899 | + u64 real_removed_size = 0; | |
1900 | + | |
1901 | + if (size > (ULLONG_MAX - start)) | |
1902 | + size = ULLONG_MAX - start; | |
1903 | + | |
1904 | + for (i = 0; i < e820.nr_map; i++) { | |
1905 | + struct e820entry *ei = &e820.map[i]; | |
1906 | + u64 final_start, final_end; | |
1907 | + | |
1908 | + if (checktype && ei->type != old_type) | |
1909 | + continue; | |
1910 | + /* totally covered? */ | |
1911 | + if (ei->addr >= start && | |
1912 | + (ei->addr + ei->size) <= (start + size)) { | |
1913 | + real_removed_size += ei->size; | |
1914 | + memset(ei, 0, sizeof(struct e820entry)); | |
1915 | + continue; | |
1916 | + } | |
1917 | + /* partially covered */ | |
1918 | + final_start = max(start, ei->addr); | |
1919 | + final_end = min(start + size, ei->addr + ei->size); | |
1920 | + if (final_start >= final_end) | |
1921 | + continue; | |
1922 | + real_removed_size += final_end - final_start; | |
1923 | + | |
1924 | + ei->size -= final_end - final_start; | |
1925 | + if (ei->addr < final_start) | |
1926 | + continue; | |
1927 | + ei->addr = final_end; | |
1928 | + } | |
1929 | + return real_removed_size; | |
1930 | +} | |
1931 | + | |
1932 | +void __init update_e820(void) | |
1933 | +{ | |
1934 | + int nr_map; | |
1935 | + | |
1936 | + nr_map = e820.nr_map; | |
1937 | + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) | |
1938 | + return; | |
1939 | + e820.nr_map = nr_map; | |
1940 | + printk(KERN_INFO "modified physical RAM map:\n"); | |
1941 | + e820_print_map("modified"); | |
1942 | +} | |
1943 | +static void __init update_e820_saved(void) | |
1944 | +{ | |
1945 | + int nr_map; | |
1946 | + | |
1947 | + nr_map = e820_saved.nr_map; | |
1948 | + if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) | |
1949 | + return; | |
1950 | + e820_saved.nr_map = nr_map; | |
1951 | +} | |
1952 | + | |
1953 | +#ifdef CONFIG_XEN | |
1954 | +#define e820 machine_e820 | |
1955 | +#endif | |
1956 | + | |
1957 | +#define MAX_GAP_END 0x100000000ull | |
1958 | +/* | |
1959 | + * Search for a gap in the e820 memory space from start_addr to end_addr. | |
1960 | + */ | |
1961 | +__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, | |
1962 | + unsigned long start_addr, unsigned long long end_addr) | |
1963 | +{ | |
1964 | + unsigned long long last; | |
1965 | + int i = e820.nr_map; | |
1966 | + int found = 0; | |
1967 | + | |
1968 | + last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END; | |
1969 | +#ifdef CONFIG_X86_64 | |
1970 | + if (start_addr >= MAX_GAP_END) | |
1971 | + last = end_addr ?: (1UL << boot_cpu_data.x86_phys_bits); | |
1972 | +#endif | |
1973 | + | |
1974 | + while (--i >= 0) { | |
1975 | + unsigned long long start = e820.map[i].addr; | |
1976 | + unsigned long long end = start + e820.map[i].size; | |
1977 | + | |
1978 | + if (end < start_addr) | |
1979 | + continue; | |
1980 | + | |
1981 | + /* | |
1982 | + * Since "last" is at most 4GB, we know we'll | |
1983 | + * fit in 32 bits if this condition is true | |
1984 | + */ | |
1985 | + if (last > end) { | |
1986 | + unsigned long gap = last - end; | |
1987 | + | |
1988 | + if (gap >= *gapsize) { | |
1989 | + *gapsize = gap; | |
1990 | + *gapstart = end; | |
1991 | + found = 1; | |
1992 | + } | |
1993 | + } | |
1994 | + if (start < last) | |
1995 | + last = start; | |
1996 | + } | |
1997 | + return found; | |
1998 | +} | |
1999 | + | |
2000 | +/* | |
2001 | + * Search for the biggest gap in the low 32 bits of the e820 | |
2002 | + * memory space. We pass this space to PCI to assign MMIO resources | |
2003 | + * for hotplug or unconfigured devices in. | |
2004 | + * Hopefully the BIOS left enough space. | |
2005 | + */ | |
2006 | +__init void e820_setup_gap(void) | |
2007 | +{ | |
2008 | + unsigned long gapstart, gapsize, round; | |
2009 | + int found; | |
2010 | + | |
2011 | + gapstart = 0x10000000; | |
2012 | + gapsize = 0x400000; | |
2013 | + found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END); | |
2014 | + | |
2015 | +#ifdef CONFIG_X86_64 | |
2016 | + if (!found) { | |
2017 | + printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " | |
2018 | + "address range\n" | |
2019 | + KERN_ERR "PCI: Unassigned devices with 32bit resource " | |
2020 | + "registers may break!\n"); | |
2021 | + found = e820_search_gap(&gapstart, &gapsize, MAX_GAP_END, 0); | |
2022 | + BUG_ON(!found); | |
2023 | + } | |
2024 | +#endif | |
2025 | + | |
2026 | + /* | |
2027 | + * See how much we want to round up: start off with | |
2028 | + * rounding to the next 1MB area. | |
2029 | + */ | |
2030 | + round = 0x100000; | |
2031 | + while ((gapsize >> 4) > round) | |
2032 | + round += round; | |
2033 | + /* Fun with two's complement */ | |
2034 | + pci_mem_start = (gapstart + round) & -round; | |
2035 | + | |
2036 | + printk(KERN_INFO | |
2037 | + "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", | |
2038 | + pci_mem_start, gapstart, gapsize); | |
2039 | +} | |
2040 | + | |
2041 | +#undef e820 | |
2042 | + | |
2043 | +#ifndef CONFIG_XEN | |
2044 | +/** | |
2045 | + * Because of the size limitation of struct boot_params, only the first | |
2046 | + * 128 E820 memory entries are passed to kernel via | |
2047 | + * boot_params.e820_map, others are passed via SETUP_E820_EXT node of | |
2048 | + * linked list of struct setup_data, which is parsed here. | |
2049 | + */ | |
2050 | +void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data) | |
2051 | +{ | |
2052 | + u32 map_len; | |
2053 | + int entries; | |
2054 | + struct e820entry *extmap; | |
2055 | + | |
2056 | + entries = sdata->len / sizeof(struct e820entry); | |
2057 | + map_len = sdata->len + sizeof(struct setup_data); | |
2058 | + if (map_len > PAGE_SIZE) | |
2059 | + sdata = early_ioremap(pa_data, map_len); | |
2060 | + extmap = (struct e820entry *)(sdata->data); | |
2061 | + __append_e820_map(extmap, entries); | |
2062 | + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | |
2063 | + if (map_len > PAGE_SIZE) | |
2064 | + early_iounmap(sdata, map_len); | |
2065 | + printk(KERN_INFO "extended physical RAM map:\n"); | |
2066 | + e820_print_map("extended"); | |
2067 | +} | |
2068 | + | |
2069 | +#if defined(CONFIG_X86_64) || \ | |
2070 | + (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) | |
2071 | +/** | |
2072 | + * Find the ranges of physical addresses that do not correspond to | |
2073 | + * e820 RAM areas and mark the corresponding pages as nosave for | |
2074 | + * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). | |
2075 | + * | |
2076 | + * This function requires the e820 map to be sorted and without any | |
2077 | + * overlapping entries and assumes the first e820 area to be RAM. | |
2078 | + */ | |
2079 | +void __init e820_mark_nosave_regions(unsigned long limit_pfn) | |
2080 | +{ | |
2081 | + int i; | |
2082 | + unsigned long pfn; | |
2083 | + | |
2084 | + pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); | |
2085 | + for (i = 1; i < e820.nr_map; i++) { | |
2086 | + struct e820entry *ei = &e820.map[i]; | |
2087 | + | |
2088 | + if (pfn < PFN_UP(ei->addr)) | |
2089 | + register_nosave_region(pfn, PFN_UP(ei->addr)); | |
2090 | + | |
2091 | + pfn = PFN_DOWN(ei->addr + ei->size); | |
2092 | + if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) | |
2093 | + register_nosave_region(PFN_UP(ei->addr), pfn); | |
2094 | + | |
2095 | + if (pfn >= limit_pfn) | |
2096 | + break; | |
2097 | + } | |
2098 | +} | |
2099 | +#endif | |
2100 | +#endif | |
2101 | + | |
2102 | +/* | |
2103 | + * Early reserved memory areas. | |
2104 | + */ | |
2105 | +#define MAX_EARLY_RES 20 | |
2106 | + | |
2107 | +struct early_res { | |
2108 | + u64 start, end; | |
2109 | + char name[16]; | |
2110 | + char overlap_ok; | |
2111 | +}; | |
2112 | +static struct early_res early_res[MAX_EARLY_RES] __initdata = { | |
2113 | +#ifndef CONFIG_XEN | |
2114 | + { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ | |
2115 | +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE) | |
2116 | + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, | |
2117 | +#endif | |
2118 | +#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | |
2119 | + /* | |
2120 | + * But first pinch a few for the stack/trampoline stuff | |
2121 | + * FIXME: Don't need the extra page at 4K, but need to fix | |
2122 | + * trampoline before removing it. (see the GDT stuff) | |
2123 | + */ | |
2124 | + { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" }, | |
2125 | + /* | |
2126 | + * Has to be in very low memory so we can execute | |
2127 | + * real-mode AP code. | |
2128 | + */ | |
2129 | + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" }, | |
2130 | +#endif | |
2131 | +#endif | |
2132 | + {} | |
2133 | +}; | |
2134 | + | |
2135 | +static int __init find_overlapped_early(u64 start, u64 end) | |
2136 | +{ | |
2137 | + int i; | |
2138 | + struct early_res *r; | |
2139 | + | |
2140 | + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | |
2141 | + r = &early_res[i]; | |
2142 | + if (end > r->start && start < r->end) | |
2143 | + break; | |
2144 | + } | |
2145 | + | |
2146 | + return i; | |
2147 | +} | |
2148 | + | |
2149 | +/* | |
2150 | + * Drop the i-th range from the early reservation map, | |
2151 | + * by copying any higher ranges down one over it, and | |
2152 | + * clearing what had been the last slot. | |
2153 | + */ | |
2154 | +static void __init drop_range(int i) | |
2155 | +{ | |
2156 | + int j; | |
2157 | + | |
2158 | + for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) | |
2159 | + ; | |
2160 | + | |
2161 | + memmove(&early_res[i], &early_res[i + 1], | |
2162 | + (j - 1 - i) * sizeof(struct early_res)); | |
2163 | + | |
2164 | + early_res[j - 1].end = 0; | |
2165 | +} | |
2166 | + | |
2167 | +/* | |
2168 | + * Split any existing ranges that: | |
2169 | + * 1) are marked 'overlap_ok', and | |
2170 | + * 2) overlap with the stated range [start, end) | |
2171 | + * into whatever portion (if any) of the existing range is entirely | |
2172 | + * below or entirely above the stated range. Drop the portion | |
2173 | + * of the existing range that overlaps with the stated range, | |
2174 | + * which will allow the caller of this routine to then add that | |
2175 | + * stated range without conflicting with any existing range. | |
2176 | + */ | |
2177 | +static void __init drop_overlaps_that_are_ok(u64 start, u64 end) | |
2178 | +{ | |
2179 | + int i; | |
2180 | + struct early_res *r; | |
2181 | + u64 lower_start, lower_end; | |
2182 | + u64 upper_start, upper_end; | |
2183 | + char name[16]; | |
2184 | + | |
2185 | + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | |
2186 | + r = &early_res[i]; | |
2187 | + | |
2188 | + /* Continue past non-overlapping ranges */ | |
2189 | + if (end <= r->start || start >= r->end) | |
2190 | + continue; | |
2191 | + | |
2192 | + /* | |
2193 | + * Leave non-ok overlaps as is; let caller | |
2194 | + * panic "Overlapping early reservations" | |
2195 | + * when it hits this overlap. | |
2196 | + */ | |
2197 | + if (!r->overlap_ok) | |
2198 | + return; | |
2199 | + | |
2200 | + /* | |
2201 | + * We have an ok overlap. We will drop it from the early | |
2202 | + * reservation map, and add back in any non-overlapping | |
2203 | + * portions (lower or upper) as separate, overlap_ok, | |
2204 | + * non-overlapping ranges. | |
2205 | + */ | |
2206 | + | |
2207 | + /* 1. Note any non-overlapping (lower or upper) ranges. */ | |
2208 | + strncpy(name, r->name, sizeof(name) - 1); | |
2209 | + | |
2210 | + lower_start = lower_end = 0; | |
2211 | + upper_start = upper_end = 0; | |
2212 | + if (r->start < start) { | |
2213 | + lower_start = r->start; | |
2214 | + lower_end = start; | |
2215 | + } | |
2216 | + if (r->end > end) { | |
2217 | + upper_start = end; | |
2218 | + upper_end = r->end; | |
2219 | + } | |
2220 | + | |
2221 | + /* 2. Drop the original ok overlapping range */ | |
2222 | + drop_range(i); | |
2223 | + | |
2224 | + i--; /* resume for-loop on copied down entry */ | |
2225 | + | |
2226 | + /* 3. Add back in any non-overlapping ranges. */ | |
2227 | + if (lower_end) | |
2228 | + reserve_early_overlap_ok(lower_start, lower_end, name); | |
2229 | + if (upper_end) | |
2230 | + reserve_early_overlap_ok(upper_start, upper_end, name); | |
2231 | + } | |
2232 | +} | |
2233 | + | |
2234 | +static void __init __reserve_early(u64 start, u64 end, char *name, | |
2235 | + int overlap_ok) | |
2236 | +{ | |
2237 | + int i; | |
2238 | + struct early_res *r; | |
2239 | + | |
2240 | + i = find_overlapped_early(start, end); | |
2241 | + if (i >= MAX_EARLY_RES) | |
2242 | + panic("Too many early reservations"); | |
2243 | + r = &early_res[i]; | |
2244 | + if (r->end) | |
2245 | + panic("Overlapping early reservations " | |
2246 | + "%llx-%llx %s to %llx-%llx %s\n", | |
2247 | + start, end - 1, name?name:"", r->start, | |
2248 | + r->end - 1, r->name); | |
2249 | + r->start = start; | |
2250 | + r->end = end; | |
2251 | + r->overlap_ok = overlap_ok; | |
2252 | + if (name) | |
2253 | + strncpy(r->name, name, sizeof(r->name) - 1); | |
2254 | +} | |
2255 | + | |
2256 | +/* | |
2257 | + * A few early reservations come here. | |
2258 | + * | |
2259 | + * The 'overlap_ok' in the name of this routine does -not- mean it | |
2260 | + * is ok for these reservations to overlap an earlier reservation. | |
2261 | + * Rather it means that it is ok for subsequent reservations to | |
2262 | + * overlap this one. | |
2263 | + * | |
2264 | + * Use this entry point to reserve early ranges when you are doing | |
2265 | + * so out of "Paranoia", reserving perhaps more memory than you need, | |
2266 | + * just in case, and don't mind a subsequent overlapping reservation | |
2267 | + * that is known to be needed. | |
2268 | + * | |
2269 | + * The drop_overlaps_that_are_ok() call here isn't really needed. | |
2270 | + * It would be needed if we had two colliding 'overlap_ok' | |
2271 | + * reservations, so that the second such would not panic on the | |
2272 | + * overlap with the first. We don't have any such as of this | |
2273 | + * writing, but might as well tolerate such if it happens in | |
2274 | + * the future. | |
2275 | + */ | |
2276 | +void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) | |
2277 | +{ | |
2278 | + drop_overlaps_that_are_ok(start, end); | |
2279 | + __reserve_early(start, end, name, 1); | |
2280 | +} | |
2281 | + | |
2282 | +/* | |
2283 | + * Most early reservations come here. | |
2284 | + * | |
2285 | + * We first have drop_overlaps_that_are_ok() drop any pre-existing | |
2286 | + * 'overlap_ok' ranges, so that we can then reserve this memory | |
2287 | + * range without risk of panic'ing on an overlapping overlap_ok | |
2288 | + * early reservation. | |
2289 | + */ | |
2290 | +void __init reserve_early(u64 start, u64 end, char *name) | |
2291 | +{ | |
2292 | + drop_overlaps_that_are_ok(start, end); | |
2293 | + __reserve_early(start, end, name, 0); | |
2294 | +} | |
2295 | + | |
2296 | +void __init free_early(u64 start, u64 end) | |
2297 | +{ | |
2298 | + struct early_res *r; | |
2299 | + int i; | |
2300 | + | |
2301 | + i = find_overlapped_early(start, end); | |
2302 | + r = &early_res[i]; | |
2303 | + if (i >= MAX_EARLY_RES || r->end != end || r->start != start) | |
2304 | + panic("free_early on not reserved area: %llx-%llx!", | |
2305 | + start, end - 1); | |
2306 | + | |
2307 | + drop_range(i); | |
2308 | +} | |
2309 | + | |
2310 | +void __init early_res_to_bootmem(u64 start, u64 end) | |
2311 | +{ | |
2312 | + int i, count; | |
2313 | + u64 final_start, final_end; | |
2314 | + | |
2315 | + count = 0; | |
2316 | + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) | |
2317 | + count++; | |
2318 | + | |
2319 | + printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n", | |
2320 | + count, start, end); | |
2321 | + for (i = 0; i < count; i++) { | |
2322 | + struct early_res *r = &early_res[i]; | |
2323 | + printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, | |
2324 | + r->start, r->end, r->name); | |
2325 | + final_start = max(start, r->start); | |
2326 | + final_end = min(end, r->end); | |
2327 | + if (final_start >= final_end) { | |
2328 | + printk(KERN_CONT "\n"); | |
2329 | + continue; | |
2330 | + } | |
2331 | + printk(KERN_CONT " ==> [%010llx - %010llx]\n", | |
2332 | + final_start, final_end); | |
2333 | + reserve_bootmem_generic(final_start, final_end - final_start, | |
2334 | + BOOTMEM_DEFAULT); | |
2335 | + } | |
2336 | +} | |
2337 | + | |
2338 | +/* Check for already reserved areas */ | |
2339 | +static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) | |
2340 | +{ | |
2341 | + int i; | |
2342 | + u64 addr = *addrp; | |
2343 | + int changed = 0; | |
2344 | + struct early_res *r; | |
2345 | +again: | |
2346 | + i = find_overlapped_early(addr, addr + size); | |
2347 | + r = &early_res[i]; | |
2348 | + if (i < MAX_EARLY_RES && r->end) { | |
2349 | + *addrp = addr = round_up(r->end, align); | |
2350 | + changed = 1; | |
2351 | + goto again; | |
2352 | + } | |
2353 | + return changed; | |
2354 | +} | |
2355 | + | |
2356 | +/* Check for already reserved areas */ | |
2357 | +static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) | |
2358 | +{ | |
2359 | + int i; | |
2360 | + u64 addr = *addrp, last; | |
2361 | + u64 size = *sizep; | |
2362 | + int changed = 0; | |
2363 | +again: | |
2364 | + last = addr + size; | |
2365 | + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | |
2366 | + struct early_res *r = &early_res[i]; | |
2367 | + if (last > r->start && addr < r->start) { | |
2368 | + size = r->start - addr; | |
2369 | + changed = 1; | |
2370 | + goto again; | |
2371 | + } | |
2372 | + if (last > r->end && addr < r->end) { | |
2373 | + addr = round_up(r->end, align); | |
2374 | + size = last - addr; | |
2375 | + changed = 1; | |
2376 | + goto again; | |
2377 | + } | |
2378 | + if (last <= r->end && addr >= r->start) { | |
2379 | + (*sizep)++; | |
2380 | + return 0; | |
2381 | + } | |
2382 | + } | |
2383 | + if (changed) { | |
2384 | + *addrp = addr; | |
2385 | + *sizep = size; | |
2386 | + } | |
2387 | + return changed; | |
2388 | +} | |
2389 | + | |
2390 | +/* | |
2391 | + * Find a free area with specified alignment in a specific range. | |
2392 | + */ | |
2393 | +u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) | |
2394 | +{ | |
2395 | + int i; | |
2396 | + | |
2397 | + for (i = 0; i < e820.nr_map; i++) { | |
2398 | + struct e820entry *ei = &e820.map[i]; | |
2399 | + u64 addr, last; | |
2400 | + u64 ei_last; | |
2401 | + | |
2402 | + if (ei->type != E820_RAM) | |
2403 | + continue; | |
2404 | + addr = round_up(ei->addr, align); | |
2405 | + ei_last = ei->addr + ei->size; | |
2406 | + if (addr < start) | |
2407 | + addr = round_up(start, align); | |
2408 | + if (addr >= ei_last) | |
2409 | + continue; | |
2410 | + while (bad_addr(&addr, size, align) && addr+size <= ei_last) | |
2411 | + ; | |
2412 | + last = addr + size; | |
2413 | + if (last > ei_last) | |
2414 | + continue; | |
2415 | + if (last > end) | |
2416 | + continue; | |
2417 | + return addr; | |
2418 | + } | |
2419 | + return -1ULL; | |
2420 | +} | |
2421 | + | |
2422 | +/* | |
2423 | + * Find next free range after start | |
2424 | + */ | |
2425 | +u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) | |
2426 | +{ | |
2427 | + int i; | |
2428 | + | |
2429 | + for (i = 0; i < e820.nr_map; i++) { | |
2430 | + struct e820entry *ei = &e820.map[i]; | |
2431 | + u64 addr, last; | |
2432 | + u64 ei_last; | |
2433 | + | |
2434 | + if (ei->type != E820_RAM) | |
2435 | + continue; | |
2436 | + addr = round_up(ei->addr, align); | |
2437 | + ei_last = ei->addr + ei->size; | |
2438 | + if (addr < start) | |
2439 | + addr = round_up(start, align); | |
2440 | + if (addr >= ei_last) | |
2441 | + continue; | |
2442 | + *sizep = ei_last - addr; | |
2443 | + while (bad_addr_size(&addr, sizep, align) && | |
2444 | + addr + *sizep <= ei_last) | |
2445 | + ; | |
2446 | + last = addr + *sizep; | |
2447 | + if (last > ei_last) | |
2448 | + continue; | |
2449 | + return addr; | |
2450 | + } | |
2451 | + | |
2452 | + return -1ULL; | |
2453 | +} | |
2454 | + | |
2455 | +/* | |
2456 | + * Pre-allocate a block of sizet bytes and reserve it in e820 | |
2457 | + */ | |
2458 | +u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | |
2459 | +{ | |
2460 | + u64 size = 0; | |
2461 | + u64 addr; | |
2462 | + u64 start; | |
2463 | +#ifdef CONFIG_XEN | |
2464 | + unsigned int order = get_order(sizet); | |
2465 | + | |
2466 | + if (is_initial_xendomain()) { | |
2467 | + sizet = PAGE_SIZE << order; | |
2468 | + if (align < PAGE_SIZE) | |
2469 | + align = PAGE_SIZE; | |
2470 | + } | |
2471 | +#endif | |
2472 | + for (start = startt; ; start += size) { | |
2473 | + start = find_e820_area_size(start, &size, align); | |
2474 | + if (!(start + 1)) | |
2475 | + return 0; | |
2476 | + if (size >= sizet) | |
2477 | + break; | |
2478 | + } | |
2479 | + | |
2480 | +#ifdef CONFIG_X86_32 | |
2481 | + if (start >= MAXMEM) | |
2482 | + return 0; | |
2483 | + if (start + size > MAXMEM) | |
2484 | + size = MAXMEM - start; | |
2485 | +#endif | |
2486 | +#ifdef CONFIG_XEN | |
2487 | + if ((start >> PAGE_SHIFT) >= xen_start_info->nr_pages) | |
2488 | + return 0; | |
2489 | + if (PFN_UP(start + size) > xen_start_info->nr_pages) | |
2490 | + size = ((u64)xen_start_info->nr_pages << PAGE_SHIFT) - start; | |
2491 | +#endif | |
2492 | + | |
2493 | + addr = round_down(start + size - sizet, align); | |
2494 | + if (addr < start) | |
2495 | + return 0; | |
2496 | +#ifdef CONFIG_XEN | |
2497 | + if (is_initial_xendomain()) { | |
2498 | + int rc; | |
2499 | + unsigned long max_initmap_pfn; | |
2500 | + | |
2501 | + max_initmap_pfn = ALIGN(PFN_UP(__pa(xen_start_info->pt_base)) | |
2502 | + + xen_start_info->nr_pt_frames | |
2503 | + + 1 + (1 << (19 - PAGE_SHIFT)), | |
2504 | + 1UL << (22 - PAGE_SHIFT)); | |
2505 | +#ifdef CONFIG_X86_32 | |
2506 | + if ((addr >> PAGE_SHIFT) | |
2507 | + < max(max_initmap_pfn, max_pfn_mapped)) | |
2508 | + rc = xen_create_contiguous_region((unsigned long) | |
2509 | + __va(addr), | |
2510 | + order, 32); | |
2511 | +#else | |
2512 | + if ((addr >> PAGE_SHIFT) < max_pfn_mapped) | |
2513 | + rc = xen_create_contiguous_region((unsigned long) | |
2514 | + __va(addr), | |
2515 | + order, 32); | |
2516 | + else if ((addr >> PAGE_SHIFT) < max_initmap_pfn) | |
2517 | + rc = xen_create_contiguous_region(__START_KERNEL_map | |
2518 | + + addr, | |
2519 | + order, 32); | |
2520 | +#endif | |
2521 | + else | |
2522 | + rc = early_create_contiguous_region(addr >> PAGE_SHIFT, | |
2523 | + order, 32); | |
2524 | + if (rc) | |
2525 | + return 0; | |
2526 | + } | |
2527 | +#endif | |
2528 | + e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); | |
2529 | + e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); | |
2530 | + printk(KERN_INFO "update e820 for early_reserve_e820\n"); | |
2531 | + update_e820(); | |
2532 | + update_e820_saved(); | |
2533 | + | |
2534 | + return addr; | |
2535 | +} | |
2536 | + | |
2537 | +#ifdef CONFIG_X86_32 | |
2538 | +# ifdef CONFIG_X86_PAE | |
2539 | +# define MAX_ARCH_PFN (1ULL<<(40-PAGE_SHIFT)) | |
2540 | +# else | |
2541 | +# define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT)) | |
2542 | +# endif | |
2543 | +#else /* CONFIG_X86_32 */ | |
2544 | +# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT | |
2545 | +#endif | |
2546 | + | |
2547 | +/* | |
2548 | + * Find the highest page frame number we have available | |
2549 | + */ | |
2550 | +static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) | |
2551 | +{ | |
2552 | + int i; | |
2553 | + unsigned long last_pfn = 0; | |
2554 | + unsigned long max_arch_pfn = MAX_ARCH_PFN; | |
2555 | + | |
2556 | + for (i = 0; i < e820.nr_map; i++) { | |
2557 | + struct e820entry *ei = &e820.map[i]; | |
2558 | + unsigned long start_pfn; | |
2559 | + unsigned long end_pfn; | |
2560 | + | |
2561 | + if (ei->type != type) | |
2562 | + continue; | |
2563 | + | |
2564 | + start_pfn = ei->addr >> PAGE_SHIFT; | |
2565 | + end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT; | |
2566 | + | |
2567 | + if (start_pfn >= limit_pfn) | |
2568 | + continue; | |
2569 | + if (end_pfn > limit_pfn) { | |
2570 | + last_pfn = limit_pfn; | |
2571 | + break; | |
2572 | + } | |
2573 | + if (end_pfn > last_pfn) | |
2574 | + last_pfn = end_pfn; | |
2575 | + } | |
2576 | + | |
2577 | + if (last_pfn > max_arch_pfn) | |
2578 | + last_pfn = max_arch_pfn; | |
2579 | + | |
2580 | + printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", | |
2581 | + last_pfn, max_arch_pfn); | |
2582 | + return last_pfn; | |
2583 | +} | |
2584 | +unsigned long __init e820_end_of_ram_pfn(void) | |
2585 | +{ | |
2586 | + return e820_end_pfn(MAX_ARCH_PFN, E820_RAM); | |
2587 | +} | |
2588 | + | |
2589 | +unsigned long __init e820_end_of_low_ram_pfn(void) | |
2590 | +{ | |
2591 | + return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); | |
2592 | +} | |
2593 | +/* | |
2594 | + * Finds an active region in the address range from start_pfn to last_pfn and | |
2595 | + * returns its range in ei_startpfn and ei_endpfn for the e820 entry. | |
2596 | + */ | |
2597 | +int __init e820_find_active_region(const struct e820entry *ei, | |
2598 | + unsigned long start_pfn, | |
2599 | + unsigned long last_pfn, | |
2600 | + unsigned long *ei_startpfn, | |
2601 | + unsigned long *ei_endpfn) | |
2602 | +{ | |
2603 | + u64 align = PAGE_SIZE; | |
2604 | + | |
2605 | + *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT; | |
2606 | + *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT; | |
2607 | + | |
2608 | + /* Skip map entries smaller than a page */ | |
2609 | + if (*ei_startpfn >= *ei_endpfn) | |
2610 | + return 0; | |
2611 | + | |
2612 | + /* Skip if map is outside the node */ | |
2613 | + if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || | |
2614 | + *ei_startpfn >= last_pfn) | |
2615 | + return 0; | |
2616 | + | |
2617 | + /* Check for overlaps */ | |
2618 | + if (*ei_startpfn < start_pfn) | |
2619 | + *ei_startpfn = start_pfn; | |
2620 | + if (*ei_endpfn > last_pfn) | |
2621 | + *ei_endpfn = last_pfn; | |
2622 | + | |
2623 | + return 1; | |
2624 | +} | |
2625 | + | |
2626 | +/* Walk the e820 map and register active regions within a node */ | |
2627 | +void __init e820_register_active_regions(int nid, unsigned long start_pfn, | |
2628 | + unsigned long last_pfn) | |
2629 | +{ | |
2630 | + unsigned long ei_startpfn; | |
2631 | + unsigned long ei_endpfn; | |
2632 | + int i; | |
2633 | + | |
2634 | + for (i = 0; i < e820.nr_map; i++) | |
2635 | + if (e820_find_active_region(&e820.map[i], | |
2636 | + start_pfn, last_pfn, | |
2637 | + &ei_startpfn, &ei_endpfn)) | |
2638 | + add_active_range(nid, ei_startpfn, ei_endpfn); | |
2639 | +} | |
2640 | + | |
2641 | +/* | |
2642 | + * Find the hole size (in bytes) in the memory range. | |
2643 | + * @start: starting address of the memory range to scan | |
2644 | + * @end: ending address of the memory range to scan | |
2645 | + */ | |
2646 | +u64 __init e820_hole_size(u64 start, u64 end) | |
2647 | +{ | |
2648 | + unsigned long start_pfn = start >> PAGE_SHIFT; | |
2649 | + unsigned long last_pfn = end >> PAGE_SHIFT; | |
2650 | + unsigned long ei_startpfn, ei_endpfn, ram = 0; | |
2651 | + int i; | |
2652 | + | |
2653 | + for (i = 0; i < e820.nr_map; i++) { | |
2654 | + if (e820_find_active_region(&e820.map[i], | |
2655 | + start_pfn, last_pfn, | |
2656 | + &ei_startpfn, &ei_endpfn)) | |
2657 | + ram += ei_endpfn - ei_startpfn; | |
2658 | + } | |
2659 | + return end - start - ((u64)ram << PAGE_SHIFT); | |
2660 | +} | |
2661 | + | |
2662 | +static void early_panic(char *msg) | |
2663 | +{ | |
2664 | + early_printk(msg); | |
2665 | + panic(msg); | |
2666 | +} | |
2667 | + | |
2668 | +static int userdef __initdata; | |
2669 | + | |
2670 | +/* "mem=nopentium" disables the 4MB page tables. */ | |
2671 | +static int __init parse_memopt(char *p) | |
2672 | +{ | |
2673 | + u64 mem_size, current_end; | |
2674 | + unsigned int i; | |
2675 | + | |
2676 | + if (!p) | |
2677 | + return -EINVAL; | |
2678 | + | |
2679 | +#ifdef CONFIG_X86_32 | |
2680 | + if (!strcmp(p, "nopentium")) { | |
2681 | + setup_clear_cpu_cap(X86_FEATURE_PSE); | |
2682 | + return 0; | |
2683 | + } | |
2684 | +#endif | |
2685 | + | |
2686 | + userdef = 1; | |
2687 | + mem_size = memparse(p, &p); | |
2688 | + e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); | |
2689 | + | |
2690 | + i = e820.nr_map - 1; | |
2691 | + current_end = e820.map[i].addr + e820.map[i].size; | |
2692 | + if (current_end < mem_size) { | |
2693 | + /* | |
2694 | + * The e820 map ends before our requested size so | |
2695 | + * extend the final entry to the requested address. | |
2696 | + */ | |
2697 | + if (e820.map[i].type == E820_RAM) | |
2698 | + e820.map[i].size = mem_size - e820.map[i].addr; | |
2699 | + else | |
2700 | + e820_add_region(current_end, mem_size - current_end, E820_RAM); | |
2701 | + } | |
2702 | + | |
2703 | + return 0; | |
2704 | +} | |
2705 | +early_param("mem", parse_memopt); | |
2706 | + | |
2707 | +#ifndef CONFIG_XEN | |
2708 | +static int __init parse_memmap_opt(char *p) | |
2709 | +{ | |
2710 | + char *oldp; | |
2711 | + u64 start_at, mem_size; | |
2712 | + | |
2713 | + if (!p) | |
2714 | + return -EINVAL; | |
2715 | + | |
2716 | + if (!strncmp(p, "exactmap", 8)) { | |
2717 | +#ifdef CONFIG_CRASH_DUMP | |
2718 | + /* | |
2719 | + * If we are doing a crash dump, we still need to know | |
2720 | + * the real mem size before original memory map is | |
2721 | + * reset. | |
2722 | + */ | |
2723 | + saved_max_pfn = e820_end_of_ram_pfn(); | |
2724 | +#endif | |
2725 | + e820.nr_map = 0; | |
2726 | + userdef = 1; | |
2727 | + return 0; | |
2728 | + } | |
2729 | + | |
2730 | + oldp = p; | |
2731 | + mem_size = memparse(p, &p); | |
2732 | + if (p == oldp) | |
2733 | + return -EINVAL; | |
2734 | + | |
2735 | + userdef = 1; | |
2736 | + if (*p == '@') { | |
2737 | + start_at = memparse(p+1, &p); | |
2738 | + e820_add_region(start_at, mem_size, E820_RAM); | |
2739 | + } else if (*p == '#') { | |
2740 | + start_at = memparse(p+1, &p); | |
2741 | + e820_add_region(start_at, mem_size, E820_ACPI); | |
2742 | + } else if (*p == '$') { | |
2743 | + start_at = memparse(p+1, &p); | |
2744 | + e820_add_region(start_at, mem_size, E820_RESERVED); | |
2745 | + } else | |
2746 | + e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); | |
2747 | + | |
2748 | + return *p == '\0' ? 0 : -EINVAL; | |
2749 | +} | |
2750 | +early_param("memmap", parse_memmap_opt); | |
82094b55 | 2751 | +#endif |
2cb7cef9 BS |
2752 | + |
2753 | +void __init finish_e820_parsing(void) | |
2754 | +{ | |
2755 | + if (userdef) { | |
2756 | + int nr = e820.nr_map; | |
2757 | + | |
2758 | + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) | |
2759 | + early_panic("Invalid user supplied memory map"); | |
2760 | + e820.nr_map = nr; | |
2761 | + | |
2762 | + printk(KERN_INFO "user-defined physical RAM map:\n"); | |
2763 | + e820_print_map("user"); | |
2764 | + } | |
2765 | +} | |
2cb7cef9 BS |
2766 | + |
2767 | +static inline const char *e820_type_to_string(int e820_type) | |
2768 | +{ | |
2769 | + switch (e820_type) { | |
2770 | + case E820_RESERVED_KERN: | |
2771 | + case E820_RAM: return "System RAM"; | |
2772 | + case E820_ACPI: return "ACPI Tables"; | |
2773 | + case E820_NVS: return "ACPI Non-volatile Storage"; | |
2774 | + default: return "reserved"; | |
2775 | + } | |
2776 | +} | |
2777 | + | |
2778 | +#ifdef CONFIG_XEN | |
2779 | +#define e820 machine_e820 | |
2780 | +#endif | |
2781 | + | |
2782 | +/* | |
2783 | + * Mark e820 reserved areas as busy for the resource manager. | |
2784 | + */ | |
2785 | +void __init e820_reserve_resources(void) | |
2786 | +{ | |
2787 | + int i; | |
2788 | + struct resource *res; | |
2789 | + u64 end; | |
2790 | + | |
2791 | + res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); | |
2792 | + for (i = 0; i < e820.nr_map; i++) { | |
2793 | + end = e820.map[i].addr + e820.map[i].size - 1; | |
2794 | +#ifndef CONFIG_RESOURCES_64BIT | |
2795 | + if (end > 0x100000000ULL) { | |
2796 | + res++; | |
2797 | + continue; | |
2798 | + } | |
2799 | +#endif | |
2800 | + res->name = e820_type_to_string(e820.map[i].type); | |
2801 | + res->start = e820.map[i].addr; | |
2802 | + res->end = end; | |
2803 | + | |
2804 | + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | |
2805 | + insert_resource(&iomem_resource, res); | |
2806 | + res++; | |
2807 | + } | |
2808 | + | |
2809 | + for (i = 0; i < e820_saved.nr_map; i++) { | |
2810 | + struct e820entry *entry = &e820_saved.map[i]; | |
2811 | + firmware_map_add_early(entry->addr, | |
2812 | + entry->addr + entry->size - 1, | |
2813 | + e820_type_to_string(entry->type)); | |
2814 | + } | |
2815 | +} | |
2816 | + | |
2817 | +#undef e820 | |
2818 | + | |
2819 | +#ifndef CONFIG_XEN | |
2820 | +char *__init default_machine_specific_memory_setup(void) | |
2821 | +{ | |
2822 | + char *who = "BIOS-e820"; | |
2823 | + int new_nr; | |
2824 | + /* | |
2825 | + * Try to copy the BIOS-supplied E820-map. | |
2826 | + * | |
2827 | + * Otherwise fake a memory map; one section from 0k->640k, | |
2828 | + * the next section from 1mb->appropriate_mem_k | |
2829 | + */ | |
2830 | + new_nr = boot_params.e820_entries; | |
2831 | + sanitize_e820_map(boot_params.e820_map, | |
2832 | + ARRAY_SIZE(boot_params.e820_map), | |
2833 | + &new_nr); | |
2834 | + boot_params.e820_entries = new_nr; | |
2835 | + if (append_e820_map(boot_params.e820_map, boot_params.e820_entries) | |
2836 | + < 0) { | |
2837 | + u64 mem_size; | |
2838 | + | |
2839 | + /* compare results from other methods and take the greater */ | |
2840 | + if (boot_params.alt_mem_k | |
2841 | + < boot_params.screen_info.ext_mem_k) { | |
2842 | + mem_size = boot_params.screen_info.ext_mem_k; | |
2843 | + who = "BIOS-88"; | |
2844 | + } else { | |
2845 | + mem_size = boot_params.alt_mem_k; | |
2846 | + who = "BIOS-e801"; | |
2847 | + } | |
2848 | + | |
2849 | + e820.nr_map = 0; | |
2850 | + e820_add_region(0, LOWMEMSIZE(), E820_RAM); | |
2851 | + e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM); | |
2852 | + } | |
2853 | + | |
2854 | + /* In case someone cares... */ | |
2855 | + return who; | |
2856 | +} | |
2857 | + | |
2858 | +char *__init __attribute__((weak)) machine_specific_memory_setup(void) | |
2859 | +{ | |
2860 | + if (x86_quirks->arch_memory_setup) { | |
2861 | + char *who = x86_quirks->arch_memory_setup(); | |
2862 | + | |
2863 | + if (who) | |
2864 | + return who; | |
2865 | + } | |
2866 | + return default_machine_specific_memory_setup(); | |
2867 | +} | |
2868 | +#endif | |
2869 | + | |
2870 | +char * __init memory_setup(void) | |
2871 | +{ | |
2872 | + int rc, nr_map; | |
2873 | + struct xen_memory_map memmap; | |
2874 | + /* | |
2875 | + * This is rather large for a stack variable but this early in | |
2876 | + * the boot process we know we have plenty slack space. | |
2877 | + */ | |
2878 | + struct e820entry map[E820MAX]; | |
2879 | + | |
2880 | + memmap.nr_entries = E820MAX; | |
2881 | + set_xen_guest_handle(memmap.buffer, map); | |
2882 | + | |
2883 | + rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); | |
2884 | + if (rc == -ENOSYS) { | |
2885 | + memmap.nr_entries = 1; | |
2886 | + map[0].addr = 0ULL; | |
2887 | + map[0].size = PFN_PHYS((unsigned long long)xen_start_info->nr_pages); | |
2888 | + /* 8MB slack (to balance backend allocations). */ | |
2889 | + map[0].size += 8ULL << 20; | |
2890 | + map[0].type = E820_RAM; | |
2891 | + rc = 0; | |
2892 | + } | |
2893 | + BUG_ON(rc); | |
2894 | + | |
2895 | + nr_map = memmap.nr_entries; | |
2896 | + sanitize_e820_map(map, ARRAY_SIZE(map), &nr_map); | |
2897 | + | |
2898 | + if (append_e820_map(map, nr_map) < 0) | |
2899 | + BUG(); | |
2900 | + | |
2901 | +#ifdef CONFIG_XEN | |
2902 | + if (is_initial_xendomain()) { | |
2903 | + memmap.nr_entries = E820MAX; | |
2904 | + set_xen_guest_handle(memmap.buffer, machine_e820.map); | |
2905 | + | |
2906 | + if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)) | |
2907 | + BUG(); | |
2908 | + machine_e820.nr_map = memmap.nr_entries; | |
2909 | + } | |
2910 | +#endif | |
2911 | + | |
2912 | + return "Xen"; | |
2913 | +} | |
2914 | + | |
2915 | +void __init setup_memory_map(void) | |
2916 | +{ | |
2917 | + char *who; | |
2918 | + | |
2919 | + who = memory_setup(); | |
2920 | +#ifdef CONFIG_XEN | |
2921 | + if (!is_initial_xendomain()) | |
2922 | +#endif | |
2923 | + memcpy(&e820_saved, &e820, sizeof(struct e820map)); | |
2924 | + printk(KERN_INFO "Xen-provided physical RAM map:\n"); | |
2925 | + e820_print_map(who); | |
2926 | +} | |
82094b55 | 2927 | --- sle11-2009-10-16.orig/arch/x86/kernel/e820_32-xen.c 2009-03-16 16:38:05.000000000 +0100 |
2cb7cef9 BS |
2928 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 |
2929 | @@ -1,873 +0,0 @@ | |
2930 | -#include <linux/kernel.h> | |
2931 | -#include <linux/types.h> | |
2932 | -#include <linux/init.h> | |
2933 | -#include <linux/bootmem.h> | |
2934 | -#include <linux/ioport.h> | |
2935 | -#include <linux/string.h> | |
2936 | -#include <linux/kexec.h> | |
2937 | -#include <linux/module.h> | |
2938 | -#include <linux/mm.h> | |
2939 | -#include <linux/pfn.h> | |
2940 | -#include <linux/uaccess.h> | |
2941 | -#include <linux/suspend.h> | |
2942 | - | |
2943 | -#include <asm/pgtable.h> | |
2944 | -#include <asm/page.h> | |
2945 | -#include <asm/e820.h> | |
2946 | -#include <asm/setup.h> | |
2947 | -#include <xen/interface/memory.h> | |
2948 | - | |
2949 | -struct e820map e820; | |
2950 | -struct change_member { | |
2951 | - struct e820entry *pbios; /* pointer to original bios entry */ | |
2952 | - unsigned long long addr; /* address for this change point */ | |
2953 | -}; | |
2954 | -static struct change_member change_point_list[2*E820MAX] __initdata; | |
2955 | -static struct change_member *change_point[2*E820MAX] __initdata; | |
2956 | -static struct e820entry *overlap_list[E820MAX] __initdata; | |
2957 | -static struct e820entry new_bios[E820MAX] __initdata; | |
2958 | -/* For PCI or other memory-mapped resources */ | |
2959 | -unsigned long pci_mem_start = 0x10000000; | |
2960 | -#ifdef CONFIG_PCI | |
2961 | -EXPORT_SYMBOL(pci_mem_start); | |
2962 | -#endif | |
2963 | -extern int user_defined_memmap; | |
2964 | - | |
2965 | -static struct resource system_rom_resource = { | |
2966 | - .name = "System ROM", | |
2967 | - .start = 0xf0000, | |
2968 | - .end = 0xfffff, | |
2969 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
2970 | -}; | |
2971 | - | |
2972 | -static struct resource extension_rom_resource = { | |
2973 | - .name = "Extension ROM", | |
2974 | - .start = 0xe0000, | |
2975 | - .end = 0xeffff, | |
2976 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
2977 | -}; | |
2978 | - | |
2979 | -static struct resource adapter_rom_resources[] = { { | |
2980 | - .name = "Adapter ROM", | |
2981 | - .start = 0xc8000, | |
2982 | - .end = 0, | |
2983 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
2984 | -}, { | |
2985 | - .name = "Adapter ROM", | |
2986 | - .start = 0, | |
2987 | - .end = 0, | |
2988 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
2989 | -}, { | |
2990 | - .name = "Adapter ROM", | |
2991 | - .start = 0, | |
2992 | - .end = 0, | |
2993 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
2994 | -}, { | |
2995 | - .name = "Adapter ROM", | |
2996 | - .start = 0, | |
2997 | - .end = 0, | |
2998 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
2999 | -}, { | |
3000 | - .name = "Adapter ROM", | |
3001 | - .start = 0, | |
3002 | - .end = 0, | |
3003 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
3004 | -}, { | |
3005 | - .name = "Adapter ROM", | |
3006 | - .start = 0, | |
3007 | - .end = 0, | |
3008 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
3009 | -} }; | |
3010 | - | |
3011 | -static struct resource video_rom_resource = { | |
3012 | - .name = "Video ROM", | |
3013 | - .start = 0xc0000, | |
3014 | - .end = 0xc7fff, | |
3015 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
3016 | -}; | |
3017 | - | |
3018 | -#define ROMSIGNATURE 0xaa55 | |
3019 | - | |
3020 | -static int __init romsignature(const unsigned char *rom) | |
3021 | -{ | |
3022 | - const unsigned short * const ptr = (const unsigned short *)rom; | |
3023 | - unsigned short sig; | |
3024 | - | |
3025 | - return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; | |
3026 | -} | |
3027 | - | |
3028 | -static int __init romchecksum(const unsigned char *rom, unsigned long length) | |
3029 | -{ | |
3030 | - unsigned char sum, c; | |
3031 | - | |
3032 | - for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) | |
3033 | - sum += c; | |
3034 | - return !length && !sum; | |
3035 | -} | |
3036 | - | |
3037 | -static void __init probe_roms(void) | |
3038 | -{ | |
3039 | - const unsigned char *rom; | |
3040 | - unsigned long start, length, upper; | |
3041 | - unsigned char c; | |
3042 | - int i; | |
3043 | - | |
3044 | -#ifdef CONFIG_XEN | |
3045 | - /* Nothing to do if not running in dom0. */ | |
3046 | - if (!is_initial_xendomain()) | |
3047 | - return; | |
3048 | -#endif | |
3049 | - | |
3050 | - /* video rom */ | |
3051 | - upper = adapter_rom_resources[0].start; | |
3052 | - for (start = video_rom_resource.start; start < upper; start += 2048) { | |
3053 | - rom = isa_bus_to_virt(start); | |
3054 | - if (!romsignature(rom)) | |
3055 | - continue; | |
3056 | - | |
3057 | - video_rom_resource.start = start; | |
3058 | - | |
3059 | - if (probe_kernel_address(rom + 2, c) != 0) | |
3060 | - continue; | |
3061 | - | |
3062 | - /* 0 < length <= 0x7f * 512, historically */ | |
3063 | - length = c * 512; | |
3064 | - | |
3065 | - /* if checksum okay, trust length byte */ | |
3066 | - if (length && romchecksum(rom, length)) | |
3067 | - video_rom_resource.end = start + length - 1; | |
3068 | - | |
3069 | - request_resource(&iomem_resource, &video_rom_resource); | |
3070 | - break; | |
3071 | - } | |
3072 | - | |
3073 | - start = (video_rom_resource.end + 1 + 2047) & ~2047UL; | |
3074 | - if (start < upper) | |
3075 | - start = upper; | |
3076 | - | |
3077 | - /* system rom */ | |
3078 | - request_resource(&iomem_resource, &system_rom_resource); | |
3079 | - upper = system_rom_resource.start; | |
3080 | - | |
3081 | - /* check for extension rom (ignore length byte!) */ | |
3082 | - rom = isa_bus_to_virt((unsigned long)extension_rom_resource.start); | |
3083 | - if (romsignature(rom)) { | |
3084 | - length = extension_rom_resource.end - extension_rom_resource.start + 1; | |
3085 | - if (romchecksum(rom, length)) { | |
3086 | - request_resource(&iomem_resource, &extension_rom_resource); | |
3087 | - upper = extension_rom_resource.start; | |
3088 | - } | |
3089 | - } | |
3090 | - | |
3091 | - /* check for adapter roms on 2k boundaries */ | |
3092 | - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { | |
3093 | - rom = isa_bus_to_virt(start); | |
3094 | - if (!romsignature(rom)) | |
3095 | - continue; | |
3096 | - | |
3097 | - if (probe_kernel_address(rom + 2, c) != 0) | |
3098 | - continue; | |
3099 | - | |
3100 | - /* 0 < length <= 0x7f * 512, historically */ | |
3101 | - length = c * 512; | |
3102 | - | |
3103 | - /* but accept any length that fits if checksum okay */ | |
3104 | - if (!length || start + length > upper || !romchecksum(rom, length)) | |
3105 | - continue; | |
3106 | - | |
3107 | - adapter_rom_resources[i].start = start; | |
3108 | - adapter_rom_resources[i].end = start + length - 1; | |
3109 | - request_resource(&iomem_resource, &adapter_rom_resources[i]); | |
3110 | - | |
3111 | - start = adapter_rom_resources[i++].end & ~2047UL; | |
3112 | - } | |
3113 | -} | |
3114 | - | |
3115 | -#ifdef CONFIG_XEN | |
3116 | -static struct e820map machine_e820; | |
3117 | -#define e820 machine_e820 | |
3118 | -#endif | |
3119 | - | |
3120 | -/* | |
3121 | - * Request address space for all standard RAM and ROM resources | |
3122 | - * and also for regions reported as reserved by the e820. | |
3123 | - */ | |
3124 | -void __init init_iomem_resources(struct resource *code_resource, | |
3125 | - struct resource *data_resource, | |
3126 | - struct resource *bss_resource) | |
3127 | -{ | |
3128 | - int i; | |
3129 | - | |
3130 | - probe_roms(); | |
3131 | - for (i = 0; i < e820.nr_map; i++) { | |
3132 | - struct resource *res; | |
3133 | -#ifndef CONFIG_RESOURCES_64BIT | |
3134 | - if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) | |
3135 | - continue; | |
3136 | -#endif | |
3137 | - res = kzalloc(sizeof(struct resource), GFP_ATOMIC); | |
3138 | - switch (e820.map[i].type) { | |
3139 | - case E820_RAM: res->name = "System RAM"; break; | |
3140 | - case E820_ACPI: res->name = "ACPI Tables"; break; | |
3141 | - case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; | |
3142 | - default: res->name = "reserved"; | |
3143 | - } | |
3144 | - res->start = e820.map[i].addr; | |
3145 | - res->end = res->start + e820.map[i].size - 1; | |
3146 | - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | |
3147 | - if (request_resource(&iomem_resource, res)) { | |
3148 | - kfree(res); | |
3149 | - continue; | |
3150 | - } | |
3151 | - if (e820.map[i].type == E820_RAM) { | |
3152 | - /* | |
3153 | - * We don't know which RAM region contains kernel data, | |
3154 | - * so we try it repeatedly and let the resource manager | |
3155 | - * test it. | |
3156 | - */ | |
3157 | -#ifndef CONFIG_XEN | |
3158 | - request_resource(res, code_resource); | |
3159 | - request_resource(res, data_resource); | |
3160 | - request_resource(res, bss_resource); | |
3161 | -#endif | |
3162 | -#ifdef CONFIG_KEXEC | |
3163 | - if (crashk_res.start != crashk_res.end) | |
3164 | - request_resource(res, &crashk_res); | |
3165 | -#ifdef CONFIG_XEN | |
3166 | - xen_machine_kexec_register_resources(res); | |
3167 | -#endif | |
3168 | -#endif | |
3169 | - } | |
3170 | - } | |
3171 | -} | |
3172 | - | |
3173 | -#undef e820 | |
3174 | - | |
3175 | -#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) | |
3176 | -/** | |
3177 | - * e820_mark_nosave_regions - Find the ranges of physical addresses that do not | |
3178 | - * correspond to e820 RAM areas and mark the corresponding pages as nosave for | |
3179 | - * hibernation. | |
3180 | - * | |
3181 | - * This function requires the e820 map to be sorted and without any | |
3182 | - * overlapping entries and assumes the first e820 area to be RAM. | |
3183 | - */ | |
3184 | -void __init e820_mark_nosave_regions(void) | |
3185 | -{ | |
3186 | - int i; | |
3187 | - unsigned long pfn; | |
3188 | - | |
3189 | - pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); | |
3190 | - for (i = 1; i < e820.nr_map; i++) { | |
3191 | - struct e820entry *ei = &e820.map[i]; | |
3192 | - | |
3193 | - if (pfn < PFN_UP(ei->addr)) | |
3194 | - register_nosave_region(pfn, PFN_UP(ei->addr)); | |
3195 | - | |
3196 | - pfn = PFN_DOWN(ei->addr + ei->size); | |
3197 | - if (ei->type != E820_RAM) | |
3198 | - register_nosave_region(PFN_UP(ei->addr), pfn); | |
3199 | - | |
3200 | - if (pfn >= max_low_pfn) | |
3201 | - break; | |
3202 | - } | |
3203 | -} | |
3204 | -#endif | |
3205 | - | |
3206 | -void __init add_memory_region(unsigned long long start, | |
3207 | - unsigned long long size, int type) | |
3208 | -{ | |
3209 | - int x; | |
3210 | - | |
3211 | - x = e820.nr_map; | |
3212 | - | |
3213 | - if (x == E820MAX) { | |
3214 | - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | |
3215 | - return; | |
3216 | - } | |
3217 | - | |
3218 | - e820.map[x].addr = start; | |
3219 | - e820.map[x].size = size; | |
3220 | - e820.map[x].type = type; | |
3221 | - e820.nr_map++; | |
3222 | -} /* add_memory_region */ | |
3223 | - | |
3224 | -/* | |
3225 | - * Sanitize the BIOS e820 map. | |
3226 | - * | |
3227 | - * Some e820 responses include overlapping entries. The following | |
3228 | - * replaces the original e820 map with a new one, removing overlaps. | |
3229 | - * | |
3230 | - */ | |
3231 | -int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | |
3232 | -{ | |
3233 | - struct change_member *change_tmp; | |
3234 | - unsigned long current_type, last_type; | |
3235 | - unsigned long long last_addr; | |
3236 | - int chgidx, still_changing; | |
3237 | - int overlap_entries; | |
3238 | - int new_bios_entry; | |
3239 | - int old_nr, new_nr, chg_nr; | |
3240 | - int i; | |
3241 | - | |
3242 | - /* | |
3243 | - Visually we're performing the following (1,2,3,4 = memory types)... | |
3244 | - | |
3245 | - Sample memory map (w/overlaps): | |
3246 | - ____22__________________ | |
3247 | - ______________________4_ | |
3248 | - ____1111________________ | |
3249 | - _44_____________________ | |
3250 | - 11111111________________ | |
3251 | - ____________________33__ | |
3252 | - ___________44___________ | |
3253 | - __________33333_________ | |
3254 | - ______________22________ | |
3255 | - ___________________2222_ | |
3256 | - _________111111111______ | |
3257 | - _____________________11_ | |
3258 | - _________________4______ | |
3259 | - | |
3260 | - Sanitized equivalent (no overlap): | |
3261 | - 1_______________________ | |
3262 | - _44_____________________ | |
3263 | - ___1____________________ | |
3264 | - ____22__________________ | |
3265 | - ______11________________ | |
3266 | - _________1______________ | |
3267 | - __________3_____________ | |
3268 | - ___________44___________ | |
3269 | - _____________33_________ | |
3270 | - _______________2________ | |
3271 | - ________________1_______ | |
3272 | - _________________4______ | |
3273 | - ___________________2____ | |
3274 | - ____________________33__ | |
3275 | - ______________________4_ | |
3276 | - */ | |
3277 | - /* if there's only one memory region, don't bother */ | |
3278 | - if (*pnr_map < 2) { | |
3279 | - return -1; | |
3280 | - } | |
3281 | - | |
3282 | - old_nr = *pnr_map; | |
3283 | - | |
3284 | - /* bail out if we find any unreasonable addresses in bios map */ | |
3285 | - for (i=0; i<old_nr; i++) | |
3286 | - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { | |
3287 | - return -1; | |
3288 | - } | |
3289 | - | |
3290 | - /* create pointers for initial change-point information (for sorting) */ | |
3291 | - for (i=0; i < 2*old_nr; i++) | |
3292 | - change_point[i] = &change_point_list[i]; | |
3293 | - | |
3294 | - /* record all known change-points (starting and ending addresses), | |
3295 | - omitting those that are for empty memory regions */ | |
3296 | - chgidx = 0; | |
3297 | - for (i=0; i < old_nr; i++) { | |
3298 | - if (biosmap[i].size != 0) { | |
3299 | - change_point[chgidx]->addr = biosmap[i].addr; | |
3300 | - change_point[chgidx++]->pbios = &biosmap[i]; | |
3301 | - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; | |
3302 | - change_point[chgidx++]->pbios = &biosmap[i]; | |
3303 | - } | |
3304 | - } | |
3305 | - chg_nr = chgidx; /* true number of change-points */ | |
3306 | - | |
3307 | - /* sort change-point list by memory addresses (low -> high) */ | |
3308 | - still_changing = 1; | |
3309 | - while (still_changing) { | |
3310 | - still_changing = 0; | |
3311 | - for (i=1; i < chg_nr; i++) { | |
3312 | - /* if <current_addr> > <last_addr>, swap */ | |
3313 | - /* or, if current=<start_addr> & last=<end_addr>, swap */ | |
3314 | - if ((change_point[i]->addr < change_point[i-1]->addr) || | |
3315 | - ((change_point[i]->addr == change_point[i-1]->addr) && | |
3316 | - (change_point[i]->addr == change_point[i]->pbios->addr) && | |
3317 | - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) | |
3318 | - ) | |
3319 | - { | |
3320 | - change_tmp = change_point[i]; | |
3321 | - change_point[i] = change_point[i-1]; | |
3322 | - change_point[i-1] = change_tmp; | |
3323 | - still_changing=1; | |
3324 | - } | |
3325 | - } | |
3326 | - } | |
3327 | - | |
3328 | - /* create a new bios memory map, removing overlaps */ | |
3329 | - overlap_entries=0; /* number of entries in the overlap table */ | |
3330 | - new_bios_entry=0; /* index for creating new bios map entries */ | |
3331 | - last_type = 0; /* start with undefined memory type */ | |
3332 | - last_addr = 0; /* start with 0 as last starting address */ | |
3333 | - /* loop through change-points, determining affect on the new bios map */ | |
3334 | - for (chgidx=0; chgidx < chg_nr; chgidx++) | |
3335 | - { | |
3336 | - /* keep track of all overlapping bios entries */ | |
3337 | - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) | |
3338 | - { | |
3339 | - /* add map entry to overlap list (> 1 entry implies an overlap) */ | |
3340 | - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; | |
3341 | - } | |
3342 | - else | |
3343 | - { | |
3344 | - /* remove entry from list (order independent, so swap with last) */ | |
3345 | - for (i=0; i<overlap_entries; i++) | |
3346 | - { | |
3347 | - if (overlap_list[i] == change_point[chgidx]->pbios) | |
3348 | - overlap_list[i] = overlap_list[overlap_entries-1]; | |
3349 | - } | |
3350 | - overlap_entries--; | |
3351 | - } | |
3352 | - /* if there are overlapping entries, decide which "type" to use */ | |
3353 | - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ | |
3354 | - current_type = 0; | |
3355 | - for (i=0; i<overlap_entries; i++) | |
3356 | - if (overlap_list[i]->type > current_type) | |
3357 | - current_type = overlap_list[i]->type; | |
3358 | - /* continue building up new bios map based on this information */ | |
3359 | - if (current_type != last_type) { | |
3360 | - if (last_type != 0) { | |
3361 | - new_bios[new_bios_entry].size = | |
3362 | - change_point[chgidx]->addr - last_addr; | |
3363 | - /* move forward only if the new size was non-zero */ | |
3364 | - if (new_bios[new_bios_entry].size != 0) | |
3365 | - if (++new_bios_entry >= E820MAX) | |
3366 | - break; /* no more space left for new bios entries */ | |
3367 | - } | |
3368 | - if (current_type != 0) { | |
3369 | - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; | |
3370 | - new_bios[new_bios_entry].type = current_type; | |
3371 | - last_addr=change_point[chgidx]->addr; | |
3372 | - } | |
3373 | - last_type = current_type; | |
3374 | - } | |
3375 | - } | |
3376 | - new_nr = new_bios_entry; /* retain count for new bios entries */ | |
3377 | - | |
3378 | - /* copy new bios mapping into original location */ | |
3379 | - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); | |
3380 | - *pnr_map = new_nr; | |
3381 | - | |
3382 | - return 0; | |
3383 | -} | |
3384 | - | |
3385 | -/* | |
3386 | - * Copy the BIOS e820 map into a safe place. | |
3387 | - * | |
3388 | - * Sanity-check it while we're at it.. | |
3389 | - * | |
3390 | - * If we're lucky and live on a modern system, the setup code | |
3391 | - * will have given us a memory map that we can use to properly | |
3392 | - * set up memory. If we aren't, we'll fake a memory map. | |
3393 | - * | |
3394 | - * We check to see that the memory map contains at least 2 elements | |
3395 | - * before we'll use it, because the detection code in setup.S may | |
3396 | - * not be perfect and most every PC known to man has two memory | |
3397 | - * regions: one from 0 to 640k, and one from 1mb up. (The IBM | |
3398 | - * thinkpad 560x, for example, does not cooperate with the memory | |
3399 | - * detection code.) | |
3400 | - */ | |
3401 | -int __init copy_e820_map(struct e820entry *biosmap, int nr_map) | |
3402 | -{ | |
3403 | -#ifndef CONFIG_XEN | |
3404 | - /* Only one memory region (or negative)? Ignore it */ | |
3405 | - if (nr_map < 2) | |
3406 | - return -1; | |
3407 | -#else | |
3408 | - BUG_ON(nr_map < 1); | |
3409 | -#endif | |
3410 | - | |
3411 | - do { | |
3412 | - u64 start = biosmap->addr; | |
3413 | - u64 size = biosmap->size; | |
3414 | - u64 end = start + size; | |
3415 | - u32 type = biosmap->type; | |
3416 | - | |
3417 | - /* Overflow in 64 bits? Ignore the memory map. */ | |
3418 | - if (start > end) | |
3419 | - return -1; | |
3420 | - | |
3421 | - add_memory_region(start, size, type); | |
3422 | - } while (biosmap++, --nr_map); | |
3423 | - | |
3424 | -#ifdef CONFIG_XEN | |
3425 | - if (is_initial_xendomain()) { | |
3426 | - struct xen_memory_map memmap; | |
3427 | - | |
3428 | - memmap.nr_entries = E820MAX; | |
3429 | - set_xen_guest_handle(memmap.buffer, machine_e820.map); | |
3430 | - | |
3431 | - if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)) | |
3432 | - BUG(); | |
3433 | - machine_e820.nr_map = memmap.nr_entries; | |
3434 | - } else | |
3435 | - machine_e820 = e820; | |
3436 | -#endif | |
3437 | - | |
3438 | - return 0; | |
3439 | -} | |
3440 | - | |
3441 | -/* | |
3442 | - * Find the highest page frame number we have available | |
3443 | - */ | |
3444 | -void __init propagate_e820_map(void) | |
3445 | -{ | |
3446 | - int i; | |
3447 | - | |
3448 | - max_pfn = 0; | |
3449 | - | |
3450 | - for (i = 0; i < e820.nr_map; i++) { | |
3451 | - unsigned long start, end; | |
3452 | - /* RAM? */ | |
3453 | - if (e820.map[i].type != E820_RAM) | |
3454 | - continue; | |
3455 | - start = PFN_UP(e820.map[i].addr); | |
3456 | - end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | |
3457 | - if (start >= end) | |
3458 | - continue; | |
3459 | - if (end > max_pfn) | |
3460 | - max_pfn = end; | |
3461 | - memory_present(0, start, end); | |
3462 | - } | |
3463 | -} | |
3464 | - | |
3465 | -/* | |
3466 | - * Register fully available low RAM pages with the bootmem allocator. | |
3467 | - */ | |
3468 | -void __init register_bootmem_low_pages(unsigned long max_low_pfn) | |
3469 | -{ | |
3470 | - int i; | |
3471 | - | |
3472 | - for (i = 0; i < e820.nr_map; i++) { | |
3473 | - unsigned long curr_pfn, last_pfn, size; | |
3474 | - /* | |
3475 | - * Reserve usable low memory | |
3476 | - */ | |
3477 | - if (e820.map[i].type != E820_RAM) | |
3478 | - continue; | |
3479 | - /* | |
3480 | - * We are rounding up the start address of usable memory: | |
3481 | - */ | |
3482 | - curr_pfn = PFN_UP(e820.map[i].addr); | |
3483 | - if (curr_pfn >= max_low_pfn) | |
3484 | - continue; | |
3485 | - /* | |
3486 | - * ... and at the end of the usable range downwards: | |
3487 | - */ | |
3488 | - last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | |
3489 | - | |
3490 | -#ifdef CONFIG_XEN | |
3491 | - /* | |
3492 | - * Truncate to the number of actual pages currently | |
3493 | - * present. | |
3494 | - */ | |
3495 | - if (last_pfn > xen_start_info->nr_pages) | |
3496 | - last_pfn = xen_start_info->nr_pages; | |
3497 | -#endif | |
3498 | - | |
3499 | - if (last_pfn > max_low_pfn) | |
3500 | - last_pfn = max_low_pfn; | |
3501 | - | |
3502 | - /* | |
3503 | - * .. finally, did all the rounding and playing | |
3504 | - * around just make the area go away? | |
3505 | - */ | |
3506 | - if (last_pfn <= curr_pfn) | |
3507 | - continue; | |
3508 | - | |
3509 | - size = last_pfn - curr_pfn; | |
3510 | - free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); | |
3511 | - } | |
3512 | -} | |
3513 | - | |
3514 | -void __init e820_register_memory(void) | |
3515 | -{ | |
3516 | - unsigned long gapstart, gapsize, round; | |
3517 | - unsigned long long last; | |
3518 | - int i; | |
3519 | - | |
3520 | -#ifdef CONFIG_XEN | |
3521 | - if (is_initial_xendomain()) { | |
3522 | - struct xen_memory_map memmap; | |
3523 | - | |
3524 | - memmap.nr_entries = E820MAX; | |
3525 | - set_xen_guest_handle(memmap.buffer, machine_e820.map); | |
3526 | - | |
3527 | - if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)) | |
3528 | - BUG(); | |
3529 | - machine_e820.nr_map = memmap.nr_entries; | |
3530 | - } | |
3531 | - else | |
3532 | - machine_e820 = e820; | |
3533 | -#define e820 machine_e820 | |
3534 | -#endif | |
3535 | - | |
3536 | - /* | |
3537 | - * Search for the biggest gap in the low 32 bits of the e820 | |
3538 | - * memory space. | |
3539 | - */ | |
3540 | - last = 0x100000000ull; | |
3541 | - gapstart = 0x10000000; | |
3542 | - gapsize = 0x400000; | |
3543 | - i = e820.nr_map; | |
3544 | - while (--i >= 0) { | |
3545 | - unsigned long long start = e820.map[i].addr; | |
3546 | - unsigned long long end = start + e820.map[i].size; | |
3547 | - | |
3548 | - /* | |
3549 | - * Since "last" is at most 4GB, we know we'll | |
3550 | - * fit in 32 bits if this condition is true | |
3551 | - */ | |
3552 | - if (last > end) { | |
3553 | - unsigned long gap = last - end; | |
3554 | - | |
3555 | - if (gap > gapsize) { | |
3556 | - gapsize = gap; | |
3557 | - gapstart = end; | |
3558 | - } | |
3559 | - } | |
3560 | - if (start < last) | |
3561 | - last = start; | |
3562 | - } | |
3563 | -#undef e820 | |
3564 | - | |
3565 | - /* | |
3566 | - * See how much we want to round up: start off with | |
3567 | - * rounding to the next 1MB area. | |
3568 | - */ | |
3569 | - round = 0x100000; | |
3570 | - while ((gapsize >> 4) > round) | |
3571 | - round += round; | |
3572 | - /* Fun with two's complement */ | |
3573 | - pci_mem_start = (gapstart + round) & -round; | |
3574 | - | |
3575 | - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", | |
3576 | - pci_mem_start, gapstart, gapsize); | |
3577 | -} | |
3578 | - | |
3579 | -void __init print_memory_map(char *who) | |
3580 | -{ | |
3581 | - int i; | |
3582 | - | |
3583 | - for (i = 0; i < e820.nr_map; i++) { | |
3584 | - printk(" %s: %016Lx - %016Lx ", who, | |
3585 | - e820.map[i].addr, | |
3586 | - e820.map[i].addr + e820.map[i].size); | |
3587 | - switch (e820.map[i].type) { | |
3588 | - case E820_RAM: printk("(usable)\n"); | |
3589 | - break; | |
3590 | - case E820_RESERVED: | |
3591 | - printk("(reserved)\n"); | |
3592 | - break; | |
3593 | - case E820_ACPI: | |
3594 | - printk("(ACPI data)\n"); | |
3595 | - break; | |
3596 | - case E820_NVS: | |
3597 | - printk("(ACPI NVS)\n"); | |
3598 | - break; | |
3599 | - default: printk("type %u\n", e820.map[i].type); | |
3600 | - break; | |
3601 | - } | |
3602 | - } | |
3603 | -} | |
3604 | - | |
3605 | -void __init limit_regions(unsigned long long size) | |
3606 | -{ | |
3607 | - unsigned long long current_addr = 0; | |
3608 | - int i; | |
3609 | - | |
3610 | - print_memory_map("limit_regions start"); | |
3611 | - for (i = 0; i < e820.nr_map; i++) { | |
3612 | - current_addr = e820.map[i].addr + e820.map[i].size; | |
3613 | - if (current_addr < size) | |
3614 | - continue; | |
3615 | - | |
3616 | - if (e820.map[i].type != E820_RAM) | |
3617 | - continue; | |
3618 | - | |
3619 | - if (e820.map[i].addr >= size) { | |
3620 | - /* | |
3621 | - * This region starts past the end of the | |
3622 | - * requested size, skip it completely. | |
3623 | - */ | |
3624 | - e820.nr_map = i; | |
3625 | - } else { | |
3626 | - e820.nr_map = i + 1; | |
3627 | - e820.map[i].size -= current_addr - size; | |
3628 | - } | |
3629 | - print_memory_map("limit_regions endfor"); | |
3630 | - return; | |
3631 | - } | |
3632 | -#ifdef CONFIG_XEN | |
3633 | - if (current_addr < size) { | |
3634 | - /* | |
3635 | - * The e820 map finished before our requested size so | |
3636 | - * extend the final entry to the requested address. | |
3637 | - */ | |
3638 | - --i; | |
3639 | - if (e820.map[i].type == E820_RAM) | |
3640 | - e820.map[i].size -= current_addr - size; | |
3641 | - else | |
3642 | - add_memory_region(current_addr, size - current_addr, E820_RAM); | |
3643 | - } | |
3644 | -#endif | |
3645 | - print_memory_map("limit_regions endfunc"); | |
3646 | -} | |
3647 | - | |
3648 | -/* | |
3649 | - * This function checks if any part of the range <start,end> is mapped | |
3650 | - * with type. | |
3651 | - */ | |
3652 | -int | |
3653 | -e820_any_mapped(u64 start, u64 end, unsigned type) | |
3654 | -{ | |
3655 | - int i; | |
3656 | - | |
3657 | -#ifndef CONFIG_XEN | |
3658 | - for (i = 0; i < e820.nr_map; i++) { | |
3659 | - const struct e820entry *ei = &e820.map[i]; | |
3660 | -#else | |
3661 | - if (!is_initial_xendomain()) | |
3662 | - return 0; | |
3663 | - for (i = 0; i < machine_e820.nr_map; ++i) { | |
3664 | - const struct e820entry *ei = &machine_e820.map[i]; | |
3665 | -#endif | |
3666 | - | |
3667 | - if (type && ei->type != type) | |
3668 | - continue; | |
3669 | - if (ei->addr >= end || ei->addr + ei->size <= start) | |
3670 | - continue; | |
3671 | - return 1; | |
3672 | - } | |
3673 | - return 0; | |
3674 | -} | |
3675 | -EXPORT_SYMBOL_GPL(e820_any_mapped); | |
3676 | - | |
3677 | - /* | |
3678 | - * This function checks if the entire range <start,end> is mapped with type. | |
3679 | - * | |
3680 | - * Note: this function only works correct if the e820 table is sorted and | |
3681 | - * not-overlapping, which is the case | |
3682 | - */ | |
3683 | -int __init | |
3684 | -e820_all_mapped(unsigned long s, unsigned long e, unsigned type) | |
3685 | -{ | |
3686 | - u64 start = s; | |
3687 | - u64 end = e; | |
3688 | - int i; | |
3689 | - | |
3690 | -#ifndef CONFIG_XEN | |
3691 | - for (i = 0; i < e820.nr_map; i++) { | |
3692 | - struct e820entry *ei = &e820.map[i]; | |
3693 | -#else | |
3694 | - if (!is_initial_xendomain()) | |
3695 | - return 0; | |
3696 | - for (i = 0; i < machine_e820.nr_map; ++i) { | |
3697 | - const struct e820entry *ei = &machine_e820.map[i]; | |
3698 | -#endif | |
3699 | - | |
3700 | - if (type && ei->type != type) | |
3701 | - continue; | |
3702 | - /* is the region (part) in overlap with the current region ?*/ | |
3703 | - if (ei->addr >= end || ei->addr + ei->size <= start) | |
3704 | - continue; | |
3705 | - /* if the region is at the beginning of <start,end> we move | |
3706 | - * start to the end of the region since it's ok until there | |
3707 | - */ | |
3708 | - if (ei->addr <= start) | |
3709 | - start = ei->addr + ei->size; | |
3710 | - /* if start is now at or beyond end, we're done, full | |
3711 | - * coverage */ | |
3712 | - if (start >= end) | |
3713 | - return 1; /* we're done */ | |
3714 | - } | |
3715 | - return 0; | |
3716 | -} | |
3717 | - | |
3718 | -static int __init parse_memmap(char *arg) | |
3719 | -{ | |
3720 | - if (!arg) | |
3721 | - return -EINVAL; | |
3722 | - | |
3723 | - if (strcmp(arg, "exactmap") == 0) { | |
3724 | -#ifdef CONFIG_CRASH_DUMP | |
3725 | - /* If we are doing a crash dump, we | |
3726 | - * still need to know the real mem | |
3727 | - * size before original memory map is | |
3728 | - * reset. | |
3729 | - */ | |
3730 | - propagate_e820_map(); | |
3731 | - saved_max_pfn = max_pfn; | |
3732 | -#endif | |
3733 | - e820.nr_map = 0; | |
3734 | - user_defined_memmap = 1; | |
3735 | - } else { | |
3736 | - /* If the user specifies memory size, we | |
3737 | - * limit the BIOS-provided memory map to | |
3738 | - * that size. exactmap can be used to specify | |
3739 | - * the exact map. mem=number can be used to | |
3740 | - * trim the existing memory map. | |
3741 | - */ | |
3742 | - unsigned long long start_at, mem_size; | |
3743 | - | |
3744 | - mem_size = memparse(arg, &arg); | |
3745 | - if (*arg == '@') { | |
3746 | - start_at = memparse(arg+1, &arg); | |
3747 | - add_memory_region(start_at, mem_size, E820_RAM); | |
3748 | - } else if (*arg == '#') { | |
3749 | - start_at = memparse(arg+1, &arg); | |
3750 | - add_memory_region(start_at, mem_size, E820_ACPI); | |
3751 | - } else if (*arg == '$') { | |
3752 | - start_at = memparse(arg+1, &arg); | |
3753 | - add_memory_region(start_at, mem_size, E820_RESERVED); | |
3754 | - } else { | |
3755 | - limit_regions(mem_size); | |
3756 | - user_defined_memmap = 1; | |
3757 | - } | |
3758 | - } | |
3759 | - return 0; | |
3760 | -} | |
3761 | -early_param("memmap", parse_memmap); | |
3762 | - | |
3763 | -#ifndef CONFIG_XEN | |
3764 | -void __init update_memory_range(u64 start, u64 size, unsigned old_type, | |
3765 | - unsigned new_type) | |
3766 | -{ | |
3767 | - int i; | |
3768 | - | |
3769 | - BUG_ON(old_type == new_type); | |
3770 | - | |
3771 | - for (i = 0; i < e820.nr_map; i++) { | |
3772 | - struct e820entry *ei = &e820.map[i]; | |
3773 | - u64 final_start, final_end; | |
3774 | - if (ei->type != old_type) | |
3775 | - continue; | |
3776 | - /* totally covered? */ | |
3777 | - if (ei->addr >= start && ei->size <= size) { | |
3778 | - ei->type = new_type; | |
3779 | - continue; | |
3780 | - } | |
3781 | - /* partially covered */ | |
3782 | - final_start = max(start, ei->addr); | |
3783 | - final_end = min(start + size, ei->addr + ei->size); | |
3784 | - if (final_start >= final_end) | |
3785 | - continue; | |
3786 | - add_memory_region(final_start, final_end - final_start, | |
3787 | - new_type); | |
3788 | - } | |
3789 | -} | |
3790 | - | |
3791 | -void __init update_e820(void) | |
3792 | -{ | |
3793 | - u8 nr_map; | |
3794 | - | |
3795 | - nr_map = e820.nr_map; | |
3796 | - if (sanitize_e820_map(e820.map, &nr_map)) | |
3797 | - return; | |
3798 | - e820.nr_map = nr_map; | |
3799 | - printk(KERN_INFO "modified physical RAM map:\n"); | |
3800 | - print_memory_map("modified"); | |
3801 | -} | |
3802 | -#endif | |
82094b55 | 3803 | --- sle11-2009-10-16.orig/arch/x86/kernel/e820_64-xen.c 2009-03-16 16:38:05.000000000 +0100 |
2cb7cef9 BS |
3804 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 |
3805 | @@ -1,1045 +0,0 @@ | |
3806 | -/* | |
3807 | - * Handle the memory map. | |
3808 | - * The functions here do the job until bootmem takes over. | |
3809 | - * | |
3810 | - * Getting sanitize_e820_map() in sync with i386 version by applying change: | |
3811 | - * - Provisions for empty E820 memory regions (reported by certain BIOSes). | |
3812 | - * Alex Achenbach <xela@slit.de>, December 2002. | |
3813 | - * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> | |
3814 | - * | |
3815 | - */ | |
3816 | -#include <linux/kernel.h> | |
3817 | -#include <linux/types.h> | |
3818 | -#include <linux/init.h> | |
3819 | -#include <linux/bootmem.h> | |
3820 | -#include <linux/ioport.h> | |
3821 | -#include <linux/string.h> | |
3822 | -#include <linux/kexec.h> | |
3823 | -#include <linux/module.h> | |
3824 | -#include <linux/mm.h> | |
3825 | -#include <linux/suspend.h> | |
3826 | -#include <linux/pfn.h> | |
3827 | - | |
3828 | -#include <asm/pgtable.h> | |
3829 | -#include <asm/page.h> | |
3830 | -#include <asm/e820.h> | |
3831 | -#include <asm/proto.h> | |
3832 | -#include <asm/setup.h> | |
3833 | -#include <asm/sections.h> | |
3834 | -#include <asm/kdebug.h> | |
3835 | -#include <xen/interface/memory.h> | |
3836 | - | |
3837 | -struct e820map e820 __initdata; | |
3838 | -#ifdef CONFIG_XEN | |
3839 | -struct e820map machine_e820; | |
3840 | -#endif | |
3841 | - | |
3842 | -/* | |
3843 | - * PFN of last memory page. | |
3844 | - */ | |
3845 | -unsigned long end_pfn; | |
3846 | - | |
3847 | -/* | |
3848 | - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | |
3849 | - * The direct mapping extends to max_pfn_mapped, so that we can directly access | |
3850 | - * apertures, ACPI and other tables without having to play with fixmaps. | |
3851 | - */ | |
3852 | -unsigned long max_pfn_mapped; | |
3853 | - | |
3854 | -/* | |
3855 | - * Last pfn which the user wants to use. | |
3856 | - */ | |
3857 | -static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; | |
3858 | - | |
3859 | -/* | |
3860 | - * Early reserved memory areas. | |
3861 | - */ | |
3862 | -#define MAX_EARLY_RES 20 | |
3863 | - | |
3864 | -struct early_res { | |
3865 | - unsigned long start, end; | |
3866 | - char name[16]; | |
3867 | -}; | |
3868 | -static struct early_res early_res[MAX_EARLY_RES] __initdata = { | |
3869 | -#ifndef CONFIG_XEN | |
3870 | - { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ | |
3871 | -#ifdef CONFIG_X86_TRAMPOLINE | |
3872 | - { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, | |
3873 | -#endif | |
3874 | -#endif | |
3875 | - {} | |
3876 | -}; | |
3877 | - | |
3878 | -void __init reserve_early(unsigned long start, unsigned long end, char *name) | |
3879 | -{ | |
3880 | - int i; | |
3881 | - struct early_res *r; | |
3882 | - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | |
3883 | - r = &early_res[i]; | |
3884 | - if (end > r->start && start < r->end) | |
3885 | - panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n", | |
3886 | - start, end - 1, name?name:"", r->start, r->end - 1, r->name); | |
3887 | - } | |
3888 | - if (i >= MAX_EARLY_RES) | |
3889 | - panic("Too many early reservations"); | |
3890 | - r = &early_res[i]; | |
3891 | - r->start = start; | |
3892 | - r->end = end; | |
3893 | - if (name) | |
3894 | - strncpy(r->name, name, sizeof(r->name) - 1); | |
3895 | -} | |
3896 | - | |
3897 | -void __init free_early(unsigned long start, unsigned long end) | |
3898 | -{ | |
3899 | - struct early_res *r; | |
3900 | - int i, j; | |
3901 | - | |
3902 | - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | |
3903 | - r = &early_res[i]; | |
3904 | - if (start == r->start && end == r->end) | |
3905 | - break; | |
3906 | - } | |
3907 | - if (i >= MAX_EARLY_RES || !early_res[i].end) | |
3908 | - panic("free_early on not reserved area: %lx-%lx!", start, end); | |
3909 | - | |
3910 | - for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) | |
3911 | - ; | |
3912 | - | |
3913 | - memmove(&early_res[i], &early_res[i + 1], | |
3914 | - (j - 1 - i) * sizeof(struct early_res)); | |
3915 | - | |
3916 | - early_res[j - 1].end = 0; | |
3917 | -} | |
3918 | - | |
3919 | -void __init early_res_to_bootmem(unsigned long start, unsigned long end) | |
3920 | -{ | |
3921 | - int i; | |
3922 | - unsigned long final_start, final_end; | |
3923 | - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | |
3924 | - struct early_res *r = &early_res[i]; | |
3925 | - final_start = max(start, r->start); | |
3926 | - final_end = min(end, r->end); | |
3927 | - if (final_start >= final_end) | |
3928 | - continue; | |
3929 | - printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, | |
3930 | - final_start, final_end - 1, r->name); | |
3931 | - reserve_bootmem_generic(final_start, final_end - final_start); | |
3932 | - } | |
3933 | -} | |
3934 | - | |
3935 | -/* Check for already reserved areas */ | |
3936 | -static inline int __init | |
3937 | -bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) | |
3938 | -{ | |
3939 | - int i; | |
3940 | - unsigned long addr = *addrp, last; | |
3941 | - int changed = 0; | |
3942 | -again: | |
3943 | - last = addr + size; | |
3944 | - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | |
3945 | - struct early_res *r = &early_res[i]; | |
3946 | - if (last >= r->start && addr < r->end) { | |
3947 | - *addrp = addr = round_up(r->end, align); | |
3948 | - changed = 1; | |
3949 | - goto again; | |
3950 | - } | |
3951 | - } | |
3952 | - return changed; | |
3953 | -} | |
3954 | - | |
3955 | -/* Check for already reserved areas */ | |
3956 | -static inline int __init | |
3957 | -bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align) | |
3958 | -{ | |
3959 | - int i; | |
3960 | - unsigned long addr = *addrp, last; | |
3961 | - unsigned long size = *sizep; | |
3962 | - int changed = 0; | |
3963 | -again: | |
3964 | - last = addr + size; | |
3965 | - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | |
3966 | - struct early_res *r = &early_res[i]; | |
3967 | - if (last > r->start && addr < r->start) { | |
3968 | - size = r->start - addr; | |
3969 | - changed = 1; | |
3970 | - goto again; | |
3971 | - } | |
3972 | - if (last > r->end && addr < r->end) { | |
3973 | - addr = round_up(r->end, align); | |
3974 | - size = last - addr; | |
3975 | - changed = 1; | |
3976 | - goto again; | |
3977 | - } | |
3978 | - if (last <= r->end && addr >= r->start) { | |
3979 | - (*sizep)++; | |
3980 | - return 0; | |
3981 | - } | |
3982 | - } | |
3983 | - if (changed) { | |
3984 | - *addrp = addr; | |
3985 | - *sizep = size; | |
3986 | - } | |
3987 | - return changed; | |
3988 | -} | |
3989 | -/* | |
3990 | - * This function checks if any part of the range <start,end> is mapped | |
3991 | - * with type. | |
3992 | - */ | |
3993 | -int | |
3994 | -e820_any_mapped(unsigned long start, unsigned long end, unsigned type) | |
3995 | -{ | |
3996 | - int i; | |
3997 | - | |
3998 | -#ifndef CONFIG_XEN | |
3999 | - for (i = 0; i < e820.nr_map; i++) { | |
4000 | - struct e820entry *ei = &e820.map[i]; | |
4001 | -#else | |
4002 | - if (!is_initial_xendomain()) | |
4003 | - return 0; | |
4004 | - for (i = 0; i < machine_e820.nr_map; i++) { | |
4005 | - const struct e820entry *ei = &machine_e820.map[i]; | |
4006 | -#endif | |
4007 | - | |
4008 | - if (type && ei->type != type) | |
4009 | - continue; | |
4010 | - if (ei->addr >= end || ei->addr + ei->size <= start) | |
4011 | - continue; | |
4012 | - return 1; | |
4013 | - } | |
4014 | - return 0; | |
4015 | -} | |
4016 | -EXPORT_SYMBOL_GPL(e820_any_mapped); | |
4017 | - | |
4018 | -/* | |
4019 | - * This function checks if the entire range <start,end> is mapped with type. | |
4020 | - * | |
4021 | - * Note: this function only works correct if the e820 table is sorted and | |
4022 | - * not-overlapping, which is the case | |
4023 | - */ | |
4024 | -int __init e820_all_mapped(unsigned long start, unsigned long end, | |
4025 | - unsigned type) | |
4026 | -{ | |
4027 | - int i; | |
4028 | - | |
4029 | -#ifndef CONFIG_XEN | |
4030 | - for (i = 0; i < e820.nr_map; i++) { | |
4031 | - struct e820entry *ei = &e820.map[i]; | |
4032 | -#else | |
4033 | - if (!is_initial_xendomain()) | |
4034 | - return 0; | |
4035 | - for (i = 0; i < machine_e820.nr_map; i++) { | |
4036 | - const struct e820entry *ei = &machine_e820.map[i]; | |
4037 | -#endif | |
4038 | - | |
4039 | - if (type && ei->type != type) | |
4040 | - continue; | |
4041 | - /* is the region (part) in overlap with the current region ?*/ | |
4042 | - if (ei->addr >= end || ei->addr + ei->size <= start) | |
4043 | - continue; | |
4044 | - | |
4045 | - /* if the region is at the beginning of <start,end> we move | |
4046 | - * start to the end of the region since it's ok until there | |
4047 | - */ | |
4048 | - if (ei->addr <= start) | |
4049 | - start = ei->addr + ei->size; | |
4050 | - /* | |
4051 | - * if start is now at or beyond end, we're done, full | |
4052 | - * coverage | |
4053 | - */ | |
4054 | - if (start >= end) | |
4055 | - return 1; | |
4056 | - } | |
4057 | - return 0; | |
4058 | -} | |
4059 | - | |
4060 | -/* | |
4061 | - * Find a free area with specified alignment in a specific range. | |
4062 | - */ | |
4063 | -unsigned long __init find_e820_area(unsigned long start, unsigned long end, | |
4064 | - unsigned long size, unsigned long align) | |
4065 | -{ | |
4066 | - int i; | |
4067 | - | |
4068 | - for (i = 0; i < e820.nr_map; i++) { | |
4069 | - struct e820entry *ei = &e820.map[i]; | |
4070 | - unsigned long addr, last; | |
4071 | - unsigned long ei_last; | |
4072 | - | |
4073 | - if (ei->type != E820_RAM) | |
4074 | - continue; | |
4075 | - addr = round_up(ei->addr, align); | |
4076 | - ei_last = ei->addr + ei->size; | |
4077 | - if (addr < start) | |
4078 | - addr = round_up(start, align); | |
4079 | - if (addr >= ei_last) | |
4080 | - continue; | |
4081 | - while (bad_addr(&addr, size, align) && addr+size <= ei_last) | |
4082 | - ; | |
4083 | - last = addr + size; | |
4084 | - if (last > ei_last) | |
4085 | - continue; | |
4086 | - if (last > end) | |
4087 | - continue; | |
4088 | - return addr; | |
4089 | - } | |
4090 | - return -1UL; | |
4091 | -} | |
4092 | - | |
4093 | -/* | |
4094 | - * Find next free range after *start | |
4095 | - */ | |
4096 | -unsigned long __init find_e820_area_size(unsigned long start, | |
4097 | - unsigned long *sizep, | |
4098 | - unsigned long align) | |
4099 | -{ | |
4100 | - int i; | |
4101 | - | |
4102 | - for (i = 0; i < e820.nr_map; i++) { | |
4103 | - struct e820entry *ei = &e820.map[i]; | |
4104 | - unsigned long addr, last; | |
4105 | - unsigned long ei_last; | |
4106 | - | |
4107 | - if (ei->type != E820_RAM) | |
4108 | - continue; | |
4109 | - addr = round_up(ei->addr, align); | |
4110 | - ei_last = ei->addr + ei->size; | |
4111 | - if (addr < start) | |
4112 | - addr = round_up(start, align); | |
4113 | - if (addr >= ei_last) | |
4114 | - continue; | |
4115 | - *sizep = ei_last - addr; | |
4116 | - while (bad_addr_size(&addr, sizep, align) && | |
4117 | - addr + *sizep <= ei_last) | |
4118 | - ; | |
4119 | - last = addr + *sizep; | |
4120 | - if (last > ei_last) | |
4121 | - continue; | |
4122 | - return addr; | |
4123 | - } | |
4124 | - return -1UL; | |
4125 | - | |
4126 | -} | |
4127 | -/* | |
4128 | - * Find the highest page frame number we have available | |
4129 | - */ | |
4130 | -unsigned long __init e820_end_of_ram(void) | |
4131 | -{ | |
4132 | - unsigned long end_pfn; | |
4133 | - | |
4134 | - end_pfn = find_max_pfn_with_active_regions(); | |
4135 | - | |
4136 | - if (end_pfn > max_pfn_mapped) | |
4137 | - max_pfn_mapped = end_pfn; | |
4138 | - if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT) | |
4139 | - max_pfn_mapped = MAXMEM>>PAGE_SHIFT; | |
4140 | - if (end_pfn > end_user_pfn) | |
4141 | - end_pfn = end_user_pfn; | |
4142 | - if (end_pfn > max_pfn_mapped) | |
4143 | - end_pfn = max_pfn_mapped; | |
4144 | - | |
4145 | - printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped); | |
4146 | - return end_pfn; | |
4147 | -} | |
4148 | - | |
4149 | -/* | |
4150 | - * Mark e820 reserved areas as busy for the resource manager. | |
4151 | - */ | |
4152 | -void __init e820_reserve_resources(struct e820entry *e820, int nr_map) | |
4153 | -{ | |
4154 | - int i; | |
4155 | - struct resource *res; | |
4156 | - | |
4157 | - res = alloc_bootmem_low(sizeof(struct resource) * nr_map); | |
4158 | - for (i = 0; i < nr_map; i++) { | |
4159 | - switch (e820[i].type) { | |
4160 | - case E820_RAM: res->name = "System RAM"; break; | |
4161 | - case E820_ACPI: res->name = "ACPI Tables"; break; | |
4162 | - case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; | |
4163 | - default: res->name = "reserved"; | |
4164 | - } | |
4165 | - res->start = e820[i].addr; | |
4166 | - res->end = res->start + e820[i].size - 1; | |
4167 | - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | |
4168 | - insert_resource(&iomem_resource, res); | |
4169 | - res++; | |
4170 | - } | |
4171 | -} | |
4172 | - | |
4173 | -#ifndef CONFIG_XEN | |
4174 | -/* | |
4175 | - * Find the ranges of physical addresses that do not correspond to | |
4176 | - * e820 RAM areas and mark the corresponding pages as nosave for software | |
4177 | - * suspend and suspend to RAM. | |
4178 | - * | |
4179 | - * This function requires the e820 map to be sorted and without any | |
4180 | - * overlapping entries and assumes the first e820 area to be RAM. | |
4181 | - */ | |
4182 | -void __init e820_mark_nosave_regions(void) | |
4183 | -{ | |
4184 | - int i; | |
4185 | - unsigned long paddr; | |
4186 | - | |
4187 | - paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE); | |
4188 | - for (i = 1; i < e820.nr_map; i++) { | |
4189 | - struct e820entry *ei = &e820.map[i]; | |
4190 | - | |
4191 | - if (paddr < ei->addr) | |
4192 | - register_nosave_region(PFN_DOWN(paddr), | |
4193 | - PFN_UP(ei->addr)); | |
4194 | - | |
4195 | - paddr = round_down(ei->addr + ei->size, PAGE_SIZE); | |
4196 | - if (ei->type != E820_RAM) | |
4197 | - register_nosave_region(PFN_UP(ei->addr), | |
4198 | - PFN_DOWN(paddr)); | |
4199 | - | |
4200 | - if (paddr >= (end_pfn << PAGE_SHIFT)) | |
4201 | - break; | |
4202 | - } | |
4203 | -} | |
4204 | -#endif | |
4205 | - | |
4206 | -/* | |
4207 | - * Finds an active region in the address range from start_pfn to end_pfn and | |
4208 | - * returns its range in ei_startpfn and ei_endpfn for the e820 entry. | |
4209 | - */ | |
4210 | -static int __init e820_find_active_region(const struct e820entry *ei, | |
4211 | - unsigned long start_pfn, | |
4212 | - unsigned long end_pfn, | |
4213 | - unsigned long *ei_startpfn, | |
4214 | - unsigned long *ei_endpfn) | |
4215 | -{ | |
4216 | - *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT; | |
4217 | - *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT; | |
4218 | - | |
4219 | - /* Skip map entries smaller than a page */ | |
4220 | - if (*ei_startpfn >= *ei_endpfn) | |
4221 | - return 0; | |
4222 | - | |
4223 | - /* Check if max_pfn_mapped should be updated */ | |
4224 | - if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped) | |
4225 | - max_pfn_mapped = *ei_endpfn; | |
4226 | - | |
4227 | - /* Skip if map is outside the node */ | |
4228 | - if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || | |
4229 | - *ei_startpfn >= end_pfn) | |
4230 | - return 0; | |
4231 | - | |
4232 | - /* Check for overlaps */ | |
4233 | - if (*ei_startpfn < start_pfn) | |
4234 | - *ei_startpfn = start_pfn; | |
4235 | - if (*ei_endpfn > end_pfn) | |
4236 | - *ei_endpfn = end_pfn; | |
4237 | - | |
4238 | - /* Obey end_user_pfn to save on memmap */ | |
4239 | - if (*ei_startpfn >= end_user_pfn) | |
4240 | - return 0; | |
4241 | - if (*ei_endpfn > end_user_pfn) | |
4242 | - *ei_endpfn = end_user_pfn; | |
4243 | - | |
4244 | - return 1; | |
4245 | -} | |
4246 | - | |
4247 | -/* Walk the e820 map and register active regions within a node */ | |
4248 | -void __init | |
4249 | -e820_register_active_regions(int nid, unsigned long start_pfn, | |
4250 | - unsigned long end_pfn) | |
4251 | -{ | |
4252 | - unsigned long ei_startpfn; | |
4253 | - unsigned long ei_endpfn; | |
4254 | - int i; | |
4255 | - | |
4256 | - for (i = 0; i < e820.nr_map; i++) | |
4257 | - if (e820_find_active_region(&e820.map[i], | |
4258 | - start_pfn, end_pfn, | |
4259 | - &ei_startpfn, &ei_endpfn)) | |
4260 | - add_active_range(nid, ei_startpfn, ei_endpfn); | |
4261 | -} | |
4262 | - | |
4263 | -/* | |
4264 | - * Add a memory region to the kernel e820 map. | |
4265 | - */ | |
4266 | -void __init add_memory_region(unsigned long start, unsigned long size, int type) | |
4267 | -{ | |
4268 | - int x = e820.nr_map; | |
4269 | - | |
4270 | - if (x == E820MAX) { | |
4271 | - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | |
4272 | - return; | |
4273 | - } | |
4274 | - | |
4275 | - e820.map[x].addr = start; | |
4276 | - e820.map[x].size = size; | |
4277 | - e820.map[x].type = type; | |
4278 | - e820.nr_map++; | |
4279 | -} | |
4280 | - | |
4281 | -/* | |
4282 | - * Find the hole size (in bytes) in the memory range. | |
4283 | - * @start: starting address of the memory range to scan | |
4284 | - * @end: ending address of the memory range to scan | |
4285 | - */ | |
4286 | -unsigned long __init e820_hole_size(unsigned long start, unsigned long end) | |
4287 | -{ | |
4288 | - unsigned long start_pfn = start >> PAGE_SHIFT; | |
4289 | - unsigned long end_pfn = end >> PAGE_SHIFT; | |
4290 | - unsigned long ei_startpfn, ei_endpfn, ram = 0; | |
4291 | - int i; | |
4292 | - | |
4293 | - for (i = 0; i < e820.nr_map; i++) { | |
4294 | - if (e820_find_active_region(&e820.map[i], | |
4295 | - start_pfn, end_pfn, | |
4296 | - &ei_startpfn, &ei_endpfn)) | |
4297 | - ram += ei_endpfn - ei_startpfn; | |
4298 | - } | |
4299 | - return end - start - (ram << PAGE_SHIFT); | |
4300 | -} | |
4301 | - | |
4302 | -static void __init e820_print_map(char *who) | |
4303 | -{ | |
4304 | - int i; | |
4305 | - | |
4306 | - for (i = 0; i < e820.nr_map; i++) { | |
4307 | - printk(KERN_INFO " %s: %016Lx - %016Lx ", who, | |
4308 | - (unsigned long long) e820.map[i].addr, | |
4309 | - (unsigned long long) | |
4310 | - (e820.map[i].addr + e820.map[i].size)); | |
4311 | - switch (e820.map[i].type) { | |
4312 | - case E820_RAM: | |
4313 | - printk(KERN_CONT "(usable)\n"); | |
4314 | - break; | |
4315 | - case E820_RESERVED: | |
4316 | - printk(KERN_CONT "(reserved)\n"); | |
4317 | - break; | |
4318 | - case E820_ACPI: | |
4319 | - printk(KERN_CONT "(ACPI data)\n"); | |
4320 | - break; | |
4321 | - case E820_NVS: | |
4322 | - printk(KERN_CONT "(ACPI NVS)\n"); | |
4323 | - break; | |
4324 | - default: | |
4325 | - printk(KERN_CONT "type %u\n", e820.map[i].type); | |
4326 | - break; | |
4327 | - } | |
4328 | - } | |
4329 | -} | |
4330 | - | |
4331 | -/* | |
4332 | - * Sanitize the BIOS e820 map. | |
4333 | - * | |
4334 | - * Some e820 responses include overlapping entries. The following | |
4335 | - * replaces the original e820 map with a new one, removing overlaps. | |
4336 | - * | |
4337 | - */ | |
4338 | -static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) | |
4339 | -{ | |
4340 | - struct change_member { | |
4341 | - struct e820entry *pbios; /* pointer to original bios entry */ | |
4342 | - unsigned long long addr; /* address for this change point */ | |
4343 | - }; | |
4344 | - static struct change_member change_point_list[2*E820MAX] __initdata; | |
4345 | - static struct change_member *change_point[2*E820MAX] __initdata; | |
4346 | - static struct e820entry *overlap_list[E820MAX] __initdata; | |
4347 | - static struct e820entry new_bios[E820MAX] __initdata; | |
4348 | - struct change_member *change_tmp; | |
4349 | - unsigned long current_type, last_type; | |
4350 | - unsigned long long last_addr; | |
4351 | - int chgidx, still_changing; | |
4352 | - int overlap_entries; | |
4353 | - int new_bios_entry; | |
4354 | - int old_nr, new_nr, chg_nr; | |
4355 | - int i; | |
4356 | - | |
4357 | - /* | |
4358 | - Visually we're performing the following | |
4359 | - (1,2,3,4 = memory types)... | |
4360 | - | |
4361 | - Sample memory map (w/overlaps): | |
4362 | - ____22__________________ | |
4363 | - ______________________4_ | |
4364 | - ____1111________________ | |
4365 | - _44_____________________ | |
4366 | - 11111111________________ | |
4367 | - ____________________33__ | |
4368 | - ___________44___________ | |
4369 | - __________33333_________ | |
4370 | - ______________22________ | |
4371 | - ___________________2222_ | |
4372 | - _________111111111______ | |
4373 | - _____________________11_ | |
4374 | - _________________4______ | |
4375 | - | |
4376 | - Sanitized equivalent (no overlap): | |
4377 | - 1_______________________ | |
4378 | - _44_____________________ | |
4379 | - ___1____________________ | |
4380 | - ____22__________________ | |
4381 | - ______11________________ | |
4382 | - _________1______________ | |
4383 | - __________3_____________ | |
4384 | - ___________44___________ | |
4385 | - _____________33_________ | |
4386 | - _______________2________ | |
4387 | - ________________1_______ | |
4388 | - _________________4______ | |
4389 | - ___________________2____ | |
4390 | - ____________________33__ | |
4391 | - ______________________4_ | |
4392 | - */ | |
4393 | - | |
4394 | - /* if there's only one memory region, don't bother */ | |
4395 | - if (*pnr_map < 2) | |
4396 | - return -1; | |
4397 | - | |
4398 | - old_nr = *pnr_map; | |
4399 | - | |
4400 | - /* bail out if we find any unreasonable addresses in bios map */ | |
4401 | - for (i = 0; i < old_nr; i++) | |
4402 | - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) | |
4403 | - return -1; | |
4404 | - | |
4405 | - /* create pointers for initial change-point information (for sorting) */ | |
4406 | - for (i = 0; i < 2 * old_nr; i++) | |
4407 | - change_point[i] = &change_point_list[i]; | |
4408 | - | |
4409 | - /* record all known change-points (starting and ending addresses), | |
4410 | - omitting those that are for empty memory regions */ | |
4411 | - chgidx = 0; | |
4412 | - for (i = 0; i < old_nr; i++) { | |
4413 | - if (biosmap[i].size != 0) { | |
4414 | - change_point[chgidx]->addr = biosmap[i].addr; | |
4415 | - change_point[chgidx++]->pbios = &biosmap[i]; | |
4416 | - change_point[chgidx]->addr = biosmap[i].addr + | |
4417 | - biosmap[i].size; | |
4418 | - change_point[chgidx++]->pbios = &biosmap[i]; | |
4419 | - } | |
4420 | - } | |
4421 | - chg_nr = chgidx; | |
4422 | - | |
4423 | - /* sort change-point list by memory addresses (low -> high) */ | |
4424 | - still_changing = 1; | |
4425 | - while (still_changing) { | |
4426 | - still_changing = 0; | |
4427 | - for (i = 1; i < chg_nr; i++) { | |
4428 | - unsigned long long curaddr, lastaddr; | |
4429 | - unsigned long long curpbaddr, lastpbaddr; | |
4430 | - | |
4431 | - curaddr = change_point[i]->addr; | |
4432 | - lastaddr = change_point[i - 1]->addr; | |
4433 | - curpbaddr = change_point[i]->pbios->addr; | |
4434 | - lastpbaddr = change_point[i - 1]->pbios->addr; | |
4435 | - | |
4436 | - /* | |
4437 | - * swap entries, when: | |
4438 | - * | |
4439 | - * curaddr > lastaddr or | |
4440 | - * curaddr == lastaddr and curaddr == curpbaddr and | |
4441 | - * lastaddr != lastpbaddr | |
4442 | - */ | |
4443 | - if (curaddr < lastaddr || | |
4444 | - (curaddr == lastaddr && curaddr == curpbaddr && | |
4445 | - lastaddr != lastpbaddr)) { | |
4446 | - change_tmp = change_point[i]; | |
4447 | - change_point[i] = change_point[i-1]; | |
4448 | - change_point[i-1] = change_tmp; | |
4449 | - still_changing = 1; | |
4450 | - } | |
4451 | - } | |
4452 | - } | |
4453 | - | |
4454 | - /* create a new bios memory map, removing overlaps */ | |
4455 | - overlap_entries = 0; /* number of entries in the overlap table */ | |
4456 | - new_bios_entry = 0; /* index for creating new bios map entries */ | |
4457 | - last_type = 0; /* start with undefined memory type */ | |
4458 | - last_addr = 0; /* start with 0 as last starting address */ | |
4459 | - | |
4460 | - /* loop through change-points, determining affect on the new bios map */ | |
4461 | - for (chgidx = 0; chgidx < chg_nr; chgidx++) { | |
4462 | - /* keep track of all overlapping bios entries */ | |
4463 | - if (change_point[chgidx]->addr == | |
4464 | - change_point[chgidx]->pbios->addr) { | |
4465 | - /* | |
4466 | - * add map entry to overlap list (> 1 entry | |
4467 | - * implies an overlap) | |
4468 | - */ | |
4469 | - overlap_list[overlap_entries++] = | |
4470 | - change_point[chgidx]->pbios; | |
4471 | - } else { | |
4472 | - /* | |
4473 | - * remove entry from list (order independent, | |
4474 | - * so swap with last) | |
4475 | - */ | |
4476 | - for (i = 0; i < overlap_entries; i++) { | |
4477 | - if (overlap_list[i] == | |
4478 | - change_point[chgidx]->pbios) | |
4479 | - overlap_list[i] = | |
4480 | - overlap_list[overlap_entries-1]; | |
4481 | - } | |
4482 | - overlap_entries--; | |
4483 | - } | |
4484 | - /* | |
4485 | - * if there are overlapping entries, decide which | |
4486 | - * "type" to use (larger value takes precedence -- | |
4487 | - * 1=usable, 2,3,4,4+=unusable) | |
4488 | - */ | |
4489 | - current_type = 0; | |
4490 | - for (i = 0; i < overlap_entries; i++) | |
4491 | - if (overlap_list[i]->type > current_type) | |
4492 | - current_type = overlap_list[i]->type; | |
4493 | - /* | |
4494 | - * continue building up new bios map based on this | |
4495 | - * information | |
4496 | - */ | |
4497 | - if (current_type != last_type) { | |
4498 | - if (last_type != 0) { | |
4499 | - new_bios[new_bios_entry].size = | |
4500 | - change_point[chgidx]->addr - last_addr; | |
4501 | - /* | |
4502 | - * move forward only if the new size | |
4503 | - * was non-zero | |
4504 | - */ | |
4505 | - if (new_bios[new_bios_entry].size != 0) | |
4506 | - /* | |
4507 | - * no more space left for new | |
4508 | - * bios entries ? | |
4509 | - */ | |
4510 | - if (++new_bios_entry >= E820MAX) | |
4511 | - break; | |
4512 | - } | |
4513 | - if (current_type != 0) { | |
4514 | - new_bios[new_bios_entry].addr = | |
4515 | - change_point[chgidx]->addr; | |
4516 | - new_bios[new_bios_entry].type = current_type; | |
4517 | - last_addr = change_point[chgidx]->addr; | |
4518 | - } | |
4519 | - last_type = current_type; | |
4520 | - } | |
4521 | - } | |
4522 | - /* retain count for new bios entries */ | |
4523 | - new_nr = new_bios_entry; | |
4524 | - | |
4525 | - /* copy new bios mapping into original location */ | |
4526 | - memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); | |
4527 | - *pnr_map = new_nr; | |
4528 | - | |
4529 | - return 0; | |
4530 | -} | |
4531 | - | |
4532 | -/* | |
4533 | - * Copy the BIOS e820 map into a safe place. | |
4534 | - * | |
4535 | - * Sanity-check it while we're at it.. | |
4536 | - * | |
4537 | - * If we're lucky and live on a modern system, the setup code | |
4538 | - * will have given us a memory map that we can use to properly | |
4539 | - * set up memory. If we aren't, we'll fake a memory map. | |
4540 | - */ | |
4541 | -static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) | |
4542 | -{ | |
4543 | -#ifndef CONFIG_XEN | |
4544 | - /* Only one memory region (or negative)? Ignore it */ | |
4545 | - if (nr_map < 2) | |
4546 | - return -1; | |
4547 | -#else | |
4548 | - BUG_ON(nr_map < 1); | |
4549 | -#endif | |
4550 | - | |
4551 | - do { | |
4552 | - u64 start = biosmap->addr; | |
4553 | - u64 size = biosmap->size; | |
4554 | - u64 end = start + size; | |
4555 | - u32 type = biosmap->type; | |
4556 | - | |
4557 | - /* Overflow in 64 bits? Ignore the memory map. */ | |
4558 | - if (start > end) | |
4559 | - return -1; | |
4560 | - | |
4561 | - add_memory_region(start, size, type); | |
4562 | - } while (biosmap++, --nr_map); | |
4563 | - | |
4564 | -#ifdef CONFIG_XEN | |
4565 | - if (is_initial_xendomain()) { | |
4566 | - struct xen_memory_map memmap; | |
4567 | - | |
4568 | - memmap.nr_entries = E820MAX; | |
4569 | - set_xen_guest_handle(memmap.buffer, machine_e820.map); | |
4570 | - | |
4571 | - if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)) | |
4572 | - BUG(); | |
4573 | - machine_e820.nr_map = memmap.nr_entries; | |
4574 | - } else | |
4575 | - machine_e820 = e820; | |
4576 | -#endif | |
4577 | - | |
4578 | - return 0; | |
4579 | -} | |
4580 | - | |
4581 | -static void early_panic(char *msg) | |
4582 | -{ | |
4583 | - early_printk(msg); | |
4584 | - panic(msg); | |
4585 | -} | |
4586 | - | |
4587 | -/* We're not void only for x86 32-bit compat */ | |
4588 | -char * __init machine_specific_memory_setup(void) | |
4589 | -{ | |
4590 | -#ifndef CONFIG_XEN | |
4591 | - char *who = "BIOS-e820"; | |
4592 | - /* | |
4593 | - * Try to copy the BIOS-supplied E820-map. | |
4594 | - * | |
4595 | - * Otherwise fake a memory map; one section from 0k->640k, | |
4596 | - * the next section from 1mb->appropriate_mem_k | |
4597 | - */ | |
4598 | - sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); | |
4599 | - if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) | |
4600 | - early_panic("Cannot find a valid memory map"); | |
4601 | -#else /* CONFIG_XEN */ | |
4602 | - char *who = "Xen"; | |
4603 | - int rc; | |
4604 | - struct xen_memory_map memmap; | |
4605 | - /* | |
4606 | - * This is rather large for a stack variable but this early in | |
4607 | - * the boot process we know we have plenty slack space. | |
4608 | - */ | |
4609 | - struct e820entry map[E820MAX]; | |
4610 | - | |
4611 | - memmap.nr_entries = E820MAX; | |
4612 | - set_xen_guest_handle(memmap.buffer, map); | |
4613 | - | |
4614 | - rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); | |
4615 | - if ( rc == -ENOSYS ) { | |
4616 | - memmap.nr_entries = 1; | |
4617 | - map[0].addr = 0ULL; | |
4618 | - map[0].size = xen_start_info->nr_pages << PAGE_SHIFT; | |
4619 | - /* 8MB slack (to balance backend allocations). */ | |
4620 | - map[0].size += 8 << 20; | |
4621 | - map[0].type = E820_RAM; | |
4622 | - rc = 0; | |
4623 | - } | |
4624 | - BUG_ON(rc); | |
4625 | - | |
4626 | - sanitize_e820_map(map, (char *)&memmap.nr_entries); | |
4627 | - | |
4628 | - if (copy_e820_map(map, (char)memmap.nr_entries) < 0) | |
4629 | - early_panic("Cannot find a valid memory map"); | |
4630 | -#endif | |
4631 | - printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | |
4632 | - e820_print_map(who); | |
4633 | - | |
4634 | - /* In case someone cares... */ | |
4635 | - return who; | |
4636 | -} | |
4637 | - | |
4638 | -static int __init parse_memopt(char *p) | |
4639 | -{ | |
4640 | - int i; | |
4641 | - unsigned long current_end; | |
4642 | - unsigned long end; | |
4643 | - | |
4644 | - if (!p) | |
4645 | - return -EINVAL; | |
4646 | - end_user_pfn = memparse(p, &p); | |
4647 | - end_user_pfn >>= PAGE_SHIFT; | |
4648 | - | |
4649 | - end = end_user_pfn<<PAGE_SHIFT; | |
4650 | - i = e820.nr_map-1; | |
4651 | - current_end = e820.map[i].addr + e820.map[i].size; | |
4652 | - | |
4653 | - if (current_end < end) { | |
4654 | - /* | |
4655 | - * The e820 map ends before our requested size so | |
4656 | - * extend the final entry to the requested address. | |
4657 | - */ | |
4658 | - if (e820.map[i].type == E820_RAM) | |
4659 | - e820.map[i].size = end - e820.map[i].addr; | |
4660 | - else | |
4661 | - add_memory_region(current_end, end - current_end, E820_RAM); | |
4662 | - } | |
4663 | - | |
4664 | - return 0; | |
4665 | -} | |
4666 | -early_param("mem", parse_memopt); | |
4667 | - | |
4668 | -static int userdef __initdata; | |
4669 | - | |
4670 | -static int __init parse_memmap_opt(char *p) | |
4671 | -{ | |
4672 | - char *oldp; | |
4673 | - unsigned long long start_at, mem_size; | |
4674 | - | |
4675 | - if (!strcmp(p, "exactmap")) { | |
4676 | -#ifdef CONFIG_CRASH_DUMP | |
4677 | - /* | |
4678 | - * If we are doing a crash dump, we still need to know | |
4679 | - * the real mem size before original memory map is | |
4680 | - * reset. | |
4681 | - */ | |
4682 | - e820_register_active_regions(0, 0, -1UL); | |
4683 | - saved_max_pfn = e820_end_of_ram(); | |
4684 | - remove_all_active_ranges(); | |
4685 | -#endif | |
4686 | - max_pfn_mapped = 0; | |
4687 | - e820.nr_map = 0; | |
4688 | - userdef = 1; | |
4689 | - return 0; | |
4690 | - } | |
4691 | - | |
4692 | - oldp = p; | |
4693 | - mem_size = memparse(p, &p); | |
4694 | - if (p == oldp) | |
4695 | - return -EINVAL; | |
4696 | - | |
4697 | - userdef = 1; | |
4698 | - if (*p == '@') { | |
4699 | - start_at = memparse(p+1, &p); | |
4700 | - add_memory_region(start_at, mem_size, E820_RAM); | |
4701 | - } else if (*p == '#') { | |
4702 | - start_at = memparse(p+1, &p); | |
4703 | - add_memory_region(start_at, mem_size, E820_ACPI); | |
4704 | - } else if (*p == '$') { | |
4705 | - start_at = memparse(p+1, &p); | |
4706 | - add_memory_region(start_at, mem_size, E820_RESERVED); | |
4707 | - } else { | |
4708 | - end_user_pfn = (mem_size >> PAGE_SHIFT); | |
4709 | - } | |
4710 | - return *p == '\0' ? 0 : -EINVAL; | |
4711 | -} | |
4712 | -early_param("memmap", parse_memmap_opt); | |
4713 | - | |
4714 | -void __init finish_e820_parsing(void) | |
4715 | -{ | |
4716 | - if (userdef) { | |
4717 | - char nr = e820.nr_map; | |
4718 | - | |
4719 | - if (sanitize_e820_map(e820.map, &nr) < 0) | |
4720 | - early_panic("Invalid user supplied memory map"); | |
4721 | - e820.nr_map = nr; | |
4722 | - | |
4723 | - printk(KERN_INFO "user-defined physical RAM map:\n"); | |
4724 | - e820_print_map("user"); | |
4725 | - } | |
4726 | -} | |
4727 | - | |
4728 | -#ifndef CONFIG_XEN | |
4729 | -void __init update_memory_range(u64 start, u64 size, unsigned old_type, | |
4730 | - unsigned new_type) | |
4731 | -{ | |
4732 | - int i; | |
4733 | - | |
4734 | - BUG_ON(old_type == new_type); | |
4735 | - | |
4736 | - for (i = 0; i < e820.nr_map; i++) { | |
4737 | - struct e820entry *ei = &e820.map[i]; | |
4738 | - u64 final_start, final_end; | |
4739 | - if (ei->type != old_type) | |
4740 | - continue; | |
4741 | - /* totally covered? */ | |
4742 | - if (ei->addr >= start && ei->size <= size) { | |
4743 | - ei->type = new_type; | |
4744 | - continue; | |
4745 | - } | |
4746 | - /* partially covered */ | |
4747 | - final_start = max(start, ei->addr); | |
4748 | - final_end = min(start + size, ei->addr + ei->size); | |
4749 | - if (final_start >= final_end) | |
4750 | - continue; | |
4751 | - add_memory_region(final_start, final_end - final_start, | |
4752 | - new_type); | |
4753 | - } | |
4754 | -} | |
4755 | - | |
4756 | -void __init update_e820(void) | |
4757 | -{ | |
4758 | - u8 nr_map; | |
4759 | - | |
4760 | - nr_map = e820.nr_map; | |
4761 | - if (sanitize_e820_map(e820.map, &nr_map)) | |
4762 | - return; | |
4763 | - e820.nr_map = nr_map; | |
4764 | - printk(KERN_INFO "modified physical RAM map:\n"); | |
4765 | - e820_print_map("modified"); | |
4766 | -} | |
4767 | -#endif | |
4768 | - | |
4769 | -unsigned long pci_mem_start = 0xaeedbabe; | |
4770 | -EXPORT_SYMBOL(pci_mem_start); | |
4771 | - | |
4772 | -/* | |
4773 | - * Search for the biggest gap in the low 32 bits of the e820 | |
4774 | - * memory space. We pass this space to PCI to assign MMIO resources | |
4775 | - * for hotplug or unconfigured devices in. | |
4776 | - * Hopefully the BIOS let enough space left. | |
4777 | - */ | |
4778 | -__init void e820_setup_gap(struct e820entry *e820, int nr_map) | |
4779 | -{ | |
4780 | - unsigned long gapstart, gapsize, round; | |
4781 | - unsigned long last; | |
4782 | - int i; | |
4783 | - int found = 0; | |
4784 | - | |
4785 | - last = 0x100000000ull; | |
4786 | - gapstart = 0x10000000; | |
4787 | - gapsize = 0x400000; | |
4788 | - i = nr_map; | |
4789 | - while (--i >= 0) { | |
4790 | - unsigned long long start = e820[i].addr; | |
4791 | - unsigned long long end = start + e820[i].size; | |
4792 | - | |
4793 | - /* | |
4794 | - * Since "last" is at most 4GB, we know we'll | |
4795 | - * fit in 32 bits if this condition is true | |
4796 | - */ | |
4797 | - if (last > end) { | |
4798 | - unsigned long gap = last - end; | |
4799 | - | |
4800 | - if (gap > gapsize) { | |
4801 | - gapsize = gap; | |
4802 | - gapstart = end; | |
4803 | - found = 1; | |
4804 | - } | |
4805 | - } | |
4806 | - if (start < last) | |
4807 | - last = start; | |
4808 | - } | |
4809 | - | |
4810 | - if (!found) { | |
4811 | - gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; | |
4812 | - printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " | |
4813 | - "address range\n" | |
4814 | - KERN_ERR "PCI: Unassigned devices with 32bit resource " | |
4815 | - "registers may break!\n"); | |
4816 | - } | |
4817 | - | |
4818 | - /* | |
4819 | - * See how much we want to round up: start off with | |
4820 | - * rounding to the next 1MB area. | |
4821 | - */ | |
4822 | - round = 0x100000; | |
4823 | - while ((gapsize >> 4) > round) | |
4824 | - round += round; | |
4825 | - /* Fun with two's complement */ | |
4826 | - pci_mem_start = (gapstart + round) & -round; | |
4827 | - | |
4828 | - printk(KERN_INFO | |
4829 | - "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", | |
4830 | - pci_mem_start, gapstart, gapsize); | |
4831 | -} | |
4832 | - | |
4833 | -int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) | |
4834 | -{ | |
4835 | - int i; | |
4836 | - | |
4837 | - if (slot < 0 || slot >= e820.nr_map) | |
4838 | - return -1; | |
4839 | - for (i = slot; i < e820.nr_map; i++) { | |
4840 | - if (e820.map[i].type != E820_RAM) | |
4841 | - continue; | |
4842 | - break; | |
4843 | - } | |
4844 | - if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT)) | |
4845 | - return -1; | |
4846 | - *addr = e820.map[i].addr; | |
4847 | - *size = min_t(u64, e820.map[i].size + e820.map[i].addr, | |
4848 | - max_pfn << PAGE_SHIFT) - *addr; | |
4849 | - return i + 1; | |
4850 | -} | |
82094b55 AF |
4851 | --- sle11-2009-10-16.orig/arch/x86/kernel/early_printk-xen.c 2009-09-24 10:29:16.000000000 +0200 |
4852 | +++ sle11-2009-10-16/arch/x86/kernel/early_printk-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
4853 | @@ -225,7 +225,7 @@ static struct console simnow_console = { |
4854 | static struct console *early_console = &early_vga_console; | |
4855 | static int early_console_initialized; | |
4856 | ||
4857 | -void early_printk(const char *fmt, ...) | |
4858 | +asmlinkage void early_printk(const char *fmt, ...) | |
4859 | { | |
4860 | char buf[512]; | |
4861 | int n; | |
82094b55 AF |
4862 | --- sle11-2009-10-16.orig/arch/x86/kernel/entry_32-xen.S 2009-03-16 16:38:05.000000000 +0100 |
4863 | +++ sle11-2009-10-16/arch/x86/kernel/entry_32-xen.S 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
4864 | @@ -51,15 +51,26 @@ |
4865 | #include <asm/percpu.h> | |
4866 | #include <asm/dwarf2.h> | |
4867 | #include <asm/processor-flags.h> | |
4868 | -#include "irq_vectors.h" | |
4869 | +#include <asm/ftrace.h> | |
4870 | +#include <asm/irq_vectors.h> | |
4871 | #include <xen/interface/xen.h> | |
4872 | ||
4873 | +/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | |
4874 | +#include <linux/elf-em.h> | |
4875 | +#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) | |
4876 | +#define __AUDIT_ARCH_LE 0x40000000 | |
4877 | + | |
4878 | +#ifndef CONFIG_AUDITSYSCALL | |
4879 | +#define sysenter_audit syscall_trace_entry | |
4880 | +#define sysexit_audit syscall_exit_work | |
4881 | +#endif | |
4882 | + | |
4883 | /* | |
4884 | * We use macros for low-level operations which need to be overridden | |
4885 | * for paravirtualization. The following will never clobber any registers: | |
4886 | * INTERRUPT_RETURN (aka. "iret") | |
4887 | * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") | |
4888 | - * ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit"). | |
4889 | + * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). | |
4890 | * | |
4891 | * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must | |
4892 | * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). | |
4893 | @@ -277,11 +288,6 @@ END(resume_kernel) | |
4894 | #endif | |
4895 | CFI_ENDPROC | |
4896 | ||
4897 | - .macro test_tif ti_reg # system call tracing in operation / emulation | |
4898 | - /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | |
4899 | - testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(\ti_reg) | |
4900 | - .endm | |
4901 | - | |
4902 | /* SYSENTER_RETURN points to after the "sysenter" instruction in | |
4903 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ | |
4904 | ||
4905 | @@ -338,8 +344,9 @@ sysenter_past_esp: | |
4906 | .previous | |
4907 | ||
4908 | GET_THREAD_INFO(%ebp) | |
4909 | - test_tif %ebp | |
4910 | - jnz syscall_trace_entry | |
4911 | + testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) | |
4912 | + jnz sysenter_audit | |
4913 | +sysenter_do_call: | |
4914 | cmpl $(nr_syscalls), %eax | |
4915 | jae syscall_badsys | |
4916 | call *sys_call_table(,%eax,4) | |
4917 | @@ -349,14 +356,54 @@ sysenter_past_esp: | |
4918 | TRACE_IRQS_OFF | |
4919 | movl TI_flags(%ebp), %ecx | |
4920 | testw $_TIF_ALLWORK_MASK, %cx | |
4921 | - jne syscall_exit_work | |
4922 | + jne sysexit_audit | |
4923 | +sysenter_exit: | |
4924 | /* if something modifies registers it must also disable sysexit */ | |
4925 | movl PT_EIP(%esp), %edx | |
4926 | movl PT_OLDESP(%esp), %ecx | |
4927 | xorl %ebp,%ebp | |
4928 | TRACE_IRQS_ON | |
4929 | 1: mov PT_FS(%esp), %fs | |
4930 | - ENABLE_INTERRUPTS_SYSCALL_RET | |
4931 | + ENABLE_INTERRUPTS_SYSEXIT | |
4932 | + | |
4933 | +#ifdef CONFIG_AUDITSYSCALL | |
4934 | +sysenter_audit: | |
4935 | + testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | |
4936 | + jnz syscall_trace_entry | |
4937 | + addl $4,%esp | |
4938 | + CFI_ADJUST_CFA_OFFSET -4 | |
4939 | + /* %esi already in 8(%esp) 6th arg: 4th syscall arg */ | |
4940 | + /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */ | |
4941 | + /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */ | |
4942 | + movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ | |
4943 | + movl %eax,%edx /* 2nd arg: syscall number */ | |
4944 | + movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ | |
4945 | + call audit_syscall_entry | |
4946 | + pushl %ebx | |
4947 | + CFI_ADJUST_CFA_OFFSET 4 | |
4948 | + movl PT_EAX(%esp),%eax /* reload syscall number */ | |
4949 | + jmp sysenter_do_call | |
4950 | + | |
4951 | +sysexit_audit: | |
4952 | + testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx | |
4953 | + jne syscall_exit_work | |
4954 | + TRACE_IRQS_ON | |
4955 | + ENABLE_INTERRUPTS(CLBR_ANY) | |
4956 | + movl %eax,%edx /* second arg, syscall return value */ | |
4957 | + cmpl $0,%eax /* is it < 0? */ | |
4958 | + setl %al /* 1 if so, 0 if not */ | |
4959 | + movzbl %al,%eax /* zero-extend that */ | |
4960 | + inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | |
4961 | + call audit_syscall_exit | |
4962 | + DISABLE_INTERRUPTS(CLBR_ANY) | |
4963 | + TRACE_IRQS_OFF | |
4964 | + movl TI_flags(%ebp), %ecx | |
4965 | + testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx | |
4966 | + jne syscall_exit_work | |
4967 | + movl PT_EAX(%esp),%eax /* reload syscall return value */ | |
4968 | + jmp sysenter_exit | |
4969 | +#endif | |
4970 | + | |
4971 | CFI_ENDPROC | |
4972 | .pushsection .fixup,"ax" | |
4973 | 2: movl $0,PT_FS(%esp) | |
4974 | @@ -400,7 +447,7 @@ ENTRY(system_call) | |
4975 | CFI_ADJUST_CFA_OFFSET 4 | |
4976 | SAVE_ALL | |
4977 | GET_THREAD_INFO(%ebp) | |
4978 | - test_tif %ebp | |
4979 | + testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) | |
4980 | jnz syscall_trace_entry | |
4981 | cmpl $(nr_syscalls), %eax | |
4982 | jae syscall_badsys | |
4983 | @@ -413,10 +460,6 @@ syscall_exit: | |
4984 | # setting need_resched or sigpending | |
4985 | # between sampling and the iret | |
4986 | TRACE_IRQS_OFF | |
4987 | - testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit | |
4988 | - jz no_singlestep | |
4989 | - orl $_TIF_SINGLESTEP,TI_flags(%ebp) | |
4990 | -no_singlestep: | |
4991 | movl TI_flags(%ebp), %ecx | |
4992 | testw $_TIF_ALLWORK_MASK, %cx # current->work | |
4993 | jne syscall_exit_work | |
4994 | @@ -588,12 +631,8 @@ END(work_pending) | |
4995 | syscall_trace_entry: | |
4996 | movl $-ENOSYS,PT_EAX(%esp) | |
4997 | movl %esp, %eax | |
4998 | - xorl %edx,%edx | |
4999 | - call do_syscall_trace | |
5000 | - cmpl $0, %eax | |
5001 | - jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, | |
5002 | - # so must skip actual syscall | |
5003 | - movl PT_ORIG_EAX(%esp), %eax | |
5004 | + call syscall_trace_enter | |
5005 | + /* What it returned is what we'll actually use. */ | |
5006 | cmpl $(nr_syscalls), %eax | |
5007 | jnae syscall_call | |
5008 | jmp syscall_exit | |
5009 | @@ -602,14 +641,13 @@ END(syscall_trace_entry) | |
5010 | # perform syscall exit tracing | |
5011 | ALIGN | |
5012 | syscall_exit_work: | |
5013 | - testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl | |
5014 | + testb $_TIF_WORK_SYSCALL_EXIT, %cl | |
5015 | jz work_pending | |
5016 | TRACE_IRQS_ON | |
5017 | - ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call | |
5018 | + ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call | |
5019 | # schedule() instead | |
5020 | movl %esp, %eax | |
5021 | - movl $1, %edx | |
5022 | - call do_syscall_trace | |
5023 | + call syscall_trace_leave | |
5024 | jmp resume_userspace | |
5025 | END(syscall_exit_work) | |
5026 | CFI_ENDPROC | |
5027 | @@ -1113,10 +1151,10 @@ ENTRY(native_iret) | |
5028 | .previous | |
5029 | END(native_iret) | |
5030 | ||
5031 | -ENTRY(native_irq_enable_syscall_ret) | |
5032 | +ENTRY(native_irq_enable_sysexit) | |
5033 | sti | |
5034 | sysexit | |
5035 | -END(native_irq_enable_syscall_ret) | |
5036 | +END(native_irq_enable_sysexit) | |
5037 | #endif | |
5038 | ||
5039 | KPROBE_ENTRY(int3) | |
5040 | @@ -1265,6 +1303,77 @@ ENTRY(kernel_thread_helper) | |
5041 | CFI_ENDPROC | |
5042 | ENDPROC(kernel_thread_helper) | |
5043 | ||
5044 | +#ifdef CONFIG_FTRACE | |
5045 | +#ifdef CONFIG_DYNAMIC_FTRACE | |
5046 | + | |
5047 | +ENTRY(mcount) | |
5048 | + pushl %eax | |
5049 | + pushl %ecx | |
5050 | + pushl %edx | |
5051 | + movl 0xc(%esp), %eax | |
5052 | + subl $MCOUNT_INSN_SIZE, %eax | |
5053 | + | |
5054 | +.globl mcount_call | |
5055 | +mcount_call: | |
5056 | + call ftrace_stub | |
5057 | + | |
5058 | + popl %edx | |
5059 | + popl %ecx | |
5060 | + popl %eax | |
5061 | + | |
5062 | + ret | |
5063 | +END(mcount) | |
5064 | + | |
5065 | +ENTRY(ftrace_caller) | |
5066 | + pushl %eax | |
5067 | + pushl %ecx | |
5068 | + pushl %edx | |
5069 | + movl 0xc(%esp), %eax | |
5070 | + movl 0x4(%ebp), %edx | |
5071 | + subl $MCOUNT_INSN_SIZE, %eax | |
5072 | + | |
5073 | +.globl ftrace_call | |
5074 | +ftrace_call: | |
5075 | + call ftrace_stub | |
5076 | + | |
5077 | + popl %edx | |
5078 | + popl %ecx | |
5079 | + popl %eax | |
5080 | + | |
5081 | +.globl ftrace_stub | |
5082 | +ftrace_stub: | |
5083 | + ret | |
5084 | +END(ftrace_caller) | |
5085 | + | |
5086 | +#else /* ! CONFIG_DYNAMIC_FTRACE */ | |
5087 | + | |
5088 | +ENTRY(mcount) | |
5089 | + cmpl $ftrace_stub, ftrace_trace_function | |
5090 | + jnz trace | |
5091 | +.globl ftrace_stub | |
5092 | +ftrace_stub: | |
5093 | + ret | |
5094 | + | |
5095 | + /* taken from glibc */ | |
5096 | +trace: | |
5097 | + pushl %eax | |
5098 | + pushl %ecx | |
5099 | + pushl %edx | |
5100 | + movl 0xc(%esp), %eax | |
5101 | + movl 0x4(%ebp), %edx | |
5102 | + subl $MCOUNT_INSN_SIZE, %eax | |
5103 | + | |
5104 | + call *ftrace_trace_function | |
5105 | + | |
5106 | + popl %edx | |
5107 | + popl %ecx | |
5108 | + popl %eax | |
5109 | + | |
5110 | + jmp ftrace_stub | |
5111 | +END(mcount) | |
5112 | +#endif /* CONFIG_DYNAMIC_FTRACE */ | |
5113 | +#endif /* CONFIG_FTRACE */ | |
5114 | + | |
5115 | #include <asm/alternative-asm.h> | |
5116 | ||
5117 | # pv syscall call handler stub | |
5118 | @@ -1290,7 +1399,7 @@ ENTRY(ia32pv_cstar_target) | |
5119 | .previous | |
5120 | SAVE_ALL | |
5121 | GET_THREAD_INFO(%ebp) | |
5122 | - test_tif %ebp | |
5123 | + testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) | |
5124 | jnz cstar_trace_entry | |
5125 | cmpl $nr_syscalls,%eax | |
5126 | jae cstar_badsys | |
5127 | @@ -1324,29 +1433,21 @@ cstar_trace_entry: | |
5128 | btl %eax,cstar_special | |
5129 | jc .Lcstar_trace_special | |
5130 | 1: movl %esp,%eax | |
5131 | - xorl %edx,%edx | |
5132 | LOCK_PREFIX | |
5133 | orl $_TIF_CSTAR,TI_flags(%ebp) | |
5134 | - call do_syscall_trace | |
5135 | + call syscall_trace_enter | |
5136 | LOCK_PREFIX | |
5137 | andl $~_TIF_CSTAR,TI_flags(%ebp) | |
5138 | - testl %eax,%eax | |
5139 | - jne .Lcstar_resume # ret != 0 -> running under PTRACE_SYSEMU, | |
5140 | - # so must skip actual syscall | |
5141 | - movl PT_ORIG_EAX(%esp),%eax | |
5142 | + /* What it returned is what we'll actually use. */ | |
5143 | cmpl $nr_syscalls,%eax | |
5144 | jb .Lcstar_call | |
5145 | jmp .Lcstar_exit | |
5146 | .Lcstar_trace_special: | |
5147 | movl PT_ECX(%esp),%ecx | |
5148 | movl %esp,%eax | |
5149 | - xorl %edx,%edx | |
5150 | movl %ecx,PT_EBP(%esp) # put user EBP back in place | |
5151 | - call do_syscall_trace | |
5152 | - testl %eax,%eax | |
5153 | - jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, | |
5154 | - # so must skip actual syscall | |
5155 | - movl PT_ORIG_EAX(%esp),%eax | |
5156 | + call syscall_trace_enter | |
5157 | + /* What it returned is what we'll actually use. */ | |
5158 | cmpl $nr_syscalls,%eax | |
5159 | jb syscall_call | |
5160 | jmp syscall_exit | |
82094b55 AF |
5161 | --- sle11-2009-10-16.orig/arch/x86/kernel/entry_64.S 2009-10-28 14:55:02.000000000 +0100 |
5162 | +++ sle11-2009-10-16/arch/x86/kernel/entry_64.S 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
5163 | @@ -1409,7 +1409,7 @@ ENTRY(arch_unwind_init_running) |
5164 | ENDPROC(arch_unwind_init_running) | |
5165 | #endif | |
5166 | ||
5167 | -#ifdef CONFIG_XEN | |
5168 | +#ifdef CONFIG_PARAVIRT_XEN | |
5169 | ENTRY(xen_hypervisor_callback) | |
5170 | zeroentry xen_do_hypervisor_callback | |
5171 | END(xen_hypervisor_callback) | |
5172 | @@ -1507,7 +1507,7 @@ ENTRY(xen_failsafe_callback) | |
5173 | CFI_ENDPROC | |
5174 | END(xen_failsafe_callback) | |
5175 | ||
5176 | -#endif /* CONFIG_XEN */ | |
5177 | +#endif /* CONFIG_PARAVIRT_XEN */ | |
5178 | ||
5179 | #ifdef CONFIG_KDB | |
5180 | ||
82094b55 AF |
5181 | --- sle11-2009-10-16.orig/arch/x86/kernel/entry_64-xen.S 2009-03-16 16:38:05.000000000 +0100 |
5182 | +++ sle11-2009-10-16/arch/x86/kernel/entry_64-xen.S 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
5183 | @@ -53,19 +53,130 @@ |
5184 | #include <asm/hw_irq.h> | |
5185 | #include <asm/page.h> | |
5186 | #include <asm/irqflags.h> | |
5187 | +#include <asm/ftrace.h> | |
5188 | #include <asm/errno.h> | |
5189 | #include <xen/interface/xen.h> | |
5190 | #include <xen/interface/features.h> | |
5191 | ||
5192 | +/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | |
5193 | +#include <linux/elf-em.h> | |
5194 | +#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) | |
5195 | +#define __AUDIT_ARCH_64BIT 0x80000000 | |
5196 | +#define __AUDIT_ARCH_LE 0x40000000 | |
5197 | + | |
5198 | .code64 | |
5199 | ||
5200 | +#ifdef CONFIG_FTRACE | |
5201 | +#ifdef CONFIG_DYNAMIC_FTRACE | |
5202 | +ENTRY(mcount) | |
5203 | + | |
5204 | + subq $0x38, %rsp | |
5205 | + movq %rax, (%rsp) | |
5206 | + movq %rcx, 8(%rsp) | |
5207 | + movq %rdx, 16(%rsp) | |
5208 | + movq %rsi, 24(%rsp) | |
5209 | + movq %rdi, 32(%rsp) | |
5210 | + movq %r8, 40(%rsp) | |
5211 | + movq %r9, 48(%rsp) | |
5212 | + | |
5213 | + movq 0x38(%rsp), %rdi | |
5214 | + subq $MCOUNT_INSN_SIZE, %rdi | |
5215 | + | |
5216 | +.globl mcount_call | |
5217 | +mcount_call: | |
5218 | + call ftrace_stub | |
5219 | + | |
5220 | + movq 48(%rsp), %r9 | |
5221 | + movq 40(%rsp), %r8 | |
5222 | + movq 32(%rsp), %rdi | |
5223 | + movq 24(%rsp), %rsi | |
5224 | + movq 16(%rsp), %rdx | |
5225 | + movq 8(%rsp), %rcx | |
5226 | + movq (%rsp), %rax | |
5227 | + addq $0x38, %rsp | |
5228 | + | |
5229 | + retq | |
5230 | +END(mcount) | |
5231 | + | |
5232 | +ENTRY(ftrace_caller) | |
5233 | + | |
5234 | + /* taken from glibc */ | |
5235 | + subq $0x38, %rsp | |
5236 | + movq %rax, (%rsp) | |
5237 | + movq %rcx, 8(%rsp) | |
5238 | + movq %rdx, 16(%rsp) | |
5239 | + movq %rsi, 24(%rsp) | |
5240 | + movq %rdi, 32(%rsp) | |
5241 | + movq %r8, 40(%rsp) | |
5242 | + movq %r9, 48(%rsp) | |
5243 | + | |
5244 | + movq 0x38(%rsp), %rdi | |
5245 | + movq 8(%rbp), %rsi | |
5246 | + subq $MCOUNT_INSN_SIZE, %rdi | |
5247 | + | |
5248 | +.globl ftrace_call | |
5249 | +ftrace_call: | |
5250 | + call ftrace_stub | |
5251 | + | |
5252 | + movq 48(%rsp), %r9 | |
5253 | + movq 40(%rsp), %r8 | |
5254 | + movq 32(%rsp), %rdi | |
5255 | + movq 24(%rsp), %rsi | |
5256 | + movq 16(%rsp), %rdx | |
5257 | + movq 8(%rsp), %rcx | |
5258 | + movq (%rsp), %rax | |
5259 | + addq $0x38, %rsp | |
5260 | + | |
5261 | +.globl ftrace_stub | |
5262 | +ftrace_stub: | |
5263 | + retq | |
5264 | +END(ftrace_caller) | |
5265 | + | |
5266 | +#else /* ! CONFIG_DYNAMIC_FTRACE */ | |
5267 | +ENTRY(mcount) | |
5268 | + cmpq $ftrace_stub, ftrace_trace_function | |
5269 | + jnz trace | |
5270 | +.globl ftrace_stub | |
5271 | +ftrace_stub: | |
5272 | + retq | |
5273 | + | |
5274 | +trace: | |
5275 | + /* taken from glibc */ | |
5276 | + subq $0x38, %rsp | |
5277 | + movq %rax, (%rsp) | |
5278 | + movq %rcx, 8(%rsp) | |
5279 | + movq %rdx, 16(%rsp) | |
5280 | + movq %rsi, 24(%rsp) | |
5281 | + movq %rdi, 32(%rsp) | |
5282 | + movq %r8, 40(%rsp) | |
5283 | + movq %r9, 48(%rsp) | |
5284 | + | |
5285 | + movq 0x38(%rsp), %rdi | |
5286 | + movq 8(%rbp), %rsi | |
5287 | + subq $MCOUNT_INSN_SIZE, %rdi | |
5288 | + | |
5289 | + call *ftrace_trace_function | |
5290 | + | |
5291 | + movq 48(%rsp), %r9 | |
5292 | + movq 40(%rsp), %r8 | |
5293 | + movq 32(%rsp), %rdi | |
5294 | + movq 24(%rsp), %rsi | |
5295 | + movq 16(%rsp), %rdx | |
5296 | + movq 8(%rsp), %rcx | |
5297 | + movq (%rsp), %rax | |
5298 | + addq $0x38, %rsp | |
5299 | + | |
5300 | + jmp ftrace_stub | |
5301 | +END(mcount) | |
5302 | +#endif /* CONFIG_DYNAMIC_FTRACE */ | |
5303 | +#endif /* CONFIG_FTRACE */ | |
5304 | + | |
5305 | #ifndef CONFIG_PREEMPT | |
5306 | #define retint_kernel retint_restore_args | |
5307 | #endif | |
5308 | ||
5309 | #ifdef CONFIG_PARAVIRT | |
5310 | -ENTRY(native_irq_enable_syscall_ret) | |
5311 | - movq %gs:pda_oldrsp,%rsp | |
5312 | +ENTRY(native_usergs_sysret64) | |
5313 | swapgs | |
5314 | sysretq | |
5315 | #endif /* CONFIG_PARAVIRT */ | |
5316 | @@ -102,7 +213,7 @@ NMI_MASK = 0x80000000 | |
5317 | .macro FAKE_STACK_FRAME child_rip | |
5318 | /* push in order ss, rsp, eflags, cs, rip */ | |
5319 | xorl %eax, %eax | |
5320 | - pushq %rax /* ss */ | |
5321 | + pushq $__KERNEL_DS /* ss */ | |
5322 | CFI_ADJUST_CFA_OFFSET 8 | |
5323 | /*CFI_REL_OFFSET ss,0*/ | |
5324 | pushq %rax /* rsp */ | |
5325 | @@ -197,13 +308,13 @@ ENTRY(ret_from_fork) | |
5326 | CFI_ADJUST_CFA_OFFSET -4 | |
5327 | call schedule_tail | |
5328 | GET_THREAD_INFO(%rcx) | |
5329 | - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) | |
5330 | + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | |
5331 | jnz rff_trace | |
5332 | rff_action: | |
5333 | RESTORE_REST | |
5334 | testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? | |
5335 | je int_ret_from_sys_call | |
5336 | - testl $_TIF_IA32,threadinfo_flags(%rcx) | |
5337 | + testl $_TIF_IA32,TI_flags(%rcx) | |
5338 | jnz int_ret_from_sys_call | |
5339 | RESTORE_TOP_OF_STACK %rdi,ARGOFFSET | |
5340 | jmp ret_from_sys_call | |
5341 | @@ -265,8 +376,9 @@ ENTRY(system_call) | |
5342 | SAVE_ARGS -8,0 | |
5343 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | |
5344 | GET_THREAD_INFO(%rcx) | |
5345 | - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) | |
5346 | + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) | |
5347 | jnz tracesys | |
5348 | +system_call_fastpath: | |
5349 | cmpq $__NR_syscall_max,%rax | |
5350 | ja badsys | |
5351 | movq %r10,%rcx | |
5352 | @@ -284,7 +396,7 @@ sysret_check: | |
5353 | GET_THREAD_INFO(%rcx) | |
5354 | DISABLE_INTERRUPTS(CLBR_NONE) | |
5355 | TRACE_IRQS_OFF | |
5356 | - movl threadinfo_flags(%rcx),%edx | |
5357 | + movl TI_flags(%rcx),%edx | |
5358 | andl %edi,%edx | |
5359 | jnz sysret_careful | |
5360 | CFI_REMEMBER_STATE | |
5361 | @@ -315,16 +427,16 @@ sysret_careful: | |
5362 | sysret_signal: | |
5363 | TRACE_IRQS_ON | |
5364 | ENABLE_INTERRUPTS(CLBR_NONE) | |
5365 | - testl $_TIF_DO_NOTIFY_MASK,%edx | |
5366 | - jz 1f | |
5367 | - | |
5368 | - /* Really a signal */ | |
5369 | +#ifdef CONFIG_AUDITSYSCALL | |
5370 | + bt $TIF_SYSCALL_AUDIT,%edx | |
5371 | + jc sysret_audit | |
5372 | +#endif | |
5373 | /* edx: work flags (arg3) */ | |
5374 | leaq do_notify_resume(%rip),%rax | |
5375 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 | |
5376 | xorl %esi,%esi # oldset -> arg2 | |
5377 | call ptregscall_common | |
5378 | -1: movl $_TIF_NEED_RESCHED,%edi | |
5379 | + movl $_TIF_WORK_MASK,%edi | |
5380 | /* Use IRET because user could have changed frame. This | |
5381 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | |
5382 | DISABLE_INTERRUPTS(CLBR_NONE) | |
5383 | @@ -335,14 +447,56 @@ badsys: | |
5384 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | |
5385 | jmp ret_from_sys_call | |
5386 | ||
5387 | +#ifdef CONFIG_AUDITSYSCALL | |
5388 | + /* | |
5389 | + * Fast path for syscall audit without full syscall trace. | |
5390 | + * We just call audit_syscall_entry() directly, and then | |
5391 | + * jump back to the normal fast path. | |
5392 | + */ | |
5393 | +auditsys: | |
5394 | + movq %r10,%r9 /* 6th arg: 4th syscall arg */ | |
5395 | + movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ | |
5396 | + movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ | |
5397 | + movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ | |
5398 | + movq %rax,%rsi /* 2nd arg: syscall number */ | |
5399 | + movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ | |
5400 | + call audit_syscall_entry | |
5401 | + LOAD_ARGS 0 /* reload call-clobbered registers */ | |
5402 | + jmp system_call_fastpath | |
5403 | + | |
5404 | + /* | |
5405 | + * Return fast path for syscall audit. Call audit_syscall_exit() | |
5406 | + * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT | |
5407 | + * masked off. | |
5408 | + */ | |
5409 | +sysret_audit: | |
5410 | + movq %rax,%rsi /* second arg, syscall return value */ | |
5411 | + cmpq $0,%rax /* is it < 0? */ | |
5412 | + setl %al /* 1 if so, 0 if not */ | |
5413 | + movzbl %al,%edi /* zero-extend that into %edi */ | |
5414 | + inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | |
5415 | + call audit_syscall_exit | |
5416 | + movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | |
5417 | + jmp sysret_check | |
5418 | +#endif /* CONFIG_AUDITSYSCALL */ | |
5419 | + | |
5420 | /* Do syscall tracing */ | |
5421 | tracesys: | |
5422 | +#ifdef CONFIG_AUDITSYSCALL | |
5423 | + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | |
5424 | + jz auditsys | |
5425 | +#endif | |
5426 | SAVE_REST | |
5427 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | |
5428 | FIXUP_TOP_OF_STACK %rdi | |
5429 | movq %rsp,%rdi | |
5430 | call syscall_trace_enter | |
5431 | - LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ | |
5432 | + /* | |
5433 | + * Reload arg registers from stack in case ptrace changed them. | |
5434 | + * We don't reload %rax because syscall_trace_enter() returned | |
5435 | + * the value it wants us to use in the table lookup. | |
5436 | + */ | |
5437 | + LOAD_ARGS ARGOFFSET, 1 | |
5438 | RESTORE_REST | |
5439 | cmpq $__NR_syscall_max,%rax | |
5440 | ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ | |
5441 | @@ -356,6 +510,7 @@ tracesys: | |
5442 | * Has correct top of stack, but partial stack frame. | |
5443 | */ | |
5444 | .globl int_ret_from_sys_call | |
5445 | + .globl int_with_check | |
5446 | int_ret_from_sys_call: | |
5447 | DISABLE_INTERRUPTS(CLBR_NONE) | |
5448 | TRACE_IRQS_OFF | |
5449 | @@ -370,10 +525,10 @@ int_ret_from_sys_call: | |
5450 | int_with_check: | |
5451 | LOCKDEP_SYS_EXIT_IRQ | |
5452 | GET_THREAD_INFO(%rcx) | |
5453 | - movl threadinfo_flags(%rcx),%edx | |
5454 | + movl TI_flags(%rcx),%edx | |
5455 | andl %edi,%edx | |
5456 | jnz int_careful | |
5457 | - andl $~TS_COMPAT,threadinfo_status(%rcx) | |
5458 | + andl $~TS_COMPAT,TI_status(%rcx) | |
5459 | jmp retint_restore_args | |
5460 | ||
5461 | /* Either reschedule or signal or syscall exit tracking needed. */ | |
5462 | @@ -399,7 +554,7 @@ int_very_careful: | |
5463 | ENABLE_INTERRUPTS(CLBR_NONE) | |
5464 | SAVE_REST | |
5465 | /* Check for syscall exit trace */ | |
5466 | - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx | |
5467 | + testl $_TIF_WORK_SYSCALL_EXIT,%edx | |
5468 | jz int_signal | |
5469 | pushq %rdi | |
5470 | CFI_ADJUST_CFA_OFFSET 8 | |
5471 | @@ -407,7 +562,7 @@ int_very_careful: | |
5472 | call syscall_trace_leave | |
5473 | popq %rdi | |
5474 | CFI_ADJUST_CFA_OFFSET -8 | |
5475 | - andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi | |
5476 | + andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi | |
5477 | jmp int_restore_rest | |
5478 | ||
5479 | int_signal: | |
5480 | @@ -416,7 +571,7 @@ int_signal: | |
5481 | movq %rsp,%rdi # &ptregs -> arg1 | |
5482 | xorl %esi,%esi # oldset -> arg2 | |
5483 | call do_notify_resume | |
5484 | -1: movl $_TIF_NEED_RESCHED,%edi | |
5485 | +1: movl $_TIF_WORK_MASK,%edi | |
5486 | int_restore_rest: | |
5487 | RESTORE_REST | |
5488 | DISABLE_INTERRUPTS(CLBR_NONE) | |
5489 | @@ -443,7 +598,6 @@ END(\label) | |
5490 | PTREGSCALL stub_clone, sys_clone, %r8 | |
5491 | PTREGSCALL stub_fork, sys_fork, %rdi | |
5492 | PTREGSCALL stub_vfork, sys_vfork, %rdi | |
5493 | - PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx | |
5494 | PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx | |
5495 | PTREGSCALL stub_iopl, sys_iopl, %rsi | |
5496 | ||
5497 | @@ -517,10 +671,12 @@ END(stub_rt_sigreturn) | |
5498 | * | |
5499 | */ | |
5500 | ||
5501 | -retint_check: | |
5502 | +retint_with_reschedule: | |
5503 | CFI_DEFAULT_STACK adj=1 | |
5504 | + movl $_TIF_WORK_MASK,%edi | |
5505 | +retint_check: | |
5506 | LOCKDEP_SYS_EXIT_IRQ | |
5507 | - movl threadinfo_flags(%rcx),%edx | |
5508 | + movl TI_flags(%rcx),%edx | |
5509 | andl %edi,%edx | |
5510 | CFI_REMEMBER_STATE | |
5511 | jnz retint_careful | |
5512 | @@ -565,17 +721,16 @@ retint_signal: | |
5513 | RESTORE_REST | |
5514 | DISABLE_INTERRUPTS(CLBR_NONE) | |
5515 | TRACE_IRQS_OFF | |
5516 | - movl $_TIF_NEED_RESCHED,%edi | |
5517 | GET_THREAD_INFO(%rcx) | |
5518 | - jmp retint_check | |
5519 | + jmp retint_with_reschedule | |
5520 | ||
5521 | #ifdef CONFIG_PREEMPT | |
5522 | /* Returning to kernel space. Check if we need preemption */ | |
5523 | /* rcx: threadinfo. interrupts off. */ | |
5524 | ENTRY(retint_kernel) | |
5525 | - cmpl $0,threadinfo_preempt_count(%rcx) | |
5526 | + cmpl $0,TI_preempt_count(%rcx) | |
5527 | jnz retint_restore_args | |
5528 | - bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) | |
5529 | + bt $TIF_NEED_RESCHED,TI_flags(%rcx) | |
5530 | jnc retint_restore_args | |
5531 | bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ | |
5532 | jnc retint_restore_args | |
5533 | @@ -630,6 +785,9 @@ END(invalidate_interrupt\num) | |
5534 | ENTRY(call_function_interrupt) | |
5535 | apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt | |
5536 | END(call_function_interrupt) | |
5537 | +ENTRY(call_function_single_interrupt) | |
5538 | + apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt | |
5539 | +END(call_function_single_interrupt) | |
5540 | ENTRY(irq_move_cleanup_interrupt) | |
5541 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt | |
5542 | END(irq_move_cleanup_interrupt) | |
5543 | @@ -639,6 +797,10 @@ ENTRY(apic_timer_interrupt) | |
5544 | apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt | |
5545 | END(apic_timer_interrupt) | |
5546 | ||
5547 | +ENTRY(uv_bau_message_intr1) | |
5548 | + apicinterrupt 220,uv_bau_message_interrupt | |
5549 | +END(uv_bau_message_intr1) | |
5550 | + | |
5551 | ENTRY(error_interrupt) | |
5552 | apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt | |
5553 | END(error_interrupt) | |
5554 | @@ -752,7 +914,7 @@ paranoid_restore\trace: | |
5555 | jmp irq_return | |
5556 | paranoid_userspace\trace: | |
5557 | GET_THREAD_INFO(%rcx) | |
5558 | - movl threadinfo_flags(%rcx),%ebx | |
5559 | + movl TI_flags(%rcx),%ebx | |
5560 | andl $_TIF_WORK_MASK,%ebx | |
5561 | jz paranoid_swapgs\trace | |
5562 | movq %rsp,%rdi /* &pt_regs */ | |
5563 | @@ -849,7 +1011,7 @@ error_exit: | |
5564 | testb $3,CS-ARGOFFSET(%rsp) | |
5565 | jz retint_kernel | |
5566 | LOCKDEP_SYS_EXIT_IRQ | |
5567 | - movl threadinfo_flags(%rcx),%edx | |
5568 | + movl TI_flags(%rcx),%edx | |
5569 | movl $_TIF_WORK_MASK,%edi | |
5570 | andl %edi,%edx | |
5571 | jnz retint_careful | |
5572 | @@ -871,11 +1033,11 @@ error_kernelspace: | |
5573 | iret run with kernel gs again, so don't set the user space flag. | |
5574 | B stepping K8s sometimes report an truncated RIP for IRET | |
5575 | exceptions returning to compat mode. Check for these here too. */ | |
5576 | - leaq irq_return(%rip),%rbp | |
5577 | - cmpq %rbp,RIP(%rsp) | |
5578 | + leaq irq_return(%rip),%rcx | |
5579 | + cmpq %rcx,RIP(%rsp) | |
5580 | je error_swapgs | |
5581 | - movl %ebp,%ebp /* zero extend */ | |
5582 | - cmpq %rbp,RIP(%rsp) | |
5583 | + movl %ecx,%ecx /* zero extend */ | |
5584 | + cmpq %rcx,RIP(%rsp) | |
5585 | je error_swapgs | |
5586 | cmpq $gs_change,RIP(%rsp) | |
5587 | je error_swapgs | |
5588 | @@ -1121,6 +1283,7 @@ END(device_not_available) | |
5589 | /* runs on exception stack */ | |
5590 | KPROBE_ENTRY(debug) | |
5591 | /* INTR_FRAME | |
5592 | + PARAVIRT_ADJUST_EXCEPTION_FRAME | |
5593 | pushq $0 | |
5594 | CFI_ADJUST_CFA_OFFSET 8 */ | |
5595 | zeroentry do_debug | |
5596 | @@ -1148,6 +1311,7 @@ END(do_nmi_callback) | |
5597 | ||
5598 | KPROBE_ENTRY(int3) | |
5599 | /* INTR_FRAME | |
5600 | + PARAVIRT_ADJUST_EXCEPTION_FRAME | |
5601 | pushq $0 | |
5602 | CFI_ADJUST_CFA_OFFSET 8 */ | |
5603 | zeroentry do_int3 | |
5604 | @@ -1171,14 +1335,11 @@ ENTRY(coprocessor_segment_overrun) | |
5605 | zeroentry do_coprocessor_segment_overrun | |
5606 | END(coprocessor_segment_overrun) | |
5607 | ||
5608 | -ENTRY(reserved) | |
5609 | - zeroentry do_reserved | |
5610 | -END(reserved) | |
5611 | - | |
5612 | #if 0 | |
5613 | /* runs on exception stack */ | |
5614 | ENTRY(double_fault) | |
5615 | XCPT_FRAME | |
5616 | + PARAVIRT_ADJUST_EXCEPTION_FRAME | |
5617 | paranoidentry do_double_fault | |
5618 | jmp paranoid_exit1 | |
5619 | CFI_ENDPROC | |
5620 | @@ -1196,6 +1357,7 @@ END(segment_not_present) | |
5621 | /* runs on exception stack */ | |
5622 | ENTRY(stack_segment) | |
5623 | /* XCPT_FRAME | |
5624 | + PARAVIRT_ADJUST_EXCEPTION_FRAME | |
5625 | paranoidentry do_stack_segment */ | |
5626 | errorentry do_stack_segment | |
5627 | /* jmp paranoid_exit1 | |
5628 | @@ -1222,6 +1384,7 @@ END(spurious_interrupt_bug) | |
5629 | /* runs on exception stack */ | |
5630 | ENTRY(machine_check) | |
5631 | INTR_FRAME | |
5632 | + PARAVIRT_ADJUST_EXCEPTION_FRAME | |
5633 | pushq $0 | |
5634 | CFI_ADJUST_CFA_OFFSET 8 | |
5635 | paranoidentry do_machine_check | |
82094b55 AF |
5636 | --- sle11-2009-10-16.orig/arch/x86/kernel/fixup.c 2009-03-16 16:33:40.000000000 +0100 |
5637 | +++ sle11-2009-10-16/arch/x86/kernel/fixup.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
5638 | @@ -33,6 +33,7 @@ |
5639 | #include <linux/kernel.h> | |
5640 | #include <linux/delay.h> | |
5641 | #include <linux/version.h> | |
5642 | +#include <asm/traps.h> | |
5643 | ||
5644 | #define DP(_f, _args...) printk(KERN_ALERT " " _f "\n" , ## _args ) | |
5645 | ||
82094b55 AF |
5646 | --- sle11-2009-10-16.orig/arch/x86/kernel/genapic_64-xen.c 2009-03-16 16:38:05.000000000 +0100 |
5647 | +++ sle11-2009-10-16/arch/x86/kernel/genapic_64-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
5648 | @@ -58,7 +58,7 @@ void __init setup_apic_routing(void) |
5649 | else | |
5650 | #endif | |
5651 | ||
5652 | - if (num_possible_cpus() <= 8) | |
5653 | + if (max_physical_apicid < 8) | |
5654 | genapic = &apic_flat; | |
5655 | else | |
5656 | genapic = &apic_physflat; | |
5657 | @@ -121,4 +121,5 @@ int is_uv_system(void) | |
5658 | { | |
5659 | return uv_system_type != UV_NONE; | |
5660 | } | |
5661 | +EXPORT_SYMBOL_GPL(is_uv_system); | |
5662 | #endif | |
82094b55 AF |
5663 | --- sle11-2009-10-16.orig/arch/x86/kernel/genapic_xen_64.c 2009-03-16 16:38:05.000000000 +0100 |
5664 | +++ sle11-2009-10-16/arch/x86/kernel/genapic_xen_64.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
5665 | @@ -43,7 +43,7 @@ void xen_send_IPI_shortcut(unsigned int |
5666 | __send_IPI_one(smp_processor_id(), vector); | |
5667 | break; | |
5668 | case APIC_DEST_ALLBUT: | |
5669 | - for (cpu = 0; cpu < NR_CPUS; ++cpu) { | |
5670 | + for_each_possible_cpu(cpu) { | |
5671 | if (cpu == smp_processor_id()) | |
5672 | continue; | |
5673 | if (cpu_isset(cpu, cpu_online_map)) { | |
5674 | @@ -52,7 +52,7 @@ void xen_send_IPI_shortcut(unsigned int | |
5675 | } | |
5676 | break; | |
5677 | case APIC_DEST_ALLINC: | |
5678 | - for (cpu = 0; cpu < NR_CPUS; ++cpu) { | |
5679 | + for_each_possible_cpu(cpu) { | |
5680 | if (cpu_isset(cpu, cpu_online_map)) { | |
5681 | __send_IPI_one(cpu, vector); | |
5682 | } | |
5683 | @@ -81,8 +81,6 @@ static cpumask_t xen_vector_allocation_d | |
5684 | */ | |
5685 | static void xen_init_apic_ldr(void) | |
5686 | { | |
5687 | - Dprintk("%s\n", __FUNCTION__); | |
5688 | - return; | |
5689 | } | |
5690 | ||
5691 | static void xen_send_IPI_allbutself(int vector) | |
5692 | @@ -92,14 +90,12 @@ static void xen_send_IPI_allbutself(int | |
5693 | * we get an APIC send error if we try to broadcast. | |
5694 | * thus we have to avoid sending IPIs in this case. | |
5695 | */ | |
5696 | - Dprintk("%s\n", __FUNCTION__); | |
5697 | if (num_online_cpus() > 1) | |
5698 | xen_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL); | |
5699 | } | |
5700 | ||
5701 | static void xen_send_IPI_all(int vector) | |
5702 | { | |
5703 | - Dprintk("%s\n", __FUNCTION__); | |
5704 | xen_send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); | |
5705 | } | |
5706 | ||
5707 | @@ -109,11 +105,10 @@ static void xen_send_IPI_mask(cpumask_t | |
5708 | unsigned int cpu; | |
5709 | unsigned long flags; | |
5710 | ||
5711 | - Dprintk("%s\n", __FUNCTION__); | |
5712 | local_irq_save(flags); | |
5713 | WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); | |
5714 | ||
5715 | - for (cpu = 0; cpu < NR_CPUS; ++cpu) { | |
5716 | + for_each_possible_cpu(cpu) { | |
5717 | if (cpu_isset(cpu, cpumask)) { | |
5718 | __send_IPI_one(cpu, vector); | |
5719 | } | |
5720 | @@ -125,14 +120,12 @@ static void xen_send_IPI_mask(cpumask_t | |
5721 | static int xen_apic_id_registered(void) | |
5722 | { | |
5723 | /* better be set */ | |
5724 | - Dprintk("%s\n", __FUNCTION__); | |
5725 | return physid_isset(smp_processor_id(), phys_cpu_present_map); | |
5726 | } | |
5727 | #endif | |
5728 | ||
5729 | static unsigned int xen_cpu_mask_to_apicid(cpumask_t cpumask) | |
5730 | { | |
5731 | - Dprintk("%s\n", __FUNCTION__); | |
5732 | return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; | |
5733 | } | |
5734 | ||
5735 | @@ -140,7 +133,6 @@ static unsigned int phys_pkg_id(int inde | |
5736 | { | |
5737 | u32 ebx; | |
5738 | ||
5739 | - Dprintk("%s\n", __FUNCTION__); | |
5740 | ebx = cpuid_ebx(1); | |
5741 | return ((ebx >> 24) & 0xFF) >> index_msb; | |
5742 | } | |
5743 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
82094b55 | 5744 | +++ sle11-2009-10-16/arch/x86/kernel/head-xen.c 2009-06-04 10:21:39.000000000 +0200 |
2cb7cef9 BS |
5745 | @@ -0,0 +1,57 @@ |
5746 | +#include <linux/kernel.h> | |
5747 | +#include <linux/init.h> | |
5748 | + | |
5749 | +#include <asm/setup.h> | |
5750 | +#include <asm/bios_ebda.h> | |
5751 | + | |
5752 | +#define BIOS_LOWMEM_KILOBYTES 0x413 | |
5753 | + | |
5754 | +/* | |
5755 | + * The BIOS places the EBDA/XBDA at the top of conventional | |
5756 | + * memory, and usually decreases the reported amount of | |
5757 | + * conventional memory (int 0x12) too. This also contains a | |
5758 | + * workaround for Dell systems that neglect to reserve EBDA. | |
5759 | + * The same workaround also avoids a problem with the AMD768MPX | |
5760 | + * chipset: reserve a page before VGA to prevent PCI prefetch | |
5761 | + * into it (errata #56). Usually the page is reserved anyways, | |
5762 | + * unless you have no PS/2 mouse plugged in. | |
5763 | + */ | |
5764 | +void __init reserve_ebda_region(void) | |
5765 | +{ | |
5766 | +#ifndef CONFIG_XEN | |
5767 | + unsigned int lowmem, ebda_addr; | |
5768 | + | |
5769 | + /* To determine the position of the EBDA and the */ | |
5770 | + /* end of conventional memory, we need to look at */ | |
5771 | + /* the BIOS data area. In a paravirtual environment */ | |
5772 | + /* that area is absent. We'll just have to assume */ | |
5773 | + /* that the paravirt case can handle memory setup */ | |
5774 | + /* correctly, without our help. */ | |
5775 | + if (paravirt_enabled()) | |
5776 | + return; | |
5777 | + | |
5778 | + /* end of low (conventional) memory */ | |
5779 | + lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); | |
5780 | + lowmem <<= 10; | |
5781 | + | |
5782 | + /* start of EBDA area */ | |
5783 | + ebda_addr = get_bios_ebda(); | |
5784 | + | |
5785 | + /* Fixup: bios puts an EBDA in the top 64K segment */ | |
5786 | + /* of conventional memory, but does not adjust lowmem. */ | |
5787 | + if ((lowmem - ebda_addr) <= 0x10000) | |
5788 | + lowmem = ebda_addr; | |
5789 | + | |
5790 | + /* Fixup: bios does not report an EBDA at all. */ | |
5791 | + /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ | |
5792 | + if ((ebda_addr == 0) && (lowmem >= 0x9f000)) | |
5793 | + lowmem = 0x9f000; | |
5794 | + | |
5795 | + /* Paranoia: should never happen, but... */ | |
5796 | + if ((lowmem == 0) || (lowmem >= 0x100000)) | |
5797 | + lowmem = 0x9f000; | |
5798 | + | |
5799 | + /* reserve all memory between lowmem and the 1MB mark */ | |
5800 | + reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved"); | |
5801 | +#endif | |
5802 | +} | |
5803 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
82094b55 | 5804 | +++ sle11-2009-10-16/arch/x86/kernel/head32-xen.c 2009-06-04 10:21:39.000000000 +0200 |
2cb7cef9 BS |
5805 | @@ -0,0 +1,57 @@ |
5806 | +/* | |
5807 | + * linux/arch/i386/kernel/head32.c -- prepare to run common code | |
5808 | + * | |
5809 | + * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | |
5810 | + * Copyright (C) 2007 Eric Biederman <ebiederm@xmission.com> | |
5811 | + */ | |
5812 | + | |
5813 | +#include <linux/init.h> | |
5814 | +#include <linux/start_kernel.h> | |
5815 | + | |
5816 | +#include <asm/setup.h> | |
5817 | +#include <asm/sections.h> | |
5818 | +#include <asm/e820.h> | |
5819 | +#include <asm/bios_ebda.h> | |
5820 | + | |
5821 | +void __init i386_start_kernel(void) | |
5822 | +{ | |
5823 | + reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); | |
5824 | + | |
5825 | +#ifndef CONFIG_XEN | |
5826 | +#ifdef CONFIG_BLK_DEV_INITRD | |
5827 | + /* Reserve INITRD */ | |
5828 | + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | |
5829 | + u64 ramdisk_image = boot_params.hdr.ramdisk_image; | |
5830 | + u64 ramdisk_size = boot_params.hdr.ramdisk_size; | |
5831 | + u64 ramdisk_end = ramdisk_image + ramdisk_size; | |
5832 | + reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); | |
5833 | + } | |
5834 | +#endif | |
5835 | + reserve_early(init_pg_tables_start, init_pg_tables_end, | |
5836 | + "INIT_PG_TABLE"); | |
5837 | +#else | |
5838 | + reserve_early(ALIGN(__pa_symbol(&_end), PAGE_SIZE), | |
5839 | + __pa(xen_start_info->pt_base) | |
5840 | + + (xen_start_info->nr_pt_frames << PAGE_SHIFT), | |
5841 | + "Xen provided"); | |
5842 | + | |
5843 | + { | |
5844 | + int max_cmdline; | |
5845 | + | |
5846 | + if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) | |
5847 | + max_cmdline = COMMAND_LINE_SIZE; | |
5848 | + memcpy(boot_command_line, xen_start_info->cmd_line, max_cmdline); | |
5849 | + boot_command_line[max_cmdline-1] = '\0'; | |
5850 | + } | |
5851 | +#endif | |
5852 | + | |
5853 | + reserve_ebda_region(); | |
5854 | + | |
5855 | + /* | |
5856 | + * At this point everything still needed from the boot loader | |
5857 | + * or BIOS or kernel text should be early reserved or marked not | |
5858 | + * RAM in e820. All other memory is free game. | |
5859 | + */ | |
5860 | + | |
5861 | + start_kernel(); | |
5862 | +} | |
82094b55 AF |
5863 | --- sle11-2009-10-16.orig/arch/x86/kernel/head64-xen.c 2009-03-16 16:38:05.000000000 +0100 |
5864 | +++ sle11-2009-10-16/arch/x86/kernel/head64-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
5865 | @@ -32,7 +32,26 @@ |
5866 | #include <asm/e820.h> | |
5867 | #include <asm/bios_ebda.h> | |
5868 | ||
5869 | -unsigned long start_pfn; | |
5870 | +/* boot cpu pda */ | |
5871 | +static struct x8664_pda _boot_cpu_pda __read_mostly; | |
5872 | + | |
5873 | +#ifdef CONFIG_SMP | |
5874 | +/* | |
5875 | + * We install an empty cpu_pda pointer table to indicate to early users | |
5876 | + * (numa_set_node) that the cpu_pda pointer table for cpus other than | |
5877 | + * the boot cpu is not yet setup. | |
5878 | + */ | |
5879 | +static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; | |
5880 | +#else | |
5881 | +static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; | |
5882 | +#endif | |
5883 | + | |
5884 | +void __init x86_64_init_pda(void) | |
5885 | +{ | |
5886 | + _cpu_pda = __cpu_pda; | |
5887 | + cpu_pda(0) = &_boot_cpu_pda; | |
5888 | + pda_init(0); | |
5889 | +} | |
5890 | ||
5891 | #ifndef CONFIG_XEN | |
5892 | static void __init zap_identity_mappings(void) | |
5893 | @@ -77,83 +96,10 @@ EXPORT_SYMBOL(machine_to_phys_mapping); | |
5894 | unsigned int machine_to_phys_order; | |
5895 | EXPORT_SYMBOL(machine_to_phys_order); | |
5896 | ||
5897 | -#define BIOS_LOWMEM_KILOBYTES 0x413 | |
5898 | - | |
5899 | -/* | |
5900 | - * The BIOS places the EBDA/XBDA at the top of conventional | |
5901 | - * memory, and usually decreases the reported amount of | |
5902 | - * conventional memory (int 0x12) too. This also contains a | |
5903 | - * workaround for Dell systems that neglect to reserve EBDA. | |
5904 | - * The same workaround also avoids a problem with the AMD768MPX | |
5905 | - * chipset: reserve a page before VGA to prevent PCI prefetch | |
5906 | - * into it (errata #56). Usually the page is reserved anyways, | |
5907 | - * unless you have no PS/2 mouse plugged in. | |
5908 | - */ | |
5909 | -static void __init reserve_ebda_region(void) | |
5910 | -{ | |
5911 | -#ifndef CONFIG_XEN | |
5912 | - unsigned int lowmem, ebda_addr; | |
5913 | - | |
5914 | - /* To determine the position of the EBDA and the */ | |
5915 | - /* end of conventional memory, we need to look at */ | |
5916 | - /* the BIOS data area. In a paravirtual environment */ | |
5917 | - /* that area is absent. We'll just have to assume */ | |
5918 | - /* that the paravirt case can handle memory setup */ | |
5919 | - /* correctly, without our help. */ | |
5920 | - if (paravirt_enabled()) | |
5921 | - return; | |
5922 | - | |
5923 | - /* end of low (conventional) memory */ | |
5924 | - lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); | |
5925 | - lowmem <<= 10; | |
5926 | - | |
5927 | - /* start of EBDA area */ | |
5928 | - ebda_addr = get_bios_ebda(); | |
5929 | - | |
5930 | - /* Fixup: bios puts an EBDA in the top 64K segment */ | |
5931 | - /* of conventional memory, but does not adjust lowmem. */ | |
5932 | - if ((lowmem - ebda_addr) <= 0x10000) | |
5933 | - lowmem = ebda_addr; | |
5934 | - | |
5935 | - /* Fixup: bios does not report an EBDA at all. */ | |
5936 | - /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ | |
5937 | - if ((ebda_addr == 0) && (lowmem >= 0x9f000)) | |
5938 | - lowmem = 0x9f000; | |
5939 | - | |
5940 | - /* Paranoia: should never happen, but... */ | |
5941 | - if ((lowmem == 0) || (lowmem >= 0x100000)) | |
5942 | - lowmem = 0x9f000; | |
5943 | - | |
5944 | - /* reserve all memory between lowmem and the 1MB mark */ | |
5945 | - reserve_early(lowmem, 0x100000, "BIOS reserved"); | |
5946 | -#endif | |
5947 | -} | |
5948 | - | |
5949 | -static void __init reserve_setup_data(void) | |
5950 | -{ | |
5951 | -#ifndef CONFIG_XEN | |
5952 | - struct setup_data *data; | |
5953 | - unsigned long pa_data; | |
5954 | - char buf[32]; | |
5955 | - | |
5956 | - if (boot_params.hdr.version < 0x0209) | |
5957 | - return; | |
5958 | - pa_data = boot_params.hdr.setup_data; | |
5959 | - while (pa_data) { | |
5960 | - data = early_ioremap(pa_data, sizeof(*data)); | |
5961 | - sprintf(buf, "setup data %x", data->type); | |
5962 | - reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); | |
5963 | - pa_data = data->next; | |
5964 | - early_iounmap(data, sizeof(*data)); | |
5965 | - } | |
5966 | -#endif | |
5967 | -} | |
5968 | - | |
5969 | void __init x86_64_start_kernel(char * real_mode_data) | |
5970 | { | |
5971 | struct xen_machphys_mapping mapping; | |
5972 | unsigned long machine_to_phys_nr_ents; | |
5973 | - int i; | |
5974 | ||
5975 | /* | |
5976 | * Build-time sanity checks on the kernel image and module | |
5977 | @@ -167,6 +113,7 @@ void __init x86_64_start_kernel(char * r | |
5978 | BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); | |
5979 | BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == | |
5980 | (__START_KERNEL & PGDIR_MASK))); | |
5981 | + BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); | |
5982 | ||
5983 | xen_setup_features(); | |
5984 | ||
5985 | @@ -174,8 +121,6 @@ void __init x86_64_start_kernel(char * r | |
5986 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | |
5987 | phys_to_machine_mapping = | |
5988 | (unsigned long *)xen_start_info->mfn_list; | |
5989 | - start_pfn = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) + | |
5990 | - xen_start_info->nr_pt_frames; | |
5991 | ||
5992 | machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START; | |
5993 | machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; | |
5994 | @@ -208,19 +153,23 @@ void __init x86_64_start_kernel(char * r | |
5995 | ||
5996 | early_printk("Kernel alive\n"); | |
5997 | ||
5998 | - for (i = 0; i < NR_CPUS; i++) | |
5999 | - cpu_pda(i) = &boot_cpu_pda[i]; | |
6000 | + x86_64_init_pda(); | |
6001 | ||
6002 | - pda_init(0); | |
6003 | + early_printk("Kernel really alive\n"); | |
6004 | + | |
6005 | + x86_64_start_reservations(real_mode_data); | |
6006 | +} | |
6007 | + | |
6008 | +void __init x86_64_start_reservations(char *real_mode_data) | |
6009 | +{ | |
6010 | copy_bootdata(__va(real_mode_data)); | |
6011 | ||
6012 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); | |
6013 | ||
6014 | reserve_early(round_up(__pa_symbol(&_end), PAGE_SIZE), | |
6015 | - start_pfn << PAGE_SHIFT, "Xen provided"); | |
6016 | - | |
6017 | - reserve_ebda_region(); | |
6018 | - reserve_setup_data(); | |
6019 | + __pa(xen_start_info->pt_base) | |
6020 | + + (xen_start_info->nr_pt_frames << PAGE_SHIFT), | |
6021 | + "Xen provided"); | |
6022 | ||
6023 | /* | |
6024 | * At this point everything still needed from the boot loader | |
82094b55 AF |
6025 | --- sle11-2009-10-16.orig/arch/x86/kernel/head_64-xen.S 2009-02-16 16:17:21.000000000 +0100 |
6026 | +++ sle11-2009-10-16/arch/x86/kernel/head_64-xen.S 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
6027 | @@ -95,53 +95,6 @@ NEXT_PAGE(hypercall_page) |
6028 | ||
6029 | #undef NEXT_PAGE | |
6030 | ||
6031 | - .data | |
6032 | - | |
6033 | - .align 16 | |
6034 | - .globl cpu_gdt_descr | |
6035 | -cpu_gdt_descr: | |
6036 | - .word gdt_end-cpu_gdt_table-1 | |
6037 | -gdt: | |
6038 | - .quad cpu_gdt_table | |
6039 | -#ifdef CONFIG_SMP | |
6040 | - .rept NR_CPUS-1 | |
6041 | - .word 0 | |
6042 | - .quad 0 | |
6043 | - .endr | |
6044 | -#endif | |
6045 | - | |
6046 | -/* We need valid kernel segments for data and code in long mode too | |
6047 | - * IRET will check the segment types kkeil 2000/10/28 | |
6048 | - * Also sysret mandates a special GDT layout | |
6049 | - */ | |
6050 | - | |
6051 | - .section .data.page_aligned, "aw" | |
6052 | - .align PAGE_SIZE | |
6053 | - | |
6054 | -/* The TLS descriptors are currently at a different place compared to i386. | |
6055 | - Hopefully nobody expects them at a fixed place (Wine?) */ | |
6056 | - | |
6057 | -ENTRY(cpu_gdt_table) | |
6058 | - .quad 0x0000000000000000 /* NULL descriptor */ | |
6059 | - .quad 0x00cf9b000000ffff /* __KERNEL32_CS */ | |
6060 | - .quad 0x00af9b000000ffff /* __KERNEL_CS */ | |
6061 | - .quad 0x00cf93000000ffff /* __KERNEL_DS */ | |
6062 | - .quad 0x00cffb000000ffff /* __USER32_CS */ | |
6063 | - .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */ | |
6064 | - .quad 0x00affb000000ffff /* __USER_CS */ | |
6065 | - .quad 0x0 /* unused */ | |
6066 | - .quad 0,0 /* TSS */ | |
6067 | - .quad 0,0 /* LDT */ | |
6068 | - .quad 0,0,0 /* three TLS descriptors */ | |
6069 | - .quad 0x0000f40000000000 /* node/CPU stored in limit */ | |
6070 | -gdt_end: | |
6071 | - /* asm/segment.h:GDT_ENTRIES must match this */ | |
6072 | - /* This should be a multiple of the cache line size */ | |
6073 | - /* GDTs of other CPUs are now dynamically allocated */ | |
6074 | - | |
6075 | - /* zero the remaining page */ | |
6076 | - .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 | |
6077 | - | |
6078 | .section .bss.page_aligned, "aw", @nobits | |
6079 | .align PAGE_SIZE | |
6080 | ENTRY(empty_zero_page) | |
82094b55 AF |
6081 | --- sle11-2009-10-16.orig/arch/x86/kernel/io_apic_32-xen.c 2009-03-16 16:38:05.000000000 +0100 |
6082 | +++ sle11-2009-10-16/arch/x86/kernel/io_apic_32-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
6083 | @@ -25,6 +25,7 @@ |
6084 | #include <linux/init.h> | |
6085 | #include <linux/delay.h> | |
6086 | #include <linux/sched.h> | |
6087 | +#include <linux/bootmem.h> | |
6088 | #include <linux/mc146818rtc.h> | |
6089 | #include <linux/compiler.h> | |
6090 | #include <linux/acpi.h> | |
6091 | @@ -75,7 +76,7 @@ static struct { int pin, apic; } ioapic_ | |
6092 | static DEFINE_SPINLOCK(ioapic_lock); | |
6093 | static DEFINE_SPINLOCK(vector_lock); | |
6094 | ||
6095 | -int timer_over_8254 __initdata = 1; | |
6096 | +int timer_through_8259 __initdata; | |
6097 | ||
6098 | /* | |
6099 | * Is the SiS APIC rmw bug present ? | |
6100 | @@ -89,15 +90,21 @@ int sis_apic_bug = -1; | |
6101 | int nr_ioapic_registers[MAX_IO_APICS]; | |
6102 | ||
6103 | /* I/O APIC entries */ | |
6104 | -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; | |
6105 | +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; | |
6106 | int nr_ioapics; | |
6107 | ||
6108 | /* MP IRQ source entries */ | |
6109 | -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; | |
6110 | +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; | |
6111 | ||
6112 | /* # of MP IRQ source entries */ | |
6113 | int mp_irq_entries; | |
6114 | ||
6115 | +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) | |
6116 | +int mp_bus_id_to_type[MAX_MP_BUSSES]; | |
6117 | +#endif | |
6118 | + | |
6119 | +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | |
6120 | + | |
6121 | static int disable_timer_pin_1 __initdata; | |
6122 | ||
6123 | /* | |
6124 | @@ -128,7 +135,7 @@ struct io_apic { | |
6125 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) | |
6126 | { | |
6127 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) | |
6128 | - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); | |
6129 | + + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); | |
6130 | } | |
6131 | #endif | |
6132 | ||
6133 | @@ -142,7 +149,7 @@ static inline unsigned int io_apic_read( | |
6134 | struct physdev_apic apic_op; | |
6135 | int ret; | |
6136 | ||
6137 | - apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; | |
6138 | + apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr; | |
6139 | apic_op.reg = reg; | |
6140 | ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); | |
6141 | if (ret) | |
6142 | @@ -160,7 +167,7 @@ static inline void io_apic_write(unsigne | |
6143 | #else | |
6144 | struct physdev_apic apic_op; | |
6145 | ||
6146 | - apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; | |
6147 | + apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr; | |
6148 | apic_op.reg = reg; | |
6149 | apic_op.value = value; | |
6150 | WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op)); | |
6151 | @@ -288,7 +295,7 @@ static void __init replace_pin_at_irq(un | |
6152 | } | |
6153 | } | |
6154 | ||
6155 | -static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) | |
6156 | +static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) | |
6157 | { | |
6158 | struct irq_pin_list *entry = irq_2_pin + irq; | |
6159 | unsigned int pin, reg; | |
6160 | @@ -308,30 +315,32 @@ static void __modify_IO_APIC_irq (unsign | |
6161 | } | |
6162 | ||
6163 | /* mask = 1 */ | |
6164 | -static void __mask_IO_APIC_irq (unsigned int irq) | |
6165 | +static void __mask_IO_APIC_irq(unsigned int irq) | |
6166 | { | |
6167 | - __modify_IO_APIC_irq(irq, 0x00010000, 0); | |
6168 | + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); | |
6169 | } | |
6170 | ||
6171 | /* mask = 0 */ | |
6172 | -static void __unmask_IO_APIC_irq (unsigned int irq) | |
6173 | +static void __unmask_IO_APIC_irq(unsigned int irq) | |
6174 | { | |
6175 | - __modify_IO_APIC_irq(irq, 0, 0x00010000); | |
6176 | + __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); | |
6177 | } | |
6178 | ||
6179 | /* mask = 1, trigger = 0 */ | |
6180 | -static void __mask_and_edge_IO_APIC_irq (unsigned int irq) | |
6181 | +static void __mask_and_edge_IO_APIC_irq(unsigned int irq) | |
6182 | { | |
6183 | - __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); | |
6184 | + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, | |
6185 | + IO_APIC_REDIR_LEVEL_TRIGGER); | |
6186 | } | |
6187 | ||
6188 | /* mask = 0, trigger = 1 */ | |
6189 | -static void __unmask_and_level_IO_APIC_irq (unsigned int irq) | |
6190 | +static void __unmask_and_level_IO_APIC_irq(unsigned int irq) | |
6191 | { | |
6192 | - __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); | |
6193 | + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, | |
6194 | + IO_APIC_REDIR_MASKED); | |
6195 | } | |
6196 | ||
6197 | -static void mask_IO_APIC_irq (unsigned int irq) | |
6198 | +static void mask_IO_APIC_irq(unsigned int irq) | |
6199 | { | |
6200 | unsigned long flags; | |
6201 | ||
6202 | @@ -340,7 +349,7 @@ static void mask_IO_APIC_irq (unsigned i | |
6203 | spin_unlock_irqrestore(&ioapic_lock, flags); | |
6204 | } | |
6205 | ||
6206 | -static void unmask_IO_APIC_irq (unsigned int irq) | |
6207 | +static void unmask_IO_APIC_irq(unsigned int irq) | |
6208 | { | |
6209 | unsigned long flags; | |
6210 | ||
6211 | @@ -352,7 +361,7 @@ static void unmask_IO_APIC_irq (unsigned | |
6212 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | |
6213 | { | |
6214 | struct IO_APIC_route_entry entry; | |
6215 | - | |
6216 | + | |
6217 | /* Check delivery_mode to be sure we're not clearing an SMI pin */ | |
6218 | entry = ioapic_read_entry(apic, pin); | |
6219 | if (entry.delivery_mode == dest_SMI) | |
6220 | @@ -364,7 +373,7 @@ static void clear_IO_APIC_pin(unsigned i | |
6221 | ioapic_mask_entry(apic, pin); | |
6222 | } | |
6223 | ||
6224 | -static void clear_IO_APIC (void) | |
6225 | +static void clear_IO_APIC(void) | |
6226 | { | |
6227 | int apic, pin; | |
6228 | ||
6229 | @@ -381,7 +390,7 @@ static void set_ioapic_affinity_irq(unsi | |
6230 | struct irq_pin_list *entry = irq_2_pin + irq; | |
6231 | unsigned int apicid_value; | |
6232 | cpumask_t tmp; | |
6233 | - | |
6234 | + | |
6235 | cpus_and(tmp, cpumask, cpu_online_map); | |
6236 | if (cpus_empty(tmp)) | |
6237 | tmp = TARGET_CPUS; | |
6238 | @@ -410,7 +419,7 @@ static void set_ioapic_affinity_irq(unsi | |
6239 | # include <linux/kernel_stat.h> /* kstat */ | |
6240 | # include <linux/slab.h> /* kmalloc() */ | |
6241 | # include <linux/timer.h> | |
6242 | - | |
6243 | + | |
6244 | #define IRQBALANCE_CHECK_ARCH -999 | |
6245 | #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) | |
6246 | #define MIN_BALANCED_IRQ_INTERVAL (HZ/2) | |
6247 | @@ -422,14 +431,14 @@ static int physical_balance __read_mostl | |
6248 | static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; | |
6249 | ||
6250 | static struct irq_cpu_info { | |
6251 | - unsigned long * last_irq; | |
6252 | - unsigned long * irq_delta; | |
6253 | + unsigned long *last_irq; | |
6254 | + unsigned long *irq_delta; | |
6255 | unsigned long irq; | |
6256 | } irq_cpu_data[NR_CPUS]; | |
6257 | ||
6258 | #define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) | |
6259 | -#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) | |
6260 | -#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) | |
6261 | +#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq]) | |
6262 | +#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq]) | |
6263 | ||
6264 | #define IDLE_ENOUGH(cpu,now) \ | |
6265 | (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) | |
6266 | @@ -468,8 +477,8 @@ inside: | |
6267 | if (cpu == -1) | |
6268 | cpu = NR_CPUS-1; | |
6269 | } | |
6270 | - } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) || | |
6271 | - (search_idle && !IDLE_ENOUGH(cpu,now))); | |
6272 | + } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) || | |
6273 | + (search_idle && !IDLE_ENOUGH(cpu, now))); | |
6274 | ||
6275 | return cpu; | |
6276 | } | |
6277 | @@ -479,15 +488,14 @@ static inline void balance_irq(int cpu, | |
6278 | unsigned long now = jiffies; | |
6279 | cpumask_t allowed_mask; | |
6280 | unsigned int new_cpu; | |
6281 | - | |
6282 | + | |
6283 | if (irqbalance_disabled) | |
6284 | - return; | |
6285 | + return; | |
6286 | ||
6287 | cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); | |
6288 | new_cpu = move(cpu, allowed_mask, now, 1); | |
6289 | - if (cpu != new_cpu) { | |
6290 | + if (cpu != new_cpu) | |
6291 | set_pending_irq(irq, cpumask_of_cpu(new_cpu)); | |
6292 | - } | |
6293 | } | |
6294 | ||
6295 | static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) | |
6296 | @@ -499,14 +507,14 @@ static inline void rotate_irqs_among_cpu | |
6297 | if (!irq_desc[j].action) | |
6298 | continue; | |
6299 | /* Is it a significant load ? */ | |
6300 | - if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < | |
6301 | + if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) < | |
6302 | useful_load_threshold) | |
6303 | continue; | |
6304 | balance_irq(i, j); | |
6305 | } | |
6306 | } | |
6307 | balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, | |
6308 | - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); | |
6309 | + balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); | |
6310 | return; | |
6311 | } | |
6312 | ||
6313 | @@ -535,22 +543,22 @@ static void do_irq_balance(void) | |
6314 | /* Is this an active IRQ or balancing disabled ? */ | |
6315 | if (!irq_desc[j].action || irq_balancing_disabled(j)) | |
6316 | continue; | |
6317 | - if ( package_index == i ) | |
6318 | - IRQ_DELTA(package_index,j) = 0; | |
6319 | + if (package_index == i) | |
6320 | + IRQ_DELTA(package_index, j) = 0; | |
6321 | /* Determine the total count per processor per IRQ */ | |
6322 | value_now = (unsigned long) kstat_cpu(i).irqs[j]; | |
6323 | ||
6324 | /* Determine the activity per processor per IRQ */ | |
6325 | - delta = value_now - LAST_CPU_IRQ(i,j); | |
6326 | + delta = value_now - LAST_CPU_IRQ(i, j); | |
6327 | ||
6328 | /* Update last_cpu_irq[][] for the next time */ | |
6329 | - LAST_CPU_IRQ(i,j) = value_now; | |
6330 | + LAST_CPU_IRQ(i, j) = value_now; | |
6331 | ||
6332 | /* Ignore IRQs whose rate is less than the clock */ | |
6333 | if (delta < useful_load_threshold) | |
6334 | continue; | |
6335 | /* update the load for the processor or package total */ | |
6336 | - IRQ_DELTA(package_index,j) += delta; | |
6337 | + IRQ_DELTA(package_index, j) += delta; | |
6338 | ||
6339 | /* Keep track of the higher numbered sibling as well */ | |
6340 | if (i != package_index) | |
6341 | @@ -576,7 +584,8 @@ static void do_irq_balance(void) | |
6342 | max_cpu_irq = ULONG_MAX; | |
6343 | ||
6344 | tryanothercpu: | |
6345 | - /* Look for heaviest loaded processor. | |
6346 | + /* | |
6347 | + * Look for heaviest loaded processor. | |
6348 | * We may come back to get the next heaviest loaded processor. | |
6349 | * Skip processors with trivial loads. | |
6350 | */ | |
6351 | @@ -585,7 +594,7 @@ tryanothercpu: | |
6352 | for_each_online_cpu(i) { | |
6353 | if (i != CPU_TO_PACKAGEINDEX(i)) | |
6354 | continue; | |
6355 | - if (max_cpu_irq <= CPU_IRQ(i)) | |
6356 | + if (max_cpu_irq <= CPU_IRQ(i)) | |
6357 | continue; | |
6358 | if (tmp_cpu_irq < CPU_IRQ(i)) { | |
6359 | tmp_cpu_irq = CPU_IRQ(i); | |
6360 | @@ -594,8 +603,9 @@ tryanothercpu: | |
6361 | } | |
6362 | ||
6363 | if (tmp_loaded == -1) { | |
6364 | - /* In the case of small number of heavy interrupt sources, | |
6365 | - * loading some of the cpus too much. We use Ingo's original | |
6366 | + /* | |
6367 | + * In the case of small number of heavy interrupt sources, | |
6368 | + * loading some of the cpus too much. We use Ingo's original | |
6369 | * approach to rotate them around. | |
6370 | */ | |
6371 | if (!first_attempt && imbalance >= useful_load_threshold) { | |
6372 | @@ -604,13 +614,14 @@ tryanothercpu: | |
6373 | } | |
6374 | goto not_worth_the_effort; | |
6375 | } | |
6376 | - | |
6377 | + | |
6378 | first_attempt = 0; /* heaviest search */ | |
6379 | max_cpu_irq = tmp_cpu_irq; /* load */ | |
6380 | max_loaded = tmp_loaded; /* processor */ | |
6381 | imbalance = (max_cpu_irq - min_cpu_irq) / 2; | |
6382 | - | |
6383 | - /* if imbalance is less than approx 10% of max load, then | |
6384 | + | |
6385 | + /* | |
6386 | + * if imbalance is less than approx 10% of max load, then | |
6387 | * observe diminishing returns action. - quit | |
6388 | */ | |
6389 | if (imbalance < (max_cpu_irq >> 3)) | |
6390 | @@ -626,26 +637,25 @@ tryanotherirq: | |
6391 | /* Is this an active IRQ? */ | |
6392 | if (!irq_desc[j].action) | |
6393 | continue; | |
6394 | - if (imbalance <= IRQ_DELTA(max_loaded,j)) | |
6395 | + if (imbalance <= IRQ_DELTA(max_loaded, j)) | |
6396 | continue; | |
6397 | /* Try to find the IRQ that is closest to the imbalance | |
6398 | * without going over. | |
6399 | */ | |
6400 | - if (move_this_load < IRQ_DELTA(max_loaded,j)) { | |
6401 | - move_this_load = IRQ_DELTA(max_loaded,j); | |
6402 | + if (move_this_load < IRQ_DELTA(max_loaded, j)) { | |
6403 | + move_this_load = IRQ_DELTA(max_loaded, j); | |
6404 | selected_irq = j; | |
6405 | } | |
6406 | } | |
6407 | - if (selected_irq == -1) { | |
6408 | + if (selected_irq == -1) | |
6409 | goto tryanothercpu; | |
6410 | - } | |
6411 | ||
6412 | imbalance = move_this_load; | |
6413 | - | |
6414 | + | |
6415 | /* For physical_balance case, we accumulated both load | |
6416 | * values in the one of the siblings cpu_irq[], | |
6417 | * to use the same code for physical and logical processors | |
6418 | - * as much as possible. | |
6419 | + * as much as possible. | |
6420 | * | |
6421 | * NOTE: the cpu_irq[] array holds the sum of the load for | |
6422 | * sibling A and sibling B in the slot for the lowest numbered | |
6423 | @@ -674,11 +684,11 @@ tryanotherirq: | |
6424 | /* mark for change destination */ | |
6425 | set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); | |
6426 | ||
6427 | - /* Since we made a change, come back sooner to | |
6428 | + /* Since we made a change, come back sooner to | |
6429 | * check for more variation. | |
6430 | */ | |
6431 | balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, | |
6432 | - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); | |
6433 | + balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); | |
6434 | return; | |
6435 | } | |
6436 | goto tryanotherirq; | |
6437 | @@ -689,7 +699,7 @@ not_worth_the_effort: | |
6438 | * upward | |
6439 | */ | |
6440 | balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, | |
6441 | - balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); | |
6442 | + balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); | |
6443 | return; | |
6444 | } | |
6445 | ||
6446 | @@ -728,13 +738,13 @@ static int __init balanced_irq_init(void | |
6447 | cpumask_t tmp; | |
6448 | ||
6449 | cpus_shift_right(tmp, cpu_online_map, 2); | |
6450 | - c = &boot_cpu_data; | |
6451 | + c = &boot_cpu_data; | |
6452 | /* When not overwritten by the command line ask subarchitecture. */ | |
6453 | if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) | |
6454 | irqbalance_disabled = NO_BALANCE_IRQ; | |
6455 | if (irqbalance_disabled) | |
6456 | return 0; | |
6457 | - | |
6458 | + | |
6459 | /* disable irqbalance completely if there is only one processor online */ | |
6460 | if (num_online_cpus() < 2) { | |
6461 | irqbalance_disabled = 1; | |
6462 | @@ -748,16 +758,14 @@ static int __init balanced_irq_init(void | |
6463 | physical_balance = 1; | |
6464 | ||
6465 | for_each_online_cpu(i) { | |
6466 | - irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); | |
6467 | - irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); | |
6468 | + irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); | |
6469 | + irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); | |
6470 | if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { | |
6471 | printk(KERN_ERR "balanced_irq_init: out of memory"); | |
6472 | goto failed; | |
6473 | } | |
6474 | - memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS); | |
6475 | - memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS); | |
6476 | } | |
6477 | - | |
6478 | + | |
6479 | printk(KERN_INFO "Starting balanced_irq\n"); | |
6480 | if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) | |
6481 | return 0; | |
6482 | @@ -799,7 +807,7 @@ void send_IPI_self(int vector) | |
6483 | /* | |
6484 | * Send the IPI. The write to APIC_ICR fires this off. | |
6485 | */ | |
6486 | - apic_write_around(APIC_ICR, cfg); | |
6487 | + apic_write(APIC_ICR, cfg); | |
6488 | #endif | |
6489 | } | |
6490 | #endif /* !CONFIG_SMP */ | |
6491 | @@ -853,10 +861,10 @@ static int find_irq_entry(int apic, int | |
6492 | int i; | |
6493 | ||
6494 | for (i = 0; i < mp_irq_entries; i++) | |
6495 | - if (mp_irqs[i].mpc_irqtype == type && | |
6496 | - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || | |
6497 | - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && | |
6498 | - mp_irqs[i].mpc_dstirq == pin) | |
6499 | + if (mp_irqs[i].mp_irqtype == type && | |
6500 | + (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || | |
6501 | + mp_irqs[i].mp_dstapic == MP_APIC_ALL) && | |
6502 | + mp_irqs[i].mp_dstirq == pin) | |
6503 | return i; | |
6504 | ||
6505 | return -1; | |
6506 | @@ -871,13 +879,13 @@ static int __init find_isa_irq_pin(int i | |
6507 | int i; | |
6508 | ||
6509 | for (i = 0; i < mp_irq_entries; i++) { | |
6510 | - int lbus = mp_irqs[i].mpc_srcbus; | |
6511 | + int lbus = mp_irqs[i].mp_srcbus; | |
6512 | ||
6513 | if (test_bit(lbus, mp_bus_not_pci) && | |
6514 | - (mp_irqs[i].mpc_irqtype == type) && | |
6515 | - (mp_irqs[i].mpc_srcbusirq == irq)) | |
6516 | + (mp_irqs[i].mp_irqtype == type) && | |
6517 | + (mp_irqs[i].mp_srcbusirq == irq)) | |
6518 | ||
6519 | - return mp_irqs[i].mpc_dstirq; | |
6520 | + return mp_irqs[i].mp_dstirq; | |
6521 | } | |
6522 | return -1; | |
6523 | } | |
6524 | @@ -887,17 +895,17 @@ static int __init find_isa_irq_apic(int | |
6525 | int i; | |
6526 | ||
6527 | for (i = 0; i < mp_irq_entries; i++) { | |
6528 | - int lbus = mp_irqs[i].mpc_srcbus; | |
6529 | + int lbus = mp_irqs[i].mp_srcbus; | |
6530 | ||
6531 | if (test_bit(lbus, mp_bus_not_pci) && | |
6532 | - (mp_irqs[i].mpc_irqtype == type) && | |
6533 | - (mp_irqs[i].mpc_srcbusirq == irq)) | |
6534 | + (mp_irqs[i].mp_irqtype == type) && | |
6535 | + (mp_irqs[i].mp_srcbusirq == irq)) | |
6536 | break; | |
6537 | } | |
6538 | if (i < mp_irq_entries) { | |
6539 | int apic; | |
6540 | - for(apic = 0; apic < nr_ioapics; apic++) { | |
6541 | - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) | |
6542 | + for (apic = 0; apic < nr_ioapics; apic++) { | |
6543 | + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) | |
6544 | return apic; | |
6545 | } | |
6546 | } | |
6547 | @@ -918,28 +926,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, | |
6548 | ||
6549 | apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " | |
6550 | "slot:%d, pin:%d.\n", bus, slot, pin); | |
6551 | - if (mp_bus_id_to_pci_bus[bus] == -1) { | |
6552 | + if (test_bit(bus, mp_bus_not_pci)) { | |
6553 | printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); | |
6554 | return -1; | |
6555 | } | |
6556 | for (i = 0; i < mp_irq_entries; i++) { | |
6557 | - int lbus = mp_irqs[i].mpc_srcbus; | |
6558 | + int lbus = mp_irqs[i].mp_srcbus; | |
6559 | ||
6560 | for (apic = 0; apic < nr_ioapics; apic++) | |
6561 | - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || | |
6562 | - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) | |
6563 | + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || | |
6564 | + mp_irqs[i].mp_dstapic == MP_APIC_ALL) | |
6565 | break; | |
6566 | ||
6567 | if (!test_bit(lbus, mp_bus_not_pci) && | |
6568 | - !mp_irqs[i].mpc_irqtype && | |
6569 | + !mp_irqs[i].mp_irqtype && | |
6570 | (bus == lbus) && | |
6571 | - (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { | |
6572 | - int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); | |
6573 | + (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { | |
6574 | + int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq); | |
6575 | ||
6576 | if (!(apic || IO_APIC_IRQ(irq))) | |
6577 | continue; | |
6578 | ||
6579 | - if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) | |
6580 | + if (pin == (mp_irqs[i].mp_srcbusirq & 3)) | |
6581 | return irq; | |
6582 | /* | |
6583 | * Use the first all-but-pin matching entry as a | |
6584 | @@ -954,7 +962,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, | |
6585 | EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); | |
6586 | ||
6587 | /* | |
6588 | - * This function currently is only a helper for the i386 smp boot process where | |
6589 | + * This function currently is only a helper for the i386 smp boot process where | |
6590 | * we need to reprogram the ioredtbls to cater for the cpus which have come online | |
6591 | * so mask in all cases should simply be TARGET_CPUS | |
6592 | */ | |
6593 | @@ -1008,7 +1016,7 @@ static int EISA_ELCR(unsigned int irq) | |
6594 | * EISA conforming in the MP table, that means its trigger type must | |
6595 | * be read in from the ELCR */ | |
6596 | ||
6597 | -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) | |
6598 | +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) | |
6599 | #define default_EISA_polarity(idx) default_ISA_polarity(idx) | |
6600 | ||
6601 | /* PCI interrupts are always polarity one level triggered, | |
6602 | @@ -1025,118 +1033,115 @@ static int EISA_ELCR(unsigned int irq) | |
6603 | ||
6604 | static int MPBIOS_polarity(int idx) | |
6605 | { | |
6606 | - int bus = mp_irqs[idx].mpc_srcbus; | |
6607 | + int bus = mp_irqs[idx].mp_srcbus; | |
6608 | int polarity; | |
6609 | ||
6610 | /* | |
6611 | * Determine IRQ line polarity (high active or low active): | |
6612 | */ | |
6613 | - switch (mp_irqs[idx].mpc_irqflag & 3) | |
6614 | + switch (mp_irqs[idx].mp_irqflag & 3) { | |
6615 | + case 0: /* conforms, ie. bus-type dependent polarity */ | |
6616 | { | |
6617 | - case 0: /* conforms, ie. bus-type dependent polarity */ | |
6618 | - { | |
6619 | - polarity = test_bit(bus, mp_bus_not_pci)? | |
6620 | - default_ISA_polarity(idx): | |
6621 | - default_PCI_polarity(idx); | |
6622 | - break; | |
6623 | - } | |
6624 | - case 1: /* high active */ | |
6625 | - { | |
6626 | - polarity = 0; | |
6627 | - break; | |
6628 | - } | |
6629 | - case 2: /* reserved */ | |
6630 | - { | |
6631 | - printk(KERN_WARNING "broken BIOS!!\n"); | |
6632 | - polarity = 1; | |
6633 | - break; | |
6634 | - } | |
6635 | - case 3: /* low active */ | |
6636 | - { | |
6637 | - polarity = 1; | |
6638 | - break; | |
6639 | - } | |
6640 | - default: /* invalid */ | |
6641 | - { | |
6642 | - printk(KERN_WARNING "broken BIOS!!\n"); | |
6643 | - polarity = 1; | |
6644 | - break; | |
6645 | - } | |
6646 | + polarity = test_bit(bus, mp_bus_not_pci)? | |
6647 | + default_ISA_polarity(idx): | |
6648 | + default_PCI_polarity(idx); | |
6649 | + break; | |
6650 | + } | |
6651 | + case 1: /* high active */ | |
6652 | + { | |
6653 | + polarity = 0; | |
6654 | + break; | |
6655 | + } | |
6656 | + case 2: /* reserved */ | |
6657 | + { | |
6658 | + printk(KERN_WARNING "broken BIOS!!\n"); | |
6659 | + polarity = 1; | |
6660 | + break; | |
6661 | + } | |
6662 | + case 3: /* low active */ | |
6663 | + { | |
6664 | + polarity = 1; | |
6665 | + break; | |
6666 | + } | |
6667 | + default: /* invalid */ | |
6668 | + { | |
6669 | + printk(KERN_WARNING "broken BIOS!!\n"); | |
6670 | + polarity = 1; | |
6671 | + break; | |
6672 | + } | |
6673 | } | |
6674 | return polarity; | |
6675 | } | |
6676 | ||
6677 | static int MPBIOS_trigger(int idx) | |
6678 | { | |
6679 | - int bus = mp_irqs[idx].mpc_srcbus; | |
6680 | + int bus = mp_irqs[idx].mp_srcbus; | |
6681 | int trigger; | |
6682 | ||
6683 | /* | |
6684 | * Determine IRQ trigger mode (edge or level sensitive): | |
6685 | */ | |
6686 | - switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) | |
6687 | + switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { | |
6688 | + case 0: /* conforms, ie. bus-type dependent */ | |
6689 | { | |
6690 | - case 0: /* conforms, ie. bus-type dependent */ | |
6691 | - { | |
6692 | - trigger = test_bit(bus, mp_bus_not_pci)? | |
6693 | - default_ISA_trigger(idx): | |
6694 | - default_PCI_trigger(idx); | |
6695 | + trigger = test_bit(bus, mp_bus_not_pci)? | |
6696 | + default_ISA_trigger(idx): | |
6697 | + default_PCI_trigger(idx); | |
6698 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) | |
6699 | - switch (mp_bus_id_to_type[bus]) | |
6700 | - { | |
6701 | - case MP_BUS_ISA: /* ISA pin */ | |
6702 | - { | |
6703 | - /* set before the switch */ | |
6704 | - break; | |
6705 | - } | |
6706 | - case MP_BUS_EISA: /* EISA pin */ | |
6707 | - { | |
6708 | - trigger = default_EISA_trigger(idx); | |
6709 | - break; | |
6710 | - } | |
6711 | - case MP_BUS_PCI: /* PCI pin */ | |
6712 | - { | |
6713 | - /* set before the switch */ | |
6714 | - break; | |
6715 | - } | |
6716 | - case MP_BUS_MCA: /* MCA pin */ | |
6717 | - { | |
6718 | - trigger = default_MCA_trigger(idx); | |
6719 | - break; | |
6720 | - } | |
6721 | - default: | |
6722 | - { | |
6723 | - printk(KERN_WARNING "broken BIOS!!\n"); | |
6724 | - trigger = 1; | |
6725 | - break; | |
6726 | - } | |
6727 | - } | |
6728 | -#endif | |
6729 | + switch (mp_bus_id_to_type[bus]) { | |
6730 | + case MP_BUS_ISA: /* ISA pin */ | |
6731 | + { | |
6732 | + /* set before the switch */ | |
6733 | break; | |
6734 | } | |
6735 | - case 1: /* edge */ | |
6736 | + case MP_BUS_EISA: /* EISA pin */ | |
6737 | { | |
6738 | - trigger = 0; | |
6739 | + trigger = default_EISA_trigger(idx); | |
6740 | break; | |
6741 | } | |
6742 | - case 2: /* reserved */ | |
6743 | + case MP_BUS_PCI: /* PCI pin */ | |
6744 | { | |
6745 | - printk(KERN_WARNING "broken BIOS!!\n"); | |
6746 | - trigger = 1; | |
6747 | + /* set before the switch */ | |
6748 | break; | |
6749 | } | |
6750 | - case 3: /* level */ | |
6751 | + case MP_BUS_MCA: /* MCA pin */ | |
6752 | { | |
6753 | - trigger = 1; | |
6754 | + trigger = default_MCA_trigger(idx); | |
6755 | break; | |
6756 | } | |
6757 | - default: /* invalid */ | |
6758 | + default: | |
6759 | { | |
6760 | printk(KERN_WARNING "broken BIOS!!\n"); | |
6761 | - trigger = 0; | |
6762 | + trigger = 1; | |
6763 | break; | |
6764 | } | |
6765 | } | |
6766 | +#endif | |
6767 | + break; | |
6768 | + } | |
6769 | + case 1: /* edge */ | |
6770 | + { | |
6771 | + trigger = 0; | |
6772 | + break; | |
6773 | + } | |
6774 | + case 2: /* reserved */ | |
6775 | + { | |
6776 | + printk(KERN_WARNING "broken BIOS!!\n"); | |
6777 | + trigger = 1; | |
6778 | + break; | |
6779 | + } | |
6780 | + case 3: /* level */ | |
6781 | + { | |
6782 | + trigger = 1; | |
6783 | + break; | |
6784 | + } | |
6785 | + default: /* invalid */ | |
6786 | + { | |
6787 | + printk(KERN_WARNING "broken BIOS!!\n"); | |
6788 | + trigger = 0; | |
6789 | + break; | |
6790 | + } | |
6791 | + } | |
6792 | return trigger; | |
6793 | } | |
6794 | ||
6795 | @@ -1153,16 +1158,16 @@ static inline int irq_trigger(int idx) | |
6796 | static int pin_2_irq(int idx, int apic, int pin) | |
6797 | { | |
6798 | int irq, i; | |
6799 | - int bus = mp_irqs[idx].mpc_srcbus; | |
6800 | + int bus = mp_irqs[idx].mp_srcbus; | |
6801 | ||
6802 | /* | |
6803 | * Debugging check, we are in big trouble if this message pops up! | |
6804 | */ | |
6805 | - if (mp_irqs[idx].mpc_dstirq != pin) | |
6806 | + if (mp_irqs[idx].mp_dstirq != pin) | |
6807 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); | |
6808 | ||
6809 | if (test_bit(bus, mp_bus_not_pci)) | |
6810 | - irq = mp_irqs[idx].mpc_srcbusirq; | |
6811 | + irq = mp_irqs[idx].mp_srcbusirq; | |
6812 | else { | |
6813 | /* | |
6814 | * PCI IRQs are mapped in order | |
6815 | @@ -1204,8 +1209,8 @@ static inline int IO_APIC_irq_trigger(in | |
6816 | ||
6817 | for (apic = 0; apic < nr_ioapics; apic++) { | |
6818 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | |
6819 | - idx = find_irq_entry(apic,pin,mp_INT); | |
6820 | - if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) | |
6821 | + idx = find_irq_entry(apic, pin, mp_INT); | |
6822 | + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) | |
6823 | return irq_trigger(idx); | |
6824 | } | |
6825 | } | |
6826 | @@ -1291,25 +1296,25 @@ static void __init setup_IO_APIC_irqs(vo | |
6827 | /* | |
6828 | * add it to the IO-APIC irq-routing table: | |
6829 | */ | |
6830 | - memset(&entry,0,sizeof(entry)); | |
6831 | + memset(&entry, 0, sizeof(entry)); | |
6832 | ||
6833 | entry.delivery_mode = INT_DELIVERY_MODE; | |
6834 | entry.dest_mode = INT_DEST_MODE; | |
6835 | entry.mask = 0; /* enable IRQ */ | |
6836 | - entry.dest.logical.logical_dest = | |
6837 | + entry.dest.logical.logical_dest = | |
6838 | cpu_mask_to_apicid(TARGET_CPUS); | |
6839 | ||
6840 | - idx = find_irq_entry(apic,pin,mp_INT); | |
6841 | + idx = find_irq_entry(apic, pin, mp_INT); | |
6842 | if (idx == -1) { | |
6843 | if (first_notcon) { | |
6844 | apic_printk(APIC_VERBOSE, KERN_DEBUG | |
6845 | " IO-APIC (apicid-pin) %d-%d", | |
6846 | - mp_ioapics[apic].mpc_apicid, | |
6847 | + mp_ioapics[apic].mp_apicid, | |
6848 | pin); | |
6849 | first_notcon = 0; | |
6850 | } else | |
6851 | apic_printk(APIC_VERBOSE, ", %d-%d", | |
6852 | - mp_ioapics[apic].mpc_apicid, pin); | |
6853 | + mp_ioapics[apic].mp_apicid, pin); | |
6854 | continue; | |
6855 | } | |
6856 | ||
6857 | @@ -1343,7 +1348,7 @@ static void __init setup_IO_APIC_irqs(vo | |
6858 | vector = assign_irq_vector(irq); | |
6859 | entry.vector = vector; | |
6860 | ioapic_register_intr(irq, vector, IOAPIC_AUTO); | |
6861 | - | |
6862 | + | |
6863 | if (!apic && (irq < 16)) | |
6864 | disable_8259A_irq(irq); | |
6865 | } | |
6866 | @@ -1355,27 +1360,23 @@ static void __init setup_IO_APIC_irqs(vo | |
6867 | apic_printk(APIC_VERBOSE, " not connected.\n"); | |
6868 | } | |
6869 | ||
6870 | +#ifndef CONFIG_XEN | |
6871 | /* | |
6872 | - * Set up the 8259A-master output pin: | |
6873 | + * Set up the timer pin, possibly with the 8259A-master behind. | |
6874 | */ | |
6875 | -#ifndef CONFIG_XEN | |
6876 | -static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) | |
6877 | +static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, | |
6878 | + int vector) | |
6879 | { | |
6880 | struct IO_APIC_route_entry entry; | |
6881 | ||
6882 | - memset(&entry,0,sizeof(entry)); | |
6883 | - | |
6884 | - disable_8259A_irq(0); | |
6885 | - | |
6886 | - /* mask LVT0 */ | |
6887 | - apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | |
6888 | + memset(&entry, 0, sizeof(entry)); | |
6889 | ||
6890 | /* | |
6891 | * We use logical delivery to get the timer IRQ | |
6892 | * to the first CPU. | |
6893 | */ | |
6894 | entry.dest_mode = INT_DEST_MODE; | |
6895 | - entry.mask = 0; /* unmask IRQ now */ | |
6896 | + entry.mask = 1; /* mask IRQ now */ | |
6897 | entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); | |
6898 | entry.delivery_mode = INT_DELIVERY_MODE; | |
6899 | entry.polarity = 0; | |
6900 | @@ -1384,17 +1385,14 @@ static void __init setup_ExtINT_IRQ0_pin | |
6901 | ||
6902 | /* | |
6903 | * The timer IRQ doesn't have to know that behind the | |
6904 | - * scene we have a 8259A-master in AEOI mode ... | |
6905 | + * scene we may have a 8259A-master in AEOI mode ... | |
6906 | */ | |
6907 | - irq_desc[0].chip = &ioapic_chip; | |
6908 | - set_irq_handler(0, handle_edge_irq); | |
6909 | + ioapic_register_intr(0, vector, IOAPIC_EDGE); | |
6910 | ||
6911 | /* | |
6912 | * Add it to the IO-APIC irq-routing table: | |
6913 | */ | |
6914 | ioapic_write_entry(apic, pin, entry); | |
6915 | - | |
6916 | - enable_8259A_irq(0); | |
6917 | } | |
6918 | ||
6919 | void __init print_IO_APIC(void) | |
6920 | @@ -1409,10 +1407,10 @@ void __init print_IO_APIC(void) | |
6921 | if (apic_verbosity == APIC_QUIET) | |
6922 | return; | |
6923 | ||
6924 | - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | |
6925 | + printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | |
6926 | for (i = 0; i < nr_ioapics; i++) | |
6927 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | |
6928 | - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); | |
6929 | + mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); | |
6930 | ||
6931 | /* | |
6932 | * We are a bit conservative about what we expect. We have to | |
6933 | @@ -1431,7 +1429,7 @@ void __init print_IO_APIC(void) | |
6934 | reg_03.raw = io_apic_read(apic, 3); | |
6935 | spin_unlock_irqrestore(&ioapic_lock, flags); | |
6936 | ||
6937 | - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); | |
6938 | + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); | |
6939 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | |
6940 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | |
6941 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); | |
6942 | @@ -1512,7 +1510,7 @@ void __init print_IO_APIC(void) | |
6943 | return; | |
6944 | } | |
6945 | ||
6946 | -static void print_APIC_bitfield (int base) | |
6947 | +static void print_APIC_bitfield(int base) | |
6948 | { | |
6949 | unsigned int v; | |
6950 | int i, j; | |
6951 | @@ -1533,7 +1531,7 @@ static void print_APIC_bitfield (int bas | |
6952 | } | |
6953 | } | |
6954 | ||
6955 | -void /*__init*/ print_local_APIC(void * dummy) | |
6956 | +void /*__init*/ print_local_APIC(void *dummy) | |
6957 | { | |
6958 | unsigned int v, ver, maxlvt; | |
6959 | ||
6960 | @@ -1542,6 +1540,7 @@ void /*__init*/ print_local_APIC(void * | |
6961 | ||
6962 | printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", | |
6963 | smp_processor_id(), hard_smp_processor_id()); | |
6964 | + v = apic_read(APIC_ID); | |
6965 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, | |
6966 | GET_APIC_ID(read_apic_id())); | |
6967 | v = apic_read(APIC_LVR); | |
6968 | @@ -1616,9 +1615,9 @@ void /*__init*/ print_local_APIC(void * | |
6969 | printk("\n"); | |
6970 | } | |
6971 | ||
6972 | -void print_all_local_APICs (void) | |
6973 | +void print_all_local_APICs(void) | |
6974 | { | |
6975 | - on_each_cpu(print_local_APIC, NULL, 1, 1); | |
6976 | + on_each_cpu(print_local_APIC, NULL, 1); | |
6977 | } | |
6978 | ||
6979 | void /*__init*/ print_PIC(void) | |
6980 | @@ -1639,11 +1638,11 @@ void /*__init*/ print_PIC(void) | |
6981 | v = inb(0xa0) << 8 | inb(0x20); | |
6982 | printk(KERN_DEBUG "... PIC IRR: %04x\n", v); | |
6983 | ||
6984 | - outb(0x0b,0xa0); | |
6985 | - outb(0x0b,0x20); | |
6986 | + outb(0x0b, 0xa0); | |
6987 | + outb(0x0b, 0x20); | |
6988 | v = inb(0xa0) << 8 | inb(0x20); | |
6989 | - outb(0x0a,0xa0); | |
6990 | - outb(0x0a,0x20); | |
6991 | + outb(0x0a, 0xa0); | |
6992 | + outb(0x0a, 0x20); | |
6993 | ||
6994 | spin_unlock_irqrestore(&i8259A_lock, flags); | |
6995 | ||
6996 | @@ -1652,6 +1651,8 @@ void /*__init*/ print_PIC(void) | |
6997 | v = inb(0x4d1) << 8 | inb(0x4d0); | |
6998 | printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); | |
6999 | } | |
7000 | +#else | |
7001 | +void __init print_IO_APIC(void) {} | |
7002 | #endif /* !CONFIG_XEN */ | |
7003 | ||
7004 | static void __init enable_IO_APIC(void) | |
7005 | @@ -1681,7 +1682,7 @@ static void __init enable_IO_APIC(void) | |
7006 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; | |
7007 | } | |
7008 | #ifndef CONFIG_XEN | |
7009 | - for(apic = 0; apic < nr_ioapics; apic++) { | |
7010 | + for (apic = 0; apic < nr_ioapics; apic++) { | |
7011 | int pin; | |
7012 | /* See if any of the pins is in ExtINT mode */ | |
7013 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | |
7014 | @@ -1774,7 +1775,7 @@ void disable_IO_APIC(void) | |
7015 | * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 | |
7016 | */ | |
7017 | ||
7018 | -#if !defined(CONFIG_XEN) && !defined(CONFIG_X86_NUMAQ) | |
7019 | +#ifndef CONFIG_XEN | |
7020 | static void __init setup_ioapic_ids_from_mpc(void) | |
7021 | { | |
7022 | union IO_APIC_reg_00 reg_00; | |
7023 | @@ -1784,6 +1785,11 @@ static void __init setup_ioapic_ids_from | |
7024 | unsigned char old_id; | |
7025 | unsigned long flags; | |
7026 | ||
7027 | +#ifdef CONFIG_X86_NUMAQ | |
7028 | + if (found_numaq) | |
7029 | + return; | |
7030 | +#endif | |
7031 | + | |
7032 | /* | |
7033 | * Don't check I/O APIC IDs for xAPIC systems. They have | |
7034 | * no meaning without the serial APIC bus. | |
7035 | @@ -1806,15 +1812,15 @@ static void __init setup_ioapic_ids_from | |
7036 | spin_lock_irqsave(&ioapic_lock, flags); | |
7037 | reg_00.raw = io_apic_read(apic, 0); | |
7038 | spin_unlock_irqrestore(&ioapic_lock, flags); | |
7039 | - | |
7040 | - old_id = mp_ioapics[apic].mpc_apicid; | |
7041 | ||
7042 | - if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { | |
7043 | + old_id = mp_ioapics[apic].mp_apicid; | |
7044 | + | |
7045 | + if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { | |
7046 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", | |
7047 | - apic, mp_ioapics[apic].mpc_apicid); | |
7048 | + apic, mp_ioapics[apic].mp_apicid); | |
7049 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | |
7050 | reg_00.bits.ID); | |
7051 | - mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; | |
7052 | + mp_ioapics[apic].mp_apicid = reg_00.bits.ID; | |
7053 | } | |
7054 | ||
7055 | /* | |
7056 | @@ -1823,9 +1829,9 @@ static void __init setup_ioapic_ids_from | |
7057 | * 'stuck on smp_invalidate_needed IPI wait' messages. | |
7058 | */ | |
7059 | if (check_apicid_used(phys_id_present_map, | |
7060 | - mp_ioapics[apic].mpc_apicid)) { | |
7061 | + mp_ioapics[apic].mp_apicid)) { | |
7062 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", | |
7063 | - apic, mp_ioapics[apic].mpc_apicid); | |
7064 | + apic, mp_ioapics[apic].mp_apicid); | |
7065 | for (i = 0; i < get_physical_broadcast(); i++) | |
7066 | if (!physid_isset(i, phys_id_present_map)) | |
7067 | break; | |
7068 | @@ -1834,13 +1840,13 @@ static void __init setup_ioapic_ids_from | |
7069 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | |
7070 | i); | |
7071 | physid_set(i, phys_id_present_map); | |
7072 | - mp_ioapics[apic].mpc_apicid = i; | |
7073 | + mp_ioapics[apic].mp_apicid = i; | |
7074 | } else { | |
7075 | physid_mask_t tmp; | |
7076 | - tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); | |
7077 | + tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); | |
7078 | apic_printk(APIC_VERBOSE, "Setting %d in the " | |
7079 | "phys_id_present_map\n", | |
7080 | - mp_ioapics[apic].mpc_apicid); | |
7081 | + mp_ioapics[apic].mp_apicid); | |
7082 | physids_or(phys_id_present_map, phys_id_present_map, tmp); | |
7083 | } | |
7084 | ||
7085 | @@ -1849,21 +1855,21 @@ static void __init setup_ioapic_ids_from | |
7086 | * We need to adjust the IRQ routing table | |
7087 | * if the ID changed. | |
7088 | */ | |
7089 | - if (old_id != mp_ioapics[apic].mpc_apicid) | |
7090 | + if (old_id != mp_ioapics[apic].mp_apicid) | |
7091 | for (i = 0; i < mp_irq_entries; i++) | |
7092 | - if (mp_irqs[i].mpc_dstapic == old_id) | |
7093 | - mp_irqs[i].mpc_dstapic | |
7094 | - = mp_ioapics[apic].mpc_apicid; | |
7095 | + if (mp_irqs[i].mp_dstapic == old_id) | |
7096 | + mp_irqs[i].mp_dstapic | |
7097 | + = mp_ioapics[apic].mp_apicid; | |
7098 | ||
7099 | /* | |
7100 | * Read the right value from the MPC table and | |
7101 | * write it into the ID register. | |
7102 | - */ | |
7103 | + */ | |
7104 | apic_printk(APIC_VERBOSE, KERN_INFO | |
7105 | "...changing IO-APIC physical APIC ID to %d ...", | |
7106 | - mp_ioapics[apic].mpc_apicid); | |
7107 | + mp_ioapics[apic].mp_apicid); | |
7108 | ||
7109 | - reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; | |
7110 | + reg_00.bits.ID = mp_ioapics[apic].mp_apicid; | |
7111 | spin_lock_irqsave(&ioapic_lock, flags); | |
7112 | io_apic_write(apic, 0, reg_00.raw); | |
7113 | spin_unlock_irqrestore(&ioapic_lock, flags); | |
7114 | @@ -1874,17 +1880,13 @@ static void __init setup_ioapic_ids_from | |
7115 | spin_lock_irqsave(&ioapic_lock, flags); | |
7116 | reg_00.raw = io_apic_read(apic, 0); | |
7117 | spin_unlock_irqrestore(&ioapic_lock, flags); | |
7118 | - if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) | |
7119 | + if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) | |
7120 | printk("could not set ID!\n"); | |
7121 | else | |
7122 | apic_printk(APIC_VERBOSE, " ok.\n"); | |
7123 | } | |
7124 | } | |
7125 | -#else | |
7126 | -static void __init setup_ioapic_ids_from_mpc(void) { } | |
7127 | -#endif | |
7128 | ||
7129 | -#ifndef CONFIG_XEN | |
7130 | int no_timer_check __initdata; | |
7131 | ||
7132 | static int __init notimercheck(char *s) | |
7133 | @@ -2077,45 +2079,53 @@ static inline void init_IO_APIC_traps(vo | |
7134 | * The local APIC irq-chip implementation: | |
7135 | */ | |
7136 | ||
7137 | -static void ack_apic(unsigned int irq) | |
7138 | +static void ack_lapic_irq(unsigned int irq) | |
7139 | { | |
7140 | ack_APIC_irq(); | |
7141 | } | |
7142 | ||
7143 | -static void mask_lapic_irq (unsigned int irq) | |
7144 | +static void mask_lapic_irq(unsigned int irq) | |
7145 | { | |
7146 | unsigned long v; | |
7147 | ||
7148 | v = apic_read(APIC_LVT0); | |
7149 | - apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); | |
7150 | + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); | |
7151 | } | |
7152 | ||
7153 | -static void unmask_lapic_irq (unsigned int irq) | |
7154 | +static void unmask_lapic_irq(unsigned int irq) | |
7155 | { | |
7156 | unsigned long v; | |
7157 | ||
7158 | v = apic_read(APIC_LVT0); | |
7159 | - apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); | |
7160 | + apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); | |
7161 | } | |
7162 | ||
7163 | static struct irq_chip lapic_chip __read_mostly = { | |
7164 | - .name = "local-APIC-edge", | |
7165 | + .name = "local-APIC", | |
7166 | .mask = mask_lapic_irq, | |
7167 | .unmask = unmask_lapic_irq, | |
7168 | - .eoi = ack_apic, | |
7169 | + .ack = ack_lapic_irq, | |
7170 | }; | |
7171 | ||
7172 | +static void lapic_register_intr(int irq, int vector) | |
7173 | +{ | |
7174 | + irq_desc[irq].status &= ~IRQ_LEVEL; | |
7175 | + set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, | |
7176 | + "edge"); | |
7177 | + set_intr_gate(vector, interrupt[irq]); | |
7178 | +} | |
7179 | + | |
7180 | static void __init setup_nmi(void) | |
7181 | { | |
7182 | /* | |
7183 | - * Dirty trick to enable the NMI watchdog ... | |
7184 | + * Dirty trick to enable the NMI watchdog ... | |
7185 | * We put the 8259A master into AEOI mode and | |
7186 | * unmask on all local APICs LVT0 as NMI. | |
7187 | * | |
7188 | * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') | |
7189 | * is from Maciej W. Rozycki - so we do not have to EOI from | |
7190 | * the NMI handler or the timer interrupt. | |
7191 | - */ | |
7192 | + */ | |
7193 | apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); | |
7194 | ||
7195 | enable_NMI_through_LVT0(); | |
7196 | @@ -2191,11 +2201,16 @@ static inline void __init unlock_ExtINT_ | |
7197 | static inline void __init check_timer(void) | |
7198 | { | |
7199 | int apic1, pin1, apic2, pin2; | |
7200 | + int no_pin1 = 0; | |
7201 | int vector; | |
7202 | + unsigned int ver; | |
7203 | unsigned long flags; | |
7204 | ||
7205 | local_irq_save(flags); | |
7206 | ||
7207 | + ver = apic_read(APIC_LVR); | |
7208 | + ver = GET_APIC_VERSION(ver); | |
7209 | + | |
7210 | /* | |
7211 | * get/set the timer IRQ vector: | |
7212 | */ | |
7213 | @@ -2204,34 +2219,54 @@ static inline void __init check_timer(vo | |
7214 | set_intr_gate(vector, interrupt[0]); | |
7215 | ||
7216 | /* | |
7217 | - * Subtle, code in do_timer_interrupt() expects an AEOI | |
7218 | - * mode for the 8259A whenever interrupts are routed | |
7219 | - * through I/O APICs. Also IRQ0 has to be enabled in | |
7220 | - * the 8259A which implies the virtual wire has to be | |
7221 | - * disabled in the local APIC. | |
7222 | + * As IRQ0 is to be enabled in the 8259A, the virtual | |
7223 | + * wire has to be disabled in the local APIC. Also | |
7224 | + * timer interrupts need to be acknowledged manually in | |
7225 | + * the 8259A for the i82489DX when using the NMI | |
7226 | + * watchdog as that APIC treats NMIs as level-triggered. | |
7227 | + * The AEOI mode will finish them in the 8259A | |
7228 | + * automatically. | |
7229 | */ | |
7230 | - apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | |
7231 | + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | |
7232 | init_8259A(1); | |
7233 | - timer_ack = 1; | |
7234 | - if (timer_over_8254 > 0) | |
7235 | - enable_8259A_irq(0); | |
7236 | + timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); | |
7237 | ||
7238 | pin1 = find_isa_irq_pin(0, mp_INT); | |
7239 | apic1 = find_isa_irq_apic(0, mp_INT); | |
7240 | pin2 = ioapic_i8259.pin; | |
7241 | apic2 = ioapic_i8259.apic; | |
7242 | ||
7243 | - printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", | |
7244 | - vector, apic1, pin1, apic2, pin2); | |
7245 | + apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " | |
7246 | + "apic1=%d pin1=%d apic2=%d pin2=%d\n", | |
7247 | + vector, apic1, pin1, apic2, pin2); | |
7248 | + | |
7249 | + /* | |
7250 | + * Some BIOS writers are clueless and report the ExtINTA | |
7251 | + * I/O APIC input from the cascaded 8259A as the timer | |
7252 | + * interrupt input. So just in case, if only one pin | |
7253 | + * was found above, try it both directly and through the | |
7254 | + * 8259A. | |
7255 | + */ | |
7256 | + if (pin1 == -1) { | |
7257 | + pin1 = pin2; | |
7258 | + apic1 = apic2; | |
7259 | + no_pin1 = 1; | |
7260 | + } else if (pin2 == -1) { | |
7261 | + pin2 = pin1; | |
7262 | + apic2 = apic1; | |
7263 | + } | |
7264 | ||
7265 | if (pin1 != -1) { | |
7266 | /* | |
7267 | * Ok, does IRQ0 through the IOAPIC work? | |
7268 | */ | |
7269 | + if (no_pin1) { | |
7270 | + add_pin_to_irq(0, apic1, pin1); | |
7271 | + setup_timer_IRQ0_pin(apic1, pin1, vector); | |
7272 | + } | |
7273 | unmask_IO_APIC_irq(0); | |
7274 | if (timer_irq_works()) { | |
7275 | if (nmi_watchdog == NMI_IO_APIC) { | |
7276 | - disable_8259A_irq(0); | |
7277 | setup_nmi(); | |
7278 | enable_8259A_irq(0); | |
7279 | } | |
7280 | @@ -2240,71 +2275,77 @@ static inline void __init check_timer(vo | |
7281 | goto out; | |
7282 | } | |
7283 | clear_IO_APIC_pin(apic1, pin1); | |
7284 | - printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " | |
7285 | - "IO-APIC\n"); | |
7286 | - } | |
7287 | - | |
7288 | - printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); | |
7289 | - if (pin2 != -1) { | |
7290 | - printk("\n..... (found pin %d) ...", pin2); | |
7291 | + if (!no_pin1) | |
7292 | + apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " | |
7293 | + "8254 timer not connected to IO-APIC\n"); | |
7294 | + | |
7295 | + apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " | |
7296 | + "(IRQ0) through the 8259A ...\n"); | |
7297 | + apic_printk(APIC_QUIET, KERN_INFO | |
7298 | + "..... (found apic %d pin %d) ...\n", apic2, pin2); | |
7299 | /* | |
7300 | * legacy devices should be connected to IO APIC #0 | |
7301 | */ | |
7302 | - setup_ExtINT_IRQ0_pin(apic2, pin2, vector); | |
7303 | + replace_pin_at_irq(0, apic1, pin1, apic2, pin2); | |
7304 | + setup_timer_IRQ0_pin(apic2, pin2, vector); | |
7305 | + unmask_IO_APIC_irq(0); | |
7306 | + enable_8259A_irq(0); | |
7307 | if (timer_irq_works()) { | |
7308 | - printk("works.\n"); | |
7309 | - if (pin1 != -1) | |
7310 | - replace_pin_at_irq(0, apic1, pin1, apic2, pin2); | |
7311 | - else | |
7312 | - add_pin_to_irq(0, apic2, pin2); | |
7313 | + apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); | |
7314 | + timer_through_8259 = 1; | |
7315 | if (nmi_watchdog == NMI_IO_APIC) { | |
7316 | + disable_8259A_irq(0); | |
7317 | setup_nmi(); | |
7318 | + enable_8259A_irq(0); | |
7319 | } | |
7320 | goto out; | |
7321 | } | |
7322 | /* | |
7323 | * Cleanup, just in case ... | |
7324 | */ | |
7325 | + disable_8259A_irq(0); | |
7326 | clear_IO_APIC_pin(apic2, pin2); | |
7327 | + apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); | |
7328 | } | |
7329 | - printk(" failed.\n"); | |
7330 | ||
7331 | if (nmi_watchdog == NMI_IO_APIC) { | |
7332 | - printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); | |
7333 | - nmi_watchdog = 0; | |
7334 | + apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " | |
7335 | + "through the IO-APIC - disabling NMI Watchdog!\n"); | |
7336 | + nmi_watchdog = NMI_NONE; | |
7337 | } | |
7338 | + timer_ack = 0; | |
7339 | ||
7340 | - printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); | |
7341 | + apic_printk(APIC_QUIET, KERN_INFO | |
7342 | + "...trying to set up timer as Virtual Wire IRQ...\n"); | |
7343 | ||
7344 | - disable_8259A_irq(0); | |
7345 | - set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, | |
7346 | - "fasteoi"); | |
7347 | - apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ | |
7348 | + lapic_register_intr(0, vector); | |
7349 | + apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ | |
7350 | enable_8259A_irq(0); | |
7351 | ||
7352 | if (timer_irq_works()) { | |
7353 | - printk(" works.\n"); | |
7354 | + apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); | |
7355 | goto out; | |
7356 | } | |
7357 | - apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); | |
7358 | - printk(" failed.\n"); | |
7359 | + disable_8259A_irq(0); | |
7360 | + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); | |
7361 | + apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); | |
7362 | ||
7363 | - printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); | |
7364 | + apic_printk(APIC_QUIET, KERN_INFO | |
7365 | + "...trying to set up timer as ExtINT IRQ...\n"); | |
7366 | ||
7367 | - timer_ack = 0; | |
7368 | init_8259A(0); | |
7369 | make_8259A_irq(0); | |
7370 | - apic_write_around(APIC_LVT0, APIC_DM_EXTINT); | |
7371 | + apic_write(APIC_LVT0, APIC_DM_EXTINT); | |
7372 | ||
7373 | unlock_ExtINT_logic(); | |
7374 | ||
7375 | if (timer_irq_works()) { | |
7376 | - printk(" works.\n"); | |
7377 | + apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); | |
7378 | goto out; | |
7379 | } | |
7380 | - printk(" failed :(.\n"); | |
7381 | + apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); | |
7382 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " | |
7383 | - "report. Then try booting with the 'noapic' option"); | |
7384 | + "report. Then try booting with the 'noapic' option.\n"); | |
7385 | out: | |
7386 | local_irq_restore(flags); | |
7387 | } | |
7388 | @@ -2314,11 +2355,21 @@ int timer_uses_ioapic_pin_0 = 0; | |
7389 | #endif | |
7390 | ||
7391 | /* | |
7392 | - * | |
7393 | - * IRQ's that are handled by the PIC in the MPS IOAPIC case. | |
7394 | - * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. | |
7395 | - * Linux doesn't really care, as it's not actually used | |
7396 | - * for any interrupt handling anyway. | |
7397 | + * Traditionally ISA IRQ2 is the cascade IRQ, and is not available | |
7398 | + * to devices. However there may be an I/O APIC pin available for | |
7399 | + * this interrupt regardless. The pin may be left unconnected, but | |
7400 | + * typically it will be reused as an ExtINT cascade interrupt for | |
7401 | + * the master 8259A. In the MPS case such a pin will normally be | |
7402 | + * reported as an ExtINT interrupt in the MP table. With ACPI | |
7403 | + * there is no provision for ExtINT interrupts, and in the absence | |
7404 | + * of an override it would be treated as an ordinary ISA I/O APIC | |
7405 | + * interrupt, that is edge-triggered and unmasked by default. We | |
7406 | + * used to do this, but it caused problems on some systems because | |
7407 | + * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using | |
7408 | + * the same ExtINT cascade interrupt to drive the local APIC of the | |
7409 | + * bootstrap processor. Therefore we refrain from routing IRQ2 to | |
7410 | + * the I/O APIC in all cases now. No actual device should request | |
7411 | + * it anyway. --macro | |
7412 | */ | |
7413 | #define PIC_IRQS (1 << PIC_CASCADE_IR) | |
7414 | ||
7415 | @@ -2328,25 +2379,22 @@ void __init setup_IO_APIC(void) | |
7416 | int i; | |
7417 | ||
7418 | /* Reserve all the system vectors. */ | |
7419 | - for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++) | |
7420 | + for (i = first_system_vector; i < NR_VECTORS; i++) | |
7421 | set_bit(i, used_vectors); | |
7422 | #endif | |
7423 | ||
7424 | enable_IO_APIC(); | |
7425 | ||
7426 | - if (acpi_ioapic) | |
7427 | - io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ | |
7428 | - else | |
7429 | - io_apic_irqs = ~PIC_IRQS; | |
7430 | + io_apic_irqs = ~PIC_IRQS; | |
7431 | ||
7432 | printk("ENABLING IO-APIC IRQs\n"); | |
7433 | ||
7434 | +#ifndef CONFIG_XEN | |
7435 | /* | |
7436 | * Set up IO-APIC IRQ routing. | |
7437 | */ | |
7438 | if (!acpi_ioapic) | |
7439 | setup_ioapic_ids_from_mpc(); | |
7440 | -#ifndef CONFIG_XEN | |
7441 | sync_Arb_IDs(); | |
7442 | #endif | |
7443 | setup_IO_APIC_irqs(); | |
7444 | @@ -2356,28 +2404,14 @@ void __init setup_IO_APIC(void) | |
7445 | print_IO_APIC(); | |
7446 | } | |
7447 | ||
7448 | -static int __init setup_disable_8254_timer(char *s) | |
7449 | -{ | |
7450 | - timer_over_8254 = -1; | |
7451 | - return 1; | |
7452 | -} | |
7453 | -static int __init setup_enable_8254_timer(char *s) | |
7454 | -{ | |
7455 | - timer_over_8254 = 2; | |
7456 | - return 1; | |
7457 | -} | |
7458 | - | |
7459 | -__setup("disable_8254_timer", setup_disable_8254_timer); | |
7460 | -__setup("enable_8254_timer", setup_enable_8254_timer); | |
7461 | - | |
7462 | /* | |
7463 | * Called after all the initialization is done. If we didnt find any | |
7464 | * APIC bugs then we can allow the modify fast path | |
7465 | */ | |
7466 | - | |
7467 | + | |
7468 | static int __init io_apic_bug_finalize(void) | |
7469 | { | |
7470 | - if(sis_apic_bug == -1) | |
7471 | + if (sis_apic_bug == -1) | |
7472 | sis_apic_bug = 0; | |
7473 | if (is_initial_xendomain()) { | |
7474 | struct xen_platform_op op = { .cmd = XENPF_platform_quirk }; | |
7475 | @@ -2396,17 +2430,17 @@ struct sysfs_ioapic_data { | |
7476 | struct sys_device dev; | |
7477 | struct IO_APIC_route_entry entry[0]; | |
7478 | }; | |
7479 | -static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; | |
7480 | +static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS]; | |
7481 | ||
7482 | static int ioapic_suspend(struct sys_device *dev, pm_message_t state) | |
7483 | { | |
7484 | struct IO_APIC_route_entry *entry; | |
7485 | struct sysfs_ioapic_data *data; | |
7486 | int i; | |
7487 | - | |
7488 | + | |
7489 | data = container_of(dev, struct sysfs_ioapic_data, dev); | |
7490 | entry = data->entry; | |
7491 | - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) | |
7492 | + for (i = 0; i < nr_ioapic_registers[dev->id]; i++) | |
7493 | entry[i] = ioapic_read_entry(dev->id, i); | |
7494 | ||
7495 | return 0; | |
7496 | @@ -2419,18 +2453,18 @@ static int ioapic_resume(struct sys_devi | |
7497 | unsigned long flags; | |
7498 | union IO_APIC_reg_00 reg_00; | |
7499 | int i; | |
7500 | - | |
7501 | + | |
7502 | data = container_of(dev, struct sysfs_ioapic_data, dev); | |
7503 | entry = data->entry; | |
7504 | ||
7505 | spin_lock_irqsave(&ioapic_lock, flags); | |
7506 | reg_00.raw = io_apic_read(dev->id, 0); | |
7507 | - if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { | |
7508 | - reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; | |
7509 | + if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { | |
7510 | + reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; | |
7511 | io_apic_write(dev->id, 0, reg_00.raw); | |
7512 | } | |
7513 | spin_unlock_irqrestore(&ioapic_lock, flags); | |
7514 | - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) | |
7515 | + for (i = 0; i < nr_ioapic_registers[dev->id]; i++) | |
7516 | ioapic_write_entry(dev->id, i, entry[i]); | |
7517 | ||
7518 | return 0; | |
7519 | @@ -2444,24 +2478,23 @@ static struct sysdev_class ioapic_sysdev | |
7520 | ||
7521 | static int __init ioapic_init_sysfs(void) | |
7522 | { | |
7523 | - struct sys_device * dev; | |
7524 | + struct sys_device *dev; | |
7525 | int i, size, error = 0; | |
7526 | ||
7527 | error = sysdev_class_register(&ioapic_sysdev_class); | |
7528 | if (error) | |
7529 | return error; | |
7530 | ||
7531 | - for (i = 0; i < nr_ioapics; i++ ) { | |
7532 | - size = sizeof(struct sys_device) + nr_ioapic_registers[i] | |
7533 | + for (i = 0; i < nr_ioapics; i++) { | |
7534 | + size = sizeof(struct sys_device) + nr_ioapic_registers[i] | |
7535 | * sizeof(struct IO_APIC_route_entry); | |
7536 | - mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); | |
7537 | + mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); | |
7538 | if (!mp_ioapic_data[i]) { | |
7539 | printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); | |
7540 | continue; | |
7541 | } | |
7542 | - memset(mp_ioapic_data[i], 0, size); | |
7543 | dev = &mp_ioapic_data[i]->dev; | |
7544 | - dev->id = i; | |
7545 | + dev->id = i; | |
7546 | dev->cls = &ioapic_sysdev_class; | |
7547 | error = sysdev_register(dev); | |
7548 | if (error) { | |
7549 | @@ -2538,7 +2571,7 @@ static int msi_compose_msg(struct pci_de | |
7550 | msg->address_lo = | |
7551 | MSI_ADDR_BASE_LO | | |
7552 | ((INT_DEST_MODE == 0) ? | |
7553 | - MSI_ADDR_DEST_MODE_PHYSICAL: | |
7554 | +MSI_ADDR_DEST_MODE_PHYSICAL: | |
7555 | MSI_ADDR_DEST_MODE_LOGICAL) | | |
7556 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? | |
7557 | MSI_ADDR_REDIRECTION_CPU: | |
7558 | @@ -2549,7 +2582,7 @@ static int msi_compose_msg(struct pci_de | |
7559 | MSI_DATA_TRIGGER_EDGE | | |
7560 | MSI_DATA_LEVEL_ASSERT | | |
7561 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? | |
7562 | - MSI_DATA_DELIVERY_FIXED: | |
7563 | +MSI_DATA_DELIVERY_FIXED: | |
7564 | MSI_DATA_DELIVERY_LOWPRI) | | |
7565 | MSI_DATA_VECTOR(vector); | |
7566 | } | |
7567 | @@ -2720,12 +2753,12 @@ int arch_setup_ht_irq(unsigned int irq, | |
7568 | #endif /* CONFIG_HT_IRQ */ | |
7569 | ||
7570 | /* -------------------------------------------------------------------------- | |
7571 | - ACPI-based IOAPIC Configuration | |
7572 | + ACPI-based IOAPIC Configuration | |
7573 | -------------------------------------------------------------------------- */ | |
7574 | ||
7575 | #ifdef CONFIG_ACPI | |
7576 | ||
7577 | -int __init io_apic_get_unique_id (int ioapic, int apic_id) | |
7578 | +int __init io_apic_get_unique_id(int ioapic, int apic_id) | |
7579 | { | |
7580 | #ifndef CONFIG_XEN | |
7581 | union IO_APIC_reg_00 reg_00; | |
7582 | @@ -2735,10 +2768,10 @@ int __init io_apic_get_unique_id (int io | |
7583 | int i = 0; | |
7584 | ||
7585 | /* | |
7586 | - * The P4 platform supports up to 256 APIC IDs on two separate APIC | |
7587 | - * buses (one for LAPICs, one for IOAPICs), where predecessors only | |
7588 | + * The P4 platform supports up to 256 APIC IDs on two separate APIC | |
7589 | + * buses (one for LAPICs, one for IOAPICs), where predecessors only | |
7590 | * supports up to 16 on one shared APIC bus. | |
7591 | - * | |
7592 | + * | |
7593 | * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full | |
7594 | * advantage of new APIC bus architecture. | |
7595 | */ | |
7596 | @@ -2757,7 +2790,7 @@ int __init io_apic_get_unique_id (int io | |
7597 | } | |
7598 | ||
7599 | /* | |
7600 | - * Every APIC in a system must have a unique ID or we get lots of nice | |
7601 | + * Every APIC in a system must have a unique ID or we get lots of nice | |
7602 | * 'stuck on smp_invalidate_needed IPI wait' messages. | |
7603 | */ | |
7604 | if (check_apicid_used(apic_id_map, apic_id)) { | |
7605 | @@ -2774,7 +2807,7 @@ int __init io_apic_get_unique_id (int io | |
7606 | "trying %d\n", ioapic, apic_id, i); | |
7607 | ||
7608 | apic_id = i; | |
7609 | - } | |
7610 | + } | |
7611 | ||
7612 | tmp = apicid_to_cpu_present(apic_id); | |
7613 | physids_or(apic_id_map, apic_id_map, tmp); | |
7614 | @@ -2802,7 +2835,7 @@ int __init io_apic_get_unique_id (int io | |
7615 | } | |
7616 | ||
7617 | ||
7618 | -int __init io_apic_get_version (int ioapic) | |
7619 | +int __init io_apic_get_version(int ioapic) | |
7620 | { | |
7621 | union IO_APIC_reg_01 reg_01; | |
7622 | unsigned long flags; | |
7623 | @@ -2815,7 +2848,7 @@ int __init io_apic_get_version (int ioap | |
7624 | } | |
7625 | ||
7626 | ||
7627 | -int __init io_apic_get_redir_entries (int ioapic) | |
7628 | +int __init io_apic_get_redir_entries(int ioapic) | |
7629 | { | |
7630 | union IO_APIC_reg_01 reg_01; | |
7631 | unsigned long flags; | |
7632 | @@ -2828,7 +2861,7 @@ int __init io_apic_get_redir_entries (in | |
7633 | } | |
7634 | ||
7635 | ||
7636 | -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) | |
7637 | +int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low) | |
7638 | { | |
7639 | struct IO_APIC_route_entry entry; | |
7640 | ||
7641 | @@ -2844,7 +2877,7 @@ int io_apic_set_pci_routing (int ioapic, | |
7642 | * corresponding device driver registers for this IRQ. | |
7643 | */ | |
7644 | ||
7645 | - memset(&entry,0,sizeof(entry)); | |
7646 | + memset(&entry, 0, sizeof(entry)); | |
7647 | ||
7648 | entry.delivery_mode = INT_DELIVERY_MODE; | |
7649 | entry.dest_mode = INT_DEST_MODE; | |
7650 | @@ -2863,7 +2896,7 @@ int io_apic_set_pci_routing (int ioapic, | |
7651 | ||
7652 | apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " | |
7653 | "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, | |
7654 | - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, | |
7655 | + mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq, | |
7656 | edge_level, active_high_low); | |
7657 | ||
7658 | ioapic_register_intr(irq, entry.vector, edge_level); | |
7659 | @@ -2884,8 +2917,8 @@ int acpi_get_override_irq(int bus_irq, i | |
7660 | return -1; | |
7661 | ||
7662 | for (i = 0; i < mp_irq_entries; i++) | |
7663 | - if (mp_irqs[i].mpc_irqtype == mp_INT && | |
7664 | - mp_irqs[i].mpc_srcbusirq == bus_irq) | |
7665 | + if (mp_irqs[i].mp_irqtype == mp_INT && | |
7666 | + mp_irqs[i].mp_srcbusirq == bus_irq) | |
7667 | break; | |
7668 | if (i >= mp_irq_entries) | |
7669 | return -1; | |
7670 | @@ -2918,3 +2951,35 @@ static int __init parse_noapic(char *arg | |
7671 | return 0; | |
7672 | } | |
7673 | early_param("noapic", parse_noapic); | |
7674 | + | |
7675 | +#ifndef CONFIG_XEN | |
7676 | +void __init ioapic_init_mappings(void) | |
7677 | +{ | |
7678 | + unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; | |
7679 | + int i; | |
7680 | + | |
7681 | + for (i = 0; i < nr_ioapics; i++) { | |
7682 | + if (smp_found_config) { | |
7683 | + ioapic_phys = mp_ioapics[i].mp_apicaddr; | |
7684 | + if (!ioapic_phys) { | |
7685 | + printk(KERN_ERR | |
7686 | + "WARNING: bogus zero IO-APIC " | |
7687 | + "address found in MPTABLE, " | |
7688 | + "disabling IO/APIC support!\n"); | |
7689 | + smp_found_config = 0; | |
7690 | + skip_ioapic_setup = 1; | |
7691 | + goto fake_ioapic_page; | |
7692 | + } | |
7693 | + } else { | |
7694 | +fake_ioapic_page: | |
7695 | + ioapic_phys = (unsigned long) | |
7696 | + alloc_bootmem_pages(PAGE_SIZE); | |
7697 | + ioapic_phys = __pa(ioapic_phys); | |
7698 | + } | |
7699 | + set_fixmap_nocache(idx, ioapic_phys); | |
7700 | + printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", | |
7701 | + __fix_to_virt(idx), ioapic_phys); | |
7702 | + idx++; | |
7703 | + } | |
7704 | +} | |
7705 | +#endif | |
82094b55 AF |
7706 | --- sle11-2009-10-16.orig/arch/x86/kernel/io_apic_64-xen.c 2009-03-16 16:38:05.000000000 +0100 |
7707 | +++ sle11-2009-10-16/arch/x86/kernel/io_apic_64-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
7708 | @@ -45,6 +45,7 @@ |
7709 | #include <asm/proto.h> | |
7710 | #include <asm/acpi.h> | |
7711 | #include <asm/dma.h> | |
7712 | +#include <asm/i8259.h> | |
7713 | #include <asm/nmi.h> | |
7714 | #include <asm/msidef.h> | |
7715 | #include <asm/hypertransport.h> | |
7716 | @@ -63,10 +64,16 @@ struct irq_cfg { | |
7717 | }; | |
7718 | ||
7719 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | |
7720 | -struct irq_cfg irq_cfg[NR_IRQS] __read_mostly; | |
7721 | +static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly; | |
7722 | ||
7723 | static int assign_irq_vector(int irq, cpumask_t mask); | |
7724 | ||
7725 | +#ifndef CONFIG_XEN | |
7726 | +int first_system_vector = 0xfe; | |
7727 | + | |
7728 | +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; | |
7729 | +#endif | |
7730 | + | |
7731 | #define __apicdebuginit __init | |
7732 | ||
7733 | int sis_apic_bug; /* not actually supported, dummy for compile */ | |
7734 | @@ -89,14 +96,14 @@ unsigned long io_apic_irqs; | |
7735 | ||
7736 | #define clear_IO_APIC() ((void)0) | |
7737 | #else | |
7738 | -int timer_over_8254 __initdata = 1; | |
7739 | +int timer_through_8259 __initdata; | |
7740 | ||
7741 | /* Where if anywhere is the i8259 connect in external int mode */ | |
7742 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | |
7743 | #endif | |
7744 | ||
7745 | static DEFINE_SPINLOCK(ioapic_lock); | |
7746 | -DEFINE_SPINLOCK(vector_lock); | |
7747 | +static DEFINE_SPINLOCK(vector_lock); | |
7748 | ||
7749 | /* | |
7750 | * # of IRQ routing registers | |
7751 | @@ -104,15 +111,17 @@ DEFINE_SPINLOCK(vector_lock); | |
7752 | int nr_ioapic_registers[MAX_IO_APICS]; | |
7753 | ||
7754 | /* I/O APIC entries */ | |
7755 | -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; | |
7756 | +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; | |
7757 | int nr_ioapics; | |
7758 | ||
7759 | /* MP IRQ source entries */ | |
7760 | -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; | |
7761 | +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; | |
7762 | ||
7763 | /* # of MP IRQ source entries */ | |
7764 | int mp_irq_entries; | |
7765 | ||
7766 | +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | |
7767 | + | |
7768 | /* | |
7769 | * Rough estimation of how many shared IRQs there are, can | |
7770 | * be changed anytime. | |
7771 | @@ -141,7 +150,7 @@ struct io_apic { | |
7772 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) | |
7773 | { | |
7774 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) | |
7775 | - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); | |
7776 | + + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); | |
7777 | } | |
7778 | #endif | |
7779 | ||
7780 | @@ -155,7 +164,7 @@ static inline unsigned int io_apic_read( | |
7781 | struct physdev_apic apic_op; | |
7782 | int ret; | |
7783 | ||
7784 | - apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; | |
7785 | + apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr; | |
7786 | apic_op.reg = reg; | |
7787 | ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); | |
7788 | if (ret) | |
7789 | @@ -173,7 +182,7 @@ static inline void io_apic_write(unsigne | |
7790 | #else | |
7791 | struct physdev_apic apic_op; | |
7792 | ||
7793 | - apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; | |
7794 | + apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr; | |
7795 | apic_op.reg = reg; | |
7796 | apic_op.value = value; | |
7797 | WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op)); | |
7798 | @@ -209,7 +218,7 @@ static bool io_apic_level_ack_pending(un | |
7799 | break; | |
7800 | reg = io_apic_read(entry->apic, 0x10 + pin*2); | |
7801 | /* Is the remote IRR bit set? */ | |
7802 | - if ((reg >> 14) & 1) { | |
7803 | + if (reg & IO_APIC_REDIR_REMOTE_IRR) { | |
7804 | spin_unlock_irqrestore(&ioapic_lock, flags); | |
7805 | return true; | |
7806 | } | |
7807 | @@ -308,7 +317,7 @@ static void __target_IO_APIC_irq(unsigne | |
7808 | break; | |
7809 | io_apic_write(apic, 0x11 + pin*2, dest); | |
7810 | reg = io_apic_read(apic, 0x10 + pin*2); | |
7811 | - reg &= ~0x000000ff; | |
7812 | + reg &= ~IO_APIC_REDIR_VECTOR_MASK; | |
7813 | reg |= vector; | |
7814 | io_apic_modify(apic, reg); | |
7815 | if (!entry->next) | |
7816 | @@ -372,6 +381,26 @@ static void add_pin_to_irq(unsigned int | |
7817 | } | |
7818 | ||
7819 | #ifndef CONFIG_XEN | |
7820 | +/* | |
7821 | + * Reroute an IRQ to a different pin. | |
7822 | + */ | |
7823 | +static void __init replace_pin_at_irq(unsigned int irq, | |
7824 | + int oldapic, int oldpin, | |
7825 | + int newapic, int newpin) | |
7826 | +{ | |
7827 | + struct irq_pin_list *entry = irq_2_pin + irq; | |
7828 | + | |
7829 | + while (1) { | |
7830 | + if (entry->apic == oldapic && entry->pin == oldpin) { | |
7831 | + entry->apic = newapic; | |
7832 | + entry->pin = newpin; | |
7833 | + } | |
7834 | + if (!entry->next) | |
7835 | + break; | |
7836 | + entry = irq_2_pin + entry->next; | |
7837 | + } | |
7838 | +} | |
7839 | + | |
7840 | #define __DO_ACTION(R, ACTION, FINAL) \ | |
7841 | \ | |
7842 | { \ | |
7843 | @@ -399,10 +428,11 @@ static void add_pin_to_irq(unsigned int | |
7844 | static void name##_IO_APIC_irq (unsigned int irq) \ | |
7845 | __DO_ACTION(R, ACTION, FINAL) | |
7846 | ||
7847 | -DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) | |
7848 | - /* mask = 1 */ | |
7849 | -DO_ACTION( __unmask, 0, &= 0xfffeffff, ) | |
7850 | - /* mask = 0 */ | |
7851 | +/* mask = 1 */ | |
7852 | +DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) | |
7853 | + | |
7854 | +/* mask = 0 */ | |
7855 | +DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) | |
7856 | ||
7857 | static void mask_IO_APIC_irq (unsigned int irq) | |
7858 | { | |
7859 | @@ -465,22 +495,6 @@ static int __init disable_timer_pin_setu | |
7860 | } | |
7861 | __setup("disable_timer_pin_1", disable_timer_pin_setup); | |
7862 | ||
7863 | -#ifndef CONFIG_XEN | |
7864 | -static int __init setup_disable_8254_timer(char *s) | |
7865 | -{ | |
7866 | - timer_over_8254 = -1; | |
7867 | - return 1; | |
7868 | -} | |
7869 | -static int __init setup_enable_8254_timer(char *s) | |
7870 | -{ | |
7871 | - timer_over_8254 = 2; | |
7872 | - return 1; | |
7873 | -} | |
7874 | - | |
7875 | -__setup("disable_8254_timer", setup_disable_8254_timer); | |
7876 | -__setup("enable_8254_timer", setup_enable_8254_timer); | |
7877 | -#endif /* !CONFIG_XEN */ | |
7878 | - | |
7879 | ||
7880 | /* | |
7881 | * Find the IRQ entry number of a certain pin. | |
7882 | @@ -490,10 +504,10 @@ static int find_irq_entry(int apic, int | |
7883 | int i; | |
7884 | ||
7885 | for (i = 0; i < mp_irq_entries; i++) | |
7886 | - if (mp_irqs[i].mpc_irqtype == type && | |
7887 | - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || | |
7888 | - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && | |
7889 | - mp_irqs[i].mpc_dstirq == pin) | |
7890 | + if (mp_irqs[i].mp_irqtype == type && | |
7891 | + (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || | |
7892 | + mp_irqs[i].mp_dstapic == MP_APIC_ALL) && | |
7893 | + mp_irqs[i].mp_dstirq == pin) | |
7894 | return i; | |
7895 | ||
7896 | return -1; | |
7897 | @@ -508,13 +522,13 @@ static int __init find_isa_irq_pin(int i | |
7898 | int i; | |
7899 | ||
7900 | for (i = 0; i < mp_irq_entries; i++) { | |
7901 | - int lbus = mp_irqs[i].mpc_srcbus; | |
7902 | + int lbus = mp_irqs[i].mp_srcbus; | |
7903 | ||
7904 | if (test_bit(lbus, mp_bus_not_pci) && | |
7905 | - (mp_irqs[i].mpc_irqtype == type) && | |
7906 | - (mp_irqs[i].mpc_srcbusirq == irq)) | |
7907 | + (mp_irqs[i].mp_irqtype == type) && | |
7908 | + (mp_irqs[i].mp_srcbusirq == irq)) | |
7909 | ||
7910 | - return mp_irqs[i].mpc_dstirq; | |
7911 | + return mp_irqs[i].mp_dstirq; | |
7912 | } | |
7913 | return -1; | |
7914 | } | |
7915 | @@ -524,17 +538,17 @@ static int __init find_isa_irq_apic(int | |
7916 | int i; | |
7917 | ||
7918 | for (i = 0; i < mp_irq_entries; i++) { | |
7919 | - int lbus = mp_irqs[i].mpc_srcbus; | |
7920 | + int lbus = mp_irqs[i].mp_srcbus; | |
7921 | ||
7922 | if (test_bit(lbus, mp_bus_not_pci) && | |
7923 | - (mp_irqs[i].mpc_irqtype == type) && | |
7924 | - (mp_irqs[i].mpc_srcbusirq == irq)) | |
7925 | + (mp_irqs[i].mp_irqtype == type) && | |
7926 | + (mp_irqs[i].mp_srcbusirq == irq)) | |
7927 | break; | |
7928 | } | |
7929 | if (i < mp_irq_entries) { | |
7930 | int apic; | |
7931 | for(apic = 0; apic < nr_ioapics; apic++) { | |
7932 | - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) | |
7933 | + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) | |
7934 | return apic; | |
7935 | } | |
7936 | } | |
7937 | @@ -555,28 +569,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, | |
7938 | ||
7939 | apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", | |
7940 | bus, slot, pin); | |
7941 | - if (mp_bus_id_to_pci_bus[bus] == -1) { | |
7942 | + if (test_bit(bus, mp_bus_not_pci)) { | |
7943 | apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); | |
7944 | return -1; | |
7945 | } | |
7946 | for (i = 0; i < mp_irq_entries; i++) { | |
7947 | - int lbus = mp_irqs[i].mpc_srcbus; | |
7948 | + int lbus = mp_irqs[i].mp_srcbus; | |
7949 | ||
7950 | for (apic = 0; apic < nr_ioapics; apic++) | |
7951 | - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || | |
7952 | - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) | |
7953 | + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || | |
7954 | + mp_irqs[i].mp_dstapic == MP_APIC_ALL) | |
7955 | break; | |
7956 | ||
7957 | if (!test_bit(lbus, mp_bus_not_pci) && | |
7958 | - !mp_irqs[i].mpc_irqtype && | |
7959 | + !mp_irqs[i].mp_irqtype && | |
7960 | (bus == lbus) && | |
7961 | - (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { | |
7962 | - int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); | |
7963 | + (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { | |
7964 | + int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); | |
7965 | ||
7966 | if (!(apic || IO_APIC_IRQ(irq))) | |
7967 | continue; | |
7968 | ||
7969 | - if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) | |
7970 | + if (pin == (mp_irqs[i].mp_srcbusirq & 3)) | |
7971 | return irq; | |
7972 | /* | |
7973 | * Use the first all-but-pin matching entry as a | |
7974 | @@ -604,13 +618,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, | |
7975 | ||
7976 | static int MPBIOS_polarity(int idx) | |
7977 | { | |
7978 | - int bus = mp_irqs[idx].mpc_srcbus; | |
7979 | + int bus = mp_irqs[idx].mp_srcbus; | |
7980 | int polarity; | |
7981 | ||
7982 | /* | |
7983 | * Determine IRQ line polarity (high active or low active): | |
7984 | */ | |
7985 | - switch (mp_irqs[idx].mpc_irqflag & 3) | |
7986 | + switch (mp_irqs[idx].mp_irqflag & 3) | |
7987 | { | |
7988 | case 0: /* conforms, ie. bus-type dependent polarity */ | |
7989 | if (test_bit(bus, mp_bus_not_pci)) | |
7990 | @@ -646,13 +660,13 @@ static int MPBIOS_polarity(int idx) | |
7991 | ||
7992 | static int MPBIOS_trigger(int idx) | |
7993 | { | |
7994 | - int bus = mp_irqs[idx].mpc_srcbus; | |
7995 | + int bus = mp_irqs[idx].mp_srcbus; | |
7996 | int trigger; | |
7997 | ||
7998 | /* | |
7999 | * Determine IRQ trigger mode (edge or level sensitive): | |
8000 | */ | |
8001 | - switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) | |
8002 | + switch ((mp_irqs[idx].mp_irqflag>>2) & 3) | |
8003 | { | |
8004 | case 0: /* conforms, ie. bus-type dependent */ | |
8005 | if (test_bit(bus, mp_bus_not_pci)) | |
8006 | @@ -699,16 +713,16 @@ static inline int irq_trigger(int idx) | |
8007 | static int pin_2_irq(int idx, int apic, int pin) | |
8008 | { | |
8009 | int irq, i; | |
8010 | - int bus = mp_irqs[idx].mpc_srcbus; | |
8011 | + int bus = mp_irqs[idx].mp_srcbus; | |
8012 | ||
8013 | /* | |
8014 | * Debugging check, we are in big trouble if this message pops up! | |
8015 | */ | |
8016 | - if (mp_irqs[idx].mpc_dstirq != pin) | |
8017 | + if (mp_irqs[idx].mp_dstirq != pin) | |
8018 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); | |
8019 | ||
8020 | if (test_bit(bus, mp_bus_not_pci)) { | |
8021 | - irq = mp_irqs[idx].mpc_srcbusirq; | |
8022 | + irq = mp_irqs[idx].mp_srcbusirq; | |
8023 | } else { | |
8024 | /* | |
8025 | * PCI IRQs are mapped in order | |
8026 | @@ -722,6 +736,19 @@ static int pin_2_irq(int idx, int apic, | |
8027 | return irq; | |
8028 | } | |
8029 | ||
8030 | +void lock_vector_lock(void) | |
8031 | +{ | |
8032 | + /* Used to ensure the online set of cpus does not change | |
8033 | + * during assign_irq_vector. | |
8034 | + */ | |
8035 | + spin_lock(&vector_lock); | |
8036 | +} | |
8037 | + | |
8038 | +void unlock_vector_lock(void) | |
8039 | +{ | |
8040 | + spin_unlock(&vector_lock); | |
8041 | +} | |
8042 | + | |
8043 | static int __assign_irq_vector(int irq, cpumask_t mask) | |
8044 | { | |
8045 | struct physdev_irq irq_op; | |
8046 | @@ -773,7 +800,7 @@ static void __clear_irq_vector(int irq) | |
8047 | ||
8048 | vector = cfg->vector; | |
8049 | cpus_and(mask, cfg->domain, cpu_online_map); | |
8050 | - for_each_cpu_mask(cpu, mask) | |
8051 | + for_each_cpu_mask_nr(cpu, mask) | |
8052 | per_cpu(vector_irq, cpu)[vector] = -1; | |
8053 | ||
8054 | cfg->vector = 0; | |
8055 | @@ -842,7 +869,7 @@ static void setup_IO_APIC_irq(int apic, | |
8056 | apic_printk(APIC_VERBOSE,KERN_DEBUG | |
8057 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " | |
8058 | "IRQ %d Mode:%i Active:%i)\n", | |
8059 | - apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector, | |
8060 | + apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, | |
8061 | irq, trigger, polarity); | |
8062 | ||
8063 | /* | |
8064 | @@ -883,10 +910,10 @@ static void __init setup_IO_APIC_irqs(vo | |
8065 | idx = find_irq_entry(apic,pin,mp_INT); | |
8066 | if (idx == -1) { | |
8067 | if (first_notcon) { | |
8068 | - apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin); | |
8069 | + apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); | |
8070 | first_notcon = 0; | |
8071 | } else | |
8072 | - apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin); | |
8073 | + apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); | |
8074 | continue; | |
8075 | } | |
8076 | if (!first_notcon) { | |
8077 | @@ -908,26 +935,21 @@ static void __init setup_IO_APIC_irqs(vo | |
8078 | ||
8079 | #ifndef CONFIG_XEN | |
8080 | /* | |
8081 | - * Set up the 8259A-master output pin as broadcast to all | |
8082 | - * CPUs. | |
8083 | + * Set up the timer pin, possibly with the 8259A-master behind. | |
8084 | */ | |
8085 | -static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) | |
8086 | +static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, | |
8087 | + int vector) | |
8088 | { | |
8089 | struct IO_APIC_route_entry entry; | |
8090 | ||
8091 | memset(&entry, 0, sizeof(entry)); | |
8092 | ||
8093 | - disable_8259A_irq(0); | |
8094 | - | |
8095 | - /* mask LVT0 */ | |
8096 | - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | |
8097 | - | |
8098 | /* | |
8099 | * We use logical delivery to get the timer IRQ | |
8100 | * to the first CPU. | |
8101 | */ | |
8102 | entry.dest_mode = INT_DEST_MODE; | |
8103 | - entry.mask = 0; /* unmask IRQ now */ | |
8104 | + entry.mask = 1; /* mask IRQ now */ | |
8105 | entry.dest = cpu_mask_to_apicid(TARGET_CPUS); | |
8106 | entry.delivery_mode = INT_DELIVERY_MODE; | |
8107 | entry.polarity = 0; | |
8108 | @@ -936,7 +958,7 @@ static void __init setup_ExtINT_IRQ0_pin | |
8109 | ||
8110 | /* | |
8111 | * The timer IRQ doesn't have to know that behind the | |
8112 | - * scene we have a 8259A-master in AEOI mode ... | |
8113 | + * scene we may have a 8259A-master in AEOI mode ... | |
8114 | */ | |
8115 | set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); | |
8116 | ||
8117 | @@ -944,8 +966,6 @@ static void __init setup_ExtINT_IRQ0_pin | |
8118 | * Add it to the IO-APIC irq-routing table: | |
8119 | */ | |
8120 | ioapic_write_entry(apic, pin, entry); | |
8121 | - | |
8122 | - enable_8259A_irq(0); | |
8123 | } | |
8124 | ||
8125 | void __apicdebuginit print_IO_APIC(void) | |
8126 | @@ -962,7 +982,7 @@ void __apicdebuginit print_IO_APIC(void) | |
8127 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | |
8128 | for (i = 0; i < nr_ioapics; i++) | |
8129 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | |
8130 | - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); | |
8131 | + mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); | |
8132 | ||
8133 | /* | |
8134 | * We are a bit conservative about what we expect. We have to | |
8135 | @@ -980,7 +1000,7 @@ void __apicdebuginit print_IO_APIC(void) | |
8136 | spin_unlock_irqrestore(&ioapic_lock, flags); | |
8137 | ||
8138 | printk("\n"); | |
8139 | - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); | |
8140 | + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); | |
8141 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | |
8142 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | |
8143 | ||
8144 | @@ -1072,6 +1092,7 @@ void __apicdebuginit print_local_APIC(vo | |
8145 | ||
8146 | printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", | |
8147 | smp_processor_id(), hard_smp_processor_id()); | |
8148 | + v = apic_read(APIC_ID); | |
8149 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); | |
8150 | v = apic_read(APIC_LVR); | |
8151 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); | |
8152 | @@ -1141,7 +1162,7 @@ void __apicdebuginit print_local_APIC(vo | |
8153 | ||
8154 | void print_all_local_APICs (void) | |
8155 | { | |
8156 | - on_each_cpu(print_local_APIC, NULL, 1, 1); | |
8157 | + on_each_cpu(print_local_APIC, NULL, 1); | |
8158 | } | |
8159 | ||
8160 | void __apicdebuginit print_PIC(void) | |
8161 | @@ -1175,6 +1196,8 @@ void __apicdebuginit print_PIC(void) | |
8162 | v = inb(0x4d1) << 8 | inb(0x4d0); | |
8163 | printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); | |
8164 | } | |
8165 | +#else | |
8166 | +void __apicdebuginit print_IO_APIC(void) {} | |
8167 | #endif /* !CONFIG_XEN */ | |
8168 | ||
8169 | void __init enable_IO_APIC(void) | |
8170 | @@ -1359,12 +1382,10 @@ static unsigned int startup_ioapic_irq(u | |
8171 | static int ioapic_retrigger_irq(unsigned int irq) | |
8172 | { | |
8173 | struct irq_cfg *cfg = &irq_cfg[irq]; | |
8174 | - cpumask_t mask; | |
8175 | unsigned long flags; | |
8176 | ||
8177 | spin_lock_irqsave(&vector_lock, flags); | |
8178 | - mask = cpumask_of_cpu(first_cpu(cfg->domain)); | |
8179 | - send_IPI_mask(mask, cfg->vector); | |
8180 | + send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); | |
8181 | spin_unlock_irqrestore(&vector_lock, flags); | |
8182 | ||
8183 | return 1; | |
8184 | @@ -1545,7 +1566,7 @@ static inline void init_IO_APIC_traps(vo | |
8185 | } | |
8186 | ||
8187 | #ifndef CONFIG_XEN | |
8188 | -static void enable_lapic_irq (unsigned int irq) | |
8189 | +static void unmask_lapic_irq(unsigned int irq) | |
8190 | { | |
8191 | unsigned long v; | |
8192 | ||
8193 | @@ -1553,7 +1574,7 @@ static void enable_lapic_irq (unsigned i | |
8194 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); | |
8195 | } | |
8196 | ||
8197 | -static void disable_lapic_irq (unsigned int irq) | |
8198 | +static void mask_lapic_irq(unsigned int irq) | |
8199 | { | |
8200 | unsigned long v; | |
8201 | ||
8202 | @@ -1566,19 +1587,20 @@ static void ack_lapic_irq (unsigned int | |
8203 | ack_APIC_irq(); | |
8204 | } | |
8205 | ||
8206 | -static void end_lapic_irq (unsigned int i) { /* nothing */ } | |
8207 | - | |
8208 | -static struct hw_interrupt_type lapic_irq_type __read_mostly = { | |
8209 | - .name = "local-APIC", | |
8210 | - .typename = "local-APIC-edge", | |
8211 | - .startup = NULL, /* startup_irq() not used for IRQ0 */ | |
8212 | - .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ | |
8213 | - .enable = enable_lapic_irq, | |
8214 | - .disable = disable_lapic_irq, | |
8215 | - .ack = ack_lapic_irq, | |
8216 | - .end = end_lapic_irq, | |
8217 | +static struct irq_chip lapic_chip __read_mostly = { | |
8218 | + .name = "local-APIC", | |
8219 | + .mask = mask_lapic_irq, | |
8220 | + .unmask = unmask_lapic_irq, | |
8221 | + .ack = ack_lapic_irq, | |
8222 | }; | |
8223 | ||
8224 | +static void lapic_register_intr(int irq) | |
8225 | +{ | |
8226 | + irq_desc[irq].status &= ~IRQ_LEVEL; | |
8227 | + set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, | |
8228 | + "edge"); | |
8229 | +} | |
8230 | + | |
8231 | static void __init setup_nmi(void) | |
8232 | { | |
8233 | /* | |
8234 | @@ -1664,6 +1686,7 @@ static inline void __init check_timer(vo | |
8235 | struct irq_cfg *cfg = irq_cfg + 0; | |
8236 | int apic1, pin1, apic2, pin2; | |
8237 | unsigned long flags; | |
8238 | + int no_pin1 = 0; | |
8239 | ||
8240 | local_irq_save(flags); | |
8241 | ||
8242 | @@ -1674,34 +1697,48 @@ static inline void __init check_timer(vo | |
8243 | assign_irq_vector(0, TARGET_CPUS); | |
8244 | ||
8245 | /* | |
8246 | - * Subtle, code in do_timer_interrupt() expects an AEOI | |
8247 | - * mode for the 8259A whenever interrupts are routed | |
8248 | - * through I/O APICs. Also IRQ0 has to be enabled in | |
8249 | - * the 8259A which implies the virtual wire has to be | |
8250 | - * disabled in the local APIC. | |
8251 | + * As IRQ0 is to be enabled in the 8259A, the virtual | |
8252 | + * wire has to be disabled in the local APIC. | |
8253 | */ | |
8254 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | |
8255 | init_8259A(1); | |
8256 | - if (timer_over_8254 > 0) | |
8257 | - enable_8259A_irq(0); | |
8258 | ||
8259 | pin1 = find_isa_irq_pin(0, mp_INT); | |
8260 | apic1 = find_isa_irq_apic(0, mp_INT); | |
8261 | pin2 = ioapic_i8259.pin; | |
8262 | apic2 = ioapic_i8259.apic; | |
8263 | ||
8264 | - apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", | |
8265 | - cfg->vector, apic1, pin1, apic2, pin2); | |
8266 | + apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " | |
8267 | + "apic1=%d pin1=%d apic2=%d pin2=%d\n", | |
8268 | + cfg->vector, apic1, pin1, apic2, pin2); | |
8269 | + | |
8270 | + /* | |
8271 | + * Some BIOS writers are clueless and report the ExtINTA | |
8272 | + * I/O APIC input from the cascaded 8259A as the timer | |
8273 | + * interrupt input. So just in case, if only one pin | |
8274 | + * was found above, try it both directly and through the | |
8275 | + * 8259A. | |
8276 | + */ | |
8277 | + if (pin1 == -1) { | |
8278 | + pin1 = pin2; | |
8279 | + apic1 = apic2; | |
8280 | + no_pin1 = 1; | |
8281 | + } else if (pin2 == -1) { | |
8282 | + pin2 = pin1; | |
8283 | + apic2 = apic1; | |
8284 | + } | |
8285 | ||
8286 | if (pin1 != -1) { | |
8287 | /* | |
8288 | * Ok, does IRQ0 through the IOAPIC work? | |
8289 | */ | |
8290 | + if (no_pin1) { | |
8291 | + add_pin_to_irq(0, apic1, pin1); | |
8292 | + setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); | |
8293 | + } | |
8294 | unmask_IO_APIC_irq(0); | |
8295 | if (!no_timer_check && timer_irq_works()) { | |
8296 | - nmi_watchdog_default(); | |
8297 | if (nmi_watchdog == NMI_IO_APIC) { | |
8298 | - disable_8259A_irq(0); | |
8299 | setup_nmi(); | |
8300 | enable_8259A_irq(0); | |
8301 | } | |
8302 | @@ -1710,54 +1747,62 @@ static inline void __init check_timer(vo | |
8303 | goto out; | |
8304 | } | |
8305 | clear_IO_APIC_pin(apic1, pin1); | |
8306 | - apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not " | |
8307 | - "connected to IO-APIC\n"); | |
8308 | - } | |
8309 | - | |
8310 | - apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) " | |
8311 | - "through the 8259A ... "); | |
8312 | - if (pin2 != -1) { | |
8313 | - apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", | |
8314 | - apic2, pin2); | |
8315 | + if (!no_pin1) | |
8316 | + apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " | |
8317 | + "8254 timer not connected to IO-APIC\n"); | |
8318 | + | |
8319 | + apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " | |
8320 | + "(IRQ0) through the 8259A ...\n"); | |
8321 | + apic_printk(APIC_QUIET, KERN_INFO | |
8322 | + "..... (found apic %d pin %d) ...\n", apic2, pin2); | |
8323 | /* | |
8324 | * legacy devices should be connected to IO APIC #0 | |
8325 | */ | |
8326 | - setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector); | |
8327 | + replace_pin_at_irq(0, apic1, pin1, apic2, pin2); | |
8328 | + setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); | |
8329 | + unmask_IO_APIC_irq(0); | |
8330 | + enable_8259A_irq(0); | |
8331 | if (timer_irq_works()) { | |
8332 | - apic_printk(APIC_VERBOSE," works.\n"); | |
8333 | - nmi_watchdog_default(); | |
8334 | + apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); | |
8335 | + timer_through_8259 = 1; | |
8336 | if (nmi_watchdog == NMI_IO_APIC) { | |
8337 | + disable_8259A_irq(0); | |
8338 | setup_nmi(); | |
8339 | + enable_8259A_irq(0); | |
8340 | } | |
8341 | goto out; | |
8342 | } | |
8343 | /* | |
8344 | * Cleanup, just in case ... | |
8345 | */ | |
8346 | + disable_8259A_irq(0); | |
8347 | clear_IO_APIC_pin(apic2, pin2); | |
8348 | + apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); | |
8349 | } | |
8350 | - apic_printk(APIC_VERBOSE," failed.\n"); | |
8351 | ||
8352 | if (nmi_watchdog == NMI_IO_APIC) { | |
8353 | - printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); | |
8354 | - nmi_watchdog = 0; | |
8355 | + apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " | |
8356 | + "through the IO-APIC - disabling NMI Watchdog!\n"); | |
8357 | + nmi_watchdog = NMI_NONE; | |
8358 | } | |
8359 | ||
8360 | - apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); | |
8361 | + apic_printk(APIC_QUIET, KERN_INFO | |
8362 | + "...trying to set up timer as Virtual Wire IRQ...\n"); | |
8363 | ||
8364 | - disable_8259A_irq(0); | |
8365 | - irq_desc[0].chip = &lapic_irq_type; | |
8366 | + lapic_register_intr(0); | |
8367 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ | |
8368 | enable_8259A_irq(0); | |
8369 | ||
8370 | if (timer_irq_works()) { | |
8371 | - apic_printk(APIC_VERBOSE," works.\n"); | |
8372 | + apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); | |
8373 | goto out; | |
8374 | } | |
8375 | + disable_8259A_irq(0); | |
8376 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); | |
8377 | - apic_printk(APIC_VERBOSE," failed.\n"); | |
8378 | + apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); | |
8379 | ||
8380 | - apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ..."); | |
8381 | + apic_printk(APIC_QUIET, KERN_INFO | |
8382 | + "...trying to set up timer as ExtINT IRQ...\n"); | |
8383 | ||
8384 | init_8259A(0); | |
8385 | make_8259A_irq(0); | |
8386 | @@ -1766,11 +1811,12 @@ static inline void __init check_timer(vo | |
8387 | unlock_ExtINT_logic(); | |
8388 | ||
8389 | if (timer_irq_works()) { | |
8390 | - apic_printk(APIC_VERBOSE," works.\n"); | |
8391 | + apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); | |
8392 | goto out; | |
8393 | } | |
8394 | - apic_printk(APIC_VERBOSE," failed :(.\n"); | |
8395 | - panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n"); | |
8396 | + apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); | |
8397 | + panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " | |
8398 | + "report. Then try booting with the 'noapic' option.\n"); | |
8399 | out: | |
8400 | local_irq_restore(flags); | |
8401 | } | |
8402 | @@ -1788,10 +1834,21 @@ __setup("no_timer_check", notimercheck); | |
8403 | ||
8404 | /* | |
8405 | * | |
8406 | - * IRQs that are handled by the PIC in the MPS IOAPIC case. | |
8407 | - * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. | |
8408 | - * Linux doesn't really care, as it's not actually used | |
8409 | - * for any interrupt handling anyway. | |
8410 | + * Traditionally ISA IRQ2 is the cascade IRQ, and is not available | |
8411 | + * to devices. However there may be an I/O APIC pin available for | |
8412 | + * this interrupt regardless. The pin may be left unconnected, but | |
8413 | + * typically it will be reused as an ExtINT cascade interrupt for | |
8414 | + * the master 8259A. In the MPS case such a pin will normally be | |
8415 | + * reported as an ExtINT interrupt in the MP table. With ACPI | |
8416 | + * there is no provision for ExtINT interrupts, and in the absence | |
8417 | + * of an override it would be treated as an ordinary ISA I/O APIC | |
8418 | + * interrupt, that is edge-triggered and unmasked by default. We | |
8419 | + * used to do this, but it caused problems on some systems because | |
8420 | + * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using | |
8421 | + * the same ExtINT cascade interrupt to drive the local APIC of the | |
8422 | + * bootstrap processor. Therefore we refrain from routing IRQ2 to | |
8423 | + * the I/O APIC in all cases now. No actual device should request | |
8424 | + * it anyway. --macro | |
8425 | */ | |
8426 | #define PIC_IRQS (1<<2) | |
8427 | ||
8428 | @@ -1799,10 +1856,7 @@ void __init setup_IO_APIC(void) | |
8429 | { | |
8430 | enable_IO_APIC(); | |
8431 | ||
8432 | - if (acpi_ioapic) | |
8433 | - io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ | |
8434 | - else | |
8435 | - io_apic_irqs = ~PIC_IRQS; | |
8436 | + io_apic_irqs = ~PIC_IRQS; | |
8437 | ||
8438 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); | |
8439 | ||
8440 | @@ -1851,8 +1905,8 @@ static int ioapic_resume(struct sys_devi | |
8441 | ||
8442 | spin_lock_irqsave(&ioapic_lock, flags); | |
8443 | reg_00.raw = io_apic_read(dev->id, 0); | |
8444 | - if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { | |
8445 | - reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; | |
8446 | + if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { | |
8447 | + reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; | |
8448 | io_apic_write(dev->id, 0, reg_00.raw); | |
8449 | } | |
8450 | spin_unlock_irqrestore(&ioapic_lock, flags); | |
8451 | @@ -2254,8 +2308,8 @@ int acpi_get_override_irq(int bus_irq, i | |
8452 | return -1; | |
8453 | ||
8454 | for (i = 0; i < mp_irq_entries; i++) | |
8455 | - if (mp_irqs[i].mpc_irqtype == mp_INT && | |
8456 | - mp_irqs[i].mpc_srcbusirq == bus_irq) | |
8457 | + if (mp_irqs[i].mp_irqtype == mp_INT && | |
8458 | + mp_irqs[i].mp_srcbusirq == bus_irq) | |
8459 | break; | |
8460 | if (i >= mp_irq_entries) | |
8461 | return -1; | |
8462 | @@ -2349,7 +2403,7 @@ void __init ioapic_init_mappings(void) | |
8463 | ioapic_res = ioapic_setup_resources(); | |
8464 | for (i = 0; i < nr_ioapics; i++) { | |
8465 | if (smp_found_config) { | |
8466 | - ioapic_phys = mp_ioapics[i].mpc_apicaddr; | |
8467 | + ioapic_phys = mp_ioapics[i].mp_apicaddr; | |
8468 | } else { | |
8469 | ioapic_phys = (unsigned long) | |
8470 | alloc_bootmem_pages(PAGE_SIZE); | |
82094b55 AF |
8471 | --- sle11-2009-10-16.orig/arch/x86/kernel/ipi-xen.c 2009-03-16 16:38:05.000000000 +0100 |
8472 | +++ sle11-2009-10-16/arch/x86/kernel/ipi-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
8473 | @@ -8,7 +8,6 @@ |
8474 | #include <linux/kernel_stat.h> | |
8475 | #include <linux/mc146818rtc.h> | |
8476 | #include <linux/cache.h> | |
8477 | -#include <linux/interrupt.h> | |
8478 | #include <linux/cpu.h> | |
8479 | #include <linux/module.h> | |
8480 | ||
8481 | @@ -85,7 +84,7 @@ void __send_IPI_shortcut(unsigned int sh | |
8482 | /* | |
8483 | * Send the IPI. The write to APIC_ICR fires this off. | |
8484 | */ | |
8485 | - apic_write_around(APIC_ICR, cfg); | |
8486 | + apic_write(APIC_ICR, cfg); | |
8487 | #else | |
8488 | int cpu; | |
8489 | ||
8490 | @@ -132,7 +131,7 @@ static inline void __send_IPI_dest_field | |
8491 | * prepare target chip field | |
8492 | */ | |
8493 | cfg = __prepare_ICR2(mask); | |
8494 | - apic_write_around(APIC_ICR2, cfg); | |
8495 | + apic_write(APIC_ICR2, cfg); | |
8496 | ||
8497 | /* | |
8498 | * program the ICR | |
8499 | @@ -142,7 +141,7 @@ static inline void __send_IPI_dest_field | |
8500 | /* | |
8501 | * Send the IPI. The write to APIC_ICR fires this off. | |
8502 | */ | |
8503 | - apic_write_around(APIC_ICR, cfg); | |
8504 | + apic_write(APIC_ICR, cfg); | |
8505 | } | |
8506 | #endif | |
8507 | ||
82094b55 AF |
8508 | --- sle11-2009-10-16.orig/arch/x86/kernel/irq_32-xen.c 2009-03-16 16:38:05.000000000 +0100 |
8509 | +++ sle11-2009-10-16/arch/x86/kernel/irq_32-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
8510 | @@ -48,6 +48,29 @@ void ack_bad_irq(unsigned int irq) |
8511 | #endif | |
8512 | } | |
8513 | ||
8514 | +#ifdef CONFIG_DEBUG_STACKOVERFLOW | |
8515 | +/* Debugging check for stack overflow: is there less than 1KB free? */ | |
8516 | +static int check_stack_overflow(void) | |
8517 | +{ | |
8518 | + long sp; | |
8519 | + | |
8520 | + __asm__ __volatile__("andl %%esp,%0" : | |
8521 | + "=r" (sp) : "0" (THREAD_SIZE - 1)); | |
8522 | + | |
8523 | + return sp < (sizeof(struct thread_info) + STACK_WARN); | |
8524 | +} | |
8525 | + | |
8526 | +static void print_stack_overflow(void) | |
8527 | +{ | |
8528 | + printk(KERN_WARNING "low stack detected by irq handler\n"); | |
8529 | + dump_stack(); | |
8530 | +} | |
8531 | + | |
8532 | +#else | |
8533 | +static inline int check_stack_overflow(void) { return 0; } | |
8534 | +static inline void print_stack_overflow(void) { } | |
8535 | +#endif | |
8536 | + | |
8537 | #ifdef CONFIG_4KSTACKS | |
8538 | /* | |
8539 | * per-CPU IRQ handling contexts (thread information and stack) | |
8540 | @@ -59,48 +82,26 @@ union irq_ctx { | |
8541 | ||
8542 | static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; | |
8543 | static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; | |
8544 | -#endif | |
8545 | - | |
8546 | -/* | |
8547 | - * do_IRQ handles all normal device IRQ's (the special | |
8548 | - * SMP cross-CPU interrupts have their own specific | |
8549 | - * handlers). | |
8550 | - */ | |
8551 | -unsigned int do_IRQ(struct pt_regs *regs) | |
8552 | -{ | |
8553 | - struct pt_regs *old_regs; | |
8554 | - /* high bit used in ret_from_ code */ | |
8555 | - int irq = ~regs->orig_ax; | |
8556 | - struct irq_desc *desc = irq_desc + irq; | |
8557 | -#ifdef CONFIG_4KSTACKS | |
8558 | - union irq_ctx *curctx, *irqctx; | |
8559 | - u32 *isp; | |
8560 | -#endif | |
8561 | ||
8562 | - if (unlikely((unsigned)irq >= NR_IRQS)) { | |
8563 | - printk(KERN_EMERG "%s: cannot handle IRQ %d\n", | |
8564 | - __func__, irq); | |
8565 | - BUG(); | |
8566 | - } | |
8567 | +static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; | |
8568 | +static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; | |
8569 | ||
8570 | - old_regs = set_irq_regs(regs); | |
8571 | - /*irq_enter();*/ | |
8572 | -#ifdef CONFIG_DEBUG_STACKOVERFLOW | |
8573 | - /* Debugging check for stack overflow: is there less than 1KB free? */ | |
8574 | - { | |
8575 | - long sp; | |
8576 | - | |
8577 | - __asm__ __volatile__("andl %%esp,%0" : | |
8578 | - "=r" (sp) : "0" (THREAD_SIZE - 1)); | |
8579 | - if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) { | |
8580 | - printk("do_IRQ: stack overflow: %ld\n", | |
8581 | - sp - sizeof(struct thread_info)); | |
8582 | - dump_stack(); | |
8583 | - } | |
8584 | - } | |
8585 | -#endif | |
8586 | +static void call_on_stack(void *func, void *stack) | |
8587 | +{ | |
8588 | + asm volatile("xchgl %%ebx,%%esp \n" | |
8589 | + "call *%%edi \n" | |
8590 | + "movl %%ebx,%%esp \n" | |
8591 | + : "=b" (stack) | |
8592 | + : "0" (stack), | |
8593 | + "D"(func) | |
8594 | + : "memory", "cc", "edx", "ecx", "eax"); | |
8595 | +} | |
8596 | ||
8597 | -#ifdef CONFIG_4KSTACKS | |
8598 | +static inline int | |
8599 | +execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) | |
8600 | +{ | |
8601 | + union irq_ctx *curctx, *irqctx; | |
8602 | + u32 *isp, arg1, arg2; | |
8603 | ||
8604 | curctx = (union irq_ctx *) current_thread_info(); | |
8605 | irqctx = hardirq_ctx[smp_processor_id()]; | |
8606 | @@ -111,52 +112,39 @@ unsigned int do_IRQ(struct pt_regs *regs | |
8607 | * handler) we can't do that and just have to keep using the | |
8608 | * current stack (which is the irq stack already after all) | |
8609 | */ | |
8610 | - if (curctx != irqctx) { | |
8611 | - int arg1, arg2, bx; | |
8612 | - | |
8613 | - /* build the stack frame on the IRQ stack */ | |
8614 | - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); | |
8615 | - irqctx->tinfo.task = curctx->tinfo.task; | |
8616 | - irqctx->tinfo.previous_esp = current_stack_pointer; | |
8617 | + if (unlikely(curctx == irqctx)) | |
8618 | + return 0; | |
8619 | ||
8620 | - /* | |
8621 | - * Copy the softirq bits in preempt_count so that the | |
8622 | - * softirq checks work in the hardirq context. | |
8623 | - */ | |
8624 | - irqctx->tinfo.preempt_count = | |
8625 | - (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | | |
8626 | - (curctx->tinfo.preempt_count & SOFTIRQ_MASK); | |
8627 | - | |
8628 | - asm volatile( | |
8629 | - " xchgl %%ebx,%%esp \n" | |
8630 | - " call *%%edi \n" | |
8631 | - " movl %%ebx,%%esp \n" | |
8632 | - : "=a" (arg1), "=d" (arg2), "=b" (bx) | |
8633 | - : "0" (irq), "1" (desc), "2" (isp), | |
8634 | - "D" (desc->handle_irq) | |
8635 | - : "memory", "cc", "ecx" | |
8636 | - ); | |
8637 | - } else | |
8638 | -#endif | |
8639 | - desc->handle_irq(irq, desc); | |
8640 | + /* build the stack frame on the IRQ stack */ | |
8641 | + isp = (u32 *) ((char*)irqctx + sizeof(*irqctx)); | |
8642 | + irqctx->tinfo.task = curctx->tinfo.task; | |
8643 | + irqctx->tinfo.previous_esp = current_stack_pointer; | |
8644 | ||
8645 | - /*irq_exit();*/ | |
8646 | - set_irq_regs(old_regs); | |
8647 | + /* | |
8648 | + * Copy the softirq bits in preempt_count so that the | |
8649 | + * softirq checks work in the hardirq context. | |
8650 | + */ | |
8651 | + irqctx->tinfo.preempt_count = | |
8652 | + (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | | |
8653 | + (curctx->tinfo.preempt_count & SOFTIRQ_MASK); | |
8654 | + | |
8655 | + if (unlikely(overflow)) | |
8656 | + call_on_stack(print_stack_overflow, isp); | |
8657 | + | |
8658 | + asm volatile("xchgl %%ebx,%%esp \n" | |
8659 | + "call *%%edi \n" | |
8660 | + "movl %%ebx,%%esp \n" | |
8661 | + : "=a" (arg1), "=d" (arg2), "=b" (isp) | |
8662 | + : "0" (irq), "1" (desc), "2" (isp), | |
8663 | + "D" (desc->handle_irq) | |
8664 | + : "memory", "cc", "ecx"); | |
8665 | return 1; | |
8666 | } | |
8667 | ||
8668 | -#ifdef CONFIG_4KSTACKS | |
8669 | - | |
8670 | -static char softirq_stack[NR_CPUS * THREAD_SIZE] | |
8671 | - __attribute__((__section__(".bss.page_aligned"))); | |
8672 | - | |
8673 | -static char hardirq_stack[NR_CPUS * THREAD_SIZE] | |
8674 | - __attribute__((__section__(".bss.page_aligned"))); | |
8675 | - | |
8676 | /* | |
8677 | * allocate per-cpu stacks for hardirq and for softirq processing | |
8678 | */ | |
8679 | -void irq_ctx_init(int cpu) | |
8680 | +void __cpuinit irq_ctx_init(int cpu) | |
8681 | { | |
8682 | union irq_ctx *irqctx; | |
8683 | ||
8684 | @@ -164,25 +152,25 @@ void irq_ctx_init(int cpu) | |
8685 | return; | |
8686 | ||
8687 | irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; | |
8688 | - irqctx->tinfo.task = NULL; | |
8689 | - irqctx->tinfo.exec_domain = NULL; | |
8690 | - irqctx->tinfo.cpu = cpu; | |
8691 | - irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; | |
8692 | - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); | |
8693 | + irqctx->tinfo.task = NULL; | |
8694 | + irqctx->tinfo.exec_domain = NULL; | |
8695 | + irqctx->tinfo.cpu = cpu; | |
8696 | + irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; | |
8697 | + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); | |
8698 | ||
8699 | hardirq_ctx[cpu] = irqctx; | |
8700 | ||
8701 | irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; | |
8702 | - irqctx->tinfo.task = NULL; | |
8703 | - irqctx->tinfo.exec_domain = NULL; | |
8704 | - irqctx->tinfo.cpu = cpu; | |
8705 | - irqctx->tinfo.preempt_count = 0; | |
8706 | - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); | |
8707 | + irqctx->tinfo.task = NULL; | |
8708 | + irqctx->tinfo.exec_domain = NULL; | |
8709 | + irqctx->tinfo.cpu = cpu; | |
8710 | + irqctx->tinfo.preempt_count = 0; | |
8711 | + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); | |
8712 | ||
8713 | softirq_ctx[cpu] = irqctx; | |
8714 | ||
8715 | - printk("CPU %u irqstacks, hard=%p soft=%p\n", | |
8716 | - cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); | |
8717 | + printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", | |
8718 | + cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); | |
8719 | } | |
8720 | ||
8721 | void irq_ctx_exit(int cpu) | |
8722 | @@ -211,25 +199,56 @@ asmlinkage void do_softirq(void) | |
8723 | /* build the stack frame on the softirq stack */ | |
8724 | isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); | |
8725 | ||
8726 | - asm volatile( | |
8727 | - " xchgl %%ebx,%%esp \n" | |
8728 | - " call __do_softirq \n" | |
8729 | - " movl %%ebx,%%esp \n" | |
8730 | - : "=b"(isp) | |
8731 | - : "0"(isp) | |
8732 | - : "memory", "cc", "edx", "ecx", "eax" | |
8733 | - ); | |
8734 | + call_on_stack(__do_softirq, isp); | |
8735 | /* | |
8736 | * Shouldnt happen, we returned above if in_interrupt(): | |
8737 | - */ | |
8738 | + */ | |
8739 | WARN_ON_ONCE(softirq_count()); | |
8740 | } | |
8741 | ||
8742 | local_irq_restore(flags); | |
8743 | } | |
8744 | + | |
8745 | +#else | |
8746 | +static inline int | |
8747 | +execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } | |
8748 | #endif | |
8749 | ||
8750 | /* | |
8751 | + * do_IRQ handles all normal device IRQ's (the special | |
8752 | + * SMP cross-CPU interrupts have their own specific | |
8753 | + * handlers). | |
8754 | + */ | |
8755 | +unsigned int do_IRQ(struct pt_regs *regs) | |
8756 | +{ | |
8757 | + struct pt_regs *old_regs; | |
8758 | + /* high bit used in ret_from_ code */ | |
8759 | + int overflow, irq = ~regs->orig_ax; | |
8760 | + struct irq_desc *desc = irq_desc + irq; | |
8761 | + | |
8762 | + if (unlikely((unsigned)irq >= NR_IRQS)) { | |
8763 | + printk(KERN_EMERG "%s: cannot handle IRQ %d\n", | |
8764 | + __func__, irq); | |
8765 | + BUG(); | |
8766 | + } | |
8767 | + | |
8768 | + old_regs = set_irq_regs(regs); | |
8769 | + /*irq_enter();*/ | |
8770 | + | |
8771 | + overflow = check_stack_overflow(); | |
8772 | + | |
8773 | + if (!execute_on_irq_stack(overflow, desc, irq)) { | |
8774 | + if (unlikely(overflow)) | |
8775 | + print_stack_overflow(); | |
8776 | + desc->handle_irq(irq, desc); | |
8777 | + } | |
8778 | + | |
8779 | + /*irq_exit();*/ | |
8780 | + set_irq_regs(old_regs); | |
8781 | + return 1; | |
8782 | +} | |
8783 | + | |
8784 | +/* | |
8785 | * Interrupt statistics: | |
8786 | */ | |
8787 | ||
8788 | @@ -337,6 +356,42 @@ skip: | |
8789 | return 0; | |
8790 | } | |
8791 | ||
8792 | +/* | |
8793 | + * /proc/stat helpers | |
8794 | + */ | |
8795 | +u64 arch_irq_stat_cpu(unsigned int cpu) | |
8796 | +{ | |
8797 | + u64 sum = nmi_count(cpu); | |
8798 | + | |
8799 | +#ifdef CONFIG_X86_LOCAL_APIC | |
8800 | + sum += per_cpu(irq_stat, cpu).apic_timer_irqs; | |
8801 | +#endif | |
8802 | +#ifdef CONFIG_SMP | |
8803 | + sum += per_cpu(irq_stat, cpu).irq_resched_count; | |
8804 | + sum += per_cpu(irq_stat, cpu).irq_call_count; | |
8805 | +#ifndef CONFIG_XEN | |
8806 | + sum += per_cpu(irq_stat, cpu).irq_tlb_count; | |
8807 | +#endif | |
8808 | +#endif | |
8809 | +#ifdef CONFIG_X86_MCE | |
8810 | + sum += per_cpu(irq_stat, cpu).irq_thermal_count; | |
8811 | +#endif | |
8812 | +#ifdef CONFIG_X86_LOCAL_APIC | |
8813 | + sum += per_cpu(irq_stat, cpu).irq_spurious_count; | |
8814 | +#endif | |
8815 | + return sum; | |
8816 | +} | |
8817 | + | |
8818 | +u64 arch_irq_stat(void) | |
8819 | +{ | |
8820 | + u64 sum = atomic_read(&irq_err_count); | |
8821 | + | |
8822 | +#ifdef CONFIG_X86_IO_APIC | |
8823 | + sum += atomic_read(&irq_mis_count); | |
8824 | +#endif | |
8825 | + return sum; | |
8826 | +} | |
8827 | + | |
8828 | #ifdef CONFIG_HOTPLUG_CPU | |
8829 | ||
8830 | void fixup_irqs(cpumask_t map) | |
82094b55 AF |
8831 | --- sle11-2009-10-16.orig/arch/x86/kernel/irq_64-xen.c 2009-03-16 16:33:40.000000000 +0100 |
8832 | +++ sle11-2009-10-16/arch/x86/kernel/irq_64-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
8833 | @@ -163,6 +163,34 @@ skip: |
8834 | } | |
8835 | ||
8836 | /* | |
8837 | + * /proc/stat helpers | |
8838 | + */ | |
8839 | +u64 arch_irq_stat_cpu(unsigned int cpu) | |
8840 | +{ | |
8841 | + u64 sum = cpu_pda(cpu)->__nmi_count; | |
8842 | + | |
8843 | + sum += cpu_pda(cpu)->apic_timer_irqs; | |
8844 | +#ifdef CONFIG_SMP | |
8845 | + sum += cpu_pda(cpu)->irq_resched_count; | |
8846 | + sum += cpu_pda(cpu)->irq_call_count; | |
8847 | +#ifndef CONFIG_XEN | |
8848 | + sum += cpu_pda(cpu)->irq_tlb_count; | |
8849 | +#endif | |
8850 | +#endif | |
8851 | +#ifdef CONFIG_X86_MCE | |
8852 | + sum += cpu_pda(cpu)->irq_thermal_count; | |
8853 | + sum += cpu_pda(cpu)->irq_threshold_count; | |
8854 | +#endif | |
8855 | + sum += cpu_pda(cpu)->irq_spurious_count; | |
8856 | + return sum; | |
8857 | +} | |
8858 | + | |
8859 | +u64 arch_irq_stat(void) | |
8860 | +{ | |
8861 | + return atomic_read(&irq_err_count); | |
8862 | +} | |
8863 | + | |
8864 | +/* | |
8865 | * do_IRQ handles all normal device IRQ's (the special | |
8866 | * SMP cross-CPU interrupts have their own specific | |
8867 | * handlers). | |
82094b55 AF |
8868 | --- sle11-2009-10-16.orig/arch/x86/kernel/ldt-xen.c 2009-03-16 16:33:40.000000000 +0100 |
8869 | +++ sle11-2009-10-16/arch/x86/kernel/ldt-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
8870 | @@ -20,9 +20,9 @@ |
8871 | #include <asm/mmu_context.h> | |
8872 | ||
8873 | #ifdef CONFIG_SMP | |
8874 | -static void flush_ldt(void *null) | |
8875 | +static void flush_ldt(void *current_mm) | |
8876 | { | |
8877 | - if (current->active_mm) | |
8878 | + if (current->active_mm == current_mm) | |
8879 | load_LDT(&current->active_mm->context); | |
8880 | } | |
8881 | #endif | |
8882 | @@ -62,8 +62,6 @@ static int alloc_ldt(mm_context_t *pc, i | |
8883 | ||
8884 | if (reload) { | |
8885 | #ifdef CONFIG_SMP | |
8886 | - cpumask_t mask; | |
8887 | - | |
8888 | preempt_disable(); | |
8889 | #endif | |
8890 | make_pages_readonly(newldt, | |
8891 | @@ -71,9 +69,9 @@ static int alloc_ldt(mm_context_t *pc, i | |
8892 | XENFEAT_writable_descriptor_tables); | |
8893 | load_LDT(pc); | |
8894 | #ifdef CONFIG_SMP | |
8895 | - mask = cpumask_of_cpu(smp_processor_id()); | |
8896 | - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) | |
8897 | - smp_call_function(flush_ldt, NULL, 1, 1); | |
8898 | + if (!cpus_equal(current->mm->cpu_vm_mask, | |
8899 | + cpumask_of_cpu(smp_processor_id()))) | |
8900 | + smp_call_function(flush_ldt, current->mm, 1); | |
8901 | preempt_enable(); | |
8902 | #endif | |
8903 | } | |
82094b55 AF |
8904 | --- sle11-2009-10-16.orig/arch/x86/kernel/machine_kexec_32.c 2008-11-25 12:35:53.000000000 +0100 |
8905 | +++ sle11-2009-10-16/arch/x86/kernel/machine_kexec_32.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
8906 | @@ -68,6 +68,8 @@ void machine_kexec_setup_load_arg(xen_ke |
8907 | xki->page_list[PA_PTE_0] = __ma(kexec_pte0); | |
8908 | xki->page_list[PA_PTE_1] = __ma(kexec_pte1); | |
8909 | ||
8910 | + if (image->type == KEXEC_TYPE_DEFAULT) | |
8911 | + xki->page_list[PA_SWAP_PAGE] = page_to_phys(image->swap_page); | |
8912 | } | |
8913 | ||
8914 | int __init machine_kexec_setup_resources(struct resource *hypervisor, | |
82094b55 AF |
8915 | --- sle11-2009-10-16.orig/arch/x86/kernel/microcode-xen.c 2009-03-16 16:38:05.000000000 +0100 |
8916 | +++ sle11-2009-10-16/arch/x86/kernel/microcode-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
8917 | @@ -5,13 +5,14 @@ |
8918 | * 2006 Shaohua Li <shaohua.li@intel.com> | |
8919 | * | |
8920 | * This driver allows to upgrade microcode on Intel processors | |
8921 | - * belonging to IA-32 family - PentiumPro, Pentium II, | |
8922 | + * belonging to IA-32 family - PentiumPro, Pentium II, | |
8923 | * Pentium III, Xeon, Pentium 4, etc. | |
8924 | * | |
8925 | - * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, | |
8926 | - * Order Number 245472 or free download from: | |
8927 | - * | |
8928 | - * http://developer.intel.com/design/pentium4/manuals/245472.htm | |
8929 | + * Reference: Section 8.11 of Volume 3a, IA-32 Intel(R) Architecture | |
8930 | + * Software Developer's Manual | |
8931 | + * Order Number 253668 or free download from: | |
8932 | + * | |
8933 | + * http://developer.intel.com/design/pentium4/manuals/253668.htm | |
8934 | * | |
8935 | * For more information, go to http://www.urbanmyth.org/microcode | |
8936 | * | |
8937 | @@ -26,6 +27,7 @@ | |
8938 | #include <linux/kernel.h> | |
8939 | #include <linux/init.h> | |
8940 | #include <linux/sched.h> | |
8941 | +#include <linux/smp_lock.h> | |
8942 | #include <linux/cpumask.h> | |
8943 | #include <linux/module.h> | |
8944 | #include <linux/slab.h> | |
8945 | @@ -86,6 +88,7 @@ static int do_microcode_update (const vo | |
8946 | ||
8947 | static int microcode_open (struct inode *unused1, struct file *unused2) | |
8948 | { | |
8949 | + cycle_kernel_lock(); | |
8950 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; | |
8951 | } | |
8952 | ||
8953 | @@ -162,7 +165,7 @@ static int request_microcode(void) | |
8954 | c->x86, c->x86_model, c->x86_mask); | |
8955 | error = request_firmware(&firmware, name, &microcode_pdev->dev); | |
8956 | if (error) { | |
8957 | - pr_debug("microcode: ucode data file %s load failed\n", name); | |
8958 | + pr_debug("microcode: data file %s load failed\n", name); | |
8959 | return error; | |
8960 | } | |
8961 | ||
8962 | @@ -183,6 +186,9 @@ static int __init microcode_init (void) | |
8963 | { | |
8964 | int error; | |
8965 | ||
8966 | + printk(KERN_INFO | |
8967 | + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n"); | |
8968 | + | |
8969 | error = microcode_dev_init(); | |
8970 | if (error) | |
8971 | return error; | |
8972 | @@ -195,8 +201,6 @@ static int __init microcode_init (void) | |
8973 | ||
8974 | request_microcode(); | |
8975 | ||
8976 | - printk(KERN_INFO | |
8977 | - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n"); | |
8978 | return 0; | |
8979 | } | |
8980 | ||
82094b55 AF |
8981 | --- sle11-2009-10-16.orig/arch/x86/kernel/mpparse-xen.c 2009-03-16 16:38:05.000000000 +0100 |
8982 | +++ sle11-2009-10-16/arch/x86/kernel/mpparse-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
8983 | @@ -25,6 +25,9 @@ |
8984 | #include <asm/proto.h> | |
8985 | #include <asm/acpi.h> | |
8986 | #include <asm/bios_ebda.h> | |
8987 | +#include <asm/e820.h> | |
8988 | +#include <asm/trampoline.h> | |
8989 | +#include <asm/setup.h> | |
8990 | ||
8991 | #include <mach_apic.h> | |
8992 | #ifdef CONFIG_X86_32 | |
8993 | @@ -32,27 +35,10 @@ | |
8994 | #include <mach_mpparse.h> | |
8995 | #endif | |
8996 | ||
8997 | -/* Have we found an MP table */ | |
8998 | -int smp_found_config; | |
8999 | - | |
9000 | -/* | |
9001 | - * Various Linux-internal data structures created from the | |
9002 | - * MP-table. | |
9003 | - */ | |
9004 | -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) | |
9005 | -int mp_bus_id_to_type[MAX_MP_BUSSES]; | |
9006 | -#endif | |
9007 | - | |
9008 | -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | |
9009 | -int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 }; | |
9010 | - | |
9011 | -static int mp_current_pci_id; | |
9012 | - | |
9013 | -int pic_mode; | |
9014 | - | |
9015 | -/* | |
9016 | - * Intel MP BIOS table parsing routines: | |
9017 | - */ | |
9018 | +static void *_bus_to_virt(unsigned long ma) | |
9019 | +{ | |
9020 | + return is_ISA_range(ma, ma) ? isa_bus_to_virt(ma) : bus_to_virt(ma); | |
9021 | +} | |
9022 | ||
9023 | /* | |
9024 | * Checksum an MP configuration block. | |
9025 | @@ -68,19 +54,7 @@ static int __init mpf_checksum(unsigned | |
9026 | return sum & 0xFF; | |
9027 | } | |
9028 | ||
9029 | -#ifdef CONFIG_X86_NUMAQ | |
9030 | -/* | |
9031 | - * Have to match translation table entries to main table entries by counter | |
9032 | - * hence the mpc_record variable .... can't see a less disgusting way of | |
9033 | - * doing this .... | |
9034 | - */ | |
9035 | - | |
9036 | -static int mpc_record; | |
9037 | -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] | |
9038 | - __cpuinitdata; | |
9039 | -#endif | |
9040 | - | |
9041 | -static void __cpuinit MP_processor_info(struct mpc_config_processor *m) | |
9042 | +static void __init MP_processor_info(struct mpc_config_processor *m) | |
9043 | { | |
9044 | #ifndef CONFIG_XEN | |
9045 | int apicid; | |
9046 | @@ -90,11 +64,12 @@ static void __cpuinit MP_processor_info( | |
9047 | disabled_cpus++; | |
9048 | return; | |
9049 | } | |
9050 | -#ifdef CONFIG_X86_NUMAQ | |
9051 | - apicid = mpc_apic_id(m, translation_table[mpc_record]); | |
9052 | -#else | |
9053 | - apicid = m->mpc_apicid; | |
9054 | -#endif | |
9055 | + | |
9056 | + if (x86_quirks->mpc_apic_id) | |
9057 | + apicid = x86_quirks->mpc_apic_id(m); | |
9058 | + else | |
9059 | + apicid = m->mpc_apicid; | |
9060 | + | |
9061 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { | |
9062 | bootup_cpu = " (Bootup-CPU)"; | |
9063 | boot_cpu_physical_apicid = m->mpc_apicid; | |
9064 | @@ -107,18 +82,17 @@ static void __cpuinit MP_processor_info( | |
9065 | #endif | |
9066 | } | |
9067 | ||
9068 | +#ifdef CONFIG_X86_IO_APIC | |
9069 | static void __init MP_bus_info(struct mpc_config_bus *m) | |
9070 | { | |
9071 | char str[7]; | |
9072 | - | |
9073 | memcpy(str, m->mpc_bustype, 6); | |
9074 | str[6] = 0; | |
9075 | ||
9076 | -#ifdef CONFIG_X86_NUMAQ | |
9077 | - mpc_oem_bus_info(m, str, translation_table[mpc_record]); | |
9078 | -#else | |
9079 | - Dprintk("Bus #%d is %s\n", m->mpc_busid, str); | |
9080 | -#endif | |
9081 | + if (x86_quirks->mpc_oem_bus_info) | |
9082 | + x86_quirks->mpc_oem_bus_info(m, str); | |
9083 | + else | |
9084 | + apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str); | |
9085 | ||
9086 | #if MAX_MP_BUSSES < 256 | |
9087 | if (m->mpc_busid >= MAX_MP_BUSSES) { | |
9088 | @@ -135,12 +109,10 @@ static void __init MP_bus_info(struct mp | |
9089 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; | |
9090 | #endif | |
9091 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { | |
9092 | -#ifdef CONFIG_X86_NUMAQ | |
9093 | - mpc_oem_pci_bus(m, translation_table[mpc_record]); | |
9094 | -#endif | |
9095 | + if (x86_quirks->mpc_oem_pci_bus) | |
9096 | + x86_quirks->mpc_oem_pci_bus(m); | |
9097 | + | |
9098 | clear_bit(m->mpc_busid, mp_bus_not_pci); | |
9099 | - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; | |
9100 | - mp_current_pci_id++; | |
9101 | #if defined(CONFIG_EISA) || defined (CONFIG_MCA) | |
9102 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; | |
9103 | } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { | |
9104 | @@ -151,6 +123,7 @@ static void __init MP_bus_info(struct mp | |
9105 | } else | |
9106 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); | |
9107 | } | |
9108 | +#endif | |
9109 | ||
9110 | #ifdef CONFIG_X86_IO_APIC | |
9111 | ||
9112 | @@ -180,117 +153,111 @@ static void __init MP_ioapic_info(struct | |
9113 | if (bad_ioapic(m->mpc_apicaddr)) | |
9114 | return; | |
9115 | ||
9116 | - mp_ioapics[nr_ioapics] = *m; | |
9117 | + mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr; | |
9118 | + mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid; | |
9119 | + mp_ioapics[nr_ioapics].mp_type = m->mpc_type; | |
9120 | + mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver; | |
9121 | + mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags; | |
9122 | nr_ioapics++; | |
9123 | } | |
9124 | ||
9125 | -static void __init MP_intsrc_info(struct mpc_config_intsrc *m) | |
9126 | +static void print_MP_intsrc_info(struct mpc_config_intsrc *m) | |
9127 | { | |
9128 | - mp_irqs[mp_irq_entries] = *m; | |
9129 | - Dprintk("Int: type %d, pol %d, trig %d, bus %d," | |
9130 | + apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," | |
9131 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", | |
9132 | m->mpc_irqtype, m->mpc_irqflag & 3, | |
9133 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, | |
9134 | m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); | |
9135 | - if (++mp_irq_entries == MAX_IRQ_SOURCES) | |
9136 | - panic("Max # of irq sources exceeded!!\n"); | |
9137 | } | |
9138 | ||
9139 | -#endif | |
9140 | - | |
9141 | -static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) | |
9142 | +static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) | |
9143 | { | |
9144 | - Dprintk("Lint: type %d, pol %d, trig %d, bus %d," | |
9145 | - " IRQ %02x, APIC ID %x, APIC LINT %02x\n", | |
9146 | - m->mpc_irqtype, m->mpc_irqflag & 3, | |
9147 | - (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, | |
9148 | - m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); | |
9149 | + apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," | |
9150 | + " IRQ %02x, APIC ID %x, APIC INT %02x\n", | |
9151 | + mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, | |
9152 | + (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, | |
9153 | + mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); | |
9154 | } | |
9155 | ||
9156 | -#ifdef CONFIG_X86_NUMAQ | |
9157 | -static void __init MP_translation_info(struct mpc_config_translation *m) | |
9158 | +static void __init assign_to_mp_irq(struct mpc_config_intsrc *m, | |
9159 | + struct mp_config_intsrc *mp_irq) | |
9160 | { | |
9161 | - printk(KERN_INFO | |
9162 | - "Translation: record %d, type %d, quad %d, global %d, local %d\n", | |
9163 | - mpc_record, m->trans_type, m->trans_quad, m->trans_global, | |
9164 | - m->trans_local); | |
9165 | + mp_irq->mp_dstapic = m->mpc_dstapic; | |
9166 | + mp_irq->mp_type = m->mpc_type; | |
9167 | + mp_irq->mp_irqtype = m->mpc_irqtype; | |
9168 | + mp_irq->mp_irqflag = m->mpc_irqflag; | |
9169 | + mp_irq->mp_srcbus = m->mpc_srcbus; | |
9170 | + mp_irq->mp_srcbusirq = m->mpc_srcbusirq; | |
9171 | + mp_irq->mp_dstirq = m->mpc_dstirq; | |
9172 | +} | |
9173 | ||
9174 | - if (mpc_record >= MAX_MPC_ENTRY) | |
9175 | - printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); | |
9176 | - else | |
9177 | - translation_table[mpc_record] = m; /* stash this for later */ | |
9178 | - if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) | |
9179 | - node_set_online(m->trans_quad); | |
9180 | +static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, | |
9181 | + struct mpc_config_intsrc *m) | |
9182 | +{ | |
9183 | + m->mpc_dstapic = mp_irq->mp_dstapic; | |
9184 | + m->mpc_type = mp_irq->mp_type; | |
9185 | + m->mpc_irqtype = mp_irq->mp_irqtype; | |
9186 | + m->mpc_irqflag = mp_irq->mp_irqflag; | |
9187 | + m->mpc_srcbus = mp_irq->mp_srcbus; | |
9188 | + m->mpc_srcbusirq = mp_irq->mp_srcbusirq; | |
9189 | + m->mpc_dstirq = mp_irq->mp_dstirq; | |
9190 | } | |
9191 | ||
9192 | -/* | |
9193 | - * Read/parse the MPC oem tables | |
9194 | - */ | |
9195 | +static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, | |
9196 | + struct mpc_config_intsrc *m) | |
9197 | +{ | |
9198 | + if (mp_irq->mp_dstapic != m->mpc_dstapic) | |
9199 | + return 1; | |
9200 | + if (mp_irq->mp_type != m->mpc_type) | |
9201 | + return 2; | |
9202 | + if (mp_irq->mp_irqtype != m->mpc_irqtype) | |
9203 | + return 3; | |
9204 | + if (mp_irq->mp_irqflag != m->mpc_irqflag) | |
9205 | + return 4; | |
9206 | + if (mp_irq->mp_srcbus != m->mpc_srcbus) | |
9207 | + return 5; | |
9208 | + if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq) | |
9209 | + return 6; | |
9210 | + if (mp_irq->mp_dstirq != m->mpc_dstirq) | |
9211 | + return 7; | |
9212 | ||
9213 | -static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, | |
9214 | - unsigned short oemsize) | |
9215 | + return 0; | |
9216 | +} | |
9217 | + | |
9218 | +static void __init MP_intsrc_info(struct mpc_config_intsrc *m) | |
9219 | { | |
9220 | - int count = sizeof(*oemtable); /* the header size */ | |
9221 | - unsigned char *oemptr = ((unsigned char *)oemtable) + count; | |
9222 | + int i; | |
9223 | ||
9224 | - mpc_record = 0; | |
9225 | - printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", | |
9226 | - oemtable); | |
9227 | - if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) { | |
9228 | - printk(KERN_WARNING | |
9229 | - "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", | |
9230 | - oemtable->oem_signature[0], oemtable->oem_signature[1], | |
9231 | - oemtable->oem_signature[2], oemtable->oem_signature[3]); | |
9232 | - return; | |
9233 | - } | |
9234 | - if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) { | |
9235 | - printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); | |
9236 | - return; | |
9237 | - } | |
9238 | - while (count < oemtable->oem_length) { | |
9239 | - switch (*oemptr) { | |
9240 | - case MP_TRANSLATION: | |
9241 | - { | |
9242 | - struct mpc_config_translation *m = | |
9243 | - (struct mpc_config_translation *)oemptr; | |
9244 | - MP_translation_info(m); | |
9245 | - oemptr += sizeof(*m); | |
9246 | - count += sizeof(*m); | |
9247 | - ++mpc_record; | |
9248 | - break; | |
9249 | - } | |
9250 | - default: | |
9251 | - { | |
9252 | - printk(KERN_WARNING | |
9253 | - "Unrecognised OEM table entry type! - %d\n", | |
9254 | - (int)*oemptr); | |
9255 | - return; | |
9256 | - } | |
9257 | - } | |
9258 | + print_MP_intsrc_info(m); | |
9259 | + | |
9260 | + for (i = 0; i < mp_irq_entries; i++) { | |
9261 | + if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m)) | |
9262 | + return; | |
9263 | } | |
9264 | + | |
9265 | + assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]); | |
9266 | + if (++mp_irq_entries == MAX_IRQ_SOURCES) | |
9267 | + panic("Max # of irq sources exceeded!!\n"); | |
9268 | } | |
9269 | ||
9270 | -static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, | |
9271 | - char *productid) | |
9272 | +#endif | |
9273 | + | |
9274 | +static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) | |
9275 | { | |
9276 | - if (strncmp(oem, "IBM NUMA", 8)) | |
9277 | - printk("Warning! May not be a NUMA-Q system!\n"); | |
9278 | - if (mpc->mpc_oemptr) | |
9279 | - smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, | |
9280 | - mpc->mpc_oemsize); | |
9281 | + apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," | |
9282 | + " IRQ %02x, APIC ID %x, APIC LINT %02x\n", | |
9283 | + m->mpc_irqtype, m->mpc_irqflag & 3, | |
9284 | + (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, | |
9285 | + m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); | |
9286 | } | |
9287 | -#endif /* CONFIG_X86_NUMAQ */ | |
9288 | ||
9289 | /* | |
9290 | * Read/parse the MPC | |
9291 | */ | |
9292 | ||
9293 | -static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |
9294 | +static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem, | |
9295 | + char *str) | |
9296 | { | |
9297 | - char str[16]; | |
9298 | - char oem[10]; | |
9299 | - int count = sizeof(*mpc); | |
9300 | - unsigned char *mpt = ((unsigned char *)mpc) + count; | |
9301 | ||
9302 | if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { | |
9303 | printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", | |
9304 | @@ -313,19 +280,41 @@ static int __init smp_read_mpc(struct mp | |
9305 | } | |
9306 | memcpy(oem, mpc->mpc_oem, 8); | |
9307 | oem[8] = 0; | |
9308 | - printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem); | |
9309 | + printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem); | |
9310 | ||
9311 | memcpy(str, mpc->mpc_productid, 12); | |
9312 | str[12] = 0; | |
9313 | - printk("Product ID: %s ", str); | |
9314 | ||
9315 | -#ifdef CONFIG_X86_32 | |
9316 | - mps_oem_check(mpc, oem, str); | |
9317 | -#endif | |
9318 | - printk(KERN_INFO "MPTABLE: Product ID: %s ", str); | |
9319 | + printk(KERN_INFO "MPTABLE: Product ID: %s\n", str); | |
9320 | ||
9321 | printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); | |
9322 | ||
9323 | + return 1; | |
9324 | +} | |
9325 | + | |
9326 | +static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |
9327 | +{ | |
9328 | + char str[16]; | |
9329 | + char oem[10]; | |
9330 | + | |
9331 | + int count = sizeof(*mpc); | |
9332 | + unsigned char *mpt = ((unsigned char *)mpc) + count; | |
9333 | + | |
9334 | + if (!smp_check_mpc(mpc, oem, str)) | |
9335 | + return 0; | |
9336 | + | |
9337 | +#ifdef CONFIG_X86_32 | |
9338 | + /* | |
9339 | + * need to make sure summit and es7000's mps_oem_check is safe to be | |
9340 | + * called early via genericarch's mps_oem_check | |
9341 | + */ | |
9342 | + if (early) { | |
9343 | +#ifdef CONFIG_X86_NUMAQ | |
9344 | + numaq_mps_oem_check(mpc, oem, str); | |
9345 | +#endif | |
9346 | + } else | |
9347 | + mps_oem_check(mpc, oem, str); | |
9348 | +#endif | |
9349 | /* save the local APIC address, it might be non-default */ | |
9350 | if (!acpi_lapic) | |
9351 | mp_lapic_addr = mpc->mpc_lapic; | |
9352 | @@ -333,12 +322,17 @@ static int __init smp_read_mpc(struct mp | |
9353 | if (early) | |
9354 | return 1; | |
9355 | ||
9356 | + if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) { | |
9357 | + struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr; | |
9358 | + x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize); | |
9359 | + } | |
9360 | + | |
9361 | /* | |
9362 | * Now process the configuration blocks. | |
9363 | */ | |
9364 | -#ifdef CONFIG_X86_NUMAQ | |
9365 | - mpc_record = 0; | |
9366 | -#endif | |
9367 | + if (x86_quirks->mpc_record) | |
9368 | + *x86_quirks->mpc_record = 0; | |
9369 | + | |
9370 | while (count < mpc->mpc_length) { | |
9371 | switch (*mpt) { | |
9372 | case MP_PROCESSOR: | |
9373 | @@ -356,7 +350,9 @@ static int __init smp_read_mpc(struct mp | |
9374 | { | |
9375 | struct mpc_config_bus *m = | |
9376 | (struct mpc_config_bus *)mpt; | |
9377 | +#ifdef CONFIG_X86_IO_APIC | |
9378 | MP_bus_info(m); | |
9379 | +#endif | |
9380 | mpt += sizeof(*m); | |
9381 | count += sizeof(*m); | |
9382 | break; | |
9383 | @@ -402,10 +398,14 @@ static int __init smp_read_mpc(struct mp | |
9384 | count = mpc->mpc_length; | |
9385 | break; | |
9386 | } | |
9387 | -#ifdef CONFIG_X86_NUMAQ | |
9388 | - ++mpc_record; | |
9389 | -#endif | |
9390 | + if (x86_quirks->mpc_record) | |
9391 | + (*x86_quirks->mpc_record)++; | |
9392 | } | |
9393 | + | |
9394 | +#ifdef CONFIG_X86_GENERICARCH | |
9395 | + generic_bigsmp_probe(); | |
9396 | +#endif | |
9397 | + | |
9398 | setup_apic_routing(); | |
9399 | if (!num_processors) | |
9400 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); | |
9401 | @@ -431,7 +431,7 @@ static void __init construct_default_ioi | |
9402 | intsrc.mpc_type = MP_INTSRC; | |
9403 | intsrc.mpc_irqflag = 0; /* conforming */ | |
9404 | intsrc.mpc_srcbus = 0; | |
9405 | - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; | |
9406 | + intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid; | |
9407 | ||
9408 | intsrc.mpc_irqtype = mp_INT; | |
9409 | ||
9410 | @@ -492,40 +492,11 @@ static void __init construct_default_ioi | |
9411 | MP_intsrc_info(&intsrc); | |
9412 | } | |
9413 | ||
9414 | -#endif | |
9415 | ||
9416 | -static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |
9417 | +static void __init construct_ioapic_table(int mpc_default_type) | |
9418 | { | |
9419 | - struct mpc_config_processor processor; | |
9420 | - struct mpc_config_bus bus; | |
9421 | -#ifdef CONFIG_X86_IO_APIC | |
9422 | struct mpc_config_ioapic ioapic; | |
9423 | -#endif | |
9424 | - struct mpc_config_lintsrc lintsrc; | |
9425 | - int linttypes[2] = { mp_ExtINT, mp_NMI }; | |
9426 | - int i; | |
9427 | - | |
9428 | - /* | |
9429 | - * local APIC has default address | |
9430 | - */ | |
9431 | - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | |
9432 | - | |
9433 | - /* | |
9434 | - * 2 CPUs, numbered 0 & 1. | |
9435 | - */ | |
9436 | - processor.mpc_type = MP_PROCESSOR; | |
9437 | - /* Either an integrated APIC or a discrete 82489DX. */ | |
9438 | - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; | |
9439 | - processor.mpc_cpuflag = CPU_ENABLED; | |
9440 | - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | | |
9441 | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; | |
9442 | - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; | |
9443 | - processor.mpc_reserved[0] = 0; | |
9444 | - processor.mpc_reserved[1] = 0; | |
9445 | - for (i = 0; i < 2; i++) { | |
9446 | - processor.mpc_apicid = i; | |
9447 | - MP_processor_info(&processor); | |
9448 | - } | |
9449 | + struct mpc_config_bus bus; | |
9450 | ||
9451 | bus.mpc_type = MP_BUS; | |
9452 | bus.mpc_busid = 0; | |
9453 | @@ -554,7 +525,6 @@ static inline void __init construct_defa | |
9454 | MP_bus_info(&bus); | |
9455 | } | |
9456 | ||
9457 | -#ifdef CONFIG_X86_IO_APIC | |
9458 | ioapic.mpc_type = MP_IOAPIC; | |
9459 | ioapic.mpc_apicid = 2; | |
9460 | ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; | |
9461 | @@ -566,7 +536,42 @@ static inline void __init construct_defa | |
9462 | * We set up most of the low 16 IO-APIC pins according to MPS rules. | |
9463 | */ | |
9464 | construct_default_ioirq_mptable(mpc_default_type); | |
9465 | +} | |
9466 | +#else | |
9467 | +static inline void __init construct_ioapic_table(int mpc_default_type) { } | |
9468 | #endif | |
9469 | + | |
9470 | +static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |
9471 | +{ | |
9472 | + struct mpc_config_processor processor; | |
9473 | + struct mpc_config_lintsrc lintsrc; | |
9474 | + int linttypes[2] = { mp_ExtINT, mp_NMI }; | |
9475 | + int i; | |
9476 | + | |
9477 | + /* | |
9478 | + * local APIC has default address | |
9479 | + */ | |
9480 | + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | |
9481 | + | |
9482 | + /* | |
9483 | + * 2 CPUs, numbered 0 & 1. | |
9484 | + */ | |
9485 | + processor.mpc_type = MP_PROCESSOR; | |
9486 | + /* Either an integrated APIC or a discrete 82489DX. */ | |
9487 | + processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; | |
9488 | + processor.mpc_cpuflag = CPU_ENABLED; | |
9489 | + processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | | |
9490 | + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; | |
9491 | + processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; | |
9492 | + processor.mpc_reserved[0] = 0; | |
9493 | + processor.mpc_reserved[1] = 0; | |
9494 | + for (i = 0; i < 2; i++) { | |
9495 | + processor.mpc_apicid = i; | |
9496 | + MP_processor_info(&processor); | |
9497 | + } | |
9498 | + | |
9499 | + construct_ioapic_table(mpc_default_type); | |
9500 | + | |
9501 | lintsrc.mpc_type = MP_LINTSRC; | |
9502 | lintsrc.mpc_irqflag = 0; /* conforming */ | |
9503 | lintsrc.mpc_srcbusid = 0; | |
9504 | @@ -584,10 +589,14 @@ static struct intel_mp_floating *mpf_fou | |
9505 | /* | |
9506 | * Scan the memory blocks for an SMP configuration block. | |
9507 | */ | |
9508 | -static void __init __get_smp_config(unsigned early) | |
9509 | +static void __init __get_smp_config(unsigned int early) | |
9510 | { | |
9511 | struct intel_mp_floating *mpf = mpf_found; | |
9512 | ||
9513 | + if (x86_quirks->mach_get_smp_config) { | |
9514 | + if (x86_quirks->mach_get_smp_config(early)) | |
9515 | + return; | |
9516 | + } | |
9517 | if (acpi_lapic && early) | |
9518 | return; | |
9519 | /* | |
9520 | @@ -604,7 +613,7 @@ static void __init __get_smp_config(unsi | |
9521 | ||
9522 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", | |
9523 | mpf->mpf_specification); | |
9524 | -#ifdef CONFIG_X86_32 | |
9525 | +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) && !defined(CONFIG_XEN) | |
9526 | if (mpf->mpf_feature2 & (1 << 7)) { | |
9527 | printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); | |
9528 | pic_mode = 1; | |
9529 | @@ -635,8 +644,10 @@ static void __init __get_smp_config(unsi | |
9530 | * Read the physical hardware table. Anything here will | |
9531 | * override the defaults. | |
9532 | */ | |
9533 | - if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr), early)) { | |
9534 | + if (!smp_read_mpc(_bus_to_virt(mpf->mpf_physptr), early)) { | |
9535 | +#ifdef CONFIG_X86_LOCAL_APIC | |
9536 | smp_found_config = 0; | |
9537 | +#endif | |
9538 | printk(KERN_ERR | |
9539 | "BIOS bug, MP table errors detected!...\n"); | |
9540 | printk(KERN_ERR "... disabling SMP support. " | |
9541 | @@ -690,10 +701,11 @@ void __init get_smp_config(void) | |
9542 | static int __init smp_scan_config(unsigned long base, unsigned long length, | |
9543 | unsigned reserve) | |
9544 | { | |
9545 | - unsigned int *bp = isa_bus_to_virt(base); | |
9546 | + unsigned int *bp = _bus_to_virt(base); | |
9547 | struct intel_mp_floating *mpf; | |
9548 | ||
9549 | - Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); | |
9550 | + apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", | |
9551 | + bp, length); | |
9552 | BUILD_BUG_ON(sizeof(*mpf) != 16); | |
9553 | ||
9554 | while (length > 0) { | |
9555 | @@ -703,16 +715,22 @@ static int __init smp_scan_config(unsign | |
9556 | !mpf_checksum((unsigned char *)bp, 16) && | |
9557 | ((mpf->mpf_specification == 1) | |
9558 | || (mpf->mpf_specification == 4))) { | |
9559 | - | |
9560 | +#ifdef CONFIG_X86_LOCAL_APIC | |
9561 | smp_found_config = 1; | |
9562 | +#endif | |
9563 | mpf_found = mpf; | |
9564 | -#ifdef CONFIG_X86_32 | |
9565 | + | |
9566 | #ifndef CONFIG_XEN | |
9567 | printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", | |
9568 | mpf, virt_to_phys(mpf)); | |
9569 | - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE, | |
9570 | + | |
9571 | + if (!reserve) | |
9572 | + return 1; | |
9573 | + reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, | |
9574 | BOOTMEM_DEFAULT); | |
9575 | if (mpf->mpf_physptr) { | |
9576 | + unsigned long size = PAGE_SIZE; | |
9577 | +#ifdef CONFIG_X86_32 | |
9578 | /* | |
9579 | * We cannot access to MPC table to compute | |
9580 | * table size yet, as only few megabytes from | |
9581 | @@ -722,27 +740,18 @@ static int __init smp_scan_config(unsign | |
9582 | * PAGE_SIZE from mpg->mpf_physptr yields BUG() | |
9583 | * in reserve_bootmem. | |
9584 | */ | |
9585 | - unsigned long size = PAGE_SIZE; | |
9586 | unsigned long end = max_low_pfn * PAGE_SIZE; | |
9587 | if (mpf->mpf_physptr + size > end) | |
9588 | size = end - mpf->mpf_physptr; | |
9589 | - reserve_bootmem(mpf->mpf_physptr, size, | |
9590 | +#endif | |
9591 | + reserve_bootmem_generic(mpf->mpf_physptr, size, | |
9592 | BOOTMEM_DEFAULT); | |
9593 | } | |
9594 | #else | |
9595 | printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", | |
9596 | - mpf, ((void *)bp - isa_bus_to_virt(base)) + base); | |
9597 | -#endif | |
9598 | -#elif !defined(CONFIG_XEN) | |
9599 | - if (!reserve) | |
9600 | - return 1; | |
9601 | - | |
9602 | - reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE); | |
9603 | - if (mpf->mpf_physptr) | |
9604 | - reserve_bootmem_generic(mpf->mpf_physptr, | |
9605 | - PAGE_SIZE); | |
9606 | + mpf, ((void *)bp - _bus_to_virt(base)) + base); | |
9607 | #endif | |
9608 | - return 1; | |
9609 | + return 1; | |
9610 | } | |
9611 | bp += 4; | |
9612 | length -= 16; | |
9613 | @@ -750,12 +759,16 @@ static int __init smp_scan_config(unsign | |
9614 | return 0; | |
9615 | } | |
9616 | ||
9617 | -static void __init __find_smp_config(unsigned reserve) | |
9618 | +static void __init __find_smp_config(unsigned int reserve) | |
9619 | { | |
9620 | #ifndef CONFIG_XEN | |
9621 | unsigned int address; | |
9622 | #endif | |
9623 | ||
9624 | + if (x86_quirks->mach_find_smp_config) { | |
9625 | + if (x86_quirks->mach_find_smp_config(reserve)) | |
9626 | + return; | |
9627 | + } | |
9628 | /* | |
9629 | * FIXME: Linux assumes you have 640K of base ram.. | |
9630 | * this continues the error... | |
9631 | @@ -802,300 +815,297 @@ void __init find_smp_config(void) | |
9632 | __find_smp_config(1); | |
9633 | } | |
9634 | ||
9635 | -/* -------------------------------------------------------------------------- | |
9636 | - ACPI-based MP Configuration | |
9637 | - -------------------------------------------------------------------------- */ | |
9638 | - | |
9639 | -/* | |
9640 | - * Keep this outside and initialized to 0, for !CONFIG_ACPI builds: | |
9641 | - */ | |
9642 | -int es7000_plat; | |
9643 | - | |
9644 | -#ifdef CONFIG_ACPI | |
9645 | +#ifdef CONFIG_X86_IO_APIC | |
9646 | +static u8 __initdata irq_used[MAX_IRQ_SOURCES]; | |
9647 | ||
9648 | -#ifdef CONFIG_X86_IO_APIC | |
9649 | +static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m) | |
9650 | +{ | |
9651 | + int i; | |
9652 | ||
9653 | -#define MP_ISA_BUS 0 | |
9654 | + if (m->mpc_irqtype != mp_INT) | |
9655 | + return 0; | |
9656 | ||
9657 | -extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; | |
9658 | + if (m->mpc_irqflag != 0x0f) | |
9659 | + return 0; | |
9660 | ||
9661 | -static int mp_find_ioapic(int gsi) | |
9662 | -{ | |
9663 | - int i = 0; | |
9664 | + /* not legacy */ | |
9665 | ||
9666 | - /* Find the IOAPIC that manages this GSI. */ | |
9667 | - for (i = 0; i < nr_ioapics; i++) { | |
9668 | - if ((gsi >= mp_ioapic_routing[i].gsi_base) | |
9669 | - && (gsi <= mp_ioapic_routing[i].gsi_end)) | |
9670 | - return i; | |
9671 | + for (i = 0; i < mp_irq_entries; i++) { | |
9672 | + if (mp_irqs[i].mp_irqtype != mp_INT) | |
9673 | + continue; | |
9674 | + | |
9675 | + if (mp_irqs[i].mp_irqflag != 0x0f) | |
9676 | + continue; | |
9677 | + | |
9678 | + if (mp_irqs[i].mp_srcbus != m->mpc_srcbus) | |
9679 | + continue; | |
9680 | + if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq) | |
9681 | + continue; | |
9682 | + if (irq_used[i]) { | |
9683 | + /* already claimed */ | |
9684 | + return -2; | |
9685 | + } | |
9686 | + irq_used[i] = 1; | |
9687 | + return i; | |
9688 | } | |
9689 | ||
9690 | - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); | |
9691 | + /* not found */ | |
9692 | return -1; | |
9693 | } | |
9694 | ||
9695 | -static u8 __init uniq_ioapic_id(u8 id) | |
9696 | -{ | |
9697 | -#ifdef CONFIG_X86_32 | |
9698 | - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | |
9699 | - !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) | |
9700 | - return io_apic_get_unique_id(nr_ioapics, id); | |
9701 | - else | |
9702 | - return id; | |
9703 | -#else | |
9704 | - int i; | |
9705 | - DECLARE_BITMAP(used, 256); | |
9706 | - bitmap_zero(used, 256); | |
9707 | - for (i = 0; i < nr_ioapics; i++) { | |
9708 | - struct mpc_config_ioapic *ia = &mp_ioapics[i]; | |
9709 | - __set_bit(ia->mpc_apicid, used); | |
9710 | - } | |
9711 | - if (!test_bit(id, used)) | |
9712 | - return id; | |
9713 | - return find_first_zero_bit(used, 256); | |
9714 | +#define SPARE_SLOT_NUM 20 | |
9715 | + | |
9716 | +static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; | |
9717 | #endif | |
9718 | -} | |
9719 | ||
9720 | -void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | |
9721 | +static int __init replace_intsrc_all(struct mp_config_table *mpc, | |
9722 | + unsigned long mpc_new_phys, | |
9723 | + unsigned long mpc_new_length) | |
9724 | { | |
9725 | - int idx = 0; | |
9726 | - | |
9727 | - if (bad_ioapic(address)) | |
9728 | - return; | |
9729 | +#ifdef CONFIG_X86_IO_APIC | |
9730 | + int i; | |
9731 | + int nr_m_spare = 0; | |
9732 | +#endif | |
9733 | ||
9734 | - idx = nr_ioapics; | |
9735 | + int count = sizeof(*mpc); | |
9736 | + unsigned char *mpt = ((unsigned char *)mpc) + count; | |
9737 | ||
9738 | - mp_ioapics[idx].mpc_type = MP_IOAPIC; | |
9739 | - mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; | |
9740 | - mp_ioapics[idx].mpc_apicaddr = address; | |
9741 | + printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length); | |
9742 | + while (count < mpc->mpc_length) { | |
9743 | + switch (*mpt) { | |
9744 | + case MP_PROCESSOR: | |
9745 | + { | |
9746 | + struct mpc_config_processor *m = | |
9747 | + (struct mpc_config_processor *)mpt; | |
9748 | + mpt += sizeof(*m); | |
9749 | + count += sizeof(*m); | |
9750 | + break; | |
9751 | + } | |
9752 | + case MP_BUS: | |
9753 | + { | |
9754 | + struct mpc_config_bus *m = | |
9755 | + (struct mpc_config_bus *)mpt; | |
9756 | + mpt += sizeof(*m); | |
9757 | + count += sizeof(*m); | |
9758 | + break; | |
9759 | + } | |
9760 | + case MP_IOAPIC: | |
9761 | + { | |
9762 | + mpt += sizeof(struct mpc_config_ioapic); | |
9763 | + count += sizeof(struct mpc_config_ioapic); | |
9764 | + break; | |
9765 | + } | |
9766 | + case MP_INTSRC: | |
9767 | + { | |
9768 | +#ifdef CONFIG_X86_IO_APIC | |
9769 | + struct mpc_config_intsrc *m = | |
9770 | + (struct mpc_config_intsrc *)mpt; | |
9771 | ||
9772 | -#ifndef CONFIG_XEN | |
9773 | - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); | |
9774 | + apic_printk(APIC_VERBOSE, "OLD "); | |
9775 | + print_MP_intsrc_info(m); | |
9776 | + i = get_MP_intsrc_index(m); | |
9777 | + if (i > 0) { | |
9778 | + assign_to_mpc_intsrc(&mp_irqs[i], m); | |
9779 | + apic_printk(APIC_VERBOSE, "NEW "); | |
9780 | + print_mp_irq_info(&mp_irqs[i]); | |
9781 | + } else if (!i) { | |
9782 | + /* legacy, do nothing */ | |
9783 | + } else if (nr_m_spare < SPARE_SLOT_NUM) { | |
9784 | + /* | |
9785 | + * not found (-1), or duplicated (-2) | |
9786 | + * are invalid entries, | |
9787 | + * we need to use the slot later | |
9788 | + */ | |
9789 | + m_spare[nr_m_spare] = m; | |
9790 | + nr_m_spare++; | |
9791 | + } | |
9792 | #endif | |
9793 | - mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); | |
9794 | -#ifdef CONFIG_X86_32 | |
9795 | - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); | |
9796 | -#else | |
9797 | - mp_ioapics[idx].mpc_apicver = 0; | |
9798 | + mpt += sizeof(struct mpc_config_intsrc); | |
9799 | + count += sizeof(struct mpc_config_intsrc); | |
9800 | + break; | |
9801 | + } | |
9802 | + case MP_LINTSRC: | |
9803 | + { | |
9804 | + struct mpc_config_lintsrc *m = | |
9805 | + (struct mpc_config_lintsrc *)mpt; | |
9806 | + mpt += sizeof(*m); | |
9807 | + count += sizeof(*m); | |
9808 | + break; | |
9809 | + } | |
9810 | + default: | |
9811 | + /* wrong mptable */ | |
9812 | + printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); | |
9813 | + printk(KERN_ERR "type %x\n", *mpt); | |
9814 | + print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, | |
9815 | + 1, mpc, mpc->mpc_length, 1); | |
9816 | + goto out; | |
9817 | + } | |
9818 | + } | |
9819 | + | |
9820 | +#ifdef CONFIG_X86_IO_APIC | |
9821 | + for (i = 0; i < mp_irq_entries; i++) { | |
9822 | + if (irq_used[i]) | |
9823 | + continue; | |
9824 | + | |
9825 | + if (mp_irqs[i].mp_irqtype != mp_INT) | |
9826 | + continue; | |
9827 | + | |
9828 | + if (mp_irqs[i].mp_irqflag != 0x0f) | |
9829 | + continue; | |
9830 | + | |
9831 | + if (nr_m_spare > 0) { | |
9832 | + apic_printk(APIC_VERBOSE, "*NEW* found\n"); | |
9833 | + nr_m_spare--; | |
9834 | + assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); | |
9835 | + m_spare[nr_m_spare] = NULL; | |
9836 | + } else { | |
9837 | + struct mpc_config_intsrc *m = | |
9838 | + (struct mpc_config_intsrc *)mpt; | |
9839 | + count += sizeof(struct mpc_config_intsrc); | |
9840 | + if (!mpc_new_phys) { | |
9841 | + printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count); | |
9842 | + } else { | |
9843 | + if (count <= mpc_new_length) | |
9844 | + printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count); | |
9845 | + else { | |
9846 | + printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length); | |
9847 | + goto out; | |
9848 | + } | |
9849 | + } | |
9850 | + assign_to_mpc_intsrc(&mp_irqs[i], m); | |
9851 | + mpc->mpc_length = count; | |
9852 | + mpt += sizeof(struct mpc_config_intsrc); | |
9853 | + } | |
9854 | + print_mp_irq_info(&mp_irqs[i]); | |
9855 | + } | |
9856 | #endif | |
9857 | - /* | |
9858 | - * Build basic GSI lookup table to facilitate gsi->io_apic lookups | |
9859 | - * and to prevent reprogramming of IOAPIC pins (PCI GSIs). | |
9860 | - */ | |
9861 | - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; | |
9862 | - mp_ioapic_routing[idx].gsi_base = gsi_base; | |
9863 | - mp_ioapic_routing[idx].gsi_end = gsi_base + | |
9864 | - io_apic_get_redir_entries(idx); | |
9865 | - | |
9866 | - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " | |
9867 | - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, | |
9868 | - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, | |
9869 | - mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); | |
9870 | +out: | |
9871 | + /* update checksum */ | |
9872 | + mpc->mpc_checksum = 0; | |
9873 | + mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc, | |
9874 | + mpc->mpc_length); | |
9875 | ||
9876 | - nr_ioapics++; | |
9877 | + return 0; | |
9878 | } | |
9879 | ||
9880 | -void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) | |
9881 | -{ | |
9882 | - struct mpc_config_intsrc intsrc; | |
9883 | - int ioapic = -1; | |
9884 | - int pin = -1; | |
9885 | - | |
9886 | - /* | |
9887 | - * Convert 'gsi' to 'ioapic.pin'. | |
9888 | - */ | |
9889 | - ioapic = mp_find_ioapic(gsi); | |
9890 | - if (ioapic < 0) | |
9891 | - return; | |
9892 | - pin = gsi - mp_ioapic_routing[ioapic].gsi_base; | |
9893 | +static int __initdata enable_update_mptable; | |
9894 | ||
9895 | - /* | |
9896 | - * TBD: This check is for faulty timer entries, where the override | |
9897 | - * erroneously sets the trigger to level, resulting in a HUGE | |
9898 | - * increase of timer interrupts! | |
9899 | - */ | |
9900 | - if ((bus_irq == 0) && (trigger == 3)) | |
9901 | - trigger = 1; | |
9902 | +static int __init update_mptable_setup(char *str) | |
9903 | +{ | |
9904 | + enable_update_mptable = 1; | |
9905 | + return 0; | |
9906 | +} | |
9907 | +early_param("update_mptable", update_mptable_setup); | |
9908 | ||
9909 | - intsrc.mpc_type = MP_INTSRC; | |
9910 | - intsrc.mpc_irqtype = mp_INT; | |
9911 | - intsrc.mpc_irqflag = (trigger << 2) | polarity; | |
9912 | - intsrc.mpc_srcbus = MP_ISA_BUS; | |
9913 | - intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ | |
9914 | - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ | |
9915 | - intsrc.mpc_dstirq = pin; /* INTIN# */ | |
9916 | +static unsigned long __initdata mpc_new_phys; | |
9917 | +static unsigned long mpc_new_length __initdata = 4096; | |
9918 | ||
9919 | - MP_intsrc_info(&intsrc); | |
9920 | +/* alloc_mptable or alloc_mptable=4k */ | |
9921 | +static int __initdata alloc_mptable; | |
9922 | +static int __init parse_alloc_mptable_opt(char *p) | |
9923 | +{ | |
9924 | + enable_update_mptable = 1; | |
9925 | + alloc_mptable = 1; | |
9926 | + if (!p) | |
9927 | + return 0; | |
9928 | + mpc_new_length = memparse(p, &p); | |
9929 | + return 0; | |
9930 | } | |
9931 | +early_param("alloc_mptable", parse_alloc_mptable_opt); | |
9932 | ||
9933 | -void __init mp_config_acpi_legacy_irqs(void) | |
9934 | +void __init early_reserve_e820_mpc_new(void) | |
9935 | { | |
9936 | - struct mpc_config_intsrc intsrc; | |
9937 | - int i = 0; | |
9938 | - int ioapic = -1; | |
9939 | - | |
9940 | -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) | |
9941 | - /* | |
9942 | - * Fabricate the legacy ISA bus (bus #31). | |
9943 | - */ | |
9944 | - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; | |
9945 | + if (enable_update_mptable && alloc_mptable) { | |
9946 | + u64 startt = 0; | |
9947 | +#ifdef CONFIG_X86_TRAMPOLINE | |
9948 | + startt = TRAMPOLINE_BASE; | |
9949 | #endif | |
9950 | - set_bit(MP_ISA_BUS, mp_bus_not_pci); | |
9951 | - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); | |
9952 | - | |
9953 | - /* | |
9954 | - * Older generations of ES7000 have no legacy identity mappings | |
9955 | - */ | |
9956 | - if (es7000_plat == 1) | |
9957 | - return; | |
9958 | - | |
9959 | - /* | |
9960 | - * Locate the IOAPIC that manages the ISA IRQs (0-15). | |
9961 | - */ | |
9962 | - ioapic = mp_find_ioapic(0); | |
9963 | - if (ioapic < 0) | |
9964 | - return; | |
9965 | - | |
9966 | - intsrc.mpc_type = MP_INTSRC; | |
9967 | - intsrc.mpc_irqflag = 0; /* Conforming */ | |
9968 | - intsrc.mpc_srcbus = MP_ISA_BUS; | |
9969 | -#ifdef CONFIG_X86_IO_APIC | |
9970 | - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; | |
9971 | -#endif | |
9972 | - /* | |
9973 | - * Use the default configuration for the IRQs 0-15. Unless | |
9974 | - * overridden by (MADT) interrupt source override entries. | |
9975 | - */ | |
9976 | - for (i = 0; i < 16; i++) { | |
9977 | - int idx; | |
9978 | - | |
9979 | - for (idx = 0; idx < mp_irq_entries; idx++) { | |
9980 | - struct mpc_config_intsrc *irq = mp_irqs + idx; | |
9981 | - | |
9982 | - /* Do we already have a mapping for this ISA IRQ? */ | |
9983 | - if (irq->mpc_srcbus == MP_ISA_BUS | |
9984 | - && irq->mpc_srcbusirq == i) | |
9985 | - break; | |
9986 | - | |
9987 | - /* Do we already have a mapping for this IOAPIC pin */ | |
9988 | - if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && | |
9989 | - (irq->mpc_dstirq == i)) | |
9990 | - break; | |
9991 | - } | |
9992 | - | |
9993 | - if (idx != mp_irq_entries) { | |
9994 | - printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); | |
9995 | - continue; /* IRQ already used */ | |
9996 | - } | |
9997 | - | |
9998 | - intsrc.mpc_irqtype = mp_INT; | |
9999 | - intsrc.mpc_srcbusirq = i; /* Identity mapped */ | |
10000 | - intsrc.mpc_dstirq = i; | |
10001 | - | |
10002 | - MP_intsrc_info(&intsrc); | |
10003 | + mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); | |
10004 | } | |
10005 | } | |
10006 | ||
10007 | -int mp_register_gsi(u32 gsi, int triggering, int polarity) | |
10008 | +static int __init update_mp_table(void) | |
10009 | { | |
10010 | - int ioapic; | |
10011 | - int ioapic_pin; | |
10012 | -#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) | |
10013 | -#define MAX_GSI_NUM 4096 | |
10014 | -#define IRQ_COMPRESSION_START 64 | |
10015 | + char str[16]; | |
10016 | + char oem[10]; | |
10017 | + struct intel_mp_floating *mpf; | |
10018 | + struct mp_config_table *mpc; | |
10019 | + struct mp_config_table *mpc_new; | |
10020 | + | |
10021 | + if (!enable_update_mptable) | |
10022 | + return 0; | |
10023 | + | |
10024 | + mpf = mpf_found; | |
10025 | + if (!mpf) | |
10026 | + return 0; | |
10027 | ||
10028 | - static int pci_irq = IRQ_COMPRESSION_START; | |
10029 | /* | |
10030 | - * Mapping between Global System Interrupts, which | |
10031 | - * represent all possible interrupts, and IRQs | |
10032 | - * assigned to actual devices. | |
10033 | + * Now see if we need to go further. | |
10034 | */ | |
10035 | - static int gsi_to_irq[MAX_GSI_NUM]; | |
10036 | -#else | |
10037 | + if (mpf->mpf_feature1 != 0) | |
10038 | + return 0; | |
10039 | ||
10040 | - if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) | |
10041 | - return gsi; | |
10042 | -#endif | |
10043 | + if (!mpf->mpf_physptr) | |
10044 | + return 0; | |
10045 | ||
10046 | - /* Don't set up the ACPI SCI because it's already set up */ | |
10047 | - if (acpi_gbl_FADT.sci_interrupt == gsi) | |
10048 | - return gsi; | |
10049 | + mpc = _bus_to_virt(mpf->mpf_physptr); | |
10050 | ||
10051 | - ioapic = mp_find_ioapic(gsi); | |
10052 | - if (ioapic < 0) { | |
10053 | - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); | |
10054 | - return gsi; | |
10055 | - } | |
10056 | + if (!smp_check_mpc(mpc, oem, str)) | |
10057 | + return 0; | |
10058 | ||
10059 | - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; | |
10060 | + printk(KERN_INFO "mpf: %lx\n", (long)arbitrary_virt_to_machine(mpf)); | |
10061 | + printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); | |
10062 | ||
10063 | -#ifndef CONFIG_X86_32 | |
10064 | - if (ioapic_renumber_irq) | |
10065 | - gsi = ioapic_renumber_irq(ioapic, gsi); | |
10066 | -#endif | |
10067 | + if (mpc_new_phys && mpc->mpc_length > mpc_new_length) { | |
10068 | + mpc_new_phys = 0; | |
10069 | + printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n", | |
10070 | + mpc_new_length); | |
10071 | + } | |
10072 | + | |
10073 | + if (!mpc_new_phys) { | |
10074 | + unsigned char old, new; | |
10075 | + /* check if we can change the position */ | |
10076 | + mpc->mpc_checksum = 0; | |
10077 | + old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); | |
10078 | + mpc->mpc_checksum = 0xff; | |
10079 | + new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); | |
10080 | + if (old == new) { | |
10081 | + printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); | |
10082 | + return 0; | |
10083 | + } | |
10084 | + printk(KERN_INFO "use in-positon replacing\n"); | |
10085 | + } else { | |
10086 | + maddr_t mpc_new_bus; | |
10087 | ||
10088 | - /* | |
10089 | - * Avoid pin reprogramming. PRTs typically include entries | |
10090 | - * with redundant pin->gsi mappings (but unique PCI devices); | |
10091 | - * we only program the IOAPIC on the first. | |
10092 | - */ | |
10093 | - if (ioapic_pin > MP_MAX_IOAPIC_PIN) { | |
10094 | - printk(KERN_ERR "Invalid reference to IOAPIC pin " | |
10095 | - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, | |
10096 | - ioapic_pin); | |
10097 | - return gsi; | |
10098 | - } | |
10099 | - if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { | |
10100 | - Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", | |
10101 | - mp_ioapic_routing[ioapic].apic_id, ioapic_pin); | |
10102 | -#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) | |
10103 | - return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); | |
10104 | -#else | |
10105 | - return gsi; | |
10106 | -#endif | |
10107 | + mpc_new_bus = phys_to_machine(mpc_new_phys); | |
10108 | + mpf->mpf_physptr = mpc_new_bus; | |
10109 | + mpc_new = phys_to_virt(mpc_new_phys); | |
10110 | + memcpy(mpc_new, mpc, mpc->mpc_length); | |
10111 | + mpc = mpc_new; | |
10112 | + /* check if we can modify that */ | |
10113 | + if (mpc_new_bus - mpf->mpf_physptr) { | |
10114 | + struct intel_mp_floating *mpf_new; | |
10115 | + /* steal 16 bytes from [0, 1k) */ | |
10116 | + printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); | |
10117 | + mpf_new = isa_bus_to_virt(0x400 - 16); | |
10118 | + memcpy(mpf_new, mpf, 16); | |
10119 | + mpf = mpf_new; | |
10120 | + mpf->mpf_physptr = mpc_new_bus; | |
10121 | + } | |
10122 | + mpf->mpf_checksum = 0; | |
10123 | + mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); | |
10124 | + printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); | |
10125 | } | |
10126 | ||
10127 | - set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); | |
10128 | -#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) | |
10129 | /* | |
10130 | - * For GSI >= 64, use IRQ compression | |
10131 | + * only replace the one with mp_INT and | |
10132 | + * MP_IRQ_TRIGGER_LEVEL|MP_IRQ_POLARITY_LOW, | |
10133 | + * already in mp_irqs , stored by ... and mp_config_acpi_gsi, | |
10134 | + * may need pci=routeirq for all coverage | |
10135 | */ | |
10136 | - if ((gsi >= IRQ_COMPRESSION_START) | |
10137 | - && (triggering == ACPI_LEVEL_SENSITIVE)) { | |
10138 | - /* | |
10139 | - * For PCI devices assign IRQs in order, avoiding gaps | |
10140 | - * due to unused I/O APIC pins. | |
10141 | - */ | |
10142 | - int irq = gsi; | |
10143 | - if (gsi < MAX_GSI_NUM) { | |
10144 | - /* | |
10145 | - * Retain the VIA chipset work-around (gsi > 15), but | |
10146 | - * avoid a problem where the 8254 timer (IRQ0) is setup | |
10147 | - * via an override (so it's not on pin 0 of the ioapic), | |
10148 | - * and at the same time, the pin 0 interrupt is a PCI | |
10149 | - * type. The gsi > 15 test could cause these two pins | |
10150 | - * to be shared as IRQ0, and they are not shareable. | |
10151 | - * So test for this condition, and if necessary, avoid | |
10152 | - * the pin collision. | |
10153 | - */ | |
10154 | - gsi = pci_irq++; | |
10155 | - /* | |
10156 | - * Don't assign IRQ used by ACPI SCI | |
10157 | - */ | |
10158 | - if (gsi == acpi_gbl_FADT.sci_interrupt) | |
10159 | - gsi = pci_irq++; | |
10160 | - gsi_to_irq[irq] = gsi; | |
10161 | - } else { | |
10162 | - printk(KERN_ERR "GSI %u is too high\n", gsi); | |
10163 | - return gsi; | |
10164 | - } | |
10165 | - } | |
10166 | -#endif | |
10167 | - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, | |
10168 | - triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, | |
10169 | - polarity == ACPI_ACTIVE_HIGH ? 0 : 1); | |
10170 | - return gsi; | |
10171 | + replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length); | |
10172 | + | |
10173 | + return 0; | |
10174 | } | |
10175 | ||
10176 | -#endif /* CONFIG_X86_IO_APIC */ | |
10177 | -#endif /* CONFIG_ACPI */ | |
10178 | +late_initcall(update_mp_table); | |
82094b55 AF |
10179 | --- sle11-2009-10-16.orig/arch/x86/kernel/nmi.c 2009-10-28 14:55:02.000000000 +0100 |
10180 | +++ sle11-2009-10-16/arch/x86/kernel/nmi.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
10181 | @@ -27,7 +27,9 @@ |
10182 | #include <linux/kdebug.h> | |
10183 | #include <linux/smp.h> | |
10184 | ||
10185 | +#ifndef CONFIG_XEN | |
10186 | #include <asm/i8259.h> | |
10187 | +#endif | |
10188 | #include <asm/io_apic.h> | |
10189 | #include <asm/smp.h> | |
10190 | #include <asm/nmi.h> | |
10191 | @@ -179,8 +181,10 @@ int __init check_nmi_watchdog(void) | |
10192 | kfree(prev_nmi_count); | |
10193 | return 0; | |
10194 | error: | |
10195 | +#ifndef CONFIG_XEN | |
10196 | if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) | |
10197 | disable_8259A_irq(0); | |
10198 | +#endif | |
10199 | #ifdef CONFIG_X86_32 | |
10200 | timer_ack = 0; | |
10201 | #endif | |
82094b55 AF |
10202 | --- sle11-2009-10-16.orig/arch/x86/kernel/pci-dma-xen.c 2009-10-22 11:31:59.000000000 +0200 |
10203 | +++ sle11-2009-10-16/arch/x86/kernel/pci-dma-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
10204 | @@ -5,13 +5,13 @@ |
10205 | ||
10206 | #include <asm/proto.h> | |
10207 | #include <asm/dma.h> | |
10208 | -#include <asm/gart.h> | |
10209 | +#include <asm/iommu.h> | |
10210 | #include <asm/calgary.h> | |
10211 | +#include <asm/amd_iommu.h> | |
10212 | ||
10213 | -int forbid_dac __read_mostly; | |
10214 | -EXPORT_SYMBOL(forbid_dac); | |
10215 | +static int forbid_dac __read_mostly; | |
10216 | ||
10217 | -const struct dma_mapping_ops *dma_ops; | |
10218 | +struct dma_mapping_ops *dma_ops; | |
10219 | EXPORT_SYMBOL(dma_ops); | |
10220 | ||
10221 | static int iommu_sac_force __read_mostly; | |
10222 | @@ -74,13 +74,17 @@ early_param("dma32_size", parse_dma32_si | |
10223 | void __init dma32_reserve_bootmem(void) | |
10224 | { | |
10225 | unsigned long size, align; | |
10226 | - if (end_pfn <= MAX_DMA32_PFN) | |
10227 | + if (max_pfn <= MAX_DMA32_PFN) | |
10228 | return; | |
10229 | ||
10230 | + /* | |
10231 | + * check aperture_64.c allocate_aperture() for reason about | |
10232 | + * using 512M as goal | |
10233 | + */ | |
10234 | align = 64ULL<<20; | |
10235 | size = round_up(dma32_bootmem_size, align); | |
10236 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, | |
10237 | - __pa(MAX_DMA_ADDRESS)); | |
10238 | + 512ULL<<20); | |
10239 | if (dma32_bootmem_ptr) | |
10240 | dma32_bootmem_size = size; | |
10241 | else | |
10242 | @@ -88,17 +92,14 @@ void __init dma32_reserve_bootmem(void) | |
10243 | } | |
10244 | static void __init dma32_free_bootmem(void) | |
10245 | { | |
10246 | - int node; | |
10247 | ||
10248 | - if (end_pfn <= MAX_DMA32_PFN) | |
10249 | + if (max_pfn <= MAX_DMA32_PFN) | |
10250 | return; | |
10251 | ||
10252 | if (!dma32_bootmem_ptr) | |
10253 | return; | |
10254 | ||
10255 | - for_each_online_node(node) | |
10256 | - free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr), | |
10257 | - dma32_bootmem_size); | |
10258 | + free_bootmem(__pa(dma32_bootmem_ptr), dma32_bootmem_size); | |
10259 | ||
10260 | dma32_bootmem_ptr = NULL; | |
10261 | dma32_bootmem_size = 0; | |
10262 | @@ -107,7 +108,7 @@ static void __init dma32_free_bootmem(vo | |
10263 | #define dma32_free_bootmem() ((void)0) | |
10264 | #endif | |
10265 | ||
10266 | -static const struct dma_mapping_ops swiotlb_dma_ops = { | |
10267 | +static struct dma_mapping_ops swiotlb_dma_ops = { | |
10268 | .mapping_error = swiotlb_dma_mapping_error, | |
10269 | .map_single = swiotlb_map_single_phys, | |
10270 | .unmap_single = swiotlb_unmap_single, | |
10271 | @@ -130,25 +131,31 @@ void __init pci_iommu_alloc(void) | |
10272 | * The order of these functions is important for | |
10273 | * fall-back/fail-over reasons | |
10274 | */ | |
10275 | -#ifdef CONFIG_GART_IOMMU | |
10276 | gart_iommu_hole_init(); | |
10277 | -#endif | |
10278 | ||
10279 | -#ifdef CONFIG_CALGARY_IOMMU | |
10280 | detect_calgary(); | |
10281 | -#endif | |
10282 | ||
10283 | detect_intel_iommu(); | |
10284 | ||
10285 | -#ifdef CONFIG_SWIOTLB | |
10286 | + amd_iommu_detect(); | |
10287 | + | |
10288 | swiotlb_init(); | |
10289 | if (swiotlb) { | |
10290 | printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); | |
10291 | dma_ops = &swiotlb_dma_ops; | |
10292 | } | |
10293 | -#endif | |
10294 | } | |
10295 | ||
10296 | +#ifndef CONFIG_XEN | |
10297 | +unsigned long iommu_num_pages(unsigned long addr, unsigned long len) | |
10298 | +{ | |
10299 | + unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); | |
10300 | + | |
10301 | + return size >> PAGE_SHIFT; | |
10302 | +} | |
10303 | +EXPORT_SYMBOL(iommu_num_pages); | |
10304 | +#endif | |
10305 | + | |
10306 | /* | |
10307 | * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter | |
10308 | * documentation. | |
10309 | @@ -201,9 +208,7 @@ static __init int iommu_setup(char *p) | |
10310 | swiotlb = 1; | |
10311 | #endif | |
10312 | ||
10313 | -#ifdef CONFIG_GART_IOMMU | |
10314 | gart_parse_options(p); | |
10315 | -#endif | |
10316 | ||
10317 | #ifdef CONFIG_CALGARY_IOMMU | |
10318 | if (!strncmp(p, "calgary", 7)) | |
10319 | @@ -245,136 +250,19 @@ int range_straddles_page_boundary(paddr_ | |
10320 | !check_pages_physically_contiguous(pfn, offset, size)); | |
10321 | } | |
10322 | ||
10323 | -#ifdef CONFIG_X86_32 | |
10324 | -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, | |
10325 | - dma_addr_t device_addr, size_t size, int flags) | |
10326 | -{ | |
10327 | - void __iomem *mem_base = NULL; | |
10328 | - int pages = size >> PAGE_SHIFT; | |
10329 | - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); | |
10330 | - | |
10331 | - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) | |
10332 | - goto out; | |
10333 | - if (!size) | |
10334 | - goto out; | |
10335 | - if (dev->dma_mem) | |
10336 | - goto out; | |
10337 | - | |
10338 | - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ | |
10339 | - | |
10340 | - mem_base = ioremap(bus_addr, size); | |
10341 | - if (!mem_base) | |
10342 | - goto out; | |
10343 | - | |
10344 | - dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); | |
10345 | - if (!dev->dma_mem) | |
10346 | - goto out; | |
10347 | - dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); | |
10348 | - if (!dev->dma_mem->bitmap) | |
10349 | - goto free1_out; | |
10350 | - | |
10351 | - dev->dma_mem->virt_base = mem_base; | |
10352 | - dev->dma_mem->device_base = device_addr; | |
10353 | - dev->dma_mem->size = pages; | |
10354 | - dev->dma_mem->flags = flags; | |
10355 | - | |
10356 | - if (flags & DMA_MEMORY_MAP) | |
10357 | - return DMA_MEMORY_MAP; | |
10358 | - | |
10359 | - return DMA_MEMORY_IO; | |
10360 | - | |
10361 | - free1_out: | |
10362 | - kfree(dev->dma_mem); | |
10363 | - out: | |
10364 | - if (mem_base) | |
10365 | - iounmap(mem_base); | |
10366 | - return 0; | |
10367 | -} | |
10368 | -EXPORT_SYMBOL(dma_declare_coherent_memory); | |
10369 | - | |
10370 | -void dma_release_declared_memory(struct device *dev) | |
10371 | -{ | |
10372 | - struct dma_coherent_mem *mem = dev->dma_mem; | |
10373 | - | |
10374 | - if (!mem) | |
10375 | - return; | |
10376 | - dev->dma_mem = NULL; | |
10377 | - iounmap(mem->virt_base); | |
10378 | - kfree(mem->bitmap); | |
10379 | - kfree(mem); | |
10380 | -} | |
10381 | -EXPORT_SYMBOL(dma_release_declared_memory); | |
10382 | - | |
10383 | -void *dma_mark_declared_memory_occupied(struct device *dev, | |
10384 | - dma_addr_t device_addr, size_t size) | |
10385 | -{ | |
10386 | - struct dma_coherent_mem *mem = dev->dma_mem; | |
10387 | - int pos, err; | |
10388 | - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); | |
10389 | - | |
10390 | - pages >>= PAGE_SHIFT; | |
10391 | - | |
10392 | - if (!mem) | |
10393 | - return ERR_PTR(-EINVAL); | |
10394 | - | |
10395 | - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; | |
10396 | - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); | |
10397 | - if (err != 0) | |
10398 | - return ERR_PTR(err); | |
10399 | - return mem->virt_base + (pos << PAGE_SHIFT); | |
10400 | -} | |
10401 | -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); | |
10402 | - | |
10403 | -static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, | |
10404 | - dma_addr_t *dma_handle, void **ret) | |
10405 | -{ | |
10406 | - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | |
10407 | - int order = get_order(size); | |
10408 | - | |
10409 | - if (mem) { | |
10410 | - int page = bitmap_find_free_region(mem->bitmap, mem->size, | |
10411 | - order); | |
10412 | - if (page >= 0) { | |
10413 | - *dma_handle = mem->device_base + (page << PAGE_SHIFT); | |
10414 | - *ret = mem->virt_base + (page << PAGE_SHIFT); | |
10415 | - memset(*ret, 0, size); | |
10416 | - } | |
10417 | - if (mem->flags & DMA_MEMORY_EXCLUSIVE) | |
10418 | - *ret = NULL; | |
10419 | - } | |
10420 | - return (mem != NULL); | |
10421 | -} | |
10422 | - | |
10423 | -static int dma_release_coherent(struct device *dev, int order, void *vaddr) | |
10424 | -{ | |
10425 | - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | |
10426 | - | |
10427 | - if (mem && vaddr >= mem->virt_base && vaddr < | |
10428 | - (mem->virt_base + (mem->size << PAGE_SHIFT))) { | |
10429 | - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; | |
10430 | - | |
10431 | - bitmap_release_region(mem->bitmap, page, order); | |
10432 | - return 1; | |
10433 | - } | |
10434 | - return 0; | |
10435 | -} | |
10436 | -#else | |
10437 | -#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) | |
10438 | -#define dma_release_coherent(dev, order, vaddr) (0) | |
10439 | -#endif /* CONFIG_X86_32 */ | |
10440 | - | |
10441 | int dma_supported(struct device *dev, u64 mask) | |
10442 | { | |
10443 | + struct dma_mapping_ops *ops = get_dma_ops(dev); | |
10444 | + | |
10445 | #ifdef CONFIG_PCI | |
10446 | if (mask > 0xffffffff && forbid_dac > 0) { | |
10447 | - printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", | |
10448 | - dev->bus_id); | |
10449 | + dev_info(dev, "PCI: Disallowing DAC for device\n"); | |
10450 | return 0; | |
10451 | } | |
10452 | #endif | |
10453 | ||
10454 | - if (dma_ops->dma_supported) | |
10455 | - return dma_ops->dma_supported(dev, mask); | |
10456 | + if (ops->dma_supported) | |
10457 | + return ops->dma_supported(dev, mask); | |
10458 | ||
10459 | /* Copied from i386. Doesn't make much sense, because it will | |
10460 | only work for pci_alloc_coherent. | |
10461 | @@ -395,8 +283,7 @@ int dma_supported(struct device *dev, u6 | |
10462 | type. Normally this doesn't make any difference, but gives | |
10463 | more gentle handling of IOMMU overflow. */ | |
10464 | if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { | |
10465 | - printk(KERN_INFO "%s: Force SAC with mask %Lx\n", | |
10466 | - dev->bus_id, mask); | |
10467 | + dev_info(dev, "Force SAC with mask %Lx\n", mask); | |
10468 | return 0; | |
10469 | } | |
10470 | ||
10471 | @@ -422,6 +309,9 @@ void * | |
10472 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |
10473 | gfp_t gfp) | |
10474 | { | |
10475 | +#ifndef CONFIG_XEN | |
10476 | + struct dma_mapping_ops *ops = get_dma_ops(dev); | |
10477 | +#endif | |
10478 | void *memory = NULL; | |
10479 | struct page *page; | |
10480 | unsigned long dma_mask = 0; | |
10481 | @@ -431,7 +321,7 @@ dma_alloc_coherent(struct device *dev, s | |
10482 | /* ignore region specifiers */ | |
10483 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | |
10484 | ||
10485 | - if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) | |
10486 | + if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) | |
10487 | return memory; | |
10488 | ||
10489 | if (!dev) { | |
10490 | @@ -491,8 +381,8 @@ dma_alloc_coherent(struct device *dev, s | |
10491 | /* Let low level make its own zone decisions */ | |
10492 | gfp &= ~(GFP_DMA32|GFP_DMA); | |
10493 | ||
10494 | - if (dma_ops->alloc_coherent) | |
10495 | - return dma_ops->alloc_coherent(dev, size, | |
10496 | + if (ops->alloc_coherent) | |
10497 | + return ops->alloc_coherent(dev, size, | |
10498 | dma_handle, gfp); | |
10499 | return NULL; | |
10500 | } | |
10501 | @@ -504,14 +394,14 @@ dma_alloc_coherent(struct device *dev, s | |
10502 | } | |
10503 | } | |
10504 | ||
10505 | - if (dma_ops->alloc_coherent) { | |
10506 | + if (ops->alloc_coherent) { | |
10507 | free_pages((unsigned long)memory, order); | |
10508 | gfp &= ~(GFP_DMA|GFP_DMA32); | |
10509 | - return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); | |
10510 | + return ops->alloc_coherent(dev, size, dma_handle, gfp); | |
10511 | } | |
10512 | ||
10513 | - if (dma_ops->map_simple) { | |
10514 | - *dma_handle = dma_ops->map_simple(dev, virt_to_bus(memory), | |
10515 | + if (ops->map_simple) { | |
10516 | + *dma_handle = ops->map_simple(dev, virt_to_bus(memory), | |
10517 | size, | |
10518 | PCI_DMA_BIDIRECTIONAL); | |
10519 | if (*dma_handle != bad_dma_address) | |
10520 | @@ -542,13 +432,17 @@ EXPORT_SYMBOL(dma_alloc_coherent); | |
10521 | void dma_free_coherent(struct device *dev, size_t size, | |
10522 | void *vaddr, dma_addr_t bus) | |
10523 | { | |
10524 | +#ifndef CONFIG_XEN | |
10525 | + struct dma_mapping_ops *ops = get_dma_ops(dev); | |
10526 | +#endif | |
10527 | + | |
10528 | int order = get_order(size); | |
10529 | WARN_ON(irqs_disabled()); /* for portability */ | |
10530 | - if (dma_release_coherent(dev, order, vaddr)) | |
10531 | + if (dma_release_from_coherent(dev, order, vaddr)) | |
10532 | return; | |
10533 | #ifndef CONFIG_XEN | |
10534 | - if (dma_ops->unmap_single) | |
10535 | - dma_ops->unmap_single(dev, bus, size, 0); | |
10536 | + if (ops->unmap_single) | |
10537 | + ops->unmap_single(dev, bus, size, 0); | |
10538 | #endif | |
10539 | xen_destroy_contiguous_region((unsigned long)vaddr, order); | |
10540 | free_pages((unsigned long)vaddr, order); | |
10541 | @@ -557,15 +451,13 @@ EXPORT_SYMBOL(dma_free_coherent); | |
10542 | ||
10543 | static int __init pci_iommu_init(void) | |
10544 | { | |
10545 | -#ifdef CONFIG_CALGARY_IOMMU | |
10546 | calgary_iommu_init(); | |
10547 | -#endif | |
10548 | ||
10549 | intel_iommu_init(); | |
10550 | ||
10551 | -#ifdef CONFIG_GART_IOMMU | |
10552 | + amd_iommu_init(); | |
10553 | + | |
10554 | gart_iommu_init(); | |
10555 | -#endif | |
10556 | ||
10557 | no_iommu_init(); | |
10558 | return 0; | |
82094b55 AF |
10559 | --- sle11-2009-10-16.orig/arch/x86/kernel/pci-nommu-xen.c 2009-03-16 16:38:05.000000000 +0100 |
10560 | +++ sle11-2009-10-16/arch/x86/kernel/pci-nommu-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
10561 | @@ -79,18 +79,12 @@ gnttab_unmap_single(struct device *dev, |
10562 | gnttab_dma_unmap_page(dma_addr); | |
10563 | } | |
10564 | ||
10565 | -static int nommu_mapping_error(dma_addr_t dma_addr) | |
10566 | -{ | |
10567 | - return (dma_addr == bad_dma_address); | |
10568 | -} | |
10569 | - | |
10570 | -static const struct dma_mapping_ops nommu_dma_ops = { | |
10571 | +static struct dma_mapping_ops nommu_dma_ops = { | |
10572 | .map_single = gnttab_map_single, | |
10573 | .unmap_single = gnttab_unmap_single, | |
10574 | .map_sg = gnttab_map_sg, | |
10575 | .unmap_sg = gnttab_unmap_sg, | |
10576 | .dma_supported = swiotlb_dma_supported, | |
10577 | - .mapping_error = nommu_mapping_error | |
10578 | }; | |
10579 | ||
10580 | void __init no_iommu_init(void) | |
82094b55 AF |
10581 | --- sle11-2009-10-16.orig/arch/x86/kernel/probe_roms_32.c 2009-10-28 14:55:02.000000000 +0100 |
10582 | +++ sle11-2009-10-16/arch/x86/kernel/probe_roms_32.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
10583 | @@ -99,6 +99,11 @@ void __init probe_roms(void) |
10584 | unsigned char c; | |
10585 | int i; | |
10586 | ||
10587 | +#ifdef CONFIG_XEN | |
10588 | + if (!is_initial_xendomain()) | |
10589 | + return; | |
10590 | +#endif | |
10591 | + | |
10592 | /* video rom */ | |
10593 | upper = adapter_rom_resources[0].start; | |
10594 | for (start = video_rom_resource.start; start < upper; start += 2048) { | |
10595 | @@ -131,7 +136,7 @@ void __init probe_roms(void) | |
10596 | upper = system_rom_resource.start; | |
10597 | ||
10598 | /* check for extension rom (ignore length byte!) */ | |
10599 | - rom = isa_bus_to_virt(extension_rom_resource.start); | |
10600 | + rom = isa_bus_to_virt((unsigned long)extension_rom_resource.start); | |
10601 | if (romsignature(rom)) { | |
10602 | length = extension_rom_resource.end - extension_rom_resource.start + 1; | |
10603 | if (romchecksum(rom, length)) { | |
82094b55 AF |
10604 | --- sle11-2009-10-16.orig/arch/x86/kernel/process-xen.c 2009-03-16 16:38:05.000000000 +0100 |
10605 | +++ sle11-2009-10-16/arch/x86/kernel/process-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
10606 | @@ -6,6 +6,13 @@ |
10607 | #include <linux/sched.h> | |
10608 | #include <linux/module.h> | |
10609 | #include <linux/pm.h> | |
10610 | +#include <linux/clockchips.h> | |
10611 | +#include <asm/system.h> | |
10612 | + | |
10613 | +unsigned long idle_halt; | |
10614 | +EXPORT_SYMBOL(idle_halt); | |
10615 | +unsigned long idle_nomwait; | |
10616 | +EXPORT_SYMBOL(idle_nomwait); | |
10617 | ||
10618 | struct kmem_cache *task_xstate_cachep; | |
10619 | ||
10620 | @@ -45,6 +52,70 @@ void arch_task_cache_init(void) | |
10621 | SLAB_PANIC, NULL); | |
10622 | } | |
10623 | ||
10624 | +/* | |
10625 | + * Idle related variables and functions | |
10626 | + */ | |
10627 | +unsigned long boot_option_idle_override = 0; | |
10628 | +EXPORT_SYMBOL(boot_option_idle_override); | |
10629 | + | |
10630 | +/* | |
10631 | + * Powermanagement idle function, if any.. | |
10632 | + */ | |
10633 | +void (*pm_idle)(void); | |
10634 | +EXPORT_SYMBOL(pm_idle); | |
10635 | + | |
10636 | +#ifdef CONFIG_X86_32 | |
10637 | +/* | |
10638 | + * This halt magic was a workaround for ancient floppy DMA | |
10639 | + * wreckage. It should be safe to remove. | |
10640 | + */ | |
10641 | +static int hlt_counter; | |
10642 | +void disable_hlt(void) | |
10643 | +{ | |
10644 | + hlt_counter++; | |
10645 | +} | |
10646 | +EXPORT_SYMBOL(disable_hlt); | |
10647 | + | |
10648 | +void enable_hlt(void) | |
10649 | +{ | |
10650 | + hlt_counter--; | |
10651 | +} | |
10652 | +EXPORT_SYMBOL(enable_hlt); | |
10653 | + | |
10654 | +static inline int hlt_use_halt(void) | |
10655 | +{ | |
10656 | + return (!hlt_counter && boot_cpu_data.hlt_works_ok); | |
10657 | +} | |
10658 | +#else | |
10659 | +static inline int hlt_use_halt(void) | |
10660 | +{ | |
10661 | + return 1; | |
10662 | +} | |
10663 | +#endif | |
10664 | + | |
10665 | +/* | |
10666 | + * We use this if we don't have any better | |
10667 | + * idle routine.. | |
10668 | + */ | |
10669 | +void xen_idle(void) | |
10670 | +{ | |
10671 | + current_thread_info()->status &= ~TS_POLLING; | |
10672 | + /* | |
10673 | + * TS_POLLING-cleared state must be visible before we | |
10674 | + * test NEED_RESCHED: | |
10675 | + */ | |
10676 | + smp_mb(); | |
10677 | + | |
10678 | + if (!need_resched()) | |
10679 | + safe_halt(); /* enables interrupts racelessly */ | |
10680 | + else | |
10681 | + local_irq_enable(); | |
10682 | + current_thread_info()->status |= TS_POLLING; | |
10683 | +} | |
10684 | +#ifdef CONFIG_APM_MODULE | |
10685 | +EXPORT_SYMBOL(default_idle); | |
10686 | +#endif | |
10687 | + | |
10688 | static void do_nothing(void *unused) | |
10689 | { | |
10690 | } | |
10691 | @@ -61,7 +132,7 @@ void cpu_idle_wait(void) | |
10692 | { | |
10693 | smp_mb(); | |
10694 | /* kick all the CPUs so that they exit out of pm_idle */ | |
10695 | - smp_call_function(do_nothing, NULL, 0, 1); | |
10696 | + smp_call_function(do_nothing, NULL, 1); | |
10697 | } | |
10698 | EXPORT_SYMBOL_GPL(cpu_idle_wait); | |
10699 | ||
10700 | @@ -125,60 +196,175 @@ static void poll_idle(void) | |
10701 | * | |
10702 | * idle=mwait overrides this decision and forces the usage of mwait. | |
10703 | */ | |
10704 | +static int __cpuinitdata force_mwait; | |
10705 | + | |
10706 | +#define MWAIT_INFO 0x05 | |
10707 | +#define MWAIT_ECX_EXTENDED_INFO 0x01 | |
10708 | +#define MWAIT_EDX_C1 0xf0 | |
10709 | + | |
10710 | static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) | |
10711 | { | |
10712 | + u32 eax, ebx, ecx, edx; | |
10713 | + | |
10714 | if (force_mwait) | |
10715 | return 1; | |
10716 | ||
10717 | - if (c->x86_vendor == X86_VENDOR_AMD) { | |
10718 | - switch(c->x86) { | |
10719 | - case 0x10: | |
10720 | - case 0x11: | |
10721 | - return 0; | |
10722 | + if (c->cpuid_level < MWAIT_INFO) | |
10723 | + return 0; | |
10724 | + | |
10725 | + cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx); | |
10726 | + /* Check, whether EDX has extended info about MWAIT */ | |
10727 | + if (!(ecx & MWAIT_ECX_EXTENDED_INFO)) | |
10728 | + return 1; | |
10729 | + | |
10730 | + /* | |
10731 | + * edx enumerates MONITOR/MWAIT extensions. Check, whether | |
10732 | + * C1 supports MWAIT | |
10733 | + */ | |
10734 | + return (edx & MWAIT_EDX_C1); | |
10735 | +} | |
10736 | + | |
10737 | +/* | |
10738 | + * Check for AMD CPUs, which have potentially C1E support | |
10739 | + */ | |
10740 | +static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | |
10741 | +{ | |
10742 | + if (c->x86_vendor != X86_VENDOR_AMD) | |
10743 | + return 0; | |
10744 | + | |
10745 | + if (c->x86 < 0x0F) | |
10746 | + return 0; | |
10747 | + | |
10748 | + /* Family 0x0f models < rev F do not have C1E */ | |
10749 | + if (c->x86 == 0x0f && c->x86_model < 0x40) | |
10750 | + return 0; | |
10751 | + | |
10752 | + return 1; | |
10753 | +} | |
10754 | + | |
10755 | +static cpumask_t c1e_mask = CPU_MASK_NONE; | |
10756 | +static int c1e_detected; | |
10757 | + | |
10758 | +void c1e_remove_cpu(int cpu) | |
10759 | +{ | |
10760 | + cpu_clear(cpu, c1e_mask); | |
10761 | +} | |
10762 | + | |
10763 | +/* | |
10764 | + * C1E aware idle routine. We check for C1E active in the interrupt | |
10765 | + * pending message MSR. If we detect C1E, then we handle it the same | |
10766 | + * way as C3 power states (local apic timer and TSC stop) | |
10767 | + */ | |
10768 | +static void c1e_idle(void) | |
10769 | +{ | |
10770 | + if (need_resched()) | |
10771 | + return; | |
10772 | + | |
10773 | + if (!c1e_detected) { | |
10774 | + u32 lo, hi; | |
10775 | + | |
10776 | + rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); | |
10777 | + if (lo & K8_INTP_C1E_ACTIVE_MASK) { | |
10778 | + c1e_detected = 1; | |
10779 | + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | |
10780 | + mark_tsc_unstable("TSC halt in AMD C1E"); | |
10781 | + printk(KERN_INFO "System has AMD C1E enabled\n"); | |
10782 | + set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); | |
10783 | } | |
10784 | } | |
10785 | - return 1; | |
10786 | + | |
10787 | + if (c1e_detected) { | |
10788 | + int cpu = smp_processor_id(); | |
10789 | + | |
10790 | + if (!cpu_isset(cpu, c1e_mask)) { | |
10791 | + cpu_set(cpu, c1e_mask); | |
10792 | + /* | |
10793 | + * Force broadcast so ACPI can not interfere. Needs | |
10794 | + * to run with interrupts enabled as it uses | |
10795 | + * smp_call_function. | |
10796 | + */ | |
10797 | + local_irq_enable(); | |
10798 | + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, | |
10799 | + &cpu); | |
10800 | + printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", | |
10801 | + cpu); | |
10802 | + local_irq_disable(); | |
10803 | + } | |
10804 | + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); | |
10805 | + | |
10806 | + default_idle(); | |
10807 | + | |
10808 | + /* | |
10809 | + * The switch back from broadcast mode needs to be | |
10810 | + * called with interrupts disabled. | |
10811 | + */ | |
10812 | + local_irq_disable(); | |
10813 | + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); | |
10814 | + local_irq_enable(); | |
10815 | + } else | |
10816 | + default_idle(); | |
10817 | } | |
10818 | #endif | |
10819 | ||
10820 | void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |
10821 | { | |
10822 | #ifndef CONFIG_XEN | |
10823 | - static int selected; | |
10824 | - | |
10825 | - if (selected) | |
10826 | - return; | |
10827 | #ifdef CONFIG_X86_SMP | |
10828 | if (pm_idle == poll_idle && smp_num_siblings > 1) { | |
10829 | printk(KERN_WARNING "WARNING: polling idle and HT enabled," | |
10830 | " performance may degrade.\n"); | |
10831 | } | |
10832 | #endif | |
10833 | + if (pm_idle) | |
10834 | + return; | |
10835 | + | |
10836 | if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { | |
10837 | /* | |
10838 | - * Skip, if setup has overridden idle. | |
10839 | * One CPU supports mwait => All CPUs supports mwait | |
10840 | */ | |
10841 | - if (!pm_idle) { | |
10842 | - printk(KERN_INFO "using mwait in idle threads.\n"); | |
10843 | - pm_idle = mwait_idle; | |
10844 | - } | |
10845 | - } | |
10846 | - selected = 1; | |
10847 | + printk(KERN_INFO "using mwait in idle threads.\n"); | |
10848 | + pm_idle = mwait_idle; | |
10849 | + } else if (check_c1e_idle(c)) { | |
10850 | + printk(KERN_INFO "using C1E aware idle routine\n"); | |
10851 | + pm_idle = c1e_idle; | |
10852 | + } else | |
10853 | + pm_idle = default_idle; | |
10854 | #endif | |
10855 | } | |
10856 | ||
10857 | static int __init idle_setup(char *str) | |
10858 | { | |
10859 | + if (!str) | |
10860 | + return -EINVAL; | |
10861 | + | |
10862 | if (!strcmp(str, "poll")) { | |
10863 | printk("using polling idle threads.\n"); | |
10864 | pm_idle = poll_idle; | |
10865 | - } | |
10866 | #ifndef CONFIG_XEN | |
10867 | - else if (!strcmp(str, "mwait")) | |
10868 | + } else if (!strcmp(str, "mwait")) | |
10869 | force_mwait = 1; | |
10870 | + else if (!strcmp(str, "halt")) { | |
10871 | + /* | |
10872 | + * When the boot option of idle=halt is added, halt is | |
10873 | + * forced to be used for CPU idle. In such case CPU C2/C3 | |
10874 | + * won't be used again. | |
10875 | + * To continue to load the CPU idle driver, don't touch | |
10876 | + * the boot_option_idle_override. | |
10877 | + */ | |
10878 | + pm_idle = default_idle; | |
10879 | + idle_halt = 1; | |
10880 | + return 0; | |
10881 | + } else if (!strcmp(str, "nomwait")) { | |
10882 | + /* | |
10883 | + * If the boot option of "idle=nomwait" is added, | |
10884 | + * it means that mwait will be disabled for CPU C2/C3 | |
10885 | + * states. In such case it won't touch the variable | |
10886 | + * of boot_option_idle_override. | |
10887 | + */ | |
10888 | + idle_nomwait = 1; | |
10889 | + return 0; | |
10890 | #endif | |
10891 | - else | |
10892 | + } else | |
10893 | return -1; | |
10894 | ||
10895 | boot_option_idle_override = 1; | |
82094b55 AF |
10896 | --- sle11-2009-10-16.orig/arch/x86/kernel/process_32-xen.c 2009-03-16 16:38:05.000000000 +0100 |
10897 | +++ sle11-2009-10-16/arch/x86/kernel/process_32-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
10898 | @@ -59,15 +59,11 @@ |
10899 | #include <asm/tlbflush.h> | |
10900 | #include <asm/cpu.h> | |
10901 | #include <asm/kdebug.h> | |
10902 | +#include <asm/idle.h> | |
10903 | ||
10904 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | |
10905 | asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork"); | |
10906 | ||
10907 | -static int hlt_counter; | |
10908 | - | |
10909 | -unsigned long boot_option_idle_override = 0; | |
10910 | -EXPORT_SYMBOL(boot_option_idle_override); | |
10911 | - | |
10912 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | |
10913 | EXPORT_PER_CPU_SYMBOL(current_task); | |
10914 | ||
10915 | @@ -82,46 +78,27 @@ unsigned long thread_saved_pc(struct tas | |
10916 | return ((unsigned long *)tsk->thread.sp)[3]; | |
10917 | } | |
10918 | ||
10919 | -/* | |
10920 | - * Powermanagement idle function, if any.. | |
10921 | - */ | |
10922 | -void (*pm_idle)(void); | |
10923 | -EXPORT_SYMBOL(pm_idle); | |
10924 | +#ifdef CONFIG_HOTPLUG_CPU | |
10925 | +#ifndef CONFIG_XEN | |
10926 | +#include <asm/nmi.h> | |
10927 | ||
10928 | -void disable_hlt(void) | |
10929 | +static void cpu_exit_clear(void) | |
10930 | { | |
10931 | - hlt_counter++; | |
10932 | -} | |
10933 | + int cpu = raw_smp_processor_id(); | |
10934 | ||
10935 | -EXPORT_SYMBOL(disable_hlt); | |
10936 | - | |
10937 | -void enable_hlt(void) | |
10938 | -{ | |
10939 | - hlt_counter--; | |
10940 | -} | |
10941 | + idle_task_exit(); | |
10942 | ||
10943 | -EXPORT_SYMBOL(enable_hlt); | |
10944 | + cpu_uninit(); | |
10945 | + irq_ctx_exit(cpu); | |
10946 | ||
10947 | -static void xen_idle(void) | |
10948 | -{ | |
10949 | - current_thread_info()->status &= ~TS_POLLING; | |
10950 | - /* | |
10951 | - * TS_POLLING-cleared state must be visible before we | |
10952 | - * test NEED_RESCHED: | |
10953 | - */ | |
10954 | - smp_mb(); | |
10955 | + cpu_clear(cpu, cpu_callout_map); | |
10956 | + cpu_clear(cpu, cpu_callin_map); | |
10957 | ||
10958 | - if (!need_resched()) | |
10959 | - safe_halt(); /* enables interrupts racelessly */ | |
10960 | - else | |
10961 | - local_irq_enable(); | |
10962 | - current_thread_info()->status |= TS_POLLING; | |
10963 | + numa_remove_cpu(cpu); | |
10964 | + c1e_remove_cpu(cpu); | |
10965 | } | |
10966 | -#ifdef CONFIG_APM_MODULE | |
10967 | -EXPORT_SYMBOL(default_idle); | |
10968 | #endif | |
10969 | ||
10970 | -#ifdef CONFIG_HOTPLUG_CPU | |
10971 | static inline void play_dead(void) | |
10972 | { | |
10973 | idle_task_exit(); | |
10974 | @@ -152,13 +129,11 @@ void cpu_idle(void) | |
10975 | ||
10976 | /* endless idle loop with no priority at all */ | |
10977 | while (1) { | |
10978 | - tick_nohz_stop_sched_tick(); | |
10979 | + tick_nohz_stop_sched_tick(1); | |
10980 | while (!need_resched()) { | |
10981 | - void (*idle)(void); | |
10982 | ||
10983 | check_pgt_cache(); | |
10984 | rmb(); | |
10985 | - idle = xen_idle; /* no alternatives */ | |
10986 | ||
10987 | if (rcu_pending(cpu)) | |
10988 | rcu_check_callbacks(cpu, 0); | |
10989 | @@ -168,7 +143,10 @@ void cpu_idle(void) | |
10990 | ||
10991 | local_irq_disable(); | |
10992 | __get_cpu_var(irq_stat).idle_timestamp = jiffies; | |
10993 | - idle(); | |
10994 | + /* Don't trace irqs off for idle */ | |
10995 | + stop_critical_timings(); | |
10996 | + xen_idle(); | |
10997 | + start_critical_timings(); | |
10998 | } | |
10999 | tick_nohz_restart_sched_tick(); | |
11000 | preempt_enable_no_resched(); | |
82094b55 AF |
11001 | --- sle11-2009-10-16.orig/arch/x86/kernel/process_64-xen.c 2009-03-16 16:38:05.000000000 +0100 |
11002 | +++ sle11-2009-10-16/arch/x86/kernel/process_64-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
11003 | @@ -65,15 +65,6 @@ asmlinkage extern void ret_from_fork(voi |
11004 | ||
11005 | unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; | |
11006 | ||
11007 | -unsigned long boot_option_idle_override = 0; | |
11008 | -EXPORT_SYMBOL(boot_option_idle_override); | |
11009 | - | |
11010 | -/* | |
11011 | - * Powermanagement idle function, if any.. | |
11012 | - */ | |
11013 | -void (*pm_idle)(void); | |
11014 | -EXPORT_SYMBOL(pm_idle); | |
11015 | - | |
11016 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); | |
11017 | ||
11018 | void idle_notifier_register(struct notifier_block *n) | |
11019 | @@ -103,25 +94,13 @@ void exit_idle(void) | |
11020 | __exit_idle(); | |
11021 | } | |
11022 | ||
11023 | -static void xen_idle(void) | |
11024 | -{ | |
11025 | - current_thread_info()->status &= ~TS_POLLING; | |
11026 | - /* | |
11027 | - * TS_POLLING-cleared state must be visible before we | |
11028 | - * test NEED_RESCHED: | |
11029 | - */ | |
11030 | - smp_mb(); | |
11031 | - if (!need_resched()) | |
11032 | - safe_halt(); /* enables interrupts racelessly */ | |
11033 | - else | |
11034 | - local_irq_enable(); | |
11035 | - current_thread_info()->status |= TS_POLLING; | |
11036 | -} | |
11037 | - | |
11038 | #ifdef CONFIG_HOTPLUG_CPU | |
11039 | static inline void play_dead(void) | |
11040 | { | |
11041 | idle_task_exit(); | |
11042 | +#ifndef CONFIG_XEN | |
11043 | + c1e_remove_cpu(raw_smp_processor_id()); | |
11044 | +#endif | |
11045 | local_irq_disable(); | |
11046 | cpu_clear(smp_processor_id(), cpu_initialized); | |
11047 | preempt_enable_no_resched(); | |
11048 | @@ -146,12 +125,11 @@ void cpu_idle(void) | |
11049 | current_thread_info()->status |= TS_POLLING; | |
11050 | /* endless idle loop with no priority at all */ | |
11051 | while (1) { | |
11052 | - tick_nohz_stop_sched_tick(); | |
11053 | + tick_nohz_stop_sched_tick(1); | |
11054 | while (!need_resched()) { | |
11055 | - void (*idle)(void); | |
11056 | ||
11057 | rmb(); | |
11058 | - idle = xen_idle; /* no alternatives */ | |
11059 | + | |
11060 | if (cpu_is_offline(smp_processor_id())) | |
11061 | play_dead(); | |
11062 | /* | |
11063 | @@ -161,7 +139,10 @@ void cpu_idle(void) | |
11064 | */ | |
11065 | local_irq_disable(); | |
11066 | enter_idle(); | |
11067 | - idle(); | |
11068 | + /* Don't trace irqs off for idle */ | |
11069 | + stop_critical_timings(); | |
11070 | + xen_idle(); | |
11071 | + start_critical_timings(); | |
11072 | /* In many cases the interrupt that ended idle | |
11073 | has already called exit_idle. But some idle | |
11074 | loops can be woken up without interrupt. */ | |
11075 | @@ -271,7 +252,7 @@ void exit_thread(void) | |
11076 | } | |
11077 | } | |
11078 | ||
11079 | -void load_gs_index(unsigned gs) | |
11080 | +void xen_load_gs_index(unsigned gs) | |
11081 | { | |
11082 | WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs)); | |
11083 | } | |
11084 | @@ -373,10 +354,10 @@ int copy_thread(int nr, unsigned long cl | |
11085 | p->thread.fs = me->thread.fs; | |
11086 | p->thread.gs = me->thread.gs; | |
11087 | ||
11088 | - asm("mov %%gs,%0" : "=m" (p->thread.gsindex)); | |
11089 | - asm("mov %%fs,%0" : "=m" (p->thread.fsindex)); | |
11090 | - asm("mov %%es,%0" : "=m" (p->thread.es)); | |
11091 | - asm("mov %%ds,%0" : "=m" (p->thread.ds)); | |
11092 | + savesegment(gs, p->thread.gsindex); | |
11093 | + savesegment(fs, p->thread.fsindex); | |
11094 | + savesegment(es, p->thread.es); | |
11095 | + savesegment(ds, p->thread.ds); | |
11096 | ||
11097 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { | |
11098 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | |
11099 | @@ -417,7 +398,9 @@ out: | |
11100 | void | |
11101 | start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | |
11102 | { | |
11103 | - asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0)); | |
11104 | + loadsegment(fs, 0); | |
11105 | + loadsegment(es, 0); | |
11106 | + loadsegment(ds, 0); | |
11107 | load_gs_index(0); | |
11108 | regs->ip = new_ip; | |
11109 | regs->sp = new_sp; | |
11110 | @@ -557,8 +540,8 @@ static inline void __switch_to_xtra(stru | |
11111 | struct task_struct * | |
11112 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |
11113 | { | |
11114 | - struct thread_struct *prev = &prev_p->thread, | |
11115 | - *next = &next_p->thread; | |
11116 | + struct thread_struct *prev = &prev_p->thread; | |
11117 | + struct thread_struct *next = &next_p->thread; | |
11118 | int cpu = smp_processor_id(); | |
11119 | #ifndef CONFIG_X86_NO_TSS | |
11120 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | |
11121 | @@ -661,12 +644,25 @@ __switch_to(struct task_struct *prev_p, | |
11122 | */ | |
11123 | if (unlikely(next->es)) | |
11124 | loadsegment(es, next->es); | |
11125 | - | |
11126 | + | |
11127 | if (unlikely(next->ds)) | |
11128 | loadsegment(ds, next->ds); | |
11129 | ||
11130 | + /* | |
11131 | + * Leave lazy mode, flushing any hypercalls made here. | |
11132 | + * This must be done before restoring TLS segments so | |
11133 | + * the GDT and LDT are properly updated, and must be | |
11134 | + * done before math_state_restore, so the TS bit is up | |
11135 | + * to date. | |
11136 | + */ | |
11137 | + arch_leave_lazy_cpu_mode(); | |
11138 | + | |
11139 | /* | |
11140 | * Switch FS and GS. | |
11141 | + * | |
11142 | + * Segment register != 0 always requires a reload. Also | |
11143 | + * reload when it has changed. When prev process used 64bit | |
11144 | + * base always reload to avoid an information leak. | |
11145 | */ | |
11146 | if (unlikely(next->fsindex)) | |
11147 | loadsegment(fs, next->fsindex); | |
11148 | @@ -687,7 +683,8 @@ __switch_to(struct task_struct *prev_p, | |
11149 | write_pda(oldrsp, next->usersp); | |
11150 | write_pda(pcurrent, next_p); | |
11151 | write_pda(kernelstack, | |
11152 | - (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); | |
11153 | + (unsigned long)task_stack_page(next_p) + | |
11154 | + THREAD_SIZE - PDA_STACKOFFSET); | |
11155 | #ifdef CONFIG_CC_STACKPROTECTOR | |
11156 | write_pda(stack_canary, next_p->stack_canary); | |
11157 | ||
11158 | @@ -848,7 +845,7 @@ long do_arch_prctl(struct task_struct *t | |
11159 | set_32bit_tls(task, FS_TLS, addr); | |
11160 | if (doit) { | |
11161 | load_TLS(&task->thread, cpu); | |
11162 | - asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL)); | |
11163 | + loadsegment(fs, FS_TLS_SEL); | |
11164 | } | |
11165 | task->thread.fsindex = FS_TLS_SEL; | |
11166 | task->thread.fs = 0; | |
11167 | @@ -858,7 +855,7 @@ long do_arch_prctl(struct task_struct *t | |
11168 | if (doit) { | |
11169 | /* set the selector to 0 to not confuse | |
11170 | __switch_to */ | |
11171 | - asm volatile("movl %0,%%fs" :: "r" (0)); | |
11172 | + loadsegment(fs, 0); | |
11173 | ret = HYPERVISOR_set_segment_base(SEGBASE_FS, | |
11174 | addr); | |
11175 | } | |
11176 | @@ -882,7 +879,7 @@ long do_arch_prctl(struct task_struct *t | |
11177 | if (task->thread.gsindex == GS_TLS_SEL) | |
11178 | base = read_32bit_tls(task, GS_TLS); | |
11179 | else if (doit) { | |
11180 | - asm("movl %%gs,%0" : "=r" (gsindex)); | |
11181 | + savesegment(gs, gsindex); | |
11182 | if (gsindex) | |
11183 | rdmsrl(MSR_KERNEL_GS_BASE, base); | |
11184 | else | |
82094b55 AF |
11185 | --- sle11-2009-10-16.orig/arch/x86/kernel/quirks-xen.c 2009-03-16 16:33:40.000000000 +0100 |
11186 | +++ sle11-2009-10-16/arch/x86/kernel/quirks-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
11187 | @@ -63,6 +63,7 @@ static enum { |
11188 | ICH_FORCE_HPET_RESUME, | |
11189 | VT8237_FORCE_HPET_RESUME, | |
11190 | NVIDIA_FORCE_HPET_RESUME, | |
11191 | + ATI_FORCE_HPET_RESUME, | |
11192 | } force_hpet_resume_type; | |
11193 | ||
11194 | static void __iomem *rcba_base; | |
11195 | @@ -156,6 +157,8 @@ static void ich_force_enable_hpet(struct | |
11196 | ||
11197 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, | |
11198 | ich_force_enable_hpet); | |
11199 | +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_0, | |
11200 | + ich_force_enable_hpet); | |
11201 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, | |
11202 | ich_force_enable_hpet); | |
11203 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, | |
11204 | @@ -172,6 +175,12 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_I | |
11205 | ||
11206 | static struct pci_dev *cached_dev; | |
11207 | ||
11208 | +static void hpet_print_force_info(void) | |
11209 | +{ | |
11210 | + printk(KERN_INFO "HPET not enabled in BIOS. " | |
11211 | + "You might try hpet=force boot option\n"); | |
11212 | +} | |
11213 | + | |
11214 | static void old_ich_force_hpet_resume(void) | |
11215 | { | |
11216 | u32 val; | |
11217 | @@ -251,8 +260,12 @@ static void old_ich_force_enable_hpet_us | |
11218 | { | |
11219 | if (hpet_force_user) | |
11220 | old_ich_force_enable_hpet(dev); | |
11221 | + else | |
11222 | + hpet_print_force_info(); | |
11223 | } | |
11224 | ||
11225 | +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1, | |
11226 | + old_ich_force_enable_hpet_user); | |
11227 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, | |
11228 | old_ich_force_enable_hpet_user); | |
11229 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, | |
11230 | @@ -288,9 +301,14 @@ static void vt8237_force_enable_hpet(str | |
11231 | { | |
11232 | u32 uninitialized_var(val); | |
11233 | ||
11234 | - if (!hpet_force_user || hpet_address || force_hpet_address) | |
11235 | + if (hpet_address || force_hpet_address) | |
11236 | return; | |
11237 | ||
11238 | + if (!hpet_force_user) { | |
11239 | + hpet_print_force_info(); | |
11240 | + return; | |
11241 | + } | |
11242 | + | |
11243 | pci_read_config_dword(dev, 0x68, &val); | |
11244 | /* | |
11245 | * Bit 7 is HPET enable bit. | |
11246 | @@ -328,6 +346,36 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_V | |
11247 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, | |
11248 | vt8237_force_enable_hpet); | |
11249 | ||
11250 | +static void ati_force_hpet_resume(void) | |
11251 | +{ | |
11252 | + pci_write_config_dword(cached_dev, 0x14, 0xfed00000); | |
11253 | + printk(KERN_DEBUG "Force enabled HPET at resume\n"); | |
11254 | +} | |
11255 | + | |
11256 | +static void ati_force_enable_hpet(struct pci_dev *dev) | |
11257 | +{ | |
11258 | + u32 uninitialized_var(val); | |
11259 | + | |
11260 | + if (hpet_address || force_hpet_address) | |
11261 | + return; | |
11262 | + | |
11263 | + if (!hpet_force_user) { | |
11264 | + hpet_print_force_info(); | |
11265 | + return; | |
11266 | + } | |
11267 | + | |
11268 | + pci_write_config_dword(dev, 0x14, 0xfed00000); | |
11269 | + pci_read_config_dword(dev, 0x14, &val); | |
11270 | + force_hpet_address = val; | |
11271 | + force_hpet_resume_type = ATI_FORCE_HPET_RESUME; | |
11272 | + dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", | |
11273 | + force_hpet_address); | |
11274 | + cached_dev = dev; | |
11275 | + return; | |
11276 | +} | |
11277 | +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, | |
11278 | + ati_force_enable_hpet); | |
11279 | + | |
11280 | /* | |
11281 | * Undocumented chipset feature taken from LinuxBIOS. | |
11282 | */ | |
11283 | @@ -341,8 +389,13 @@ static void nvidia_force_enable_hpet(str | |
11284 | { | |
11285 | u32 uninitialized_var(val); | |
11286 | ||
11287 | - if (!hpet_force_user || hpet_address || force_hpet_address) | |
11288 | + if (hpet_address || force_hpet_address) | |
11289 | + return; | |
11290 | + | |
11291 | + if (!hpet_force_user) { | |
11292 | + hpet_print_force_info(); | |
11293 | return; | |
11294 | + } | |
11295 | ||
11296 | pci_write_config_dword(dev, 0x44, 0xfed00001); | |
11297 | pci_read_config_dword(dev, 0x44, &val); | |
11298 | @@ -395,6 +448,9 @@ void force_hpet_resume(void) | |
11299 | case NVIDIA_FORCE_HPET_RESUME: | |
11300 | nvidia_force_hpet_resume(); | |
11301 | return; | |
11302 | + case ATI_FORCE_HPET_RESUME: | |
11303 | + ati_force_hpet_resume(); | |
11304 | + return; | |
11305 | default: | |
11306 | break; | |
11307 | } | |
82094b55 AF |
11308 | --- sle11-2009-10-16.orig/arch/x86/kernel/setup-xen.c 2009-03-16 16:38:05.000000000 +0100 |
11309 | +++ sle11-2009-10-16/arch/x86/kernel/setup-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
11310 | @@ -1,141 +1,1132 @@ | |
2cb7cef9 BS |
11311 | -#include <linux/kernel.h> |
11312 | +/* | |
11313 | + * Copyright (C) 1995 Linus Torvalds | |
11314 | + * | |
11315 | + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 | |
11316 | + * | |
11317 | + * Memory region support | |
11318 | + * David Parsons <orc@pell.chi.il.us>, July-August 1999 | |
11319 | + * | |
11320 | + * Added E820 sanitization routine (removes overlapping memory regions); | |
11321 | + * Brian Moyle <bmoyle@mvista.com>, February 2001 | |
11322 | + * | |
11323 | + * Moved CPU detection code to cpu/${cpu}.c | |
11324 | + * Patrick Mochel <mochel@osdl.org>, March 2002 | |
11325 | + * | |
11326 | + * Provisions for empty E820 memory regions (reported by certain BIOSes). | |
11327 | + * Alex Achenbach <xela@slit.de>, December 2002. | |
11328 | + * | |
11329 | + */ | |
11330 | + | |
11331 | +/* | |
11332 | + * This file handles the architecture-dependent parts of initialization | |
11333 | + */ | |
11334 | + | |
11335 | +#include <linux/sched.h> | |
11336 | +#include <linux/mm.h> | |
11337 | +#include <linux/mmzone.h> | |
11338 | +#include <linux/screen_info.h> | |
11339 | +#include <linux/ioport.h> | |
11340 | +#include <linux/acpi.h> | |
11341 | +#include <linux/apm_bios.h> | |
11342 | +#include <linux/initrd.h> | |
11343 | +#include <linux/bootmem.h> | |
11344 | +#include <linux/seq_file.h> | |
11345 | +#include <linux/console.h> | |
11346 | +#include <linux/mca.h> | |
11347 | +#include <linux/root_dev.h> | |
11348 | +#include <linux/highmem.h> | |
11349 | #include <linux/module.h> | |
11350 | +#include <linux/efi.h> | |
11351 | #include <linux/init.h> | |
11352 | -#include <linux/bootmem.h> | |
11353 | +#include <linux/edd.h> | |
11354 | +#include <linux/iscsi_ibft.h> | |
11355 | +#include <linux/nodemask.h> | |
11356 | +#include <linux/kexec.h> | |
11357 | +#include <linux/dmi.h> | |
11358 | +#include <linux/pfn.h> | |
11359 | +#include <linux/pci.h> | |
11360 | +#include <asm/pci-direct.h> | |
11361 | +#include <linux/init_ohci1394_dma.h> | |
11362 | +#include <linux/kvm_para.h> | |
11363 | + | |
11364 | +#include <linux/errno.h> | |
11365 | +#include <linux/kernel.h> | |
11366 | +#include <linux/stddef.h> | |
11367 | +#include <linux/unistd.h> | |
11368 | +#include <linux/ptrace.h> | |
11369 | +#include <linux/slab.h> | |
11370 | +#include <linux/user.h> | |
11371 | +#include <linux/delay.h> | |
11372 | + | |
11373 | +#include <linux/kallsyms.h> | |
11374 | +#include <linux/cpufreq.h> | |
11375 | +#include <linux/dma-mapping.h> | |
11376 | +#include <linux/ctype.h> | |
11377 | +#include <linux/uaccess.h> | |
11378 | + | |
11379 | #include <linux/percpu.h> | |
11380 | -#include <asm/smp.h> | |
11381 | -#include <asm/percpu.h> | |
11382 | +#include <linux/crash_dump.h> | |
11383 | + | |
11384 | +#include <video/edid.h> | |
11385 | + | |
11386 | +#include <asm/mtrr.h> | |
11387 | +#include <asm/apic.h> | |
11388 | +#include <asm/e820.h> | |
11389 | +#include <asm/mpspec.h> | |
11390 | +#include <asm/setup.h> | |
11391 | +#include <asm/arch_hooks.h> | |
11392 | +#include <asm/efi.h> | |
11393 | #include <asm/sections.h> | |
11394 | +#include <asm/dmi.h> | |
11395 | +#include <asm/io_apic.h> | |
11396 | +#include <asm/ist.h> | |
11397 | +#include <asm/vmi.h> | |
11398 | +#include <setup_arch.h> | |
11399 | +#include <asm/bios_ebda.h> | |
11400 | +#include <asm/cacheflush.h> | |
11401 | #include <asm/processor.h> | |
11402 | -#include <asm/setup.h> | |
11403 | +#include <asm/bugs.h> | |
11404 | + | |
11405 | +#include <asm/system.h> | |
11406 | +#include <asm/vsyscall.h> | |
11407 | +#include <asm/smp.h> | |
11408 | +#include <asm/desc.h> | |
11409 | +#include <asm/dma.h> | |
11410 | +#include <asm/iommu.h> | |
11411 | +#include <asm/mmu_context.h> | |
11412 | +#include <asm/proto.h> | |
11413 | + | |
11414 | +#include <mach_apic.h> | |
11415 | +#include <asm/paravirt.h> | |
11416 | + | |
11417 | +#include <asm/percpu.h> | |
11418 | #include <asm/topology.h> | |
11419 | -#include <asm/mpspec.h> | |
11420 | #include <asm/apicdef.h> | |
11421 | +#ifdef CONFIG_X86_64 | |
11422 | +#include <asm/numa_64.h> | |
11423 | +#endif | |
11424 | + | |
11425 | +#ifdef CONFIG_XEN | |
11426 | +#include <asm/hypervisor.h> | |
11427 | +#include <xen/interface/kexec.h> | |
11428 | +#include <xen/interface/memory.h> | |
11429 | +#include <xen/interface/nmi.h> | |
11430 | +#include <xen/interface/physdev.h> | |
11431 | +#include <xen/features.h> | |
11432 | +#include <xen/firmware.h> | |
11433 | +#include <xen/xencons.h> | |
11434 | + | |
11435 | +shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; | |
11436 | +EXPORT_SYMBOL(HYPERVISOR_shared_info); | |
11437 | ||
11438 | -#ifdef CONFIG_X86_LOCAL_APIC | |
11439 | -unsigned int num_processors; | |
11440 | -unsigned disabled_cpus __cpuinitdata; | |
11441 | -/* Processor that is doing the boot up */ | |
11442 | -unsigned int boot_cpu_physical_apicid = -1U; | |
11443 | -EXPORT_SYMBOL(boot_cpu_physical_apicid); | |
11444 | +static int xen_panic_event(struct notifier_block *, unsigned long, void *); | |
11445 | +static struct notifier_block xen_panic_block = { | |
11446 | + xen_panic_event, NULL, 0 /* try to go last */ | |
11447 | +}; | |
11448 | ||
11449 | -DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; | |
11450 | -EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); | |
11451 | +unsigned long *phys_to_machine_mapping; | |
11452 | +EXPORT_SYMBOL(phys_to_machine_mapping); | |
11453 | ||
11454 | -/* Bitmask of physically existing CPUs */ | |
11455 | -physid_mask_t phys_cpu_present_map; | |
11456 | +unsigned long *pfn_to_mfn_frame_list_list, | |
11457 | +#ifdef CONFIG_X86_64 | |
11458 | + *pfn_to_mfn_frame_list[512]; | |
11459 | +#else | |
11460 | + *pfn_to_mfn_frame_list[128]; | |
11461 | +#endif | |
11462 | + | |
11463 | +/* Raw start-of-day parameters from the hypervisor. */ | |
11464 | +start_info_t *xen_start_info; | |
11465 | +EXPORT_SYMBOL(xen_start_info); | |
11466 | +#endif | |
11467 | + | |
11468 | +#ifndef ARCH_SETUP | |
11469 | +#define ARCH_SETUP | |
11470 | +#endif | |
11471 | + | |
11472 | +#ifndef CONFIG_XEN | |
11473 | +#ifndef CONFIG_DEBUG_BOOT_PARAMS | |
11474 | +struct boot_params __initdata boot_params; | |
11475 | +#else | |
11476 | +struct boot_params boot_params; | |
11477 | +#endif | |
11478 | #endif | |
11479 | ||
11480 | -#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) | |
11481 | /* | |
11482 | - * Copy data used in early init routines from the initial arrays to the | |
11483 | - * per cpu data areas. These arrays then become expendable and the | |
11484 | - * *_early_ptr's are zeroed indicating that the static arrays are gone. | |
11485 | + * Machine setup.. | |
11486 | */ | |
11487 | -static void __init setup_per_cpu_maps(void) | |
11488 | +static struct resource data_resource = { | |
11489 | + .name = "Kernel data", | |
11490 | + .start = 0, | |
11491 | + .end = 0, | |
11492 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
11493 | +}; | |
11494 | + | |
11495 | +static struct resource code_resource = { | |
11496 | + .name = "Kernel code", | |
11497 | + .start = 0, | |
11498 | + .end = 0, | |
11499 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
11500 | +}; | |
11501 | + | |
11502 | +static struct resource bss_resource = { | |
11503 | + .name = "Kernel bss", | |
11504 | + .start = 0, | |
11505 | + .end = 0, | |
11506 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
11507 | +}; | |
11508 | + | |
11509 | + | |
11510 | +#ifdef CONFIG_X86_32 | |
11511 | +#ifndef CONFIG_XEN | |
11512 | +/* This value is set up by the early boot code to point to the value | |
11513 | + immediately after the boot time page tables. It contains a *physical* | |
11514 | + address, and must not be in the .bss segment! */ | |
11515 | +unsigned long init_pg_tables_start __initdata = ~0UL; | |
11516 | +unsigned long init_pg_tables_end __initdata = ~0UL; | |
11517 | +#endif | |
11518 | + | |
11519 | +static struct resource video_ram_resource = { | |
11520 | + .name = "Video RAM area", | |
11521 | + .start = 0xa0000, | |
11522 | + .end = 0xbffff, | |
11523 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
11524 | +}; | |
11525 | + | |
11526 | +/* cpu data as detected by the assembly code in head.S */ | |
11527 | +struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1}; | |
11528 | +/* common cpu data for all cpus */ | |
11529 | +struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1}; | |
11530 | +EXPORT_SYMBOL(boot_cpu_data); | |
11531 | +#ifndef CONFIG_XEN | |
11532 | +static void set_mca_bus(int x) | |
11533 | +{ | |
11534 | +#ifdef CONFIG_MCA | |
11535 | + MCA_bus = x; | |
11536 | +#endif | |
11537 | +} | |
11538 | + | |
11539 | +unsigned int def_to_bigsmp; | |
11540 | + | |
11541 | +/* for MCA, but anyone else can use it if they want */ | |
11542 | +unsigned int machine_id; | |
11543 | +unsigned int machine_submodel_id; | |
11544 | +unsigned int BIOS_revision; | |
11545 | + | |
11546 | +struct apm_info apm_info; | |
11547 | +EXPORT_SYMBOL(apm_info); | |
11548 | +#endif | |
11549 | + | |
11550 | +#if defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) | |
11551 | +struct ist_info ist_info; | |
11552 | +EXPORT_SYMBOL(ist_info); | |
11553 | +#elif defined(CONFIG_X86_SPEEDSTEP_SMI) | |
11554 | +struct ist_info ist_info; | |
11555 | +#endif | |
11556 | + | |
11557 | +#else | |
11558 | +struct cpuinfo_x86 boot_cpu_data __read_mostly; | |
11559 | +EXPORT_SYMBOL(boot_cpu_data); | |
11560 | +#endif | |
11561 | + | |
11562 | + | |
11563 | +#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) | |
11564 | +unsigned long mmu_cr4_features; | |
11565 | +#else | |
11566 | +unsigned long mmu_cr4_features = X86_CR4_PAE; | |
11567 | +#endif | |
11568 | + | |
11569 | +/* Boot loader ID as an integer, for the benefit of proc_dointvec */ | |
11570 | +int bootloader_type; | |
11571 | + | |
11572 | +/* | |
11573 | + * Early DMI memory | |
11574 | + */ | |
11575 | +int dmi_alloc_index; | |
11576 | +char dmi_alloc_data[DMI_MAX_DATA]; | |
11577 | + | |
11578 | +/* | |
11579 | + * Setup options | |
11580 | + */ | |
11581 | +struct screen_info screen_info; | |
11582 | +EXPORT_SYMBOL(screen_info); | |
11583 | +struct edid_info edid_info; | |
11584 | +EXPORT_SYMBOL_GPL(edid_info); | |
11585 | + | |
11586 | +extern int root_mountflags; | |
11587 | + | |
11588 | +unsigned long saved_video_mode; | |
11589 | + | |
11590 | +#define RAMDISK_IMAGE_START_MASK 0x07FF | |
11591 | +#define RAMDISK_PROMPT_FLAG 0x8000 | |
11592 | +#define RAMDISK_LOAD_FLAG 0x4000 | |
11593 | + | |
11594 | +static char __initdata command_line[COMMAND_LINE_SIZE]; | |
11595 | + | |
11596 | +#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) | |
11597 | +struct edd edd; | |
11598 | +#ifdef CONFIG_EDD_MODULE | |
11599 | +EXPORT_SYMBOL(edd); | |
11600 | +#endif | |
11601 | +#ifndef CONFIG_XEN | |
11602 | +/** | |
11603 | + * copy_edd() - Copy the BIOS EDD information | |
11604 | + * from boot_params into a safe place. | |
11605 | + * | |
11606 | + */ | |
11607 | +static inline void copy_edd(void) | |
11608 | +{ | |
11609 | + memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, | |
11610 | + sizeof(edd.mbr_signature)); | |
11611 | + memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); | |
11612 | + edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; | |
11613 | + edd.edd_info_nr = boot_params.eddbuf_entries; | |
11614 | +} | |
11615 | +#endif | |
11616 | +#else | |
11617 | +static inline void copy_edd(void) | |
11618 | +{ | |
11619 | +} | |
11620 | +#endif | |
11621 | + | |
11622 | +#ifdef CONFIG_BLK_DEV_INITRD | |
11623 | + | |
11624 | +#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) | |
11625 | + | |
11626 | +#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) | |
11627 | +static void __init relocate_initrd(void) | |
11628 | +{ | |
11629 | + | |
11630 | + u64 ramdisk_image = boot_params.hdr.ramdisk_image; | |
11631 | + u64 ramdisk_size = boot_params.hdr.ramdisk_size; | |
11632 | + u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; | |
11633 | + u64 ramdisk_here; | |
11634 | + unsigned long slop, clen, mapaddr; | |
11635 | + char *p, *q; | |
11636 | + | |
11637 | + /* We need to move the initrd down into lowmem */ | |
11638 | + ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size, | |
11639 | + PAGE_SIZE); | |
11640 | + | |
11641 | + if (ramdisk_here == -1ULL) | |
11642 | + panic("Cannot find place for new RAMDISK of size %lld\n", | |
11643 | + ramdisk_size); | |
11644 | + | |
11645 | + /* Note: this includes all the lowmem currently occupied by | |
11646 | + the initrd, we rely on that fact to keep the data intact. */ | |
11647 | + reserve_early(ramdisk_here, ramdisk_here + ramdisk_size, | |
11648 | + "NEW RAMDISK"); | |
11649 | + initrd_start = ramdisk_here + PAGE_OFFSET; | |
11650 | + initrd_end = initrd_start + ramdisk_size; | |
11651 | + printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", | |
11652 | + ramdisk_here, ramdisk_here + ramdisk_size); | |
11653 | + | |
11654 | + q = (char *)initrd_start; | |
11655 | + | |
11656 | + /* Copy any lowmem portion of the initrd */ | |
11657 | + if (ramdisk_image < end_of_lowmem) { | |
11658 | + clen = end_of_lowmem - ramdisk_image; | |
11659 | + p = (char *)__va(ramdisk_image); | |
11660 | + memcpy(q, p, clen); | |
11661 | + q += clen; | |
11662 | + ramdisk_image += clen; | |
11663 | + ramdisk_size -= clen; | |
11664 | + } | |
11665 | + | |
11666 | + /* Copy the highmem portion of the initrd */ | |
11667 | + while (ramdisk_size) { | |
11668 | + slop = ramdisk_image & ~PAGE_MASK; | |
11669 | + clen = ramdisk_size; | |
11670 | + if (clen > MAX_MAP_CHUNK-slop) | |
11671 | + clen = MAX_MAP_CHUNK-slop; | |
11672 | + mapaddr = ramdisk_image & PAGE_MASK; | |
11673 | + p = early_ioremap(mapaddr, clen+slop); | |
11674 | + memcpy(q, p+slop, clen); | |
11675 | + early_iounmap(p, clen+slop); | |
11676 | + q += clen; | |
11677 | + ramdisk_image += clen; | |
11678 | + ramdisk_size -= clen; | |
11679 | + } | |
11680 | + /* high pages is not converted by early_res_to_bootmem */ | |
11681 | + ramdisk_image = boot_params.hdr.ramdisk_image; | |
11682 | + ramdisk_size = boot_params.hdr.ramdisk_size; | |
11683 | + printk(KERN_INFO "Move RAMDISK from %016llx - %016llx to" | |
11684 | + " %08llx - %08llx\n", | |
11685 | + ramdisk_image, ramdisk_image + ramdisk_size - 1, | |
11686 | + ramdisk_here, ramdisk_here + ramdisk_size - 1); | |
11687 | +} | |
11688 | +#endif | |
11689 | + | |
11690 | +static void __init reserve_initrd(void) | |
11691 | { | |
11692 | #ifndef CONFIG_XEN | |
11693 | - int cpu; | |
11694 | + u64 ramdisk_image = boot_params.hdr.ramdisk_image; | |
11695 | + u64 ramdisk_size = boot_params.hdr.ramdisk_size; | |
11696 | + u64 ramdisk_end = ramdisk_image + ramdisk_size; | |
11697 | + u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; | |
11698 | + | |
11699 | + if (!boot_params.hdr.type_of_loader || | |
11700 | + !ramdisk_image || !ramdisk_size) | |
11701 | + return; /* No initrd provided by bootloader */ | |
11702 | +#else | |
11703 | + unsigned long ramdisk_image = __pa(xen_start_info->mod_start); | |
11704 | + unsigned long ramdisk_size = xen_start_info->mod_len; | |
11705 | + unsigned long ramdisk_end = ramdisk_image + ramdisk_size; | |
11706 | + unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; | |
11707 | ||
11708 | - for_each_possible_cpu(cpu) { | |
11709 | - per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu]; | |
11710 | - per_cpu(x86_bios_cpu_apicid, cpu) = | |
11711 | - x86_bios_cpu_apicid_init[cpu]; | |
11712 | -#ifdef CONFIG_NUMA | |
11713 | - per_cpu(x86_cpu_to_node_map, cpu) = | |
11714 | - x86_cpu_to_node_map_init[cpu]; | |
11715 | + if (!xen_start_info->mod_start || !ramdisk_size) | |
11716 | + return; /* No initrd provided by bootloader */ | |
11717 | #endif | |
11718 | + | |
11719 | + initrd_start = 0; | |
11720 | + | |
11721 | + if (ramdisk_size >= (end_of_lowmem>>1)) { | |
11722 | + free_early(ramdisk_image, ramdisk_end); | |
11723 | + printk(KERN_ERR "initrd too large to handle, " | |
11724 | + "disabling initrd\n"); | |
11725 | + return; | |
11726 | } | |
11727 | ||
11728 | - /* indicate the early static arrays will soon be gone */ | |
11729 | - x86_cpu_to_apicid_early_ptr = NULL; | |
11730 | - x86_bios_cpu_apicid_early_ptr = NULL; | |
11731 | -#ifdef CONFIG_NUMA | |
11732 | - x86_cpu_to_node_map_early_ptr = NULL; | |
11733 | + printk(KERN_INFO "RAMDISK: %08lx - %08lx\n", ramdisk_image, | |
11734 | + ramdisk_end); | |
11735 | + | |
11736 | + | |
11737 | + if (ramdisk_end <= end_of_lowmem) { | |
11738 | + /* All in lowmem, easy case */ | |
11739 | + /* | |
11740 | + * don't need to reserve again, already reserved early | |
11741 | + * in i386_start_kernel | |
11742 | + */ | |
11743 | + initrd_start = ramdisk_image + PAGE_OFFSET; | |
11744 | + initrd_end = initrd_start + ramdisk_size; | |
11745 | +#ifdef CONFIG_X86_64_XEN | |
11746 | + initrd_below_start_ok = 1; | |
11747 | #endif | |
11748 | + return; | |
11749 | + } | |
11750 | + | |
11751 | +#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) | |
11752 | + relocate_initrd(); | |
11753 | +#else | |
11754 | + printk(KERN_ERR "initrd extends beyond end of memory " | |
11755 | + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", | |
11756 | + ramdisk_end, end_of_lowmem); | |
11757 | + initrd_start = 0; | |
11758 | #endif | |
11759 | + free_early(ramdisk_image, ramdisk_end); | |
11760 | } | |
11761 | +#else | |
11762 | +static void __init reserve_initrd(void) | |
11763 | +{ | |
11764 | +} | |
11765 | +#endif /* CONFIG_BLK_DEV_INITRD */ | |
11766 | + | |
11767 | +static void __init parse_setup_data(void) | |
11768 | +{ | |
11769 | +#ifndef CONFIG_XEN | |
11770 | + struct setup_data *data; | |
11771 | + u64 pa_data; | |
11772 | ||
11773 | -#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP | |
11774 | -cpumask_t *cpumask_of_cpu_map __read_mostly; | |
11775 | -EXPORT_SYMBOL(cpumask_of_cpu_map); | |
11776 | + if (boot_params.hdr.version < 0x0209) | |
11777 | + return; | |
11778 | + pa_data = boot_params.hdr.setup_data; | |
11779 | + while (pa_data) { | |
11780 | + data = early_ioremap(pa_data, PAGE_SIZE); | |
11781 | + switch (data->type) { | |
11782 | + case SETUP_E820_EXT: | |
11783 | + parse_e820_ext(data, pa_data); | |
11784 | + break; | |
11785 | + default: | |
11786 | + break; | |
11787 | + } | |
11788 | + pa_data = data->next; | |
11789 | + early_iounmap(data, PAGE_SIZE); | |
11790 | + } | |
11791 | +#endif | |
11792 | +} | |
11793 | ||
11794 | -/* requires nr_cpu_ids to be initialized */ | |
11795 | -static void __init setup_cpumask_of_cpu(void) | |
11796 | +static void __init e820_reserve_setup_data(void) | |
11797 | { | |
11798 | - int i; | |
11799 | +#ifndef CONFIG_XEN | |
11800 | + struct setup_data *data; | |
11801 | + u64 pa_data; | |
11802 | + int found = 0; | |
11803 | + | |
11804 | + if (boot_params.hdr.version < 0x0209) | |
11805 | + return; | |
11806 | + pa_data = boot_params.hdr.setup_data; | |
11807 | + while (pa_data) { | |
11808 | + data = early_ioremap(pa_data, sizeof(*data)); | |
11809 | + e820_update_range(pa_data, sizeof(*data)+data->len, | |
11810 | + E820_RAM, E820_RESERVED_KERN); | |
11811 | + found = 1; | |
11812 | + pa_data = data->next; | |
11813 | + early_iounmap(data, sizeof(*data)); | |
11814 | + } | |
11815 | + if (!found) | |
11816 | + return; | |
11817 | ||
11818 | - /* alloc_bootmem zeroes memory */ | |
11819 | - cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); | |
11820 | - for (i = 0; i < nr_cpu_ids; i++) | |
11821 | - cpu_set(i, cpumask_of_cpu_map[i]); | |
11822 | + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | |
11823 | + memcpy(&e820_saved, &e820, sizeof(struct e820map)); | |
11824 | + printk(KERN_INFO "extended physical RAM map:\n"); | |
11825 | + e820_print_map("reserve setup_data"); | |
11826 | +#endif | |
11827 | } | |
11828 | -#else | |
11829 | -static inline void setup_cpumask_of_cpu(void) { } | |
11830 | + | |
11831 | +static void __init reserve_early_setup_data(void) | |
11832 | +{ | |
11833 | +#ifndef CONFIG_XEN | |
11834 | + struct setup_data *data; | |
11835 | + u64 pa_data; | |
11836 | + char buf[32]; | |
11837 | + | |
11838 | + if (boot_params.hdr.version < 0x0209) | |
11839 | + return; | |
11840 | + pa_data = boot_params.hdr.setup_data; | |
11841 | + while (pa_data) { | |
11842 | + data = early_ioremap(pa_data, sizeof(*data)); | |
11843 | + sprintf(buf, "setup data %x", data->type); | |
11844 | + reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); | |
11845 | + pa_data = data->next; | |
11846 | + early_iounmap(data, sizeof(*data)); | |
11847 | + } | |
11848 | #endif | |
11849 | +} | |
11850 | ||
11851 | -#ifdef CONFIG_X86_32 | |
11852 | /* | |
11853 | - * Great future not-so-futuristic plan: make i386 and x86_64 do it | |
11854 | - * the same way | |
11855 | + * --------- Crashkernel reservation ------------------------------ | |
11856 | + */ | |
11857 | + | |
11858 | +#ifdef CONFIG_KEXEC | |
11859 | + | |
11860 | +#ifndef CONFIG_XEN | |
11861 | +/** | |
11862 | + * Reserve @size bytes of crashkernel memory at any suitable offset. | |
11863 | + * | |
11864 | + * @size: Size of the crashkernel memory to reserve. | |
11865 | + * Returns the base address on success, and -1ULL on failure. | |
11866 | + */ | |
11867 | +unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) | |
11868 | +{ | |
11869 | + const unsigned long long alignment = 16<<20; /* 16M */ | |
11870 | + unsigned long long start = 0LL; | |
11871 | + | |
11872 | + while (1) { | |
11873 | + int ret; | |
11874 | + | |
11875 | + start = find_e820_area(start, ULONG_MAX, size, alignment); | |
11876 | + if (start == -1ULL) | |
11877 | + return start; | |
11878 | + | |
11879 | + /* try to reserve it */ | |
11880 | + ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE); | |
11881 | + if (ret >= 0) | |
11882 | + return start; | |
11883 | + | |
11884 | + start += alignment; | |
11885 | + } | |
11886 | +} | |
11887 | + | |
11888 | +static inline unsigned long long get_total_mem(void) | |
11889 | +{ | |
11890 | + unsigned long long total; | |
11891 | + | |
11892 | + total = max_low_pfn - min_low_pfn; | |
11893 | +#ifdef CONFIG_HIGHMEM | |
11894 | + total += highend_pfn - highstart_pfn; | |
11895 | +#endif | |
11896 | + | |
11897 | + return total << PAGE_SHIFT; | |
11898 | +} | |
11899 | + | |
11900 | +static void __init reserve_crashkernel(void) | |
11901 | +{ | |
11902 | + unsigned long long total_mem; | |
11903 | + unsigned long long crash_size, crash_base; | |
11904 | + int ret; | |
11905 | + | |
11906 | + total_mem = get_total_mem(); | |
11907 | + | |
11908 | + ret = parse_crashkernel(boot_command_line, total_mem, | |
11909 | + &crash_size, &crash_base); | |
11910 | + if (ret != 0 || crash_size <= 0) | |
11911 | + return; | |
11912 | + | |
11913 | + /* 0 means: find the address automatically */ | |
11914 | + if (crash_base <= 0) { | |
11915 | + crash_base = find_and_reserve_crashkernel(crash_size); | |
11916 | + if (crash_base == -1ULL) { | |
11917 | + pr_info("crashkernel reservation failed. " | |
11918 | + "No suitable area found.\n"); | |
11919 | + return; | |
11920 | + } | |
11921 | + } else { | |
11922 | + ret = reserve_bootmem_generic(crash_base, crash_size, | |
11923 | + BOOTMEM_EXCLUSIVE); | |
11924 | + if (ret < 0) { | |
11925 | + pr_info("crashkernel reservation failed - " | |
11926 | + "memory is in use\n"); | |
11927 | + return; | |
11928 | + } | |
11929 | + } | |
11930 | + | |
11931 | + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " | |
11932 | + "for crashkernel (System RAM: %ldMB)\n", | |
11933 | + (unsigned long)(crash_size >> 20), | |
11934 | + (unsigned long)(crash_base >> 20), | |
11935 | + (unsigned long)(total_mem >> 20)); | |
11936 | + | |
11937 | + crashk_res.start = crash_base; | |
11938 | + crashk_res.end = crash_base + crash_size - 1; | |
11939 | + insert_resource(&iomem_resource, &crashk_res); | |
11940 | +} | |
11941 | +#else | |
11942 | +#define reserve_crashkernel xen_machine_kexec_setup_resources | |
11943 | +#endif | |
11944 | +#else | |
11945 | +static void __init reserve_crashkernel(void) | |
11946 | +{ | |
11947 | +} | |
11948 | +#endif | |
11949 | + | |
11950 | +static struct resource standard_io_resources[] = { | |
11951 | + { .name = "dma1", .start = 0x00, .end = 0x1f, | |
11952 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
11953 | + { .name = "pic1", .start = 0x20, .end = 0x21, | |
11954 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
11955 | + { .name = "timer0", .start = 0x40, .end = 0x43, | |
11956 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
11957 | + { .name = "timer1", .start = 0x50, .end = 0x53, | |
11958 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
11959 | + { .name = "keyboard", .start = 0x60, .end = 0x60, | |
11960 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
11961 | + { .name = "keyboard", .start = 0x64, .end = 0x64, | |
11962 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
11963 | + { .name = "dma page reg", .start = 0x80, .end = 0x8f, | |
11964 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
11965 | + { .name = "pic2", .start = 0xa0, .end = 0xa1, | |
11966 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
11967 | + { .name = "dma2", .start = 0xc0, .end = 0xdf, | |
11968 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
11969 | + { .name = "fpu", .start = 0xf0, .end = 0xff, | |
11970 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO } | |
11971 | +}; | |
11972 | + | |
11973 | +static void __init reserve_standard_io_resources(void) | |
11974 | +{ | |
11975 | + int i; | |
11976 | + | |
11977 | + /* Nothing to do if not running in dom0. */ | |
11978 | + if (!is_initial_xendomain()) | |
11979 | + return; | |
11980 | + | |
11981 | + /* request I/O space for devices used on all i[345]86 PCs */ | |
11982 | + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | |
11983 | + request_resource(&ioport_resource, &standard_io_resources[i]); | |
11984 | + | |
11985 | +} | |
11986 | + | |
11987 | +#ifdef CONFIG_PROC_VMCORE | |
11988 | +/* elfcorehdr= specifies the location of elf core header | |
11989 | + * stored by the crashed kernel. This option will be passed | |
11990 | + * by kexec loader to the capture kernel. | |
11991 | */ | |
11992 | -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; | |
11993 | -EXPORT_SYMBOL(__per_cpu_offset); | |
11994 | +static int __init setup_elfcorehdr(char *arg) | |
11995 | +{ | |
11996 | + char *end; | |
11997 | + if (!arg) | |
11998 | + return -EINVAL; | |
11999 | + elfcorehdr_addr = memparse(arg, &end); | |
12000 | + return end > arg ? 0 : -EINVAL; | |
12001 | +} | |
12002 | +early_param("elfcorehdr", setup_elfcorehdr); | |
12003 | #endif | |
12004 | ||
12005 | +static struct x86_quirks default_x86_quirks __initdata; | |
12006 | + | |
12007 | +struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; | |
12008 | + | |
12009 | +/* | |
12010 | + * Determine if we were loaded by an EFI loader. If so, then we have also been | |
12011 | + * passed the efi memmap, systab, etc., so we should use these data structures | |
12012 | + * for initialization. Note, the efi init code path is determined by the | |
12013 | + * global efi_enabled. This allows the same kernel image to be used on existing | |
12014 | + * systems (with a traditional BIOS) as well as on EFI systems. | |
12015 | + */ | |
12016 | /* | |
12017 | - * Great future plan: | |
12018 | - * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. | |
12019 | - * Always point %gs to its beginning | |
12020 | + * setup_arch - architecture-specific boot-time initializations | |
12021 | + * | |
12022 | + * Note: On x86_64, fixmaps are ready for use even before this is called. | |
12023 | */ | |
12024 | -void __init setup_per_cpu_areas(void) | |
12025 | + | |
12026 | +void __init setup_arch(char **cmdline_p) | |
12027 | { | |
12028 | - int i, highest_cpu = 0; | |
12029 | - unsigned long size; | |
12030 | +#ifdef CONFIG_XEN | |
12031 | + unsigned int i; | |
12032 | + unsigned long p2m_pages; | |
12033 | + struct physdev_set_iopl set_iopl; | |
12034 | ||
12035 | -#ifdef CONFIG_HOTPLUG_CPU | |
12036 | - prefill_possible_map(); | |
12037 | +#ifdef CONFIG_X86_32 | |
12038 | + /* Force a quick death if the kernel panics (not domain 0). */ | |
12039 | + extern int panic_timeout; | |
12040 | + if (!panic_timeout && !is_initial_xendomain()) | |
12041 | + panic_timeout = 1; | |
12042 | #endif | |
12043 | ||
12044 | - /* Copy section for each CPU (we discard the original) */ | |
12045 | - size = PERCPU_ENOUGH_ROOM; | |
12046 | - printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", | |
12047 | - size); | |
12048 | - | |
12049 | - for_each_possible_cpu(i) { | |
12050 | - char *ptr; | |
12051 | -#ifndef CONFIG_NEED_MULTIPLE_NODES | |
12052 | - ptr = alloc_bootmem_pages(size); | |
12053 | -#else | |
12054 | - int node = early_cpu_to_node(i); | |
12055 | - if (!node_online(node) || !NODE_DATA(node)) { | |
12056 | - ptr = alloc_bootmem_pages(size); | |
12057 | - printk(KERN_INFO | |
12058 | - "cpu %d has no node or node-local memory\n", i); | |
12059 | - } | |
12060 | - else | |
12061 | - ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); | |
12062 | + /* Register a call for panic conditions. */ | |
12063 | + atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); | |
12064 | + | |
12065 | + WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable, | |
12066 | + VMASST_TYPE_writable_pagetables)); | |
12067 | +#ifdef CONFIG_X86_32 | |
12068 | + WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable, | |
12069 | + VMASST_TYPE_4gb_segments)); | |
12070 | +#endif | |
12071 | +#endif /* CONFIG_XEN */ | |
12072 | + | |
12073 | +#ifdef CONFIG_X86_32 | |
12074 | + memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | |
12075 | + visws_early_detect(); | |
12076 | + pre_setup_arch_hook(); | |
12077 | +#else | |
12078 | + printk(KERN_INFO "Command line: %s\n", boot_command_line); | |
12079 | +#endif | |
12080 | + | |
12081 | + early_cpu_init(); | |
12082 | + early_ioremap_init(); | |
12083 | + | |
12084 | +#ifndef CONFIG_XEN | |
12085 | + ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); | |
12086 | + screen_info = boot_params.screen_info; | |
12087 | + edid_info = boot_params.edid_info; | |
12088 | +#ifdef CONFIG_X86_32 | |
12089 | + apm_info.bios = boot_params.apm_bios_info; | |
12090 | + ist_info = boot_params.ist_info; | |
12091 | + if (boot_params.sys_desc_table.length != 0) { | |
12092 | + set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2); | |
12093 | + machine_id = boot_params.sys_desc_table.table[0]; | |
12094 | + machine_submodel_id = boot_params.sys_desc_table.table[1]; | |
12095 | + BIOS_revision = boot_params.sys_desc_table.table[2]; | |
12096 | + } | |
12097 | +#endif | |
12098 | + saved_video_mode = boot_params.hdr.vid_mode; | |
12099 | + bootloader_type = boot_params.hdr.type_of_loader; | |
12100 | + | |
12101 | +#ifdef CONFIG_BLK_DEV_RAM | |
12102 | + rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; | |
12103 | + rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); | |
12104 | + rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); | |
12105 | +#endif | |
12106 | +#ifdef CONFIG_EFI | |
12107 | + if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | |
12108 | +#ifdef CONFIG_X86_32 | |
12109 | + "EL32", | |
12110 | +#else | |
12111 | + "EL64", | |
12112 | #endif | |
12113 | - if (!ptr) | |
12114 | - panic("Cannot allocate cpu data for CPU %d\n", i); | |
12115 | + 4)) { | |
12116 | + efi_enabled = 1; | |
12117 | + efi_reserve_early(); | |
12118 | + } | |
12119 | +#endif | |
12120 | +#else /* CONFIG_XEN */ | |
12121 | +#ifdef CONFIG_X86_32 | |
12122 | + /* This must be initialized to UNNAMED_MAJOR for ipconfig to work | |
12123 | + properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd. | |
12124 | + */ | |
12125 | + ROOT_DEV = MKDEV(UNNAMED_MAJOR,0); | |
12126 | +#else | |
12127 | + ROOT_DEV = MKDEV(RAMDISK_MAJOR,0); | |
12128 | +#endif | |
12129 | + if (is_initial_xendomain()) { | |
12130 | + const struct dom0_vga_console_info *info = | |
12131 | + (void *)((char *)xen_start_info + | |
12132 | + xen_start_info->console.dom0.info_off); | |
12133 | + | |
12134 | + dom0_init_screen_info(info, | |
12135 | + xen_start_info->console.dom0.info_size); | |
12136 | + xen_start_info->console.domU.mfn = 0; | |
12137 | + xen_start_info->console.domU.evtchn = 0; | |
12138 | + } else | |
12139 | + screen_info.orig_video_isVGA = 0; | |
12140 | + copy_edid(); | |
12141 | +#endif /* CONFIG_XEN */ | |
12142 | + | |
12143 | + ARCH_SETUP | |
12144 | + | |
12145 | + setup_memory_map(); | |
12146 | + parse_setup_data(); | |
12147 | + /* update the e820_saved too */ | |
12148 | + e820_reserve_setup_data(); | |
12149 | + | |
12150 | + copy_edd(); | |
12151 | + | |
12152 | +#ifndef CONFIG_XEN | |
12153 | + if (!boot_params.hdr.root_flags) | |
12154 | + root_mountflags &= ~MS_RDONLY; | |
12155 | +#endif | |
12156 | + init_mm.start_code = (unsigned long) _text; | |
12157 | + init_mm.end_code = (unsigned long) _etext; | |
12158 | + init_mm.end_data = (unsigned long) _edata; | |
12159 | +#ifdef CONFIG_X86_32 | |
12160 | +#ifndef CONFIG_XEN | |
12161 | + init_mm.brk = init_pg_tables_end + PAGE_OFFSET; | |
12162 | +#else | |
12163 | + init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) + | |
12164 | + xen_start_info->nr_pt_frames) << PAGE_SHIFT; | |
12165 | +#endif | |
12166 | +#else | |
12167 | + init_mm.brk = (unsigned long) &_end; | |
12168 | +#endif | |
12169 | + | |
12170 | + code_resource.start = virt_to_phys(_text); | |
12171 | + code_resource.end = virt_to_phys(_etext)-1; | |
12172 | + data_resource.start = virt_to_phys(_etext); | |
12173 | + data_resource.end = virt_to_phys(_edata)-1; | |
12174 | + bss_resource.start = virt_to_phys(&__bss_start); | |
12175 | + bss_resource.end = virt_to_phys(&__bss_stop)-1; | |
12176 | + | |
12177 | + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | |
12178 | + *cmdline_p = command_line; | |
12179 | + | |
12180 | + parse_early_param(); | |
12181 | + | |
12182 | #ifdef CONFIG_X86_64 | |
12183 | - cpu_pda(i)->data_offset = ptr - __per_cpu_start; | |
12184 | + check_efer(); | |
12185 | +#endif | |
12186 | + | |
12187 | +#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) | |
12188 | + /* | |
12189 | + * Must be before kernel pagetables are setup | |
12190 | + * or fixmap area is touched. | |
12191 | + */ | |
12192 | + vmi_init(); | |
12193 | +#endif | |
12194 | + | |
12195 | + /* after early param, so could get panic from serial */ | |
12196 | + reserve_early_setup_data(); | |
12197 | + | |
12198 | + if (acpi_mps_check()) { | |
12199 | +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN) | |
12200 | + disable_apic = 1; | |
12201 | +#endif | |
12202 | + setup_clear_cpu_cap(X86_FEATURE_APIC); | |
12203 | + } | |
12204 | + | |
12205 | +#ifdef CONFIG_PCI | |
12206 | + if (pci_early_dump_regs) | |
12207 | + early_dump_pci_devices(); | |
12208 | +#endif | |
12209 | + | |
12210 | + finish_e820_parsing(); | |
12211 | + | |
12212 | +#ifdef CONFIG_X86_32 | |
12213 | + probe_roms(); | |
12214 | +#endif | |
12215 | + | |
12216 | +#ifndef CONFIG_XEN | |
12217 | + /* after parse_early_param, so could debug it */ | |
12218 | + insert_resource(&iomem_resource, &code_resource); | |
12219 | + insert_resource(&iomem_resource, &data_resource); | |
12220 | + insert_resource(&iomem_resource, &bss_resource); | |
12221 | + | |
12222 | + if (efi_enabled) | |
12223 | + efi_init(); | |
12224 | + | |
12225 | +#ifdef CONFIG_X86_32 | |
12226 | + if (ppro_with_ram_bug()) { | |
12227 | + e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, | |
12228 | + E820_RESERVED); | |
12229 | + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | |
12230 | + printk(KERN_INFO "fixed physical RAM map:\n"); | |
12231 | + e820_print_map("bad_ppro"); | |
12232 | + } | |
12233 | #else | |
12234 | - __per_cpu_offset[i] = ptr - __per_cpu_start; | |
12235 | + early_gart_iommu_check(); | |
12236 | #endif | |
12237 | - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | |
12238 | +#endif /* CONFIG_XEN */ | |
12239 | ||
12240 | - highest_cpu = i; | |
12241 | + /* | |
12242 | + * partially used pages are not usable - thus | |
12243 | + * we are rounding upwards: | |
12244 | + */ | |
12245 | + max_pfn = e820_end_of_ram_pfn(); | |
12246 | + | |
12247 | + /* preallocate 4k for mptable mpc */ | |
12248 | + early_reserve_e820_mpc_new(); | |
12249 | + /* update e820 for memory not covered by WB MTRRs */ | |
12250 | + mtrr_bp_init(); | |
12251 | +#ifndef CONFIG_XEN | |
12252 | + if (mtrr_trim_uncached_memory(max_pfn)) | |
12253 | + max_pfn = e820_end_of_ram_pfn(); | |
12254 | +#endif | |
12255 | + | |
12256 | +#ifdef CONFIG_X86_32 | |
12257 | + /* max_low_pfn gets updated here */ | |
12258 | + find_low_pfn_range(); | |
12259 | +#else | |
12260 | + num_physpages = max_pfn; | |
12261 | + max_mapnr = max_pfn; | |
12262 | + | |
12263 | + | |
12264 | + /* How many end-of-memory variables you have, grandma! */ | |
12265 | + /* need this before calling reserve_initrd */ | |
12266 | + if (max_pfn > (1UL<<(32 - PAGE_SHIFT))) | |
12267 | + max_low_pfn = e820_end_of_low_ram_pfn(); | |
12268 | + else | |
12269 | + max_low_pfn = max_pfn; | |
12270 | + | |
12271 | + high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; | |
12272 | +#endif | |
12273 | + | |
12274 | + /* max_pfn_mapped is updated here */ | |
12275 | + max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); | |
12276 | + max_pfn_mapped = max_low_pfn_mapped; | |
12277 | + | |
12278 | +#ifdef CONFIG_X86_64 | |
12279 | + if (max_pfn > max_low_pfn) { | |
12280 | + max_pfn_mapped = init_memory_mapping(1UL<<32, | |
12281 | + max_pfn<<PAGE_SHIFT); | |
12282 | + /* can we preserve max_low_pfn? */ | |
12283 | + max_low_pfn = max_pfn; | |
12284 | } | |
12285 | +#endif | |
12286 | ||
12287 | - nr_cpu_ids = highest_cpu + 1; | |
12288 | - printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids); | |
12289 | + /* | |
12290 | + * NOTE: On x86-32, only from this point on, fixmaps are ready for use. | |
12291 | + */ | |
12292 | ||
12293 | - /* Setup percpu data maps */ | |
12294 | - setup_per_cpu_maps(); | |
12295 | +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT | |
12296 | + if (init_ohci1394_dma_early) | |
12297 | + init_ohci1394_dma_on_all_controllers(); | |
12298 | +#endif | |
12299 | ||
12300 | - /* Setup cpumask_of_cpu map */ | |
12301 | - setup_cpumask_of_cpu(); | |
12302 | -} | |
12303 | + reserve_initrd(); | |
12304 | + | |
12305 | +#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN) | |
12306 | + vsmp_init(); | |
12307 | +#endif | |
12308 | + | |
12309 | + if (is_initial_xendomain()) | |
12310 | + dmi_scan_machine(); | |
12311 | + | |
12312 | + io_delay_init(); | |
12313 | + | |
12314 | +#ifdef CONFIG_ACPI | |
12315 | + if (!is_initial_xendomain()) { | |
12316 | + printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); | |
12317 | + disable_acpi(); | |
12318 | + } | |
12319 | +#endif | |
12320 | + | |
12321 | + /* | |
12322 | + * Parse the ACPI tables for possible boot-time SMP configuration. | |
12323 | + */ | |
12324 | + acpi_boot_table_init(); | |
12325 | + | |
12326 | +#ifdef CONFIG_ACPI_NUMA | |
12327 | + /* | |
12328 | + * Parse SRAT to discover nodes. | |
12329 | + */ | |
12330 | + acpi_numa_init(); | |
12331 | +#endif | |
12332 | + | |
12333 | + initmem_init(0, max_pfn); | |
12334 | ||
12335 | +#ifdef CONFIG_ACPI_SLEEP | |
12336 | + /* | |
12337 | + * Reserve low memory region for sleep support. | |
12338 | + */ | |
12339 | + acpi_reserve_bootmem(); | |
12340 | #endif | |
12341 | +#ifdef CONFIG_X86_FIND_SMP_CONFIG | |
12342 | + /* | |
12343 | + * Find and reserve possible boot-time SMP configuration: | |
12344 | + */ | |
12345 | + find_smp_config(); | |
12346 | +#endif | |
12347 | + reserve_crashkernel(); | |
12348 | + | |
12349 | +#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN) | |
12350 | + /* | |
12351 | + * dma32_reserve_bootmem() allocates bootmem which may conflict | |
12352 | + * with the crashkernel command line, so do that after | |
12353 | + * reserve_crashkernel() | |
12354 | + */ | |
12355 | + dma32_reserve_bootmem(); | |
12356 | +#endif | |
12357 | + | |
12358 | + reserve_ibft_region(); | |
12359 | + | |
12360 | +#ifdef CONFIG_KVM_CLOCK | |
12361 | + kvmclock_init(); | |
12362 | +#endif | |
12363 | + | |
12364 | + xen_pagetable_setup_start(swapper_pg_dir); | |
12365 | + paging_init(); | |
12366 | + xen_pagetable_setup_done(swapper_pg_dir); | |
12367 | + paravirt_post_allocator_init(); | |
12368 | + | |
12369 | +#ifdef CONFIG_X86_64 | |
12370 | + map_vsyscall(); | |
12371 | +#endif | |
12372 | + | |
12373 | +#ifdef CONFIG_XEN | |
12374 | + p2m_pages = max_pfn; | |
12375 | + if (xen_start_info->nr_pages > max_pfn) { | |
12376 | + /* | |
12377 | + * the max_pfn was shrunk (probably by mem= or highmem= | |
12378 | + * kernel parameter); shrink reservation with the HV | |
12379 | + */ | |
12380 | + struct xen_memory_reservation reservation = { | |
12381 | + .address_bits = 0, | |
12382 | + .extent_order = 0, | |
12383 | + .domid = DOMID_SELF | |
12384 | + }; | |
12385 | + unsigned int difference; | |
12386 | + int ret; | |
12387 | + | |
12388 | + difference = xen_start_info->nr_pages - max_pfn; | |
12389 | + | |
12390 | + set_xen_guest_handle(reservation.extent_start, | |
12391 | + ((unsigned long *)xen_start_info->mfn_list) + max_pfn); | |
12392 | + reservation.nr_extents = difference; | |
12393 | + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | |
12394 | + &reservation); | |
12395 | + BUG_ON(ret != difference); | |
12396 | + } | |
12397 | + else if (max_pfn > xen_start_info->nr_pages) | |
12398 | + p2m_pages = xen_start_info->nr_pages; | |
12399 | + | |
12400 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) { | |
12401 | + unsigned long i, j; | |
12402 | + unsigned int k, fpp; | |
12403 | + | |
12404 | + /* Make sure we have a large enough P->M table. */ | |
12405 | + phys_to_machine_mapping = alloc_bootmem_pages( | |
12406 | + max_pfn * sizeof(unsigned long)); | |
12407 | + memset(phys_to_machine_mapping, ~0, | |
12408 | + max_pfn * sizeof(unsigned long)); | |
12409 | + memcpy(phys_to_machine_mapping, | |
12410 | + (unsigned long *)xen_start_info->mfn_list, | |
12411 | + p2m_pages * sizeof(unsigned long)); | |
12412 | + free_bootmem( | |
12413 | + __pa(xen_start_info->mfn_list), | |
12414 | + PFN_PHYS(PFN_UP(xen_start_info->nr_pages * | |
12415 | + sizeof(unsigned long)))); | |
12416 | + | |
12417 | + /* | |
12418 | + * Initialise the list of the frames that specify the list of | |
12419 | + * frames that make up the p2m table. Used by save/restore. | |
12420 | + */ | |
12421 | + pfn_to_mfn_frame_list_list = alloc_bootmem_pages(PAGE_SIZE); | |
12422 | + | |
12423 | + fpp = PAGE_SIZE/sizeof(unsigned long); | |
12424 | + for (i = j = 0, k = -1; i < max_pfn; i += fpp, j++) { | |
12425 | + if (j == fpp) | |
12426 | + j = 0; | |
12427 | + if (j == 0) { | |
12428 | + k++; | |
12429 | + BUG_ON(k>=ARRAY_SIZE(pfn_to_mfn_frame_list)); | |
12430 | + pfn_to_mfn_frame_list[k] = | |
12431 | + alloc_bootmem_pages(PAGE_SIZE); | |
12432 | + pfn_to_mfn_frame_list_list[k] = | |
12433 | + virt_to_mfn(pfn_to_mfn_frame_list[k]); | |
12434 | + } | |
12435 | + pfn_to_mfn_frame_list[k][j] = | |
12436 | + virt_to_mfn(&phys_to_machine_mapping[i]); | |
12437 | + } | |
12438 | + HYPERVISOR_shared_info->arch.max_pfn = max_pfn; | |
12439 | + HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | |
12440 | + virt_to_mfn(pfn_to_mfn_frame_list_list); | |
12441 | + } | |
12442 | + | |
12443 | + /* Mark all ISA DMA channels in-use - using them wouldn't work. */ | |
12444 | + for (i = 0; i < MAX_DMA_CHANNELS; ++i) | |
12445 | + if (i != 4 && request_dma(i, "xen") != 0) | |
12446 | + BUG(); | |
12447 | +#endif /* CONFIG_XEN */ | |
12448 | + | |
12449 | +#ifdef CONFIG_X86_GENERICARCH | |
12450 | + generic_apic_probe(); | |
12451 | +#endif | |
12452 | + | |
12453 | +#ifndef CONFIG_XEN | |
12454 | + early_quirks(); | |
12455 | +#endif | |
12456 | + | |
12457 | + /* | |
12458 | + * Read APIC and some other early information from ACPI tables. | |
12459 | + */ | |
12460 | + acpi_boot_init(); | |
12461 | + | |
12462 | +#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS) | |
12463 | + /* | |
12464 | + * get boot-time SMP configuration: | |
12465 | + */ | |
12466 | + if (smp_found_config) | |
12467 | + get_smp_config(); | |
12468 | +#endif | |
12469 | + | |
12470 | + prefill_possible_map(); | |
12471 | +#ifdef CONFIG_X86_64 | |
12472 | + init_cpu_to_node(); | |
12473 | +#endif | |
12474 | + | |
12475 | +#ifndef CONFIG_XEN | |
12476 | + init_apic_mappings(); | |
12477 | + ioapic_init_mappings(); | |
12478 | + | |
12479 | + kvm_guest_init(); | |
12480 | + | |
12481 | + e820_reserve_resources(); | |
12482 | + e820_mark_nosave_regions(max_low_pfn); | |
12483 | +#else | |
12484 | + if (is_initial_xendomain()) | |
12485 | + e820_reserve_resources(); | |
12486 | +#endif | |
12487 | + | |
12488 | +#ifdef CONFIG_X86_32 | |
82094b55 AF |
12489 | + if (is_initial_xendomain()) |
12490 | + request_resource(&iomem_resource, &video_ram_resource); | |
2cb7cef9 BS |
12491 | +#endif |
12492 | + reserve_standard_io_resources(); | |
12493 | + | |
12494 | +#ifndef CONFIG_XEN | |
12495 | + e820_setup_gap(); | |
12496 | + | |
12497 | +#ifdef CONFIG_VT | |
12498 | +#if defined(CONFIG_VGA_CONSOLE) | |
12499 | + if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) | |
12500 | + conswitchp = &vga_con; | |
12501 | +#elif defined(CONFIG_DUMMY_CONSOLE) | |
12502 | + conswitchp = &dummy_con; | |
12503 | +#endif | |
12504 | +#endif | |
12505 | +#else /* CONFIG_XEN */ | |
12506 | + if (is_initial_xendomain()) | |
12507 | + e820_setup_gap(); | |
12508 | + | |
12509 | + set_iopl.iopl = 1; | |
12510 | + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl)); | |
12511 | + | |
12512 | +#ifdef CONFIG_VT | |
12513 | +#ifdef CONFIG_DUMMY_CONSOLE | |
12514 | + conswitchp = &dummy_con; | |
12515 | +#endif | |
12516 | +#ifdef CONFIG_VGA_CONSOLE | |
12517 | + if (is_initial_xendomain()) | |
12518 | + conswitchp = &vga_con; | |
12519 | +#endif | |
12520 | +#endif | |
12521 | +#endif /* CONFIG_XEN */ | |
12522 | +} | |
12523 | + | |
12524 | +#ifdef CONFIG_XEN | |
12525 | +static int | |
12526 | +xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) | |
12527 | +{ | |
12528 | + HYPERVISOR_shutdown(SHUTDOWN_crash); | |
12529 | + /* we're never actually going to get here... */ | |
12530 | + return NOTIFY_DONE; | |
12531 | +} | |
12531 | +#endif /* CONFIG_XEN */ | |
82094b55 | 12533 | --- sle11-2009-10-16.orig/arch/x86/kernel/setup64-xen.c 2009-03-16 16:38:05.000000000 +0100 |
2cb7cef9 BS |
12534 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 |
12535 | @@ -1,370 +0,0 @@ | |
12536 | -/* | |
12537 | - * X86-64 specific CPU setup. | |
12538 | - * Copyright (C) 1995 Linus Torvalds | |
12539 | - * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen. | |
12540 | - * See setup.c for older changelog. | |
12541 | - * | |
12542 | - * Jun Nakajima <jun.nakajima@intel.com> | |
12543 | - * Modified for Xen | |
12544 | - * | |
12545 | - */ | |
12546 | -#include <linux/init.h> | |
12547 | -#include <linux/kernel.h> | |
12548 | -#include <linux/sched.h> | |
12549 | -#include <linux/string.h> | |
12550 | -#include <linux/bootmem.h> | |
12551 | -#include <linux/bitops.h> | |
12552 | -#include <linux/module.h> | |
12553 | -#include <linux/kgdb.h> | |
12554 | -#include <asm/pda.h> | |
12555 | -#include <asm/pgtable.h> | |
12556 | -#include <asm/processor.h> | |
12557 | -#include <asm/desc.h> | |
12558 | -#include <asm/atomic.h> | |
12559 | -#include <asm/mmu_context.h> | |
12560 | -#include <asm/smp.h> | |
12561 | -#include <asm/i387.h> | |
12562 | -#include <asm/percpu.h> | |
12563 | -#include <asm/proto.h> | |
12564 | -#include <asm/sections.h> | |
12565 | -#include <asm/setup.h> | |
12566 | -#include <asm/genapic.h> | |
12567 | -#ifdef CONFIG_XEN | |
12568 | -#include <asm/hypervisor.h> | |
12569 | -#endif | |
12570 | - | |
12571 | -#ifndef CONFIG_DEBUG_BOOT_PARAMS | |
12572 | -struct boot_params __initdata boot_params; | |
12573 | -#else | |
12574 | -struct boot_params boot_params; | |
12575 | -#endif | |
12576 | - | |
12577 | -cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | |
12578 | - | |
12579 | -struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; | |
12580 | -EXPORT_SYMBOL(_cpu_pda); | |
12581 | -struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; | |
12582 | - | |
12583 | -#ifndef CONFIG_X86_NO_IDT | |
12584 | -struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | |
12585 | -#endif | |
12586 | - | |
12587 | -char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); | |
12588 | - | |
12589 | -unsigned long __supported_pte_mask __read_mostly = ~0UL; | |
12590 | -EXPORT_SYMBOL(__supported_pte_mask); | |
12591 | - | |
12592 | -static int do_not_nx __cpuinitdata = 0; | |
12593 | - | |
12594 | -/* noexec=on|off | |
12595 | -Control non executable mappings for 64bit processes. | |
12596 | - | |
12597 | -on Enable(default) | |
12598 | -off Disable | |
12599 | -*/ | |
12600 | -static int __init nonx_setup(char *str) | |
12601 | -{ | |
12602 | - if (!str) | |
12603 | - return -EINVAL; | |
12604 | - if (!strncmp(str, "on", 2)) { | |
12605 | - __supported_pte_mask |= _PAGE_NX; | |
12606 | - do_not_nx = 0; | |
12607 | - } else if (!strncmp(str, "off", 3)) { | |
12608 | - do_not_nx = 1; | |
12609 | - __supported_pte_mask &= ~_PAGE_NX; | |
12610 | - } | |
12611 | - return 0; | |
12612 | -} | |
12613 | -early_param("noexec", nonx_setup); | |
12614 | - | |
12615 | -int force_personality32 = 0; | |
12616 | - | |
12617 | -/* noexec32=on|off | |
12618 | -Control non executable heap for 32bit processes. | |
12619 | -To control the stack too use noexec=off | |
12620 | - | |
12621 | -on PROT_READ does not imply PROT_EXEC for 32bit processes (default) | |
12622 | -off PROT_READ implies PROT_EXEC | |
12623 | -*/ | |
12624 | -static int __init nonx32_setup(char *str) | |
12625 | -{ | |
12626 | - if (!strcmp(str, "on")) | |
12627 | - force_personality32 &= ~READ_IMPLIES_EXEC; | |
12628 | - else if (!strcmp(str, "off")) | |
12629 | - force_personality32 |= READ_IMPLIES_EXEC; | |
12630 | - return 1; | |
12631 | -} | |
12632 | -__setup("noexec32=", nonx32_setup); | |
12633 | - | |
12634 | -#ifdef CONFIG_XEN | |
12635 | -static void __init_refok switch_pt(int cpu) | |
12636 | -{ | |
12637 | - if (cpu == 0) | |
12638 | - xen_init_pt(); | |
12639 | - xen_pt_switch(__pa_symbol(init_level4_pgt)); | |
12640 | - xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt))); | |
12641 | -} | |
12642 | -#define switch_pt() switch_pt(cpu) | |
12643 | - | |
12644 | -static void __cpuinit cpu_gdt_init(const struct desc_ptr *gdt_descr) | |
12645 | -{ | |
12646 | - unsigned long frames[16]; | |
12647 | - unsigned long va; | |
12648 | - int f; | |
12649 | - | |
12650 | - for (va = gdt_descr->address, f = 0; | |
12651 | - va < gdt_descr->address + gdt_descr->size; | |
12652 | - va += PAGE_SIZE, f++) { | |
12653 | - frames[f] = virt_to_mfn(va); | |
12654 | - make_page_readonly( | |
12655 | - (void *)va, XENFEAT_writable_descriptor_tables); | |
12656 | - } | |
12657 | - if (HYPERVISOR_set_gdt(frames, (gdt_descr->size + 1) / | |
12658 | - sizeof (struct desc_struct))) | |
12659 | - BUG(); | |
12660 | -} | |
12661 | -#else | |
12662 | -static void switch_pt(void) | |
12663 | -{ | |
12664 | - asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); | |
12665 | -} | |
12666 | - | |
12667 | -static void __cpuinit cpu_gdt_init(const struct desc_ptr *gdt_descr) | |
12668 | -{ | |
12669 | - load_gdt(gdt_descr); | |
12670 | - load_idt(idt_descr); | |
12671 | -} | |
12672 | -#endif | |
12673 | - | |
12674 | -void pda_init(int cpu) | |
12675 | -{ | |
12676 | - struct x8664_pda *pda = cpu_pda(cpu); | |
12677 | - | |
12678 | - /* Setup up data that may be needed in __get_free_pages early */ | |
12679 | - asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); | |
12680 | -#ifndef CONFIG_XEN | |
12681 | - /* Memory clobbers used to order PDA accessed */ | |
12682 | - mb(); | |
12683 | - wrmsrl(MSR_GS_BASE, pda); | |
12684 | - mb(); | |
12685 | -#else | |
12686 | - if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, | |
12687 | - (unsigned long)pda)) | |
12688 | - BUG(); | |
12689 | -#endif | |
12690 | - pda->cpunumber = cpu; | |
12691 | - pda->irqcount = -1; | |
12692 | - pda->kernelstack = | |
12693 | - (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; | |
12694 | - pda->active_mm = &init_mm; | |
12695 | - pda->mmu_state = 0; | |
12696 | - | |
12697 | - if (cpu == 0) { | |
12698 | - /* others are initialized in smpboot.c */ | |
12699 | - pda->pcurrent = &init_task; | |
12700 | - pda->irqstackptr = boot_cpu_stack; | |
12701 | - } else { | |
12702 | - pda->irqstackptr = (char *) | |
12703 | - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); | |
12704 | - if (!pda->irqstackptr) | |
12705 | - panic("cannot allocate irqstack for cpu %d", cpu); | |
12706 | - } | |
12707 | - | |
12708 | - switch_pt(); | |
12709 | - | |
12710 | - pda->irqstackptr += IRQSTACKSIZE-64; | |
12711 | -} | |
12712 | - | |
12713 | -#ifndef CONFIG_X86_NO_TSS | |
12714 | -char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ] | |
12715 | -__attribute__((section(".bss.page_aligned"))); | |
12716 | -#endif | |
12717 | - | |
12718 | -extern asmlinkage void ignore_sysret(void); | |
12719 | - | |
12720 | -/* May not be marked __init: used by software suspend */ | |
12721 | -void syscall_init(void) | |
12722 | -{ | |
12723 | -#ifndef CONFIG_XEN | |
12724 | - /* | |
12725 | - * LSTAR and STAR live in a bit strange symbiosis. | |
12726 | - * They both write to the same internal register. STAR allows to set CS/DS | |
12727 | - * but only a 32bit target. LSTAR sets the 64bit rip. | |
12728 | - */ | |
12729 | - wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); | |
12730 | - wrmsrl(MSR_LSTAR, system_call); | |
12731 | - wrmsrl(MSR_CSTAR, ignore_sysret); | |
12732 | - | |
12733 | - /* Flags to clear on syscall */ | |
12734 | - wrmsrl(MSR_SYSCALL_MASK, | |
12735 | - X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); | |
12736 | -#endif | |
12737 | -#ifdef CONFIG_IA32_EMULATION | |
12738 | - syscall32_cpu_init (); | |
12739 | -#else | |
12740 | - { | |
12741 | - static const struct callback_register cstar = { | |
12742 | - .type = CALLBACKTYPE_syscall32, | |
12743 | - .address = (unsigned long)ignore_sysret | |
12744 | - }; | |
12745 | - if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar)) | |
12746 | - printk(KERN_WARNING "Unable to register CSTAR callback\n"); | |
12747 | - } | |
12748 | -#endif | |
12749 | -} | |
12750 | - | |
12751 | -void __cpuinit check_efer(void) | |
12752 | -{ | |
12753 | - unsigned long efer; | |
12754 | - | |
12755 | - rdmsrl(MSR_EFER, efer); | |
12756 | - if (!(efer & EFER_NX) || do_not_nx) { | |
12757 | - __supported_pte_mask &= ~_PAGE_NX; | |
12758 | - } | |
12759 | -} | |
12760 | - | |
12761 | -unsigned long kernel_eflags; | |
12762 | - | |
12763 | -#ifndef CONFIG_X86_NO_TSS | |
12764 | -/* | |
12765 | - * Copies of the original ist values from the tss are only accessed during | |
12766 | - * debugging, no special alignment required. | |
12767 | - */ | |
12768 | -DEFINE_PER_CPU(struct orig_ist, orig_ist); | |
12769 | -#endif | |
12770 | - | |
12771 | -/* | |
12772 | - * cpu_init() initializes state that is per-CPU. Some data is already | |
12773 | - * initialized (naturally) in the bootstrap process, such as the GDT | |
12774 | - * and IDT. We reload them nevertheless, this function acts as a | |
12775 | - * 'CPU state barrier', nothing should get across. | |
12776 | - * A lot of state is already set up in PDA init. | |
12777 | - */ | |
12778 | -void __cpuinit cpu_init (void) | |
12779 | -{ | |
12780 | - int cpu = stack_smp_processor_id(); | |
12781 | -#ifndef CONFIG_X86_NO_TSS | |
12782 | - struct tss_struct *t = &per_cpu(init_tss, cpu); | |
12783 | - struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); | |
12784 | - unsigned long v; | |
12785 | - char *estacks = NULL; | |
12786 | - unsigned i; | |
12787 | -#endif | |
12788 | - struct task_struct *me; | |
12789 | - | |
12790 | - /* CPU 0 is initialised in head64.c */ | |
12791 | - if (cpu != 0) { | |
12792 | - pda_init(cpu); | |
12793 | - } | |
12794 | -#ifndef CONFIG_X86_NO_TSS | |
12795 | - else | |
12796 | - estacks = boot_exception_stacks; | |
12797 | -#endif | |
12798 | - | |
12799 | - me = current; | |
12800 | - | |
12801 | - if (cpu_test_and_set(cpu, cpu_initialized)) | |
12802 | - panic("CPU#%d already initialized!\n", cpu); | |
12803 | - | |
12804 | - printk("Initializing CPU#%d\n", cpu); | |
12805 | - | |
12806 | - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | |
12807 | - | |
12808 | - /* | |
12809 | - * Initialize the per-CPU GDT with the boot GDT, | |
12810 | - * and set up the GDT descriptor: | |
12811 | - */ | |
12812 | -#ifndef CONFIG_XEN | |
12813 | - if (cpu) | |
12814 | - memcpy(get_cpu_gdt_table(cpu), cpu_gdt_table, GDT_SIZE); | |
12815 | -#endif | |
12816 | - | |
12817 | - cpu_gdt_descr[cpu].size = GDT_SIZE; | |
12818 | - cpu_gdt_init(&cpu_gdt_descr[cpu]); | |
12819 | - | |
12820 | - memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); | |
12821 | - syscall_init(); | |
12822 | - | |
12823 | - wrmsrl(MSR_FS_BASE, 0); | |
12824 | - wrmsrl(MSR_KERNEL_GS_BASE, 0); | |
12825 | - barrier(); | |
12826 | - | |
12827 | - check_efer(); | |
12828 | - | |
12829 | -#ifndef CONFIG_X86_NO_TSS | |
12830 | - /* | |
12831 | - * set up and load the per-CPU TSS | |
12832 | - */ | |
12833 | - for (v = 0; v < N_EXCEPTION_STACKS; v++) { | |
12834 | - static const unsigned int order[N_EXCEPTION_STACKS] = { | |
12835 | - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, | |
12836 | - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER | |
12837 | - }; | |
12838 | - if (cpu) { | |
12839 | - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); | |
12840 | - if (!estacks) | |
12841 | - panic("Cannot allocate exception stack %ld %d\n", | |
12842 | - v, cpu); | |
12843 | - } | |
12844 | - estacks += PAGE_SIZE << order[v]; | |
12845 | - orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; | |
12846 | - } | |
12847 | - | |
12848 | - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | |
12849 | - /* | |
12850 | - * <= is required because the CPU will access up to | |
12851 | - * 8 bits beyond the end of the IO permission bitmap. | |
12852 | - */ | |
12853 | - for (i = 0; i <= IO_BITMAP_LONGS; i++) | |
12854 | - t->io_bitmap[i] = ~0UL; | |
12855 | -#endif | |
12856 | - | |
12857 | - atomic_inc(&init_mm.mm_count); | |
12858 | - me->active_mm = &init_mm; | |
12859 | - if (me->mm) | |
12860 | - BUG(); | |
12861 | - enter_lazy_tlb(&init_mm, me); | |
12862 | - | |
12863 | -#ifndef CONFIG_X86_NO_TSS | |
12864 | - set_tss_desc(cpu, t); | |
12865 | -#endif | |
12866 | -#ifndef CONFIG_XEN | |
12867 | - load_TR_desc(); | |
12868 | -#endif | |
12869 | - load_LDT(&init_mm.context); | |
12870 | - | |
12871 | -#ifdef CONFIG_KGDB | |
12872 | - /* | |
12873 | - * If the kgdb is connected no debug regs should be altered. This | |
12874 | - * is only applicable when KGDB and a KGDB I/O module are built | |
12875 | - * into the kernel and you are using early debugging with | |
12876 | - * kgdbwait. KGDB will control the kernel HW breakpoint registers. | |
12877 | - */ | |
12878 | - if (kgdb_connected && arch_kgdb_ops.correct_hw_break) | |
12879 | - arch_kgdb_ops.correct_hw_break(); | |
12880 | - else { | |
12881 | -#endif | |
12882 | - /* | |
12883 | - * Clear all 6 debug registers: | |
12884 | - */ | |
12885 | - | |
12886 | - set_debugreg(0UL, 0); | |
12887 | - set_debugreg(0UL, 1); | |
12888 | - set_debugreg(0UL, 2); | |
12889 | - set_debugreg(0UL, 3); | |
12890 | - set_debugreg(0UL, 6); | |
12891 | - set_debugreg(0UL, 7); | |
12892 | -#ifdef CONFIG_KGDB | |
12893 | - /* If the kgdb is connected no debug regs should be altered. */ | |
12894 | - } | |
12895 | -#endif | |
12896 | - | |
12897 | - fpu_init(); | |
12898 | - | |
12899 | - asm ("pushfq; popq %0" : "=rm" (kernel_eflags)); | |
12900 | - if (raw_irqs_disabled()) | |
12901 | - kernel_eflags &= ~X86_EFLAGS_IF; | |
12902 | - | |
12903 | - if (is_uv_system()) | |
12904 | - uv_cpu_init(); | |
12905 | -} | |
82094b55 | 12906 | --- sle11-2009-10-16.orig/arch/x86/kernel/setup_32-xen.c 2009-03-16 16:38:05.000000000 +0100 |
2cb7cef9 BS |
12907 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 |
12908 | @@ -1,1151 +0,0 @@ | |
12909 | -/* | |
12910 | - * Copyright (C) 1995 Linus Torvalds | |
12911 | - * | |
12912 | - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 | |
12913 | - * | |
12914 | - * Memory region support | |
12915 | - * David Parsons <orc@pell.chi.il.us>, July-August 1999 | |
12916 | - * | |
12917 | - * Added E820 sanitization routine (removes overlapping memory regions); | |
12918 | - * Brian Moyle <bmoyle@mvista.com>, February 2001 | |
12919 | - * | |
12920 | - * Moved CPU detection code to cpu/${cpu}.c | |
12921 | - * Patrick Mochel <mochel@osdl.org>, March 2002 | |
12922 | - * | |
12923 | - * Provisions for empty E820 memory regions (reported by certain BIOSes). | |
12924 | - * Alex Achenbach <xela@slit.de>, December 2002. | |
12925 | - * | |
12926 | - */ | |
12927 | - | |
12928 | -/* | |
12929 | - * This file handles the architecture-dependent parts of initialization | |
12930 | - */ | |
12931 | - | |
12932 | -#include <linux/sched.h> | |
12933 | -#include <linux/mm.h> | |
12934 | -#include <linux/mmzone.h> | |
12935 | -#include <linux/screen_info.h> | |
12936 | -#include <linux/ioport.h> | |
12937 | -#include <linux/acpi.h> | |
12938 | -#include <linux/apm_bios.h> | |
12939 | -#include <linux/initrd.h> | |
12940 | -#include <linux/bootmem.h> | |
12941 | -#include <linux/seq_file.h> | |
12942 | -#include <linux/console.h> | |
12943 | -#include <linux/mca.h> | |
12944 | -#include <linux/root_dev.h> | |
12945 | -#include <linux/highmem.h> | |
12946 | -#include <linux/module.h> | |
12947 | -#include <linux/efi.h> | |
12948 | -#include <linux/init.h> | |
12949 | -#include <linux/edd.h> | |
12950 | -#include <linux/iscsi_ibft.h> | |
12951 | -#include <linux/nodemask.h> | |
12952 | -#include <linux/kernel.h> | |
12953 | -#include <linux/percpu.h> | |
12954 | -#include <linux/notifier.h> | |
12955 | -#include <linux/kexec.h> | |
12956 | -#include <linux/crash_dump.h> | |
12957 | -#include <linux/dmi.h> | |
12958 | -#include <linux/pfn.h> | |
12959 | -#include <linux/pci.h> | |
12960 | -#include <linux/init_ohci1394_dma.h> | |
12961 | -#include <linux/kvm_para.h> | |
12962 | - | |
12963 | -#include <video/edid.h> | |
12964 | - | |
12965 | -#include <asm/mtrr.h> | |
12966 | -#include <asm/apic.h> | |
12967 | -#include <asm/e820.h> | |
12968 | -#include <asm/mpspec.h> | |
12969 | -#include <asm/mmzone.h> | |
12970 | -#include <asm/setup.h> | |
12971 | -#include <asm/arch_hooks.h> | |
12972 | -#include <asm/sections.h> | |
12973 | -#include <asm/io_apic.h> | |
12974 | -#include <asm/ist.h> | |
12975 | -#include <asm/io.h> | |
12976 | -#include <asm/hypervisor.h> | |
12977 | -#include <xen/interface/physdev.h> | |
12978 | -#include <xen/interface/memory.h> | |
12979 | -#include <xen/features.h> | |
12980 | -#include <xen/firmware.h> | |
12981 | -#include <xen/xencons.h> | |
12982 | -#include <setup_arch.h> | |
12983 | -#include <asm/bios_ebda.h> | |
12984 | -#include <asm/cacheflush.h> | |
12985 | -#include <asm/processor.h> | |
12986 | - | |
12987 | -#ifdef CONFIG_XEN | |
12988 | -#include <xen/interface/kexec.h> | |
12989 | -#endif | |
12990 | - | |
12991 | -static int xen_panic_event(struct notifier_block *, unsigned long, void *); | |
12992 | -static struct notifier_block xen_panic_block = { | |
12993 | - xen_panic_event, NULL, 0 /* try to go last */ | |
12994 | -}; | |
12995 | - | |
12996 | -/* | |
12997 | - * Machine setup.. | |
12998 | - */ | |
12999 | -static struct resource data_resource = { | |
13000 | - .name = "Kernel data", | |
13001 | - .start = 0, | |
13002 | - .end = 0, | |
13003 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
13004 | -}; | |
13005 | - | |
13006 | -static struct resource code_resource = { | |
13007 | - .name = "Kernel code", | |
13008 | - .start = 0, | |
13009 | - .end = 0, | |
13010 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
13011 | -}; | |
13012 | - | |
13013 | -static struct resource bss_resource = { | |
13014 | - .name = "Kernel bss", | |
13015 | - .start = 0, | |
13016 | - .end = 0, | |
13017 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
13018 | -}; | |
13019 | - | |
13020 | -static struct resource video_ram_resource = { | |
13021 | - .name = "Video RAM area", | |
13022 | - .start = 0xa0000, | |
13023 | - .end = 0xbffff, | |
13024 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
13025 | -}; | |
13026 | - | |
13027 | -static struct resource standard_io_resources[] = { { | |
13028 | - .name = "dma1", | |
13029 | - .start = 0x0000, | |
13030 | - .end = 0x001f, | |
13031 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
13032 | -}, { | |
13033 | - .name = "pic1", | |
13034 | - .start = 0x0020, | |
13035 | - .end = 0x0021, | |
13036 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
13037 | -}, { | |
13038 | - .name = "timer0", | |
13039 | - .start = 0x0040, | |
13040 | - .end = 0x0043, | |
13041 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
13042 | -}, { | |
13043 | - .name = "timer1", | |
13044 | - .start = 0x0050, | |
13045 | - .end = 0x0053, | |
13046 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
13047 | -}, { | |
13048 | - .name = "keyboard", | |
13049 | - .start = 0x0060, | |
13050 | - .end = 0x0060, | |
13051 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
13052 | -}, { | |
13053 | - .name = "keyboard", | |
13054 | - .start = 0x0064, | |
13055 | - .end = 0x0064, | |
13056 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
13057 | -}, { | |
13058 | - .name = "dma page reg", | |
13059 | - .start = 0x0080, | |
13060 | - .end = 0x008f, | |
13061 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
13062 | -}, { | |
13063 | - .name = "pic2", | |
13064 | - .start = 0x00a0, | |
13065 | - .end = 0x00a1, | |
13066 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
13067 | -}, { | |
13068 | - .name = "dma2", | |
13069 | - .start = 0x00c0, | |
13070 | - .end = 0x00df, | |
13071 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
13072 | -}, { | |
13073 | - .name = "fpu", | |
13074 | - .start = 0x00f0, | |
13075 | - .end = 0x00ff, | |
13076 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
13077 | -} }; | |
13078 | - | |
13079 | -/* cpu data as detected by the assembly code in head.S */ | |
13080 | -struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; | |
13081 | -/* common cpu data for all cpus */ | |
13082 | -struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; | |
13083 | -EXPORT_SYMBOL(boot_cpu_data); | |
13084 | - | |
13085 | -unsigned int def_to_bigsmp; | |
13086 | - | |
13087 | -#ifndef CONFIG_X86_PAE | |
13088 | -unsigned long mmu_cr4_features; | |
13089 | -#else | |
13090 | -unsigned long mmu_cr4_features = X86_CR4_PAE; | |
13091 | -#endif | |
13092 | - | |
13093 | -/* for MCA, but anyone else can use it if they want */ | |
13094 | -unsigned int machine_id; | |
13095 | -unsigned int machine_submodel_id; | |
13096 | -unsigned int BIOS_revision; | |
13097 | - | |
13098 | -/* Boot loader ID as an integer, for the benefit of proc_dointvec */ | |
13099 | -int bootloader_type; | |
13100 | - | |
13101 | -/* user-defined highmem size */ | |
13102 | -static unsigned int highmem_pages = -1; | |
13103 | - | |
13104 | -/* | |
13105 | - * Setup options | |
13106 | - */ | |
13107 | -struct screen_info screen_info; | |
13108 | -EXPORT_SYMBOL(screen_info); | |
13109 | -struct apm_info apm_info; | |
13110 | -EXPORT_SYMBOL(apm_info); | |
13111 | -struct edid_info edid_info; | |
13112 | -EXPORT_SYMBOL_GPL(edid_info); | |
13113 | -#ifndef CONFIG_XEN | |
13114 | -#define copy_edid() (edid_info = boot_params.edid_info) | |
13115 | -#endif | |
13116 | -struct ist_info ist_info; | |
13117 | -#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \ | |
13118 | - defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) | |
13119 | -EXPORT_SYMBOL(ist_info); | |
13120 | -#endif | |
13121 | - | |
13122 | -extern void early_cpu_init(void); | |
13123 | -extern int root_mountflags; | |
13124 | - | |
13125 | -unsigned long saved_video_mode; | |
13126 | - | |
13127 | -#define RAMDISK_IMAGE_START_MASK 0x07FF | |
13128 | -#define RAMDISK_PROMPT_FLAG 0x8000 | |
13129 | -#define RAMDISK_LOAD_FLAG 0x4000 | |
13130 | - | |
13131 | -static char __initdata command_line[COMMAND_LINE_SIZE]; | |
13132 | - | |
13133 | -#ifndef CONFIG_DEBUG_BOOT_PARAMS | |
13134 | -struct boot_params __initdata boot_params; | |
13135 | -#else | |
13136 | -struct boot_params boot_params; | |
13137 | -#endif | |
13138 | - | |
13139 | -/* | |
13140 | - * Point at the empty zero page to start with. We map the real shared_info | |
13141 | - * page as soon as fixmap is up and running. | |
13142 | - */ | |
13143 | -shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; | |
13144 | -EXPORT_SYMBOL(HYPERVISOR_shared_info); | |
13145 | - | |
13146 | -unsigned long *phys_to_machine_mapping; | |
13147 | -unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[16]; | |
13148 | -EXPORT_SYMBOL(phys_to_machine_mapping); | |
13149 | - | |
13150 | -/* Raw start-of-day parameters from the hypervisor. */ | |
13151 | -start_info_t *xen_start_info; | |
13152 | -EXPORT_SYMBOL(xen_start_info); | |
13153 | - | |
13154 | -#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) | |
13155 | -struct edd edd; | |
13156 | -#ifdef CONFIG_EDD_MODULE | |
13157 | -EXPORT_SYMBOL(edd); | |
13158 | -#endif | |
13159 | -#ifndef CONFIG_XEN | |
13160 | -/** | |
13161 | - * copy_edd() - Copy the BIOS EDD information | |
13162 | - * from boot_params into a safe place. | |
13163 | - * | |
13164 | - */ | |
13165 | -static inline void copy_edd(void) | |
13166 | -{ | |
13167 | - memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, | |
13168 | - sizeof(edd.mbr_signature)); | |
13169 | - memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); | |
13170 | - edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; | |
13171 | - edd.edd_info_nr = boot_params.eddbuf_entries; | |
13172 | -} | |
13173 | -#endif | |
13174 | -#else | |
13175 | -static inline void copy_edd(void) | |
13176 | -{ | |
13177 | -} | |
13178 | -#endif | |
13179 | - | |
13180 | -int __initdata user_defined_memmap; | |
13181 | - | |
13182 | -/* | |
13183 | - * "mem=nopentium" disables the 4MB page tables. | |
13184 | - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM | |
13185 | - * to <mem>, overriding the bios size. | |
13186 | - * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from | |
13187 | - * <start> to <start>+<mem>, overriding the bios size. | |
13188 | - * | |
13189 | - * HPA tells me bootloaders need to parse mem=, so no new | |
13190 | - * option should be mem= [also see Documentation/i386/boot.txt] | |
13191 | - */ | |
13192 | -static int __init parse_mem(char *arg) | |
13193 | -{ | |
13194 | - if (!arg) | |
13195 | - return -EINVAL; | |
13196 | - | |
13197 | - if (strcmp(arg, "nopentium") == 0) { | |
13198 | - setup_clear_cpu_cap(X86_FEATURE_PSE); | |
13199 | - } else { | |
13200 | - /* If the user specifies memory size, we | |
13201 | - * limit the BIOS-provided memory map to | |
13202 | - * that size. exactmap can be used to specify | |
13203 | - * the exact map. mem=number can be used to | |
13204 | - * trim the existing memory map. | |
13205 | - */ | |
13206 | - unsigned long long mem_size; | |
13207 | - | |
13208 | - mem_size = memparse(arg, &arg); | |
13209 | - limit_regions(mem_size); | |
13210 | - user_defined_memmap = 1; | |
13211 | - } | |
13212 | - return 0; | |
13213 | -} | |
13214 | -early_param("mem", parse_mem); | |
13215 | - | |
13216 | -#ifdef CONFIG_PROC_VMCORE | |
13217 | -/* elfcorehdr= specifies the location of elf core header | |
13218 | - * stored by the crashed kernel. | |
13219 | - */ | |
13220 | -static int __init parse_elfcorehdr(char *arg) | |
13221 | -{ | |
13222 | - if (!arg) | |
13223 | - return -EINVAL; | |
13224 | - | |
13225 | - elfcorehdr_addr = memparse(arg, &arg); | |
13226 | - return 0; | |
13227 | -} | |
13228 | -early_param("elfcorehdr", parse_elfcorehdr); | |
13229 | -#endif /* CONFIG_PROC_VMCORE */ | |
13230 | - | |
13231 | -/* | |
13232 | - * highmem=size forces highmem to be exactly 'size' bytes. | |
13233 | - * This works even on boxes that have no highmem otherwise. | |
13234 | - * This also works to reduce highmem size on bigger boxes. | |
13235 | - */ | |
13236 | -static int __init parse_highmem(char *arg) | |
13237 | -{ | |
13238 | - if (!arg) | |
13239 | - return -EINVAL; | |
13240 | - | |
13241 | - highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; | |
13242 | - return 0; | |
13243 | -} | |
13244 | -early_param("highmem", parse_highmem); | |
13245 | - | |
13246 | -/* | |
13247 | - * vmalloc=size forces the vmalloc area to be exactly 'size' | |
13248 | - * bytes. This can be used to increase (or decrease) the | |
13249 | - * vmalloc area - the default is 128m. | |
13250 | - */ | |
13251 | -static int __init parse_vmalloc(char *arg) | |
13252 | -{ | |
13253 | - if (!arg) | |
13254 | - return -EINVAL; | |
13255 | - | |
13256 | - __VMALLOC_RESERVE = memparse(arg, &arg); | |
13257 | - return 0; | |
13258 | -} | |
13259 | -early_param("vmalloc", parse_vmalloc); | |
13260 | - | |
13261 | -#ifndef CONFIG_XEN | |
13262 | -/* | |
13263 | - * reservetop=size reserves a hole at the top of the kernel address space which | |
13264 | - * a hypervisor can load into later. Needed for dynamically loaded hypervisors, | |
13265 | - * so relocating the fixmap can be done before paging initialization. | |
13266 | - */ | |
13267 | -static int __init parse_reservetop(char *arg) | |
13268 | -{ | |
13269 | - unsigned long address; | |
13270 | - | |
13271 | - if (!arg) | |
13272 | - return -EINVAL; | |
13273 | - | |
13274 | - address = memparse(arg, &arg); | |
13275 | - reserve_top_address(address); | |
13276 | - return 0; | |
13277 | -} | |
13278 | -early_param("reservetop", parse_reservetop); | |
13279 | -#endif | |
13280 | - | |
13281 | -/* | |
13282 | - * Determine low and high memory ranges: | |
13283 | - */ | |
13284 | -unsigned long __init find_max_low_pfn(void) | |
13285 | -{ | |
13286 | - unsigned long max_low_pfn; | |
13287 | - | |
13288 | - max_low_pfn = max_pfn; | |
13289 | - if (max_low_pfn > MAXMEM_PFN) { | |
13290 | - if (highmem_pages == -1) | |
13291 | - highmem_pages = max_pfn - MAXMEM_PFN; | |
13292 | - if (highmem_pages + MAXMEM_PFN < max_pfn) | |
13293 | - max_pfn = MAXMEM_PFN + highmem_pages; | |
13294 | - if (highmem_pages + MAXMEM_PFN > max_pfn) { | |
13295 | - printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages)); | |
13296 | - highmem_pages = 0; | |
13297 | - } | |
13298 | - max_low_pfn = MAXMEM_PFN; | |
13299 | -#ifndef CONFIG_HIGHMEM | |
13300 | - /* Maximum memory usable is what is directly addressable */ | |
13301 | - printk(KERN_WARNING "Warning only %ldMB will be used.\n", | |
13302 | - MAXMEM>>20); | |
13303 | - if (max_pfn > MAX_NONPAE_PFN) | |
13304 | - printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); | |
13305 | - else | |
13306 | - printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); | |
13307 | - max_pfn = MAXMEM_PFN; | |
13308 | -#else /* !CONFIG_HIGHMEM */ | |
13309 | -#ifndef CONFIG_HIGHMEM64G | |
13310 | - if (max_pfn > MAX_NONPAE_PFN) { | |
13311 | - max_pfn = MAX_NONPAE_PFN; | |
13312 | - printk(KERN_WARNING "Warning only 4GB will be used.\n"); | |
13313 | - printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); | |
13314 | - } | |
13315 | -#endif /* !CONFIG_HIGHMEM64G */ | |
13316 | -#endif /* !CONFIG_HIGHMEM */ | |
13317 | - } else { | |
13318 | - if (highmem_pages == -1) | |
13319 | - highmem_pages = 0; | |
13320 | -#ifdef CONFIG_HIGHMEM | |
13321 | - if (highmem_pages >= max_pfn) { | |
13322 | - printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn)); | |
13323 | - highmem_pages = 0; | |
13324 | - } | |
13325 | - if (highmem_pages) { | |
13326 | - if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){ | |
13327 | - printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages)); | |
13328 | - highmem_pages = 0; | |
13329 | - } | |
13330 | - max_low_pfn -= highmem_pages; | |
13331 | - } | |
13332 | -#else | |
13333 | - if (highmem_pages) | |
13334 | - printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n"); | |
13335 | -#endif | |
13336 | - } | |
13337 | - return max_low_pfn; | |
13338 | -} | |
13339 | - | |
13340 | -#ifndef CONFIG_XEN | |
13341 | -#define BIOS_LOWMEM_KILOBYTES 0x413 | |
13342 | - | |
13343 | -/* | |
13344 | - * The BIOS places the EBDA/XBDA at the top of conventional | |
13345 | - * memory, and usually decreases the reported amount of | |
13346 | - * conventional memory (int 0x12) too. This also contains a | |
13347 | - * workaround for Dell systems that neglect to reserve EBDA. | |
13348 | - * The same workaround also avoids a problem with the AMD768MPX | |
13349 | - * chipset: reserve a page before VGA to prevent PCI prefetch | |
13350 | - * into it (errata #56). Usually the page is reserved anyways, | |
13351 | - * unless you have no PS/2 mouse plugged in. | |
13352 | - */ | |
13353 | -static void __init reserve_ebda_region(void) | |
13354 | -{ | |
13355 | - unsigned int lowmem, ebda_addr; | |
13356 | - | |
13357 | - /* To determine the position of the EBDA and the */ | |
13358 | - /* end of conventional memory, we need to look at */ | |
13359 | - /* the BIOS data area. In a paravirtual environment */ | |
13360 | - /* that area is absent. We'll just have to assume */ | |
13361 | - /* that the paravirt case can handle memory setup */ | |
13362 | - /* correctly, without our help. */ | |
13363 | - if (paravirt_enabled()) | |
13364 | - return; | |
13365 | - | |
13366 | - /* end of low (conventional) memory */ | |
13367 | - lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); | |
13368 | - lowmem <<= 10; | |
13369 | - | |
13370 | - /* start of EBDA area */ | |
13371 | - ebda_addr = get_bios_ebda(); | |
13372 | - | |
13373 | - /* Fixup: bios puts an EBDA in the top 64K segment */ | |
13374 | - /* of conventional memory, but does not adjust lowmem. */ | |
13375 | - if ((lowmem - ebda_addr) <= 0x10000) | |
13376 | - lowmem = ebda_addr; | |
13377 | - | |
13378 | - /* Fixup: bios does not report an EBDA at all. */ | |
13379 | - /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ | |
13380 | - if ((ebda_addr == 0) && (lowmem >= 0x9f000)) | |
13381 | - lowmem = 0x9f000; | |
13382 | - | |
13383 | - /* Paranoia: should never happen, but... */ | |
13384 | - if ((lowmem == 0) || (lowmem >= 0x100000)) | |
13385 | - lowmem = 0x9f000; | |
13386 | - | |
13387 | - /* reserve all memory between lowmem and the 1MB mark */ | |
13388 | - reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT); | |
13389 | -} | |
13390 | -#endif | |
13391 | - | |
13392 | -#ifndef CONFIG_NEED_MULTIPLE_NODES | |
13393 | -static void __init setup_bootmem_allocator(void); | |
13394 | -static unsigned long __init setup_memory(void) | |
13395 | -{ | |
13396 | - /* | |
13397 | - * partially used pages are not usable - thus | |
13398 | - * we are rounding upwards: | |
13399 | - */ | |
13400 | - min_low_pfn = PFN_UP(__pa(xen_start_info->pt_base)) + | |
13401 | - xen_start_info->nr_pt_frames; | |
13402 | - | |
13403 | - max_low_pfn = find_max_low_pfn(); | |
13404 | - | |
13405 | -#ifdef CONFIG_HIGHMEM | |
13406 | - highstart_pfn = highend_pfn = max_pfn; | |
13407 | - if (max_pfn > max_low_pfn) { | |
13408 | - highstart_pfn = max_low_pfn; | |
13409 | - } | |
13410 | - printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", | |
13411 | - pages_to_mb(highend_pfn - highstart_pfn)); | |
13412 | - num_physpages = highend_pfn; | |
13413 | - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; | |
13414 | -#else | |
13415 | - num_physpages = max_low_pfn; | |
13416 | - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | |
13417 | -#endif | |
13418 | -#ifdef CONFIG_FLATMEM | |
13419 | - max_mapnr = num_physpages; | |
13420 | -#endif | |
13421 | - printk(KERN_NOTICE "%ldMB LOWMEM available.\n", | |
13422 | - pages_to_mb(max_low_pfn)); | |
13423 | - | |
13424 | - setup_bootmem_allocator(); | |
13425 | - | |
13426 | - return max_low_pfn; | |
13427 | -} | |
13428 | - | |
13429 | -static void __init zone_sizes_init(void) | |
13430 | -{ | |
13431 | - unsigned long max_zone_pfns[MAX_NR_ZONES]; | |
13432 | - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | |
13433 | - max_zone_pfns[ZONE_DMA] = | |
13434 | - virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | |
13435 | - max_zone_pfns[ZONE_NORMAL] = max_low_pfn; | |
13436 | -#ifdef CONFIG_HIGHMEM | |
13437 | - max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; | |
13438 | - add_active_range(0, 0, highend_pfn); | |
13439 | -#else | |
13440 | - add_active_range(0, 0, max_low_pfn); | |
13441 | -#endif | |
13442 | - | |
13443 | - free_area_init_nodes(max_zone_pfns); | |
13444 | -} | |
13445 | -#else | |
13446 | -extern unsigned long __init setup_memory(void); | |
13447 | -extern void zone_sizes_init(void); | |
13448 | -#endif /* !CONFIG_NEED_MULTIPLE_NODES */ | |
13449 | - | |
13450 | -static inline unsigned long long get_total_mem(void) | |
13451 | -{ | |
13452 | - unsigned long long total; | |
13453 | - | |
13454 | - total = max_low_pfn - min_low_pfn; | |
13455 | -#ifdef CONFIG_HIGHMEM | |
13456 | - total += highend_pfn - highstart_pfn; | |
13457 | -#endif | |
13458 | - | |
13459 | - return total << PAGE_SHIFT; | |
13460 | -} | |
13461 | - | |
13462 | -#ifdef CONFIG_KEXEC | |
13463 | -#ifndef CONFIG_XEN | |
13464 | -static void __init reserve_crashkernel(void) | |
13465 | -{ | |
13466 | - unsigned long long total_mem; | |
13467 | - unsigned long long crash_size, crash_base; | |
13468 | - int ret; | |
13469 | - | |
13470 | - total_mem = get_total_mem(); | |
13471 | - | |
13472 | - ret = parse_crashkernel(boot_command_line, total_mem, | |
13473 | - &crash_size, &crash_base); | |
13474 | - if (ret == 0 && crash_size > 0) { | |
13475 | - if (crash_base > 0) { | |
13476 | - printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " | |
13477 | - "for crashkernel (System RAM: %ldMB)\n", | |
13478 | - (unsigned long)(crash_size >> 20), | |
13479 | - (unsigned long)(crash_base >> 20), | |
13480 | - (unsigned long)(total_mem >> 20)); | |
13481 | - | |
13482 | - if (reserve_bootmem(crash_base, crash_size, | |
13483 | - BOOTMEM_EXCLUSIVE) < 0) { | |
13484 | - printk(KERN_INFO "crashkernel reservation " | |
13485 | - "failed - memory is in use\n"); | |
13486 | - return; | |
13487 | - } | |
13488 | - | |
13489 | - crashk_res.start = crash_base; | |
13490 | - crashk_res.end = crash_base + crash_size - 1; | |
13491 | - } else | |
13492 | - printk(KERN_INFO "crashkernel reservation failed - " | |
13493 | - "you have to specify a base address\n"); | |
13494 | - } | |
13495 | -} | |
13496 | -#else | |
13497 | -#define reserve_crashkernel xen_machine_kexec_setup_resources | |
13498 | -#endif | |
13499 | -#else | |
13500 | -static inline void __init reserve_crashkernel(void) | |
13501 | -{} | |
13502 | -#endif | |
13503 | - | |
13504 | -#ifdef CONFIG_BLK_DEV_INITRD | |
13505 | - | |
13506 | -static bool do_relocate_initrd = false; | |
13507 | - | |
13508 | -static void __init reserve_initrd(void) | |
13509 | -{ | |
13510 | - unsigned long ramdisk_image = __pa(xen_start_info->mod_start); | |
13511 | - unsigned long ramdisk_size = xen_start_info->mod_len; | |
13512 | - unsigned long ramdisk_end = ramdisk_image + ramdisk_size; | |
13513 | - unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; | |
13514 | - unsigned long ramdisk_here; | |
13515 | - | |
13516 | - initrd_start = 0; | |
13517 | - | |
13518 | - if (!xen_start_info->mod_start || !ramdisk_size) | |
13519 | - return; /* No initrd provided by bootloader */ | |
13520 | - | |
13521 | - if (ramdisk_end < ramdisk_image) { | |
13522 | - printk(KERN_ERR "initrd wraps around end of memory, " | |
13523 | - "disabling initrd\n"); | |
13524 | - return; | |
13525 | - } | |
13526 | - if (ramdisk_size >= end_of_lowmem/2) { | |
13527 | - printk(KERN_ERR "initrd too large to handle, " | |
13528 | - "disabling initrd\n"); | |
13529 | - return; | |
13530 | - } | |
13531 | - if (ramdisk_end <= end_of_lowmem) { | |
13532 | - /* All in lowmem, easy case */ | |
13533 | - reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT); | |
13534 | - initrd_start = ramdisk_image + PAGE_OFFSET; | |
13535 | - initrd_end = initrd_start+ramdisk_size; | |
13536 | - return; | |
13537 | - } | |
13538 | - | |
13539 | - /* We need to move the initrd down into lowmem */ | |
13540 | - ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK; | |
13541 | - | |
13542 | - /* Note: this includes all the lowmem currently occupied by | |
13543 | - the initrd, we rely on that fact to keep the data intact. */ | |
13544 | - reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT); | |
13545 | - initrd_start = ramdisk_here + PAGE_OFFSET; | |
13546 | - initrd_end = initrd_start + ramdisk_size; | |
13547 | - | |
13548 | - do_relocate_initrd = true; | |
13549 | -} | |
13550 | - | |
13551 | -#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) | |
13552 | - | |
13553 | -static void __init relocate_initrd(void) | |
13554 | -{ | |
13555 | - unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | |
13556 | - unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | |
13557 | - unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; | |
13558 | - unsigned long ramdisk_here; | |
13559 | - unsigned long slop, clen, mapaddr; | |
13560 | - char *p, *q; | |
13561 | - | |
13562 | - if (!do_relocate_initrd) | |
13563 | - return; | |
13564 | - | |
13565 | - ramdisk_here = initrd_start - PAGE_OFFSET; | |
13566 | - | |
13567 | - q = (char *)initrd_start; | |
13568 | - | |
13569 | - /* Copy any lowmem portion of the initrd */ | |
13570 | - if (ramdisk_image < end_of_lowmem) { | |
13571 | - clen = end_of_lowmem - ramdisk_image; | |
13572 | - p = (char *)__va(ramdisk_image); | |
13573 | - memcpy(q, p, clen); | |
13574 | - q += clen; | |
13575 | - ramdisk_image += clen; | |
13576 | - ramdisk_size -= clen; | |
13577 | - } | |
13578 | - | |
13579 | - /* Copy the highmem portion of the initrd */ | |
13580 | - while (ramdisk_size) { | |
13581 | - slop = ramdisk_image & ~PAGE_MASK; | |
13582 | - clen = ramdisk_size; | |
13583 | - if (clen > MAX_MAP_CHUNK-slop) | |
13584 | - clen = MAX_MAP_CHUNK-slop; | |
13585 | - mapaddr = ramdisk_image & PAGE_MASK; | |
13586 | - p = early_ioremap(mapaddr, clen+slop); | |
13587 | - memcpy(q, p+slop, clen); | |
13588 | - early_iounmap(p, clen+slop); | |
13589 | - q += clen; | |
13590 | - ramdisk_image += clen; | |
13591 | - ramdisk_size -= clen; | |
13592 | - } | |
13593 | -} | |
13594 | - | |
13595 | -#endif /* CONFIG_BLK_DEV_INITRD */ | |
13596 | - | |
13597 | -void __init setup_bootmem_allocator(void) | |
13598 | -{ | |
13599 | - unsigned long bootmap_size; | |
13600 | - /* | |
13601 | - * Initialize the boot-time allocator (with low memory only): | |
13602 | - */ | |
13603 | - bootmap_size = init_bootmem(min_low_pfn, max_low_pfn); | |
13604 | - | |
13605 | - register_bootmem_low_pages(max_low_pfn); | |
13606 | - | |
13607 | - /* | |
13608 | - * Reserve the bootmem bitmap itself as well. We do this in two | |
13609 | - * steps (first step was init_bootmem()) because this catches | |
13610 | - * the (very unlikely) case of us accidentally initializing the | |
13611 | - * bootmem allocator with an invalid RAM area. | |
13612 | - */ | |
13613 | - reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) + | |
13614 | - bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text), | |
13615 | - BOOTMEM_DEFAULT); | |
13616 | - | |
13617 | -#ifndef CONFIG_XEN | |
13618 | - /* | |
13619 | - * reserve physical page 0 - it's a special BIOS page on many boxes, | |
13620 | - * enabling clean reboots, SMP operation, laptop functions. | |
13621 | - */ | |
13622 | - reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT); | |
13623 | - | |
13624 | - /* reserve EBDA region */ | |
13625 | - reserve_ebda_region(); | |
13626 | - | |
13627 | -#ifdef CONFIG_SMP | |
13628 | - /* | |
13629 | - * But first pinch a few for the stack/trampoline stuff | |
13630 | - * FIXME: Don't need the extra page at 4K, but need to fix | |
13631 | - * trampoline before removing it. (see the GDT stuff) | |
13632 | - */ | |
13633 | - reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT); | |
13634 | -#endif | |
13635 | -#ifdef CONFIG_ACPI_SLEEP | |
13636 | - /* | |
13637 | - * Reserve low memory region for sleep support. | |
13638 | - */ | |
13639 | - acpi_reserve_bootmem(); | |
13640 | -#endif | |
13641 | -#endif /* !CONFIG_XEN */ | |
13642 | - | |
13643 | -#ifdef CONFIG_BLK_DEV_INITRD | |
13644 | - reserve_initrd(); | |
13645 | -#endif | |
13646 | - numa_kva_reserve(); | |
13647 | - reserve_crashkernel(); | |
13648 | - | |
13649 | - reserve_ibft_region(); | |
13650 | -} | |
13651 | - | |
13652 | -/* | |
13653 | - * The node 0 pgdat is initialized before all of these because | |
13654 | - * it's needed for bootmem. node>0 pgdats have their virtual | |
13655 | - * space allocated before the pagetables are in place to access | |
13656 | - * them, so they can't be cleared then. | |
13657 | - * | |
13658 | - * This should all compile down to nothing when NUMA is off. | |
13659 | - */ | |
13660 | -static void __init remapped_pgdat_init(void) | |
13661 | -{ | |
13662 | - int nid; | |
13663 | - | |
13664 | - for_each_online_node(nid) { | |
13665 | - if (nid != 0) | |
13666 | - memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | |
13667 | - } | |
13668 | -} | |
13669 | - | |
13670 | -#ifdef CONFIG_MCA | |
13671 | -static void set_mca_bus(int x) | |
13672 | -{ | |
13673 | - MCA_bus = x; | |
13674 | -} | |
13675 | -#else | |
13676 | -static void set_mca_bus(int x) { } | |
13677 | -#endif | |
13678 | - | |
13679 | -/* Overridden in paravirt.c if CONFIG_PARAVIRT */ | |
13680 | -char * __init __attribute__((weak)) memory_setup(void) | |
13681 | -{ | |
13682 | - return machine_specific_memory_setup(); | |
13683 | -} | |
13684 | - | |
13685 | -#ifdef CONFIG_NUMA | |
13686 | -/* | |
13687 | - * In the golden day, when everything among i386 and x86_64 will be | |
13688 | - * integrated, this will not live here | |
13689 | - */ | |
13690 | -void *x86_cpu_to_node_map_early_ptr; | |
13691 | -int x86_cpu_to_node_map_init[NR_CPUS] = { | |
13692 | - [0 ... NR_CPUS-1] = NUMA_NO_NODE | |
13693 | -}; | |
13694 | -DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE; | |
13695 | -#endif | |
13696 | - | |
13697 | -/* | |
13698 | - * Determine if we were loaded by an EFI loader. If so, then we have also been | |
13699 | - * passed the efi memmap, systab, etc., so we should use these data structures | |
13700 | - * for initialization. Note, the efi init code path is determined by the | |
13701 | - * global efi_enabled. This allows the same kernel image to be used on existing | |
13702 | - * systems (with a traditional BIOS) as well as on EFI systems. | |
13703 | - */ | |
13704 | -void __init setup_arch(char **cmdline_p) | |
13705 | -{ | |
13706 | - int i, j, k, fpp; | |
13707 | - struct physdev_set_iopl set_iopl; | |
13708 | - unsigned long max_low_pfn; | |
13709 | - unsigned long p2m_pages; | |
13710 | - | |
13711 | - /* Force a quick death if the kernel panics (not domain 0). */ | |
13712 | - extern int panic_timeout; | |
13713 | - if (!panic_timeout && !is_initial_xendomain()) | |
13714 | - panic_timeout = 1; | |
13715 | - | |
13716 | - /* Register a call for panic conditions. */ | |
13717 | - atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); | |
13718 | - | |
13719 | - WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable, | |
13720 | - VMASST_TYPE_4gb_segments)); | |
13721 | - WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable, | |
13722 | - VMASST_TYPE_writable_pagetables)); | |
13723 | - | |
13724 | - memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | |
13725 | - pre_setup_arch_hook(); | |
13726 | - early_cpu_init(); | |
13727 | - early_ioremap_init(); | |
13728 | -#ifdef CONFIG_SMP | |
13729 | - prefill_possible_map(); | |
13730 | -#endif | |
13731 | - | |
13732 | -#ifdef CONFIG_EFI | |
13733 | - if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | |
13734 | - "EL32", 4)) | |
13735 | - efi_enabled = 1; | |
13736 | -#endif | |
13737 | - | |
13738 | - /* This must be initialized to UNNAMED_MAJOR for ipconfig to work | |
13739 | - properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd. | |
13740 | - */ | |
13741 | - ROOT_DEV = MKDEV(UNNAMED_MAJOR,0); | |
13742 | - screen_info = boot_params.screen_info; | |
13743 | - copy_edid(); | |
13744 | - apm_info.bios = boot_params.apm_bios_info; | |
13745 | - ist_info = boot_params.ist_info; | |
13746 | - saved_video_mode = boot_params.hdr.vid_mode; | |
13747 | - if( boot_params.sys_desc_table.length != 0 ) { | |
13748 | - set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2); | |
13749 | - machine_id = boot_params.sys_desc_table.table[0]; | |
13750 | - machine_submodel_id = boot_params.sys_desc_table.table[1]; | |
13751 | - BIOS_revision = boot_params.sys_desc_table.table[2]; | |
13752 | - } | |
13753 | - bootloader_type = boot_params.hdr.type_of_loader; | |
13754 | - | |
13755 | - if (is_initial_xendomain()) { | |
13756 | - const struct dom0_vga_console_info *info = | |
13757 | - (void *)((char *)xen_start_info + | |
13758 | - xen_start_info->console.dom0.info_off); | |
13759 | - | |
13760 | - dom0_init_screen_info(info, | |
13761 | - xen_start_info->console.dom0.info_size); | |
13762 | - xen_start_info->console.domU.mfn = 0; | |
13763 | - xen_start_info->console.domU.evtchn = 0; | |
13764 | - } else | |
13765 | - screen_info.orig_video_isVGA = 0; | |
13766 | - | |
13767 | -#ifdef CONFIG_BLK_DEV_RAM | |
13768 | - rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; | |
13769 | - rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); | |
13770 | - rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); | |
13771 | -#endif | |
13772 | - | |
13773 | - ARCH_SETUP | |
13774 | - | |
13775 | - printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | |
13776 | - print_memory_map(memory_setup()); | |
13777 | - | |
13778 | - copy_edd(); | |
13779 | - | |
13780 | - if (!boot_params.hdr.root_flags) | |
13781 | - root_mountflags &= ~MS_RDONLY; | |
13782 | - init_mm.start_code = (unsigned long) _text; | |
13783 | - init_mm.end_code = (unsigned long) _etext; | |
13784 | - init_mm.end_data = (unsigned long) _edata; | |
13785 | - init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) + | |
13786 | - xen_start_info->nr_pt_frames) << PAGE_SHIFT; | |
13787 | - | |
13788 | - code_resource.start = virt_to_phys(_text); | |
13789 | - code_resource.end = virt_to_phys(_etext)-1; | |
13790 | - data_resource.start = virt_to_phys(_etext); | |
13791 | - data_resource.end = virt_to_phys(_edata)-1; | |
13792 | - bss_resource.start = virt_to_phys(&__bss_start); | |
13793 | - bss_resource.end = virt_to_phys(&__bss_stop)-1; | |
13794 | - | |
13795 | - if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) | |
13796 | - i = COMMAND_LINE_SIZE; | |
13797 | - memcpy(boot_command_line, xen_start_info->cmd_line, i); | |
13798 | - boot_command_line[i - 1] = '\0'; | |
13799 | - parse_early_param(); | |
13800 | - | |
13801 | - if (user_defined_memmap) { | |
13802 | - printk(KERN_INFO "user-defined physical RAM map:\n"); | |
13803 | - print_memory_map("user"); | |
13804 | - } | |
13805 | - | |
13806 | - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | |
13807 | - *cmdline_p = command_line; | |
13808 | - | |
13809 | - if (efi_enabled) | |
13810 | - efi_init(); | |
13811 | - | |
13812 | - /* update e820 for memory not covered by WB MTRRs */ | |
13813 | - propagate_e820_map(); | |
13814 | - mtrr_bp_init(); | |
13815 | -#ifndef CONFIG_XEN | |
13816 | - if (mtrr_trim_uncached_memory(max_pfn)) | |
13817 | - propagate_e820_map(); | |
13818 | -#endif | |
13819 | - | |
13820 | - max_low_pfn = setup_memory(); | |
13821 | - | |
13822 | -#ifdef CONFIG_KVM_CLOCK | |
13823 | - kvmclock_init(); | |
13824 | -#endif | |
13825 | - | |
13826 | -#ifdef CONFIG_VMI | |
13827 | - /* | |
13828 | - * Must be after max_low_pfn is determined, and before kernel | |
13829 | - * pagetables are setup. | |
13830 | - */ | |
13831 | - vmi_init(); | |
13832 | -#endif | |
13833 | - kvm_guest_init(); | |
13834 | - | |
13835 | - /* | |
13836 | - * NOTE: before this point _nobody_ is allowed to allocate | |
13837 | - * any memory using the bootmem allocator. Although the | |
13838 | - * allocator is now initialised only the first 8Mb of the kernel | |
13839 | - * virtual address space has been mapped. All allocations before | |
13840 | - * paging_init() has completed must use the alloc_bootmem_low_pages() | |
13841 | - * variant (which allocates DMA'able memory) and care must be taken | |
13842 | - * not to exceed the 8Mb limit. | |
13843 | - */ | |
13844 | - | |
13845 | -#ifdef CONFIG_SMP | |
13846 | - smp_alloc_memory(); /* AP processor realmode stacks in low memory*/ | |
13847 | -#endif | |
13848 | - paging_init(); | |
13849 | - | |
13850 | - /* | |
13851 | - * NOTE: On x86-32, only from this point on, fixmaps are ready for use. | |
13852 | - */ | |
13853 | - | |
13854 | -#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT | |
13855 | - if (init_ohci1394_dma_early) | |
13856 | - init_ohci1394_dma_on_all_controllers(); | |
13857 | -#endif | |
13858 | - | |
13859 | - remapped_pgdat_init(); | |
13860 | - sparse_init(); | |
13861 | - zone_sizes_init(); | |
13862 | - | |
13863 | -#ifdef CONFIG_X86_FIND_SMP_CONFIG | |
13864 | - /* | |
13865 | - * Find and reserve possible boot-time SMP configuration: | |
13866 | - */ | |
13867 | - find_smp_config(); | |
13868 | -#endif | |
13869 | - | |
13870 | - p2m_pages = max_pfn; | |
13871 | - if (xen_start_info->nr_pages > max_pfn) { | |
13872 | - /* | |
13873 | - * the max_pfn was shrunk (probably by mem= or highmem= | |
13874 | - * kernel parameter); shrink reservation with the HV | |
13875 | - */ | |
13876 | - struct xen_memory_reservation reservation = { | |
13877 | - .address_bits = 0, | |
13878 | - .extent_order = 0, | |
13879 | - .domid = DOMID_SELF | |
13880 | - }; | |
13881 | - unsigned int difference; | |
13882 | - int ret; | |
13883 | - | |
13884 | - difference = xen_start_info->nr_pages - max_pfn; | |
13885 | - | |
13886 | - set_xen_guest_handle(reservation.extent_start, | |
13887 | - ((unsigned long *)xen_start_info->mfn_list) + max_pfn); | |
13888 | - reservation.nr_extents = difference; | |
13889 | - ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | |
13890 | - &reservation); | |
13891 | - BUG_ON (ret != difference); | |
13892 | - } | |
13893 | - else if (max_pfn > xen_start_info->nr_pages) | |
13894 | - p2m_pages = xen_start_info->nr_pages; | |
13895 | - | |
13896 | - /* Make sure we have a correctly sized P->M table. */ | |
13897 | - if (!xen_feature(XENFEAT_auto_translated_physmap)) { | |
13898 | - phys_to_machine_mapping = alloc_bootmem_low_pages( | |
13899 | - max_pfn * sizeof(unsigned long)); | |
13900 | - memset(phys_to_machine_mapping, ~0, | |
13901 | - max_pfn * sizeof(unsigned long)); | |
13902 | - memcpy(phys_to_machine_mapping, | |
13903 | - (unsigned long *)xen_start_info->mfn_list, | |
13904 | - p2m_pages * sizeof(unsigned long)); | |
13905 | - free_bootmem( | |
13906 | - __pa(xen_start_info->mfn_list), | |
13907 | - PFN_PHYS(PFN_UP(xen_start_info->nr_pages * | |
13908 | - sizeof(unsigned long)))); | |
13909 | - | |
13910 | - /* | |
13911 | - * Initialise the list of the frames that specify the list of | |
13912 | - * frames that make up the p2m table. Used by save/restore | |
13913 | - */ | |
13914 | - pfn_to_mfn_frame_list_list = alloc_bootmem_low_pages(PAGE_SIZE); | |
13915 | - | |
13916 | - fpp = PAGE_SIZE/sizeof(unsigned long); | |
13917 | - for (i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++) { | |
13918 | - if ((j % fpp) == 0) { | |
13919 | - k++; | |
13920 | - BUG_ON(k>=16); | |
13921 | - pfn_to_mfn_frame_list[k] = | |
13922 | - alloc_bootmem_low_pages(PAGE_SIZE); | |
13923 | - pfn_to_mfn_frame_list_list[k] = | |
13924 | - virt_to_mfn(pfn_to_mfn_frame_list[k]); | |
13925 | - j=0; | |
13926 | - } | |
13927 | - pfn_to_mfn_frame_list[k][j] = | |
13928 | - virt_to_mfn(&phys_to_machine_mapping[i]); | |
13929 | - } | |
13930 | - HYPERVISOR_shared_info->arch.max_pfn = max_pfn; | |
13931 | - HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | |
13932 | - virt_to_mfn(pfn_to_mfn_frame_list_list); | |
13933 | - } | |
13934 | - | |
13935 | - /* Mark all ISA DMA channels in-use - using them wouldn't work. */ | |
13936 | - for (i = 0; i < MAX_DMA_CHANNELS; ++i) | |
13937 | - if (i != 4 && request_dma(i, "xen") != 0) | |
13938 | - BUG(); | |
13939 | - | |
13940 | - /* | |
13941 | - * NOTE: at this point the bootmem allocator is fully available. | |
13942 | - */ | |
13943 | - | |
13944 | -#ifdef CONFIG_BLK_DEV_INITRD | |
13945 | - relocate_initrd(); | |
13946 | -#endif | |
13947 | - | |
13948 | - paravirt_post_allocator_init(); | |
13949 | - | |
13950 | - if (is_initial_xendomain()) | |
13951 | - dmi_scan_machine(); | |
13952 | - | |
13953 | - io_delay_init(); | |
13954 | - | |
13955 | -#if defined(CONFIG_X86_SMP) && !defined(CONFIG_XEN) | |
13956 | - /* | |
13957 | - * setup to use the early static init tables during kernel startup | |
13958 | - * X86_SMP will exclude sub-arches that don't deal well with it. | |
13959 | - */ | |
13960 | - x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; | |
13961 | - x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init; | |
13962 | -#ifdef CONFIG_NUMA | |
13963 | - x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init; | |
13964 | -#endif | |
13965 | -#endif | |
13966 | - | |
13967 | -#ifdef CONFIG_X86_GENERICARCH | |
13968 | - generic_apic_probe(); | |
13969 | -#endif | |
13970 | - | |
13971 | - set_iopl.iopl = 1; | |
13972 | - WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl)); | |
13973 | - | |
13974 | -#ifdef CONFIG_ACPI | |
13975 | - if (!is_initial_xendomain()) { | |
13976 | - printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); | |
13977 | - acpi_disabled = 1; | |
13978 | - acpi_ht = 0; | |
13979 | - } | |
13980 | - | |
13981 | - /* | |
13982 | - * Parse the ACPI tables for possible boot-time SMP configuration. | |
13983 | - */ | |
13984 | - acpi_boot_table_init(); | |
13985 | -#endif | |
13986 | - | |
13987 | -#ifndef CONFIG_XEN | |
13988 | - early_quirks(); | |
13989 | -#endif | |
13990 | - | |
13991 | -#ifdef CONFIG_ACPI | |
13992 | - acpi_boot_init(); | |
13993 | - | |
13994 | -#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) | |
13995 | - if (def_to_bigsmp) | |
13996 | - printk(KERN_WARNING "More than 8 CPUs detected and " | |
13997 | - "CONFIG_X86_PC cannot handle it.\nUse " | |
13998 | - "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); | |
13999 | -#endif | |
14000 | -#endif | |
14001 | -#ifdef CONFIG_X86_LOCAL_APIC | |
14002 | - if (smp_found_config) | |
14003 | - get_smp_config(); | |
14004 | -#endif | |
14005 | - | |
14006 | - e820_register_memory(); | |
14007 | - e820_mark_nosave_regions(); | |
14008 | - | |
14009 | - if (is_initial_xendomain()) { | |
14010 | -#ifdef CONFIG_VT | |
14011 | -#if defined(CONFIG_VGA_CONSOLE) | |
14012 | - if (!efi_enabled || | |
14013 | - (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) | |
14014 | - conswitchp = &vga_con; | |
14015 | -#elif defined(CONFIG_DUMMY_CONSOLE) | |
14016 | - conswitchp = &dummy_con; | |
14017 | -#endif | |
14018 | -#endif | |
14019 | - } else { | |
14020 | -#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE) | |
14021 | - conswitchp = &dummy_con; | |
14022 | -#endif | |
14023 | - } | |
14024 | -} | |
14025 | - | |
14026 | -static int | |
14027 | -xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) | |
14028 | -{ | |
14029 | - HYPERVISOR_shutdown(SHUTDOWN_crash); | |
14030 | - /* we're never actually going to get here... */ | |
14031 | - return NOTIFY_DONE; | |
14032 | -} | |
14033 | - | |
14034 | -/* | |
14035 | - * Request address space for all standard resources | |
14036 | - * | |
14037 | - * This is called just before pcibios_init(), which is also a | |
14038 | - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). | |
14039 | - */ | |
14040 | -static int __init request_standard_resources(void) | |
14041 | -{ | |
14042 | - int i; | |
14043 | - | |
14044 | - /* Nothing to do if not running in dom0. */ | |
14045 | - if (!is_initial_xendomain()) | |
14046 | - return 0; | |
14047 | - | |
14048 | - printk(KERN_INFO "Setting up standard PCI resources\n"); | |
14049 | - init_iomem_resources(&code_resource, &data_resource, &bss_resource); | |
14050 | - | |
14051 | - request_resource(&iomem_resource, &video_ram_resource); | |
14052 | - | |
14053 | - /* request I/O space for devices used on all i[345]86 PCs */ | |
14054 | - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | |
14055 | - request_resource(&ioport_resource, &standard_io_resources[i]); | |
14056 | - return 0; | |
14057 | -} | |
14058 | - | |
14059 | -subsys_initcall(request_standard_resources); | |
82094b55 | 14060 | --- sle11-2009-10-16.orig/arch/x86/kernel/setup_64-xen.c 2009-03-16 16:38:05.000000000 +0100 |
2cb7cef9 BS |
14061 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 |
14062 | @@ -1,1433 +0,0 @@ | |
14063 | -/* | |
14064 | - * Copyright (C) 1995 Linus Torvalds | |
14065 | - */ | |
14066 | - | |
14067 | -/* | |
14068 | - * This file handles the architecture-dependent parts of initialization | |
14069 | - */ | |
14070 | - | |
14071 | -#include <linux/errno.h> | |
14072 | -#include <linux/sched.h> | |
14073 | -#include <linux/kernel.h> | |
14074 | -#include <linux/mm.h> | |
14075 | -#include <linux/stddef.h> | |
14076 | -#include <linux/unistd.h> | |
14077 | -#include <linux/ptrace.h> | |
14078 | -#include <linux/slab.h> | |
14079 | -#include <linux/user.h> | |
14080 | -#include <linux/screen_info.h> | |
14081 | -#include <linux/ioport.h> | |
14082 | -#include <linux/delay.h> | |
14083 | -#include <linux/init.h> | |
14084 | -#include <linux/initrd.h> | |
14085 | -#include <linux/highmem.h> | |
14086 | -#include <linux/bootmem.h> | |
14087 | -#include <linux/module.h> | |
14088 | -#include <asm/processor.h> | |
14089 | -#include <linux/console.h> | |
14090 | -#include <linux/seq_file.h> | |
14091 | -#include <linux/crash_dump.h> | |
14092 | -#include <linux/root_dev.h> | |
14093 | -#include <linux/pci.h> | |
14094 | -#include <asm/pci-direct.h> | |
14095 | -#include <linux/efi.h> | |
14096 | -#include <linux/acpi.h> | |
14097 | -#include <linux/kallsyms.h> | |
14098 | -#include <linux/edd.h> | |
14099 | -#include <linux/iscsi_ibft.h> | |
14100 | -#include <linux/mmzone.h> | |
14101 | -#include <linux/kexec.h> | |
14102 | -#include <linux/cpufreq.h> | |
14103 | -#include <linux/dmi.h> | |
14104 | -#include <linux/dma-mapping.h> | |
14105 | -#include <linux/ctype.h> | |
14106 | -#include <linux/sort.h> | |
14107 | -#include <linux/uaccess.h> | |
14108 | -#include <linux/init_ohci1394_dma.h> | |
14109 | -#include <linux/kvm_para.h> | |
14110 | - | |
14111 | -#include <asm/mtrr.h> | |
14112 | -#include <asm/uaccess.h> | |
14113 | -#include <asm/system.h> | |
14114 | -#include <asm/vsyscall.h> | |
14115 | -#include <asm/io.h> | |
14116 | -#include <asm/smp.h> | |
14117 | -#include <asm/msr.h> | |
14118 | -#include <asm/desc.h> | |
14119 | -#include <video/edid.h> | |
14120 | -#include <asm/e820.h> | |
14121 | -#include <asm/dma.h> | |
14122 | -#include <asm/gart.h> | |
14123 | -#include <asm/mpspec.h> | |
14124 | -#include <asm/mmu_context.h> | |
14125 | -#include <asm/proto.h> | |
14126 | -#include <asm/setup.h> | |
14127 | -#include <asm/numa.h> | |
14128 | -#include <asm/sections.h> | |
14129 | -#include <asm/dmi.h> | |
14130 | -#include <asm/cacheflush.h> | |
14131 | -#include <asm/mce.h> | |
14132 | -#include <asm/ds.h> | |
14133 | -#include <asm/topology.h> | |
14134 | -#include <asm/pat.h> | |
14135 | - | |
14136 | -#include <mach_apic.h> | |
14137 | -#ifdef CONFIG_XEN | |
14138 | -#include <linux/percpu.h> | |
14139 | -#include <xen/interface/physdev.h> | |
14140 | -#include "setup_arch_pre.h" | |
14141 | -#include <asm/hypervisor.h> | |
14142 | -#include <xen/interface/nmi.h> | |
14143 | -#include <xen/features.h> | |
14144 | -#include <xen/firmware.h> | |
14145 | -#include <xen/xencons.h> | |
14146 | -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) | |
14147 | -#define PFN_PHYS(x) ((x) << PAGE_SHIFT) | |
14148 | -#include <asm/mach-xen/setup_arch_post.h> | |
14149 | -#include <xen/interface/memory.h> | |
14150 | - | |
14151 | -#ifdef CONFIG_XEN | |
14152 | -#include <xen/interface/kexec.h> | |
14153 | -#endif | |
14154 | - | |
14155 | -extern unsigned long start_pfn; | |
14156 | -extern struct edid_info edid_info; | |
14157 | - | |
14158 | -shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; | |
14159 | -EXPORT_SYMBOL(HYPERVISOR_shared_info); | |
14160 | - | |
14161 | -static int xen_panic_event(struct notifier_block *, unsigned long, void *); | |
14162 | -static struct notifier_block xen_panic_block = { | |
14163 | - xen_panic_event, NULL, 0 /* try to go last */ | |
14164 | -}; | |
14165 | - | |
14166 | -unsigned long *phys_to_machine_mapping; | |
14167 | -unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[512]; | |
14168 | - | |
14169 | -EXPORT_SYMBOL(phys_to_machine_mapping); | |
14170 | - | |
14171 | -DEFINE_PER_CPU(multicall_entry_t, multicall_list[8]); | |
14172 | -DEFINE_PER_CPU(int, nr_multicall_ents); | |
14173 | - | |
14174 | -/* Raw start-of-day parameters from the hypervisor. */ | |
14175 | -start_info_t *xen_start_info; | |
14176 | -EXPORT_SYMBOL(xen_start_info); | |
14177 | -#endif | |
14178 | - | |
14179 | -/* | |
14180 | - * Machine setup.. | |
14181 | - */ | |
14182 | - | |
14183 | -struct cpuinfo_x86 boot_cpu_data __read_mostly; | |
14184 | -EXPORT_SYMBOL(boot_cpu_data); | |
14185 | - | |
14186 | -__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; | |
14187 | - | |
14188 | -unsigned long mmu_cr4_features; | |
14189 | - | |
14190 | -/* Boot loader ID as an integer, for the benefit of proc_dointvec */ | |
14191 | -int bootloader_type; | |
14192 | - | |
14193 | -unsigned long saved_video_mode; | |
14194 | - | |
14195 | -int force_mwait __cpuinitdata; | |
14196 | - | |
14197 | -/* | |
14198 | - * Early DMI memory | |
14199 | - */ | |
14200 | -int dmi_alloc_index; | |
14201 | -char dmi_alloc_data[DMI_MAX_DATA]; | |
14202 | - | |
14203 | -/* | |
14204 | - * Setup options | |
14205 | - */ | |
14206 | -struct screen_info screen_info; | |
14207 | -EXPORT_SYMBOL(screen_info); | |
14208 | -struct sys_desc_table_struct { | |
14209 | - unsigned short length; | |
14210 | - unsigned char table[0]; | |
14211 | -}; | |
14212 | - | |
14213 | -struct edid_info edid_info; | |
14214 | -EXPORT_SYMBOL_GPL(edid_info); | |
14215 | - | |
14216 | -extern int root_mountflags; | |
14217 | - | |
14218 | -char __initdata command_line[COMMAND_LINE_SIZE]; | |
14219 | - | |
14220 | -static struct resource standard_io_resources[] = { | |
14221 | - { .name = "dma1", .start = 0x00, .end = 0x1f, | |
14222 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
14223 | - { .name = "pic1", .start = 0x20, .end = 0x21, | |
14224 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
14225 | - { .name = "timer0", .start = 0x40, .end = 0x43, | |
14226 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
14227 | - { .name = "timer1", .start = 0x50, .end = 0x53, | |
14228 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
14229 | - { .name = "keyboard", .start = 0x60, .end = 0x60, | |
14230 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
14231 | - { .name = "keyboard", .start = 0x64, .end = 0x64, | |
14232 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
14233 | - { .name = "dma page reg", .start = 0x80, .end = 0x8f, | |
14234 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
14235 | - { .name = "pic2", .start = 0xa0, .end = 0xa1, | |
14236 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
14237 | - { .name = "dma2", .start = 0xc0, .end = 0xdf, | |
14238 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, | |
14239 | - { .name = "fpu", .start = 0xf0, .end = 0xff, | |
14240 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO } | |
14241 | -}; | |
14242 | - | |
14243 | -#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM) | |
14244 | - | |
14245 | -static struct resource data_resource = { | |
14246 | - .name = "Kernel data", | |
14247 | - .start = 0, | |
14248 | - .end = 0, | |
14249 | - .flags = IORESOURCE_RAM, | |
14250 | -}; | |
14251 | -static struct resource code_resource = { | |
14252 | - .name = "Kernel code", | |
14253 | - .start = 0, | |
14254 | - .end = 0, | |
14255 | - .flags = IORESOURCE_RAM, | |
14256 | -}; | |
14257 | -static struct resource bss_resource = { | |
14258 | - .name = "Kernel bss", | |
14259 | - .start = 0, | |
14260 | - .end = 0, | |
14261 | - .flags = IORESOURCE_RAM, | |
14262 | -}; | |
14263 | - | |
14264 | -static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); | |
14265 | - | |
14266 | -#ifdef CONFIG_PROC_VMCORE | |
14267 | -/* elfcorehdr= specifies the location of elf core header | |
14268 | - * stored by the crashed kernel. This option will be passed | |
14269 | - * by kexec loader to the capture kernel. | |
14270 | - */ | |
14271 | -static int __init setup_elfcorehdr(char *arg) | |
14272 | -{ | |
14273 | - char *end; | |
14274 | - if (!arg) | |
14275 | - return -EINVAL; | |
14276 | - elfcorehdr_addr = memparse(arg, &end); | |
14277 | - return end > arg ? 0 : -EINVAL; | |
14278 | -} | |
14279 | -early_param("elfcorehdr", setup_elfcorehdr); | |
14280 | -#endif | |
14281 | - | |
14282 | -#ifndef CONFIG_NUMA | |
14283 | -static void __init | |
14284 | -contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |
14285 | -{ | |
14286 | - unsigned long bootmap_size, bootmap; | |
14287 | - | |
14288 | - bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; | |
14289 | - bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size, | |
14290 | - PAGE_SIZE); | |
14291 | - if (bootmap == -1L) | |
14292 | - panic("Cannot find bootmem map of size %ld\n", bootmap_size); | |
14293 | - bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); | |
14294 | - e820_register_active_regions(0, start_pfn, end_pfn); | |
14295 | -#ifdef CONFIG_XEN | |
14296 | - free_bootmem_with_active_regions(0, xen_start_info->nr_pages); | |
14297 | - early_res_to_bootmem(0, xen_start_info->nr_pages<<PAGE_SHIFT); | |
14298 | -#else | |
14299 | - free_bootmem_with_active_regions(0, end_pfn); | |
14300 | - early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); | |
14301 | -#endif | |
14302 | - reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); | |
14303 | -} | |
14304 | -#endif | |
14305 | - | |
14306 | -#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) | |
14307 | -struct edd edd; | |
14308 | -#ifdef CONFIG_EDD_MODULE | |
14309 | -EXPORT_SYMBOL(edd); | |
14310 | -#endif | |
14311 | -#ifndef CONFIG_XEN | |
14312 | -/** | |
14313 | - * copy_edd() - Copy the BIOS EDD information | |
14314 | - * from boot_params into a safe place. | |
14315 | - * | |
14316 | - */ | |
14317 | -static inline void copy_edd(void) | |
14318 | -{ | |
14319 | - memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, | |
14320 | - sizeof(edd.mbr_signature)); | |
14321 | - memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); | |
14322 | - edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; | |
14323 | - edd.edd_info_nr = boot_params.eddbuf_entries; | |
14324 | -} | |
14325 | -#endif | |
14326 | -#else | |
14327 | -static inline void copy_edd(void) | |
14328 | -{ | |
14329 | -} | |
14330 | -#endif | |
14331 | - | |
14332 | -#ifdef CONFIG_KEXEC | |
14333 | -#ifndef CONFIG_XEN | |
14334 | -static void __init reserve_crashkernel(void) | |
14335 | -{ | |
14336 | - unsigned long long total_mem; | |
14337 | - unsigned long long crash_size, crash_base; | |
14338 | - int ret; | |
14339 | - | |
14340 | - total_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT; | |
14341 | - | |
14342 | - ret = parse_crashkernel(boot_command_line, total_mem, | |
14343 | - &crash_size, &crash_base); | |
14344 | - if (ret == 0 && crash_size) { | |
14345 | - if (crash_base <= 0) { | |
14346 | - printk(KERN_INFO "crashkernel reservation failed - " | |
14347 | - "you have to specify a base address\n"); | |
14348 | - return; | |
14349 | - } | |
14350 | - | |
14351 | - if (reserve_bootmem(crash_base, crash_size, | |
14352 | - BOOTMEM_EXCLUSIVE) < 0) { | |
14353 | - printk(KERN_INFO "crashkernel reservation failed - " | |
14354 | - "memory is in use\n"); | |
14355 | - return; | |
14356 | - } | |
14357 | - | |
14358 | - printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " | |
14359 | - "for crashkernel (System RAM: %ldMB)\n", | |
14360 | - (unsigned long)(crash_size >> 20), | |
14361 | - (unsigned long)(crash_base >> 20), | |
14362 | - (unsigned long)(total_mem >> 20)); | |
14363 | - crashk_res.start = crash_base; | |
14364 | - crashk_res.end = crash_base + crash_size - 1; | |
14365 | - insert_resource(&iomem_resource, &crashk_res); | |
14366 | - } | |
14367 | -} | |
14368 | -#else | |
14369 | -#define reserve_crashkernel xen_machine_kexec_setup_resources | |
14370 | -#endif | |
14371 | -#else | |
14372 | -static inline void __init reserve_crashkernel(void) | |
14373 | -{} | |
14374 | -#endif | |
14375 | - | |
14376 | -/* Overridden in paravirt.c if CONFIG_PARAVIRT */ | |
14377 | -void __attribute__((weak)) __init memory_setup(void) | |
14378 | -{ | |
14379 | - machine_specific_memory_setup(); | |
14380 | -} | |
14381 | - | |
14382 | -static void __init parse_setup_data(void) | |
14383 | -{ | |
14384 | - struct setup_data *data; | |
14385 | - unsigned long pa_data; | |
14386 | - | |
14387 | - if (boot_params.hdr.version < 0x0209) | |
14388 | - return; | |
14389 | - pa_data = boot_params.hdr.setup_data; | |
14390 | - while (pa_data) { | |
14391 | - data = early_ioremap(pa_data, PAGE_SIZE); | |
14392 | - switch (data->type) { | |
14393 | - default: | |
14394 | - break; | |
14395 | - } | |
14396 | -#ifndef CONFIG_DEBUG_BOOT_PARAMS | |
14397 | - free_early(pa_data, pa_data+sizeof(*data)+data->len); | |
14398 | -#endif | |
14399 | - pa_data = data->next; | |
14400 | - early_iounmap(data, PAGE_SIZE); | |
14401 | - } | |
14402 | -} | |
14403 | - | |
14404 | -#ifdef CONFIG_PCI_MMCONFIG | |
14405 | -extern void __cpuinit fam10h_check_enable_mmcfg(void); | |
14406 | -extern void __init check_enable_amd_mmconf_dmi(void); | |
14407 | -#else | |
14408 | -void __cpuinit fam10h_check_enable_mmcfg(void) | |
14409 | -{ | |
14410 | -} | |
14411 | -void __init check_enable_amd_mmconf_dmi(void) | |
14412 | -{ | |
14413 | -} | |
14414 | -#endif | |
14415 | - | |
14416 | -/* | |
14417 | - * setup_arch - architecture-specific boot-time initializations | |
14418 | - * | |
14419 | - * Note: On x86_64, fixmaps are ready for use even before this is called. | |
14420 | - */ | |
14421 | -void __init setup_arch(char **cmdline_p) | |
14422 | -{ | |
14423 | - unsigned i; | |
14424 | - | |
14425 | -#ifdef CONFIG_XEN | |
14426 | - extern struct e820map machine_e820; | |
14427 | - | |
14428 | - printk(KERN_INFO "Command line: %s\n", boot_command_line); | |
14429 | - | |
14430 | - /* Register a call for panic conditions. */ | |
14431 | - atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); | |
14432 | - | |
14433 | - WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable, | |
14434 | - VMASST_TYPE_writable_pagetables)); | |
14435 | - | |
14436 | - early_ioremap_init(); | |
14437 | - | |
14438 | - ROOT_DEV = MKDEV(RAMDISK_MAJOR,0); | |
14439 | - screen_info = boot_params.screen_info; | |
14440 | - | |
14441 | - if (is_initial_xendomain()) { | |
14442 | - const struct dom0_vga_console_info *info = | |
14443 | - (void *)((char *)xen_start_info + | |
14444 | - xen_start_info->console.dom0.info_off); | |
14445 | - | |
14446 | - dom0_init_screen_info(info, | |
14447 | - xen_start_info->console.dom0.info_size); | |
14448 | - xen_start_info->console.domU.mfn = 0; | |
14449 | - xen_start_info->console.domU.evtchn = 0; | |
14450 | - } else | |
14451 | - screen_info.orig_video_isVGA = 0; | |
14452 | - | |
14453 | - copy_edid(); | |
14454 | -#else | |
14455 | - printk(KERN_INFO "Command line: %s\n", boot_command_line); | |
14456 | - | |
14457 | - ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); | |
14458 | - screen_info = boot_params.screen_info; | |
14459 | - edid_info = boot_params.edid_info; | |
14460 | -#endif /* !CONFIG_XEN */ | |
14461 | - saved_video_mode = boot_params.hdr.vid_mode; | |
14462 | - bootloader_type = boot_params.hdr.type_of_loader; | |
14463 | - | |
14464 | -#ifdef CONFIG_BLK_DEV_RAM | |
14465 | - rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; | |
14466 | - rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); | |
14467 | - rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); | |
14468 | -#endif | |
14469 | -#ifdef CONFIG_EFI | |
14470 | - if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | |
14471 | - "EL64", 4)) | |
14472 | - efi_enabled = 1; | |
14473 | -#endif | |
14474 | - | |
14475 | - ARCH_SETUP | |
14476 | - | |
14477 | - memory_setup(); | |
14478 | - copy_edd(); | |
14479 | - | |
14480 | - if (!boot_params.hdr.root_flags) | |
14481 | - root_mountflags &= ~MS_RDONLY; | |
14482 | - init_mm.start_code = (unsigned long) &_text; | |
14483 | - init_mm.end_code = (unsigned long) &_etext; | |
14484 | - init_mm.end_data = (unsigned long) &_edata; | |
14485 | - init_mm.brk = (unsigned long) &_end; | |
14486 | - | |
14487 | - code_resource.start = virt_to_phys(&_text); | |
14488 | - code_resource.end = virt_to_phys(&_etext)-1; | |
14489 | - data_resource.start = virt_to_phys(&_etext); | |
14490 | - data_resource.end = virt_to_phys(&_edata)-1; | |
14491 | - bss_resource.start = virt_to_phys(&__bss_start); | |
14492 | - bss_resource.end = virt_to_phys(&__bss_stop)-1; | |
14493 | - | |
14494 | - early_identify_cpu(&boot_cpu_data); | |
14495 | - | |
14496 | - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | |
14497 | - *cmdline_p = command_line; | |
14498 | - | |
14499 | - parse_setup_data(); | |
14500 | - | |
14501 | - parse_early_param(); | |
14502 | - | |
14503 | -#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT | |
14504 | - if (init_ohci1394_dma_early) | |
14505 | - init_ohci1394_dma_on_all_controllers(); | |
14506 | -#endif | |
14507 | - | |
14508 | - finish_e820_parsing(); | |
14509 | - | |
14510 | -#ifndef CONFIG_XEN | |
14511 | - /* after parse_early_param, so could debug it */ | |
14512 | - insert_resource(&iomem_resource, &code_resource); | |
14513 | - insert_resource(&iomem_resource, &data_resource); | |
14514 | - insert_resource(&iomem_resource, &bss_resource); | |
14515 | -#endif | |
14516 | - | |
14517 | - early_gart_iommu_check(); | |
14518 | - | |
14519 | - e820_register_active_regions(0, 0, -1UL); | |
14520 | - /* | |
14521 | - * partially used pages are not usable - thus | |
14522 | - * we are rounding upwards: | |
14523 | - */ | |
14524 | - end_pfn = e820_end_of_ram(); | |
14525 | - /* update e820 for memory not covered by WB MTRRs */ | |
14526 | - mtrr_bp_init(); | |
14527 | -#ifndef CONFIG_XEN | |
14528 | - if (mtrr_trim_uncached_memory(end_pfn)) { | |
14529 | - e820_register_active_regions(0, 0, -1UL); | |
14530 | - end_pfn = e820_end_of_ram(); | |
14531 | - } | |
14532 | -#endif | |
14533 | - | |
14534 | - num_physpages = end_pfn; | |
14535 | - max_mapnr = end_pfn; | |
14536 | - | |
14537 | - check_efer(); | |
14538 | - | |
14539 | - max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT)); | |
14540 | - if (efi_enabled) | |
14541 | - efi_init(); | |
14542 | - | |
14543 | -#ifndef CONFIG_XEN | |
14544 | - vsmp_init(); | |
14545 | -#endif | |
14546 | - | |
14547 | - if (is_initial_xendomain()) | |
14548 | - dmi_scan_machine(); | |
14549 | - | |
14550 | - io_delay_init(); | |
14551 | - | |
14552 | -#ifdef CONFIG_KVM_CLOCK | |
14553 | - kvmclock_init(); | |
14554 | -#endif | |
14555 | - | |
14556 | -#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) | |
14557 | - /* setup to use the early static init tables during kernel startup */ | |
14558 | - x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; | |
14559 | - x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init; | |
14560 | -#ifdef CONFIG_NUMA | |
14561 | - x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init; | |
14562 | -#endif | |
14563 | -#endif | |
14564 | - | |
14565 | - /* How many end-of-memory variables you have, grandma! */ | |
14566 | - max_low_pfn = end_pfn; | |
14567 | - max_pfn = end_pfn; | |
14568 | - high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1; | |
14569 | - | |
14570 | - /* Remove active ranges so rediscovery with NUMA-awareness happens */ | |
14571 | - remove_all_active_ranges(); | |
14572 | - | |
14573 | -#ifdef CONFIG_ACPI_NUMA | |
14574 | - /* | |
14575 | - * Parse SRAT to discover nodes. | |
14576 | - */ | |
14577 | - acpi_numa_init(); | |
14578 | -#endif | |
14579 | - | |
14580 | -#ifdef CONFIG_NUMA | |
14581 | - numa_initmem_init(0, end_pfn); | |
14582 | -#else | |
14583 | - contig_initmem_init(0, end_pfn); | |
14584 | -#endif | |
14585 | - | |
14586 | -#ifndef CONFIG_XEN | |
14587 | - dma32_reserve_bootmem(); | |
14588 | - | |
14589 | -#ifdef CONFIG_ACPI_SLEEP | |
14590 | - /* | |
14591 | - * Reserve low memory region for sleep support. | |
14592 | - */ | |
14593 | - acpi_reserve_bootmem(); | |
14594 | -#endif | |
14595 | - | |
14596 | - if (efi_enabled) | |
14597 | - efi_reserve_bootmem(); | |
14598 | -#endif | |
14599 | - | |
14600 | -#ifdef CONFIG_BLK_DEV_INITRD | |
14601 | -#ifdef CONFIG_XEN | |
14602 | - if (xen_start_info->mod_start) { | |
14603 | - unsigned long ramdisk_image = __pa(xen_start_info->mod_start); | |
14604 | - unsigned long ramdisk_size = xen_start_info->mod_len; | |
14605 | -#else | |
14606 | - if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | |
14607 | - unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | |
14608 | - unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | |
14609 | -#endif | |
14610 | - unsigned long ramdisk_end = ramdisk_image + ramdisk_size; | |
14611 | - unsigned long end_of_mem = end_pfn << PAGE_SHIFT; | |
14612 | - | |
14613 | - if (ramdisk_end <= end_of_mem) { | |
14614 | - /* | |
14615 | - * don't need to reserve again, already reserved early | |
14616 | - * in x86_64_start_kernel, and early_res_to_bootmem | |
14617 | - * convert that to reserved in bootmem | |
14618 | - */ | |
14619 | - initrd_start = ramdisk_image + PAGE_OFFSET; | |
14620 | - initrd_end = initrd_start+ramdisk_size; | |
14621 | -#ifdef CONFIG_XEN | |
14622 | - initrd_below_start_ok = 1; | |
14623 | -#endif | |
14624 | - } else { | |
14625 | - free_bootmem(ramdisk_image, ramdisk_size); | |
14626 | - printk(KERN_ERR "initrd extends beyond end of memory " | |
14627 | - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", | |
14628 | - ramdisk_end, end_of_mem); | |
14629 | - initrd_start = 0; | |
14630 | - } | |
14631 | - } | |
14632 | -#endif | |
14633 | - reserve_crashkernel(); | |
14634 | - | |
14635 | - reserve_ibft_region(); | |
14636 | - | |
14637 | - paging_init(); | |
14638 | - map_vsyscall(); | |
14639 | -#ifdef CONFIG_X86_LOCAL_APIC | |
14640 | - /* | |
14641 | - * Find and reserve possible boot-time SMP configuration: | |
14642 | - */ | |
14643 | - find_smp_config(); | |
14644 | -#endif | |
14645 | -#ifdef CONFIG_XEN | |
14646 | - { | |
14647 | - int i, j, k, fpp; | |
14648 | - unsigned long p2m_pages; | |
14649 | - | |
14650 | - p2m_pages = end_pfn; | |
14651 | - if (xen_start_info->nr_pages > end_pfn) { | |
14652 | - /* | |
14653 | - * the end_pfn was shrunk (probably by mem= or highmem= | |
14654 | - * kernel parameter); shrink reservation with the HV | |
14655 | - */ | |
14656 | - struct xen_memory_reservation reservation = { | |
14657 | - .address_bits = 0, | |
14658 | - .extent_order = 0, | |
14659 | - .domid = DOMID_SELF | |
14660 | - }; | |
14661 | - unsigned int difference; | |
14662 | - int ret; | |
14663 | - | |
14664 | - difference = xen_start_info->nr_pages - end_pfn; | |
14665 | - | |
14666 | - set_xen_guest_handle(reservation.extent_start, | |
14667 | - ((unsigned long *)xen_start_info->mfn_list) + end_pfn); | |
14668 | - reservation.nr_extents = difference; | |
14669 | - ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | |
14670 | - &reservation); | |
14671 | - BUG_ON (ret != difference); | |
14672 | - } | |
14673 | - else if (end_pfn > xen_start_info->nr_pages) | |
14674 | - p2m_pages = xen_start_info->nr_pages; | |
14675 | - | |
14676 | - if (!xen_feature(XENFEAT_auto_translated_physmap)) { | |
14677 | - /* Make sure we have a large enough P->M table. */ | |
14678 | - phys_to_machine_mapping = alloc_bootmem_pages( | |
14679 | - end_pfn * sizeof(unsigned long)); | |
14680 | - memset(phys_to_machine_mapping, ~0, | |
14681 | - end_pfn * sizeof(unsigned long)); | |
14682 | - memcpy(phys_to_machine_mapping, | |
14683 | - (unsigned long *)xen_start_info->mfn_list, | |
14684 | - p2m_pages * sizeof(unsigned long)); | |
14685 | - free_bootmem( | |
14686 | - __pa(xen_start_info->mfn_list), | |
14687 | - PFN_PHYS(PFN_UP(xen_start_info->nr_pages * | |
14688 | - sizeof(unsigned long)))); | |
14689 | - | |
14690 | - /* | |
14691 | - * Initialise the list of the frames that specify the | |
14692 | - * list of frames that make up the p2m table. Used by | |
14693 | - * save/restore. | |
14694 | - */ | |
14695 | - pfn_to_mfn_frame_list_list = alloc_bootmem_pages(PAGE_SIZE); | |
14696 | - | |
14697 | - fpp = PAGE_SIZE/sizeof(unsigned long); | |
14698 | - for (i=0, j=0, k=-1; i< end_pfn; i+=fpp, j++) { | |
14699 | - if ((j % fpp) == 0) { | |
14700 | - k++; | |
14701 | - BUG_ON(k>=fpp); | |
14702 | - pfn_to_mfn_frame_list[k] = | |
14703 | - alloc_bootmem_pages(PAGE_SIZE); | |
14704 | - pfn_to_mfn_frame_list_list[k] = | |
14705 | - virt_to_mfn(pfn_to_mfn_frame_list[k]); | |
14706 | - j=0; | |
14707 | - } | |
14708 | - pfn_to_mfn_frame_list[k][j] = | |
14709 | - virt_to_mfn(&phys_to_machine_mapping[i]); | |
14710 | - } | |
14711 | - HYPERVISOR_shared_info->arch.max_pfn = end_pfn; | |
14712 | - HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | |
14713 | - virt_to_mfn(pfn_to_mfn_frame_list_list); | |
14714 | - } | |
14715 | - | |
14716 | - /* Mark all ISA DMA channels in-use - using them wouldn't work. */ | |
14717 | - for (i = 0; i < MAX_DMA_CHANNELS; ++i) | |
14718 | - if (i != 4 && request_dma(i, "xen") != 0) | |
14719 | - BUG(); | |
14720 | - } | |
14721 | - | |
14722 | -#ifdef CONFIG_ACPI | |
14723 | - if (!is_initial_xendomain()) { | |
14724 | - acpi_disabled = 1; | |
14725 | - acpi_ht = 0; | |
14726 | - } | |
14727 | -#endif | |
14728 | -#endif | |
14729 | - | |
14730 | -#ifndef CONFIG_XEN | |
14731 | - early_quirks(); | |
14732 | -#endif | |
14733 | - | |
14734 | -#ifdef CONFIG_ACPI | |
14735 | - /* | |
14736 | - * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). | |
14737 | - * Call this early for SRAT node setup. | |
14738 | - */ | |
14739 | - acpi_boot_table_init(); | |
14740 | - | |
14741 | - /* | |
14742 | - * Read APIC and some other early information from ACPI tables. | |
14743 | - */ | |
14744 | - acpi_boot_init(); | |
14745 | -#endif | |
14746 | - | |
14747 | - init_cpu_to_node(); | |
14748 | - | |
14749 | -#ifdef CONFIG_X86_LOCAL_APIC | |
14750 | - /* | |
14751 | - * get boot-time SMP configuration: | |
14752 | - */ | |
14753 | - if (smp_found_config) | |
14754 | - get_smp_config(); | |
14755 | -#ifndef CONFIG_XEN | |
14756 | - init_apic_mappings(); | |
14757 | - ioapic_init_mappings(); | |
14758 | -#endif | |
14759 | -#endif | |
14760 | -#if defined(CONFIG_XEN) && defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU) | |
14761 | - prefill_possible_map(); | |
14762 | -#endif | |
14763 | - | |
14764 | - kvm_guest_init(); | |
14765 | - | |
14766 | - /* | |
14767 | - * We trust e820 completely. No explicit ROM probing in memory. | |
14768 | - */ | |
14769 | -#ifdef CONFIG_XEN | |
14770 | - if (is_initial_xendomain()) | |
14771 | - e820_reserve_resources(machine_e820.map, machine_e820.nr_map); | |
14772 | -#else | |
14773 | - e820_reserve_resources(e820.map, e820.nr_map); | |
14774 | - e820_mark_nosave_regions(); | |
14775 | -#endif | |
14776 | - | |
14777 | - /* request I/O space for devices used on all i[345]86 PCs */ | |
14778 | - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | |
14779 | - request_resource(&ioport_resource, &standard_io_resources[i]); | |
14780 | - | |
14781 | -#ifdef CONFIG_XEN | |
14782 | - if (is_initial_xendomain()) | |
14783 | - e820_setup_gap(machine_e820.map, machine_e820.nr_map); | |
14784 | -#else | |
14785 | - e820_setup_gap(e820.map, e820.nr_map); | |
14786 | -#endif | |
14787 | - | |
14788 | -#ifdef CONFIG_XEN | |
14789 | - { | |
14790 | - struct physdev_set_iopl set_iopl; | |
14791 | - | |
14792 | - set_iopl.iopl = 1; | |
14793 | - WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl)); | |
14794 | - | |
14795 | - if (is_initial_xendomain()) { | |
14796 | -#ifdef CONFIG_VT | |
14797 | -#if defined(CONFIG_VGA_CONSOLE) | |
14798 | - conswitchp = &vga_con; | |
14799 | -#elif defined(CONFIG_DUMMY_CONSOLE) | |
14800 | - conswitchp = &dummy_con; | |
14801 | -#endif | |
14802 | -#endif | |
14803 | - } else { | |
14804 | -#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE) | |
14805 | - conswitchp = &dummy_con; | |
14806 | -#endif | |
14807 | - } | |
14808 | - } | |
14809 | -#else /* CONFIG_XEN */ | |
14810 | - | |
14811 | -#ifdef CONFIG_VT | |
14812 | -#if defined(CONFIG_VGA_CONSOLE) | |
14813 | - if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) | |
14814 | - conswitchp = &vga_con; | |
14815 | -#elif defined(CONFIG_DUMMY_CONSOLE) | |
14816 | - conswitchp = &dummy_con; | |
14817 | -#endif | |
14818 | -#endif | |
14819 | - | |
14820 | -#endif /* !CONFIG_XEN */ | |
14821 | - | |
14822 | - /* do this before identify_cpu for boot cpu */ | |
14823 | - check_enable_amd_mmconf_dmi(); | |
14824 | -} | |
14825 | - | |
14826 | -#ifdef CONFIG_XEN | |
14827 | -static int | |
14828 | -xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) | |
14829 | -{ | |
14830 | - HYPERVISOR_shutdown(SHUTDOWN_crash); | |
14831 | - /* we're never actually going to get here... */ | |
14832 | - return NOTIFY_DONE; | |
14833 | -} | |
14834 | -#endif /* !CONFIG_XEN */ | |
14835 | - | |
14836 | - | |
14837 | -static int __cpuinit get_model_name(struct cpuinfo_x86 *c) | |
14838 | -{ | |
14839 | - unsigned int *v; | |
14840 | - | |
14841 | - if (c->extended_cpuid_level < 0x80000004) | |
14842 | - return 0; | |
14843 | - | |
14844 | - v = (unsigned int *) c->x86_model_id; | |
14845 | - cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); | |
14846 | - cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); | |
14847 | - cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); | |
14848 | - c->x86_model_id[48] = 0; | |
14849 | - return 1; | |
14850 | -} | |
14851 | - | |
14852 | - | |
14853 | -static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |
14854 | -{ | |
14855 | - unsigned int n, dummy, eax, ebx, ecx, edx; | |
14856 | - | |
14857 | - n = c->extended_cpuid_level; | |
14858 | - | |
14859 | - if (n >= 0x80000005) { | |
14860 | - cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); | |
14861 | - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " | |
14862 | - "D cache %dK (%d bytes/line)\n", | |
14863 | - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); | |
14864 | - c->x86_cache_size = (ecx>>24) + (edx>>24); | |
14865 | - /* On K8 L1 TLB is inclusive, so don't count it */ | |
14866 | - c->x86_tlbsize = 0; | |
14867 | - } | |
14868 | - | |
14869 | - if (n >= 0x80000006) { | |
14870 | - cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); | |
14871 | - ecx = cpuid_ecx(0x80000006); | |
14872 | - c->x86_cache_size = ecx >> 16; | |
14873 | - c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); | |
14874 | - | |
14875 | - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", | |
14876 | - c->x86_cache_size, ecx & 0xFF); | |
14877 | - } | |
14878 | - if (n >= 0x80000008) { | |
14879 | - cpuid(0x80000008, &eax, &dummy, &dummy, &dummy); | |
14880 | - c->x86_virt_bits = (eax >> 8) & 0xff; | |
14881 | - c->x86_phys_bits = eax & 0xff; | |
14882 | - } | |
14883 | -} | |
14884 | - | |
14885 | -#ifdef CONFIG_NUMA | |
14886 | -static int __cpuinit nearby_node(int apicid) | |
14887 | -{ | |
14888 | - int i, node; | |
14889 | - | |
14890 | - for (i = apicid - 1; i >= 0; i--) { | |
14891 | - node = apicid_to_node[i]; | |
14892 | - if (node != NUMA_NO_NODE && node_online(node)) | |
14893 | - return node; | |
14894 | - } | |
14895 | - for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { | |
14896 | - node = apicid_to_node[i]; | |
14897 | - if (node != NUMA_NO_NODE && node_online(node)) | |
14898 | - return node; | |
14899 | - } | |
14900 | - return first_node(node_online_map); /* Shouldn't happen */ | |
14901 | -} | |
14902 | -#endif | |
14903 | - | |
14904 | -/* | |
14905 | - * On a AMD dual core setup the lower bits of the APIC id distingush the cores. | |
14906 | - * Assumes number of cores is a power of two. | |
14907 | - */ | |
14908 | -static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | |
14909 | -{ | |
14910 | -#ifdef CONFIG_SMP | |
14911 | - unsigned bits; | |
14912 | -#ifdef CONFIG_NUMA | |
14913 | - int cpu = smp_processor_id(); | |
14914 | - int node = 0; | |
14915 | - unsigned apicid = hard_smp_processor_id(); | |
14916 | -#endif | |
14917 | - bits = c->x86_coreid_bits; | |
14918 | - | |
14919 | - /* Low order bits define the core id (index of core in socket) */ | |
14920 | - c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); | |
14921 | - /* Convert the initial APIC ID into the socket ID */ | |
14922 | - c->phys_proc_id = c->initial_apicid >> bits; | |
14923 | - | |
14924 | -#ifdef CONFIG_NUMA | |
14925 | - node = c->phys_proc_id; | |
14926 | - if (apicid_to_node[apicid] != NUMA_NO_NODE) | |
14927 | - node = apicid_to_node[apicid]; | |
14928 | - if (!node_online(node)) { | |
14929 | - /* Two possibilities here: | |
14930 | - - The CPU is missing memory and no node was created. | |
14931 | - In that case try picking one from a nearby CPU | |
14932 | - - The APIC IDs differ from the HyperTransport node IDs | |
14933 | - which the K8 northbridge parsing fills in. | |
14934 | - Assume they are all increased by a constant offset, | |
14935 | - but in the same order as the HT nodeids. | |
14936 | - If that doesn't result in a usable node fall back to the | |
14937 | - path for the previous case. */ | |
14938 | - | |
14939 | - int ht_nodeid = c->initial_apicid; | |
14940 | - | |
14941 | - if (ht_nodeid >= 0 && | |
14942 | - apicid_to_node[ht_nodeid] != NUMA_NO_NODE) | |
14943 | - node = apicid_to_node[ht_nodeid]; | |
14944 | - /* Pick a nearby node */ | |
14945 | - if (!node_online(node)) | |
14946 | - node = nearby_node(apicid); | |
14947 | - } | |
14948 | - numa_set_node(cpu, node); | |
14949 | - | |
14950 | - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | |
14951 | -#endif | |
14952 | -#endif | |
14953 | -} | |
14954 | - | |
14955 | -static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) | |
14956 | -{ | |
14957 | -#ifdef CONFIG_SMP | |
14958 | - unsigned bits, ecx; | |
14959 | - | |
14960 | - /* Multi core CPU? */ | |
14961 | - if (c->extended_cpuid_level < 0x80000008) | |
14962 | - return; | |
14963 | - | |
14964 | - ecx = cpuid_ecx(0x80000008); | |
14965 | - | |
14966 | - c->x86_max_cores = (ecx & 0xff) + 1; | |
14967 | - | |
14968 | - /* CPU telling us the core id bits shift? */ | |
14969 | - bits = (ecx >> 12) & 0xF; | |
14970 | - | |
14971 | - /* Otherwise recompute */ | |
14972 | - if (bits == 0) { | |
14973 | - while ((1 << bits) < c->x86_max_cores) | |
14974 | - bits++; | |
14975 | - } | |
14976 | - | |
14977 | - c->x86_coreid_bits = bits; | |
14978 | - | |
14979 | -#endif | |
14980 | -} | |
14981 | - | |
14982 | -#define ENABLE_C1E_MASK 0x18000000 | |
14983 | -#define CPUID_PROCESSOR_SIGNATURE 1 | |
14984 | -#define CPUID_XFAM 0x0ff00000 | |
14985 | -#define CPUID_XFAM_K8 0x00000000 | |
14986 | -#define CPUID_XFAM_10H 0x00100000 | |
14987 | -#define CPUID_XFAM_11H 0x00200000 | |
14988 | -#define CPUID_XMOD 0x000f0000 | |
14989 | -#define CPUID_XMOD_REV_F 0x00040000 | |
14990 | - | |
14991 | -#ifndef CONFIG_XEN | |
14992 | -/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ | |
14993 | -static __cpuinit int amd_apic_timer_broken(void) | |
14994 | -{ | |
14995 | - u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | |
14996 | - | |
14997 | - switch (eax & CPUID_XFAM) { | |
14998 | - case CPUID_XFAM_K8: | |
14999 | - if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) | |
15000 | - break; | |
15001 | - case CPUID_XFAM_10H: | |
15002 | - case CPUID_XFAM_11H: | |
15003 | - rdmsr(MSR_K8_ENABLE_C1E, lo, hi); | |
15004 | - if (lo & ENABLE_C1E_MASK) | |
15005 | - return 1; | |
15006 | - break; | |
15007 | - default: | |
15008 | - /* err on the side of caution */ | |
15009 | - return 1; | |
15010 | - } | |
15011 | - return 0; | |
15012 | -} | |
15013 | -#endif | |
15014 | - | |
15015 | -static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |
15016 | -{ | |
15017 | - early_init_amd_mc(c); | |
15018 | - | |
15019 | - /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | |
15020 | - if (c->x86_power & (1<<8)) | |
15021 | - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | |
15022 | -} | |
15023 | - | |
15024 | -static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |
15025 | -{ | |
15026 | - unsigned level; | |
15027 | - | |
15028 | -#ifdef CONFIG_SMP | |
15029 | - unsigned long value; | |
15030 | - | |
15031 | - /* | |
15032 | - * Disable TLB flush filter by setting HWCR.FFDIS on K8 | |
15033 | - * bit 6 of msr C001_0015 | |
15034 | - * | |
15035 | - * Errata 63 for SH-B3 steppings | |
15036 | - * Errata 122 for all steppings (F+ have it disabled by default) | |
15037 | - */ | |
15038 | - if (c->x86 == 15) { | |
15039 | - rdmsrl(MSR_K8_HWCR, value); | |
15040 | - value |= 1 << 6; | |
15041 | - wrmsrl(MSR_K8_HWCR, value); | |
15042 | - } | |
15043 | -#endif | |
15044 | - | |
15045 | - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; | |
15046 | - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ | |
15047 | - clear_cpu_cap(c, 0*32+31); | |
15048 | - | |
15049 | - /* On C+ stepping K8 rep microcode works well for copy/memset */ | |
15050 | - level = cpuid_eax(1); | |
15051 | - if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || | |
15052 | - level >= 0x0f58)) | |
15053 | - set_cpu_cap(c, X86_FEATURE_REP_GOOD); | |
15054 | - if (c->x86 == 0x10 || c->x86 == 0x11) | |
15055 | - set_cpu_cap(c, X86_FEATURE_REP_GOOD); | |
15056 | - | |
15057 | - /* Enable workaround for FXSAVE leak */ | |
15058 | - if (c->x86 >= 6) | |
15059 | - set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); | |
15060 | - | |
15061 | - level = get_model_name(c); | |
15062 | - if (!level) { | |
15063 | - switch (c->x86) { | |
15064 | - case 15: | |
15065 | - /* Should distinguish Models here, but this is only | |
15066 | - a fallback anyways. */ | |
15067 | - strcpy(c->x86_model_id, "Hammer"); | |
15068 | - break; | |
15069 | - } | |
15070 | - } | |
15071 | - display_cacheinfo(c); | |
15072 | - | |
15073 | - /* Multi core CPU? */ | |
15074 | - if (c->extended_cpuid_level >= 0x80000008) | |
15075 | - amd_detect_cmp(c); | |
15076 | - | |
15077 | - if (c->extended_cpuid_level >= 0x80000006 && | |
15078 | - (cpuid_edx(0x80000006) & 0xf000)) | |
15079 | - num_cache_leaves = 4; | |
15080 | - else | |
15081 | - num_cache_leaves = 3; | |
15082 | - | |
15083 | - if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11) | |
15084 | - set_cpu_cap(c, X86_FEATURE_K8); | |
15085 | - | |
15086 | - /* MFENCE stops RDTSC speculation */ | |
15087 | - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); | |
15088 | - | |
15089 | - if (c->x86 == 0x10) | |
15090 | - fam10h_check_enable_mmcfg(); | |
15091 | - | |
15092 | -#ifndef CONFIG_XEN | |
15093 | - if (amd_apic_timer_broken()) | |
15094 | - disable_apic_timer = 1; | |
15095 | - | |
15096 | - if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { | |
15097 | - unsigned long long tseg; | |
15098 | - | |
15099 | - /* | |
15100 | - * Split up direct mapping around the TSEG SMM area. | |
15101 | - * Don't do it for gbpages because there seems very little | |
15102 | - * benefit in doing so. | |
15103 | - */ | |
15104 | - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) && | |
15105 | - (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) | |
15106 | - set_memory_4k((unsigned long)__va(tseg), 1); | |
15107 | - } | |
15108 | -#endif | |
15109 | -} | |
15110 | - | |
15111 | -void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |
15112 | -{ | |
15113 | -#ifdef CONFIG_SMP | |
15114 | - u32 eax, ebx, ecx, edx; | |
15115 | - int index_msb, core_bits; | |
15116 | - | |
15117 | - cpuid(1, &eax, &ebx, &ecx, &edx); | |
15118 | - | |
15119 | - | |
15120 | - if (!cpu_has(c, X86_FEATURE_HT)) | |
15121 | - return; | |
15122 | - if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) | |
15123 | - goto out; | |
15124 | - | |
15125 | - smp_num_siblings = (ebx & 0xff0000) >> 16; | |
15126 | - | |
15127 | - if (smp_num_siblings == 1) { | |
15128 | - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | |
15129 | - } else if (smp_num_siblings > 1) { | |
15130 | - | |
15131 | - if (smp_num_siblings > NR_CPUS) { | |
15132 | - printk(KERN_WARNING "CPU: Unsupported number of " | |
15133 | - "siblings %d", smp_num_siblings); | |
15134 | - smp_num_siblings = 1; | |
15135 | - return; | |
15136 | - } | |
15137 | - | |
15138 | - index_msb = get_count_order(smp_num_siblings); | |
15139 | - c->phys_proc_id = phys_pkg_id(index_msb); | |
15140 | - | |
15141 | - smp_num_siblings = smp_num_siblings / c->x86_max_cores; | |
15142 | - | |
15143 | - index_msb = get_count_order(smp_num_siblings); | |
15144 | - | |
15145 | - core_bits = get_count_order(c->x86_max_cores); | |
15146 | - | |
15147 | - c->cpu_core_id = phys_pkg_id(index_msb) & | |
15148 | - ((1 << core_bits) - 1); | |
15149 | - } | |
15150 | -out: | |
15151 | - if ((c->x86_max_cores * smp_num_siblings) > 1) { | |
15152 | - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | |
15153 | - c->phys_proc_id); | |
15154 | - printk(KERN_INFO "CPU: Processor Core ID: %d\n", | |
15155 | - c->cpu_core_id); | |
15156 | - } | |
15157 | - | |
15158 | -#endif | |
15159 | -} | |
15160 | - | |
15161 | -/* | |
15162 | - * find out the number of processor cores on the die | |
15163 | - */ | |
15164 | -static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) | |
15165 | -{ | |
15166 | - unsigned int eax, t; | |
15167 | - | |
15168 | - if (c->cpuid_level < 4) | |
15169 | - return 1; | |
15170 | - | |
15171 | - cpuid_count(4, 0, &eax, &t, &t, &t); | |
15172 | - | |
15173 | - if (eax & 0x1f) | |
15174 | - return ((eax >> 26) + 1); | |
15175 | - else | |
15176 | - return 1; | |
15177 | -} | |
15178 | - | |
15179 | -static void __cpuinit srat_detect_node(void) | |
15180 | -{ | |
15181 | -#ifdef CONFIG_NUMA | |
15182 | - unsigned node; | |
15183 | - int cpu = smp_processor_id(); | |
15184 | - int apicid = hard_smp_processor_id(); | |
15185 | - | |
15186 | - /* Don't do the funky fallback heuristics the AMD version employs | |
15187 | - for now. */ | |
15188 | - node = apicid_to_node[apicid]; | |
15189 | - if (node == NUMA_NO_NODE || !node_online(node)) | |
15190 | - node = first_node(node_online_map); | |
15191 | - numa_set_node(cpu, node); | |
15192 | - | |
15193 | - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | |
15194 | -#endif | |
15195 | -} | |
15196 | - | |
15197 | -static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |
15198 | -{ | |
15199 | - if ((c->x86 == 0xf && c->x86_model >= 0x03) || | |
15200 | - (c->x86 == 0x6 && c->x86_model >= 0x0e)) | |
15201 | - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | |
15202 | -} | |
15203 | - | |
15204 | -static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |
15205 | -{ | |
15206 | - /* Cache sizes */ | |
15207 | - unsigned n; | |
15208 | - | |
15209 | - init_intel_cacheinfo(c); | |
15210 | - if (c->cpuid_level > 9) { | |
15211 | - unsigned eax = cpuid_eax(10); | |
15212 | - /* Check for version and the number of counters */ | |
15213 | - if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) | |
15214 | - set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | |
15215 | - } | |
15216 | - | |
15217 | - if (cpu_has_ds) { | |
15218 | - unsigned int l1, l2; | |
15219 | - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); | |
15220 | - if (!(l1 & (1<<11))) | |
15221 | - set_cpu_cap(c, X86_FEATURE_BTS); | |
15222 | - if (!(l1 & (1<<12))) | |
15223 | - set_cpu_cap(c, X86_FEATURE_PEBS); | |
15224 | - } | |
15225 | - | |
15226 | - | |
15227 | - if (cpu_has_bts) | |
15228 | - ds_init_intel(c); | |
15229 | - | |
15230 | - n = c->extended_cpuid_level; | |
15231 | - if (n >= 0x80000008) { | |
15232 | - unsigned eax = cpuid_eax(0x80000008); | |
15233 | - c->x86_virt_bits = (eax >> 8) & 0xff; | |
15234 | - c->x86_phys_bits = eax & 0xff; | |
15235 | - /* CPUID workaround for Intel 0F34 CPU */ | |
15236 | - if (c->x86_vendor == X86_VENDOR_INTEL && | |
15237 | - c->x86 == 0xF && c->x86_model == 0x3 && | |
15238 | - c->x86_mask == 0x4) | |
15239 | - c->x86_phys_bits = 36; | |
15240 | - } | |
15241 | - | |
15242 | - if (c->x86 == 15) | |
15243 | - c->x86_cache_alignment = c->x86_clflush_size * 2; | |
15244 | - if (c->x86 == 6) | |
15245 | - set_cpu_cap(c, X86_FEATURE_REP_GOOD); | |
15246 | - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | |
15247 | - c->x86_max_cores = intel_num_cpu_cores(c); | |
15248 | - | |
15249 | - srat_detect_node(); | |
15250 | -} | |
15251 | - | |
15252 | -static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) | |
15253 | -{ | |
15254 | - if (c->x86 == 0x6 && c->x86_model >= 0xf) | |
15255 | - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | |
15256 | -} | |
15257 | - | |
15258 | -static void __cpuinit init_centaur(struct cpuinfo_x86 *c) | |
15259 | -{ | |
15260 | - /* Cache sizes */ | |
15261 | - unsigned n; | |
15262 | - | |
15263 | - n = c->extended_cpuid_level; | |
15264 | - if (n >= 0x80000008) { | |
15265 | - unsigned eax = cpuid_eax(0x80000008); | |
15266 | - c->x86_virt_bits = (eax >> 8) & 0xff; | |
15267 | - c->x86_phys_bits = eax & 0xff; | |
15268 | - } | |
15269 | - | |
15270 | - if (c->x86 == 0x6 && c->x86_model >= 0xf) { | |
15271 | - c->x86_cache_alignment = c->x86_clflush_size * 2; | |
15272 | - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | |
15273 | - set_cpu_cap(c, X86_FEATURE_REP_GOOD); | |
15274 | - } | |
15275 | - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | |
15276 | -} | |
15277 | - | |
15278 | -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) | |
15279 | -{ | |
15280 | - char *v = c->x86_vendor_id; | |
15281 | - | |
15282 | - if (!strcmp(v, "AuthenticAMD")) | |
15283 | - c->x86_vendor = X86_VENDOR_AMD; | |
15284 | - else if (!strcmp(v, "GenuineIntel")) | |
15285 | - c->x86_vendor = X86_VENDOR_INTEL; | |
15286 | - else if (!strcmp(v, "CentaurHauls")) | |
15287 | - c->x86_vendor = X86_VENDOR_CENTAUR; | |
15288 | - else | |
15289 | - c->x86_vendor = X86_VENDOR_UNKNOWN; | |
15290 | -} | |
15291 | - | |
15292 | -/* Do some early cpuid on the boot CPU to get some parameter that are | |
15293 | - needed before check_bugs. Everything advanced is in identify_cpu | |
15294 | - below. */ | |
15295 | -static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |
15296 | -{ | |
15297 | - u32 tfms, xlvl; | |
15298 | - | |
15299 | - c->loops_per_jiffy = loops_per_jiffy; | |
15300 | - c->x86_cache_size = -1; | |
15301 | - c->x86_vendor = X86_VENDOR_UNKNOWN; | |
15302 | - c->x86_model = c->x86_mask = 0; /* So far unknown... */ | |
15303 | - c->x86_vendor_id[0] = '\0'; /* Unset */ | |
15304 | - c->x86_model_id[0] = '\0'; /* Unset */ | |
15305 | - c->x86_clflush_size = 64; | |
15306 | - c->x86_cache_alignment = c->x86_clflush_size; | |
15307 | - c->x86_max_cores = 1; | |
15308 | - c->x86_coreid_bits = 0; | |
15309 | - c->extended_cpuid_level = 0; | |
15310 | - memset(&c->x86_capability, 0, sizeof c->x86_capability); | |
15311 | - | |
15312 | - /* Get vendor name */ | |
15313 | - cpuid(0x00000000, (unsigned int *)&c->cpuid_level, | |
15314 | - (unsigned int *)&c->x86_vendor_id[0], | |
15315 | - (unsigned int *)&c->x86_vendor_id[8], | |
15316 | - (unsigned int *)&c->x86_vendor_id[4]); | |
15317 | - | |
15318 | - get_cpu_vendor(c); | |
15319 | - | |
15320 | - /* Initialize the standard set of capabilities */ | |
15321 | - /* Note that the vendor-specific code below might override */ | |
15322 | - | |
15323 | - /* Intel-defined flags: level 0x00000001 */ | |
15324 | - if (c->cpuid_level >= 0x00000001) { | |
15325 | - __u32 misc; | |
15326 | - cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4], | |
15327 | - &c->x86_capability[0]); | |
15328 | - c->x86 = (tfms >> 8) & 0xf; | |
15329 | - c->x86_model = (tfms >> 4) & 0xf; | |
15330 | - c->x86_mask = tfms & 0xf; | |
15331 | - if (c->x86 == 0xf) | |
15332 | - c->x86 += (tfms >> 20) & 0xff; | |
15333 | - if (c->x86 >= 0x6) | |
15334 | - c->x86_model += ((tfms >> 16) & 0xF) << 4; | |
15335 | - if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) | |
15336 | - c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; | |
15337 | - } else { | |
15338 | - /* Have CPUID level 0 only - unheard of */ | |
15339 | - c->x86 = 4; | |
15340 | - } | |
15341 | - | |
15342 | - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; | |
15343 | -#ifdef CONFIG_SMP | |
15344 | - c->phys_proc_id = c->initial_apicid; | |
15345 | -#endif | |
15346 | - /* AMD-defined flags: level 0x80000001 */ | |
15347 | - xlvl = cpuid_eax(0x80000000); | |
15348 | - c->extended_cpuid_level = xlvl; | |
15349 | - if ((xlvl & 0xffff0000) == 0x80000000) { | |
15350 | - if (xlvl >= 0x80000001) { | |
15351 | - c->x86_capability[1] = cpuid_edx(0x80000001); | |
15352 | - c->x86_capability[6] = cpuid_ecx(0x80000001); | |
15353 | - } | |
15354 | - if (xlvl >= 0x80000004) | |
15355 | - get_model_name(c); /* Default name */ | |
15356 | - } | |
15357 | - | |
15358 | - /* Transmeta-defined flags: level 0x80860001 */ | |
15359 | - xlvl = cpuid_eax(0x80860000); | |
15360 | - if ((xlvl & 0xffff0000) == 0x80860000) { | |
15361 | - /* Don't set x86_cpuid_level here for now to not confuse. */ | |
15362 | - if (xlvl >= 0x80860001) | |
15363 | - c->x86_capability[2] = cpuid_edx(0x80860001); | |
15364 | - } | |
15365 | - | |
15366 | - c->extended_cpuid_level = cpuid_eax(0x80000000); | |
15367 | - if (c->extended_cpuid_level >= 0x80000007) | |
15368 | - c->x86_power = cpuid_edx(0x80000007); | |
15369 | - | |
15370 | - switch (c->x86_vendor) { | |
15371 | - case X86_VENDOR_AMD: | |
15372 | - early_init_amd(c); | |
15373 | - break; | |
15374 | - case X86_VENDOR_INTEL: | |
15375 | - early_init_intel(c); | |
15376 | - break; | |
15377 | - case X86_VENDOR_CENTAUR: | |
15378 | - early_init_centaur(c); | |
15379 | - break; | |
15380 | - } | |
15381 | - | |
15382 | - validate_pat_support(c); | |
15383 | -} | |
15384 | - | |
15385 | -/* | |
15386 | - * This does the hard work of actually picking apart the CPU stuff... | |
15387 | - */ | |
15388 | -void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |
15389 | -{ | |
15390 | - int i; | |
15391 | - | |
15392 | - early_identify_cpu(c); | |
15393 | - | |
15394 | - init_scattered_cpuid_features(c); | |
15395 | - | |
15396 | - c->apicid = phys_pkg_id(0); | |
15397 | - | |
15398 | - /* | |
15399 | - * Vendor-specific initialization. In this section we | |
15400 | - * canonicalize the feature flags, meaning if there are | |
15401 | - * features a certain CPU supports which CPUID doesn't | |
15402 | - * tell us, CPUID claiming incorrect flags, or other bugs, | |
15403 | - * we handle them here. | |
15404 | - * | |
15405 | - * At the end of this section, c->x86_capability better | |
15406 | - * indicate the features this CPU genuinely supports! | |
15407 | - */ | |
15408 | - switch (c->x86_vendor) { | |
15409 | - case X86_VENDOR_AMD: | |
15410 | - init_amd(c); | |
15411 | - break; | |
15412 | - | |
15413 | - case X86_VENDOR_INTEL: | |
15414 | - init_intel(c); | |
15415 | - break; | |
15416 | - | |
15417 | - case X86_VENDOR_CENTAUR: | |
15418 | - init_centaur(c); | |
15419 | - break; | |
15420 | - | |
15421 | - case X86_VENDOR_UNKNOWN: | |
15422 | - default: | |
15423 | - display_cacheinfo(c); | |
15424 | - break; | |
15425 | - } | |
15426 | - | |
15427 | - detect_ht(c); | |
15428 | - | |
15429 | - /* | |
15430 | - * On SMP, boot_cpu_data holds the common feature set between | |
15431 | - * all CPUs; so make sure that we indicate which features are | |
15432 | - * common between the CPUs. The first time this routine gets | |
15433 | - * executed, c == &boot_cpu_data. | |
15434 | - */ | |
15435 | - if (c != &boot_cpu_data) { | |
15436 | - /* AND the already accumulated flags with these */ | |
15437 | - for (i = 0; i < NCAPINTS; i++) | |
15438 | - boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | |
15439 | - } | |
15440 | - | |
15441 | - /* Clear all flags overriden by options */ | |
15442 | - for (i = 0; i < NCAPINTS; i++) | |
15443 | - c->x86_capability[i] &= ~cleared_cpu_caps[i]; | |
15444 | - | |
15445 | -#ifdef CONFIG_X86_MCE | |
15446 | - mcheck_init(c); | |
15447 | -#endif | |
15448 | - select_idle_routine(c); | |
15449 | - | |
15450 | -#ifdef CONFIG_NUMA | |
15451 | - numa_add_cpu(smp_processor_id()); | |
15452 | -#endif | |
15453 | - | |
15454 | -} | |
15455 | - | |
15456 | -void __cpuinit identify_boot_cpu(void) | |
15457 | -{ | |
15458 | - identify_cpu(&boot_cpu_data); | |
15459 | -} | |
15460 | - | |
15461 | -void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | |
15462 | -{ | |
15463 | - BUG_ON(c == &boot_cpu_data); | |
15464 | - identify_cpu(c); | |
15465 | - mtrr_ap_init(); | |
15466 | -} | |
15467 | - | |
15468 | -static __init int setup_noclflush(char *arg) | |
15469 | -{ | |
15470 | - setup_clear_cpu_cap(X86_FEATURE_CLFLSH); | |
15471 | - return 1; | |
15472 | -} | |
15473 | -__setup("noclflush", setup_noclflush); | |
15474 | - | |
15475 | -void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | |
15476 | -{ | |
15477 | - if (c->x86_model_id[0]) | |
15478 | - printk(KERN_CONT "%s", c->x86_model_id); | |
15479 | - | |
15480 | - if (c->x86_mask || c->cpuid_level >= 0) | |
15481 | - printk(KERN_CONT " stepping %02x\n", c->x86_mask); | |
15482 | - else | |
15483 | - printk(KERN_CONT "\n"); | |
15484 | -} | |
15485 | - | |
15486 | -static __init int setup_disablecpuid(char *arg) | |
15487 | -{ | |
15488 | - int bit; | |
15489 | - if (get_option(&arg, &bit) && bit < NCAPINTS*32) | |
15490 | - setup_clear_cpu_cap(bit); | |
15491 | - else | |
15492 | - return 0; | |
15493 | - return 1; | |
15494 | -} | |
15495 | -__setup("clearcpuid=", setup_disablecpuid); | |
15496 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
82094b55 | 15497 | +++ sle11-2009-10-16/arch/x86/kernel/setup_percpu-xen.c 2009-06-04 10:21:39.000000000 +0200 |
2cb7cef9 BS |
15498 | @@ -0,0 +1,385 @@ |
15499 | +#include <linux/kernel.h> | |
15500 | +#include <linux/module.h> | |
15501 | +#include <linux/init.h> | |
15502 | +#include <linux/bootmem.h> | |
15503 | +#include <linux/percpu.h> | |
15504 | +#include <linux/kexec.h> | |
15505 | +#include <linux/crash_dump.h> | |
15506 | +#include <asm/smp.h> | |
15507 | +#include <asm/percpu.h> | |
15508 | +#include <asm/sections.h> | |
15509 | +#include <asm/processor.h> | |
15510 | +#include <asm/setup.h> | |
15511 | +#include <asm/topology.h> | |
15512 | +#include <asm/mpspec.h> | |
15513 | +#include <asm/apicdef.h> | |
15514 | +#include <asm/highmem.h> | |
15515 | + | |
15516 | +#ifdef CONFIG_X86_LOCAL_APIC | |
15517 | +unsigned int num_processors; | |
15518 | +unsigned disabled_cpus __cpuinitdata; | |
15519 | +/* Processor that is doing the boot up */ | |
15520 | +unsigned int boot_cpu_physical_apicid = -1U; | |
15521 | +unsigned int max_physical_apicid; | |
15522 | +EXPORT_SYMBOL(boot_cpu_physical_apicid); | |
15523 | + | |
15524 | +/* Bitmask of physically existing CPUs */ | |
15525 | +physid_mask_t phys_cpu_present_map; | |
15526 | +#endif | |
15527 | + | |
15528 | +/* map cpu index to physical APIC ID */ | |
15529 | +#ifndef CONFIG_XEN | |
15530 | +DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); | |
15531 | +DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); | |
15532 | +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); | |
15533 | +EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); | |
15534 | +#else | |
15535 | +DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; | |
15536 | +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); | |
15537 | +#endif | |
15538 | + | |
15539 | +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | |
15540 | +#define X86_64_NUMA 1 | |
15541 | + | |
15542 | +/* map cpu index to node index */ | |
15543 | +DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); | |
15544 | +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); | |
15545 | + | |
15546 | +/* which logical CPUs are on which nodes */ | |
15547 | +cpumask_t *node_to_cpumask_map; | |
15548 | +EXPORT_SYMBOL(node_to_cpumask_map); | |
15549 | + | |
15550 | +/* setup node_to_cpumask_map */ | |
15551 | +static void __init setup_node_to_cpumask_map(void); | |
15552 | + | |
15553 | +#else | |
15554 | +static inline void setup_node_to_cpumask_map(void) { } | |
15555 | +#endif | |
15556 | + | |
15557 | +#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) | |
15558 | +/* | |
15559 | + * Copy data used in early init routines from the initial arrays to the | |
15560 | + * per cpu data areas. These arrays then become expendable and the | |
15561 | + * *_early_ptr's are zeroed indicating that the static arrays are gone. | |
15562 | + */ | |
15563 | +static void __init setup_per_cpu_maps(void) | |
15564 | +{ | |
15565 | +#ifndef CONFIG_XEN | |
15566 | + int cpu; | |
15567 | + | |
15568 | + for_each_possible_cpu(cpu) { | |
15569 | + per_cpu(x86_cpu_to_apicid, cpu) = | |
15570 | + early_per_cpu_map(x86_cpu_to_apicid, cpu); | |
15571 | + per_cpu(x86_bios_cpu_apicid, cpu) = | |
15572 | + early_per_cpu_map(x86_bios_cpu_apicid, cpu); | |
15573 | +#ifdef X86_64_NUMA | |
15574 | + per_cpu(x86_cpu_to_node_map, cpu) = | |
15575 | + early_per_cpu_map(x86_cpu_to_node_map, cpu); | |
15576 | +#endif | |
15577 | + } | |
15578 | + | |
15579 | + /* indicate the early static arrays will soon be gone */ | |
15580 | + early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; | |
15581 | + early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; | |
15582 | +#ifdef X86_64_NUMA | |
15583 | + early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; | |
15584 | +#endif | |
15585 | +#endif | |
15586 | +} | |
15587 | + | |
15588 | +#ifdef CONFIG_X86_32 | |
15589 | +/* | |
15590 | + * Great future not-so-futuristic plan: make i386 and x86_64 do it | |
15591 | + * the same way | |
15592 | + */ | |
15593 | +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; | |
15594 | +EXPORT_SYMBOL(__per_cpu_offset); | |
15595 | +static inline void setup_cpu_pda_map(void) { } | |
15596 | + | |
15597 | +#elif !defined(CONFIG_SMP) | |
15598 | +static inline void setup_cpu_pda_map(void) { } | |
15599 | + | |
15600 | +#else /* CONFIG_SMP && CONFIG_X86_64 */ | |
15601 | + | |
15602 | +/* | |
15603 | + * Allocate cpu_pda pointer table and array via alloc_bootmem. | |
15604 | + */ | |
15605 | +static void __init setup_cpu_pda_map(void) | |
15606 | +{ | |
15607 | + char *pda; | |
15608 | + struct x8664_pda **new_cpu_pda; | |
15609 | + unsigned long size; | |
15610 | + int cpu; | |
15611 | + | |
15612 | + size = roundup(sizeof(struct x8664_pda), cache_line_size()); | |
15613 | + | |
15614 | + /* allocate cpu_pda array and pointer table */ | |
15615 | + { | |
15616 | + unsigned long tsize = nr_cpu_ids * sizeof(void *); | |
15617 | + unsigned long asize = size * (nr_cpu_ids - 1); | |
15618 | + | |
15619 | + tsize = roundup(tsize, cache_line_size()); | |
15620 | + new_cpu_pda = alloc_bootmem(tsize + asize); | |
15621 | + pda = (char *)new_cpu_pda + tsize; | |
15622 | + } | |
15623 | + | |
15624 | + /* initialize pointer table to static pda's */ | |
15625 | + for_each_possible_cpu(cpu) { | |
15626 | + if (cpu == 0) { | |
15627 | + /* leave boot cpu pda in place */ | |
15628 | + new_cpu_pda[0] = cpu_pda(0); | |
15629 | + continue; | |
15630 | + } | |
15631 | + new_cpu_pda[cpu] = (struct x8664_pda *)pda; | |
15632 | + new_cpu_pda[cpu]->in_bootmem = 1; | |
15633 | + pda += size; | |
15634 | + } | |
15635 | + | |
15636 | + /* point to new pointer table */ | |
15637 | + _cpu_pda = new_cpu_pda; | |
15638 | +} | |
15639 | +#endif | |
15640 | + | |
15641 | +/* | |
15642 | + * Great future plan: | |
15643 | + * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. | |
15644 | + * Always point %gs to its beginning | |
15645 | + */ | |
15646 | +void __init setup_per_cpu_areas(void) | |
15647 | +{ | |
15648 | + ssize_t size = PERCPU_ENOUGH_ROOM; | |
15649 | + char *ptr; | |
15650 | + int cpu; | |
15651 | + | |
15652 | + /* Setup cpu_pda map */ | |
15653 | + setup_cpu_pda_map(); | |
15654 | + | |
15655 | + /* Copy section for each CPU (we discard the original) */ | |
15656 | + size = PERCPU_ENOUGH_ROOM; | |
15657 | + printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", | |
15658 | + size); | |
15659 | + | |
15660 | + for_each_possible_cpu(cpu) { | |
15661 | +#ifndef CONFIG_NEED_MULTIPLE_NODES | |
15662 | + ptr = alloc_bootmem_pages(size); | |
15663 | +#else | |
15664 | + int node = early_cpu_to_node(cpu); | |
15665 | + if (!node_online(node) || !NODE_DATA(node)) { | |
15666 | + ptr = alloc_bootmem_pages(size); | |
15667 | + printk(KERN_INFO | |
15668 | + "cpu %d has no node %d or node-local memory\n", | |
15669 | + cpu, node); | |
15670 | + } | |
15671 | + else | |
15672 | + ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); | |
15673 | +#endif | |
15674 | + per_cpu_offset(cpu) = ptr - __per_cpu_start; | |
15675 | + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | |
15676 | + | |
15677 | + } | |
15678 | + | |
15679 | + printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", | |
15680 | + NR_CPUS, nr_cpu_ids, nr_node_ids); | |
15681 | + | |
15682 | + /* Setup percpu data maps */ | |
15683 | + setup_per_cpu_maps(); | |
15684 | + | |
15685 | + /* Setup node to cpumask map */ | |
15686 | + setup_node_to_cpumask_map(); | |
15687 | +} | |
15688 | + | |
15689 | +#endif | |
15690 | + | |
15691 | +#ifdef X86_64_NUMA | |
15692 | + | |
15693 | +/* | |
15694 | + * Allocate node_to_cpumask_map based on number of available nodes | |
15695 | + * Requires node_possible_map to be valid. | |
15696 | + * | |
15697 | + * Note: node_to_cpumask() is not valid until after this is done. | |
15698 | + */ | |
15699 | +static void __init setup_node_to_cpumask_map(void) | |
15700 | +{ | |
15701 | + unsigned int node, num = 0; | |
15702 | + cpumask_t *map; | |
15703 | + | |
15704 | + /* setup nr_node_ids if not done yet */ | |
15705 | + if (nr_node_ids == MAX_NUMNODES) { | |
15706 | + for_each_node_mask(node, node_possible_map) | |
15707 | + num = node; | |
15708 | + nr_node_ids = num + 1; | |
15709 | + } | |
15710 | + | |
15711 | + /* allocate the map */ | |
15712 | + map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); | |
15713 | + | |
15714 | + pr_debug(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", | |
15715 | + map, nr_node_ids); | |
15716 | + | |
15717 | + /* node_to_cpumask() will now work */ | |
15718 | + node_to_cpumask_map = map; | |
15719 | +} | |
15720 | + | |
15721 | +void __cpuinit numa_set_node(int cpu, int node) | |
15722 | +{ | |
15723 | + int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); | |
15724 | + | |
15725 | + if (cpu_pda(cpu) && node != NUMA_NO_NODE) | |
15726 | + cpu_pda(cpu)->nodenumber = node; | |
15727 | + | |
15728 | + if (cpu_to_node_map) | |
15729 | + cpu_to_node_map[cpu] = node; | |
15730 | + | |
15731 | + else if (per_cpu_offset(cpu)) | |
15732 | + per_cpu(x86_cpu_to_node_map, cpu) = node; | |
15733 | + | |
15734 | + else | |
15735 | + pr_debug("Setting node for non-present cpu %d\n", cpu); | |
15736 | +} | |
15737 | + | |
15738 | +void __cpuinit numa_clear_node(int cpu) | |
15739 | +{ | |
15740 | + numa_set_node(cpu, NUMA_NO_NODE); | |
15741 | +} | |
15742 | + | |
15743 | +#ifndef CONFIG_DEBUG_PER_CPU_MAPS | |
15744 | + | |
15745 | +void __cpuinit numa_add_cpu(int cpu) | |
15746 | +{ | |
15747 | + cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | |
15748 | +} | |
15749 | + | |
15750 | +void __cpuinit numa_remove_cpu(int cpu) | |
15751 | +{ | |
15752 | + cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); | |
15753 | +} | |
15754 | + | |
15755 | +#else /* CONFIG_DEBUG_PER_CPU_MAPS */ | |
15756 | + | |
15757 | +/* | |
15758 | + * --------- debug versions of the numa functions --------- | |
15759 | + */ | |
15760 | +static void __cpuinit numa_set_cpumask(int cpu, int enable) | |
15761 | +{ | |
15762 | + int node = cpu_to_node(cpu); | |
15763 | + cpumask_t *mask; | |
15764 | + char buf[64]; | |
15765 | + | |
15766 | + if (node_to_cpumask_map == NULL) { | |
15767 | + printk(KERN_ERR "node_to_cpumask_map NULL\n"); | |
15768 | + dump_stack(); | |
15769 | + return; | |
15770 | + } | |
15771 | + | |
15772 | + mask = &node_to_cpumask_map[node]; | |
15773 | + if (enable) | |
15774 | + cpu_set(cpu, *mask); | |
15775 | + else | |
15776 | + cpu_clear(cpu, *mask); | |
15777 | + | |
15778 | + cpulist_scnprintf(buf, sizeof(buf), *mask); | |
15779 | + printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", | |
15780 | + enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); | |
15781 | + } | |
15782 | + | |
15783 | +void __cpuinit numa_add_cpu(int cpu) | |
15784 | +{ | |
15785 | + numa_set_cpumask(cpu, 1); | |
15786 | +} | |
15787 | + | |
15788 | +void __cpuinit numa_remove_cpu(int cpu) | |
15789 | +{ | |
15790 | + numa_set_cpumask(cpu, 0); | |
15791 | +} | |
15792 | + | |
15793 | +int cpu_to_node(int cpu) | |
15794 | +{ | |
15795 | + if (early_per_cpu_ptr(x86_cpu_to_node_map)) { | |
15796 | + printk(KERN_WARNING | |
15797 | + "cpu_to_node(%d): usage too early!\n", cpu); | |
15798 | + dump_stack(); | |
15799 | + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | |
15800 | + } | |
15801 | + return per_cpu(x86_cpu_to_node_map, cpu); | |
15802 | +} | |
15803 | +EXPORT_SYMBOL(cpu_to_node); | |
15804 | + | |
15805 | +/* | |
15806 | + * Same function as cpu_to_node() but used if called before the | |
15807 | + * per_cpu areas are setup. | |
15808 | + */ | |
15809 | +int early_cpu_to_node(int cpu) | |
15810 | +{ | |
15811 | + if (early_per_cpu_ptr(x86_cpu_to_node_map)) | |
15812 | + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | |
15813 | + | |
15814 | + if (!per_cpu_offset(cpu)) { | |
15815 | + printk(KERN_WARNING | |
15816 | + "early_cpu_to_node(%d): no per_cpu area!\n", cpu); | |
15817 | + dump_stack(); | |
15818 | + return NUMA_NO_NODE; | |
15819 | + } | |
15820 | + return per_cpu(x86_cpu_to_node_map, cpu); | |
15821 | +} | |
15822 | + | |
15823 | + | |
15824 | +/* empty cpumask */ | |
15825 | +static const cpumask_t cpu_mask_none; | |
15826 | + | |
15827 | +/* | |
15828 | + * Returns a pointer to the bitmask of CPUs on Node 'node'. | |
15829 | + */ | |
15830 | +const cpumask_t *_node_to_cpumask_ptr(int node) | |
15831 | +{ | |
15832 | + if (node_to_cpumask_map == NULL) { | |
15833 | + printk(KERN_WARNING | |
15834 | + "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", | |
15835 | + node); | |
15836 | + dump_stack(); | |
15837 | + return (const cpumask_t *)&cpu_online_map; | |
15838 | + } | |
15839 | + if (node >= nr_node_ids) { | |
15840 | + printk(KERN_WARNING | |
15841 | + "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", | |
15842 | + node, nr_node_ids); | |
15843 | + dump_stack(); | |
15844 | + return &cpu_mask_none; | |
15845 | + } | |
15846 | + return &node_to_cpumask_map[node]; | |
15847 | +} | |
15848 | +EXPORT_SYMBOL(_node_to_cpumask_ptr); | |
15849 | + | |
15850 | +/* | |
15851 | + * Returns a bitmask of CPUs on Node 'node'. | |
15852 | + * | |
15853 | + * Side note: this function creates the returned cpumask on the stack | |
15854 | + * so with a high NR_CPUS count, excessive stack space is used. The | |
15855 | + * node_to_cpumask_ptr function should be used whenever possible. | |
15856 | + */ | |
15857 | +cpumask_t node_to_cpumask(int node) | |
15858 | +{ | |
15859 | + if (node_to_cpumask_map == NULL) { | |
15860 | + printk(KERN_WARNING | |
15861 | + "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); | |
15862 | + dump_stack(); | |
15863 | + return cpu_online_map; | |
15864 | + } | |
15865 | + if (node >= nr_node_ids) { | |
15866 | + printk(KERN_WARNING | |
15867 | + "node_to_cpumask(%d): node > nr_node_ids(%d)\n", | |
15868 | + node, nr_node_ids); | |
15869 | + dump_stack(); | |
15870 | + return cpu_mask_none; | |
15871 | + } | |
15872 | + return node_to_cpumask_map[node]; | |
15873 | +} | |
15874 | +EXPORT_SYMBOL(node_to_cpumask); | |
15875 | + | |
15876 | +/* | |
15877 | + * --------- end of debug versions of the numa functions --------- | |
15878 | + */ | |
15879 | + | |
15880 | +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ | |
15881 | + | |
15882 | +#endif /* X86_64_NUMA */ | |
15883 | + | |
82094b55 AF |
15884 | --- sle11-2009-10-16.orig/arch/x86/kernel/smp-xen.c 2009-03-16 16:38:05.000000000 +0100 |
15885 | +++ sle11-2009-10-16/arch/x86/kernel/smp-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
15886 | @@ -121,132 +121,14 @@ void xen_smp_send_reschedule(int cpu) |
15887 | send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); | |
15888 | } | |
15889 | ||
15890 | -/* | |
15891 | - * Structure and data for smp_call_function(). This is designed to minimise | |
15892 | - * static memory requirements. It also looks cleaner. | |
15893 | - */ | |
15894 | -static DEFINE_SPINLOCK(call_lock); | |
15895 | - | |
15896 | -struct call_data_struct { | |
15897 | - void (*func) (void *info); | |
15898 | - void *info; | |
15899 | - atomic_t started; | |
15900 | - atomic_t finished; | |
15901 | - int wait; | |
15902 | -}; | |
15903 | - | |
15904 | -void lock_ipi_call_lock(void) | |
15905 | +void xen_send_call_func_single_ipi(int cpu) | |
15906 | { | |
15907 | - spin_lock_irq(&call_lock); | |
15908 | + send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNC_SINGLE_VECTOR); | |
15909 | } | |
15910 | ||
15911 | -void unlock_ipi_call_lock(void) | |
15912 | +void xen_send_call_func_ipi(cpumask_t mask) | |
15913 | { | |
15914 | - spin_unlock_irq(&call_lock); | |
15915 | -} | |
15916 | - | |
15917 | -static struct call_data_struct *call_data; | |
15918 | - | |
15919 | -static void __smp_call_function(void (*func) (void *info), void *info, | |
15920 | - int nonatomic, int wait) | |
15921 | -{ | |
15922 | - struct call_data_struct data; | |
15923 | - int cpus = num_online_cpus() - 1; | |
15924 | - | |
15925 | - if (!cpus) | |
15926 | - return; | |
15927 | - | |
15928 | - data.func = func; | |
15929 | - data.info = info; | |
15930 | - atomic_set(&data.started, 0); | |
15931 | - data.wait = wait; | |
15932 | - if (wait) | |
15933 | - atomic_set(&data.finished, 0); | |
15934 | - | |
15935 | - call_data = &data; | |
15936 | - mb(); | |
15937 | - | |
15938 | - /* Send a message to all other CPUs and wait for them to respond */ | |
15939 | - send_IPI_allbutself(CALL_FUNCTION_VECTOR); | |
15940 | - | |
15941 | - /* Wait for response */ | |
15942 | - while (atomic_read(&data.started) != cpus) | |
15943 | - cpu_relax(); | |
15944 | - | |
15945 | - if (wait) | |
15946 | - while (atomic_read(&data.finished) != cpus) | |
15947 | - cpu_relax(); | |
15948 | -} | |
15949 | - | |
15950 | - | |
15951 | -/** | |
15952 | - * smp_call_function_mask(): Run a function on a set of other CPUs. | |
15953 | - * @mask: The set of cpus to run on. Must not include the current cpu. | |
15954 | - * @func: The function to run. This must be fast and non-blocking. | |
15955 | - * @info: An arbitrary pointer to pass to the function. | |
15956 | - * @wait: If true, wait (atomically) until function has completed on other CPUs. | |
15957 | - * | |
15958 | - * Returns 0 on success, else a negative status code. | |
15959 | - * | |
15960 | - * If @wait is true, then returns once @func has returned; otherwise | |
15961 | - * it returns just before the target cpu calls @func. | |
15962 | - * | |
15963 | - * You must not call this function with disabled interrupts or from a | |
15964 | - * hardware interrupt handler or from a bottom half handler. | |
15965 | - */ | |
15966 | -int | |
15967 | -xen_smp_call_function_mask(cpumask_t mask, | |
15968 | - void (*func)(void *), void *info, | |
15969 | - int wait) | |
15970 | -{ | |
15971 | - struct call_data_struct data; | |
15972 | - cpumask_t allbutself; | |
15973 | - int cpus; | |
15974 | - | |
15975 | - /* Can deadlock when called with interrupts disabled */ | |
15976 | - WARN_ON(irqs_disabled()); | |
15977 | - | |
15978 | - /* Holding any lock stops cpus from going down. */ | |
15979 | - spin_lock(&call_lock); | |
15980 | - | |
15981 | - allbutself = cpu_online_map; | |
15982 | - cpu_clear(smp_processor_id(), allbutself); | |
15983 | - | |
15984 | - cpus_and(mask, mask, allbutself); | |
15985 | - cpus = cpus_weight(mask); | |
15986 | - | |
15987 | - if (!cpus) { | |
15988 | - spin_unlock(&call_lock); | |
15989 | - return 0; | |
15990 | - } | |
15991 | - | |
15992 | - data.func = func; | |
15993 | - data.info = info; | |
15994 | - atomic_set(&data.started, 0); | |
15995 | - data.wait = wait; | |
15996 | - if (wait) | |
15997 | - atomic_set(&data.finished, 0); | |
15998 | - | |
15999 | - call_data = &data; | |
16000 | - wmb(); | |
16001 | - | |
16002 | - /* Send a message to other CPUs */ | |
16003 | - if (cpus_equal(mask, allbutself) && | |
16004 | - cpus_equal(cpu_online_map, cpu_callout_map)) | |
16005 | - send_IPI_allbutself(CALL_FUNCTION_VECTOR); | |
16006 | - else | |
16007 | - send_IPI_mask(mask, CALL_FUNCTION_VECTOR); | |
16008 | - | |
16009 | - /* Wait for response */ | |
16010 | - while (atomic_read(&data.started) != cpus) | |
16011 | - cpu_relax(); | |
16012 | - | |
16013 | - if (wait) | |
16014 | - while (atomic_read(&data.finished) != cpus) | |
16015 | - cpu_relax(); | |
16016 | - spin_unlock(&call_lock); | |
16017 | - | |
16018 | - return 0; | |
16019 | + send_IPI_mask(mask, CALL_FUNCTION_VECTOR); | |
16020 | } | |
16021 | ||
16022 | static void stop_this_cpu(void *dummy) | |
16023 | @@ -268,15 +150,10 @@ static void stop_this_cpu(void *dummy) | |
16024 | ||
16025 | void xen_smp_send_stop(void) | |
16026 | { | |
16027 | - int nolock; | |
16028 | unsigned long flags; | |
16029 | ||
16030 | - /* Don't deadlock on the call lock in panic */ | |
16031 | - nolock = !spin_trylock(&call_lock); | |
16032 | + smp_call_function(stop_this_cpu, NULL, 0); | |
16033 | local_irq_save(flags); | |
16034 | - __smp_call_function(stop_this_cpu, NULL, 0, 0); | |
16035 | - if (!nolock) | |
16036 | - spin_unlock(&call_lock); | |
16037 | disable_all_local_evtchn(); | |
16038 | local_irq_restore(flags); | |
16039 | } | |
16040 | @@ -298,21 +175,8 @@ irqreturn_t smp_reschedule_interrupt(int | |
16041 | ||
16042 | irqreturn_t smp_call_function_interrupt(int irq, void *dev_id) | |
16043 | { | |
16044 | - void (*func) (void *info) = call_data->func; | |
16045 | - void *info = call_data->info; | |
16046 | - int wait = call_data->wait; | |
16047 | - | |
16048 | - /* | |
16049 | - * Notify initiating CPU that I've grabbed the data and am | |
16050 | - * about to execute the function | |
16051 | - */ | |
16052 | - mb(); | |
16053 | - atomic_inc(&call_data->started); | |
16054 | - /* | |
16055 | - * At this point the info structure may be out of scope unless wait==1 | |
16056 | - */ | |
16057 | irq_enter(); | |
16058 | - (*func)(info); | |
16059 | + generic_smp_call_function_interrupt(); | |
16060 | #ifdef CONFIG_X86_32 | |
16061 | __get_cpu_var(irq_stat).irq_call_count++; | |
16062 | #else | |
16063 | @@ -320,10 +184,19 @@ irqreturn_t smp_call_function_interrupt( | |
16064 | #endif | |
16065 | irq_exit(); | |
16066 | ||
16067 | - if (wait) { | |
16068 | - mb(); | |
16069 | - atomic_inc(&call_data->finished); | |
16070 | - } | |
16071 | + return IRQ_HANDLED; | |
16072 | +} | |
16073 | + | |
16074 | +irqreturn_t smp_call_function_single_interrupt(int irq, void *dev_id) | |
16075 | +{ | |
16076 | + irq_enter(); | |
16077 | + generic_smp_call_function_single_interrupt(); | |
16078 | +#ifdef CONFIG_X86_32 | |
16079 | + __get_cpu_var(irq_stat).irq_call_count++; | |
16080 | +#else | |
16081 | + add_pda(irq_call_count, 1); | |
16082 | +#endif | |
16083 | + irq_exit(); | |
16084 | ||
16085 | return IRQ_HANDLED; | |
16086 | } | |
82094b55 AF |
16087 | --- sle11-2009-10-16.orig/arch/x86/kernel/time_32-xen.c 2009-10-28 14:58:12.000000000 +0100 |
16088 | +++ sle11-2009-10-16/arch/x86/kernel/time_32-xen.c 2009-10-28 14:58:19.000000000 +0100 | |
2cb7cef9 BS |
16089 | @@ -468,7 +468,7 @@ irqreturn_t timer_interrupt(int irq, voi |
16090 | ||
16091 | /* Keep nmi watchdog up to date */ | |
16092 | #ifdef __i386__ | |
16093 | - per_cpu(irq_stat, smp_processor_id()).irq0_irqs++; | |
16094 | + x86_add_percpu(irq_stat.irq0_irqs, 1); | |
16095 | #else | |
16096 | add_pda(irq0_irqs, 1); | |
16097 | #endif | |
82094b55 | 16098 | @@ -747,9 +747,7 @@ void __init time_init(void) |
2cb7cef9 BS |
16099 | |
16100 | update_wallclock(); | |
16101 | ||
16102 | -#ifndef CONFIG_X86_64 | |
16103 | use_tsc_delay(); | |
16104 | -#endif | |
16105 | ||
16106 | /* Cannot request_irq() until kmem is initialised. */ | |
16107 | late_time_init = setup_cpu0_timer_irq; | |
82094b55 | 16108 | @@ -806,7 +804,8 @@ static void stop_hz_timer(void) |
2cb7cef9 BS |
16109 | |
16110 | /* Leave ourselves in tick mode if rcu or softirq or timer pending. */ | |
16111 | if (rcu_needs_cpu(cpu) || local_softirq_pending() || | |
16112 | - (j = next_timer_interrupt(), time_before_eq(j, jiffies))) { | |
16113 | + (j = get_next_timer_interrupt(jiffies), | |
16114 | + time_before_eq(j, jiffies))) { | |
16115 | cpu_clear(cpu, nohz_cpu_mask); | |
16116 | j = jiffies + 1; | |
16117 | } | |
82094b55 AF |
16118 | --- sle11-2009-10-16.orig/arch/x86/kernel/traps_32-xen.c 2009-03-16 16:38:05.000000000 +0100 |
16119 | +++ sle11-2009-10-16/arch/x86/kernel/traps_32-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
16120 | @@ -1,5 +1,6 @@ |
16121 | /* | |
16122 | * Copyright (C) 1991, 1992 Linus Torvalds | |
16123 | + * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | |
16124 | * | |
16125 | * Pentium III FXSR, SSE support | |
16126 | * Gareth Hughes <gareth@valinux.com>, May 2000 | |
16127 | @@ -57,11 +58,10 @@ | |
16128 | #include <asm/nmi.h> | |
16129 | #include <asm/smp.h> | |
16130 | #include <asm/io.h> | |
16131 | +#include <asm/traps.h> | |
16132 | ||
16133 | #include "mach_traps.h" | |
16134 | ||
16135 | -int panic_on_unrecovered_nmi; | |
16136 | - | |
16137 | #ifndef CONFIG_XEN | |
16138 | DECLARE_BITMAP(used_vectors, NR_VECTORS); | |
16139 | EXPORT_SYMBOL_GPL(used_vectors); | |
16140 | @@ -82,43 +82,22 @@ gate_desc idt_table[256] | |
16141 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; | |
16142 | #endif | |
16143 | ||
16144 | -asmlinkage void divide_error(void); | |
16145 | -asmlinkage void debug(void); | |
16146 | -asmlinkage void nmi(void); | |
16147 | -asmlinkage void int3(void); | |
16148 | -asmlinkage void overflow(void); | |
16149 | -asmlinkage void bounds(void); | |
16150 | -asmlinkage void invalid_op(void); | |
16151 | -asmlinkage void device_not_available(void); | |
16152 | -asmlinkage void coprocessor_segment_overrun(void); | |
16153 | -asmlinkage void invalid_TSS(void); | |
16154 | -asmlinkage void segment_not_present(void); | |
16155 | -asmlinkage void stack_segment(void); | |
16156 | -asmlinkage void general_protection(void); | |
16157 | -asmlinkage void page_fault(void); | |
16158 | -asmlinkage void coprocessor_error(void); | |
16159 | -asmlinkage void simd_coprocessor_error(void); | |
16160 | -asmlinkage void alignment_check(void); | |
16161 | -#ifndef CONFIG_XEN | |
16162 | -asmlinkage void spurious_interrupt_bug(void); | |
16163 | -#else | |
16164 | -asmlinkage void fixup_4gb_segment(void); | |
16165 | -#endif | |
16166 | -asmlinkage void machine_check(void); | |
16167 | - | |
16168 | +int panic_on_unrecovered_nmi; | |
16169 | int kstack_depth_to_print = 24; | |
16170 | static unsigned int code_bytes = 64; | |
16171 | +static int ignore_nmis; | |
16172 | +static int die_counter; | |
16173 | ||
16174 | void printk_address(unsigned long address, int reliable) | |
16175 | { | |
16176 | #ifdef CONFIG_KALLSYMS | |
16177 | - char namebuf[KSYM_NAME_LEN]; | |
16178 | unsigned long offset = 0; | |
16179 | unsigned long symsize; | |
16180 | const char *symname; | |
16181 | - char reliab[4] = ""; | |
16182 | - char *delim = ":"; | |
16183 | char *modname; | |
16184 | + char *delim = ":"; | |
16185 | + char namebuf[KSYM_NAME_LEN]; | |
16186 | + char reliab[4] = ""; | |
16187 | ||
16188 | symname = kallsyms_lookup(address, &symsize, &offset, | |
16189 | &modname, namebuf); | |
16190 | @@ -138,22 +117,23 @@ void printk_address(unsigned long addres | |
16191 | #endif | |
16192 | } | |
16193 | ||
16194 | -static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size) | |
16195 | +static inline int valid_stack_ptr(struct thread_info *tinfo, | |
16196 | + void *p, unsigned int size) | |
16197 | { | |
16198 | - return p > (void *)tinfo && | |
16199 | - p <= (void *)tinfo + THREAD_SIZE - size; | |
16200 | + void *t = tinfo; | |
16201 | + return p > t && p <= t + THREAD_SIZE - size; | |
16202 | } | |
16203 | ||
16204 | /* The form of the top of the frame on the stack */ | |
16205 | struct stack_frame { | |
16206 | - struct stack_frame *next_frame; | |
16207 | - unsigned long return_address; | |
16208 | + struct stack_frame *next_frame; | |
16209 | + unsigned long return_address; | |
16210 | }; | |
16211 | ||
16212 | static inline unsigned long | |
16213 | print_context_stack(struct thread_info *tinfo, | |
16214 | - unsigned long *stack, unsigned long bp, | |
16215 | - const struct stacktrace_ops *ops, void *data) | |
16216 | + unsigned long *stack, unsigned long bp, | |
16217 | + const struct stacktrace_ops *ops, void *data) | |
16218 | { | |
16219 | struct stack_frame *frame = (struct stack_frame *)bp; | |
16220 | ||
16221 | @@ -175,8 +155,6 @@ print_context_stack(struct thread_info * | |
16222 | return bp; | |
16223 | } | |
16224 | ||
16225 | -#define MSG(msg) ops->warning(data, msg) | |
16226 | - | |
16227 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | |
16228 | unsigned long *stack, unsigned long bp, | |
16229 | const struct stacktrace_ops *ops, void *data) | |
16230 | @@ -186,7 +164,6 @@ void dump_trace(struct task_struct *task | |
16231 | ||
16232 | if (!stack) { | |
16233 | unsigned long dummy; | |
16234 | - | |
16235 | stack = &dummy; | |
16236 | if (task != current) | |
16237 | stack = (unsigned long *)task->thread.sp; | |
16238 | @@ -204,7 +181,7 @@ void dump_trace(struct task_struct *task | |
16239 | } | |
16240 | #endif | |
16241 | ||
16242 | - while (1) { | |
16243 | + for (;;) { | |
16244 | struct thread_info *context; | |
16245 | ||
16246 | context = (struct thread_info *) | |
16247 | @@ -256,15 +233,15 @@ static void print_trace_address(void *da | |
16248 | } | |
16249 | ||
16250 | static const struct stacktrace_ops print_trace_ops = { | |
16251 | - .warning = print_trace_warning, | |
16252 | - .warning_symbol = print_trace_warning_symbol, | |
16253 | - .stack = print_trace_stack, | |
16254 | - .address = print_trace_address, | |
16255 | + .warning = print_trace_warning, | |
16256 | + .warning_symbol = print_trace_warning_symbol, | |
16257 | + .stack = print_trace_stack, | |
16258 | + .address = print_trace_address, | |
16259 | }; | |
16260 | ||
16261 | static void | |
16262 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | |
16263 | - unsigned long *stack, unsigned long bp, char *log_lvl) | |
16264 | + unsigned long *stack, unsigned long bp, char *log_lvl) | |
16265 | { | |
16266 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | |
16267 | printk("%s =======================\n", log_lvl); | |
16268 | @@ -359,15 +336,14 @@ void show_registers(struct pt_regs *regs | |
16269 | printk(KERN_EMERG "Code: "); | |
16270 | ||
16271 | ip = (u8 *)regs->ip - code_prologue; | |
16272 | - if (ip < (u8 *)PAGE_OFFSET || | |
16273 | - probe_kernel_address(ip, c)) { | |
16274 | + if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { | |
16275 | /* try starting at EIP */ | |
16276 | ip = (u8 *)regs->ip; | |
16277 | code_len = code_len - code_prologue + 1; | |
16278 | } | |
16279 | for (i = 0; i < code_len; i++, ip++) { | |
16280 | if (ip < (u8 *)PAGE_OFFSET || | |
16281 | - probe_kernel_address(ip, c)) { | |
16282 | + probe_kernel_address(ip, c)) { | |
16283 | printk(" Bad EIP value."); | |
16284 | break; | |
16285 | } | |
16286 | @@ -392,7 +368,53 @@ int is_valid_bugaddr(unsigned long ip) | |
16287 | return ud2 == 0x0b0f; | |
16288 | } | |
16289 | ||
16290 | -static int die_counter; | |
16291 | +static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | |
16292 | +static int die_owner = -1; | |
16293 | +static unsigned int die_nest_count; | |
16294 | + | |
16295 | +unsigned __kprobes long oops_begin(void) | |
16296 | +{ | |
16297 | + unsigned long flags; | |
16298 | + | |
16299 | + oops_enter(); | |
16300 | + | |
16301 | + if (die_owner != raw_smp_processor_id()) { | |
16302 | + console_verbose(); | |
16303 | + raw_local_irq_save(flags); | |
16304 | + __raw_spin_lock(&die_lock); | |
16305 | + die_owner = smp_processor_id(); | |
16306 | + die_nest_count = 0; | |
16307 | + bust_spinlocks(1); | |
16308 | + } else { | |
16309 | + raw_local_irq_save(flags); | |
16310 | + } | |
16311 | + die_nest_count++; | |
16312 | + return flags; | |
16313 | +} | |
16314 | + | |
16315 | +void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | |
16316 | +{ | |
16317 | + bust_spinlocks(0); | |
16318 | + die_owner = -1; | |
16319 | + add_taint(TAINT_DIE); | |
16320 | + __raw_spin_unlock(&die_lock); | |
16321 | + raw_local_irq_restore(flags); | |
16322 | + | |
16323 | + if (!regs) | |
16324 | + return; | |
16325 | + | |
16326 | + if (kexec_should_crash(current)) | |
16327 | + crash_kexec(regs); | |
16328 | + | |
16329 | + if (in_interrupt()) | |
16330 | + panic("Fatal exception in interrupt"); | |
16331 | + | |
16332 | + if (panic_on_oops) | |
16333 | + panic("Fatal exception"); | |
16334 | + | |
16335 | + oops_exit(); | |
16336 | + do_exit(signr); | |
16337 | +} | |
16338 | ||
16339 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | |
16340 | { | |
16341 | @@ -410,26 +432,22 @@ int __kprobes __die(const char *str, str | |
16342 | printk("DEBUG_PAGEALLOC"); | |
16343 | #endif | |
16344 | printk("\n"); | |
16345 | - | |
16346 | if (notify_die(DIE_OOPS, str, regs, err, | |
16347 | - current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) { | |
16348 | + current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | |
16349 | + return 1; | |
16350 | ||
16351 | - show_registers(regs); | |
16352 | - /* Executive summary in case the oops scrolled away */ | |
16353 | - sp = (unsigned long) (&regs->sp); | |
16354 | - savesegment(ss, ss); | |
16355 | - if (user_mode(regs)) { | |
16356 | - sp = regs->sp; | |
16357 | - ss = regs->ss & 0xffff; | |
16358 | - } | |
16359 | - printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | |
16360 | - print_symbol("%s", regs->ip); | |
16361 | - printk(" SS:ESP %04x:%08lx\n", ss, sp); | |
16362 | - | |
16363 | - return 0; | |
16364 | - } | |
16365 | - | |
16366 | - return 1; | |
16367 | + show_registers(regs); | |
16368 | + /* Executive summary in case the oops scrolled away */ | |
16369 | + sp = (unsigned long) (&regs->sp); | |
16370 | + savesegment(ss, ss); | |
16371 | + if (user_mode(regs)) { | |
16372 | + sp = regs->sp; | |
16373 | + ss = regs->ss & 0xffff; | |
16374 | + } | |
16375 | + printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | |
16376 | + print_symbol("%s", regs->ip); | |
16377 | + printk(" SS:ESP %04x:%08lx\n", ss, sp); | |
16378 | + return 0; | |
16379 | } | |
16380 | ||
16381 | /* | |
16382 | @@ -438,31 +456,9 @@ int __kprobes __die(const char *str, str | |
16383 | */ | |
16384 | void die(const char *str, struct pt_regs *regs, long err) | |
16385 | { | |
16386 | - static struct { | |
16387 | - raw_spinlock_t lock; | |
16388 | - u32 lock_owner; | |
16389 | - int lock_owner_depth; | |
16390 | - } die = { | |
16391 | - .lock = __RAW_SPIN_LOCK_UNLOCKED, | |
16392 | - .lock_owner = -1, | |
16393 | - .lock_owner_depth = 0 | |
16394 | - }; | |
16395 | - unsigned long flags; | |
16396 | - | |
16397 | - oops_enter(); | |
16398 | + unsigned long flags = oops_begin(); | |
16399 | ||
16400 | - if (die.lock_owner != raw_smp_processor_id()) { | |
16401 | - console_verbose(); | |
16402 | - raw_local_irq_save(flags); | |
16403 | - __raw_spin_lock(&die.lock); | |
16404 | - die.lock_owner = smp_processor_id(); | |
16405 | - die.lock_owner_depth = 0; | |
16406 | - bust_spinlocks(1); | |
16407 | - } else { | |
16408 | - raw_local_irq_save(flags); | |
16409 | - } | |
16410 | - | |
16411 | - if (++die.lock_owner_depth < 3) { | |
16412 | + if (die_nest_count < 3) { | |
16413 | report_bug(regs->ip, regs); | |
16414 | ||
16415 | if (__die(str, regs, err)) | |
16416 | @@ -471,26 +467,7 @@ void die(const char *str, struct pt_regs | |
16417 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); | |
16418 | } | |
16419 | ||
16420 | - bust_spinlocks(0); | |
16421 | - die.lock_owner = -1; | |
16422 | - add_taint(TAINT_DIE); | |
16423 | - __raw_spin_unlock(&die.lock); | |
16424 | - raw_local_irq_restore(flags); | |
16425 | - | |
16426 | - if (!regs) | |
16427 | - return; | |
16428 | - | |
16429 | - if (kexec_should_crash(current)) | |
16430 | - crash_kexec(regs); | |
16431 | - | |
16432 | - if (in_interrupt()) | |
16433 | - panic("Fatal exception in interrupt"); | |
16434 | - | |
16435 | - if (panic_on_oops) | |
16436 | - panic("Fatal exception"); | |
16437 | - | |
16438 | - oops_exit(); | |
16439 | - do_exit(SIGSEGV); | |
16440 | + oops_end(flags, regs, SIGSEGV); | |
16441 | } | |
16442 | ||
16443 | static inline void | |
16444 | @@ -554,7 +531,7 @@ void do_##name(struct pt_regs *regs, lon | |
16445 | { \ | |
16446 | trace_hardirqs_fixup(); \ | |
16447 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | |
16448 | - == NOTIFY_STOP) \ | |
16449 | + == NOTIFY_STOP) \ | |
16450 | return; \ | |
16451 | do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ | |
16452 | } | |
16453 | @@ -570,7 +547,7 @@ void do_##name(struct pt_regs *regs, lon | |
16454 | info.si_code = sicode; \ | |
16455 | info.si_addr = (void __user *)siaddr; \ | |
16456 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | |
16457 | - == NOTIFY_STOP) \ | |
16458 | + == NOTIFY_STOP) \ | |
16459 | return; \ | |
16460 | do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ | |
16461 | } | |
16462 | @@ -579,7 +556,7 @@ void do_##name(struct pt_regs *regs, lon | |
16463 | void do_##name(struct pt_regs *regs, long error_code) \ | |
16464 | { \ | |
16465 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | |
16466 | - == NOTIFY_STOP) \ | |
16467 | + == NOTIFY_STOP) \ | |
16468 | return; \ | |
16469 | do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ | |
16470 | } | |
16471 | @@ -594,28 +571,29 @@ void do_##name(struct pt_regs *regs, lon | |
16472 | info.si_addr = (void __user *)siaddr; \ | |
16473 | trace_hardirqs_fixup(); \ | |
16474 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | |
16475 | - == NOTIFY_STOP) \ | |
16476 | + == NOTIFY_STOP) \ | |
16477 | return; \ | |
16478 | do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ | |
16479 | } | |
16480 | ||
16481 | -DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) | |
16482 | +DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) | |
16483 | #ifndef CONFIG_KPROBES | |
16484 | DO_VM86_ERROR(3, SIGTRAP, "int3", int3) | |
16485 | #endif | |
16486 | DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) | |
16487 | DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) | |
16488 | -DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) | |
16489 | -DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | |
16490 | +DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) | |
16491 | +DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | |
16492 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | |
16493 | -DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | |
16494 | -DO_ERROR(12, SIGBUS, "stack segment", stack_segment) | |
16495 | +DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | |
16496 | +DO_ERROR(12, SIGBUS, "stack segment", stack_segment) | |
16497 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) | |
16498 | DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) | |
16499 | ||
16500 | -void __kprobes do_general_protection(struct pt_regs * regs, | |
16501 | - long error_code) | |
16502 | +void __kprobes | |
16503 | +do_general_protection(struct pt_regs *regs, long error_code) | |
16504 | { | |
16505 | + struct task_struct *tsk; | |
16506 | struct thread_struct *thread; | |
16507 | ||
16508 | thread = &current->thread; | |
16509 | @@ -623,23 +601,24 @@ void __kprobes do_general_protection(str | |
16510 | if (regs->flags & X86_VM_MASK) | |
16511 | goto gp_in_vm86; | |
16512 | ||
16513 | + tsk = current; | |
16514 | if (!user_mode(regs)) | |
16515 | goto gp_in_kernel; | |
16516 | ||
16517 | - current->thread.error_code = error_code; | |
16518 | - current->thread.trap_no = 13; | |
16519 | + tsk->thread.error_code = error_code; | |
16520 | + tsk->thread.trap_no = 13; | |
16521 | ||
16522 | - if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) && | |
16523 | - printk_ratelimit()) { | |
16524 | + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | |
16525 | + printk_ratelimit()) { | |
16526 | printk(KERN_INFO | |
16527 | - "%s[%d] general protection ip:%lx sp:%lx error:%lx", | |
16528 | - current->comm, task_pid_nr(current), | |
16529 | - regs->ip, regs->sp, error_code); | |
16530 | + "%s[%d] general protection ip:%lx sp:%lx error:%lx", | |
16531 | + tsk->comm, task_pid_nr(tsk), | |
16532 | + regs->ip, regs->sp, error_code); | |
16533 | print_vma_addr(" in ", regs->ip); | |
16534 | printk("\n"); | |
16535 | } | |
16536 | ||
16537 | - force_sig(SIGSEGV, current); | |
16538 | + force_sig(SIGSEGV, tsk); | |
16539 | return; | |
16540 | ||
16541 | gp_in_vm86: | |
16542 | @@ -648,14 +627,15 @@ gp_in_vm86: | |
16543 | return; | |
16544 | ||
16545 | gp_in_kernel: | |
16546 | - if (!fixup_exception(regs)) { | |
16547 | - current->thread.error_code = error_code; | |
16548 | - current->thread.trap_no = 13; | |
16549 | - if (notify_die(DIE_GPF, "general protection fault", regs, | |
16550 | + if (fixup_exception(regs)) | |
16551 | + return; | |
16552 | + | |
16553 | + tsk->thread.error_code = error_code; | |
16554 | + tsk->thread.trap_no = 13; | |
16555 | + if (notify_die(DIE_GPF, "general protection fault", regs, | |
16556 | error_code, 13, SIGSEGV) == NOTIFY_STOP) | |
16557 | - return; | |
16558 | - die("general protection fault", regs, error_code); | |
16559 | - } | |
16560 | + return; | |
16561 | + die("general protection fault", regs, error_code); | |
16562 | } | |
16563 | ||
16564 | static notrace __kprobes void | |
16565 | @@ -722,9 +702,9 @@ unknown_nmi_error(unsigned char reason, | |
16566 | ||
16567 | static DEFINE_SPINLOCK(nmi_print_lock); | |
16568 | ||
16569 | -void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) | |
16570 | +void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) | |
16571 | { | |
16572 | - if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) | |
16573 | + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | |
16574 | return; | |
16575 | ||
16576 | spin_lock(&nmi_print_lock); | |
16577 | @@ -733,10 +713,12 @@ void notrace __kprobes die_nmi(struct pt | |
16578 | * to get a message out: | |
16579 | */ | |
16580 | bust_spinlocks(1); | |
16581 | - printk(KERN_EMERG "%s", msg); | |
16582 | + printk(KERN_EMERG "%s", str); | |
16583 | printk(" on CPU%d, ip %08lx, registers:\n", | |
16584 | smp_processor_id(), regs->ip); | |
16585 | show_registers(regs); | |
16586 | + if (do_panic) | |
16587 | + panic("Non maskable interrupt"); | |
16588 | console_silent(); | |
16589 | spin_unlock(&nmi_print_lock); | |
16590 | bust_spinlocks(0); | |
16591 | @@ -756,14 +738,17 @@ void notrace __kprobes die_nmi(struct pt | |
16592 | static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |
16593 | { | |
16594 | unsigned char reason = 0; | |
16595 | + int cpu; | |
16596 | ||
16597 | - /* Only the BSP gets external NMIs from the system: */ | |
16598 | - if (!smp_processor_id()) | |
16599 | + cpu = smp_processor_id(); | |
16600 | + | |
16601 | + /* Only the BSP gets external NMIs from the system. */ | |
16602 | + if (!cpu) | |
16603 | reason = get_nmi_reason(); | |
16604 | ||
16605 | if (!(reason & 0xc0)) { | |
16606 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) | |
16607 | - == NOTIFY_STOP) | |
16608 | + == NOTIFY_STOP) | |
16609 | return; | |
16610 | #ifdef CONFIG_X86_LOCAL_APIC | |
16611 | /* | |
16612 | @@ -772,7 +757,7 @@ static notrace __kprobes void default_do | |
16613 | */ | |
16614 | if (nmi_watchdog_tick(regs, reason)) | |
16615 | return; | |
16616 | - if (!do_nmi_callback(regs, smp_processor_id())) | |
16617 | + if (!do_nmi_callback(regs, cpu)) | |
16618 | unknown_nmi_error(reason, regs); | |
16619 | #else | |
16620 | unknown_nmi_error(reason, regs); | |
16621 | @@ -782,6 +767,8 @@ static notrace __kprobes void default_do | |
16622 | } | |
16623 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | |
16624 | return; | |
16625 | + | |
16626 | + /* AK: following checks seem to be broken on modern chipsets. FIXME */ | |
16627 | if (reason & 0x80) | |
16628 | mem_parity_error(reason, regs); | |
16629 | if (reason & 0x40) | |
16630 | @@ -793,8 +780,6 @@ static notrace __kprobes void default_do | |
16631 | reassert_nmi(); | |
16632 | } | |
16633 | ||
16634 | -static int ignore_nmis; | |
16635 | - | |
16636 | notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) | |
16637 | { | |
16638 | int cpu; | |
16639 | @@ -879,7 +864,7 @@ void __kprobes do_debug(struct pt_regs * | |
16640 | tsk->thread.debugctlmsr = 0; | |
16641 | ||
16642 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | |
16643 | - SIGTRAP) == NOTIFY_STOP) | |
16644 | + SIGTRAP) == NOTIFY_STOP) | |
16645 | return; | |
16646 | /* It's safe to allow irq's after DR6 has been saved */ | |
16647 | if (regs->flags & X86_EFLAGS_IF) | |
16648 | @@ -940,9 +925,8 @@ clear_TF_reenable: | |
16649 | void math_error(void __user *ip) | |
16650 | { | |
16651 | struct task_struct *task; | |
16652 | - unsigned short cwd; | |
16653 | - unsigned short swd; | |
16654 | siginfo_t info; | |
16655 | + unsigned short cwd, swd; | |
16656 | ||
16657 | /* | |
16658 | * Save the info for the exception handler and clear the error. | |
16659 | @@ -961,7 +945,7 @@ void math_error(void __user *ip) | |
16660 | * C1 reg you need in case of a stack fault, 0x040 is the stack | |
16661 | * fault bit. We should only be taking one exception at a time, | |
16662 | * so if this combination doesn't produce any single exception, | |
16663 | - * then we have a bad program that isn't syncronizing its FPU usage | |
16664 | + * then we have a bad program that isn't synchronizing its FPU usage | |
16665 | * and it will suffer the consequences since we won't be able to | |
16666 | * fully reproduce the context of the exception | |
16667 | */ | |
16668 | @@ -970,7 +954,7 @@ void math_error(void __user *ip) | |
16669 | switch (swd & ~cwd & 0x3f) { | |
16670 | case 0x000: /* No unmasked exception */ | |
16671 | return; | |
16672 | - default: /* Multiple exceptions */ | |
16673 | + default: /* Multiple exceptions */ | |
16674 | break; | |
16675 | case 0x001: /* Invalid Op */ | |
16676 | /* | |
16677 | @@ -1006,8 +990,8 @@ void do_coprocessor_error(struct pt_regs | |
16678 | static void simd_math_error(void __user *ip) | |
16679 | { | |
16680 | struct task_struct *task; | |
16681 | - unsigned short mxcsr; | |
16682 | siginfo_t info; | |
16683 | + unsigned short mxcsr; | |
16684 | ||
16685 | /* | |
16686 | * Save the info for the exception handler and clear the error. | |
16687 | @@ -1084,7 +1068,7 @@ void do_spurious_interrupt_bug(struct pt | |
16688 | ||
16689 | unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) | |
16690 | { | |
16691 | - struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt; | |
16692 | + struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); | |
16693 | unsigned long base = (kesp - uesp) & -THREAD_SIZE; | |
16694 | unsigned long new_kesp = kesp - base; | |
16695 | unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; | |
82094b55 AF |
16696 | --- sle11-2009-10-16.orig/arch/x86/kernel/traps_64-xen.c 2009-03-16 16:38:05.000000000 +0100 |
16697 | +++ sle11-2009-10-16/arch/x86/kernel/traps_64-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
16698 | @@ -10,73 +10,56 @@ |
16699 | * 'Traps.c' handles hardware traps and faults after we have saved some | |
16700 | * state in 'entry.S'. | |
16701 | */ | |
16702 | -#include <linux/sched.h> | |
16703 | +#include <linux/moduleparam.h> | |
16704 | +#include <linux/interrupt.h> | |
16705 | +#include <linux/kallsyms.h> | |
16706 | +#include <linux/spinlock.h> | |
16707 | +#include <linux/kprobes.h> | |
16708 | +#include <linux/uaccess.h> | |
16709 | +#include <linux/utsname.h> | |
16710 | +#include <linux/kdebug.h> | |
16711 | #include <linux/kernel.h> | |
16712 | +#include <linux/module.h> | |
16713 | +#include <linux/ptrace.h> | |
16714 | #include <linux/string.h> | |
16715 | +#include <linux/unwind.h> | |
16716 | +#include <linux/delay.h> | |
16717 | #include <linux/errno.h> | |
16718 | -#include <linux/ptrace.h> | |
16719 | +#include <linux/kexec.h> | |
16720 | +#include <linux/sched.h> | |
16721 | #include <linux/timer.h> | |
16722 | -#include <linux/mm.h> | |
16723 | #include <linux/init.h> | |
16724 | -#include <linux/delay.h> | |
16725 | -#include <linux/spinlock.h> | |
16726 | -#include <linux/interrupt.h> | |
16727 | -#include <linux/kallsyms.h> | |
16728 | -#include <linux/module.h> | |
16729 | -#include <linux/moduleparam.h> | |
16730 | -#include <linux/nmi.h> | |
16731 | -#include <linux/kprobes.h> | |
16732 | -#include <linux/kexec.h> | |
16733 | -#include <linux/unwind.h> | |
16734 | -#include <linux/uaccess.h> | |
16735 | #include <linux/bug.h> | |
16736 | -#include <linux/kdebug.h> | |
16737 | -#include <linux/utsname.h> | |
16738 | - | |
16739 | -#include <mach_traps.h> | |
16740 | +#include <linux/nmi.h> | |
16741 | +#include <linux/mm.h> | |
16742 | ||
16743 | #if defined(CONFIG_EDAC) | |
16744 | #include <linux/edac.h> | |
16745 | #endif | |
16746 | ||
16747 | -#include <asm/system.h> | |
16748 | -#include <asm/io.h> | |
16749 | -#include <asm/atomic.h> | |
16750 | +#include <asm/stacktrace.h> | |
16751 | +#include <asm/processor.h> | |
16752 | #include <asm/debugreg.h> | |
16753 | +#include <asm/atomic.h> | |
16754 | +#include <asm/system.h> | |
16755 | +#include <asm/unwind.h> | |
16756 | #include <asm/desc.h> | |
16757 | #include <asm/i387.h> | |
16758 | -#include <asm/processor.h> | |
16759 | -#include <asm/unwind.h> | |
16760 | +#include <asm/nmi.h> | |
16761 | #include <asm/smp.h> | |
16762 | +#include <asm/io.h> | |
16763 | #include <asm/pgalloc.h> | |
16764 | -#include <asm/pda.h> | |
16765 | #include <asm/proto.h> | |
16766 | -#include <asm/nmi.h> | |
16767 | -#include <asm/stacktrace.h> | |
16768 | +#include <asm/pda.h> | |
16769 | +#include <asm/traps.h> | |
16770 | ||
16771 | -asmlinkage void divide_error(void); | |
16772 | -asmlinkage void debug(void); | |
16773 | -asmlinkage void nmi(void); | |
16774 | -asmlinkage void int3(void); | |
16775 | -asmlinkage void overflow(void); | |
16776 | -asmlinkage void bounds(void); | |
16777 | -asmlinkage void invalid_op(void); | |
16778 | -asmlinkage void device_not_available(void); | |
16779 | -asmlinkage void double_fault(void); | |
16780 | -asmlinkage void coprocessor_segment_overrun(void); | |
16781 | -asmlinkage void invalid_TSS(void); | |
16782 | -asmlinkage void segment_not_present(void); | |
16783 | -asmlinkage void stack_segment(void); | |
16784 | -asmlinkage void general_protection(void); | |
16785 | -asmlinkage void page_fault(void); | |
16786 | -asmlinkage void coprocessor_error(void); | |
16787 | -asmlinkage void simd_coprocessor_error(void); | |
16788 | -asmlinkage void reserved(void); | |
16789 | -asmlinkage void alignment_check(void); | |
16790 | -asmlinkage void machine_check(void); | |
16791 | -asmlinkage void spurious_interrupt_bug(void); | |
16792 | +#include <mach_traps.h> | |
16793 | ||
16794 | +int panic_on_unrecovered_nmi; | |
16795 | +int kstack_depth_to_print = 12; | |
16796 | static unsigned int code_bytes = 64; | |
16797 | +static int ignore_nmis; | |
16798 | +static int die_counter; | |
16799 | ||
16800 | static inline void conditional_sti(struct pt_regs *regs) | |
16801 | { | |
16802 | @@ -100,34 +83,9 @@ static inline void preempt_conditional_c | |
16803 | dec_preempt_count(); | |
16804 | } | |
16805 | ||
16806 | -int kstack_depth_to_print = 12; | |
16807 | - | |
16808 | void printk_address(unsigned long address, int reliable) | |
16809 | { | |
16810 | -#ifdef CONFIG_KALLSYMS | |
16811 | - unsigned long offset = 0, symsize; | |
16812 | - const char *symname; | |
16813 | - char *modname; | |
16814 | - char *delim = ":"; | |
16815 | - char namebuf[KSYM_NAME_LEN]; | |
16816 | - char reliab[4] = ""; | |
16817 | - | |
16818 | - symname = kallsyms_lookup(address, &symsize, &offset, | |
16819 | - &modname, namebuf); | |
16820 | - if (!symname) { | |
16821 | - printk(" [<%016lx>]\n", address); | |
16822 | - return; | |
16823 | - } | |
16824 | - if (!reliable) | |
16825 | - strcpy(reliab, "? "); | |
16826 | - | |
16827 | - if (!modname) | |
16828 | - modname = delim = ""; | |
16829 | - printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n", | |
16830 | - address, reliab, delim, modname, delim, symname, offset, symsize); | |
16831 | -#else | |
16832 | - printk(" [<%016lx>]\n", address); | |
16833 | -#endif | |
16834 | + printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address); | |
16835 | } | |
16836 | ||
16837 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |
16838 | @@ -206,8 +164,6 @@ static unsigned long *in_exception_stack | |
16839 | return NULL; | |
16840 | } | |
16841 | ||
16842 | -#define MSG(txt) ops->warning(data, txt) | |
16843 | - | |
16844 | /* | |
16845 | * x86-64 can have up to three kernel stacks: | |
16846 | * process stack | |
16847 | @@ -234,11 +190,11 @@ struct stack_frame { | |
16848 | unsigned long return_address; | |
16849 | }; | |
16850 | ||
16851 | - | |
16852 | -static inline unsigned long print_context_stack(struct thread_info *tinfo, | |
16853 | - unsigned long *stack, unsigned long bp, | |
16854 | - const struct stacktrace_ops *ops, void *data, | |
16855 | - unsigned long *end) | |
16856 | +static inline unsigned long | |
16857 | +print_context_stack(struct thread_info *tinfo, | |
16858 | + unsigned long *stack, unsigned long bp, | |
16859 | + const struct stacktrace_ops *ops, void *data, | |
16860 | + unsigned long *end) | |
16861 | { | |
16862 | struct stack_frame *frame = (struct stack_frame *)bp; | |
16863 | ||
16864 | @@ -260,7 +216,7 @@ static inline unsigned long print_contex | |
16865 | return bp; | |
16866 | } | |
16867 | ||
16868 | -void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | |
16869 | +void dump_trace(struct task_struct *task, struct pt_regs *regs, | |
16870 | unsigned long *stack, unsigned long bp, | |
16871 | const struct stacktrace_ops *ops, void *data) | |
16872 | { | |
16873 | @@ -269,36 +225,34 @@ void dump_trace(struct task_struct *tsk, | |
16874 | unsigned used = 0; | |
16875 | struct thread_info *tinfo; | |
16876 | ||
16877 | - if (!tsk) | |
16878 | - tsk = current; | |
16879 | - tinfo = task_thread_info(tsk); | |
16880 | + if (!task) | |
16881 | + task = current; | |
16882 | ||
16883 | if (!stack) { | |
16884 | unsigned long dummy; | |
16885 | stack = &dummy; | |
16886 | - if (tsk && tsk != current) | |
16887 | - stack = (unsigned long *)tsk->thread.sp; | |
16888 | + if (task && task != current) | |
16889 | + stack = (unsigned long *)task->thread.sp; | |
16890 | } | |
16891 | ||
16892 | #ifdef CONFIG_FRAME_POINTER | |
16893 | if (!bp) { | |
16894 | - if (tsk == current) { | |
16895 | + if (task == current) { | |
16896 | /* Grab bp right from our regs */ | |
16897 | - asm("movq %%rbp, %0" : "=r" (bp):); | |
16898 | + asm("movq %%rbp, %0" : "=r" (bp) :); | |
16899 | } else { | |
16900 | /* bp is the last reg pushed by switch_to */ | |
16901 | - bp = *(unsigned long *) tsk->thread.sp; | |
16902 | + bp = *(unsigned long *) task->thread.sp; | |
16903 | } | |
16904 | } | |
16905 | #endif | |
16906 | ||
16907 | - | |
16908 | - | |
16909 | /* | |
16910 | * Print function call entries in all stacks, starting at the | |
16911 | * current stack address. If the stacks consist of nested | |
16912 | * exceptions | |
16913 | */ | |
16914 | + tinfo = task_thread_info(task); | |
16915 | for (;;) { | |
16916 | char *id; | |
16917 | unsigned long *estack_end; | |
16918 | @@ -383,18 +337,24 @@ static const struct stacktrace_ops print | |
16919 | .address = print_trace_address, | |
16920 | }; | |
16921 | ||
16922 | -void | |
16923 | -show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, | |
16924 | - unsigned long bp) | |
16925 | +static void | |
16926 | +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | |
16927 | + unsigned long *stack, unsigned long bp, char *log_lvl) | |
16928 | { | |
16929 | printk("\nCall Trace:\n"); | |
16930 | - dump_trace(tsk, regs, stack, bp, &print_trace_ops, NULL); | |
16931 | + dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | |
16932 | printk("\n"); | |
16933 | } | |
16934 | ||
16935 | +void show_trace(struct task_struct *task, struct pt_regs *regs, | |
16936 | + unsigned long *stack, unsigned long bp) | |
16937 | +{ | |
16938 | + show_trace_log_lvl(task, regs, stack, bp, ""); | |
16939 | +} | |
16940 | + | |
16941 | static void | |
16942 | -_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp, | |
16943 | - unsigned long bp) | |
16944 | +show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |
16945 | + unsigned long *sp, unsigned long bp, char *log_lvl) | |
16946 | { | |
16947 | unsigned long *stack; | |
16948 | int i; | |
16949 | @@ -406,14 +366,14 @@ _show_stack(struct task_struct *tsk, str | |
16950 | // back trace for this cpu. | |
16951 | ||
16952 | if (sp == NULL) { | |
16953 | - if (tsk) | |
16954 | - sp = (unsigned long *)tsk->thread.sp; | |
16955 | + if (task) | |
16956 | + sp = (unsigned long *)task->thread.sp; | |
16957 | else | |
16958 | sp = (unsigned long *)&sp; | |
16959 | } | |
16960 | ||
16961 | stack = sp; | |
16962 | - for(i=0; i < kstack_depth_to_print; i++) { | |
16963 | + for (i = 0; i < kstack_depth_to_print; i++) { | |
16964 | if (stack >= irqstack && stack <= irqstack_end) { | |
16965 | if (stack == irqstack_end) { | |
16966 | stack = (unsigned long *) (irqstack_end[-1]); | |
16967 | @@ -428,12 +388,12 @@ _show_stack(struct task_struct *tsk, str | |
16968 | printk(" %016lx", *stack++); | |
16969 | touch_nmi_watchdog(); | |
16970 | } | |
16971 | - show_trace(tsk, regs, sp, bp); | |
16972 | + show_trace_log_lvl(task, regs, sp, bp, log_lvl); | |
16973 | } | |
16974 | ||
16975 | -void show_stack(struct task_struct *tsk, unsigned long * sp) | |
16976 | +void show_stack(struct task_struct *task, unsigned long *sp) | |
16977 | { | |
16978 | - _show_stack(tsk, NULL, sp, 0); | |
16979 | + show_stack_log_lvl(task, NULL, sp, 0, ""); | |
16980 | } | |
16981 | ||
16982 | /* | |
16983 | @@ -441,8 +401,8 @@ void show_stack(struct task_struct *tsk, | |
16984 | */ | |
16985 | void dump_stack(void) | |
16986 | { | |
16987 | - unsigned long dummy; | |
16988 | unsigned long bp = 0; | |
16989 | + unsigned long stack; | |
16990 | ||
16991 | #ifdef CONFIG_FRAME_POINTER | |
16992 | if (!bp) | |
16993 | @@ -454,7 +414,7 @@ void dump_stack(void) | |
16994 | init_utsname()->release, | |
16995 | (int)strcspn(init_utsname()->version, " "), | |
16996 | init_utsname()->version); | |
16997 | - show_trace(NULL, NULL, &dummy, bp); | |
16998 | + show_trace(NULL, NULL, &stack, bp); | |
16999 | } | |
17000 | ||
17001 | EXPORT_SYMBOL(dump_stack); | |
17002 | @@ -465,12 +425,8 @@ void show_registers(struct pt_regs *regs | |
17003 | unsigned long sp; | |
17004 | const int cpu = smp_processor_id(); | |
17005 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; | |
17006 | - u8 *ip; | |
17007 | - unsigned int code_prologue = code_bytes * 43 / 64; | |
17008 | - unsigned int code_len = code_bytes; | |
17009 | ||
17010 | sp = regs->sp; | |
17011 | - ip = (u8 *) regs->ip - code_prologue; | |
17012 | printk("CPU %d ", cpu); | |
17013 | __show_regs(regs); | |
17014 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | |
17015 | @@ -481,15 +437,22 @@ void show_registers(struct pt_regs *regs | |
17016 | * time of the fault.. | |
17017 | */ | |
17018 | if (!user_mode(regs)) { | |
17019 | + unsigned int code_prologue = code_bytes * 43 / 64; | |
17020 | + unsigned int code_len = code_bytes; | |
17021 | unsigned char c; | |
17022 | + u8 *ip; | |
17023 | + | |
17024 | printk("Stack: "); | |
17025 | - _show_stack(NULL, regs, (unsigned long *)sp, regs->bp); | |
17026 | + show_stack_log_lvl(NULL, regs, (unsigned long *)sp, | |
17027 | + regs->bp, ""); | |
17028 | printk("\n"); | |
17029 | ||
17030 | printk(KERN_EMERG "Code: "); | |
17031 | + | |
17032 | + ip = (u8 *)regs->ip - code_prologue; | |
17033 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { | |
17034 | /* try starting at RIP */ | |
17035 | - ip = (u8 *) regs->ip; | |
17036 | + ip = (u8 *)regs->ip; | |
17037 | code_len = code_len - code_prologue + 1; | |
17038 | } | |
17039 | for (i = 0; i < code_len; i++, ip++) { | |
17040 | @@ -505,7 +468,7 @@ void show_registers(struct pt_regs *regs | |
17041 | } | |
17042 | } | |
17043 | printk("\n"); | |
17044 | -} | |
17045 | +} | |
17046 | ||
17047 | int is_valid_bugaddr(unsigned long ip) | |
17048 | { | |
17049 | @@ -545,7 +508,7 @@ unsigned __kprobes long oops_begin(void) | |
17050 | } | |
17051 | ||
17052 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | |
17053 | -{ | |
17054 | +{ | |
17055 | die_owner = -1; | |
17056 | bust_spinlocks(0); | |
17057 | die_nest_count--; | |
17058 | @@ -563,10 +526,9 @@ void __kprobes oops_end(unsigned long fl | |
17059 | do_exit(signr); | |
17060 | } | |
17061 | ||
17062 | -int __kprobes __die(const char * str, struct pt_regs * regs, long err) | |
17063 | +int __kprobes __die(const char *str, struct pt_regs *regs, long err) | |
17064 | { | |
17065 | - static int die_counter; | |
17066 | - printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter); | |
17067 | + printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter); | |
17068 | #ifdef CONFIG_PREEMPT | |
17069 | printk("PREEMPT "); | |
17070 | #endif | |
17071 | @@ -577,8 +539,10 @@ int __kprobes __die(const char * str, st | |
17072 | printk("DEBUG_PAGEALLOC"); | |
17073 | #endif | |
17074 | printk("\n"); | |
17075 | - if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | |
17076 | + if (notify_die(DIE_OOPS, str, regs, err, | |
17077 | + current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | |
17078 | return 1; | |
17079 | + | |
17080 | show_registers(regs); | |
17081 | add_taint(TAINT_DIE); | |
17082 | /* Executive summary in case the oops scrolled away */ | |
17083 | @@ -590,7 +554,7 @@ int __kprobes __die(const char * str, st | |
17084 | return 0; | |
17085 | } | |
17086 | ||
17087 | -void die(const char * str, struct pt_regs * regs, long err) | |
17088 | +void die(const char *str, struct pt_regs *regs, long err) | |
17089 | { | |
17090 | unsigned long flags = oops_begin(); | |
17091 | ||
17092 | @@ -608,8 +572,7 @@ die_nmi(char *str, struct pt_regs *regs, | |
17093 | { | |
17094 | unsigned long flags; | |
17095 | ||
17096 | - if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == | |
17097 | - NOTIFY_STOP) | |
17098 | + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | |
17099 | return; | |
17100 | ||
17101 | flags = oops_begin(); | |
17102 | @@ -617,7 +580,9 @@ die_nmi(char *str, struct pt_regs *regs, | |
17103 | * We are in trouble anyway, lets at least try | |
17104 | * to get a message out. | |
17105 | */ | |
17106 | - printk(str, smp_processor_id()); | |
17107 | + printk(KERN_EMERG "%s", str); | |
17108 | + printk(" on CPU%d, ip %08lx, registers:\n", | |
17109 | + smp_processor_id(), regs->ip); | |
17110 | show_registers(regs); | |
17111 | if (kexec_should_crash(current)) | |
17112 | crash_kexec(regs); | |
17113 | @@ -630,44 +595,44 @@ die_nmi(char *str, struct pt_regs *regs, | |
17114 | } | |
17115 | #endif | |
17116 | ||
17117 | -static void __kprobes do_trap(int trapnr, int signr, char *str, | |
17118 | - struct pt_regs * regs, long error_code, | |
17119 | - siginfo_t *info) | |
17120 | +static void __kprobes | |
17121 | +do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, | |
17122 | + long error_code, siginfo_t *info) | |
17123 | { | |
17124 | struct task_struct *tsk = current; | |
17125 | ||
17126 | - if (user_mode(regs)) { | |
17127 | - /* | |
17128 | - * We want error_code and trap_no set for userspace | |
17129 | - * faults and kernelspace faults which result in | |
17130 | - * die(), but not kernelspace faults which are fixed | |
17131 | - * up. die() gives the process no chance to handle | |
17132 | - * the signal and notice the kernel fault information, | |
17133 | - * so that won't result in polluting the information | |
17134 | - * about previously queued, but not yet delivered, | |
17135 | - * faults. See also do_general_protection below. | |
17136 | - */ | |
17137 | - tsk->thread.error_code = error_code; | |
17138 | - tsk->thread.trap_no = trapnr; | |
17139 | + if (!user_mode(regs)) | |
17140 | + goto kernel_trap; | |
17141 | ||
17142 | - if (show_unhandled_signals && unhandled_signal(tsk, signr) && | |
17143 | - printk_ratelimit()) { | |
17144 | - printk(KERN_INFO | |
17145 | - "%s[%d] trap %s ip:%lx sp:%lx error:%lx", | |
17146 | - tsk->comm, tsk->pid, str, | |
17147 | - regs->ip, regs->sp, error_code); | |
17148 | - print_vma_addr(" in ", regs->ip); | |
17149 | - printk("\n"); | |
17150 | - } | |
17151 | + /* | |
17152 | + * We want error_code and trap_no set for userspace faults and | |
17153 | + * kernelspace faults which result in die(), but not | |
17154 | + * kernelspace faults which are fixed up. die() gives the | |
17155 | + * process no chance to handle the signal and notice the | |
17156 | + * kernel fault information, so that won't result in polluting | |
17157 | + * the information about previously queued, but not yet | |
17158 | + * delivered, faults. See also do_general_protection below. | |
17159 | + */ | |
17160 | + tsk->thread.error_code = error_code; | |
17161 | + tsk->thread.trap_no = trapnr; | |
17162 | ||
17163 | - if (info) | |
17164 | - force_sig_info(signr, info, tsk); | |
17165 | - else | |
17166 | - force_sig(signr, tsk); | |
17167 | - return; | |
17168 | + if (show_unhandled_signals && unhandled_signal(tsk, signr) && | |
17169 | + printk_ratelimit()) { | |
17170 | + printk(KERN_INFO | |
17171 | + "%s[%d] trap %s ip:%lx sp:%lx error:%lx", | |
17172 | + tsk->comm, tsk->pid, str, | |
17173 | + regs->ip, regs->sp, error_code); | |
17174 | + print_vma_addr(" in ", regs->ip); | |
17175 | + printk("\n"); | |
17176 | } | |
17177 | ||
17178 | + if (info) | |
17179 | + force_sig_info(signr, info, tsk); | |
17180 | + else | |
17181 | + force_sig(signr, tsk); | |
17182 | + return; | |
17183 | ||
17184 | +kernel_trap: | |
17185 | if (!fixup_exception(regs)) { | |
17186 | tsk->thread.error_code = error_code; | |
17187 | tsk->thread.trap_no = trapnr; | |
17188 | @@ -677,41 +642,39 @@ static void __kprobes do_trap(int trapnr | |
17189 | } | |
17190 | ||
17191 | #define DO_ERROR(trapnr, signr, str, name) \ | |
17192 | -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | |
17193 | -{ \ | |
17194 | - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | |
17195 | - == NOTIFY_STOP) \ | |
17196 | - return; \ | |
17197 | +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | |
17198 | +{ \ | |
17199 | + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | |
17200 | + == NOTIFY_STOP) \ | |
17201 | + return; \ | |
17202 | conditional_sti(regs); \ | |
17203 | - do_trap(trapnr, signr, str, regs, error_code, NULL); \ | |
17204 | + do_trap(trapnr, signr, str, regs, error_code, NULL); \ | |
17205 | } | |
17206 | ||
17207 | -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | |
17208 | -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | |
17209 | -{ \ | |
17210 | - siginfo_t info; \ | |
17211 | - info.si_signo = signr; \ | |
17212 | - info.si_errno = 0; \ | |
17213 | - info.si_code = sicode; \ | |
17214 | - info.si_addr = (void __user *)siaddr; \ | |
17215 | - trace_hardirqs_fixup(); \ | |
17216 | - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | |
17217 | - == NOTIFY_STOP) \ | |
17218 | - return; \ | |
17219 | +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | |
17220 | +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | |
17221 | +{ \ | |
17222 | + siginfo_t info; \ | |
17223 | + info.si_signo = signr; \ | |
17224 | + info.si_errno = 0; \ | |
17225 | + info.si_code = sicode; \ | |
17226 | + info.si_addr = (void __user *)siaddr; \ | |
17227 | + trace_hardirqs_fixup(); \ | |
17228 | + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | |
17229 | + == NOTIFY_STOP) \ | |
17230 | + return; \ | |
17231 | conditional_sti(regs); \ | |
17232 | - do_trap(trapnr, signr, str, regs, error_code, &info); \ | |
17233 | + do_trap(trapnr, signr, str, regs, error_code, &info); \ | |
17234 | } | |
17235 | ||
17236 | -DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) | |
17237 | -DO_ERROR( 4, SIGSEGV, "overflow", overflow) | |
17238 | -DO_ERROR( 5, SIGSEGV, "bounds", bounds) | |
17239 | -DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) | |
17240 | -DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) | |
17241 | -DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | |
17242 | +DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) | |
17243 | +DO_ERROR(4, SIGSEGV, "overflow", overflow) | |
17244 | +DO_ERROR(5, SIGSEGV, "bounds", bounds) | |
17245 | +DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) | |
17246 | +DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | |
17247 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | |
17248 | -DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | |
17249 | +DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | |
17250 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) | |
17251 | -DO_ERROR(18, SIGSEGV, "reserved", reserved) | |
17252 | ||
17253 | /* Runs on IST stack */ | |
17254 | asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) | |
17255 | @@ -741,31 +704,34 @@ asmlinkage void do_double_fault(struct p | |
17256 | die(str, regs, error_code); | |
17257 | } | |
17258 | ||
17259 | -asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, | |
17260 | - long error_code) | |
17261 | +asmlinkage void __kprobes | |
17262 | +do_general_protection(struct pt_regs *regs, long error_code) | |
17263 | { | |
17264 | - struct task_struct *tsk = current; | |
17265 | + struct task_struct *tsk; | |
17266 | ||
17267 | conditional_sti(regs); | |
17268 | ||
17269 | - if (user_mode(regs)) { | |
17270 | - tsk->thread.error_code = error_code; | |
17271 | - tsk->thread.trap_no = 13; | |
17272 | + tsk = current; | |
17273 | + if (!user_mode(regs)) | |
17274 | + goto gp_in_kernel; | |
17275 | ||
17276 | - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | |
17277 | - printk_ratelimit()) { | |
17278 | - printk(KERN_INFO | |
17279 | - "%s[%d] general protection ip:%lx sp:%lx error:%lx", | |
17280 | - tsk->comm, tsk->pid, | |
17281 | - regs->ip, regs->sp, error_code); | |
17282 | - print_vma_addr(" in ", regs->ip); | |
17283 | - printk("\n"); | |
17284 | - } | |
17285 | + tsk->thread.error_code = error_code; | |
17286 | + tsk->thread.trap_no = 13; | |
17287 | ||
17288 | - force_sig(SIGSEGV, tsk); | |
17289 | - return; | |
17290 | - } | |
17291 | + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | |
17292 | + printk_ratelimit()) { | |
17293 | + printk(KERN_INFO | |
17294 | + "%s[%d] general protection ip:%lx sp:%lx error:%lx", | |
17295 | + tsk->comm, tsk->pid, | |
17296 | + regs->ip, regs->sp, error_code); | |
17297 | + print_vma_addr(" in ", regs->ip); | |
17298 | + printk("\n"); | |
17299 | + } | |
17300 | ||
17301 | + force_sig(SIGSEGV, tsk); | |
17302 | + return; | |
17303 | + | |
17304 | +gp_in_kernel: | |
17305 | if (fixup_exception(regs)) | |
17306 | return; | |
17307 | ||
17308 | @@ -778,14 +744,14 @@ asmlinkage void __kprobes do_general_pro | |
17309 | } | |
17310 | ||
17311 | static notrace __kprobes void | |
17312 | -mem_parity_error(unsigned char reason, struct pt_regs * regs) | |
17313 | +mem_parity_error(unsigned char reason, struct pt_regs *regs) | |
17314 | { | |
17315 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", | |
17316 | reason); | |
17317 | printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); | |
17318 | ||
17319 | #if defined(CONFIG_EDAC) | |
17320 | - if(edac_handler_set()) { | |
17321 | + if (edac_handler_set()) { | |
17322 | edac_atomic_assert_error(); | |
17323 | return; | |
17324 | } | |
17325 | @@ -801,7 +767,7 @@ mem_parity_error(unsigned char reason, s | |
17326 | } | |
17327 | ||
17328 | static notrace __kprobes void | |
17329 | -io_check_error(unsigned char reason, struct pt_regs * regs) | |
17330 | +io_check_error(unsigned char reason, struct pt_regs *regs) | |
17331 | { | |
17332 | printk("NMI: IOCK error (debug interrupt?)\n"); | |
17333 | show_registers(regs); | |
17334 | @@ -827,14 +793,14 @@ unknown_nmi_error(unsigned char reason, | |
17335 | ||
17336 | /* Runs on IST stack. This code must keep interrupts off all the time. | |
17337 | Nested NMIs are prevented by the CPU. */ | |
17338 | -asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |
17339 | +asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |
17340 | { | |
17341 | unsigned char reason = 0; | |
17342 | int cpu; | |
17343 | ||
17344 | cpu = smp_processor_id(); | |
17345 | ||
17346 | - /* Only the BSP gets external NMIs from the system. */ | |
17347 | + /* Only the BSP gets external NMIs from the system. */ | |
17348 | if (!cpu) | |
17349 | reason = get_nmi_reason(); | |
17350 | ||
17351 | @@ -847,33 +813,58 @@ asmlinkage notrace __kprobes void defau | |
17352 | * Ok, so this is none of the documented NMI sources, | |
17353 | * so it must be the NMI watchdog. | |
17354 | */ | |
17355 | - if (nmi_watchdog_tick(regs,reason)) | |
17356 | + if (nmi_watchdog_tick(regs, reason)) | |
17357 | return; | |
17358 | #endif | |
17359 | - if (!do_nmi_callback(regs,cpu)) | |
17360 | + if (!do_nmi_callback(regs, cpu)) | |
17361 | unknown_nmi_error(reason, regs); | |
17362 | ||
17363 | return; | |
17364 | } | |
17365 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | |
17366 | - return; | |
17367 | + return; | |
17368 | ||
17369 | /* AK: following checks seem to be broken on modern chipsets. FIXME */ | |
17370 | - | |
17371 | if (reason & 0x80) | |
17372 | mem_parity_error(reason, regs); | |
17373 | if (reason & 0x40) | |
17374 | io_check_error(reason, regs); | |
17375 | } | |
17376 | ||
17377 | +asmlinkage notrace __kprobes void | |
17378 | +do_nmi(struct pt_regs *regs, long error_code) | |
17379 | +{ | |
17380 | + nmi_enter(); | |
17381 | + | |
17382 | + add_pda(__nmi_count, 1); | |
17383 | + | |
17384 | + if (!ignore_nmis) | |
17385 | + default_do_nmi(regs); | |
17386 | + | |
17387 | + nmi_exit(); | |
17388 | +} | |
17389 | + | |
17390 | +void stop_nmi(void) | |
17391 | +{ | |
17392 | + acpi_nmi_disable(); | |
17393 | + ignore_nmis++; | |
17394 | +} | |
17395 | + | |
17396 | +void restart_nmi(void) | |
17397 | +{ | |
17398 | + ignore_nmis--; | |
17399 | + acpi_nmi_enable(); | |
17400 | +} | |
17401 | + | |
17402 | /* runs on IST stack. */ | |
17403 | asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) | |
17404 | { | |
17405 | trace_hardirqs_fixup(); | |
17406 | ||
17407 | - if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { | |
17408 | + if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | |
17409 | + == NOTIFY_STOP) | |
17410 | return; | |
17411 | - } | |
17412 | + | |
17413 | preempt_conditional_sti(regs); | |
17414 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); | |
17415 | preempt_conditional_cli(regs); | |
17416 | @@ -904,8 +895,8 @@ asmlinkage __kprobes struct pt_regs *syn | |
17417 | asmlinkage void __kprobes do_debug(struct pt_regs * regs, | |
17418 | unsigned long error_code) | |
17419 | { | |
17420 | - unsigned long condition; | |
17421 | struct task_struct *tsk = current; | |
17422 | + unsigned long condition; | |
17423 | siginfo_t info; | |
17424 | ||
17425 | trace_hardirqs_fixup(); | |
17426 | @@ -926,21 +917,19 @@ asmlinkage void __kprobes do_debug(struc | |
17427 | ||
17428 | /* Mask out spurious debug traps due to lazy DR7 setting */ | |
17429 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | |
17430 | - if (!tsk->thread.debugreg7) { | |
17431 | + if (!tsk->thread.debugreg7) | |
17432 | goto clear_dr7; | |
17433 | - } | |
17434 | } | |
17435 | ||
17436 | tsk->thread.debugreg6 = condition; | |
17437 | ||
17438 | - | |
17439 | /* | |
17440 | * Single-stepping through TF: make sure we ignore any events in | |
17441 | * kernel space (but re-enable TF when returning to user mode). | |
17442 | */ | |
17443 | if (condition & DR_STEP) { | |
17444 | - if (!user_mode(regs)) | |
17445 | - goto clear_TF_reenable; | |
17446 | + if (!user_mode(regs)) | |
17447 | + goto clear_TF_reenable; | |
17448 | } | |
17449 | ||
17450 | /* Ok, finally something we can handle */ | |
17451 | @@ -953,7 +942,7 @@ asmlinkage void __kprobes do_debug(struc | |
17452 | force_sig_info(SIGTRAP, &info, tsk); | |
17453 | ||
17454 | clear_dr7: | |
17455 | - set_debugreg(0UL, 7); | |
17456 | + set_debugreg(0, 7); | |
17457 | preempt_conditional_cli(regs); | |
17458 | return; | |
17459 | ||
17460 | @@ -961,6 +950,7 @@ clear_TF_reenable: | |
17461 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | |
17462 | regs->flags &= ~X86_EFLAGS_TF; | |
17463 | preempt_conditional_cli(regs); | |
17464 | + return; | |
17465 | } | |
17466 | ||
17467 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | |
17468 | @@ -983,7 +973,7 @@ static int kernel_math_error(struct pt_r | |
17469 | asmlinkage void do_coprocessor_error(struct pt_regs *regs) | |
17470 | { | |
17471 | void __user *ip = (void __user *)(regs->ip); | |
17472 | - struct task_struct * task; | |
17473 | + struct task_struct *task; | |
17474 | siginfo_t info; | |
17475 | unsigned short cwd, swd; | |
17476 | ||
17477 | @@ -1016,30 +1006,30 @@ asmlinkage void do_coprocessor_error(str | |
17478 | cwd = get_fpu_cwd(task); | |
17479 | swd = get_fpu_swd(task); | |
17480 | switch (swd & ~cwd & 0x3f) { | |
17481 | - case 0x000: | |
17482 | - default: | |
17483 | - break; | |
17484 | - case 0x001: /* Invalid Op */ | |
17485 | - /* | |
17486 | - * swd & 0x240 == 0x040: Stack Underflow | |
17487 | - * swd & 0x240 == 0x240: Stack Overflow | |
17488 | - * User must clear the SF bit (0x40) if set | |
17489 | - */ | |
17490 | - info.si_code = FPE_FLTINV; | |
17491 | - break; | |
17492 | - case 0x002: /* Denormalize */ | |
17493 | - case 0x010: /* Underflow */ | |
17494 | - info.si_code = FPE_FLTUND; | |
17495 | - break; | |
17496 | - case 0x004: /* Zero Divide */ | |
17497 | - info.si_code = FPE_FLTDIV; | |
17498 | - break; | |
17499 | - case 0x008: /* Overflow */ | |
17500 | - info.si_code = FPE_FLTOVF; | |
17501 | - break; | |
17502 | - case 0x020: /* Precision */ | |
17503 | - info.si_code = FPE_FLTRES; | |
17504 | - break; | |
17505 | + case 0x000: /* No unmasked exception */ | |
17506 | + default: /* Multiple exceptions */ | |
17507 | + break; | |
17508 | + case 0x001: /* Invalid Op */ | |
17509 | + /* | |
17510 | + * swd & 0x240 == 0x040: Stack Underflow | |
17511 | + * swd & 0x240 == 0x240: Stack Overflow | |
17512 | + * User must clear the SF bit (0x40) if set | |
17513 | + */ | |
17514 | + info.si_code = FPE_FLTINV; | |
17515 | + break; | |
17516 | + case 0x002: /* Denormalize */ | |
17517 | + case 0x010: /* Underflow */ | |
17518 | + info.si_code = FPE_FLTUND; | |
17519 | + break; | |
17520 | + case 0x004: /* Zero Divide */ | |
17521 | + info.si_code = FPE_FLTDIV; | |
17522 | + break; | |
17523 | + case 0x008: /* Overflow */ | |
17524 | + info.si_code = FPE_FLTOVF; | |
17525 | + break; | |
17526 | + case 0x020: /* Precision */ | |
17527 | + info.si_code = FPE_FLTRES; | |
17528 | + break; | |
17529 | } | |
17530 | force_sig_info(SIGFPE, &info, task); | |
17531 | } | |
17532 | @@ -1052,7 +1042,7 @@ asmlinkage void bad_intr(void) | |
17533 | asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | |
17534 | { | |
17535 | void __user *ip = (void __user *)(regs->ip); | |
17536 | - struct task_struct * task; | |
17537 | + struct task_struct *task; | |
17538 | siginfo_t info; | |
17539 | unsigned short mxcsr; | |
17540 | ||
17541 | @@ -1080,25 +1070,25 @@ asmlinkage void do_simd_coprocessor_erro | |
17542 | */ | |
17543 | mxcsr = get_fpu_mxcsr(task); | |
17544 | switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { | |
17545 | - case 0x000: | |
17546 | - default: | |
17547 | - break; | |
17548 | - case 0x001: /* Invalid Op */ | |
17549 | - info.si_code = FPE_FLTINV; | |
17550 | - break; | |
17551 | - case 0x002: /* Denormalize */ | |
17552 | - case 0x010: /* Underflow */ | |
17553 | - info.si_code = FPE_FLTUND; | |
17554 | - break; | |
17555 | - case 0x004: /* Zero Divide */ | |
17556 | - info.si_code = FPE_FLTDIV; | |
17557 | - break; | |
17558 | - case 0x008: /* Overflow */ | |
17559 | - info.si_code = FPE_FLTOVF; | |
17560 | - break; | |
17561 | - case 0x020: /* Precision */ | |
17562 | - info.si_code = FPE_FLTRES; | |
17563 | - break; | |
17564 | + case 0x000: | |
17565 | + default: | |
17566 | + break; | |
17567 | + case 0x001: /* Invalid Op */ | |
17568 | + info.si_code = FPE_FLTINV; | |
17569 | + break; | |
17570 | + case 0x002: /* Denormalize */ | |
17571 | + case 0x010: /* Underflow */ | |
17572 | + info.si_code = FPE_FLTUND; | |
17573 | + break; | |
17574 | + case 0x004: /* Zero Divide */ | |
17575 | + info.si_code = FPE_FLTDIV; | |
17576 | + break; | |
17577 | + case 0x008: /* Overflow */ | |
17578 | + info.si_code = FPE_FLTOVF; | |
17579 | + break; | |
17580 | + case 0x020: /* Precision */ | |
17581 | + info.si_code = FPE_FLTRES; | |
17582 | + break; | |
17583 | } | |
17584 | force_sig_info(SIGFPE, &info, task); | |
17585 | } | |
17586 | @@ -1118,7 +1108,7 @@ asmlinkage void __attribute__((weak)) mc | |
17587 | } | |
17588 | ||
17589 | /* | |
17590 | - * 'math_state_restore()' saves the current math information in the | |
17591 | + * 'math_state_restore()' saves the current math information in the | |
17592 | * old math state array, and gets the new ones from the current task | |
17593 | * | |
17594 | * Careful.. There are problems with IBM-designed IRQ13 behaviour. | |
17595 | @@ -1145,7 +1135,14 @@ asmlinkage void math_state_restore(void) | |
17596 | ||
17597 | /* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */ | |
17598 | ||
17599 | - restore_fpu_checking(&me->thread.xstate->fxsave); | |
17600 | + /* | |
17601 | + * Paranoid restore. send a SIGSEGV if we fail to restore the state. | |
17602 | + */ | |
17603 | + if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { | |
17604 | + stts(); | |
17605 | + force_sig(SIGSEGV, me); | |
17606 | + return; | |
17607 | + } | |
17608 | task_thread_info(me)->status |= TS_USEDFPU; | |
17609 | me->fpu_counter++; | |
17610 | } | |
17611 | @@ -1190,13 +1187,12 @@ void __init trap_init(void) | |
17612 | ret = HYPERVISOR_set_trap_table(trap_table); | |
17613 | if (ret) | |
17614 | printk("HYPERVISOR_set_trap_table failed: error %d\n", ret); | |
17615 | - | |
17616 | /* | |
17617 | * initialize the per thread extended state: | |
17618 | */ | |
17619 | - init_thread_xstate(); | |
17620 | + init_thread_xstate(); | |
17621 | /* | |
17622 | - * Should be a barrier for any external CPU state. | |
17623 | + * Should be a barrier for any external CPU state: | |
17624 | */ | |
17625 | cpu_init(); | |
17626 | } | |
17627 | @@ -1212,27 +1208,25 @@ void __cpuinit smp_trap_init(trap_info_t | |
17628 | } | |
17629 | } | |
17630 | ||
17631 | - | |
17632 | static int __init oops_setup(char *s) | |
17633 | -{ | |
17634 | +{ | |
17635 | if (!s) | |
17636 | return -EINVAL; | |
17637 | if (!strcmp(s, "panic")) | |
17638 | panic_on_oops = 1; | |
17639 | return 0; | |
17640 | -} | |
17641 | +} | |
17642 | early_param("oops", oops_setup); | |
17643 | ||
17644 | static int __init kstack_setup(char *s) | |
17645 | { | |
17646 | if (!s) | |
17647 | return -EINVAL; | |
17648 | - kstack_depth_to_print = simple_strtoul(s,NULL,0); | |
17649 | + kstack_depth_to_print = simple_strtoul(s, NULL, 0); | |
17650 | return 0; | |
17651 | } | |
17652 | early_param("kstack", kstack_setup); | |
17653 | ||
17654 | - | |
17655 | static int __init code_bytes_setup(char *s) | |
17656 | { | |
17657 | code_bytes = simple_strtoul(s, NULL, 0); | |
82094b55 AF |
17658 | --- sle11-2009-10-16.orig/arch/x86/kernel/vsyscall_64-xen.c 2009-03-16 16:38:05.000000000 +0100 |
17659 | +++ sle11-2009-10-16/arch/x86/kernel/vsyscall_64-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
17660 | @@ -42,7 +42,8 @@ |
17661 | #include <asm/topology.h> | |
17662 | #include <asm/vgtod.h> | |
17663 | ||
17664 | -#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | |
17665 | +#define __vsyscall(nr) \ | |
17666 | + __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace | |
17667 | #define __syscall_clobber "r11","cx","memory" | |
17668 | ||
17669 | /* | |
17670 | @@ -264,10 +265,7 @@ static void __cpuinit vsyscall_set_cpu(i | |
17671 | d |= cpu; | |
17672 | d |= (node & 0xf) << 12; | |
17673 | d |= (node >> 4) << 48; | |
17674 | - if (HYPERVISOR_update_descriptor(virt_to_machine(get_cpu_gdt_table(cpu) | |
17675 | - + GDT_ENTRY_PER_CPU), | |
17676 | - d)) | |
17677 | - BUG(); | |
17678 | + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); | |
17679 | } | |
17680 | ||
17681 | static void __cpuinit cpu_vsyscall_init(void *arg) | |
17682 | @@ -281,7 +279,7 @@ cpu_vsyscall_notifier(struct notifier_bl | |
17683 | { | |
17684 | long cpu = (long)arg; | |
17685 | if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) | |
17686 | - smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); | |
17687 | + smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1); | |
17688 | return NOTIFY_DONE; | |
17689 | } | |
17690 | ||
17691 | @@ -311,7 +309,7 @@ static int __init vsyscall_init(void) | |
17692 | #ifdef CONFIG_SYSCTL | |
17693 | register_sysctl_table(kernel_root_table2); | |
17694 | #endif | |
17695 | - on_each_cpu(cpu_vsyscall_init, NULL, 0, 1); | |
17696 | + on_each_cpu(cpu_vsyscall_init, NULL, 1); | |
17697 | hotcpu_notifier(cpu_vsyscall_notifier, 0); | |
17698 | return 0; | |
17699 | } | |
82094b55 AF |
17700 | --- sle11-2009-10-16.orig/arch/x86/mach-xen/setup.c 2009-03-16 16:33:40.000000000 +0100 |
17701 | +++ sle11-2009-10-16/arch/x86/mach-xen/setup.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
17702 | @@ -17,6 +17,8 @@ |
17703 | #include <xen/interface/callback.h> | |
17704 | #include <xen/interface/memory.h> | |
17705 | ||
17706 | +#ifdef CONFIG_X86_32 | |
17707 | + | |
17708 | #ifdef CONFIG_HOTPLUG_CPU | |
17709 | #define DEFAULT_SEND_IPI (1) | |
17710 | #else | |
17711 | @@ -44,51 +46,6 @@ static int __init print_ipi_mode(void) | |
17712 | ||
17713 | late_initcall(print_ipi_mode); | |
17714 | ||
17715 | -/** | |
17716 | - * machine_specific_memory_setup - Hook for machine specific memory setup. | |
17717 | - * | |
17718 | - * Description: | |
17719 | - * This is included late in kernel/setup.c so that it can make | |
17720 | - * use of all of the static functions. | |
17721 | - **/ | |
17722 | - | |
17723 | -char * __init machine_specific_memory_setup(void) | |
17724 | -{ | |
17725 | - int rc; | |
17726 | - struct xen_memory_map memmap; | |
17727 | - /* | |
17728 | - * This is rather large for a stack variable but this early in | |
17729 | - * the boot process we know we have plenty slack space. | |
17730 | - */ | |
17731 | - struct e820entry map[E820MAX]; | |
17732 | - | |
17733 | - memmap.nr_entries = E820MAX; | |
17734 | - set_xen_guest_handle(memmap.buffer, map); | |
17735 | - | |
17736 | - rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); | |
17737 | - if ( rc == -ENOSYS ) { | |
17738 | - memmap.nr_entries = 1; | |
17739 | - map[0].addr = 0ULL; | |
17740 | - map[0].size = PFN_PHYS((unsigned long long)xen_start_info->nr_pages); | |
17741 | - /* 8MB slack (to balance backend allocations). */ | |
17742 | - map[0].size += 8ULL << 20; | |
17743 | - map[0].type = E820_RAM; | |
17744 | - rc = 0; | |
17745 | - } | |
17746 | - BUG_ON(rc); | |
17747 | - | |
17748 | - sanitize_e820_map(map, (char *)&memmap.nr_entries); | |
17749 | - | |
17750 | - BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0); | |
17751 | - | |
17752 | - return "Xen"; | |
17753 | -} | |
17754 | - | |
17755 | - | |
17756 | -extern void hypervisor_callback(void); | |
17757 | -extern void failsafe_callback(void); | |
17758 | -extern void nmi(void); | |
17759 | - | |
17760 | unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; | |
17761 | EXPORT_SYMBOL(machine_to_phys_mapping); | |
17762 | unsigned int machine_to_phys_order; | |
17763 | @@ -121,33 +78,66 @@ void __init pre_setup_arch_hook(void) | |
17764 | (unsigned long *)xen_start_info->mfn_list; | |
17765 | } | |
17766 | ||
17767 | +#endif /* CONFIG_X86_32 */ | |
17768 | + | |
17769 | +extern void hypervisor_callback(void); | |
17770 | +extern void failsafe_callback(void); | |
17771 | +extern void nmi(void); | |
17772 | + | |
17773 | +#ifdef CONFIG_X86_64 | |
17774 | +#include <asm/proto.h> | |
17775 | +#define CALLBACK_ADDR(fn) ((unsigned long)(fn)) | |
17776 | +#else | |
17777 | +#define CALLBACK_ADDR(fn) { __KERNEL_CS, (unsigned long)(fn) } | |
17778 | +#endif | |
17779 | + | |
17780 | void __init machine_specific_arch_setup(void) | |
17781 | { | |
17782 | int ret; | |
17783 | static struct callback_register __initdata event = { | |
17784 | .type = CALLBACKTYPE_event, | |
17785 | - .address = { __KERNEL_CS, (unsigned long)hypervisor_callback }, | |
17786 | + .address = CALLBACK_ADDR(hypervisor_callback) | |
17787 | }; | |
17788 | static struct callback_register __initdata failsafe = { | |
17789 | .type = CALLBACKTYPE_failsafe, | |
17790 | - .address = { __KERNEL_CS, (unsigned long)failsafe_callback }, | |
17791 | + .address = CALLBACK_ADDR(failsafe_callback) | |
17792 | + }; | |
17793 | +#ifdef CONFIG_X86_64 | |
17794 | + static struct callback_register __initdata syscall = { | |
17795 | + .type = CALLBACKTYPE_syscall, | |
17796 | + .address = CALLBACK_ADDR(system_call) | |
17797 | }; | |
17798 | +#endif | |
17799 | +#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_32) | |
17800 | static struct callback_register __initdata nmi_cb = { | |
17801 | .type = CALLBACKTYPE_nmi, | |
17802 | - .address = { __KERNEL_CS, (unsigned long)nmi }, | |
17803 | + .address = CALLBACK_ADDR(nmi) | |
17804 | }; | |
17805 | +#endif | |
17806 | ||
17807 | ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event); | |
17808 | if (ret == 0) | |
17809 | ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); | |
17810 | +#ifdef CONFIG_X86_64 | |
17811 | + if (ret == 0) | |
17812 | + ret = HYPERVISOR_callback_op(CALLBACKOP_register, &syscall); | |
17813 | +#endif | |
17814 | #if CONFIG_XEN_COMPAT <= 0x030002 | |
17815 | +#ifdef CONFIG_X86_32 | |
17816 | if (ret == -ENOSYS) | |
17817 | ret = HYPERVISOR_set_callbacks( | |
17818 | event.address.cs, event.address.eip, | |
17819 | failsafe.address.cs, failsafe.address.eip); | |
17820 | +#else | |
17821 | + ret = HYPERVISOR_set_callbacks( | |
17822 | + event.address, | |
17823 | + failsafe.address, | |
17824 | + syscall.address); | |
17825 | +#endif | |
17826 | #endif | |
17827 | BUG_ON(ret); | |
17828 | ||
17829 | +#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_32) | |
17830 | ret = HYPERVISOR_callback_op(CALLBACKOP_register, &nmi_cb); | |
17831 | #if CONFIG_XEN_COMPAT <= 0x030002 | |
17832 | if (ret == -ENOSYS) { | |
17833 | @@ -158,15 +148,43 @@ void __init machine_specific_arch_setup( | |
17834 | HYPERVISOR_nmi_op(XENNMI_register_callback, &cb); | |
17835 | } | |
17836 | #endif | |
17837 | +#endif | |
17838 | ||
17839 | +#ifdef CONFIG_X86_32 | |
17840 | /* Do an early initialization of the fixmap area */ | |
17841 | { | |
17842 | extern pte_t swapper_pg_fixmap[PTRS_PER_PTE]; | |
17843 | unsigned long addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE); | |
17844 | pud_t *pud = pud_offset(swapper_pg_dir + pgd_index(addr), addr); | |
17845 | pmd_t *pmd = pmd_offset(pud, addr); | |
17846 | + unsigned int i; | |
17847 | ||
17848 | make_lowmem_page_readonly(swapper_pg_fixmap, XENFEAT_writable_page_tables); | |
17849 | set_pmd(pmd, __pmd(__pa_symbol(swapper_pg_fixmap) | _PAGE_TABLE)); | |
17850 | + | |
17851 | +#define __FIXADDR_TOP (-PAGE_SIZE) | |
17852 | +#define FIX_BUG_ON(fix) BUILD_BUG_ON(pmd_index(__fix_to_virt(FIX_##fix)) \ | |
17853 | + != pmd_index(__fix_to_virt(FIX_EARLYCON_MEM_BASE))) | |
17854 | + FIX_BUG_ON(SHARED_INFO); | |
17855 | + FIX_BUG_ON(ISAMAP_BEGIN); | |
17856 | + FIX_BUG_ON(ISAMAP_END); | |
17857 | +#undef __FIXADDR_TOP | |
17858 | + BUG_ON(pte_index(hypervisor_virt_start)); | |
17859 | + | |
17860 | + /* Switch to the real shared_info page, and clear the | |
17861 | + * dummy page. */ | |
17862 | + set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); | |
17863 | + HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); | |
17864 | + memset(empty_zero_page, 0, sizeof(empty_zero_page)); | |
17865 | + | |
17866 | + /* Setup mapping of lower 1st MB */ | |
17867 | + for (i = 0; i < NR_FIX_ISAMAPS; i++) | |
17868 | + if (is_initial_xendomain()) | |
17869 | + set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE); | |
17870 | + else | |
17871 | + __set_fixmap(FIX_ISAMAP_BEGIN - i, | |
17872 | + virt_to_machine(empty_zero_page), | |
17873 | + PAGE_KERNEL_RO); | |
17874 | } | |
17875 | +#endif | |
17876 | } | |
82094b55 AF |
17877 | --- sle11-2009-10-16.orig/arch/x86/mm/fault-xen.c 2009-03-16 16:38:05.000000000 +0100 |
17878 | +++ sle11-2009-10-16/arch/x86/mm/fault-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
17879 | @@ -10,6 +10,7 @@ |
17880 | #include <linux/string.h> | |
17881 | #include <linux/types.h> | |
17882 | #include <linux/ptrace.h> | |
17883 | +#include <linux/mmiotrace.h> | |
17884 | #include <linux/mman.h> | |
17885 | #include <linux/mm.h> | |
17886 | #include <linux/smp.h> | |
17887 | @@ -49,17 +50,23 @@ | |
17888 | #define PF_RSVD (1<<3) | |
17889 | #define PF_INSTR (1<<4) | |
17890 | ||
17891 | +static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) | |
17892 | +{ | |
17893 | +#ifdef CONFIG_MMIOTRACE_HOOKS | |
17894 | + if (unlikely(is_kmmio_active())) | |
17895 | + if (kmmio_handler(regs, addr) == 1) | |
17896 | + return -1; | |
17897 | +#endif | |
17898 | + return 0; | |
17899 | +} | |
17900 | + | |
17901 | static inline int notify_page_fault(struct pt_regs *regs) | |
17902 | { | |
17903 | #ifdef CONFIG_KPROBES | |
17904 | int ret = 0; | |
17905 | ||
17906 | /* kprobe_running() needs smp_processor_id() */ | |
17907 | -#ifdef CONFIG_X86_32 | |
17908 | if (!user_mode_vm(regs)) { | |
17909 | -#else | |
17910 | - if (!user_mode(regs)) { | |
17911 | -#endif | |
17912 | preempt_disable(); | |
17913 | if (kprobe_running() && kprobe_fault_handler(regs, 14)) | |
17914 | ret = 1; | |
17915 | @@ -409,11 +416,7 @@ static void show_fault_oops(struct pt_re | |
17916 | printk(KERN_CONT "NULL pointer dereference"); | |
17917 | else | |
17918 | printk(KERN_CONT "paging request"); | |
17919 | -#ifdef CONFIG_X86_32 | |
17920 | - printk(KERN_CONT " at %08lx\n", address); | |
17921 | -#else | |
17922 | - printk(KERN_CONT " at %016lx\n", address); | |
17923 | -#endif | |
17924 | + printk(KERN_CONT " at %p\n", (void *) address); | |
17925 | printk(KERN_ALERT "IP:"); | |
17926 | printk_address(regs->ip, 1); | |
17927 | dump_pagetable(address); | |
17928 | @@ -628,6 +631,8 @@ void __kprobes do_page_fault(struct pt_r | |
17929 | ||
17930 | if (notify_page_fault(regs)) | |
17931 | return; | |
17932 | + if (unlikely(kmmio_fault(regs, address))) | |
17933 | + return; | |
17934 | ||
17935 | /* | |
17936 | * We fault-in kernel-space virtual memory on-demand. The | |
17937 | @@ -831,14 +836,10 @@ bad_area_nosemaphore: | |
17938 | if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | |
17939 | printk_ratelimit()) { | |
17940 | printk( | |
17941 | -#ifdef CONFIG_X86_32 | |
17942 | - "%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx", | |
17943 | -#else | |
17944 | - "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx", | |
17945 | -#endif | |
17946 | + "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", | |
17947 | task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, | |
17948 | - tsk->comm, task_pid_nr(tsk), address, regs->ip, | |
17949 | - regs->sp, error_code); | |
17950 | + tsk->comm, task_pid_nr(tsk), address, | |
17951 | + (void *) regs->ip, (void *) regs->sp, error_code); | |
17952 | print_vma_addr(" in ", regs->ip); | |
17953 | printk("\n"); | |
17954 | } | |
17955 | @@ -946,81 +947,45 @@ LIST_HEAD(pgd_list); | |
17956 | void vmalloc_sync_all(void) | |
17957 | { | |
17958 | #ifdef CONFIG_X86_32 | |
17959 | - /* | |
17960 | - * Note that races in the updates of insync and start aren't | |
17961 | - * problematic: insync can only get set bits added, and updates to | |
17962 | - * start are only improving performance (without affecting correctness | |
17963 | - * if undone). | |
17964 | - * XEN: To work on PAE, we need to iterate over PMDs rather than PGDs. | |
17965 | - * This change works just fine with 2-level paging too. | |
17966 | - */ | |
17967 | -#define sync_index(a) ((a) >> PMD_SHIFT) | |
17968 | - static DECLARE_BITMAP(insync, PTRS_PER_PGD*PTRS_PER_PMD); | |
17969 | - static unsigned long start = TASK_SIZE; | |
17970 | - unsigned long address; | |
17971 | + unsigned long address = VMALLOC_START & PGDIR_MASK; | |
17972 | ||
17973 | if (SHARED_KERNEL_PMD) | |
17974 | return; | |
17975 | ||
17976 | BUILD_BUG_ON(TASK_SIZE & ~PMD_MASK); | |
17977 | - for (address = start; | |
17978 | - address < hypervisor_virt_start; | |
17979 | - address += PMD_SIZE) { | |
17980 | - if (!test_bit(sync_index(address), insync)) { | |
17981 | - unsigned long flags; | |
17982 | - struct page *page; | |
17983 | - | |
17984 | - spin_lock_irqsave(&pgd_lock, flags); | |
17985 | - /* XEN: failure path assumes non-empty pgd_list. */ | |
17986 | - if (unlikely(list_empty(&pgd_list))) { | |
17987 | - spin_unlock_irqrestore(&pgd_lock, flags); | |
17988 | - return; | |
17989 | - } | |
17990 | - list_for_each_entry(page, &pgd_list, lru) { | |
17991 | - if (!vmalloc_sync_one(page_address(page), | |
17992 | - address)) | |
17993 | - break; | |
17994 | - } | |
17995 | - spin_unlock_irqrestore(&pgd_lock, flags); | |
17996 | - if (!page) | |
17997 | - set_bit(sync_index(address), insync); | |
17998 | + for (; address < hypervisor_virt_start; address += PMD_SIZE) { | |
17999 | + unsigned long flags; | |
18000 | + struct page *page; | |
18001 | + | |
18002 | + spin_lock_irqsave(&pgd_lock, flags); | |
18003 | + list_for_each_entry(page, &pgd_list, lru) { | |
18004 | + if (!vmalloc_sync_one(page_address(page), | |
18005 | + address)) | |
18006 | + break; | |
18007 | } | |
18008 | - if (address == start && test_bit(sync_index(address), insync)) | |
18009 | - start = address + PMD_SIZE; | |
18010 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
18011 | } | |
18012 | #else /* CONFIG_X86_64 */ | |
18013 | - /* | |
18014 | - * Note that races in the updates of insync and start aren't | |
18015 | - * problematic: insync can only get set bits added, and updates to | |
18016 | - * start are only improving performance (without affecting correctness | |
18017 | - * if undone). | |
18018 | - */ | |
18019 | - static DECLARE_BITMAP(insync, PTRS_PER_PGD); | |
18020 | - static unsigned long start = VMALLOC_START & PGDIR_MASK; | |
18021 | + unsigned long start = VMALLOC_START & PGDIR_MASK; | |
18022 | unsigned long address; | |
18023 | ||
18024 | for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { | |
18025 | - if (!test_bit(pgd_index(address), insync)) { | |
18026 | - const pgd_t *pgd_ref = pgd_offset_k(address); | |
18027 | - unsigned long flags; | |
18028 | - struct page *page; | |
18029 | - | |
18030 | - if (pgd_none(*pgd_ref)) | |
18031 | - continue; | |
18032 | - spin_lock_irqsave(&pgd_lock, flags); | |
18033 | - list_for_each_entry(page, &pgd_list, lru) { | |
18034 | - pgd_t *pgd; | |
18035 | - pgd = (pgd_t *)page_address(page) + pgd_index(address); | |
18036 | - if (pgd_none(*pgd)) | |
18037 | - set_pgd(pgd, *pgd_ref); | |
18038 | - else | |
18039 | - BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); | |
18040 | - } | |
18041 | - spin_unlock_irqrestore(&pgd_lock, flags); | |
18042 | - set_bit(pgd_index(address), insync); | |
18043 | + const pgd_t *pgd_ref = pgd_offset_k(address); | |
18044 | + unsigned long flags; | |
18045 | + struct page *page; | |
18046 | + | |
18047 | + if (pgd_none(*pgd_ref)) | |
18048 | + continue; | |
18049 | + spin_lock_irqsave(&pgd_lock, flags); | |
18050 | + list_for_each_entry(page, &pgd_list, lru) { | |
18051 | + pgd_t *pgd; | |
18052 | + pgd = (pgd_t *)page_address(page) + pgd_index(address); | |
18053 | + if (pgd_none(*pgd)) | |
18054 | + set_pgd(pgd, *pgd_ref); | |
18055 | + else | |
18056 | + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); | |
18057 | } | |
18058 | - if (address == start) | |
18059 | - start = address + PGDIR_SIZE; | |
18060 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
18061 | } | |
18062 | #endif | |
18063 | } | |
82094b55 AF |
18064 | --- sle11-2009-10-16.orig/arch/x86/mm/hypervisor.c 2009-05-14 11:18:39.000000000 +0200 |
18065 | +++ sle11-2009-10-16/arch/x86/mm/hypervisor.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
18066 | @@ -709,6 +709,72 @@ void xen_destroy_contiguous_region(unsig |
18067 | } | |
18068 | EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); | |
18069 | ||
18070 | +int __init early_create_contiguous_region(unsigned long pfn, | |
18071 | + unsigned int order, | |
18072 | + unsigned int address_bits) | |
18073 | +{ | |
18074 | + unsigned long *in_frames = discontig_frames, out_frame = pfn; | |
18075 | + unsigned int i; | |
18076 | + int rc, success; | |
18077 | + struct xen_memory_exchange exchange = { | |
18078 | + .in = { | |
18079 | + .nr_extents = 1UL << order, | |
18080 | + .extent_order = 0, | |
18081 | + .domid = DOMID_SELF | |
18082 | + }, | |
18083 | + .out = { | |
18084 | + .nr_extents = 1, | |
18085 | + .extent_order = order, | |
18086 | + .address_bits = address_bits, | |
18087 | + .domid = DOMID_SELF | |
18088 | + } | |
18089 | + }; | |
18090 | + | |
18091 | + if (xen_feature(XENFEAT_auto_translated_physmap)) | |
18092 | + return 0; | |
18093 | + | |
18094 | + if (unlikely(order > MAX_CONTIG_ORDER)) | |
18095 | + return -ENOMEM; | |
18096 | + | |
18097 | + for (i = 0; i < (1U << order); ++i) { | |
18098 | + in_frames[i] = pfn_to_mfn(pfn + i); | |
18099 | + set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY); | |
18100 | + } | |
18101 | + | |
18102 | + set_xen_guest_handle(exchange.in.extent_start, in_frames); | |
18103 | + set_xen_guest_handle(exchange.out.extent_start, &out_frame); | |
18104 | + | |
18105 | + rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange); | |
18106 | + success = (exchange.nr_exchanged == (1UL << order)); | |
18107 | + BUG_ON(!success && (exchange.nr_exchanged || !rc)); | |
18108 | + BUG_ON(success && rc); | |
18109 | +#if CONFIG_XEN_COMPAT <= 0x030002 | |
18110 | + if (unlikely(rc == -ENOSYS)) { | |
18111 | + /* Compatibility when XENMEM_exchange is unavailable. */ | |
18112 | + if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, | |
18113 | + &exchange.in) != (1UL << order)) | |
18114 | + BUG(); | |
18115 | + success = (HYPERVISOR_memory_op(XENMEM_populate_physmap, | |
18116 | + &exchange.out) == 1); | |
18117 | + if (!success) { | |
18118 | + for (i = 0; i < (1U << order); ++i) | |
18119 | + in_frames[i] = pfn + i; | |
18120 | + if (HYPERVISOR_memory_op(XENMEM_populate_physmap, | |
18121 | + &exchange.in) != (1UL << order)) | |
18122 | + BUG(); | |
18123 | + } | |
18124 | + } | |
18125 | +#endif | |
18126 | + | |
18127 | + for (i = 0; i < (1U << order); ++i, ++out_frame) { | |
18128 | + if (!success) | |
18129 | + out_frame = in_frames[i]; | |
18130 | + set_phys_to_machine(pfn + i, out_frame); | |
18131 | + } | |
18132 | + | |
18133 | + return success ? 0 : -ENOMEM; | |
18134 | +} | |
18135 | + | |
18136 | static void undo_limit_pages(struct page *pages, unsigned int order) | |
18137 | { | |
18138 | BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); | |
18139 | @@ -875,42 +941,9 @@ int write_ldt_entry(struct desc_struct * | |
18140 | return HYPERVISOR_update_descriptor(mach_lp, *(const u64*)desc); | |
18141 | } | |
18142 | ||
18143 | -#define MAX_BATCHED_FULL_PTES 32 | |
18144 | - | |
18145 | -int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd, | |
18146 | - unsigned long addr, unsigned long end, pgprot_t newprot, | |
18147 | - int dirty_accountable) | |
18148 | +int write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, | |
18149 | + int type) | |
18150 | { | |
18151 | - int rc = 0, i = 0; | |
18152 | - mmu_update_t u[MAX_BATCHED_FULL_PTES]; | |
18153 | - pte_t *pte; | |
18154 | - spinlock_t *ptl; | |
18155 | - | |
18156 | - if (!xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) | |
18157 | - return 0; | |
18158 | - | |
18159 | - pte = pte_offset_map_lock(mm, pmd, addr, &ptl); | |
18160 | - do { | |
18161 | - if (pte_present(*pte)) { | |
18162 | - pte_t ptent = pte_modify(*pte, newprot); | |
18163 | - | |
18164 | - if (dirty_accountable && pte_dirty(ptent)) | |
18165 | - ptent = pte_mkwrite(ptent); | |
18166 | - u[i].ptr = (__pmd_val(*pmd) & PHYSICAL_PAGE_MASK) | |
18167 | - | ((unsigned long)pte & ~PAGE_MASK) | |
18168 | - | MMU_PT_UPDATE_PRESERVE_AD; | |
18169 | - u[i].val = __pte_val(ptent); | |
18170 | - if (++i == MAX_BATCHED_FULL_PTES) { | |
18171 | - if ((rc = HYPERVISOR_mmu_update( | |
18172 | - &u[0], i, NULL, DOMID_SELF)) != 0) | |
18173 | - break; | |
18174 | - i = 0; | |
18175 | - } | |
18176 | - } | |
18177 | - } while (pte++, addr += PAGE_SIZE, addr != end); | |
18178 | - if (i) | |
18179 | - rc = HYPERVISOR_mmu_update( &u[0], i, NULL, DOMID_SELF); | |
18180 | - pte_unmap_unlock(pte - 1, ptl); | |
18181 | - BUG_ON(rc && rc != -ENOSYS); | |
18182 | - return !rc; | |
18183 | + maddr_t mach_gp = virt_to_machine(gdt + entry); | |
18184 | + return HYPERVISOR_update_descriptor(mach_gp, *(const u64*)desc); | |
18185 | } | |
82094b55 AF |
18186 | --- sle11-2009-10-16.orig/arch/x86/mm/init_32-xen.c 2009-03-16 16:38:05.000000000 +0100 |
18187 | +++ sle11-2009-10-16/arch/x86/mm/init_32-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
18188 | @@ -54,6 +54,7 @@ |
18189 | ||
18190 | unsigned int __VMALLOC_RESERVE = 128 << 20; | |
18191 | ||
18192 | +unsigned long max_low_pfn_mapped; | |
18193 | unsigned long max_pfn_mapped; | |
18194 | ||
18195 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | |
18196 | @@ -61,6 +62,27 @@ unsigned long highstart_pfn, highend_pfn | |
18197 | ||
18198 | static noinline int do_test_wp_bit(void); | |
18199 | ||
18200 | + | |
18201 | +static unsigned long __initdata table_start; | |
18202 | +static unsigned long __initdata table_end; | |
18203 | +static unsigned long __initdata table_top; | |
18204 | + | |
18205 | +static int __initdata after_init_bootmem; | |
18206 | + | |
18207 | +static __init void *alloc_low_page(unsigned long *phys) | |
18208 | +{ | |
18209 | + unsigned long pfn = table_end++; | |
18210 | + void *adr; | |
18211 | + | |
18212 | + if (pfn >= table_top) | |
18213 | + panic("alloc_low_page: ran out of memory"); | |
18214 | + | |
18215 | + adr = __va(pfn * PAGE_SIZE); | |
18216 | + memset(adr, 0, PAGE_SIZE); | |
18217 | + *phys = pfn * PAGE_SIZE; | |
18218 | + return adr; | |
18219 | +} | |
18220 | + | |
18221 | /* | |
18222 | * Creates a middle page table and puts a pointer to it in the | |
18223 | * given global directory entry. This only returns the gd entry | |
18224 | @@ -72,9 +94,12 @@ static pmd_t * __init one_md_table_init( | |
18225 | pmd_t *pmd_table; | |
18226 | ||
18227 | #ifdef CONFIG_X86_PAE | |
18228 | + unsigned long phys; | |
18229 | if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) { | |
18230 | - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); | |
18231 | - | |
18232 | + if (after_init_bootmem) | |
18233 | + pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); | |
18234 | + else | |
18235 | + pmd_table = (pmd_t *)alloc_low_page(&phys); | |
18236 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); | |
18237 | make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables); | |
18238 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | |
18239 | @@ -101,12 +126,16 @@ static pte_t * __init one_page_table_ini | |
18240 | #endif | |
18241 | pte_t *page_table = NULL; | |
18242 | ||
18243 | + if (after_init_bootmem) { | |
18244 | #ifdef CONFIG_DEBUG_PAGEALLOC | |
18245 | - page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); | |
18246 | + page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); | |
18247 | #endif | |
18248 | - if (!page_table) { | |
18249 | - page_table = | |
18250 | + if (!page_table) | |
18251 | + page_table = | |
18252 | (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); | |
18253 | + } else { | |
18254 | + unsigned long phys; | |
18255 | + page_table = (pte_t *)alloc_low_page(&phys); | |
18256 | } | |
18257 | ||
18258 | paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); | |
18259 | @@ -167,24 +196,24 @@ static inline int is_kernel_text(unsigne | |
18260 | * of max_low_pfn pages, by creating page tables starting from address | |
18261 | * PAGE_OFFSET: | |
18262 | */ | |
18263 | -static void __init kernel_physical_mapping_init(pgd_t *pgd_base) | |
18264 | +static void __init kernel_physical_mapping_init(pgd_t *pgd_base, | |
18265 | + unsigned long start_pfn, | |
18266 | + unsigned long end_pfn, | |
18267 | + int use_pse) | |
18268 | { | |
18269 | int pgd_idx, pmd_idx, pte_ofs; | |
18270 | unsigned long pfn; | |
18271 | pgd_t *pgd; | |
18272 | pmd_t *pmd; | |
18273 | pte_t *pte; | |
18274 | + unsigned pages_2m = 0, pages_4k = 0; | |
18275 | ||
18276 | - unsigned long max_ram_pfn = xen_start_info->nr_pages; | |
18277 | - if (max_ram_pfn > max_low_pfn) | |
18278 | - max_ram_pfn = max_low_pfn; | |
18279 | + if (!cpu_has_pse) | |
18280 | + use_pse = 0; | |
18281 | ||
18282 | - pgd_idx = pgd_index(PAGE_OFFSET); | |
18283 | + pfn = start_pfn; | |
18284 | + pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); | |
18285 | pgd = pgd_base + pgd_idx; | |
18286 | - pfn = 0; | |
18287 | - pmd_idx = pmd_index(PAGE_OFFSET); | |
18288 | - pte_ofs = pte_index(PAGE_OFFSET); | |
18289 | - | |
18290 | for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { | |
18291 | #ifdef CONFIG_XEN | |
18292 | /* | |
18293 | @@ -198,10 +227,16 @@ static void __init kernel_physical_mappi | |
18294 | #else | |
18295 | pmd = one_md_table_init(pgd); | |
18296 | #endif | |
18297 | - if (pfn >= max_low_pfn) | |
18298 | + | |
18299 | + if (pfn >= end_pfn) | |
18300 | continue; | |
18301 | +#ifdef CONFIG_X86_PAE | |
18302 | + pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); | |
18303 | pmd += pmd_idx; | |
18304 | - for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; | |
18305 | +#else | |
18306 | + pmd_idx = 0; | |
18307 | +#endif | |
18308 | + for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn; | |
18309 | pmd++, pmd_idx++) { | |
18310 | unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET; | |
18311 | ||
18312 | @@ -211,13 +246,8 @@ static void __init kernel_physical_mappi | |
18313 | /* | |
18314 | * Map with big pages if possible, otherwise | |
18315 | * create normal page tables: | |
18316 | - * | |
18317 | - * Don't use a large page for the first 2/4MB of memory | |
18318 | - * because there are often fixed size MTRRs in there | |
18319 | - * and overlapping MTRRs into large pages can cause | |
18320 | - * slowdowns. | |
18321 | */ | |
18322 | - if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) { | |
18323 | + if (use_pse) { | |
18324 | unsigned int addr2; | |
18325 | pgprot_t prot = PAGE_KERNEL_LARGE; | |
18326 | ||
18327 | @@ -228,49 +258,35 @@ static void __init kernel_physical_mappi | |
18328 | is_kernel_text(addr2)) | |
18329 | prot = PAGE_KERNEL_LARGE_EXEC; | |
18330 | ||
18331 | + pages_2m++; | |
18332 | set_pmd(pmd, pfn_pmd(pfn, prot)); | |
18333 | ||
18334 | pfn += PTRS_PER_PTE; | |
18335 | - max_pfn_mapped = pfn; | |
18336 | continue; | |
18337 | } | |
18338 | pte = one_page_table_init(pmd); | |
18339 | ||
18340 | - for (pte += pte_ofs; | |
18341 | - pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; | |
18342 | + pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); | |
18343 | + pte += pte_ofs; | |
18344 | + for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn; | |
18345 | pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) { | |
18346 | pgprot_t prot = PAGE_KERNEL; | |
18347 | ||
18348 | /* XEN: Only map initial RAM allocation. */ | |
18349 | - if ((pfn >= max_ram_pfn) || pte_present(*pte)) | |
18350 | + if (pfn >= xen_start_info->nr_pages || pte_present(*pte)) | |
18351 | continue; | |
18352 | if (is_kernel_text(addr)) | |
18353 | prot = PAGE_KERNEL_EXEC; | |
18354 | ||
18355 | + pages_4k++; | |
18356 | set_pte(pte, pfn_pte(pfn, prot)); | |
18357 | } | |
18358 | - max_pfn_mapped = pfn; | |
18359 | - pte_ofs = 0; | |
18360 | } | |
18361 | - pmd_idx = 0; | |
18362 | } | |
18363 | + update_page_count(PG_LEVEL_2M, pages_2m); | |
18364 | + update_page_count(PG_LEVEL_4K, pages_4k); | |
18365 | } | |
18366 | ||
18367 | -#ifndef CONFIG_XEN | |
18368 | - | |
18369 | -static inline int page_kills_ppro(unsigned long pagenr) | |
18370 | -{ | |
18371 | - if (pagenr >= 0x70000 && pagenr <= 0x7003F) | |
18372 | - return 1; | |
18373 | - return 0; | |
18374 | -} | |
18375 | - | |
18376 | -#else | |
18377 | - | |
18378 | -#define page_kills_ppro(p) 0 | |
18379 | - | |
18380 | -#endif | |
18381 | - | |
18382 | /* | |
18383 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address | |
18384 | * is valid. The argument is a physical page number. | |
18385 | @@ -331,30 +347,63 @@ static void __init permanent_kmaps_init( | |
18386 | pkmap_page_table = pte; | |
18387 | } | |
18388 | ||
18389 | -void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) | |
18390 | +static void __init add_one_highpage_init(struct page *page, int pfn) | |
18391 | { | |
18392 | - if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { | |
18393 | - ClearPageReserved(page); | |
18394 | - init_page_count(page); | |
18395 | - if (pfn < xen_start_info->nr_pages) | |
18396 | - __free_page(page); | |
18397 | - totalhigh_pages++; | |
18398 | - } else | |
18399 | - SetPageReserved(page); | |
18400 | + ClearPageReserved(page); | |
18401 | + init_page_count(page); | |
18402 | + if (pfn < xen_start_info->nr_pages) | |
18403 | + __free_page(page); | |
18404 | + totalhigh_pages++; | |
18405 | +} | |
18406 | + | |
18407 | +struct add_highpages_data { | |
18408 | + unsigned long start_pfn; | |
18409 | + unsigned long end_pfn; | |
18410 | +}; | |
18411 | + | |
18412 | +static int __init add_highpages_work_fn(unsigned long start_pfn, | |
18413 | + unsigned long end_pfn, void *datax) | |
18414 | +{ | |
18415 | + int node_pfn; | |
18416 | + struct page *page; | |
18417 | + unsigned long final_start_pfn, final_end_pfn; | |
18418 | + struct add_highpages_data *data; | |
18419 | + | |
18420 | + data = (struct add_highpages_data *)datax; | |
18421 | + | |
18422 | + final_start_pfn = max(start_pfn, data->start_pfn); | |
18423 | + final_end_pfn = min(end_pfn, data->end_pfn); | |
18424 | + if (final_start_pfn >= final_end_pfn) | |
18425 | + return 0; | |
18426 | + | |
18427 | + for (node_pfn = final_start_pfn; node_pfn < final_end_pfn; | |
18428 | + node_pfn++) { | |
18429 | + if (!pfn_valid(node_pfn)) | |
18430 | + continue; | |
18431 | + page = pfn_to_page(node_pfn); | |
18432 | + add_one_highpage_init(page, node_pfn); | |
18433 | + } | |
18434 | + | |
18435 | + return 0; | |
18436 | + | |
18437 | +} | |
18438 | + | |
18439 | +void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, | |
18440 | + unsigned long end_pfn) | |
18441 | +{ | |
18442 | + struct add_highpages_data data; | |
18443 | + | |
18444 | + data.start_pfn = start_pfn; | |
18445 | + data.end_pfn = end_pfn; | |
18446 | + | |
18447 | + work_with_active_regions(nid, add_highpages_work_fn, &data); | |
18448 | } | |
18449 | ||
18450 | #ifndef CONFIG_NUMA | |
18451 | -static void __init set_highmem_pages_init(int bad_ppro) | |
18452 | +static void __init set_highmem_pages_init(void) | |
18453 | { | |
18454 | - int pfn; | |
18455 | + add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); | |
18456 | ||
18457 | - for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) { | |
18458 | - /* | |
18459 | - * Holes under sparsemem might not have no mem_map[]: | |
18460 | - */ | |
18461 | - if (pfn_valid(pfn)) | |
18462 | - add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); | |
18463 | - } | |
18464 | totalram_pages += totalhigh_pages; | |
18465 | } | |
18466 | #endif /* !CONFIG_NUMA */ | |
18467 | @@ -362,24 +411,11 @@ static void __init set_highmem_pages_ini | |
18468 | #else | |
18469 | # define kmap_init() do { } while (0) | |
18470 | # define permanent_kmaps_init(pgd_base) do { } while (0) | |
18471 | -# define set_highmem_pages_init(bad_ppro) do { } while (0) | |
18472 | +# define set_highmem_pages_init() do { } while (0) | |
18473 | #endif /* CONFIG_HIGHMEM */ | |
18474 | ||
18475 | -pteval_t __PAGE_KERNEL = _PAGE_KERNEL; | |
18476 | -EXPORT_SYMBOL(__PAGE_KERNEL); | |
18477 | - | |
18478 | -pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; | |
18479 | - | |
18480 | pgd_t *swapper_pg_dir; | |
18481 | ||
18482 | -static void __init xen_pagetable_setup_start(pgd_t *base) | |
18483 | -{ | |
18484 | -} | |
18485 | - | |
18486 | -static void __init xen_pagetable_setup_done(pgd_t *base) | |
18487 | -{ | |
18488 | -} | |
18489 | - | |
18490 | /* | |
18491 | * Build a proper pagetable for the kernel mappings. Up until this | |
18492 | * point, we've been running on some set of pagetables constructed by | |
18493 | @@ -399,27 +435,10 @@ static void __init xen_pagetable_setup_d | |
18494 | * be partially populated, and so it avoids stomping on any existing | |
18495 | * mappings. | |
18496 | */ | |
18497 | -static void __init pagetable_init(void) | |
18498 | +static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) | |
18499 | { | |
18500 | - pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base; | |
18501 | unsigned long vaddr, end; | |
18502 | ||
18503 | - xen_pagetable_setup_start(pgd_base); | |
18504 | - | |
18505 | - /* Enable PSE if available */ | |
18506 | - if (cpu_has_pse) | |
18507 | - set_in_cr4(X86_CR4_PSE); | |
18508 | - | |
18509 | - /* Enable PGE if available */ | |
18510 | - if (cpu_has_pge) { | |
18511 | - set_in_cr4(X86_CR4_PGE); | |
18512 | - __PAGE_KERNEL |= _PAGE_GLOBAL; | |
18513 | - __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; | |
18514 | - } | |
18515 | - | |
18516 | - kernel_physical_mapping_init(pgd_base); | |
18517 | - remap_numa_kva(); | |
18518 | - | |
18519 | /* | |
18520 | * Fixed mappings, only the page table structure has to be | |
18521 | * created - mappings will be set by set_fixmap(): | |
18522 | @@ -429,10 +448,13 @@ static void __init pagetable_init(void) | |
18523 | end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; | |
18524 | page_table_range_init(vaddr, end, pgd_base); | |
18525 | early_ioremap_reset(); | |
18526 | +} | |
18527 | ||
18528 | - permanent_kmaps_init(pgd_base); | |
18529 | +static void __init pagetable_init(void) | |
18530 | +{ | |
18531 | + pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base; | |
18532 | ||
18533 | - xen_pagetable_setup_done(pgd_base); | |
18534 | + permanent_kmaps_init(pgd_base); | |
18535 | } | |
18536 | ||
18537 | #if defined(CONFIG_ACPI_SLEEP) && !defined(CONFIG_XEN) | |
18538 | @@ -475,7 +497,7 @@ void zap_low_mappings(void) | |
18539 | ||
18540 | int nx_enabled; | |
18541 | ||
18542 | -pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX; | |
18543 | +pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL); | |
18544 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | |
18545 | ||
18546 | #ifdef CONFIG_X86_PAE | |
18547 | @@ -528,42 +550,364 @@ static void __init set_nx(void) | |
18548 | } | |
18549 | #endif | |
18550 | ||
18551 | +/* user-defined highmem size */ | |
18552 | +static unsigned int highmem_pages = -1; | |
18553 | + | |
18554 | /* | |
18555 | - * paging_init() sets up the page tables - note that the first 8MB are | |
18556 | - * already mapped by head.S. | |
18557 | - * | |
18558 | - * This routines also unmaps the page at virtual kernel address 0, so | |
18559 | - * that we can trap those pesky NULL-reference errors in the kernel. | |
18560 | + * highmem=size forces highmem to be exactly 'size' bytes. | |
18561 | + * This works even on boxes that have no highmem otherwise. | |
18562 | + * This also works to reduce highmem size on bigger boxes. | |
18563 | */ | |
18564 | -void __init paging_init(void) | |
18565 | +static int __init parse_highmem(char *arg) | |
18566 | +{ | |
18567 | + if (!arg) | |
18568 | + return -EINVAL; | |
18569 | + | |
18570 | + highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; | |
18571 | + return 0; | |
18572 | +} | |
18573 | +early_param("highmem", parse_highmem); | |
18574 | + | |
18575 | +/* | |
18576 | + * Determine low and high memory ranges: | |
18577 | + */ | |
18578 | +void __init find_low_pfn_range(void) | |
18579 | +{ | |
18580 | + /* it could update max_pfn */ | |
18581 | + | |
18582 | + /* max_low_pfn is 0, we already have early_res support */ | |
18583 | + | |
18584 | + max_low_pfn = max_pfn; | |
18585 | + if (max_low_pfn > MAXMEM_PFN) { | |
18586 | + if (highmem_pages == -1) | |
18587 | + highmem_pages = max_pfn - MAXMEM_PFN; | |
18588 | + if (highmem_pages + MAXMEM_PFN < max_pfn) | |
18589 | + max_pfn = MAXMEM_PFN + highmem_pages; | |
18590 | + if (highmem_pages + MAXMEM_PFN > max_pfn) { | |
18591 | + printk(KERN_WARNING "only %luMB highmem pages " | |
18592 | + "available, ignoring highmem size of %uMB.\n", | |
18593 | + pages_to_mb(max_pfn - MAXMEM_PFN), | |
18594 | + pages_to_mb(highmem_pages)); | |
18595 | + highmem_pages = 0; | |
18596 | + } | |
18597 | + max_low_pfn = MAXMEM_PFN; | |
18598 | +#ifndef CONFIG_HIGHMEM | |
18599 | + /* Maximum memory usable is what is directly addressable */ | |
18600 | + printk(KERN_WARNING "Warning only %ldMB will be used.\n", | |
18601 | + MAXMEM>>20); | |
18602 | + if (max_pfn > MAX_NONPAE_PFN) | |
18603 | + printk(KERN_WARNING | |
18604 | + "Use a HIGHMEM64G enabled kernel.\n"); | |
18605 | + else | |
18606 | + printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); | |
18607 | + max_pfn = MAXMEM_PFN; | |
18608 | +#else /* !CONFIG_HIGHMEM */ | |
18609 | +#ifndef CONFIG_HIGHMEM64G | |
18610 | + if (max_pfn > MAX_NONPAE_PFN) { | |
18611 | + max_pfn = MAX_NONPAE_PFN; | |
18612 | + printk(KERN_WARNING "Warning only 4GB will be used." | |
18613 | + "Use a HIGHMEM64G enabled kernel.\n"); | |
18614 | + } | |
18615 | +#endif /* !CONFIG_HIGHMEM64G */ | |
18616 | +#endif /* !CONFIG_HIGHMEM */ | |
18617 | + } else { | |
18618 | + if (highmem_pages == -1) | |
18619 | + highmem_pages = 0; | |
18620 | +#ifdef CONFIG_HIGHMEM | |
18621 | + if (highmem_pages >= max_pfn) { | |
18622 | + printk(KERN_ERR "highmem size specified (%uMB) is " | |
18623 | + "bigger than pages available (%luMB)!.\n", | |
18624 | + pages_to_mb(highmem_pages), | |
18625 | + pages_to_mb(max_pfn)); | |
18626 | + highmem_pages = 0; | |
18627 | + } | |
18628 | + if (highmem_pages) { | |
18629 | + if (max_low_pfn - highmem_pages < | |
18630 | + 64*1024*1024/PAGE_SIZE){ | |
18631 | + printk(KERN_ERR "highmem size %uMB results in " | |
18632 | + "smaller than 64MB lowmem, ignoring it.\n" | |
18633 | + , pages_to_mb(highmem_pages)); | |
18634 | + highmem_pages = 0; | |
18635 | + } | |
18636 | + max_low_pfn -= highmem_pages; | |
18637 | + } | |
18638 | +#else | |
18639 | + if (highmem_pages) | |
18640 | + printk(KERN_ERR "ignoring highmem size on non-highmem" | |
18641 | + " kernel!\n"); | |
18642 | +#endif | |
18643 | + } | |
18644 | +} | |
18645 | + | |
18646 | +#ifndef CONFIG_NEED_MULTIPLE_NODES | |
18647 | +void __init initmem_init(unsigned long start_pfn, | |
18648 | + unsigned long end_pfn) | |
18649 | +{ | |
18650 | +#ifdef CONFIG_HIGHMEM | |
18651 | + highstart_pfn = highend_pfn = max_pfn; | |
18652 | + if (max_pfn > max_low_pfn) | |
18653 | + highstart_pfn = max_low_pfn; | |
18654 | + memory_present(0, 0, highend_pfn); | |
18655 | + e820_register_active_regions(0, 0, highend_pfn); | |
18656 | + printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", | |
18657 | + pages_to_mb(highend_pfn - highstart_pfn)); | |
18658 | + num_physpages = highend_pfn; | |
18659 | + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; | |
18660 | +#else | |
18661 | + memory_present(0, 0, max_low_pfn); | |
18662 | + e820_register_active_regions(0, 0, max_low_pfn); | |
18663 | + num_physpages = max_low_pfn; | |
18664 | + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | |
18665 | +#endif | |
18666 | +#ifdef CONFIG_FLATMEM | |
18667 | + max_mapnr = num_physpages; | |
18668 | +#endif | |
18669 | + printk(KERN_NOTICE "%ldMB LOWMEM available.\n", | |
18670 | + pages_to_mb(max_low_pfn)); | |
18671 | + | |
18672 | + setup_bootmem_allocator(); | |
18673 | +} | |
18674 | +#endif /* !CONFIG_NEED_MULTIPLE_NODES */ | |
18675 | + | |
18676 | +static void __init zone_sizes_init(void) | |
18677 | +{ | |
18678 | + unsigned long max_zone_pfns[MAX_NR_ZONES]; | |
18679 | + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | |
18680 | + max_zone_pfns[ZONE_DMA] = | |
18681 | + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | |
18682 | + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; | |
18683 | +#ifdef CONFIG_HIGHMEM | |
18684 | + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; | |
18685 | +#endif | |
18686 | + | |
18687 | + free_area_init_nodes(max_zone_pfns); | |
18688 | +} | |
18689 | + | |
18690 | +void __init setup_bootmem_allocator(void) | |
18691 | { | |
18692 | int i; | |
18693 | + unsigned long bootmap_size, bootmap; | |
18694 | + unsigned long end_pfn = min(max_low_pfn, xen_start_info->nr_pages); | |
18695 | + | |
18696 | + /* | |
18697 | + * Initialize the boot-time allocator (with low memory only): | |
18698 | + */ | |
18699 | + bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; | |
18700 | + bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT, | |
18701 | + min(max_pfn_mapped, xen_start_info->nr_pages)<<PAGE_SHIFT, | |
18702 | + bootmap_size, PAGE_SIZE); | |
18703 | + if (bootmap == -1L) | |
18704 | + panic("Cannot find bootmem map of size %ld\n", bootmap_size); | |
18705 | + reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); | |
18706 | + | |
18707 | + /* don't touch min_low_pfn */ | |
18708 | + bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, | |
18709 | + min_low_pfn, end_pfn); | |
18710 | + printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | |
18711 | + max_pfn_mapped<<PAGE_SHIFT); | |
18712 | + printk(KERN_INFO " low ram: %08lx - %08lx\n", | |
18713 | + min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT); | |
18714 | + printk(KERN_INFO " bootmap %08lx - %08lx\n", | |
18715 | + bootmap, bootmap + bootmap_size); | |
18716 | + for_each_online_node(i) | |
18717 | + free_bootmem_with_active_regions(i, end_pfn); | |
18718 | + early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); | |
18719 | + | |
18720 | + after_init_bootmem = 1; | |
18721 | +} | |
18722 | + | |
18723 | +static unsigned long __init extend_init_mapping(unsigned long tables_space) | |
18724 | +{ | |
18725 | + unsigned long start_pfn = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) | |
18726 | + + xen_start_info->nr_pt_frames; | |
18727 | + unsigned long start = start_pfn, va = (unsigned long)&_text; | |
18728 | + pgd_t *pgd; | |
18729 | + pud_t *pud; | |
18730 | + pmd_t *pmd; | |
18731 | + pte_t *pte; | |
18732 | + | |
18733 | + /* Ensure init mappings cover kernel text/data and initial tables. */ | |
18734 | + while (va < PAGE_OFFSET + (start_pfn << PAGE_SHIFT) + tables_space) { | |
18735 | + pgd = pgd_offset_k(va); | |
18736 | + pud = pud_offset(pgd, va); | |
18737 | + pmd = pmd_offset(pud, va); | |
18738 | + if (pmd_none(*pmd)) { | |
18739 | + unsigned long pa = start_pfn++ << PAGE_SHIFT; | |
18740 | + | |
18741 | + memset(__va(pa), 0, PAGE_SIZE); | |
18742 | + make_lowmem_page_readonly(__va(pa), | |
18743 | + XENFEAT_writable_page_tables); | |
18744 | + xen_l2_entry_update(pmd, __pmd(pa | _KERNPG_TABLE)); | |
18745 | + } | |
18746 | + pte = pte_offset_kernel(pmd, va); | |
18747 | + if (pte_none(*pte)) { | |
18748 | + pte_t new_pte = __pte(__pa(va) | _KERNPG_TABLE); | |
18749 | + | |
18750 | + if (HYPERVISOR_update_va_mapping(va, new_pte, 0)) | |
18751 | + BUG(); | |
18752 | + } | |
18753 | + va += PAGE_SIZE; | |
18754 | + } | |
18755 | + | |
18756 | + /* Finally, blow away any spurious initial mappings. */ | |
18757 | + while (1) { | |
18758 | + pgd = pgd_offset_k(va); | |
18759 | + pud = pud_offset(pgd, va); | |
18760 | + pmd = pmd_offset(pud, va); | |
18761 | + if (pmd_none(*pmd)) | |
18762 | + break; | |
18763 | + if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0)) | |
18764 | + BUG(); | |
18765 | + va += PAGE_SIZE; | |
18766 | + } | |
18767 | + | |
18768 | + if (start_pfn > start) | |
18769 | + reserve_early(start << PAGE_SHIFT, | |
18770 | + start_pfn << PAGE_SHIFT, "INITMAP"); | |
18771 | + | |
18772 | + return start_pfn; | |
18773 | +} | |
18774 | + | |
18775 | +static void __init find_early_table_space(unsigned long end) | |
18776 | +{ | |
18777 | + unsigned long puds, pmds, ptes, tables; | |
18778 | + | |
18779 | + puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | |
18780 | + tables = PAGE_ALIGN(puds * sizeof(pud_t)); | |
18781 | + | |
18782 | + pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; | |
18783 | + tables += PAGE_ALIGN(pmds * sizeof(pmd_t)); | |
18784 | + | |
18785 | + if (cpu_has_pse) { | |
18786 | + unsigned long extra; | |
18787 | + | |
18788 | + extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); | |
18789 | + extra += PMD_SIZE; | |
18790 | + ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; | |
18791 | + } else | |
18792 | + ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; | |
18793 | + | |
18794 | + tables += PAGE_ALIGN(ptes * sizeof(pte_t)); | |
18795 | + | |
18796 | + /* for fixmap */ | |
18797 | + tables += PAGE_SIZE | |
18798 | + * ((((FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK) | |
18799 | + - (__fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK)) | |
18800 | + >> PMD_SHIFT); | |
18801 | + | |
18802 | + table_start = extend_init_mapping(tables); | |
18803 | + | |
18804 | + table_end = table_start; | |
18805 | + table_top = table_start + (tables>>PAGE_SHIFT); | |
18806 | + | |
18807 | + printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", | |
18808 | + end, table_start << PAGE_SHIFT, | |
18809 | + (table_start << PAGE_SHIFT) + tables); | |
18810 | +} | |
18811 | + | |
18812 | +unsigned long __init_refok init_memory_mapping(unsigned long start, | |
18813 | + unsigned long end) | |
18814 | +{ | |
18815 | + pgd_t *pgd_base = swapper_pg_dir; | |
18816 | + unsigned long start_pfn, end_pfn; | |
18817 | + unsigned long big_page_start; | |
18818 | + | |
18819 | + /* | |
18820 | + * Find space for the kernel direct mapping tables. | |
18821 | + */ | |
18822 | + if (!after_init_bootmem) | |
18823 | + find_early_table_space(end); | |
18824 | ||
18825 | #ifdef CONFIG_X86_PAE | |
18826 | set_nx(); | |
18827 | if (nx_enabled) | |
18828 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | |
18829 | #endif | |
18830 | + | |
18831 | + /* Enable PSE if available */ | |
18832 | + if (cpu_has_pse) | |
18833 | + set_in_cr4(X86_CR4_PSE); | |
18834 | + | |
18835 | + /* Enable PGE if available */ | |
18836 | + if (cpu_has_pge) { | |
18837 | + set_in_cr4(X86_CR4_PGE); | |
18838 | + __supported_pte_mask |= _PAGE_GLOBAL; | |
18839 | + } | |
18840 | + | |
18841 | + /* | |
18842 | + * Don't use a large page for the first 2/4MB of memory | |
18843 | + * because there are often fixed size MTRRs in there | |
18844 | + * and overlapping MTRRs into large pages can cause | |
18845 | + * slowdowns. | |
18846 | + */ | |
18847 | + big_page_start = PMD_SIZE; | |
18848 | + | |
18849 | + if (start < big_page_start) { | |
18850 | + start_pfn = start >> PAGE_SHIFT; | |
18851 | + end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT); | |
18852 | + } else { | |
18853 | + /* head is not big-page aligned? */ | |
18854 | + start_pfn = start >> PAGE_SHIFT; | |
18855 | + end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) | |
18856 | + << (PMD_SHIFT - PAGE_SHIFT); | |
18857 | + } | |
18858 | + if (start_pfn < end_pfn) | |
18859 | + kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0); | |
18860 | + | |
18861 | + /* big page range */ | |
18862 | + start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) | |
18863 | + << (PMD_SHIFT - PAGE_SHIFT); | |
18864 | + if (start_pfn < (big_page_start >> PAGE_SHIFT)) | |
18865 | + start_pfn = big_page_start >> PAGE_SHIFT; | |
18866 | + end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | |
18867 | + if (start_pfn < end_pfn) | |
18868 | + kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, | |
18869 | + cpu_has_pse); | |
18870 | + | |
18871 | + /* tail is not big-page aligned? */ | |
18872 | + start_pfn = end_pfn; | |
18873 | + if (start_pfn > (big_page_start>>PAGE_SHIFT)) { | |
18874 | + end_pfn = end >> PAGE_SHIFT; | |
18875 | + if (start_pfn < end_pfn) | |
18876 | + kernel_physical_mapping_init(pgd_base, start_pfn, | |
18877 | + end_pfn, 0); | |
18878 | + } | |
18879 | + | |
18880 | + early_ioremap_page_table_range_init(pgd_base); | |
18881 | + | |
18882 | + __flush_tlb_all(); | |
18883 | + | |
18884 | + if (!after_init_bootmem) | |
18885 | + reserve_early(table_start << PAGE_SHIFT, | |
18886 | + table_end << PAGE_SHIFT, "PGTABLE"); | |
18887 | + | |
18888 | + if (!after_init_bootmem) | |
18889 | + early_memtest(start, end); | |
18890 | + | |
18891 | + return end >> PAGE_SHIFT; | |
18892 | +} | |
18893 | + | |
18894 | + | |
18895 | +/* | |
18896 | + * paging_init() sets up the page tables - note that the first 8MB are | |
18897 | + * already mapped by head.S. | |
18898 | + * | |
18899 | + * This routine also unmaps the page at virtual kernel address 0, so | |
18900 | + * that we can trap those pesky NULL-reference errors in the kernel. | |
18901 | + */ | |
18902 | +void __init paging_init(void) | |
18903 | +{ | |
18904 | pagetable_init(); | |
18905 | ||
18906 | __flush_tlb_all(); | |
18907 | ||
18908 | kmap_init(); | |
18909 | ||
18910 | - /* Switch to the real shared_info page, and clear the | |
18911 | - * dummy page. */ | |
18912 | - set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); | |
18913 | - HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); | |
18914 | - memset(empty_zero_page, 0, sizeof(empty_zero_page)); | |
18915 | - | |
18916 | - /* Setup mapping of lower 1st MB */ | |
18917 | - for (i = 0; i < NR_FIX_ISAMAPS; i++) | |
18918 | - if (is_initial_xendomain()) | |
18919 | - set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE); | |
18920 | - else | |
18921 | - __set_fixmap(FIX_ISAMAP_BEGIN - i, | |
18922 | - virt_to_machine(empty_zero_page), | |
18923 | - PAGE_KERNEL_RO); | |
18924 | + /* | |
18925 | + * NOTE: at this point the bootmem allocator is fully available. | |
18926 | + */ | |
18927 | + sparse_init(); | |
18928 | + zone_sizes_init(); | |
18929 | } | |
18930 | ||
18931 | /* | |
18932 | @@ -598,7 +942,7 @@ static struct kcore_list kcore_mem, kcor | |
18933 | void __init mem_init(void) | |
18934 | { | |
18935 | int codesize, reservedpages, datasize, initsize; | |
18936 | - int tmp, bad_ppro; | |
18937 | + int tmp; | |
18938 | unsigned long pfn; | |
18939 | ||
18940 | pci_iommu_alloc(); | |
18941 | @@ -606,19 +950,6 @@ void __init mem_init(void) | |
18942 | #ifdef CONFIG_FLATMEM | |
18943 | BUG_ON(!mem_map); | |
18944 | #endif | |
18945 | - bad_ppro = ppro_with_ram_bug(); | |
18946 | - | |
18947 | -#ifdef CONFIG_HIGHMEM | |
18948 | - /* check that fixmap and pkmap do not overlap */ | |
18949 | - if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { | |
18950 | - printk(KERN_ERR | |
18951 | - "fixmap and kmap areas overlap - this will crash\n"); | |
18952 | - printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", | |
18953 | - PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE, | |
18954 | - FIXADDR_START); | |
18955 | - BUG(); | |
18956 | - } | |
18957 | -#endif | |
18958 | /* this will put all low memory onto the freelists */ | |
18959 | totalram_pages += free_all_bootmem(); | |
18960 | /* XEN: init and count low-mem pages outside initial allocation. */ | |
18961 | @@ -636,7 +967,7 @@ void __init mem_init(void) | |
18962 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) | |
18963 | reservedpages++; | |
18964 | ||
18965 | - set_highmem_pages_init(bad_ppro); | |
18966 | + set_highmem_pages_init(); | |
18967 | ||
18968 | codesize = (unsigned long) &_etext - (unsigned long) &_text; | |
18969 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; | |
18970 | @@ -657,7 +988,6 @@ void __init mem_init(void) | |
18971 | (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) | |
18972 | ); | |
18973 | ||
18974 | -#if 1 /* double-sanity-check paranoia */ | |
18975 | printk(KERN_INFO "virtual kernel memory layout:\n" | |
18976 | " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" | |
18977 | #ifdef CONFIG_HIGHMEM | |
18978 | @@ -698,7 +1028,6 @@ void __init mem_init(void) | |
18979 | #endif | |
18980 | BUG_ON(VMALLOC_START > VMALLOC_END); | |
18981 | BUG_ON((unsigned long)high_memory > VMALLOC_START); | |
18982 | -#endif /* double-sanity-check paranoia */ | |
18983 | ||
18984 | if (boot_cpu_data.wp_works_ok < 0) | |
18985 | test_wp_bit(); | |
18986 | @@ -755,6 +1084,8 @@ void mark_rodata_ro(void) | |
18987 | unsigned long start = PFN_ALIGN(_text); | |
18988 | unsigned long size = PFN_ALIGN(_etext) - start; | |
18989 | ||
18990 | +#ifndef CONFIG_DYNAMIC_FTRACE | |
18991 | + /* Dynamic tracing modifies the kernel text section */ | |
18992 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | |
18993 | printk(KERN_INFO "Write protecting the kernel text: %luk\n", | |
18994 | size >> 10); | |
18995 | @@ -767,6 +1098,8 @@ void mark_rodata_ro(void) | |
18996 | printk(KERN_INFO "Testing CPA: write protecting again\n"); | |
18997 | set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); | |
18998 | #endif | |
18999 | +#endif /* CONFIG_DYNAMIC_FTRACE */ | |
19000 | + | |
19001 | start += size; | |
19002 | size = (unsigned long)__end_rodata - start; | |
19003 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | |
19004 | @@ -829,3 +1162,9 @@ void free_initrd_mem(unsigned long start | |
19005 | free_init_pages("initrd memory", start, end); | |
19006 | } | |
19007 | #endif | |
19008 | + | |
19009 | +int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, | |
19010 | + int flags) | |
19011 | +{ | |
19012 | + return reserve_bootmem(phys, len, flags); | |
19013 | +} | |
82094b55 AF |
19014 | --- sle11-2009-10-16.orig/arch/x86/mm/init_64-xen.c 2009-03-16 16:38:05.000000000 +0100 |
19015 | +++ sle11-2009-10-16/arch/x86/mm/init_64-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
19016 | @@ -21,6 +21,7 @@ |
19017 | #include <linux/swap.h> | |
19018 | #include <linux/smp.h> | |
19019 | #include <linux/init.h> | |
19020 | +#include <linux/initrd.h> | |
19021 | #include <linux/pagemap.h> | |
19022 | #include <linux/bootmem.h> | |
19023 | #include <linux/proc_fs.h> | |
19024 | @@ -52,6 +53,14 @@ | |
19025 | ||
19026 | #include <xen/features.h> | |
19027 | ||
19028 | +/* | |
19029 | + * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | |
19030 | + * The direct mapping extends to max_pfn_mapped, so that we can directly access | |
19031 | + * apertures, ACPI and other tables without having to play with fixmaps. | |
19032 | + */ | |
19033 | +unsigned long max_low_pfn_mapped; | |
19034 | +unsigned long max_pfn_mapped; | |
19035 | + | |
19036 | #if CONFIG_XEN_COMPAT <= 0x030002 | |
19037 | unsigned int __kernel_page_user; | |
19038 | EXPORT_SYMBOL(__kernel_page_user); | |
19039 | @@ -60,13 +69,12 @@ EXPORT_SYMBOL(__kernel_page_user); | |
19040 | int after_bootmem; | |
19041 | ||
19042 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | |
19043 | -extern unsigned long start_pfn; | |
19044 | ||
19045 | extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD]; | |
19046 | extern pte_t level1_fixmap_pgt[PTRS_PER_PTE]; | |
19047 | ||
19048 | #ifndef CONFIG_XEN | |
19049 | -int direct_gbpages __meminitdata | |
19050 | +int direct_gbpages | |
19051 | #ifdef CONFIG_DIRECT_GBPAGES | |
19052 | = 1 | |
19053 | #endif | |
19054 | @@ -145,55 +153,23 @@ void __meminit early_make_page_readonly( | |
19055 | * around without checking the pgd every time. | |
19056 | */ | |
19057 | ||
19058 | -void show_mem(void) | |
19059 | -{ | |
19060 | - long i, total = 0, reserved = 0; | |
19061 | - long shared = 0, cached = 0; | |
19062 | - struct page *page; | |
19063 | - pg_data_t *pgdat; | |
19064 | - | |
19065 | - printk(KERN_INFO "Mem-info:\n"); | |
19066 | - show_free_areas(); | |
19067 | - for_each_online_pgdat(pgdat) { | |
19068 | - for (i = 0; i < pgdat->node_spanned_pages; ++i) { | |
19069 | - /* | |
19070 | - * This loop can take a while with 256 GB and | |
19071 | - * 4k pages so defer the NMI watchdog: | |
19072 | - */ | |
19073 | - if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) | |
19074 | - touch_nmi_watchdog(); | |
19075 | - | |
19076 | - if (!pfn_valid(pgdat->node_start_pfn + i)) | |
19077 | - continue; | |
19078 | - | |
19079 | - page = pfn_to_page(pgdat->node_start_pfn + i); | |
19080 | - total++; | |
19081 | - if (PageReserved(page)) | |
19082 | - reserved++; | |
19083 | - else if (PageSwapCache(page)) | |
19084 | - cached++; | |
19085 | - else if (page_count(page)) | |
19086 | - shared += page_count(page) - 1; | |
19087 | - } | |
19088 | - } | |
19089 | - printk(KERN_INFO "%lu pages of RAM\n", total); | |
19090 | - printk(KERN_INFO "%lu reserved pages\n", reserved); | |
19091 | - printk(KERN_INFO "%lu pages shared\n", shared); | |
19092 | - printk(KERN_INFO "%lu pages swap cached\n", cached); | |
19093 | -} | |
19094 | - | |
19095 | static unsigned long __meminitdata table_start; | |
19096 | -static unsigned long __meminitdata table_end; | |
19097 | +static unsigned long __meminitdata table_cur; | |
19098 | +static unsigned long __meminitdata table_top; | |
19099 | ||
19100 | -static __init void *spp_getpage(void) | |
19101 | +/* | |
19102 | + * NOTE: This function is marked __ref because it calls an __init function | |
19103 | + * (alloc_bootmem_pages). This is safe ONLY while after_bootmem == 0. | |
19104 | + */ | |
19105 | +static __ref void *spp_getpage(void) | |
19106 | { | |
19107 | void *ptr; | |
19108 | ||
19109 | if (after_bootmem) | |
19110 | ptr = (void *) get_zeroed_page(GFP_ATOMIC); | |
19111 | - else if (start_pfn < table_end) { | |
19112 | - ptr = __va(start_pfn << PAGE_SHIFT); | |
19113 | - start_pfn++; | |
19114 | + else if (table_cur < table_top) { | |
19115 | + ptr = __va(table_cur << PAGE_SHIFT); | |
19116 | + table_cur++; | |
19117 | memset(ptr, 0, PAGE_SIZE); | |
19118 | } else | |
19119 | ptr = alloc_bootmem_pages(PAGE_SIZE); | |
19120 | @@ -208,30 +184,18 @@ static __init void *spp_getpage(void) | |
19121 | return ptr; | |
19122 | } | |
19123 | ||
19124 | -#define pgd_offset_u(address) (__user_pgd(init_level4_pgt) + pgd_index(address)) | |
19125 | -#define pud_offset_u(address) (level3_user_pgt + pud_index(address)) | |
19126 | - | |
19127 | -static __init void | |
19128 | -set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot, int user_mode) | |
19129 | +void | |
19130 | +set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) | |
19131 | { | |
19132 | - pgd_t *pgd; | |
19133 | pud_t *pud; | |
19134 | pmd_t *pmd; | |
19135 | - pte_t *pte, new_pte; | |
19136 | - | |
19137 | - pr_debug("set_pte_phys %lx to %lx\n", vaddr, phys); | |
19138 | + pte_t *pte; | |
19139 | ||
19140 | - pgd = (user_mode ? pgd_offset_u(vaddr) : pgd_offset_k(vaddr)); | |
19141 | - if (pgd_none(*pgd)) { | |
19142 | - printk(KERN_ERR | |
19143 | - "PGD FIXMAP MISSING, it should be setup in head.S!\n"); | |
19144 | - return; | |
19145 | - } | |
19146 | - pud = (user_mode ? pud_offset_u(vaddr) : pud_offset(pgd, vaddr)); | |
19147 | + pud = pud_page + pud_index(vaddr); | |
19148 | if (pud_none(*pud)) { | |
19149 | pmd = (pmd_t *) spp_getpage(); | |
19150 | make_page_readonly(pmd, XENFEAT_writable_page_tables); | |
19151 | - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER)); | |
19152 | + pud_populate(&init_mm, pud, pmd); | |
19153 | if (pmd != pmd_offset(pud, 0)) { | |
19154 | printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n", | |
19155 | pmd, pmd_offset(pud, 0)); | |
19156 | @@ -242,19 +206,20 @@ set_pte_phys(unsigned long vaddr, unsign | |
19157 | if (pmd_none(*pmd)) { | |
19158 | pte = (pte_t *) spp_getpage(); | |
19159 | make_page_readonly(pte, XENFEAT_writable_page_tables); | |
19160 | - set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER)); | |
19161 | + pmd_populate_kernel(&init_mm, pmd, pte); | |
19162 | if (pte != pte_offset_kernel(pmd, 0)) { | |
19163 | printk(KERN_ERR "PAGETABLE BUG #02!\n"); | |
19164 | return; | |
19165 | } | |
19166 | } | |
19167 | - if (pgprot_val(prot)) | |
19168 | - new_pte = pfn_pte(phys >> PAGE_SHIFT, prot); | |
19169 | - else | |
19170 | - new_pte = __pte(0); | |
19171 | ||
19172 | pte = pte_offset_kernel(pmd, vaddr); | |
19173 | if (!pte_none(*pte) && __pte_val(new_pte) && | |
19174 | +#ifdef CONFIG_ACPI | |
19175 | + /* __acpi_map_table() fails to properly call clear_fixmap() */ | |
19176 | + (vaddr < __fix_to_virt(FIX_ACPI_END) || | |
19177 | + vaddr > __fix_to_virt(FIX_ACPI_BEGIN)) && | |
19178 | +#endif | |
19179 | __pte_val(*pte) != (__pte_val(new_pte) & __supported_pte_mask)) | |
19180 | pte_ERROR(*pte); | |
19181 | set_pte(pte, new_pte); | |
19182 | @@ -266,15 +231,13 @@ set_pte_phys(unsigned long vaddr, unsign | |
19183 | __flush_tlb_one(vaddr); | |
19184 | } | |
19185 | ||
19186 | -static __init void | |
19187 | -set_pte_phys_ma(unsigned long vaddr, unsigned long phys, pgprot_t prot) | |
19188 | +void | |
19189 | +set_pte_vaddr(unsigned long vaddr, pte_t pteval) | |
19190 | { | |
19191 | pgd_t *pgd; | |
19192 | - pud_t *pud; | |
19193 | - pmd_t *pmd; | |
19194 | - pte_t *pte, new_pte; | |
19195 | + pud_t *pud_page; | |
19196 | ||
19197 | - pr_debug("set_pte_phys_ma %lx to %lx\n", vaddr, phys); | |
19198 | + pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, __pte_val(pteval)); | |
19199 | ||
19200 | pgd = pgd_offset_k(vaddr); | |
19201 | if (pgd_none(*pgd)) { | |
19202 | @@ -282,47 +245,51 @@ set_pte_phys_ma(unsigned long vaddr, uns | |
19203 | "PGD FIXMAP MISSING, it should be setup in head.S!\n"); | |
19204 | return; | |
19205 | } | |
19206 | - pud = pud_offset(pgd, vaddr); | |
19207 | - if (pud_none(*pud)) { | |
19208 | - pmd = (pmd_t *) spp_getpage(); | |
19209 | - make_page_readonly(pmd, XENFEAT_writable_page_tables); | |
19210 | - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER)); | |
19211 | - if (pmd != pmd_offset(pud, 0)) { | |
19212 | - printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n", | |
19213 | - pmd, pmd_offset(pud, 0)); | |
19214 | + pud_page = (pud_t*)pgd_page_vaddr(*pgd); | |
19215 | + set_pte_vaddr_pud(pud_page, vaddr, pteval); | |
19216 | +} | |
19217 | + | |
19218 | +#ifndef CONFIG_XEN | |
19219 | +/* | |
19220 | + * Create large page table mappings for a range of physical addresses. | |
19221 | + */ | |
19222 | +static void __init __init_extra_mapping(unsigned long phys, unsigned long size, | |
19223 | + pgprot_t prot) | |
19224 | +{ | |
19225 | + pgd_t *pgd; | |
19226 | + pud_t *pud; | |
19227 | + pmd_t *pmd; | |
19228 | + | |
19229 | + BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK)); | |
19230 | + for (; size; phys += PMD_SIZE, size -= PMD_SIZE) { | |
19231 | + pgd = pgd_offset_k((unsigned long)__va(phys)); | |
19232 | + if (pgd_none(*pgd)) { | |
19233 | + pud = (pud_t *) spp_getpage(); | |
19234 | + set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE | | |
19235 | + _PAGE_USER)); | |
19236 | } | |
19237 | - } | |
19238 | - pmd = pmd_offset(pud, vaddr); | |
19239 | - if (pmd_none(*pmd)) { | |
19240 | - pte = (pte_t *) spp_getpage(); | |
19241 | - make_page_readonly(pte, XENFEAT_writable_page_tables); | |
19242 | - set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER)); | |
19243 | - if (pte != pte_offset_kernel(pmd, 0)) { | |
19244 | - printk(KERN_ERR "PAGETABLE BUG #02!\n"); | |
19245 | - return; | |
19246 | + pud = pud_offset(pgd, (unsigned long)__va(phys)); | |
19247 | + if (pud_none(*pud)) { | |
19248 | + pmd = (pmd_t *) spp_getpage(); | |
19249 | + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | | |
19250 | + _PAGE_USER)); | |
19251 | } | |
19252 | + pmd = pmd_offset(pud, phys); | |
19253 | + BUG_ON(!pmd_none(*pmd)); | |
19254 | + set_pmd(pmd, __pmd(phys | pgprot_val(prot))); | |
19255 | } | |
19256 | - new_pte = pfn_pte_ma(phys >> PAGE_SHIFT, prot); | |
19257 | +} | |
19258 | ||
19259 | - pte = pte_offset_kernel(pmd, vaddr); | |
19260 | - if (!pte_none(*pte) && __pte_val(new_pte) && | |
19261 | -#ifdef CONFIG_ACPI | |
19262 | - /* __acpi_map_table() fails to properly call clear_fixmap() */ | |
19263 | - (vaddr < __fix_to_virt(FIX_ACPI_END) || | |
19264 | - vaddr > __fix_to_virt(FIX_ACPI_BEGIN)) && | |
19265 | -#endif | |
19266 | - __pte_val(*pte) != (__pte_val(new_pte) & __supported_pte_mask)) | |
19267 | - pte_ERROR(*pte); | |
19268 | - set_pte(pte, new_pte); | |
19269 | +void __init init_extra_mapping_wb(unsigned long phys, unsigned long size) | |
19270 | +{ | |
19271 | + __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE); | |
19272 | +} | |
19273 | ||
19274 | - /* | |
19275 | - * It's enough to flush this one mapping. | |
19276 | - * (PGE mappings get flushed as well) | |
19277 | - */ | |
19278 | - __flush_tlb_one(vaddr); | |
19279 | +void __init init_extra_mapping_uc(unsigned long phys, unsigned long size) | |
19280 | +{ | |
19281 | + __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE_NOCACHE); | |
19282 | } | |
19283 | ||
19284 | -#ifndef CONFIG_XEN | |
19285 | /* | |
19286 | * The head.S code sets up the kernel high mapping: | |
19287 | * | |
19288 | @@ -352,63 +319,52 @@ void __init cleanup_highmap(void) | |
19289 | } | |
19290 | #endif | |
19291 | ||
19292 | -/* NOTE: this is meant to be run only at boot */ | |
19293 | -void __init __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot) | |
19294 | -{ | |
19295 | - unsigned long address = __fix_to_virt(idx); | |
19296 | - | |
19297 | - if (idx >= __end_of_fixed_addresses) { | |
19298 | - printk(KERN_ERR "Invalid __set_fixmap\n"); | |
19299 | - return; | |
19300 | - } | |
19301 | - switch (idx) { | |
19302 | - case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: | |
19303 | - set_pte_phys(address, phys, prot, 0); | |
19304 | - set_pte_phys(address, phys, prot, 1); | |
19305 | - break; | |
19306 | - case FIX_EARLYCON_MEM_BASE: | |
19307 | - xen_l1_entry_update(level1_fixmap_pgt + pte_index(address), | |
19308 | - pfn_pte_ma(phys >> PAGE_SHIFT, prot)); | |
19309 | - break; | |
19310 | - default: | |
19311 | - set_pte_phys_ma(address, phys, prot); | |
19312 | - break; | |
19313 | - } | |
19314 | -} | |
19315 | - | |
19316 | -static __meminit void *alloc_static_page(unsigned long *phys) | |
19317 | +static __ref void *alloc_low_page(unsigned long *phys) | |
19318 | { | |
19319 | - unsigned long va = (start_pfn << PAGE_SHIFT) + __START_KERNEL_map; | |
19320 | + unsigned long pfn; | |
19321 | + void *adr; | |
19322 | ||
19323 | if (after_bootmem) { | |
19324 | - void *adr = (void *)get_zeroed_page(GFP_ATOMIC); | |
19325 | + adr = (void *)get_zeroed_page(GFP_ATOMIC); | |
19326 | *phys = __pa(adr); | |
19327 | ||
19328 | return adr; | |
19329 | } | |
19330 | ||
19331 | - *phys = start_pfn << PAGE_SHIFT; | |
19332 | - start_pfn++; | |
19333 | - memset((void *)va, 0, PAGE_SIZE); | |
19334 | - return (void *)va; | |
19335 | + BUG_ON(!table_cur); | |
19336 | + pfn = table_cur++; | |
19337 | + if (pfn >= table_top) | |
19338 | + panic("alloc_low_page: ran out of memory"); | |
19339 | + | |
19340 | + adr = early_ioremap(pfn_to_mfn(pfn) * PAGE_SIZE, PAGE_SIZE); | |
19341 | + memset(adr, 0, PAGE_SIZE); | |
19342 | + *phys = pfn * PAGE_SIZE; | |
19343 | + return adr; | |
19344 | } | |
19345 | ||
19346 | -#define PTE_SIZE PAGE_SIZE | |
19347 | +static __ref void unmap_low_page(void *adr) | |
19348 | +{ | |
19349 | + if (after_bootmem) | |
19350 | + return; | |
19351 | + | |
19352 | + early_iounmap(adr, PAGE_SIZE); | |
19353 | +} | |
19354 | ||
19355 | static inline int __meminit make_readonly(unsigned long paddr) | |
19356 | { | |
19357 | extern char __vsyscall_0; | |
19358 | int readonly = 0; | |
19359 | ||
19360 | - /* Make new page tables read-only. */ | |
19361 | + /* Make new page tables read-only on the first pass. */ | |
19362 | if (!xen_feature(XENFEAT_writable_page_tables) | |
19363 | + && !max_pfn_mapped | |
19364 | && (paddr >= (table_start << PAGE_SHIFT)) | |
19365 | - && (paddr < (table_end << PAGE_SHIFT))) | |
19366 | + && (paddr < (table_top << PAGE_SHIFT))) | |
19367 | readonly = 1; | |
19368 | /* Make old page tables read-only. */ | |
19369 | if (!xen_feature(XENFEAT_writable_page_tables) | |
19370 | && (paddr >= (xen_start_info->pt_base - __START_KERNEL_map)) | |
19371 | - && (paddr < (start_pfn << PAGE_SHIFT))) | |
19372 | + && (paddr < (table_cur << PAGE_SHIFT))) | |
19373 | readonly = 1; | |
19374 | ||
19375 | /* | |
19376 | @@ -425,118 +381,131 @@ static inline int __meminit make_readonl | |
19377 | return readonly; | |
19378 | } | |
19379 | ||
19380 | -#ifndef CONFIG_XEN | |
19381 | -/* Must run before zap_low_mappings */ | |
19382 | -__meminit void *early_ioremap(unsigned long addr, unsigned long size) | |
19383 | +static unsigned long __meminit | |
19384 | +phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) | |
19385 | { | |
19386 | - pmd_t *pmd, *last_pmd; | |
19387 | - unsigned long vaddr; | |
19388 | - int i, pmds; | |
19389 | - | |
19390 | - pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; | |
19391 | - vaddr = __START_KERNEL_map; | |
19392 | - pmd = level2_kernel_pgt; | |
19393 | - last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1; | |
19394 | - | |
19395 | - for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { | |
19396 | - for (i = 0; i < pmds; i++) { | |
19397 | - if (pmd_present(pmd[i])) | |
19398 | - goto continue_outer_loop; | |
19399 | - } | |
19400 | - vaddr += addr & ~PMD_MASK; | |
19401 | - addr &= PMD_MASK; | |
19402 | + unsigned pages = 0; | |
19403 | + unsigned long last_map_addr = end; | |
19404 | + int i; | |
19405 | + | |
19406 | + pte_t *pte = pte_page + pte_index(addr); | |
19407 | + | |
19408 | + for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) { | |
19409 | + unsigned long pteval = addr | __PAGE_KERNEL; | |
19410 | ||
19411 | - for (i = 0; i < pmds; i++, addr += PMD_SIZE) | |
19412 | - set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); | |
19413 | - __flush_tlb_all(); | |
19414 | - | |
19415 | - return (void *)vaddr; | |
19416 | -continue_outer_loop: | |
19417 | - ; | |
19418 | + if (addr >= end || | |
19419 | + (!after_bootmem && | |
19420 | + (addr >> PAGE_SHIFT) >= xen_start_info->nr_pages)) | |
19421 | + break; | |
19422 | + | |
19423 | + if (__pte_val(*pte)) | |
19424 | + continue; | |
19425 | + | |
19426 | + if (make_readonly(addr)) | |
19427 | + pteval &= ~_PAGE_RW; | |
19428 | + if (0) | |
19429 | + printk(" pte=%p addr=%lx pte=%016lx\n", | |
19430 | + pte, addr, pteval); | |
19431 | + if (!after_bootmem) | |
19432 | + *pte = __pte(pteval & __supported_pte_mask); | |
19433 | + else | |
19434 | + set_pte(pte, __pte(pteval & __supported_pte_mask)); | |
19435 | + last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; | |
19436 | + pages++; | |
19437 | } | |
19438 | - printk("early_ioremap(0x%lx, %lu) failed\n", addr, size); | |
19439 | - return NULL; | |
19440 | + update_page_count(PG_LEVEL_4K, pages); | |
19441 | + | |
19442 | + return last_map_addr; | |
19443 | } | |
19444 | ||
19445 | -/* | |
19446 | - * To avoid virtual aliases later: | |
19447 | - */ | |
19448 | -__meminit void early_iounmap(void *addr, unsigned long size) | |
19449 | +static unsigned long __meminit | |
19450 | +phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end) | |
19451 | { | |
19452 | - unsigned long vaddr; | |
19453 | - pmd_t *pmd; | |
19454 | - int i, pmds; | |
19455 | - | |
19456 | - vaddr = (unsigned long)addr; | |
19457 | - pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; | |
19458 | - pmd = level2_kernel_pgt + pmd_index(vaddr); | |
19459 | - | |
19460 | - for (i = 0; i < pmds; i++) | |
19461 | - pmd_clear(pmd + i); | |
19462 | + pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); | |
19463 | ||
19464 | - __flush_tlb_all(); | |
19465 | + BUG_ON(!max_pfn_mapped); | |
19466 | + return phys_pte_init(pte, address, end); | |
19467 | } | |
19468 | -#endif | |
19469 | ||
19470 | static unsigned long __meminit | |
19471 | -phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) | |
19472 | +phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |
19473 | + unsigned long page_size_mask) | |
19474 | { | |
19475 | + unsigned long pages = 0; | |
19476 | + unsigned long last_map_addr = end; | |
19477 | + unsigned long start = address; | |
19478 | + | |
19479 | int i = pmd_index(address); | |
19480 | ||
19481 | - for (; i < PTRS_PER_PMD; i++) { | |
19482 | + for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { | |
19483 | unsigned long pte_phys; | |
19484 | - pmd_t *pmd = pmd_page + i; | |
19485 | - pte_t *pte, *pte_save; | |
19486 | - int k; | |
19487 | + pmd_t *pmd = pmd_page + pmd_index(address); | |
19488 | + pte_t *pte; | |
19489 | ||
19490 | if (address >= end) | |
19491 | break; | |
19492 | ||
19493 | if (__pmd_val(*pmd)) { | |
19494 | - address += PMD_SIZE; | |
19495 | + if (!pmd_large(*pmd)) { | |
19496 | + spin_lock(&init_mm.page_table_lock); | |
19497 | + last_map_addr = phys_pte_update(pmd, address, | |
19498 | + end); | |
19499 | + spin_unlock(&init_mm.page_table_lock); | |
19500 | + } | |
19501 | + /* Count entries we're using from level2_ident_pgt */ | |
19502 | + if (start == 0) | |
19503 | + pages++; | |
19504 | continue; | |
19505 | } | |
19506 | ||
19507 | - pte = alloc_static_page(&pte_phys); | |
19508 | - pte_save = pte; | |
19509 | - for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) { | |
19510 | - unsigned long pteval = address | _PAGE_NX | _KERNPG_TABLE; | |
19511 | - | |
19512 | - if (address >= (after_bootmem | |
19513 | - ? end | |
19514 | - : xen_start_info->nr_pages << PAGE_SHIFT)) | |
19515 | - pteval = 0; | |
19516 | - else if (make_readonly(address)) | |
19517 | - pteval &= ~_PAGE_RW; | |
19518 | - set_pte(pte, __pte(pteval & __supported_pte_mask)); | |
19519 | + if (page_size_mask & (1<<PG_LEVEL_2M)) { | |
19520 | + pages++; | |
19521 | + spin_lock(&init_mm.page_table_lock); | |
19522 | + set_pte((pte_t *)pmd, | |
19523 | + pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); | |
19524 | + spin_unlock(&init_mm.page_table_lock); | |
19525 | + last_map_addr = (address & PMD_MASK) + PMD_SIZE; | |
19526 | + continue; | |
19527 | } | |
19528 | + | |
19529 | + pte = alloc_low_page(&pte_phys); | |
19530 | + last_map_addr = phys_pte_init(pte, address, end); | |
19531 | + unmap_low_page(pte); | |
19532 | + | |
19533 | if (!after_bootmem) { | |
19534 | - early_make_page_readonly(pte_save, XENFEAT_writable_page_tables); | |
19535 | - *pmd = __pmd(pte_phys | _KERNPG_TABLE); | |
19536 | + if (max_pfn_mapped) | |
19537 | + make_page_readonly(__va(pte_phys), | |
19538 | + XENFEAT_writable_page_tables); | |
19539 | + *pmd = __pmd(pte_phys | _PAGE_TABLE); | |
19540 | } else { | |
19541 | - make_page_readonly(pte_save, XENFEAT_writable_page_tables); | |
19542 | - set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE)); | |
19543 | + make_page_readonly(pte, XENFEAT_writable_page_tables); | |
19544 | + spin_lock(&init_mm.page_table_lock); | |
19545 | + pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); | |
19546 | + spin_unlock(&init_mm.page_table_lock); | |
19547 | } | |
19548 | } | |
19549 | - return address; | |
19550 | + update_page_count(PG_LEVEL_2M, pages); | |
19551 | + return last_map_addr; | |
19552 | } | |
19553 | ||
19554 | static unsigned long __meminit | |
19555 | -phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) | |
19556 | +phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end, | |
19557 | + unsigned long page_size_mask) | |
19558 | { | |
19559 | pmd_t *pmd = pmd_offset(pud, 0); | |
19560 | unsigned long last_map_addr; | |
19561 | ||
19562 | - spin_lock(&init_mm.page_table_lock); | |
19563 | - last_map_addr = phys_pmd_init(pmd, address, end); | |
19564 | - spin_unlock(&init_mm.page_table_lock); | |
19565 | + BUG_ON(!max_pfn_mapped); | |
19566 | + last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask); | |
19567 | __flush_tlb_all(); | |
19568 | return last_map_addr; | |
19569 | } | |
19570 | ||
19571 | static unsigned long __meminit | |
19572 | -phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) | |
19573 | +phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |
19574 | + unsigned long page_size_mask) | |
19575 | { | |
19576 | + unsigned long pages = 0; | |
19577 | unsigned long last_map_addr = end; | |
19578 | int i = pud_index(addr); | |
19579 | ||
19580 | @@ -550,29 +519,55 @@ phys_pud_init(pud_t *pud_page, unsigned | |
19581 | ||
19582 | if (__pud_val(*pud)) { | |
19583 | if (!pud_large(*pud)) | |
19584 | - last_map_addr = phys_pmd_update(pud, addr, end); | |
19585 | + last_map_addr = phys_pmd_update(pud, addr, end, | |
19586 | + page_size_mask); | |
19587 | continue; | |
19588 | } | |
19589 | ||
19590 | - if (direct_gbpages) { | |
19591 | + if (page_size_mask & (1<<PG_LEVEL_1G)) { | |
19592 | + pages++; | |
19593 | + spin_lock(&init_mm.page_table_lock); | |
19594 | set_pte((pte_t *)pud, | |
19595 | pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); | |
19596 | + spin_unlock(&init_mm.page_table_lock); | |
19597 | last_map_addr = (addr & PUD_MASK) + PUD_SIZE; | |
19598 | continue; | |
19599 | } | |
19600 | ||
19601 | - pmd = alloc_static_page(&pmd_phys); | |
19602 | - | |
19603 | - spin_lock(&init_mm.page_table_lock); | |
19604 | - *pud = __pud(pmd_phys | _KERNPG_TABLE); | |
19605 | - last_map_addr = phys_pmd_init(pmd, addr, end); | |
19606 | - spin_unlock(&init_mm.page_table_lock); | |
19607 | + pmd = alloc_low_page(&pmd_phys); | |
19608 | + last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask); | |
19609 | + unmap_low_page(pmd); | |
19610 | ||
19611 | - early_make_page_readonly(pmd, XENFEAT_writable_page_tables); | |
19612 | + if (!after_bootmem) { | |
19613 | + if (max_pfn_mapped) | |
19614 | + make_page_readonly(__va(pmd_phys), | |
19615 | + XENFEAT_writable_page_tables); | |
19616 | + if (page_size_mask & (1 << PG_LEVEL_NUM)) | |
19617 | + xen_l3_entry_update(pud, __pud(pmd_phys | _PAGE_TABLE)); | |
19618 | + else | |
19619 | + *pud = __pud(pmd_phys | _PAGE_TABLE); | |
19620 | + } else { | |
19621 | + make_page_readonly(pmd, XENFEAT_writable_page_tables); | |
19622 | + spin_lock(&init_mm.page_table_lock); | |
19623 | + pud_populate(&init_mm, pud, __va(pmd_phys)); | |
19624 | + spin_unlock(&init_mm.page_table_lock); | |
19625 | + } | |
19626 | } | |
19627 | __flush_tlb_all(); | |
19628 | + update_page_count(PG_LEVEL_1G, pages); | |
19629 | ||
19630 | - return last_map_addr >> PAGE_SHIFT; | |
19631 | + return last_map_addr; | |
19632 | +} | |
19633 | + | |
19634 | +static unsigned long __meminit | |
19635 | +phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, | |
19636 | + unsigned long page_size_mask) | |
19637 | +{ | |
19638 | + pud_t *pud; | |
19639 | + | |
19640 | + pud = (pud_t *)pgd_page_vaddr(*pgd); | |
19641 | + | |
19642 | + return phys_pud_init(pud, addr, end, page_size_mask | (1 << PG_LEVEL_NUM)); | |
19643 | } | |
19644 | ||
19645 | void __init xen_init_pt(void) | |
19646 | @@ -651,86 +646,36 @@ void __init xen_init_pt(void) | |
19647 | } | |
19648 | } | |
19649 | ||
19650 | -static void __init extend_init_mapping(unsigned long tables_space) | |
19651 | -{ | |
19652 | - unsigned long va = __START_KERNEL_map; | |
19653 | - unsigned long start = start_pfn; | |
19654 | - unsigned long phys, addr, *pte_page; | |
19655 | - pmd_t *pmd; | |
19656 | - pte_t *pte, new_pte; | |
19657 | - unsigned long *page = (unsigned long *)init_level4_pgt; | |
19658 | - | |
19659 | - addr = page[pgd_index(va)]; | |
19660 | - addr_to_page(addr, page); | |
19661 | - addr = page[pud_index(va)]; | |
19662 | - addr_to_page(addr, page); | |
19663 | - | |
19664 | - /* Kill mapping of low 1MB. */ | |
19665 | - while (va < (unsigned long)&_text) { | |
19666 | - if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0)) | |
19667 | - BUG(); | |
19668 | - va += PAGE_SIZE; | |
19669 | - } | |
19670 | - | |
19671 | - /* Ensure init mappings cover kernel text/data and initial tables. */ | |
19672 | - while (va < (__START_KERNEL_map | |
19673 | - + (start_pfn << PAGE_SHIFT) | |
19674 | - + tables_space)) { | |
19675 | - pmd = (pmd_t *)&page[pmd_index(va)]; | |
19676 | - if (pmd_none(*pmd)) { | |
19677 | - pte_page = alloc_static_page(&phys); | |
19678 | - early_make_page_readonly( | |
19679 | - pte_page, XENFEAT_writable_page_tables); | |
19680 | - set_pmd(pmd, __pmd(phys | _KERNPG_TABLE)); | |
19681 | - } else { | |
19682 | - addr = page[pmd_index(va)]; | |
19683 | - addr_to_page(addr, pte_page); | |
19684 | - } | |
19685 | - pte = (pte_t *)&pte_page[pte_index(va)]; | |
19686 | - if (pte_none(*pte)) { | |
19687 | - new_pte = pfn_pte( | |
19688 | - (va - __START_KERNEL_map) >> PAGE_SHIFT, | |
19689 | - __pgprot(_KERNPG_TABLE)); | |
19690 | - xen_l1_entry_update(pte, new_pte); | |
19691 | - } | |
19692 | - va += PAGE_SIZE; | |
19693 | - } | |
19694 | - | |
19695 | - /* Finally, blow away any spurious initial mappings. */ | |
19696 | - while (1) { | |
19697 | - pmd = (pmd_t *)&page[pmd_index(va)]; | |
19698 | - if (pmd_none(*pmd)) | |
19699 | - break; | |
19700 | - if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0)) | |
19701 | - BUG(); | |
19702 | - va += PAGE_SIZE; | |
19703 | - } | |
19704 | - | |
19705 | - if (start_pfn > start) | |
19706 | - reserve_early(start << PAGE_SHIFT, | |
19707 | - start_pfn << PAGE_SHIFT, "INITMAP"); | |
19708 | -} | |
19709 | - | |
19710 | static void __init find_early_table_space(unsigned long end) | |
19711 | { | |
19712 | unsigned long puds, pmds, ptes, tables; | |
19713 | ||
19714 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | |
19715 | + tables = round_up(puds * sizeof(pud_t), PAGE_SIZE); | |
19716 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; | |
19717 | - ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT; | |
19718 | + tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); | |
19719 | ||
19720 | - tables = round_up(puds * 8, PAGE_SIZE) + | |
19721 | - round_up(pmds * 8, PAGE_SIZE) + | |
19722 | - round_up(ptes * 8, PAGE_SIZE); | |
19723 | + ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; | |
19724 | + tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE); | |
19725 | ||
19726 | - extend_init_mapping(tables); | |
19727 | + if (!table_top) { | |
19728 | + table_start = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) + | |
19729 | + xen_start_info->nr_pt_frames; | |
19730 | + table_cur = table_start; | |
19731 | + } else { | |
19732 | + /* | |
19733 | + * [table_start, table_top) gets passed to reserve_early(), | |
19734 | + * so we must not use table_cur here, despite continuing | |
19735 | + * to allocate from there. table_cur possibly being below | |
19736 | + * table_start is otoh not a problem. | |
19737 | + */ | |
19738 | + table_start = table_top; | |
19739 | + } | |
19740 | ||
19741 | - table_start = start_pfn; | |
19742 | - table_end = table_start + (tables>>PAGE_SHIFT); | |
19743 | + table_top = table_cur + (tables >> PAGE_SHIFT); | |
19744 | ||
19745 | - early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", | |
19746 | - end, table_start << PAGE_SHIFT, | |
19747 | - (table_start << PAGE_SHIFT) + tables); | |
19748 | + printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", | |
19749 | + end, table_cur << PAGE_SHIFT, table_top << PAGE_SHIFT); | |
19750 | } | |
19751 | ||
19752 | static void __init xen_finish_init_mapping(void) | |
19753 | @@ -752,18 +697,18 @@ static void __init xen_finish_init_mappi | |
19754 | xen_start_info->mod_start = (unsigned long) | |
19755 | __va(__pa(xen_start_info->mod_start)); | |
19756 | ||
19757 | - /* Destroy the Xen-created mappings beyond the kernel image as | |
19758 | - * well as the temporary mappings created above. Prevents | |
19759 | - * overlap with modules area (if init mapping is very big). | |
19760 | - */ | |
19761 | + /* Destroy the Xen-created mappings beyond the kernel image. */ | |
19762 | start = PAGE_ALIGN((unsigned long)_end); | |
19763 | - end = __START_KERNEL_map + (table_end << PAGE_SHIFT); | |
19764 | + end = __START_KERNEL_map + (table_start << PAGE_SHIFT); | |
19765 | for (; start < end; start += PAGE_SIZE) | |
19766 | if (HYPERVISOR_update_va_mapping(start, __pte_ma(0), 0)) | |
19767 | BUG(); | |
19768 | ||
19769 | - /* Allocate pte's for initial fixmaps from 'start_pfn' allocator. */ | |
19770 | - table_end = ~0UL; | |
19771 | + /* Allocate pte's for initial fixmaps from 'table_cur' allocator. */ | |
19772 | + start = table_top; | |
19773 | + WARN(table_cur != start, "start=%lx cur=%lx top=%lx\n", | |
19774 | + table_start, table_cur, start); | |
19775 | + table_top = ~0UL; | |
19776 | ||
19777 | /* Switch to the real shared_info page, and clear the dummy page. */ | |
19778 | set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); | |
19779 | @@ -780,8 +725,7 @@ static void __init xen_finish_init_mappi | |
19780 | << PAGE_SHIFT, | |
19781 | PAGE_KERNEL_RO); | |
19782 | ||
19783 | - /* Disable the 'start_pfn' allocator. */ | |
19784 | - table_end = start_pfn; | |
19785 | + table_top = max(table_cur, start); | |
19786 | } | |
19787 | ||
19788 | static void __init init_gbpages(void) | |
19789 | @@ -794,126 +738,91 @@ static void __init init_gbpages(void) | |
19790 | #endif | |
19791 | } | |
19792 | ||
19793 | -#ifdef CONFIG_MEMTEST_BOOTPARAM | |
19794 | - | |
19795 | -static void __init memtest(unsigned long start_phys, unsigned long size, | |
19796 | - unsigned pattern) | |
19797 | +static unsigned long __init kernel_physical_mapping_init(unsigned long start, | |
19798 | + unsigned long end, | |
19799 | + unsigned long page_size_mask) | |
19800 | { | |
19801 | - unsigned long i; | |
19802 | - unsigned long *start; | |
19803 | - unsigned long start_bad; | |
19804 | - unsigned long last_bad; | |
19805 | - unsigned long val; | |
19806 | - unsigned long start_phys_aligned; | |
19807 | - unsigned long count; | |
19808 | - unsigned long incr; | |
19809 | - | |
19810 | - switch (pattern) { | |
19811 | - case 0: | |
19812 | - val = 0UL; | |
19813 | - break; | |
19814 | - case 1: | |
19815 | - val = -1UL; | |
19816 | - break; | |
19817 | - case 2: | |
19818 | - val = 0x5555555555555555UL; | |
19819 | - break; | |
19820 | - case 3: | |
19821 | - val = 0xaaaaaaaaaaaaaaaaUL; | |
19822 | - break; | |
19823 | - default: | |
19824 | - return; | |
19825 | - } | |
19826 | - | |
19827 | - incr = sizeof(unsigned long); | |
19828 | - start_phys_aligned = ALIGN(start_phys, incr); | |
19829 | - count = (size - (start_phys_aligned - start_phys))/incr; | |
19830 | - start = __va(start_phys_aligned); | |
19831 | - start_bad = 0; | |
19832 | - last_bad = 0; | |
19833 | - | |
19834 | - for (i = 0; i < count; i++) | |
19835 | - start[i] = val; | |
19836 | - for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { | |
19837 | - if (*start != val) { | |
19838 | - if (start_phys_aligned == last_bad + incr) { | |
19839 | - last_bad += incr; | |
19840 | - } else { | |
19841 | - if (start_bad) { | |
19842 | - printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved", | |
19843 | - val, start_bad, last_bad + incr); | |
19844 | - reserve_early(start_bad, last_bad - start_bad, "BAD RAM"); | |
19845 | - } | |
19846 | - start_bad = last_bad = start_phys_aligned; | |
19847 | - } | |
19848 | - } | |
19849 | - } | |
19850 | - if (start_bad) { | |
19851 | - printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved", | |
19852 | - val, start_bad, last_bad + incr); | |
19853 | - reserve_early(start_bad, last_bad - start_bad, "BAD RAM"); | |
19854 | - } | |
19855 | ||
19856 | -} | |
19857 | + unsigned long next, last_map_addr = end; | |
19858 | ||
19859 | -static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE; | |
19860 | + start = (unsigned long)__va(start); | |
19861 | + end = (unsigned long)__va(end); | |
19862 | ||
19863 | -static int __init parse_memtest(char *arg) | |
19864 | -{ | |
19865 | - if (arg) | |
19866 | - memtest_pattern = simple_strtoul(arg, NULL, 0); | |
19867 | - return 0; | |
19868 | -} | |
19869 | + for (; start < end; start = next) { | |
19870 | + pgd_t *pgd = pgd_offset_k(start); | |
19871 | + unsigned long pud_phys; | |
19872 | + pud_t *pud; | |
19873 | ||
19874 | -early_param("memtest", parse_memtest); | |
19875 | + next = (start + PGDIR_SIZE) & PGDIR_MASK; | |
19876 | + if (next > end) | |
19877 | + next = end; | |
19878 | ||
19879 | -static void __init early_memtest(unsigned long start, unsigned long end) | |
19880 | -{ | |
19881 | - u64 t_start, t_size; | |
19882 | - unsigned pattern; | |
19883 | + if (__pgd_val(*pgd)) { | |
19884 | + last_map_addr = phys_pud_update(pgd, __pa(start), | |
19885 | + __pa(end), page_size_mask); | |
19886 | + continue; | |
19887 | + } | |
19888 | ||
19889 | - if (!memtest_pattern) | |
19890 | - return; | |
19891 | + pud = alloc_low_page(&pud_phys); | |
19892 | + last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), | |
19893 | + page_size_mask); | |
19894 | + unmap_low_page(pud); | |
19895 | + | |
19896 | + if(!after_bootmem) { | |
19897 | + if (max_pfn_mapped) | |
19898 | + make_page_readonly(__va(pud_phys), | |
19899 | + XENFEAT_writable_page_tables); | |
19900 | + xen_l4_entry_update(pgd, __pgd(pud_phys | _PAGE_TABLE)); | |
19901 | + } else { | |
19902 | + make_page_readonly(pud, XENFEAT_writable_page_tables); | |
19903 | + spin_lock(&init_mm.page_table_lock); | |
19904 | + pgd_populate(&init_mm, pgd, __va(pud_phys)); | |
19905 | + spin_unlock(&init_mm.page_table_lock); | |
19906 | + } | |
19907 | + } | |
19908 | ||
19909 | - printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern); | |
19910 | - for (pattern = 0; pattern < memtest_pattern; pattern++) { | |
19911 | - t_start = start; | |
19912 | - t_size = 0; | |
19913 | - while (t_start < end) { | |
19914 | - t_start = find_e820_area_size(t_start, &t_size, 1); | |
19915 | + return last_map_addr; | |
19916 | +} | |
19917 | ||
19918 | - /* done ? */ | |
19919 | - if (t_start >= end) | |
19920 | - break; | |
19921 | - if (t_start + t_size > end) | |
19922 | - t_size = end - t_start; | |
19923 | +struct map_range { | |
19924 | + unsigned long start; | |
19925 | + unsigned long end; | |
19926 | + unsigned page_size_mask; | |
19927 | +}; | |
19928 | ||
19929 | - printk(KERN_CONT "\n %016llx - %016llx pattern %d", | |
19930 | - (unsigned long long)t_start, | |
19931 | - (unsigned long long)t_start + t_size, pattern); | |
19932 | +#define NR_RANGE_MR 5 | |
19933 | ||
19934 | - memtest(t_start, t_size, pattern); | |
19935 | +static int save_mr(struct map_range *mr, int nr_range, | |
19936 | + unsigned long start_pfn, unsigned long end_pfn, | |
19937 | + unsigned long page_size_mask) | |
19938 | +{ | |
19939 | ||
19940 | - t_start += t_size; | |
19941 | - } | |
19942 | + if (start_pfn < end_pfn) { | |
19943 | + if (nr_range >= NR_RANGE_MR) | |
19944 | + panic("run out of range for init_memory_mapping\n"); | |
19945 | + mr[nr_range].start = start_pfn<<PAGE_SHIFT; | |
19946 | + mr[nr_range].end = end_pfn<<PAGE_SHIFT; | |
19947 | + mr[nr_range].page_size_mask = page_size_mask; | |
19948 | + nr_range++; | |
19949 | } | |
19950 | - printk(KERN_CONT "\n"); | |
19951 | -} | |
19952 | -#else | |
19953 | -static void __init early_memtest(unsigned long start, unsigned long end) | |
19954 | -{ | |
19955 | + | |
19956 | + return nr_range; | |
19957 | } | |
19958 | -#endif | |
19959 | ||
19960 | /* | |
19961 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. | |
19962 | * This runs before bootmem is initialized and gets pages directly from | |
19963 | * the physical memory. To access them they are temporarily mapped. | |
19964 | */ | |
19965 | -unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end) | |
19966 | +unsigned long __init_refok init_memory_mapping(unsigned long start, | |
19967 | + unsigned long end) | |
19968 | { | |
19969 | - unsigned long next, last_map_addr = end; | |
19970 | - unsigned long start_phys = start, end_phys = end; | |
19971 | + unsigned long last_map_addr = 0; | |
19972 | + unsigned long page_size_mask = 0; | |
19973 | + unsigned long start_pfn, end_pfn; | |
19974 | + | |
19975 | + struct map_range mr[NR_RANGE_MR]; | |
19976 | + int nr_range, i; | |
19977 | ||
19978 | printk(KERN_INFO "init_memory_mapping\n"); | |
19979 | ||
19980 | @@ -924,51 +833,150 @@ unsigned long __init_refok init_memory_m | |
19981 | * memory mapped. Unfortunately this is done currently before the | |
19982 | * nodes are discovered. | |
19983 | */ | |
19984 | - if (!after_bootmem) { | |
19985 | + if (!after_bootmem) | |
19986 | init_gbpages(); | |
19987 | - find_early_table_space(end); | |
19988 | + | |
19989 | + if (direct_gbpages) | |
19990 | + page_size_mask |= 1 << PG_LEVEL_1G; | |
19991 | + if (cpu_has_pse) | |
19992 | + page_size_mask |= 1 << PG_LEVEL_2M; | |
19993 | + | |
19994 | + memset(mr, 0, sizeof(mr)); | |
19995 | + nr_range = 0; | |
19996 | + | |
19997 | + /* head if not big page alignment ?*/ | |
19998 | + start_pfn = start >> PAGE_SHIFT; | |
19999 | + end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT) | |
20000 | + << (PMD_SHIFT - PAGE_SHIFT); | |
20001 | + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | |
20002 | + | |
20003 | + /* big page (2M) range*/ | |
20004 | + start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) | |
20005 | + << (PMD_SHIFT - PAGE_SHIFT); | |
20006 | + end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT) | |
20007 | + << (PUD_SHIFT - PAGE_SHIFT); | |
20008 | + if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT))) | |
20009 | + end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)); | |
20010 | + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | |
20011 | + page_size_mask & (1<<PG_LEVEL_2M)); | |
20012 | + | |
20013 | + /* big page (1G) range */ | |
20014 | + start_pfn = end_pfn; | |
20015 | + end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); | |
20016 | + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | |
20017 | + page_size_mask & | |
20018 | + ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); | |
20019 | + | |
20020 | + /* tail is not big page (1G) alignment */ | |
20021 | + start_pfn = end_pfn; | |
20022 | + end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | |
20023 | + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | |
20024 | + page_size_mask & (1<<PG_LEVEL_2M)); | |
20025 | + | |
20026 | + /* tail is not big page (2M) alignment */ | |
20027 | + start_pfn = end_pfn; | |
20028 | + end_pfn = end>>PAGE_SHIFT; | |
20029 | + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | |
20030 | + | |
20031 | + /* try to merge same page size and continuous */ | |
20032 | + for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { | |
20033 | + unsigned long old_start; | |
20034 | + if (mr[i].end != mr[i+1].start || | |
20035 | + mr[i].page_size_mask != mr[i+1].page_size_mask) | |
20036 | + continue; | |
20037 | + /* move it */ | |
20038 | + old_start = mr[i].start; | |
20039 | + memmove(&mr[i], &mr[i+1], | |
20040 | + (nr_range - 1 - i) * sizeof (struct map_range)); | |
20041 | + mr[i--].start = old_start; | |
20042 | + nr_range--; | |
20043 | } | |
20044 | ||
20045 | - start = (unsigned long)__va(start); | |
20046 | - end = (unsigned long)__va(end); | |
20047 | + for (i = 0; i < nr_range; i++) | |
20048 | + printk(KERN_DEBUG " %010lx - %010lx page %s\n", | |
20049 | + mr[i].start, mr[i].end, | |
20050 | + (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( | |
20051 | + (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); | |
20052 | ||
20053 | - for (; start < end; start = next) { | |
20054 | - pgd_t *pgd = pgd_offset_k(start); | |
20055 | - unsigned long pud_phys; | |
20056 | - pud_t *pud; | |
20057 | + if (!after_bootmem) | |
20058 | + find_early_table_space(end); | |
20059 | ||
20060 | - if (after_bootmem) | |
20061 | - pud = pud_offset(pgd, start & PGDIR_MASK); | |
20062 | - else | |
20063 | - pud = alloc_static_page(&pud_phys); | |
20064 | - next = start + PGDIR_SIZE; | |
20065 | - if (next > end) | |
20066 | - next = end; | |
20067 | - last_map_addr = phys_pud_init(pud, __pa(start), __pa(next)); | |
20068 | - if (!after_bootmem) { | |
20069 | - early_make_page_readonly(pud, XENFEAT_writable_page_tables); | |
20070 | - set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); | |
20071 | + if (!start) { | |
20072 | + unsigned long addr, va = __START_KERNEL_map; | |
20073 | + unsigned long *page = (unsigned long *)init_level4_pgt; | |
20074 | + | |
20075 | + /* Kill mapping of memory below _text. */ | |
20076 | + while (va < (unsigned long)&_text) { | |
20077 | + if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0)) | |
20078 | + BUG(); | |
20079 | + va += PAGE_SIZE; | |
20080 | + } | |
20081 | + | |
20082 | + /* Blow away any spurious initial mappings. */ | |
20083 | + va = __START_KERNEL_map + (table_start << PAGE_SHIFT); | |
20084 | + addr = page[pgd_index(va)]; | |
20085 | + addr_to_page(addr, page); | |
20086 | + addr = page[pud_index(va)]; | |
20087 | + addr_to_page(addr, page); | |
20088 | + while (pmd_index(va) | pte_index(va)) { | |
20089 | + if (pmd_none(*(pmd_t *)&page[pmd_index(va)])) | |
20090 | + break; | |
20091 | + if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0)) | |
20092 | + BUG(); | |
20093 | + va += PAGE_SIZE; | |
20094 | } | |
20095 | } | |
20096 | ||
20097 | - if (!after_bootmem) { | |
20098 | - BUG_ON(start_pfn != table_end); | |
20099 | + for (i = 0; i < nr_range; i++) | |
20100 | + last_map_addr = kernel_physical_mapping_init( | |
20101 | + mr[i].start, mr[i].end, | |
20102 | + mr[i].page_size_mask); | |
20103 | + | |
20104 | + BUG_ON(table_cur > table_top); | |
20105 | + if (!start) | |
20106 | xen_finish_init_mapping(); | |
20107 | - } | |
20108 | + else if (table_cur < table_top) | |
20109 | + /* Disable the 'table_cur' allocator. */ | |
20110 | + table_top = table_cur; | |
20111 | ||
20112 | __flush_tlb_all(); | |
20113 | ||
20114 | - if (!after_bootmem) | |
20115 | + if (!after_bootmem && table_top > table_start) | |
20116 | reserve_early(table_start << PAGE_SHIFT, | |
20117 | - table_end << PAGE_SHIFT, "PGTABLE"); | |
20118 | + table_top << PAGE_SHIFT, "PGTABLE"); | |
20119 | + | |
20120 | + printk(KERN_INFO "last_map_addr: %lx end: %lx\n", | |
20121 | + last_map_addr, end); | |
20122 | ||
20123 | if (!after_bootmem) | |
20124 | - early_memtest(start_phys, end_phys); | |
20125 | + early_memtest(start, end); | |
20126 | ||
20127 | - return last_map_addr; | |
20128 | + return last_map_addr >> PAGE_SHIFT; | |
20129 | } | |
20130 | ||
20131 | #ifndef CONFIG_NUMA | |
20132 | +void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |
20133 | +{ | |
20134 | + unsigned long bootmap_size, bootmap; | |
20135 | + | |
20136 | + e820_register_active_regions(0, start_pfn, end_pfn); | |
20137 | +#ifdef CONFIG_XEN | |
20138 | + if (end_pfn > xen_start_info->nr_pages) | |
20139 | + end_pfn = xen_start_info->nr_pages; | |
20140 | +#endif | |
20141 | + bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; | |
20142 | + bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size, | |
20143 | + PAGE_SIZE); | |
20144 | + if (bootmap == -1L) | |
20145 | + panic("Cannot find bootmem map of size %ld\n", bootmap_size); | |
20146 | + /* don't touch min_low_pfn */ | |
20147 | + bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, | |
20148 | + 0, end_pfn); | |
20149 | + free_bootmem_with_active_regions(0, end_pfn); | |
20150 | + early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); | |
20151 | + reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); | |
20152 | +} | |
20153 | + | |
20154 | void __init paging_init(void) | |
20155 | { | |
20156 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | |
20157 | @@ -976,9 +984,9 @@ void __init paging_init(void) | |
20158 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | |
20159 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; | |
20160 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | |
20161 | - max_zone_pfns[ZONE_NORMAL] = end_pfn; | |
20162 | + max_zone_pfns[ZONE_NORMAL] = max_pfn; | |
20163 | ||
20164 | - memory_present(0, 0, end_pfn); | |
20165 | + memory_present(0, 0, max_pfn); | |
20166 | sparse_init(); | |
20167 | free_area_init_nodes(max_zone_pfns); | |
20168 | ||
20169 | @@ -1069,8 +1077,8 @@ void __init mem_init(void) | |
20170 | init_page_count(pfn_to_page(pfn)); | |
20171 | totalram_pages++; | |
20172 | } | |
20173 | - reservedpages = end_pfn - totalram_pages - | |
20174 | - absent_pages_in_range(0, end_pfn); | |
20175 | + reservedpages = max_pfn - totalram_pages - | |
20176 | + absent_pages_in_range(0, max_pfn); | |
20177 | after_bootmem = 1; | |
20178 | ||
20179 | codesize = (unsigned long) &_etext - (unsigned long) &_text; | |
20180 | @@ -1089,7 +1097,7 @@ void __init mem_init(void) | |
20181 | printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " | |
20182 | "%ldk reserved, %ldk data, %ldk init)\n", | |
20183 | (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), | |
20184 | - end_pfn << (PAGE_SHIFT-10), | |
20185 | + max_pfn << (PAGE_SHIFT-10), | |
20186 | codesize >> 10, | |
20187 | reservedpages << (PAGE_SHIFT-10), | |
20188 | datasize >> 10, | |
20189 | @@ -1152,6 +1160,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data); | |
20190 | void mark_rodata_ro(void) | |
20191 | { | |
20192 | unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); | |
20193 | + unsigned long rodata_start = | |
20194 | + ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; | |
20195 | + | |
20196 | +#ifdef CONFIG_DYNAMIC_FTRACE | |
20197 | + /* Dynamic tracing modifies the kernel text section */ | |
20198 | + start = rodata_start; | |
20199 | +#endif | |
20200 | ||
20201 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", | |
20202 | (end - start) >> 10); | |
20203 | @@ -1161,8 +1176,7 @@ void mark_rodata_ro(void) | |
20204 | * The rodata section (but not the kernel text!) should also be | |
20205 | * not-executable. | |
20206 | */ | |
20207 | - start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; | |
20208 | - set_memory_nx(start, (end - start) >> PAGE_SHIFT); | |
20209 | + set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT); | |
20210 | ||
20211 | rodata_test(); | |
20212 | ||
20213 | @@ -1184,24 +1198,26 @@ void free_initrd_mem(unsigned long start | |
20214 | } | |
20215 | #endif | |
20216 | ||
20217 | -void __init reserve_bootmem_generic(unsigned long phys, unsigned len) | |
20218 | +int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, | |
20219 | + int flags) | |
20220 | { | |
20221 | #ifdef CONFIG_NUMA | |
20222 | int nid, next_nid; | |
20223 | + int ret; | |
20224 | #endif | |
20225 | unsigned long pfn = phys >> PAGE_SHIFT; | |
20226 | ||
20227 | - if (pfn >= end_pfn) { | |
20228 | + if (pfn >= max_pfn) { | |
20229 | /* | |
20230 | * This can happen with kdump kernels when accessing | |
20231 | * firmware tables: | |
20232 | */ | |
20233 | if (pfn < max_pfn_mapped) | |
20234 | - return; | |
20235 | + return -EFAULT; | |
20236 | ||
20237 | - printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n", | |
20238 | + printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %lu\n", | |
20239 | phys, len); | |
20240 | - return; | |
20241 | + return -EFAULT; | |
20242 | } | |
20243 | ||
20244 | /* Should check here against the e820 map to avoid double free */ | |
20245 | @@ -1209,9 +1225,13 @@ void __init reserve_bootmem_generic(unsi | |
20246 | nid = phys_to_nid(phys); | |
20247 | next_nid = phys_to_nid(phys + len - 1); | |
20248 | if (nid == next_nid) | |
20249 | - reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT); | |
20250 | + ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags); | |
20251 | else | |
20252 | - reserve_bootmem(phys, len, BOOTMEM_DEFAULT); | |
20253 | + ret = reserve_bootmem(phys, len, flags); | |
20254 | + | |
20255 | + if (ret != 0) | |
20256 | + return ret; | |
20257 | + | |
20258 | #else | |
20259 | reserve_bootmem(phys, len, BOOTMEM_DEFAULT); | |
20260 | #endif | |
20261 | @@ -1222,6 +1242,8 @@ void __init reserve_bootmem_generic(unsi | |
20262 | set_dma_reserve(dma_reserve); | |
20263 | } | |
20264 | #endif | |
20265 | + | |
20266 | + return 0; | |
20267 | } | |
20268 | ||
20269 | int kern_addr_valid(unsigned long addr) | |
20270 | @@ -1326,7 +1348,7 @@ vmemmap_populate(struct page *start_page | |
20271 | pmd_t *pmd; | |
20272 | ||
20273 | for (; addr < end; addr = next) { | |
20274 | - next = pmd_addr_end(addr, end); | |
20275 | + void *p = NULL; | |
20276 | ||
20277 | pgd = vmemmap_pgd_populate(addr, node); | |
20278 | if (!pgd) | |
20279 | @@ -1336,33 +1358,51 @@ vmemmap_populate(struct page *start_page | |
20280 | if (!pud) | |
20281 | return -ENOMEM; | |
20282 | ||
20283 | - pmd = pmd_offset(pud, addr); | |
20284 | - if (pmd_none(*pmd)) { | |
20285 | - pte_t entry; | |
20286 | - void *p; | |
20287 | + if (!cpu_has_pse) { | |
20288 | + next = (addr + PAGE_SIZE) & PAGE_MASK; | |
20289 | + pmd = vmemmap_pmd_populate(pud, addr, node); | |
20290 | + | |
20291 | + if (!pmd) | |
20292 | + return -ENOMEM; | |
20293 | + | |
20294 | + p = vmemmap_pte_populate(pmd, addr, node); | |
20295 | ||
20296 | - p = vmemmap_alloc_block(PMD_SIZE, node); | |
20297 | if (!p) | |
20298 | return -ENOMEM; | |
20299 | ||
20300 | - entry = pfn_pte(__pa(p) >> PAGE_SHIFT, | |
20301 | - PAGE_KERNEL_LARGE); | |
20302 | - set_pmd(pmd, __pmd_ma(__pte_val(entry))); | |
20303 | - | |
20304 | - /* check to see if we have contiguous blocks */ | |
20305 | - if (p_end != p || node_start != node) { | |
20306 | - if (p_start) | |
20307 | - printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", | |
20308 | - addr_start, addr_end-1, p_start, p_end-1, node_start); | |
20309 | - addr_start = addr; | |
20310 | - node_start = node; | |
20311 | - p_start = p; | |
20312 | - } | |
20313 | - addr_end = addr + PMD_SIZE; | |
20314 | - p_end = p + PMD_SIZE; | |
20315 | + addr_end = addr + PAGE_SIZE; | |
20316 | + p_end = p + PAGE_SIZE; | |
20317 | } else { | |
20318 | - vmemmap_verify((pte_t *)pmd, node, addr, next); | |
20319 | + next = pmd_addr_end(addr, end); | |
20320 | + | |
20321 | + pmd = pmd_offset(pud, addr); | |
20322 | + if (pmd_none(*pmd)) { | |
20323 | + pte_t entry; | |
20324 | + | |
20325 | + p = vmemmap_alloc_block(PMD_SIZE, node); | |
20326 | + if (!p) | |
20327 | + return -ENOMEM; | |
20328 | + | |
20329 | + entry = pfn_pte(__pa(p) >> PAGE_SHIFT, | |
20330 | + PAGE_KERNEL_LARGE); | |
20331 | + set_pmd(pmd, __pmd_ma(__pte_val(entry))); | |
20332 | + | |
20333 | + /* check to see if we have contiguous blocks */ | |
20334 | + if (p_end != p || node_start != node) { | |
20335 | + if (p_start) | |
20336 | + printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", | |
20337 | + addr_start, addr_end-1, p_start, p_end-1, node_start); | |
20338 | + addr_start = addr; | |
20339 | + node_start = node; | |
20340 | + p_start = p; | |
20341 | + } | |
20342 | + | |
20343 | + addr_end = addr + PMD_SIZE; | |
20344 | + p_end = p + PMD_SIZE; | |
20345 | + } else | |
20346 | + vmemmap_verify((pte_t *)pmd, node, addr, next); | |
20347 | } | |
20348 | + | |
20349 | } | |
20350 | return 0; | |
20351 | } | |
82094b55 AF |
20352 | --- sle11-2009-10-16.orig/arch/x86/mm/ioremap-xen.c 2009-03-16 16:38:05.000000000 +0100 |
20353 | +++ sle11-2009-10-16/arch/x86/mm/ioremap-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
20354 | @@ -13,6 +13,7 @@ |
20355 | #include <linux/pfn.h> | |
20356 | #include <linux/slab.h> | |
20357 | #include <linux/vmalloc.h> | |
20358 | +#include <linux/mmiotrace.h> | |
20359 | ||
20360 | #include <asm/cacheflush.h> | |
20361 | #include <asm/e820.h> | |
20362 | @@ -274,7 +275,8 @@ int ioremap_check_change_attr(unsigned l | |
20363 | for (sz = rc = 0; sz < size && !rc; ++mfn, sz += PAGE_SIZE) { | |
20364 | unsigned long pfn = mfn_to_local_pfn(mfn); | |
20365 | ||
20366 | - if (pfn >= max_pfn_mapped) | |
20367 | + if (pfn >= max_low_pfn_mapped && | |
20368 | + (pfn < (1UL<<(32 - PAGE_SHIFT)) || pfn >= max_pfn_mapped)) | |
20369 | continue; | |
20370 | rc = ioremap_change_attr((unsigned long)__va(pfn << PAGE_SHIFT), | |
20371 | PAGE_SIZE, prot_val); | |
20372 | @@ -297,11 +299,14 @@ static void __iomem *__ioremap_caller(re | |
20373 | { | |
20374 | unsigned long mfn, offset, vaddr; | |
20375 | resource_size_t last_addr; | |
20376 | + const resource_size_t unaligned_phys_addr = phys_addr; | |
20377 | + const unsigned long unaligned_size = size; | |
20378 | struct vm_struct *area; | |
20379 | unsigned long new_prot_val; | |
20380 | pgprot_t prot; | |
20381 | int retval; | |
20382 | domid_t domid = DOMID_IO; | |
20383 | + void __iomem *ret_addr; | |
20384 | ||
20385 | /* Don't allow wraparound or zero size */ | |
20386 | last_addr = phys_addr + size - 1; | |
20387 | @@ -318,7 +323,7 @@ static void __iomem *__ioremap_caller(re | |
20388 | /* | |
20389 | * Don't remap the low PCI/ISA area, it's always mapped.. | |
20390 | */ | |
20391 | - if (is_initial_xendomain() && last_addr < ISA_END_ADDRESS) | |
20392 | + if (is_initial_xendomain() && is_ISA_range(phys_addr, last_addr)) | |
20393 | return (__force void __iomem *)isa_bus_to_virt((unsigned long)phys_addr); | |
20394 | ||
20395 | /* | |
20396 | @@ -342,7 +347,7 @@ static void __iomem *__ioremap_caller(re | |
20397 | phys_addr &= PAGE_MASK; | |
20398 | size = PAGE_ALIGN(last_addr+1) - phys_addr; | |
20399 | ||
20400 | - retval = reserve_memtype(phys_addr, phys_addr + size, | |
20401 | + retval = reserve_memtype(phys_addr, (u64)phys_addr + size, | |
20402 | prot_val, &new_prot_val); | |
20403 | if (retval) { | |
20404 | pr_debug("Warning: reserve_memtype returned %d\n", retval); | |
20405 | @@ -410,7 +415,10 @@ static void __iomem *__ioremap_caller(re | |
20406 | return NULL; | |
20407 | } | |
20408 | ||
20409 | - return (void __iomem *) (vaddr + offset); | |
20410 | + ret_addr = (void __iomem *) (vaddr + offset); | |
20411 | + mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); | |
20412 | + | |
20413 | + return ret_addr; | |
20414 | } | |
20415 | ||
20416 | /** | |
20417 | @@ -438,7 +446,7 @@ void __iomem *ioremap_nocache(resource_s | |
20418 | { | |
20419 | /* | |
20420 | * Ideally, this should be: | |
20421 | - * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; | |
20422 | + * pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; | |
20423 | * | |
20424 | * Till we fix all X drivers to use ioremap_wc(), we will use | |
20425 | * UC MINUS. | |
20426 | @@ -462,7 +470,7 @@ EXPORT_SYMBOL(ioremap_nocache); | |
20427 | */ | |
20428 | void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) | |
20429 | { | |
20430 | - if (pat_wc_enabled) | |
20431 | + if (pat_enabled) | |
20432 | return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, | |
20433 | __builtin_return_address(0)); | |
20434 | else | |
20435 | @@ -502,6 +510,14 @@ static void __iomem *ioremap_default(res | |
20436 | } | |
20437 | #endif | |
20438 | ||
20439 | +void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, | |
20440 | + unsigned long prot_val) | |
20441 | +{ | |
20442 | + return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK), | |
20443 | + __builtin_return_address(0)); | |
20444 | +} | |
20445 | +EXPORT_SYMBOL(ioremap_prot); | |
20446 | + | |
20447 | /** | |
20448 | * iounmap - Free a IO remapping | |
20449 | * @addr: virtual address from ioremap_* | |
20450 | @@ -526,6 +542,8 @@ void iounmap(volatile void __iomem *addr | |
20451 | addr = (volatile void __iomem *) | |
20452 | (PAGE_MASK & (unsigned long __force)addr); | |
20453 | ||
20454 | + mmiotrace_iounmap(addr); | |
20455 | + | |
20456 | /* Use the vm area unlocked, assuming the caller | |
20457 | ensures there isn't another iounmap for the same address | |
20458 | in parallel. Reuse of the virtual address is prevented by | |
20459 | @@ -533,7 +551,7 @@ void iounmap(volatile void __iomem *addr | |
20460 | cpa takes care of the direct mappings. */ | |
20461 | read_lock(&vmlist_lock); | |
20462 | for (p = vmlist; p; p = p->next) { | |
20463 | - if (p->addr == addr) | |
20464 | + if (p->addr == (void __force *)addr) | |
20465 | break; | |
20466 | } | |
20467 | read_unlock(&vmlist_lock); | |
20468 | @@ -547,7 +565,7 @@ void iounmap(volatile void __iomem *addr | |
20469 | free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p)); | |
20470 | ||
20471 | /* Finally remove it */ | |
20472 | - o = remove_vm_area((void *)addr); | |
20473 | + o = remove_vm_area((void __force *)addr); | |
20474 | BUG_ON(p != o || o == NULL); | |
20475 | kfree(p); | |
20476 | } | |
20477 | @@ -567,7 +585,7 @@ void *xlate_dev_mem_ptr(unsigned long ph | |
20478 | if (page_is_ram(start >> PAGE_SHIFT)) | |
20479 | return __va(phys); | |
20480 | ||
20481 | - addr = (void *)ioremap_default(start, PAGE_SIZE); | |
20482 | + addr = (void __force *)ioremap_default(start, PAGE_SIZE); | |
20483 | if (addr) | |
20484 | addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); | |
20485 | ||
20486 | @@ -595,8 +613,7 @@ static int __init early_ioremap_debug_se | |
20487 | early_param("early_ioremap_debug", early_ioremap_debug_setup); | |
20488 | ||
20489 | static __initdata int after_paging_init; | |
20490 | -static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] | |
20491 | - __section(.bss.page_aligned); | |
20492 | +static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; | |
20493 | ||
20494 | #ifdef CONFIG_X86_32 | |
20495 | static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) | |
20496 | @@ -695,10 +712,11 @@ static void __init __early_set_fixmap(en | |
20497 | return; | |
20498 | } | |
20499 | pte = early_ioremap_pte(addr); | |
20500 | + | |
20501 | if (pgprot_val(flags)) | |
20502 | set_pte(pte, pfn_pte_ma(phys >> PAGE_SHIFT, flags)); | |
20503 | else | |
20504 | - pte_clear(NULL, addr, pte); | |
20505 | + pte_clear(&init_mm, addr, pte); | |
20506 | __flush_tlb_one(addr); | |
20507 | } | |
20508 | ||
20509 | @@ -726,13 +744,11 @@ static int __init check_early_ioremap_le | |
20510 | { | |
20511 | if (!early_ioremap_nested) | |
20512 | return 0; | |
20513 | - | |
20514 | - printk(KERN_WARNING | |
20515 | + WARN(1, KERN_WARNING | |
20516 | "Debug warning: early ioremap leak of %d areas detected.\n", | |
20517 | - early_ioremap_nested); | |
20518 | + early_ioremap_nested); | |
20519 | printk(KERN_WARNING | |
20520 | - "please boot with early_ioremap_debug and report the dmesg.\n"); | |
20521 | - WARN_ON(1); | |
20522 | + "please boot with early_ioremap_debug and report the dmesg.\n"); | |
20523 | ||
20524 | return 1; | |
20525 | } | |
82094b55 AF |
20526 | --- sle11-2009-10-16.orig/arch/x86/mm/pageattr-xen.c 2009-03-16 16:38:05.000000000 +0100 |
20527 | +++ sle11-2009-10-16/arch/x86/mm/pageattr-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
20528 | @@ -34,6 +34,47 @@ struct cpa_data { |
20529 | unsigned force_split : 1; | |
20530 | }; | |
20531 | ||
20532 | +#ifdef CONFIG_PROC_FS | |
20533 | +static unsigned long direct_pages_count[PG_LEVEL_NUM]; | |
20534 | + | |
20535 | +void update_page_count(int level, unsigned long pages) | |
20536 | +{ | |
20537 | + unsigned long flags; | |
20538 | + | |
20539 | + /* Protect against CPA */ | |
20540 | + spin_lock_irqsave(&pgd_lock, flags); | |
20541 | + direct_pages_count[level] += pages; | |
20542 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
20543 | +} | |
20544 | + | |
20545 | +static void split_page_count(int level) | |
20546 | +{ | |
20547 | + direct_pages_count[level]--; | |
20548 | + direct_pages_count[level - 1] += PTRS_PER_PTE; | |
20549 | +} | |
20550 | + | |
20551 | +int arch_report_meminfo(char *page) | |
20552 | +{ | |
20553 | + int n = sprintf(page, "DirectMap4k: %8lu kB\n", | |
20554 | + direct_pages_count[PG_LEVEL_4K] << 2); | |
20555 | +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | |
20556 | + n += sprintf(page + n, "DirectMap2M: %8lu kB\n", | |
20557 | + direct_pages_count[PG_LEVEL_2M] << 11); | |
20558 | +#else | |
20559 | + n += sprintf(page + n, "DirectMap4M: %8lu kB\n", | |
20560 | + direct_pages_count[PG_LEVEL_2M] << 12); | |
20561 | +#endif | |
20562 | +#ifdef CONFIG_X86_64 | |
20563 | + if (direct_gbpages) | |
20564 | + n += sprintf(page + n, "DirectMap1G: %8lu kB\n", | |
20565 | + direct_pages_count[PG_LEVEL_1G] << 20); | |
20566 | +#endif | |
20567 | + return n; | |
20568 | +} | |
20569 | +#else | |
20570 | +static inline void split_page_count(int level) { } | |
20571 | +#endif | |
20572 | + | |
20573 | #ifdef CONFIG_X86_64 | |
20574 | ||
20575 | static inline unsigned long highmap_start_pfn(void) | |
20576 | @@ -106,7 +147,7 @@ static void cpa_flush_all(unsigned long | |
20577 | { | |
20578 | BUG_ON(irqs_disabled()); | |
20579 | ||
20580 | - on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1); | |
20581 | + on_each_cpu(__cpa_flush_all, (void *) cache, 1); | |
20582 | } | |
20583 | ||
20584 | static void __cpa_flush_range(void *arg) | |
20585 | @@ -127,7 +168,7 @@ static void cpa_flush_range(unsigned lon | |
20586 | BUG_ON(irqs_disabled()); | |
20587 | WARN_ON(PAGE_ALIGN(start) != start); | |
20588 | ||
20589 | - on_each_cpu(__cpa_flush_range, NULL, 1, 1); | |
20590 | + on_each_cpu(__cpa_flush_range, NULL, 1); | |
20591 | ||
20592 | if (!cache) | |
20593 | return; | |
20594 | @@ -229,6 +270,7 @@ pte_t *lookup_address(unsigned long addr | |
20595 | ||
20596 | return pte_offset_kernel(pmd, address); | |
20597 | } | |
20598 | +EXPORT_SYMBOL_GPL(lookup_address); | |
20599 | ||
20600 | /* | |
20601 | * Set the new pmd in all the pgds we know about: | |
20602 | @@ -509,6 +551,16 @@ static int split_large_page(pte_t *kpte, | |
20603 | } | |
20604 | #endif | |
20605 | ||
20606 | + if (address >= (unsigned long)__va(0) && | |
20607 | + address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT)) | |
20608 | + split_page_count(level); | |
20609 | + | |
20610 | +#ifdef CONFIG_X86_64 | |
20611 | + if (address >= (unsigned long)__va(1UL<<32) && | |
20612 | + address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) | |
20613 | + split_page_count(level); | |
20614 | +#endif | |
20615 | + | |
20616 | /* | |
20617 | * Get the target mfn from the original entry: | |
20618 | */ | |
20619 | @@ -566,10 +618,9 @@ repeat: | |
20620 | if (!__pte_val(old_pte)) { | |
20621 | if (!primary) | |
20622 | return 0; | |
20623 | - printk(KERN_WARNING "CPA: called for zero pte. " | |
20624 | + WARN(1, KERN_WARNING "CPA: called for zero pte. " | |
20625 | "vaddr = %lx cpa->vaddr = %lx\n", address, | |
20626 | cpa->vaddr); | |
20627 | - WARN_ON(1); | |
20628 | return -EINVAL; | |
20629 | } | |
20630 | ||
20631 | @@ -634,15 +685,24 @@ static int cpa_process_alias(struct cpa_ | |
20632 | struct cpa_data alias_cpa; | |
20633 | int ret = 0; | |
20634 | ||
20635 | - if (cpa->pfn > max_pfn_mapped) | |
20636 | + if (cpa->pfn >= max_pfn_mapped) | |
20637 | return 0; | |
20638 | ||
20639 | +#ifdef CONFIG_X86_64 | |
20640 | + if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT))) | |
20641 | + return 0; | |
20642 | +#endif | |
20643 | /* | |
20644 | * No need to redo, when the primary call touched the direct | |
20645 | * mapping already: | |
20646 | */ | |
20647 | - if (!within(cpa->vaddr, PAGE_OFFSET, | |
20648 | - PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) { | |
20649 | + if (!(within(cpa->vaddr, PAGE_OFFSET, | |
20650 | + PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT)) | |
20651 | +#ifdef CONFIG_X86_64 | |
20652 | + || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32), | |
20653 | + PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)) | |
20654 | +#endif | |
20655 | + )) { | |
20656 | ||
20657 | alias_cpa = *cpa; | |
20658 | alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); | |
20659 | @@ -796,6 +856,51 @@ static inline int change_page_attr_clear | |
20660 | return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0); | |
20661 | } | |
20662 | ||
20663 | +#ifdef CONFIG_XEN | |
20664 | +static void _free_memtype(u64 pstart, u64 pend) | |
20665 | +{ | |
20666 | + u64 pa = pstart &= __PHYSICAL_MASK; | |
20667 | + u64 ma = phys_to_machine(pa); | |
20668 | + | |
20669 | + while ((pa += PAGE_SIZE) < pend) { | |
20670 | + if (phys_to_machine(pa) != ma + (pa - pstart)) { | |
20671 | + free_memtype(ma, ma + (pa - pstart)); | |
20672 | + pstart = pa; | |
20673 | + ma = phys_to_machine(pa); | |
20674 | + } | |
20675 | + } | |
20676 | + free_memtype(ma, ma + (pend - pstart)); | |
20677 | +} | |
20678 | +#define free_memtype _free_memtype | |
20679 | + | |
20680 | +static int _reserve_memtype(u64 pstart, u64 pend, unsigned long req_type) | |
20681 | +{ | |
20682 | + u64 pcur = pstart &= __PHYSICAL_MASK, pa = pcur; | |
20683 | + u64 ma = phys_to_machine(pa); | |
20684 | + int rc = 0; | |
20685 | + | |
20686 | + while ((pa += PAGE_SIZE) < pend) { | |
20687 | + if (phys_to_machine(pa) != ma + (pa - pcur)) { | |
20688 | + rc = reserve_memtype(ma, ma + (pa - pcur), | |
20689 | + req_type, NULL); | |
20690 | + if (rc) | |
20691 | + break; | |
20692 | + pcur = pa; | |
20693 | + ma = phys_to_machine(pa); | |
20694 | + } | |
20695 | + } | |
20696 | + if (likely(!rc)) | |
20697 | + rc = reserve_memtype(ma, ma + (pend - pcur), req_type, NULL); | |
20698 | + | |
20699 | + if (unlikely(!rc) && pstart < pcur) | |
20700 | + _free_memtype(pstart, pcur); | |
20701 | + | |
20702 | + return rc; | |
20703 | +} | |
20704 | +#define reserve_memtype(s, e, r, n) \ | |
20705 | + _reserve_memtype(s, e, BUILD_BUG_ON_ZERO(n) ?: (r)) | |
20706 | +#endif | |
20707 | + | |
20708 | int _set_memory_uc(unsigned long addr, int numpages) | |
20709 | { | |
20710 | /* | |
20711 | @@ -810,7 +915,7 @@ int set_memory_uc(unsigned long addr, in | |
20712 | /* | |
20713 | * for now UC MINUS. see comments in ioremap_nocache() | |
20714 | */ | |
20715 | - if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, | |
20716 | + if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, | |
20717 | _PAGE_CACHE_UC_MINUS, NULL)) | |
20718 | return -EINVAL; | |
20719 | ||
20720 | @@ -826,10 +931,10 @@ int _set_memory_wc(unsigned long addr, i | |
20721 | ||
20722 | int set_memory_wc(unsigned long addr, int numpages) | |
20723 | { | |
20724 | - if (!pat_wc_enabled) | |
20725 | + if (!pat_enabled) | |
20726 | return set_memory_uc(addr, numpages); | |
20727 | ||
20728 | - if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, | |
20729 | + if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, | |
20730 | _PAGE_CACHE_WC, NULL)) | |
20731 | return -EINVAL; | |
20732 | ||
20733 | @@ -845,7 +950,7 @@ int _set_memory_wb(unsigned long addr, i | |
20734 | ||
20735 | int set_memory_wb(unsigned long addr, int numpages) | |
20736 | { | |
20737 | - free_memtype(addr, addr + numpages * PAGE_SIZE); | |
20738 | + free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); | |
20739 | ||
20740 | return _set_memory_wb(addr, numpages); | |
20741 | } | |
82094b55 AF |
20742 | --- sle11-2009-10-16.orig/arch/x86/mm/pat-xen.c 2009-03-16 16:38:05.000000000 +0100 |
20743 | +++ sle11-2009-10-16/arch/x86/mm/pat-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
20744 | @@ -12,6 +12,8 @@ |
20745 | #include <linux/gfp.h> | |
20746 | #include <linux/fs.h> | |
20747 | #include <linux/bootmem.h> | |
20748 | +#include <linux/debugfs.h> | |
20749 | +#include <linux/seq_file.h> | |
20750 | ||
20751 | #include <asm/msr.h> | |
20752 | #include <asm/tlbflush.h> | |
20753 | @@ -26,11 +28,11 @@ | |
20754 | #include <asm/io.h> | |
20755 | ||
20756 | #ifdef CONFIG_X86_PAT | |
20757 | -int __read_mostly pat_wc_enabled = 1; | |
20758 | +int __read_mostly pat_enabled = 1; | |
20759 | ||
20760 | void __cpuinit pat_disable(char *reason) | |
20761 | { | |
20762 | - pat_wc_enabled = 0; | |
20763 | + pat_enabled = 0; | |
20764 | printk(KERN_INFO "%s\n", reason); | |
20765 | } | |
20766 | ||
20767 | @@ -42,6 +44,19 @@ static int __init nopat(char *str) | |
20768 | early_param("nopat", nopat); | |
20769 | #endif | |
20770 | ||
20771 | +/* Handler for the "debugpat" parameter registered below: sets debug_enable, which gates dprintk(). The str argument is not used. */ | |
20772 | +static int debug_enable; | |
20773 | +static int __init pat_debug_setup(char *str) | |
20774 | +{ | |
20775 | + debug_enable = 1; | |
20776 | + return 0; | |
20777 | +} | |
20778 | +__setup("debugpat", pat_debug_setup); | |
20779 | +/* dprintk(): printk at KERN_INFO level, emitted only while debug_enable is non-zero. */ | |
20780 | +#define dprintk(fmt, arg...) \ | |
20781 | + do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) | |
20782 | + | |
20783 | + | |
20784 | static u64 __read_mostly boot_pat_state; | |
20785 | ||
20786 | enum { | |
20787 | @@ -53,24 +68,25 @@ enum { | |
20788 | PAT_UC_MINUS = 7, /* UC, but can be overriden by MTRR */ | |
20789 | }; | |
20790 | ||
20791 | -#define PAT(x,y) ((u64)PAT_ ## y << ((x)*8)) | |
20792 | +#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) | |
20793 | ||
20794 | void pat_init(void) | |
20795 | { | |
20796 | u64 pat; | |
20797 | ||
20798 | - if (!pat_wc_enabled) | |
20799 | + if (!pat_enabled) | |
20800 | return; | |
20801 | ||
20802 | /* Paranoia check. */ | |
20803 | - if (!cpu_has_pat) { | |
20804 | - printk(KERN_ERR "PAT enabled, but CPU feature cleared\n"); | |
20805 | + if (!cpu_has_pat && boot_pat_state) { | |
20806 | /* | |
20807 | - * Panic if this happens on the secondary CPU, and we | |
20808 | + * If this happens we are on a secondary CPU, but | |
20809 | * switched to PAT on the boot CPU. We have no way to | |
20810 | * undo PAT. | |
20811 | - */ | |
20812 | - BUG_ON(boot_pat_state); | |
20813 | + */ | |
20814 | + printk(KERN_ERR "PAT enabled, " | |
20815 | + "but not supported by secondary CPU\n"); | |
20816 | + BUG(); | |
20817 | } | |
20818 | ||
20819 | #ifndef CONFIG_XEN | |
20820 | @@ -87,8 +103,8 @@ void pat_init(void) | |
20821 | * 011 UC _PAGE_CACHE_UC | |
20822 | * PAT bit unused | |
20823 | */ | |
20824 | - pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) | | |
20825 | - PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC); | |
20826 | + pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | | |
20827 | + PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); | |
20828 | ||
20829 | /* Boot CPU check */ | |
20830 | if (!boot_pat_state) | |
20831 | @@ -113,13 +129,13 @@ void pat_init(void) | |
20832 | static char *cattr_name(unsigned long flags) | |
20833 | { | |
20834 | switch (flags & _PAGE_CACHE_MASK) { | |
20835 | - case _PAGE_CACHE_UC: return "uncached"; | |
20836 | - case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; | |
20837 | - case _PAGE_CACHE_WB: return "write-back"; | |
20838 | - case _PAGE_CACHE_WC: return "write-combining"; | |
20839 | - case _PAGE_CACHE_WP: return "write-protected"; | |
20840 | - case _PAGE_CACHE_WT: return "write-through"; | |
20841 | - default: return "broken"; | |
20842 | + case _PAGE_CACHE_UC: return "uncached"; | |
20843 | + case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; | |
20844 | + case _PAGE_CACHE_WB: return "write-back"; | |
20845 | + case _PAGE_CACHE_WC: return "write-combining"; | |
20846 | + case _PAGE_CACHE_WP: return "write-protected"; | |
20847 | + case _PAGE_CACHE_WT: return "write-through"; | |
20848 | + default: return "broken"; | |
20849 | } | |
20850 | } | |
20851 | ||
20852 | @@ -157,49 +173,55 @@ static DEFINE_SPINLOCK(memtype_lock); / | |
20853 | * The intersection is based on "Effective Memory Type" tables in IA-32 | |
20854 | * SDM vol 3a | |
20855 | */ | |
20856 | -static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot, | |
20857 | - unsigned long *ret_prot) | |
20858 | +static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) | |
20859 | { | |
20860 | - unsigned long pat_type; | |
20861 | - u8 mtrr_type; | |
20862 | - | |
20863 | - pat_type = prot & _PAGE_CACHE_MASK; | |
20864 | - prot &= (~_PAGE_CACHE_MASK); | |
20865 | - | |
20866 | - /* | |
20867 | - * We return the PAT request directly for types where PAT takes | |
20868 | - * precedence with respect to MTRR and for UC_MINUS. | |
20869 | - * Consistency checks with other PAT requests is done later | |
20870 | - * while going through memtype list. | |
20871 | - */ | |
20872 | - if (pat_type == _PAGE_CACHE_WC) { | |
20873 | - *ret_prot = prot | _PAGE_CACHE_WC; | |
20874 | - return 0; | |
20875 | - } else if (pat_type == _PAGE_CACHE_UC_MINUS) { | |
20876 | - *ret_prot = prot | _PAGE_CACHE_UC_MINUS; | |
20877 | - return 0; | |
20878 | - } else if (pat_type == _PAGE_CACHE_UC) { | |
20879 | - *ret_prot = prot | _PAGE_CACHE_UC; | |
20880 | - return 0; | |
20881 | - } | |
20882 | - | |
20883 | /* | |
20884 | * Look for MTRR hint to get the effective type in case where PAT | |
20885 | * request is for WB. | |
20886 | */ | |
20887 | - mtrr_type = mtrr_type_lookup(start, end); | |
20888 | + if (req_type == _PAGE_CACHE_WB) { | |
20889 | + u8 mtrr_type; | |
20890 | ||
20891 | - if (mtrr_type == MTRR_TYPE_UNCACHABLE) { | |
20892 | - *ret_prot = prot | _PAGE_CACHE_UC; | |
20893 | - } else if (mtrr_type == MTRR_TYPE_WRCOMB) { | |
20894 | - *ret_prot = prot | _PAGE_CACHE_WC; | |
20895 | - } else { | |
20896 | - *ret_prot = prot | _PAGE_CACHE_WB; | |
20897 | + mtrr_type = mtrr_type_lookup(start, end); | |
20898 | + if (mtrr_type == MTRR_TYPE_UNCACHABLE) | |
20899 | + return _PAGE_CACHE_UC; | |
20900 | + if (mtrr_type == MTRR_TYPE_WRCOMB) | |
20901 | + return _PAGE_CACHE_WC; | |
20902 | + } | |
20903 | + | |
20904 | + return req_type; | |
20905 | +} | |
20906 | +/* Compare the type of new with entry (an existing range the caller found overlapping) and with the following list entries that start before new->end. A mismatch with entry is resolved by adopting entry's type when type is non-NULL (also stored in *type); any other mismatch is logged and yields -EBUSY. Returns 0 when no conflict remains. */ | |
20907 | +static int chk_conflict(struct memtype *new, struct memtype *entry, | |
20908 | + unsigned long *type) | |
20909 | +{ | |
20910 | + if (new->type != entry->type) { | |
20911 | + if (type) { /* caller accepts a different type: take over the existing entry's */ | |
20912 | + new->type = entry->type; | |
20913 | + *type = entry->type; | |
20914 | + } else | |
20915 | + goto conflict; | |
20916 | } | |
20917 | ||
20918 | + /* check overlaps with more than one entry in the list */ | |
20919 | + list_for_each_entry_continue(entry, &memtype_list, nd) { /* walks the entries after entry */ | |
20920 | + if (new->end <= entry->start) | |
20921 | + break; | |
20922 | + else if (new->type != entry->type) | |
20923 | + goto conflict; | |
20924 | + } | |
20925 | return 0; | |
20926 | + /* Reached with entry pointing at the list element whose type differs from new->type. */ | |
20927 | + conflict: | |
20928 | + printk(KERN_INFO "%s:%d conflicting memory types " | |
20929 | + "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start, | |
20930 | + new->end, cattr_name(new->type), cattr_name(entry->type)); | |
20931 | + return -EBUSY; | |
20932 | } | |
20933 | ||
20934 | +static struct memtype *cached_entry; | |
20935 | +static u64 cached_start; | |
20936 | + | |
20937 | /* | |
20938 | * req_type typically has one of the: | |
20939 | * - _PAGE_CACHE_WB | |
20940 | @@ -210,37 +232,36 @@ static int pat_x_mtrr_type(u64 start, u6 | |
20941 | * req_type will have a special case value '-1', when requester want to inherit | |
20942 | * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS. | |
20943 | * | |
20944 | - * If ret_type is NULL, function will return an error if it cannot reserve the | |
20945 | - * region with req_type. If ret_type is non-null, function will return | |
20946 | - * available type in ret_type in case of no error. In case of any error | |
20947 | + * If new_type is NULL, function will return an error if it cannot reserve the | |
20948 | + * region with req_type. If new_type is non-NULL, function will return | |
20949 | + * available type in new_type in case of no error. In case of any error | |
20950 | * it will return a negative return value. | |
20951 | */ | |
20952 | int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |
20953 | - unsigned long *ret_type) | |
20954 | + unsigned long *new_type) | |
20955 | { | |
20956 | - struct memtype *new_entry = NULL; | |
20957 | - struct memtype *parse; | |
20958 | + struct memtype *new, *entry; | |
20959 | unsigned long actual_type; | |
20960 | + struct list_head *where; | |
20961 | int err = 0; | |
20962 | ||
20963 | - /* Only track when pat_wc_enabled */ | |
20964 | - if (!pat_wc_enabled) { | |
20965 | + BUG_ON(start >= end); /* end is exclusive */ | |
20966 | + | |
20967 | + if (!pat_enabled) { | |
20968 | /* This is identical to page table setting without PAT */ | |
20969 | - if (ret_type) { | |
20970 | - if (req_type == -1) { | |
20971 | - *ret_type = _PAGE_CACHE_WB; | |
20972 | - } else { | |
20973 | - *ret_type = req_type; | |
20974 | - } | |
20975 | + if (new_type) { | |
20976 | + if (req_type == -1) | |
20977 | + *new_type = _PAGE_CACHE_WB; | |
20978 | + else | |
20979 | + *new_type = req_type & _PAGE_CACHE_MASK; | |
20980 | } | |
20981 | return 0; | |
20982 | } | |
20983 | ||
20984 | /* Low ISA region is always mapped WB in page table. No need to track */ | |
20985 | - if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) { | |
20986 | - if (ret_type) | |
20987 | - *ret_type = _PAGE_CACHE_WB; | |
20988 | - | |
20989 | + if (is_ISA_range(start, end - 1)) { | |
20990 | + if (new_type) | |
20991 | + *new_type = _PAGE_CACHE_WB; | |
20992 | return 0; | |
20993 | } | |
20994 | ||
20995 | @@ -253,206 +274,118 @@ int reserve_memtype(u64 start, u64 end, | |
20996 | */ | |
20997 | u8 mtrr_type = mtrr_type_lookup(start, end); | |
20998 | ||
20999 | - if (mtrr_type == MTRR_TYPE_WRBACK) { | |
21000 | - req_type = _PAGE_CACHE_WB; | |
21001 | + if (mtrr_type == MTRR_TYPE_WRBACK) | |
21002 | actual_type = _PAGE_CACHE_WB; | |
21003 | - } else { | |
21004 | - req_type = _PAGE_CACHE_UC_MINUS; | |
21005 | + else | |
21006 | actual_type = _PAGE_CACHE_UC_MINUS; | |
21007 | - } | |
21008 | - } else { | |
21009 | - req_type &= _PAGE_CACHE_MASK; | |
21010 | - err = pat_x_mtrr_type(start, end, req_type, &actual_type); | |
21011 | - } | |
21012 | - | |
21013 | - if (err) { | |
21014 | - if (ret_type) | |
21015 | - *ret_type = actual_type; | |
21016 | + } else | |
21017 | + actual_type = pat_x_mtrr_type(start, end, | |
21018 | + req_type & _PAGE_CACHE_MASK); | |
21019 | ||
21020 | - return -EINVAL; | |
21021 | - } | |
21022 | - | |
21023 | - new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); | |
21024 | - if (!new_entry) | |
21025 | + new = kmalloc(sizeof(struct memtype), GFP_KERNEL); | |
21026 | + if (!new) | |
21027 | return -ENOMEM; | |
21028 | ||
21029 | - new_entry->start = start; | |
21030 | - new_entry->end = end; | |
21031 | - new_entry->type = actual_type; | |
21032 | + new->start = start; | |
21033 | + new->end = end; | |
21034 | + new->type = actual_type; | |
21035 | ||
21036 | - if (ret_type) | |
21037 | - *ret_type = actual_type; | |
21038 | + if (new_type) | |
21039 | + *new_type = actual_type; | |
21040 | ||
21041 | spin_lock(&memtype_lock); | |
21042 | ||
21043 | - /* Search for existing mapping that overlaps the current range */ | |
21044 | - list_for_each_entry(parse, &memtype_list, nd) { | |
21045 | - struct memtype *saved_ptr; | |
21046 | + if (cached_entry && start >= cached_start) | |
21047 | + entry = cached_entry; | |
21048 | + else | |
21049 | + entry = list_entry(&memtype_list, struct memtype, nd); | |
21050 | ||
21051 | - if (parse->start >= end) { | |
21052 | - pr_debug("New Entry\n"); | |
21053 | - list_add(&new_entry->nd, parse->nd.prev); | |
21054 | - new_entry = NULL; | |
21055 | + /* Search for existing mapping that overlaps the current range */ | |
21056 | + where = NULL; | |
21057 | + list_for_each_entry_continue(entry, &memtype_list, nd) { | |
21058 | + if (end <= entry->start) { | |
21059 | + where = entry->nd.prev; | |
21060 | + cached_entry = list_entry(where, struct memtype, nd); | |
21061 | break; | |
21062 | - } | |
21063 | - | |
21064 | - if (start <= parse->start && end >= parse->start) { | |
21065 | - if (actual_type != parse->type && ret_type) { | |
21066 | - actual_type = parse->type; | |
21067 | - *ret_type = actual_type; | |
21068 | - new_entry->type = actual_type; | |
21069 | - } | |
21070 | - | |
21071 | - if (actual_type != parse->type) { | |
21072 | - printk( | |
21073 | - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", | |
21074 | - current->comm, current->pid, | |
21075 | - start, end, | |
21076 | - cattr_name(actual_type), | |
21077 | - cattr_name(parse->type)); | |
21078 | - err = -EBUSY; | |
21079 | - break; | |
21080 | - } | |
21081 | - | |
21082 | - saved_ptr = parse; | |
21083 | - /* | |
21084 | - * Check to see whether the request overlaps more | |
21085 | - * than one entry in the list | |
21086 | - */ | |
21087 | - list_for_each_entry_continue(parse, &memtype_list, nd) { | |
21088 | - if (end <= parse->start) { | |
21089 | - break; | |
21090 | - } | |
21091 | - | |
21092 | - if (actual_type != parse->type) { | |
21093 | - printk( | |
21094 | - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", | |
21095 | - current->comm, current->pid, | |
21096 | - start, end, | |
21097 | - cattr_name(actual_type), | |
21098 | - cattr_name(parse->type)); | |
21099 | - err = -EBUSY; | |
21100 | - break; | |
21101 | - } | |
21102 | - } | |
21103 | - | |
21104 | - if (err) { | |
21105 | - break; | |
21106 | + } else if (start <= entry->start) { /* end > entry->start */ | |
21107 | + err = chk_conflict(new, entry, new_type); | |
21108 | + if (!err) { | |
21109 | + dprintk("Overlap at 0x%Lx-0x%Lx\n", | |
21110 | + entry->start, entry->end); | |
21111 | + where = entry->nd.prev; | |
21112 | + cached_entry = list_entry(where, | |
21113 | + struct memtype, nd); | |
21114 | } | |
21115 | - | |
21116 | - pr_debug("Overlap at 0x%Lx-0x%Lx\n", | |
21117 | - saved_ptr->start, saved_ptr->end); | |
21118 | - /* No conflict. Go ahead and add this new entry */ | |
21119 | - list_add(&new_entry->nd, saved_ptr->nd.prev); | |
21120 | - new_entry = NULL; | |
21121 | break; | |
21122 | - } | |
21123 | - | |
21124 | - if (start < parse->end) { | |
21125 | - if (actual_type != parse->type && ret_type) { | |
21126 | - actual_type = parse->type; | |
21127 | - *ret_type = actual_type; | |
21128 | - new_entry->type = actual_type; | |
21129 | - } | |
21130 | - | |
21131 | - if (actual_type != parse->type) { | |
21132 | - printk( | |
21133 | - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", | |
21134 | - current->comm, current->pid, | |
21135 | - start, end, | |
21136 | - cattr_name(actual_type), | |
21137 | - cattr_name(parse->type)); | |
21138 | - err = -EBUSY; | |
21139 | - break; | |
21140 | - } | |
21141 | - | |
21142 | - saved_ptr = parse; | |
21143 | - /* | |
21144 | - * Check to see whether the request overlaps more | |
21145 | - * than one entry in the list | |
21146 | - */ | |
21147 | - list_for_each_entry_continue(parse, &memtype_list, nd) { | |
21148 | - if (end <= parse->start) { | |
21149 | - break; | |
21150 | - } | |
21151 | - | |
21152 | - if (actual_type != parse->type) { | |
21153 | - printk( | |
21154 | - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", | |
21155 | - current->comm, current->pid, | |
21156 | - start, end, | |
21157 | - cattr_name(actual_type), | |
21158 | - cattr_name(parse->type)); | |
21159 | - err = -EBUSY; | |
21160 | - break; | |
21161 | + } else if (start < entry->end) { /* start > entry->start */ | |
21162 | + err = chk_conflict(new, entry, new_type); | |
21163 | + if (!err) { | |
21164 | + dprintk("Overlap at 0x%Lx-0x%Lx\n", | |
21165 | + entry->start, entry->end); | |
21166 | + cached_entry = list_entry(entry->nd.prev, | |
21167 | + struct memtype, nd); | |
21168 | + | |
21169 | + /* | |
21170 | + * Move to right position in the linked | |
21171 | + * list to add this new entry | |
21172 | + */ | |
21173 | + list_for_each_entry_continue(entry, | |
21174 | + &memtype_list, nd) { | |
21175 | + if (start <= entry->start) { | |
21176 | + where = entry->nd.prev; | |
21177 | + break; | |
21178 | + } | |
21179 | } | |
21180 | } | |
21181 | - | |
21182 | - if (err) { | |
21183 | - break; | |
21184 | - } | |
21185 | - | |
21186 | - pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n", | |
21187 | - saved_ptr->start, saved_ptr->end); | |
21188 | - /* No conflict. Go ahead and add this new entry */ | |
21189 | - list_add(&new_entry->nd, &saved_ptr->nd); | |
21190 | - new_entry = NULL; | |
21191 | break; | |
21192 | } | |
21193 | } | |
21194 | ||
21195 | if (err) { | |
21196 | - printk(KERN_INFO | |
21197 | - "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n", | |
21198 | - start, end, cattr_name(new_entry->type), | |
21199 | - cattr_name(req_type)); | |
21200 | - kfree(new_entry); | |
21201 | + printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " | |
21202 | + "track %s, req %s\n", | |
21203 | + start, end, cattr_name(new->type), cattr_name(req_type)); | |
21204 | + kfree(new); | |
21205 | spin_unlock(&memtype_lock); | |
21206 | return err; | |
21207 | } | |
21208 | ||
21209 | - if (new_entry) { | |
21210 | - /* No conflict. Not yet added to the list. Add to the tail */ | |
21211 | - list_add_tail(&new_entry->nd, &memtype_list); | |
21212 | - pr_debug("New Entry\n"); | |
21213 | - } | |
21214 | + cached_start = start; | |
21215 | ||
21216 | - if (ret_type) { | |
21217 | - pr_debug( | |
21218 | - "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", | |
21219 | - start, end, cattr_name(actual_type), | |
21220 | - cattr_name(req_type), cattr_name(*ret_type)); | |
21221 | - } else { | |
21222 | - pr_debug( | |
21223 | - "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n", | |
21224 | - start, end, cattr_name(actual_type), | |
21225 | - cattr_name(req_type)); | |
21226 | - } | |
21227 | + if (where) | |
21228 | + list_add(&new->nd, where); | |
21229 | + else | |
21230 | + list_add_tail(&new->nd, &memtype_list); | |
21231 | ||
21232 | spin_unlock(&memtype_lock); | |
21233 | + | |
21234 | + dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", | |
21235 | + start, end, cattr_name(new->type), cattr_name(req_type), | |
21236 | + new_type ? cattr_name(*new_type) : "-"); | |
21237 | + | |
21238 | return err; | |
21239 | } | |
21240 | ||
21241 | int free_memtype(u64 start, u64 end) | |
21242 | { | |
21243 | - struct memtype *ml; | |
21244 | + struct memtype *entry; | |
21245 | int err = -EINVAL; | |
21246 | ||
21247 | - /* Only track when pat_wc_enabled */ | |
21248 | - if (!pat_wc_enabled) { | |
21249 | + if (!pat_enabled) | |
21250 | return 0; | |
21251 | - } | |
21252 | ||
21253 | /* Low ISA region is always mapped WB. No need to track */ | |
21254 | - if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) { | |
21255 | + if (is_ISA_range(start, end - 1)) | |
21256 | return 0; | |
21257 | - } | |
21258 | ||
21259 | spin_lock(&memtype_lock); | |
21260 | - list_for_each_entry(ml, &memtype_list, nd) { | |
21261 | - if (ml->start == start && ml->end == end) { | |
21262 | - list_del(&ml->nd); | |
21263 | - kfree(ml); | |
21264 | + list_for_each_entry(entry, &memtype_list, nd) { | |
21265 | + if (entry->start == start && entry->end == end) { | |
21266 | + if (cached_entry == entry || cached_start == start) | |
21267 | + cached_entry = NULL; | |
21268 | + | |
21269 | + list_del(&entry->nd); | |
21270 | + kfree(entry); | |
21271 | err = 0; | |
21272 | break; | |
21273 | } | |
21274 | @@ -464,27 +397,19 @@ int free_memtype(u64 start, u64 end) | |
21275 | current->comm, current->pid, start, end); | |
21276 | } | |
21277 | ||
21278 | - pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end); | |
21279 | + dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); | |
21280 | return err; | |
21281 | } | |
21282 | ||
21283 | ||
21284 | -/* | |
21285 | - * /dev/mem mmap interface. The memtype used for mapping varies: | |
21286 | - * - Use UC for mappings with O_SYNC flag | |
21287 | - * - Without O_SYNC flag, if there is any conflict in reserve_memtype, | |
21288 | - * inherit the memtype from existing mapping. | |
21289 | - * - Else use UC_MINUS memtype (for backward compatibility with existing | |
21290 | - * X drivers. | |
21291 | - */ | |
21292 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long mfn, | |
21293 | unsigned long size, pgprot_t vma_prot) | |
21294 | { | |
21295 | return vma_prot; | |
21296 | } | |
21297 | ||
21298 | -#ifdef CONFIG_NONPROMISC_DEVMEM | |
21299 | -/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/ | |
21300 | +#ifdef CONFIG_STRICT_DEVMEM | |
21301 | +/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/ | |
21302 | static inline int range_is_allowed(unsigned long mfn, unsigned long size) | |
21303 | { | |
21304 | return 1; | |
21305 | @@ -508,20 +433,20 @@ static inline int range_is_allowed(unsig | |
21306 | } | |
21307 | return 1; | |
21308 | } | |
21309 | -#endif /* CONFIG_NONPROMISC_DEVMEM */ | |
21310 | +#endif /* CONFIG_STRICT_DEVMEM */ | |
21311 | ||
21312 | int phys_mem_access_prot_allowed(struct file *file, unsigned long mfn, | |
21313 | unsigned long size, pgprot_t *vma_prot) | |
21314 | { | |
21315 | u64 addr = (u64)mfn << PAGE_SHIFT; | |
21316 | - unsigned long flags = _PAGE_CACHE_UC_MINUS; | |
21317 | + unsigned long flags = -1; | |
21318 | int retval; | |
21319 | ||
21320 | if (!range_is_allowed(mfn, size)) | |
21321 | return 0; | |
21322 | ||
21323 | if (file->f_flags & O_SYNC) { | |
21324 | - flags = _PAGE_CACHE_UC; | |
21325 | + flags = _PAGE_CACHE_UC_MINUS; | |
21326 | } | |
21327 | ||
21328 | #ifndef CONFIG_X86_32 | |
21329 | @@ -534,25 +459,26 @@ int phys_mem_access_prot_allowed(struct | |
21330 | * caching for the high addresses through the KEN pin, but | |
21331 | * we maintain the tradition of paranoia in this code. | |
21332 | */ | |
21333 | - if (!pat_wc_enabled && | |
21334 | - ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) || | |
21335 | - test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) || | |
21336 | - test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) || | |
21337 | - test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) && | |
21338 | - (pfn << PAGE_SHIFT) >= __pa(high_memory)) { | |
21339 | + if (!pat_enabled && | |
21340 | + !(boot_cpu_has(X86_FEATURE_MTRR) || | |
21341 | + boot_cpu_has(X86_FEATURE_K6_MTRR) || | |
21342 | + boot_cpu_has(X86_FEATURE_CYRIX_ARR) || | |
21343 | + boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) && | |
21344 | + (pfn << PAGE_SHIFT) >= __pa(high_memory)) { | |
21345 | flags = _PAGE_CACHE_UC; | |
21346 | } | |
21347 | #endif | |
21348 | #endif | |
21349 | ||
21350 | /* | |
21351 | - * With O_SYNC, we can only take UC mapping. Fail if we cannot. | |
21352 | + * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot. | |
21353 | + * | |
21354 | * Without O_SYNC, we want to get | |
21355 | * - WB for WB-able memory and no other conflicting mappings | |
21356 | * - UC_MINUS for non-WB-able memory with no other conflicting mappings | |
21357 | * - Inherit from confliting mappings otherwise | |
21358 | */ | |
21359 | - if (flags != _PAGE_CACHE_UC_MINUS) { | |
21360 | + if (flags != -1) { | |
21361 | retval = reserve_memtype(addr, addr + size, flags, NULL); | |
21362 | } else { | |
21363 | retval = reserve_memtype(addr, addr + size, -1, &flags); | |
21364 | @@ -600,3 +526,88 @@ void unmap_devmem(unsigned long mfn, uns | |
21365 | free_memtype(addr, addr + size); | |
21366 | } | |
21367 | ||
21368 | +#if defined(CONFIG_DEBUG_FS) | |
21369 | + | |
21370 | +/* Return a kmalloc'ed copy of the pos-th element (counting from 1) of memtype_list, or NULL if the list has fewer than pos elements or the allocation fails. The caller is responsible for freeing the copy. */ | |
21371 | +static struct memtype *memtype_get_idx(loff_t pos) | |
21372 | +{ | |
21373 | + struct memtype *list_node, *print_entry; | |
21374 | + int i = 1; /* list positions are 1-based; memtype_seq_start() uses position 0 for the header line */ | |
21375 | + | |
21376 | + print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); /* allocated before memtype_lock is taken */ | |
21377 | + if (!print_entry) | |
21378 | + return NULL; | |
21379 | + | |
21380 | + spin_lock(&memtype_lock); | |
21381 | + list_for_each_entry(list_node, &memtype_list, nd) { | |
21382 | + if (pos == i) { | |
21383 | + *print_entry = *list_node; /* copy under the lock so the result can be used after unlocking */ | |
21384 | + spin_unlock(&memtype_lock); | |
21385 | + return print_entry; | |
21386 | + } | |
21387 | + ++i; | |
21388 | + } | |
21389 | + spin_unlock(&memtype_lock); | |
21390 | + kfree(print_entry); /* no element at pos: drop the unused buffer */ | |
21391 | + return NULL; | |
21392 | +} | |
21393 | +/* seq_file start callback: at position 0, print the header line and advance to position 1; then return a copy of the list element at *pos (NULL when there is none). */ | |
21394 | +static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) | |
21395 | +{ | |
21396 | + if (*pos == 0) { | |
21397 | + ++*pos; | |
21398 | + seq_printf(seq, "PAT memtype list:\n"); | |
21399 | + } | |
21400 | + | |
21401 | + return memtype_get_idx(*pos); | |
21402 | +} | |
21403 | +/* seq_file next callback: advance *pos and return a copy of the element at the new position (NULL when there is none). The previous element v is not used. */ | |
21404 | +static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |
21405 | +{ | |
21406 | + ++*pos; | |
21407 | + return memtype_get_idx(*pos); | |
21408 | +} | |
21409 | +/* seq_file stop callback: intentionally empty; memtype_lock is taken and released inside memtype_get_idx(), and the copied entry is freed in memtype_seq_show(). */ | |
21410 | +static void memtype_seq_stop(struct seq_file *seq, void *v) | |
21411 | +{ | |
21412 | +} | |
21413 | +/* seq_file show callback: print one line "<type name> @ 0x<start>-0x<end>" for the copied entry v, then free that copy. Always returns 0. */ | |
21414 | +static int memtype_seq_show(struct seq_file *seq, void *v) | |
21415 | +{ | |
21416 | + struct memtype *print_entry = (struct memtype *)v; | |
21417 | + | |
21418 | + seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type), | |
21419 | + print_entry->start, print_entry->end); | |
21420 | + kfree(print_entry); /* NOTE(review): this is the only place the copy is freed - confirm every entry returned by start()/next() reaches show() */ | |
21421 | + return 0; | |
21422 | +} | |
21423 | +/* Iterator callbacks handed to seq_open() by memtype_seq_open(). */ | |
21424 | +static struct seq_operations memtype_seq_ops = { | |
21425 | + .start = memtype_seq_start, | |
21426 | + .next = memtype_seq_next, | |
21427 | + .stop = memtype_seq_stop, | |
21428 | + .show = memtype_seq_show, | |
21429 | +}; | |
21430 | +/* open handler of the debugfs file: attaches memtype_seq_ops to the file via seq_open() and returns its result. The inode argument is not used. */ | |
21431 | +static int memtype_seq_open(struct inode *inode, struct file *file) | |
21432 | +{ | |
21433 | + return seq_open(file, &memtype_seq_ops); | |
21434 | +} | |
21435 | +/* File operations of the debugfs file: open is local, read/llseek/release are the stock seq_file helpers. */ | |
21436 | +static const struct file_operations memtype_fops = { | |
21437 | + .open = memtype_seq_open, | |
21438 | + .read = seq_read, | |
21439 | + .llseek = seq_lseek, | |
21440 | + .release = seq_release, | |
21441 | +}; | |
21442 | +/* Late initcall: create the debugfs file "pat_memtype_list" (mode S_IRUSR) under arch_debugfs_dir, backed by memtype_fops. Always returns 0. */ | |
21443 | +static int __init pat_memtype_list_init(void) | |
21444 | +{ | |
21445 | + debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir, | |
21446 | + NULL, &memtype_fops); /* the result of debugfs_create_file() is not checked */ | |
21447 | + return 0; | |
21448 | +} | |
21449 | + | |
21450 | +late_initcall(pat_memtype_list_init); | |
21451 | + | |
21452 | +#endif /* CONFIG_DEBUG_FS */ | |
82094b55 AF |
21453 | --- sle11-2009-10-16.orig/arch/x86/mm/pgtable-xen.c 2009-03-16 16:38:05.000000000 +0100 |
21454 | +++ sle11-2009-10-16/arch/x86/mm/pgtable-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
21455 | @@ -4,6 +4,7 @@ |
21456 | #include <asm/pgalloc.h> | |
21457 | #include <asm/pgtable.h> | |
21458 | #include <asm/tlb.h> | |
21459 | +#include <asm/fixmap.h> | |
21460 | #include <asm/hypervisor.h> | |
21461 | #include <asm/mmu_context.h> | |
21462 | ||
21463 | @@ -410,15 +411,9 @@ static inline void pgd_list_del(pgd_t *p | |
21464 | static void pgd_ctor(void *p) | |
21465 | { | |
21466 | pgd_t *pgd = p; | |
21467 | - unsigned long flags; | |
21468 | ||
21469 | pgd_test_and_unpin(pgd); | |
21470 | ||
21471 | - /* Clear usermode parts of PGD */ | |
21472 | - memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t)); | |
21473 | - | |
21474 | - spin_lock_irqsave(&pgd_lock, flags); | |
21475 | - | |
21476 | /* If the pgd points to a shared pagetable level (either the | |
21477 | ptes in non-PAE, or shared PMD in PAE), then just copy the | |
21478 | references from swapper_pg_dir. */ | |
21479 | @@ -440,13 +435,9 @@ static void pgd_ctor(void *p) | |
21480 | __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE); | |
21481 | #endif | |
21482 | ||
21483 | -#ifndef CONFIG_X86_PAE | |
21484 | /* list required to sync kernel mapping updates */ | |
21485 | if (!SHARED_KERNEL_PMD) | |
21486 | pgd_list_add(pgd); | |
21487 | -#endif | |
21488 | - | |
21489 | - spin_unlock_irqrestore(&pgd_lock, flags); | |
21490 | } | |
21491 | ||
21492 | static void pgd_dtor(void *pgd) | |
21493 | @@ -475,33 +466,6 @@ static void pgd_dtor(void *pgd) | |
21494 | ||
21495 | #ifdef CONFIG_X86_PAE | |
21496 | /* | |
21497 | - * Mop up any pmd pages which may still be attached to the pgd. | |
21498 | - * Normally they will be freed by munmap/exit_mmap, but any pmd we | |
21499 | - * preallocate which never got a corresponding vma will need to be | |
21500 | - * freed manually. | |
21501 | - */ | |
21502 | -static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) | |
21503 | -{ | |
21504 | - int i; | |
21505 | - | |
21506 | - for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) { | |
21507 | - pgd_t pgd = pgdp[i]; | |
21508 | - | |
21509 | - if (__pgd_val(pgd) != 0) { | |
21510 | - pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); | |
21511 | - | |
21512 | - pgdp[i] = xen_make_pgd(0); | |
21513 | - | |
21514 | - paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); | |
21515 | - pmd_free(mm, pmd); | |
21516 | - } | |
21517 | - } | |
21518 | - | |
21519 | - if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) | |
21520 | - xen_destroy_contiguous_region((unsigned long)pgdp, 0); | |
21521 | -} | |
21522 | - | |
21523 | -/* | |
21524 | * In PAE mode, we need to do a cr3 reload (=tlb flush) when | |
21525 | * updating the top-level pagetable entries to guarantee the | |
21526 | * processor notices the update. Since this is expensive, and | |
21527 | @@ -512,61 +476,7 @@ static void pgd_mop_up_pmds(struct mm_st | |
21528 | * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate | |
21529 | * and initialize the kernel pmds here. | |
21530 | */ | |
21531 | -static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) | |
21532 | -{ | |
21533 | - pud_t *pud; | |
21534 | - pmd_t *pmds[UNSHARED_PTRS_PER_PGD]; | |
21535 | - unsigned long addr, flags; | |
21536 | - int i; | |
21537 | - | |
21538 | - /* | |
21539 | - * We can race save/restore (if we sleep during a GFP_KERNEL memory | |
21540 | - * allocation). We therefore store virtual addresses of pmds as they | |
21541 | - * do not change across save/restore, and poke the machine addresses | |
21542 | - * into the pgdir under the pgd_lock. | |
21543 | - */ | |
21544 | - for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; i++, addr += PUD_SIZE) { | |
21545 | - pmds[i] = pmd_alloc_one(mm, addr); | |
21546 | - if (!pmds[i]) | |
21547 | - goto out_oom; | |
21548 | - } | |
21549 | - | |
21550 | - spin_lock_irqsave(&pgd_lock, flags); | |
21551 | - | |
21552 | - /* Protect against save/restore: move below 4GB under pgd_lock. */ | |
21553 | - if (!xen_feature(XENFEAT_pae_pgdir_above_4gb) | |
21554 | - && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) { | |
21555 | - spin_unlock_irqrestore(&pgd_lock, flags); | |
21556 | -out_oom: | |
21557 | - while (i--) | |
21558 | - pmd_free(mm, pmds[i]); | |
21559 | - return 0; | |
21560 | - } | |
21561 | - | |
21562 | - /* Copy kernel pmd contents and write-protect the new pmds. */ | |
21563 | - pud = pud_offset(pgd, 0); | |
21564 | - for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; | |
21565 | - i++, pud++, addr += PUD_SIZE) { | |
21566 | - if (i >= KERNEL_PGD_BOUNDARY) { | |
21567 | - memcpy(pmds[i], | |
21568 | - (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), | |
21569 | - sizeof(pmd_t) * PTRS_PER_PMD); | |
21570 | - make_lowmem_page_readonly( | |
21571 | - pmds[i], XENFEAT_writable_page_tables); | |
21572 | - } | |
21573 | - | |
21574 | - /* It is safe to poke machine addresses of pmds under the pgd_lock. */ | |
21575 | - pud_populate(mm, pud, pmds[i]); | |
21576 | - } | |
21577 | - | |
21578 | - /* List required to sync kernel mapping updates and | |
21579 | - * to pin/unpin on save/restore. */ | |
21580 | - pgd_list_add(pgd); | |
21581 | - | |
21582 | - spin_unlock_irqrestore(&pgd_lock, flags); | |
21583 | - | |
21584 | - return 1; | |
21585 | -} | |
21586 | +#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD | |
21587 | ||
21588 | void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) | |
21589 | { | |
21590 | @@ -596,16 +506,101 @@ void pud_populate(struct mm_struct *mm, | |
21591 | xen_tlb_flush(); | |
21592 | } | |
21593 | #else /* !CONFIG_X86_PAE */ | |
21594 | + | |
21595 | /* No need to prepopulate any pagetable entries in non-PAE modes. */ | |
21596 | -static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) | |
21597 | +#define PREALLOCATED_PMDS 0 | |
21598 | + | |
21599 | +#endif /* CONFIG_X86_PAE */ | |
21600 | + | |
21601 | +static void free_pmds(pmd_t *pmds[], struct mm_struct *mm, bool contig) | |
21602 | { | |
21603 | - return 1; | |
21604 | + int i; | |
21605 | + | |
21606 | +#ifdef CONFIG_X86_PAE | |
21607 | + if (contig) | |
21608 | + xen_destroy_contiguous_region((unsigned long)mm->pgd, 0); | |
21609 | +#endif | |
21610 | + | |
21611 | + for(i = 0; i < PREALLOCATED_PMDS; i++) | |
21612 | + if (pmds[i]) | |
21613 | + pmd_free(mm, pmds[i]); | |
21614 | } | |
21615 | ||
21616 | -static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd) | |
21617 | +static int preallocate_pmds(pmd_t *pmds[], struct mm_struct *mm) | |
21618 | { | |
21619 | + int i; | |
21620 | + bool failed = false; | |
21621 | + | |
21622 | + for(i = 0; i < PREALLOCATED_PMDS; i++) { | |
21623 | + pmd_t *pmd = pmd_alloc_one(mm, i << PUD_SHIFT); | |
21624 | + if (pmd == NULL) | |
21625 | + failed = true; | |
21626 | + pmds[i] = pmd; | |
21627 | + } | |
21628 | + | |
21629 | + if (failed) { | |
21630 | + free_pmds(pmds, mm, false); | |
21631 | + return -ENOMEM; | |
21632 | + } | |
21633 | + | |
21634 | + return 0; | |
21635 | +} | |
21636 | + | |
21637 | +/* | |
21638 | + * Mop up any pmd pages which may still be attached to the pgd. | |
21639 | + * Normally they will be freed by munmap/exit_mmap, but any pmd we | |
21640 | + * preallocate which never got a corresponding vma will need to be | |
21641 | + * freed manually. | |
21642 | + */ | |
21643 | +static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) | |
21644 | +{ | |
21645 | + int i; | |
21646 | + | |
21647 | + for(i = 0; i < PREALLOCATED_PMDS; i++) { | |
21648 | + pgd_t pgd = pgdp[i]; | |
21649 | + | |
21650 | + if (__pgd_val(pgd) != 0) { | |
21651 | + pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); | |
21652 | + | |
21653 | + pgdp[i] = xen_make_pgd(0); | |
21654 | + | |
21655 | + paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); | |
21656 | + pmd_free(mm, pmd); | |
21657 | + } | |
21658 | + } | |
21659 | + | |
21660 | +#ifdef CONFIG_X86_PAE | |
21661 | + if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) | |
21662 | + xen_destroy_contiguous_region((unsigned long)pgdp, 0); | |
21663 | +#endif | |
21664 | +} | |
21665 | + | |
21666 | +static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) | |
21667 | +{ | |
21668 | + pud_t *pud; | |
21669 | + unsigned long addr; | |
21670 | + int i; | |
21671 | + | |
21672 | + if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */ | |
21673 | + return; | |
21674 | + | |
21675 | + pud = pud_offset(pgd, 0); | |
21676 | + for (addr = i = 0; i < PREALLOCATED_PMDS; | |
21677 | + i++, pud++, addr += PUD_SIZE) { | |
21678 | + pmd_t *pmd = pmds[i]; | |
21679 | + | |
21680 | + if (i >= KERNEL_PGD_BOUNDARY) { | |
21681 | + memcpy(pmd, | |
21682 | + (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), | |
21683 | + sizeof(pmd_t) * PTRS_PER_PMD); | |
21684 | + make_lowmem_page_readonly( | |
21685 | + pmd, XENFEAT_writable_page_tables); | |
21686 | + } | |
21687 | + | |
21688 | + /* It is safe to poke machine addresses of pmds under the pgd_lock. */ | |
21689 | + pud_populate(mm, pud, pmd); | |
21690 | + } | |
21691 | } | |
21692 | -#endif /* CONFIG_X86_PAE */ | |
21693 | ||
21694 | #ifdef CONFIG_X86_64 | |
21695 | /* We allocate two contiguous pages for kernel and user. */ | |
21696 | @@ -616,19 +611,52 @@ static void pgd_mop_up_pmds(struct mm_st | |
21697 | ||
21698 | pgd_t *pgd_alloc(struct mm_struct *mm) | |
21699 | { | |
21700 | - pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER); | |
21701 | + pgd_t *pgd; | |
21702 | + pmd_t *pmds[PREALLOCATED_PMDS]; | |
21703 | + unsigned long flags; | |
21704 | + | |
21705 | + pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER); | |
21706 | + | |
21707 | + if (pgd == NULL) | |
21708 | + goto out; | |
21709 | ||
21710 | - /* so that alloc_pd can use it */ | |
21711 | mm->pgd = pgd; | |
21712 | - if (pgd) | |
21713 | - pgd_ctor(pgd); | |
21714 | ||
21715 | - if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { | |
21716 | - free_pages((unsigned long)pgd, PGD_ORDER); | |
21717 | - pgd = NULL; | |
21718 | + if (preallocate_pmds(pmds, mm) != 0) | |
21719 | + goto out_free_pgd; | |
21720 | + | |
21721 | + if (paravirt_pgd_alloc(mm) != 0) | |
21722 | + goto out_free_pmds; | |
21723 | + | |
21724 | + /* | |
21725 | + * Make sure that pre-populating the pmds is atomic with | |
21726 | + * respect to anything walking the pgd_list, so that they | |
21727 | + * never see a partially populated pgd. | |
21728 | + */ | |
21729 | + spin_lock_irqsave(&pgd_lock, flags); | |
21730 | + | |
21731 | +#ifdef CONFIG_X86_PAE | |
21732 | + /* Protect against save/restore: move below 4GB under pgd_lock. */ | |
21733 | + if (!xen_feature(XENFEAT_pae_pgdir_above_4gb) | |
21734 | + && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) { | |
21735 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
21736 | + goto out_free_pmds; | |
21737 | } | |
21738 | +#endif | |
21739 | + | |
21740 | + pgd_ctor(pgd); | |
21741 | + pgd_prepopulate_pmd(mm, pgd, pmds); | |
21742 | + | |
21743 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
21744 | ||
21745 | return pgd; | |
21746 | + | |
21747 | +out_free_pmds: | |
21748 | + free_pmds(pmds, mm, !xen_feature(XENFEAT_pae_pgdir_above_4gb)); | |
21749 | +out_free_pgd: | |
21750 | + free_pages((unsigned long)pgd, PGD_ORDER); | |
21751 | +out: | |
21752 | + return NULL; | |
21753 | } | |
21754 | ||
21755 | void pgd_free(struct mm_struct *mm, pgd_t *pgd) | |
21756 | @@ -644,6 +672,7 @@ void pgd_free(struct mm_struct *mm, pgd_ | |
21757 | pgd_dtor(pgd); | |
21758 | ||
21759 | pgd_mop_up_pmds(mm, pgd); | |
21760 | + paravirt_pgd_free(mm, pgd); | |
21761 | free_pages((unsigned long)pgd, PGD_ORDER); | |
21762 | } | |
21763 | ||
21764 | @@ -685,7 +714,7 @@ int ptep_test_and_clear_young(struct vm_ | |
21765 | ||
21766 | if (pte_young(*ptep)) | |
21767 | ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, | |
21768 | - &ptep->pte); | |
21769 | + (unsigned long *) &ptep->pte); | |
21770 | ||
21771 | if (ret) | |
21772 | pte_update(vma->vm_mm, addr, ptep); | |
21773 | @@ -707,3 +736,42 @@ int ptep_clear_flush_young(struct vm_are | |
21774 | ||
21775 | return young; | |
21776 | } | |
21777 | + | |
21778 | +int fixmaps_set; | |
21779 | + | |
21780 | +void xen_set_fixmap(enum fixed_addresses idx, maddr_t phys, pgprot_t flags) | |
21781 | +{ | |
21782 | + unsigned long address = __fix_to_virt(idx); | |
21783 | + pte_t pte; | |
21784 | + | |
21785 | + if (idx >= __end_of_fixed_addresses) { | |
21786 | + BUG(); | |
21787 | + return; | |
21788 | + } | |
21789 | + | |
21790 | + switch (idx) { | |
21791 | +#ifdef CONFIG_X86_64 | |
21792 | + extern pte_t level1_fixmap_pgt[PTRS_PER_PTE]; | |
21793 | + | |
21794 | + case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: | |
21795 | + pte = pfn_pte(phys >> PAGE_SHIFT, flags); | |
21796 | + set_pte_vaddr_pud(level3_user_pgt, address, pte); | |
21797 | + break; | |
21798 | + case FIX_EARLYCON_MEM_BASE: | |
21799 | + xen_l1_entry_update(level1_fixmap_pgt + pte_index(address), | |
21800 | + pfn_pte_ma(phys >> PAGE_SHIFT, flags)); | |
21801 | + fixmaps_set++; | |
21802 | + return; | |
21803 | +#else | |
21804 | + case FIX_WP_TEST: | |
21805 | + case FIX_VDSO: | |
21806 | + pte = pfn_pte(phys >> PAGE_SHIFT, flags); | |
21807 | + break; | |
21808 | +#endif | |
21809 | + default: | |
21810 | + pte = pfn_pte_ma(phys >> PAGE_SHIFT, flags); | |
21811 | + break; | |
21812 | + } | |
21813 | + set_pte_vaddr(address, pte); | |
21814 | + fixmaps_set++; | |
21815 | +} | |
82094b55 AF |
21816 | --- sle11-2009-10-16.orig/arch/x86/mm/pgtable_32-xen.c 2009-03-16 16:38:05.000000000 +0100 |
21817 | +++ sle11-2009-10-16/arch/x86/mm/pgtable_32-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
21818 | @@ -25,51 +25,49 @@ |
21819 | #include <xen/features.h> | |
21820 | #include <asm/hypervisor.h> | |
21821 | ||
21822 | -void show_mem(void) | |
21823 | +/* | |
21824 | + * Associate a virtual page frame with a given physical page frame | |
21825 | + * and protection flags for that frame. | |
21826 | + */ | |
21827 | +void set_pte_vaddr(unsigned long vaddr, pte_t pteval) | |
21828 | { | |
21829 | - int total = 0, reserved = 0; | |
21830 | - int shared = 0, cached = 0; | |
21831 | - int highmem = 0; | |
21832 | - struct page *page; | |
21833 | - pg_data_t *pgdat; | |
21834 | - unsigned long i; | |
21835 | - unsigned long flags; | |
21836 | - | |
21837 | - printk(KERN_INFO "Mem-info:\n"); | |
21838 | - show_free_areas(); | |
21839 | - for_each_online_pgdat(pgdat) { | |
21840 | - pgdat_resize_lock(pgdat, &flags); | |
21841 | - for (i = 0; i < pgdat->node_spanned_pages; ++i) { | |
21842 | - if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) | |
21843 | - touch_nmi_watchdog(); | |
21844 | - page = pgdat_page_nr(pgdat, i); | |
21845 | - total++; | |
21846 | - if (PageHighMem(page)) | |
21847 | - highmem++; | |
21848 | - if (PageReserved(page)) | |
21849 | - reserved++; | |
21850 | - else if (PageSwapCache(page)) | |
21851 | - cached++; | |
21852 | - else if (page_count(page)) | |
21853 | - shared += page_count(page) - 1; | |
21854 | - } | |
21855 | - pgdat_resize_unlock(pgdat, &flags); | |
21856 | - } | |
21857 | - printk(KERN_INFO "%d pages of RAM\n", total); | |
21858 | - printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); | |
21859 | - printk(KERN_INFO "%d reserved pages\n", reserved); | |
21860 | - printk(KERN_INFO "%d pages shared\n", shared); | |
21861 | - printk(KERN_INFO "%d pages swap cached\n", cached); | |
21862 | - | |
21863 | - printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY)); | |
21864 | - printk(KERN_INFO "%lu pages writeback\n", | |
21865 | - global_page_state(NR_WRITEBACK)); | |
21866 | - printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); | |
21867 | - printk(KERN_INFO "%lu pages slab\n", | |
21868 | - global_page_state(NR_SLAB_RECLAIMABLE) + | |
21869 | - global_page_state(NR_SLAB_UNRECLAIMABLE)); | |
21870 | - printk(KERN_INFO "%lu pages pagetables\n", | |
21871 | - global_page_state(NR_PAGETABLE)); | |
21872 | +#ifndef CONFIG_XEN | |
21873 | + pgd_t *pgd; | |
21874 | + pud_t *pud; | |
21875 | + pmd_t *pmd; | |
21876 | + pte_t *pte; | |
21877 | + | |
21878 | + pgd = swapper_pg_dir + pgd_index(vaddr); | |
21879 | + if (pgd_none(*pgd)) { | |
21880 | + BUG(); | |
21881 | + return; | |
21882 | + } | |
21883 | + pud = pud_offset(pgd, vaddr); | |
21884 | + if (pud_none(*pud)) { | |
21885 | + BUG(); | |
21886 | + return; | |
21887 | + } | |
21888 | + pmd = pmd_offset(pud, vaddr); | |
21889 | + if (pmd_none(*pmd)) { | |
21890 | + BUG(); | |
21891 | + return; | |
21892 | + } | |
21893 | + pte = pte_offset_kernel(pmd, vaddr); | |
21894 | + if (pte_val(pteval)) | |
21895 | + set_pte_present(&init_mm, vaddr, pte, pteval); | |
21896 | + else | |
21897 | + pte_clear(&init_mm, vaddr, pte); | |
21898 | + | |
21899 | + /* | |
21900 | + * It's enough to flush this one mapping. | |
21901 | + * (PGE mappings get flushed as well) | |
21902 | + */ | |
21903 | + __flush_tlb_one(vaddr); | |
21904 | +#else | |
21905 | + if (HYPERVISOR_update_va_mapping(vaddr, pteval, | |
21906 | + UVMF_INVLPG|UVMF_ALL)) | |
21907 | + BUG(); | |
21908 | +#endif | |
21909 | } | |
21910 | ||
21911 | /* | |
21912 | @@ -107,35 +105,10 @@ void set_pmd_pfn(unsigned long vaddr, un | |
21913 | __flush_tlb_one(vaddr); | |
21914 | } | |
21915 | ||
21916 | -static int fixmaps; | |
21917 | unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START; | |
21918 | unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE); | |
21919 | EXPORT_SYMBOL(__FIXADDR_TOP); | |
21920 | ||
21921 | -void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags) | |
21922 | -{ | |
21923 | - unsigned long address = __fix_to_virt(idx); | |
21924 | - pte_t pte; | |
21925 | - | |
21926 | - if (idx >= __end_of_fixed_addresses) { | |
21927 | - BUG(); | |
21928 | - return; | |
21929 | - } | |
21930 | - switch (idx) { | |
21931 | - case FIX_WP_TEST: | |
21932 | - case FIX_VDSO: | |
21933 | - pte = pfn_pte(phys >> PAGE_SHIFT, flags); | |
21934 | - break; | |
21935 | - default: | |
21936 | - pte = pfn_pte_ma(phys >> PAGE_SHIFT, flags); | |
21937 | - break; | |
21938 | - } | |
21939 | - if (HYPERVISOR_update_va_mapping(address, pte, | |
21940 | - UVMF_INVLPG|UVMF_ALL)) | |
21941 | - BUG(); | |
21942 | - fixmaps++; | |
21943 | -} | |
21944 | - | |
21945 | /** | |
21946 | * reserve_top_address - reserves a hole in the top of kernel address space | |
21947 | * @reserve - size of hole to reserve | |
21948 | @@ -145,13 +118,48 @@ void __set_fixmap (enum fixed_addresses | |
21949 | */ | |
21950 | void __init reserve_top_address(unsigned long reserve) | |
21951 | { | |
21952 | - BUG_ON(fixmaps > 0); | |
21953 | + BUG_ON(fixmaps_set > 0); | |
21954 | printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", | |
21955 | (int)-reserve); | |
21956 | __FIXADDR_TOP = -reserve - PAGE_SIZE; | |
21957 | __VMALLOC_RESERVE += reserve; | |
21958 | } | |
21959 | ||
21960 | +/* | |
21961 | + * vmalloc=size forces the vmalloc area to be exactly 'size' | |
21962 | + * bytes. This can be used to increase (or decrease) the | |
21963 | + * vmalloc area - the default is 128m. | |
21964 | + */ | |
21965 | +static int __init parse_vmalloc(char *arg) | |
21966 | +{ | |
21967 | + if (!arg) | |
21968 | + return -EINVAL; | |
21969 | + | |
21970 | + __VMALLOC_RESERVE = memparse(arg, &arg); | |
21971 | + return 0; | |
21972 | +} | |
21973 | +early_param("vmalloc", parse_vmalloc); | |
21974 | + | |
21975 | +#ifndef CONFIG_XEN | |
21976 | +/* | |
21977 | + * reservetop=size reserves a hole at the top of the kernel address space which | |
21978 | + * a hypervisor can load into later. Needed for dynamically loaded hypervisors, | |
21979 | + * so relocating the fixmap can be done before paging initialization. | |
21980 | + */ | |
21981 | +static int __init parse_reservetop(char *arg) | |
21982 | +{ | |
21983 | + unsigned long address; | |
21984 | + | |
21985 | + if (!arg) | |
21986 | + return -EINVAL; | |
21987 | + | |
21988 | + address = memparse(arg, &arg); | |
21989 | + reserve_top_address(address); | |
21990 | + return 0; | |
21991 | +} | |
21992 | +early_param("reservetop", parse_reservetop); | |
21993 | +#endif | |
21994 | + | |
21995 | void make_lowmem_page_readonly(void *va, unsigned int feature) | |
21996 | { | |
21997 | pte_t *pte; | |
82094b55 AF |
21998 | --- sle11-2009-10-16.orig/arch/x86/pci/amd_bus.c 2009-10-28 14:55:02.000000000 +0100 |
21999 | +++ sle11-2009-10-16/arch/x86/pci/amd_bus.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
22000 | @@ -607,6 +607,14 @@ static int __init pci_io_ecs_init(void) |
22001 | for_each_online_cpu(cpu) | |
22002 | amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE, | |
22003 | (void *)(long)cpu); | |
22004 | +#ifdef CONFIG_XEN | |
22005 | + { | |
22006 | + u64 reg; | |
22007 | + rdmsrl(MSR_AMD64_NB_CFG, reg); | |
22008 | + if (!(reg & ENABLE_CF8_EXT_CFG)) | |
22009 | + return 0; | |
22010 | + } | |
22011 | +#endif | |
22012 | pci_probe |= PCI_HAS_IO_ECS; | |
22013 | ||
22014 | return 0; | |
22015 | @@ -614,6 +622,10 @@ static int __init pci_io_ecs_init(void) | |
22016 | ||
22017 | static int __init amd_postcore_init(void) | |
22018 | { | |
22019 | +#ifdef CONFIG_XEN | |
22020 | + if (!is_initial_xendomain()) | |
22021 | + return 0; | |
22022 | +#endif | |
22023 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) | |
22024 | return 0; | |
22025 | ||
82094b55 AF |
22026 | --- sle11-2009-10-16.orig/arch/x86/pci/irq-xen.c 2009-03-16 16:38:05.000000000 +0100 |
22027 | +++ sle11-2009-10-16/arch/x86/pci/irq-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
22028 | @@ -11,8 +11,8 @@ |
22029 | #include <linux/slab.h> | |
22030 | #include <linux/interrupt.h> | |
22031 | #include <linux/dmi.h> | |
22032 | -#include <asm/io.h> | |
22033 | -#include <asm/smp.h> | |
22034 | +#include <linux/io.h> | |
22035 | +#include <linux/smp.h> | |
22036 | #include <asm/io_apic.h> | |
22037 | #include <linux/irq.h> | |
22038 | #include <linux/acpi.h> | |
22039 | @@ -45,7 +45,8 @@ struct irq_router { | |
22040 | char *name; | |
22041 | u16 vendor, device; | |
22042 | int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq); | |
22043 | - int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new); | |
22044 | + int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, | |
22045 | + int new); | |
22046 | }; | |
22047 | ||
22048 | struct irq_router_handler { | |
22049 | @@ -61,7 +62,7 @@ void (*pcibios_disable_irq)(struct pci_d | |
22050 | * and perform checksum verification. | |
22051 | */ | |
22052 | ||
22053 | -static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr) | |
22054 | +static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr) | |
22055 | { | |
22056 | struct irq_routing_table *rt; | |
22057 | int i; | |
22058 | @@ -74,10 +75,11 @@ static inline struct irq_routing_table * | |
22059 | rt->size < sizeof(struct irq_routing_table)) | |
22060 | return NULL; | |
22061 | sum = 0; | |
22062 | - for (i=0; i < rt->size; i++) | |
22063 | + for (i = 0; i < rt->size; i++) | |
22064 | sum += addr[i]; | |
22065 | if (!sum) { | |
22066 | - DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt); | |
22067 | + DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", | |
22068 | + rt); | |
22069 | return rt; | |
22070 | } | |
22071 | return NULL; | |
22072 | @@ -104,7 +106,9 @@ static struct irq_routing_table * __init | |
22073 | return rt; | |
22074 | printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n"); | |
22075 | } | |
22076 | - for(addr = (u8 *) isa_bus_to_virt(0xf0000); addr < (u8 *) isa_bus_to_virt(0x100000); addr += 16) { | |
22077 | + for (addr = (u8 *) isa_bus_to_virt(0xf0000); | |
22078 | + addr < (u8 *) isa_bus_to_virt(0x100000); | |
22079 | + addr += 16) { | |
22080 | rt = pirq_check_routing_table(addr); | |
22081 | if (rt) | |
22082 | return rt; | |
22083 | @@ -126,20 +130,20 @@ static void __init pirq_peer_trick(void) | |
22084 | struct irq_info *e; | |
22085 | ||
22086 | memset(busmap, 0, sizeof(busmap)); | |
22087 | - for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { | |
22088 | + for (i = 0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { | |
22089 | e = &rt->slots[i]; | |
22090 | #ifdef DEBUG | |
22091 | { | |
22092 | int j; | |
22093 | DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); | |
22094 | - for(j=0; j<4; j++) | |
22095 | + for (j = 0; j < 4; j++) | |
22096 | DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap); | |
22097 | DBG("\n"); | |
22098 | } | |
22099 | #endif | |
22100 | busmap[e->bus] = 1; | |
22101 | } | |
22102 | - for(i = 1; i < 256; i++) { | |
22103 | + for (i = 1; i < 256; i++) { | |
22104 | int node; | |
22105 | if (!busmap[i] || pci_find_bus(0, i)) | |
22106 | continue; | |
22107 | @@ -187,7 +191,8 @@ static unsigned int read_config_nybble(s | |
22108 | return (nr & 1) ? (x >> 4) : (x & 0xf); | |
22109 | } | |
22110 | ||
22111 | -static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val) | |
22112 | +static void write_config_nybble(struct pci_dev *router, unsigned offset, | |
22113 | + unsigned nr, unsigned int val) | |
22114 | { | |
22115 | u8 x; | |
22116 | unsigned reg = offset + (nr >> 1); | |
22117 | @@ -289,7 +294,7 @@ static int pirq_ite_get(struct pci_dev * | |
22118 | static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; | |
22119 | ||
22120 | WARN_ON_ONCE(pirq > 4); | |
22121 | - return read_config_nybble(router,0x43, pirqmap[pirq-1]); | |
22122 | + return read_config_nybble(router, 0x43, pirqmap[pirq-1]); | |
22123 | } | |
22124 | ||
22125 | static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) | |
22126 | @@ -318,7 +323,7 @@ static int pirq_opti_set(struct pci_dev | |
22127 | ||
22128 | /* | |
22129 | * Cyrix: nibble offset 0x5C | |
22130 | - * 0x5C bits 7:4 is INTB bits 3:0 is INTA | |
22131 | + * 0x5C bits 7:4 is INTB bits 3:0 is INTA | |
22132 | * 0x5D bits 7:4 is INTD bits 3:0 is INTC | |
22133 | */ | |
22134 | static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) | |
22135 | @@ -354,7 +359,7 @@ static int pirq_cyrix_set(struct pci_dev | |
22136 | * Apparently there are systems implementing PCI routing table using | |
22137 | * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D. | |
22138 | * We try our best to handle both link mappings. | |
22139 | - * | |
22140 | + * | |
22141 | * Currently (2003-05-21) it appears most SiS chipsets follow the | |
22142 | * definition of routing registers from the SiS-5595 southbridge. | |
22143 | * According to the SiS 5595 datasheets the revision id's of the | |
22144 | @@ -374,7 +379,7 @@ static int pirq_cyrix_set(struct pci_dev | |
22145 | * | |
22146 | * 0x62: USBIRQ: | |
22147 | * bit 6 OHCI function disabled (0), enabled (1) | |
22148 | - * | |
22149 | + * | |
22150 | * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved | |
22151 | * | |
22152 | * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved | |
22153 | @@ -437,7 +442,7 @@ static int pirq_vlsi_get(struct pci_dev | |
22154 | { | |
22155 | WARN_ON_ONCE(pirq >= 9); | |
22156 | if (pirq > 8) { | |
22157 | - printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); | |
22158 | + dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); | |
22159 | return 0; | |
22160 | } | |
22161 | return read_config_nybble(router, 0x74, pirq-1); | |
22162 | @@ -447,7 +452,7 @@ static int pirq_vlsi_set(struct pci_dev | |
22163 | { | |
22164 | WARN_ON_ONCE(pirq >= 9); | |
22165 | if (pirq > 8) { | |
22166 | - printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); | |
22167 | + dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); | |
22168 | return 0; | |
22169 | } | |
22170 | write_config_nybble(router, 0x74, pirq-1, irq); | |
22171 | @@ -471,7 +476,8 @@ static int pirq_serverworks_get(struct p | |
22172 | return inb(0xc01) & 0xf; | |
22173 | } | |
22174 | ||
22175 | -static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) | |
22176 | +static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, | |
22177 | + int pirq, int irq) | |
22178 | { | |
22179 | outb(pirq, 0xc00); | |
22180 | outb(irq, 0xc01); | |
22181 | @@ -491,22 +497,20 @@ static int pirq_amd756_get(struct pci_de | |
22182 | u8 irq; | |
22183 | irq = 0; | |
22184 | if (pirq <= 4) | |
22185 | - { | |
22186 | irq = read_config_nybble(router, 0x56, pirq - 1); | |
22187 | - } | |
22188 | - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", | |
22189 | - dev->vendor, dev->device, pirq, irq); | |
22190 | + dev_info(&dev->dev, | |
22191 | + "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n", | |
22192 | + dev->vendor, dev->device, pirq, irq); | |
22193 | return irq; | |
22194 | } | |
22195 | ||
22196 | static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) | |
22197 | { | |
22198 | - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", | |
22199 | - dev->vendor, dev->device, pirq, irq); | |
22200 | + dev_info(&dev->dev, | |
22201 | + "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n", | |
22202 | + dev->vendor, dev->device, pirq, irq); | |
22203 | if (pirq <= 4) | |
22204 | - { | |
22205 | write_config_nybble(router, 0x56, pirq - 1, irq); | |
22206 | - } | |
22207 | return 1; | |
22208 | } | |
22209 | ||
22210 | @@ -553,50 +557,51 @@ static __init int intel_router_probe(str | |
22211 | if (pci_dev_present(pirq_440gx)) | |
22212 | return 0; | |
22213 | ||
22214 | - switch(device) | |
22215 | - { | |
22216 | - case PCI_DEVICE_ID_INTEL_82371FB_0: | |
22217 | - case PCI_DEVICE_ID_INTEL_82371SB_0: | |
22218 | - case PCI_DEVICE_ID_INTEL_82371AB_0: | |
22219 | - case PCI_DEVICE_ID_INTEL_82371MX: | |
22220 | - case PCI_DEVICE_ID_INTEL_82443MX_0: | |
22221 | - case PCI_DEVICE_ID_INTEL_82801AA_0: | |
22222 | - case PCI_DEVICE_ID_INTEL_82801AB_0: | |
22223 | - case PCI_DEVICE_ID_INTEL_82801BA_0: | |
22224 | - case PCI_DEVICE_ID_INTEL_82801BA_10: | |
22225 | - case PCI_DEVICE_ID_INTEL_82801CA_0: | |
22226 | - case PCI_DEVICE_ID_INTEL_82801CA_12: | |
22227 | - case PCI_DEVICE_ID_INTEL_82801DB_0: | |
22228 | - case PCI_DEVICE_ID_INTEL_82801E_0: | |
22229 | - case PCI_DEVICE_ID_INTEL_82801EB_0: | |
22230 | - case PCI_DEVICE_ID_INTEL_ESB_1: | |
22231 | - case PCI_DEVICE_ID_INTEL_ICH6_0: | |
22232 | - case PCI_DEVICE_ID_INTEL_ICH6_1: | |
22233 | - case PCI_DEVICE_ID_INTEL_ICH7_0: | |
22234 | - case PCI_DEVICE_ID_INTEL_ICH7_1: | |
22235 | - case PCI_DEVICE_ID_INTEL_ICH7_30: | |
22236 | - case PCI_DEVICE_ID_INTEL_ICH7_31: | |
22237 | - case PCI_DEVICE_ID_INTEL_ESB2_0: | |
22238 | - case PCI_DEVICE_ID_INTEL_ICH8_0: | |
22239 | - case PCI_DEVICE_ID_INTEL_ICH8_1: | |
22240 | - case PCI_DEVICE_ID_INTEL_ICH8_2: | |
22241 | - case PCI_DEVICE_ID_INTEL_ICH8_3: | |
22242 | - case PCI_DEVICE_ID_INTEL_ICH8_4: | |
22243 | - case PCI_DEVICE_ID_INTEL_ICH9_0: | |
22244 | - case PCI_DEVICE_ID_INTEL_ICH9_1: | |
22245 | - case PCI_DEVICE_ID_INTEL_ICH9_2: | |
22246 | - case PCI_DEVICE_ID_INTEL_ICH9_3: | |
22247 | - case PCI_DEVICE_ID_INTEL_ICH9_4: | |
22248 | - case PCI_DEVICE_ID_INTEL_ICH9_5: | |
22249 | - case PCI_DEVICE_ID_INTEL_TOLAPAI_0: | |
22250 | - case PCI_DEVICE_ID_INTEL_ICH10_0: | |
22251 | - case PCI_DEVICE_ID_INTEL_ICH10_1: | |
22252 | - case PCI_DEVICE_ID_INTEL_ICH10_2: | |
22253 | - case PCI_DEVICE_ID_INTEL_ICH10_3: | |
22254 | - r->name = "PIIX/ICH"; | |
22255 | - r->get = pirq_piix_get; | |
22256 | - r->set = pirq_piix_set; | |
22257 | - return 1; | |
22258 | + switch (device) { | |
22259 | + case PCI_DEVICE_ID_INTEL_82371FB_0: | |
22260 | + case PCI_DEVICE_ID_INTEL_82371SB_0: | |
22261 | + case PCI_DEVICE_ID_INTEL_82371AB_0: | |
22262 | + case PCI_DEVICE_ID_INTEL_82371MX: | |
22263 | + case PCI_DEVICE_ID_INTEL_82443MX_0: | |
22264 | + case PCI_DEVICE_ID_INTEL_82801AA_0: | |
22265 | + case PCI_DEVICE_ID_INTEL_82801AB_0: | |
22266 | + case PCI_DEVICE_ID_INTEL_82801BA_0: | |
22267 | + case PCI_DEVICE_ID_INTEL_82801BA_10: | |
22268 | + case PCI_DEVICE_ID_INTEL_82801CA_0: | |
22269 | + case PCI_DEVICE_ID_INTEL_82801CA_12: | |
22270 | + case PCI_DEVICE_ID_INTEL_82801DB_0: | |
22271 | + case PCI_DEVICE_ID_INTEL_82801E_0: | |
22272 | + case PCI_DEVICE_ID_INTEL_82801EB_0: | |
22273 | + case PCI_DEVICE_ID_INTEL_ESB_1: | |
22274 | + case PCI_DEVICE_ID_INTEL_ICH6_0: | |
22275 | + case PCI_DEVICE_ID_INTEL_ICH6_1: | |
22276 | + case PCI_DEVICE_ID_INTEL_ICH7_0: | |
22277 | + case PCI_DEVICE_ID_INTEL_ICH7_1: | |
22278 | + case PCI_DEVICE_ID_INTEL_ICH7_30: | |
22279 | + case PCI_DEVICE_ID_INTEL_ICH7_31: | |
22280 | + case PCI_DEVICE_ID_INTEL_ESB2_0: | |
22281 | + case PCI_DEVICE_ID_INTEL_ICH8_0: | |
22282 | + case PCI_DEVICE_ID_INTEL_ICH8_1: | |
22283 | + case PCI_DEVICE_ID_INTEL_ICH8_2: | |
22284 | + case PCI_DEVICE_ID_INTEL_ICH8_3: | |
22285 | + case PCI_DEVICE_ID_INTEL_ICH8_4: | |
22286 | + case PCI_DEVICE_ID_INTEL_ICH9_0: | |
22287 | + case PCI_DEVICE_ID_INTEL_ICH9_1: | |
22288 | + case PCI_DEVICE_ID_INTEL_ICH9_2: | |
22289 | + case PCI_DEVICE_ID_INTEL_ICH9_3: | |
22290 | + case PCI_DEVICE_ID_INTEL_ICH9_4: | |
22291 | + case PCI_DEVICE_ID_INTEL_ICH9_5: | |
22292 | + case PCI_DEVICE_ID_INTEL_TOLAPAI_0: | |
22293 | + case PCI_DEVICE_ID_INTEL_ICH10_0: | |
22294 | + case PCI_DEVICE_ID_INTEL_ICH10_1: | |
22295 | + case PCI_DEVICE_ID_INTEL_ICH10_2: | |
22296 | + case PCI_DEVICE_ID_INTEL_ICH10_3: | |
22297 | + case PCI_DEVICE_ID_INTEL_PCH_0: | |
22298 | + case PCI_DEVICE_ID_INTEL_PCH_1: | |
22299 | + r->name = "PIIX/ICH"; | |
22300 | + r->get = pirq_piix_get; | |
22301 | + r->set = pirq_piix_set; | |
22302 | + return 1; | |
22303 | } | |
22304 | return 0; | |
22305 | } | |
22306 | @@ -610,7 +615,7 @@ static __init int via_router_probe(struc | |
22307 | * workarounds for some buggy BIOSes | |
22308 | */ | |
22309 | if (device == PCI_DEVICE_ID_VIA_82C586_0) { | |
22310 | - switch(router->device) { | |
22311 | + switch (router->device) { | |
22312 | case PCI_DEVICE_ID_VIA_82C686: | |
22313 | /* | |
22314 | * Asus k7m bios wrongly reports 82C686A | |
22315 | @@ -635,7 +640,7 @@ static __init int via_router_probe(struc | |
22316 | } | |
22317 | } | |
22318 | ||
22319 | - switch(device) { | |
22320 | + switch (device) { | |
22321 | case PCI_DEVICE_ID_VIA_82C586_0: | |
22322 | r->name = "VIA"; | |
22323 | r->get = pirq_via586_get; | |
22324 | @@ -658,28 +663,27 @@ static __init int via_router_probe(struc | |
22325 | ||
22326 | static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | |
22327 | { | |
22328 | - switch(device) | |
22329 | - { | |
22330 | - case PCI_DEVICE_ID_VLSI_82C534: | |
22331 | - r->name = "VLSI 82C534"; | |
22332 | - r->get = pirq_vlsi_get; | |
22333 | - r->set = pirq_vlsi_set; | |
22334 | - return 1; | |
22335 | + switch (device) { | |
22336 | + case PCI_DEVICE_ID_VLSI_82C534: | |
22337 | + r->name = "VLSI 82C534"; | |
22338 | + r->get = pirq_vlsi_get; | |
22339 | + r->set = pirq_vlsi_set; | |
22340 | + return 1; | |
22341 | } | |
22342 | return 0; | |
22343 | } | |
22344 | ||
22345 | ||
22346 | -static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | |
22347 | +static __init int serverworks_router_probe(struct irq_router *r, | |
22348 | + struct pci_dev *router, u16 device) | |
22349 | { | |
22350 | - switch(device) | |
22351 | - { | |
22352 | - case PCI_DEVICE_ID_SERVERWORKS_OSB4: | |
22353 | - case PCI_DEVICE_ID_SERVERWORKS_CSB5: | |
22354 | - r->name = "ServerWorks"; | |
22355 | - r->get = pirq_serverworks_get; | |
22356 | - r->set = pirq_serverworks_set; | |
22357 | - return 1; | |
22358 | + switch (device) { | |
22359 | + case PCI_DEVICE_ID_SERVERWORKS_OSB4: | |
22360 | + case PCI_DEVICE_ID_SERVERWORKS_CSB5: | |
22361 | + r->name = "ServerWorks"; | |
22362 | + r->get = pirq_serverworks_get; | |
22363 | + r->set = pirq_serverworks_set; | |
22364 | + return 1; | |
22365 | } | |
22366 | return 0; | |
22367 | } | |
22368 | @@ -688,7 +692,7 @@ static __init int sis_router_probe(struc | |
22369 | { | |
22370 | if (device != PCI_DEVICE_ID_SI_503) | |
22371 | return 0; | |
22372 | - | |
22373 | + | |
22374 | r->name = "SIS"; | |
22375 | r->get = pirq_sis_get; | |
22376 | r->set = pirq_sis_set; | |
22377 | @@ -697,50 +701,45 @@ static __init int sis_router_probe(struc | |
22378 | ||
22379 | static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | |
22380 | { | |
22381 | - switch(device) | |
22382 | - { | |
22383 | - case PCI_DEVICE_ID_CYRIX_5520: | |
22384 | - r->name = "NatSemi"; | |
22385 | - r->get = pirq_cyrix_get; | |
22386 | - r->set = pirq_cyrix_set; | |
22387 | - return 1; | |
22388 | + switch (device) { | |
22389 | + case PCI_DEVICE_ID_CYRIX_5520: | |
22390 | + r->name = "NatSemi"; | |
22391 | + r->get = pirq_cyrix_get; | |
22392 | + r->set = pirq_cyrix_set; | |
22393 | + return 1; | |
22394 | } | |
22395 | return 0; | |
22396 | } | |
22397 | ||
22398 | static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | |
22399 | { | |
22400 | - switch(device) | |
22401 | - { | |
22402 | - case PCI_DEVICE_ID_OPTI_82C700: | |
22403 | - r->name = "OPTI"; | |
22404 | - r->get = pirq_opti_get; | |
22405 | - r->set = pirq_opti_set; | |
22406 | - return 1; | |
22407 | + switch (device) { | |
22408 | + case PCI_DEVICE_ID_OPTI_82C700: | |
22409 | + r->name = "OPTI"; | |
22410 | + r->get = pirq_opti_get; | |
22411 | + r->set = pirq_opti_set; | |
22412 | + return 1; | |
22413 | } | |
22414 | return 0; | |
22415 | } | |
22416 | ||
22417 | static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | |
22418 | { | |
22419 | - switch(device) | |
22420 | - { | |
22421 | - case PCI_DEVICE_ID_ITE_IT8330G_0: | |
22422 | - r->name = "ITE"; | |
22423 | - r->get = pirq_ite_get; | |
22424 | - r->set = pirq_ite_set; | |
22425 | - return 1; | |
22426 | + switch (device) { | |
22427 | + case PCI_DEVICE_ID_ITE_IT8330G_0: | |
22428 | + r->name = "ITE"; | |
22429 | + r->get = pirq_ite_get; | |
22430 | + r->set = pirq_ite_set; | |
22431 | + return 1; | |
22432 | } | |
22433 | return 0; | |
22434 | } | |
22435 | ||
22436 | static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | |
22437 | { | |
22438 | - switch(device) | |
22439 | - { | |
22440 | + switch (device) { | |
22441 | case PCI_DEVICE_ID_AL_M1533: | |
22442 | case PCI_DEVICE_ID_AL_M1563: | |
22443 | - printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); | |
22444 | r->name = "ALI"; | |
22445 | r->get = pirq_ali_get; | |
22446 | r->set = pirq_ali_set; | |
22447 | @@ -751,25 +750,24 @@ static __init int ali_router_probe(struc | |
22448 | ||
22449 | static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | |
22450 | { | |
22451 | - switch(device) | |
22452 | - { | |
22453 | - case PCI_DEVICE_ID_AMD_VIPER_740B: | |
22454 | - r->name = "AMD756"; | |
22455 | - break; | |
22456 | - case PCI_DEVICE_ID_AMD_VIPER_7413: | |
22457 | - r->name = "AMD766"; | |
22458 | - break; | |
22459 | - case PCI_DEVICE_ID_AMD_VIPER_7443: | |
22460 | - r->name = "AMD768"; | |
22461 | - break; | |
22462 | - default: | |
22463 | - return 0; | |
22464 | + switch (device) { | |
22465 | + case PCI_DEVICE_ID_AMD_VIPER_740B: | |
22466 | + r->name = "AMD756"; | |
22467 | + break; | |
22468 | + case PCI_DEVICE_ID_AMD_VIPER_7413: | |
22469 | + r->name = "AMD766"; | |
22470 | + break; | |
22471 | + case PCI_DEVICE_ID_AMD_VIPER_7443: | |
22472 | + r->name = "AMD768"; | |
22473 | + break; | |
22474 | + default: | |
22475 | + return 0; | |
22476 | } | |
22477 | r->get = pirq_amd756_get; | |
22478 | r->set = pirq_amd756_set; | |
22479 | return 1; | |
22480 | } | |
22481 | - | |
22482 | + | |
22483 | static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | |
22484 | { | |
22485 | switch (device) { | |
22486 | @@ -811,7 +809,7 @@ static struct pci_dev *pirq_router_dev; | |
22487 | * FIXME: should we have an option to say "generic for | |
22488 | * chipset" ? | |
22489 | */ | |
22490 | - | |
22491 | + | |
22492 | static void __init pirq_find_router(struct irq_router *r) | |
22493 | { | |
22494 | struct irq_routing_table *rt = pirq_table; | |
22495 | @@ -830,7 +828,7 @@ static void __init pirq_find_router(stru | |
22496 | r->name = "default"; | |
22497 | r->get = NULL; | |
22498 | r->set = NULL; | |
22499 | - | |
22500 | + | |
22501 | DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", | |
22502 | rt->rtr_vendor, rt->rtr_device); | |
22503 | ||
22504 | @@ -841,19 +839,19 @@ static void __init pirq_find_router(stru | |
22505 | return; | |
22506 | } | |
22507 | ||
22508 | - for( h = pirq_routers; h->vendor; h++) { | |
22509 | + for (h = pirq_routers; h->vendor; h++) { | |
22510 | /* First look for a router match */ | |
22511 | - if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device)) | |
22512 | + if (rt->rtr_vendor == h->vendor && | |
22513 | + h->probe(r, pirq_router_dev, rt->rtr_device)) | |
22514 | break; | |
22515 | /* Fall back to a device match */ | |
22516 | - if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device)) | |
22517 | + if (pirq_router_dev->vendor == h->vendor && | |
22518 | + h->probe(r, pirq_router_dev, pirq_router_dev->device)) | |
22519 | break; | |
22520 | } | |
22521 | - printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", | |
22522 | - pirq_router.name, | |
22523 | - pirq_router_dev->vendor, | |
22524 | - pirq_router_dev->device, | |
22525 | - pci_name(pirq_router_dev)); | |
22526 | + dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n", | |
22527 | + pirq_router.name, | |
22528 | + pirq_router_dev->vendor, pirq_router_dev->device); | |
22529 | ||
22530 | /* The device remains referenced for the kernel lifetime */ | |
22531 | } | |
22532 | @@ -861,11 +859,13 @@ static void __init pirq_find_router(stru | |
22533 | static struct irq_info *pirq_get_info(struct pci_dev *dev) | |
22534 | { | |
22535 | struct irq_routing_table *rt = pirq_table; | |
22536 | - int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); | |
22537 | + int entries = (rt->size - sizeof(struct irq_routing_table)) / | |
22538 | + sizeof(struct irq_info); | |
22539 | struct irq_info *info; | |
22540 | ||
22541 | for (info = rt->slots; entries--; info++) | |
22542 | - if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn)) | |
22543 | + if (info->bus == dev->bus->number && | |
22544 | + PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn)) | |
22545 | return info; | |
22546 | return NULL; | |
22547 | } | |
22548 | @@ -884,7 +884,7 @@ static int pcibios_lookup_irq(struct pci | |
22549 | /* Find IRQ pin */ | |
22550 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); | |
22551 | if (!pin) { | |
22552 | - DBG(KERN_DEBUG " -> no interrupt pin\n"); | |
22553 | + dev_dbg(&dev->dev, "no interrupt pin\n"); | |
22554 | return 0; | |
22555 | } | |
22556 | pin = pin - 1; | |
22557 | @@ -893,20 +893,21 @@ static int pcibios_lookup_irq(struct pci | |
22558 | ||
22559 | if (!pirq_table) | |
22560 | return 0; | |
22561 | - | |
22562 | - DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); | |
22563 | + | |
22564 | info = pirq_get_info(dev); | |
22565 | if (!info) { | |
22566 | - DBG(" -> not found in routing table\n" KERN_DEBUG); | |
22567 | + dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n", | |
22568 | + 'A' + pin); | |
22569 | return 0; | |
22570 | } | |
22571 | pirq = info->irq[pin].link; | |
22572 | mask = info->irq[pin].bitmap; | |
22573 | if (!pirq) { | |
22574 | - DBG(" -> not routed\n" KERN_DEBUG); | |
22575 | + dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin); | |
22576 | return 0; | |
22577 | } | |
22578 | - DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs); | |
22579 | + dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x", | |
22580 | + 'A' + pin, pirq, mask, pirq_table->exclusive_irqs); | |
22581 | mask &= pcibios_irq_mask; | |
22582 | ||
22583 | /* Work around broken HP Pavilion Notebooks which assign USB to | |
22584 | @@ -919,7 +920,8 @@ static int pcibios_lookup_irq(struct pci | |
22585 | } | |
22586 | ||
22587 | /* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */ | |
22588 | - if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) { | |
22589 | + if (acer_tm360_irqrouting && dev->irq == 11 && | |
22590 | + dev->vendor == PCI_VENDOR_ID_O2) { | |
22591 | pirq = 0x68; | |
22592 | mask = 0x400; | |
22593 | dev->irq = r->get(pirq_router_dev, dev, pirq); | |
22594 | @@ -932,51 +934,50 @@ static int pcibios_lookup_irq(struct pci | |
22595 | */ | |
22596 | newirq = dev->irq; | |
22597 | if (newirq && !((1 << newirq) & mask)) { | |
22598 | - if ( pci_probe & PCI_USE_PIRQ_MASK) newirq = 0; | |
22599 | - else printk("\n" KERN_WARNING | |
22600 | - "PCI: IRQ %i for device %s doesn't match PIRQ mask " | |
22601 | - "- try pci=usepirqmask\n" KERN_DEBUG, newirq, | |
22602 | - pci_name(dev)); | |
22603 | + if (pci_probe & PCI_USE_PIRQ_MASK) | |
22604 | + newirq = 0; | |
22605 | + else | |
22606 | + dev_warn(&dev->dev, "IRQ %d doesn't match PIRQ mask " | |
22607 | + "%#x; try pci=usepirqmask\n", newirq, mask); | |
22608 | } | |
22609 | if (!newirq && assign) { | |
22610 | for (i = 0; i < 16; i++) { | |
22611 | if (!(mask & (1 << i))) | |
22612 | continue; | |
22613 | - if (pirq_penalty[i] < pirq_penalty[newirq] && can_request_irq(i, IRQF_SHARED)) | |
22614 | + if (pirq_penalty[i] < pirq_penalty[newirq] && | |
22615 | + can_request_irq(i, IRQF_SHARED)) | |
22616 | newirq = i; | |
22617 | } | |
22618 | } | |
22619 | - DBG(" -> newirq=%d", newirq); | |
22620 | + dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin, newirq); | |
22621 | ||
22622 | /* Check if it is hardcoded */ | |
22623 | if ((pirq & 0xf0) == 0xf0) { | |
22624 | irq = pirq & 0xf; | |
22625 | - DBG(" -> hardcoded IRQ %d\n", irq); | |
22626 | - msg = "Hardcoded"; | |
22627 | - } else if ( r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ | |
22628 | - ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask)) ) { | |
22629 | - DBG(" -> got IRQ %d\n", irq); | |
22630 | - msg = "Found"; | |
22631 | + msg = "hardcoded"; | |
22632 | + } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ | |
22633 | + ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { | |
22634 | + msg = "found"; | |
22635 | eisa_set_level_irq(irq); | |
22636 | - } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { | |
22637 | - DBG(" -> assigning IRQ %d", newirq); | |
22638 | + } else if (newirq && r->set && | |
22639 | + (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { | |
22640 | if (r->set(pirq_router_dev, dev, pirq, newirq)) { | |
22641 | eisa_set_level_irq(newirq); | |
22642 | - DBG(" ... OK\n"); | |
22643 | - msg = "Assigned"; | |
22644 | + msg = "assigned"; | |
22645 | irq = newirq; | |
22646 | } | |
22647 | } | |
22648 | ||
22649 | if (!irq) { | |
22650 | - DBG(" ... failed\n"); | |
22651 | if (newirq && mask == (1 << newirq)) { | |
22652 | - msg = "Guessed"; | |
22653 | + msg = "guessed"; | |
22654 | irq = newirq; | |
22655 | - } else | |
22656 | + } else { | |
22657 | + dev_dbg(&dev->dev, "can't route interrupt\n"); | |
22658 | return 0; | |
22659 | + } | |
22660 | } | |
22661 | - printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, pci_name(dev)); | |
22662 | + dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin, irq); | |
22663 | ||
22664 | /* Update IRQ for all devices with the same pirq value */ | |
22665 | while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { | |
22666 | @@ -988,20 +989,25 @@ static int pcibios_lookup_irq(struct pci | |
22667 | if (!info) | |
22668 | continue; | |
22669 | if (info->irq[pin].link == pirq) { | |
22670 | - /* We refuse to override the dev->irq information. Give a warning! */ | |
22671 | - if ( dev2->irq && dev2->irq != irq && \ | |
22672 | + /* | |
22673 | + * We refuse to override the dev->irq | |
22674 | + * information. Give a warning! | |
22675 | + */ | |
22676 | + if (dev2->irq && dev2->irq != irq && \ | |
22677 | (!(pci_probe & PCI_USE_PIRQ_MASK) || \ | |
22678 | - ((1 << dev2->irq) & mask)) ) { | |
22679 | + ((1 << dev2->irq) & mask))) { | |
22680 | #ifndef CONFIG_PCI_MSI | |
22681 | - printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", | |
22682 | - pci_name(dev2), dev2->irq, irq); | |
22683 | + dev_info(&dev2->dev, "IRQ routing conflict: " | |
22684 | + "have IRQ %d, want IRQ %d\n", | |
22685 | + dev2->irq, irq); | |
22686 | #endif | |
22687 | - continue; | |
22688 | - } | |
22689 | + continue; | |
22690 | + } | |
22691 | dev2->irq = irq; | |
22692 | pirq_penalty[irq]++; | |
22693 | if (dev != dev2) | |
22694 | - printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, pci_name(dev2)); | |
22695 | + dev_info(&dev->dev, "sharing IRQ %d with %s\n", | |
22696 | + irq, pci_name(dev2)); | |
22697 | } | |
22698 | } | |
22699 | return 1; | |
22700 | @@ -1015,15 +1021,20 @@ static void __init pcibios_fixup_irqs(vo | |
22701 | DBG(KERN_DEBUG "PCI: IRQ fixup\n"); | |
22702 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | |
22703 | /* | |
22704 | - * If the BIOS has set an out of range IRQ number, just ignore it. | |
22705 | - * Also keep track of which IRQ's are already in use. | |
22706 | + * If the BIOS has set an out of range IRQ number, just | |
22707 | + * ignore it. Also keep track of which IRQ's are | |
22708 | + * already in use. | |
22709 | */ | |
22710 | if (dev->irq >= 16) { | |
22711 | - DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq); | |
22712 | + dev_dbg(&dev->dev, "ignoring bogus IRQ %d\n", dev->irq); | |
22713 | dev->irq = 0; | |
22714 | } | |
22715 | - /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */ | |
22716 | - if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000) | |
22717 | + /* | |
22718 | + * If the IRQ is already assigned to a PCI device, | |
22719 | + * ignore its ISA use penalty | |
22720 | + */ | |
22721 | + if (pirq_penalty[dev->irq] >= 100 && | |
22722 | + pirq_penalty[dev->irq] < 100000) | |
22723 | pirq_penalty[dev->irq] = 0; | |
22724 | pirq_penalty[dev->irq]++; | |
22725 | } | |
22726 | @@ -1035,13 +1046,17 @@ static void __init pcibios_fixup_irqs(vo | |
22727 | /* | |
22728 | * Recalculate IRQ numbers if we use the I/O APIC. | |
22729 | */ | |
22730 | - if (io_apic_assign_pci_irqs) | |
22731 | - { | |
22732 | + if (io_apic_assign_pci_irqs) { | |
22733 | int irq; | |
22734 | ||
22735 | if (pin) { | |
22736 | - pin--; /* interrupt pins are numbered starting from 1 */ | |
22737 | - irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); | |
22738 | + /* | |
22739 | + * interrupt pins are numbered starting | |
22740 | + * from 1 | |
22741 | + */ | |
22742 | + pin--; | |
22743 | + irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, | |
22744 | + PCI_SLOT(dev->devfn), pin); | |
22745 | /* | |
22746 | * Busses behind bridges are typically not listed in the MP-table. | |
22747 | * In this case we have to look up the IRQ based on the parent bus, | |
22748 | @@ -1049,18 +1064,18 @@ static void __init pcibios_fixup_irqs(vo | |
22749 | * busses itself so we should get into this branch reliably. | |
22750 | */ | |
22751 | if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ | |
22752 | - struct pci_dev * bridge = dev->bus->self; | |
22753 | + struct pci_dev *bridge = dev->bus->self; | |
22754 | ||
22755 | pin = (pin + PCI_SLOT(dev->devfn)) % 4; | |
22756 | - irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, | |
22757 | + irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, | |
22758 | PCI_SLOT(bridge->devfn), pin); | |
22759 | if (irq >= 0) | |
22760 | - printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", | |
22761 | - pci_name(bridge), 'A' + pin, irq); | |
22762 | + dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n", | |
22763 | + pci_name(bridge), | |
22764 | + 'A' + pin, irq); | |
22765 | } | |
22766 | if (irq >= 0) { | |
22767 | - printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", | |
22768 | - pci_name(dev), 'A' + pin, irq); | |
22769 | + dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq); | |
22770 | dev->irq = irq; | |
22771 | } | |
22772 | } | |
22773 | @@ -1082,7 +1097,8 @@ static int __init fix_broken_hp_bios_irq | |
22774 | { | |
22775 | if (!broken_hp_bios_irq9) { | |
22776 | broken_hp_bios_irq9 = 1; | |
22777 | - printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident); | |
22778 | + printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", | |
22779 | + d->ident); | |
22780 | } | |
22781 | return 0; | |
22782 | } | |
22783 | @@ -1095,7 +1111,8 @@ static int __init fix_acer_tm360_irqrout | |
22784 | { | |
22785 | if (!acer_tm360_irqrouting) { | |
22786 | acer_tm360_irqrouting = 1; | |
22787 | - printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident); | |
22788 | + printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", | |
22789 | + d->ident); | |
22790 | } | |
22791 | return 0; | |
22792 | } | |
22793 | @@ -1107,7 +1124,8 @@ static struct dmi_system_id __initdata p | |
22794 | .matches = { | |
22795 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), | |
22796 | DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"), | |
22797 | - DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"), | |
22798 | + DMI_MATCH(DMI_PRODUCT_VERSION, | |
22799 | + "HP Pavilion Notebook Model GE"), | |
22800 | DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"), | |
22801 | }, | |
22802 | }, | |
22803 | @@ -1122,7 +1140,7 @@ static struct dmi_system_id __initdata p | |
22804 | { } | |
22805 | }; | |
22806 | ||
22807 | -static int __init pcibios_irq_init(void) | |
22808 | +int __init pcibios_irq_init(void) | |
22809 | { | |
22810 | DBG(KERN_DEBUG "PCI: IRQ init\n"); | |
22811 | ||
22812 | @@ -1142,11 +1160,14 @@ static int __init pcibios_irq_init(void) | |
22813 | pirq_find_router(&pirq_router); | |
22814 | if (pirq_table->exclusive_irqs) { | |
22815 | int i; | |
22816 | - for (i=0; i<16; i++) | |
22817 | + for (i = 0; i < 16; i++) | |
22818 | if (!(pirq_table->exclusive_irqs & (1 << i))) | |
22819 | pirq_penalty[i] += 100; | |
22820 | } | |
22821 | - /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */ | |
22822 | + /* | |
22823 | + * If we're using the I/O APIC, avoid using the PCI IRQ | |
22824 | + * routing table | |
22825 | + */ | |
22826 | if (io_apic_assign_pci_irqs) | |
22827 | pirq_table = NULL; | |
22828 | } | |
22829 | @@ -1157,9 +1178,6 @@ static int __init pcibios_irq_init(void) | |
22830 | return 0; | |
22831 | } | |
22832 | ||
22833 | -subsys_initcall(pcibios_irq_init); | |
22834 | - | |
22835 | - | |
22836 | static void pirq_penalize_isa_irq(int irq, int active) | |
22837 | { | |
22838 | /* | |
22839 | @@ -1193,7 +1211,7 @@ static int pirq_enable_irq(struct pci_de | |
22840 | if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { | |
22841 | char *msg = ""; | |
22842 | ||
22843 | - pin--; /* interrupt pins are numbered starting from 1 */ | |
22844 | + pin--; /* interrupt pins are numbered starting from 1 */ | |
22845 | ||
22846 | if (io_apic_assign_pci_irqs) { | |
22847 | int irq; | |
22848 | @@ -1207,35 +1225,41 @@ static int pirq_enable_irq(struct pci_de | |
22849 | */ | |
22850 | temp_dev = dev; | |
22851 | while (irq < 0 && dev->bus->parent) { /* go back to the bridge */ | |
22852 | - struct pci_dev * bridge = dev->bus->self; | |
22853 | + struct pci_dev *bridge = dev->bus->self; | |
22854 | ||
22855 | pin = (pin + PCI_SLOT(dev->devfn)) % 4; | |
22856 | - irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, | |
22857 | + irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, | |
22858 | PCI_SLOT(bridge->devfn), pin); | |
22859 | if (irq >= 0) | |
22860 | - printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", | |
22861 | - pci_name(bridge), 'A' + pin, irq); | |
22862 | + dev_warn(&dev->dev, "using bridge %s " | |
22863 | + "INT %c to get IRQ %d\n", | |
22864 | + pci_name(bridge), 'A' + pin, | |
22865 | + irq); | |
22866 | dev = bridge; | |
22867 | } | |
22868 | dev = temp_dev; | |
22869 | if (irq >= 0) { | |
22870 | - printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", | |
22871 | - pci_name(dev), 'A' + pin, irq); | |
22872 | + dev_info(&dev->dev, "PCI->APIC IRQ transform: " | |
22873 | + "INT %c -> IRQ %d\n", 'A' + pin, irq); | |
22874 | dev->irq = irq; | |
22875 | return 0; | |
22876 | } else | |
22877 | - msg = " Probably buggy MP table."; | |
22878 | + msg = "; probably buggy MP table"; | |
22879 | } else if (pci_probe & PCI_BIOS_IRQ_SCAN) | |
22880 | msg = ""; | |
22881 | else | |
22882 | - msg = " Please try using pci=biosirq."; | |
22883 | + msg = "; please try using pci=biosirq"; | |
22884 | ||
22885 | - /* With IDE legacy devices the IRQ lookup failure is not a problem.. */ | |
22886 | - if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5)) | |
22887 | + /* | |
22888 | + * With IDE legacy devices the IRQ lookup failure is not | |
22889 | + * a problem.. | |
22890 | + */ | |
22891 | + if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && | |
22892 | + !(dev->class & 0x5)) | |
22893 | return 0; | |
22894 | ||
22895 | - printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", | |
22896 | - 'A' + pin, pci_name(dev), msg); | |
22897 | + dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n", | |
22898 | + 'A' + pin, msg); | |
22899 | } | |
22900 | return 0; | |
22901 | } | |
82094b55 AF |
22902 | --- sle11-2009-10-16.orig/arch/x86/vdso/Makefile 2009-03-16 16:33:40.000000000 +0100 |
22903 | +++ sle11-2009-10-16/arch/x86/vdso/Makefile 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
22904 | @@ -65,9 +65,7 @@ obj-$(VDSO32-y) += vdso32-syms.lds |
22905 | vdso32.so-$(VDSO32-y) += int80 | |
22906 | vdso32.so-$(CONFIG_COMPAT) += syscall | |
22907 | vdso32.so-$(VDSO32-y) += sysenter | |
22908 | -xen-vdso32-$(subst 1,$(CONFIG_COMPAT),$(shell expr $(CONFIG_XEN_COMPAT)0 '<' 0x0302000)) += int80 | |
22909 | -xen-vdso32-$(CONFIG_X86_32) += syscall | |
22910 | -vdso32.so-$(CONFIG_XEN) += $(xen-vdso32-y) | |
22911 | +vdso32.so-$(CONFIG_X86_XEN) += syscall | |
22912 | ||
22913 | vdso32-images = $(vdso32.so-y:%=vdso32-%.so) | |
22914 | ||
82094b55 AF |
22915 | --- sle11-2009-10-16.orig/arch/x86/vdso/vdso32.S 2009-03-16 16:33:40.000000000 +0100 |
22916 | +++ sle11-2009-10-16/arch/x86/vdso/vdso32.S 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
22917 | @@ -9,7 +9,7 @@ vdso32_int80_end: |
22918 | ||
22919 | .globl vdso32_syscall_start, vdso32_syscall_end | |
22920 | vdso32_syscall_start: | |
22921 | -#ifdef CONFIG_COMPAT | |
22922 | +#if defined(CONFIG_COMPAT) || defined(CONFIG_X86_XEN) | |
22923 | .incbin "arch/x86/vdso/vdso32-syscall.so" | |
22924 | #endif | |
22925 | vdso32_syscall_end: | |
22926 | @@ -19,16 +19,4 @@ vdso32_sysenter_start: | |
22927 | .incbin "arch/x86/vdso/vdso32-sysenter.so" | |
22928 | vdso32_sysenter_end: | |
22929 | ||
22930 | -#if defined(CONFIG_X86_64_XEN) && CONFIG_XEN_COMPAT < 0x030200 | |
22931 | - .globl vdso32_int80_start, vdso32_int80_end | |
22932 | -vdso32_int80_start: | |
22933 | - .incbin "arch/x86/vdso/vdso32-int80.so" | |
22934 | -vdso32_int80_end: | |
22935 | -#elif defined(CONFIG_X86_XEN) | |
22936 | - .globl vdso32_syscall_start, vdso32_syscall_end | |
22937 | -vdso32_syscall_start: | |
22938 | - .incbin "arch/x86/vdso/vdso32-syscall.so" | |
22939 | -vdso32_syscall_end: | |
22940 | -#endif | |
22941 | - | |
22942 | __FINIT | |
82094b55 AF |
22943 | --- sle11-2009-10-16.orig/arch/x86/vdso/vdso32-setup-xen.c 2009-03-16 16:38:05.000000000 +0100 |
22944 | +++ sle11-2009-10-16/arch/x86/vdso/vdso32-setup-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
22945 | @@ -195,50 +195,28 @@ static __init void relocate_vdso(Elf32_E |
22946 | } | |
22947 | } | |
22948 | ||
22949 | -/* | |
22950 | - * These symbols are defined by vdso32.S to mark the bounds | |
22951 | - * of the ELF DSO images included therein. | |
22952 | - */ | |
22953 | -extern const char vdso32_default_start, vdso32_default_end; | |
22954 | -extern const char vdso32_sysenter_start, vdso32_sysenter_end; | |
22955 | static struct page *vdso32_pages[1]; | |
22956 | ||
22957 | #ifdef CONFIG_X86_64 | |
22958 | ||
22959 | -#if CONFIG_XEN_COMPAT < 0x030200 | |
22960 | -static int use_int80 = 1; | |
22961 | -#endif | |
22962 | -static int use_sysenter __read_mostly = -1; | |
22963 | - | |
22964 | -#define vdso32_sysenter() (use_sysenter > 0) | |
22965 | +#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32)) | |
22966 | +#define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32)) | |
22967 | ||
22968 | -/* May not be __init: called during resume */ | |
22969 | -void syscall32_cpu_init(void) | |
22970 | +void __cpuinit syscall32_cpu_init(void) | |
22971 | { | |
22972 | - static const struct callback_register cstar = { | |
22973 | + static const struct callback_register __cpuinitconst cstar = { | |
22974 | .type = CALLBACKTYPE_syscall32, | |
22975 | .address = (unsigned long)ia32_cstar_target | |
22976 | }; | |
22977 | - static const struct callback_register sysenter = { | |
22978 | + static const struct callback_register __cpuinitconst sysenter = { | |
22979 | .type = CALLBACKTYPE_sysenter, | |
22980 | .address = (unsigned long)ia32_sysenter_target | |
22981 | }; | |
22982 | ||
22983 | - if ((HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0) || | |
22984 | - (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) < 0)) | |
22985 | -#if CONFIG_XEN_COMPAT < 0x030200 | |
22986 | - return; | |
22987 | - use_int80 = 0; | |
22988 | -#else | |
22989 | - BUG(); | |
22990 | -#endif | |
22991 | - | |
22992 | - if (use_sysenter < 0) { | |
22993 | - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | |
22994 | - use_sysenter = 1; | |
22995 | - if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) | |
22996 | - use_sysenter = 1; | |
22997 | - } | |
22998 | + if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0) | |
22999 | + setup_clear_cpu_cap(X86_FEATURE_SYSENTER32); | |
23000 | + if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) < 0) | |
23001 | + setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); | |
23002 | } | |
23003 | ||
23004 | #define compat_uses_vma 1 | |
23005 | @@ -250,6 +228,7 @@ static inline void map_compat_vdso(int m | |
23006 | #else /* CONFIG_X86_32 */ | |
23007 | ||
23008 | #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) | |
23009 | +#define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32)) | |
23010 | ||
23011 | extern asmlinkage void ia32pv_cstar_target(void); | |
23012 | static const struct callback_register __cpuinitconst cstar = { | |
23013 | @@ -265,13 +244,13 @@ void __cpuinit enable_sep_cpu(void) | |
23014 | .address = { __KERNEL_CS, (unsigned long)ia32pv_sysenter_target }, | |
23015 | }; | |
23016 | ||
23017 | - if (boot_cpu_has(X86_FEATURE_SYSCALL)) { | |
23018 | + if (vdso32_syscall()) { | |
23019 | if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) != 0) | |
23020 | BUG(); | |
23021 | return; | |
23022 | } | |
23023 | ||
23024 | - if (!boot_cpu_has(X86_FEATURE_SEP)) | |
23025 | + if (!vdso32_sysenter()) | |
23026 | return; | |
23027 | ||
23028 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) | |
23029 | @@ -341,34 +320,26 @@ int __init sysenter_setup(void) | |
23030 | ||
23031 | #ifdef CONFIG_X86_32 | |
23032 | gate_vma_init(); | |
23033 | -#endif | |
23034 | ||
23035 | -#if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT < 0x030200 | |
23036 | - if (use_int80) { | |
23037 | - extern const char vdso32_int80_start, vdso32_int80_end; | |
23038 | - | |
23039 | - vsyscall = &vdso32_int80_start; | |
23040 | - vsyscall_len = &vdso32_int80_end - &vdso32_int80_start; | |
23041 | - } else | |
23042 | -#elif defined(CONFIG_X86_32) | |
23043 | - if (boot_cpu_has(X86_FEATURE_SYSCALL) | |
23044 | - && (boot_cpu_data.x86_vendor != X86_VENDOR_AMD | |
23045 | - || HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) != 0)) | |
23046 | - setup_clear_cpu_cap(X86_FEATURE_SYSCALL); | |
23047 | - barrier(); /* until clear_bit()'s constraints are correct ... */ | |
23048 | if (boot_cpu_has(X86_FEATURE_SYSCALL)) { | |
23049 | - extern const char vdso32_syscall_start, vdso32_syscall_end; | |
23050 | - | |
23051 | + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD | |
23052 | + && HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) == 0) | |
23053 | + setup_force_cpu_cap(X86_FEATURE_SYSCALL32); | |
23054 | + else { | |
23055 | + setup_clear_cpu_cap(X86_FEATURE_SYSCALL); | |
23056 | + setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); | |
23057 | + } | |
23058 | + } | |
23059 | +#endif | |
23060 | + if (vdso32_syscall()) { | |
23061 | vsyscall = &vdso32_syscall_start; | |
23062 | vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start; | |
23063 | - } else | |
23064 | -#endif | |
23065 | - if (!vdso32_sysenter()) { | |
23066 | - vsyscall = &vdso32_default_start; | |
23067 | - vsyscall_len = &vdso32_default_end - &vdso32_default_start; | |
23068 | - } else { | |
23069 | + } else if (vdso32_sysenter()){ | |
23070 | vsyscall = &vdso32_sysenter_start; | |
23071 | vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start; | |
23072 | + } else { | |
23073 | + vsyscall = &vdso32_int80_start; | |
23074 | + vsyscall_len = &vdso32_int80_end - &vdso32_int80_start; | |
23075 | } | |
23076 | ||
23077 | memcpy(syscall_page, vsyscall, vsyscall_len); | |
82094b55 AF |
23078 | --- sle11-2009-10-16.orig/arch/x86/xen/Kconfig 2009-02-16 16:17:21.000000000 +0100 |
23079 | +++ sle11-2009-10-16/arch/x86/xen/Kconfig 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23080 | @@ -17,7 +17,7 @@ config XEN_MAX_DOMAIN_MEMORY |
23081 | int "Maximum allowed size of a domain in gigabytes" | |
23082 | default 8 if X86_32 | |
23083 | default 32 if X86_64 | |
23084 | - depends on XEN | |
23085 | + depends on PARAVIRT_XEN | |
23086 | help | |
23087 | The pseudo-physical to machine address array is sized | |
23088 | according to the maximum possible memory size of a Xen | |
23089 | @@ -26,5 +26,5 @@ config XEN_MAX_DOMAIN_MEMORY | |
23090 | ||
23091 | config XEN_SAVE_RESTORE | |
23092 | bool | |
23093 | - depends on PM | |
23094 | + depends on PARAVIRT_XEN && PM | |
23095 | default y | |
23096 | \ No newline at end of file | |
82094b55 AF |
23097 | --- sle11-2009-10-16.orig/drivers/acpi/processor_core.c 2009-08-26 11:54:44.000000000 +0200 |
23098 | +++ sle11-2009-10-16/drivers/acpi/processor_core.c 2009-08-26 12:04:00.000000000 +0200 | |
23099 | @@ -730,9 +730,11 @@ static int __cpuinit acpi_processor_star | |
2cb7cef9 BS |
23100 | if (result) |
23101 | goto end; | |
23102 | ||
23103 | - sysdev = get_cpu_sysdev(pr->id); | |
23104 | - if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) | |
23105 | - return -EFAULT; | |
23106 | + if (pr->id != -1) { | |
23107 | + sysdev = get_cpu_sysdev(pr->id); | |
23108 | + if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) | |
23109 | + return -EFAULT; | |
23110 | + } | |
23111 | ||
23112 | status = acpi_install_notify_handler(pr->handle, ACPI_DEVICE_NOTIFY, | |
23113 | acpi_processor_notify, pr); | |
82094b55 | 23114 | @@ -904,7 +906,8 @@ static int acpi_processor_remove(struct |
2cb7cef9 BS |
23115 | status = acpi_remove_notify_handler(pr->handle, ACPI_DEVICE_NOTIFY, |
23116 | acpi_processor_notify); | |
23117 | ||
23118 | - sysfs_remove_link(&device->dev.kobj, "sysdev"); | |
23119 | + if (pr->id != -1) | |
23120 | + sysfs_remove_link(&device->dev.kobj, "sysdev"); | |
23121 | ||
23122 | acpi_processor_remove_fs(device); | |
23123 | ||
82094b55 AF |
23124 | --- sle11-2009-10-16.orig/drivers/char/tpm/tpm_vtpm.c 2009-08-26 11:52:33.000000000 +0200 |
23125 | +++ sle11-2009-10-16/drivers/char/tpm/tpm_vtpm.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23126 | @@ -347,7 +347,7 @@ static int _vtpm_send_queued(struct tpm_ |
23127 | { | |
23128 | int rc; | |
23129 | int error = 0; | |
23130 | - long flags; | |
23131 | + unsigned long flags; | |
23132 | unsigned char buffer[1]; | |
23133 | struct vtpm_state *vtpms; | |
23134 | vtpms = (struct vtpm_state *)chip_get_private(chip); | |
82094b55 AF |
23135 | --- sle11-2009-10-16.orig/drivers/misc/Kconfig 2009-10-28 14:55:02.000000000 +0100 |
23136 | +++ sle11-2009-10-16/drivers/misc/Kconfig 2009-08-26 12:04:11.000000000 +0200 | |
23137 | @@ -440,7 +440,7 @@ config ENCLOSURE_SERVICES | |
2cb7cef9 BS |
23138 | config SGI_XP |
23139 | tristate "Support communication between SGI SSIs" | |
23140 | depends on NET | |
23141 | - depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP | |
23142 | + depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP && !XEN | |
23143 | select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 | |
23144 | select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 | |
23145 | select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP | |
82094b55 | 23146 | @@ -467,7 +467,7 @@ config HP_ILO |
2cb7cef9 BS |
23147 | |
23148 | config SGI_GRU | |
23149 | tristate "SGI GRU driver" | |
23150 | - depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP | |
23151 | + depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP && !XEN | |
23152 | default n | |
23153 | select MMU_NOTIFIER | |
23154 | ---help--- | |
82094b55 AF |
23155 | --- sle11-2009-10-16.orig/drivers/pci/msi-xen.c 2009-03-16 16:38:05.000000000 +0100 |
23156 | +++ sle11-2009-10-16/drivers/pci/msi-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23157 | @@ -90,12 +90,10 @@ arch_teardown_msi_irqs(struct pci_dev *d |
23158 | } | |
23159 | #endif | |
23160 | ||
23161 | -static void msi_set_enable(struct pci_dev *dev, int enable) | |
23162 | +static void __msi_set_enable(struct pci_dev *dev, int pos, int enable) | |
23163 | { | |
23164 | - int pos; | |
23165 | u16 control; | |
23166 | ||
23167 | - pos = pci_find_capability(dev, PCI_CAP_ID_MSI); | |
23168 | if (pos) { | |
23169 | pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); | |
23170 | control &= ~PCI_MSI_FLAGS_ENABLE; | |
23171 | @@ -105,6 +103,11 @@ static void msi_set_enable(struct pci_de | |
23172 | } | |
23173 | } | |
23174 | ||
23175 | +static void msi_set_enable(struct pci_dev *dev, int enable) | |
23176 | +{ | |
23177 | + __msi_set_enable(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), enable); | |
23178 | +} | |
23179 | + | |
23180 | static void msix_set_enable(struct pci_dev *dev, int enable) | |
23181 | { | |
23182 | int pos; | |
23183 | @@ -568,9 +571,8 @@ int pci_enable_msi(struct pci_dev* dev) | |
23184 | ||
23185 | /* Check whether driver already requested for MSI-X irqs */ | |
23186 | if (dev->msix_enabled) { | |
23187 | - printk(KERN_INFO "PCI: %s: Can't enable MSI. " | |
23188 | - "Device already has MSI-X enabled\n", | |
23189 | - pci_name(dev)); | |
23190 | + dev_info(&dev->dev, "can't enable MSI " | |
23191 | + "(MSI-X already enabled)\n"); | |
23192 | return -EINVAL; | |
23193 | } | |
23194 | ||
23195 | @@ -702,9 +704,8 @@ int pci_enable_msix(struct pci_dev* dev, | |
23196 | temp = dev->irq; | |
23197 | /* Check whether driver already requested for MSI vector */ | |
23198 | if (dev->msi_enabled) { | |
23199 | - printk(KERN_INFO "PCI: %s: Can't enable MSI-X. " | |
23200 | - "Device already has an MSI irq assigned\n", | |
23201 | - pci_name(dev)); | |
23202 | + dev_info(&dev->dev, "can't enable MSI-X " | |
23203 | + "(MSI IRQ already assigned)\n"); | |
23204 | return -EINVAL; | |
23205 | } | |
23206 | ||
82094b55 AF |
23207 | --- sle11-2009-10-16.orig/drivers/pci/quirks.c 2009-10-28 14:55:02.000000000 +0100 |
23208 | +++ sle11-2009-10-16/drivers/pci/quirks.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23209 | @@ -44,9 +44,8 @@ static void __devinit quirk_release_reso |
23210 | /* PCI Host Bridge isn't a target device */ | |
23211 | return; | |
23212 | } | |
23213 | - printk(KERN_INFO | |
23214 | - "PCI: Disable memory decoding and release memory resources [%s].\n", | |
23215 | - pci_name(dev)); | |
23216 | + dev_info(&dev->dev, | |
23217 | + "disable memory decoding and release memory resources\n"); | |
23218 | pci_read_config_word(dev, PCI_COMMAND, &command); | |
23219 | command &= ~PCI_COMMAND_MEMORY; | |
23220 | pci_write_config_word(dev, PCI_COMMAND, command); | |
82094b55 AF |
23221 | --- sle11-2009-10-16.orig/drivers/pci/setup-res.c 2009-10-28 14:55:02.000000000 +0100 |
23222 | +++ sle11-2009-10-16/drivers/pci/setup-res.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23223 | @@ -129,7 +129,7 @@ int pci_claim_resource(struct pci_dev *d |
23224 | #ifdef CONFIG_PCI_REASSIGN | |
23225 | void pci_disable_bridge_window(struct pci_dev *dev) | |
23226 | { | |
23227 | - printk(KERN_DEBUG "PCI: Disable bridge window on %s\n", pci_name(dev)); | |
23228 | + dev_dbg(&dev->dev, "disable bridge window\n"); | |
23229 | ||
23230 | /* MMIO Base/Limit */ | |
23231 | pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0); | |
23232 | @@ -190,8 +190,8 @@ int pci_assign_resource(struct pci_dev * | |
23233 | res->flags &= ~IORESOURCE_STARTALIGN; | |
23234 | if (resno < PCI_BRIDGE_RESOURCES) { | |
23235 | #ifdef CONFIG_PCI_REASSIGN | |
23236 | - printk(KERN_DEBUG "PCI: Assign resource(%d) on %s " | |
23237 | - "%016llx - %016llx\n", resno, pci_name(dev), | |
23238 | + dev_dbg(&dev->dev, "assign resource(%d) " | |
23239 | + "%016llx - %016llx\n", resno, | |
23240 | (unsigned long long)res->start, | |
23241 | (unsigned long long)res->end); | |
23242 | #endif | |
23243 | @@ -235,8 +235,8 @@ int pci_assign_resource_fixed(struct pci | |
23244 | (unsigned long long)res->end); | |
23245 | } else if (resno < PCI_BRIDGE_RESOURCES) { | |
23246 | #ifdef CONFIG_PCI_REASSIGN | |
23247 | - printk(KERN_DEBUG "PCI: Assign resource(%d) on %s " | |
23248 | - "%016llx - %016llx\n", resno, pci_name(dev), | |
23249 | + dev_dbg(&dev->dev, "assign resource(%d) " | |
23250 | + "%016llx - %016llx\n", resno, | |
23251 | (unsigned long long)res->start, | |
23252 | (unsigned long long)res->end); | |
23253 | #endif | |
82094b55 AF |
23254 | --- sle11-2009-10-16.orig/drivers/xen/Makefile 2009-03-16 16:38:05.000000000 +0100 |
23255 | +++ sle11-2009-10-16/drivers/xen/Makefile 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23256 | @@ -1,4 +1,4 @@ |
23257 | -obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o | |
23258 | +obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o manage.o | |
23259 | xen-xencomm-$(CONFIG_PARAVIRT_XEN) := xencomm.o | |
23260 | xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o | |
23261 | ||
82094b55 AF |
23262 | --- sle11-2009-10-16.orig/drivers/xen/balloon/balloon.c 2009-06-29 15:28:36.000000000 +0200 |
23263 | +++ sle11-2009-10-16/drivers/xen/balloon/balloon.c 2009-06-29 15:30:29.000000000 +0200 | |
2cb7cef9 BS |
23264 | @@ -84,7 +84,7 @@ static unsigned long frame_list[PAGE_SIZ |
23265 | /* VM /proc information for memory */ | |
23266 | extern unsigned long totalram_pages; | |
23267 | ||
23268 | -#if !defined(MODULE) && defined(CONFIG_HIGHMEM) | |
23269 | +#ifdef CONFIG_HIGHMEM | |
23270 | extern unsigned long totalhigh_pages; | |
23271 | #define inc_totalhigh_pages() (totalhigh_pages++) | |
23272 | #define dec_totalhigh_pages() (totalhigh_pages--) | |
82094b55 AF |
23273 | --- sle11-2009-10-16.orig/drivers/xen/balloon/sysfs.c 2009-06-29 15:29:24.000000000 +0200 |
23274 | +++ sle11-2009-10-16/drivers/xen/balloon/sysfs.c 2009-06-29 15:31:06.000000000 +0200 | |
2cb7cef9 BS |
23275 | @@ -45,6 +45,7 @@ |
23276 | ||
23277 | #define BALLOON_SHOW(name, format, args...) \ | |
23278 | static ssize_t show_##name(struct sys_device *dev, \ | |
23279 | + struct sysdev_attribute *attr, \ | |
23280 | char *buf) \ | |
23281 | { \ | |
23282 | return sprintf(buf, format, ##args); \ | |
23283 | @@ -56,14 +57,15 @@ BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(b | |
23284 | BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(bs.balloon_high)); | |
23285 | BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(bs.driver_pages)); | |
23286 | ||
23287 | -static ssize_t show_target_kb(struct sys_device *dev, char *buf) | |
23288 | +static ssize_t show_target_kb(struct sys_device *dev, | |
23289 | + struct sysdev_attribute *attr, char *buf) | |
23290 | { | |
23291 | return sprintf(buf, "%lu\n", PAGES2KB(bs.target_pages)); | |
23292 | } | |
23293 | ||
23294 | static ssize_t store_target_kb(struct sys_device *dev, | |
23295 | - const char *buf, | |
23296 | - size_t count) | |
23297 | + struct sysdev_attribute *attr, | |
23298 | + const char *buf, size_t count) | |
23299 | { | |
23300 | char memstring[64], *endchar; | |
23301 | unsigned long long target_bytes; | |
82094b55 AF |
23302 | --- sle11-2009-10-16.orig/drivers/xen/blktap/blktap.c 2009-04-20 11:40:14.000000000 +0200 |
23303 | +++ sle11-2009-10-16/drivers/xen/blktap/blktap.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23304 | @@ -54,6 +54,7 @@ |
23305 | #include <linux/gfp.h> | |
23306 | #include <linux/poll.h> | |
23307 | #include <linux/delay.h> | |
23308 | +#include <linux/nsproxy.h> | |
23309 | #include <asm/tlbflush.h> | |
23310 | ||
23311 | #define MAX_TAP_DEV 256 /*the maximum number of tapdisk ring devices */ | |
23312 | @@ -498,7 +499,7 @@ found: | |
23313 | ||
23314 | if ((class = get_xen_class()) != NULL) | |
23315 | device_create(class, NULL, MKDEV(blktap_major, minor), | |
23316 | - "blktap%d", minor); | |
23317 | + NULL, "blktap%d", minor); | |
23318 | } | |
23319 | ||
23320 | out: | |
23321 | @@ -1683,7 +1684,8 @@ static int __init blkif_init(void) | |
23322 | * We only create the device when a request of a new device is | |
23323 | * made. | |
23324 | */ | |
23325 | - device_create(class, NULL, MKDEV(blktap_major, 0), "blktap0"); | |
23326 | + device_create(class, NULL, MKDEV(blktap_major, 0), NULL, | |
23327 | + "blktap0"); | |
23328 | } else { | |
23329 | /* this is bad, but not fatal */ | |
23330 | WPRINTK("blktap: sysfs xen_class not created\n"); | |
82094b55 AF |
23331 | --- sle11-2009-10-16.orig/drivers/xen/char/mem.c 2009-03-16 16:38:05.000000000 +0100 |
23332 | +++ sle11-2009-10-16/drivers/xen/char/mem.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23333 | @@ -35,7 +35,7 @@ static inline int uncached_access(struct |
23334 | ||
23335 | static inline int range_is_allowed(unsigned long pfn, unsigned long size) | |
23336 | { | |
23337 | -#ifdef CONFIG_NONPROMISC_DEVMEM | |
23338 | +#ifdef CONFIG_STRICT_DEVMEM | |
23339 | u64 from = ((u64)pfn) << PAGE_SHIFT; | |
23340 | u64 to = from + size; | |
23341 | u64 cursor = from; | |
23342 | @@ -172,7 +172,10 @@ static void mmap_mem_close(struct vm_are | |
23343 | ||
23344 | static struct vm_operations_struct mmap_mem_ops = { | |
23345 | .open = mmap_mem_open, | |
23346 | - .close = mmap_mem_close | |
23347 | + .close = mmap_mem_close, | |
23348 | +#ifdef CONFIG_HAVE_IOREMAP_PROT | |
23349 | + .access = generic_access_phys | |
23350 | +#endif | |
23351 | }; | |
23352 | ||
23353 | static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma) | |
82094b55 AF |
23354 | --- sle11-2009-10-16.orig/drivers/xen/console/console.c 2009-03-16 16:38:05.000000000 +0100 |
23355 | +++ sle11-2009-10-16/drivers/xen/console/console.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23356 | @@ -432,9 +432,7 @@ static void __xencons_tx_flush(void) |
23357 | ||
23358 | if (work_done && (xencons_tty != NULL)) { | |
23359 | wake_up_interruptible(&xencons_tty->write_wait); | |
23360 | - if ((xencons_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && | |
23361 | - (xencons_tty->ldisc.write_wakeup != NULL)) | |
23362 | - (xencons_tty->ldisc.write_wakeup)(xencons_tty); | |
23363 | + tty_wakeup(xencons_tty); | |
23364 | } | |
23365 | } | |
23366 | ||
23367 | @@ -635,8 +633,8 @@ static void xencons_close(struct tty_str | |
23368 | tty->closing = 1; | |
23369 | tty_wait_until_sent(tty, 0); | |
23370 | tty_driver_flush_buffer(tty); | |
23371 | - if (tty->ldisc.flush_buffer != NULL) | |
23372 | - tty->ldisc.flush_buffer(tty); | |
23373 | + if (tty->ldisc.ops->flush_buffer != NULL) | |
23374 | + tty->ldisc.ops->flush_buffer(tty); | |
23375 | tty->closing = 0; | |
23376 | spin_lock_irqsave(&xencons_lock, flags); | |
23377 | xencons_tty = NULL; | |
82094b55 AF |
23378 | --- sle11-2009-10-16.orig/drivers/xen/core/evtchn.c 2009-03-16 16:33:40.000000000 +0100 |
23379 | +++ sle11-2009-10-16/drivers/xen/core/evtchn.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23380 | @@ -746,8 +746,9 @@ static struct irq_chip dynirq_chip = { |
23381 | }; | |
23382 | ||
23383 | /* Bitmap indicating which PIRQs require Xen to be notified on unmask. */ | |
23384 | -static int pirq_eoi_does_unmask; | |
23385 | +static bool pirq_eoi_does_unmask; | |
23386 | static unsigned long *pirq_needs_eoi; | |
23387 | +static DECLARE_BITMAP(probing_pirq, NR_PIRQS); | |
23388 | ||
23389 | static void pirq_unmask_and_notify(unsigned int evtchn, unsigned int irq) | |
23390 | { | |
23391 | @@ -794,25 +795,31 @@ static inline void pirq_query_unmask(int | |
23392 | set_bit(irq - PIRQ_BASE, pirq_needs_eoi); | |
23393 | } | |
23394 | ||
23395 | -/* | |
23396 | - * On startup, if there is no action associated with the IRQ then we are | |
23397 | - * probing. In this case we should not share with others as it will confuse us. | |
23398 | - */ | |
23399 | -#define probing_irq(_irq) (irq_desc[(_irq)].action == NULL) | |
23400 | +static int set_type_pirq(unsigned int irq, unsigned int type) | |
23401 | +{ | |
23402 | + if (type != IRQ_TYPE_PROBE) | |
23403 | + return -EINVAL; | |
23404 | + set_bit(irq - PIRQ_BASE, probing_pirq); | |
23405 | + return 0; | |
23406 | +} | |
23407 | ||
23408 | static unsigned int startup_pirq(unsigned int irq) | |
23409 | { | |
23410 | struct evtchn_bind_pirq bind_pirq; | |
23411 | int evtchn = evtchn_from_irq(irq); | |
23412 | ||
23413 | - if (VALID_EVTCHN(evtchn)) | |
23414 | + if (VALID_EVTCHN(evtchn)) { | |
23415 | + clear_bit(irq - PIRQ_BASE, probing_pirq); | |
23416 | goto out; | |
23417 | + } | |
23418 | ||
23419 | bind_pirq.pirq = evtchn_get_xen_pirq(irq); | |
23420 | /* NB. We are happy to share unless we are probing. */ | |
23421 | - bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE; | |
23422 | + bind_pirq.flags = test_and_clear_bit(irq - PIRQ_BASE, probing_pirq) | |
23423 | + || (irq_desc[irq].status & IRQ_AUTODETECT) | |
23424 | + ? 0 : BIND_PIRQ__WILL_SHARE; | |
23425 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) { | |
23426 | - if (!probing_irq(irq)) | |
23427 | + if (bind_pirq.flags) | |
23428 | printk(KERN_INFO "Failed to obtain physical IRQ %d\n", | |
23429 | irq); | |
23430 | return 0; | |
23431 | @@ -891,6 +898,7 @@ static struct irq_chip pirq_chip = { | |
23432 | .mask_ack = ack_pirq, | |
23433 | .ack = ack_pirq, | |
23434 | .end = end_pirq, | |
23435 | + .set_type = set_type_pirq, | |
23436 | #ifdef CONFIG_SMP | |
23437 | .set_affinity = set_affinity_irq, | |
23438 | #endif | |
23439 | @@ -1003,6 +1011,7 @@ void xen_poll_irq(int irq) | |
23440 | BUG(); | |
23441 | } | |
23442 | ||
23443 | +#ifdef CONFIG_PM_SLEEP | |
23444 | static void restore_cpu_virqs(unsigned int cpu) | |
23445 | { | |
23446 | struct evtchn_bind_virq bind_virq; | |
23447 | @@ -1095,6 +1104,7 @@ void irq_resume(void) | |
23448 | } | |
23449 | ||
23450 | } | |
23451 | +#endif | |
23452 | ||
23453 | #if defined(CONFIG_X86_IO_APIC) | |
23454 | #define identity_mapped_irq(irq) (!IO_APIC_IRQ((irq) - PIRQ_BASE)) | |
23455 | @@ -1177,7 +1187,7 @@ void __init xen_init_IRQ(void) | |
23456 | * BITS_TO_LONGS(ALIGN(NR_PIRQS, PAGE_SIZE * 8))); | |
23457 | eoi_gmfn.gmfn = virt_to_machine(pirq_needs_eoi) >> PAGE_SHIFT; | |
23458 | if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) == 0) | |
23459 | - pirq_eoi_does_unmask = 1; | |
23460 | + pirq_eoi_does_unmask = true; | |
23461 | ||
23462 | /* No event channels are 'live' right now. */ | |
23463 | for (i = 0; i < NR_EVENT_CHANNELS; i++) | |
82094b55 AF |
23464 | --- sle11-2009-10-16.orig/drivers/xen/core/gnttab.c 2008-12-01 11:25:57.000000000 +0100 |
23465 | +++ sle11-2009-10-16/drivers/xen/core/gnttab.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23466 | @@ -449,6 +449,7 @@ static int map_pte_fn(pte_t *pte, struct |
23467 | return 0; | |
23468 | } | |
23469 | ||
23470 | +#ifdef CONFIG_PM_SLEEP | |
23471 | static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, | |
23472 | unsigned long addr, void *data) | |
23473 | { | |
23474 | @@ -456,6 +457,7 @@ static int unmap_pte_fn(pte_t *pte, stru | |
23475 | set_pte_at(&init_mm, addr, pte, __pte(0)); | |
23476 | return 0; | |
23477 | } | |
23478 | +#endif | |
23479 | ||
23480 | void *arch_gnttab_alloc_shared(unsigned long *frames) | |
23481 | { | |
23482 | @@ -633,6 +635,75 @@ void __gnttab_dma_map_page(struct page * | |
23483 | } while (unlikely(read_seqretry(&gnttab_dma_lock, seq))); | |
23484 | } | |
23485 | ||
23486 | +#ifdef __HAVE_ARCH_PTE_SPECIAL | |
23487 | + | |
23488 | +static unsigned int GNTMAP_pte_special; | |
23489 | + | |
23490 | +bool gnttab_pre_map_adjust(unsigned int cmd, struct gnttab_map_grant_ref *map, | |
23491 | + unsigned int count) | |
23492 | +{ | |
23493 | + unsigned int i; | |
23494 | + | |
23495 | + if (unlikely(cmd != GNTTABOP_map_grant_ref)) | |
23496 | + count = 0; | |
23497 | + | |
23498 | + for (i = 0; i < count; ++i, ++map) { | |
23499 | + if (!(map->flags & GNTMAP_host_map) | |
23500 | + || !(map->flags & GNTMAP_application_map)) | |
23501 | + continue; | |
23502 | + if (GNTMAP_pte_special) | |
23503 | + map->flags |= GNTMAP_pte_special; | |
23504 | + else { | |
23505 | + BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); | |
23506 | + return true; | |
23507 | + } | |
23508 | + } | |
23509 | + | |
23510 | + return false; | |
23511 | +} | |
23512 | +EXPORT_SYMBOL(gnttab_pre_map_adjust); | |
23513 | + | |
23514 | +#if CONFIG_XEN_COMPAT < 0x030400 | |
23515 | +int gnttab_post_map_adjust(const struct gnttab_map_grant_ref *map, unsigned int count) | |
23516 | +{ | |
23517 | + unsigned int i; | |
23518 | + int rc = 0; | |
23519 | + | |
23520 | + for (i = 0; i < count && rc == 0; ++i, ++map) { | |
23521 | + pte_t pte; | |
23522 | + | |
23523 | + if (!(map->flags & GNTMAP_host_map) | |
23524 | + || !(map->flags & GNTMAP_application_map)) | |
23525 | + continue; | |
23526 | + | |
23527 | +#ifdef CONFIG_X86 | |
23528 | + pte = __pte_ma((map->dev_bus_addr | _PAGE_PRESENT | _PAGE_USER | |
23529 | + | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_NX | |
23530 | + | _PAGE_SPECIAL) | |
23531 | + & __supported_pte_mask); | |
23532 | +#else | |
23533 | +#error Architecture not yet supported. | |
23534 | +#endif | |
23535 | + if (!(map->flags & GNTMAP_readonly)) | |
23536 | + pte = pte_mkwrite(pte); | |
23537 | + | |
23538 | + if (map->flags & GNTMAP_contains_pte) { | |
23539 | + mmu_update_t u; | |
23540 | + | |
23541 | + u.ptr = map->host_addr; | |
23542 | + u.val = __pte_val(pte); | |
23543 | + rc = HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF); | |
23544 | + } else | |
23545 | + rc = HYPERVISOR_update_va_mapping(map->host_addr, pte, 0); | |
23546 | + } | |
23547 | + | |
23548 | + return rc; | |
23549 | +} | |
23550 | +EXPORT_SYMBOL(gnttab_post_map_adjust); | |
23551 | +#endif | |
23552 | + | |
23553 | +#endif /* __HAVE_ARCH_PTE_SPECIAL */ | |
23554 | + | |
23555 | int gnttab_resume(void) | |
23556 | { | |
23557 | if (max_nr_grant_frames() < nr_grant_frames) | |
23558 | @@ -640,6 +711,7 @@ int gnttab_resume(void) | |
23559 | return gnttab_map(0, nr_grant_frames - 1); | |
23560 | } | |
23561 | ||
23562 | +#ifdef CONFIG_PM_SLEEP | |
23563 | int gnttab_suspend(void) | |
23564 | { | |
23565 | #ifdef CONFIG_X86 | |
23566 | @@ -649,6 +721,7 @@ int gnttab_suspend(void) | |
23567 | #endif | |
23568 | return 0; | |
23569 | } | |
23570 | +#endif | |
23571 | ||
23572 | #else /* !CONFIG_XEN */ | |
23573 | ||
23574 | @@ -759,6 +832,18 @@ int __devinit gnttab_init(void) | |
23575 | gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; | |
23576 | gnttab_free_head = NR_RESERVED_ENTRIES; | |
23577 | ||
23578 | +#if defined(CONFIG_XEN) && defined(__HAVE_ARCH_PTE_SPECIAL) | |
23579 | + if (!xen_feature(XENFEAT_auto_translated_physmap) | |
23580 | + && xen_feature(XENFEAT_gnttab_map_avail_bits)) { | |
23581 | +#ifdef CONFIG_X86 | |
23582 | + GNTMAP_pte_special = (__pte_val(pte_mkspecial(__pte_ma(0))) | |
23583 | + >> _PAGE_BIT_UNUSED1) << _GNTMAP_guest_avail0; | |
23584 | +#else | |
23585 | +#error Architecture not yet supported. | |
23586 | +#endif | |
23587 | + } | |
23588 | +#endif | |
23589 | + | |
23590 | return 0; | |
23591 | ||
23592 | ini_nomem: | |
82094b55 AF |
23593 | --- sle11-2009-10-16.orig/drivers/xen/core/machine_kexec.c 2009-03-16 16:38:05.000000000 +0100 |
23594 | +++ sle11-2009-10-16/drivers/xen/core/machine_kexec.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23595 | @@ -91,7 +91,7 @@ void __init xen_machine_kexec_setup_reso |
23596 | xen_hypervisor_res.start = range.start; | |
23597 | xen_hypervisor_res.end = range.start + range.size - 1; | |
23598 | xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM; | |
23599 | -#ifdef CONFIG_X86_64 | |
23600 | +#ifdef CONFIG_X86 | |
23601 | insert_resource(&iomem_resource, &xen_hypervisor_res); | |
23602 | #endif | |
23603 | ||
23604 | @@ -106,7 +106,7 @@ void __init xen_machine_kexec_setup_reso | |
23605 | if (range.size) { | |
23606 | crashk_res.start = range.start; | |
23607 | crashk_res.end = range.start + range.size - 1; | |
23608 | -#ifdef CONFIG_X86_64 | |
23609 | +#ifdef CONFIG_X86 | |
23610 | insert_resource(&iomem_resource, &crashk_res); | |
23611 | #endif | |
23612 | } | |
23613 | @@ -160,7 +160,7 @@ void __init xen_machine_kexec_setup_reso | |
23614 | return; | |
23615 | } | |
23616 | ||
23617 | -#ifndef CONFIG_X86_64 | |
23618 | +#ifndef CONFIG_X86 | |
23619 | void __init xen_machine_kexec_register_resources(struct resource *res) | |
23620 | { | |
23621 | request_resource(res, &xen_hypervisor_res); | |
82094b55 AF |
23622 | --- sle11-2009-10-16.orig/drivers/xen/core/machine_reboot.c 2009-10-28 14:55:02.000000000 +0100 |
23623 | +++ sle11-2009-10-16/drivers/xen/core/machine_reboot.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23624 | @@ -57,6 +57,7 @@ EXPORT_SYMBOL(machine_restart); |
23625 | EXPORT_SYMBOL(machine_halt); | |
23626 | EXPORT_SYMBOL(machine_power_off); | |
23627 | ||
23628 | +#ifdef CONFIG_PM_SLEEP | |
23629 | static void pre_suspend(void) | |
23630 | { | |
23631 | HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; | |
23632 | @@ -111,6 +112,7 @@ static void post_suspend(int suspend_can | |
23633 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | |
23634 | virt_to_mfn(pfn_to_mfn_frame_list_list); | |
23635 | } | |
23636 | +#endif | |
23637 | ||
23638 | #else /* !(defined(__i386__) || defined(__x86_64__)) */ | |
23639 | ||
23640 | @@ -129,6 +131,7 @@ static void post_suspend(int suspend_can | |
23641 | ||
23642 | #endif | |
23643 | ||
23644 | +#ifdef CONFIG_PM_SLEEP | |
23645 | struct suspend { | |
23646 | int fast_suspend; | |
23647 | void (*resume_notifier)(int); | |
23648 | @@ -222,7 +225,8 @@ int __xen_suspend(int fast_suspend, void | |
23649 | ||
23650 | if (fast_suspend) { | |
23651 | xenbus_suspend(); | |
23652 | - err = stop_machine_run(take_machine_down, &suspend, 0); | |
23653 | + err = stop_machine(take_machine_down, &suspend, | |
23654 | + &cpumask_of_cpu(0)); | |
23655 | if (err < 0) | |
23656 | xenbus_suspend_cancel(); | |
23657 | } else { | |
23658 | @@ -245,3 +249,4 @@ int __xen_suspend(int fast_suspend, void | |
23659 | ||
23660 | return 0; | |
23661 | } | |
23662 | +#endif | |
82094b55 AF |
23663 | --- sle11-2009-10-16.orig/drivers/xen/core/reboot.c 2009-02-16 16:17:21.000000000 +0100 |
23664 | +++ sle11-2009-10-16/drivers/xen/core/reboot.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23665 | @@ -29,17 +29,12 @@ MODULE_LICENSE("Dual BSD/GPL"); |
23666 | /* Ignore multiple shutdown requests. */ | |
23667 | static int shutting_down = SHUTDOWN_INVALID; | |
23668 | ||
23669 | -/* Was last suspend request cancelled? */ | |
23670 | -static int suspend_cancelled; | |
23671 | - | |
23672 | /* Can we leave APs online when we suspend? */ | |
23673 | static int fast_suspend; | |
23674 | ||
23675 | static void __shutdown_handler(struct work_struct *unused); | |
23676 | static DECLARE_DELAYED_WORK(shutdown_work, __shutdown_handler); | |
23677 | ||
23678 | -static int setup_suspend_evtchn(void); | |
23679 | - | |
23680 | int __xen_suspend(int fast_suspend, void (*resume_notifier)(int)); | |
23681 | ||
23682 | static int shutdown_process(void *__unused) | |
23683 | @@ -69,6 +64,13 @@ static int shutdown_process(void *__unus | |
23684 | return 0; | |
23685 | } | |
23686 | ||
23687 | +#ifdef CONFIG_PM_SLEEP | |
23688 | + | |
23689 | +static int setup_suspend_evtchn(void); | |
23690 | + | |
23691 | +/* Was last suspend request cancelled? */ | |
23692 | +static int suspend_cancelled; | |
23693 | + | |
23694 | static void xen_resume_notifier(int _suspend_cancelled) | |
23695 | { | |
23696 | int old_state = xchg(&shutting_down, SHUTDOWN_RESUMING); | |
23697 | @@ -118,6 +120,10 @@ static int xen_suspend(void *__unused) | |
23698 | return 0; | |
23699 | } | |
23700 | ||
23701 | +#else | |
23702 | +# define xen_suspend NULL | |
23703 | +#endif | |
23704 | + | |
23705 | static void switch_shutdown_state(int new_state) | |
23706 | { | |
23707 | int prev_state, old_state = SHUTDOWN_INVALID; | |
23708 | @@ -194,8 +200,10 @@ static void shutdown_handler(struct xenb | |
23709 | new_state = SHUTDOWN_POWEROFF; | |
23710 | else if (strcmp(str, "reboot") == 0) | |
23711 | ctrl_alt_del(); | |
23712 | +#ifdef CONFIG_PM_SLEEP | |
23713 | else if (strcmp(str, "suspend") == 0) | |
23714 | new_state = SHUTDOWN_SUSPEND; | |
23715 | +#endif | |
23716 | else if (strcmp(str, "halt") == 0) | |
23717 | new_state = SHUTDOWN_HALT; | |
23718 | else | |
23719 | @@ -247,6 +255,7 @@ static struct xenbus_watch sysrq_watch = | |
23720 | .callback = sysrq_handler | |
23721 | }; | |
23722 | ||
23723 | +#ifdef CONFIG_PM_SLEEP | |
23724 | static irqreturn_t suspend_int(int irq, void* dev_id) | |
23725 | { | |
23726 | switch_shutdown_state(SHUTDOWN_SUSPEND); | |
23727 | @@ -274,6 +283,9 @@ static int setup_suspend_evtchn(void) | |
23728 | ||
23729 | return 0; | |
23730 | } | |
23731 | +#else | |
23732 | +#define setup_suspend_evtchn() 0 | |
23733 | +#endif | |
23734 | ||
23735 | static int setup_shutdown_watcher(void) | |
23736 | { | |
82094b55 AF |
23737 | --- sle11-2009-10-16.orig/drivers/xen/core/smpboot.c 2009-03-16 16:38:05.000000000 +0100 |
23738 | +++ sle11-2009-10-16/drivers/xen/core/smpboot.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23739 | @@ -27,6 +27,7 @@ |
23740 | ||
23741 | extern irqreturn_t smp_reschedule_interrupt(int, void *); | |
23742 | extern irqreturn_t smp_call_function_interrupt(int, void *); | |
23743 | +extern irqreturn_t smp_call_function_single_interrupt(int, void *); | |
23744 | ||
23745 | extern int local_setup_timer(unsigned int cpu); | |
23746 | extern void local_teardown_timer(unsigned int cpu); | |
23747 | @@ -50,8 +51,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); | |
23748 | ||
23749 | static DEFINE_PER_CPU(int, resched_irq); | |
23750 | static DEFINE_PER_CPU(int, callfunc_irq); | |
23751 | +static DEFINE_PER_CPU(int, call1func_irq); | |
23752 | static char resched_name[NR_CPUS][15]; | |
23753 | static char callfunc_name[NR_CPUS][15]; | |
23754 | +static char call1func_name[NR_CPUS][15]; | |
23755 | ||
23756 | #ifdef CONFIG_X86_LOCAL_APIC | |
23757 | #define set_cpu_to_apicid(cpu, apicid) (per_cpu(x86_cpu_to_apicid, cpu) = (apicid)) | |
23758 | @@ -73,15 +76,13 @@ void __init prefill_possible_map(void) | |
23759 | ||
23760 | for (i = 0; i < NR_CPUS; i++) { | |
23761 | rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); | |
23762 | - if (rc >= 0) | |
23763 | + if (rc >= 0) { | |
23764 | cpu_set(i, cpu_possible_map); | |
23765 | + nr_cpu_ids = i + 1; | |
23766 | + } | |
23767 | } | |
23768 | } | |
23769 | ||
23770 | -void __init smp_alloc_memory(void) | |
23771 | -{ | |
23772 | -} | |
23773 | - | |
23774 | static inline void | |
23775 | set_cpu_sibling_map(unsigned int cpu) | |
23776 | { | |
23777 | @@ -110,7 +111,8 @@ static int __cpuinit xen_smp_intr_init(u | |
23778 | { | |
23779 | int rc; | |
23780 | ||
23781 | - per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1; | |
23782 | + per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = | |
23783 | + per_cpu(call1func_irq, cpu) = -1; | |
23784 | ||
23785 | sprintf(resched_name[cpu], "resched%u", cpu); | |
23786 | rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, | |
23787 | @@ -134,6 +136,17 @@ static int __cpuinit xen_smp_intr_init(u | |
23788 | goto fail; | |
23789 | per_cpu(callfunc_irq, cpu) = rc; | |
23790 | ||
23791 | + sprintf(call1func_name[cpu], "call1func%u", cpu); | |
23792 | + rc = bind_ipi_to_irqhandler(CALL_FUNC_SINGLE_VECTOR, | |
23793 | + cpu, | |
23794 | + smp_call_function_single_interrupt, | |
23795 | + IRQF_DISABLED|IRQF_NOBALANCING, | |
23796 | + call1func_name[cpu], | |
23797 | + NULL); | |
23798 | + if (rc < 0) | |
23799 | + goto fail; | |
23800 | + per_cpu(call1func_irq, cpu) = rc; | |
23801 | + | |
23802 | rc = xen_spinlock_init(cpu); | |
23803 | if (rc < 0) | |
23804 | goto fail; | |
23805 | @@ -148,6 +161,8 @@ static int __cpuinit xen_smp_intr_init(u | |
23806 | unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); | |
23807 | if (per_cpu(callfunc_irq, cpu) >= 0) | |
23808 | unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); | |
23809 | + if (per_cpu(call1func_irq, cpu) >= 0) | |
23810 | + unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL); | |
23811 | xen_spinlock_cleanup(cpu); | |
23812 | return rc; | |
23813 | } | |
23814 | @@ -160,6 +175,7 @@ static void __cpuexit xen_smp_intr_exit( | |
23815 | ||
23816 | unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); | |
23817 | unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); | |
23818 | + unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL); | |
23819 | xen_spinlock_cleanup(cpu); | |
23820 | } | |
23821 | #endif | |
23822 | @@ -167,11 +183,7 @@ static void __cpuexit xen_smp_intr_exit( | |
23823 | void __cpuinit cpu_bringup(void) | |
23824 | { | |
23825 | cpu_init(); | |
23826 | -#ifdef __i386__ | |
23827 | identify_secondary_cpu(&current_cpu_data); | |
23828 | -#else | |
23829 | - identify_cpu(&current_cpu_data); | |
23830 | -#endif | |
23831 | touch_softlockup_watchdog(); | |
23832 | preempt_disable(); | |
23833 | local_irq_enable(); | |
23834 | @@ -251,9 +263,6 @@ void __init smp_prepare_cpus(unsigned in | |
23835 | struct task_struct *idle; | |
23836 | int apicid; | |
23837 | struct vcpu_get_physid cpu_id; | |
23838 | -#ifdef __x86_64__ | |
23839 | - struct desc_ptr *gdt_descr; | |
23840 | -#endif | |
23841 | void *gdt_addr; | |
23842 | ||
23843 | apicid = 0; | |
23844 | @@ -266,7 +275,7 @@ void __init smp_prepare_cpus(unsigned in | |
23845 | ||
23846 | current_thread_info()->cpu = 0; | |
23847 | ||
23848 | - for (cpu = 0; cpu < NR_CPUS; cpu++) { | |
23849 | + for_each_possible_cpu (cpu) { | |
23850 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); | |
23851 | cpus_clear(per_cpu(cpu_core_map, cpu)); | |
23852 | } | |
23853 | @@ -293,21 +302,10 @@ void __init smp_prepare_cpus(unsigned in | |
23854 | if (IS_ERR(idle)) | |
23855 | panic("failed fork for CPU %d", cpu); | |
23856 | ||
23857 | -#ifdef __x86_64__ | |
23858 | - gdt_descr = &cpu_gdt_descr[cpu]; | |
23859 | - gdt_descr->address = get_zeroed_page(GFP_KERNEL); | |
23860 | - if (unlikely(!gdt_descr->address)) { | |
23861 | - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", | |
23862 | - cpu); | |
23863 | - continue; | |
23864 | - } | |
23865 | - gdt_descr->size = GDT_SIZE; | |
23866 | - memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE); | |
23867 | - gdt_addr = (void *)gdt_descr->address; | |
23868 | -#else | |
23869 | +#ifdef __i386__ | |
23870 | init_gdt(cpu); | |
23871 | - gdt_addr = get_cpu_gdt_table(cpu); | |
23872 | #endif | |
23873 | + gdt_addr = get_cpu_gdt_table(cpu); | |
23874 | make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables); | |
23875 | ||
23876 | apicid = cpu; | |
23877 | @@ -353,8 +351,8 @@ void __init smp_prepare_boot_cpu(void) | |
23878 | { | |
23879 | #ifdef __i386__ | |
23880 | init_gdt(smp_processor_id()); | |
23881 | - switch_to_new_gdt(); | |
23882 | #endif | |
23883 | + switch_to_new_gdt(); | |
23884 | prefill_possible_map(); | |
23885 | } | |
23886 | ||
82094b55 AF |
23887 | --- sle11-2009-10-16.orig/drivers/xen/core/spinlock.c 2009-03-16 16:33:40.000000000 +0100 |
23888 | +++ sle11-2009-10-16/drivers/xen/core/spinlock.c 2009-06-04 10:36:24.000000000 +0200 | |
2cb7cef9 BS |
23889 | @@ -5,6 +5,8 @@ |
23890 | * portions of this file. | |
23891 | */ | |
23892 | ||
23893 | +#if CONFIG_XEN_COMPAT >= 0x030200 | |
23894 | + | |
23895 | #include <linux/init.h> | |
23896 | #include <linux/irq.h> | |
23897 | #include <linux/kernel.h> | |
23898 | @@ -73,9 +75,9 @@ int xen_spin_wait(raw_spinlock_t *lock, | |
23899 | /* announce we're spinning */ | |
23900 | spinning.ticket = token; | |
23901 | spinning.lock = lock; | |
23902 | - spinning.prev = __get_cpu_var(spinning); | |
23903 | + spinning.prev = x86_read_percpu(spinning); | |
23904 | smp_wmb(); | |
23905 | - __get_cpu_var(spinning) = &spinning; | |
23906 | + x86_write_percpu(spinning, &spinning); | |
23907 | ||
23908 | /* clear pending */ | |
23909 | xen_clear_irq_pending(irq); | |
23910 | @@ -102,7 +104,7 @@ int xen_spin_wait(raw_spinlock_t *lock, | |
23911 | kstat_this_cpu.irqs[irq] += !rc; | |
23912 | ||
23913 | /* announce we're done */ | |
23914 | - __get_cpu_var(spinning) = spinning.prev; | |
23915 | + x86_write_percpu(spinning, spinning.prev); | |
23916 | rm_lock = &__get_cpu_var(spinning_rm_lock); | |
23917 | raw_local_irq_save(flags); | |
23918 | __raw_write_lock(rm_lock); | |
23919 | @@ -159,3 +161,5 @@ void xen_spin_kick(raw_spinlock_t *lock, | |
23920 | } | |
23921 | } | |
23922 | EXPORT_SYMBOL(xen_spin_kick); | |
23923 | + | |
23924 | +#endif /* CONFIG_XEN_COMPAT >= 0x030200 */ | |
82094b55 AF |
23925 | --- sle11-2009-10-16.orig/drivers/xen/fbfront/xenfb.c 2009-03-16 16:38:05.000000000 +0100 |
23926 | +++ sle11-2009-10-16/drivers/xen/fbfront/xenfb.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23927 | @@ -18,6 +18,7 @@ |
23928 | * frame buffer. | |
23929 | */ | |
23930 | ||
23931 | +#include <linux/console.h> | |
23932 | #include <linux/kernel.h> | |
23933 | #include <linux/errno.h> | |
23934 | #include <linux/fb.h> | |
23935 | @@ -545,6 +546,28 @@ static unsigned long vmalloc_to_mfn(void | |
23936 | return pfn_to_mfn(vmalloc_to_pfn(address)); | |
23937 | } | |
23938 | ||
23939 | +static __devinit void | |
23940 | +xenfb_make_preferred_console(void) | |
23941 | +{ | |
23942 | + struct console *c; | |
23943 | + | |
23944 | + if (console_set_on_cmdline) | |
23945 | + return; | |
23946 | + | |
23947 | + acquire_console_sem(); | |
23948 | + for (c = console_drivers; c; c = c->next) { | |
23949 | + if (!strcmp(c->name, "tty") && c->index == 0) | |
23950 | + break; | |
23951 | + } | |
23952 | + release_console_sem(); | |
23953 | + if (c) { | |
23954 | + unregister_console(c); | |
23955 | + c->flags |= CON_CONSDEV; | |
23956 | + c->flags &= ~CON_PRINTBUFFER; /* don't print again */ | |
23957 | + register_console(c); | |
23958 | + } | |
23959 | +} | |
23960 | + | |
23961 | static int __devinit xenfb_probe(struct xenbus_device *dev, | |
23962 | const struct xenbus_device_id *id) | |
23963 | { | |
23964 | @@ -665,6 +688,7 @@ static int __devinit xenfb_probe(struct | |
23965 | if (ret < 0) | |
23966 | goto error; | |
23967 | ||
23968 | + xenfb_make_preferred_console(); | |
23969 | return 0; | |
23970 | ||
23971 | error_nomem: | |
23972 | @@ -882,4 +906,5 @@ static void __exit xenfb_cleanup(void) | |
23973 | module_init(xenfb_init); | |
23974 | module_exit(xenfb_cleanup); | |
23975 | ||
23976 | +MODULE_DESCRIPTION("Xen virtual framebuffer device frontend"); | |
23977 | MODULE_LICENSE("GPL"); | |
82094b55 AF |
23978 | --- sle11-2009-10-16.orig/drivers/xen/fbfront/xenkbd.c 2009-03-04 11:25:55.000000000 +0100 |
23979 | +++ sle11-2009-10-16/drivers/xen/fbfront/xenkbd.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23980 | @@ -350,4 +350,5 @@ static void __exit xenkbd_cleanup(void) |
23981 | module_init(xenkbd_init); | |
23982 | module_exit(xenkbd_cleanup); | |
23983 | ||
23984 | +MODULE_DESCRIPTION("Xen virtual keyboard/pointer device frontend"); | |
23985 | MODULE_LICENSE("GPL"); | |
82094b55 AF |
23986 | --- sle11-2009-10-16.orig/drivers/xen/gntdev/gntdev.c 2009-03-16 16:38:05.000000000 +0100 |
23987 | +++ sle11-2009-10-16/drivers/xen/gntdev/gntdev.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23988 | @@ -418,7 +418,7 @@ static int __init gntdev_init(void) |
23989 | } | |
23990 | ||
23991 | device = device_create(class, NULL, MKDEV(gntdev_major, 0), | |
23992 | - GNTDEV_NAME); | |
23993 | + NULL, GNTDEV_NAME); | |
23994 | if (IS_ERR(device)) { | |
23995 | printk(KERN_ERR "Error creating gntdev device in xen_class\n"); | |
23996 | printk(KERN_ERR "gntdev created with major number = %d\n", | |
82094b55 AF |
23997 | --- sle11-2009-10-16.orig/drivers/xen/netfront/accel.c 2009-03-30 16:39:19.000000000 +0200 |
23998 | +++ sle11-2009-10-16/drivers/xen/netfront/accel.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
23999 | @@ -28,6 +28,7 @@ |
24000 | * IN THE SOFTWARE. | |
24001 | */ | |
24002 | ||
24003 | +#include <linux/version.h> | |
24004 | #include <linux/netdevice.h> | |
24005 | #include <linux/skbuff.h> | |
24006 | #include <linux/list.h> | |
82094b55 AF |
24007 | --- sle11-2009-10-16.orig/drivers/xen/netfront/netfront.c 2009-03-30 16:40:17.000000000 +0200 |
24008 | +++ sle11-2009-10-16/drivers/xen/netfront/netfront.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24009 | @@ -640,7 +640,7 @@ static int network_open(struct net_devic |
24010 | } | |
24011 | spin_unlock_bh(&np->rx_lock); | |
24012 | ||
24013 | - network_maybe_wake_tx(dev); | |
24014 | + netif_start_queue(dev); | |
24015 | ||
24016 | return 0; | |
24017 | } | |
82094b55 AF |
24018 | --- sle11-2009-10-16.orig/drivers/xen/sfc_netback/accel.h 2009-03-30 16:00:09.000000000 +0200 |
24019 | +++ sle11-2009-10-16/drivers/xen/sfc_netback/accel.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24020 | @@ -25,6 +25,7 @@ |
24021 | #ifndef NETBACK_ACCEL_H | |
24022 | #define NETBACK_ACCEL_H | |
24023 | ||
24024 | +#include <linux/version.h> | |
24025 | #include <linux/slab.h> | |
24026 | #include <linux/ip.h> | |
24027 | #include <linux/tcp.h> | |
82094b55 AF |
24028 | --- sle11-2009-10-16.orig/drivers/xen/sfc_netfront/accel.h 2009-03-30 16:34:56.000000000 +0200 |
24029 | +++ sle11-2009-10-16/drivers/xen/sfc_netfront/accel.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24030 | @@ -35,6 +35,7 @@ |
24031 | #include <xen/evtchn.h> | |
24032 | ||
24033 | #include <linux/kernel.h> | |
24034 | +#include <linux/version.h> | |
24035 | #include <linux/list.h> | |
24036 | ||
24037 | enum netfront_accel_post_status { | |
82094b55 AF |
24038 | --- sle11-2009-10-16.orig/drivers/xen/xenbus/xenbus_client.c 2009-03-24 10:13:17.000000000 +0100 |
24039 | +++ sle11-2009-10-16/drivers/xen/xenbus/xenbus_client.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24040 | @@ -150,7 +150,7 @@ int xenbus_watch_pathfmt(struct xenbus_d |
24041 | char *path; | |
24042 | ||
24043 | va_start(ap, pathfmt); | |
24044 | - path = kvasprintf(GFP_KERNEL, pathfmt, ap); | |
24045 | + path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap); | |
24046 | va_end(ap); | |
24047 | ||
24048 | if (!path) { | |
82094b55 AF |
24049 | --- sle11-2009-10-16.orig/drivers/xen/xenbus/xenbus_comms.c 2009-02-16 16:17:21.000000000 +0100 |
24050 | +++ sle11-2009-10-16/drivers/xen/xenbus/xenbus_comms.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24051 | @@ -228,14 +228,11 @@ int xb_init_comms(void) |
24052 | intf->rsp_cons = intf->rsp_prod; | |
24053 | } | |
24054 | ||
24055 | +#if defined(CONFIG_XEN) || defined(MODULE) | |
24056 | if (xenbus_irq) | |
24057 | unbind_from_irqhandler(xenbus_irq, &xb_waitq); | |
24058 | ||
24059 | -#if defined(CONFIG_XEN) || defined(MODULE) | |
24060 | err = bind_caller_port_to_irqhandler( | |
24061 | -#else | |
24062 | - err = bind_evtchn_to_irqhandler( | |
24063 | -#endif | |
24064 | xen_store_evtchn, wake_waiting, | |
24065 | 0, "xenbus", &xb_waitq); | |
24066 | if (err <= 0) { | |
24067 | @@ -244,6 +241,20 @@ int xb_init_comms(void) | |
24068 | } | |
24069 | ||
24070 | xenbus_irq = err; | |
24071 | +#else | |
24072 | + if (xenbus_irq) { | |
24073 | + /* Already have an irq; assume we're resuming */ | |
24074 | + rebind_evtchn_irq(xen_store_evtchn, xenbus_irq); | |
24075 | + } else { | |
24076 | + err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, | |
24077 | + 0, "xenbus", &xb_waitq); | |
24078 | + if (err <= 0) { | |
24079 | + printk(KERN_ERR "XENBUS request irq failed %i\n", err); | |
24080 | + return err; | |
24081 | + } | |
24082 | + xenbus_irq = err; | |
24083 | + } | |
24084 | +#endif | |
24085 | ||
24086 | return 0; | |
24087 | } | |
82094b55 AF |
24088 | --- sle11-2009-10-16.orig/drivers/xen/xenbus/xenbus_probe.c 2009-03-16 16:38:05.000000000 +0100 |
24089 | +++ sle11-2009-10-16/drivers/xen/xenbus/xenbus_probe.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24090 | @@ -36,6 +36,7 @@ |
24091 | __FUNCTION__, __LINE__, ##args) | |
24092 | ||
24093 | #include <linux/kernel.h> | |
24094 | +#include <linux/version.h> | |
24095 | #include <linux/err.h> | |
24096 | #include <linux/string.h> | |
24097 | #include <linux/ctype.h> | |
82094b55 AF |
24098 | --- sle11-2009-10-16.orig/fs/aio.c 2009-03-24 10:13:25.000000000 +0100 |
24099 | +++ sle11-2009-10-16/fs/aio.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24100 | @@ -1335,7 +1335,7 @@ static int make_aio_fd(struct kioctx *io |
24101 | int fd; | |
24102 | struct file *file; | |
24103 | ||
24104 | - fd = anon_inode_getfd("[aioq]", &aioq_fops, ioctx); | |
24105 | + fd = anon_inode_getfd("[aioq]", &aioq_fops, ioctx, 0); | |
24106 | if (fd < 0) | |
24107 | return fd; | |
24108 | ||
82094b55 AF |
24109 | --- sle11-2009-10-16.orig/include/asm-generic/pgtable.h 2009-03-04 11:28:34.000000000 +0100 |
24110 | +++ sle11-2009-10-16/include/asm-generic/pgtable.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24111 | @@ -99,10 +99,6 @@ static inline void ptep_set_wrprotect(st |
24112 | } | |
24113 | #endif | |
24114 | ||
24115 | -#ifndef arch_change_pte_range | |
24116 | -#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) 0 | |
24117 | -#endif | |
24118 | - | |
24119 | #ifndef __HAVE_ARCH_PTE_SAME | |
24120 | #define pte_same(A,B) (pte_val(A) == pte_val(B)) | |
24121 | #endif | |
82094b55 AF |
24122 | --- sle11-2009-10-16.orig/include/asm-x86/dma-mapping.h 2009-03-16 16:38:05.000000000 +0100 |
24123 | +++ sle11-2009-10-16/include/asm-x86/dma-mapping.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24124 | @@ -74,7 +74,7 @@ static inline struct dma_mapping_ops *ge |
24125 | /* Make sure we keep the same behaviour */ | |
24126 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | |
24127 | { | |
24128 | -#ifdef CONFIG_X86_32 | |
24129 | +#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN) | |
24130 | return 0; | |
24131 | #else | |
24132 | struct dma_mapping_ops *ops = get_dma_ops(dev); | |
82094b55 AF |
24133 | --- sle11-2009-10-16.orig/include/asm-x86/kexec.h 2008-12-01 11:11:08.000000000 +0100 |
24134 | +++ sle11-2009-10-16/include/asm-x86/kexec.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24135 | @@ -10,6 +10,7 @@ |
24136 | # define VA_PTE_0 5 | |
24137 | # define PA_PTE_1 6 | |
24138 | # define VA_PTE_1 7 | |
24139 | +# ifndef CONFIG_XEN | |
24140 | # define PA_SWAP_PAGE 8 | |
24141 | # ifdef CONFIG_X86_PAE | |
24142 | # define PA_PMD_0 9 | |
24143 | @@ -20,6 +21,18 @@ | |
24144 | # else | |
24145 | # define PAGES_NR 9 | |
24146 | # endif | |
24147 | +# else /* CONFIG_XEN */ | |
24148 | +/* | |
24149 | + * The hypervisor interface implicitly requires that all entries (except | |
24150 | + * for possibly the final one) are arranged in matching PA_/VA_ pairs. | |
24151 | + */ | |
24152 | +# define PA_PMD_0 8 | |
24153 | +# define VA_PMD_0 9 | |
24154 | +# define PA_PMD_1 10 | |
24155 | +# define VA_PMD_1 11 | |
24156 | +# define PA_SWAP_PAGE 12 | |
24157 | +# define PAGES_NR 13 | |
24158 | +# endif /* CONFIG_XEN */ | |
24159 | #else | |
24160 | # define PA_CONTROL_PAGE 0 | |
24161 | # define VA_CONTROL_PAGE 1 | |
82094b55 AF |
24162 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/desc.h 2009-03-16 16:38:05.000000000 +0100 |
24163 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/desc.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24164 | @@ -31,11 +31,17 @@ extern struct desc_ptr idt_descr; |
24165 | extern gate_desc idt_table[]; | |
24166 | #endif | |
24167 | ||
24168 | +struct gdt_page { | |
24169 | + struct desc_struct gdt[GDT_ENTRIES]; | |
24170 | +} __attribute__((aligned(PAGE_SIZE))); | |
24171 | +DECLARE_PER_CPU(struct gdt_page, gdt_page); | |
24172 | + | |
24173 | +static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) | |
24174 | +{ | |
24175 | + return per_cpu(gdt_page, cpu).gdt; | |
24176 | +} | |
24177 | + | |
24178 | #ifdef CONFIG_X86_64 | |
24179 | -extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; | |
24180 | -extern struct desc_ptr cpu_gdt_descr[]; | |
24181 | -/* the cpu gdt accessor */ | |
24182 | -#define get_cpu_gdt_table(x) ((struct desc_struct *)cpu_gdt_descr[x].address) | |
24183 | ||
24184 | static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func, | |
24185 | unsigned dpl, unsigned ist, unsigned seg) | |
24186 | @@ -53,16 +59,6 @@ static inline void pack_gate(gate_desc * | |
24187 | } | |
24188 | ||
24189 | #else | |
24190 | -struct gdt_page { | |
24191 | - struct desc_struct gdt[GDT_ENTRIES]; | |
24192 | -} __attribute__((aligned(PAGE_SIZE))); | |
24193 | -DECLARE_PER_CPU(struct gdt_page, gdt_page); | |
24194 | - | |
24195 | -static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) | |
24196 | -{ | |
24197 | - return per_cpu(gdt_page, cpu).gdt; | |
24198 | -} | |
24199 | - | |
24200 | static inline void pack_gate(gate_desc *gate, unsigned char type, | |
24201 | unsigned long base, unsigned dpl, unsigned flags, | |
24202 | unsigned short seg) | |
24203 | @@ -333,6 +329,28 @@ static inline void set_intr_gate(unsigne | |
24204 | _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); | |
24205 | } | |
24206 | ||
24207 | +#define SYS_VECTOR_FREE 0 | |
24208 | +#define SYS_VECTOR_ALLOCED 1 | |
24209 | + | |
24210 | +extern int first_system_vector; | |
24211 | +extern char system_vectors[]; | |
24212 | + | |
24213 | +static inline void alloc_system_vector(int vector) | |
24214 | +{ | |
24215 | + if (system_vectors[vector] == SYS_VECTOR_FREE) { | |
24216 | + system_vectors[vector] = SYS_VECTOR_ALLOCED; | |
24217 | + if (first_system_vector > vector) | |
24218 | + first_system_vector = vector; | |
24219 | + } else | |
24220 | + BUG(); | |
24221 | +} | |
24222 | + | |
24223 | +static inline void alloc_intr_gate(unsigned int n, void *addr) | |
24224 | +{ | |
24225 | + alloc_system_vector(n); | |
24226 | + set_intr_gate(n, addr); | |
24227 | +} | |
24228 | + | |
24229 | /* | |
24230 | * This routine sets up an interrupt gate at directory privilege level 3. | |
24231 | */ | |
82094b55 AF |
24232 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/fixmap.h 2009-03-16 16:38:05.000000000 +0100 |
24233 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/fixmap.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24234 | @@ -7,7 +7,58 @@ |
24235 | # include "fixmap_64.h" | |
24236 | #endif | |
24237 | ||
24238 | +extern int fixmaps_set; | |
24239 | + | |
24240 | +void xen_set_fixmap(enum fixed_addresses, maddr_t, pgprot_t); | |
24241 | + | |
24242 | +static inline void __set_fixmap(enum fixed_addresses idx, | |
24243 | + maddr_t phys, pgprot_t flags) | |
24244 | +{ | |
24245 | + xen_set_fixmap(idx, phys, flags); | |
24246 | +} | |
24247 | + | |
24248 | +#define set_fixmap(idx, phys) \ | |
24249 | + __set_fixmap(idx, phys, PAGE_KERNEL) | |
24250 | + | |
24251 | +/* | |
24252 | + * Some hardware wants to get fixmapped without caching. | |
24253 | + */ | |
24254 | +#define set_fixmap_nocache(idx, phys) \ | |
24255 | + __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) | |
24256 | + | |
24257 | #define clear_fixmap(idx) \ | |
24258 | __set_fixmap(idx, 0, __pgprot(0)) | |
24259 | ||
24260 | +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) | |
24261 | +#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) | |
24262 | + | |
24263 | +extern void __this_fixmap_does_not_exist(void); | |
24264 | + | |
24265 | +/* | |
24266 | + * 'index to address' translation. If anyone tries to use the idx | |
24267 | + * directly without translation, we catch the bug with a NULL-deference | |
24268 | + * kernel oops. Illegal ranges of incoming indices are caught too. | |
24269 | + */ | |
24270 | +static __always_inline unsigned long fix_to_virt(const unsigned int idx) | |
24271 | +{ | |
24272 | + /* | |
24273 | + * this branch gets completely eliminated after inlining, | |
24274 | + * except when someone tries to use fixaddr indices in an | |
24275 | + * illegal way. (such as mixing up address types or using | |
24276 | + * out-of-range indices). | |
24277 | + * | |
24278 | + * If it doesn't get removed, the linker will complain | |
24279 | + * loudly with a reasonably clear error message.. | |
24280 | + */ | |
24281 | + if (idx >= __end_of_fixed_addresses) | |
24282 | + __this_fixmap_does_not_exist(); | |
24283 | + | |
24284 | + return __fix_to_virt(idx); | |
24285 | +} | |
24286 | + | |
24287 | +static inline unsigned long virt_to_fix(const unsigned long vaddr) | |
24288 | +{ | |
24289 | + BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); | |
24290 | + return __virt_to_fix(vaddr); | |
24291 | +} | |
24292 | #endif | |
82094b55 AF |
24293 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-03-16 16:38:05.000000000 +0100 |
24294 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24295 | @@ -58,10 +58,17 @@ enum fixed_addresses { |
24296 | #ifdef CONFIG_X86_LOCAL_APIC | |
24297 | FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ | |
24298 | #endif | |
24299 | -#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN) | |
24300 | +#ifndef CONFIG_XEN | |
24301 | +#ifdef CONFIG_X86_IO_APIC | |
24302 | FIX_IO_APIC_BASE_0, | |
24303 | FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, | |
24304 | #endif | |
24305 | +#else | |
24306 | + FIX_SHARED_INFO, | |
24307 | +#define NR_FIX_ISAMAPS 256 | |
24308 | + FIX_ISAMAP_END, | |
24309 | + FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1, | |
24310 | +#endif | |
24311 | #ifdef CONFIG_X86_VISWS_APIC | |
24312 | FIX_CO_CPU, /* Cobalt timer */ | |
24313 | FIX_CO_APIC, /* Cobalt APIC Redirection Table */ | |
24314 | @@ -78,51 +85,38 @@ enum fixed_addresses { | |
24315 | FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ | |
24316 | FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, | |
24317 | #endif | |
24318 | -#ifdef CONFIG_ACPI | |
24319 | - FIX_ACPI_BEGIN, | |
24320 | - FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, | |
24321 | -#endif | |
24322 | #ifdef CONFIG_PCI_MMCONFIG | |
24323 | FIX_PCIE_MCFG, | |
24324 | #endif | |
24325 | #ifdef CONFIG_PARAVIRT | |
24326 | FIX_PARAVIRT_BOOTMAP, | |
24327 | #endif | |
24328 | - FIX_SHARED_INFO, | |
24329 | -#define NR_FIX_ISAMAPS 256 | |
24330 | - FIX_ISAMAP_END, | |
24331 | - FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1, | |
24332 | __end_of_permanent_fixed_addresses, | |
24333 | /* | |
24334 | * 256 temporary boot-time mappings, used by early_ioremap(), | |
24335 | * before ioremap() is functional. | |
24336 | * | |
24337 | - * We round it up to the next 512 pages boundary so that we | |
24338 | + * We round it up to the next 256 pages boundary so that we | |
24339 | * can have a single pgd entry and a single pte table: | |
24340 | */ | |
24341 | #define NR_FIX_BTMAPS 64 | |
24342 | #define FIX_BTMAPS_NESTING 4 | |
24343 | - FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 - | |
24344 | - (__end_of_permanent_fixed_addresses & 511), | |
24345 | + FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - | |
24346 | + (__end_of_permanent_fixed_addresses & 255), | |
24347 | FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1, | |
24348 | FIX_WP_TEST, | |
24349 | +#ifdef CONFIG_ACPI | |
24350 | + FIX_ACPI_BEGIN, | |
24351 | + FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, | |
24352 | +#endif | |
24353 | #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT | |
24354 | FIX_OHCI1394_BASE, | |
24355 | #endif | |
24356 | __end_of_fixed_addresses | |
24357 | }; | |
24358 | ||
24359 | -extern void __set_fixmap(enum fixed_addresses idx, | |
24360 | - maddr_t phys, pgprot_t flags); | |
24361 | extern void reserve_top_address(unsigned long reserve); | |
24362 | ||
24363 | -#define set_fixmap(idx, phys) \ | |
24364 | - __set_fixmap(idx, phys, PAGE_KERNEL) | |
24365 | -/* | |
24366 | - * Some hardware wants to get fixmapped without caching. | |
24367 | - */ | |
24368 | -#define set_fixmap_nocache(idx, phys) \ | |
24369 | - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) | |
24370 | ||
24371 | #define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) | |
24372 | ||
24373 | @@ -131,38 +125,5 @@ extern void reserve_top_address(unsigned | |
24374 | #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) | |
24375 | #define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE) | |
24376 | ||
24377 | -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) | |
24378 | -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) | |
24379 | - | |
24380 | -extern void __this_fixmap_does_not_exist(void); | |
24381 | - | |
24382 | -/* | |
24383 | - * 'index to address' translation. If anyone tries to use the idx | |
24384 | - * directly without tranlation, we catch the bug with a NULL-deference | |
24385 | - * kernel oops. Illegal ranges of incoming indices are caught too. | |
24386 | - */ | |
24387 | -static __always_inline unsigned long fix_to_virt(const unsigned int idx) | |
24388 | -{ | |
24389 | - /* | |
24390 | - * this branch gets completely eliminated after inlining, | |
24391 | - * except when someone tries to use fixaddr indices in an | |
24392 | - * illegal way. (such as mixing up address types or using | |
24393 | - * out-of-range indices). | |
24394 | - * | |
24395 | - * If it doesn't get removed, the linker will complain | |
24396 | - * loudly with a reasonably clear error message.. | |
24397 | - */ | |
24398 | - if (idx >= __end_of_fixed_addresses) | |
24399 | - __this_fixmap_does_not_exist(); | |
24400 | - | |
24401 | - return __fix_to_virt(idx); | |
24402 | -} | |
24403 | - | |
24404 | -static inline unsigned long virt_to_fix(const unsigned long vaddr) | |
24405 | -{ | |
24406 | - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); | |
24407 | - return __virt_to_fix(vaddr); | |
24408 | -} | |
24409 | - | |
24410 | #endif /* !__ASSEMBLY__ */ | |
24411 | #endif | |
82094b55 AF |
24412 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-03-16 16:38:05.000000000 +0100 |
24413 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24414 | @@ -12,6 +12,7 @@ |
24415 | #define _ASM_FIXMAP_64_H | |
24416 | ||
24417 | #include <linux/kernel.h> | |
24418 | +#include <asm/acpi.h> | |
24419 | #include <asm/apicdef.h> | |
24420 | #include <asm/page.h> | |
24421 | #include <asm/vsyscall.h> | |
24422 | @@ -40,7 +41,6 @@ enum fixed_addresses { | |
24423 | VSYSCALL_HPET, | |
24424 | FIX_DBGP_BASE, | |
24425 | FIX_EARLYCON_MEM_BASE, | |
24426 | - FIX_HPET_BASE, | |
24427 | #ifdef CONFIG_X86_LOCAL_APIC | |
24428 | FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ | |
24429 | #endif | |
24430 | @@ -53,14 +53,21 @@ enum fixed_addresses { | |
24431 | FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE | |
24432 | + MAX_EFI_IO_PAGES - 1, | |
24433 | #endif | |
24434 | +#ifdef CONFIG_PARAVIRT | |
24435 | + FIX_PARAVIRT_BOOTMAP, | |
24436 | +#else | |
24437 | + FIX_SHARED_INFO, | |
24438 | +#endif | |
24439 | #ifdef CONFIG_ACPI | |
24440 | FIX_ACPI_BEGIN, | |
24441 | FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, | |
24442 | #endif | |
24443 | - FIX_SHARED_INFO, | |
24444 | #define NR_FIX_ISAMAPS 256 | |
24445 | FIX_ISAMAP_END, | |
24446 | FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1, | |
24447 | +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT | |
24448 | + FIX_OHCI1394_BASE, | |
24449 | +#endif | |
24450 | __end_of_permanent_fixed_addresses, | |
24451 | /* | |
24452 | * 256 temporary boot-time mappings, used by early_ioremap(), | |
24453 | @@ -71,27 +78,12 @@ enum fixed_addresses { | |
24454 | */ | |
24455 | #define NR_FIX_BTMAPS 64 | |
24456 | #define FIX_BTMAPS_NESTING 4 | |
24457 | - FIX_BTMAP_END = | |
24458 | - __end_of_permanent_fixed_addresses + 512 - | |
24459 | + FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 - | |
24460 | (__end_of_permanent_fixed_addresses & 511), | |
24461 | FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1, | |
24462 | -#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT | |
24463 | - FIX_OHCI1394_BASE, | |
24464 | -#endif | |
24465 | __end_of_fixed_addresses | |
24466 | }; | |
24467 | ||
24468 | -extern void __set_fixmap(enum fixed_addresses idx, | |
24469 | - unsigned long phys, pgprot_t flags); | |
24470 | - | |
24471 | -#define set_fixmap(idx, phys) \ | |
24472 | - __set_fixmap(idx, phys, PAGE_KERNEL) | |
24473 | -/* | |
24474 | - * Some hardware wants to get fixmapped without caching. | |
24475 | - */ | |
24476 | -#define set_fixmap_nocache(idx, phys) \ | |
24477 | - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) | |
24478 | - | |
24479 | #define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) | |
24480 | #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) | |
24481 | #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) | |
24482 | @@ -100,30 +92,4 @@ extern void __set_fixmap(enum fixed_addr | |
24483 | #define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) | |
24484 | #define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) | |
24485 | ||
24486 | -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) | |
24487 | - | |
24488 | -extern void __this_fixmap_does_not_exist(void); | |
24489 | - | |
24490 | -/* | |
24491 | - * 'index to address' translation. If anyone tries to use the idx | |
24492 | - * directly without translation, we catch the bug with a NULL-deference | |
24493 | - * kernel oops. Illegal ranges of incoming indices are caught too. | |
24494 | - */ | |
24495 | -static __always_inline unsigned long fix_to_virt(const unsigned int idx) | |
24496 | -{ | |
24497 | - /* | |
24498 | - * this branch gets completely eliminated after inlining, | |
24499 | - * except when someone tries to use fixaddr indices in an | |
24500 | - * illegal way. (such as mixing up address types or using | |
24501 | - * out-of-range indices). | |
24502 | - * | |
24503 | - * If it doesn't get removed, the linker will complain | |
24504 | - * loudly with a reasonably clear error message.. | |
24505 | - */ | |
24506 | - if (idx >= __end_of_fixed_addresses) | |
24507 | - __this_fixmap_does_not_exist(); | |
24508 | - | |
24509 | - return __fix_to_virt(idx); | |
24510 | -} | |
24511 | - | |
24512 | #endif | |
82094b55 AF |
24513 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/highmem.h 2009-03-16 16:38:05.000000000 +0100 |
24514 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/highmem.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24515 | @@ -74,6 +74,9 @@ struct page *kmap_atomic_to_page(void *p |
24516 | ||
24517 | #define flush_cache_kmaps() do { } while (0) | |
24518 | ||
24519 | +extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, | |
24520 | + unsigned long end_pfn); | |
24521 | + | |
24522 | void clear_highpage(struct page *); | |
24523 | static inline void clear_user_highpage(struct page *page, unsigned long vaddr) | |
24524 | { | |
82094b55 AF |
24525 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/hypercall.h 2009-02-16 16:18:36.000000000 +0100 |
24526 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/hypercall.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24527 | @@ -323,9 +323,19 @@ static inline int __must_check |
24528 | HYPERVISOR_grant_table_op( | |
24529 | unsigned int cmd, void *uop, unsigned int count) | |
24530 | { | |
24531 | + bool fixup = false; | |
24532 | + int rc; | |
24533 | + | |
24534 | if (arch_use_lazy_mmu_mode()) | |
24535 | xen_multicall_flush(false); | |
24536 | - return _hypercall3(int, grant_table_op, cmd, uop, count); | |
24537 | +#ifdef GNTTABOP_map_grant_ref | |
24538 | + if (cmd == GNTTABOP_map_grant_ref) | |
24539 | +#endif | |
24540 | + fixup = gnttab_pre_map_adjust(cmd, uop, count); | |
24541 | + rc = _hypercall3(int, grant_table_op, cmd, uop, count); | |
24542 | + if (rc == 0 && fixup) | |
24543 | + rc = gnttab_post_map_adjust(uop, count); | |
24544 | + return rc; | |
24545 | } | |
24546 | ||
24547 | static inline int __must_check | |
82094b55 AF |
24548 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/hypervisor.h 2009-03-16 16:33:40.000000000 +0100 |
24549 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/hypervisor.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24550 | @@ -35,7 +35,6 @@ |
24551 | ||
24552 | #include <linux/types.h> | |
24553 | #include <linux/kernel.h> | |
24554 | -#include <linux/version.h> | |
24555 | #include <linux/errno.h> | |
24556 | #include <xen/interface/xen.h> | |
24557 | #include <xen/interface/platform.h> | |
24558 | @@ -112,6 +111,8 @@ int xen_create_contiguous_region( | |
24559 | unsigned long vstart, unsigned int order, unsigned int address_bits); | |
24560 | void xen_destroy_contiguous_region( | |
24561 | unsigned long vstart, unsigned int order); | |
24562 | +int early_create_contiguous_region(unsigned long pfn, unsigned int order, | |
24563 | + unsigned int address_bits); | |
24564 | ||
24565 | struct page; | |
24566 | ||
24567 | @@ -181,6 +182,29 @@ static inline void xen_multicall_flush(b | |
24568 | ||
24569 | #endif /* CONFIG_XEN && !MODULE */ | |
24570 | ||
24571 | +#ifdef CONFIG_XEN | |
24572 | + | |
24573 | +struct gnttab_map_grant_ref; | |
24574 | +bool gnttab_pre_map_adjust(unsigned int cmd, struct gnttab_map_grant_ref *, | |
24575 | + unsigned int count); | |
24576 | +#if CONFIG_XEN_COMPAT < 0x030400 | |
24577 | +int gnttab_post_map_adjust(const struct gnttab_map_grant_ref *, unsigned int); | |
24578 | +#else | |
24579 | +static inline int gnttab_post_map_adjust(const struct gnttab_map_grant_ref *m, | |
24580 | + unsigned int count) | |
24581 | +{ | |
24582 | + BUG(); | |
24583 | + return -ENOSYS; | |
24584 | +} | |
24585 | +#endif | |
24586 | + | |
24587 | +#else /* !CONFIG_XEN */ | |
24588 | + | |
24589 | +#define gnttab_pre_map_adjust(...) false | |
24590 | +#define gnttab_post_map_adjust(...) ({ BUG(); -ENOSYS; }) | |
24591 | + | |
24592 | +#endif /* CONFIG_XEN */ | |
24593 | + | |
24594 | #if defined(CONFIG_X86_64) | |
24595 | #define MULTI_UVMFLAGS_INDEX 2 | |
24596 | #define MULTI_UVMDOMID_INDEX 3 | |
82094b55 AF |
24597 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/io.h 2009-03-16 16:38:05.000000000 +0100 |
24598 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/io.h 2009-09-24 11:02:00.000000000 +0200 | |
24599 | @@ -3,20 +3,140 @@ | |
2cb7cef9 BS |
24600 | |
24601 | #define ARCH_HAS_IOREMAP_WC | |
24602 | ||
24603 | +#include <linux/compiler.h> | |
24604 | + | |
24605 | +/* | |
24606 | + * early_ioremap() and early_iounmap() are for temporary early boot-time | |
24607 | + * mappings, before the real ioremap() is functional. | |
24608 | + * A boot-time mapping is currently limited to at most 16 pages. | |
24609 | + */ | |
24610 | +#ifndef __ASSEMBLY__ | |
24611 | +extern void early_ioremap_init(void); | |
24612 | +extern void early_ioremap_clear(void); | |
24613 | +extern void early_ioremap_reset(void); | |
24614 | +extern void *early_ioremap(unsigned long offset, unsigned long size); | |
24615 | +extern void early_iounmap(void *addr, unsigned long size); | |
24616 | +extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); | |
24617 | +#endif | |
24618 | + | |
24619 | +#define build_mmio_read(name, size, type, reg, barrier) \ | |
24620 | +static inline type name(const volatile void __iomem *addr) \ | |
24621 | +{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \ | |
24622 | +:"m" (*(volatile type __force *)addr) barrier); return ret; } | |
24623 | + | |
24624 | +#define build_mmio_write(name, size, type, reg, barrier) \ | |
24625 | +static inline void name(type val, volatile void __iomem *addr) \ | |
24626 | +{ asm volatile("mov" size " %0,%1": :reg (val), \ | |
24627 | +"m" (*(volatile type __force *)addr) barrier); } | |
24628 | + | |
24629 | +build_mmio_read(readb, "b", unsigned char, "=q", :"memory") | |
24630 | +build_mmio_read(readw, "w", unsigned short, "=r", :"memory") | |
24631 | +build_mmio_read(readl, "l", unsigned int, "=r", :"memory") | |
24632 | + | |
24633 | +build_mmio_read(__readb, "b", unsigned char, "=q", ) | |
24634 | +build_mmio_read(__readw, "w", unsigned short, "=r", ) | |
24635 | +build_mmio_read(__readl, "l", unsigned int, "=r", ) | |
24636 | + | |
24637 | +build_mmio_write(writeb, "b", unsigned char, "q", :"memory") | |
24638 | +build_mmio_write(writew, "w", unsigned short, "r", :"memory") | |
24639 | +build_mmio_write(writel, "l", unsigned int, "r", :"memory") | |
24640 | + | |
24641 | +build_mmio_write(__writeb, "b", unsigned char, "q", ) | |
24642 | +build_mmio_write(__writew, "w", unsigned short, "r", ) | |
24643 | +build_mmio_write(__writel, "l", unsigned int, "r", ) | |
24644 | + | |
24645 | +#define readb_relaxed(a) __readb(a) | |
24646 | +#define readw_relaxed(a) __readw(a) | |
24647 | +#define readl_relaxed(a) __readl(a) | |
24648 | +#define __raw_readb __readb | |
24649 | +#define __raw_readw __readw | |
24650 | +#define __raw_readl __readl | |
24651 | + | |
24652 | +#define __raw_writeb __writeb | |
24653 | +#define __raw_writew __writew | |
24654 | +#define __raw_writel __writel | |
24655 | + | |
24656 | +#define mmiowb() barrier() | |
24657 | + | |
24658 | +#ifdef CONFIG_X86_64 | |
24659 | +build_mmio_read(readq, "q", unsigned long, "=r", :"memory") | |
24660 | +build_mmio_read(__readq, "q", unsigned long, "=r", ) | |
24661 | +build_mmio_write(writeq, "q", unsigned long, "r", :"memory") | |
24662 | +build_mmio_write(__writeq, "q", unsigned long, "r", ) | |
24663 | + | |
24664 | +#define readq_relaxed(a) __readq(a) | |
24665 | +#define __raw_readq __readq | |
24666 | +#define __raw_writeq writeq | |
24667 | + | |
24668 | +/* Let people know we have them */ | |
24669 | +#define readq readq | |
24670 | +#define writeq writeq | |
24671 | +#endif | |
24672 | + | |
24673 | +#define native_io_delay xen_io_delay | |
24674 | + | |
24675 | #ifdef CONFIG_X86_32 | |
24676 | -# include "io_32.h" | |
24677 | +# include "../../io_32.h" | |
24678 | #else | |
24679 | -# include "io_64.h" | |
24680 | +# include "../../io_64.h" | |
24681 | +#endif | |
24682 | + | |
24683 | +#if defined(__KERNEL__) && !defined(__ASSEMBLY__) | |
24684 | + | |
24685 | +/* We will be supplying our own /dev/mem implementation */ | |
24686 | +#define ARCH_HAS_DEV_MEM | |
24687 | + | |
24688 | +#define page_to_pseudophys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) | |
24689 | +#undef page_to_phys | |
24690 | +#define page_to_phys(page) (phys_to_machine(page_to_pseudophys(page))) | |
24691 | +#define page_to_bus(page) (phys_to_machine(page_to_pseudophys(page))) | |
24692 | + | |
24693 | +#define bvec_to_pseudophys(bv) (page_to_pseudophys((bv)->bv_page) + \ | |
24694 | + (unsigned long) (bv)->bv_offset) | |
24695 | + | |
24696 | +#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ | |
24697 | + (bvec_to_phys(vec1) + (vec1)->bv_len == bvec_to_phys(vec2) \ | |
24698 | + && bvec_to_pseudophys(vec1) + (vec1)->bv_len \ | |
24699 | + == bvec_to_pseudophys(vec2)) | |
24700 | + | |
24701 | +#undef virt_to_bus | |
24702 | +#undef bus_to_virt | |
24703 | +#define virt_to_bus(_x) phys_to_machine(__pa(_x)) | |
24704 | +#define bus_to_virt(_x) __va(machine_to_phys(_x)) | |
24705 | + | |
24706 | +#include <asm/fixmap.h> | |
24707 | + | |
82094b55 | 24708 | +#undef __ISA_IO_base |
2cb7cef9 BS |
24709 | +#undef isa_virt_to_bus |
24710 | +#undef isa_page_to_bus | |
24711 | +#undef isa_bus_to_virt | |
82094b55 AF |
24712 | +#define isa_virt_to_bus(_x) ({ \ |
24713 | + unsigned long _va_ = (unsigned long)(_x); \ | |
24714 | + _va_ - fix_to_virt(FIX_ISAMAP_BEGIN) < (NR_FIX_ISAMAPS << PAGE_SHIFT) \ | |
24715 | + ? _va_ - fix_to_virt(FIX_ISAMAP_BEGIN) \ | |
24716 | + : ({ BUG(); (unsigned long)virt_to_bus(_va_); }); }) | |
24717 | +#define isa_bus_to_virt(_x) ((void *)fix_to_virt(FIX_ISAMAP_BEGIN) + (_x)) | |
2cb7cef9 BS |
24718 | + |
24719 | #endif | |
24720 | ||
24721 | extern void *xlate_dev_mem_ptr(unsigned long phys); | |
24722 | extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr); | |
24723 | ||
24724 | -extern void map_devmem(unsigned long pfn, unsigned long len, pgprot_t); | |
24725 | -extern void unmap_devmem(unsigned long pfn, unsigned long len, pgprot_t); | |
24726 | - | |
24727 | extern int ioremap_check_change_attr(unsigned long mfn, unsigned long size, | |
24728 | unsigned long prot_val); | |
24729 | extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); | |
24730 | ||
24731 | +/* | |
24732 | + * early_ioremap() and early_iounmap() are for temporary early boot-time | |
24733 | + * mappings, before the real ioremap() is functional. | |
24734 | + * A boot-time mapping is currently limited to at most 16 pages. | |
24735 | + */ | |
24736 | +extern void early_ioremap_init(void); | |
24737 | +extern void early_ioremap_clear(void); | |
24738 | +extern void early_ioremap_reset(void); | |
24739 | +extern void *early_ioremap(unsigned long offset, unsigned long size); | |
24740 | +extern void early_iounmap(void *addr, unsigned long size); | |
24741 | +extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); | |
24742 | + | |
24743 | + | |
24744 | #endif /* _ASM_X86_IO_H */ | |
24745 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
82094b55 | 24746 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/irq_vectors.h 2009-06-04 10:21:39.000000000 +0200 |
2cb7cef9 BS |
24747 | @@ -0,0 +1,52 @@ |
24748 | +#ifndef _ASM_IRQ_VECTORS_H | |
24749 | +#define _ASM_IRQ_VECTORS_H | |
24750 | + | |
24751 | +#ifdef CONFIG_X86_32 | |
24752 | +# define SYSCALL_VECTOR 0x80 | |
24753 | +#else | |
24754 | +# define IA32_SYSCALL_VECTOR 0x80 | |
24755 | +#endif | |
24756 | + | |
24757 | +#define RESCHEDULE_VECTOR 0 | |
24758 | +#define CALL_FUNCTION_VECTOR 1 | |
24759 | +#define CALL_FUNC_SINGLE_VECTOR 2 | |
24760 | +#define SPIN_UNLOCK_VECTOR 3 | |
24761 | +#define NR_IPIS 4 | |
24762 | + | |
24763 | +/* | |
24764 | + * The maximum number of vectors supported by i386 processors | |
24765 | + * is limited to 256. For processors other than i386, NR_VECTORS | |
24766 | + * should be changed accordingly. | |
24767 | + */ | |
24768 | +#define NR_VECTORS 256 | |
24769 | + | |
24770 | +#define FIRST_VM86_IRQ 3 | |
24771 | +#define LAST_VM86_IRQ 15 | |
24772 | +#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) | |
24773 | + | |
24774 | +/* | |
24775 | + * The flat IRQ space is divided into two regions: | |
24776 | + * 1. A one-to-one mapping of real physical IRQs. This space is only used | |
24777 | + * if we have physical device-access privilege. This region is at the | |
24778 | + * start of the IRQ space so that existing device drivers do not need | |
24779 | + * to be modified to translate physical IRQ numbers into our IRQ space. | |
24780 | + * 2. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These | |
24781 | + * are bound using the provided bind/unbind functions. | |
24782 | + */ | |
24783 | + | |
24784 | +#define PIRQ_BASE 0 | |
24785 | +#if defined(NR_CPUS) && defined(MAX_IO_APICS) | |
24786 | +# if NR_CPUS < MAX_IO_APICS | |
24787 | +# define NR_PIRQS (NR_VECTORS + 32 * NR_CPUS) | |
24788 | +# else | |
24789 | +# define NR_PIRQS (NR_VECTORS + 32 * MAX_IO_APICS) | |
24790 | +# endif | |
24791 | +#endif | |
24792 | + | |
24793 | +#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS) | |
24794 | +#define NR_DYNIRQS 256 | |
24795 | + | |
24796 | +#define NR_IRQS (NR_PIRQS + NR_DYNIRQS) | |
24797 | +#define NR_IRQ_VECTORS NR_IRQS | |
24798 | + | |
24799 | +#endif /* _ASM_IRQ_VECTORS_H */ | |
82094b55 AF |
24800 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/irqflags.h 2009-03-16 16:38:05.000000000 +0100 |
24801 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/irqflags.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24802 | @@ -118,7 +118,7 @@ static inline void halt(void) |
24803 | ||
24804 | #ifndef CONFIG_X86_64 | |
24805 | #define INTERRUPT_RETURN iret | |
24806 | -#define ENABLE_INTERRUPTS_SYSCALL_RET __ENABLE_INTERRUPTS ; \ | |
24807 | +#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \ | |
24808 | sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \ | |
24809 | __TEST_PENDING ; \ | |
24810 | jnz 14f /* process more events if necessary... */ ; \ | |
24811 | @@ -175,18 +175,6 @@ static inline void trace_hardirqs_fixup_ | |
24812 | #else | |
24813 | ||
24814 | #ifdef CONFIG_X86_64 | |
24815 | -/* | |
24816 | - * Currently paravirt can't handle swapgs nicely when we | |
24817 | - * don't have a stack we can rely on (such as a user space | |
24818 | - * stack). So we either find a way around these or just fault | |
24819 | - * and emulate if a guest tries to call swapgs directly. | |
24820 | - * | |
24821 | - * Either way, this is a good way to document that we don't | |
24822 | - * have a reliable stack. x86_64 only. | |
24823 | - */ | |
24824 | -#define SWAPGS_UNSAFE_STACK swapgs | |
24825 | -#define ARCH_TRACE_IRQS_ON call trace_hardirqs_on_thunk | |
24826 | -#define ARCH_TRACE_IRQS_OFF call trace_hardirqs_off_thunk | |
24827 | #define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk | |
24828 | #define ARCH_LOCKDEP_SYS_EXIT_IRQ \ | |
24829 | TRACE_IRQS_ON; \ | |
24830 | @@ -198,24 +186,6 @@ static inline void trace_hardirqs_fixup_ | |
24831 | TRACE_IRQS_OFF; | |
24832 | ||
24833 | #else | |
24834 | -#define ARCH_TRACE_IRQS_ON \ | |
24835 | - pushl %eax; \ | |
24836 | - pushl %ecx; \ | |
24837 | - pushl %edx; \ | |
24838 | - call trace_hardirqs_on; \ | |
24839 | - popl %edx; \ | |
24840 | - popl %ecx; \ | |
24841 | - popl %eax; | |
24842 | - | |
24843 | -#define ARCH_TRACE_IRQS_OFF \ | |
24844 | - pushl %eax; \ | |
24845 | - pushl %ecx; \ | |
24846 | - pushl %edx; \ | |
24847 | - call trace_hardirqs_off; \ | |
24848 | - popl %edx; \ | |
24849 | - popl %ecx; \ | |
24850 | - popl %eax; | |
24851 | - | |
24852 | #define ARCH_LOCKDEP_SYS_EXIT \ | |
24853 | pushl %eax; \ | |
24854 | pushl %ecx; \ | |
24855 | @@ -229,8 +199,8 @@ static inline void trace_hardirqs_fixup_ | |
24856 | #endif | |
24857 | ||
24858 | #ifdef CONFIG_TRACE_IRQFLAGS | |
24859 | -# define TRACE_IRQS_ON ARCH_TRACE_IRQS_ON | |
24860 | -# define TRACE_IRQS_OFF ARCH_TRACE_IRQS_OFF | |
24861 | +# define TRACE_IRQS_ON call trace_hardirqs_on_thunk; | |
24862 | +# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; | |
24863 | #else | |
24864 | # define TRACE_IRQS_ON | |
24865 | # define TRACE_IRQS_OFF | |
82094b55 AF |
24866 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/mmu_context.h 2009-02-16 16:18:36.000000000 +0100 |
24867 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/mmu_context.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24868 | @@ -1,5 +1,42 @@ |
24869 | +#ifndef __ASM_X86_MMU_CONTEXT_H | |
24870 | +#define __ASM_X86_MMU_CONTEXT_H | |
24871 | + | |
24872 | +#include <asm/desc.h> | |
24873 | +#include <asm/atomic.h> | |
24874 | +#include <asm/pgalloc.h> | |
24875 | +#include <asm/tlbflush.h> | |
24876 | + | |
24877 | +void arch_exit_mmap(struct mm_struct *mm); | |
24878 | +void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); | |
24879 | + | |
24880 | +void mm_pin(struct mm_struct *mm); | |
24881 | +void mm_unpin(struct mm_struct *mm); | |
24882 | +void mm_pin_all(void); | |
24883 | + | |
24884 | +static inline void xen_activate_mm(struct mm_struct *prev, | |
24885 | + struct mm_struct *next) | |
24886 | +{ | |
24887 | + if (!PagePinned(virt_to_page(next->pgd))) | |
24888 | + mm_pin(next); | |
24889 | +} | |
24890 | + | |
24891 | +/* | |
24892 | + * Used for LDT copy/destruction. | |
24893 | + */ | |
24894 | +int init_new_context(struct task_struct *tsk, struct mm_struct *mm); | |
24895 | +void destroy_context(struct mm_struct *mm); | |
24896 | + | |
24897 | #ifdef CONFIG_X86_32 | |
24898 | # include "mmu_context_32.h" | |
24899 | #else | |
24900 | # include "mmu_context_64.h" | |
24901 | #endif | |
24902 | + | |
24903 | +#define activate_mm(prev, next) \ | |
24904 | +do { \ | |
24905 | + xen_activate_mm(prev, next); /* pin next's page tables if not yet pinned */ \ | |
24906 | + switch_mm((prev), (next), NULL); \ | |
24907 | +} while (0) /* no trailing ';': the caller's own ';' completes the statement */ | |
24908 | + | |
24909 | + | |
24910 | +#endif /* __ASM_X86_MMU_CONTEXT_H */ | |
82094b55 AF |
24911 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-03-16 16:38:05.000000000 +0100 |
24912 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24913 | @@ -1,32 +1,6 @@ |
24914 | #ifndef __I386_SCHED_H | |
24915 | #define __I386_SCHED_H | |
24916 | ||
24917 | -#include <asm/desc.h> | |
24918 | -#include <asm/atomic.h> | |
24919 | -#include <asm/pgalloc.h> | |
24920 | -#include <asm/tlbflush.h> | |
24921 | - | |
24922 | -void arch_exit_mmap(struct mm_struct *mm); | |
24923 | -void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); | |
24924 | - | |
24925 | -void mm_pin(struct mm_struct *mm); | |
24926 | -void mm_unpin(struct mm_struct *mm); | |
24927 | -void mm_pin_all(void); | |
24928 | - | |
24929 | -static inline void xen_activate_mm(struct mm_struct *prev, | |
24930 | - struct mm_struct *next) | |
24931 | -{ | |
24932 | - if (!PagePinned(virt_to_page(next->pgd))) | |
24933 | - mm_pin(next); | |
24934 | -} | |
24935 | - | |
24936 | -/* | |
24937 | - * Used for LDT copy/destruction. | |
24938 | - */ | |
24939 | -int init_new_context(struct task_struct *tsk, struct mm_struct *mm); | |
24940 | -void destroy_context(struct mm_struct *mm); | |
24941 | - | |
24942 | - | |
24943 | static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) | |
24944 | { | |
24945 | #if 0 /* XEN: no lazy tlb */ | |
24946 | @@ -107,10 +81,4 @@ static inline void switch_mm(struct mm_s | |
24947 | #define deactivate_mm(tsk, mm) \ | |
24948 | asm("movl %0,%%gs": :"r" (0)); | |
24949 | ||
24950 | -#define activate_mm(prev, next) \ | |
24951 | -do { \ | |
24952 | - xen_activate_mm(prev, next); \ | |
24953 | - switch_mm((prev), (next), NULL); \ | |
24954 | -} while (0) | |
24955 | - | |
24956 | #endif | |
82094b55 AF |
24957 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-03-16 16:38:05.000000000 +0100 |
24958 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
24959 | @@ -1,23 +1,6 @@ |
24960 | #ifndef __X86_64_MMU_CONTEXT_H | |
24961 | #define __X86_64_MMU_CONTEXT_H | |
24962 | ||
24963 | -#include <asm/desc.h> | |
24964 | -#include <asm/atomic.h> | |
24965 | -#include <asm/pgalloc.h> | |
24966 | -#include <asm/page.h> | |
24967 | -#include <asm/pda.h> | |
24968 | -#include <asm/pgtable.h> | |
24969 | -#include <asm/tlbflush.h> | |
24970 | - | |
24971 | -void arch_exit_mmap(struct mm_struct *mm); | |
24972 | -void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); | |
24973 | - | |
24974 | -/* | |
24975 | - * possibly do the LDT unload here? | |
24976 | - */ | |
24977 | -int init_new_context(struct task_struct *tsk, struct mm_struct *mm); | |
24978 | -void destroy_context(struct mm_struct *mm); | |
24979 | - | |
24980 | static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) | |
24981 | { | |
24982 | #if defined(CONFIG_SMP) && !defined(CONFIG_XEN) | |
24983 | @@ -58,10 +41,6 @@ static inline void __prepare_arch_switch | |
24984 | } | |
24985 | } | |
24986 | ||
24987 | -extern void mm_pin(struct mm_struct *mm); | |
24988 | -extern void mm_unpin(struct mm_struct *mm); | |
24989 | -void mm_pin_all(void); | |
24990 | - | |
24991 | static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, | |
24992 | struct task_struct *tsk) | |
24993 | { | |
24994 | @@ -124,11 +103,4 @@ do { \ | |
24995 | asm volatile("movl %0,%%fs"::"r"(0)); \ | |
24996 | } while (0) | |
24997 | ||
24998 | -static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) | |
24999 | -{ | |
25000 | - if (!PagePinned(virt_to_page(next->pgd))) | |
25001 | - mm_pin(next); | |
25002 | - switch_mm(prev, next, NULL); | |
25003 | -} | |
25004 | - | |
25005 | #endif | |
82094b55 AF |
25006 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/page.h 2009-03-16 16:38:05.000000000 +0100 |
25007 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/page.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
25008 | @@ -16,9 +16,9 @@ |
25009 | * below. The preprocessor will warn if the two definitions aren't identical. | |
25010 | */ | |
25011 | #define _PAGE_BIT_PRESENT 0 | |
25012 | -#define _PAGE_PRESENT (_AC(1, L)<<_PAGE_BIT_PRESENT) | |
25013 | -#define _PAGE_BIT_IO 9 | |
25014 | -#define _PAGE_IO (_AC(1, L)<<_PAGE_BIT_IO) | |
25015 | +#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) | |
25016 | +#define _PAGE_BIT_IO 11 | |
25017 | +#define _PAGE_IO (_AT(pteval_t, 1) << _PAGE_BIT_IO) | |
25018 | ||
25019 | #define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1) | |
25020 | #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) | |
25021 | @@ -28,8 +28,11 @@ | |
25022 | (ie, 32-bit PAE). */ | |
25023 | #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) | |
25024 | ||
25025 | -/* PTE_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ | |
25026 | -#define PTE_MASK ((pteval_t)PHYSICAL_PAGE_MASK) | |
25027 | +/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ | |
25028 | +#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) | |
25029 | + | |
25030 | +/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */ | |
25031 | +#define PTE_FLAGS_MASK (~PTE_PFN_MASK) | |
25032 | ||
25033 | #define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) | |
25034 | #define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) | |
25035 | @@ -39,8 +42,7 @@ | |
25036 | #define HPAGE_MASK (~(HPAGE_SIZE - 1)) | |
25037 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) | |
25038 | ||
25039 | -/* to align the pointer to the (next) page boundary */ | |
25040 | -#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) | |
25041 | +#define HUGE_MAX_HSTATE 2 | |
25042 | ||
25043 | #ifndef __ASSEMBLY__ | |
25044 | #include <linux/types.h> | |
25045 | @@ -61,9 +63,17 @@ | |
25046 | ||
25047 | #ifndef __ASSEMBLY__ | |
25048 | ||
25049 | +typedef struct { pgdval_t pgd; } pgd_t; | |
25050 | +typedef struct { pgprotval_t pgprot; } pgprot_t; | |
25051 | + | |
25052 | extern int page_is_ram(unsigned long pagenr); | |
25053 | extern int devmem_is_allowed(unsigned long pagenr); | |
25054 | +extern void map_devmem(unsigned long pfn, unsigned long size, | |
25055 | + pgprot_t vma_prot); | |
25056 | +extern void unmap_devmem(unsigned long pfn, unsigned long size, | |
25057 | + pgprot_t vma_prot); | |
25058 | ||
25059 | +extern unsigned long max_low_pfn_mapped; | |
25060 | extern unsigned long max_pfn_mapped; | |
25061 | ||
25062 | struct page; | |
25063 | @@ -84,15 +94,11 @@ static inline void copy_user_page(void * | |
25064 | alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) | |
25065 | #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE | |
25066 | ||
25067 | -typedef struct { pgprotval_t pgprot; } pgprot_t; | |
25068 | - | |
25069 | #define pgprot_val(x) ((x).pgprot) | |
25070 | #define __pgprot(x) ((pgprot_t) { (x) } ) | |
25071 | ||
25072 | #include <asm/maddr.h> | |
25073 | ||
25074 | -typedef struct { pgdval_t pgd; } pgd_t; | |
25075 | - | |
25076 | #define __pgd_ma(x) ((pgd_t) { (x) } ) | |
25077 | static inline pgd_t xen_make_pgd(pgdval_t val) | |
25078 | { | |
25079 | @@ -196,6 +202,11 @@ static inline pteval_t xen_pte_val(pte_t | |
25080 | return ret; | |
25081 | } | |
25082 | ||
25083 | +static inline pteval_t xen_pte_flags(pte_t pte) | |
25084 | +{ | |
25085 | + return __pte_val(pte) & PTE_FLAGS_MASK; | |
25086 | +} | |
25087 | + | |
25088 | #define pgd_val(x) xen_pgd_val(x) | |
25089 | #define __pgd(x) xen_make_pgd(x) | |
25090 | ||
25091 | @@ -210,6 +221,7 @@ static inline pteval_t xen_pte_val(pte_t | |
25092 | #endif | |
25093 | ||
25094 | #define pte_val(x) xen_pte_val(x) | |
25095 | +#define pte_flags(x) xen_pte_flags(x) | |
25096 | #define __pte(x) xen_make_pte(x) | |
25097 | ||
25098 | #define __pa(x) __phys_addr((unsigned long)(x)) | |
82094b55 AF |
25099 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/page_64.h 2009-03-16 16:38:05.000000000 +0100 |
25100 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/page_64.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
25101 | @@ -26,6 +26,12 @@ |
25102 | #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) | |
25103 | #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) | |
25104 | ||
25105 | +/* | |
25106 | + * Set __PAGE_OFFSET to the most negative possible address + | |
25107 | + * PGDIR_SIZE*16 (pgd slot 272). The gap leaves room for a | |
25108 | + * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's | |
25109 | + * what Xen requires. | |
25110 | + */ | |
25111 | #define __PAGE_OFFSET _AC(0xffff880000000000, UL) | |
25112 | ||
25113 | #define __PHYSICAL_START CONFIG_PHYSICAL_START | |
25114 | @@ -63,7 +69,8 @@ | |
25115 | void clear_page(void *page); | |
25116 | void copy_page(void *to, void *from); | |
25117 | ||
25118 | -extern unsigned long end_pfn; | |
25119 | +/* duplicates the declaration in bootmem.h */ | |
25120 | +extern unsigned long max_pfn; | |
25121 | ||
25122 | static inline unsigned long __phys_addr(unsigned long x) | |
25123 | { | |
25124 | @@ -91,6 +98,11 @@ typedef union { pteval_t pte; unsigned i | |
25125 | extern unsigned long init_memory_mapping(unsigned long start, | |
25126 | unsigned long end); | |
25127 | ||
25128 | +extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); | |
25129 | + | |
25130 | +extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); | |
25131 | +extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); | |
25132 | + | |
25133 | #endif /* !__ASSEMBLY__ */ | |
25134 | ||
25135 | #ifdef CONFIG_FLATMEM | |
82094b55 AF |
25136 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pci.h 2009-03-16 16:38:05.000000000 +0100 |
25137 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pci.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
25138 | @@ -21,6 +21,8 @@ struct pci_sysdata { |
25139 | #endif | |
25140 | }; | |
25141 | ||
25142 | +extern int pci_routeirq; | |
25143 | + | |
25144 | /* scan a bus after allocating a pci_sysdata for it */ | |
25145 | extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, | |
25146 | int node); | |
82094b55 AF |
25147 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pci_32.h 2009-02-16 16:18:36.000000000 +0100 |
25148 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pci_32.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
25149 | @@ -38,12 +38,14 @@ struct pci_dev; |
25150 | #define PCI_DMA_BUS_IS_PHYS (1) | |
25151 | ||
25152 | /* pci_unmap_{page,single} is a nop so... */ | |
25153 | -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) | |
25154 | -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) | |
25155 | -#define pci_unmap_addr(PTR, ADDR_NAME) (0) | |
25156 | -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) | |
25157 | -#define pci_unmap_len(PTR, LEN_NAME) (0) | |
25158 | -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) | |
25159 | +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME[0]; | |
25160 | +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) unsigned LEN_NAME[0]; | |
25161 | +#define pci_unmap_addr(PTR, ADDR_NAME) sizeof((PTR)->ADDR_NAME) | |
25162 | +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ | |
25163 | + do { break; } while (pci_unmap_addr(PTR, ADDR_NAME)) | |
25164 | +#define pci_unmap_len(PTR, LEN_NAME) sizeof((PTR)->LEN_NAME) | |
25165 | +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ | |
25166 | + do { break; } while (pci_unmap_len(PTR, LEN_NAME)) | |
25167 | ||
25168 | #endif | |
25169 | ||
82094b55 AF |
25170 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgalloc.h 2009-03-16 16:38:05.000000000 +0100 |
25171 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgalloc.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
25172 | @@ -7,6 +7,9 @@ |
25173 | ||
25174 | #include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */ | |
25175 | ||
25176 | +static inline int paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } | |
25177 | +static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) {} | |
25178 | + | |
25179 | static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {} | |
25180 | static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {} | |
25181 | static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn, | |
82094b55 AF |
25182 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgtable.h 2009-03-16 16:38:05.000000000 +0100 |
25183 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgtable.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
25184 | @@ -13,11 +13,12 @@ |
25185 | #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ | |
25186 | #define _PAGE_BIT_PAT 7 /* on 4KB pages */ | |
25187 | #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ | |
25188 | -#define _PAGE_BIT_IO 9 /* Mapped page is I/O or foreign and | |
25189 | +#define _PAGE_BIT_UNUSED1 9 /* available for programmer */ | |
25190 | +#define _PAGE_BIT_UNUSED2 10 | |
25191 | +#define _PAGE_BIT_IO 11 /* Mapped page is I/O or foreign and | |
25192 | * has no associated page struct. */ | |
25193 | -#define _PAGE_BIT_UNUSED2 10 /* available for programmer */ | |
25194 | -#define _PAGE_BIT_UNUSED3 11 | |
25195 | #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ | |
25196 | +#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 | |
25197 | #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ | |
25198 | ||
25199 | /* If _PAGE_BIT_PRESENT is clear, we use these: */ | |
25200 | @@ -28,34 +29,31 @@ | |
25201 | /* if the user mapped it with PROT_NONE; pte_present gives true */ | |
25202 | #define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL | |
25203 | ||
25204 | -/* | |
25205 | - * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a | |
25206 | - * sign-extended value on 32-bit with all 1's in the upper word, | |
25207 | - * which preserves the upper pte values on 64-bit ptes: | |
25208 | - */ | |
25209 | -#define _PAGE_PRESENT (_AC(1, L)<<_PAGE_BIT_PRESENT) | |
25210 | -#define _PAGE_RW (_AC(1, L)<<_PAGE_BIT_RW) | |
25211 | -#define _PAGE_USER (_AC(1, L)<<_PAGE_BIT_USER) | |
25212 | -#define _PAGE_PWT (_AC(1, L)<<_PAGE_BIT_PWT) | |
25213 | -#define _PAGE_PCD (_AC(1, L)<<_PAGE_BIT_PCD) | |
25214 | -#define _PAGE_ACCESSED (_AC(1, L)<<_PAGE_BIT_ACCESSED) | |
25215 | -#define _PAGE_DIRTY (_AC(1, L)<<_PAGE_BIT_DIRTY) | |
25216 | -#define _PAGE_PSE (_AC(1, L)<<_PAGE_BIT_PSE) /* 2MB page */ | |
25217 | -#define _PAGE_GLOBAL (_AC(1, L)<<_PAGE_BIT_GLOBAL) /* Global TLB entry */ | |
25218 | -#define _PAGE_IO (_AC(1, L)<<_PAGE_BIT_IO) | |
25219 | -#define _PAGE_UNUSED2 (_AC(1, L)<<_PAGE_BIT_UNUSED2) | |
25220 | -#define _PAGE_UNUSED3 (_AC(1, L)<<_PAGE_BIT_UNUSED3) | |
25221 | -#define _PAGE_PAT (_AC(1, L)<<_PAGE_BIT_PAT) | |
25222 | -#define _PAGE_PAT_LARGE (_AC(1, L)<<_PAGE_BIT_PAT_LARGE) | |
25223 | +#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) | |
25224 | +#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) | |
25225 | +#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) | |
25226 | +#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT) | |
25227 | +#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD) | |
25228 | +#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) | |
25229 | +#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) | |
25230 | +#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) | |
25231 | +#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) | |
25232 | +#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) | |
25233 | +#define _PAGE_UNUSED2 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED2) | |
25234 | +#define _PAGE_IO (_AT(pteval_t, 1) << _PAGE_BIT_IO) | |
25235 | +#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) | |
25236 | +#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) | |
25237 | +#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) | |
25238 | +#define __HAVE_ARCH_PTE_SPECIAL | |
25239 | ||
25240 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | |
25241 | -#define _PAGE_NX (_AC(1, ULL) << _PAGE_BIT_NX) | |
25242 | +#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) | |
25243 | #else | |
25244 | -#define _PAGE_NX 0 | |
25245 | +#define _PAGE_NX (_AT(pteval_t, 0)) | |
25246 | #endif | |
25247 | ||
25248 | -#define _PAGE_FILE (_AC(1, L)<<_PAGE_BIT_FILE) | |
25249 | -#define _PAGE_PROTNONE (_AC(1, L)<<_PAGE_BIT_PROTNONE) | |
25250 | +#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) | |
25251 | +#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) | |
25252 | ||
25253 | #ifndef __ASSEMBLY__ | |
25254 | #if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT <= 0x030002 | |
25255 | @@ -71,8 +69,8 @@ extern unsigned int __kernel_page_user; | |
25256 | _PAGE_DIRTY | __kernel_page_user) | |
25257 | ||
25258 | /* Set of bits not changed in pte_modify */ | |
25259 | -#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_CACHE_MASK | _PAGE_IO | \ | |
25260 | - _PAGE_ACCESSED | _PAGE_DIRTY) | |
25261 | +#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_CACHE_MASK | _PAGE_IO | \ | |
25262 | + _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) | |
25263 | ||
25264 | /* | |
25265 | * PAT settings are part of the hypervisor interface, which sets the | |
25266 | @@ -102,19 +100,9 @@ extern unsigned int __kernel_page_user; | |
25267 | #define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ | |
25268 | _PAGE_ACCESSED) | |
25269 | ||
25270 | -#ifdef CONFIG_X86_32 | |
25271 | -#define _PAGE_KERNEL_EXEC \ | |
25272 | - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) | |
25273 | -#define _PAGE_KERNEL (_PAGE_KERNEL_EXEC | _PAGE_NX) | |
25274 | - | |
25275 | -#ifndef __ASSEMBLY__ | |
25276 | -extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC; | |
25277 | -#endif /* __ASSEMBLY__ */ | |
25278 | -#else | |
25279 | #define __PAGE_KERNEL_EXEC \ | |
25280 | (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | __kernel_page_user) | |
25281 | #define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) | |
25282 | -#endif | |
25283 | ||
25284 | #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) | |
25285 | #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) | |
25286 | @@ -125,25 +113,22 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE | |
25287 | #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) | |
25288 | #define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) | |
25289 | #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) | |
25290 | +#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) | |
25291 | #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) | |
25292 | ||
25293 | -/* | |
25294 | - * We don't support GLOBAL page in xenolinux64 | |
25295 | - */ | |
25296 | -#define MAKE_GLOBAL(x) __pgprot((x)) | |
25297 | - | |
25298 | -#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL) | |
25299 | -#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO) | |
25300 | -#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC) | |
25301 | -#define PAGE_KERNEL_RX MAKE_GLOBAL(__PAGE_KERNEL_RX) | |
25302 | -#define PAGE_KERNEL_WC MAKE_GLOBAL(__PAGE_KERNEL_WC) | |
25303 | -#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE) | |
25304 | -#define PAGE_KERNEL_UC_MINUS MAKE_GLOBAL(__PAGE_KERNEL_UC_MINUS) | |
25305 | -#define PAGE_KERNEL_EXEC_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE) | |
25306 | -#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE) | |
25307 | -#define PAGE_KERNEL_LARGE_EXEC MAKE_GLOBAL(__PAGE_KERNEL_LARGE_EXEC) | |
25308 | -#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL) | |
25309 | -#define PAGE_KERNEL_VSYSCALL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE) | |
25310 | +#define PAGE_KERNEL __pgprot(__PAGE_KERNEL) | |
25311 | +#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) | |
25312 | +#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) | |
25313 | +#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) | |
25314 | +#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC) | |
25315 | +#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) | |
25316 | +#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS) | |
25317 | +#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE) | |
25318 | +#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) | |
25319 | +#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE) | |
25320 | +#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) | |
25321 | +#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) | |
25322 | +#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE) | |
25323 | ||
25324 | /* xwr */ | |
25325 | #define __P000 PAGE_NONE | |
25326 | @@ -182,27 +167,27 @@ extern struct list_head pgd_list; | |
25327 | */ | |
25328 | static inline int pte_dirty(pte_t pte) | |
25329 | { | |
25330 | - return __pte_val(pte) & _PAGE_DIRTY; | |
25331 | + return pte_flags(pte) & _PAGE_DIRTY; | |
25332 | } | |
25333 | ||
25334 | static inline int pte_young(pte_t pte) | |
25335 | { | |
25336 | - return __pte_val(pte) & _PAGE_ACCESSED; | |
25337 | + return pte_flags(pte) & _PAGE_ACCESSED; | |
25338 | } | |
25339 | ||
25340 | static inline int pte_write(pte_t pte) | |
25341 | { | |
25342 | - return __pte_val(pte) & _PAGE_RW; | |
25343 | + return pte_flags(pte) & _PAGE_RW; | |
25344 | } | |
25345 | ||
25346 | static inline int pte_file(pte_t pte) | |
25347 | { | |
25348 | - return __pte_val(pte) & _PAGE_FILE; | |
25349 | + return pte_flags(pte) & _PAGE_FILE; | |
25350 | } | |
25351 | ||
25352 | static inline int pte_huge(pte_t pte) | |
25353 | { | |
25354 | - return __pte_val(pte) & _PAGE_PSE; | |
25355 | + return pte_flags(pte) & _PAGE_PSE; | |
25356 | } | |
25357 | ||
25358 | static inline int pte_global(pte_t pte) | |
25359 | @@ -212,12 +197,12 @@ static inline int pte_global(pte_t pte) | |
25360 | ||
25361 | static inline int pte_exec(pte_t pte) | |
25362 | { | |
25363 | - return !(__pte_val(pte) & _PAGE_NX); | |
25364 | + return !(pte_flags(pte) & _PAGE_NX); | |
25365 | } | |
25366 | ||
25367 | static inline int pte_special(pte_t pte) | |
25368 | { | |
25369 | - return 0; | |
25370 | + return pte_flags(pte) & _PAGE_SPECIAL; | |
25371 | } | |
25372 | ||
25373 | static inline int pmd_large(pmd_t pte) | |
25374 | @@ -228,22 +213,22 @@ static inline int pmd_large(pmd_t pte) | |
25375 | ||
25376 | static inline pte_t pte_mkclean(pte_t pte) | |
25377 | { | |
25378 | - return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); | |
25379 | + return __pte_ma(__pte_val(pte) & ~_PAGE_DIRTY); | |
25380 | } | |
25381 | ||
25382 | static inline pte_t pte_mkold(pte_t pte) | |
25383 | { | |
25384 | - return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); | |
25385 | + return __pte_ma(__pte_val(pte) & ~_PAGE_ACCESSED); | |
25386 | } | |
25387 | ||
25388 | static inline pte_t pte_wrprotect(pte_t pte) | |
25389 | { | |
25390 | - return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_RW); | |
25391 | + return __pte_ma(__pte_val(pte) & ~_PAGE_RW); | |
25392 | } | |
25393 | ||
25394 | static inline pte_t pte_mkexec(pte_t pte) | |
25395 | { | |
25396 | - return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_NX); | |
25397 | + return __pte_ma(__pte_val(pte) & ~_PAGE_NX); | |
25398 | } | |
25399 | ||
25400 | static inline pte_t pte_mkdirty(pte_t pte) | |
25401 | @@ -268,7 +253,7 @@ static inline pte_t pte_mkhuge(pte_t pte | |
25402 | ||
25403 | static inline pte_t pte_clrhuge(pte_t pte) | |
25404 | { | |
25405 | - return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE); | |
25406 | + return __pte_ma(__pte_val(pte) & ~_PAGE_PSE); | |
25407 | } | |
25408 | ||
25409 | static inline pte_t pte_mkglobal(pte_t pte) | |
25410 | @@ -283,35 +268,46 @@ static inline pte_t pte_clrglobal(pte_t | |
25411 | ||
25412 | static inline pte_t pte_mkspecial(pte_t pte) | |
25413 | { | |
25414 | - return pte; | |
25415 | + return __pte_ma(__pte_val(pte) | _PAGE_SPECIAL); | |
25416 | } | |
25417 | ||
25418 | extern pteval_t __supported_pte_mask; | |
25419 | ||
25420 | static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) | |
25421 | { | |
25422 | - return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | | |
25423 | - pgprot_val(pgprot)) & __supported_pte_mask); | |
25424 | + pgprotval_t prot = pgprot_val(pgprot); | |
25425 | + | |
25426 | + if (prot & _PAGE_PRESENT) | |
25427 | + prot &= __supported_pte_mask; | |
25428 | + return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | prot); | |
25429 | } | |
25430 | ||
25431 | static inline pte_t pfn_pte_ma(unsigned long page_nr, pgprot_t pgprot) | |
25432 | { | |
25433 | - return __pte_ma((((phys_addr_t)page_nr << PAGE_SHIFT) | | |
25434 | - pgprot_val(pgprot)) & __supported_pte_mask); | |
25435 | + pgprotval_t prot = pgprot_val(pgprot); | |
25436 | + | |
25437 | + if (prot & _PAGE_PRESENT) | |
25438 | + prot &= __supported_pte_mask; | |
25439 | + return __pte_ma(((phys_addr_t)page_nr << PAGE_SHIFT) | prot); | |
25440 | } | |
25441 | ||
25442 | static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) | |
25443 | { | |
25444 | - return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) | | |
25445 | - pgprot_val(pgprot)) & __supported_pte_mask); | |
25446 | + pgprotval_t prot = pgprot_val(pgprot); | |
25447 | + | |
25448 | + if (prot & _PAGE_PRESENT) | |
25449 | + prot &= __supported_pte_mask; | |
25450 | + return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | prot); | |
25451 | } | |
25452 | ||
25453 | static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) | |
25454 | { | |
25455 | - pteval_t val = pte_val(pte); | |
25456 | + pgprotval_t prot = pgprot_val(newprot); | |
25457 | + pteval_t val = pte_val(pte) & _PAGE_CHG_MASK; | |
25458 | ||
25459 | - val &= _PAGE_CHG_MASK; | |
25460 | - val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask; | |
25461 | + if (prot & _PAGE_PRESENT) | |
25462 | + prot &= __supported_pte_mask; | |
25463 | + val |= prot & ~_PAGE_CHG_MASK; | |
25464 | ||
25465 | return __pte(val); | |
25466 | } | |
25467 | @@ -325,9 +321,11 @@ static inline pgprot_t pgprot_modify(pgp | |
25468 | return __pgprot(preservebits | addbits); | |
25469 | } | |
25470 | ||
25471 | -#define pte_pgprot(x) __pgprot(__pte_val(x) & ~PTE_MASK) | |
25472 | +#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK) | |
25473 | ||
25474 | -#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) | |
25475 | +#define canon_pgprot(p) __pgprot(pgprot_val(p) & _PAGE_PRESENT \ | |
25476 | + ? pgprot_val(p) & __supported_pte_mask \ | |
25477 | + : pgprot_val(p)) | |
25478 | ||
25479 | #ifndef __ASSEMBLY__ | |
25480 | #define __HAVE_PHYS_MEM_ACCESS_PROT | |
25481 | @@ -338,6 +336,17 @@ int phys_mem_access_prot_allowed(struct | |
25482 | unsigned long size, pgprot_t *vma_prot); | |
25483 | #endif | |
25484 | ||
25485 | +/* Install a pte for a particular vaddr in kernel space. */ | |
25486 | +void set_pte_vaddr(unsigned long vaddr, pte_t pte); | |
25487 | + | |
25488 | +#ifndef CONFIG_XEN | |
25489 | +extern void native_pagetable_setup_start(pgd_t *base); | |
25490 | +extern void native_pagetable_setup_done(pgd_t *base); | |
25491 | +#else | |
25492 | +static inline void xen_pagetable_setup_start(pgd_t *base) {} | |
25493 | +static inline void xen_pagetable_setup_done(pgd_t *base) {} | |
25494 | +#endif | |
25495 | + | |
25496 | #define set_pte(ptep, pte) xen_set_pte(ptep, pte) | |
25497 | #define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte) | |
25498 | ||
25499 | @@ -373,6 +382,26 @@ int phys_mem_access_prot_allowed(struct | |
25500 | # include "pgtable_64.h" | |
25501 | #endif | |
25502 | ||
25503 | +/* | |
25504 | + * the pgd page can be thought of as an array like this: pgd_t[PTRS_PER_PGD] | |
25505 | + * | |
25506 | + * this macro returns the index of the entry in the pgd page which would | |
25507 | + * control the given virtual address | |
25508 | + */ | |
25509 | +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) | |
25510 | + | |
25511 | +/* | |
25512 | + * pgd_offset() returns a (pgd_t *) | |
25513 | + * pgd_index() is used to get the offset into the pgd page's array of pgd_t's; | |
25514 | + */ | |
25515 | +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) | |
25516 | +/* | |
25517 | + * a shortcut which implies the use of the kernel's pgd, instead | |
25518 | + * of a process's | |
25519 | + */ | |
25520 | +#define pgd_offset_k(address) pgd_offset(&init_mm, (address)) | |
25521 | + | |
25522 | + | |
25523 | #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) | |
25524 | #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) | |
25525 | ||
25526 | @@ -383,8 +412,15 @@ enum { | |
25527 | PG_LEVEL_4K, | |
25528 | PG_LEVEL_2M, | |
25529 | PG_LEVEL_1G, | |
25530 | + PG_LEVEL_NUM | |
25531 | }; | |
25532 | ||
25533 | +#ifdef CONFIG_PROC_FS | |
25534 | +extern void update_page_count(int level, unsigned long pages); | |
25535 | +#else | |
25536 | +static inline void update_page_count(int level, unsigned long pages) { } | |
25537 | +#endif | |
25538 | + | |
25539 | /* | |
25540 | * Helper function that returns the kernel pagetable entry controlling | |
25541 | * the virtual address 'address'. NULL means no pagetable entry present. | |
25542 | @@ -441,6 +477,8 @@ static inline void xen_pte_clear(struct | |
25543 | * race with other CPU's that might be updating the dirty | |
25544 | * bit at the same time. | |
25545 | */ | |
25546 | +struct vm_area_struct; | |
25547 | + | |
25548 | #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS | |
25549 | extern int ptep_set_access_flags(struct vm_area_struct *vma, | |
25550 | unsigned long address, pte_t *ptep, | |
25551 | @@ -523,9 +561,6 @@ static inline void clone_pgd_range(pgd_t | |
25552 | memcpy(dst, src, count * sizeof(pgd_t)); | |
25553 | } | |
25554 | ||
25555 | -#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \ | |
25556 | - xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) | |
25557 | - | |
25558 | #define arbitrary_virt_to_machine(va) \ | |
25559 | ({ \ | |
25560 | unsigned int __lvl; \ | |
25561 | @@ -548,6 +583,34 @@ struct page *kmap_atomic_to_page(void *) | |
25562 | #define ptep_to_machine(ptep) virt_to_machine(ptep) | |
25563 | #endif | |
25564 | ||
25565 | +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION | |
25566 | +static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, | |
25567 | + pte_t *ptep) | |
25568 | +{ | |
25569 | +#if CONFIG_XEN_COMPAT < 0x030300 | |
25570 | + if (unlikely(!xen_feature(XENFEAT_mmu_pt_update_preserve_ad))) | |
25571 | + return ptep_get_and_clear(mm, addr, ptep); | |
25572 | +#endif | |
25573 | + return *ptep; | |
25574 | +} | |
25575 | + | |
25576 | +static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | |
25577 | + pte_t *ptep, pte_t pte) | |
25578 | +{ | |
25579 | + mmu_update_t u; | |
25580 | + | |
25581 | +#if CONFIG_XEN_COMPAT < 0x030300 | |
25582 | + if (unlikely(!xen_feature(XENFEAT_mmu_pt_update_preserve_ad))) { | |
25583 | + set_pte_at(mm, addr, ptep, pte); | |
25584 | + return; | |
25585 | + } | |
25586 | +#endif | |
25587 | + u.ptr = ptep_to_machine(ptep) | MMU_PT_UPDATE_PRESERVE_AD; | |
25588 | + u.val = __pte_val(pte); | |
25589 | + if (HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF)) | |
25590 | + BUG(); | |
25591 | +} | |
25592 | + | |
25593 | #include <asm-generic/pgtable.h> | |
25594 | ||
25595 | #include <xen/features.h> | |
25596 | @@ -576,10 +639,6 @@ int touch_pte_range(struct mm_struct *mm | |
25597 | unsigned long address, | |
25598 | unsigned long size); | |
25599 | ||
25600 | -int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd, | |
25601 | - unsigned long addr, unsigned long end, pgprot_t newprot, | |
25602 | - int dirty_accountable); | |
25603 | - | |
25604 | #endif /* __ASSEMBLY__ */ | |
25605 | ||
25606 | #endif /* _ASM_X86_PGTABLE_H */ | |
82094b55 AF |
25607 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-03-16 16:38:05.000000000 +0100 |
25608 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
25609 | @@ -14,11 +14,11 @@ |
25610 | #define pmd_ERROR(e) \ | |
25611 | printk("%s:%d: bad pmd %p(%016Lx pfn %08Lx).\n", \ | |
25612 | __FILE__, __LINE__, &(e), __pmd_val(e), \ | |
25613 | - (pmd_val(e) & PTE_MASK) >> PAGE_SHIFT) | |
25614 | + (pmd_val(e) & PTE_PFN_MASK) >> PAGE_SHIFT) | |
25615 | #define pgd_ERROR(e) \ | |
25616 | printk("%s:%d: bad pgd %p(%016Lx pfn %08Lx).\n", \ | |
25617 | __FILE__, __LINE__, &(e), __pgd_val(e), \ | |
25618 | - (pgd_val(e) & PTE_MASK) >> PAGE_SHIFT) | |
25619 | + (pgd_val(e) & PTE_PFN_MASK) >> PAGE_SHIFT) | |
25620 | ||
25621 | static inline int pud_none(pud_t pud) | |
25622 | { | |
25623 | @@ -27,7 +27,7 @@ static inline int pud_none(pud_t pud) | |
25624 | } | |
25625 | static inline int pud_bad(pud_t pud) | |
25626 | { | |
25627 | - return (__pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0; | |
25628 | + return (__pud_val(pud) & ~(PTE_PFN_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0; | |
25629 | } | |
25630 | ||
25631 | static inline int pud_present(pud_t pud) | |
25632 | @@ -102,9 +102,9 @@ static inline void pud_clear(pud_t *pudp | |
25633 | xen_tlb_flush(); | |
25634 | } | |
25635 | ||
25636 | -#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_MASK)) | |
25637 | +#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_PFN_MASK)) | |
25638 | ||
25639 | -#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_MASK)) | |
25640 | +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK)) | |
25641 | ||
25642 | ||
25643 | /* Find an entry in the second-level page table.. */ | |
82094b55 AF |
25644 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-16 16:38:05.000000000 +0100 |
25645 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
25646 | @@ -89,10 +89,10 @@ extern unsigned long pg0[]; |
25647 | /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t. | |
25648 | can temporarily clear it. */ | |
25649 | #define pmd_present(x) (__pmd_val(x)) | |
25650 | -#define pmd_bad(x) ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT)) | |
25651 | +#define pmd_bad(x) ((__pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT)) | |
25652 | #else | |
25653 | #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT) | |
25654 | -#define pmd_bad(x) ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) | |
25655 | +#define pmd_bad(x) ((__pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) | |
25656 | #endif | |
25657 | ||
25658 | ||
25659 | @@ -119,26 +119,6 @@ extern unsigned long pg0[]; | |
25660 | */ | |
25661 | #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) | |
25662 | ||
25663 | -/* | |
25664 | - * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] | |
25665 | - * | |
25666 | - * this macro returns the index of the entry in the pgd page which would | |
25667 | - * control the given virtual address | |
25668 | - */ | |
25669 | -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) | |
25670 | -#define pgd_index_k(addr) pgd_index((addr)) | |
25671 | - | |
25672 | -/* | |
25673 | - * pgd_offset() returns a (pgd_t *) | |
25674 | - * pgd_index() is used get the offset into the pgd page's array of pgd_t's; | |
25675 | - */ | |
25676 | -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) | |
25677 | - | |
25678 | -/* | |
25679 | - * a shortcut which implies the use of the kernel's pgd, instead | |
25680 | - * of a process's | |
25681 | - */ | |
25682 | -#define pgd_offset_k(address) pgd_offset(&init_mm, (address)) | |
25683 | ||
25684 | static inline int pud_large(pud_t pud) { return 0; } | |
25685 | ||
25686 | @@ -165,7 +145,7 @@ static inline int pud_large(pud_t pud) { | |
25687 | #define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) | |
25688 | ||
25689 | #define pmd_page_vaddr(pmd) \ | |
25690 | - ((unsigned long)__va(pmd_val((pmd)) & PTE_MASK)) | |
25691 | + ((unsigned long)__va(pmd_val((pmd)) & PTE_PFN_MASK)) | |
25692 | ||
25693 | #if defined(CONFIG_HIGHPTE) | |
25694 | #define pte_offset_map(dir, address) \ | |
82094b55 AF |
25695 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-16 16:38:05.000000000 +0100 |
25696 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
25697 | @@ -23,6 +23,8 @@ extern void xen_init_pt(void); |
25698 | extern pud_t level3_kernel_pgt[512]; | |
25699 | extern pud_t level3_ident_pgt[512]; | |
25700 | extern pmd_t level2_kernel_pgt[512]; | |
25701 | +extern pmd_t level2_fixmap_pgt[512]; | |
25702 | +extern pmd_t level2_ident_pgt[512]; | |
25703 | extern pgd_t init_level4_pgt[]; | |
25704 | ||
25705 | #define swapper_pg_dir init_level4_pgt | |
25706 | @@ -79,6 +81,9 @@ extern void paging_init(void); | |
25707 | ||
25708 | struct mm_struct; | |
25709 | ||
25710 | +void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); | |
25711 | + | |
25712 | + | |
25713 | #define __xen_pte_clear(ptep) xen_set_pte(ptep, __pte(0)) | |
25714 | ||
25715 | static inline void xen_set_pte(pte_t *ptep, pte_t pte) | |
25716 | @@ -145,29 +150,29 @@ static inline void xen_pgd_clear(pgd_t * | |
25717 | #define PGDIR_MASK (~(PGDIR_SIZE - 1)) | |
25718 | ||
25719 | ||
25720 | -#define MAXMEM _AC(0x00003fffffffffff, UL) | |
25721 | +#define MAXMEM _AC(0x000004ffffffffff, UL) | |
25722 | #define VMALLOC_START _AC(0xffffc20000000000, UL) | |
25723 | #define VMALLOC_END _AC(0xffffe1ffffffffff, UL) | |
25724 | #define VMEMMAP_START _AC(0xffffe20000000000, UL) | |
25725 | #define MODULES_VADDR _AC(0xffffffffa0000000, UL) | |
25726 | -#define MODULES_END _AC(0xfffffffffff00000, UL) | |
25727 | +#define MODULES_END _AC(0xffffffffff000000, UL) | |
25728 | #define MODULES_LEN (MODULES_END - MODULES_VADDR) | |
25729 | ||
25730 | #ifndef __ASSEMBLY__ | |
25731 | ||
25732 | static inline int pgd_bad(pgd_t pgd) | |
25733 | { | |
25734 | - return (__pgd_val(pgd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE; | |
25735 | + return (__pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; | |
25736 | } | |
25737 | ||
25738 | static inline int pud_bad(pud_t pud) | |
25739 | { | |
25740 | - return (__pud_val(pud) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE; | |
25741 | + return (__pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; | |
25742 | } | |
25743 | ||
25744 | static inline int pmd_bad(pmd_t pmd) | |
25745 | { | |
25746 | - return (__pmd_val(pmd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE; | |
25747 | + return (__pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; | |
25748 | } | |
25749 | ||
25750 | #define pte_none(x) (!(x).pte) | |
25751 | @@ -175,7 +180,7 @@ static inline int pmd_bad(pmd_t pmd) | |
25752 | ||
25753 | #define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ | |
25754 | ||
25755 | -#define __pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT) | |
25756 | +#define __pte_mfn(_pte) (((_pte).pte & PTE_PFN_MASK) >> PAGE_SHIFT) | |
25757 | #define pte_mfn(_pte) ((_pte).pte & _PAGE_PRESENT ? \ | |
25758 | __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte))) | |
25759 | #define pte_pfn(_pte) ((_pte).pte & _PAGE_IO ? max_mapnr : \ | |
25760 | @@ -200,11 +205,8 @@ static inline int pmd_bad(pmd_t pmd) | |
25761 | * Level 4 access. | |
25762 | */ | |
25763 | #define pgd_page_vaddr(pgd) \ | |
25764 | - ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_MASK)) | |
25765 | + ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_PFN_MASK)) | |
25766 | #define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT)) | |
25767 | -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) | |
25768 | -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) | |
25769 | -#define pgd_offset_k(address) (init_level4_pgt + pgd_index((address))) | |
25770 | #define pgd_present(pgd) (__pgd_val(pgd) & _PAGE_PRESENT) | |
25771 | static inline int pgd_large(pgd_t pgd) { return 0; } | |
25772 | #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) | |
25773 | @@ -226,7 +228,7 @@ static inline int pud_large(pud_t pte) | |
25774 | } | |
25775 | ||
25776 | /* PMD - Level 2 access */ | |
25777 | -#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_MASK)) | |
25778 | +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_PFN_MASK)) | |
25779 | #define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) | |
25780 | ||
25781 | #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) | |
82094b55 AF |
25782 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/processor.h 2009-03-16 16:38:05.000000000 +0100 |
25783 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/processor.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
25784 | @@ -134,7 +134,7 @@ extern __u32 cleared_cpu_caps[NCAPINTS |
25785 | #ifdef CONFIG_SMP | |
25786 | DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); | |
25787 | #define cpu_data(cpu) per_cpu(cpu_info, cpu) | |
25788 | -#define current_cpu_data cpu_data(smp_processor_id()) | |
25789 | +#define current_cpu_data __get_cpu_var(cpu_info) | |
25790 | #else | |
25791 | #define cpu_data(cpu) boot_cpu_data | |
25792 | #define current_cpu_data boot_cpu_data | |
25793 | @@ -153,7 +153,7 @@ static inline int hlt_works(int cpu) | |
25794 | ||
25795 | extern void cpu_detect(struct cpuinfo_x86 *c); | |
25796 | ||
25797 | -extern void identify_cpu(struct cpuinfo_x86 *); | |
25798 | +extern void early_cpu_init(void); | |
25799 | extern void identify_boot_cpu(void); | |
25800 | extern void identify_secondary_cpu(struct cpuinfo_x86 *); | |
25801 | extern void print_cpu_info(struct cpuinfo_x86 *); | |
25802 | @@ -267,15 +267,11 @@ struct tss_struct { | |
25803 | struct thread_struct *io_bitmap_owner; | |
25804 | ||
25805 | /* | |
25806 | - * Pad the TSS to be cacheline-aligned (size is 0x100): | |
25807 | - */ | |
25808 | - unsigned long __cacheline_filler[35]; | |
25809 | - /* | |
25810 | * .. and then another 0x100 bytes for the emergency kernel stack: | |
25811 | */ | |
25812 | unsigned long stack[64]; | |
25813 | ||
25814 | -} __attribute__((packed)); | |
25815 | +} ____cacheline_aligned; | |
25816 | ||
25817 | DECLARE_PER_CPU(struct tss_struct, init_tss); | |
25818 | ||
25819 | @@ -668,11 +664,36 @@ static inline void __sti_mwait(unsigned | |
25820 | ||
25821 | extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); | |
25822 | ||
25823 | -extern int force_mwait; | |
25824 | - | |
25825 | extern void select_idle_routine(const struct cpuinfo_x86 *c); | |
25826 | ||
25827 | extern unsigned long boot_option_idle_override; | |
25828 | +extern unsigned long idle_halt; | |
25829 | +extern unsigned long idle_nomwait; | |
25830 | + | |
25831 | +#ifndef CONFIG_XEN | |
25832 | +/* | |
25833 | + * on systems with caches, caches must be flushed as the absolute | |
25834 | + * last instruction before going into a suspended halt. Otherwise, | |
25835 | + * dirty data can linger in the cache and become stale on resume, | |
25836 | + * leading to strange errors. | |
25837 | + * | |
25838 | + * perform a variety of operations to guarantee that the compiler | |
25839 | + * will not reorder instructions. wbinvd itself is serializing | |
25840 | + * so the processor will not reorder. | |
25841 | + * | |
25842 | + * Systems without cache can just go into halt. | |
25843 | + */ | |
25844 | +static inline void wbinvd_halt(void) | |
25845 | +{ | |
25846 | + mb(); | |
25847 | + /* check for clflush to determine if wbinvd is legal */ | |
25848 | + if (cpu_has_clflush) | |
25849 | + asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory"); | |
25850 | + else | |
25851 | + while (1) | |
25852 | + halt(); | |
25853 | +} | |
25854 | +#endif | |
25855 | ||
25856 | extern void enable_sep_cpu(void); | |
25857 | extern int sysenter_setup(void); | |
82094b55 AF |
25858 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/segment.h 2009-03-16 16:38:05.000000000 +0100 |
25859 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/segment.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
25860 | @@ -1,6 +1,15 @@ |
25861 | #ifndef _ASM_X86_SEGMENT_H_ | |
25862 | #define _ASM_X86_SEGMENT_H_ | |
25863 | ||
25864 | +/* Constructor for a conventional segment GDT (or LDT) entry */ | |
25865 | +/* This is a macro so it can be used in initializers */ | |
25866 | +#define GDT_ENTRY(flags, base, limit) \ | |
25867 | + ((((base) & 0xff000000ULL) << (56-24)) | \ | |
25868 | + (((flags) & 0x0000f0ffULL) << 40) | \ | |
25869 | + (((limit) & 0x000f0000ULL) << (48-16)) | \ | |
25870 | + (((base) & 0x00ffffffULL) << 16) | \ | |
25871 | + (((limit) & 0x0000ffffULL))) | |
25872 | + | |
25873 | /* Simple and small GDT entries for booting only */ | |
25874 | ||
25875 | #define GDT_ENTRY_BOOT_CS 2 | |
25876 | @@ -61,18 +70,14 @@ | |
25877 | #define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) | |
25878 | ||
25879 | #define GDT_ENTRY_DEFAULT_USER_CS 14 | |
25880 | -#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3) | |
25881 | ||
25882 | #define GDT_ENTRY_DEFAULT_USER_DS 15 | |
25883 | -#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3) | |
25884 | ||
25885 | #define GDT_ENTRY_KERNEL_BASE 12 | |
25886 | ||
25887 | #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) | |
25888 | -#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) | |
25889 | ||
25890 | #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) | |
25891 | -#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) | |
25892 | ||
25893 | #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4) | |
25894 | #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5) | |
25895 | @@ -143,10 +148,11 @@ | |
25896 | #else | |
25897 | #include <asm/cache.h> | |
25898 | ||
25899 | -#define __KERNEL_CS 0x10 | |
25900 | -#define __KERNEL_DS 0x18 | |
25901 | +#define GDT_ENTRY_KERNEL32_CS 1 | |
25902 | +#define GDT_ENTRY_KERNEL_CS 2 | |
25903 | +#define GDT_ENTRY_KERNEL_DS 3 | |
25904 | ||
25905 | -#define __KERNEL32_CS 0x08 | |
25906 | +#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8) | |
25907 | ||
25908 | /* | |
25909 | * we cannot use the same code segment descriptor for user and kernel | |
25910 | @@ -154,10 +160,10 @@ | |
25911 | * The segment offset needs to contain a RPL. Grr. -AK | |
25912 | * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) | |
25913 | */ | |
25914 | - | |
25915 | -#define __USER32_CS 0x23 /* 4*8+3 */ | |
25916 | -#define __USER_DS 0x2b /* 5*8+3 */ | |
25917 | -#define __USER_CS 0x33 /* 6*8+3 */ | |
25918 | +#define GDT_ENTRY_DEFAULT_USER32_CS 4 | |
25919 | +#define GDT_ENTRY_DEFAULT_USER_DS 5 | |
25920 | +#define GDT_ENTRY_DEFAULT_USER_CS 6 | |
25921 | +#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS * 8 + 3) | |
25922 | #define __USER32_DS __USER_DS | |
25923 | ||
25924 | #define GDT_ENTRY_TSS 8 /* needs two entries */ | |
25925 | @@ -179,6 +185,11 @@ | |
25926 | ||
25927 | #endif | |
25928 | ||
25929 | +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) | |
25930 | +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) | |
25931 | +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS* 8 + 3) | |
25932 | +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS* 8 + 3) | |
25933 | + | |
25934 | /* User mode is privilege level 3 */ | |
25935 | #define USER_RPL 0x3 | |
25936 | /* LDT segment has TI set, GDT has it cleared */ | |
82094b55 AF |
25937 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/smp.h 2009-03-16 16:38:05.000000000 +0100 |
25938 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/smp.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
25939 | @@ -25,23 +25,16 @@ extern cpumask_t cpu_initialized; |
25940 | extern void (*mtrr_hook)(void); | |
25941 | extern void zap_low_mappings(void); | |
25942 | ||
25943 | +extern int __cpuinit get_local_pda(int cpu); | |
25944 | + | |
25945 | extern int smp_num_siblings; | |
25946 | extern unsigned int num_processors; | |
25947 | extern cpumask_t cpu_initialized; | |
25948 | ||
25949 | -#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) | |
25950 | -extern u16 x86_cpu_to_apicid_init[]; | |
25951 | -extern u16 x86_bios_cpu_apicid_init[]; | |
25952 | -extern void *x86_cpu_to_apicid_early_ptr; | |
25953 | -extern void *x86_bios_cpu_apicid_early_ptr; | |
25954 | -#else | |
25955 | -#define x86_cpu_to_apicid_early_ptr NULL | |
25956 | -#define x86_bios_cpu_apicid_early_ptr NULL | |
25957 | -#endif | |
25958 | - | |
25959 | DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); | |
25960 | DECLARE_PER_CPU(cpumask_t, cpu_core_map); | |
25961 | DECLARE_PER_CPU(u16, cpu_llc_id); | |
25962 | + | |
25963 | DECLARE_PER_CPU(u16, x86_cpu_to_apicid); | |
25964 | DECLARE_PER_CPU(u16, x86_bios_cpu_apicid); | |
25965 | ||
25966 | @@ -63,9 +56,9 @@ struct smp_ops { | |
25967 | ||
25968 | void (*smp_send_stop)(void); | |
25969 | void (*smp_send_reschedule)(int cpu); | |
25970 | - int (*smp_call_function_mask)(cpumask_t mask, | |
25971 | - void (*func)(void *info), void *info, | |
25972 | - int wait); | |
25973 | + | |
25974 | + void (*send_call_func_ipi)(cpumask_t mask); | |
25975 | + void (*send_call_func_single_ipi)(int cpu); | |
25976 | }; | |
25977 | ||
25978 | /* Globals due to paravirt */ | |
25979 | @@ -106,11 +99,14 @@ static inline void smp_send_reschedule(i | |
25980 | smp_ops.smp_send_reschedule(cpu); | |
25981 | } | |
25982 | ||
25983 | -static inline int smp_call_function_mask(cpumask_t mask, | |
25984 | - void (*func) (void *info), void *info, | |
25985 | - int wait) | |
25986 | +static inline void arch_send_call_function_single_ipi(int cpu) | |
25987 | { | |
25988 | - return smp_ops.smp_call_function_mask(mask, func, info, wait); | |
25989 | + smp_ops.send_call_func_single_ipi(cpu); | |
25990 | +} | |
25991 | + | |
25992 | +static inline void arch_send_call_function_ipi(cpumask_t mask) | |
25993 | +{ | |
25994 | + smp_ops.send_call_func_ipi(mask); | |
25995 | } | |
25996 | ||
25997 | void native_smp_prepare_boot_cpu(void); | |
25998 | @@ -122,23 +118,19 @@ int native_cpu_up(unsigned int cpunum); | |
25999 | ||
26000 | void xen_smp_send_stop(void); | |
26001 | void xen_smp_send_reschedule(int cpu); | |
26002 | -int xen_smp_call_function_mask(cpumask_t mask, | |
26003 | - void (*func) (void *info), void *info, | |
26004 | - int wait); | |
26005 | +void xen_send_call_func_ipi(cpumask_t mask); | |
26006 | +void xen_send_call_func_single_ipi(int cpu); | |
26007 | ||
26008 | #define smp_send_stop xen_smp_send_stop | |
26009 | #define smp_send_reschedule xen_smp_send_reschedule | |
26010 | -#define smp_call_function_mask xen_smp_call_function_mask | |
26011 | - | |
26012 | -extern void prefill_possible_map(void); | |
26013 | +#define arch_send_call_function_single_ipi xen_send_call_func_single_ipi | |
26014 | +#define arch_send_call_function_ipi xen_send_call_func_ipi | |
26015 | ||
26016 | #endif /* CONFIG_XEN */ | |
26017 | ||
26018 | extern int __cpu_disable(void); | |
26019 | extern void __cpu_die(unsigned int cpu); | |
26020 | ||
26021 | -extern void prefill_possible_map(void); | |
26022 | - | |
26023 | void smp_store_cpu_info(int id); | |
26024 | #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) | |
26025 | ||
26026 | @@ -149,6 +141,14 @@ static inline int num_booting_cpus(void) | |
26027 | } | |
26028 | #endif /* CONFIG_SMP */ | |
26029 | ||
26030 | +#if defined(CONFIG_SMP) && (defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_XEN)) | |
26031 | +extern void prefill_possible_map(void); | |
26032 | +#else | |
26033 | +static inline void prefill_possible_map(void) | |
26034 | +{ | |
26035 | +} | |
26036 | +#endif | |
26037 | + | |
26038 | extern unsigned disabled_cpus __cpuinitdata; | |
26039 | ||
26040 | #ifdef CONFIG_X86_32_SMP | |
26041 | @@ -216,12 +216,8 @@ static inline int hard_smp_processor_id( | |
26042 | #endif /* CONFIG_X86_LOCAL_APIC */ | |
26043 | ||
26044 | #ifdef CONFIG_HOTPLUG_CPU | |
26045 | -extern void cpu_exit_clear(void); | |
26046 | extern void cpu_uninit(void); | |
26047 | #endif | |
26048 | ||
26049 | -extern void smp_alloc_memory(void); | |
26050 | -extern void lock_ipi_call_lock(void); | |
26051 | -extern void unlock_ipi_call_lock(void); | |
26052 | #endif /* __ASSEMBLY__ */ | |
26053 | #endif | |
82094b55 AF |
26054 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/spinlock.h 2009-03-16 16:38:05.000000000 +0100 |
26055 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/spinlock.h 2009-06-04 11:09:05.000000000 +0200 | |
2cb7cef9 BS |
26056 | @@ -38,6 +38,11 @@ |
26057 | # define UNLOCK_LOCK_PREFIX | |
26058 | #endif | |
26059 | ||
26060 | +/* | |
26061 | + * Xen versions prior to 3.2.x have a race condition with HYPERVISOR_poll(). | |
26062 | + */ | |
26063 | +#if CONFIG_XEN_COMPAT >= 0x030200 | |
26064 | + | |
26065 | int xen_spinlock_init(unsigned int cpu); | |
26066 | void xen_spinlock_cleanup(unsigned int cpu); | |
26067 | extern int xen_spin_wait(raw_spinlock_t *, unsigned int token); | |
26068 | @@ -65,14 +70,14 @@ extern void xen_spin_kick(raw_spinlock_t | |
26069 | */ | |
26070 | #if (NR_CPUS < 256) | |
26071 | #define TICKET_SHIFT 8 | |
26072 | -#define __raw_spin_lock_preamble \ | |
26073 | +#define __ticket_spin_lock_preamble \ | |
26074 | asm(LOCK_PREFIX "xaddw %w0, %2\n\t" \ | |
26075 | "cmpb %h0, %b0\n\t" \ | |
26076 | "sete %1" \ | |
26077 | : "=&Q" (token), "=qm" (free), "+m" (lock->slock) \ | |
26078 | : "0" (0x0100) \ | |
26079 | : "memory", "cc") | |
26080 | -#define __raw_spin_lock_body \ | |
26081 | +#define __ticket_spin_lock_body \ | |
26082 | asm("1:\t" \ | |
26083 | "cmpb %h0, %b0\n\t" \ | |
26084 | "je 2f\n\t" \ | |
26085 | @@ -88,7 +93,7 @@ extern void xen_spin_kick(raw_spinlock_t | |
26086 | : "memory", "cc") | |
26087 | ||
26088 | ||
26089 | -static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) | |
26090 | +static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) | |
26091 | { | |
26092 | int tmp, new; | |
26093 | ||
26094 | @@ -107,7 +112,7 @@ static __always_inline int __raw_spin_tr | |
26095 | return tmp; | |
26096 | } | |
26097 | ||
26098 | -static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) | |
26099 | +static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) | |
26100 | { | |
26101 | unsigned int token; | |
26102 | unsigned char kick; | |
26103 | @@ -124,7 +129,7 @@ static __always_inline void __raw_spin_u | |
26104 | } | |
26105 | #else | |
26106 | #define TICKET_SHIFT 16 | |
26107 | -#define __raw_spin_lock_preamble \ | |
26108 | +#define __ticket_spin_lock_preamble \ | |
26109 | do { \ | |
26110 | unsigned int tmp; \ | |
26111 | asm(LOCK_PREFIX "xaddl %0, %2\n\t" \ | |
26112 | @@ -136,7 +141,7 @@ static __always_inline void __raw_spin_u | |
26113 | : "0" (0x00010000) \ | |
26114 | : "memory", "cc"); \ | |
26115 | } while (0) | |
26116 | -#define __raw_spin_lock_body \ | |
26117 | +#define __ticket_spin_lock_body \ | |
26118 | do { \ | |
26119 | unsigned int tmp; \ | |
26120 | asm("shldl $16, %0, %2\n" \ | |
26121 | @@ -155,7 +160,7 @@ static __always_inline void __raw_spin_u | |
26122 | : "memory", "cc"); \ | |
26123 | } while (0) | |
26124 | ||
26125 | -static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) | |
26126 | +static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) | |
26127 | { | |
26128 | int tmp; | |
26129 | int new; | |
26130 | @@ -177,7 +182,7 @@ static __always_inline int __raw_spin_tr | |
26131 | return tmp; | |
26132 | } | |
26133 | ||
26134 | -static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) | |
26135 | +static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) | |
26136 | { | |
26137 | unsigned int token, tmp; | |
26138 | bool kick; | |
26139 | @@ -195,49 +200,161 @@ static __always_inline void __raw_spin_u | |
26140 | } | |
26141 | #endif | |
26142 | ||
26143 | -static inline int __raw_spin_is_locked(raw_spinlock_t *lock) | |
26144 | +static inline int __ticket_spin_is_locked(raw_spinlock_t *lock) | |
26145 | { | |
26146 | int tmp = ACCESS_ONCE(lock->slock); | |
26147 | ||
26148 | return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1)); | |
26149 | } | |
26150 | ||
26151 | -static inline int __raw_spin_is_contended(raw_spinlock_t *lock) | |
26152 | +static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) | |
26153 | { | |
26154 | int tmp = ACCESS_ONCE(lock->slock); | |
26155 | ||
26156 | return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1; | |
26157 | } | |
26158 | ||
26159 | -static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) | |
26160 | +static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) | |
26161 | { | |
26162 | unsigned int token, count; | |
26163 | bool free; | |
26164 | ||
26165 | - __raw_spin_lock_preamble; | |
26166 | + __ticket_spin_lock_preamble; | |
26167 | if (unlikely(!free)) | |
26168 | token = xen_spin_adjust(lock, token); | |
26169 | do { | |
26170 | count = 1 << 10; | |
26171 | - __raw_spin_lock_body; | |
26172 | + __ticket_spin_lock_body; | |
26173 | } while (unlikely(!count) && !xen_spin_wait(lock, token)); | |
26174 | } | |
26175 | ||
26176 | -static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, | |
26177 | - unsigned long flags) | |
26178 | +static __always_inline void __ticket_spin_lock_flags(raw_spinlock_t *lock, | |
26179 | + unsigned long flags) | |
26180 | { | |
26181 | unsigned int token, count; | |
26182 | bool free; | |
26183 | ||
26184 | - __raw_spin_lock_preamble; | |
26185 | + __ticket_spin_lock_preamble; | |
26186 | if (unlikely(!free)) | |
26187 | token = xen_spin_adjust(lock, token); | |
26188 | do { | |
26189 | count = 1 << 10; | |
26190 | - __raw_spin_lock_body; | |
26191 | + __ticket_spin_lock_body; | |
26192 | } while (unlikely(!count) && !xen_spin_wait_flags(lock, &token, flags)); | |
26193 | } | |
26194 | ||
26195 | +#define __raw_spin(n) __ticket_spin_##n | |
26196 | + | |
26197 | +#else /* CONFIG_XEN_COMPAT < 0x030200 */ | |
26198 | +/* | |
26199 | + * Define virtualization-friendly old-style lock byte lock, for use in | |
26200 | + * pv_lock_ops if desired. | |
26201 | + * | |
26202 | + * This differs from the pre-2.6.24 spinlock by always using xchgb | |
26203 | + * rather than decb to take the lock; this allows it to use a | |
26204 | + * zero-initialized lock structure. It also maintains a 1-byte | |
26205 | + * contention counter, so that we can implement | |
26206 | + * __byte_spin_is_contended. | |
26207 | + */ | |
26208 | +struct __byte_spinlock { | |
26209 | + u8 lock; | |
26210 | +#if NR_CPUS < 256 | |
26211 | + u8 spinners; | |
26212 | +#else | |
26213 | +#error NR_CPUS >= 256 support not implemented | |
26214 | +#endif | |
26215 | +}; | |
26216 | + | |
26217 | +static inline int xen_spinlock_init(unsigned int cpu) { return 0; } | |
26218 | +static inline void xen_spinlock_cleanup(unsigned int cpu) {} | |
26219 | + | |
26220 | +static inline int __byte_spin_is_locked(raw_spinlock_t *lock) | |
26221 | +{ | |
26222 | + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; | |
26223 | + return bl->lock != 0; | |
26224 | +} | |
26225 | + | |
26226 | +static inline int __byte_spin_is_contended(raw_spinlock_t *lock) | |
26227 | +{ | |
26228 | + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; | |
26229 | + return bl->spinners != 0; | |
26230 | +} | |
26231 | + | |
26232 | +static inline void __byte_spin_lock(raw_spinlock_t *lock) | |
26233 | +{ | |
26234 | + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; | |
26235 | + s8 val = 1; | |
26236 | + | |
26237 | + asm("1: xchgb %1, %0\n" | |
26238 | + " test %1,%1\n" | |
26239 | + " jz 3f\n" | |
26240 | + " " LOCK_PREFIX "incb %2\n" | |
26241 | + "2: rep;nop\n" | |
26242 | + " cmpb $1, %0\n" | |
26243 | + " je 2b\n" | |
26244 | + " " LOCK_PREFIX "decb %2\n" | |
26245 | + " jmp 1b\n" | |
26246 | + "3:" | |
26247 | + : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory"); | |
26248 | +} | |
26249 | + | |
26250 | +#define __byte_spin_lock_flags(lock, flags) __byte_spin_lock(lock) | |
26251 | + | |
26252 | +static inline int __byte_spin_trylock(raw_spinlock_t *lock) | |
26253 | +{ | |
26254 | + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; | |
26255 | + u8 old = 1; | |
26256 | + | |
26257 | + asm("xchgb %1,%0" | |
26258 | + : "+m" (bl->lock), "+q" (old) : : "memory"); | |
26259 | + | |
26260 | + return old == 0; | |
26261 | +} | |
26262 | + | |
26263 | +static inline void __byte_spin_unlock(raw_spinlock_t *lock) | |
26264 | +{ | |
26265 | + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; | |
26266 | + smp_wmb(); | |
26267 | + bl->lock = 0; | |
26268 | +} | |
26269 | + | |
26270 | +#define __raw_spin(n) __byte_spin_##n | |
26271 | + | |
26272 | +#endif /* CONFIG_XEN_COMPAT */ | |
26273 | + | |
26274 | +static inline int __raw_spin_is_locked(raw_spinlock_t *lock) | |
26275 | +{ | |
26276 | + return __raw_spin(is_locked)(lock); | |
26277 | +} | |
26278 | + | |
26279 | +static inline int __raw_spin_is_contended(raw_spinlock_t *lock) | |
26280 | +{ | |
26281 | + return __raw_spin(is_contended)(lock); | |
26282 | +} | |
26283 | + | |
26284 | +static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) | |
26285 | +{ | |
26286 | + __raw_spin(lock)(lock); | |
26287 | +} | |
26288 | + | |
26289 | +static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, | |
26290 | + unsigned long flags) | |
26291 | +{ | |
26292 | + __raw_spin(lock_flags)(lock, flags); | |
26293 | +} | |
26294 | + | |
26295 | +static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) | |
26296 | +{ | |
26297 | + return __raw_spin(trylock)(lock); | |
26298 | +} | |
26299 | + | |
26300 | +static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) | |
26301 | +{ | |
26302 | + __raw_spin(unlock)(lock); | |
26303 | +} | |
26304 | + | |
26305 | +#undef __raw_spin | |
26306 | + | |
26307 | static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) | |
26308 | { | |
26309 | while (__raw_spin_is_locked(lock)) | |
82094b55 AF |
26310 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/system.h 2009-03-16 16:38:05.000000000 +0100 |
26311 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/system.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
26312 | @@ -137,7 +137,7 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" |
26313 | #define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base)) | |
26314 | #define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1)) | |
26315 | ||
26316 | -extern void load_gs_index(unsigned); | |
26317 | +extern void xen_load_gs_index(unsigned); | |
26318 | ||
26319 | /* | |
26320 | * Load a segment. Fall back on loading the zero | |
26321 | @@ -154,14 +154,14 @@ extern void load_gs_index(unsigned); | |
26322 | "jmp 2b\n" \ | |
26323 | ".previous\n" \ | |
26324 | _ASM_EXTABLE(1b,3b) \ | |
26325 | - : :"r" (value), "r" (0)) | |
26326 | + : :"r" (value), "r" (0) : "memory") | |
26327 | ||
26328 | ||
26329 | /* | |
26330 | * Save a segment register away | |
26331 | */ | |
26332 | #define savesegment(seg, value) \ | |
26333 | - asm volatile("mov %%" #seg ",%0":"=rm" (value)) | |
26334 | + asm("mov %%" #seg ",%0":"=r" (value) : : "memory") | |
26335 | ||
26336 | static inline unsigned long get_limit(unsigned long segment) | |
26337 | { | |
26338 | @@ -269,6 +269,7 @@ static inline void xen_wbinvd(void) | |
26339 | #ifdef CONFIG_X86_64 | |
26340 | #define read_cr8() (xen_read_cr8()) | |
26341 | #define write_cr8(x) (xen_write_cr8(x)) | |
26342 | +#define load_gs_index xen_load_gs_index | |
26343 | #endif | |
26344 | ||
26345 | /* Clear the 'TS' bit */ | |
26346 | @@ -287,13 +288,12 @@ static inline void clflush(volatile void | |
26347 | void disable_hlt(void); | |
26348 | void enable_hlt(void); | |
26349 | ||
26350 | -extern int es7000_plat; | |
26351 | void cpu_idle_wait(void); | |
26352 | ||
26353 | extern unsigned long arch_align_stack(unsigned long sp); | |
26354 | extern void free_init_pages(char *what, unsigned long begin, unsigned long end); | |
26355 | ||
26356 | -void default_idle(void); | |
26357 | +void xen_idle(void); | |
26358 | ||
26359 | /* | |
26360 | * Force strict CPU ordering. | |
82094b55 AF |
26361 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/xor_64.h 2009-03-16 16:38:05.000000000 +0100 |
26362 | +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/xor_64.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
26363 | @@ -1,3 +1,6 @@ |
26364 | +#ifndef ASM_X86__XOR_64_H | |
26365 | +#define ASM_X86__XOR_64_H | |
26366 | + | |
26367 | /* | |
26368 | * x86-64 changes / gcc fixes from Andi Kleen. | |
26369 | * Copyright 2002 Andi Kleen, SuSE Labs. | |
26370 | @@ -330,3 +333,5 @@ do { \ | |
26371 | We may also be able to load into the L1 only depending on how the cpu | |
26372 | deals with a load to a line that is being prefetched. */ | |
26373 | #define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse) | |
26374 | + | |
26375 | +#endif /* ASM_X86__XOR_64_H */ | |
82094b55 | 26376 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/irq_vectors.h 2009-03-16 16:33:40.000000000 +0100 |
2cb7cef9 BS |
26377 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 |
26378 | @@ -1,126 +0,0 @@ | |
26379 | -/* | |
26380 | - * This file should contain #defines for all of the interrupt vector | |
26381 | - * numbers used by this architecture. | |
26382 | - * | |
26383 | - * In addition, there are some standard defines: | |
26384 | - * | |
26385 | - * FIRST_EXTERNAL_VECTOR: | |
26386 | - * The first free place for external interrupts | |
26387 | - * | |
26388 | - * SYSCALL_VECTOR: | |
26389 | - * The IRQ vector a syscall makes the user to kernel transition | |
26390 | - * under. | |
26391 | - * | |
26392 | - * TIMER_IRQ: | |
26393 | - * The IRQ number the timer interrupt comes in at. | |
26394 | - * | |
26395 | - * NR_IRQS: | |
26396 | - * The total number of interrupt vectors (including all the | |
26397 | - * architecture specific interrupts) needed. | |
26398 | - * | |
26399 | - */ | |
26400 | -#ifndef _ASM_IRQ_VECTORS_H | |
26401 | -#define _ASM_IRQ_VECTORS_H | |
26402 | - | |
26403 | -/* | |
26404 | - * IDT vectors usable for external interrupt sources start | |
26405 | - * at 0x20: | |
26406 | - */ | |
26407 | -#define FIRST_EXTERNAL_VECTOR 0x20 | |
26408 | - | |
26409 | -#define SYSCALL_VECTOR 0x80 | |
26410 | - | |
26411 | -/* | |
26412 | - * Vectors 0x20-0x2f are used for ISA interrupts. | |
26413 | - */ | |
26414 | - | |
26415 | -#if 0 | |
26416 | -/* | |
26417 | - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff | |
26418 | - * | |
26419 | - * some of the following vectors are 'rare', they are merged | |
26420 | - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. | |
26421 | - * TLB, reschedule and local APIC vectors are performance-critical. | |
26422 | - * | |
26423 | - * Vectors 0xf0-0xfa are free (reserved for future Linux use). | |
26424 | - */ | |
26425 | -#define SPURIOUS_APIC_VECTOR 0xff | |
26426 | -#define ERROR_APIC_VECTOR 0xfe | |
26427 | -#define INVALIDATE_TLB_VECTOR 0xfd | |
26428 | -#define RESCHEDULE_VECTOR 0xfc | |
26429 | -#define CALL_FUNCTION_VECTOR 0xfb | |
26430 | - | |
26431 | -#define THERMAL_APIC_VECTOR 0xf0 | |
26432 | -/* | |
26433 | - * Local APIC timer IRQ vector is on a different priority level, | |
26434 | - * to work around the 'lost local interrupt if more than 2 IRQ | |
26435 | - * sources per level' errata. | |
26436 | - */ | |
26437 | -#define LOCAL_TIMER_VECTOR 0xef | |
26438 | -#endif | |
26439 | - | |
26440 | -#define SPURIOUS_APIC_VECTOR 0xff | |
26441 | -#define ERROR_APIC_VECTOR 0xfe | |
26442 | - | |
26443 | -/* | |
26444 | - * First APIC vector available to drivers: (vectors 0x30-0xee) | |
26445 | - * we start at 0x31 to spread out vectors evenly between priority | |
26446 | - * levels. (0x80 is the syscall vector) | |
26447 | - */ | |
26448 | -#define FIRST_DEVICE_VECTOR 0x31 | |
26449 | -#define FIRST_SYSTEM_VECTOR 0xef | |
26450 | - | |
26451 | -/* | |
26452 | - * 16 8259A IRQ's, 208 potential APIC interrupt sources. | |
26453 | - * Right now the APIC is mostly only used for SMP. | |
26454 | - * 256 vectors is an architectural limit. (we can have | |
26455 | - * more than 256 devices theoretically, but they will | |
26456 | - * have to use shared interrupts) | |
26457 | - * Since vectors 0x00-0x1f are used/reserved for the CPU, | |
26458 | - * the usable vector space is 0x20-0xff (224 vectors) | |
26459 | - */ | |
26460 | - | |
26461 | -#define RESCHEDULE_VECTOR 0 | |
26462 | -#define CALL_FUNCTION_VECTOR 1 | |
26463 | -#define SPIN_UNLOCK_VECTOR 2 | |
26464 | -#define NR_IPIS 3 | |
26465 | - | |
26466 | -/* | |
26467 | - * The maximum number of vectors supported by i386 processors | |
26468 | - * is limited to 256. For processors other than i386, NR_VECTORS | |
26469 | - * should be changed accordingly. | |
26470 | - */ | |
26471 | -#define NR_VECTORS 256 | |
26472 | - | |
26473 | -#define FPU_IRQ 13 | |
26474 | - | |
26475 | -#define FIRST_VM86_IRQ 3 | |
26476 | -#define LAST_VM86_IRQ 15 | |
26477 | -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) | |
26478 | - | |
26479 | -/* | |
26480 | - * The flat IRQ space is divided into two regions: | |
26481 | - * 1. A one-to-one mapping of real physical IRQs. This space is only used | |
26482 | - * if we have physical device-access privilege. This region is at the | |
26483 | - * start of the IRQ space so that existing device drivers do not need | |
26484 | - * to be modified to translate physical IRQ numbers into our IRQ space. | |
26485 | - * 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These | |
26486 | - * are bound using the provided bind/unbind functions. | |
26487 | - */ | |
26488 | - | |
26489 | -#define PIRQ_BASE 0 | |
26490 | -#if !defined(MAX_IO_APICS) | |
26491 | -# define NR_PIRQS (NR_VECTORS + 32 * NR_CPUS) | |
26492 | -#elif NR_CPUS < MAX_IO_APICS | |
26493 | -# define NR_PIRQS (NR_VECTORS + 32 * NR_CPUS) | |
26494 | -#else | |
26495 | -# define NR_PIRQS (NR_VECTORS + 32 * MAX_IO_APICS) | |
26496 | -#endif | |
26497 | - | |
26498 | -#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS) | |
26499 | -#define NR_DYNIRQS 256 | |
26500 | - | |
26501 | -#define NR_IRQS (NR_PIRQS + NR_DYNIRQS) | |
26502 | -#define NR_IRQ_VECTORS NR_IRQS | |
26503 | - | |
26504 | -#endif /* _ASM_IRQ_VECTORS_H */ | |
82094b55 | 26505 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/setup_arch_post.h 2009-10-28 14:55:02.000000000 +0100 |
2cb7cef9 BS |
26506 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 |
26507 | @@ -1,63 +0,0 @@ | |
26508 | -/** | |
26509 | - * machine_specific_* - Hooks for machine specific setup. | |
26510 | - * | |
26511 | - * Description: | |
26512 | - * This is included late in kernel/setup.c so that it can make | |
26513 | - * use of all of the static functions. | |
26514 | - **/ | |
26515 | - | |
26516 | -#include <xen/interface/callback.h> | |
26517 | - | |
26518 | -extern void hypervisor_callback(void); | |
26519 | -extern void failsafe_callback(void); | |
26520 | -extern void nmi(void); | |
26521 | - | |
26522 | -static void __init machine_specific_arch_setup(void) | |
26523 | -{ | |
26524 | - int ret; | |
26525 | - static struct callback_register __initdata event = { | |
26526 | - .type = CALLBACKTYPE_event, | |
26527 | - .address = (unsigned long) hypervisor_callback, | |
26528 | - }; | |
26529 | - static struct callback_register __initdata failsafe = { | |
26530 | - .type = CALLBACKTYPE_failsafe, | |
26531 | - .address = (unsigned long)failsafe_callback, | |
26532 | - }; | |
26533 | - static struct callback_register __initdata syscall = { | |
26534 | - .type = CALLBACKTYPE_syscall, | |
26535 | - .address = (unsigned long)system_call, | |
26536 | - }; | |
26537 | -#ifdef CONFIG_X86_LOCAL_APIC | |
26538 | - static struct callback_register __initdata nmi_cb = { | |
26539 | - .type = CALLBACKTYPE_nmi, | |
26540 | - .address = (unsigned long)nmi, | |
26541 | - }; | |
26542 | -#endif | |
26543 | - | |
26544 | - ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event); | |
26545 | - if (ret == 0) | |
26546 | - ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); | |
26547 | - if (ret == 0) | |
26548 | - ret = HYPERVISOR_callback_op(CALLBACKOP_register, &syscall); | |
26549 | -#if CONFIG_XEN_COMPAT <= 0x030002 | |
26550 | - if (ret == -ENOSYS) | |
26551 | - ret = HYPERVISOR_set_callbacks( | |
26552 | - event.address, | |
26553 | - failsafe.address, | |
26554 | - syscall.address); | |
26555 | -#endif | |
26556 | - BUG_ON(ret); | |
26557 | - | |
26558 | -#ifdef CONFIG_X86_LOCAL_APIC | |
26559 | - ret = HYPERVISOR_callback_op(CALLBACKOP_register, &nmi_cb); | |
26560 | -#if CONFIG_XEN_COMPAT <= 0x030002 | |
26561 | - if (ret == -ENOSYS) { | |
26562 | - static struct xennmi_callback __initdata cb = { | |
26563 | - .handler_address = (unsigned long)nmi | |
26564 | - }; | |
26565 | - | |
26566 | - HYPERVISOR_nmi_op(XENNMI_register_callback, &cb); | |
26567 | - } | |
26568 | -#endif | |
26569 | -#endif | |
26570 | -} | |
82094b55 | 26571 | --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/setup_arch_pre.h 2009-10-28 14:55:02.000000000 +0100 |
2cb7cef9 BS |
26572 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 |
26573 | @@ -1,5 +0,0 @@ | |
26574 | -/* Hook to call BIOS initialisation function */ | |
26575 | - | |
26576 | -#define ARCH_SETUP machine_specific_arch_setup(); | |
26577 | - | |
26578 | -static void __init machine_specific_arch_setup(void); | |
82094b55 AF |
26579 | --- sle11-2009-10-16.orig/include/asm-x86/traps.h 2009-10-28 14:55:02.000000000 +0100 |
26580 | +++ sle11-2009-10-16/include/asm-x86/traps.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
26581 | @@ -23,6 +23,9 @@ asmlinkage void spurious_interrupt_bug(v |
26582 | #ifdef CONFIG_X86_MCE | |
26583 | asmlinkage void machine_check(void); | |
26584 | #endif /* CONFIG_X86_MCE */ | |
26585 | +#ifdef CONFIG_X86_XEN | |
26586 | +asmlinkage void fixup_4gb_segment(void); | |
26587 | +#endif | |
26588 | ||
26589 | void do_divide_error(struct pt_regs *, long); | |
26590 | void do_overflow(struct pt_regs *, long); | |
26591 | @@ -48,6 +51,9 @@ void math_error(void __user *); | |
26592 | void do_coprocessor_error(struct pt_regs *, long); | |
26593 | void do_simd_coprocessor_error(struct pt_regs *, long); | |
26594 | void do_spurious_interrupt_bug(struct pt_regs *, long); | |
26595 | +#ifdef CONFIG_XEN | |
26596 | +void do_fixup_4gb_segment(struct pt_regs *, long); | |
26597 | +#endif | |
26598 | unsigned long patch_espfix_desc(unsigned long, unsigned long); | |
26599 | asmlinkage void math_emulate(long); | |
26600 | ||
82094b55 AF |
26601 | --- sle11-2009-10-16.orig/include/asm-x86/xen/interface_64.h 2009-10-28 14:55:02.000000000 +0100 |
26602 | +++ sle11-2009-10-16/include/asm-x86/xen/interface_64.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
26603 | @@ -136,7 +136,7 @@ struct cpu_user_regs { |
26604 | uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ | |
26605 | uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ | |
26606 | }; | |
26607 | -DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs); | |
26608 | +DEFINE_XEN_GUEST_HANDLE_STRUCT(cpu_user_regs); | |
26609 | ||
26610 | #undef __DECL_REG | |
26611 | ||
82094b55 AF |
26612 | --- sle11-2009-10-16.orig/include/linux/page-flags.h 2009-03-16 16:38:05.000000000 +0100 |
26613 | +++ sle11-2009-10-16/include/linux/page-flags.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
26614 | @@ -110,9 +110,11 @@ enum pageflags { |
26615 | /* Filesystems */ | |
26616 | PG_checked = PG_owner_priv_1, | |
26617 | ||
26618 | +#ifdef CONFIG_PARAVIRT_XEN | |
26619 | /* XEN */ | |
26620 | PG_pinned = PG_owner_priv_1, | |
26621 | PG_savepinned = PG_dirty, | |
26622 | +#endif | |
26623 | ||
26624 | /* SLOB */ | |
26625 | PG_slob_page = PG_active, | |
26626 | @@ -187,8 +189,12 @@ PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, | |
26627 | PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) | |
26628 | __PAGEFLAG(Slab, slab) | |
26629 | PAGEFLAG(Checked, checked) /* Used by some filesystems */ | |
26630 | +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN) | |
26631 | PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ | |
26632 | +#endif | |
26633 | +#ifdef CONFIG_PARAVIRT_XEN | |
26634 | PAGEFLAG(SavePinned, savepinned); /* Xen */ | |
26635 | +#endif | |
26636 | PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) | |
26637 | PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) | |
26638 | __SETPAGEFLAG(Private, private) | |
82094b55 AF |
26639 | --- sle11-2009-10-16.orig/include/xen/interface/memory.h 2009-03-16 16:38:05.000000000 +0100 |
26640 | +++ sle11-2009-10-16/include/xen/interface/memory.h 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
26641 | @@ -82,6 +82,7 @@ struct xen_memory_reservation { |
26642 | domid_t domid; | |
26643 | ||
26644 | }; | |
26645 | +DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_memory_reservation); | |
26646 | typedef struct xen_memory_reservation xen_memory_reservation_t; | |
26647 | DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t); | |
26648 | ||
26649 | @@ -167,11 +168,7 @@ struct xen_machphys_mfn_list { | |
26650 | * any large discontiguities in the machine address space, 2MB gaps in | |
26651 | * the machphys table will be represented by an MFN base of zero. | |
26652 | */ | |
26653 | -#ifndef CONFIG_PARAVIRT_XEN | |
26654 | XEN_GUEST_HANDLE(xen_pfn_t) extent_start; | |
26655 | -#else | |
26656 | - ulong extent_start; | |
26657 | -#endif | |
26658 | ||
26659 | /* | |
26660 | * Number of extents written to the above array. This will be smaller | |
26661 | @@ -179,6 +176,7 @@ struct xen_machphys_mfn_list { | |
26662 | */ | |
26663 | unsigned int nr_extents; | |
26664 | }; | |
26665 | +DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); | |
26666 | typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t; | |
26667 | DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t); | |
26668 | ||
26669 | @@ -218,6 +216,7 @@ struct xen_add_to_physmap { | |
26670 | /* GPFN where the source mapping page should appear. */ | |
26671 | xen_pfn_t gpfn; | |
26672 | }; | |
26673 | +DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_add_to_physmap); | |
26674 | typedef struct xen_add_to_physmap xen_add_to_physmap_t; | |
26675 | DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t); | |
26676 | ||
26677 | @@ -250,21 +249,13 @@ struct xen_translate_gpfn_list { | |
26678 | xen_ulong_t nr_gpfns; | |
26679 | ||
26680 | /* List of GPFNs to translate. */ | |
26681 | -#ifndef CONFIG_PARAVIRT_XEN | |
26682 | XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list; | |
26683 | -#else | |
26684 | - ulong gpfn_list; | |
26685 | -#endif | |
26686 | ||
26687 | /* | |
26688 | * Output list to contain MFN translations. May be the same as the input | |
26689 | * list (in which case each input GPFN is overwritten with the output MFN). | |
26690 | */ | |
26691 | -#ifndef CONFIG_PARAVIRT_XEN | |
26692 | XEN_GUEST_HANDLE(xen_pfn_t) mfn_list; | |
26693 | -#else | |
26694 | - ulong mfn_list; | |
26695 | -#endif | |
26696 | }; | |
26697 | DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); | |
26698 | typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t; | |
82094b55 AF |
26699 | --- sle11-2009-10-16.orig/kernel/hrtimer.c 2009-10-28 14:55:02.000000000 +0100 |
26700 | +++ sle11-2009-10-16/kernel/hrtimer.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
26701 | @@ -1084,7 +1084,7 @@ ktime_t hrtimer_get_remaining(const stru |
26702 | } | |
26703 | EXPORT_SYMBOL_GPL(hrtimer_get_remaining); | |
26704 | ||
26705 | -#ifdef CONFIG_NO_HZ | |
26706 | +#if defined(CONFIG_NO_HZ) || defined(CONFIG_NO_IDLE_HZ) | |
26707 | /** | |
26708 | * hrtimer_get_next_event - get the time until next expiry event | |
26709 | * | |
82094b55 AF |
26710 | --- sle11-2009-10-16.orig/kernel/kexec.c 2009-02-17 12:38:20.000000000 +0100 |
26711 | +++ sle11-2009-10-16/kernel/kexec.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
26712 | @@ -54,7 +54,7 @@ int dump_after_notifier; |
26713 | unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; | |
26714 | u32 | |
26715 | #if defined(CONFIG_XEN) && defined(CONFIG_X86) | |
26716 | -__attribute__((__section__(".bss.page_aligned"), __aligned__(PAGE_SIZE))) | |
26717 | +__page_aligned_bss | |
26718 | #endif | |
26719 | vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; | |
26720 | size_t vmcoreinfo_size; | |
82094b55 AF |
26721 | --- sle11-2009-10-16.orig/kernel/timer.c 2009-10-28 14:55:02.000000000 +0100 |
26722 | +++ sle11-2009-10-16/kernel/timer.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
26723 | @@ -884,7 +884,7 @@ static inline void __run_timers(struct t |
26724 | spin_unlock_irq(&base->lock); | |
26725 | } | |
26726 | ||
26727 | -#ifdef CONFIG_NO_HZ | |
26728 | +#if defined(CONFIG_NO_HZ) || defined(CONFIG_NO_IDLE_HZ) | |
26729 | /* | |
26730 | * Find out when the next timer event is due to happen. This | |
26731 | * is used on S/390 to stop all activity when a cpus is idle. | |
82094b55 AF |
26732 | --- sle11-2009-10-16.orig/lib/swiotlb-xen.c 2009-03-16 16:38:05.000000000 +0100 |
26733 | +++ sle11-2009-10-16/lib/swiotlb-xen.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
26734 | @@ -750,7 +750,7 @@ swiotlb_sync_sg_for_device(struct device |
26735 | } | |
26736 | ||
26737 | int | |
26738 | -swiotlb_dma_mapping_error(dma_addr_t dma_addr) | |
26739 | +swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) | |
26740 | { | |
26741 | return (dma_addr == virt_to_bus(io_tlb_overflow_buffer)); | |
26742 | } | |
82094b55 AF |
26743 | --- sle11-2009-10-16.orig/mm/mprotect.c 2009-03-04 11:28:34.000000000 +0100 |
26744 | +++ sle11-2009-10-16/mm/mprotect.c 2009-06-04 10:21:39.000000000 +0200 | |
2cb7cef9 BS |
26745 | @@ -92,8 +92,6 @@ static inline void change_pmd_range(stru |
26746 | next = pmd_addr_end(addr, end); | |
26747 | if (pmd_none_or_clear_bad(pmd)) | |
26748 | continue; | |
26749 | - if (arch_change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable)) | |
26750 | - continue; | |
26751 | change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable); | |
26752 | } while (pmd++, addr = next, addr != end); | |
26753 | } |