]>
Commit | Line | Data |
---|---|---|
cc90b958 BS |
1 | From: kernel.org |
2 | Subject: 2.6.25 | |
3 | Patch-mainline: 2.6.25 | |
4 | ||
5 | Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> | |
6 | ||
7 | Automatically created from "patches.kernel.org/patch-2.6.25" by xen-port-patches.py | |
8 | ||
00e5a55c BS |
9 | --- sle11-2009-05-14.orig/arch/x86/Kconfig 2009-02-16 16:18:36.000000000 +0100 |
10 | +++ sle11-2009-05-14/arch/x86/Kconfig 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
11 | @@ -27,7 +27,7 @@ config X86 |
12 | select HAVE_KRETPROBES | |
13 | select HAVE_DYNAMIC_FTRACE | |
14 | select HAVE_FTRACE | |
15 | - select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) | |
16 | + select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) && !XEN | |
17 | select HAVE_ARCH_KGDB if !X86_VOYAGER | |
18 | select HAVE_ARCH_TRACEHOOK | |
19 | select HAVE_GENERIC_DMA_COHERENT if X86_32 | |
00e5a55c | 20 | @@ -211,14 +211,12 @@ config X86_TRAMPOLINE |
cc90b958 BS |
21 | default y |
22 | ||
23 | config X86_NO_TSS | |
24 | - bool | |
25 | + def_bool y | |
26 | depends on XEN | |
27 | - default y | |
28 | ||
29 | config X86_NO_IDT | |
30 | - bool | |
31 | + def_bool y | |
32 | depends on XEN | |
33 | - default y | |
34 | ||
35 | config KTIME_SCALAR | |
36 | def_bool X86_32 | |
00e5a55c | 37 | @@ -728,9 +726,8 @@ config X86_VISWS_APIC |
cc90b958 BS |
38 | depends on X86_32 && X86_VISWS |
39 | ||
40 | config X86_XEN_GENAPIC | |
41 | - bool | |
42 | + def_bool y | |
43 | depends on X86_64_XEN | |
44 | - default y | |
45 | ||
46 | config X86_MCE | |
47 | bool "Machine Check Exception" | |
00e5a55c | 48 | @@ -1117,7 +1114,7 @@ config ARCH_DISCONTIGMEM_DEFAULT |
cc90b958 BS |
49 | |
50 | config ARCH_SPARSEMEM_DEFAULT | |
51 | def_bool y | |
52 | - depends on X86_64 | |
53 | + depends on X86_64 && !X86_64_XEN | |
54 | ||
55 | config ARCH_SPARSEMEM_ENABLE | |
56 | def_bool y | |
00e5a55c | 57 | @@ -1747,10 +1744,10 @@ config PCI_MMCONFIG |
cc90b958 BS |
58 | depends on X86_64 && PCI && ACPI |
59 | ||
60 | config XEN_PCIDEV_FRONTEND | |
61 | - bool "Xen PCI Frontend" if X86_64 | |
62 | + def_bool y | |
63 | + prompt "Xen PCI Frontend" if X86_64 | |
64 | depends on PCI && XEN && (PCI_GOXEN_FE || PCI_GOANY || X86_64) | |
65 | select HOTPLUG | |
66 | - default y | |
67 | help | |
68 | The PCI device frontend driver allows the kernel to import arbitrary | |
69 | PCI devices from a PCI backend to support PCI driver domains. | |
00e5a55c | 70 | @@ -1758,7 +1755,6 @@ config XEN_PCIDEV_FRONTEND |
cc90b958 BS |
71 | config XEN_PCIDEV_FE_DEBUG |
72 | bool "Xen PCI Frontend Debugging" | |
73 | depends on XEN_PCIDEV_FRONTEND | |
74 | - default n | |
75 | help | |
76 | Enables some debug statements within the PCI Frontend. | |
77 | ||
00e5a55c BS |
78 | --- sle11-2009-05-14.orig/arch/x86/Kconfig.debug 2009-02-02 09:40:56.000000000 +0100 |
79 | +++ sle11-2009-05-14/arch/x86/Kconfig.debug 2009-03-16 16:33:40.000000000 +0100 | |
80 | @@ -279,6 +279,7 @@ config DEBUG_BOOT_PARAMS | |
cc90b958 BS |
81 | bool "Debug boot parameters" |
82 | depends on DEBUG_KERNEL | |
83 | depends on DEBUG_FS | |
84 | + depends on !XEN | |
85 | help | |
86 | This option will cause struct boot_params to be exported via debugfs. | |
87 | ||
00e5a55c BS |
88 | --- sle11-2009-05-14.orig/arch/x86/ia32/ia32entry-xen.S 2009-02-16 16:18:36.000000000 +0100 |
89 | +++ sle11-2009-05-14/arch/x86/ia32/ia32entry-xen.S 2009-03-16 16:33:40.000000000 +0100 | |
90 | @@ -12,7 +12,6 @@ | |
91 | #include <asm/ia32_unistd.h> | |
92 | #include <asm/thread_info.h> | |
93 | #include <asm/segment.h> | |
94 | -#include <asm/vsyscall32.h> | |
95 | #include <asm/irqflags.h> | |
96 | #include <linux/linkage.h> | |
97 | ||
98 | @@ -99,10 +98,11 @@ ENTRY(ia32_sysenter_target) | |
99 | CFI_RESTORE rcx | |
100 | movl %ebp,%ebp /* zero extension */ | |
101 | movl %eax,%eax | |
102 | + movl 48-THREAD_SIZE+threadinfo_sysenter_return(%rsp),%r10d | |
103 | movl $__USER32_DS,40(%rsp) | |
104 | movq %rbp,32(%rsp) | |
105 | movl $__USER32_CS,16(%rsp) | |
106 | - movl $VSYSCALL32_SYSEXIT,8(%rsp) | |
107 | + movq %r10,8(%rsp) | |
108 | movq %rax,(%rsp) | |
109 | cld | |
110 | SAVE_ARGS 0,0,1 | |
111 | @@ -582,8 +582,8 @@ ia32_sys_call_table: | |
112 | .quad compat_sys_futex /* 240 */ | |
113 | .quad compat_sys_sched_setaffinity | |
114 | .quad compat_sys_sched_getaffinity | |
115 | - .quad sys32_set_thread_area | |
116 | - .quad sys32_get_thread_area | |
117 | + .quad sys_set_thread_area | |
118 | + .quad sys_get_thread_area | |
119 | .quad compat_sys_io_setup /* 245 */ | |
120 | .quad sys_io_destroy | |
121 | .quad compat_sys_io_getevents | |
122 | @@ -661,7 +661,9 @@ ia32_sys_call_table: | |
123 | .quad sys_epoll_pwait | |
124 | .quad compat_sys_utimensat /* 320 */ | |
125 | .quad compat_sys_signalfd | |
126 | - .quad compat_sys_timerfd | |
127 | + .quad sys_timerfd_create | |
128 | .quad sys_eventfd | |
129 | .quad sys32_fallocate | |
130 | + .quad compat_sys_timerfd_settime /* 325 */ | |
131 | + .quad compat_sys_timerfd_gettime | |
132 | ia32_syscall_end: | |
133 | --- sle11-2009-05-14.orig/arch/x86/kernel/Makefile 2009-02-16 16:18:36.000000000 +0100 | |
134 | +++ sle11-2009-05-14/arch/x86/kernel/Makefile 2009-03-16 16:33:40.000000000 +0100 | |
135 | @@ -120,11 +120,10 @@ ifeq ($(CONFIG_X86_64),y) | |
136 | ||
137 | obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o | |
138 | ||
139 | + obj-$(CONFIG_XEN) += nmi_64.o | |
140 | time_64-$(CONFIG_XEN) += time_32.o | |
141 | pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o | |
142 | endif | |
143 | ||
144 | disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \ | |
145 | smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o | |
146 | -disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o | |
147 | -%/head_64.o %/head_64.s: asflags-$(CONFIG_XEN) := | |
148 | --- sle11-2009-05-14.orig/arch/x86/kernel/acpi/boot.c 2008-12-01 11:11:08.000000000 +0100 | |
149 | +++ sle11-2009-05-14/arch/x86/kernel/acpi/boot.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
150 | @@ -133,6 +133,9 @@ char *__init __acpi_map_table(unsigned l |
151 | #ifndef CONFIG_XEN | |
152 | if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT)) | |
153 | return __va(phys); | |
154 | +#else | |
155 | + if (phys + size <= (NR_FIX_ISAMAPS << PAGE_SHIFT)) | |
156 | + return isa_bus_to_virt(phys); | |
157 | #endif | |
158 | ||
159 | offset = phys & (PAGE_SIZE - 1); | |
00e5a55c BS |
160 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 |
161 | +++ sle11-2009-05-14/arch/x86/kernel/acpi/sleep-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
162 | @@ -0,0 +1,95 @@ | |
163 | +/* | |
164 | + * sleep.c - x86-specific ACPI sleep support. | |
165 | + * | |
166 | + * Copyright (C) 2001-2003 Patrick Mochel | |
167 | + * Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz> | |
168 | + */ | |
169 | + | |
170 | +#include <linux/acpi.h> | |
171 | +#include <linux/bootmem.h> | |
172 | +#include <linux/dmi.h> | |
173 | +#include <linux/cpumask.h> | |
174 | + | |
175 | +#include <asm/smp.h> | |
176 | + | |
177 | +#ifndef CONFIG_ACPI_PV_SLEEP | |
178 | +/* address in low memory of the wakeup routine. */ | |
179 | +unsigned long acpi_wakeup_address = 0; | |
180 | +unsigned long acpi_realmode_flags; | |
181 | +extern char wakeup_start, wakeup_end; | |
182 | + | |
183 | +extern unsigned long acpi_copy_wakeup_routine(unsigned long); | |
184 | +#endif | |
185 | + | |
186 | +/** | |
187 | + * acpi_save_state_mem - save kernel state | |
188 | + * | |
189 | + * Create an identity mapped page table and copy the wakeup routine to | |
190 | + * low memory. | |
191 | + */ | |
192 | +int acpi_save_state_mem(void) | |
193 | +{ | |
194 | +#ifndef CONFIG_ACPI_PV_SLEEP | |
195 | + if (!acpi_wakeup_address) { | |
196 | + printk(KERN_ERR "Could not allocate memory during boot, S3 disabled\n"); | |
197 | + return -ENOMEM; | |
198 | + } | |
199 | + memcpy((void *)acpi_wakeup_address, &wakeup_start, | |
200 | + &wakeup_end - &wakeup_start); | |
201 | + acpi_copy_wakeup_routine(acpi_wakeup_address); | |
202 | +#endif | |
203 | + | |
204 | + return 0; | |
205 | +} | |
206 | + | |
207 | +/* | |
208 | + * acpi_restore_state_mem - undo effects of acpi_save_state_mem | |
209 | + */ | |
210 | +void acpi_restore_state_mem(void) | |
211 | +{ | |
212 | +} | |
213 | + | |
214 | + | |
215 | +/** | |
216 | + * acpi_reserve_bootmem - do _very_ early ACPI initialisation | |
217 | + * | |
218 | + * We allocate two pages from the first 1MB of memory for the wakeup | |
219 | + * routine for when we come back from a sleep state. The | |
220 | + * runtime allocator allows specification of <16MB pages, but not | |
221 | + * <1MB pages. | |
222 | + */ | |
223 | +void __init acpi_reserve_bootmem(void) | |
224 | +{ | |
225 | +#ifndef CONFIG_ACPI_PV_SLEEP | |
226 | + if ((&wakeup_end - &wakeup_start) > PAGE_SIZE*2) { | |
227 | + printk(KERN_ERR | |
228 | + "ACPI: Wakeup code way too big, S3 disabled.\n"); | |
229 | + return; | |
230 | + } | |
231 | + | |
232 | + acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2); | |
233 | + if (!acpi_wakeup_address) | |
234 | + printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); | |
235 | +#endif | |
236 | +} | |
237 | + | |
238 | + | |
239 | +#ifndef CONFIG_ACPI_PV_SLEEP | |
240 | +static int __init acpi_sleep_setup(char *str) | |
241 | +{ | |
242 | + while ((str != NULL) && (*str != '\0')) { | |
243 | + if (strncmp(str, "s3_bios", 7) == 0) | |
244 | + acpi_realmode_flags |= 1; | |
245 | + if (strncmp(str, "s3_mode", 7) == 0) | |
246 | + acpi_realmode_flags |= 2; | |
247 | + if (strncmp(str, "s3_beep", 7) == 0) | |
248 | + acpi_realmode_flags |= 4; | |
249 | + str = strchr(str, ','); | |
250 | + if (str != NULL) | |
251 | + str += strspn(str, ", \t"); | |
252 | + } | |
253 | + return 1; | |
254 | +} | |
255 | + | |
256 | +__setup("acpi_sleep=", acpi_sleep_setup); | |
257 | +#endif /* CONFIG_ACPI_PV_SLEEP */ | |
258 | --- sle11-2009-05-14.orig/arch/x86/kernel/acpi/sleep_32-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
259 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
cc90b958 BS |
260 | @@ -1,117 +0,0 @@ |
261 | -/* | |
262 | - * sleep.c - x86-specific ACPI sleep support. | |
263 | - * | |
264 | - * Copyright (C) 2001-2003 Patrick Mochel | |
265 | - * Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz> | |
266 | - */ | |
267 | - | |
268 | -#include <linux/acpi.h> | |
269 | -#include <linux/bootmem.h> | |
270 | -#include <linux/dmi.h> | |
271 | -#include <linux/cpumask.h> | |
272 | - | |
273 | -#include <asm/smp.h> | |
274 | - | |
275 | -#ifndef CONFIG_ACPI_PV_SLEEP | |
276 | -/* address in low memory of the wakeup routine. */ | |
277 | -unsigned long acpi_wakeup_address = 0; | |
278 | -unsigned long acpi_realmode_flags; | |
279 | -extern char wakeup_start, wakeup_end; | |
280 | - | |
281 | -extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); | |
282 | -#endif | |
283 | - | |
284 | -/** | |
285 | - * acpi_save_state_mem - save kernel state | |
286 | - * | |
287 | - * Create an identity mapped page table and copy the wakeup routine to | |
288 | - * low memory. | |
289 | - */ | |
290 | -int acpi_save_state_mem(void) | |
291 | -{ | |
292 | -#ifndef CONFIG_ACPI_PV_SLEEP | |
293 | - if (!acpi_wakeup_address) | |
294 | - return 1; | |
295 | - memcpy((void *)acpi_wakeup_address, &wakeup_start, | |
296 | - &wakeup_end - &wakeup_start); | |
297 | - acpi_copy_wakeup_routine(acpi_wakeup_address); | |
298 | -#endif | |
299 | - return 0; | |
300 | -} | |
301 | - | |
302 | -/* | |
303 | - * acpi_restore_state - undo effects of acpi_save_state_mem | |
304 | - */ | |
305 | -void acpi_restore_state_mem(void) | |
306 | -{ | |
307 | -} | |
308 | - | |
309 | -/** | |
310 | - * acpi_reserve_bootmem - do _very_ early ACPI initialisation | |
311 | - * | |
312 | - * We allocate a page from the first 1MB of memory for the wakeup | |
313 | - * routine for when we come back from a sleep state. The | |
314 | - * runtime allocator allows specification of <16MB pages, but not | |
315 | - * <1MB pages. | |
316 | - */ | |
317 | -void __init acpi_reserve_bootmem(void) | |
318 | -{ | |
319 | -#ifndef CONFIG_ACPI_PV_SLEEP | |
320 | - if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) { | |
321 | - printk(KERN_ERR | |
322 | - "ACPI: Wakeup code way too big, S3 disabled.\n"); | |
323 | - return; | |
324 | - } | |
325 | - | |
326 | - acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); | |
327 | - if (!acpi_wakeup_address) | |
328 | - printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); | |
329 | -#endif | |
330 | -} | |
331 | - | |
332 | -#ifndef CONFIG_ACPI_PV_SLEEP | |
333 | -static int __init acpi_sleep_setup(char *str) | |
334 | -{ | |
335 | - while ((str != NULL) && (*str != '\0')) { | |
336 | - if (strncmp(str, "s3_bios", 7) == 0) | |
337 | - acpi_realmode_flags |= 1; | |
338 | - if (strncmp(str, "s3_mode", 7) == 0) | |
339 | - acpi_realmode_flags |= 2; | |
340 | - if (strncmp(str, "s3_beep", 7) == 0) | |
341 | - acpi_realmode_flags |= 4; | |
342 | - str = strchr(str, ','); | |
343 | - if (str != NULL) | |
344 | - str += strspn(str, ", \t"); | |
345 | - } | |
346 | - return 1; | |
347 | -} | |
348 | - | |
349 | -__setup("acpi_sleep=", acpi_sleep_setup); | |
350 | - | |
351 | -/* Ouch, we want to delete this. We already have better version in userspace, in | |
352 | - s2ram from suspend.sf.net project */ | |
353 | -static __init int reset_videomode_after_s3(const struct dmi_system_id *d) | |
354 | -{ | |
355 | - acpi_realmode_flags |= 2; | |
356 | - return 0; | |
357 | -} | |
358 | - | |
359 | -static __initdata struct dmi_system_id acpisleep_dmi_table[] = { | |
360 | - { /* Reset video mode after returning from ACPI S3 sleep */ | |
361 | - .callback = reset_videomode_after_s3, | |
362 | - .ident = "Toshiba Satellite 4030cdt", | |
363 | - .matches = { | |
364 | - DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), | |
365 | - }, | |
366 | - }, | |
367 | - {} | |
368 | -}; | |
369 | - | |
370 | -static int __init acpisleep_dmi_init(void) | |
371 | -{ | |
372 | - dmi_check_system(acpisleep_dmi_table); | |
373 | - return 0; | |
374 | -} | |
375 | - | |
376 | -core_initcall(acpisleep_dmi_init); | |
377 | -#endif /* CONFIG_ACPI_PV_SLEEP */ | |
00e5a55c BS |
378 | --- sle11-2009-05-14.orig/arch/x86/kernel/acpi/sleep_64-xen.c 2009-02-16 16:18:36.000000000 +0100 |
379 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
cc90b958 BS |
380 | @@ -1,125 +0,0 @@ |
381 | -/* | |
382 | - * acpi.c - Architecture-Specific Low-Level ACPI Support | |
383 | - * | |
384 | - * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> | |
385 | - * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com> | |
386 | - * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org> | |
387 | - * Copyright (C) 2002 Andi Kleen, SuSE Labs (x86-64 port) | |
388 | - * Copyright (C) 2003 Pavel Machek, SuSE Labs | |
389 | - * | |
390 | - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
391 | - * | |
392 | - * This program is free software; you can redistribute it and/or modify | |
393 | - * it under the terms of the GNU General Public License as published by | |
394 | - * the Free Software Foundation; either version 2 of the License, or | |
395 | - * (at your option) any later version. | |
396 | - * | |
397 | - * This program is distributed in the hope that it will be useful, | |
398 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
399 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
400 | - * GNU General Public License for more details. | |
401 | - * | |
402 | - * You should have received a copy of the GNU General Public License | |
403 | - * along with this program; if not, write to the Free Software | |
404 | - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
405 | - * | |
406 | - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
407 | - */ | |
408 | - | |
409 | -#include <linux/kernel.h> | |
410 | -#include <linux/init.h> | |
411 | -#include <linux/types.h> | |
412 | -#include <linux/stddef.h> | |
413 | -#include <linux/slab.h> | |
414 | -#include <linux/pci.h> | |
415 | -#include <linux/bootmem.h> | |
416 | -#include <linux/acpi.h> | |
417 | -#include <linux/cpumask.h> | |
418 | - | |
419 | -#include <asm/mpspec.h> | |
420 | -#include <asm/io.h> | |
421 | -#include <asm/apic.h> | |
422 | -#include <asm/apicdef.h> | |
423 | -#include <asm/page.h> | |
424 | -#include <asm/pgtable.h> | |
425 | -#include <asm/pgalloc.h> | |
426 | -#include <asm/io_apic.h> | |
427 | -#include <asm/proto.h> | |
428 | -#include <asm/tlbflush.h> | |
429 | - | |
430 | -/* -------------------------------------------------------------------------- | |
431 | - Low-Level Sleep Support | |
432 | - -------------------------------------------------------------------------- */ | |
433 | - | |
434 | -#ifndef CONFIG_ACPI_PV_SLEEP | |
435 | -/* address in low memory of the wakeup routine. */ | |
436 | -unsigned long acpi_wakeup_address = 0; | |
437 | -unsigned long acpi_realmode_flags; | |
438 | -extern char wakeup_start, wakeup_end; | |
439 | - | |
440 | -extern unsigned long acpi_copy_wakeup_routine(unsigned long); | |
441 | -#endif | |
442 | - | |
443 | -/** | |
444 | - * acpi_save_state_mem - save kernel state | |
445 | - * | |
446 | - * Create an identity mapped page table and copy the wakeup routine to | |
447 | - * low memory. | |
448 | - */ | |
449 | -int acpi_save_state_mem(void) | |
450 | -{ | |
451 | -#ifndef CONFIG_ACPI_PV_SLEEP | |
452 | - memcpy((void *)acpi_wakeup_address, &wakeup_start, | |
453 | - &wakeup_end - &wakeup_start); | |
454 | - acpi_copy_wakeup_routine(acpi_wakeup_address); | |
455 | -#endif | |
456 | - return 0; | |
457 | -} | |
458 | - | |
459 | -/* | |
460 | - * acpi_restore_state | |
461 | - */ | |
462 | -void acpi_restore_state_mem(void) | |
463 | -{ | |
464 | -} | |
465 | - | |
466 | -/** | |
467 | - * acpi_reserve_bootmem - do _very_ early ACPI initialisation | |
468 | - * | |
469 | - * We allocate a page in low memory for the wakeup | |
470 | - * routine for when we come back from a sleep state. The | |
471 | - * runtime allocator allows specification of <16M pages, but not | |
472 | - * <1M pages. | |
473 | - */ | |
474 | -void __init acpi_reserve_bootmem(void) | |
475 | -{ | |
476 | -#ifndef CONFIG_ACPI_PV_SLEEP | |
477 | - acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2); | |
478 | - if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2)) | |
479 | - printk(KERN_CRIT | |
480 | - "ACPI: Wakeup code way too big, will crash on attempt" | |
481 | - " to suspend\n"); | |
482 | -#endif | |
483 | -} | |
484 | - | |
485 | -#ifndef CONFIG_ACPI_PV_SLEEP | |
486 | -static int __init acpi_sleep_setup(char *str) | |
487 | -{ | |
488 | - while ((str != NULL) && (*str != '\0')) { | |
489 | - if (strncmp(str, "s3_bios", 7) == 0) | |
490 | - acpi_realmode_flags |= 1; | |
491 | - if (strncmp(str, "s3_mode", 7) == 0) | |
492 | - acpi_realmode_flags |= 2; | |
493 | - if (strncmp(str, "s3_beep", 7) == 0) | |
494 | - acpi_realmode_flags |= 4; | |
495 | - str = strchr(str, ','); | |
496 | - if (str != NULL) | |
497 | - str += strspn(str, ", \t"); | |
498 | - } | |
499 | - | |
500 | - return 1; | |
501 | -} | |
502 | - | |
503 | -__setup("acpi_sleep=", acpi_sleep_setup); | |
504 | -#endif /* CONFIG_ACPI_PV_SLEEP */ | |
505 | - | |
00e5a55c BS |
506 | --- sle11-2009-05-14.orig/arch/x86/kernel/apic_32-xen.c 2008-12-15 11:27:22.000000000 +0100 |
507 | +++ sle11-2009-05-14/arch/x86/kernel/apic_32-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
508 | @@ -86,7 +86,7 @@ int setup_profiling_timer(unsigned int m | |
509 | * This initializes the IO-APIC and APIC hardware if this is | |
510 | * a UP kernel. | |
511 | */ | |
512 | -int __init APIC_init_uniprocessor (void) | |
513 | +int __init APIC_init_uniprocessor(void) | |
514 | { | |
515 | #ifdef CONFIG_X86_IO_APIC | |
516 | if (smp_found_config) | |
517 | --- sle11-2009-05-14.orig/arch/x86/kernel/apic_64-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
518 | +++ sle11-2009-05-14/arch/x86/kernel/apic_64-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
519 | @@ -34,34 +34,17 @@ | |
520 | #include <asm/hpet.h> | |
521 | #include <asm/idle.h> | |
522 | ||
523 | -int apic_verbosity; | |
524 | +int disable_apic; | |
525 | ||
526 | /* | |
527 | - * 'what should we do if we get a hw irq event on an illegal vector'. | |
528 | - * each architecture has to answer this themselves. | |
529 | + * Debug level, exported for io_apic.c | |
530 | */ | |
531 | -void ack_bad_irq(unsigned int irq) | |
532 | -{ | |
533 | - printk("unexpected IRQ trap at irq %02x\n", irq); | |
534 | - /* | |
535 | - * Currently unexpected vectors happen only on SMP and APIC. | |
536 | - * We _must_ ack these because every local APIC has only N | |
537 | - * irq slots per priority level, and a 'hanging, unacked' IRQ | |
538 | - * holds up an irq slot - in excessive cases (when multiple | |
539 | - * unexpected vectors occur) that might lock up the APIC | |
540 | - * completely. | |
541 | - * But don't ack when the APIC is disabled. -AK | |
542 | - */ | |
543 | - if (!disable_apic) | |
544 | - ack_APIC_irq(); | |
545 | -} | |
546 | - | |
547 | -int setup_profiling_timer(unsigned int multiplier) | |
548 | -{ | |
549 | - return -EINVAL; | |
550 | -} | |
551 | +int apic_verbosity; | |
552 | ||
553 | -void smp_local_timer_interrupt(void) | |
cc90b958 | 554 | +/* |
00e5a55c | 555 | + * The guts of the apic timer interrupt |
cc90b958 BS |
556 | + */ |
557 | +static void local_apic_timer_interrupt(void) | |
558 | { | |
559 | #ifndef CONFIG_XEN | |
560 | int cpu = smp_processor_id(); | |
561 | @@ -121,11 +104,34 @@ void smp_apic_timer_interrupt(struct pt_ | |
562 | */ | |
563 | exit_idle(); | |
564 | irq_enter(); | |
565 | - smp_local_timer_interrupt(); | |
566 | + local_apic_timer_interrupt(); | |
567 | irq_exit(); | |
568 | set_irq_regs(old_regs); | |
569 | } | |
570 | ||
571 | +int setup_profiling_timer(unsigned int multiplier) | |
572 | +{ | |
573 | + return -EINVAL; | |
574 | +} | |
575 | + | |
576 | +/* | |
577 | + * This initializes the IO-APIC and APIC hardware if this is | |
578 | + * a UP kernel. | |
579 | + */ | |
580 | +int __init APIC_init_uniprocessor(void) | |
581 | +{ | |
582 | +#ifdef CONFIG_X86_IO_APIC | |
583 | + if (smp_found_config && !skip_ioapic_setup && nr_ioapics) | |
584 | + setup_IO_APIC(); | |
585 | +#endif | |
586 | + | |
587 | + return 1; | |
588 | +} | |
589 | + | |
590 | +/* | |
591 | + * Local APIC interrupts | |
592 | + */ | |
593 | + | |
594 | /* | |
595 | * This interrupt should _never_ happen with our APIC/SMP architecture | |
596 | */ | |
597 | @@ -150,7 +156,6 @@ asmlinkage void smp_spurious_interrupt(v | |
598 | /* | |
599 | * This interrupt should never happen with our APIC/SMP architecture | |
600 | */ | |
601 | - | |
602 | asmlinkage void smp_error_interrupt(void) | |
603 | { | |
604 | unsigned int v, v1; | |
605 | @@ -178,19 +183,3 @@ asmlinkage void smp_error_interrupt(void | |
606 | smp_processor_id(), v , v1); | |
607 | irq_exit(); | |
608 | } | |
609 | - | |
610 | -int disable_apic; | |
611 | - | |
612 | -/* | |
613 | - * This initializes the IO-APIC and APIC hardware if this is | |
614 | - * a UP kernel. | |
615 | - */ | |
616 | -int __init APIC_init_uniprocessor (void) | |
617 | -{ | |
618 | -#ifdef CONFIG_X86_IO_APIC | |
619 | - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) | |
620 | - setup_IO_APIC(); | |
621 | -#endif | |
622 | - | |
623 | - return 1; | |
624 | -} | |
00e5a55c BS |
625 | --- sle11-2009-05-14.orig/arch/x86/kernel/asm-offsets_32.c 2009-02-16 16:17:21.000000000 +0100 |
626 | +++ sle11-2009-05-14/arch/x86/kernel/asm-offsets_32.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
627 | @@ -23,8 +23,10 @@ |
628 | #include <xen/interface/xen.h> | |
629 | #endif | |
630 | ||
631 | +#ifdef CONFIG_LGUEST_GUEST | |
632 | #include <linux/lguest.h> | |
633 | #include "../../../drivers/lguest/lg.h" | |
634 | +#endif | |
635 | ||
636 | /* workaround for a warning with -Wmissing-prototypes */ | |
637 | void foo(void); | |
00e5a55c BS |
638 | --- sle11-2009-05-14.orig/arch/x86/kernel/cpu/common-xen.c 2009-02-16 16:18:36.000000000 +0100 |
639 | +++ sle11-2009-05-14/arch/x86/kernel/cpu/common-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
640 | @@ -27,45 +27,50 @@ |
641 | #include "cpu.h" | |
642 | ||
643 | DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { | |
644 | - [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 }, | |
645 | - [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 }, | |
646 | - [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 }, | |
647 | - [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 }, | |
648 | + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, | |
649 | + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, | |
650 | + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, | |
651 | + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, | |
652 | #ifndef CONFIG_XEN | |
653 | /* | |
654 | * Segments used for calling PnP BIOS have byte granularity. | |
655 | * They code segments and data segments have fixed 64k limits, | |
656 | * the transfer segment sizes are set at run time. | |
657 | */ | |
658 | - [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ | |
659 | - [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */ | |
660 | - [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */ | |
661 | - [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */ | |
662 | - [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */ | |
663 | + /* 32-bit code */ | |
664 | + [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, | |
665 | + /* 16-bit code */ | |
666 | + [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, | |
667 | + /* 16-bit data */ | |
668 | + [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, | |
669 | + /* 16-bit data */ | |
670 | + [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, | |
671 | + /* 16-bit data */ | |
672 | + [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, | |
673 | /* | |
674 | * The APM segments have byte granularity and their bases | |
675 | * are set at run time. All have 64k limits. | |
676 | */ | |
677 | - [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ | |
678 | + /* 32-bit code */ | |
679 | + [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, | |
680 | /* 16-bit code */ | |
681 | - [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 }, | |
682 | - [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */ | |
683 | + [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, | |
684 | + /* data */ | |
685 | + [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, | |
686 | ||
687 | - [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 }, | |
688 | + [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, | |
689 | #endif | |
690 | - [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 }, | |
691 | + [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, | |
692 | } }; | |
693 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | |
694 | ||
695 | +__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; | |
696 | + | |
697 | static int cachesize_override __cpuinitdata = -1; | |
698 | -static int disable_x86_fxsr __cpuinitdata; | |
699 | static int disable_x86_serial_nr __cpuinitdata = 1; | |
700 | -static int disable_x86_sep __cpuinitdata; | |
701 | ||
702 | struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; | |
703 | ||
704 | -extern int disable_pse; | |
705 | - | |
706 | static void __cpuinit default_init(struct cpuinfo_x86 * c) | |
707 | { | |
708 | /* Not much we can do here... */ | |
709 | @@ -214,16 +219,8 @@ static void __cpuinit get_cpu_vendor(str | |
710 | ||
711 | static int __init x86_fxsr_setup(char * s) | |
712 | { | |
713 | - /* Tell all the other CPUs to not use it... */ | |
714 | - disable_x86_fxsr = 1; | |
715 | - | |
716 | - /* | |
717 | - * ... and clear the bits early in the boot_cpu_data | |
718 | - * so that the bootup process doesn't try to do this | |
719 | - * either. | |
720 | - */ | |
721 | - clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability); | |
722 | - clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability); | |
723 | + setup_clear_cpu_cap(X86_FEATURE_FXSR); | |
724 | + setup_clear_cpu_cap(X86_FEATURE_XMM); | |
725 | return 1; | |
726 | } | |
727 | __setup("nofxsr", x86_fxsr_setup); | |
728 | @@ -231,7 +228,7 @@ __setup("nofxsr", x86_fxsr_setup); | |
729 | ||
730 | static int __init x86_sep_setup(char * s) | |
731 | { | |
732 | - disable_x86_sep = 1; | |
733 | + setup_clear_cpu_cap(X86_FEATURE_SEP); | |
734 | return 1; | |
735 | } | |
736 | __setup("nosep", x86_sep_setup); | |
737 | @@ -268,10 +265,10 @@ static int __cpuinit have_cpuid_p(void) | |
738 | void __init cpu_detect(struct cpuinfo_x86 *c) | |
739 | { | |
740 | /* Get vendor name */ | |
741 | - cpuid(0x00000000, &c->cpuid_level, | |
742 | - (int *)&c->x86_vendor_id[0], | |
743 | - (int *)&c->x86_vendor_id[8], | |
744 | - (int *)&c->x86_vendor_id[4]); | |
745 | + cpuid(0x00000000, (unsigned int *)&c->cpuid_level, | |
746 | + (unsigned int *)&c->x86_vendor_id[0], | |
747 | + (unsigned int *)&c->x86_vendor_id[8], | |
748 | + (unsigned int *)&c->x86_vendor_id[4]); | |
749 | ||
750 | c->x86 = 4; | |
751 | if (c->cpuid_level >= 0x00000001) { | |
752 | @@ -284,9 +281,38 @@ void __init cpu_detect(struct cpuinfo_x8 | |
753 | if (c->x86 >= 0x6) | |
754 | c->x86_model += ((tfms >> 16) & 0xF) << 4; | |
755 | c->x86_mask = tfms & 15; | |
756 | - if (cap0 & (1<<19)) | |
757 | + if (cap0 & (1<<19)) { | |
758 | c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; | |
759 | + c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; | |
760 | + } | |
761 | + } | |
762 | +} | |
763 | +static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) | |
764 | +{ | |
765 | + u32 tfms, xlvl; | |
766 | + unsigned int ebx; | |
767 | + | |
768 | + memset(&c->x86_capability, 0, sizeof c->x86_capability); | |
769 | + if (have_cpuid_p()) { | |
770 | + /* Intel-defined flags: level 0x00000001 */ | |
771 | + if (c->cpuid_level >= 0x00000001) { | |
772 | + u32 capability, excap; | |
773 | + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); | |
774 | + c->x86_capability[0] = capability; | |
775 | + c->x86_capability[4] = excap; | |
776 | + } | |
777 | + | |
778 | + /* AMD-defined flags: level 0x80000001 */ | |
779 | + xlvl = cpuid_eax(0x80000000); | |
780 | + if ((xlvl & 0xffff0000) == 0x80000000) { | |
781 | + if (xlvl >= 0x80000001) { | |
782 | + c->x86_capability[1] = cpuid_edx(0x80000001); | |
783 | + c->x86_capability[6] = cpuid_ecx(0x80000001); | |
784 | + } | |
785 | + } | |
786 | + | |
787 | } | |
788 | + | |
789 | } | |
790 | ||
791 | /* Do minimum CPU detection early. | |
792 | @@ -300,6 +326,7 @@ static void __init early_cpu_detect(void | |
793 | struct cpuinfo_x86 *c = &boot_cpu_data; | |
794 | ||
795 | c->x86_cache_alignment = 32; | |
796 | + c->x86_clflush_size = 32; | |
797 | ||
798 | if (!have_cpuid_p()) | |
799 | return; | |
800 | @@ -307,19 +334,30 @@ static void __init early_cpu_detect(void | |
801 | cpu_detect(c); | |
802 | ||
803 | get_cpu_vendor(c, 1); | |
804 | + | |
805 | + switch (c->x86_vendor) { | |
806 | + case X86_VENDOR_AMD: | |
807 | + early_init_amd(c); | |
808 | + break; | |
809 | + case X86_VENDOR_INTEL: | |
810 | + early_init_intel(c); | |
811 | + break; | |
812 | + } | |
813 | + | |
814 | + early_get_cap(c); | |
815 | } | |
816 | ||
817 | static void __cpuinit generic_identify(struct cpuinfo_x86 * c) | |
818 | { | |
819 | u32 tfms, xlvl; | |
820 | - int ebx; | |
821 | + unsigned int ebx; | |
822 | ||
823 | if (have_cpuid_p()) { | |
824 | /* Get vendor name */ | |
825 | - cpuid(0x00000000, &c->cpuid_level, | |
826 | - (int *)&c->x86_vendor_id[0], | |
827 | - (int *)&c->x86_vendor_id[8], | |
828 | - (int *)&c->x86_vendor_id[4]); | |
829 | + cpuid(0x00000000, (unsigned int *)&c->cpuid_level, | |
830 | + (unsigned int *)&c->x86_vendor_id[0], | |
831 | + (unsigned int *)&c->x86_vendor_id[8], | |
832 | + (unsigned int *)&c->x86_vendor_id[4]); | |
833 | ||
834 | get_cpu_vendor(c, 0); | |
835 | /* Initialize the standard set of capabilities */ | |
836 | @@ -364,8 +402,6 @@ static void __cpuinit generic_identify(s | |
837 | init_scattered_cpuid_features(c); | |
838 | } | |
839 | ||
840 | - early_intel_workaround(c); | |
841 | - | |
842 | #ifdef CONFIG_X86_HT | |
843 | c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; | |
844 | #endif | |
845 | @@ -399,7 +435,7 @@ __setup("serialnumber", x86_serial_nr_se | |
846 | /* | |
847 | * This does the hard work of actually picking apart the CPU stuff... | |
848 | */ | |
849 | -static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |
850 | +void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |
851 | { | |
852 | int i; | |
853 | ||
854 | @@ -425,20 +461,9 @@ static void __cpuinit identify_cpu(struc | |
855 | ||
856 | generic_identify(c); | |
857 | ||
858 | - printk(KERN_DEBUG "CPU: After generic identify, caps:"); | |
859 | - for (i = 0; i < NCAPINTS; i++) | |
860 | - printk(" %08lx", c->x86_capability[i]); | |
861 | - printk("\n"); | |
862 | - | |
863 | - if (this_cpu->c_identify) { | |
864 | + if (this_cpu->c_identify) | |
865 | this_cpu->c_identify(c); | |
866 | ||
867 | - printk(KERN_DEBUG "CPU: After vendor identify, caps:"); | |
868 | - for (i = 0; i < NCAPINTS; i++) | |
869 | - printk(" %08lx", c->x86_capability[i]); | |
870 | - printk("\n"); | |
871 | - } | |
872 | - | |
873 | /* | |
874 | * Vendor-specific initialization. In this section we | |
875 | * canonicalize the feature flags, meaning if there are | |
876 | @@ -460,23 +485,6 @@ static void __cpuinit identify_cpu(struc | |
877 | * we do "generic changes." | |
878 | */ | |
879 | ||
880 | - /* TSC disabled? */ | |
881 | - if ( tsc_disable ) | |
882 | - clear_bit(X86_FEATURE_TSC, c->x86_capability); | |
883 | - | |
884 | - /* FXSR disabled? */ | |
885 | - if (disable_x86_fxsr) { | |
886 | - clear_bit(X86_FEATURE_FXSR, c->x86_capability); | |
887 | - clear_bit(X86_FEATURE_XMM, c->x86_capability); | |
888 | - } | |
889 | - | |
890 | - /* SEP disabled? */ | |
891 | - if (disable_x86_sep) | |
892 | - clear_bit(X86_FEATURE_SEP, c->x86_capability); | |
893 | - | |
894 | - if (disable_pse) | |
895 | - clear_bit(X86_FEATURE_PSE, c->x86_capability); | |
896 | - | |
897 | /* If the model name is still unset, do table lookup. */ | |
898 | if ( !c->x86_model_id[0] ) { | |
899 | char *p; | |
900 | @@ -489,13 +497,6 @@ static void __cpuinit identify_cpu(struc | |
901 | c->x86, c->x86_model); | |
902 | } | |
903 | ||
904 | - /* Now the feature flags better reflect actual CPU features! */ | |
905 | - | |
906 | - printk(KERN_DEBUG "CPU: After all inits, caps:"); | |
907 | - for (i = 0; i < NCAPINTS; i++) | |
908 | - printk(" %08lx", c->x86_capability[i]); | |
909 | - printk("\n"); | |
910 | - | |
911 | /* | |
912 | * On SMP, boot_cpu_data holds the common feature set between | |
913 | * all CPUs; so make sure that we indicate which features are | |
914 | @@ -508,8 +509,14 @@ static void __cpuinit identify_cpu(struc | |
915 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | |
916 | } | |
917 | ||
918 | + /* Clear all flags overriden by options */ | |
919 | + for (i = 0; i < NCAPINTS; i++) | |
920 | + c->x86_capability[i] &= ~cleared_cpu_caps[i]; | |
921 | + | |
922 | /* Init Machine Check Exception if available. */ | |
923 | mcheck_init(c); | |
924 | + | |
925 | + select_idle_routine(c); | |
926 | } | |
927 | ||
928 | void __init identify_boot_cpu(void) | |
929 | @@ -517,7 +524,6 @@ void __init identify_boot_cpu(void) | |
930 | identify_cpu(&boot_cpu_data); | |
931 | sysenter_setup(); | |
932 | enable_sep_cpu(); | |
933 | - mtrr_bp_init(); | |
934 | } | |
935 | ||
936 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | |
937 | @@ -574,6 +580,13 @@ void __cpuinit detect_ht(struct cpuinfo_ | |
938 | } | |
939 | #endif | |
940 | ||
941 | +static __init int setup_noclflush(char *arg) | |
942 | +{ | |
943 | + setup_clear_cpu_cap(X86_FEATURE_CLFLSH); | |
944 | + return 1; | |
945 | +} | |
946 | +__setup("noclflush", setup_noclflush); | |
947 | + | |
948 | void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | |
949 | { | |
950 | char *vendor = NULL; | |
951 | @@ -597,6 +610,17 @@ void __cpuinit print_cpu_info(struct cpu | |
952 | printk("\n"); | |
953 | } | |
954 | ||
955 | +static __init int setup_disablecpuid(char *arg) | |
956 | +{ | |
957 | + int bit; | |
958 | + if (get_option(&arg, &bit) && bit < NCAPINTS*32) | |
959 | + setup_clear_cpu_cap(bit); | |
960 | + else | |
961 | + return 0; | |
962 | + return 1; | |
963 | +} | |
964 | +__setup("clearcpuid=", setup_disablecpuid); | |
965 | + | |
966 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | |
967 | ||
968 | /* This is hacky. :) | |
969 | @@ -606,16 +630,6 @@ cpumask_t cpu_initialized __cpuinitdata | |
970 | * They will insert themselves into the cpu_devs structure. | |
971 | * Then, when cpu_init() is called, we can just iterate over that array. | |
972 | */ | |
973 | - | |
974 | -extern int intel_cpu_init(void); | |
975 | -extern int cyrix_init_cpu(void); | |
976 | -extern int nsc_init_cpu(void); | |
977 | -extern int amd_init_cpu(void); | |
978 | -extern int centaur_init_cpu(void); | |
979 | -extern int transmeta_init_cpu(void); | |
980 | -extern int nexgen_init_cpu(void); | |
981 | -extern int umc_init_cpu(void); | |
982 | - | |
983 | void __init early_cpu_init(void) | |
984 | { | |
985 | intel_cpu_init(); | |
986 | @@ -627,21 +641,13 @@ void __init early_cpu_init(void) | |
987 | nexgen_init_cpu(); | |
988 | umc_init_cpu(); | |
989 | early_cpu_detect(); | |
990 | - | |
991 | -#ifdef CONFIG_DEBUG_PAGEALLOC | |
992 | - /* pse is not compatible with on-the-fly unmapping, | |
993 | - * disable it even if the cpus claim to support it. | |
994 | - */ | |
995 | - clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); | |
996 | - disable_pse = 1; | |
997 | -#endif | |
998 | } | |
999 | ||
1000 | /* Make sure %fs is initialized properly in idle threads */ | |
1001 | -struct pt_regs * __devinit idle_regs(struct pt_regs *regs) | |
1002 | +struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) | |
1003 | { | |
1004 | memset(regs, 0, sizeof(struct pt_regs)); | |
1005 | - regs->xfs = __KERNEL_PERCPU; | |
1006 | + regs->fs = __KERNEL_PERCPU; | |
1007 | return regs; | |
1008 | } | |
1009 | ||
1010 | @@ -649,7 +655,7 @@ struct pt_regs * __devinit idle_regs(str | |
1011 | * it's on the real one. */ | |
1012 | void switch_to_new_gdt(void) | |
1013 | { | |
1014 | - struct Xgt_desc_struct gdt_descr; | |
1015 | + struct desc_ptr gdt_descr; | |
1016 | unsigned long va, frames[16]; | |
1017 | int f; | |
1018 | ||
1019 | @@ -692,12 +698,6 @@ void __cpuinit cpu_init(void) | |
1020 | ||
1021 | if (cpu_has_vme || cpu_has_de) | |
1022 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | |
1023 | - if (tsc_disable && cpu_has_tsc) { | |
1024 | - printk(KERN_NOTICE "Disabling TSC...\n"); | |
1025 | - /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ | |
1026 | - clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); | |
1027 | - set_in_cr4(X86_CR4_TSD); | |
1028 | - } | |
1029 | ||
1030 | switch_to_new_gdt(); | |
1031 | ||
1032 | @@ -710,7 +710,7 @@ void __cpuinit cpu_init(void) | |
1033 | BUG(); | |
1034 | enter_lazy_tlb(&init_mm, curr); | |
1035 | ||
1036 | - load_esp0(t, thread); | |
1037 | + load_sp0(t, thread); | |
1038 | ||
1039 | load_LDT(&init_mm.context); | |
1040 | ||
00e5a55c BS |
1041 | --- sle11-2009-05-14.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2009-02-16 16:17:21.000000000 +0100 |
1042 | +++ sle11-2009-05-14/arch/x86/kernel/cpu/mtrr/main-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
1043 | @@ -33,7 +33,7 @@ struct mtrr_ops generic_mtrr_ops = { |
1044 | ||
1045 | struct mtrr_ops *mtrr_if = &generic_mtrr_ops; | |
1046 | unsigned int num_var_ranges; | |
1047 | -unsigned int *usage_table; | |
1048 | +unsigned int mtrr_usage_table[MAX_VAR_RANGES]; | |
1049 | ||
1050 | static void __init set_num_var_ranges(void) | |
1051 | { | |
1052 | @@ -52,17 +52,12 @@ static void __init init_table(void) | |
1053 | int i, max; | |
1054 | ||
1055 | max = num_var_ranges; | |
1056 | - if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL)) | |
1057 | - == NULL) { | |
1058 | - printk(KERN_ERR "mtrr: could not allocate\n"); | |
1059 | - return; | |
1060 | - } | |
1061 | for (i = 0; i < max; i++) | |
1062 | - usage_table[i] = 0; | |
1063 | + mtrr_usage_table[i] = 0; | |
1064 | } | |
1065 | ||
1066 | int mtrr_add_page(unsigned long base, unsigned long size, | |
1067 | - unsigned int type, char increment) | |
1068 | + unsigned int type, bool increment) | |
1069 | { | |
1070 | int error; | |
1071 | struct xen_platform_op op; | |
1072 | @@ -81,7 +76,7 @@ int mtrr_add_page(unsigned long base, un | |
1073 | } | |
1074 | ||
1075 | if (increment) | |
1076 | - ++usage_table[op.u.add_memtype.reg]; | |
1077 | + ++mtrr_usage_table[op.u.add_memtype.reg]; | |
1078 | ||
1079 | mutex_unlock(&mtrr_mutex); | |
1080 | ||
1081 | @@ -103,7 +98,7 @@ static int mtrr_check(unsigned long base | |
1082 | ||
1083 | int | |
1084 | mtrr_add(unsigned long base, unsigned long size, unsigned int type, | |
1085 | - char increment) | |
1086 | + bool increment) | |
1087 | { | |
1088 | if (mtrr_check(base, size)) | |
1089 | return -EINVAL; | |
1090 | @@ -136,11 +131,11 @@ int mtrr_del_page(int reg, unsigned long | |
1091 | goto out; | |
1092 | } | |
1093 | } | |
1094 | - if (usage_table[reg] < 1) { | |
1095 | + if (mtrr_usage_table[reg] < 1) { | |
1096 | printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); | |
1097 | goto out; | |
1098 | } | |
1099 | - if (--usage_table[reg] < 1) { | |
1100 | + if (--mtrr_usage_table[reg] < 1) { | |
1101 | op.cmd = XENPF_del_memtype; | |
1102 | op.u.del_memtype.handle = 0; | |
1103 | op.u.del_memtype.reg = reg; | |
00e5a55c BS |
1104 | --- sle11-2009-05-14.orig/arch/x86/kernel/e820_32-xen.c 2009-02-16 16:18:36.000000000 +0100 |
1105 | +++ sle11-2009-05-14/arch/x86/kernel/e820_32-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
1106 | @@ -7,7 +7,6 @@ |
1107 | #include <linux/kexec.h> | |
1108 | #include <linux/module.h> | |
1109 | #include <linux/mm.h> | |
1110 | -#include <linux/efi.h> | |
1111 | #include <linux/pfn.h> | |
1112 | #include <linux/uaccess.h> | |
1113 | #include <linux/suspend.h> | |
1114 | @@ -18,11 +17,6 @@ | |
1115 | #include <asm/setup.h> | |
1116 | #include <xen/interface/memory.h> | |
1117 | ||
1118 | -#ifdef CONFIG_EFI | |
1119 | -int efi_enabled = 0; | |
1120 | -EXPORT_SYMBOL(efi_enabled); | |
1121 | -#endif | |
1122 | - | |
1123 | struct e820map e820; | |
1124 | struct change_member { | |
1125 | struct e820entry *pbios; /* pointer to original bios entry */ | |
1126 | @@ -38,26 +32,6 @@ unsigned long pci_mem_start = 0x10000000 | |
1127 | EXPORT_SYMBOL(pci_mem_start); | |
1128 | #endif | |
1129 | extern int user_defined_memmap; | |
1130 | -struct resource data_resource = { | |
1131 | - .name = "Kernel data", | |
1132 | - .start = 0, | |
1133 | - .end = 0, | |
1134 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
1135 | -}; | |
1136 | - | |
1137 | -struct resource code_resource = { | |
1138 | - .name = "Kernel code", | |
1139 | - .start = 0, | |
1140 | - .end = 0, | |
1141 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
1142 | -}; | |
1143 | - | |
1144 | -struct resource bss_resource = { | |
1145 | - .name = "Kernel bss", | |
1146 | - .start = 0, | |
1147 | - .end = 0, | |
1148 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
1149 | -}; | |
1150 | ||
1151 | static struct resource system_rom_resource = { | |
1152 | .name = "System ROM", | |
1153 | @@ -112,60 +86,6 @@ static struct resource video_rom_resourc | |
1154 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
1155 | }; | |
1156 | ||
1157 | -static struct resource video_ram_resource = { | |
1158 | - .name = "Video RAM area", | |
1159 | - .start = 0xa0000, | |
1160 | - .end = 0xbffff, | |
1161 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
1162 | -}; | |
1163 | - | |
1164 | -static struct resource standard_io_resources[] = { { | |
1165 | - .name = "dma1", | |
1166 | - .start = 0x0000, | |
1167 | - .end = 0x001f, | |
1168 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
1169 | -}, { | |
1170 | - .name = "pic1", | |
1171 | - .start = 0x0020, | |
1172 | - .end = 0x0021, | |
1173 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
1174 | -}, { | |
1175 | - .name = "timer0", | |
1176 | - .start = 0x0040, | |
1177 | - .end = 0x0043, | |
1178 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
1179 | -}, { | |
1180 | - .name = "timer1", | |
1181 | - .start = 0x0050, | |
1182 | - .end = 0x0053, | |
1183 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
1184 | -}, { | |
1185 | - .name = "keyboard", | |
1186 | - .start = 0x0060, | |
1187 | - .end = 0x006f, | |
1188 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
1189 | -}, { | |
1190 | - .name = "dma page reg", | |
1191 | - .start = 0x0080, | |
1192 | - .end = 0x008f, | |
1193 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
1194 | -}, { | |
1195 | - .name = "pic2", | |
1196 | - .start = 0x00a0, | |
1197 | - .end = 0x00a1, | |
1198 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
1199 | -}, { | |
1200 | - .name = "dma2", | |
1201 | - .start = 0x00c0, | |
1202 | - .end = 0x00df, | |
1203 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
1204 | -}, { | |
1205 | - .name = "fpu", | |
1206 | - .start = 0x00f0, | |
1207 | - .end = 0x00ff, | |
1208 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
1209 | -} }; | |
1210 | - | |
1211 | #define ROMSIGNATURE 0xaa55 | |
1212 | ||
1213 | static int __init romsignature(const unsigned char *rom) | |
1214 | @@ -272,10 +192,9 @@ static struct e820map machine_e820; | |
1215 | * Request address space for all standard RAM and ROM resources | |
1216 | * and also for regions reported as reserved by the e820. | |
1217 | */ | |
1218 | -static void __init | |
1219 | -legacy_init_iomem_resources(struct resource *code_resource, | |
1220 | - struct resource *data_resource, | |
1221 | - struct resource *bss_resource) | |
1222 | +void __init init_iomem_resources(struct resource *code_resource, | |
1223 | + struct resource *data_resource, | |
1224 | + struct resource *bss_resource) | |
1225 | { | |
1226 | int i; | |
1227 | ||
1228 | @@ -324,39 +243,6 @@ legacy_init_iomem_resources(struct resou | |
1229 | ||
1230 | #undef e820 | |
1231 | ||
1232 | -/* | |
1233 | - * Request address space for all standard resources | |
1234 | - * | |
1235 | - * This is called just before pcibios_init(), which is also a | |
1236 | - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). | |
1237 | - */ | |
1238 | -static int __init request_standard_resources(void) | |
1239 | -{ | |
1240 | - int i; | |
1241 | - | |
1242 | - /* Nothing to do if not running in dom0. */ | |
1243 | - if (!is_initial_xendomain()) | |
1244 | - return 0; | |
1245 | - | |
1246 | - printk("Setting up standard PCI resources\n"); | |
1247 | - if (efi_enabled) | |
1248 | - efi_initialize_iomem_resources(&code_resource, | |
1249 | - &data_resource, &bss_resource); | |
1250 | - else | |
1251 | - legacy_init_iomem_resources(&code_resource, | |
1252 | - &data_resource, &bss_resource); | |
1253 | - | |
1254 | - /* EFI systems may still have VGA */ | |
1255 | - request_resource(&iomem_resource, &video_ram_resource); | |
1256 | - | |
1257 | - /* request I/O space for devices used on all i[345]86 PCs */ | |
1258 | - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | |
1259 | - request_resource(&ioport_resource, &standard_io_resources[i]); | |
1260 | - return 0; | |
1261 | -} | |
1262 | - | |
1263 | -subsys_initcall(request_standard_resources); | |
1264 | - | |
1265 | #if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) | |
1266 | /** | |
1267 | * e820_mark_nosave_regions - Find the ranges of physical addresses that do not | |
1268 | @@ -393,19 +279,17 @@ void __init add_memory_region(unsigned l | |
1269 | { | |
1270 | int x; | |
1271 | ||
1272 | - if (!efi_enabled) { | |
1273 | - x = e820.nr_map; | |
1274 | - | |
1275 | - if (x == E820MAX) { | |
1276 | - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | |
1277 | - return; | |
1278 | - } | |
1279 | + x = e820.nr_map; | |
1280 | ||
1281 | - e820.map[x].addr = start; | |
1282 | - e820.map[x].size = size; | |
1283 | - e820.map[x].type = type; | |
1284 | - e820.nr_map++; | |
1285 | + if (x == E820MAX) { | |
1286 | + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | |
1287 | + return; | |
1288 | } | |
1289 | + | |
1290 | + e820.map[x].addr = start; | |
1291 | + e820.map[x].size = size; | |
1292 | + e820.map[x].type = type; | |
1293 | + e820.nr_map++; | |
1294 | } /* add_memory_region */ | |
1295 | ||
1296 | /* | |
1297 | @@ -642,29 +526,6 @@ int __init copy_e820_map(struct e820entr | |
1298 | } | |
1299 | ||
1300 | /* | |
1301 | - * Callback for efi_memory_walk. | |
1302 | - */ | |
1303 | -static int __init | |
1304 | -efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) | |
1305 | -{ | |
1306 | - unsigned long *max_pfn = arg, pfn; | |
1307 | - | |
1308 | - if (start < end) { | |
1309 | - pfn = PFN_UP(end -1); | |
1310 | - if (pfn > *max_pfn) | |
1311 | - *max_pfn = pfn; | |
1312 | - } | |
1313 | - return 0; | |
1314 | -} | |
1315 | - | |
1316 | -static int __init | |
1317 | -efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) | |
1318 | -{ | |
1319 | - memory_present(0, PFN_UP(start), PFN_DOWN(end)); | |
1320 | - return 0; | |
1321 | -} | |
1322 | - | |
1323 | -/* | |
1324 | * Find the highest page frame number we have available | |
1325 | */ | |
1326 | void __init find_max_pfn(void) | |
1327 | @@ -672,11 +533,6 @@ void __init find_max_pfn(void) | |
1328 | int i; | |
1329 | ||
1330 | max_pfn = 0; | |
1331 | - if (efi_enabled) { | |
1332 | - efi_memmap_walk(efi_find_max_pfn, &max_pfn); | |
1333 | - efi_memmap_walk(efi_memory_present_wrapper, NULL); | |
1334 | - return; | |
1335 | - } | |
1336 | ||
1337 | for (i = 0; i < e820.nr_map; i++) { | |
1338 | unsigned long start, end; | |
1339 | @@ -694,34 +550,12 @@ void __init find_max_pfn(void) | |
1340 | } | |
1341 | ||
1342 | /* | |
1343 | - * Free all available memory for boot time allocation. Used | |
1344 | - * as a callback function by efi_memory_walk() | |
1345 | - */ | |
1346 | - | |
1347 | -static int __init | |
1348 | -free_available_memory(unsigned long start, unsigned long end, void *arg) | |
1349 | -{ | |
1350 | - /* check max_low_pfn */ | |
1351 | - if (start >= (max_low_pfn << PAGE_SHIFT)) | |
1352 | - return 0; | |
1353 | - if (end >= (max_low_pfn << PAGE_SHIFT)) | |
1354 | - end = max_low_pfn << PAGE_SHIFT; | |
1355 | - if (start < end) | |
1356 | - free_bootmem(start, end - start); | |
1357 | - | |
1358 | - return 0; | |
1359 | -} | |
1360 | -/* | |
1361 | * Register fully available low RAM pages with the bootmem allocator. | |
1362 | */ | |
1363 | void __init register_bootmem_low_pages(unsigned long max_low_pfn) | |
1364 | { | |
1365 | int i; | |
1366 | ||
1367 | - if (efi_enabled) { | |
1368 | - efi_memmap_walk(free_available_memory, NULL); | |
1369 | - return; | |
1370 | - } | |
1371 | for (i = 0; i < e820.nr_map; i++) { | |
1372 | unsigned long curr_pfn, last_pfn, size; | |
1373 | /* | |
1374 | @@ -855,56 +689,12 @@ void __init print_memory_map(char *who) | |
1375 | } | |
1376 | } | |
1377 | ||
1378 | -static __init __always_inline void efi_limit_regions(unsigned long long size) | |
1379 | -{ | |
1380 | - unsigned long long current_addr = 0; | |
1381 | - efi_memory_desc_t *md, *next_md; | |
1382 | - void *p, *p1; | |
1383 | - int i, j; | |
1384 | - | |
1385 | - j = 0; | |
1386 | - p1 = memmap.map; | |
1387 | - for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { | |
1388 | - md = p; | |
1389 | - next_md = p1; | |
1390 | - current_addr = md->phys_addr + | |
1391 | - PFN_PHYS(md->num_pages); | |
1392 | - if (is_available_memory(md)) { | |
1393 | - if (md->phys_addr >= size) continue; | |
1394 | - memcpy(next_md, md, memmap.desc_size); | |
1395 | - if (current_addr >= size) { | |
1396 | - next_md->num_pages -= | |
1397 | - PFN_UP(current_addr-size); | |
1398 | - } | |
1399 | - p1 += memmap.desc_size; | |
1400 | - next_md = p1; | |
1401 | - j++; | |
1402 | - } else if ((md->attribute & EFI_MEMORY_RUNTIME) == | |
1403 | - EFI_MEMORY_RUNTIME) { | |
1404 | - /* In order to make runtime services | |
1405 | - * available we have to include runtime | |
1406 | - * memory regions in memory map */ | |
1407 | - memcpy(next_md, md, memmap.desc_size); | |
1408 | - p1 += memmap.desc_size; | |
1409 | - next_md = p1; | |
1410 | - j++; | |
1411 | - } | |
1412 | - } | |
1413 | - memmap.nr_map = j; | |
1414 | - memmap.map_end = memmap.map + | |
1415 | - (memmap.nr_map * memmap.desc_size); | |
1416 | -} | |
1417 | - | |
1418 | void __init limit_regions(unsigned long long size) | |
1419 | { | |
1420 | unsigned long long current_addr = 0; | |
1421 | int i; | |
1422 | ||
1423 | print_memory_map("limit_regions start"); | |
1424 | - if (efi_enabled) { | |
1425 | - efi_limit_regions(size); | |
1426 | - return; | |
1427 | - } | |
1428 | for (i = 0; i < e820.nr_map; i++) { | |
1429 | current_addr = e820.map[i].addr + e820.map[i].size; | |
1430 | if (current_addr < size) | |
1431 | @@ -1056,3 +846,44 @@ static int __init parse_memmap(char *arg | |
1432 | return 0; | |
1433 | } | |
1434 | early_param("memmap", parse_memmap); | |
1435 | + | |
1436 | +#ifndef CONFIG_XEN | |
1437 | +void __init update_memory_range(u64 start, u64 size, unsigned old_type, | |
1438 | + unsigned new_type) | |
1439 | +{ | |
1440 | + int i; | |
1441 | + | |
1442 | + BUG_ON(old_type == new_type); | |
1443 | + | |
1444 | + for (i = 0; i < e820.nr_map; i++) { | |
1445 | + struct e820entry *ei = &e820.map[i]; | |
1446 | + u64 final_start, final_end; | |
1447 | + if (ei->type != old_type) | |
1448 | + continue; | |
1449 | + /* totally covered? */ | |
1450 | + if (ei->addr >= start && ei->size <= size) { | |
1451 | + ei->type = new_type; | |
1452 | + continue; | |
1453 | + } | |
1454 | + /* partially covered */ | |
1455 | + final_start = max(start, ei->addr); | |
1456 | + final_end = min(start + size, ei->addr + ei->size); | |
1457 | + if (final_start >= final_end) | |
1458 | + continue; | |
1459 | + add_memory_region(final_start, final_end - final_start, | |
1460 | + new_type); | |
1461 | + } | |
1462 | +} | |
1463 | + | |
1464 | +void __init update_e820(void) | |
1465 | +{ | |
1466 | + u8 nr_map; | |
1467 | + | |
1468 | + nr_map = e820.nr_map; | |
1469 | + if (sanitize_e820_map(e820.map, &nr_map)) | |
1470 | + return; | |
1471 | + e820.nr_map = nr_map; | |
1472 | + printk(KERN_INFO "modified physical RAM map:\n"); | |
1473 | + print_memory_map("modified"); | |
1474 | +} | |
1475 | +#endif | |
00e5a55c BS |
1476 | --- sle11-2009-05-14.orig/arch/x86/kernel/e820_64-xen.c 2009-02-16 16:18:36.000000000 +0100 |
1477 | +++ sle11-2009-05-14/arch/x86/kernel/e820_64-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
1478 | @@ -1,4 +1,4 @@ |
1479 | -/* | |
1480 | +/* | |
1481 | * Handle the memory map. | |
1482 | * The functions here do the job until bootmem takes over. | |
1483 | * | |
1484 | @@ -26,6 +26,7 @@ | |
1485 | #include <asm/proto.h> | |
1486 | #include <asm/setup.h> | |
1487 | #include <asm/sections.h> | |
1488 | +#include <asm/kdebug.h> | |
1489 | #include <xen/interface/memory.h> | |
1490 | ||
1491 | struct e820map e820 __initdata; | |
1492 | @@ -33,96 +34,103 @@ struct e820map e820 __initdata; | |
1493 | struct e820map machine_e820; | |
1494 | #endif | |
1495 | ||
1496 | -/* | |
1497 | +/* | |
1498 | * PFN of last memory page. | |
1499 | */ | |
1500 | -unsigned long end_pfn; | |
1501 | -EXPORT_SYMBOL(end_pfn); | |
1502 | +unsigned long end_pfn; | |
1503 | ||
1504 | -/* | |
1505 | +/* | |
1506 | * end_pfn only includes RAM, while end_pfn_map includes all e820 entries. | |
1507 | * The direct mapping extends to end_pfn_map, so that we can directly access | |
1508 | * apertures, ACPI and other tables without having to play with fixmaps. | |
1509 | - */ | |
1510 | -unsigned long end_pfn_map; | |
1511 | + */ | |
1512 | +unsigned long end_pfn_map; | |
1513 | ||
1514 | -/* | |
1515 | +/* | |
1516 | * Last pfn which the user wants to use. | |
1517 | */ | |
1518 | static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; | |
1519 | ||
1520 | -extern struct resource code_resource, data_resource, bss_resource; | |
1521 | - | |
1522 | -/* Check for some hardcoded bad areas that early boot is not allowed to touch */ | |
1523 | -static inline int bad_addr(unsigned long *addrp, unsigned long size) | |
1524 | -{ | |
1525 | - unsigned long addr = *addrp, last = addr + size; | |
1526 | +/* | |
1527 | + * Early reserved memory areas. | |
1528 | + */ | |
1529 | +#define MAX_EARLY_RES 20 | |
1530 | ||
1531 | +struct early_res { | |
1532 | + unsigned long start, end; | |
1533 | + char name[16]; | |
1534 | +}; | |
1535 | +static struct early_res early_res[MAX_EARLY_RES] __initdata = { | |
1536 | #ifndef CONFIG_XEN | |
1537 | - /* various gunk below that needed for SMP startup */ | |
1538 | - if (addr < 0x8000) { | |
1539 | - *addrp = PAGE_ALIGN(0x8000); | |
1540 | - return 1; | |
1541 | - } | |
1542 | - | |
1543 | - /* direct mapping tables of the kernel */ | |
1544 | - if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) { | |
1545 | - *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT); | |
1546 | - return 1; | |
1547 | - } | |
1548 | - | |
1549 | - /* initrd */ | |
1550 | -#ifdef CONFIG_BLK_DEV_INITRD | |
1551 | - if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | |
1552 | - unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | |
1553 | - unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | |
1554 | - unsigned long ramdisk_end = ramdisk_image+ramdisk_size; | |
1555 | - | |
1556 | - if (last >= ramdisk_image && addr < ramdisk_end) { | |
1557 | - *addrp = PAGE_ALIGN(ramdisk_end); | |
1558 | - return 1; | |
1559 | - } | |
1560 | - } | |
1561 | + { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ | |
1562 | +#ifdef CONFIG_SMP | |
1563 | + { SMP_TRAMPOLINE_BASE, SMP_TRAMPOLINE_BASE + 2*PAGE_SIZE, "SMP_TRAMPOLINE" }, | |
1564 | #endif | |
1565 | - /* kernel code */ | |
1566 | - if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) { | |
1567 | - *addrp = PAGE_ALIGN(__pa_symbol(&_end)); | |
1568 | - return 1; | |
1569 | - } | |
1570 | +#endif | |
1571 | + {} | |
1572 | +}; | |
1573 | ||
1574 | - if (last >= ebda_addr && addr < ebda_addr + ebda_size) { | |
1575 | - *addrp = PAGE_ALIGN(ebda_addr + ebda_size); | |
1576 | - return 1; | |
1577 | +void __init reserve_early(unsigned long start, unsigned long end, char *name) | |
1578 | +{ | |
1579 | + int i; | |
1580 | + struct early_res *r; | |
1581 | + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | |
1582 | + r = &early_res[i]; | |
1583 | + if (end > r->start && start < r->end) | |
1584 | + panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n", | |
1585 | + start, end - 1, name?name:"", r->start, r->end - 1, r->name); | |
1586 | } | |
1587 | + if (i >= MAX_EARLY_RES) | |
1588 | + panic("Too many early reservations"); | |
1589 | + r = &early_res[i]; | |
1590 | + r->start = start; | |
1591 | + r->end = end; | |
1592 | + if (name) | |
1593 | + strncpy(r->name, name, sizeof(r->name) - 1); | |
1594 | +} | |
1595 | ||
1596 | -#ifdef CONFIG_NUMA | |
1597 | - /* NUMA memory to node map */ | |
1598 | - if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) { | |
1599 | - *addrp = nodemap_addr + nodemap_size; | |
1600 | - return 1; | |
1601 | +void __init early_res_to_bootmem(void) | |
1602 | +{ | |
1603 | + int i; | |
1604 | + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | |
1605 | + struct early_res *r = &early_res[i]; | |
1606 | + printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i, | |
1607 | + r->start, r->end - 1, r->name); | |
1608 | + reserve_bootmem_generic(r->start, r->end - r->start); | |
1609 | } | |
1610 | -#endif | |
1611 | - /* XXX ramdisk image here? */ | |
1612 | -#else | |
1613 | - if (last < (table_end<<PAGE_SHIFT)) { | |
1614 | - *addrp = table_end << PAGE_SHIFT; | |
1615 | - return 1; | |
1616 | +} | |
1617 | + | |
1618 | +/* Check for already reserved areas */ | |
1619 | +static inline int bad_addr(unsigned long *addrp, unsigned long size) | |
1620 | +{ | |
1621 | + int i; | |
1622 | + unsigned long addr = *addrp, last; | |
1623 | + int changed = 0; | |
1624 | +again: | |
1625 | + last = addr + size; | |
1626 | + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | |
1627 | + struct early_res *r = &early_res[i]; | |
1628 | + if (last >= r->start && addr < r->end) { | |
1629 | + *addrp = addr = r->end; | |
1630 | + changed = 1; | |
1631 | + goto again; | |
1632 | + } | |
1633 | } | |
1634 | -#endif | |
1635 | - return 0; | |
1636 | -} | |
1637 | + return changed; | |
1638 | +} | |
1639 | ||
1640 | /* | |
1641 | * This function checks if any part of the range <start,end> is mapped | |
1642 | * with type. | |
1643 | */ | |
1644 | -int e820_any_mapped(unsigned long start, unsigned long end, unsigned type) | |
1645 | -{ | |
1646 | +int | |
1647 | +e820_any_mapped(unsigned long start, unsigned long end, unsigned type) | |
1648 | +{ | |
1649 | int i; | |
1650 | ||
1651 | #ifndef CONFIG_XEN | |
1652 | - for (i = 0; i < e820.nr_map; i++) { | |
1653 | - struct e820entry *ei = &e820.map[i]; | |
1654 | + for (i = 0; i < e820.nr_map; i++) { | |
1655 | + struct e820entry *ei = &e820.map[i]; | |
1656 | #else | |
1657 | if (!is_initial_xendomain()) | |
1658 | return 0; | |
1659 | @@ -130,12 +138,12 @@ int e820_any_mapped(unsigned long start, | |
1660 | const struct e820entry *ei = &machine_e820.map[i]; | |
1661 | #endif | |
1662 | ||
1663 | - if (type && ei->type != type) | |
1664 | + if (type && ei->type != type) | |
1665 | continue; | |
1666 | if (ei->addr >= end || ei->addr + ei->size <= start) | |
1667 | - continue; | |
1668 | - return 1; | |
1669 | - } | |
1670 | + continue; | |
1671 | + return 1; | |
1672 | + } | |
1673 | return 0; | |
1674 | } | |
1675 | EXPORT_SYMBOL_GPL(e820_any_mapped); | |
1676 | @@ -146,7 +154,8 @@ EXPORT_SYMBOL_GPL(e820_any_mapped); | |
1677 | * Note: this function only works correct if the e820 table is sorted and | |
1678 | * not-overlapping, which is the case | |
1679 | */ | |
1680 | -int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type) | |
1681 | +int __init e820_all_mapped(unsigned long start, unsigned long end, | |
1682 | + unsigned type) | |
1683 | { | |
1684 | int i; | |
1685 | ||
1686 | @@ -171,65 +180,77 @@ int __init e820_all_mapped(unsigned long | |
1687 | */ | |
1688 | if (ei->addr <= start) | |
1689 | start = ei->addr + ei->size; | |
1690 | - /* if start is now at or beyond end, we're done, full coverage */ | |
1691 | + /* | |
1692 | + * if start is now at or beyond end, we're done, full | |
1693 | + * coverage | |
1694 | + */ | |
1695 | if (start >= end) | |
1696 | - return 1; /* we're done */ | |
1697 | + return 1; | |
1698 | } | |
1699 | return 0; | |
1700 | } | |
1701 | ||
1702 | -/* | |
1703 | - * Find a free area in a specific range. | |
1704 | - */ | |
1705 | -unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsigned size) | |
1706 | -{ | |
1707 | - int i; | |
1708 | - for (i = 0; i < e820.nr_map; i++) { | |
1709 | - struct e820entry *ei = &e820.map[i]; | |
1710 | - unsigned long addr = ei->addr, last; | |
1711 | - if (ei->type != E820_RAM) | |
1712 | - continue; | |
1713 | - if (addr < start) | |
1714 | +/* | |
1715 | + * Find a free area with specified alignment in a specific range. | |
1716 | + */ | |
1717 | +unsigned long __init find_e820_area(unsigned long start, unsigned long end, | |
1718 | + unsigned size, unsigned long align) | |
1719 | +{ | |
1720 | + int i; | |
1721 | + unsigned long mask = ~(align - 1); | |
1722 | + | |
1723 | + for (i = 0; i < e820.nr_map; i++) { | |
1724 | + struct e820entry *ei = &e820.map[i]; | |
1725 | + unsigned long addr = ei->addr, last; | |
1726 | + | |
1727 | + if (ei->type != E820_RAM) | |
1728 | + continue; | |
1729 | + if (addr < start) | |
1730 | addr = start; | |
1731 | - if (addr > ei->addr + ei->size) | |
1732 | - continue; | |
1733 | + if (addr > ei->addr + ei->size) | |
1734 | + continue; | |
1735 | while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size) | |
1736 | ; | |
1737 | - last = PAGE_ALIGN(addr) + size; | |
1738 | + addr = (addr + align - 1) & mask; | |
1739 | + last = addr + size; | |
1740 | if (last > ei->addr + ei->size) | |
1741 | continue; | |
1742 | - if (last > end) | |
1743 | + if (last > end) | |
1744 | continue; | |
1745 | - return addr; | |
1746 | - } | |
1747 | - return -1UL; | |
1748 | -} | |
1749 | + return addr; | |
1750 | + } | |
1751 | + return -1UL; | |
1752 | +} | |
1753 | ||
1754 | /* | |
1755 | * Find the highest page frame number we have available | |
1756 | */ | |
1757 | unsigned long __init e820_end_of_ram(void) | |
1758 | { | |
1759 | - unsigned long end_pfn = 0; | |
1760 | + unsigned long end_pfn; | |
1761 | + | |
1762 | end_pfn = find_max_pfn_with_active_regions(); | |
1763 | - | |
1764 | - if (end_pfn > end_pfn_map) | |
1765 | + | |
1766 | + if (end_pfn > end_pfn_map) | |
1767 | end_pfn_map = end_pfn; | |
1768 | if (end_pfn_map > MAXMEM>>PAGE_SHIFT) | |
1769 | end_pfn_map = MAXMEM>>PAGE_SHIFT; | |
1770 | if (end_pfn > end_user_pfn) | |
1771 | end_pfn = end_user_pfn; | |
1772 | - if (end_pfn > end_pfn_map) | |
1773 | - end_pfn = end_pfn_map; | |
1774 | + if (end_pfn > end_pfn_map) | |
1775 | + end_pfn = end_pfn_map; | |
1776 | ||
1777 | - printk("end_pfn_map = %lu\n", end_pfn_map); | |
1778 | - return end_pfn; | |
1779 | + printk(KERN_INFO "end_pfn_map = %lu\n", end_pfn_map); | |
1780 | + return end_pfn; | |
1781 | } | |
1782 | ||
1783 | /* | |
1784 | * Mark e820 reserved areas as busy for the resource manager. | |
1785 | */ | |
1786 | -void __init e820_reserve_resources(struct e820entry *e820, int nr_map) | |
1787 | +void __init e820_reserve_resources(struct e820entry *e820, int nr_map, | |
1788 | + struct resource *code_resource, | |
1789 | + struct resource *data_resource, | |
1790 | + struct resource *bss_resource) | |
1791 | { | |
1792 | int i; | |
1793 | for (i = 0; i < nr_map; i++) { | |
1794 | @@ -247,14 +268,14 @@ void __init e820_reserve_resources(struc | |
1795 | request_resource(&iomem_resource, res); | |
1796 | if (e820[i].type == E820_RAM) { | |
1797 | /* | |
1798 | - * We don't know which RAM region contains kernel data, | |
1799 | - * so we try it repeatedly and let the resource manager | |
1800 | - * test it. | |
1801 | + * We don't know which RAM region contains kernel data, | |
1802 | + * so we try it repeatedly and let the resource manager | |
1803 | + * test it. | |
1804 | */ | |
1805 | #ifndef CONFIG_XEN | |
1806 | - request_resource(res, &code_resource); | |
1807 | - request_resource(res, &data_resource); | |
1808 | - request_resource(res, &bss_resource); | |
1809 | + request_resource(res, code_resource); | |
1810 | + request_resource(res, data_resource); | |
1811 | + request_resource(res, bss_resource); | |
1812 | #endif | |
1813 | #ifdef CONFIG_KEXEC | |
1814 | if (crashk_res.start != crashk_res.end) | |
1815 | @@ -357,9 +378,9 @@ e820_register_active_regions(int nid, un | |
1816 | add_active_range(nid, ei_startpfn, ei_endpfn); | |
1817 | } | |
1818 | ||
1819 | -/* | |
1820 | +/* | |
1821 | * Add a memory region to the kernel e820 map. | |
1822 | - */ | |
1823 | + */ | |
1824 | void __init add_memory_region(unsigned long start, unsigned long size, int type) | |
1825 | { | |
1826 | int x = e820.nr_map; | |
1827 | @@ -384,9 +405,7 @@ unsigned long __init e820_hole_size(unsi | |
1828 | { | |
1829 | unsigned long start_pfn = start >> PAGE_SHIFT; | |
1830 | unsigned long end_pfn = end >> PAGE_SHIFT; | |
1831 | - unsigned long ei_startpfn; | |
1832 | - unsigned long ei_endpfn; | |
1833 | - unsigned long ram = 0; | |
1834 | + unsigned long ei_startpfn, ei_endpfn, ram = 0; | |
1835 | int i; | |
1836 | ||
1837 | for (i = 0; i < e820.nr_map; i++) { | |
1838 | @@ -398,28 +417,31 @@ unsigned long __init e820_hole_size(unsi | |
1839 | return end - start - (ram << PAGE_SHIFT); | |
1840 | } | |
1841 | ||
1842 | -void __init e820_print_map(char *who) | |
1843 | +static void __init e820_print_map(char *who) | |
1844 | { | |
1845 | int i; | |
1846 | ||
1847 | for (i = 0; i < e820.nr_map; i++) { | |
1848 | printk(KERN_INFO " %s: %016Lx - %016Lx ", who, | |
1849 | - (unsigned long long) e820.map[i].addr, | |
1850 | - (unsigned long long) (e820.map[i].addr + e820.map[i].size)); | |
1851 | + (unsigned long long) e820.map[i].addr, | |
1852 | + (unsigned long long) | |
1853 | + (e820.map[i].addr + e820.map[i].size)); | |
1854 | switch (e820.map[i].type) { | |
1855 | - case E820_RAM: printk("(usable)\n"); | |
1856 | - break; | |
1857 | + case E820_RAM: | |
1858 | + printk(KERN_CONT "(usable)\n"); | |
1859 | + break; | |
1860 | case E820_RESERVED: | |
1861 | - printk("(reserved)\n"); | |
1862 | - break; | |
1863 | + printk(KERN_CONT "(reserved)\n"); | |
1864 | + break; | |
1865 | case E820_ACPI: | |
1866 | - printk("(ACPI data)\n"); | |
1867 | - break; | |
1868 | + printk(KERN_CONT "(ACPI data)\n"); | |
1869 | + break; | |
1870 | case E820_NVS: | |
1871 | - printk("(ACPI NVS)\n"); | |
1872 | - break; | |
1873 | - default: printk("type %u\n", e820.map[i].type); | |
1874 | - break; | |
1875 | + printk(KERN_CONT "(ACPI NVS)\n"); | |
1876 | + break; | |
1877 | + default: | |
1878 | + printk(KERN_CONT "type %u\n", e820.map[i].type); | |
1879 | + break; | |
1880 | } | |
1881 | } | |
1882 | } | |
1883 | @@ -427,11 +449,11 @@ void __init e820_print_map(char *who) | |
1884 | /* | |
1885 | * Sanitize the BIOS e820 map. | |
1886 | * | |
1887 | - * Some e820 responses include overlapping entries. The following | |
1888 | + * Some e820 responses include overlapping entries. The following | |
1889 | * replaces the original e820 map with a new one, removing overlaps. | |
1890 | * | |
1891 | */ | |
1892 | -static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | |
1893 | +static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) | |
1894 | { | |
1895 | struct change_member { | |
1896 | struct e820entry *pbios; /* pointer to original bios entry */ | |
1897 | @@ -451,7 +473,8 @@ static int __init sanitize_e820_map(stru | |
1898 | int i; | |
1899 | ||
1900 | /* | |
1901 | - Visually we're performing the following (1,2,3,4 = memory types)... | |
1902 | + Visually we're performing the following | |
1903 | + (1,2,3,4 = memory types)... | |
1904 | ||
1905 | Sample memory map (w/overlaps): | |
1906 | ____22__________________ | |
1907 | @@ -493,22 +516,23 @@ static int __init sanitize_e820_map(stru | |
1908 | old_nr = *pnr_map; | |
1909 | ||
1910 | /* bail out if we find any unreasonable addresses in bios map */ | |
1911 | - for (i=0; i<old_nr; i++) | |
1912 | + for (i = 0; i < old_nr; i++) | |
1913 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) | |
1914 | return -1; | |
1915 | ||
1916 | /* create pointers for initial change-point information (for sorting) */ | |
1917 | - for (i=0; i < 2*old_nr; i++) | |
1918 | + for (i = 0; i < 2 * old_nr; i++) | |
1919 | change_point[i] = &change_point_list[i]; | |
1920 | ||
1921 | /* record all known change-points (starting and ending addresses), | |
1922 | omitting those that are for empty memory regions */ | |
1923 | chgidx = 0; | |
1924 | - for (i=0; i < old_nr; i++) { | |
1925 | + for (i = 0; i < old_nr; i++) { | |
1926 | if (biosmap[i].size != 0) { | |
1927 | change_point[chgidx]->addr = biosmap[i].addr; | |
1928 | change_point[chgidx++]->pbios = &biosmap[i]; | |
1929 | - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; | |
1930 | + change_point[chgidx]->addr = biosmap[i].addr + | |
1931 | + biosmap[i].size; | |
1932 | change_point[chgidx++]->pbios = &biosmap[i]; | |
1933 | } | |
1934 | } | |
1935 | @@ -518,75 +542,106 @@ static int __init sanitize_e820_map(stru | |
1936 | still_changing = 1; | |
1937 | while (still_changing) { | |
1938 | still_changing = 0; | |
1939 | - for (i=1; i < chg_nr; i++) { | |
1940 | - /* if <current_addr> > <last_addr>, swap */ | |
1941 | - /* or, if current=<start_addr> & last=<end_addr>, swap */ | |
1942 | - if ((change_point[i]->addr < change_point[i-1]->addr) || | |
1943 | - ((change_point[i]->addr == change_point[i-1]->addr) && | |
1944 | - (change_point[i]->addr == change_point[i]->pbios->addr) && | |
1945 | - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) | |
1946 | - ) | |
1947 | - { | |
1948 | + for (i = 1; i < chg_nr; i++) { | |
1949 | + unsigned long long curaddr, lastaddr; | |
1950 | + unsigned long long curpbaddr, lastpbaddr; | |
1951 | + | |
1952 | + curaddr = change_point[i]->addr; | |
1953 | + lastaddr = change_point[i - 1]->addr; | |
1954 | + curpbaddr = change_point[i]->pbios->addr; | |
1955 | + lastpbaddr = change_point[i - 1]->pbios->addr; | |
1956 | + | |
1957 | + /* | |
1958 | + * swap entries, when: | |
1959 | + * | |
1960 | + * curaddr < lastaddr or | |
1961 | + * curaddr == lastaddr and curaddr == curpbaddr and | |
1962 | + * lastaddr != lastpbaddr | |
1963 | + */ | |
1964 | + if (curaddr < lastaddr || | |
1965 | + (curaddr == lastaddr && curaddr == curpbaddr && | |
1966 | + lastaddr != lastpbaddr)) { | |
1967 | change_tmp = change_point[i]; | |
1968 | change_point[i] = change_point[i-1]; | |
1969 | change_point[i-1] = change_tmp; | |
1970 | - still_changing=1; | |
1971 | + still_changing = 1; | |
1972 | } | |
1973 | } | |
1974 | } | |
1975 | ||
1976 | /* create a new bios memory map, removing overlaps */ | |
1977 | - overlap_entries=0; /* number of entries in the overlap table */ | |
1978 | - new_bios_entry=0; /* index for creating new bios map entries */ | |
1979 | + overlap_entries = 0; /* number of entries in the overlap table */ | |
1980 | + new_bios_entry = 0; /* index for creating new bios map entries */ | |
1981 | last_type = 0; /* start with undefined memory type */ | |
1982 | last_addr = 0; /* start with 0 as last starting address */ | |
1983 | + | |
1984 | /* loop through change-points, determining affect on the new bios map */ | |
1985 | - for (chgidx=0; chgidx < chg_nr; chgidx++) | |
1986 | - { | |
1987 | + for (chgidx = 0; chgidx < chg_nr; chgidx++) { | |
1988 | /* keep track of all overlapping bios entries */ | |
1989 | - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) | |
1990 | - { | |
1991 | - /* add map entry to overlap list (> 1 entry implies an overlap) */ | |
1992 | - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; | |
1993 | - } | |
1994 | - else | |
1995 | - { | |
1996 | - /* remove entry from list (order independent, so swap with last) */ | |
1997 | - for (i=0; i<overlap_entries; i++) | |
1998 | - { | |
1999 | - if (overlap_list[i] == change_point[chgidx]->pbios) | |
2000 | - overlap_list[i] = overlap_list[overlap_entries-1]; | |
2001 | + if (change_point[chgidx]->addr == | |
2002 | + change_point[chgidx]->pbios->addr) { | |
2003 | + /* | |
2004 | + * add map entry to overlap list (> 1 entry | |
2005 | + * implies an overlap) | |
2006 | + */ | |
2007 | + overlap_list[overlap_entries++] = | |
2008 | + change_point[chgidx]->pbios; | |
2009 | + } else { | |
2010 | + /* | |
2011 | + * remove entry from list (order independent, | |
2012 | + * so swap with last) | |
2013 | + */ | |
2014 | + for (i = 0; i < overlap_entries; i++) { | |
2015 | + if (overlap_list[i] == | |
2016 | + change_point[chgidx]->pbios) | |
2017 | + overlap_list[i] = | |
2018 | + overlap_list[overlap_entries-1]; | |
2019 | } | |
2020 | overlap_entries--; | |
2021 | } | |
2022 | - /* if there are overlapping entries, decide which "type" to use */ | |
2023 | - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ | |
2024 | + /* | |
2025 | + * if there are overlapping entries, decide which | |
2026 | + * "type" to use (larger value takes precedence -- | |
2027 | + * 1=usable, 2,3,4,4+=unusable) | |
2028 | + */ | |
2029 | current_type = 0; | |
2030 | - for (i=0; i<overlap_entries; i++) | |
2031 | + for (i = 0; i < overlap_entries; i++) | |
2032 | if (overlap_list[i]->type > current_type) | |
2033 | current_type = overlap_list[i]->type; | |
2034 | - /* continue building up new bios map based on this information */ | |
2035 | + /* | |
2036 | + * continue building up new bios map based on this | |
2037 | + * information | |
2038 | + */ | |
2039 | if (current_type != last_type) { | |
2040 | if (last_type != 0) { | |
2041 | new_bios[new_bios_entry].size = | |
2042 | change_point[chgidx]->addr - last_addr; | |
2043 | - /* move forward only if the new size was non-zero */ | |
2044 | + /* | |
2045 | + * move forward only if the new size | |
2046 | + * was non-zero | |
2047 | + */ | |
2048 | if (new_bios[new_bios_entry].size != 0) | |
2049 | + /* | |
2050 | + * no more space left for new | |
2051 | + * bios entries ? | |
2052 | + */ | |
2053 | if (++new_bios_entry >= E820MAX) | |
2054 | - break; /* no more space left for new bios entries */ | |
2055 | + break; | |
2056 | } | |
2057 | if (current_type != 0) { | |
2058 | - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; | |
2059 | + new_bios[new_bios_entry].addr = | |
2060 | + change_point[chgidx]->addr; | |
2061 | new_bios[new_bios_entry].type = current_type; | |
2062 | - last_addr=change_point[chgidx]->addr; | |
2063 | + last_addr = change_point[chgidx]->addr; | |
2064 | } | |
2065 | last_type = current_type; | |
2066 | } | |
2067 | } | |
2068 | - new_nr = new_bios_entry; /* retain count for new bios entries */ | |
2069 | + /* retain count for new bios entries */ | |
2070 | + new_nr = new_bios_entry; | |
2071 | ||
2072 | /* copy new bios mapping into original location */ | |
2073 | - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); | |
2074 | + memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); | |
2075 | *pnr_map = new_nr; | |
2076 | ||
2077 | return 0; | |
2078 | @@ -601,7 +656,7 @@ static int __init sanitize_e820_map(stru | |
2079 | * will have given us a memory map that we can use to properly | |
2080 | * set up memory. If we aren't, we'll fake a memory map. | |
2081 | */ | |
2082 | -static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) | |
2083 | +static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) | |
2084 | { | |
2085 | #ifndef CONFIG_XEN | |
2086 | /* Only one memory region (or negative)? Ignore it */ | |
2087 | @@ -622,7 +677,7 @@ static int __init copy_e820_map(struct e | |
2088 | return -1; | |
2089 | ||
2090 | add_memory_region(start, size, type); | |
2091 | - } while (biosmap++,--nr_map); | |
2092 | + } while (biosmap++, --nr_map); | |
2093 | ||
2094 | #ifdef CONFIG_XEN | |
2095 | if (is_initial_xendomain()) { | |
2096 | @@ -641,15 +696,17 @@ static int __init copy_e820_map(struct e | |
2097 | return 0; | |
2098 | } | |
2099 | ||
2100 | -void early_panic(char *msg) | |
2101 | +static void early_panic(char *msg) | |
2102 | { | |
2103 | early_printk(msg); | |
2104 | panic(msg); | |
2105 | } | |
2106 | ||
2107 | -#ifndef CONFIG_XEN | |
2108 | -void __init setup_memory_region(void) | |
2109 | +/* We're not void only for x86 32-bit compat */ | |
2110 | +char * __init machine_specific_memory_setup(void) | |
2111 | { | |
2112 | +#ifndef CONFIG_XEN | |
2113 | + char *who = "BIOS-e820"; | |
2114 | /* | |
2115 | * Try to copy the BIOS-supplied E820-map. | |
2116 | * | |
2117 | @@ -659,14 +716,8 @@ void __init setup_memory_region(void) | |
2118 | sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); | |
2119 | if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) | |
2120 | early_panic("Cannot find a valid memory map"); | |
2121 | - printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | |
2122 | - e820_print_map("BIOS-e820"); | |
2123 | -} | |
2124 | - | |
2125 | #else /* CONFIG_XEN */ | |
2126 | - | |
2127 | -void __init setup_memory_region(void) | |
2128 | -{ | |
2129 | + char *who = "Xen"; | |
2130 | int rc; | |
2131 | struct xen_memory_map memmap; | |
2132 | /* | |
2133 | @@ -694,11 +745,13 @@ void __init setup_memory_region(void) | |
2134 | ||
2135 | if (copy_e820_map(map, (char)memmap.nr_entries) < 0) | |
2136 | early_panic("Cannot find a valid memory map"); | |
2137 | - | |
2138 | +#endif | |
2139 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | |
2140 | - e820_print_map("Xen"); | |
2141 | + e820_print_map(who); | |
2142 | + | |
2143 | + /* In case someone cares... */ | |
2144 | + return who; | |
2145 | } | |
2146 | -#endif | |
2147 | ||
2148 | static int __init parse_memopt(char *p) | |
2149 | { | |
2150 | @@ -709,7 +762,7 @@ static int __init parse_memopt(char *p) | |
2151 | if (!p) | |
2152 | return -EINVAL; | |
2153 | end_user_pfn = memparse(p, &p); | |
2154 | - end_user_pfn >>= PAGE_SHIFT; | |
2155 | + end_user_pfn >>= PAGE_SHIFT; | |
2156 | ||
2157 | end = end_user_pfn<<PAGE_SHIFT; | |
2158 | i = e820.nr_map-1; | |
2159 | @@ -727,7 +780,7 @@ static int __init parse_memopt(char *p) | |
2160 | } | |
2161 | ||
2162 | return 0; | |
2163 | -} | |
2164 | +} | |
2165 | early_param("mem", parse_memopt); | |
2166 | ||
2167 | static int userdef __initdata; | |
2168 | @@ -739,9 +792,9 @@ static int __init parse_memmap_opt(char | |
2169 | ||
2170 | if (!strcmp(p, "exactmap")) { | |
2171 | #ifdef CONFIG_CRASH_DUMP | |
2172 | - /* If we are doing a crash dump, we | |
2173 | - * still need to know the real mem | |
2174 | - * size before original memory map is | |
2175 | + /* | |
2176 | + * If we are doing a crash dump, we still need to know | |
2177 | + * the real mem size before original memory map is | |
2178 | * reset. | |
2179 | */ | |
2180 | e820_register_active_regions(0, 0, -1UL); | |
2181 | @@ -758,6 +811,8 @@ static int __init parse_memmap_opt(char | |
2182 | mem_size = memparse(p, &p); | |
2183 | if (p == oldp) | |
2184 | return -EINVAL; | |
2185 | + | |
2186 | + userdef = 1; | |
2187 | if (*p == '@') { | |
2188 | start_at = memparse(p+1, &p); | |
2189 | add_memory_region(start_at, mem_size, E820_RAM); | |
2190 | @@ -777,11 +832,58 @@ early_param("memmap", parse_memmap_opt); | |
2191 | void __init finish_e820_parsing(void) | |
2192 | { | |
2193 | if (userdef) { | |
2194 | + char nr = e820.nr_map; | |
2195 | + | |
2196 | + if (sanitize_e820_map(e820.map, &nr) < 0) | |
2197 | + early_panic("Invalid user supplied memory map"); | |
2198 | + e820.nr_map = nr; | |
2199 | + | |
2200 | printk(KERN_INFO "user-defined physical RAM map:\n"); | |
2201 | e820_print_map("user"); | |
2202 | } | |
2203 | } | |
2204 | ||
2205 | +#ifndef CONFIG_XEN | |
2206 | +void __init update_memory_range(u64 start, u64 size, unsigned old_type, | |
2207 | + unsigned new_type) | |
2208 | +{ | |
2209 | + int i; | |
2210 | + | |
2211 | + BUG_ON(old_type == new_type); | |
2212 | + /* mark the part of each old_type entry in [start, start + size) new_type */ | |
2213 | + for (i = 0; i < e820.nr_map; i++) { | |
2214 | + struct e820entry *ei = &e820.map[i]; | |
2215 | + u64 final_start, final_end; | |
2216 | + if (ei->type != old_type) | |
2217 | + continue; | |
2218 | + /* entirely inside [start, start + size)? retype it */ | |
2219 | + if (ei->addr >= start && ei->addr + ei->size <= start + size) { | |
2220 | + ei->type = new_type; | |
2221 | + continue; | |
2222 | + } | |
2223 | + /* partial overlap: add new_type region for the overlap */ | |
2224 | + final_start = max(start, ei->addr); | |
2225 | + final_end = min(start + size, ei->addr + ei->size); | |
2226 | + if (final_start >= final_end) | |
2227 | + continue; | |
2228 | + add_memory_region(final_start, final_end - final_start, | |
2229 | + new_type); | |
2230 | + } | |
2231 | +} | |
2232 | + | |
2233 | +void __init update_e820(void) | |
2234 | +{ | |
2235 | + u8 nr_map; /* local copy: sanitize_e820_map() updates the count by pointer */ | |
2236 | + | |
2237 | + nr_map = e820.nr_map; | |
2238 | + if (sanitize_e820_map(e820.map, &nr_map)) | |
2239 | + return; /* non-zero: sanitize failed, leave e820.nr_map unchanged */ | |
2240 | + e820.nr_map = nr_map; | |
2241 | + printk(KERN_INFO "modified physical RAM map:\n"); | |
2242 | + e820_print_map("modified"); | |
2243 | +} | |
2244 | +#endif | |
2245 | + | |
2246 | unsigned long pci_mem_start = 0xaeedbabe; | |
2247 | EXPORT_SYMBOL(pci_mem_start); | |
2248 | ||
2249 | @@ -825,8 +927,10 @@ __init void e820_setup_gap(struct e820en | |
2250 | ||
2251 | if (!found) { | |
2252 | gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; | |
2253 | - printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n" | |
2254 | - KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n"); | |
2255 | + printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " | |
2256 | + "address range\n" | |
2257 | + KERN_ERR "PCI: Unassigned devices with 32bit resource " | |
2258 | + "registers may break!\n"); | |
2259 | } | |
2260 | ||
2261 | /* | |
2262 | @@ -839,8 +943,9 @@ __init void e820_setup_gap(struct e820en | |
2263 | /* Fun with two's complement */ | |
2264 | pci_mem_start = (gapstart + round) & -round; | |
2265 | ||
2266 | - printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", | |
2267 | - pci_mem_start, gapstart, gapsize); | |
2268 | + printk(KERN_INFO | |
2269 | + "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", | |
2270 | + pci_mem_start, gapstart, gapsize); | |
2271 | } | |
2272 | ||
2273 | int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) | |
00e5a55c BS |
2274 | --- sle11-2009-05-14.orig/arch/x86/kernel/early_printk-xen.c 2009-02-16 16:18:36.000000000 +0100 |
2275 | +++ sle11-2009-05-14/arch/x86/kernel/early_printk-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
2276 | @@ -222,7 +222,7 @@ static struct console simnow_console = { |
2277 | }; | |
2278 | ||
2279 | /* Direct interface for emergencies */ | |
2280 | -struct console *early_console = &early_vga_console; | |
2281 | +static struct console *early_console = &early_vga_console; | |
2282 | static int early_console_initialized = 0; | |
2283 | ||
2284 | void early_printk(const char *fmt, ...) | |
00e5a55c BS |
2285 | --- sle11-2009-05-14.orig/arch/x86/kernel/entry_32-xen.S 2009-05-14 11:18:18.000000000 +0200 |
2286 | +++ sle11-2009-05-14/arch/x86/kernel/entry_32-xen.S 2009-05-14 11:18:32.000000000 +0200 | |
cc90b958 BS |
2287 | @@ -59,7 +59,7 @@ |
2288 | * for paravirtualization. The following will never clobber any registers: | |
2289 | * INTERRUPT_RETURN (aka. "iret") | |
2290 | * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") | |
2291 | - * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). | |
2292 | + * ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit"). | |
2293 | * | |
2294 | * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must | |
2295 | * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). | |
2296 | @@ -282,16 +282,21 @@ END(resume_kernel) | |
2297 | #endif | |
2298 | CFI_ENDPROC | |
2299 | ||
2300 | + .macro test_tif ti_reg # system call tracing in operation / emulation | |
2301 | + /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | |
2302 | + testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(\ti_reg) | |
2303 | + .endm | |
2304 | + | |
2305 | /* SYSENTER_RETURN points to after the "sysenter" instruction in | |
2306 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ | |
2307 | ||
2308 | # sysenter call handler stub | |
2309 | -ENTRY(sysenter_entry) | |
2310 | +ENTRY(ia32_sysenter_target) | |
2311 | CFI_STARTPROC simple | |
2312 | CFI_SIGNAL_FRAME | |
2313 | CFI_DEF_CFA esp, 0 | |
2314 | CFI_REGISTER esp, ebp | |
2315 | - movl SYSENTER_stack_esp0(%esp),%esp | |
2316 | + movl SYSENTER_stack_sp0(%esp),%esp | |
2317 | sysenter_past_esp: | |
2318 | /* | |
2319 | * No need to follow this irqs on/off section: the syscall | |
2320 | @@ -334,9 +339,7 @@ sysenter_past_esp: | |
2321 | CFI_ADJUST_CFA_OFFSET 4 | |
2322 | SAVE_ALL | |
2323 | GET_THREAD_INFO(%ebp) | |
2324 | - | |
2325 | - /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | |
2326 | - testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | |
2327 | + test_tif %ebp | |
2328 | jnz syscall_trace_entry | |
2329 | cmpl $(nr_syscalls), %eax | |
2330 | jae syscall_badsys | |
2331 | @@ -354,7 +357,7 @@ sysenter_past_esp: | |
2332 | xorl %ebp,%ebp | |
2333 | TRACE_IRQS_ON | |
2334 | 1: mov PT_FS(%esp), %fs | |
2335 | - ENABLE_INTERRUPTS_SYSEXIT | |
2336 | + ENABLE_INTERRUPTS_SYSCALL_RET | |
2337 | CFI_ENDPROC | |
2338 | .pushsection .fixup,"ax" | |
2339 | 2: movl $0,PT_FS(%esp) | |
2340 | @@ -363,10 +366,10 @@ sysenter_past_esp: | |
2341 | .align 4 | |
2342 | .long 1b,2b | |
2343 | .popsection | |
2344 | -ENDPROC(sysenter_entry) | |
2345 | +ENDPROC(ia32_sysenter_target) | |
2346 | ||
2347 | # pv sysenter call handler stub | |
2348 | -ENTRY(sysenter_entry_pv) | |
2349 | +ENTRY(ia32pv_sysenter_target) | |
2350 | RING0_INT_FRAME | |
2351 | movl $__USER_DS,16(%esp) | |
2352 | movl %ebp,12(%esp) | |
2353 | @@ -389,7 +392,7 @@ ENTRY(sysenter_entry_pv) | |
2354 | .previous | |
2355 | /* fall through */ | |
2356 | CFI_ENDPROC | |
2357 | -ENDPROC(sysenter_entry_pv) | |
2358 | +ENDPROC(ia32pv_sysenter_target) | |
2359 | ||
2360 | # system call handler stub | |
2361 | ENTRY(system_call) | |
2362 | @@ -398,9 +401,7 @@ ENTRY(system_call) | |
2363 | CFI_ADJUST_CFA_OFFSET 4 | |
2364 | SAVE_ALL | |
2365 | GET_THREAD_INFO(%ebp) | |
2366 | - # system call tracing in operation / emulation | |
2367 | - /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | |
2368 | - testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | |
2369 | + test_tif %ebp | |
2370 | jnz syscall_trace_entry | |
2371 | cmpl $(nr_syscalls), %eax | |
2372 | jae syscall_badsys | |
2373 | @@ -452,7 +453,8 @@ restore_nocheck_notrace: | |
2374 | RESTORE_REGS | |
2375 | addl $4, %esp # skip orig_eax/error_code | |
2376 | CFI_ADJUST_CFA_OFFSET -4 | |
2377 | -1: INTERRUPT_RETURN | |
2378 | +irq_return: | |
2379 | + INTERRUPT_RETURN | |
2380 | .section .fixup,"ax" | |
2381 | iret_exc: | |
2382 | pushl $0 # no error code | |
2383 | @@ -461,7 +463,7 @@ iret_exc: | |
2384 | .previous | |
2385 | .section __ex_table,"a" | |
2386 | .align 4 | |
2387 | - .long 1b,iret_exc | |
2388 | + .long irq_return,iret_exc | |
2389 | .previous | |
2390 | ||
2391 | CFI_RESTORE_STATE | |
2392 | @@ -657,7 +659,7 @@ END(syscall_badsys) | |
2393 | * Build the entry stubs and pointer table with | |
2394 | * some assembler magic. | |
2395 | */ | |
2396 | -.data | |
2397 | +.section .rodata,"a" | |
2398 | ENTRY(interrupt) | |
2399 | .text | |
2400 | ||
00e5a55c | 2401 | @@ -963,7 +965,7 @@ END(device_not_available) |
cc90b958 BS |
2402 | * that sets up the real kernel stack. Check here, since we can't |
2403 | * allow the wrong stack to be used. | |
2404 | * | |
2405 | - * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have | |
2406 | + * "SYSENTER_stack_sp0+12" is because the NMI/debug handler will have | |
2407 | * already pushed 3 words if it hits on the sysenter instruction: | |
2408 | * eflags, cs and eip. | |
2409 | * | |
00e5a55c | 2410 | @@ -975,7 +977,7 @@ END(device_not_available) |
cc90b958 BS |
2411 | cmpw $__KERNEL_CS,4(%esp); \ |
2412 | jne ok; \ | |
2413 | label: \ | |
2414 | - movl SYSENTER_stack_esp0+offset(%esp),%esp; \ | |
2415 | + movl SYSENTER_stack_sp0+offset(%esp),%esp; \ | |
2416 | CFI_DEF_CFA esp, 0; \ | |
2417 | CFI_UNDEFINED eip; \ | |
2418 | pushfl; \ | |
00e5a55c | 2419 | @@ -990,7 +992,7 @@ label: \ |
cc90b958 BS |
2420 | KPROBE_ENTRY(debug) |
2421 | RING0_INT_FRAME | |
2422 | #ifndef CONFIG_XEN | |
2423 | - cmpl $sysenter_entry,(%esp) | |
2424 | + cmpl $ia32_sysenter_target,(%esp) | |
2425 | jne debug_stack_correct | |
2426 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) | |
2427 | debug_stack_correct: | |
00e5a55c | 2428 | @@ -1023,7 +1025,7 @@ KPROBE_ENTRY(nmi) |
cc90b958 BS |
2429 | popl %eax |
2430 | CFI_ADJUST_CFA_OFFSET -4 | |
2431 | je nmi_espfix_stack | |
2432 | - cmpl $sysenter_entry,(%esp) | |
2433 | + cmpl $ia32_sysenter_target,(%esp) | |
2434 | je nmi_stack_fixup | |
2435 | pushl %eax | |
2436 | CFI_ADJUST_CFA_OFFSET 4 | |
00e5a55c | 2437 | @@ -1036,7 +1038,7 @@ KPROBE_ENTRY(nmi) |
cc90b958 BS |
2438 | popl %eax |
2439 | CFI_ADJUST_CFA_OFFSET -4 | |
2440 | jae nmi_stack_correct | |
2441 | - cmpl $sysenter_entry,12(%esp) | |
2442 | + cmpl $ia32_sysenter_target,12(%esp) | |
2443 | je nmi_debug_stack_check | |
2444 | nmi_stack_correct: | |
2445 | /* We have a RING0_INT_FRAME here */ | |
00e5a55c | 2446 | @@ -1089,12 +1091,8 @@ nmi_espfix_stack: |
cc90b958 BS |
2447 | RESTORE_REGS |
2448 | lss 12+4(%esp), %esp # back to espfix stack | |
2449 | CFI_ADJUST_CFA_OFFSET -24 | |
2450 | -1: INTERRUPT_RETURN | |
2451 | + jmp irq_return | |
2452 | CFI_ENDPROC | |
2453 | -.section __ex_table,"a" | |
2454 | - .align 4 | |
2455 | - .long 1b,iret_exc | |
2456 | -.previous | |
2457 | #else | |
2458 | KPROBE_ENTRY(nmi) | |
2459 | RING0_INT_FRAME | |
00e5a55c | 2460 | @@ -1112,17 +1110,17 @@ KPROBE_END(nmi) |
cc90b958 BS |
2461 | |
2462 | #ifdef CONFIG_PARAVIRT | |
2463 | ENTRY(native_iret) | |
2464 | -1: iret | |
2465 | + iret | |
2466 | .section __ex_table,"a" | |
2467 | .align 4 | |
2468 | - .long 1b,iret_exc | |
2469 | + .long native_iret, iret_exc | |
2470 | .previous | |
2471 | END(native_iret) | |
2472 | ||
2473 | -ENTRY(native_irq_enable_sysexit) | |
2474 | +ENTRY(native_irq_enable_syscall_ret) | |
2475 | sti | |
2476 | sysexit | |
2477 | -END(native_irq_enable_sysexit) | |
2478 | +END(native_irq_enable_syscall_ret) | |
2479 | #endif | |
2480 | ||
2481 | KPROBE_ENTRY(int3) | |
00e5a55c | 2482 | @@ -1271,7 +1269,144 @@ ENTRY(kernel_thread_helper) |
cc90b958 BS |
2483 | CFI_ENDPROC |
2484 | ENDPROC(kernel_thread_helper) | |
2485 | ||
2486 | +#include <asm/alternative-asm.h> | |
2487 | + | |
2488 | + # pv syscall call handler stub | |
2489 | +ENTRY(ia32pv_cstar_target) | |
2490 | + RING0_INT_FRAME | |
2491 | + movl $__USER_DS,16(%esp) | |
2492 | + movl %ebp,%ecx | |
2493 | + movl $__USER_CS,4(%esp) | |
2494 | + movl 12(%esp),%ebp | |
2495 | + pushl %eax # save orig_eax | |
2496 | + CFI_ADJUST_CFA_OFFSET 4 | |
2497 | +/* | |
2498 | + * Load the potential sixth argument from user stack. | |
2499 | + * Careful about security. | |
2500 | + */ | |
2501 | + cmpl $__PAGE_OFFSET-4,%ebp | |
2502 | + CFI_REMEMBER_STATE | |
2503 | + ja cstar_fault | |
2504 | +1: movl (%ebp),%ebp | |
2505 | +.section __ex_table,"a" | |
2506 | + .align 4 | |
2507 | + .long 1b,cstar_fault | |
2508 | +.previous | |
2509 | + SAVE_ALL | |
2510 | + GET_THREAD_INFO(%ebp) | |
2511 | + test_tif %ebp | |
2512 | + jnz cstar_trace_entry | |
2513 | + cmpl $nr_syscalls,%eax | |
2514 | + jae cstar_badsys | |
2515 | +.Lcstar_call: | |
2516 | + btl %eax,cstar_special | |
2517 | + jc .Lcstar_special | |
2518 | + call *cstar_call_table(,%eax,4) | |
2519 | + movl %eax,PT_EAX(%esp) # store the return value | |
2520 | +.Lcstar_exit: | |
2521 | + movl PT_ECX(%esp),%ecx | |
2522 | + movl %ecx,PT_EBP(%esp) # put user EBP back in place | |
2523 | + jmp syscall_exit | |
2524 | +.Lcstar_special: | |
2525 | + movl PT_ECX(%esp),%ecx | |
2526 | + movl %ecx,PT_EBP(%esp) # put user EBP back in place | |
2527 | + jmp syscall_call | |
2528 | +cstar_set_tif: | |
2529 | + movl $cstar_clear_tif,(%esp) # replace return address | |
2530 | + LOCK_PREFIX | |
2531 | + orl $_TIF_CSTAR,TI_flags(%ebp) | |
2532 | + jmp *sys_call_table(,%eax,4) | |
2533 | +cstar_clear_tif: | |
2534 | + movl %eax,PT_EAX(%esp) # store the return value | |
2535 | + LOCK_PREFIX | |
2536 | + andl $~_TIF_CSTAR,TI_flags(%ebp) | |
2537 | + jmp .Lcstar_exit | |
2538 | +cstar_trace_entry: | |
2539 | + movl $-ENOSYS,PT_EAX(%esp) | |
2540 | + cmpl $nr_syscalls,%eax | |
2541 | + jae 1f | |
2542 | + btl %eax,cstar_special | |
2543 | + jc .Lcstar_trace_special | |
2544 | +1: movl %esp,%eax | |
2545 | + xorl %edx,%edx | |
2546 | + LOCK_PREFIX | |
2547 | + orl $_TIF_CSTAR,TI_flags(%ebp) | |
2548 | + call do_syscall_trace | |
2549 | + LOCK_PREFIX | |
2550 | + andl $~_TIF_CSTAR,TI_flags(%ebp) | |
2551 | + testl %eax,%eax | |
2552 | + jne .Lcstar_resume # ret != 0 -> running under PTRACE_SYSEMU, | |
2553 | + # so must skip actual syscall | |
2554 | + movl PT_ORIG_EAX(%esp),%eax | |
2555 | + cmpl $nr_syscalls,%eax | |
2556 | + jb .Lcstar_call | |
2557 | + jmp .Lcstar_exit | |
2558 | +.Lcstar_trace_special: | |
2559 | + movl PT_ECX(%esp),%ecx | |
2560 | + movl %esp,%eax | |
2561 | + xorl %edx,%edx | |
2562 | + movl %ecx,PT_EBP(%esp) # put user EBP back in place | |
2563 | + call do_syscall_trace | |
2564 | + testl %eax,%eax | |
2565 | + jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, | |
2566 | + # so must skip actual syscall | |
2567 | + movl PT_ORIG_EAX(%esp),%eax | |
2568 | + cmpl $nr_syscalls,%eax | |
2569 | + jb syscall_call | |
2570 | + jmp syscall_exit | |
2571 | +cstar_badsys: | |
2572 | + movl $-ENOSYS,PT_EAX(%esp) | |
2573 | +.Lcstar_resume: | |
2574 | + movl PT_ECX(%esp),%ecx | |
2575 | + movl %ecx,PT_EBP(%esp) # put user EBP back in place | |
2576 | + jmp resume_userspace | |
2577 | + CFI_RESTORE_STATE | |
2578 | +cstar_fault: | |
2579 | + movl $-EFAULT,%eax | |
2580 | + SAVE_ALL | |
2581 | + GET_THREAD_INFO(%ebp) | |
2582 | + jmp .Lcstar_resume | |
2583 | + CFI_ENDPROC | |
2584 | +ENDPROC(ia32pv_cstar_target) | |
2585 | + | |
2586 | +ENTRY(cstar_ret_from_fork) | |
2587 | + CFI_STARTPROC | |
2588 | + movl PT_ECX(%esp),%ecx | |
2589 | + GET_THREAD_INFO(%ebp) | |
2590 | + movl %ecx,PT_EBP(%esp) # put user EBP back in place | |
2591 | + LOCK_PREFIX | |
2592 | + andl $~_TIF_CSTAR,TI_flags(%ebp) # drop the flag raised by cstar_set_tif | |
2593 | + jmp ret_from_fork # continue on the regular fork return path | |
2594 | + CFI_ENDPROC | |
2595 | +END(cstar_ret_from_fork) | |
2596 | + | |
2597 | .section .rodata,"a" | |
2598 | #include "syscall_table_32.S" | |
2599 | ||
2600 | syscall_table_size=(.-sys_call_table) | |
2601 | + | |
2602 | +#include <asm/unistd.h> | |
2603 | +cstar_special: | |
2604 | +nr=0 | |
2605 | +mask=0 | |
2606 | +.rept nr_syscalls+31 | |
2607 | + .irp n, __NR_sigreturn, __NR_rt_sigreturn | |
2608 | + .if nr == \n | |
2609 | + mask = mask | (1 << (\n & 31)) | |
2610 | + .endif | |
2611 | + .endr | |
2612 | + nr = nr + 1 | |
2613 | + .if (nr & 31) == 0 | |
2614 | + .long mask | |
2615 | + mask = 0 | |
2616 | + .endif | |
2617 | +.endr | |
2618 | +#define sys_call_table cstar_call_table | |
2619 | +#define sys_fork cstar_set_tif | |
2620 | +#define sys_clone cstar_set_tif | |
2621 | +#define sys_vfork cstar_set_tif | |
2622 | +#include "syscall_table_32.S" | |
2623 | +#undef sys_call_table | |
2624 | +#undef sys_fork | |
2625 | +#undef sys_clone | |
2626 | +#undef sys_vfork | |
00e5a55c BS |
2627 | --- sle11-2009-05-14.orig/arch/x86/kernel/entry_64-xen.S 2009-02-16 16:18:36.000000000 +0100 |
2628 | +++ sle11-2009-05-14/arch/x86/kernel/entry_64-xen.S 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
2629 | @@ -54,17 +54,22 @@ |
2630 | #include <asm/page.h> | |
2631 | #include <asm/irqflags.h> | |
2632 | #include <asm/errno.h> | |
2633 | -#include <xen/interface/arch-x86_64.h> | |
2634 | +#include <xen/interface/xen.h> | |
2635 | #include <xen/interface/features.h> | |
2636 | ||
2637 | -#include "xen_entry_64.S" | |
2638 | - | |
2639 | .code64 | |
2640 | ||
2641 | #ifndef CONFIG_PREEMPT | |
2642 | #define retint_kernel retint_restore_args | |
2643 | #endif | |
2644 | ||
2645 | +#ifdef CONFIG_PARAVIRT | |
2646 | +ENTRY(native_irq_enable_syscall_ret) | |
2647 | + movq %gs:pda_oldrsp,%rsp | |
2648 | + swapgs | |
2649 | + sysretq | |
2650 | +#endif /* CONFIG_PARAVIRT */ | |
2651 | + | |
2652 | ||
2653 | .macro TRACE_IRQS_IRETQ offset=ARGOFFSET | |
2654 | #ifdef CONFIG_TRACE_IRQFLAGS | |
2655 | @@ -277,7 +282,7 @@ ret_from_sys_call: | |
2656 | sysret_check: | |
2657 | LOCKDEP_SYS_EXIT | |
2658 | GET_THREAD_INFO(%rcx) | |
2659 | - XEN_BLOCK_EVENTS(%rsi) | |
2660 | + DISABLE_INTERRUPTS(CLBR_NONE) | |
2661 | TRACE_IRQS_OFF | |
2662 | movl threadinfo_flags(%rcx),%edx | |
2663 | andl %edi,%edx | |
2664 | @@ -287,7 +292,7 @@ sysret_check: | |
2665 | * sysretq will re-enable interrupts: | |
2666 | */ | |
2667 | TRACE_IRQS_ON | |
2668 | - XEN_UNBLOCK_EVENTS(%rsi) | |
2669 | + ENABLE_INTERRUPTS(CLBR_NONE) | |
2670 | RESTORE_ARGS 0,8,0 | |
2671 | HYPERVISOR_IRET VGCF_IN_SYSCALL | |
2672 | ||
2673 | @@ -298,7 +303,7 @@ sysret_careful: | |
2674 | bt $TIF_NEED_RESCHED,%edx | |
2675 | jnc sysret_signal | |
2676 | TRACE_IRQS_ON | |
2677 | - XEN_UNBLOCK_EVENTS(%rsi) | |
2678 | + ENABLE_INTERRUPTS(CLBR_NONE) | |
2679 | pushq %rdi | |
2680 | CFI_ADJUST_CFA_OFFSET 8 | |
2681 | call schedule | |
2682 | @@ -309,9 +314,8 @@ sysret_careful: | |
2683 | /* Handle a signal */ | |
2684 | sysret_signal: | |
2685 | TRACE_IRQS_ON | |
2686 | -/* sti */ | |
2687 | - XEN_UNBLOCK_EVENTS(%rsi) | |
2688 | - testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx | |
2689 | + ENABLE_INTERRUPTS(CLBR_NONE) | |
2690 | + testl $_TIF_DO_NOTIFY_MASK,%edx | |
2691 | jz 1f | |
2692 | ||
2693 | /* Really a signal */ | |
2694 | @@ -323,7 +327,7 @@ sysret_signal: | |
2695 | 1: movl $_TIF_NEED_RESCHED,%edi | |
2696 | /* Use IRET because user could have changed frame. This | |
2697 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | |
2698 | - XEN_BLOCK_EVENTS(%rsi) | |
2699 | + DISABLE_INTERRUPTS(CLBR_NONE) | |
2700 | TRACE_IRQS_OFF | |
2701 | jmp int_with_check | |
2702 | ||
2703 | @@ -355,7 +359,7 @@ tracesys: | |
2704 | */ | |
2705 | .globl int_ret_from_sys_call | |
2706 | int_ret_from_sys_call: | |
2707 | - XEN_BLOCK_EVENTS(%rsi) | |
2708 | + DISABLE_INTERRUPTS(CLBR_NONE) | |
2709 | TRACE_IRQS_OFF | |
2710 | testb $3,CS-ARGOFFSET(%rsp) | |
2711 | jnz 1f | |
2712 | @@ -381,22 +385,20 @@ int_careful: | |
2713 | bt $TIF_NEED_RESCHED,%edx | |
2714 | jnc int_very_careful | |
2715 | TRACE_IRQS_ON | |
2716 | -/* sti */ | |
2717 | - XEN_UNBLOCK_EVENTS(%rsi) | |
2718 | + ENABLE_INTERRUPTS(CLBR_NONE) | |
2719 | pushq %rdi | |
2720 | CFI_ADJUST_CFA_OFFSET 8 | |
2721 | call schedule | |
2722 | popq %rdi | |
2723 | CFI_ADJUST_CFA_OFFSET -8 | |
2724 | - XEN_BLOCK_EVENTS(%rsi) | |
2725 | + DISABLE_INTERRUPTS(CLBR_NONE) | |
2726 | TRACE_IRQS_OFF | |
2727 | jmp int_with_check | |
2728 | ||
2729 | /* handle signals and tracing -- both require a full stack frame */ | |
2730 | int_very_careful: | |
2731 | TRACE_IRQS_ON | |
2732 | -/* sti */ | |
2733 | - XEN_UNBLOCK_EVENTS(%rsi) | |
2734 | + ENABLE_INTERRUPTS(CLBR_NONE) | |
2735 | SAVE_REST | |
2736 | /* Check for syscall exit trace */ | |
2737 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx | |
2738 | @@ -411,7 +413,7 @@ int_very_careful: | |
2739 | jmp int_restore_rest | |
2740 | ||
2741 | int_signal: | |
2742 | - testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx | |
2743 | + testl $_TIF_DO_NOTIFY_MASK,%edx | |
2744 | jz 1f | |
2745 | movq %rsp,%rdi # &ptregs -> arg1 | |
2746 | xorl %esi,%esi # oldset -> arg2 | |
2747 | @@ -419,7 +421,7 @@ int_signal: | |
2748 | 1: movl $_TIF_NEED_RESCHED,%edi | |
2749 | int_restore_rest: | |
2750 | RESTORE_REST | |
2751 | - XEN_BLOCK_EVENTS(%rsi) | |
2752 | + DISABLE_INTERRUPTS(CLBR_NONE) | |
2753 | TRACE_IRQS_OFF | |
2754 | jmp int_with_check | |
2755 | CFI_ENDPROC | |
2756 | @@ -474,6 +476,7 @@ ENTRY(stub_execve) | |
2757 | CFI_REGISTER rip, r11 | |
2758 | SAVE_REST | |
2759 | FIXUP_TOP_OF_STACK %r11 | |
2760 | + movq %rsp, %rcx | |
2761 | call sys_execve | |
2762 | RESTORE_TOP_OF_STACK %r11 | |
2763 | movq %rax,RAX(%rsp) | |
2764 | @@ -526,11 +529,10 @@ retint_check: | |
2765 | retint_restore_args: /* return to kernel space */ | |
2766 | movl EFLAGS-REST_SKIP(%rsp), %eax | |
2767 | shr $9, %eax # EAX[0] == IRET_EFLAGS.IF | |
2768 | - XEN_GET_VCPU_INFO(%rsi) | |
2769 | + GET_VCPU_INFO | |
2770 | andb evtchn_upcall_mask(%rsi),%al | |
2771 | andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask | |
2772 | jnz restore_all_enable_events # != 0 => enable event delivery | |
2773 | - XEN_PUT_VCPU_INFO(%rsi) | |
2774 | ||
2775 | RESTORE_ARGS 0,8,0 | |
2776 | HYPERVISOR_IRET 0 | |
2777 | @@ -541,31 +543,29 @@ retint_careful: | |
2778 | bt $TIF_NEED_RESCHED,%edx | |
2779 | jnc retint_signal | |
2780 | TRACE_IRQS_ON | |
2781 | - XEN_UNBLOCK_EVENTS(%rsi) | |
2782 | -/* sti */ | |
2783 | + ENABLE_INTERRUPTS(CLBR_NONE) | |
2784 | pushq %rdi | |
2785 | CFI_ADJUST_CFA_OFFSET 8 | |
2786 | call schedule | |
2787 | popq %rdi | |
2788 | CFI_ADJUST_CFA_OFFSET -8 | |
2789 | GET_THREAD_INFO(%rcx) | |
2790 | - XEN_BLOCK_EVENTS(%rsi) | |
2791 | -/* cli */ | |
2792 | + DISABLE_INTERRUPTS(CLBR_NONE) | |
2793 | TRACE_IRQS_OFF | |
2794 | jmp retint_check | |
2795 | ||
2796 | retint_signal: | |
2797 | - testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx | |
2798 | + testl $_TIF_DO_NOTIFY_MASK,%edx | |
2799 | jz retint_restore_args | |
2800 | TRACE_IRQS_ON | |
2801 | - XEN_UNBLOCK_EVENTS(%rsi) | |
2802 | + ENABLE_INTERRUPTS(CLBR_NONE) | |
2803 | SAVE_REST | |
2804 | movq $-1,ORIG_RAX(%rsp) | |
2805 | xorl %esi,%esi # oldset | |
2806 | movq %rsp,%rdi # &pt_regs | |
2807 | call do_notify_resume | |
2808 | RESTORE_REST | |
2809 | - XEN_BLOCK_EVENTS(%rsi) | |
2810 | + DISABLE_INTERRUPTS(CLBR_NONE) | |
2811 | TRACE_IRQS_OFF | |
2812 | movl $_TIF_NEED_RESCHED,%edi | |
2813 | GET_THREAD_INFO(%rcx) | |
2814 | @@ -702,7 +702,7 @@ END(spurious_interrupt) | |
2815 | rdmsr | |
2816 | testl %edx,%edx | |
2817 | js 1f | |
2818 | - swapgs | |
2819 | + SWAPGS | |
2820 | xorl %ebx,%ebx | |
2821 | 1: | |
2822 | #endif | |
2823 | @@ -719,8 +719,7 @@ END(spurious_interrupt) | |
2824 | .if \ist | |
2825 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | |
2826 | .endif | |
2827 | -/* cli */ | |
2828 | - XEN_BLOCK_EVENTS(%rsi) | |
2829 | + DISABLE_INTERRUPTS(CLBR_NONE) | |
2830 | .if \irqtrace | |
2831 | TRACE_IRQS_OFF | |
2832 | .endif | |
2833 | @@ -749,10 +748,10 @@ paranoid_swapgs\trace: | |
2834 | .if \trace | |
2835 | TRACE_IRQS_IRETQ 0 | |
2836 | .endif | |
2837 | - swapgs | |
2838 | + SWAPGS_UNSAFE_STACK | |
2839 | paranoid_restore\trace: | |
2840 | RESTORE_ALL 8 | |
2841 | - iretq | |
2842 | + jmp irq_return | |
2843 | paranoid_userspace\trace: | |
2844 | GET_THREAD_INFO(%rcx) | |
2845 | movl threadinfo_flags(%rcx),%ebx | |
2846 | @@ -767,11 +766,11 @@ paranoid_userspace\trace: | |
2847 | .if \trace | |
2848 | TRACE_IRQS_ON | |
2849 | .endif | |
2850 | - sti | |
2851 | + ENABLE_INTERRUPTS(CLBR_NONE) | |
2852 | xorl %esi,%esi /* arg2: oldset */ | |
2853 | movq %rsp,%rdi /* arg1: &pt_regs */ | |
2854 | call do_notify_resume | |
2855 | - cli | |
2856 | + DISABLE_INTERRUPTS(CLBR_NONE) | |
2857 | .if \trace | |
2858 | TRACE_IRQS_OFF | |
2859 | .endif | |
2860 | @@ -780,9 +779,9 @@ paranoid_schedule\trace: | |
2861 | .if \trace | |
2862 | TRACE_IRQS_ON | |
2863 | .endif | |
2864 | - sti | |
2865 | + ENABLE_INTERRUPTS(CLBR_ANY) | |
2866 | call schedule | |
2867 | - cli | |
2868 | + DISABLE_INTERRUPTS(CLBR_ANY) | |
2869 | .if \trace | |
2870 | TRACE_IRQS_OFF | |
2871 | .endif | |
2872 | @@ -846,8 +845,7 @@ error_call_handler: | |
2873 | call *%rax | |
2874 | error_exit: | |
2875 | RESTORE_REST | |
2876 | -/* cli */ | |
2877 | - XEN_BLOCK_EVENTS(%rsi) | |
2878 | + DISABLE_INTERRUPTS(CLBR_NONE) | |
2879 | TRACE_IRQS_OFF | |
2880 | GET_THREAD_INFO(%rcx) | |
2881 | testb $3,CS-ARGOFFSET(%rsp) | |
2882 | @@ -875,7 +873,7 @@ error_kernelspace: | |
2883 | iret run with kernel gs again, so don't set the user space flag. | |
2884 | B stepping K8s sometimes report an truncated RIP for IRET | |
2885 | exceptions returning to compat mode. Check for these here too. */ | |
2886 | - leaq iret_label(%rip),%rbp | |
2887 | + leaq irq_return(%rip),%rbp | |
2888 | cmpq %rbp,RIP(%rsp) | |
2889 | je error_swapgs | |
2890 | movl %ebp,%ebp /* zero extend */ | |
2891 | @@ -930,19 +928,17 @@ END(do_hypervisor_callback) | |
2892 | restore_all_enable_events: | |
2893 | CFI_DEFAULT_STACK adj=1 | |
2894 | TRACE_IRQS_ON | |
2895 | - XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up... | |
2896 | + __ENABLE_INTERRUPTS | |
2897 | ||
2898 | scrit: /**** START OF CRITICAL REGION ****/ | |
2899 | - XEN_TEST_PENDING(%rsi) | |
2900 | + __TEST_PENDING | |
2901 | CFI_REMEMBER_STATE | |
2902 | jnz 14f # process more events if necessary... | |
2903 | - XEN_PUT_VCPU_INFO(%rsi) | |
2904 | RESTORE_ARGS 0,8,0 | |
2905 | HYPERVISOR_IRET 0 | |
2906 | ||
2907 | CFI_RESTORE_STATE | |
2908 | -14: XEN_LOCKED_BLOCK_EVENTS(%rsi) | |
2909 | - XEN_PUT_VCPU_INFO(%rsi) | |
2910 | +14: __DISABLE_INTERRUPTS | |
2911 | SAVE_REST | |
2912 | movq %rsp,%rdi # set the argument again | |
2913 | jmp 11b | |
2914 | @@ -1086,15 +1082,16 @@ ENDPROC(child_rip) | |
2915 | * rdi: name, rsi: argv, rdx: envp | |
2916 | * | |
2917 | * We want to fallback into: | |
2918 | - * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs) | |
2919 | + * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs) | |
2920 | * | |
2921 | * do_sys_execve asm fallback arguments: | |
2922 | - * rdi: name, rsi: argv, rdx: envp, fake frame on the stack | |
2923 | + * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack | |
2924 | */ | |
2925 | ENTRY(kernel_execve) | |
2926 | CFI_STARTPROC | |
2927 | FAKE_STACK_FRAME $0 | |
2928 | SAVE_ALL | |
2929 | + movq %rsp,%rcx | |
2930 | call sys_execve | |
2931 | movq %rax, RAX(%rsp) | |
2932 | RESTORE_REST | |
2933 | @@ -1144,7 +1141,7 @@ do_nmi_callback: | |
2934 | call do_nmi | |
2935 | orl $NMI_MASK,EFLAGS(%rsp) | |
2936 | RESTORE_REST | |
2937 | - XEN_BLOCK_EVENTS(%rsi) | |
2938 | + DISABLE_INTERRUPTS(CLBR_NONE) | |
2939 | TRACE_IRQS_OFF | |
2940 | GET_THREAD_INFO(%rcx) | |
2941 | jmp retint_restore_args | |
00e5a55c BS |
2942 | --- sle11-2009-05-14.orig/arch/x86/kernel/fixup.c 2009-05-14 10:56:29.000000000 +0200 |
2943 | +++ sle11-2009-05-14/arch/x86/kernel/fixup.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
2944 | @@ -36,7 +36,7 @@ |
2945 | ||
2946 | #define DP(_f, _args...) printk(KERN_ALERT " " _f "\n" , ## _args ) | |
2947 | ||
2948 | -fastcall void do_fixup_4gb_segment(struct pt_regs *regs, long error_code) | |
2949 | +void do_fixup_4gb_segment(struct pt_regs *regs, long error_code) | |
2950 | { | |
2951 | static unsigned long printed = 0; | |
2952 | char info[100]; | |
00e5a55c BS |
2953 | --- sle11-2009-05-14.orig/arch/x86/kernel/genapic_64-xen.c 2009-02-16 16:18:36.000000000 +0100 |
2954 | +++ sle11-2009-05-14/arch/x86/kernel/genapic_64-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
2955 | @@ -24,20 +24,13 @@ |
2956 | #include <acpi/acpi_bus.h> | |
2957 | #endif | |
2958 | ||
2959 | -/* | |
2960 | - * which logical CPU number maps to which CPU (physical APIC ID) | |
2961 | - * | |
2962 | - * The following static array is used during kernel startup | |
2963 | - * and the x86_cpu_to_apicid_ptr contains the address of the | |
2964 | - * array during this time. Is it zeroed when the per_cpu | |
2965 | - * data area is removed. | |
2966 | - */ | |
2967 | +/* which logical CPU number maps to which CPU (physical APIC ID) */ | |
2968 | #ifndef CONFIG_XEN | |
2969 | -u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata | |
2970 | +u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata | |
2971 | = { [0 ... NR_CPUS-1] = BAD_APICID }; | |
2972 | -void *x86_cpu_to_apicid_ptr; | |
2973 | +void *x86_cpu_to_apicid_early_ptr; | |
2974 | #endif | |
2975 | -DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID; | |
2976 | +DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; | |
2977 | EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); | |
2978 | ||
2979 | #ifndef CONFIG_XEN | |
00e5a55c BS |
2980 | --- sle11-2009-05-14.orig/arch/x86/kernel/head64-xen.c 2009-02-16 16:18:36.000000000 +0100 |
2981 | +++ sle11-2009-05-14/arch/x86/kernel/head64-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
2982 | @@ -16,6 +16,7 @@ |
2983 | #include <linux/kernel.h> | |
2984 | #include <linux/string.h> | |
2985 | #include <linux/percpu.h> | |
2986 | +#include <linux/start_kernel.h> | |
2987 | #include <linux/module.h> | |
2988 | ||
2989 | #include <asm/processor.h> | |
2990 | @@ -26,6 +27,8 @@ | |
2991 | #include <asm/pgtable.h> | |
2992 | #include <asm/tlbflush.h> | |
2993 | #include <asm/sections.h> | |
2994 | +#include <asm/kdebug.h> | |
2995 | +#include <asm/e820.h> | |
2996 | ||
2997 | unsigned long start_pfn; | |
2998 | ||
2999 | @@ -34,7 +37,7 @@ static void __init zap_identity_mappings | |
3000 | { | |
3001 | pgd_t *pgd = pgd_offset_k(0UL); | |
3002 | pgd_clear(pgd); | |
3003 | - __flush_tlb(); | |
3004 | + __flush_tlb_all(); | |
3005 | } | |
3006 | ||
3007 | /* Don't add a printk in there. printk relies on the PDA which is not initialized | |
3008 | @@ -72,6 +75,37 @@ EXPORT_SYMBOL(machine_to_phys_mapping); | |
3009 | unsigned int machine_to_phys_order; | |
3010 | EXPORT_SYMBOL(machine_to_phys_order); | |
3011 | ||
3012 | +#define EBDA_ADDR_POINTER 0x40E | |
3013 | + | |
3014 | +static __init void reserve_ebda(void) | |
3015 | +{ | |
3016 | +#ifndef CONFIG_XEN | |
3017 | + unsigned ebda_addr, ebda_size; | |
3018 | + | |
3019 | + /* | |
3020 | + * there is a real-mode segmented pointer pointing to the | |
3021 | + * 4K EBDA area at 0x40E | |
3022 | + */ | |
3023 | + ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER); | |
3024 | + ebda_addr <<= 4; | |
3025 | + | |
3026 | + if (!ebda_addr) | |
3027 | + return; | |
3028 | + | |
3029 | + ebda_size = *(unsigned short *)__va(ebda_addr); | |
3030 | + | |
3031 | + /* Round EBDA up to pages */ | |
3032 | + if (ebda_size == 0) | |
3033 | + ebda_size = 1; | |
3034 | + ebda_size <<= 10; | |
3035 | + ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE); | |
3036 | + if (ebda_size > 64*1024) | |
3037 | + ebda_size = 64*1024; | |
3038 | + | |
3039 | + reserve_early(ebda_addr, ebda_addr + ebda_size, "EBDA"); | |
3040 | +#endif | |
3041 | +} | |
3042 | + | |
3043 | void __init x86_64_start_kernel(char * real_mode_data) | |
3044 | { | |
3045 | struct xen_machphys_mapping mapping; | |
3046 | @@ -103,8 +137,16 @@ void __init x86_64_start_kernel(char * r | |
3047 | /* Make NULL pointers segfault */ | |
3048 | zap_identity_mappings(); | |
3049 | ||
3050 | - for (i = 0; i < IDT_ENTRIES; i++) | |
3051 | + /* Cleanup the over mapped high alias */ | |
3052 | + cleanup_highmap(); | |
3053 | + | |
3054 | + for (i = 0; i < IDT_ENTRIES; i++) { | |
3055 | +#ifdef CONFIG_EARLY_PRINTK | |
3056 | + set_intr_gate(i, &early_idt_handlers[i]); | |
3057 | +#else | |
3058 | set_intr_gate(i, early_idt_handler); | |
3059 | +#endif | |
3060 | + } | |
3061 | load_idt((const struct desc_ptr *)&idt_descr); | |
3062 | #endif | |
3063 | ||
3064 | @@ -115,8 +157,19 @@ void __init x86_64_start_kernel(char * r | |
3065 | ||
3066 | pda_init(0); | |
3067 | copy_bootdata(__va(real_mode_data)); | |
3068 | -#ifdef CONFIG_SMP | |
3069 | - cpu_set(0, cpu_online_map); | |
3070 | -#endif | |
3071 | + | |
3072 | + reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); | |
3073 | + | |
3074 | + reserve_early(round_up(__pa_symbol(&_end), PAGE_SIZE), | |
3075 | + start_pfn << PAGE_SHIFT, "Xen provided"); | |
3076 | + | |
3077 | + reserve_ebda(); | |
3078 | + | |
3079 | + /* | |
3080 | + * At this point everything still needed from the boot loader | |
3081 | + * or BIOS or kernel text should be early reserved or marked not | |
3082 | + * RAM in e820. All other memory is free game. | |
3083 | + */ | |
3084 | + | |
3085 | start_kernel(); | |
3086 | } | |
00e5a55c BS |
3087 | --- sle11-2009-05-14.orig/arch/x86/kernel/head_32-xen.S 2009-02-16 16:17:21.000000000 +0100 |
3088 | +++ sle11-2009-05-14/arch/x86/kernel/head_32-xen.S 2009-03-16 16:33:40.000000000 +0100 | |
3089 | @@ -3,6 +3,7 @@ | |
3090 | .text | |
3091 | #include <linux/elfnote.h> | |
3092 | #include <linux/threads.h> | |
3093 | +#include <linux/init.h> | |
3094 | #include <linux/linkage.h> | |
3095 | #include <asm/segment.h> | |
3096 | #include <asm/page.h> | |
3097 | @@ -88,7 +89,7 @@ ENTRY(_stext) | |
3098 | */ | |
3099 | .section ".bss.page_aligned","wa" | |
3100 | .align PAGE_SIZE_asm | |
3101 | -ENTRY(swapper_pg_pmd) | |
3102 | +ENTRY(swapper_pg_fixmap) | |
3103 | .fill 1024,4,0 | |
3104 | ENTRY(empty_zero_page) | |
3105 | .fill 4096,1,0 | |
3106 | --- sle11-2009-05-14.orig/arch/x86/kernel/init_task-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
3107 | +++ sle11-2009-05-14/arch/x86/kernel/init_task-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
3108 | @@ -19,7 +19,7 @@ static struct sighand_struct init_sighan |
3109 | #endif | |
3110 | struct mm_struct init_mm = INIT_MM(init_mm); | |
3111 | #undef swapper_pg_dir | |
3112 | -EXPORT_SYMBOL(init_mm); | |
3113 | +EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */ | |
3114 | ||
3115 | /* | |
3116 | * Initial thread structure. | |
00e5a55c BS |
3117 | --- sle11-2009-05-14.orig/arch/x86/kernel/io_apic_32-xen.c 2009-02-16 16:18:36.000000000 +0100 |
3118 | +++ sle11-2009-05-14/arch/x86/kernel/io_apic_32-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
3119 | @@ -35,6 +35,7 @@ |
3120 | #include <linux/htirq.h> | |
3121 | #include <linux/freezer.h> | |
3122 | #include <linux/kthread.h> | |
3123 | +#include <linux/jiffies.h> /* time_after() */ | |
3124 | ||
3125 | #include <asm/io.h> | |
3126 | #include <asm/smp.h> | |
3127 | @@ -48,8 +49,6 @@ | |
3128 | #include <mach_apic.h> | |
3129 | #include <mach_apicdef.h> | |
3130 | ||
3131 | -#include "io_ports.h" | |
3132 | - | |
3133 | #ifdef CONFIG_XEN | |
3134 | #include <xen/interface/xen.h> | |
3135 | #include <xen/interface/physdev.h> | |
3136 | @@ -400,7 +399,7 @@ static void set_ioapic_affinity_irq(unsi | |
3137 | # include <asm/processor.h> /* kernel_thread() */ | |
3138 | # include <linux/kernel_stat.h> /* kstat */ | |
3139 | # include <linux/slab.h> /* kmalloc() */ | |
3140 | -# include <linux/timer.h> /* time_after() */ | |
3141 | +# include <linux/timer.h> | |
3142 | ||
3143 | #define IRQBALANCE_CHECK_ARCH -999 | |
3144 | #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) | |
3145 | @@ -777,7 +776,7 @@ late_initcall(balanced_irq_init); | |
3146 | #endif | |
3147 | ||
3148 | #ifndef CONFIG_SMP | |
3149 | -void fastcall send_IPI_self(int vector) | |
3150 | +void send_IPI_self(int vector) | |
3151 | { | |
3152 | #ifndef CONFIG_XEN | |
3153 | unsigned int cfg; | |
3154 | @@ -1959,7 +1958,7 @@ static int __init timer_irq_works(void) | |
3155 | * might have cached one ExtINT interrupt. Finally, at | |
3156 | * least one tick may be lost due to delays. | |
3157 | */ | |
3158 | - if (jiffies - t1 > 4) | |
3159 | + if (time_after(jiffies, t1 + 4)) | |
3160 | return 1; | |
3161 | ||
3162 | return 0; | |
3163 | @@ -2142,7 +2141,7 @@ static struct irq_chip lapic_chip __read | |
3164 | .eoi = ack_apic, | |
3165 | }; | |
3166 | ||
3167 | -static void setup_nmi (void) | |
3168 | +static void __init setup_nmi(void) | |
3169 | { | |
3170 | /* | |
3171 | * Dirty trick to enable the NMI watchdog ... | |
3172 | @@ -2155,7 +2154,7 @@ static void setup_nmi (void) | |
3173 | */ | |
3174 | apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); | |
3175 | ||
3176 | - on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1); | |
3177 | + enable_NMI_through_LVT0(); | |
3178 | ||
3179 | apic_printk(APIC_VERBOSE, " done.\n"); | |
3180 | } | |
3181 | @@ -2479,7 +2478,7 @@ static int ioapic_resume(struct sys_devi | |
3182 | } | |
3183 | ||
3184 | static struct sysdev_class ioapic_sysdev_class = { | |
3185 | - set_kset_name("ioapic"), | |
3186 | + .name = "ioapic", | |
3187 | .suspend = ioapic_suspend, | |
3188 | .resume = ioapic_resume, | |
3189 | }; | |
00e5a55c BS |
3190 | --- sle11-2009-05-14.orig/arch/x86/kernel/io_apic_64-xen.c 2009-02-16 16:18:36.000000000 +0100 |
3191 | +++ sle11-2009-05-14/arch/x86/kernel/io_apic_64-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
3192 | @@ -32,9 +32,11 @@ |
3193 | #include <linux/msi.h> | |
3194 | #include <linux/htirq.h> | |
3195 | #include <linux/dmar.h> | |
3196 | +#include <linux/jiffies.h> | |
3197 | #ifdef CONFIG_ACPI | |
3198 | #include <acpi/acpi_bus.h> | |
3199 | #endif | |
3200 | +#include <linux/bootmem.h> | |
3201 | ||
3202 | #include <asm/idle.h> | |
3203 | #include <asm/io.h> | |
3204 | @@ -1064,7 +1066,7 @@ void __apicdebuginit print_local_APIC(vo | |
3205 | v = apic_read(APIC_LVR); | |
3206 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); | |
3207 | ver = GET_APIC_VERSION(v); | |
3208 | - maxlvt = get_maxlvt(); | |
3209 | + maxlvt = lapic_get_maxlvt(); | |
3210 | ||
3211 | v = apic_read(APIC_TASKPRI); | |
3212 | printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); | |
3213 | @@ -1165,7 +1167,7 @@ void __apicdebuginit print_PIC(void) | |
3214 | } | |
3215 | #endif /* !CONFIG_XEN */ | |
3216 | ||
3217 | -static void __init enable_IO_APIC(void) | |
3218 | +void __init enable_IO_APIC(void) | |
3219 | { | |
3220 | union IO_APIC_reg_01 reg_01; | |
3221 | #ifndef CONFIG_XEN | |
3222 | @@ -1299,7 +1301,7 @@ static int __init timer_irq_works(void) | |
3223 | */ | |
3224 | ||
3225 | /* jiffies wrap? */ | |
3226 | - if (jiffies - t1 > 4) | |
3227 | + if (time_after(jiffies, t1 + 4)) | |
3228 | return 1; | |
3229 | return 0; | |
3230 | } | |
3231 | @@ -1412,7 +1414,7 @@ static void irq_complete_move(unsigned i | |
3232 | if (likely(!cfg->move_in_progress)) | |
3233 | return; | |
3234 | ||
3235 | - vector = ~get_irq_regs()->orig_rax; | |
3236 | + vector = ~get_irq_regs()->orig_ax; | |
3237 | me = smp_processor_id(); | |
3238 | if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { | |
3239 | cpumask_t cleanup_mask; | |
3240 | @@ -1439,7 +1441,7 @@ static void ack_apic_level(unsigned int | |
3241 | int do_unmask_irq = 0; | |
3242 | ||
3243 | irq_complete_move(irq); | |
3244 | -#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) | |
3245 | +#ifdef CONFIG_GENERIC_PENDING_IRQ | |
3246 | /* If we are moving the irq we need to mask it */ | |
3247 | if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { | |
3248 | do_unmask_irq = 1; | |
3249 | @@ -1570,7 +1572,7 @@ static struct hw_interrupt_type lapic_ir | |
3250 | .end = end_lapic_irq, | |
3251 | }; | |
3252 | ||
3253 | -static void setup_nmi (void) | |
3254 | +static void __init setup_nmi(void) | |
3255 | { | |
3256 | /* | |
3257 | * Dirty trick to enable the NMI watchdog ... | |
3258 | @@ -1583,7 +1585,7 @@ static void setup_nmi (void) | |
3259 | */ | |
3260 | printk(KERN_INFO "activating NMI Watchdog ..."); | |
3261 | ||
3262 | - enable_NMI_through_LVT0(NULL); | |
3263 | + enable_NMI_through_LVT0(); | |
3264 | ||
3265 | printk(" done.\n"); | |
3266 | } | |
3267 | @@ -1659,7 +1661,7 @@ static inline void unlock_ExtINT_logic(v | |
3268 | * | |
3269 | * FIXME: really need to revamp this for modern platforms only. | |
3270 | */ | |
3271 | -static inline void check_timer(void) | |
3272 | +static inline void __init check_timer(void) | |
3273 | { | |
3274 | struct irq_cfg *cfg = irq_cfg + 0; | |
3275 | int apic1, pin1, apic2, pin2; | |
3276 | @@ -1863,7 +1865,7 @@ static int ioapic_resume(struct sys_devi | |
3277 | } | |
3278 | ||
3279 | static struct sysdev_class ioapic_sysdev_class = { | |
3280 | - set_kset_name("ioapic"), | |
3281 | + .name = "ioapic", | |
3282 | .suspend = ioapic_suspend, | |
3283 | .resume = ioapic_resume, | |
3284 | }; | |
3285 | @@ -2303,5 +2305,93 @@ void __init setup_ioapic_dest(void) | |
3286 | } | |
3287 | } | |
3288 | #endif | |
3289 | -#endif /* !CONFIG_XEN */ | |
3290 | ||
3291 | +#define IOAPIC_RESOURCE_NAME_SIZE 11 | |
3292 | + | |
3293 | +static struct resource *ioapic_resources; | |
3294 | + | |
3295 | +static struct resource * __init ioapic_setup_resources(void) | |
3296 | +{ | |
3297 | + unsigned long n; | |
3298 | + struct resource *res; | |
3299 | + char *mem; | |
3300 | + int i; | |
3301 | + | |
3302 | + if (nr_ioapics <= 0) | |
3303 | + return NULL; | |
3304 | + | |
3305 | + n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); | |
3306 | + n *= nr_ioapics; | |
3307 | + | |
3308 | + mem = alloc_bootmem(n); | |
3309 | + res = (void *)mem; | |
3310 | + | |
3311 | + if (mem != NULL) { | |
3312 | + memset(mem, 0, n); | |
3313 | + mem += sizeof(struct resource) * nr_ioapics; | |
3314 | + | |
3315 | + for (i = 0; i < nr_ioapics; i++) { | |
3316 | + res[i].name = mem; | |
3317 | + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; | |
3318 | + sprintf(mem, "IOAPIC %u", i); | |
3319 | + mem += IOAPIC_RESOURCE_NAME_SIZE; | |
3320 | + } | |
3321 | + } | |
3322 | + | |
3323 | + ioapic_resources = res; | |
3324 | + | |
3325 | + return res; | |
3326 | +} | |
3327 | + | |
3328 | +void __init ioapic_init_mappings(void) | |
3329 | +{ | |
3330 | + unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; | |
3331 | + struct resource *ioapic_res; | |
3332 | + int i; | |
3333 | + | |
3334 | + ioapic_res = ioapic_setup_resources(); | |
3335 | + for (i = 0; i < nr_ioapics; i++) { | |
3336 | + if (smp_found_config) { | |
3337 | + ioapic_phys = mp_ioapics[i].mpc_apicaddr; | |
3338 | + } else { | |
3339 | + ioapic_phys = (unsigned long) | |
3340 | + alloc_bootmem_pages(PAGE_SIZE); | |
3341 | + ioapic_phys = __pa(ioapic_phys); | |
3342 | + } | |
3343 | + set_fixmap_nocache(idx, ioapic_phys); | |
3344 | + apic_printk(APIC_VERBOSE, | |
3345 | + "mapped IOAPIC to %016lx (%016lx)\n", | |
3346 | + __fix_to_virt(idx), ioapic_phys); | |
3347 | + idx++; | |
3348 | + | |
3349 | + if (ioapic_res != NULL) { | |
3350 | + ioapic_res->start = ioapic_phys; | |
3351 | + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; | |
3352 | + ioapic_res++; | |
3353 | + } | |
3354 | + } | |
3355 | +} | |
3356 | + | |
3357 | +static int __init ioapic_insert_resources(void) | |
3358 | +{ | |
3359 | + int i; | |
3360 | + struct resource *r = ioapic_resources; | |
3361 | + | |
3362 | + if (!r) { | |
3363 | + printk(KERN_ERR | |
3364 | + "IO APIC resources could be not be allocated.\n"); | |
3365 | + return -1; | |
3366 | + } | |
3367 | + | |
3368 | + for (i = 0; i < nr_ioapics; i++) { | |
3369 | + insert_resource(&iomem_resource, r); | |
3370 | + r++; | |
3371 | + } | |
3372 | + | |
3373 | + return 0; | |
3374 | +} | |
3375 | + | |
3376 | +/* Insert the IO APIC resources after PCI initialization has occurred to handle | |
3377 | + * IO APICs that are mapped in on a BAR in PCI space. */ | |
3378 | +late_initcall(ioapic_insert_resources); | |
3379 | +#endif /* !CONFIG_XEN */ | |
00e5a55c BS |
3380 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 |
3381 | +++ sle11-2009-05-14/arch/x86/kernel/ioport-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
3382 | @@ -0,0 +1,112 @@ | |
3383 | +/* | |
3384 | + * This contains the io-permission bitmap code - written by obz, with changes | |
3385 | + * by Linus. 32/64 bits code unification by Miguel Botón. | |
3386 | + */ | |
3387 | + | |
3388 | +#include <linux/sched.h> | |
3389 | +#include <linux/kernel.h> | |
3390 | +#include <linux/capability.h> | |
3391 | +#include <linux/errno.h> | |
3392 | +#include <linux/types.h> | |
3393 | +#include <linux/ioport.h> | |
3394 | +#include <linux/smp.h> | |
3395 | +#include <linux/stddef.h> | |
3396 | +#include <linux/slab.h> | |
3397 | +#include <linux/thread_info.h> | |
3398 | +#include <linux/syscalls.h> | |
3399 | +#include <xen/interface/physdev.h> | |
3400 | + | |
3401 | +/* Set EXTENT bits starting at BASE in BITMAP to value NEW_VALUE. */ | |
3402 | +static void set_bitmap(unsigned long *bitmap, unsigned int base, | |
3403 | + unsigned int extent, int new_value) | |
3404 | +{ | |
3405 | + unsigned int i; | |
3406 | + | |
3407 | + for (i = base; i < base + extent; i++) { | |
3408 | + if (new_value) | |
3409 | + __set_bit(i, bitmap); | |
3410 | + else | |
3411 | + __clear_bit(i, bitmap); | |
3412 | + } | |
3413 | +} | |
3414 | + | |
3415 | +/* | |
3416 | + * this changes the io permissions bitmap in the current task. | |
3417 | + */ | |
3418 | +asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |
3419 | +{ | |
3420 | + struct thread_struct * t = &current->thread; | |
3421 | + struct physdev_set_iobitmap set_iobitmap; | |
3422 | + | |
3423 | + if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) | |
3424 | + return -EINVAL; | |
3425 | + if (turn_on && !capable(CAP_SYS_RAWIO)) | |
3426 | + return -EPERM; | |
3427 | + | |
3428 | + /* | |
3429 | + * If it's the first ioperm() call in this thread's lifetime, set the | |
3430 | + * IO bitmap up. ioperm() is much less timing critical than clone(), | |
3431 | + * this is why we delay this operation until now: | |
3432 | + */ | |
3433 | + if (!t->io_bitmap_ptr) { | |
3434 | + unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | |
3435 | + | |
3436 | + if (!bitmap) | |
3437 | + return -ENOMEM; | |
3438 | + | |
3439 | + memset(bitmap, 0xff, IO_BITMAP_BYTES); | |
3440 | + t->io_bitmap_ptr = bitmap; | |
3441 | + set_thread_flag(TIF_IO_BITMAP); | |
3442 | + | |
3443 | + set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap); | |
3444 | + set_iobitmap.nr_ports = IO_BITMAP_BITS; | |
3445 | + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, | |
3446 | + &set_iobitmap)); | |
3447 | + } | |
3448 | + | |
3449 | + set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); | |
3450 | + | |
3451 | + return 0; | |
3452 | +} | |
3453 | + | |
3454 | +/* | |
3455 | + * sys_iopl has to be used when you want to access the IO ports | |
3456 | + * beyond the 0x3ff range: to get the full 65536 ports bitmapped | |
3457 | + * you'd need 8kB of bitmaps/process, which is a bit excessive. | |
3458 | + */ | |
3459 | +static int do_iopl(unsigned int level, struct thread_struct *t) | |
3460 | +{ | |
3461 | + unsigned int old = t->iopl >> 12; | |
3462 | + | |
3463 | + if (level > 3) | |
3464 | + return -EINVAL; | |
3465 | + /* Trying to gain more privileges? */ | |
3466 | + if (level > old) { | |
3467 | + if (!capable(CAP_SYS_RAWIO)) | |
3468 | + return -EPERM; | |
3469 | + } | |
3470 | + | |
3471 | + return 0; | |
3472 | +} | |
3473 | + | |
3474 | +#ifdef CONFIG_X86_32 | |
3475 | +asmlinkage long sys_iopl(unsigned long regsp) | |
3476 | +{ | |
3477 | + struct pt_regs *regs = (struct pt_regs *)&regsp; | |
3478 | + unsigned int level = regs->bx; | |
3479 | +#else | |
3480 | +asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs) | |
3481 | +{ | |
3482 | +#endif | |
3483 | + struct thread_struct *t = &current->thread; | |
3484 | + int rc; | |
3485 | + | |
3486 | + rc = do_iopl(level, t); | |
3487 | + if (rc < 0) | |
3488 | + goto out; | |
3489 | + | |
3490 | + t->iopl = level << 12; | |
3491 | + set_iopl_mask(t->iopl); | |
3492 | +out: | |
3493 | + return rc; | |
3494 | +} | |
3495 | --- sle11-2009-05-14.orig/arch/x86/kernel/ioport_32-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
3496 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
3497 | @@ -1,121 +0,0 @@ | |
3498 | -/* | |
3499 | - * This contains the io-permission bitmap code - written by obz, with changes | |
3500 | - * by Linus. | |
3501 | - */ | |
3502 | - | |
3503 | -#include <linux/sched.h> | |
3504 | -#include <linux/kernel.h> | |
3505 | -#include <linux/capability.h> | |
3506 | -#include <linux/errno.h> | |
3507 | -#include <linux/types.h> | |
3508 | -#include <linux/ioport.h> | |
3509 | -#include <linux/smp.h> | |
3510 | -#include <linux/stddef.h> | |
3511 | -#include <linux/slab.h> | |
3512 | -#include <linux/thread_info.h> | |
3513 | -#include <linux/syscalls.h> | |
3514 | -#include <xen/interface/physdev.h> | |
3515 | - | |
3516 | -/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ | |
3517 | -static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) | |
3518 | -{ | |
3519 | - unsigned long mask; | |
3520 | - unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG); | |
3521 | - unsigned int low_index = base & (BITS_PER_LONG-1); | |
3522 | - int length = low_index + extent; | |
3523 | - | |
3524 | - if (low_index != 0) { | |
3525 | - mask = (~0UL << low_index); | |
3526 | - if (length < BITS_PER_LONG) | |
3527 | - mask &= ~(~0UL << length); | |
3528 | - if (new_value) | |
3529 | - *bitmap_base++ |= mask; | |
3530 | - else | |
3531 | - *bitmap_base++ &= ~mask; | |
3532 | - length -= BITS_PER_LONG; | |
3533 | - } | |
3534 | - | |
3535 | - mask = (new_value ? ~0UL : 0UL); | |
3536 | - while (length >= BITS_PER_LONG) { | |
3537 | - *bitmap_base++ = mask; | |
3538 | - length -= BITS_PER_LONG; | |
3539 | - } | |
3540 | - | |
3541 | - if (length > 0) { | |
3542 | - mask = ~(~0UL << length); | |
3543 | - if (new_value) | |
3544 | - *bitmap_base++ |= mask; | |
3545 | - else | |
3546 | - *bitmap_base++ &= ~mask; | |
3547 | - } | |
3548 | -} | |
3549 | - | |
3550 | - | |
3551 | -/* | |
3552 | - * this changes the io permissions bitmap in the current task. | |
3553 | - */ | |
3554 | -asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |
3555 | -{ | |
3556 | - struct thread_struct * t = &current->thread; | |
3557 | - unsigned long *bitmap; | |
3558 | - struct physdev_set_iobitmap set_iobitmap; | |
3559 | - | |
3560 | - if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) | |
3561 | - return -EINVAL; | |
3562 | - if (turn_on && !capable(CAP_SYS_RAWIO)) | |
3563 | - return -EPERM; | |
3564 | - | |
3565 | - /* | |
3566 | - * If it's the first ioperm() call in this thread's lifetime, set the | |
3567 | - * IO bitmap up. ioperm() is much less timing critical than clone(), | |
3568 | - * this is why we delay this operation until now: | |
3569 | - */ | |
3570 | - if (!t->io_bitmap_ptr) { | |
3571 | - bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | |
3572 | - if (!bitmap) | |
3573 | - return -ENOMEM; | |
3574 | - | |
3575 | - memset(bitmap, 0xff, IO_BITMAP_BYTES); | |
3576 | - t->io_bitmap_ptr = bitmap; | |
3577 | - set_thread_flag(TIF_IO_BITMAP); | |
3578 | - | |
3579 | - set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap); | |
3580 | - set_iobitmap.nr_ports = IO_BITMAP_BITS; | |
3581 | - WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, | |
3582 | - &set_iobitmap)); | |
3583 | - } | |
3584 | - | |
3585 | - set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); | |
3586 | - | |
3587 | - return 0; | |
3588 | -} | |
3589 | - | |
3590 | -/* | |
3591 | - * sys_iopl has to be used when you want to access the IO ports | |
3592 | - * beyond the 0x3ff range: to get the full 65536 ports bitmapped | |
cc90b958 BS |
3593 | - * you'd need 8kB of bitmaps/process, which is a bit excessive. |
3594 | - * | |
3595 | - * Here we just change the eflags value on the stack: we allow | |
3596 | - * only the super-user to do it. This depends on the stack-layout | |
3597 | - * on system-call entry - see also fork() and the signal handling | |
3598 | - * code. | |
3599 | - */ | |
3600 | - | |
3601 | -asmlinkage long sys_iopl(unsigned long unused) | |
3602 | -{ | |
3603 | - volatile struct pt_regs * regs = (struct pt_regs *) &unused; | |
3604 | - unsigned int level = regs->ebx; | |
3605 | - struct thread_struct *t = &current->thread; | |
3606 | - unsigned int old = (t->iopl >> 12) & 3; | |
3607 | - | |
3608 | - if (level > 3) | |
3609 | - return -EINVAL; | |
3610 | - /* Trying to gain more privileges? */ | |
3611 | - if (level > old) { | |
3612 | - if (!capable(CAP_SYS_RAWIO)) | |
3613 | - return -EPERM; | |
3614 | - } | |
3615 | - t->iopl = level << 12; | |
3616 | - set_iopl_mask(t->iopl); | |
3617 | - return 0; | |
3618 | -} | |
00e5a55c BS |
3619 | --- sle11-2009-05-14.orig/arch/x86/kernel/ioport_64-xen.c 2009-02-16 16:18:36.000000000 +0100 |
3620 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
cc90b958 BS |
3621 | @@ -1,99 +0,0 @@ |
3622 | -/* | |
3623 | - * This contains the io-permission bitmap code - written by obz, with changes | |
3624 | - * by Linus. | |
3625 | - */ | |
3626 | - | |
3627 | -#include <linux/sched.h> | |
3628 | -#include <linux/kernel.h> | |
3629 | -#include <linux/capability.h> | |
3630 | -#include <linux/errno.h> | |
3631 | -#include <linux/types.h> | |
3632 | -#include <linux/ioport.h> | |
3633 | -#include <linux/mm.h> | |
3634 | -#include <linux/smp.h> | |
3635 | -#include <linux/stddef.h> | |
3636 | -#include <linux/slab.h> | |
3637 | -#include <linux/thread_info.h> | |
3638 | -#include <linux/syscalls.h> | |
3639 | -#include <xen/interface/physdev.h> | |
3640 | - | |
3641 | -/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ | |
3642 | -static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) | |
3643 | -{ | |
3644 | - int i; | |
3645 | - | |
3646 | - if (new_value) | |
3647 | - for (i = base; i < base + extent; i++) | |
3648 | - __set_bit(i, bitmap); | |
3649 | - else | |
3650 | - for (i = base; i < base + extent; i++) | |
3651 | - clear_bit(i, bitmap); | |
3652 | -} | |
3653 | - | |
3654 | -/* | |
3655 | - * this changes the io permissions bitmap in the current task. | |
3656 | - */ | |
3657 | -asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |
3658 | -{ | |
3659 | - struct thread_struct * t = &current->thread; | |
3660 | - unsigned long *bitmap; | |
3661 | - struct physdev_set_iobitmap set_iobitmap; | |
3662 | - | |
3663 | - if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) | |
3664 | - return -EINVAL; | |
3665 | - if (turn_on && !capable(CAP_SYS_RAWIO)) | |
3666 | - return -EPERM; | |
3667 | - | |
3668 | - /* | |
3669 | - * If it's the first ioperm() call in this thread's lifetime, set the | |
3670 | - * IO bitmap up. ioperm() is much less timing critical than clone(), | |
3671 | - * this is why we delay this operation until now: | |
3672 | - */ | |
3673 | - if (!t->io_bitmap_ptr) { | |
3674 | - bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | |
3675 | - if (!bitmap) | |
3676 | - return -ENOMEM; | |
3677 | - | |
3678 | - memset(bitmap, 0xff, IO_BITMAP_BYTES); | |
3679 | - t->io_bitmap_ptr = bitmap; | |
3680 | - set_thread_flag(TIF_IO_BITMAP); | |
3681 | - | |
3682 | - set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap); | |
3683 | - set_iobitmap.nr_ports = IO_BITMAP_BITS; | |
3684 | - WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, | |
3685 | - &set_iobitmap)); | |
3686 | - } | |
3687 | - | |
3688 | - set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); | |
3689 | - | |
3690 | - return 0; | |
3691 | -} | |
3692 | - | |
3693 | -/* | |
3694 | - * sys_iopl has to be used when you want to access the IO ports | |
3695 | - * beyond the 0x3ff range: to get the full 65536 ports bitmapped | |
3696 | - * you'd need 8kB of bitmaps/process, which is a bit excessive. | |
3697 | - * | |
3698 | - */ | |
3699 | - | |
3700 | -asmlinkage long sys_iopl(unsigned int new_iopl, struct pt_regs *regs) | |
3701 | -{ | |
3702 | - unsigned int old_iopl = current->thread.iopl; | |
3703 | - struct physdev_set_iopl set_iopl; | |
3704 | - | |
3705 | - if (new_iopl > 3) | |
3706 | - return -EINVAL; | |
3707 | - | |
3708 | - /* Need "raw I/O" privileges for direct port access. */ | |
3709 | - if ((new_iopl > old_iopl) && !capable(CAP_SYS_RAWIO)) | |
3710 | - return -EPERM; | |
3711 | - | |
3712 | - /* Change our version of the privilege levels. */ | |
3713 | - current->thread.iopl = new_iopl; | |
3714 | - | |
3715 | - /* Force the change at ring 0. */ | |
3716 | - set_iopl.iopl = (new_iopl == 0) ? 1 : new_iopl; | |
3717 | - WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl)); | |
3718 | - | |
3719 | - return 0; | |
3720 | -} | |
00e5a55c BS |
3721 | --- sle11-2009-05-14.orig/arch/x86/kernel/irq_32-xen.c 2009-02-16 16:18:36.000000000 +0100 |
3722 | +++ sle11-2009-05-14/arch/x86/kernel/irq_32-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
3723 | @@ -66,11 +66,11 @@ static union irq_ctx *softirq_ctx[NR_CPU |
3724 | * SMP cross-CPU interrupts have their own specific | |
3725 | * handlers). | |
3726 | */ | |
3727 | -fastcall unsigned int do_IRQ(struct pt_regs *regs) | |
3728 | +unsigned int do_IRQ(struct pt_regs *regs) | |
3729 | { | |
3730 | struct pt_regs *old_regs; | |
3731 | /* high bit used in ret_from_ code */ | |
3732 | - int irq = ~regs->orig_eax; | |
3733 | + int irq = ~regs->orig_ax; | |
3734 | struct irq_desc *desc = irq_desc + irq; | |
3735 | #ifdef CONFIG_4KSTACKS | |
3736 | union irq_ctx *curctx, *irqctx; | |
3737 | @@ -88,13 +88,13 @@ fastcall unsigned int do_IRQ(struct pt_r | |
3738 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | |
3739 | /* Debugging check for stack overflow: is there less than 1KB free? */ | |
3740 | { | |
3741 | - long esp; | |
3742 | + long sp; | |
3743 | ||
3744 | __asm__ __volatile__("andl %%esp,%0" : | |
3745 | - "=r" (esp) : "0" (THREAD_SIZE - 1)); | |
3746 | - if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { | |
3747 | + "=r" (sp) : "0" (THREAD_SIZE - 1)); | |
3748 | + if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) { | |
3749 | printk("do_IRQ: stack overflow: %ld\n", | |
3750 | - esp - sizeof(struct thread_info)); | |
3751 | + sp - sizeof(struct thread_info)); | |
3752 | dump_stack(); | |
3753 | } | |
3754 | } | |
3755 | @@ -112,7 +112,7 @@ fastcall unsigned int do_IRQ(struct pt_r | |
3756 | * current stack (which is the irq stack already after all) | |
3757 | */ | |
3758 | if (curctx != irqctx) { | |
3759 | - int arg1, arg2, ebx; | |
3760 | + int arg1, arg2, bx; | |
3761 | ||
3762 | /* build the stack frame on the IRQ stack */ | |
3763 | isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); | |
3764 | @@ -128,10 +128,10 @@ fastcall unsigned int do_IRQ(struct pt_r | |
3765 | (curctx->tinfo.preempt_count & SOFTIRQ_MASK); | |
3766 | ||
3767 | asm volatile( | |
3768 | - " xchgl %%ebx,%%esp \n" | |
3769 | - " call *%%edi \n" | |
3770 | - " movl %%ebx,%%esp \n" | |
3771 | - : "=a" (arg1), "=d" (arg2), "=b" (ebx) | |
3772 | + " xchgl %%ebx,%%esp \n" | |
3773 | + " call *%%edi \n" | |
3774 | + " movl %%ebx,%%esp \n" | |
3775 | + : "=a" (arg1), "=d" (arg2), "=b" (bx) | |
3776 | : "0" (irq), "1" (desc), "2" (isp), | |
3777 | "D" (desc->handle_irq) | |
3778 | : "memory", "cc" | |
00e5a55c BS |
3779 | --- sle11-2009-05-14.orig/arch/x86/kernel/irq_64-xen.c 2009-02-16 16:18:36.000000000 +0100 |
3780 | +++ sle11-2009-05-14/arch/x86/kernel/irq_64-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
3781 | @@ -20,6 +20,28 @@ |
3782 | ||
3783 | atomic_t irq_err_count; | |
3784 | ||
3785 | +/* | |
3786 | + * 'what should we do if we get a hw irq event on an illegal vector'. | |
3787 | + * each architecture has to answer this themselves. | |
3788 | + */ | |
3789 | +void ack_bad_irq(unsigned int irq) | |
3790 | +{ | |
3791 | + printk(KERN_WARNING "unexpected IRQ trap at irq %02x\n", irq); | |
3792 | +#ifdef CONFIG_X86_LOCAL_APIC | |
3793 | + /* | |
3794 | + * Currently unexpected vectors happen only on SMP and APIC. | |
3795 | + * We _must_ ack these because every local APIC has only N | |
3796 | + * irq slots per priority level, and a 'hanging, unacked' IRQ | |
3797 | + * holds up an irq slot - in excessive cases (when multiple | |
3798 | + * unexpected vectors occur) that might lock up the APIC | |
3799 | + * completely. | |
3800 | + * But don't ack when the APIC is disabled. -AK | |
3801 | + */ | |
3802 | + if (!disable_apic) | |
3803 | + ack_APIC_irq(); | |
3804 | +#endif | |
3805 | +} | |
3806 | + | |
3807 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | |
3808 | /* | |
3809 | * Probabilistic stack overflow check: | |
3810 | @@ -33,11 +55,11 @@ static inline void stack_overflow_check( | |
3811 | u64 curbase = (u64)task_stack_page(current); | |
3812 | static unsigned long warned = -60*HZ; | |
3813 | ||
3814 | - if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE && | |
3815 | - regs->rsp < curbase + sizeof(struct thread_info) + 128 && | |
3816 | + if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE && | |
3817 | + regs->sp < curbase + sizeof(struct thread_info) + 128 && | |
3818 | time_after(jiffies, warned + 60*HZ)) { | |
3819 | - printk("do_IRQ: %s near stack overflow (cur:%Lx,rsp:%lx)\n", | |
3820 | - current->comm, curbase, regs->rsp); | |
3821 | + printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", | |
3822 | + current->comm, curbase, regs->sp); | |
3823 | show_stack(NULL,NULL); | |
3824 | warned = jiffies; | |
3825 | } | |
3826 | @@ -150,7 +172,7 @@ asmlinkage unsigned int do_IRQ(struct pt | |
3827 | struct pt_regs *old_regs = set_irq_regs(regs); | |
3828 | ||
3829 | /* high bit used in ret_from_ code */ | |
3830 | - unsigned irq = ~regs->orig_rax; | |
3831 | + unsigned irq = ~regs->orig_ax; | |
3832 | ||
3833 | /*exit_idle();*/ | |
3834 | /*irq_enter();*/ | |
3835 | @@ -251,14 +273,3 @@ asmlinkage void do_softirq(void) | |
3836 | } | |
3837 | local_irq_restore(flags); | |
3838 | } | |
3839 | - | |
3840 | -#ifndef CONFIG_X86_LOCAL_APIC | |
3841 | -/* | |
3842 | - * 'what should we do if we get a hw irq event on an illegal vector'. | |
3843 | - * each architecture has to answer this themselves. | |
3844 | - */ | |
3845 | -void ack_bad_irq(unsigned int irq) | |
3846 | -{ | |
3847 | - printk("unexpected IRQ trap at irq %02x\n", irq); | |
3848 | -} | |
3849 | -#endif | |
00e5a55c BS |
3850 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 |
3851 | +++ sle11-2009-05-14/arch/x86/kernel/ldt-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
3852 | @@ -0,0 +1,272 @@ | |
3853 | +/* | |
3854 | + * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds | |
3855 | + * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> | |
3856 | + * Copyright (C) 2002 Andi Kleen | |
3857 | + * | |
3858 | + * This handles calls from both 32bit and 64bit mode. | |
3859 | + */ | |
cc90b958 | 3860 | + |
00e5a55c BS |
3861 | +#include <linux/errno.h> |
3862 | +#include <linux/sched.h> | |
3863 | +#include <linux/string.h> | |
3864 | +#include <linux/mm.h> | |
3865 | +#include <linux/smp.h> | |
3866 | +#include <linux/vmalloc.h> | |
cc90b958 | 3867 | + |
00e5a55c BS |
3868 | +#include <asm/uaccess.h> |
3869 | +#include <asm/system.h> | |
3870 | +#include <asm/ldt.h> | |
3871 | +#include <asm/desc.h> | |
3872 | +#include <asm/mmu_context.h> | |
cc90b958 | 3873 | + |
00e5a55c BS |
3874 | +#ifdef CONFIG_SMP |
3875 | +static void flush_ldt(void *null) | |
cc90b958 | 3876 | +{ |
00e5a55c BS |
3877 | + if (current->active_mm) |
3878 | + load_LDT(&current->active_mm->context); | |
cc90b958 | 3879 | +} |
00e5a55c | 3880 | +#endif |
cc90b958 | 3881 | + |
00e5a55c | 3882 | +static int alloc_ldt(mm_context_t *pc, int mincount, int reload) |
cc90b958 | 3883 | +{ |
00e5a55c BS |
3884 | + void *oldldt, *newldt; |
3885 | + int oldsize; | |
cc90b958 | 3886 | + |
00e5a55c | 3887 | + if (mincount <= pc->size) |
cc90b958 | 3888 | + return 0; |
00e5a55c BS |
3889 | + oldsize = pc->size; |
3890 | + mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) & | |
3891 | + (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1)); | |
3892 | + if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) | |
3893 | + newldt = vmalloc(mincount * LDT_ENTRY_SIZE); | |
3894 | + else | |
3895 | + newldt = (void *)__get_free_page(GFP_KERNEL); | |
cc90b958 | 3896 | + |
00e5a55c BS |
3897 | + if (!newldt) |
3898 | + return -ENOMEM; | |
cc90b958 | 3899 | + |
00e5a55c BS |
3900 | + if (oldsize) |
3901 | + memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); | |
3902 | + oldldt = pc->ldt; | |
3903 | + memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, | |
3904 | + (mincount - oldsize) * LDT_ENTRY_SIZE); | |
cc90b958 | 3905 | + |
00e5a55c BS |
3906 | +#ifdef CONFIG_X86_64 |
3907 | + /* CHECKME: Do we really need this ? */ | |
3908 | + wmb(); | |
cc90b958 | 3909 | +#endif |
00e5a55c BS |
3910 | + pc->ldt = newldt; |
3911 | + wmb(); | |
3912 | + pc->size = mincount; | |
3913 | + wmb(); | |
cc90b958 | 3914 | + |
00e5a55c BS |
3915 | + if (reload) { |
3916 | +#ifdef CONFIG_SMP | |
3917 | + cpumask_t mask; | |
cc90b958 | 3918 | + |
00e5a55c BS |
3919 | + preempt_disable(); |
3920 | +#endif | |
3921 | + make_pages_readonly(newldt, | |
3922 | + (mincount * LDT_ENTRY_SIZE) / PAGE_SIZE, | |
3923 | + XENFEAT_writable_descriptor_tables); | |
3924 | + load_LDT(pc); | |
3925 | +#ifdef CONFIG_SMP | |
3926 | + mask = cpumask_of_cpu(smp_processor_id()); | |
3927 | + if (!cpus_equal(current->mm->cpu_vm_mask, mask)) | |
3928 | + smp_call_function(flush_ldt, NULL, 1, 1); | |
3929 | + preempt_enable(); | |
3930 | +#endif | |
cc90b958 | 3931 | + } |
00e5a55c BS |
3932 | + if (oldsize) { |
3933 | + make_pages_writable(oldldt, | |
3934 | + (oldsize * LDT_ENTRY_SIZE) / PAGE_SIZE, | |
3935 | + XENFEAT_writable_descriptor_tables); | |
3936 | + if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) | |
3937 | + vfree(oldldt); | |
3938 | + else | |
3939 | + put_page(virt_to_page(oldldt)); | |
cc90b958 | 3940 | + } |
00e5a55c | 3941 | + return 0; |
cc90b958 BS |
3942 | +} |
3943 | + | |
00e5a55c | 3944 | +static inline int copy_ldt(mm_context_t *new, mm_context_t *old) |
cc90b958 | 3945 | +{ |
00e5a55c BS |
3946 | + int err = alloc_ldt(new, old->size, 0); |
3947 | + | |
3948 | + if (err < 0) | |
3949 | + return err; | |
3950 | + memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); | |
3951 | + make_pages_readonly(new->ldt, | |
3952 | + (new->size * LDT_ENTRY_SIZE) / PAGE_SIZE, | |
3953 | + XENFEAT_writable_descriptor_tables); | |
3954 | + return 0; | |
cc90b958 | 3955 | +} |
cc90b958 | 3956 | + |
00e5a55c BS |
3957 | +/* |
3958 | + * we do not have to muck with descriptors here, that is | |
3959 | + * done in switch_mm() as needed. | |
3960 | + */ | |
3961 | +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | |
cc90b958 | 3962 | +{ |
00e5a55c BS |
3963 | + struct mm_struct *old_mm; |
3964 | + int retval = 0; | |
3965 | + | |
3966 | + memset(&mm->context, 0, sizeof(mm->context)); | |
3967 | + mutex_init(&mm->context.lock); | |
3968 | + old_mm = current->mm; | |
3969 | + if (old_mm) | |
3970 | + mm->context.vdso = old_mm->context.vdso; | |
3971 | + if (old_mm && old_mm->context.size > 0) { | |
3972 | + mutex_lock(&old_mm->context.lock); | |
3973 | + retval = copy_ldt(&mm->context, &old_mm->context); | |
3974 | + mutex_unlock(&old_mm->context.lock); | |
3975 | + } | |
3976 | + return retval; | |
cc90b958 | 3977 | +} |
cc90b958 | 3978 | + |
00e5a55c BS |
3979 | +/* |
3980 | + * No need to lock the MM as we are the last user | |
cc90b958 | 3981 | + * |
00e5a55c | 3982 | + * 64bit: Don't touch the LDT register - we're already in the next thread. |
cc90b958 | 3983 | + */ |
00e5a55c | 3984 | +void destroy_context(struct mm_struct *mm) |
cc90b958 | 3985 | +{ |
00e5a55c BS |
3986 | + if (mm->context.size) { |
3987 | + /* CHECKME: Can this ever happen ? */ | |
3988 | + if (mm == current->active_mm) | |
3989 | + clear_LDT(); | |
3990 | + make_pages_writable(mm->context.ldt, | |
3991 | + (mm->context.size * LDT_ENTRY_SIZE) / PAGE_SIZE, | |
3992 | + XENFEAT_writable_descriptor_tables); | |
3993 | + if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) | |
3994 | + vfree(mm->context.ldt); | |
3995 | + else | |
3996 | + put_page(virt_to_page(mm->context.ldt)); | |
3997 | + mm->context.size = 0; | |
3998 | + } | |
3999 | +} | |
cc90b958 | 4000 | + |
00e5a55c BS |
4001 | +static int read_ldt(void __user *ptr, unsigned long bytecount) |
4002 | +{ | |
4003 | + int err; | |
4004 | + unsigned long size; | |
4005 | + struct mm_struct *mm = current->mm; | |
cc90b958 | 4006 | + |
00e5a55c BS |
4007 | + if (!mm->context.size) |
4008 | + return 0; | |
4009 | + if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) | |
4010 | + bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; | |
cc90b958 | 4011 | + |
00e5a55c BS |
4012 | + mutex_lock(&mm->context.lock); |
4013 | + size = mm->context.size * LDT_ENTRY_SIZE; | |
4014 | + if (size > bytecount) | |
4015 | + size = bytecount; | |
cc90b958 | 4016 | + |
00e5a55c BS |
4017 | + err = 0; |
4018 | + if (copy_to_user(ptr, mm->context.ldt, size)) | |
4019 | + err = -EFAULT; | |
4020 | + mutex_unlock(&mm->context.lock); | |
4021 | + if (err < 0) | |
4022 | + goto error_return; | |
4023 | + if (size != bytecount) { | |
4024 | + /* zero-fill the rest */ | |
4025 | + if (clear_user(ptr + size, bytecount - size) != 0) { | |
4026 | + err = -EFAULT; | |
4027 | + goto error_return; | |
4028 | + } | |
cc90b958 | 4029 | + } |
00e5a55c BS |
4030 | + return bytecount; |
4031 | +error_return: | |
4032 | + return err; | |
4033 | +} | |
cc90b958 | 4034 | + |
00e5a55c BS |
4035 | +static int read_default_ldt(void __user *ptr, unsigned long bytecount) |
4036 | +{ | |
4037 | + /* CHECKME: Can we use _one_ random number ? */ | |
4038 | +#ifdef CONFIG_X86_32 | |
4039 | + unsigned long size = 5 * sizeof(struct desc_struct); | |
4040 | +#else | |
4041 | + unsigned long size = 128; | |
4042 | +#endif | |
4043 | + if (bytecount > size) | |
4044 | + bytecount = size; | |
4045 | + if (clear_user(ptr, bytecount)) | |
4046 | + return -EFAULT; | |
4047 | + return bytecount; | |
4048 | +} | |
cc90b958 | 4049 | + |
00e5a55c BS |
4050 | +static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) |
4051 | +{ | |
4052 | + struct mm_struct *mm = current->mm; | |
4053 | + struct desc_struct ldt; | |
4054 | + int error; | |
4055 | + struct user_desc ldt_info; | |
cc90b958 | 4056 | + |
00e5a55c BS |
4057 | + error = -EINVAL; |
4058 | + if (bytecount != sizeof(ldt_info)) | |
4059 | + goto out; | |
4060 | + error = -EFAULT; | |
4061 | + if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) | |
4062 | + goto out; | |
4063 | + | |
4064 | + error = -EINVAL; | |
4065 | + if (ldt_info.entry_number >= LDT_ENTRIES) | |
4066 | + goto out; | |
4067 | + if (ldt_info.contents == 3) { | |
4068 | + if (oldmode) | |
4069 | + goto out; | |
4070 | + if (ldt_info.seg_not_present == 0) | |
4071 | + goto out; | |
cc90b958 BS |
4072 | + } |
4073 | + | |
00e5a55c BS |
4074 | + mutex_lock(&mm->context.lock); |
4075 | + if (ldt_info.entry_number >= mm->context.size) { | |
4076 | + error = alloc_ldt(&current->mm->context, | |
4077 | + ldt_info.entry_number + 1, 1); | |
4078 | + if (error < 0) | |
4079 | + goto out_unlock; | |
4080 | + } | |
cc90b958 | 4081 | + |
00e5a55c BS |
4082 | + /* Allow LDTs to be cleared by the user. */ |
4083 | + if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { | |
4084 | + if (oldmode || LDT_empty(&ldt_info)) { | |
4085 | + memset(&ldt, 0, sizeof(ldt)); | |
4086 | + goto install; | |
4087 | + } | |
4088 | + } | |
cc90b958 | 4089 | + |
00e5a55c BS |
4090 | + fill_ldt(&ldt, &ldt_info); |
4091 | + if (oldmode) | |
4092 | + ldt.avl = 0; | |
cc90b958 | 4093 | + |
00e5a55c BS |
4094 | + /* Install the new entry ... */ |
4095 | +install: | |
4096 | + error = write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt); | |
cc90b958 | 4097 | + |
00e5a55c BS |
4098 | +out_unlock: |
4099 | + mutex_unlock(&mm->context.lock); | |
4100 | +out: | |
4101 | + return error; | |
4102 | +} | |
cc90b958 | 4103 | + |
00e5a55c BS |
4104 | +asmlinkage int sys_modify_ldt(int func, void __user *ptr, |
4105 | + unsigned long bytecount) | |
cc90b958 | 4106 | +{ |
00e5a55c | 4107 | + int ret = -ENOSYS; |
cc90b958 | 4108 | + |
00e5a55c BS |
4109 | + switch (func) { |
4110 | + case 0: | |
4111 | + ret = read_ldt(ptr, bytecount); | |
4112 | + break; | |
4113 | + case 1: | |
4114 | + ret = write_ldt(ptr, bytecount, 1); | |
4115 | + break; | |
4116 | + case 2: | |
4117 | + ret = read_default_ldt(ptr, bytecount); | |
4118 | + break; | |
4119 | + case 0x11: | |
4120 | + ret = write_ldt(ptr, bytecount, 0); | |
4121 | + break; | |
4122 | + } | |
4123 | + return ret; | |
cc90b958 | 4124 | +} |
00e5a55c BS |
4125 | --- sle11-2009-05-14.orig/arch/x86/kernel/ldt_32-xen.c 2009-02-16 16:18:36.000000000 +0100 |
4126 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
4127 | @@ -1,265 +0,0 @@ | |
4128 | -/* | |
4129 | - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds | |
4130 | - * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> | |
4131 | - */ | |
4132 | - | |
4133 | -#include <linux/errno.h> | |
4134 | -#include <linux/sched.h> | |
4135 | -#include <linux/string.h> | |
4136 | -#include <linux/mm.h> | |
4137 | -#include <linux/smp.h> | |
4138 | -#include <linux/vmalloc.h> | |
4139 | -#include <linux/slab.h> | |
4140 | - | |
4141 | -#include <asm/uaccess.h> | |
4142 | -#include <asm/system.h> | |
4143 | -#include <asm/ldt.h> | |
4144 | -#include <asm/desc.h> | |
4145 | -#include <asm/mmu_context.h> | |
4146 | - | |
4147 | -#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ | |
4148 | -static void flush_ldt(void *null) | |
4149 | -{ | |
4150 | - if (current->active_mm) | |
4151 | - load_LDT(&current->active_mm->context); | |
4152 | -} | |
4153 | -#endif | |
4154 | - | |
4155 | -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |
4156 | -{ | |
4157 | - void *oldldt; | |
4158 | - void *newldt; | |
4159 | - int oldsize; | |
4160 | - | |
4161 | - if (mincount <= pc->size) | |
4162 | - return 0; | |
4163 | - oldsize = pc->size; | |
4164 | - mincount = (mincount+511)&(~511); | |
4165 | - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) | |
4166 | - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); | |
4167 | - else | |
4168 | - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); | |
4169 | - | |
4170 | - if (!newldt) | |
4171 | - return -ENOMEM; | |
4172 | - | |
4173 | - if (oldsize) | |
4174 | - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); | |
4175 | - oldldt = pc->ldt; | |
4176 | - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); | |
4177 | - pc->ldt = newldt; | |
4178 | - wmb(); | |
4179 | - pc->size = mincount; | |
4180 | - wmb(); | |
4181 | - | |
4182 | - if (reload) { | |
4183 | -#ifdef CONFIG_SMP | |
4184 | - cpumask_t mask; | |
4185 | - preempt_disable(); | |
4186 | -#endif | |
4187 | - make_pages_readonly( | |
4188 | - pc->ldt, | |
4189 | - (pc->size * LDT_ENTRY_SIZE) / PAGE_SIZE, | |
4190 | - XENFEAT_writable_descriptor_tables); | |
4191 | - load_LDT(pc); | |
4192 | -#ifdef CONFIG_SMP | |
4193 | - mask = cpumask_of_cpu(smp_processor_id()); | |
4194 | - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) | |
4195 | - smp_call_function(flush_ldt, NULL, 1, 1); | |
4196 | - preempt_enable(); | |
4197 | -#endif | |
4198 | - } | |
4199 | - if (oldsize) { | |
4200 | - make_pages_writable( | |
4201 | - oldldt, | |
4202 | - (oldsize * LDT_ENTRY_SIZE) / PAGE_SIZE, | |
4203 | - XENFEAT_writable_descriptor_tables); | |
4204 | - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) | |
4205 | - vfree(oldldt); | |
4206 | - else | |
4207 | - kfree(oldldt); | |
4208 | - } | |
4209 | - return 0; | |
4210 | -} | |
4211 | - | |
4212 | -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) | |
4213 | -{ | |
4214 | - int err = alloc_ldt(new, old->size, 0); | |
4215 | - if (err < 0) | |
4216 | - return err; | |
4217 | - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); | |
4218 | - make_pages_readonly( | |
4219 | - new->ldt, | |
4220 | - (new->size * LDT_ENTRY_SIZE) / PAGE_SIZE, | |
4221 | - XENFEAT_writable_descriptor_tables); | |
4222 | - return 0; | |
4223 | -} | |
4224 | - | |
4225 | -/* | |
4226 | - * we do not have to muck with descriptors here, that is | |
4227 | - * done in switch_mm() as needed. | |
4228 | - */ | |
4229 | -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | |
4230 | -{ | |
4231 | - struct mm_struct * old_mm; | |
4232 | - int retval = 0; | |
4233 | - | |
4234 | - mutex_init(&mm->context.lock); | |
4235 | - mm->context.size = 0; | |
4236 | - mm->context.has_foreign_mappings = 0; | |
4237 | - old_mm = current->mm; | |
4238 | - if (old_mm && old_mm->context.size > 0) { | |
4239 | - mutex_lock(&old_mm->context.lock); | |
4240 | - retval = copy_ldt(&mm->context, &old_mm->context); | |
4241 | - mutex_unlock(&old_mm->context.lock); | |
4242 | - } | |
4243 | - return retval; | |
4244 | -} | |
4245 | - | |
4246 | -/* | |
4247 | - * No need to lock the MM as we are the last user | |
4248 | - */ | |
4249 | -void destroy_context(struct mm_struct *mm) | |
4250 | -{ | |
4251 | - if (mm->context.size) { | |
4252 | - if (mm == current->active_mm) | |
4253 | - clear_LDT(); | |
4254 | - make_pages_writable( | |
4255 | - mm->context.ldt, | |
4256 | - (mm->context.size * LDT_ENTRY_SIZE) / PAGE_SIZE, | |
4257 | - XENFEAT_writable_descriptor_tables); | |
4258 | - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) | |
4259 | - vfree(mm->context.ldt); | |
4260 | - else | |
4261 | - kfree(mm->context.ldt); | |
4262 | - mm->context.size = 0; | |
4263 | - } | |
4264 | -} | |
4265 | - | |
4266 | -static int read_ldt(void __user * ptr, unsigned long bytecount) | |
4267 | -{ | |
4268 | - int err; | |
4269 | - unsigned long size; | |
4270 | - struct mm_struct * mm = current->mm; | |
4271 | - | |
4272 | - if (!mm->context.size) | |
4273 | - return 0; | |
4274 | - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) | |
4275 | - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; | |
4276 | - | |
4277 | - mutex_lock(&mm->context.lock); | |
4278 | - size = mm->context.size*LDT_ENTRY_SIZE; | |
4279 | - if (size > bytecount) | |
4280 | - size = bytecount; | |
4281 | - | |
4282 | - err = 0; | |
4283 | - if (copy_to_user(ptr, mm->context.ldt, size)) | |
4284 | - err = -EFAULT; | |
4285 | - mutex_unlock(&mm->context.lock); | |
4286 | - if (err < 0) | |
4287 | - goto error_return; | |
4288 | - if (size != bytecount) { | |
4289 | - /* zero-fill the rest */ | |
4290 | - if (clear_user(ptr+size, bytecount-size) != 0) { | |
4291 | - err = -EFAULT; | |
4292 | - goto error_return; | |
4293 | - } | |
4294 | - } | |
4295 | - return bytecount; | |
4296 | -error_return: | |
4297 | - return err; | |
4298 | -} | |
4299 | - | |
4300 | -static int read_default_ldt(void __user * ptr, unsigned long bytecount) | |
4301 | -{ | |
4302 | - int err; | |
4303 | - unsigned long size; | |
4304 | - | |
4305 | - err = 0; | |
4306 | - size = 5*sizeof(struct desc_struct); | |
4307 | - if (size > bytecount) | |
4308 | - size = bytecount; | |
4309 | - | |
4310 | - err = size; | |
4311 | - if (clear_user(ptr, size)) | |
4312 | - err = -EFAULT; | |
4313 | - | |
4314 | - return err; | |
4315 | -} | |
4316 | - | |
4317 | -static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) | |
4318 | -{ | |
4319 | - struct mm_struct * mm = current->mm; | |
4320 | - __u32 entry_1, entry_2; | |
4321 | - int error; | |
4322 | - struct user_desc ldt_info; | |
4323 | - | |
4324 | - error = -EINVAL; | |
4325 | - if (bytecount != sizeof(ldt_info)) | |
4326 | - goto out; | |
4327 | - error = -EFAULT; | |
4328 | - if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) | |
4329 | - goto out; | |
4330 | - | |
4331 | - error = -EINVAL; | |
4332 | - if (ldt_info.entry_number >= LDT_ENTRIES) | |
4333 | - goto out; | |
4334 | - if (ldt_info.contents == 3) { | |
4335 | - if (oldmode) | |
4336 | - goto out; | |
4337 | - if (ldt_info.seg_not_present == 0) | |
4338 | - goto out; | |
4339 | - } | |
4340 | - | |
4341 | - mutex_lock(&mm->context.lock); | |
4342 | - if (ldt_info.entry_number >= mm->context.size) { | |
4343 | - error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1); | |
4344 | - if (error < 0) | |
4345 | - goto out_unlock; | |
4346 | - } | |
4347 | - | |
4348 | - /* Allow LDTs to be cleared by the user. */ | |
4349 | - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { | |
4350 | - if (oldmode || LDT_empty(&ldt_info)) { | |
4351 | - entry_1 = 0; | |
4352 | - entry_2 = 0; | |
4353 | - goto install; | |
4354 | - } | |
4355 | - } | |
4356 | - | |
4357 | - entry_1 = LDT_entry_a(&ldt_info); | |
4358 | - entry_2 = LDT_entry_b(&ldt_info); | |
4359 | - if (oldmode) | |
4360 | - entry_2 &= ~(1 << 20); | |
4361 | - | |
4362 | - /* Install the new entry ... */ | |
4363 | -install: | |
4364 | - error = write_ldt_entry(mm->context.ldt, ldt_info.entry_number, | |
4365 | - entry_1, entry_2); | |
4366 | - | |
4367 | -out_unlock: | |
4368 | - mutex_unlock(&mm->context.lock); | |
4369 | -out: | |
4370 | - return error; | |
4371 | -} | |
4372 | - | |
4373 | -asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) | |
4374 | -{ | |
4375 | - int ret = -ENOSYS; | |
4376 | - | |
4377 | - switch (func) { | |
4378 | - case 0: | |
4379 | - ret = read_ldt(ptr, bytecount); | |
4380 | - break; | |
4381 | - case 1: | |
4382 | - ret = write_ldt(ptr, bytecount, 1); | |
4383 | - break; | |
4384 | - case 2: | |
4385 | - ret = read_default_ldt(ptr, bytecount); | |
4386 | - break; | |
4387 | - case 0x11: | |
4388 | - ret = write_ldt(ptr, bytecount, 0); | |
4389 | - break; | |
4390 | - } | |
4391 | - return ret; | |
4392 | -} | |
4393 | --- sle11-2009-05-14.orig/arch/x86/kernel/ldt_64-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
4394 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
4395 | @@ -1,271 +0,0 @@ | |
4396 | -/* | |
4397 | - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds | |
4398 | - * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> | |
4399 | - * Copyright (C) 2002 Andi Kleen | |
4400 | - * | |
4401 | - * This handles calls from both 32bit and 64bit mode. | |
4402 | - */ | |
4403 | - | |
4404 | -#include <linux/errno.h> | |
4405 | -#include <linux/sched.h> | |
4406 | -#include <linux/string.h> | |
4407 | -#include <linux/mm.h> | |
4408 | -#include <linux/smp.h> | |
4409 | -#include <linux/vmalloc.h> | |
4410 | -#include <linux/slab.h> | |
4411 | - | |
4412 | -#include <asm/uaccess.h> | |
4413 | -#include <asm/system.h> | |
4414 | -#include <asm/ldt.h> | |
4415 | -#include <asm/desc.h> | |
4416 | -#include <asm/proto.h> | |
4417 | -#include <asm/pgalloc.h> | |
4418 | - | |
4419 | -#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ | |
4420 | -static void flush_ldt(void *null) | |
4421 | -{ | |
4422 | - if (current->active_mm) | |
4423 | - load_LDT(&current->active_mm->context); | |
4424 | -} | |
4425 | -#endif | |
4426 | - | |
4427 | -static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload) | |
4428 | -{ | |
4429 | - void *oldldt; | |
4430 | - void *newldt; | |
4431 | - unsigned oldsize; | |
4432 | - | |
4433 | - if (mincount <= (unsigned)pc->size) | |
4434 | - return 0; | |
4435 | - oldsize = pc->size; | |
4436 | - mincount = (mincount+511)&(~511); | |
4437 | - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) | |
4438 | - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); | |
4439 | - else | |
4440 | - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); | |
4441 | - | |
4442 | - if (!newldt) | |
4443 | - return -ENOMEM; | |
4444 | - | |
4445 | - if (oldsize) | |
4446 | - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); | |
4447 | - oldldt = pc->ldt; | |
4448 | - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); | |
4449 | - wmb(); | |
4450 | - pc->ldt = newldt; | |
4451 | - wmb(); | |
4452 | - pc->size = mincount; | |
4453 | - wmb(); | |
4454 | - if (reload) { | |
4455 | -#ifdef CONFIG_SMP | |
4456 | - cpumask_t mask; | |
4457 | - | |
4458 | - preempt_disable(); | |
4459 | -#endif | |
4460 | - make_pages_readonly( | |
4461 | - pc->ldt, | |
4462 | - (pc->size * LDT_ENTRY_SIZE) / PAGE_SIZE, | |
4463 | - XENFEAT_writable_descriptor_tables); | |
4464 | - load_LDT(pc); | |
4465 | -#ifdef CONFIG_SMP | |
4466 | - mask = cpumask_of_cpu(smp_processor_id()); | |
4467 | - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) | |
4468 | - smp_call_function(flush_ldt, NULL, 1, 1); | |
4469 | - preempt_enable(); | |
4470 | -#endif | |
4471 | - } | |
4472 | - if (oldsize) { | |
4473 | - make_pages_writable( | |
4474 | - oldldt, | |
4475 | - (oldsize * LDT_ENTRY_SIZE) / PAGE_SIZE, | |
4476 | - XENFEAT_writable_descriptor_tables); | |
4477 | - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) | |
4478 | - vfree(oldldt); | |
4479 | - else | |
4480 | - kfree(oldldt); | |
4481 | - } | |
4482 | - return 0; | |
4483 | -} | |
4484 | - | |
4485 | -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) | |
4486 | -{ | |
4487 | - int err = alloc_ldt(new, old->size, 0); | |
4488 | - if (err < 0) | |
4489 | - return err; | |
4490 | - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); | |
4491 | - make_pages_readonly( | |
4492 | - new->ldt, | |
4493 | - (new->size * LDT_ENTRY_SIZE) / PAGE_SIZE, | |
4494 | - XENFEAT_writable_descriptor_tables); | |
4495 | - return 0; | |
4496 | -} | |
4497 | - | |
4498 | -/* | |
4499 | - * we do not have to muck with descriptors here, that is | |
4500 | - * done in switch_mm() as needed. | |
4501 | - */ | |
4502 | -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | |
4503 | -{ | |
4504 | - struct mm_struct * old_mm; | |
4505 | - int retval = 0; | |
4506 | - | |
4507 | - memset(&mm->context, 0, sizeof(mm->context)); | |
4508 | - mutex_init(&mm->context.lock); | |
4509 | - old_mm = current->mm; | |
4510 | - if (old_mm) | |
4511 | - mm->context.vdso = old_mm->context.vdso; | |
4512 | - if (old_mm && old_mm->context.size > 0) { | |
4513 | - mutex_lock(&old_mm->context.lock); | |
4514 | - retval = copy_ldt(&mm->context, &old_mm->context); | |
4515 | - mutex_unlock(&old_mm->context.lock); | |
4516 | - } | |
4517 | - return retval; | |
4518 | -} | |
4519 | - | |
4520 | -/* | |
4521 | - * | |
4522 | - * Don't touch the LDT register - we're already in the next thread. | |
4523 | - */ | |
4524 | -void destroy_context(struct mm_struct *mm) | |
4525 | -{ | |
4526 | - if (mm->context.size) { | |
4527 | - if (mm == current->active_mm) | |
4528 | - clear_LDT(); | |
4529 | - make_pages_writable( | |
4530 | - mm->context.ldt, | |
4531 | - (mm->context.size * LDT_ENTRY_SIZE) / PAGE_SIZE, | |
4532 | - XENFEAT_writable_descriptor_tables); | |
4533 | - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) | |
4534 | - vfree(mm->context.ldt); | |
4535 | - else | |
4536 | - kfree(mm->context.ldt); | |
4537 | - mm->context.size = 0; | |
4538 | - } | |
4539 | -} | |
4540 | - | |
4541 | -static int read_ldt(void __user * ptr, unsigned long bytecount) | |
4542 | -{ | |
4543 | - int err; | |
4544 | - unsigned long size; | |
4545 | - struct mm_struct * mm = current->mm; | |
4546 | - | |
4547 | - if (!mm->context.size) | |
4548 | - return 0; | |
4549 | - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) | |
4550 | - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; | |
4551 | - | |
4552 | - mutex_lock(&mm->context.lock); | |
4553 | - size = mm->context.size*LDT_ENTRY_SIZE; | |
4554 | - if (size > bytecount) | |
4555 | - size = bytecount; | |
4556 | - | |
4557 | - err = 0; | |
4558 | - if (copy_to_user(ptr, mm->context.ldt, size)) | |
4559 | - err = -EFAULT; | |
4560 | - mutex_unlock(&mm->context.lock); | |
4561 | - if (err < 0) | |
4562 | - goto error_return; | |
4563 | - if (size != bytecount) { | |
4564 | - /* zero-fill the rest */ | |
4565 | - if (clear_user(ptr+size, bytecount-size) != 0) { | |
4566 | - err = -EFAULT; | |
4567 | - goto error_return; | |
4568 | - } | |
4569 | - } | |
4570 | - return bytecount; | |
4571 | -error_return: | |
4572 | - return err; | |
4573 | -} | |
4574 | - | |
4575 | -static int read_default_ldt(void __user * ptr, unsigned long bytecount) | |
4576 | -{ | |
4577 | - /* Arbitrary number */ | |
4578 | - /* x86-64 default LDT is all zeros */ | |
4579 | - if (bytecount > 128) | |
4580 | - bytecount = 128; | |
4581 | - if (clear_user(ptr, bytecount)) | |
4582 | - return -EFAULT; | |
4583 | - return bytecount; | |
4584 | -} | |
4585 | - | |
4586 | -static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) | |
4587 | -{ | |
4588 | - struct task_struct *me = current; | |
4589 | - struct mm_struct * mm = me->mm; | |
4590 | - __u32 entry_1, entry_2, *lp; | |
4591 | - unsigned long mach_lp; | |
4592 | - int error; | |
4593 | - struct user_desc ldt_info; | |
4594 | - | |
4595 | - error = -EINVAL; | |
4596 | - | |
4597 | - if (bytecount != sizeof(ldt_info)) | |
4598 | - goto out; | |
4599 | - error = -EFAULT; | |
4600 | - if (copy_from_user(&ldt_info, ptr, bytecount)) | |
4601 | - goto out; | |
4602 | - | |
4603 | - error = -EINVAL; | |
4604 | - if (ldt_info.entry_number >= LDT_ENTRIES) | |
4605 | - goto out; | |
4606 | - if (ldt_info.contents == 3) { | |
4607 | - if (oldmode) | |
4608 | - goto out; | |
4609 | - if (ldt_info.seg_not_present == 0) | |
4610 | - goto out; | |
4611 | - } | |
4612 | - | |
4613 | - mutex_lock(&mm->context.lock); | |
4614 | - if (ldt_info.entry_number >= (unsigned)mm->context.size) { | |
4615 | - error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1); | |
4616 | - if (error < 0) | |
4617 | - goto out_unlock; | |
4618 | - } | |
4619 | - | |
4620 | - lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); | |
4621 | - mach_lp = arbitrary_virt_to_machine(lp); | |
4622 | - | |
4623 | - /* Allow LDTs to be cleared by the user. */ | |
4624 | - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { | |
4625 | - if (oldmode || LDT_empty(&ldt_info)) { | |
4626 | - entry_1 = 0; | |
4627 | - entry_2 = 0; | |
4628 | - goto install; | |
4629 | - } | |
4630 | - } | |
4631 | - | |
4632 | - entry_1 = LDT_entry_a(&ldt_info); | |
4633 | - entry_2 = LDT_entry_b(&ldt_info); | |
4634 | - if (oldmode) | |
4635 | - entry_2 &= ~(1 << 20); | |
4636 | - | |
4637 | - /* Install the new entry ... */ | |
4638 | -install: | |
4639 | - error = HYPERVISOR_update_descriptor(mach_lp, (unsigned long)((entry_1 | (unsigned long) entry_2 << 32))); | |
4640 | - | |
4641 | -out_unlock: | |
4642 | - mutex_unlock(&mm->context.lock); | |
4643 | -out: | |
4644 | - return error; | |
4645 | -} | |
4646 | - | |
4647 | -asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) | |
4648 | -{ | |
4649 | - int ret = -ENOSYS; | |
4650 | - | |
4651 | - switch (func) { | |
4652 | - case 0: | |
4653 | - ret = read_ldt(ptr, bytecount); | |
4654 | - break; | |
4655 | - case 1: | |
4656 | - ret = write_ldt(ptr, bytecount, 1); | |
4657 | - break; | |
4658 | - case 2: | |
4659 | - ret = read_default_ldt(ptr, bytecount); | |
4660 | - break; | |
4661 | - case 0x11: | |
4662 | - ret = write_ldt(ptr, bytecount, 0); | |
4663 | - break; | |
4664 | - } | |
4665 | - return ret; | |
4666 | -} | |
4667 | --- sle11-2009-05-14.orig/arch/x86/kernel/machine_kexec_64.c 2008-11-25 12:35:54.000000000 +0100 | |
4668 | +++ sle11-2009-05-14/arch/x86/kernel/machine_kexec_64.c 2009-03-16 16:33:40.000000000 +0100 | |
4669 | @@ -300,7 +300,9 @@ void machine_kexec(struct kimage *image) | |
4670 | ||
4671 | void arch_crash_save_vmcoreinfo(void) | |
4672 | { | |
4673 | +#ifndef CONFIG_XEN /* could really be CONFIG_RELOCATABLE */ | |
4674 | VMCOREINFO_SYMBOL(phys_base); | |
cc90b958 | 4675 | +#endif |
00e5a55c BS |
4676 | VMCOREINFO_SYMBOL(init_level4_pgt); |
4677 | ||
4678 | #ifdef CONFIG_NUMA | |
4679 | --- sle11-2009-05-14.orig/arch/x86/kernel/microcode-xen.c 2009-02-16 16:17:21.000000000 +0100 | |
4680 | +++ sle11-2009-05-14/arch/x86/kernel/microcode-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
4681 | @@ -167,7 +167,7 @@ static int request_microcode(void) | |
4682 | } | |
4683 | ||
4684 | op.cmd = XENPF_microcode_update; | |
4685 | - set_xen_guest_handle(op.u.microcode.data, (void *)firmware->data); | |
4686 | + set_xen_guest_handle(op.u.microcode.data, firmware->data); | |
4687 | op.u.microcode.length = firmware->size; | |
4688 | error = HYPERVISOR_platform_op(&op); | |
4689 | ||
4690 | --- sle11-2009-05-14.orig/arch/x86/kernel/mpparse_32-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
4691 | +++ sle11-2009-05-14/arch/x86/kernel/mpparse_32-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
4692 | @@ -68,7 +68,7 @@ unsigned int def_to_bigsmp = 0; | |
4693 | /* Processor that is doing the boot up */ | |
4694 | unsigned int boot_cpu_physical_apicid = -1U; | |
4695 | /* Internal processor count */ | |
4696 | -unsigned int __cpuinitdata num_processors; | |
4697 | +unsigned int num_processors; | |
4698 | ||
4699 | /* Bitmask of physically existing CPUs */ | |
4700 | physid_mask_t phys_cpu_present_map; | |
4701 | @@ -265,7 +265,7 @@ static void __init MP_ioapic_info (struc | |
4702 | if (!(m->mpc_flags & MPC_APIC_USABLE)) | |
4703 | return; | |
4704 | ||
4705 | - printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n", | |
4706 | + printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", | |
4707 | m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); | |
4708 | if (nr_ioapics >= MAX_IO_APICS) { | |
4709 | printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n", | |
4710 | @@ -412,9 +412,9 @@ static int __init smp_read_mpc(struct mp | |
4711 | ||
4712 | mps_oem_check(mpc, oem, str); | |
4713 | ||
4714 | - printk("APIC at: 0x%lX\n",mpc->mpc_lapic); | |
4715 | + printk("APIC at: 0x%X\n", mpc->mpc_lapic); | |
4716 | ||
4717 | - /* | |
cc90b958 | 4718 | + /* |
00e5a55c BS |
4719 | * Save the local APIC address (it might be non-default) -- but only |
4720 | * if we're not using ACPI. | |
4721 | */ | |
4722 | @@ -728,7 +728,7 @@ static int __init smp_scan_config (unsig | |
4723 | unsigned long *bp = isa_bus_to_virt(base); | |
4724 | struct intel_mp_floating *mpf; | |
4725 | ||
4726 | - Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); | |
4727 | + printk(KERN_INFO "Scan SMP from %p for %ld bytes.\n", bp,length); | |
4728 | if (sizeof(*mpf) != 16) | |
4729 | printk("Error: MPF size\n"); | |
4730 | ||
4731 | @@ -742,9 +742,10 @@ static int __init smp_scan_config (unsig | |
4732 | ||
4733 | smp_found_config = 1; | |
4734 | #ifndef CONFIG_XEN | |
4735 | - printk(KERN_INFO "found SMP MP-table at %08lx\n", | |
4736 | - virt_to_phys(mpf)); | |
4737 | - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); | |
4738 | + printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", | |
4739 | + mpf, virt_to_phys(mpf)); | |
4740 | + reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE, | |
4741 | + BOOTMEM_DEFAULT); | |
4742 | if (mpf->mpf_physptr) { | |
4743 | /* | |
4744 | * We cannot access to MPC table to compute | |
4745 | @@ -759,11 +760,12 @@ static int __init smp_scan_config (unsig | |
4746 | unsigned long end = max_low_pfn * PAGE_SIZE; | |
4747 | if (mpf->mpf_physptr + size > end) | |
4748 | size = end - mpf->mpf_physptr; | |
4749 | - reserve_bootmem(mpf->mpf_physptr, size); | |
4750 | + reserve_bootmem(mpf->mpf_physptr, size, | |
4751 | + BOOTMEM_DEFAULT); | |
4752 | } | |
4753 | #else | |
4754 | - printk(KERN_INFO "found SMP MP-table at %08lx\n", | |
4755 | - ((unsigned long)bp - (unsigned long)isa_bus_to_virt(base)) + base); | |
4756 | + printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", | |
4757 | + mpf, ((void *)bp - isa_bus_to_virt(base)) + base); | |
4758 | #endif | |
4759 | ||
4760 | mpf_found = mpf; | |
4761 | @@ -940,14 +942,14 @@ void __init mp_register_ioapic(u8 id, u3 | |
4762 | */ | |
4763 | mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; | |
4764 | mp_ioapic_routing[idx].gsi_base = gsi_base; | |
4765 | - mp_ioapic_routing[idx].gsi_end = gsi_base + | |
4766 | + mp_ioapic_routing[idx].gsi_end = gsi_base + | |
4767 | io_apic_get_redir_entries(idx); | |
4768 | ||
4769 | - printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " | |
4770 | - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, | |
4771 | - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, | |
4772 | - mp_ioapic_routing[idx].gsi_base, | |
4773 | - mp_ioapic_routing[idx].gsi_end); | |
4774 | + printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " | |
4775 | + "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, | |
4776 | + mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, | |
4777 | + mp_ioapic_routing[idx].gsi_base, | |
4778 | + mp_ioapic_routing[idx].gsi_end); | |
4779 | } | |
4780 | ||
4781 | void __init | |
4782 | @@ -1063,15 +1065,16 @@ void __init mp_config_acpi_legacy_irqs ( | |
4783 | } | |
4784 | ||
4785 | #define MAX_GSI_NUM 4096 | |
4786 | +#define IRQ_COMPRESSION_START 64 | |
4787 | ||
4788 | int mp_register_gsi(u32 gsi, int triggering, int polarity) | |
4789 | { | |
4790 | int ioapic = -1; | |
4791 | int ioapic_pin = 0; | |
4792 | int idx, bit = 0; | |
4793 | - static int pci_irq = 16; | |
4794 | + static int pci_irq = IRQ_COMPRESSION_START; | |
4795 | /* | |
4796 | - * Mapping between Global System Interrups, which | |
4797 | + * Mapping between Global System Interrupts, which | |
4798 | * represent all possible interrupts, and IRQs | |
4799 | * assigned to actual devices. | |
4800 | */ | |
4801 | @@ -1108,12 +1111,16 @@ int mp_register_gsi(u32 gsi, int trigger | |
4802 | if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { | |
4803 | Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", | |
4804 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); | |
4805 | - return gsi_to_irq[gsi]; | |
4806 | + return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); | |
4807 | } | |
4808 | ||
4809 | mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); | |
4810 | ||
4811 | - if (triggering == ACPI_LEVEL_SENSITIVE) { | |
4812 | + /* | |
4813 | + * For GSI >= 64, use IRQ compression | |
4814 | + */ | |
4815 | + if ((gsi >= IRQ_COMPRESSION_START) | |
4816 | + && (triggering == ACPI_LEVEL_SENSITIVE)) { | |
4817 | /* | |
4818 | * For PCI devices assign IRQs in order, avoiding gaps | |
4819 | * due to unused I/O APIC pins. | |
4820 | --- sle11-2009-05-14.orig/arch/x86/kernel/mpparse_64-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
4821 | +++ sle11-2009-05-14/arch/x86/kernel/mpparse_64-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
4822 | @@ -60,14 +60,20 @@ unsigned int boot_cpu_id = -1U; | |
4823 | EXPORT_SYMBOL(boot_cpu_id); | |
4824 | ||
4825 | /* Internal processor count */ | |
4826 | -unsigned int num_processors __cpuinitdata = 0; | |
4827 | +unsigned int num_processors; | |
4828 | ||
4829 | unsigned disabled_cpus __cpuinitdata; | |
4830 | ||
4831 | /* Bitmask of physically existing CPUs */ | |
4832 | physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; | |
4833 | ||
4834 | -u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; | |
4835 | +#ifndef CONFIG_XEN | |
4836 | +u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata | |
4837 | + = { [0 ... NR_CPUS-1] = BAD_APICID }; | |
4838 | +void *x86_bios_cpu_apicid_early_ptr; | |
4839 | +#endif | |
4840 | +DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; | |
4841 | +EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); | |
4842 | ||
4843 | ||
4844 | /* | |
4845 | @@ -119,24 +125,22 @@ static void __cpuinit MP_processor_info( | |
4846 | physid_set(m->mpc_apicid, phys_cpu_present_map); | |
4847 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { | |
4848 | /* | |
4849 | - * bios_cpu_apicid is required to have processors listed | |
4850 | + * x86_bios_cpu_apicid is required to have processors listed | |
4851 | * in same order as logical cpu numbers. Hence the first | |
4852 | * entry is BSP, and so on. | |
4853 | */ | |
4854 | cpu = 0; | |
4855 | } | |
4856 | - bios_cpu_apicid[cpu] = m->mpc_apicid; | |
4857 | - /* | |
4858 | - * We get called early in the the start_kernel initialization | |
4859 | - * process when the per_cpu data area is not yet setup, so we | |
4860 | - * use a static array that is removed after the per_cpu data | |
4861 | - * area is created. | |
4862 | - */ | |
4863 | - if (x86_cpu_to_apicid_ptr) { | |
4864 | - u8 *x86_cpu_to_apicid = (u8 *)x86_cpu_to_apicid_ptr; | |
4865 | - x86_cpu_to_apicid[cpu] = m->mpc_apicid; | |
4866 | + /* are we being called early in kernel startup? */ | |
4867 | + if (x86_cpu_to_apicid_early_ptr) { | |
4868 | + u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; | |
4869 | + u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; | |
cc90b958 | 4870 | + |
00e5a55c BS |
4871 | + cpu_to_apicid[cpu] = m->mpc_apicid; |
4872 | + bios_cpu_apicid[cpu] = m->mpc_apicid; | |
4873 | } else { | |
4874 | per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid; | |
4875 | + per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid; | |
4876 | } | |
4877 | ||
4878 | cpu_set(cpu, cpu_possible_map); | |
4879 | --- sle11-2009-05-14.orig/arch/x86/kernel/pci-dma-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
4880 | +++ sle11-2009-05-14/arch/x86/kernel/pci-dma-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
4881 | @@ -434,3 +434,23 @@ dma_sync_single_for_device(struct device | |
4882 | swiotlb_sync_single_for_device(dev, dma_handle, size, direction); | |
4883 | } | |
4884 | EXPORT_SYMBOL(dma_sync_single_for_device); | |
cc90b958 | 4885 | + |
00e5a55c BS |
4886 | +void |
4887 | +dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, | |
4888 | + enum dma_data_direction direction) | |
cc90b958 | 4889 | +{ |
00e5a55c BS |
4890 | + if (swiotlb) |
4891 | + swiotlb_sync_sg_for_cpu(dev,sg,nelems,direction); | |
4892 | + flush_write_buffers(); | |
cc90b958 | 4893 | +} |
00e5a55c | 4894 | +EXPORT_SYMBOL(dma_sync_sg_for_cpu); |
cc90b958 | 4895 | + |
00e5a55c BS |
4896 | +void |
4897 | +dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, | |
4898 | + enum dma_data_direction direction) | |
cc90b958 | 4899 | +{ |
00e5a55c BS |
4900 | + if (swiotlb) |
4901 | + swiotlb_sync_sg_for_device(dev,sg,nelems,direction); | |
4902 | + flush_write_buffers(); | |
cc90b958 | 4903 | +} |
00e5a55c BS |
4904 | +EXPORT_SYMBOL(dma_sync_sg_for_device); |
4905 | --- sle11-2009-05-14.orig/arch/x86/kernel/process_32-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
4906 | +++ sle11-2009-05-14/arch/x86/kernel/process_32-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
4907 | @@ -23,7 +23,6 @@ | |
4908 | #include <linux/slab.h> | |
4909 | #include <linux/vmalloc.h> | |
4910 | #include <linux/user.h> | |
4911 | -#include <linux/a.out.h> | |
4912 | #include <linux/interrupt.h> | |
4913 | #include <linux/utsname.h> | |
4914 | #include <linux/delay.h> | |
4915 | @@ -59,8 +58,10 @@ | |
4916 | ||
4917 | #include <asm/tlbflush.h> | |
4918 | #include <asm/cpu.h> | |
4919 | +#include <asm/kdebug.h> | |
4920 | ||
4921 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | |
4922 | +asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork"); | |
4923 | ||
4924 | static int hlt_counter; | |
4925 | ||
4926 | @@ -78,7 +79,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number); | |
4927 | */ | |
4928 | unsigned long thread_saved_pc(struct task_struct *tsk) | |
4929 | { | |
4930 | - return ((unsigned long *)tsk->thread.esp)[3]; | |
4931 | + return ((unsigned long *)tsk->thread.sp)[3]; | |
4932 | } | |
4933 | ||
4934 | /* | |
4935 | @@ -86,7 +87,6 @@ unsigned long thread_saved_pc(struct tas | |
4936 | */ | |
4937 | void (*pm_idle)(void); | |
4938 | EXPORT_SYMBOL(pm_idle); | |
4939 | -static DEFINE_PER_CPU(unsigned int, cpu_idle_state); | |
4940 | ||
4941 | void disable_hlt(void) | |
4942 | { | |
4943 | @@ -107,7 +107,7 @@ EXPORT_SYMBOL(enable_hlt); | |
4944 | * to poll the ->work.need_resched flag instead of waiting for the | |
4945 | * cross-CPU IPI to arrive. Use this option with caution. | |
4946 | */ | |
4947 | -static void poll_idle (void) | |
4948 | +static void poll_idle(void) | |
4949 | { | |
4950 | cpu_relax(); | |
4951 | } | |
4952 | @@ -122,10 +122,19 @@ static void xen_idle(void) | |
4953 | smp_mb(); | |
4954 | ||
4955 | local_irq_disable(); | |
4956 | - if (!need_resched()) | |
4957 | + if (!need_resched()) { | |
4958 | + ktime_t t0, t1; | |
4959 | + u64 t0n, t1n; | |
cc90b958 | 4960 | + |
00e5a55c BS |
4961 | + t0 = ktime_get(); |
4962 | + t0n = ktime_to_ns(t0); | |
4963 | safe_halt(); /* enables interrupts racelessly */ | |
4964 | - else | |
4965 | - local_irq_enable(); | |
4966 | + local_irq_disable(); | |
4967 | + t1 = ktime_get(); | |
4968 | + t1n = ktime_to_ns(t1); | |
4969 | + sched_clock_idle_wakeup_event(t1n - t0n); | |
cc90b958 | 4970 | + } |
00e5a55c BS |
4971 | + local_irq_enable(); |
4972 | current_thread_info()->status |= TS_POLLING; | |
4973 | } | |
4974 | #ifdef CONFIG_APM_MODULE | |
4975 | @@ -168,13 +177,13 @@ void cpu_idle(void) | |
4976 | while (!need_resched()) { | |
4977 | void (*idle)(void); | |
4978 | ||
4979 | - if (__get_cpu_var(cpu_idle_state)) | |
4980 | - __get_cpu_var(cpu_idle_state) = 0; | |
4981 | - | |
4982 | check_pgt_cache(); | |
4983 | rmb(); | |
4984 | idle = xen_idle; /* no alternatives */ | |
4985 | ||
4986 | + if (rcu_pending(cpu)) | |
4987 | + rcu_check_callbacks(cpu, 0); | |
cc90b958 | 4988 | + |
00e5a55c BS |
4989 | if (cpu_is_offline(cpu)) |
4990 | play_dead(); | |
4991 | ||
4992 | @@ -192,40 +201,19 @@ static void do_nothing(void *unused) | |
4993 | { | |
4994 | } | |
4995 | ||
4996 | +/* | |
4997 | + * cpu_idle_wait - Used to ensure that all the CPUs discard old value of | |
4998 | + * pm_idle and update to new pm_idle value. Required while changing pm_idle | |
4999 | + * handler on SMP systems. | |
5000 | + * | |
5001 | + * Caller must have changed pm_idle to the new value before the call. Old | |
5002 | + * pm_idle value will not be used by any CPU after the return of this function. | |
5003 | + */ | |
5004 | void cpu_idle_wait(void) | |
5005 | { | |
5006 | - unsigned int cpu, this_cpu = get_cpu(); | |
5007 | - cpumask_t map, tmp = current->cpus_allowed; | |
5008 | - | |
5009 | - set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); | |
5010 | - put_cpu(); | |
5011 | - | |
5012 | - cpus_clear(map); | |
5013 | - for_each_online_cpu(cpu) { | |
5014 | - per_cpu(cpu_idle_state, cpu) = 1; | |
5015 | - cpu_set(cpu, map); | |
5016 | - } | |
5017 | - | |
5018 | - __get_cpu_var(cpu_idle_state) = 0; | |
5019 | - | |
5020 | - wmb(); | |
5021 | - do { | |
5022 | - ssleep(1); | |
5023 | - for_each_online_cpu(cpu) { | |
5024 | - if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) | |
5025 | - cpu_clear(cpu, map); | |
5026 | - } | |
5027 | - cpus_and(map, map, cpu_online_map); | |
5028 | - /* | |
5029 | - * We waited 1 sec, if a CPU still did not call idle | |
5030 | - * it may be because it is in idle and not waking up | |
5031 | - * because it has nothing to do. | |
5032 | - * Give all the remaining CPUS a kick. | |
5033 | - */ | |
5034 | - smp_call_function_mask(map, do_nothing, 0, 0); | |
5035 | - } while (!cpus_empty(map)); | |
5036 | - | |
5037 | - set_cpus_allowed(current, tmp); | |
5038 | + smp_mb(); | |
5039 | + /* kick all the CPUs so that they exit out of pm_idle */ | |
5040 | + smp_call_function(do_nothing, NULL, 0, 1); | |
5041 | } | |
5042 | EXPORT_SYMBOL_GPL(cpu_idle_wait); | |
5043 | ||
5044 | @@ -251,15 +239,15 @@ void __show_registers(struct pt_regs *re | |
5045 | { | |
5046 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; | |
5047 | unsigned long d0, d1, d2, d3, d6, d7; | |
5048 | - unsigned long esp; | |
5049 | + unsigned long sp; | |
5050 | unsigned short ss, gs; | |
5051 | ||
5052 | if (user_mode_vm(regs)) { | |
5053 | - esp = regs->esp; | |
5054 | - ss = regs->xss & 0xffff; | |
5055 | + sp = regs->sp; | |
5056 | + ss = regs->ss & 0xffff; | |
5057 | savesegment(gs, gs); | |
5058 | } else { | |
5059 | - esp = (unsigned long) (&regs->esp); | |
5060 | + sp = (unsigned long) (&regs->sp); | |
5061 | savesegment(ss, ss); | |
5062 | savesegment(gs, gs); | |
5063 | } | |
5064 | @@ -272,17 +260,17 @@ void __show_registers(struct pt_regs *re | |
5065 | init_utsname()->version); | |
5066 | ||
5067 | printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", | |
5068 | - 0xffff & regs->xcs, regs->eip, regs->eflags, | |
5069 | + 0xffff & regs->cs, regs->ip, regs->flags, | |
5070 | smp_processor_id()); | |
5071 | - print_symbol("EIP is at %s\n", regs->eip); | |
5072 | + print_symbol("EIP is at %s\n", regs->ip); | |
5073 | ||
5074 | printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", | |
5075 | - regs->eax, regs->ebx, regs->ecx, regs->edx); | |
5076 | + regs->ax, regs->bx, regs->cx, regs->dx); | |
5077 | printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", | |
5078 | - regs->esi, regs->edi, regs->ebp, esp); | |
5079 | + regs->si, regs->di, regs->bp, sp); | |
5080 | printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", | |
5081 | - regs->xds & 0xffff, regs->xes & 0xffff, | |
5082 | - regs->xfs & 0xffff, gs, ss); | |
5083 | + regs->ds & 0xffff, regs->es & 0xffff, | |
5084 | + regs->fs & 0xffff, gs, ss); | |
5085 | ||
5086 | if (!all) | |
5087 | return; | |
5088 | @@ -310,12 +298,12 @@ void __show_registers(struct pt_regs *re | |
5089 | void show_regs(struct pt_regs *regs) | |
5090 | { | |
5091 | __show_registers(regs, 1); | |
5092 | - show_trace(NULL, regs, &regs->esp); | |
5093 | + show_trace(NULL, regs, &regs->sp, regs->bp); | |
5094 | } | |
5095 | ||
5096 | /* | |
5097 | - * This gets run with %ebx containing the | |
5098 | - * function to call, and %edx containing | |
5099 | + * This gets run with %bx containing the | |
5100 | + * function to call, and %dx containing | |
5101 | * the "args". | |
5102 | */ | |
5103 | extern void kernel_thread_helper(void); | |
5104 | @@ -329,16 +317,16 @@ int kernel_thread(int (*fn)(void *), voi | |
5105 | ||
5106 | memset(&regs, 0, sizeof(regs)); | |
5107 | ||
5108 | - regs.ebx = (unsigned long) fn; | |
5109 | - regs.edx = (unsigned long) arg; | |
5110 | + regs.bx = (unsigned long) fn; | |
5111 | + regs.dx = (unsigned long) arg; | |
5112 | ||
5113 | - regs.xds = __USER_DS; | |
5114 | - regs.xes = __USER_DS; | |
5115 | - regs.xfs = __KERNEL_PERCPU; | |
5116 | - regs.orig_eax = -1; | |
5117 | - regs.eip = (unsigned long) kernel_thread_helper; | |
5118 | - regs.xcs = __KERNEL_CS | get_kernel_rpl(); | |
5119 | - regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; | |
5120 | + regs.ds = __USER_DS; | |
5121 | + regs.es = __USER_DS; | |
5122 | + regs.fs = __KERNEL_PERCPU; | |
5123 | + regs.orig_ax = -1; | |
5124 | + regs.ip = (unsigned long) kernel_thread_helper; | |
5125 | + regs.cs = __KERNEL_CS | get_kernel_rpl(); | |
5126 | + regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; | |
5127 | ||
5128 | /* Ok, create the new process.. */ | |
5129 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); | |
5130 | @@ -368,7 +356,12 @@ void flush_thread(void) | |
5131 | { | |
5132 | struct task_struct *tsk = current; | |
5133 | ||
5134 | - memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); | |
5135 | + tsk->thread.debugreg0 = 0; | |
5136 | + tsk->thread.debugreg1 = 0; | |
5137 | + tsk->thread.debugreg2 = 0; | |
5138 | + tsk->thread.debugreg3 = 0; | |
5139 | + tsk->thread.debugreg6 = 0; | |
5140 | + tsk->thread.debugreg7 = 0; | |
5141 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | |
5142 | clear_tsk_thread_flag(tsk, TIF_DEBUG); | |
5143 | /* | |
5144 | @@ -393,7 +386,7 @@ void prepare_to_copy(struct task_struct | |
5145 | unlazy_fpu(tsk); | |
5146 | } | |
5147 | ||
5148 | -int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, | |
5149 | +int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |
5150 | unsigned long unused, | |
5151 | struct task_struct * p, struct pt_regs * regs) | |
5152 | { | |
5153 | @@ -403,17 +396,19 @@ int copy_thread(int nr, unsigned long cl | |
5154 | ||
5155 | childregs = task_pt_regs(p); | |
5156 | *childregs = *regs; | |
5157 | - childregs->eax = 0; | |
5158 | - childregs->esp = esp; | |
5159 | + childregs->ax = 0; | |
5160 | + childregs->sp = sp; | |
5161 | ||
5162 | - p->thread.esp = (unsigned long) childregs; | |
5163 | - p->thread.esp0 = (unsigned long) (childregs+1); | |
5164 | + p->thread.sp = (unsigned long) childregs; | |
5165 | + p->thread.sp0 = (unsigned long) (childregs+1); | |
5166 | ||
5167 | - p->thread.eip = (unsigned long) ret_from_fork; | |
5168 | + p->thread.ip = (unsigned long) ret_from_fork; | |
5169 | ||
5170 | - savesegment(gs,p->thread.gs); | |
5171 | + savesegment(gs, p->thread.gs); | |
5172 | ||
5173 | tsk = current; | |
5174 | + if (test_tsk_thread_flag(tsk, TIF_CSTAR)) | |
5175 | + p->thread.ip = (unsigned long) cstar_ret_from_fork; | |
5176 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | |
5177 | p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, | |
5178 | IO_BITMAP_BYTES, GFP_KERNEL); | |
5179 | @@ -424,34 +419,17 @@ int copy_thread(int nr, unsigned long cl | |
5180 | set_tsk_thread_flag(p, TIF_IO_BITMAP); | |
5181 | } | |
5182 | ||
5183 | + err = 0; | |
cc90b958 | 5184 | + |
00e5a55c BS |
5185 | /* |
5186 | * Set a new TLS for the child thread? | |
5187 | */ | |
5188 | - if (clone_flags & CLONE_SETTLS) { | |
5189 | - struct desc_struct *desc; | |
5190 | - struct user_desc info; | |
5191 | - int idx; | |
5192 | - | |
5193 | - err = -EFAULT; | |
5194 | - if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info))) | |
5195 | - goto out; | |
5196 | - err = -EINVAL; | |
5197 | - if (LDT_empty(&info)) | |
5198 | - goto out; | |
5199 | - | |
5200 | - idx = info.entry_number; | |
5201 | - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) | |
5202 | - goto out; | |
5203 | - | |
5204 | - desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; | |
5205 | - desc->a = LDT_entry_a(&info); | |
5206 | - desc->b = LDT_entry_b(&info); | |
5207 | - } | |
5208 | + if (clone_flags & CLONE_SETTLS) | |
5209 | + err = do_set_thread_area(p, -1, | |
5210 | + (struct user_desc __user *)childregs->si, 0); | |
5211 | ||
5212 | p->thread.iopl = current->thread.iopl; | |
5213 | ||
5214 | - err = 0; | |
5215 | - out: | |
5216 | if (err && p->thread.io_bitmap_ptr) { | |
5217 | kfree(p->thread.io_bitmap_ptr); | |
5218 | p->thread.io_bitmap_max = 0; | |
5219 | @@ -459,67 +437,8 @@ int copy_thread(int nr, unsigned long cl | |
5220 | return err; | |
5221 | } | |
5222 | ||
5223 | -/* | |
5224 | - * fill in the user structure for a core dump.. | |
5225 | - */ | |
5226 | -void dump_thread(struct pt_regs * regs, struct user * dump) | |
5227 | -{ | |
5228 | - int i; | |
5229 | - | |
5230 | -/* changed the size calculations - should hopefully work better. lbt */ | |
5231 | - dump->magic = CMAGIC; | |
5232 | - dump->start_code = 0; | |
5233 | - dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); | |
5234 | - dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; | |
5235 | - dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; | |
5236 | - dump->u_dsize -= dump->u_tsize; | |
5237 | - dump->u_ssize = 0; | |
5238 | - for (i = 0; i < 8; i++) | |
5239 | - dump->u_debugreg[i] = current->thread.debugreg[i]; | |
5240 | - | |
5241 | - if (dump->start_stack < TASK_SIZE) | |
5242 | - dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; | |
5243 | - | |
5244 | - dump->regs.ebx = regs->ebx; | |
5245 | - dump->regs.ecx = regs->ecx; | |
5246 | - dump->regs.edx = regs->edx; | |
5247 | - dump->regs.esi = regs->esi; | |
5248 | - dump->regs.edi = regs->edi; | |
5249 | - dump->regs.ebp = regs->ebp; | |
5250 | - dump->regs.eax = regs->eax; | |
5251 | - dump->regs.ds = regs->xds; | |
5252 | - dump->regs.es = regs->xes; | |
5253 | - dump->regs.fs = regs->xfs; | |
5254 | - savesegment(gs,dump->regs.gs); | |
5255 | - dump->regs.orig_eax = regs->orig_eax; | |
5256 | - dump->regs.eip = regs->eip; | |
5257 | - dump->regs.cs = regs->xcs; | |
5258 | - dump->regs.eflags = regs->eflags; | |
5259 | - dump->regs.esp = regs->esp; | |
5260 | - dump->regs.ss = regs->xss; | |
5261 | - | |
5262 | - dump->u_fpvalid = dump_fpu (regs, &dump->i387); | |
5263 | -} | |
5264 | -EXPORT_SYMBOL(dump_thread); | |
5265 | - | |
5266 | -/* | |
5267 | - * Capture the user space registers if the task is not running (in user space) | |
5268 | - */ | |
5269 | -int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) | |
5270 | -{ | |
5271 | - struct pt_regs ptregs = *task_pt_regs(tsk); | |
5272 | - ptregs.xcs &= 0xffff; | |
5273 | - ptregs.xds &= 0xffff; | |
5274 | - ptregs.xes &= 0xffff; | |
5275 | - ptregs.xss &= 0xffff; | |
5276 | - | |
5277 | - elf_core_copy_regs(regs, &ptregs); | |
5278 | - | |
5279 | - return 1; | |
5280 | -} | |
5281 | - | |
5282 | #ifdef CONFIG_SECCOMP | |
5283 | -void hard_disable_TSC(void) | |
5284 | +static void hard_disable_TSC(void) | |
5285 | { | |
5286 | write_cr4(read_cr4() | X86_CR4_TSD); | |
5287 | } | |
5288 | @@ -534,7 +453,7 @@ void disable_TSC(void) | |
5289 | hard_disable_TSC(); | |
5290 | preempt_enable(); | |
5291 | } | |
5292 | -void hard_enable_TSC(void) | |
5293 | +static void hard_enable_TSC(void) | |
5294 | { | |
5295 | write_cr4(read_cr4() & ~X86_CR4_TSD); | |
5296 | } | |
5297 | @@ -543,18 +462,32 @@ void hard_enable_TSC(void) | |
5298 | static noinline void | |
5299 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) | |
5300 | { | |
5301 | - struct thread_struct *next; | |
5302 | + struct thread_struct *prev, *next; | |
5303 | + unsigned long debugctl; | |
5304 | ||
5305 | + prev = &prev_p->thread; | |
5306 | next = &next_p->thread; | |
5307 | ||
5308 | + debugctl = prev->debugctlmsr; | |
5309 | + if (next->ds_area_msr != prev->ds_area_msr) { | |
5310 | + /* we clear debugctl to make sure DS | |
5311 | + * is not in use when we change it */ | |
5312 | + debugctl = 0; | |
5313 | + wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); | |
5314 | + wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); | |
cc90b958 | 5315 | + } |
cc90b958 | 5316 | + |
00e5a55c BS |
5317 | + if (next->debugctlmsr != debugctl) |
5318 | + wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0); | |
cc90b958 | 5319 | + |
00e5a55c BS |
5320 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { |
5321 | - set_debugreg(next->debugreg[0], 0); | |
5322 | - set_debugreg(next->debugreg[1], 1); | |
5323 | - set_debugreg(next->debugreg[2], 2); | |
5324 | - set_debugreg(next->debugreg[3], 3); | |
5325 | + set_debugreg(next->debugreg0, 0); | |
5326 | + set_debugreg(next->debugreg1, 1); | |
5327 | + set_debugreg(next->debugreg2, 2); | |
5328 | + set_debugreg(next->debugreg3, 3); | |
5329 | /* no 4 and 5 */ | |
5330 | - set_debugreg(next->debugreg[6], 6); | |
5331 | - set_debugreg(next->debugreg[7], 7); | |
5332 | + set_debugreg(next->debugreg6, 6); | |
5333 | + set_debugreg(next->debugreg7, 7); | |
5334 | } | |
5335 | ||
5336 | #ifdef CONFIG_SECCOMP | |
5337 | @@ -567,6 +500,14 @@ __switch_to_xtra(struct task_struct *pre | |
5338 | hard_enable_TSC(); | |
5339 | } | |
5340 | #endif | |
cc90b958 | 5341 | + |
00e5a55c BS |
5342 | +#ifdef X86_BTS |
5343 | + if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | |
5344 | + ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | |
cc90b958 | 5345 | + |
00e5a55c BS |
5346 | + if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) |
5347 | + ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | |
5348 | +#endif | |
5349 | } | |
5350 | ||
5351 | /* | |
5352 | @@ -592,11 +533,11 @@ __switch_to_xtra(struct task_struct *pre | |
5353 | * More important, however, is the fact that this allows us much | |
5354 | * more flexibility. | |
5355 | * | |
5356 | - * The return value (in %eax) will be the "prev" task after | |
5357 | + * The return value (in %ax) will be the "prev" task after | |
5358 | * the task-switch, and shows up in ret_from_fork in entry.S, | |
5359 | * for example. | |
5360 | */ | |
5361 | -struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |
5362 | +struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |
5363 | { | |
5364 | struct thread_struct *prev = &prev_p->thread, | |
5365 | *next = &next_p->thread; | |
5366 | @@ -632,12 +573,12 @@ struct task_struct fastcall * __switch_t | |
5367 | #endif | |
5368 | ||
5369 | /* | |
5370 | - * Reload esp0. | |
5371 | - * This is load_esp0(tss, next) with a multicall. | |
5372 | + * Reload sp0. | |
5373 | + * This is load_sp0(tss, next) with a multicall. | |
5374 | */ | |
5375 | mcl->op = __HYPERVISOR_stack_switch; | |
5376 | mcl->args[0] = __KERNEL_DS; | |
5377 | - mcl->args[1] = next->esp0; | |
5378 | + mcl->args[1] = next->sp0; | |
5379 | mcl++; | |
5380 | ||
5381 | /* | |
5382 | @@ -734,7 +675,7 @@ struct task_struct fastcall * __switch_t | |
5383 | ||
5384 | asmlinkage int sys_fork(struct pt_regs regs) | |
5385 | { | |
5386 | - return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL); | |
5387 | + return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL); | |
5388 | } | |
5389 | ||
5390 | asmlinkage int sys_clone(struct pt_regs regs) | |
5391 | @@ -743,12 +684,12 @@ asmlinkage int sys_clone(struct pt_regs | |
5392 | unsigned long newsp; | |
5393 | int __user *parent_tidptr, *child_tidptr; | |
5394 | ||
5395 | - clone_flags = regs.ebx; | |
5396 | - newsp = regs.ecx; | |
5397 | - parent_tidptr = (int __user *)regs.edx; | |
5398 | - child_tidptr = (int __user *)regs.edi; | |
5399 | + clone_flags = regs.bx; | |
5400 | + newsp = regs.cx; | |
5401 | + parent_tidptr = (int __user *)regs.dx; | |
5402 | + child_tidptr = (int __user *)regs.di; | |
5403 | if (!newsp) | |
5404 | - newsp = regs.esp; | |
5405 | + newsp = regs.sp; | |
5406 | return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr); | |
5407 | } | |
5408 | ||
5409 | @@ -764,7 +705,7 @@ asmlinkage int sys_clone(struct pt_regs | |
5410 | */ | |
5411 | asmlinkage int sys_vfork(struct pt_regs regs) | |
5412 | { | |
5413 | - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0, NULL, NULL); | |
5414 | + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL); | |
5415 | } | |
5416 | ||
5417 | /* | |
5418 | @@ -775,18 +716,15 @@ asmlinkage int sys_execve(struct pt_regs | |
5419 | int error; | |
5420 | char * filename; | |
5421 | ||
5422 | - filename = getname((char __user *) regs.ebx); | |
5423 | + filename = getname((char __user *) regs.bx); | |
5424 | error = PTR_ERR(filename); | |
5425 | if (IS_ERR(filename)) | |
5426 | goto out; | |
5427 | error = do_execve(filename, | |
5428 | - (char __user * __user *) regs.ecx, | |
5429 | - (char __user * __user *) regs.edx, | |
5430 | + (char __user * __user *) regs.cx, | |
5431 | + (char __user * __user *) regs.dx, | |
5432 | &regs); | |
5433 | if (error == 0) { | |
5434 | - task_lock(current); | |
5435 | - current->ptrace &= ~PT_DTRACE; | |
5436 | - task_unlock(current); | |
5437 | /* Make sure we don't return using sysenter.. */ | |
5438 | set_thread_flag(TIF_IRET); | |
5439 | } | |
5440 | @@ -800,145 +738,37 @@ out: | |
5441 | ||
5442 | unsigned long get_wchan(struct task_struct *p) | |
5443 | { | |
5444 | - unsigned long ebp, esp, eip; | |
5445 | + unsigned long bp, sp, ip; | |
5446 | unsigned long stack_page; | |
5447 | int count = 0; | |
5448 | if (!p || p == current || p->state == TASK_RUNNING) | |
5449 | return 0; | |
5450 | stack_page = (unsigned long)task_stack_page(p); | |
5451 | - esp = p->thread.esp; | |
5452 | - if (!stack_page || esp < stack_page || esp > top_esp+stack_page) | |
5453 | + sp = p->thread.sp; | |
5454 | + if (!stack_page || sp < stack_page || sp > top_esp+stack_page) | |
5455 | return 0; | |
5456 | - /* include/asm-i386/system.h:switch_to() pushes ebp last. */ | |
5457 | - ebp = *(unsigned long *) esp; | |
5458 | + /* include/asm-i386/system.h:switch_to() pushes bp last. */ | |
5459 | + bp = *(unsigned long *) sp; | |
5460 | do { | |
5461 | - if (ebp < stack_page || ebp > top_ebp+stack_page) | |
5462 | + if (bp < stack_page || bp > top_ebp+stack_page) | |
5463 | return 0; | |
5464 | - eip = *(unsigned long *) (ebp+4); | |
5465 | - if (!in_sched_functions(eip)) | |
5466 | - return eip; | |
5467 | - ebp = *(unsigned long *) ebp; | |
5468 | + ip = *(unsigned long *) (bp+4); | |
5469 | + if (!in_sched_functions(ip)) | |
5470 | + return ip; | |
5471 | + bp = *(unsigned long *) bp; | |
5472 | } while (count++ < 16); | |
5473 | return 0; | |
5474 | } | |
5475 | ||
5476 | -/* | |
5477 | - * sys_alloc_thread_area: get a yet unused TLS descriptor index. | |
5478 | - */ | |
5479 | -static int get_free_idx(void) | |
5480 | -{ | |
5481 | - struct thread_struct *t = &current->thread; | |
5482 | - int idx; | |
cc90b958 | 5483 | - |
00e5a55c BS |
5484 | - for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) |
5485 | - if (desc_empty(t->tls_array + idx)) | |
5486 | - return idx + GDT_ENTRY_TLS_MIN; | |
5487 | - return -ESRCH; | |
5488 | -} | |
cc90b958 | 5489 | - |
00e5a55c BS |
5490 | -/* |
5491 | - * Set a given TLS descriptor: | |
5492 | - */ | |
5493 | -asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) | |
5494 | -{ | |
5495 | - struct thread_struct *t = &current->thread; | |
5496 | - struct user_desc info; | |
5497 | - struct desc_struct *desc; | |
5498 | - int cpu, idx; | |
cc90b958 | 5499 | - |
00e5a55c BS |
5500 | - if (copy_from_user(&info, u_info, sizeof(info))) |
5501 | - return -EFAULT; | |
5502 | - idx = info.entry_number; | |
5503 | - | |
5504 | - /* | |
5505 | - * index -1 means the kernel should try to find and | |
5506 | - * allocate an empty descriptor: | |
cc90b958 | 5507 | - */ |
00e5a55c BS |
5508 | - if (idx == -1) { |
5509 | - idx = get_free_idx(); | |
5510 | - if (idx < 0) | |
5511 | - return idx; | |
5512 | - if (put_user(idx, &u_info->entry_number)) | |
5513 | - return -EFAULT; | |
5514 | - } | |
cc90b958 | 5515 | - |
00e5a55c BS |
5516 | - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) |
5517 | - return -EINVAL; | |
cc90b958 | 5518 | - |
00e5a55c | 5519 | - desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN; |
cc90b958 | 5520 | - |
00e5a55c BS |
5521 | - /* |
5522 | - * We must not get preempted while modifying the TLS. | |
5523 | - */ | |
5524 | - cpu = get_cpu(); | |
cc90b958 | 5525 | - |
00e5a55c BS |
5526 | - if (LDT_empty(&info)) { |
5527 | - desc->a = 0; | |
5528 | - desc->b = 0; | |
5529 | - } else { | |
5530 | - desc->a = LDT_entry_a(&info); | |
5531 | - desc->b = LDT_entry_b(&info); | |
cc90b958 | 5532 | - } |
00e5a55c BS |
5533 | - load_TLS(t, cpu); |
5534 | - | |
5535 | - put_cpu(); | |
5536 | - | |
5537 | - return 0; | |
cc90b958 | 5538 | -} |
cc90b958 | 5539 | - |
00e5a55c BS |
5540 | -/* |
5541 | - * Get the current Thread-Local Storage area: | |
5542 | - */ | |
cc90b958 | 5543 | - |
00e5a55c BS |
5544 | -#define GET_BASE(desc) ( \ |
5545 | - (((desc)->a >> 16) & 0x0000ffff) | \ | |
5546 | - (((desc)->b << 16) & 0x00ff0000) | \ | |
5547 | - ( (desc)->b & 0xff000000) ) | |
5548 | - | |
5549 | -#define GET_LIMIT(desc) ( \ | |
5550 | - ((desc)->a & 0x0ffff) | \ | |
5551 | - ((desc)->b & 0xf0000) ) | |
5552 | - | |
5553 | -#define GET_32BIT(desc) (((desc)->b >> 22) & 1) | |
5554 | -#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) | |
5555 | -#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) | |
5556 | -#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) | |
5557 | -#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) | |
5558 | -#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) | |
5559 | - | |
5560 | -asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) | |
cc90b958 | 5561 | -{ |
00e5a55c BS |
5562 | - struct user_desc info; |
5563 | - struct desc_struct *desc; | |
5564 | - int idx; | |
cc90b958 | 5565 | - |
00e5a55c BS |
5566 | - if (get_user(idx, &u_info->entry_number)) |
5567 | - return -EFAULT; | |
5568 | - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) | |
5569 | - return -EINVAL; | |
cc90b958 | 5570 | - |
00e5a55c BS |
5571 | - memset(&info, 0, sizeof(info)); |
5572 | - | |
5573 | - desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; | |
5574 | - | |
5575 | - info.entry_number = idx; | |
5576 | - info.base_addr = GET_BASE(desc); | |
5577 | - info.limit = GET_LIMIT(desc); | |
5578 | - info.seg_32bit = GET_32BIT(desc); | |
5579 | - info.contents = GET_CONTENTS(desc); | |
5580 | - info.read_exec_only = !GET_WRITABLE(desc); | |
5581 | - info.limit_in_pages = GET_LIMIT_PAGES(desc); | |
5582 | - info.seg_not_present = !GET_PRESENT(desc); | |
5583 | - info.useable = GET_USEABLE(desc); | |
5584 | - | |
5585 | - if (copy_to_user(u_info, &info, sizeof(info))) | |
5586 | - return -EFAULT; | |
5587 | - return 0; | |
cc90b958 BS |
5588 | -} |
5589 | - | |
00e5a55c BS |
5590 | unsigned long arch_align_stack(unsigned long sp) |
5591 | { | |
5592 | if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) | |
5593 | sp -= get_random_int() % 8192; | |
5594 | return sp & ~0xf; | |
5595 | } | |
5596 | + | |
5597 | +unsigned long arch_randomize_brk(struct mm_struct *mm) | |
5598 | +{ | |
5599 | + unsigned long range_end = mm->brk + 0x02000000; | |
5600 | + return randomize_range(mm->brk, range_end, 0) ? : mm->brk; | |
5601 | +} | |
5602 | --- sle11-2009-05-14.orig/arch/x86/kernel/process_64-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
5603 | +++ sle11-2009-05-14/arch/x86/kernel/process_64-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
5604 | @@ -3,7 +3,7 @@ | |
5605 | * | |
5606 | * Pentium III FXSR, SSE support | |
5607 | * Gareth Hughes <gareth@valinux.com>, May 2000 | |
5608 | - * | |
5609 | + * | |
5610 | * X86-64 port | |
5611 | * Andi Kleen. | |
5612 | * | |
5613 | @@ -22,19 +22,18 @@ | |
5614 | #include <linux/cpu.h> | |
5615 | #include <linux/errno.h> | |
5616 | #include <linux/sched.h> | |
5617 | +#include <linux/fs.h> | |
5618 | #include <linux/kernel.h> | |
5619 | #include <linux/mm.h> | |
5620 | -#include <linux/fs.h> | |
5621 | #include <linux/elfcore.h> | |
5622 | #include <linux/smp.h> | |
5623 | #include <linux/slab.h> | |
5624 | #include <linux/user.h> | |
5625 | -#include <linux/module.h> | |
5626 | -#include <linux/a.out.h> | |
5627 | #include <linux/interrupt.h> | |
5628 | +#include <linux/utsname.h> | |
5629 | #include <linux/delay.h> | |
5630 | +#include <linux/module.h> | |
5631 | #include <linux/ptrace.h> | |
5632 | -#include <linux/utsname.h> | |
5633 | #include <linux/random.h> | |
5634 | #include <linux/notifier.h> | |
5635 | #include <linux/kprobes.h> | |
5636 | @@ -73,7 +72,6 @@ EXPORT_SYMBOL(boot_option_idle_override) | |
5637 | */ | |
5638 | void (*pm_idle)(void); | |
5639 | EXPORT_SYMBOL(pm_idle); | |
5640 | -static DEFINE_PER_CPU(unsigned int, cpu_idle_state); | |
5641 | ||
5642 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); | |
5643 | ||
5644 | @@ -81,13 +79,6 @@ void idle_notifier_register(struct notif | |
5645 | { | |
5646 | atomic_notifier_chain_register(&idle_notifier, n); | |
5647 | } | |
5648 | -EXPORT_SYMBOL_GPL(idle_notifier_register); | |
cc90b958 | 5649 | - |
00e5a55c BS |
5650 | -void idle_notifier_unregister(struct notifier_block *n) |
5651 | -{ | |
5652 | - atomic_notifier_chain_unregister(&idle_notifier, n); | |
5653 | -} | |
5654 | -EXPORT_SYMBOL(idle_notifier_unregister); | |
5655 | ||
5656 | void enter_idle(void) | |
5657 | { | |
5658 | @@ -116,7 +107,7 @@ void exit_idle(void) | |
5659 | * to poll the ->need_resched flag instead of waiting for the | |
5660 | * cross-CPU IPI to arrive. Use this option with caution. | |
5661 | */ | |
5662 | -static void poll_idle (void) | |
5663 | +static void poll_idle(void) | |
5664 | { | |
5665 | local_irq_enable(); | |
5666 | cpu_relax(); | |
5667 | @@ -131,10 +122,19 @@ static void xen_idle(void) | |
5668 | */ | |
5669 | smp_mb(); | |
5670 | local_irq_disable(); | |
5671 | - if (!need_resched()) | |
5672 | - safe_halt(); | |
5673 | - else | |
5674 | - local_irq_enable(); | |
5675 | + if (!need_resched()) { | |
5676 | + ktime_t t0, t1; | |
5677 | + u64 t0n, t1n; | |
5678 | + | |
5679 | + t0 = ktime_get(); | |
5680 | + t0n = ktime_to_ns(t0); | |
5681 | + safe_halt(); /* enables interrupts racelessly */ | |
5682 | + local_irq_disable(); | |
5683 | + t1 = ktime_get(); | |
5684 | + t1n = ktime_to_ns(t1); | |
5685 | + sched_clock_idle_wakeup_event(t1n - t0n); | |
5686 | + } | |
5687 | + local_irq_enable(); | |
5688 | current_thread_info()->status |= TS_POLLING; | |
5689 | } | |
5690 | ||
5691 | @@ -161,19 +161,15 @@ static inline void play_dead(void) | |
5692 | * low exit latency (ie sit in a loop waiting for | |
5693 | * somebody to say that they'd like to reschedule) | |
5694 | */ | |
5695 | -void cpu_idle (void) | |
5696 | +void cpu_idle(void) | |
5697 | { | |
5698 | current_thread_info()->status |= TS_POLLING; | |
5699 | /* endless idle loop with no priority at all */ | |
5700 | while (1) { | |
5701 | + tick_nohz_stop_sched_tick(); | |
5702 | while (!need_resched()) { | |
5703 | void (*idle)(void); | |
5704 | ||
5705 | - if (__get_cpu_var(cpu_idle_state)) | |
5706 | - __get_cpu_var(cpu_idle_state) = 0; | |
cc90b958 | 5707 | - |
00e5a55c | 5708 | - tick_nohz_stop_sched_tick(); |
cc90b958 | 5709 | - |
00e5a55c BS |
5710 | rmb(); |
5711 | idle = xen_idle; /* no alternatives */ | |
5712 | if (cpu_is_offline(smp_processor_id())) | |
5713 | @@ -203,49 +199,27 @@ static void do_nothing(void *unused) | |
5714 | { | |
5715 | } | |
5716 | ||
5717 | +/* | |
5718 | + * cpu_idle_wait - Used to ensure that all the CPUs discard old value of | |
5719 | + * pm_idle and update to new pm_idle value. Required while changing pm_idle | |
5720 | + * handler on SMP systems. | |
5721 | + * | |
5722 | + * Caller must have changed pm_idle to the new value before the call. Old | |
5723 | + * pm_idle value will not be used by any CPU after the return of this function. | |
5724 | + */ | |
5725 | void cpu_idle_wait(void) | |
5726 | { | |
5727 | - unsigned int cpu, this_cpu = get_cpu(); | |
5728 | - cpumask_t map, tmp = current->cpus_allowed; | |
cc90b958 | 5729 | - |
00e5a55c | 5730 | - set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); |
cc90b958 | 5731 | - put_cpu(); |
cc90b958 | 5732 | - |
00e5a55c BS |
5733 | - cpus_clear(map); |
5734 | - for_each_online_cpu(cpu) { | |
5735 | - per_cpu(cpu_idle_state, cpu) = 1; | |
5736 | - cpu_set(cpu, map); | |
5737 | - } | |
cc90b958 | 5738 | - |
00e5a55c | 5739 | - __get_cpu_var(cpu_idle_state) = 0; |
cc90b958 | 5740 | - |
00e5a55c BS |
5741 | - wmb(); |
5742 | - do { | |
5743 | - ssleep(1); | |
5744 | - for_each_online_cpu(cpu) { | |
5745 | - if (cpu_isset(cpu, map) && | |
5746 | - !per_cpu(cpu_idle_state, cpu)) | |
5747 | - cpu_clear(cpu, map); | |
5748 | - } | |
5749 | - cpus_and(map, map, cpu_online_map); | |
5750 | - /* | |
5751 | - * We waited 1 sec, if a CPU still did not call idle | |
5752 | - * it may be because it is in idle and not waking up | |
5753 | - * because it has nothing to do. | |
5754 | - * Give all the remaining CPUS a kick. | |
5755 | - */ | |
5756 | - smp_call_function_mask(map, do_nothing, 0, 0); | |
5757 | - } while (!cpus_empty(map)); | |
cc90b958 | 5758 | - |
00e5a55c BS |
5759 | - set_cpus_allowed(current, tmp); |
5760 | + smp_mb(); | |
5761 | + /* kick all the CPUs so that they exit out of pm_idle */ | |
5762 | + smp_call_function(do_nothing, NULL, 0, 1); | |
5763 | } | |
5764 | EXPORT_SYMBOL_GPL(cpu_idle_wait); | |
5765 | ||
5766 | -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |
5767 | +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |
5768 | { | |
5769 | } | |
5770 | ||
5771 | -static int __init idle_setup (char *str) | |
5772 | +static int __init idle_setup(char *str) | |
5773 | { | |
5774 | if (!strcmp(str, "poll")) { | |
5775 | printk("using polling idle threads.\n"); | |
5776 | @@ -260,13 +234,13 @@ static int __init idle_setup (char *str) | |
5777 | } | |
5778 | early_param("idle", idle_setup); | |
5779 | ||
5780 | -/* Prints also some state that isn't saved in the pt_regs */ | |
5781 | +/* Prints also some state that isn't saved in the pt_regs */ | |
5782 | void __show_regs(struct pt_regs * regs) | |
5783 | { | |
5784 | unsigned long fs, gs, shadowgs; | |
5785 | unsigned long d0, d1, d2, d3, d6, d7; | |
5786 | - unsigned int fsindex,gsindex; | |
5787 | - unsigned int ds,cs,es; | |
5788 | + unsigned int fsindex, gsindex; | |
5789 | + unsigned int ds, cs, es; | |
5790 | ||
5791 | printk("\n"); | |
5792 | print_modules(); | |
5793 | @@ -275,16 +249,16 @@ void __show_regs(struct pt_regs * regs) | |
5794 | init_utsname()->release, | |
5795 | (int)strcspn(init_utsname()->version, " "), | |
5796 | init_utsname()->version); | |
5797 | - printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); | |
5798 | - printk_address(regs->rip); | |
5799 | - printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, | |
5800 | - regs->eflags); | |
5801 | + printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); | |
5802 | + printk_address(regs->ip, 1); | |
5803 | + printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, | |
5804 | + regs->flags); | |
5805 | printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", | |
5806 | - regs->rax, regs->rbx, regs->rcx); | |
5807 | + regs->ax, regs->bx, regs->cx); | |
5808 | printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", | |
5809 | - regs->rdx, regs->rsi, regs->rdi); | |
5810 | + regs->dx, regs->si, regs->di); | |
5811 | printk("RBP: %016lx R08: %016lx R09: %016lx\n", | |
5812 | - regs->rbp, regs->r8, regs->r9); | |
5813 | + regs->bp, regs->r8, regs->r9); | |
5814 | printk("R10: %016lx R11: %016lx R12: %016lx\n", | |
5815 | regs->r10, regs->r11, regs->r12); | |
5816 | printk("R13: %016lx R14: %016lx R15: %016lx\n", | |
5817 | @@ -318,7 +292,7 @@ void show_regs(struct pt_regs *regs) | |
5818 | { | |
5819 | printk("CPU %d:", smp_processor_id()); | |
5820 | __show_regs(regs); | |
5821 | - show_trace(NULL, regs, (void *)(regs + 1)); | |
5822 | + show_trace(NULL, regs, (void *)(regs + 1), regs->bp); | |
5823 | } | |
5824 | ||
5825 | /* | |
5826 | @@ -329,7 +303,7 @@ void exit_thread(void) | |
5827 | struct task_struct *me = current; | |
5828 | struct thread_struct *t = &me->thread; | |
5829 | ||
5830 | - if (me->thread.io_bitmap_ptr) { | |
5831 | + if (me->thread.io_bitmap_ptr) { | |
5832 | #ifndef CONFIG_X86_NO_TSS | |
5833 | struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); | |
5834 | #endif | |
5835 | @@ -382,7 +356,7 @@ void flush_thread(void) | |
5836 | tsk->thread.debugreg3 = 0; | |
5837 | tsk->thread.debugreg6 = 0; | |
5838 | tsk->thread.debugreg7 = 0; | |
5839 | - memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | |
5840 | + memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | |
5841 | /* | |
5842 | * Forget coprocessor state.. | |
5843 | */ | |
5844 | @@ -405,26 +379,21 @@ void release_thread(struct task_struct * | |
5845 | ||
5846 | static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) | |
5847 | { | |
5848 | - struct user_desc ud = { | |
5849 | + struct user_desc ud = { | |
5850 | .base_addr = addr, | |
5851 | .limit = 0xfffff, | |
5852 | .seg_32bit = 1, | |
5853 | .limit_in_pages = 1, | |
5854 | .useable = 1, | |
5855 | }; | |
5856 | - struct n_desc_struct *desc = (void *)t->thread.tls_array; | |
5857 | + struct desc_struct *desc = t->thread.tls_array; | |
5858 | desc += tls; | |
5859 | - desc->a = LDT_entry_a(&ud); | |
5860 | - desc->b = LDT_entry_b(&ud); | |
5861 | + fill_ldt(desc, &ud); | |
5862 | } | |
5863 | ||
5864 | static inline u32 read_32bit_tls(struct task_struct *t, int tls) | |
5865 | { | |
5866 | - struct desc_struct *desc = (void *)t->thread.tls_array; | |
5867 | - desc += tls; | |
5868 | - return desc->base0 | | |
5869 | - (((u32)desc->base1) << 16) | | |
5870 | - (((u32)desc->base2) << 24); | |
5871 | + return get_desc_base(&t->thread.tls_array[tls]); | |
5872 | } | |
5873 | ||
5874 | /* | |
5875 | @@ -436,7 +405,7 @@ void prepare_to_copy(struct task_struct | |
5876 | unlazy_fpu(tsk); | |
5877 | } | |
5878 | ||
5879 | -int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, | |
5880 | +int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |
5881 | unsigned long unused, | |
5882 | struct task_struct * p, struct pt_regs * regs) | |
5883 | { | |
5884 | @@ -448,14 +417,14 @@ int copy_thread(int nr, unsigned long cl | |
5885 | (THREAD_SIZE + task_stack_page(p))) - 1; | |
5886 | *childregs = *regs; | |
5887 | ||
5888 | - childregs->rax = 0; | |
5889 | - childregs->rsp = rsp; | |
5890 | - if (rsp == ~0UL) | |
5891 | - childregs->rsp = (unsigned long)childregs; | |
cc90b958 | 5892 | - |
00e5a55c BS |
5893 | - p->thread.rsp = (unsigned long) childregs; |
5894 | - p->thread.rsp0 = (unsigned long) (childregs+1); | |
5895 | - p->thread.userrsp = me->thread.userrsp; | |
5896 | + childregs->ax = 0; | |
5897 | + childregs->sp = sp; | |
5898 | + if (sp == ~0UL) | |
5899 | + childregs->sp = (unsigned long)childregs; | |
5900 | + | |
5901 | + p->thread.sp = (unsigned long) childregs; | |
5902 | + p->thread.sp0 = (unsigned long) (childregs+1); | |
5903 | + p->thread.usersp = me->thread.usersp; | |
5904 | ||
5905 | set_tsk_thread_flag(p, TIF_FORK); | |
5906 | ||
5907 | @@ -476,7 +445,7 @@ int copy_thread(int nr, unsigned long cl | |
5908 | memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, | |
5909 | IO_BITMAP_BYTES); | |
5910 | set_tsk_thread_flag(p, TIF_IO_BITMAP); | |
5911 | - } | |
5912 | + } | |
5913 | ||
5914 | /* | |
5915 | * Set a new TLS for the child thread? | |
5916 | @@ -484,7 +453,8 @@ int copy_thread(int nr, unsigned long cl | |
5917 | if (clone_flags & CLONE_SETTLS) { | |
5918 | #ifdef CONFIG_IA32_EMULATION | |
5919 | if (test_thread_flag(TIF_IA32)) | |
5920 | - err = ia32_child_tls(p, childregs); | |
5921 | + err = do_set_thread_area(p, -1, | |
5922 | + (struct user_desc __user *)childregs->si, 0); | |
5923 | else | |
5924 | #endif | |
5925 | err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); | |
5926 | @@ -502,26 +472,32 @@ out: | |
5927 | return err; | |
5928 | } | |
5929 | ||
5930 | -static inline void __save_init_fpu( struct task_struct *tsk ) | |
cc90b958 | 5931 | -{ |
00e5a55c BS |
5932 | - asm volatile( "rex64 ; fxsave %0 ; fnclex" |
5933 | - : "=m" (tsk->thread.i387.fxsave)); | |
5934 | - tsk->thread_info->status &= ~TS_USEDFPU; | |
cc90b958 BS |
5935 | -} |
5936 | - | |
00e5a55c BS |
5937 | /* |
5938 | * This special macro can be used to load a debugging register | |
5939 | */ | |
5940 | -#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) | |
5941 | +#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r) | |
5942 | ||
5943 | static inline void __switch_to_xtra(struct task_struct *prev_p, | |
5944 | - struct task_struct *next_p) | |
5945 | + struct task_struct *next_p) | |
5946 | { | |
5947 | struct thread_struct *prev, *next; | |
5948 | + unsigned long debugctl; | |
5949 | ||
5950 | prev = &prev_p->thread, | |
5951 | next = &next_p->thread; | |
5952 | ||
5953 | + debugctl = prev->debugctlmsr; | |
5954 | + if (next->ds_area_msr != prev->ds_area_msr) { | |
5955 | + /* we clear debugctl to make sure DS | |
5956 | + * is not in use when we change it */ | |
5957 | + debugctl = 0; | |
5958 | + wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); | |
5959 | + wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); | |
5960 | + } | |
5961 | + | |
5962 | + if (next->debugctlmsr != debugctl) | |
5963 | + wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr); | |
5964 | + | |
5965 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | |
5966 | loaddebug(next, 0); | |
5967 | loaddebug(next, 1); | |
5968 | @@ -531,12 +507,20 @@ static inline void __switch_to_xtra(stru | |
5969 | loaddebug(next, 6); | |
5970 | loaddebug(next, 7); | |
5971 | } | |
5972 | + | |
5973 | +#ifdef X86_BTS | |
5974 | + if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | |
5975 | + ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | |
5976 | + | |
5977 | + if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | |
5978 | + ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | |
5979 | +#endif | |
5980 | } | |
5981 | ||
5982 | /* | |
5983 | * switch_to(x,y) should switch tasks from x to y. | |
5984 | * | |
5985 | - * This could still be optimized: | |
5986 | + * This could still be optimized: | |
5987 | * - fold all the options into a flag word and test it with a single test. | |
5988 | * - could test fs/gs bitsliced | |
5989 | * | |
5990 | @@ -547,7 +531,7 @@ __switch_to(struct task_struct *prev_p, | |
5991 | { | |
5992 | struct thread_struct *prev = &prev_p->thread, | |
5993 | *next = &next_p->thread; | |
5994 | - int cpu = smp_processor_id(); | |
5995 | + int cpu = smp_processor_id(); | |
5996 | #ifndef CONFIG_X86_NO_TSS | |
5997 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | |
5998 | #endif | |
5999 | @@ -581,11 +565,12 @@ __switch_to(struct task_struct *prev_p, | |
6000 | prev_p->fpu_counter = 0; | |
6001 | ||
6002 | /* | |
6003 | - * Reload esp0, LDT and the page table pointer: | |
6004 | + * Reload sp0. | |
6005 | + * This is load_sp0(tss, next) with a multicall. | |
6006 | */ | |
6007 | mcl->op = __HYPERVISOR_stack_switch; | |
6008 | mcl->args[0] = __KERNEL_DS; | |
6009 | - mcl->args[1] = next->rsp0; | |
6010 | + mcl->args[1] = next->sp0; | |
6011 | mcl++; | |
6012 | ||
6013 | /* | |
6014 | @@ -593,11 +578,12 @@ __switch_to(struct task_struct *prev_p, | |
6015 | * This is load_TLS(next, cpu) with multicalls. | |
6016 | */ | |
6017 | #define C(i) do { \ | |
6018 | - if (unlikely(next->tls_array[i] != prev->tls_array[i])) { \ | |
6019 | + if (unlikely(next->tls_array[i].a != prev->tls_array[i].a || \ | |
6020 | + next->tls_array[i].b != prev->tls_array[i].b)) { \ | |
6021 | mcl->op = __HYPERVISOR_update_descriptor; \ | |
6022 | mcl->args[0] = virt_to_machine( \ | |
6023 | - &cpu_gdt(cpu)[GDT_ENTRY_TLS_MIN + i]); \ | |
6024 | - mcl->args[1] = next->tls_array[i]; \ | |
6025 | + &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\ | |
6026 | + mcl->args[1] = *(u64 *)&next->tls_array[i]; \ | |
6027 | mcl++; \ | |
6028 | } \ | |
6029 | } while (0) | |
6030 | @@ -605,7 +591,7 @@ __switch_to(struct task_struct *prev_p, | |
6031 | #undef C | |
6032 | ||
6033 | if (unlikely(prev->iopl != next->iopl)) { | |
6034 | - iopl_op.iopl = (next->iopl == 0) ? 1 : next->iopl; | |
6035 | + iopl_op.iopl = (next->iopl == 0) ? 1 : (next->iopl >> 12) & 3; | |
6036 | #if CONFIG_XEN_COMPAT > 0x030002 | |
6037 | mcl->op = __HYPERVISOR_physdev_op; | |
6038 | mcl->args[0] = PHYSDEVOP_set_iopl; | |
6039 | @@ -669,8 +655,8 @@ __switch_to(struct task_struct *prev_p, | |
6040 | /* | |
6041 | * Switch the PDA context. | |
6042 | */ | |
6043 | - prev->userrsp = read_pda(oldrsp); | |
6044 | - write_pda(oldrsp, next->userrsp); | |
6045 | + prev->usersp = read_pda(oldrsp); | |
6046 | + write_pda(oldrsp, next->usersp); | |
6047 | write_pda(pcurrent, next_p); | |
6048 | write_pda(kernelstack, | |
6049 | (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); | |
6050 | @@ -687,7 +673,8 @@ __switch_to(struct task_struct *prev_p, | |
6051 | /* | |
6052 | * Now maybe reload the debug registers | |
6053 | */ | |
6054 | - if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) | |
6055 | + if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || | |
6056 | + task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) | |
6057 | __switch_to_xtra(prev_p, next_p); | |
6058 | ||
6059 | /* If the task has used fpu the last 5 timeslices, just do a full | |
6060 | @@ -702,23 +689,18 @@ __switch_to(struct task_struct *prev_p, | |
6061 | /* | |
6062 | * sys_execve() executes a new program. | |
6063 | */ | |
6064 | -asmlinkage | |
6065 | +asmlinkage | |
6066 | long sys_execve(char __user *name, char __user * __user *argv, | |
6067 | - char __user * __user *envp, struct pt_regs regs) | |
6068 | + char __user * __user *envp, struct pt_regs *regs) | |
6069 | { | |
6070 | long error; | |
6071 | char * filename; | |
6072 | ||
6073 | filename = getname(name); | |
6074 | error = PTR_ERR(filename); | |
6075 | - if (IS_ERR(filename)) | |
6076 | + if (IS_ERR(filename)) | |
6077 | return error; | |
6078 | - error = do_execve(filename, argv, envp, ®s); | |
6079 | - if (error == 0) { | |
6080 | - task_lock(current); | |
6081 | - current->ptrace &= ~PT_DTRACE; | |
6082 | - task_unlock(current); | |
cc90b958 | 6083 | - } |
00e5a55c BS |
6084 | + error = do_execve(filename, argv, envp, regs); |
6085 | putname(filename); | |
6086 | return error; | |
6087 | } | |
6088 | @@ -728,18 +710,18 @@ void set_personality_64bit(void) | |
6089 | /* inherit personality from parent */ | |
6090 | ||
6091 | /* Make sure to be in 64bit mode */ | |
6092 | - clear_thread_flag(TIF_IA32); | |
6093 | + clear_thread_flag(TIF_IA32); | |
6094 | ||
6095 | /* TBD: overwrites user setup. Should have two bits. | |
6096 | But 64bit processes have always behaved this way, | |
6097 | so it's not too bad. The main problem is just that | |
6098 | - 32bit childs are affected again. */ | |
6099 | + 32bit childs are affected again. */ | |
6100 | current->personality &= ~READ_IMPLIES_EXEC; | |
6101 | } | |
6102 | ||
6103 | asmlinkage long sys_fork(struct pt_regs *regs) | |
6104 | { | |
6105 | - return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL); | |
6106 | + return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); | |
6107 | } | |
6108 | ||
6109 | asmlinkage long | |
6110 | @@ -747,7 +729,7 @@ sys_clone(unsigned long clone_flags, uns | |
6111 | void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) | |
6112 | { | |
6113 | if (!newsp) | |
6114 | - newsp = regs->rsp; | |
6115 | + newsp = regs->sp; | |
6116 | return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); | |
6117 | } | |
6118 | ||
6119 | @@ -763,29 +745,29 @@ sys_clone(unsigned long clone_flags, uns | |
6120 | */ | |
6121 | asmlinkage long sys_vfork(struct pt_regs *regs) | |
6122 | { | |
6123 | - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0, | |
6124 | + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, | |
6125 | NULL, NULL); | |
6126 | } | |
6127 | ||
6128 | unsigned long get_wchan(struct task_struct *p) | |
6129 | { | |
6130 | unsigned long stack; | |
6131 | - u64 fp,rip; | |
6132 | + u64 fp,ip; | |
6133 | int count = 0; | |
6134 | ||
6135 | if (!p || p == current || p->state==TASK_RUNNING) | |
6136 | return 0; | |
6137 | stack = (unsigned long)task_stack_page(p); | |
6138 | - if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE) | |
6139 | + if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) | |
6140 | return 0; | |
6141 | - fp = *(u64 *)(p->thread.rsp); | |
6142 | + fp = *(u64 *)(p->thread.sp); | |
6143 | do { | |
6144 | if (fp < (unsigned long)stack || | |
6145 | fp > (unsigned long)stack+THREAD_SIZE) | |
6146 | return 0; | |
6147 | - rip = *(u64 *)(fp+8); | |
6148 | - if (!in_sched_functions(rip)) | |
6149 | - return rip; | |
6150 | + ip = *(u64 *)(fp+8); | |
6151 | + if (!in_sched_functions(ip)) | |
6152 | + return ip; | |
6153 | fp = *(u64 *)fp; | |
6154 | } while (count++ < 16); | |
6155 | return 0; | |
6156 | @@ -827,19 +809,19 @@ long do_arch_prctl(struct task_struct *t | |
6157 | /* Not strictly needed for fs, but do it for symmetry | |
6158 | with gs */ | |
6159 | if (addr >= TASK_SIZE_OF(task)) | |
6160 | - return -EPERM; | |
6161 | + return -EPERM; | |
6162 | cpu = get_cpu(); | |
6163 | - /* handle small bases via the GDT because that's faster to | |
6164 | + /* handle small bases via the GDT because that's faster to | |
6165 | switch. */ | |
6166 | - if (addr <= 0xffffffff) { | |
6167 | + if (addr <= 0xffffffff) { | |
6168 | set_32bit_tls(task, FS_TLS, addr); | |
6169 | - if (doit) { | |
6170 | - load_TLS(&task->thread, cpu); | |
6171 | + if (doit) { | |
6172 | + load_TLS(&task->thread, cpu); | |
6173 | asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL)); | |
6174 | } | |
6175 | task->thread.fsindex = FS_TLS_SEL; | |
6176 | task->thread.fs = 0; | |
6177 | - } else { | |
6178 | + } else { | |
6179 | task->thread.fsindex = 0; | |
6180 | task->thread.fs = addr; | |
6181 | if (doit) { | |
6182 | @@ -852,24 +834,24 @@ long do_arch_prctl(struct task_struct *t | |
6183 | } | |
6184 | put_cpu(); | |
6185 | break; | |
6186 | - case ARCH_GET_FS: { | |
6187 | - unsigned long base; | |
6188 | + case ARCH_GET_FS: { | |
6189 | + unsigned long base; | |
6190 | if (task->thread.fsindex == FS_TLS_SEL) | |
6191 | base = read_32bit_tls(task, FS_TLS); | |
6192 | else if (doit) | |
6193 | rdmsrl(MSR_FS_BASE, base); | |
6194 | else | |
6195 | base = task->thread.fs; | |
6196 | - ret = put_user(base, (unsigned long __user *)addr); | |
6197 | - break; | |
6198 | + ret = put_user(base, (unsigned long __user *)addr); | |
6199 | + break; | |
6200 | } | |
6201 | - case ARCH_GET_GS: { | |
6202 | + case ARCH_GET_GS: { | |
6203 | unsigned long base; | |
6204 | unsigned gsindex; | |
6205 | if (task->thread.gsindex == GS_TLS_SEL) | |
6206 | base = read_32bit_tls(task, GS_TLS); | |
6207 | else if (doit) { | |
6208 | - asm("movl %%gs,%0" : "=r" (gsindex)); | |
6209 | + asm("movl %%gs,%0" : "=r" (gsindex)); | |
6210 | if (gsindex) | |
6211 | rdmsrl(MSR_KERNEL_GS_BASE, base); | |
6212 | else | |
6213 | @@ -877,40 +859,21 @@ long do_arch_prctl(struct task_struct *t | |
6214 | } | |
6215 | else | |
6216 | base = task->thread.gs; | |
6217 | - ret = put_user(base, (unsigned long __user *)addr); | |
6218 | + ret = put_user(base, (unsigned long __user *)addr); | |
6219 | break; | |
6220 | } | |
6221 | ||
6222 | default: | |
6223 | ret = -EINVAL; | |
6224 | break; | |
6225 | - } | |
6226 | + } | |
6227 | ||
6228 | - return ret; | |
6229 | -} | |
6230 | + return ret; | |
6231 | +} | |
6232 | ||
6233 | long sys_arch_prctl(int code, unsigned long addr) | |
6234 | { | |
6235 | return do_arch_prctl(current, code, addr); | |
6236 | -} | |
cc90b958 | 6237 | - |
00e5a55c BS |
6238 | -/* |
6239 | - * Capture the user space registers if the task is not running (in user space) | |
6240 | - */ | |
6241 | -int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) | |
cc90b958 | 6242 | -{ |
00e5a55c | 6243 | - struct pt_regs *pp, ptregs; |
cc90b958 | 6244 | - |
00e5a55c | 6245 | - pp = task_pt_regs(tsk); |
cc90b958 | 6246 | - |
00e5a55c BS |
6247 | - ptregs = *pp; |
6248 | - ptregs.cs &= 0xffff; | |
6249 | - ptregs.ss &= 0xffff; | |
cc90b958 | 6250 | - |
00e5a55c BS |
6251 | - elf_core_copy_regs(regs, &ptregs); |
6252 | - | |
6253 | - boot_option_idle_override = 1; | |
6254 | - return 1; | |
6255 | } | |
6256 | ||
6257 | unsigned long arch_align_stack(unsigned long sp) | |
6258 | @@ -919,3 +882,9 @@ unsigned long arch_align_stack(unsigned | |
6259 | sp -= get_random_int() % 8192; | |
6260 | return sp & ~0xf; | |
6261 | } | |
6262 | + | |
6263 | +unsigned long arch_randomize_brk(struct mm_struct *mm) | |
6264 | +{ | |
6265 | + unsigned long range_end = mm->brk + 0x02000000; | |
6266 | + return randomize_range(mm->brk, range_end, 0) ? : mm->brk; | |
6267 | +} | |
6268 | --- sle11-2009-05-14.orig/arch/x86/kernel/quirks-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
6269 | +++ sle11-2009-05-14/arch/x86/kernel/quirks-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
6270 | @@ -9,7 +9,7 @@ | |
6271 | static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) | |
6272 | { | |
6273 | u8 config, rev; | |
6274 | - u32 word; | |
6275 | + u16 word; | |
6276 | ||
6277 | /* BIOS may enable hardware IRQ balancing for | |
6278 | * E7520/E7320/E7525(revision ID 0x9 and below) | |
6279 | @@ -24,14 +24,17 @@ static void __devinit quirk_intel_irqbal | |
6280 | pci_read_config_byte(dev, 0xf4, &config); | |
6281 | pci_write_config_byte(dev, 0xf4, config|0x2); | |
6282 | ||
6283 | - /* read xTPR register */ | |
6284 | - raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); | |
6285 | + /* | |
6286 | + * read xTPR register. We may not have a pci_dev for device 8 | |
6287 | + * because it might be hidden until the above write. | |
6288 | + */ | |
6289 | + pci_bus_read_config_word(dev->bus, PCI_DEVFN(8, 0), 0x4c, &word); | |
6290 | ||
6291 | if (!(word & (1 << 13))) { | |
6292 | struct xen_platform_op op; | |
6293 | ||
6294 | - printk(KERN_INFO "Intel E7520/7320/7525 detected. " | |
6295 | - "Disabling irq balancing and affinity\n"); | |
6296 | + dev_info(&dev->dev, "Intel E7520/7320/7525 detected; " | |
6297 | + "disabling irq balancing and affinity\n"); | |
6298 | op.cmd = XENPF_platform_quirk; | |
6299 | op.u.platform_quirk.quirk_id = QUIRK_NOIRQBALANCING; | |
6300 | WARN_ON(HYPERVISOR_platform_op(&op)); | |
6301 | @@ -102,14 +105,16 @@ static void ich_force_enable_hpet(struct | |
6302 | pci_read_config_dword(dev, 0xF0, &rcba); | |
6303 | rcba &= 0xFFFFC000; | |
6304 | if (rcba == 0) { | |
6305 | - printk(KERN_DEBUG "RCBA disabled. Cannot force enable HPET\n"); | |
6306 | + dev_printk(KERN_DEBUG, &dev->dev, "RCBA disabled; " | |
6307 | + "cannot force enable HPET\n"); | |
6308 | return; | |
6309 | } | |
6310 | ||
6311 | /* use bits 31:14, 16 kB aligned */ | |
6312 | rcba_base = ioremap_nocache(rcba, 0x4000); | |
6313 | if (rcba_base == NULL) { | |
6314 | - printk(KERN_DEBUG "ioremap failed. Cannot force enable HPET\n"); | |
6315 | + dev_printk(KERN_DEBUG, &dev->dev, "ioremap failed; " | |
6316 | + "cannot force enable HPET\n"); | |
6317 | return; | |
6318 | } | |
6319 | ||
6320 | @@ -120,8 +125,8 @@ static void ich_force_enable_hpet(struct | |
6321 | /* HPET is enabled in HPTC. Just not reported by BIOS */ | |
6322 | val = val & 0x3; | |
6323 | force_hpet_address = 0xFED00000 | (val << 12); | |
6324 | - printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", | |
6325 | - force_hpet_address); | |
6326 | + dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " | |
6327 | + "0x%lx\n", force_hpet_address); | |
6328 | iounmap(rcba_base); | |
6329 | return; | |
6330 | } | |
6331 | @@ -140,11 +145,12 @@ static void ich_force_enable_hpet(struct | |
6332 | if (err) { | |
6333 | force_hpet_address = 0; | |
6334 | iounmap(rcba_base); | |
6335 | - printk(KERN_DEBUG "Failed to force enable HPET\n"); | |
6336 | + dev_printk(KERN_DEBUG, &dev->dev, | |
6337 | + "Failed to force enable HPET\n"); | |
6338 | } else { | |
6339 | force_hpet_resume_type = ICH_FORCE_HPET_RESUME; | |
6340 | - printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", | |
6341 | - force_hpet_address); | |
6342 | + dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " | |
6343 | + "0x%lx\n", force_hpet_address); | |
6344 | } | |
6345 | } | |
6346 | ||
6347 | @@ -160,6 +166,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_I | |
6348 | ich_force_enable_hpet); | |
6349 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, | |
6350 | ich_force_enable_hpet); | |
6351 | +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, | |
6352 | + ich_force_enable_hpet); | |
6353 | ||
6354 | ||
6355 | static struct pci_dev *cached_dev; | |
6356 | @@ -204,8 +212,8 @@ static void old_ich_force_enable_hpet(st | |
6357 | if (val & 0x4) { | |
6358 | val &= 0x3; | |
6359 | force_hpet_address = 0xFED00000 | (val << 12); | |
6360 | - printk(KERN_DEBUG "HPET at base address 0x%lx\n", | |
6361 | - force_hpet_address); | |
6362 | + dev_printk(KERN_DEBUG, &dev->dev, "HPET at 0x%lx\n", | |
6363 | + force_hpet_address); | |
6364 | return; | |
6365 | } | |
6366 | ||
6367 | @@ -225,14 +233,14 @@ static void old_ich_force_enable_hpet(st | |
6368 | /* HPET is enabled in HPTC. Just not reported by BIOS */ | |
6369 | val &= 0x3; | |
6370 | force_hpet_address = 0xFED00000 | (val << 12); | |
6371 | - printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", | |
6372 | - force_hpet_address); | |
6373 | + dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " | |
6374 | + "0x%lx\n", force_hpet_address); | |
6375 | cached_dev = dev; | |
6376 | force_hpet_resume_type = OLD_ICH_FORCE_HPET_RESUME; | |
6377 | return; | |
6378 | } | |
6379 | ||
6380 | - printk(KERN_DEBUG "Failed to force enable HPET\n"); | |
6381 | + dev_printk(KERN_DEBUG, &dev->dev, "Failed to force enable HPET\n"); | |
6382 | } | |
6383 | ||
6384 | /* | |
6385 | @@ -290,8 +298,8 @@ static void vt8237_force_enable_hpet(str | |
6386 | */ | |
6387 | if (val & 0x80) { | |
6388 | force_hpet_address = (val & ~0x3ff); | |
6389 | - printk(KERN_DEBUG "HPET at base address 0x%lx\n", | |
6390 | - force_hpet_address); | |
6391 | + dev_printk(KERN_DEBUG, &dev->dev, "HPET at 0x%lx\n", | |
6392 | + force_hpet_address); | |
6393 | return; | |
6394 | } | |
6395 | ||
6396 | @@ -305,14 +313,14 @@ static void vt8237_force_enable_hpet(str | |
6397 | pci_read_config_dword(dev, 0x68, &val); | |
6398 | if (val & 0x80) { | |
6399 | force_hpet_address = (val & ~0x3ff); | |
6400 | - printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", | |
6401 | - force_hpet_address); | |
6402 | + dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " | |
6403 | + "0x%lx\n", force_hpet_address); | |
6404 | cached_dev = dev; | |
6405 | force_hpet_resume_type = VT8237_FORCE_HPET_RESUME; | |
6406 | return; | |
6407 | } | |
6408 | ||
6409 | - printk(KERN_DEBUG "Failed to force enable HPET\n"); | |
6410 | + dev_printk(KERN_DEBUG, &dev->dev, "Failed to force enable HPET\n"); | |
6411 | } | |
6412 | ||
6413 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, | |
6414 | @@ -340,7 +348,7 @@ static void nvidia_force_enable_hpet(str | |
6415 | pci_read_config_dword(dev, 0x44, &val); | |
6416 | force_hpet_address = val & 0xfffffffe; | |
6417 | force_hpet_resume_type = NVIDIA_FORCE_HPET_RESUME; | |
6418 | - printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", | |
6419 | + dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", | |
6420 | force_hpet_address); | |
6421 | cached_dev = dev; | |
6422 | return; | |
6423 | @@ -353,6 +361,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_N | |
6424 | nvidia_force_enable_hpet); | |
6425 | ||
6426 | /* LPC bridges */ | |
6427 | +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0260, | |
6428 | + nvidia_force_enable_hpet); | |
6429 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0360, | |
6430 | nvidia_force_enable_hpet); | |
6431 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0361, | |
6432 | @@ -373,19 +383,19 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_N | |
6433 | void force_hpet_resume(void) | |
6434 | { | |
6435 | switch (force_hpet_resume_type) { | |
6436 | - case ICH_FORCE_HPET_RESUME: | |
6437 | - return ich_force_hpet_resume(); | |
cc90b958 | 6438 | - |
00e5a55c BS |
6439 | - case OLD_ICH_FORCE_HPET_RESUME: |
6440 | - return old_ich_force_hpet_resume(); | |
cc90b958 | 6441 | - |
00e5a55c BS |
6442 | - case VT8237_FORCE_HPET_RESUME: |
6443 | - return vt8237_force_hpet_resume(); | |
cc90b958 | 6444 | - |
00e5a55c BS |
6445 | - case NVIDIA_FORCE_HPET_RESUME: |
6446 | - return nvidia_force_hpet_resume(); | |
cc90b958 | 6447 | - |
00e5a55c BS |
6448 | - default: |
6449 | + case ICH_FORCE_HPET_RESUME: | |
6450 | + ich_force_hpet_resume(); | |
6451 | + return; | |
6452 | + case OLD_ICH_FORCE_HPET_RESUME: | |
6453 | + old_ich_force_hpet_resume(); | |
6454 | + return; | |
6455 | + case VT8237_FORCE_HPET_RESUME: | |
6456 | + vt8237_force_hpet_resume(); | |
6457 | + return; | |
6458 | + case NVIDIA_FORCE_HPET_RESUME: | |
6459 | + nvidia_force_hpet_resume(); | |
6460 | + return; | |
6461 | + default: | |
6462 | break; | |
6463 | } | |
6464 | } | |
6465 | --- sle11-2009-05-14.orig/arch/x86/kernel/rtc.c 2009-05-14 10:56:29.000000000 +0200 | |
6466 | +++ sle11-2009-05-14/arch/x86/kernel/rtc.c 2009-03-16 16:33:40.000000000 +0100 | |
6467 | @@ -181,6 +181,10 @@ unsigned long read_persistent_clock(void | |
6468 | { | |
6469 | unsigned long retval, flags; | |
6470 | ||
6471 | +#ifdef CONFIG_XEN | |
6472 | + if (!is_initial_xendomain()) | |
6473 | + return xen_read_persistent_clock(); | |
6474 | +#endif | |
6475 | spin_lock_irqsave(&rtc_lock, flags); | |
6476 | retval = get_wallclock(); | |
6477 | spin_unlock_irqrestore(&rtc_lock, flags); | |
6478 | @@ -190,6 +194,10 @@ unsigned long read_persistent_clock(void | |
6479 | ||
6480 | int update_persistent_clock(struct timespec now) | |
6481 | { | |
6482 | +#ifdef CONFIG_XEN | |
6483 | + if (xen_update_persistent_clock() < 0 || xen_independent_wallclock()) | |
6484 | + return 0; | |
6485 | +#endif | |
6486 | return set_rtc_mmss(now.tv_sec); | |
6487 | } | |
6488 | ||
6489 | --- sle11-2009-05-14.orig/arch/x86/kernel/setup64-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
6490 | +++ sle11-2009-05-14/arch/x86/kernel/setup64-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
6491 | @@ -31,7 +31,11 @@ | |
6492 | #include <asm/hypervisor.h> | |
6493 | #endif | |
6494 | ||
6495 | +#ifndef CONFIG_DEBUG_BOOT_PARAMS | |
6496 | struct boot_params __initdata boot_params; | |
6497 | +#else | |
6498 | +struct boot_params boot_params; | |
6499 | +#endif | |
6500 | ||
6501 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | |
6502 | ||
6503 | @@ -47,6 +51,7 @@ char boot_cpu_stack[IRQSTACKSIZE] __attr | |
6504 | ||
6505 | unsigned long __supported_pte_mask __read_mostly = ~0UL; | |
6506 | EXPORT_SYMBOL(__supported_pte_mask); | |
6507 | + | |
6508 | static int do_not_nx __cpuinitdata = 0; | |
6509 | ||
6510 | /* noexec=on|off | |
6511 | @@ -90,6 +95,45 @@ static int __init nonx32_setup(char *str | |
6512 | __setup("noexec32=", nonx32_setup); | |
6513 | ||
6514 | /* | |
6515 | + * Copy data used in early init routines from the initial arrays to the | |
6516 | + * per cpu data areas. These arrays then become expendable and the | |
6517 | + * *_early_ptr's are zeroed indicating that the static arrays are gone. | |
6518 | + */ | |
6519 | +static void __init setup_per_cpu_maps(void) | |
6520 | +{ | |
6521 | +#ifndef CONFIG_XEN | |
6522 | + int cpu; | |
6523 | + | |
6524 | + for_each_possible_cpu(cpu) { | |
6525 | +#ifdef CONFIG_SMP | |
6526 | + if (per_cpu_offset(cpu)) { | |
6527 | +#endif | |
6528 | + per_cpu(x86_cpu_to_apicid, cpu) = | |
6529 | + x86_cpu_to_apicid_init[cpu]; | |
6530 | + per_cpu(x86_bios_cpu_apicid, cpu) = | |
6531 | + x86_bios_cpu_apicid_init[cpu]; | |
6532 | +#ifdef CONFIG_NUMA | |
6533 | + per_cpu(x86_cpu_to_node_map, cpu) = | |
6534 | + x86_cpu_to_node_map_init[cpu]; | |
6535 | +#endif | |
6536 | +#ifdef CONFIG_SMP | |
6537 | + } | |
6538 | + else | |
6539 | + printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n", | |
6540 | + cpu); | |
6541 | +#endif | |
6542 | + } | |
6543 | + | |
6544 | + /* indicate the early static arrays will soon be gone */ | |
6545 | + x86_cpu_to_apicid_early_ptr = NULL; | |
6546 | + x86_bios_cpu_apicid_early_ptr = NULL; | |
6547 | +#ifdef CONFIG_NUMA | |
6548 | + x86_cpu_to_node_map_early_ptr = NULL; | |
6549 | +#endif | |
6550 | +#endif | |
6551 | +} | |
6552 | + | |
6553 | +/* | |
6554 | * Great future plan: | |
6555 | * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. | |
6556 | * Always point %gs to its beginning | |
6557 | @@ -109,19 +153,24 @@ void __init setup_per_cpu_areas(void) | |
6558 | printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size); | |
6559 | for_each_cpu_mask (i, cpu_possible_map) { | |
6560 | char *ptr; | |
6561 | +#ifndef CONFIG_NEED_MULTIPLE_NODES | |
6562 | + ptr = alloc_bootmem_pages(size); | |
6563 | +#else | |
6564 | + int node = early_cpu_to_node(i); | |
6565 | ||
6566 | - if (!NODE_DATA(cpu_to_node(i))) { | |
6567 | - printk("cpu with no node %d, num_online_nodes %d\n", | |
6568 | - i, num_online_nodes()); | |
6569 | + if (!node_online(node) || !NODE_DATA(node)) | |
6570 | ptr = alloc_bootmem_pages(size); | |
6571 | - } else { | |
6572 | - ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); | |
6573 | - } | |
6574 | + else | |
6575 | + ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); | |
6576 | +#endif | |
6577 | if (!ptr) | |
6578 | panic("Cannot allocate cpu data for CPU %d\n", i); | |
6579 | cpu_pda(i)->data_offset = ptr - __per_cpu_start; | |
6580 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | |
6581 | } | |
6582 | + | |
6583 | + /* setup percpu data maps early */ | |
6584 | + setup_per_cpu_maps(); | |
6585 | } | |
6586 | ||
6587 | #ifdef CONFIG_XEN | |
6588 | @@ -224,7 +273,8 @@ void syscall_init(void) | |
6589 | wrmsrl(MSR_CSTAR, ignore_sysret); | |
6590 | ||
6591 | /* Flags to clear on syscall */ | |
6592 | - wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); | |
6593 | + wrmsrl(MSR_SYSCALL_MASK, | |
6594 | + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); | |
6595 | #endif | |
6596 | #ifdef CONFIG_IA32_EMULATION | |
6597 | syscall32_cpu_init (); | |
6598 | @@ -303,7 +353,7 @@ void __cpuinit cpu_init (void) | |
6599 | */ | |
6600 | #ifndef CONFIG_XEN | |
6601 | if (cpu) | |
6602 | - memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE); | |
6603 | + memcpy(get_cpu_gdt_table(cpu), cpu_gdt_table, GDT_SIZE); | |
6604 | #endif | |
6605 | ||
6606 | cpu_gdt_descr[cpu].size = GDT_SIZE; | |
6607 | @@ -334,10 +384,10 @@ void __cpuinit cpu_init (void) | |
6608 | v, cpu); | |
6609 | } | |
6610 | estacks += PAGE_SIZE << order[v]; | |
6611 | - orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks; | |
6612 | + orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; | |
6613 | } | |
6614 | ||
6615 | - t->io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | |
6616 | + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | |
6617 | /* | |
6618 | * <= is required because the CPU will access up to | |
6619 | * 8 bits beyond the end of the IO permission bitmap. | |
6620 | --- sle11-2009-05-14.orig/arch/x86/kernel/setup_32-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
6621 | +++ sle11-2009-05-14/arch/x86/kernel/setup_32-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
6622 | @@ -47,9 +47,12 @@ | |
6623 | #include <linux/crash_dump.h> | |
6624 | #include <linux/dmi.h> | |
6625 | #include <linux/pfn.h> | |
6626 | +#include <linux/pci.h> | |
6627 | +#include <linux/init_ohci1394_dma.h> | |
6628 | ||
6629 | #include <video/edid.h> | |
6630 | ||
6631 | +#include <asm/mtrr.h> | |
6632 | #include <asm/apic.h> | |
6633 | #include <asm/e820.h> | |
6634 | #include <asm/mpspec.h> | |
6635 | @@ -79,14 +82,83 @@ static struct notifier_block xen_panic_b | |
6636 | xen_panic_event, NULL, 0 /* try to go last */ | |
6637 | }; | |
6638 | ||
6639 | -int disable_pse __cpuinitdata = 0; | |
cc90b958 | 6640 | - |
00e5a55c BS |
6641 | /* |
6642 | * Machine setup.. | |
6643 | */ | |
6644 | -extern struct resource code_resource; | |
6645 | -extern struct resource data_resource; | |
6646 | -extern struct resource bss_resource; | |
6647 | +static struct resource data_resource = { | |
6648 | + .name = "Kernel data", | |
6649 | + .start = 0, | |
6650 | + .end = 0, | |
6651 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
6652 | +}; | |
6653 | + | |
6654 | +static struct resource code_resource = { | |
6655 | + .name = "Kernel code", | |
6656 | + .start = 0, | |
6657 | + .end = 0, | |
6658 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
6659 | +}; | |
6660 | + | |
6661 | +static struct resource bss_resource = { | |
6662 | + .name = "Kernel bss", | |
6663 | + .start = 0, | |
6664 | + .end = 0, | |
6665 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
6666 | +}; | |
6667 | + | |
6668 | +static struct resource video_ram_resource = { | |
6669 | + .name = "Video RAM area", | |
6670 | + .start = 0xa0000, | |
6671 | + .end = 0xbffff, | |
6672 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
6673 | +}; | |
6674 | + | |
6675 | +static struct resource standard_io_resources[] = { { | |
6676 | + .name = "dma1", | |
6677 | + .start = 0x0000, | |
6678 | + .end = 0x001f, | |
6679 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
6680 | +}, { | |
6681 | + .name = "pic1", | |
6682 | + .start = 0x0020, | |
6683 | + .end = 0x0021, | |
6684 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
6685 | +}, { | |
6686 | + .name = "timer0", | |
6687 | + .start = 0x0040, | |
6688 | + .end = 0x0043, | |
6689 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
6690 | +}, { | |
6691 | + .name = "timer1", | |
6692 | + .start = 0x0050, | |
6693 | + .end = 0x0053, | |
6694 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
6695 | +}, { | |
6696 | + .name = "keyboard", | |
6697 | + .start = 0x0060, | |
6698 | + .end = 0x006f, | |
6699 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
6700 | +}, { | |
6701 | + .name = "dma page reg", | |
6702 | + .start = 0x0080, | |
6703 | + .end = 0x008f, | |
6704 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
6705 | +}, { | |
6706 | + .name = "pic2", | |
6707 | + .start = 0x00a0, | |
6708 | + .end = 0x00a1, | |
6709 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
6710 | +}, { | |
6711 | + .name = "dma2", | |
6712 | + .start = 0x00c0, | |
6713 | + .end = 0x00df, | |
6714 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
6715 | +}, { | |
6716 | + .name = "fpu", | |
6717 | + .start = 0x00f0, | |
6718 | + .end = 0x00ff, | |
6719 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
6720 | +} }; | |
6721 | ||
6722 | /* cpu data as detected by the assembly code in head.S */ | |
6723 | struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; | |
6724 | @@ -94,13 +166,16 @@ struct cpuinfo_x86 new_cpu_data __cpuini | |
6725 | struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; | |
6726 | EXPORT_SYMBOL(boot_cpu_data); | |
6727 | ||
6728 | +#ifndef CONFIG_X86_PAE | |
6729 | unsigned long mmu_cr4_features; | |
6730 | +#else | |
6731 | +unsigned long mmu_cr4_features = X86_CR4_PAE; | |
6732 | +#endif | |
6733 | ||
6734 | /* for MCA, but anyone else can use it if they want */ | |
6735 | unsigned int machine_id; | |
6736 | unsigned int machine_submodel_id; | |
6737 | unsigned int BIOS_revision; | |
6738 | -unsigned int mca_pentium_flag; | |
6739 | ||
6740 | /* Boot loader ID as an integer, for the benefit of proc_dointvec */ | |
6741 | int bootloader_type; | |
6742 | @@ -131,13 +206,17 @@ extern int root_mountflags; | |
6743 | ||
6744 | unsigned long saved_videomode; | |
6745 | ||
6746 | -#define RAMDISK_IMAGE_START_MASK 0x07FF | |
6747 | +#define RAMDISK_IMAGE_START_MASK 0x07FF | |
6748 | #define RAMDISK_PROMPT_FLAG 0x8000 | |
6749 | -#define RAMDISK_LOAD_FLAG 0x4000 | |
6750 | +#define RAMDISK_LOAD_FLAG 0x4000 | |
6751 | ||
6752 | static char __initdata command_line[COMMAND_LINE_SIZE]; | |
6753 | ||
6754 | +#ifndef CONFIG_DEBUG_BOOT_PARAMS | |
6755 | struct boot_params __initdata boot_params; | |
6756 | +#else | |
6757 | +struct boot_params boot_params; | |
6758 | +#endif | |
6759 | ||
6760 | /* | |
6761 | * Point at the empty zero page to start with. We map the real shared_info | |
6762 | @@ -198,8 +277,7 @@ static int __init parse_mem(char *arg) | |
6763 | return -EINVAL; | |
6764 | ||
6765 | if (strcmp(arg, "nopentium") == 0) { | |
6766 | - clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); | |
6767 | - disable_pse = 1; | |
6768 | + setup_clear_cpu_cap(X86_FEATURE_PSE); | |
6769 | } else { | |
6770 | /* If the user specifies memory size, we | |
6771 | * limit the BIOS-provided memory map to | |
6772 | @@ -208,7 +286,7 @@ static int __init parse_mem(char *arg) | |
6773 | * trim the existing memory map. | |
6774 | */ | |
6775 | unsigned long long mem_size; | |
6776 | - | |
6777 | + | |
6778 | mem_size = memparse(arg, &arg); | |
6779 | limit_regions(mem_size); | |
6780 | user_defined_memmap = 1; | |
6781 | @@ -350,7 +428,7 @@ static void __init reserve_ebda_region(v | |
6782 | unsigned int addr; | |
6783 | addr = get_bios_ebda(); | |
6784 | if (addr) | |
6785 | - reserve_bootmem(addr, PAGE_SIZE); | |
6786 | + reserve_bootmem(addr, PAGE_SIZE, BOOTMEM_DEFAULT); | |
6787 | } | |
6788 | #endif | |
6789 | ||
6790 | @@ -365,8 +443,6 @@ static unsigned long __init setup_memory | |
6791 | min_low_pfn = PFN_UP(__pa(xen_start_info->pt_base)) + | |
6792 | xen_start_info->nr_pt_frames; | |
6793 | ||
6794 | - find_max_pfn(); | |
cc90b958 | 6795 | - |
00e5a55c BS |
6796 | max_low_pfn = find_max_low_pfn(); |
6797 | ||
6798 | #ifdef CONFIG_HIGHMEM | |
6799 | @@ -447,7 +523,8 @@ static void __init reserve_crashkernel(v | |
6800 | (unsigned long)(total_mem >> 20)); | |
6801 | crashk_res.start = crash_base; | |
6802 | crashk_res.end = crash_base + crash_size - 1; | |
6803 | - reserve_bootmem(crash_base, crash_size); | |
6804 | + reserve_bootmem(crash_base, crash_size, | |
6805 | + BOOTMEM_DEFAULT); | |
6806 | } else | |
6807 | printk(KERN_INFO "crashkernel reservation failed - " | |
6808 | "you have to specify a base address\n"); | |
6809 | @@ -461,6 +538,99 @@ static inline void __init reserve_crashk | |
6810 | {} | |
6811 | #endif | |
6812 | ||
6813 | +#ifdef CONFIG_BLK_DEV_INITRD | |
6814 | + | |
6815 | +static bool do_relocate_initrd = false; /* set by reserve_initrd() when the image must be copied; tested by relocate_initrd() */ | |
6816 | + | |
6817 | +static void __init reserve_initrd(void) /* reserve bootmem for the initrd described by xen_start_info and set initrd_start/initrd_end */ | |
6818 | +{ | |
6819 | + unsigned long ramdisk_image = __pa(xen_start_info->mod_start); | |
6820 | + unsigned long ramdisk_size = xen_start_info->mod_len; | |
6821 | + unsigned long ramdisk_end = ramdisk_image + ramdisk_size; | |
6822 | + unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; | |
6823 | + unsigned long ramdisk_here; | |
6824 | + | |
6825 | + initrd_start = 0; /* remains 0 on every early-return path below */ | |
6826 | + | |
6827 | + if (!xen_start_info->mod_start || !ramdisk_size) | |
6828 | + return; /* No initrd provided by bootloader */ | |
6829 | + | |
6830 | + if (ramdisk_end < ramdisk_image) { /* image + size overflowed unsigned long */ | |
6831 | + printk(KERN_ERR "initrd wraps around end of memory, " | |
6832 | + "disabling initrd\n"); | |
6833 | + return; | |
6834 | + } | |
6835 | + if (ramdisk_size >= end_of_lowmem/2) { /* refuse an image of half of lowmem or more */ | |
6836 | + printk(KERN_ERR "initrd too large to handle, " | |
6837 | + "disabling initrd\n"); | |
6838 | + return; | |
6839 | + } | |
6840 | + if (ramdisk_end <= end_of_lowmem) { | |
6841 | + /* All in lowmem, easy case */ | |
6842 | + reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT); | |
6843 | + initrd_start = ramdisk_image + PAGE_OFFSET; | |
6844 | + initrd_end = initrd_start+ramdisk_size; | |
6845 | + return; | |
6846 | + } | |
6847 | + | |
6848 | + /* We need to move the initrd down into lowmem */ | |
6849 | + ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK; /* destination below end_of_lowmem, masked with PAGE_MASK */ | |
6850 | + | |
6851 | + /* Note: this includes all the lowmem currently occupied by | |
6852 | + the initrd; we rely on that fact to keep the data intact. */ | |
6853 | + reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT); | |
6854 | + initrd_start = ramdisk_here + PAGE_OFFSET; | |
6855 | + initrd_end = initrd_start + ramdisk_size; | |
6856 | + | |
6857 | + do_relocate_initrd = true; /* the copy itself is done later by relocate_initrd() */ | |
6858 | +} | |
6859 | + | |
6860 | +#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) /* upper bound on the span mapped per early_ioremap() call in relocate_initrd() */ | |
6861 | + | |
6862 | +static void __init relocate_initrd(void) /* copy the initrd to initrd_start; does nothing unless do_relocate_initrd is set */ | |
6863 | +{ | |
6864 | + unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; /* NOTE(review): reserve_initrd() takes the image from xen_start_info->mod_start/mod_len, not boot_params.hdr -- confirm both describe the same image under Xen */ | |
6865 | + unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | |
6866 | + unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; | |
6867 | + unsigned long ramdisk_here; /* assigned below but not read again in this function */ | |
6868 | + unsigned long slop, clen, mapaddr; | |
6869 | + char *p, *q; | |
6870 | + | |
6871 | + if (!do_relocate_initrd) | |
6872 | + return; | |
6873 | + | |
6874 | + ramdisk_here = initrd_start - PAGE_OFFSET; | |
6875 | + | |
6876 | + q = (char *)initrd_start; /* q: running destination pointer */ | |
6877 | + | |
6878 | + /* Copy any lowmem portion of the initrd */ | |
6879 | + if (ramdisk_image < end_of_lowmem) { | |
6880 | + clen = end_of_lowmem - ramdisk_image; | |
6881 | + p = (char *)__va(ramdisk_image); | |
6882 | + memcpy(q, p, clen); | |
6883 | + q += clen; | |
6884 | + ramdisk_image += clen; | |
6885 | + ramdisk_size -= clen; | |
6886 | + } | |
6887 | + | |
6888 | + /* Copy the highmem portion of the initrd */ | |
6889 | + while (ramdisk_size) { | |
6890 | + slop = ramdisk_image & ~PAGE_MASK; /* bits of the source address outside PAGE_MASK */ | |
6891 | + clen = ramdisk_size; | |
6892 | + if (clen > MAX_MAP_CHUNK-slop) /* cap so that clen+slop never exceeds MAX_MAP_CHUNK */ | |
6893 | + clen = MAX_MAP_CHUNK-slop; | |
6894 | + mapaddr = ramdisk_image & PAGE_MASK; | |
6895 | + p = early_ioremap(mapaddr, clen+slop); /* NOTE(review): result is used without a NULL check */ | |
6896 | + memcpy(q, p+slop, clen); | |
6897 | + early_iounmap(p, clen+slop); | |
6898 | + q += clen; | |
6899 | + ramdisk_image += clen; | |
6900 | + ramdisk_size -= clen; | |
6901 | + } | |
6902 | +} | |
6903 | + | |
6904 | +#endif /* CONFIG_BLK_DEV_INITRD */ | |
6905 | + | |
6906 | void __init setup_bootmem_allocator(void) | |
6907 | { | |
6908 | unsigned long bootmap_size; | |
6909 | @@ -478,14 +648,15 @@ void __init setup_bootmem_allocator(void | |
6910 | * bootmem allocator with an invalid RAM area. | |
6911 | */ | |
6912 | reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) + | |
6913 | - bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text)); | |
6914 | + bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text), | |
6915 | + BOOTMEM_DEFAULT); | |
6916 | ||
6917 | #ifndef CONFIG_XEN | |
6918 | /* | |
6919 | * reserve physical page 0 - it's a special BIOS page on many boxes, | |
6920 | * enabling clean reboots, SMP operation, laptop functions. | |
6921 | */ | |
6922 | - reserve_bootmem(0, PAGE_SIZE); | |
6923 | + reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT); | |
6924 | ||
6925 | /* reserve EBDA region, it's a 4K region */ | |
6926 | reserve_ebda_region(); | |
6927 | @@ -495,7 +666,7 @@ void __init setup_bootmem_allocator(void | |
6928 | unless you have no PS/2 mouse plugged in. */ | |
6929 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | |
6930 | boot_cpu_data.x86 == 6) | |
6931 | - reserve_bootmem(0xa0000 - 4096, 4096); | |
6932 | + reserve_bootmem(0xa0000 - 4096, 4096, BOOTMEM_DEFAULT); | |
6933 | ||
6934 | #ifdef CONFIG_SMP | |
6935 | /* | |
6936 | @@ -503,7 +674,7 @@ void __init setup_bootmem_allocator(void | |
6937 | * FIXME: Don't need the extra page at 4K, but need to fix | |
6938 | * trampoline before removing it. (see the GDT stuff) | |
6939 | */ | |
6940 | - reserve_bootmem(PAGE_SIZE, PAGE_SIZE); | |
6941 | + reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT); | |
6942 | #endif | |
6943 | #ifdef CONFIG_ACPI_SLEEP | |
6944 | /* | |
6945 | @@ -511,29 +682,12 @@ void __init setup_bootmem_allocator(void | |
6946 | */ | |
6947 | acpi_reserve_bootmem(); | |
6948 | #endif | |
6949 | - numa_kva_reserve(); | |
6950 | #endif /* !CONFIG_XEN */ | |
6951 | ||
6952 | #ifdef CONFIG_BLK_DEV_INITRD | |
6953 | - if (xen_start_info->mod_start) { | |
6954 | - unsigned long ramdisk_image = __pa(xen_start_info->mod_start); | |
6955 | - unsigned long ramdisk_size = xen_start_info->mod_len; | |
6956 | - unsigned long ramdisk_end = ramdisk_image + ramdisk_size; | |
6957 | - unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; | |
cc90b958 | 6958 | - |
00e5a55c BS |
6959 | - if (ramdisk_end <= end_of_lowmem) { |
6960 | - /*reserve_bootmem(ramdisk_image, ramdisk_size);*/ | |
6961 | - initrd_start = ramdisk_image + PAGE_OFFSET; | |
6962 | - initrd_end = initrd_start+ramdisk_size; | |
6963 | - initrd_below_start_ok = 1; | |
6964 | - } else { | |
6965 | - printk(KERN_ERR "initrd extends beyond end of memory " | |
6966 | - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", | |
6967 | - ramdisk_end, end_of_lowmem); | |
6968 | - initrd_start = 0; | |
6969 | - } | |
cc90b958 | 6970 | - } |
00e5a55c BS |
6971 | + reserve_initrd(); |
6972 | #endif | |
6973 | + numa_kva_reserve(); | |
6974 | reserve_crashkernel(); | |
6975 | } | |
6976 | ||
6977 | @@ -600,20 +754,14 @@ void __init setup_arch(char **cmdline_p) | |
6978 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | |
6979 | pre_setup_arch_hook(); | |
6980 | early_cpu_init(); | |
6981 | + early_ioremap_init(); | |
6982 | #ifdef CONFIG_SMP | |
6983 | prefill_possible_map(); | |
6984 | #endif | |
6985 | ||
cc90b958 | 6986 | - /* |
00e5a55c BS |
6987 | - * FIXME: This isn't an official loader_type right |
6988 | - * now but does currently work with elilo. | |
6989 | - * If we were configured as an EFI kernel, check to make | |
6990 | - * sure that we were loaded correctly from elilo and that | |
6991 | - * the system table is valid. If not, then initialize normally. | |
cc90b958 | 6992 | - */ |
00e5a55c BS |
6993 | #ifdef CONFIG_EFI |
6994 | - if ((boot_params.hdr.type_of_loader == 0x50) && | |
6995 | - boot_params.efi_info.efi_systab) | |
6996 | + if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | |
6997 | + "EL32", 4)) | |
6998 | efi_enabled = 1; | |
6999 | #endif | |
7000 | ||
7001 | @@ -653,12 +801,9 @@ void __init setup_arch(char **cmdline_p) | |
7002 | #endif | |
7003 | ||
7004 | ARCH_SETUP | |
7005 | - if (efi_enabled) | |
7006 | - efi_init(); | |
7007 | - else { | |
7008 | - printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | |
7009 | - print_memory_map(memory_setup()); | |
7010 | - } | |
7011 | + | |
7012 | + printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | |
7013 | + print_memory_map(memory_setup()); | |
7014 | ||
7015 | copy_edd(); | |
7016 | ||
7017 | @@ -691,6 +836,17 @@ void __init setup_arch(char **cmdline_p) | |
7018 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | |
7019 | *cmdline_p = command_line; | |
7020 | ||
7021 | + if (efi_enabled) | |
7022 | + efi_init(); | |
7023 | + | |
7024 | + /* update e820 for memory not covered by WB MTRRs */ | |
7025 | + find_max_pfn(); | |
7026 | + mtrr_bp_init(); | |
7027 | +#ifndef CONFIG_XEN | |
7028 | + if (mtrr_trim_uncached_memory(max_pfn)) | |
7029 | + find_max_pfn(); | |
7030 | +#endif | |
7031 | + | |
7032 | max_low_pfn = setup_memory(); | |
7033 | ||
7034 | #ifdef CONFIG_VMI | |
7035 | @@ -715,6 +871,16 @@ void __init setup_arch(char **cmdline_p) | |
7036 | smp_alloc_memory(); /* AP processor realmode stacks in low memory*/ | |
7037 | #endif | |
7038 | paging_init(); | |
7039 | + | |
7040 | + /* | |
7041 | + * NOTE: On x86-32, only from this point on, fixmaps are ready for use. | |
7042 | + */ | |
7043 | + | |
7044 | +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT | |
7045 | + if (init_ohci1394_dma_early) | |
7046 | + init_ohci1394_dma_on_all_controllers(); | |
7047 | +#endif | |
7048 | + | |
7049 | remapped_pgdat_init(); | |
7050 | sparse_init(); | |
7051 | zone_sizes_init(); | |
7052 | @@ -800,16 +966,20 @@ void __init setup_arch(char **cmdline_p) | |
7053 | * NOTE: at this point the bootmem allocator is fully available. | |
7054 | */ | |
7055 | ||
7056 | +#ifdef CONFIG_BLK_DEV_INITRD | |
7057 | + relocate_initrd(); | |
7058 | +#endif | |
7059 | + | |
7060 | paravirt_post_allocator_init(); | |
7061 | ||
7062 | if (is_initial_xendomain()) | |
7063 | dmi_scan_machine(); | |
7064 | ||
7065 | + io_delay_init(); | |
7066 | + | |
7067 | #ifdef CONFIG_X86_GENERICARCH | |
7068 | generic_apic_probe(); | |
7069 | -#endif | |
7070 | - if (efi_enabled) | |
7071 | - efi_map_memmap(); | |
7072 | +#endif | |
7073 | ||
7074 | set_iopl.iopl = 1; | |
7075 | WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl)); | |
7076 | @@ -827,7 +997,7 @@ void __init setup_arch(char **cmdline_p) | |
7077 | acpi_boot_table_init(); | |
7078 | #endif | |
7079 | ||
7080 | -#if defined(CONFIG_PCI) && !defined(CONFIG_XEN) | |
7081 | +#ifndef CONFIG_XEN | |
7082 | early_quirks(); | |
7083 | #endif | |
7084 | ||
7085 | @@ -873,3 +1043,30 @@ xen_panic_event(struct notifier_block *t | |
7086 | /* we're never actually going to get here... */ | |
7087 | return NOTIFY_DONE; | |
7088 | } | |
7089 | + | |
7090 | +/* | |
7091 | + * Request address space for all standard resources | |
7092 | + * Always returns 0; does nothing unless is_initial_xendomain() is true. | |
7093 | + * This is called just before pcibios_init(), which is also a | |
7094 | + * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). | |
7095 | + */ | |
7096 | +static int __init request_standard_resources(void) | |
7097 | +{ | |
7098 | + int i; | |
7099 | + | |
7100 | + /* Nothing to do if not running in dom0. */ | |
7101 | + if (!is_initial_xendomain()) | |
7102 | + return 0; | |
7103 | + | |
7104 | + printk(KERN_INFO "Setting up standard PCI resources\n"); | |
7105 | + init_iomem_resources(&code_resource, &data_resource, &bss_resource); | |
7106 | + | |
7107 | + request_resource(&iomem_resource, &video_ram_resource); /* return value is not checked */ | |
7108 | + | |
7109 | + /* request I/O space for devices used on all i[345]86 PCs */ | |
7110 | + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | |
7111 | + request_resource(&ioport_resource, &standard_io_resources[i]); /* return value is not checked */ | |
7112 | + return 0; | |
7113 | +} | |
7114 | + | |
7115 | +subsys_initcall(request_standard_resources); /* registers the function as a subsys initcall */ | |
7116 | --- sle11-2009-05-14.orig/arch/x86/kernel/setup_64-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
7117 | +++ sle11-2009-05-14/arch/x86/kernel/setup_64-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
7118 | @@ -15,7 +15,6 @@ | |
7119 | #include <linux/ptrace.h> | |
7120 | #include <linux/slab.h> | |
7121 | #include <linux/user.h> | |
7122 | -#include <linux/a.out.h> | |
7123 | #include <linux/screen_info.h> | |
7124 | #include <linux/ioport.h> | |
7125 | #include <linux/delay.h> | |
7126 | @@ -30,6 +29,7 @@ | |
7127 | #include <linux/crash_dump.h> | |
7128 | #include <linux/root_dev.h> | |
7129 | #include <linux/pci.h> | |
7130 | +#include <linux/efi.h> | |
7131 | #include <linux/acpi.h> | |
7132 | #include <linux/kallsyms.h> | |
7133 | #include <linux/edd.h> | |
7134 | @@ -39,10 +39,13 @@ | |
7135 | #include <linux/dmi.h> | |
7136 | #include <linux/dma-mapping.h> | |
7137 | #include <linux/ctype.h> | |
7138 | +#include <linux/uaccess.h> | |
7139 | +#include <linux/init_ohci1394_dma.h> | |
7140 | ||
7141 | #include <asm/mtrr.h> | |
7142 | #include <asm/uaccess.h> | |
7143 | #include <asm/system.h> | |
7144 | +#include <asm/vsyscall.h> | |
7145 | #include <asm/io.h> | |
7146 | #include <asm/smp.h> | |
7147 | #include <asm/msr.h> | |
7148 | @@ -50,6 +53,7 @@ | |
7149 | #include <video/edid.h> | |
7150 | #include <asm/e820.h> | |
7151 | #include <asm/dma.h> | |
7152 | +#include <asm/gart.h> | |
7153 | #include <asm/mpspec.h> | |
7154 | #include <asm/mmu_context.h> | |
7155 | #include <asm/proto.h> | |
7156 | @@ -59,6 +63,9 @@ | |
7157 | #include <asm/sections.h> | |
7158 | #include <asm/dmi.h> | |
7159 | #include <asm/cacheflush.h> | |
7160 | +#include <asm/mce.h> | |
7161 | +#include <asm/ds.h> | |
7162 | +#include <asm/topology.h> | |
7163 | #ifdef CONFIG_XEN | |
7164 | #include <linux/percpu.h> | |
7165 | #include <xen/interface/physdev.h> | |
7166 | @@ -108,6 +115,8 @@ EXPORT_SYMBOL(xen_start_info); | |
7167 | struct cpuinfo_x86 boot_cpu_data __read_mostly; | |
7168 | EXPORT_SYMBOL(boot_cpu_data); | |
7169 | ||
7170 | +__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; | |
7171 | + | |
7172 | unsigned long mmu_cr4_features; | |
7173 | ||
7174 | /* Boot loader ID as an integer, for the benefit of proc_dointvec */ | |
7175 | @@ -117,7 +126,7 @@ unsigned long saved_video_mode; | |
7176 | ||
7177 | int force_mwait __cpuinitdata; | |
7178 | ||
cc90b958 | 7179 | -/* |
00e5a55c BS |
7180 | +/* |
7181 | * Early DMI memory | |
7182 | */ | |
7183 | int dmi_alloc_index; | |
7184 | @@ -163,25 +172,27 @@ struct resource standard_io_resources[] | |
7185 | ||
7186 | #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM) | |
7187 | ||
7188 | -struct resource data_resource = { | |
7189 | +static struct resource data_resource = { | |
7190 | .name = "Kernel data", | |
7191 | .start = 0, | |
7192 | .end = 0, | |
7193 | .flags = IORESOURCE_RAM, | |
7194 | }; | |
7195 | -struct resource code_resource = { | |
7196 | +static struct resource code_resource = { | |
7197 | .name = "Kernel code", | |
7198 | .start = 0, | |
7199 | .end = 0, | |
7200 | .flags = IORESOURCE_RAM, | |
7201 | }; | |
7202 | -struct resource bss_resource = { | |
7203 | +static struct resource bss_resource = { | |
7204 | .name = "Kernel bss", | |
7205 | .start = 0, | |
7206 | .end = 0, | |
7207 | .flags = IORESOURCE_RAM, | |
7208 | }; | |
7209 | ||
7210 | +static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); | |
7211 | + | |
7212 | #ifdef CONFIG_PROC_VMCORE | |
7213 | /* elfcorehdr= specifies the location of elf core header | |
7214 | * stored by the crashed kernel. This option will be passed | |
7215 | @@ -205,9 +216,10 @@ contig_initmem_init(unsigned long start_ | |
7216 | unsigned long bootmap_size, bootmap; | |
7217 | ||
7218 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; | |
7219 | - bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); | |
7220 | + bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size, | |
7221 | + PAGE_SIZE); | |
7222 | if (bootmap == -1L) | |
7223 | - panic("Cannot find bootmem map of size %ld\n",bootmap_size); | |
7224 | + panic("Cannot find bootmem map of size %ld\n", bootmap_size); | |
7225 | bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); | |
7226 | e820_register_active_regions(0, start_pfn, end_pfn); | |
7227 | #ifdef CONFIG_XEN | |
7228 | @@ -215,8 +227,8 @@ contig_initmem_init(unsigned long start_ | |
7229 | #else | |
7230 | free_bootmem_with_active_regions(0, end_pfn); | |
7231 | #endif | |
7232 | - reserve_bootmem(bootmap, bootmap_size); | |
7233 | -} | |
7234 | + reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); | |
7235 | +} | |
7236 | #endif | |
7237 | ||
7238 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) | |
7239 | @@ -249,27 +261,35 @@ static inline void copy_edd(void) | |
7240 | #ifndef CONFIG_XEN | |
7241 | static void __init reserve_crashkernel(void) | |
7242 | { | |
7243 | - unsigned long long free_mem; | |
7244 | + unsigned long long total_mem; | |
7245 | unsigned long long crash_size, crash_base; | |
7246 | int ret; | |
7247 | ||
7248 | - free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT; | |
7249 | + total_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT; | |
7250 | ||
7251 | - ret = parse_crashkernel(boot_command_line, free_mem, | |
7252 | + ret = parse_crashkernel(boot_command_line, total_mem, | |
7253 | &crash_size, &crash_base); | |
7254 | if (ret == 0 && crash_size) { | |
7255 | - if (crash_base > 0) { | |
7256 | - printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " | |
7257 | - "for crashkernel (System RAM: %ldMB)\n", | |
7258 | - (unsigned long)(crash_size >> 20), | |
7259 | - (unsigned long)(crash_base >> 20), | |
7260 | - (unsigned long)(free_mem >> 20)); | |
7261 | - crashk_res.start = crash_base; | |
7262 | - crashk_res.end = crash_base + crash_size - 1; | |
7263 | - reserve_bootmem(crash_base, crash_size); | |
7264 | - } else | |
7265 | + if (crash_base <= 0) { | |
7266 | printk(KERN_INFO "crashkernel reservation failed - " | |
7267 | "you have to specify a base address\n"); | |
7268 | + return; | |
7269 | + } | |
7270 | + | |
7271 | + if (reserve_bootmem(crash_base, crash_size, | |
7272 | + BOOTMEM_EXCLUSIVE) < 0) { | |
7273 | + printk(KERN_INFO "crashkernel reservation failed - " | |
7274 | + "memory is in use\n"); | |
7275 | + return; | |
7276 | + } | |
7277 | + | |
7278 | + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " | |
7279 | + "for crashkernel (System RAM: %ldMB)\n", | |
7280 | + (unsigned long)(crash_size >> 20), | |
7281 | + (unsigned long)(crash_base >> 20), | |
7282 | + (unsigned long)(total_mem >> 20)); | |
7283 | + crashk_res.start = crash_base; | |
7284 | + crashk_res.end = crash_base + crash_size - 1; | |
7285 | } | |
7286 | } | |
7287 | #else | |
7288 | @@ -280,37 +300,21 @@ static inline void __init reserve_crashk | |
7289 | {} | |
7290 | #endif | |
7291 | ||
7292 | -#ifndef CONFIG_XEN | |
7293 | -#define EBDA_ADDR_POINTER 0x40E | |
cc90b958 | 7294 | - |
00e5a55c BS |
7295 | -unsigned __initdata ebda_addr; |
7296 | -unsigned __initdata ebda_size; | |
cc90b958 | 7297 | - |
00e5a55c BS |
7298 | -static void discover_ebda(void) |
7299 | +/* Overridden in paravirt.c if CONFIG_PARAVIRT */ | |
7300 | +void __attribute__((weak)) __init memory_setup(void) | |
7301 | { | |
cc90b958 | 7302 | - /* |
00e5a55c BS |
7303 | - * there is a real-mode segmented pointer pointing to the |
7304 | - * 4K EBDA area at 0x40E | |
cc90b958 | 7305 | - */ |
00e5a55c BS |
7306 | - ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER); |
7307 | - ebda_addr <<= 4; | |
cc90b958 | 7308 | - |
00e5a55c | 7309 | - ebda_size = *(unsigned short *)__va(ebda_addr); |
cc90b958 | 7310 | - |
00e5a55c BS |
7311 | - /* Round EBDA up to pages */ |
7312 | - if (ebda_size == 0) | |
7313 | - ebda_size = 1; | |
7314 | - ebda_size <<= 10; | |
7315 | - ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE); | |
7316 | - if (ebda_size > 64*1024) | |
7317 | - ebda_size = 64*1024; | |
7318 | + machine_specific_memory_setup(); | |
7319 | } | |
7320 | -#else | |
7321 | -#define discover_ebda() ((void)0) | |
7322 | -#endif | |
7323 | ||
7324 | +/* | |
7325 | + * setup_arch - architecture-specific boot-time initializations | |
7326 | + * | |
7327 | + * Note: On x86_64, fixmaps are ready for use even before this is called. | |
7328 | + */ | |
7329 | void __init setup_arch(char **cmdline_p) | |
7330 | { | |
7331 | + unsigned i; | |
7332 | + | |
7333 | #ifdef CONFIG_XEN | |
7334 | extern struct e820map machine_e820; | |
7335 | ||
7336 | @@ -319,6 +323,11 @@ void __init setup_arch(char **cmdline_p) | |
7337 | /* Register a call for panic conditions. */ | |
7338 | atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); | |
7339 | ||
7340 | + WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable, | |
7341 | + VMASST_TYPE_writable_pagetables)); | |
7342 | + | |
7343 | + early_ioremap_init(); | |
7344 | + | |
7345 | ROOT_DEV = MKDEV(RAMDISK_MAJOR,0); | |
7346 | screen_info = boot_params.screen_info; | |
7347 | ||
7348 | @@ -335,11 +344,6 @@ void __init setup_arch(char **cmdline_p) | |
7349 | screen_info.orig_video_isVGA = 0; | |
7350 | ||
7351 | copy_edid(); | |
cc90b958 | 7352 | - |
00e5a55c BS |
7353 | - WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable, |
7354 | - VMASST_TYPE_writable_pagetables)); | |
cc90b958 | 7355 | - |
00e5a55c BS |
7356 | - ARCH_SETUP |
7357 | #else | |
7358 | printk(KERN_INFO "Command line: %s\n", boot_command_line); | |
7359 | ||
7360 | @@ -355,7 +359,15 @@ void __init setup_arch(char **cmdline_p) | |
7361 | rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); | |
7362 | rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); | |
7363 | #endif | |
7364 | - setup_memory_region(); | |
7365 | +#ifdef CONFIG_EFI | |
7366 | + if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | |
7367 | + "EL64", 4)) | |
7368 | + efi_enabled = 1; | |
7369 | +#endif | |
7370 | + | |
7371 | + ARCH_SETUP | |
7372 | + | |
7373 | + memory_setup(); | |
7374 | copy_edd(); | |
7375 | ||
7376 | if (!boot_params.hdr.root_flags) | |
7377 | @@ -379,28 +391,51 @@ void __init setup_arch(char **cmdline_p) | |
7378 | ||
7379 | parse_early_param(); | |
7380 | ||
7381 | +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT | |
7382 | + if (init_ohci1394_dma_early) | |
7383 | + init_ohci1394_dma_on_all_controllers(); | |
7384 | +#endif | |
7385 | + | |
7386 | finish_e820_parsing(); | |
7387 | ||
7388 | + early_gart_iommu_check(); | |
7389 | + | |
7390 | e820_register_active_regions(0, 0, -1UL); | |
7391 | /* | |
7392 | * partially used pages are not usable - thus | |
7393 | * we are rounding upwards: | |
7394 | */ | |
7395 | end_pfn = e820_end_of_ram(); | |
7396 | + /* update e820 for memory not covered by WB MTRRs */ | |
7397 | + mtrr_bp_init(); | |
7398 | +#ifndef CONFIG_XEN | |
7399 | + if (mtrr_trim_uncached_memory(end_pfn)) { | |
7400 | + e820_register_active_regions(0, 0, -1UL); | |
7401 | + end_pfn = e820_end_of_ram(); | |
7402 | + } | |
7403 | +#endif | |
7404 | + | |
7405 | num_physpages = end_pfn; | |
7406 | + max_mapnr = end_pfn; | |
7407 | ||
7408 | check_efer(); | |
7409 | ||
7410 | - discover_ebda(); | |
cc90b958 | 7411 | - |
00e5a55c BS |
7412 | init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); |
7413 | + if (efi_enabled) | |
7414 | + efi_init(); | |
7415 | ||
7416 | if (is_initial_xendomain()) | |
7417 | dmi_scan_machine(); | |
7418 | ||
7419 | + io_delay_init(); | |
7420 | + | |
7421 | #if defined(CONFIG_SMP) && !defined(CONFIG_XEN) | |
7422 | - /* setup to use the static apicid table during kernel startup */ | |
7423 | - x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init; | |
7424 | + /* setup to use the early static init tables during kernel startup */ | |
7425 | + x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; | |
7426 | + x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init; | |
7427 | +#ifdef CONFIG_NUMA | |
7428 | + x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init; | |
7429 | +#endif | |
7430 | #endif | |
7431 | ||
7432 | /* How many end-of-memory variables you have, grandma! */ | |
7433 | @@ -419,54 +454,25 @@ void __init setup_arch(char **cmdline_p) | |
7434 | #endif | |
7435 | ||
7436 | #ifdef CONFIG_NUMA | |
7437 | - numa_initmem_init(0, end_pfn); | |
7438 | + numa_initmem_init(0, end_pfn); | |
7439 | #else | |
7440 | contig_initmem_init(0, end_pfn); | |
7441 | #endif | |
7442 | ||
7443 | -#ifdef CONFIG_XEN | |
cc90b958 | 7444 | - /* |
00e5a55c BS |
7445 | - * Reserve kernel, physmap, start info, initial page tables, and |
7446 | - * direct mapping. | |
cc90b958 | 7447 | - */ |
00e5a55c BS |
7448 | - reserve_bootmem_generic(__pa_symbol(&_text), |
7449 | - (table_end << PAGE_SHIFT) - __pa_symbol(&_text)); | |
7450 | -#else | |
7451 | - /* Reserve direct mapping */ | |
7452 | - reserve_bootmem_generic(table_start << PAGE_SHIFT, | |
7453 | - (table_end - table_start) << PAGE_SHIFT); | |
cc90b958 | 7454 | - |
00e5a55c BS |
7455 | - /* reserve kernel */ |
7456 | - reserve_bootmem_generic(__pa_symbol(&_text), | |
7457 | - __pa_symbol(&_end) - __pa_symbol(&_text)); | |
7458 | + early_res_to_bootmem(); | |
7459 | ||
7460 | +#ifndef CONFIG_XEN | |
7461 | +#ifdef CONFIG_ACPI_SLEEP | |
7462 | /* | |
7463 | - * reserve physical page 0 - it's a special BIOS page on many boxes, | |
7464 | - * enabling clean reboots, SMP operation, laptop functions. | |
7465 | + * Reserve low memory region for sleep support. | |
7466 | */ | |
7467 | - reserve_bootmem_generic(0, PAGE_SIZE); | |
cc90b958 | 7468 | - |
00e5a55c BS |
7469 | - /* reserve ebda region */ |
7470 | - if (ebda_addr) | |
7471 | - reserve_bootmem_generic(ebda_addr, ebda_size); | |
7472 | -#ifdef CONFIG_NUMA | |
7473 | - /* reserve nodemap region */ | |
7474 | - if (nodemap_addr) | |
7475 | - reserve_bootmem_generic(nodemap_addr, nodemap_size); | |
7476 | + acpi_reserve_bootmem(); | |
7477 | #endif | |
7478 | ||
7479 | -#ifdef CONFIG_SMP | |
7480 | - /* Reserve SMP trampoline */ | |
7481 | - reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE); | |
7482 | -#endif | |
7483 | + if (efi_enabled) | |
7484 | + efi_reserve_bootmem(); | |
7485 | #endif | |
7486 | ||
7487 | -#ifdef CONFIG_ACPI_SLEEP | |
7488 | - /* | |
7489 | - * Reserve low memory region for sleep support. | |
7490 | - */ | |
7491 | - acpi_reserve_bootmem(); | |
7492 | -#endif | |
7493 | #ifdef CONFIG_BLK_DEV_INITRD | |
7494 | #ifdef CONFIG_XEN | |
7495 | if (xen_start_info->mod_start) { | |
7496 | @@ -490,6 +496,8 @@ void __init setup_arch(char **cmdline_p) | |
7497 | initrd_below_start_ok = 1; | |
7498 | #endif | |
7499 | } else { | |
7500 | + /* Assumes everything on node 0 */ | |
7501 | + free_bootmem(ramdisk_image, ramdisk_size); | |
7502 | printk(KERN_ERR "initrd extends beyond end of memory " | |
7503 | "(0x%08lx > 0x%08lx)\ndisabling initrd\n", | |
7504 | ramdisk_end, end_of_mem); | |
7505 | @@ -499,10 +507,11 @@ void __init setup_arch(char **cmdline_p) | |
7506 | #endif | |
7507 | reserve_crashkernel(); | |
7508 | paging_init(); | |
7509 | + map_vsyscall(); | |
7510 | #ifdef CONFIG_X86_LOCAL_APIC | |
7511 | /* | |
7512 | - * Find and reserve possible boot-time SMP configuration: | |
7513 | - */ | |
7514 | + * Find and reserve possible boot-time SMP configuration: | |
7515 | + */ | |
7516 | find_smp_config(); | |
7517 | #endif | |
7518 | #ifdef CONFIG_XEN | |
7519 | @@ -590,16 +599,10 @@ void __init setup_arch(char **cmdline_p) | |
7520 | #endif | |
7521 | #endif | |
7522 | ||
7523 | -#if defined(CONFIG_PCI) && !defined(CONFIG_XEN) | |
7524 | +#ifndef CONFIG_XEN | |
7525 | early_quirks(); | |
7526 | #endif | |
7527 | ||
7528 | - /* | |
7529 | - * set this early, so we dont allocate cpu0 | |
7530 | - * if MADT list doesnt list BSP first | |
7531 | - * mpparse.c/MP_processor_info() allocates logical cpu numbers. | |
7532 | - */ | |
7533 | - cpu_set(0, cpu_present_map); | |
7534 | #ifdef CONFIG_ACPI | |
7535 | /* | |
7536 | * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). | |
7537 | @@ -623,6 +626,7 @@ void __init setup_arch(char **cmdline_p) | |
7538 | get_smp_config(); | |
7539 | #ifndef CONFIG_XEN | |
7540 | init_apic_mappings(); | |
7541 | + ioapic_init_mappings(); | |
7542 | #endif | |
7543 | #endif | |
7544 | #if defined(CONFIG_XEN) && defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU) | |
7545 | @@ -634,18 +638,17 @@ void __init setup_arch(char **cmdline_p) | |
7546 | */ | |
7547 | #ifdef CONFIG_XEN | |
7548 | if (is_initial_xendomain()) | |
7549 | - e820_reserve_resources(machine_e820.map, machine_e820.nr_map); | |
7550 | + e820_reserve_resources(machine_e820.map, machine_e820.nr_map, | |
7551 | + &code_resource, &data_resource, &bss_resource); | |
7552 | #else | |
7553 | - e820_reserve_resources(e820.map, e820.nr_map); | |
7554 | + e820_reserve_resources(e820.map, e820.nr_map, | |
7555 | + &code_resource, &data_resource, &bss_resource); | |
7556 | e820_mark_nosave_regions(); | |
7557 | #endif | |
7558 | ||
7559 | - { | |
7560 | - unsigned i; | |
7561 | /* request I/O space for devices used on all i[345]86 PCs */ | |
7562 | for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | |
7563 | request_resource(&ioport_resource, &standard_io_resources[i]); | |
7564 | - } | |
7565 | ||
7566 | #ifdef CONFIG_XEN | |
7567 | if (is_initial_xendomain()) | |
7568 | @@ -679,7 +682,8 @@ void __init setup_arch(char **cmdline_p) | |
7569 | ||
7570 | #ifdef CONFIG_VT | |
7571 | #if defined(CONFIG_VGA_CONSOLE) | |
7572 | - conswitchp = &vga_con; | |
7573 | + if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) | |
7574 | + conswitchp = &vga_con; | |
7575 | #elif defined(CONFIG_DUMMY_CONSOLE) | |
7576 | conswitchp = &dummy_con; | |
7577 | #endif | |
7578 | @@ -723,9 +727,10 @@ static void __cpuinit display_cacheinfo( | |
7579 | ||
7580 | if (n >= 0x80000005) { | |
7581 | cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); | |
7582 | - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", | |
7583 | - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); | |
7584 | - c->x86_cache_size=(ecx>>24)+(edx>>24); | |
7585 | + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " | |
7586 | + "D cache %dK (%d bytes/line)\n", | |
7587 | + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); | |
7588 | + c->x86_cache_size = (ecx>>24) + (edx>>24); | |
7589 | /* On K8 L1 TLB is inclusive, so don't count it */ | |
7590 | c->x86_tlbsize = 0; | |
7591 | } | |
7592 | @@ -739,27 +744,25 @@ static void __cpuinit display_cacheinfo( | |
7593 | printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", | |
7594 | c->x86_cache_size, ecx & 0xFF); | |
7595 | } | |
cc90b958 | 7596 | - |
00e5a55c BS |
7597 | - if (n >= 0x80000007) |
7598 | - cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power); | |
7599 | if (n >= 0x80000008) { | |
7600 | - cpuid(0x80000008, &eax, &dummy, &dummy, &dummy); | |
7601 | + cpuid(0x80000008, &eax, &dummy, &dummy, &dummy); | |
7602 | c->x86_virt_bits = (eax >> 8) & 0xff; | |
7603 | c->x86_phys_bits = eax & 0xff; | |
7604 | } | |
7605 | } | |
7606 | ||
7607 | #ifdef CONFIG_NUMA | |
7608 | -static int nearby_node(int apicid) | |
7609 | +static int __cpuinit nearby_node(int apicid) | |
7610 | { | |
7611 | - int i; | |
7612 | + int i, node; | |
7613 | + | |
7614 | for (i = apicid - 1; i >= 0; i--) { | |
7615 | - int node = apicid_to_node[i]; | |
7616 | + node = apicid_to_node[i]; | |
7617 | if (node != NUMA_NO_NODE && node_online(node)) | |
7618 | return node; | |
7619 | } | |
7620 | for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { | |
7621 | - int node = apicid_to_node[i]; | |
7622 | + node = apicid_to_node[i]; | |
7623 | if (node != NUMA_NO_NODE && node_online(node)) | |
7624 | return node; | |
7625 | } | |
7626 | @@ -771,7 +774,7 @@ static int nearby_node(int apicid) | |
7627 | * On a AMD dual core setup the lower bits of the APIC id distingush the cores. | |
7628 | * Assumes number of cores is a power of two. | |
7629 | */ | |
7630 | -static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |
7631 | +static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | |
7632 | { | |
7633 | #ifdef CONFIG_SMP | |
7634 | unsigned bits; | |
7635 | @@ -780,7 +783,54 @@ static void __init amd_detect_cmp(struct | |
7636 | int node = 0; | |
7637 | unsigned apicid = hard_smp_processor_id(); | |
7638 | #endif | |
7639 | - unsigned ecx = cpuid_ecx(0x80000008); | |
7640 | + bits = c->x86_coreid_bits; | |
cc90b958 | 7641 | + |
00e5a55c BS |
7642 | + /* Low order bits define the core id (index of core in socket) */ |
7643 | + c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1); | |
7644 | + /* Convert the APIC ID into the socket ID */ | |
7645 | + c->phys_proc_id = phys_pkg_id(bits); | |
cc90b958 | 7646 | + |
00e5a55c BS |
7647 | +#ifdef CONFIG_NUMA |
7648 | + node = c->phys_proc_id; | |
7649 | + if (apicid_to_node[apicid] != NUMA_NO_NODE) | |
7650 | + node = apicid_to_node[apicid]; | |
7651 | + if (!node_online(node)) { | |
7652 | + /* Two possibilities here: | |
7653 | + - The CPU is missing memory and no node was created. | |
7654 | + In that case try picking one from a nearby CPU | |
7655 | + - The APIC IDs differ from the HyperTransport node IDs | |
7656 | + which the K8 northbridge parsing fills in. | |
7657 | + Assume they are all increased by a constant offset, | |
7658 | + but in the same order as the HT nodeids. | |
7659 | + If that doesn't result in a usable node fall back to the | |
7660 | + path for the previous case. */ | |
7661 | + | |
7662 | + int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits); | |
7663 | + | |
7664 | + if (ht_nodeid >= 0 && | |
7665 | + apicid_to_node[ht_nodeid] != NUMA_NO_NODE) | |
7666 | + node = apicid_to_node[ht_nodeid]; | |
7667 | + /* Pick a nearby node */ | |
7668 | + if (!node_online(node)) | |
7669 | + node = nearby_node(apicid); | |
7670 | + } | |
7671 | + numa_set_node(cpu, node); | |
7672 | + | |
7673 | + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | |
7674 | +#endif | |
cc90b958 | 7675 | +#endif |
00e5a55c | 7676 | +} |
cc90b958 | 7677 | + |
00e5a55c BS |
7678 | +static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) |
7679 | +{ | |
7680 | +#ifdef CONFIG_SMP | |
7681 | + unsigned bits, ecx; | |
cc90b958 | 7682 | + |
00e5a55c BS |
7683 | + /* Multi core CPU? */ |
7684 | + if (c->extended_cpuid_level < 0x80000008) | |
7685 | + return; | |
cc90b958 | 7686 | + |
00e5a55c BS |
7687 | + ecx = cpuid_ecx(0x80000008); |
7688 | ||
7689 | c->x86_max_cores = (ecx & 0xff) + 1; | |
7690 | ||
7691 | @@ -793,37 +843,8 @@ static void __init amd_detect_cmp(struct | |
7692 | bits++; | |
7693 | } | |
7694 | ||
7695 | - /* Low order bits define the core id (index of core in socket) */ | |
7696 | - c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1); | |
7697 | - /* Convert the APIC ID into the socket ID */ | |
7698 | - c->phys_proc_id = phys_pkg_id(bits); | |
7699 | - | |
7700 | -#ifdef CONFIG_NUMA | |
7701 | - node = c->phys_proc_id; | |
7702 | - if (apicid_to_node[apicid] != NUMA_NO_NODE) | |
7703 | - node = apicid_to_node[apicid]; | |
7704 | - if (!node_online(node)) { | |
7705 | - /* Two possibilities here: | |
7706 | - - The CPU is missing memory and no node was created. | |
7707 | - In that case try picking one from a nearby CPU | |
7708 | - - The APIC IDs differ from the HyperTransport node IDs | |
7709 | - which the K8 northbridge parsing fills in. | |
7710 | - Assume they are all increased by a constant offset, | |
7711 | - but in the same order as the HT nodeids. | |
7712 | - If that doesn't result in a usable node fall back to the | |
7713 | - path for the previous case. */ | |
7714 | - int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits); | |
7715 | - if (ht_nodeid >= 0 && | |
7716 | - apicid_to_node[ht_nodeid] != NUMA_NO_NODE) | |
7717 | - node = apicid_to_node[ht_nodeid]; | |
7718 | - /* Pick a nearby node */ | |
7719 | - if (!node_online(node)) | |
7720 | - node = nearby_node(apicid); | |
7721 | - } | |
7722 | - numa_set_node(cpu, node); | |
7723 | + c->x86_coreid_bits = bits; | |
7724 | ||
7725 | - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | |
7726 | -#endif | |
7727 | #endif | |
7728 | } | |
7729 | ||
7730 | @@ -840,8 +861,8 @@ static void __init amd_detect_cmp(struct | |
7731 | /* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ | |
7732 | static __cpuinit int amd_apic_timer_broken(void) | |
7733 | { | |
7734 | - u32 lo, hi; | |
7735 | - u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | |
7736 | + u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | |
cc90b958 | 7737 | + |
00e5a55c BS |
7738 | switch (eax & CPUID_XFAM) { |
7739 | case CPUID_XFAM_K8: | |
7740 | if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) | |
7741 | @@ -860,6 +881,15 @@ static __cpuinit int amd_apic_timer_brok | |
7742 | } | |
7743 | #endif | |
7744 | ||
7745 | +static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |
7746 | +{ | |
7747 | + early_init_amd_mc(c); | |
cc90b958 | 7748 | + |
00e5a55c BS |
7749 | + /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ |
7750 | + if (c->x86_power & (1<<8)) | |
7751 | + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | |
cc90b958 | 7752 | +} |
cc90b958 | 7753 | + |
00e5a55c BS |
7754 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
7755 | { | |
7756 | unsigned level; | |
7757 | @@ -870,7 +900,7 @@ static void __cpuinit init_amd(struct cp | |
7758 | /* | |
7759 | * Disable TLB flush filter by setting HWCR.FFDIS on K8 | |
7760 | * bit 6 of msr C001_0015 | |
7761 | - * | |
7762 | + * | |
7763 | * Errata 63 for SH-B3 steppings | |
7764 | * Errata 122 for all steppings (F+ have it disabled by default) | |
7765 | */ | |
7766 | @@ -883,35 +913,32 @@ static void __cpuinit init_amd(struct cp | |
7767 | ||
7768 | /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; | |
7769 | 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ | |
7770 | - clear_bit(0*32+31, &c->x86_capability); | |
7771 | - | |
7772 | + clear_bit(0*32+31, (unsigned long *)&c->x86_capability); | |
cc90b958 | 7773 | + |
00e5a55c BS |
7774 | /* On C+ stepping K8 rep microcode works well for copy/memset */ |
7775 | level = cpuid_eax(1); | |
7776 | - if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) | |
7777 | - set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); | |
7778 | + if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || | |
7779 | + level >= 0x0f58)) | |
7780 | + set_cpu_cap(c, X86_FEATURE_REP_GOOD); | |
7781 | if (c->x86 == 0x10 || c->x86 == 0x11) | |
7782 | - set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); | |
7783 | + set_cpu_cap(c, X86_FEATURE_REP_GOOD); | |
7784 | ||
7785 | /* Enable workaround for FXSAVE leak */ | |
7786 | if (c->x86 >= 6) | |
7787 | - set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability); | |
7788 | + set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); | |
7789 | ||
7790 | level = get_model_name(c); | |
7791 | if (!level) { | |
7792 | - switch (c->x86) { | |
7793 | + switch (c->x86) { | |
7794 | case 15: | |
7795 | /* Should distinguish Models here, but this is only | |
7796 | a fallback anyways. */ | |
7797 | strcpy(c->x86_model_id, "Hammer"); | |
7798 | - break; | |
7799 | - } | |
7800 | - } | |
7801 | + break; | |
7802 | + } | |
7803 | + } | |
7804 | display_cacheinfo(c); | |
7805 | ||
7806 | - /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | |
7807 | - if (c->x86_power & (1<<8)) | |
7808 | - set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | |
7809 | - | |
7810 | /* Multi core CPU? */ | |
7811 | if (c->extended_cpuid_level >= 0x80000008) | |
7812 | amd_detect_cmp(c); | |
7813 | @@ -923,14 +950,10 @@ static void __cpuinit init_amd(struct cp | |
7814 | num_cache_leaves = 3; | |
7815 | ||
7816 | if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11) | |
7817 | - set_bit(X86_FEATURE_K8, &c->x86_capability); | |
7818 | - | |
7819 | - /* RDTSC can be speculated around */ | |
7820 | - clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | |
7821 | + set_cpu_cap(c, X86_FEATURE_K8); | |
7822 | ||
7823 | - /* Family 10 doesn't support C states in MWAIT so don't use it */ | |
7824 | - if (c->x86 == 0x10 && !force_mwait) | |
7825 | - clear_bit(X86_FEATURE_MWAIT, &c->x86_capability); | |
7826 | + /* MFENCE stops RDTSC speculation */ | |
7827 | + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); | |
7828 | ||
7829 | #ifndef CONFIG_XEN | |
7830 | if (amd_apic_timer_broken()) | |
7831 | @@ -938,28 +961,29 @@ static void __cpuinit init_amd(struct cp | |
7832 | #endif | |
7833 | } | |
7834 | ||
7835 | -static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |
7836 | +void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |
7837 | { | |
7838 | #ifdef CONFIG_SMP | |
7839 | - u32 eax, ebx, ecx, edx; | |
7840 | - int index_msb, core_bits; | |
7841 | + u32 eax, ebx, ecx, edx; | |
7842 | + int index_msb, core_bits; | |
7843 | ||
7844 | cpuid(1, &eax, &ebx, &ecx, &edx); | |
7845 | ||
7846 | ||
7847 | if (!cpu_has(c, X86_FEATURE_HT)) | |
7848 | return; | |
7849 | - if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) | |
7850 | + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) | |
7851 | goto out; | |
7852 | ||
7853 | smp_num_siblings = (ebx & 0xff0000) >> 16; | |
7854 | ||
7855 | if (smp_num_siblings == 1) { | |
7856 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | |
7857 | - } else if (smp_num_siblings > 1 ) { | |
7858 | + } else if (smp_num_siblings > 1) { | |
7859 | ||
7860 | if (smp_num_siblings > NR_CPUS) { | |
7861 | - printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); | |
7862 | + printk(KERN_WARNING "CPU: Unsupported number of " | |
7863 | + "siblings %d", smp_num_siblings); | |
7864 | smp_num_siblings = 1; | |
7865 | return; | |
7866 | } | |
7867 | @@ -969,7 +993,7 @@ static void __cpuinit detect_ht(struct c | |
7868 | ||
7869 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; | |
7870 | ||
7871 | - index_msb = get_count_order(smp_num_siblings) ; | |
7872 | + index_msb = get_count_order(smp_num_siblings); | |
7873 | ||
7874 | core_bits = get_count_order(c->x86_max_cores); | |
7875 | ||
7876 | @@ -978,8 +1002,10 @@ static void __cpuinit detect_ht(struct c | |
7877 | } | |
7878 | out: | |
7879 | if ((c->x86_max_cores * smp_num_siblings) > 1) { | |
7880 | - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id); | |
7881 | - printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); | |
7882 | + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | |
7883 | + c->phys_proc_id); | |
7884 | + printk(KERN_INFO "CPU: Processor Core ID: %d\n", | |
7885 | + c->cpu_core_id); | |
7886 | } | |
7887 | ||
7888 | #endif | |
7889 | @@ -1003,7 +1029,7 @@ static int __cpuinit intel_num_cpu_cores | |
7890 | return 1; | |
7891 | } | |
7892 | ||
7893 | -static void srat_detect_node(void) | |
7894 | +static void __cpuinit srat_detect_node(void) | |
7895 | { | |
7896 | #ifdef CONFIG_NUMA | |
7897 | unsigned node; | |
7898 | @@ -1013,7 +1039,7 @@ static void srat_detect_node(void) | |
7899 | /* Don't do the funky fallback heuristics the AMD version employs | |
7900 | for now. */ | |
7901 | node = apicid_to_node[apicid]; | |
7902 | - if (node == NUMA_NO_NODE) | |
7903 | + if (node == NUMA_NO_NODE || !node_online(node)) | |
7904 | node = first_node(node_online_map); | |
7905 | numa_set_node(cpu, node); | |
7906 | ||
7907 | @@ -1021,28 +1047,39 @@ static void srat_detect_node(void) | |
7908 | #endif | |
7909 | } | |
7910 | ||
7911 | +static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |
cc90b958 | 7912 | +{ |
00e5a55c BS |
7913 | + if ((c->x86 == 0xf && c->x86_model >= 0x03) || |
7914 | + (c->x86 == 0x6 && c->x86_model >= 0x0e)) | |
7915 | + set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | |
cc90b958 BS |
7916 | +} |
7917 | + | |
00e5a55c BS |
7918 | static void __cpuinit init_intel(struct cpuinfo_x86 *c) |
7919 | { | |
7920 | /* Cache sizes */ | |
7921 | unsigned n; | |
7922 | ||
7923 | init_intel_cacheinfo(c); | |
7924 | - if (c->cpuid_level > 9 ) { | |
7925 | + if (c->cpuid_level > 9) { | |
7926 | unsigned eax = cpuid_eax(10); | |
7927 | /* Check for version and the number of counters */ | |
7928 | if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) | |
7929 | - set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); | |
7930 | + set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | |
7931 | } | |
7932 | ||
7933 | if (cpu_has_ds) { | |
7934 | unsigned int l1, l2; | |
7935 | rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); | |
7936 | if (!(l1 & (1<<11))) | |
7937 | - set_bit(X86_FEATURE_BTS, c->x86_capability); | |
7938 | + set_cpu_cap(c, X86_FEATURE_BTS); | |
7939 | if (!(l1 & (1<<12))) | |
7940 | - set_bit(X86_FEATURE_PEBS, c->x86_capability); | |
7941 | + set_cpu_cap(c, X86_FEATURE_PEBS); | |
7942 | } | |
7943 | ||
cc90b958 | 7944 | + |
00e5a55c BS |
7945 | + if (cpu_has_bts) |
7946 | + ds_init_intel(c); | |
cc90b958 | 7947 | + |
00e5a55c BS |
7948 | n = c->extended_cpuid_level; |
7949 | if (n >= 0x80000008) { | |
7950 | unsigned eax = cpuid_eax(0x80000008); | |
7951 | @@ -1059,14 +1096,11 @@ static void __cpuinit init_intel(struct | |
7952 | c->x86_cache_alignment = c->x86_clflush_size * 2; | |
7953 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | |
7954 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | |
7955 | - set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | |
7956 | + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | |
7957 | if (c->x86 == 6) | |
7958 | - set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); | |
7959 | - if (c->x86 == 15) | |
7960 | - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | |
7961 | - else | |
7962 | - clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | |
7963 | - c->x86_max_cores = intel_num_cpu_cores(c); | |
7964 | + set_cpu_cap(c, X86_FEATURE_REP_GOOD); | |
7965 | + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | |
7966 | + c->x86_max_cores = intel_num_cpu_cores(c); | |
7967 | ||
7968 | srat_detect_node(); | |
7969 | } | |
7970 | @@ -1083,18 +1117,12 @@ static void __cpuinit get_cpu_vendor(str | |
7971 | c->x86_vendor = X86_VENDOR_UNKNOWN; | |
7972 | } | |
7973 | ||
7974 | -struct cpu_model_info { | |
7975 | - int vendor; | |
7976 | - int family; | |
7977 | - char *model_names[16]; | |
7978 | -}; | |
7979 | - | |
7980 | /* Do some early cpuid on the boot CPU to get some parameter that are | |
7981 | needed before check_bugs. Everything advanced is in identify_cpu | |
7982 | below. */ | |
7983 | -void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |
7984 | +static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |
7985 | { | |
7986 | - u32 tfms; | |
7987 | + u32 tfms, xlvl; | |
7988 | ||
7989 | c->loops_per_jiffy = loops_per_jiffy; | |
7990 | c->x86_cache_size = -1; | |
7991 | @@ -1105,6 +1133,7 @@ void __cpuinit early_identify_cpu(struct | |
7992 | c->x86_clflush_size = 64; | |
7993 | c->x86_cache_alignment = c->x86_clflush_size; | |
7994 | c->x86_max_cores = 1; | |
7995 | + c->x86_coreid_bits = 0; | |
7996 | c->extended_cpuid_level = 0; | |
7997 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | |
7998 | ||
7999 | @@ -1113,7 +1142,7 @@ void __cpuinit early_identify_cpu(struct | |
8000 | (unsigned int *)&c->x86_vendor_id[0], | |
8001 | (unsigned int *)&c->x86_vendor_id[8], | |
8002 | (unsigned int *)&c->x86_vendor_id[4]); | |
8003 | - | |
cc90b958 | 8004 | + |
00e5a55c BS |
8005 | get_cpu_vendor(c); |
8006 | ||
8007 | /* Initialize the standard set of capabilities */ | |
8008 | @@ -1131,7 +1160,7 @@ void __cpuinit early_identify_cpu(struct | |
8009 | c->x86 += (tfms >> 20) & 0xff; | |
8010 | if (c->x86 >= 0x6) | |
8011 | c->x86_model += ((tfms >> 16) & 0xF) << 4; | |
8012 | - if (c->x86_capability[0] & (1<<19)) | |
8013 | + if (c->x86_capability[0] & (1<<19)) | |
8014 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; | |
8015 | } else { | |
8016 | /* Have CPUID level 0 only - unheard of */ | |
8017 | @@ -1141,18 +1170,6 @@ void __cpuinit early_identify_cpu(struct | |
8018 | #ifdef CONFIG_SMP | |
8019 | c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; | |
8020 | #endif | |
8021 | -} | |
8022 | - | |
8023 | -/* | |
8024 | - * This does the hard work of actually picking apart the CPU stuff... | |
8025 | - */ | |
8026 | -void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |
8027 | -{ | |
8028 | - int i; | |
8029 | - u32 xlvl; | |
8030 | - | |
8031 | - early_identify_cpu(c); | |
8032 | - | |
8033 | /* AMD-defined flags: level 0x80000001 */ | |
8034 | xlvl = cpuid_eax(0x80000000); | |
8035 | c->extended_cpuid_level = xlvl; | |
8036 | @@ -1173,6 +1190,30 @@ void __cpuinit identify_cpu(struct cpuin | |
8037 | c->x86_capability[2] = cpuid_edx(0x80860001); | |
8038 | } | |
8039 | ||
8040 | + c->extended_cpuid_level = cpuid_eax(0x80000000); | |
8041 | + if (c->extended_cpuid_level >= 0x80000007) | |
8042 | + c->x86_power = cpuid_edx(0x80000007); | |
cc90b958 | 8043 | + |
00e5a55c BS |
8044 | + switch (c->x86_vendor) { |
8045 | + case X86_VENDOR_AMD: | |
8046 | + early_init_amd(c); | |
8047 | + break; | |
8048 | + case X86_VENDOR_INTEL: | |
8049 | + early_init_intel(c); | |
8050 | + break; | |
cc90b958 | 8051 | + } |
cc90b958 | 8052 | + |
cc90b958 BS |
8053 | +} |
8054 | + | |
00e5a55c BS |
8055 | +/* |
8056 | + * This does the hard work of actually picking apart the CPU stuff... | |
8057 | + */ | |
8058 | +void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |
cc90b958 | 8059 | +{ |
00e5a55c | 8060 | + int i; |
cc90b958 | 8061 | + |
00e5a55c | 8062 | + early_identify_cpu(c); |
cc90b958 | 8063 | + |
00e5a55c BS |
8064 | init_scattered_cpuid_features(c); |
8065 | ||
8066 | c->apicid = phys_pkg_id(0); | |
8067 | @@ -1202,8 +1243,7 @@ void __cpuinit identify_cpu(struct cpuin | |
8068 | break; | |
8069 | } | |
8070 | ||
8071 | - select_idle_routine(c); | |
8072 | - detect_ht(c); | |
8073 | + detect_ht(c); | |
8074 | ||
8075 | /* | |
8076 | * On SMP, boot_cpu_data holds the common feature set between | |
8077 | @@ -1213,31 +1253,55 @@ void __cpuinit identify_cpu(struct cpuin | |
8078 | */ | |
8079 | if (c != &boot_cpu_data) { | |
8080 | /* AND the already accumulated flags with these */ | |
8081 | - for (i = 0 ; i < NCAPINTS ; i++) | |
8082 | + for (i = 0; i < NCAPINTS; i++) | |
8083 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | |
8084 | } | |
8085 | ||
8086 | + /* Clear all flags overriden by options */ | |
8087 | + for (i = 0; i < NCAPINTS; i++) | |
8088 | + c->x86_capability[i] &= ~cleared_cpu_caps[i]; | |
cc90b958 | 8089 | + |
00e5a55c BS |
8090 | #ifdef CONFIG_X86_MCE |
8091 | mcheck_init(c); | |
8092 | #endif | |
8093 | + select_idle_routine(c); | |
cc90b958 | 8094 | + |
00e5a55c BS |
8095 | if (c != &boot_cpu_data) |
8096 | mtrr_ap_init(); | |
8097 | #ifdef CONFIG_NUMA | |
8098 | numa_add_cpu(smp_processor_id()); | |
8099 | #endif | |
cc90b958 | 8100 | + |
00e5a55c BS |
8101 | } |
8102 | - | |
cc90b958 | 8103 | + |
00e5a55c BS |
8104 | +static __init int setup_noclflush(char *arg) |
8105 | +{ | |
8106 | + setup_clear_cpu_cap(X86_FEATURE_CLFLSH); | |
8107 | + return 1; | |
cc90b958 | 8108 | +} |
00e5a55c BS |
8109 | +__setup("noclflush", setup_noclflush); |
8110 | ||
8111 | void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | |
8112 | { | |
8113 | if (c->x86_model_id[0]) | |
8114 | - printk("%s", c->x86_model_id); | |
8115 | + printk(KERN_CONT "%s", c->x86_model_id); | |
cc90b958 | 8116 | + |
00e5a55c BS |
8117 | + if (c->x86_mask || c->cpuid_level >= 0) |
8118 | + printk(KERN_CONT " stepping %02x\n", c->x86_mask); | |
8119 | + else | |
8120 | + printk(KERN_CONT "\n"); | |
8121 | +} | |
8122 | ||
8123 | - if (c->x86_mask || c->cpuid_level >= 0) | |
8124 | - printk(" stepping %02x\n", c->x86_mask); | |
8125 | +static __init int setup_disablecpuid(char *arg) | |
cc90b958 | 8126 | +{ |
00e5a55c BS |
8127 | + int bit; |
8128 | + if (get_option(&arg, &bit) && bit < NCAPINTS*32) | |
8129 | + setup_clear_cpu_cap(bit); | |
8130 | else | |
8131 | - printk("\n"); | |
8132 | + return 0; | |
8133 | + return 1; | |
8134 | } | |
8135 | +__setup("clearcpuid=", setup_disablecpuid); | |
8136 | ||
8137 | /* | |
8138 | * Get CPU information for use by the procfs. | |
8139 | @@ -1246,116 +1310,41 @@ void __cpuinit print_cpu_info(struct cpu | |
8140 | static int show_cpuinfo(struct seq_file *m, void *v) | |
8141 | { | |
8142 | struct cpuinfo_x86 *c = v; | |
8143 | - int cpu = 0; | |
8144 | - | |
8145 | - /* | |
8146 | - * These flag bits must match the definitions in <asm/cpufeature.h>. | |
8147 | - * NULL means this bit is undefined or reserved; either way it doesn't | |
8148 | - * have meaning as far as Linux is concerned. Note that it's important | |
8149 | - * to realize there is a difference between this table and CPUID -- if | |
8150 | - * applications want to get the raw CPUID data, they should access | |
8151 | - * /dev/cpu/<cpu_nr>/cpuid instead. | |
8152 | - */ | |
8153 | - static const char *const x86_cap_flags[] = { | |
8154 | - /* Intel-defined */ | |
8155 | - "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", | |
8156 | - "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", | |
8157 | - "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", | |
8158 | - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", | |
8159 | - | |
8160 | - /* AMD-defined */ | |
8161 | - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
8162 | - NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, | |
8163 | - NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, | |
8164 | - NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", | |
8165 | - "3dnowext", "3dnow", | |
8166 | - | |
8167 | - /* Transmeta-defined */ | |
8168 | - "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, | |
8169 | - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
8170 | - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
8171 | - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
8172 | - | |
8173 | - /* Other (Linux-defined) */ | |
8174 | - "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", | |
8175 | - NULL, NULL, NULL, NULL, | |
8176 | - "constant_tsc", "up", NULL, "arch_perfmon", | |
8177 | - "pebs", "bts", NULL, "sync_rdtsc", | |
8178 | - "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
8179 | - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
8180 | - | |
8181 | - /* Intel-defined (#2) */ | |
8182 | - "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", | |
8183 | - "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, | |
8184 | - NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", | |
8185 | - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
8186 | - | |
8187 | - /* VIA/Cyrix/Centaur-defined */ | |
8188 | - NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", | |
8189 | - "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, | |
8190 | - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
8191 | - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
8192 | - | |
8193 | - /* AMD-defined (#2) */ | |
8194 | - "lahf_lm", "cmp_legacy", "svm", "extapic", | |
8195 | - "cr8_legacy", "abm", "sse4a", "misalignsse", | |
8196 | - "3dnowprefetch", "osvw", "ibs", "sse5", | |
8197 | - "skinit", "wdt", NULL, NULL, | |
8198 | - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
8199 | - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
8200 | - | |
8201 | - /* Auxiliary (Linux-defined) */ | |
8202 | - "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
8203 | - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
8204 | - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
8205 | - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
8206 | - }; | |
8207 | - static const char *const x86_power_flags[] = { | |
8208 | - "ts", /* temperature sensor */ | |
8209 | - "fid", /* frequency id control */ | |
8210 | - "vid", /* voltage id control */ | |
8211 | - "ttp", /* thermal trip */ | |
8212 | - "tm", | |
8213 | - "stc", | |
8214 | - "100mhzsteps", | |
8215 | - "hwpstate", | |
8216 | - "", /* tsc invariant mapped to constant_tsc */ | |
8217 | - /* nothing */ | |
8218 | - }; | |
8219 | - | |
8220 | + int cpu = 0, i; | |
8221 | ||
8222 | #ifdef CONFIG_SMP | |
8223 | cpu = c->cpu_index; | |
8224 | #endif | |
8225 | ||
8226 | - seq_printf(m,"processor\t: %u\n" | |
8227 | - "vendor_id\t: %s\n" | |
8228 | - "cpu family\t: %d\n" | |
8229 | - "model\t\t: %d\n" | |
8230 | - "model name\t: %s\n", | |
8231 | - (unsigned)cpu, | |
8232 | - c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", | |
8233 | - c->x86, | |
8234 | - (int)c->x86_model, | |
8235 | - c->x86_model_id[0] ? c->x86_model_id : "unknown"); | |
8236 | - | |
8237 | + seq_printf(m, "processor\t: %u\n" | |
8238 | + "vendor_id\t: %s\n" | |
8239 | + "cpu family\t: %d\n" | |
8240 | + "model\t\t: %d\n" | |
8241 | + "model name\t: %s\n", | |
8242 | + (unsigned)cpu, | |
8243 | + c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", | |
8244 | + c->x86, | |
8245 | + (int)c->x86_model, | |
8246 | + c->x86_model_id[0] ? c->x86_model_id : "unknown"); | |
cc90b958 | 8247 | + |
00e5a55c BS |
8248 | if (c->x86_mask || c->cpuid_level >= 0) |
8249 | seq_printf(m, "stepping\t: %d\n", c->x86_mask); | |
8250 | else | |
8251 | seq_printf(m, "stepping\t: unknown\n"); | |
8252 | - | |
8253 | - if (cpu_has(c,X86_FEATURE_TSC)) { | |
cc90b958 | 8254 | + |
00e5a55c BS |
8255 | + if (cpu_has(c, X86_FEATURE_TSC)) { |
8256 | unsigned int freq = cpufreq_quick_get((unsigned)cpu); | |
cc90b958 | 8257 | + |
00e5a55c BS |
8258 | if (!freq) |
8259 | freq = cpu_khz; | |
8260 | seq_printf(m, "cpu MHz\t\t: %u.%03u\n", | |
8261 | - freq / 1000, (freq % 1000)); | |
8262 | + freq / 1000, (freq % 1000)); | |
8263 | } | |
8264 | ||
8265 | /* Cache size */ | |
8266 | - if (c->x86_cache_size >= 0) | |
8267 | + if (c->x86_cache_size >= 0) | |
8268 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); | |
8269 | - | |
cc90b958 | 8270 | + |
00e5a55c BS |
8271 | #ifdef CONFIG_SMP |
8272 | if (smp_num_siblings * c->x86_max_cores > 1) { | |
8273 | seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); | |
8274 | @@ -1364,48 +1353,43 @@ static int show_cpuinfo(struct seq_file | |
8275 | seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); | |
8276 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); | |
8277 | } | |
8278 | -#endif | |
8279 | +#endif | |
8280 | ||
8281 | seq_printf(m, | |
8282 | - "fpu\t\t: yes\n" | |
8283 | - "fpu_exception\t: yes\n" | |
8284 | - "cpuid level\t: %d\n" | |
8285 | - "wp\t\t: yes\n" | |
8286 | - "flags\t\t:", | |
8287 | + "fpu\t\t: yes\n" | |
8288 | + "fpu_exception\t: yes\n" | |
8289 | + "cpuid level\t: %d\n" | |
8290 | + "wp\t\t: yes\n" | |
8291 | + "flags\t\t:", | |
8292 | c->cpuid_level); | |
8293 | ||
8294 | - { | |
8295 | - int i; | |
8296 | - for ( i = 0 ; i < 32*NCAPINTS ; i++ ) | |
8297 | - if (cpu_has(c, i) && x86_cap_flags[i] != NULL) | |
8298 | - seq_printf(m, " %s", x86_cap_flags[i]); | |
8299 | - } | |
8300 | - | |
8301 | + for (i = 0; i < 32*NCAPINTS; i++) | |
8302 | + if (cpu_has(c, i) && x86_cap_flags[i] != NULL) | |
8303 | + seq_printf(m, " %s", x86_cap_flags[i]); | |
cc90b958 | 8304 | + |
00e5a55c BS |
8305 | seq_printf(m, "\nbogomips\t: %lu.%02lu\n", |
8306 | c->loops_per_jiffy/(500000/HZ), | |
8307 | (c->loops_per_jiffy/(5000/HZ)) % 100); | |
8308 | ||
8309 | - if (c->x86_tlbsize > 0) | |
8310 | + if (c->x86_tlbsize > 0) | |
8311 | seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); | |
8312 | seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size); | |
8313 | seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); | |
8314 | ||
8315 | - seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", | |
8316 | + seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", | |
8317 | c->x86_phys_bits, c->x86_virt_bits); | |
8318 | ||
8319 | seq_printf(m, "power management:"); | |
8320 | - { | |
8321 | - unsigned i; | |
8322 | - for (i = 0; i < 32; i++) | |
8323 | - if (c->x86_power & (1 << i)) { | |
8324 | - if (i < ARRAY_SIZE(x86_power_flags) && | |
8325 | - x86_power_flags[i]) | |
8326 | - seq_printf(m, "%s%s", | |
8327 | - x86_power_flags[i][0]?" ":"", | |
8328 | - x86_power_flags[i]); | |
8329 | - else | |
8330 | - seq_printf(m, " [%d]", i); | |
8331 | - } | |
8332 | + for (i = 0; i < 32; i++) { | |
8333 | + if (c->x86_power & (1 << i)) { | |
8334 | + if (i < ARRAY_SIZE(x86_power_flags) && | |
8335 | + x86_power_flags[i]) | |
8336 | + seq_printf(m, "%s%s", | |
8337 | + x86_power_flags[i][0]?" ":"", | |
8338 | + x86_power_flags[i]); | |
8339 | + else | |
8340 | + seq_printf(m, " [%d]", i); | |
8341 | + } | |
8342 | } | |
8343 | ||
8344 | seq_printf(m, "\n\n"); | |
8345 | @@ -1432,8 +1416,8 @@ static void c_stop(struct seq_file *m, v | |
8346 | { | |
8347 | } | |
8348 | ||
8349 | -struct seq_operations cpuinfo_op = { | |
8350 | - .start =c_start, | |
8351 | +const struct seq_operations cpuinfo_op = { | |
8352 | + .start = c_start, | |
8353 | .next = c_next, | |
8354 | .stop = c_stop, | |
8355 | .show = show_cpuinfo, | |
8356 | --- sle11-2009-05-14.orig/arch/x86/kernel/smp_32-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
8357 | +++ sle11-2009-05-14/arch/x86/kernel/smp_32-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
8358 | @@ -168,7 +168,7 @@ void __send_IPI_shortcut(unsigned int sh | |
8359 | } | |
8360 | } | |
8361 | ||
8362 | -void fastcall send_IPI_self(int vector) | |
8363 | +void send_IPI_self(int vector) | |
8364 | { | |
8365 | __send_IPI_shortcut(APIC_DEST_SELF, vector); | |
8366 | } | |
8367 | @@ -224,13 +224,14 @@ static DEFINE_SPINLOCK(tlbstate_lock); | |
8368 | * We need to reload %cr3 since the page tables may be going | |
8369 | * away from under us.. | |
8370 | */ | |
8371 | -void leave_mm(unsigned long cpu) | |
8372 | +void leave_mm(int cpu) | |
8373 | { | |
8374 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) | |
8375 | BUG(); | |
8376 | cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); | |
8377 | load_cr3(swapper_pg_dir); | |
8378 | } | |
8379 | +EXPORT_SYMBOL_GPL(leave_mm); | |
8380 | ||
8381 | /* | |
8382 | * | |
8383 | --- sle11-2009-05-14.orig/arch/x86/kernel/smp_64-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
8384 | +++ sle11-2009-05-14/arch/x86/kernel/smp_64-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
8385 | @@ -33,7 +33,7 @@ | |
8386 | ||
8387 | #ifndef CONFIG_XEN | |
8388 | /* | |
8389 | - * Smarter SMP flushing macros. | |
8390 | + * Smarter SMP flushing macros. | |
8391 | * c/o Linus Torvalds. | |
8392 | * | |
8393 | * These mean you can really definitely utterly forget about | |
8394 | @@ -41,15 +41,15 @@ | |
8395 | * | |
8396 | * Optimizations Manfred Spraul <manfred@colorfullife.com> | |
8397 | * | |
8398 | - * More scalable flush, from Andi Kleen | |
8399 | + * More scalable flush, from Andi Kleen | |
8400 | * | |
8401 | - * To avoid global state use 8 different call vectors. | |
8402 | - * Each CPU uses a specific vector to trigger flushes on other | |
8403 | - * CPUs. Depending on the received vector the target CPUs look into | |
8404 | + * To avoid global state use 8 different call vectors. | |
8405 | + * Each CPU uses a specific vector to trigger flushes on other | |
8406 | + * CPUs. Depending on the received vector the target CPUs look into | |
8407 | * the right per cpu variable for the flush data. | |
8408 | * | |
8409 | - * With more than 8 CPUs they are hashed to the 8 available | |
8410 | - * vectors. The limited global vector space forces us to this right now. | |
8411 | + * With more than 8 CPUs they are hashed to the 8 available | |
8412 | + * vectors. The limited global vector space forces us to this right now. | |
8413 | * In future when interrupts are split into per CPU domains this could be | |
8414 | * fixed, at the cost of triggering multiple IPIs in some cases. | |
8415 | */ | |
8416 | @@ -59,7 +59,6 @@ union smp_flush_state { | |
8417 | cpumask_t flush_cpumask; | |
8418 | struct mm_struct *flush_mm; | |
8419 | unsigned long flush_va; | |
8420 | -#define FLUSH_ALL -1ULL | |
8421 | spinlock_t tlbstate_lock; | |
8422 | }; | |
8423 | char pad[SMP_CACHE_BYTES]; | |
8424 | @@ -71,16 +70,17 @@ union smp_flush_state { | |
8425 | static DEFINE_PER_CPU(union smp_flush_state, flush_state); | |
8426 | ||
8427 | /* | |
8428 | - * We cannot call mmdrop() because we are in interrupt context, | |
8429 | + * We cannot call mmdrop() because we are in interrupt context, | |
8430 | * instead update mm->cpu_vm_mask. | |
8431 | */ | |
8432 | -static inline void leave_mm(unsigned long cpu) | |
8433 | +void leave_mm(int cpu) | |
8434 | { | |
8435 | if (read_pda(mmu_state) == TLBSTATE_OK) | |
8436 | BUG(); | |
8437 | cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); | |
8438 | load_cr3(swapper_pg_dir); | |
8439 | } | |
8440 | +EXPORT_SYMBOL_GPL(leave_mm); | |
8441 | ||
8442 | /* | |
8443 | * | |
8444 | @@ -89,25 +89,25 @@ static inline void leave_mm(unsigned lon | |
8445 | * 1) switch_mm() either 1a) or 1b) | |
8446 | * 1a) thread switch to a different mm | |
8447 | * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); | |
8448 | - * Stop ipi delivery for the old mm. This is not synchronized with | |
8449 | - * the other cpus, but smp_invalidate_interrupt ignore flush ipis | |
8450 | - * for the wrong mm, and in the worst case we perform a superfluous | |
8451 | - * tlb flush. | |
8452 | + * Stop ipi delivery for the old mm. This is not synchronized with | |
8453 | + * the other cpus, but smp_invalidate_interrupt ignore flush ipis | |
8454 | + * for the wrong mm, and in the worst case we perform a superfluous | |
8455 | + * tlb flush. | |
8456 | * 1a2) set cpu mmu_state to TLBSTATE_OK | |
8457 | - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 | |
8458 | + * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 | |
8459 | * was in lazy tlb mode. | |
8460 | * 1a3) update cpu active_mm | |
8461 | - * Now cpu0 accepts tlb flushes for the new mm. | |
8462 | + * Now cpu0 accepts tlb flushes for the new mm. | |
8463 | * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); | |
8464 | - * Now the other cpus will send tlb flush ipis. | |
8465 | + * Now the other cpus will send tlb flush ipis. | |
8466 | * 1a4) change cr3. | |
8467 | * 1b) thread switch without mm change | |
8468 | * cpu active_mm is correct, cpu0 already handles | |
8469 | * flush ipis. | |
8470 | * 1b1) set cpu mmu_state to TLBSTATE_OK | |
8471 | * 1b2) test_and_set the cpu bit in cpu_vm_mask. | |
8472 | - * Atomically set the bit [other cpus will start sending flush ipis], | |
8473 | - * and test the bit. | |
8474 | + * Atomically set the bit [other cpus will start sending flush ipis], | |
8475 | + * and test the bit. | |
8476 | * 1b3) if the bit was 0: leave_mm was called, flush the tlb. | |
8477 | * 2) switch %%esp, ie current | |
8478 | * | |
8479 | @@ -141,12 +141,12 @@ asmlinkage void smp_invalidate_interrupt | |
8480 | * orig_rax contains the negated interrupt vector. | |
8481 | * Use that to determine where the sender put the data. | |
8482 | */ | |
8483 | - sender = ~regs->orig_rax - INVALIDATE_TLB_VECTOR_START; | |
8484 | + sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; | |
8485 | f = &per_cpu(flush_state, sender); | |
8486 | ||
8487 | if (!cpu_isset(cpu, f->flush_cpumask)) | |
8488 | goto out; | |
8489 | - /* | |
8490 | + /* | |
8491 | * This was a BUG() but until someone can quote me the | |
8492 | * line from the intel manual that guarantees an IPI to | |
8493 | * multiple CPUs is retried _only_ on the erroring CPUs | |
8494 | @@ -154,10 +154,10 @@ asmlinkage void smp_invalidate_interrupt | |
8495 | * | |
8496 | * BUG(); | |
8497 | */ | |
8498 | - | |
cc90b958 | 8499 | + |
00e5a55c BS |
8500 | if (f->flush_mm == read_pda(active_mm)) { |
8501 | if (read_pda(mmu_state) == TLBSTATE_OK) { | |
8502 | - if (f->flush_va == FLUSH_ALL) | |
8503 | + if (f->flush_va == TLB_FLUSH_ALL) | |
8504 | local_flush_tlb(); | |
8505 | else | |
8506 | __flush_tlb_one(f->flush_va); | |
8507 | @@ -170,19 +170,22 @@ out: | |
8508 | add_pda(irq_tlb_count, 1); | |
8509 | } | |
8510 | ||
8511 | -static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | |
8512 | - unsigned long va) | |
8513 | +void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | |
8514 | + unsigned long va) | |
8515 | { | |
8516 | int sender; | |
8517 | union smp_flush_state *f; | |
8518 | + cpumask_t cpumask = *cpumaskp; | |
8519 | ||
8520 | /* Caller has disabled preemption */ | |
8521 | sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; | |
8522 | f = &per_cpu(flush_state, sender); | |
8523 | ||
8524 | - /* Could avoid this lock when | |
8525 | - num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is | |
8526 | - probably not worth checking this for a cache-hot lock. */ | |
cc90b958 | 8527 | + /* |
00e5a55c BS |
8528 | + * Could avoid this lock when |
8529 | + * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is | |
8530 | + * probably not worth checking this for a cache-hot lock. | |
cc90b958 | 8531 | + */ |
00e5a55c BS |
8532 | spin_lock(&f->tlbstate_lock); |
8533 | ||
8534 | f->flush_mm = mm; | |
8535 | @@ -206,14 +209,14 @@ static void flush_tlb_others(cpumask_t c | |
8536 | int __cpuinit init_smp_flush(void) | |
8537 | { | |
8538 | int i; | |
cc90b958 | 8539 | + |
00e5a55c BS |
8540 | for_each_cpu_mask(i, cpu_possible_map) { |
8541 | spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); | |
8542 | } | |
8543 | return 0; | |
8544 | } | |
8545 | - | |
8546 | core_initcall(init_smp_flush); | |
8547 | - | |
cc90b958 | 8548 | + |
00e5a55c BS |
8549 | void flush_tlb_current_task(void) |
8550 | { | |
8551 | struct mm_struct *mm = current->mm; | |
8552 | @@ -225,10 +228,9 @@ void flush_tlb_current_task(void) | |
8553 | ||
8554 | local_flush_tlb(); | |
8555 | if (!cpus_empty(cpu_mask)) | |
8556 | - flush_tlb_others(cpu_mask, mm, FLUSH_ALL); | |
8557 | + flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); | |
8558 | preempt_enable(); | |
8559 | } | |
8560 | -EXPORT_SYMBOL(flush_tlb_current_task); | |
8561 | ||
8562 | void flush_tlb_mm (struct mm_struct * mm) | |
8563 | { | |
8564 | @@ -245,11 +247,10 @@ void flush_tlb_mm (struct mm_struct * mm | |
8565 | leave_mm(smp_processor_id()); | |
8566 | } | |
8567 | if (!cpus_empty(cpu_mask)) | |
8568 | - flush_tlb_others(cpu_mask, mm, FLUSH_ALL); | |
8569 | + flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); | |
8570 | ||
8571 | preempt_enable(); | |
8572 | } | |
8573 | -EXPORT_SYMBOL(flush_tlb_mm); | |
8574 | ||
8575 | void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) | |
8576 | { | |
8577 | @@ -263,8 +264,8 @@ void flush_tlb_page(struct vm_area_struc | |
8578 | if (current->active_mm == mm) { | |
8579 | if(current->mm) | |
8580 | __flush_tlb_one(va); | |
8581 | - else | |
8582 | - leave_mm(smp_processor_id()); | |
8583 | + else | |
8584 | + leave_mm(smp_processor_id()); | |
8585 | } | |
8586 | ||
8587 | if (!cpus_empty(cpu_mask)) | |
8588 | @@ -272,7 +273,6 @@ void flush_tlb_page(struct vm_area_struc | |
8589 | ||
8590 | preempt_enable(); | |
8591 | } | |
8592 | -EXPORT_SYMBOL(flush_tlb_page); | |
8593 | ||
8594 | static void do_flush_tlb_all(void* info) | |
8595 | { | |
8596 | @@ -330,11 +330,9 @@ void unlock_ipi_call_lock(void) | |
8597 | * this function sends a 'generic call function' IPI to all other CPU | |
8598 | * of the system defined in the mask. | |
8599 | */ | |
8600 | - | |
8601 | -static int | |
8602 | -__smp_call_function_mask(cpumask_t mask, | |
8603 | - void (*func)(void *), void *info, | |
8604 | - int wait) | |
8605 | +static int __smp_call_function_mask(cpumask_t mask, | |
8606 | + void (*func)(void *), void *info, | |
8607 | + int wait) | |
8608 | { | |
8609 | struct call_data_struct data; | |
8610 | cpumask_t allbutself; | |
8611 | @@ -422,11 +420,10 @@ EXPORT_SYMBOL(smp_call_function_mask); | |
8612 | */ | |
8613 | ||
8614 | int smp_call_function_single (int cpu, void (*func) (void *info), void *info, | |
8615 | - int nonatomic, int wait) | |
8616 | + int nonatomic, int wait) | |
8617 | { | |
8618 | /* prevent preemption and reschedule on another processor */ | |
8619 | - int ret; | |
8620 | - int me = get_cpu(); | |
8621 | + int ret, me = get_cpu(); | |
8622 | ||
8623 | /* Can deadlock when called with interrupts disabled */ | |
8624 | WARN_ON(irqs_disabled()); | |
8625 | @@ -476,9 +473,9 @@ static void stop_this_cpu(void *dummy) | |
8626 | */ | |
8627 | cpu_clear(smp_processor_id(), cpu_online_map); | |
8628 | disable_all_local_evtchn(); | |
8629 | - for (;;) | |
8630 | + for (;;) | |
8631 | halt(); | |
8632 | -} | |
cc90b958 | 8633 | +} |
00e5a55c BS |
8634 | |
8635 | void smp_send_stop(void) | |
8636 | { | |
8637 | --- sle11-2009-05-14.orig/arch/x86/kernel/time_32-xen.c 2009-03-24 10:12:35.000000000 +0100 | |
8638 | +++ sle11-2009-05-14/arch/x86/kernel/time_32-xen.c 2009-03-24 10:12:48.000000000 +0100 | |
8639 | @@ -28,21 +28,9 @@ | |
8640 | * serialize accesses to xtime/lost_ticks). | |
8641 | */ | |
8642 | ||
8643 | -#include <linux/errno.h> | |
8644 | -#include <linux/sched.h> | |
8645 | -#include <linux/kernel.h> | |
8646 | -#include <linux/param.h> | |
8647 | -#include <linux/string.h> | |
8648 | -#include <linux/mm.h> | |
8649 | +#include <linux/init.h> | |
8650 | #include <linux/interrupt.h> | |
8651 | #include <linux/time.h> | |
8652 | -#include <linux/delay.h> | |
8653 | -#include <linux/init.h> | |
8654 | -#include <linux/smp.h> | |
8655 | -#include <linux/module.h> | |
8656 | -#include <linux/sysdev.h> | |
8657 | -#include <linux/bcd.h> | |
8658 | -#include <linux/efi.h> | |
8659 | #include <linux/mca.h> | |
8660 | #include <linux/sysctl.h> | |
8661 | #include <linux/percpu.h> | |
8662 | @@ -50,26 +38,10 @@ | |
8663 | #include <linux/posix-timers.h> | |
8664 | #include <linux/cpufreq.h> | |
8665 | #include <linux/clocksource.h> | |
8666 | +#include <linux/sysdev.h> | |
8667 | ||
8668 | -#include <asm/io.h> | |
8669 | -#include <asm/smp.h> | |
8670 | -#include <asm/irq.h> | |
8671 | -#include <asm/msr.h> | |
8672 | #include <asm/delay.h> | |
8673 | -#include <asm/mpspec.h> | |
8674 | -#include <asm/uaccess.h> | |
8675 | -#include <asm/processor.h> | |
8676 | -#include <asm/timer.h> | |
8677 | #include <asm/time.h> | |
8678 | -#include <asm/sections.h> | |
8679 | - | |
8680 | -#include "mach_time.h" | |
8681 | - | |
8682 | -#include <linux/timex.h> | |
8683 | - | |
8684 | -#include <asm/hpet.h> | |
8685 | - | |
8686 | -#include <asm/arch_hooks.h> | |
8687 | ||
8688 | #include <xen/evtchn.h> | |
8689 | #include <xen/sysctl.h> | |
8690 | @@ -89,9 +61,6 @@ volatile unsigned long __jiffies __secti | |
8691 | unsigned int cpu_khz; /* Detected as we calibrate the TSC */ | |
8692 | EXPORT_SYMBOL(cpu_khz); | |
8693 | ||
8694 | -DEFINE_SPINLOCK(rtc_lock); | |
8695 | -EXPORT_SYMBOL(rtc_lock); | |
8696 | - | |
8697 | /* These are peridically updated in shared_info, and then copied here. */ | |
8698 | struct shadow_time_info { | |
8699 | u64 tsc_timestamp; /* TSC at last update of time vals. */ | |
8700 | @@ -154,6 +123,11 @@ static int __init __independent_wallcloc | |
8701 | } | |
8702 | __setup("independent_wallclock", __independent_wallclock); | |
8703 | ||
8704 | +int xen_independent_wallclock(void) | |
cc90b958 | 8705 | +{ |
00e5a55c | 8706 | + return independent_wallclock; |
cc90b958 BS |
8707 | +} |
8708 | + | |
00e5a55c BS |
8709 | /* Permitted clock jitter, in nsecs, beyond which a warning will be printed. */ |
8710 | static unsigned long permitted_clock_jitter = 10000000UL; /* 10ms */ | |
8711 | static int __init __permitted_clock_jitter(char *str) | |
8712 | @@ -223,7 +197,6 @@ static inline u64 get64(volatile u64 *pt | |
8713 | return cmpxchg64(ptr, 0, 0); | |
8714 | #else | |
8715 | return *ptr; | |
8716 | -#define cmpxchg64 cmpxchg | |
8717 | #endif | |
8718 | } | |
8719 | ||
8720 | @@ -233,7 +206,6 @@ static inline u64 get64_local(volatile u | |
8721 | return cmpxchg64_local(ptr, 0, 0); | |
8722 | #else | |
8723 | return *ptr; | |
8724 | -#define cmpxchg64_local cmpxchg_local | |
8725 | #endif | |
8726 | } | |
8727 | ||
8728 | @@ -339,35 +311,6 @@ static inline int time_values_up_to_date | |
8729 | return (dst->version == src->version); | |
8730 | } | |
8731 | ||
8732 | -/* | |
8733 | - * This is a special lock that is owned by the CPU and holds the index | |
8734 | - * register we are working with. It is required for NMI access to the | |
8735 | - * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details. | |
8736 | - */ | |
8737 | -volatile unsigned long cmos_lock = 0; | |
8738 | -EXPORT_SYMBOL(cmos_lock); | |
8739 | - | |
8740 | -/* Routines for accessing the CMOS RAM/RTC. */ | |
8741 | -unsigned char rtc_cmos_read(unsigned char addr) | |
8742 | -{ | |
8743 | - unsigned char val; | |
8744 | - lock_cmos_prefix(addr); | |
8745 | - outb_p(addr, RTC_PORT(0)); | |
8746 | - val = inb_p(RTC_PORT(1)); | |
8747 | - lock_cmos_suffix(addr); | |
8748 | - return val; | |
8749 | -} | |
8750 | -EXPORT_SYMBOL(rtc_cmos_read); | |
8751 | - | |
8752 | -void rtc_cmos_write(unsigned char val, unsigned char addr) | |
8753 | -{ | |
8754 | - lock_cmos_prefix(addr); | |
8755 | - outb_p(addr, RTC_PORT(0)); | |
8756 | - outb_p(val, RTC_PORT(1)); | |
8757 | - lock_cmos_suffix(addr); | |
8758 | -} | |
8759 | -EXPORT_SYMBOL(rtc_cmos_write); | |
8760 | - | |
8761 | static void sync_xen_wallclock(unsigned long dummy); | |
8762 | static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0); | |
8763 | static void sync_xen_wallclock(unsigned long dummy) | |
8764 | @@ -376,7 +319,8 @@ static void sync_xen_wallclock(unsigned | |
8765 | s64 nsec; | |
8766 | struct xen_platform_op op; | |
8767 | ||
8768 | - if (!ntp_synced() || independent_wallclock || !is_initial_xendomain()) | |
8769 | + BUG_ON(!is_initial_xendomain()); | |
8770 | + if (!ntp_synced() || independent_wallclock) | |
8771 | return; | |
8772 | ||
8773 | write_seqlock_irq(&xtime_lock); | |
8774 | @@ -399,23 +343,6 @@ static void sync_xen_wallclock(unsigned | |
8775 | mod_timer(&sync_xen_wallclock_timer, jiffies + 60*HZ); | |
8776 | } | |
8777 | ||
8778 | -static int set_rtc_mmss(unsigned long nowtime) | |
8779 | -{ | |
8780 | - int retval; | |
8781 | - unsigned long flags; | |
8782 | - | |
8783 | - if (independent_wallclock || !is_initial_xendomain()) | |
8784 | - return 0; | |
8785 | - | |
8786 | - /* gets recalled with irq locally disabled */ | |
8787 | - /* XXX - does irqsave resolve this? -johnstul */ | |
8788 | - spin_lock_irqsave(&rtc_lock, flags); | |
8789 | - retval = set_wallclock(nowtime); | |
8790 | - spin_unlock_irqrestore(&rtc_lock, flags); | |
8791 | - | |
8792 | - return retval; | |
8793 | -} | |
8794 | - | |
8795 | static unsigned long long local_clock(void) | |
8796 | { | |
8797 | unsigned int cpu = get_cpu(); | |
8798 | @@ -498,28 +425,24 @@ unsigned long profile_pc(struct pt_regs | |
8799 | ||
8800 | #if defined(CONFIG_SMP) || defined(__x86_64__) | |
8801 | # ifdef __i386__ | |
8802 | - if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs) | |
8803 | + if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->cs) | |
8804 | # else | |
8805 | if (!user_mode(regs) | |
8806 | # endif | |
8807 | && in_lock_functions(pc)) { | |
8808 | # ifdef CONFIG_FRAME_POINTER | |
8809 | -# ifdef __i386__ | |
8810 | - return ((unsigned long *)regs->ebp)[1]; | |
8811 | -# else | |
8812 | - return ((unsigned long *)regs->rbp)[1]; | |
8813 | -# endif | |
8814 | + return ((unsigned long *)regs->bp)[1]; | |
8815 | # else | |
8816 | # ifdef __i386__ | |
8817 | - unsigned long *sp = (unsigned long *)®s->esp; | |
8818 | + unsigned long *sp = (unsigned long *)®s->sp; | |
8819 | # else | |
8820 | - unsigned long *sp = (unsigned long *)regs->rsp; | |
8821 | + unsigned long *sp = (unsigned long *)regs->sp; | |
8822 | # endif | |
8823 | ||
8824 | /* Return address is either directly at stack pointer | |
8825 | - or above a saved eflags. Eflags has bits 22-31 zero, | |
8826 | + or above a saved flags. Eflags has bits 22-31 zero, | |
8827 | kernel addresses don't. */ | |
8828 | - if (sp[0] >> 22) | |
8829 | + if (sp[0] >> 22) | |
8830 | return sp[0]; | |
8831 | if (sp[1] >> 22) | |
8832 | return sp[1]; | |
8833 | @@ -748,25 +671,32 @@ static void init_missing_ticks_accountin | |
8834 | runstate->time[RUNSTATE_offline]; | |
8835 | } | |
8836 | ||
8837 | -/* not static: needed by APM */ | |
8838 | -unsigned long read_persistent_clock(void) | |
8839 | +unsigned long xen_read_persistent_clock(void) | |
8840 | { | |
8841 | - unsigned long retval; | |
8842 | - unsigned long flags; | |
8843 | - | |
8844 | - spin_lock_irqsave(&rtc_lock, flags); | |
8845 | + const shared_info_t *s = HYPERVISOR_shared_info; | |
8846 | + u32 version, sec, nsec; | |
8847 | + u64 delta; | |
8848 | ||
8849 | - retval = get_wallclock(); | |
8850 | + do { | |
8851 | + version = s->wc_version; | |
8852 | + rmb(); | |
8853 | + sec = s->wc_sec; | |
8854 | + nsec = s->wc_nsec; | |
8855 | + rmb(); | |
8856 | + } while ((s->wc_version & 1) | (version ^ s->wc_version)); | |
8857 | ||
8858 | - spin_unlock_irqrestore(&rtc_lock, flags); | |
8859 | + delta = local_clock() + (u64)sec * NSEC_PER_SEC + nsec; | |
8860 | + do_div(delta, NSEC_PER_SEC); | |
8861 | ||
8862 | - return retval; | |
8863 | + return delta; | |
8864 | } | |
8865 | ||
8866 | -int update_persistent_clock(struct timespec now) | |
8867 | +int xen_update_persistent_clock(void) | |
8868 | { | |
8869 | + if (!is_initial_xendomain()) | |
8870 | + return -1; | |
8871 | mod_timer(&sync_xen_wallclock_timer, jiffies + 1); | |
8872 | - return set_rtc_mmss(now.tv_sec); | |
8873 | + return 0; | |
8874 | } | |
8875 | ||
8876 | extern void (*late_time_init)(void); | |
8877 | --- sle11-2009-05-14.orig/arch/x86/kernel/traps_32-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
8878 | +++ sle11-2009-05-14/arch/x86/kernel/traps_32-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
8879 | @@ -79,7 +79,8 @@ char ignore_fpu_irq = 0; | |
8880 | * F0 0F bug workaround.. We have a special link segment | |
8881 | * for this. | |
8882 | */ | |
8883 | -struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; | |
8884 | +gate_desc idt_table[256] | |
8885 | + __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; | |
8886 | #endif | |
8887 | ||
8888 | asmlinkage void divide_error(void); | |
8889 | @@ -109,6 +110,34 @@ asmlinkage void machine_check(void); | |
8890 | int kstack_depth_to_print = 24; | |
8891 | static unsigned int code_bytes = 64; | |
8892 | ||
8893 | +void printk_address(unsigned long address, int reliable) | |
cc90b958 | 8894 | +{ |
00e5a55c BS |
8895 | +#ifdef CONFIG_KALLSYMS |
8896 | + unsigned long offset = 0, symsize; | |
8897 | + const char *symname; | |
8898 | + char *modname; | |
8899 | + char *delim = ":"; | |
8900 | + char namebuf[128]; | |
8901 | + char reliab[4] = ""; | |
cc90b958 | 8902 | + |
00e5a55c BS |
8903 | + symname = kallsyms_lookup(address, &symsize, &offset, |
8904 | + &modname, namebuf); | |
8905 | + if (!symname) { | |
8906 | + printk(" [<%08lx>]\n", address); | |
8907 | + return; | |
8908 | + } | |
8909 | + if (!reliable) | |
8910 | + strcpy(reliab, "? "); | |
cc90b958 | 8911 | + |
00e5a55c BS |
8912 | + if (!modname) |
8913 | + modname = delim = ""; | |
8914 | + printk(" [<%08lx>] %s%s%s%s%s+0x%lx/0x%lx\n", | |
8915 | + address, reliab, delim, modname, delim, symname, offset, symsize); | |
8916 | +#else | |
8917 | + printk(" [<%08lx>]\n", address); | |
cc90b958 | 8918 | +#endif |
cc90b958 | 8919 | +} |
cc90b958 | 8920 | + |
00e5a55c BS |
8921 | static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size) |
8922 | { | |
8923 | return p > (void *)tinfo && | |
8924 | @@ -122,48 +151,35 @@ struct stack_frame { | |
8925 | }; | |
8926 | ||
8927 | static inline unsigned long print_context_stack(struct thread_info *tinfo, | |
8928 | - unsigned long *stack, unsigned long ebp, | |
8929 | + unsigned long *stack, unsigned long bp, | |
8930 | const struct stacktrace_ops *ops, void *data) | |
8931 | { | |
8932 | -#ifdef CONFIG_FRAME_POINTER | |
8933 | - struct stack_frame *frame = (struct stack_frame *)ebp; | |
8934 | - while (valid_stack_ptr(tinfo, frame, sizeof(*frame))) { | |
8935 | - struct stack_frame *next; | |
8936 | - unsigned long addr; | |
8937 | + struct stack_frame *frame = (struct stack_frame *)bp; | |
8938 | ||
8939 | - addr = frame->return_address; | |
8940 | - ops->address(data, addr); | |
8941 | - /* | |
8942 | - * break out of recursive entries (such as | |
8943 | - * end_of_stack_stop_unwind_function). Also, | |
8944 | - * we can never allow a frame pointer to | |
8945 | - * move downwards! | |
8946 | - */ | |
8947 | - next = frame->next_frame; | |
8948 | - if (next <= frame) | |
8949 | - break; | |
8950 | - frame = next; | |
8951 | - } | |
8952 | -#else | |
8953 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) { | |
8954 | unsigned long addr; | |
8955 | ||
8956 | - addr = *stack++; | |
8957 | - if (__kernel_text_address(addr)) | |
8958 | - ops->address(data, addr); | |
8959 | + addr = *stack; | |
8960 | + if (__kernel_text_address(addr)) { | |
8961 | + if ((unsigned long) stack == bp + 4) { | |
8962 | + ops->address(data, addr, 1); | |
8963 | + frame = frame->next_frame; | |
8964 | + bp = (unsigned long) frame; | |
8965 | + } else { | |
8966 | + ops->address(data, addr, bp == 0); | |
8967 | + } | |
8968 | + } | |
8969 | + stack++; | |
8970 | } | |
8971 | -#endif | |
8972 | - return ebp; | |
8973 | + return bp; | |
8974 | } | |
8975 | ||
8976 | #define MSG(msg) ops->warning(data, msg) | |
8977 | ||
8978 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | |
8979 | - unsigned long *stack, | |
8980 | + unsigned long *stack, unsigned long bp, | |
8981 | const struct stacktrace_ops *ops, void *data) | |
8982 | { | |
8983 | - unsigned long ebp = 0; | |
8984 | - | |
8985 | if (!task) | |
8986 | task = current; | |
8987 | ||
8988 | @@ -171,17 +187,17 @@ void dump_trace(struct task_struct *task | |
8989 | unsigned long dummy; | |
8990 | stack = &dummy; | |
8991 | if (task != current) | |
8992 | - stack = (unsigned long *)task->thread.esp; | |
8993 | + stack = (unsigned long *)task->thread.sp; | |
8994 | } | |
8995 | ||
8996 | #ifdef CONFIG_FRAME_POINTER | |
8997 | - if (!ebp) { | |
8998 | + if (!bp) { | |
8999 | if (task == current) { | |
9000 | - /* Grab ebp right from our regs */ | |
9001 | - asm ("movl %%ebp, %0" : "=r" (ebp) : ); | |
9002 | + /* Grab bp right from our regs */ | |
9003 | + asm ("movl %%ebp, %0" : "=r" (bp) : ); | |
9004 | } else { | |
9005 | - /* ebp is the last reg pushed by switch_to */ | |
9006 | - ebp = *(unsigned long *) task->thread.esp; | |
9007 | + /* bp is the last reg pushed by switch_to */ | |
9008 | + bp = *(unsigned long *) task->thread.sp; | |
9009 | } | |
9010 | } | |
9011 | #endif | |
9012 | @@ -190,7 +206,7 @@ void dump_trace(struct task_struct *task | |
9013 | struct thread_info *context; | |
9014 | context = (struct thread_info *) | |
9015 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | |
9016 | - ebp = print_context_stack(context, stack, ebp, ops, data); | |
9017 | + bp = print_context_stack(context, stack, bp, ops, data); | |
9018 | /* Should be after the line below, but somewhere | |
9019 | in early boot context comes out corrupted and we | |
9020 | can't reference it -AK */ | |
9021 | @@ -225,9 +241,11 @@ static int print_trace_stack(void *data, | |
9022 | /* | |
9023 | * Print one address/symbol entries per line. | |
9024 | */ | |
9025 | -static void print_trace_address(void *data, unsigned long addr) | |
9026 | +static void print_trace_address(void *data, unsigned long addr, int reliable) | |
9027 | { | |
9028 | printk("%s [<%08lx>] ", (char *)data, addr); | |
9029 | + if (!reliable) | |
9030 | + printk("? "); | |
9031 | print_symbol("%s\n", addr); | |
9032 | touch_nmi_watchdog(); | |
9033 | } | |
9034 | @@ -241,32 +259,32 @@ static const struct stacktrace_ops print | |
9035 | ||
9036 | static void | |
9037 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | |
9038 | - unsigned long * stack, char *log_lvl) | |
9039 | + unsigned long *stack, unsigned long bp, char *log_lvl) | |
9040 | { | |
9041 | - dump_trace(task, regs, stack, &print_trace_ops, log_lvl); | |
9042 | + dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | |
9043 | printk("%s =======================\n", log_lvl); | |
9044 | } | |
9045 | ||
9046 | void show_trace(struct task_struct *task, struct pt_regs *regs, | |
9047 | - unsigned long * stack) | |
9048 | + unsigned long *stack, unsigned long bp) | |
9049 | { | |
9050 | - show_trace_log_lvl(task, regs, stack, ""); | |
9051 | + show_trace_log_lvl(task, regs, stack, bp, ""); | |
9052 | } | |
9053 | ||
9054 | static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |
9055 | - unsigned long *esp, char *log_lvl) | |
9056 | + unsigned long *sp, unsigned long bp, char *log_lvl) | |
9057 | { | |
9058 | unsigned long *stack; | |
9059 | int i; | |
9060 | ||
9061 | - if (esp == NULL) { | |
9062 | + if (sp == NULL) { | |
9063 | if (task) | |
9064 | - esp = (unsigned long*)task->thread.esp; | |
9065 | + sp = (unsigned long*)task->thread.sp; | |
9066 | else | |
9067 | - esp = (unsigned long *)&esp; | |
9068 | + sp = (unsigned long *)&sp; | |
9069 | } | |
9070 | ||
9071 | - stack = esp; | |
9072 | + stack = sp; | |
9073 | for(i = 0; i < kstack_depth_to_print; i++) { | |
9074 | if (kstack_end(stack)) | |
9075 | break; | |
9076 | @@ -275,13 +293,13 @@ static void show_stack_log_lvl(struct ta | |
9077 | printk("%08lx ", *stack++); | |
9078 | } | |
9079 | printk("\n%sCall Trace:\n", log_lvl); | |
9080 | - show_trace_log_lvl(task, regs, esp, log_lvl); | |
9081 | + show_trace_log_lvl(task, regs, sp, bp, log_lvl); | |
9082 | } | |
9083 | ||
9084 | -void show_stack(struct task_struct *task, unsigned long *esp) | |
9085 | +void show_stack(struct task_struct *task, unsigned long *sp) | |
9086 | { | |
9087 | printk(" "); | |
9088 | - show_stack_log_lvl(task, NULL, esp, ""); | |
9089 | + show_stack_log_lvl(task, NULL, sp, 0, ""); | |
9090 | } | |
9091 | ||
9092 | /* | |
9093 | @@ -290,13 +308,19 @@ void show_stack(struct task_struct *task | |
9094 | void dump_stack(void) | |
9095 | { | |
9096 | unsigned long stack; | |
9097 | + unsigned long bp = 0; | |
cc90b958 | 9098 | + |
00e5a55c BS |
9099 | +#ifdef CONFIG_FRAME_POINTER |
9100 | + if (!bp) | |
9101 | + asm("movl %%ebp, %0" : "=r" (bp):); | |
9102 | +#endif | |
9103 | ||
9104 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | |
9105 | current->pid, current->comm, print_tainted(), | |
9106 | init_utsname()->release, | |
9107 | (int)strcspn(init_utsname()->version, " "), | |
9108 | init_utsname()->version); | |
9109 | - show_trace(current, NULL, &stack); | |
9110 | + show_trace(current, NULL, &stack, bp); | |
9111 | } | |
9112 | ||
9113 | EXPORT_SYMBOL(dump_stack); | |
9114 | @@ -315,30 +339,30 @@ void show_registers(struct pt_regs *regs | |
9115 | * time of the fault.. | |
9116 | */ | |
9117 | if (!user_mode_vm(regs)) { | |
9118 | - u8 *eip; | |
9119 | + u8 *ip; | |
9120 | unsigned int code_prologue = code_bytes * 43 / 64; | |
9121 | unsigned int code_len = code_bytes; | |
9122 | unsigned char c; | |
9123 | ||
9124 | printk("\n" KERN_EMERG "Stack: "); | |
9125 | - show_stack_log_lvl(NULL, regs, ®s->esp, KERN_EMERG); | |
9126 | + show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); | |
9127 | ||
9128 | printk(KERN_EMERG "Code: "); | |
9129 | ||
9130 | - eip = (u8 *)regs->eip - code_prologue; | |
9131 | - if (eip < (u8 *)PAGE_OFFSET || | |
9132 | - probe_kernel_address(eip, c)) { | |
9133 | + ip = (u8 *)regs->ip - code_prologue; | |
9134 | + if (ip < (u8 *)PAGE_OFFSET || | |
9135 | + probe_kernel_address(ip, c)) { | |
9136 | /* try starting at EIP */ | |
9137 | - eip = (u8 *)regs->eip; | |
9138 | + ip = (u8 *)regs->ip; | |
9139 | code_len = code_len - code_prologue + 1; | |
9140 | } | |
9141 | - for (i = 0; i < code_len; i++, eip++) { | |
9142 | - if (eip < (u8 *)PAGE_OFFSET || | |
9143 | - probe_kernel_address(eip, c)) { | |
9144 | + for (i = 0; i < code_len; i++, ip++) { | |
9145 | + if (ip < (u8 *)PAGE_OFFSET || | |
9146 | + probe_kernel_address(ip, c)) { | |
9147 | printk(" Bad EIP value."); | |
9148 | break; | |
9149 | } | |
9150 | - if (eip == (u8 *)regs->eip) | |
9151 | + if (ip == (u8 *)regs->ip) | |
9152 | printk("<%02x> ", c); | |
9153 | else | |
9154 | printk("%02x ", c); | |
9155 | @@ -347,18 +371,57 @@ void show_registers(struct pt_regs *regs | |
9156 | printk("\n"); | |
9157 | } | |
9158 | ||
9159 | -int is_valid_bugaddr(unsigned long eip) | |
9160 | +int is_valid_bugaddr(unsigned long ip) | |
9161 | { | |
9162 | unsigned short ud2; | |
9163 | ||
9164 | - if (eip < PAGE_OFFSET) | |
9165 | + if (ip < PAGE_OFFSET) | |
9166 | return 0; | |
9167 | - if (probe_kernel_address((unsigned short *)eip, ud2)) | |
9168 | + if (probe_kernel_address((unsigned short *)ip, ud2)) | |
9169 | return 0; | |
9170 | ||
9171 | return ud2 == 0x0b0f; | |
9172 | } | |
9173 | ||
9174 | +static int die_counter; | |
cc90b958 | 9175 | + |
00e5a55c | 9176 | +int __kprobes __die(const char * str, struct pt_regs * regs, long err) |
cc90b958 | 9177 | +{ |
00e5a55c BS |
9178 | + unsigned long sp; |
9179 | + unsigned short ss; | |
cc90b958 | 9180 | + |
00e5a55c BS |
9181 | + printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); |
9182 | +#ifdef CONFIG_PREEMPT | |
9183 | + printk("PREEMPT "); | |
9184 | +#endif | |
9185 | +#ifdef CONFIG_SMP | |
9186 | + printk("SMP "); | |
cc90b958 | 9187 | +#endif |
cc90b958 | 9188 | +#ifdef CONFIG_DEBUG_PAGEALLOC |
00e5a55c | 9189 | + printk("DEBUG_PAGEALLOC"); |
cc90b958 | 9190 | +#endif |
00e5a55c | 9191 | + printk("\n"); |
cc90b958 | 9192 | + |
00e5a55c BS |
9193 | + if (notify_die(DIE_OOPS, str, regs, err, |
9194 | + current->thread.trap_no, SIGSEGV) != | |
9195 | + NOTIFY_STOP) { | |
9196 | + show_registers(regs); | |
9197 | + /* Executive summary in case the oops scrolled away */ | |
9198 | + sp = (unsigned long) (&regs->sp); | |
9199 | + savesegment(ss, ss); | |
9200 | + if (user_mode(regs)) { | |
9201 | + sp = regs->sp; | |
9202 | + ss = regs->ss & 0xffff; | |
9203 | + } | |
9204 | + printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | |
9205 | + print_symbol("%s", regs->ip); | |
9206 | + printk(" SS:ESP %04x:%08lx\n", ss, sp); | |
9207 | + return 0; | |
9208 | + } else { | |
9209 | + return 1; | |
9210 | + } | |
cc90b958 BS |
9211 | +} |
9212 | + | |
00e5a55c BS |
9213 | /* |
9214 | * This is gone through when something in the kernel has done something bad and | |
9215 | * is about to be terminated. | |
9216 | @@ -374,7 +437,6 @@ void die(const char * str, struct pt_reg | |
9217 | .lock_owner = -1, | |
9218 | .lock_owner_depth = 0 | |
9219 | }; | |
9220 | - static int die_counter; | |
9221 | unsigned long flags; | |
9222 | ||
9223 | oops_enter(); | |
9224 | @@ -390,43 +452,13 @@ void die(const char * str, struct pt_reg | |
9225 | raw_local_irq_save(flags); | |
9226 | ||
9227 | if (++die.lock_owner_depth < 3) { | |
9228 | - unsigned long esp; | |
9229 | - unsigned short ss; | |
9230 | - | |
9231 | - report_bug(regs->eip, regs); | |
9232 | - | |
9233 | - printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, | |
9234 | - ++die_counter); | |
9235 | -#ifdef CONFIG_PREEMPT | |
9236 | - printk("PREEMPT "); | |
9237 | -#endif | |
9238 | -#ifdef CONFIG_SMP | |
9239 | - printk("SMP "); | |
9240 | -#endif | |
9241 | -#ifdef CONFIG_DEBUG_PAGEALLOC | |
9242 | - printk("DEBUG_PAGEALLOC"); | |
9243 | -#endif | |
9244 | - printk("\n"); | |
9245 | + report_bug(regs->ip, regs); | |
9246 | ||
9247 | - if (notify_die(DIE_OOPS, str, regs, err, | |
9248 | - current->thread.trap_no, SIGSEGV) != | |
9249 | - NOTIFY_STOP) { | |
9250 | - show_registers(regs); | |
9251 | - /* Executive summary in case the oops scrolled away */ | |
9252 | - esp = (unsigned long) (&regs->esp); | |
9253 | - savesegment(ss, ss); | |
9254 | - if (user_mode(regs)) { | |
9255 | - esp = regs->esp; | |
9256 | - ss = regs->xss & 0xffff; | |
9257 | - } | |
9258 | - printk(KERN_EMERG "EIP: [<%08lx>] ", regs->eip); | |
9259 | - print_symbol("%s", regs->eip); | |
9260 | - printk(" SS:ESP %04x:%08lx\n", ss, esp); | |
9261 | - } | |
9262 | - else | |
9263 | + if (__die(str, regs, err)) | |
9264 | regs = NULL; | |
9265 | - } else | |
9266 | + } else { | |
9267 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); | |
9268 | + } | |
9269 | ||
9270 | bust_spinlocks(0); | |
9271 | die.lock_owner = -1; | |
9272 | @@ -462,7 +494,7 @@ static void __kprobes do_trap(int trapnr | |
9273 | { | |
9274 | struct task_struct *tsk = current; | |
9275 | ||
9276 | - if (regs->eflags & VM_MASK) { | |
9277 | + if (regs->flags & VM_MASK) { | |
9278 | if (vm86) | |
9279 | goto vm86_trap; | |
9280 | goto trap_signal; | |
9281 | @@ -508,7 +540,7 @@ static void __kprobes do_trap(int trapnr | |
9282 | } | |
9283 | ||
9284 | #define DO_ERROR(trapnr, signr, str, name) \ | |
9285 | -fastcall void do_##name(struct pt_regs * regs, long error_code) \ | |
9286 | +void do_##name(struct pt_regs * regs, long error_code) \ | |
9287 | { \ | |
9288 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | |
9289 | == NOTIFY_STOP) \ | |
9290 | @@ -517,7 +549,7 @@ fastcall void do_##name(struct pt_regs * | |
9291 | } | |
9292 | ||
9293 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ | |
9294 | -fastcall void do_##name(struct pt_regs * regs, long error_code) \ | |
9295 | +void do_##name(struct pt_regs * regs, long error_code) \ | |
9296 | { \ | |
9297 | siginfo_t info; \ | |
9298 | if (irq) \ | |
9299 | @@ -533,7 +565,7 @@ fastcall void do_##name(struct pt_regs * | |
9300 | } | |
9301 | ||
9302 | #define DO_VM86_ERROR(trapnr, signr, str, name) \ | |
9303 | -fastcall void do_##name(struct pt_regs * regs, long error_code) \ | |
9304 | +void do_##name(struct pt_regs * regs, long error_code) \ | |
9305 | { \ | |
9306 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | |
9307 | == NOTIFY_STOP) \ | |
9308 | @@ -542,7 +574,7 @@ fastcall void do_##name(struct pt_regs * | |
9309 | } | |
9310 | ||
9311 | #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | |
9312 | -fastcall void do_##name(struct pt_regs * regs, long error_code) \ | |
9313 | +void do_##name(struct pt_regs * regs, long error_code) \ | |
9314 | { \ | |
9315 | siginfo_t info; \ | |
9316 | info.si_signo = signr; \ | |
9317 | @@ -556,13 +588,13 @@ fastcall void do_##name(struct pt_regs * | |
9318 | do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ | |
9319 | } | |
9320 | ||
9321 | -DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip) | |
9322 | +DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) | |
9323 | #ifndef CONFIG_KPROBES | |
9324 | DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) | |
9325 | #endif | |
9326 | DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) | |
9327 | DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) | |
9328 | -DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip, 0) | |
9329 | +DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) | |
9330 | DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | |
9331 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | |
9332 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | |
9333 | @@ -570,10 +602,10 @@ DO_ERROR(12, SIGBUS, "stack segment", s | |
9334 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) | |
9335 | DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1) | |
9336 | ||
9337 | -fastcall void __kprobes do_general_protection(struct pt_regs * regs, | |
9338 | +void __kprobes do_general_protection(struct pt_regs * regs, | |
9339 | long error_code) | |
9340 | { | |
9341 | - if (regs->eflags & VM_MASK) | |
9342 | + if (regs->flags & VM_MASK) | |
9343 | goto gp_in_vm86; | |
9344 | ||
9345 | if (!user_mode(regs)) | |
9346 | @@ -582,11 +614,14 @@ fastcall void __kprobes do_general_prote | |
9347 | current->thread.error_code = error_code; | |
9348 | current->thread.trap_no = 13; | |
9349 | if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) && | |
9350 | - printk_ratelimit()) | |
9351 | + printk_ratelimit()) { | |
9352 | printk(KERN_INFO | |
9353 | - "%s[%d] general protection eip:%lx esp:%lx error:%lx\n", | |
9354 | + "%s[%d] general protection ip:%lx sp:%lx error:%lx", | |
9355 | current->comm, task_pid_nr(current), | |
9356 | - regs->eip, regs->esp, error_code); | |
9357 | + regs->ip, regs->sp, error_code); | |
9358 | + print_vma_addr(" in ", regs->ip); | |
9359 | + printk("\n"); | |
cc90b958 | 9360 | + } |
00e5a55c BS |
9361 | |
9362 | force_sig(SIGSEGV, current); | |
9363 | return; | |
9364 | @@ -675,8 +710,8 @@ void __kprobes die_nmi(struct pt_regs *r | |
9365 | */ | |
9366 | bust_spinlocks(1); | |
9367 | printk(KERN_EMERG "%s", msg); | |
9368 | - printk(" on CPU%d, eip %08lx, registers:\n", | |
9369 | - smp_processor_id(), regs->eip); | |
9370 | + printk(" on CPU%d, ip %08lx, registers:\n", | |
9371 | + smp_processor_id(), regs->ip); | |
9372 | show_registers(regs); | |
9373 | console_silent(); | |
9374 | spin_unlock(&nmi_print_lock); | |
9375 | @@ -733,7 +768,7 @@ static __kprobes void default_do_nmi(str | |
9376 | ||
9377 | static int ignore_nmis; | |
9378 | ||
9379 | -fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code) | |
9380 | +__kprobes void do_nmi(struct pt_regs * regs, long error_code) | |
9381 | { | |
9382 | int cpu; | |
9383 | ||
9384 | @@ -762,7 +797,7 @@ void restart_nmi(void) | |
9385 | } | |
9386 | ||
9387 | #ifdef CONFIG_KPROBES | |
9388 | -fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) | |
9389 | +void __kprobes do_int3(struct pt_regs *regs, long error_code) | |
9390 | { | |
9391 | trace_hardirqs_fixup(); | |
9392 | ||
9393 | @@ -798,7 +833,7 @@ fastcall void __kprobes do_int3(struct p | |
9394 | * find every occurrence of the TF bit that could be saved away even | |
9395 | * by user code) | |
9396 | */ | |
9397 | -fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code) | |
9398 | +void __kprobes do_debug(struct pt_regs * regs, long error_code) | |
9399 | { | |
9400 | unsigned int condition; | |
9401 | struct task_struct *tsk = current; | |
9402 | @@ -807,24 +842,30 @@ fastcall void __kprobes do_debug(struct | |
9403 | ||
9404 | get_debugreg(condition, 6); | |
9405 | ||
cc90b958 | 9406 | + /* |
00e5a55c | 9407 | + * The processor cleared BTF, so don't mark that we need it set. |
cc90b958 | 9408 | + */ |
00e5a55c BS |
9409 | + clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); |
9410 | + tsk->thread.debugctlmsr = 0; | |
cc90b958 | 9411 | + |
00e5a55c BS |
9412 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, |
9413 | SIGTRAP) == NOTIFY_STOP) | |
9414 | return; | |
9415 | /* It's safe to allow irq's after DR6 has been saved */ | |
9416 | - if (regs->eflags & X86_EFLAGS_IF) | |
9417 | + if (regs->flags & X86_EFLAGS_IF) | |
9418 | local_irq_enable(); | |
9419 | ||
9420 | /* Mask out spurious debug traps due to lazy DR7 setting */ | |
9421 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | |
9422 | - if (!tsk->thread.debugreg[7]) | |
9423 | + if (!tsk->thread.debugreg7) | |
9424 | goto clear_dr7; | |
9425 | } | |
9426 | ||
9427 | - if (regs->eflags & VM_MASK) | |
9428 | + if (regs->flags & VM_MASK) | |
9429 | goto debug_vm86; | |
9430 | ||
9431 | /* Save debug status register where ptrace can see it */ | |
9432 | - tsk->thread.debugreg[6] = condition; | |
9433 | + tsk->thread.debugreg6 = condition; | |
9434 | ||
9435 | /* | |
9436 | * Single-stepping through TF: make sure we ignore any events in | |
9437 | @@ -856,7 +897,7 @@ debug_vm86: | |
9438 | ||
9439 | clear_TF_reenable: | |
9440 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | |
9441 | - regs->eflags &= ~TF_MASK; | |
9442 | + regs->flags &= ~TF_MASK; | |
9443 | return; | |
9444 | } | |
9445 | ||
9446 | @@ -865,7 +906,7 @@ clear_TF_reenable: | |
9447 | * the correct behaviour even in the presence of the asynchronous | |
9448 | * IRQ13 behaviour | |
9449 | */ | |
9450 | -void math_error(void __user *eip) | |
9451 | +void math_error(void __user *ip) | |
9452 | { | |
9453 | struct task_struct * task; | |
9454 | siginfo_t info; | |
9455 | @@ -881,7 +922,7 @@ void math_error(void __user *eip) | |
9456 | info.si_signo = SIGFPE; | |
9457 | info.si_errno = 0; | |
9458 | info.si_code = __SI_FAULT; | |
9459 | - info.si_addr = eip; | |
9460 | + info.si_addr = ip; | |
9461 | /* | |
9462 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | |
9463 | * status. 0x3f is the exception bits in these regs, 0x200 is the | |
9464 | @@ -924,13 +965,13 @@ void math_error(void __user *eip) | |
9465 | force_sig_info(SIGFPE, &info, task); | |
9466 | } | |
9467 | ||
9468 | -fastcall void do_coprocessor_error(struct pt_regs * regs, long error_code) | |
9469 | +void do_coprocessor_error(struct pt_regs * regs, long error_code) | |
9470 | { | |
9471 | ignore_fpu_irq = 1; | |
9472 | - math_error((void __user *)regs->eip); | |
9473 | + math_error((void __user *)regs->ip); | |
9474 | } | |
9475 | ||
9476 | -static void simd_math_error(void __user *eip) | |
9477 | +static void simd_math_error(void __user *ip) | |
9478 | { | |
9479 | struct task_struct * task; | |
9480 | siginfo_t info; | |
9481 | @@ -946,7 +987,7 @@ static void simd_math_error(void __user | |
9482 | info.si_signo = SIGFPE; | |
9483 | info.si_errno = 0; | |
9484 | info.si_code = __SI_FAULT; | |
9485 | - info.si_addr = eip; | |
9486 | + info.si_addr = ip; | |
9487 | /* | |
9488 | * The SIMD FPU exceptions are handled a little differently, as there | |
9489 | * is only a single status/control register. Thus, to determine which | |
9490 | @@ -978,19 +1019,19 @@ static void simd_math_error(void __user | |
9491 | force_sig_info(SIGFPE, &info, task); | |
9492 | } | |
9493 | ||
9494 | -fastcall void do_simd_coprocessor_error(struct pt_regs * regs, | |
9495 | +void do_simd_coprocessor_error(struct pt_regs * regs, | |
9496 | long error_code) | |
9497 | { | |
9498 | if (cpu_has_xmm) { | |
9499 | /* Handle SIMD FPU exceptions on PIII+ processors. */ | |
9500 | ignore_fpu_irq = 1; | |
9501 | - simd_math_error((void __user *)regs->eip); | |
9502 | + simd_math_error((void __user *)regs->ip); | |
9503 | } else { | |
9504 | /* | |
9505 | * Handle strange cache flush from user space exception | |
9506 | * in all other cases. This is undocumented behaviour. | |
9507 | */ | |
9508 | - if (regs->eflags & VM_MASK) { | |
9509 | + if (regs->flags & VM_MASK) { | |
9510 | handle_vm86_fault((struct kernel_vm86_regs *)regs, | |
9511 | error_code); | |
9512 | return; | |
9513 | @@ -1003,7 +1044,7 @@ fastcall void do_simd_coprocessor_error( | |
9514 | } | |
9515 | ||
9516 | #ifndef CONFIG_XEN | |
9517 | -fastcall void do_spurious_interrupt_bug(struct pt_regs * regs, | |
9518 | +void do_spurious_interrupt_bug(struct pt_regs * regs, | |
9519 | long error_code) | |
9520 | { | |
9521 | #if 0 | |
9522 | @@ -1012,7 +1053,7 @@ fastcall void do_spurious_interrupt_bug( | |
9523 | #endif | |
9524 | } | |
9525 | ||
9526 | -fastcall unsigned long patch_espfix_desc(unsigned long uesp, | |
9527 | +unsigned long patch_espfix_desc(unsigned long uesp, | |
9528 | unsigned long kesp) | |
9529 | { | |
9530 | struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt; | |
9531 | @@ -1072,7 +1113,7 @@ asmlinkage void math_emulate(long arg) | |
9532 | * NB. All these are "trap gates" (i.e. events_mask isn't set) except | |
9533 | * for those that specify <dpl>|4 in the second field. | |
9534 | */ | |
9535 | -static trap_info_t __cpuinitdata trap_table[] = { | |
9536 | +static const trap_info_t __cpuinitconst trap_table[] = { | |
9537 | { 0, 0, __KERNEL_CS, (unsigned long)divide_error }, | |
9538 | { 1, 0|4, __KERNEL_CS, (unsigned long)debug }, | |
9539 | { 3, 3|4, __KERNEL_CS, (unsigned long)int3 }, | |
9540 | @@ -1105,17 +1146,12 @@ void __init trap_init(void) | |
9541 | if (ret) | |
9542 | printk("HYPERVISOR_set_trap_table failed: error %d\n", ret); | |
9543 | ||
cc90b958 | 9544 | + /* |
00e5a55c BS |
9545 | + * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. |
9546 | + * Generate a build-time error if the alignment is wrong. | |
cc90b958 | 9547 | + */ |
00e5a55c BS |
9548 | + BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15); |
9549 | if (cpu_has_fxsr) { | |
9550 | - /* | |
9551 | - * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. | |
9552 | - * Generates a compile-time "error: zero width for bit-field" if | |
9553 | - * the alignment is wrong. | |
9554 | - */ | |
9555 | - struct fxsrAlignAssert { | |
9556 | - int _:!(offsetof(struct task_struct, | |
9557 | - thread.i387.fxsave) & 15); | |
9558 | - }; | |
9559 | - | |
9560 | printk(KERN_INFO "Enabling fast FPU save and restore... "); | |
9561 | set_in_cr4(X86_CR4_OSFXSR); | |
9562 | printk("done.\n"); | |
9563 | --- sle11-2009-05-14.orig/arch/x86/kernel/traps_64-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
9564 | +++ sle11-2009-05-14/arch/x86/kernel/traps_64-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
9565 | @@ -74,38 +74,41 @@ asmlinkage void alignment_check(void); | |
9566 | asmlinkage void machine_check(void); | |
9567 | asmlinkage void spurious_interrupt_bug(void); | |
9568 | ||
9569 | +static unsigned int code_bytes = 64; | |
cc90b958 | 9570 | + |
00e5a55c BS |
9571 | static inline void conditional_sti(struct pt_regs *regs) |
9572 | { | |
9573 | - if (regs->eflags & X86_EFLAGS_IF) | |
9574 | + if (regs->flags & X86_EFLAGS_IF) | |
9575 | local_irq_enable(); | |
9576 | } | |
9577 | ||
9578 | static inline void preempt_conditional_sti(struct pt_regs *regs) | |
9579 | { | |
9580 | - preempt_disable(); | |
9581 | - if (regs->eflags & X86_EFLAGS_IF) | |
9582 | + inc_preempt_count(); | |
9583 | + if (regs->flags & X86_EFLAGS_IF) | |
9584 | local_irq_enable(); | |
9585 | } | |
9586 | ||
9587 | static inline void preempt_conditional_cli(struct pt_regs *regs) | |
9588 | { | |
9589 | - if (regs->eflags & X86_EFLAGS_IF) | |
9590 | + if (regs->flags & X86_EFLAGS_IF) | |
9591 | local_irq_disable(); | |
9592 | /* Make sure to not schedule here because we could be running | |
9593 | on an exception stack. */ | |
9594 | - preempt_enable_no_resched(); | |
9595 | + dec_preempt_count(); | |
9596 | } | |
9597 | ||
9598 | int kstack_depth_to_print = 12; | |
9599 | ||
9600 | -#ifdef CONFIG_KALLSYMS | |
9601 | -void printk_address(unsigned long address) | |
9602 | +void printk_address(unsigned long address, int reliable) | |
9603 | { | |
9604 | +#ifdef CONFIG_KALLSYMS | |
9605 | unsigned long offset = 0, symsize; | |
9606 | const char *symname; | |
9607 | char *modname; | |
9608 | char *delim = ":"; | |
9609 | - char namebuf[128]; | |
9610 | + char namebuf[KSYM_NAME_LEN]; | |
9611 | + char reliab[4] = ""; | |
9612 | ||
9613 | symname = kallsyms_lookup(address, &symsize, &offset, | |
9614 | &modname, namebuf); | |
9615 | @@ -113,17 +116,17 @@ void printk_address(unsigned long addres | |
9616 | printk(" [<%016lx>]\n", address); | |
9617 | return; | |
9618 | } | |
9619 | + if (!reliable) | |
9620 | + strcpy(reliab, "? "); | |
cc90b958 | 9621 | + |
00e5a55c BS |
9622 | if (!modname) |
9623 | - modname = delim = ""; | |
9624 | - printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n", | |
9625 | - address, delim, modname, delim, symname, offset, symsize); | |
9626 | -} | |
9627 | + modname = delim = ""; | |
9628 | + printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n", | |
9629 | + address, reliab, delim, modname, delim, symname, offset, symsize); | |
9630 | #else | |
9631 | -void printk_address(unsigned long address) | |
9632 | -{ | |
9633 | printk(" [<%016lx>]\n", address); | |
9634 | -} | |
9635 | #endif | |
9636 | +} | |
9637 | ||
9638 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |
9639 | unsigned *usedp, char **idp) | |
9640 | @@ -210,14 +213,53 @@ static unsigned long *in_exception_stack | |
9641 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | |
9642 | */ | |
9643 | ||
9644 | -static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) | |
9645 | +static inline int valid_stack_ptr(struct thread_info *tinfo, | |
9646 | + void *p, unsigned int size, void *end) | |
9647 | { | |
9648 | - void *t = (void *)tinfo; | |
9649 | - return p > t && p < t + THREAD_SIZE - 3; | |
9650 | + void *t = tinfo; | |
9651 | + if (end) { | |
9652 | + if (p < end && p >= (end-THREAD_SIZE)) | |
9653 | + return 1; | |
9654 | + else | |
9655 | + return 0; | |
9656 | + } | |
9657 | + return p > t && p < t + THREAD_SIZE - size; | |
cc90b958 BS |
9658 | +} |
9659 | + | |
00e5a55c BS |
9660 | +/* The form of the top of the frame on the stack */ |
9661 | +struct stack_frame { | |
9662 | + struct stack_frame *next_frame; | |
9663 | + unsigned long return_address; | |
9664 | +}; | |
cc90b958 | 9665 | + |
cc90b958 | 9666 | + |
00e5a55c BS |
9667 | +static inline unsigned long print_context_stack(struct thread_info *tinfo, |
9668 | + unsigned long *stack, unsigned long bp, | |
9669 | + const struct stacktrace_ops *ops, void *data, | |
9670 | + unsigned long *end) | |
cc90b958 | 9671 | +{ |
00e5a55c | 9672 | + struct stack_frame *frame = (struct stack_frame *)bp; |
cc90b958 | 9673 | + |
00e5a55c BS |
9674 | + while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { |
9675 | + unsigned long addr; | |
cc90b958 | 9676 | + |
00e5a55c BS |
9677 | + addr = *stack; |
9678 | + if (__kernel_text_address(addr)) { | |
9679 | + if ((unsigned long) stack == bp + 8) { | |
9680 | + ops->address(data, addr, 1); | |
9681 | + frame = frame->next_frame; | |
9682 | + bp = (unsigned long) frame; | |
9683 | + } else { | |
9684 | + ops->address(data, addr, bp == 0); | |
9685 | + } | |
9686 | + } | |
9687 | + stack++; | |
9688 | + } | |
9689 | + return bp; | |
9690 | } | |
9691 | ||
9692 | void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | |
9693 | - unsigned long *stack, | |
9694 | + unsigned long *stack, unsigned long bp, | |
9695 | const struct stacktrace_ops *ops, void *data) | |
9696 | { | |
9697 | const unsigned cpu = get_cpu(); | |
9698 | @@ -227,36 +269,28 @@ void dump_trace(struct task_struct *tsk, | |
9699 | ||
9700 | if (!tsk) | |
9701 | tsk = current; | |
9702 | + tinfo = task_thread_info(tsk); | |
9703 | ||
9704 | if (!stack) { | |
9705 | unsigned long dummy; | |
9706 | stack = &dummy; | |
9707 | if (tsk && tsk != current) | |
9708 | - stack = (unsigned long *)tsk->thread.rsp; | |
9709 | + stack = (unsigned long *)tsk->thread.sp; | |
9710 | } | |
9711 | ||
9712 | - /* | |
9713 | - * Print function call entries within a stack. 'cond' is the | |
9714 | - * "end of stackframe" condition, that the 'stack++' | |
9715 | - * iteration will eventually trigger. | |
9716 | - */ | |
9717 | -#define HANDLE_STACK(cond) \ | |
9718 | - do while (cond) { \ | |
9719 | - unsigned long addr = *stack++; \ | |
9720 | - /* Use unlocked access here because except for NMIs \ | |
9721 | - we should be already protected against module unloads */ \ | |
9722 | - if (__kernel_text_address(addr)) { \ | |
9723 | - /* \ | |
9724 | - * If the address is either in the text segment of the \ | |
9725 | - * kernel, or in the region which contains vmalloc'ed \ | |
9726 | - * memory, it *may* be the address of a calling \ | |
9727 | - * routine; if so, print it so that someone tracing \ | |
9728 | - * down the cause of the crash will be able to figure \ | |
9729 | - * out the call path that was taken. \ | |
9730 | - */ \ | |
9731 | - ops->address(data, addr); \ | |
9732 | - } \ | |
9733 | - } while (0) | |
9734 | +#ifdef CONFIG_FRAME_POINTER | |
9735 | + if (!bp) { | |
9736 | + if (tsk == current) { | |
9737 | + /* Grab bp right from our regs */ | |
9738 | + asm("movq %%rbp, %0" : "=r" (bp):); | |
9739 | + } else { | |
9740 | + /* bp is the last reg pushed by switch_to */ | |
9741 | + bp = *(unsigned long *) tsk->thread.sp; | |
cc90b958 BS |
9742 | + } |
9743 | + } | |
9744 | +#endif | |
cc90b958 | 9745 | + |
cc90b958 | 9746 | + |
00e5a55c BS |
9747 | |
9748 | /* | |
9749 | * Print function call entries in all stacks, starting at the | |
9750 | @@ -272,7 +306,9 @@ void dump_trace(struct task_struct *tsk, | |
9751 | if (estack_end) { | |
9752 | if (ops->stack(data, id) < 0) | |
9753 | break; | |
9754 | - HANDLE_STACK (stack < estack_end); | |
cc90b958 | 9755 | + |
00e5a55c BS |
9756 | + bp = print_context_stack(tinfo, stack, bp, ops, |
9757 | + data, estack_end); | |
9758 | ops->stack(data, "<EOE>"); | |
9759 | /* | |
9760 | * We link to the next stack via the | |
9761 | @@ -290,7 +326,8 @@ void dump_trace(struct task_struct *tsk, | |
9762 | if (stack >= irqstack && stack < irqstack_end) { | |
9763 | if (ops->stack(data, "IRQ") < 0) | |
9764 | break; | |
9765 | - HANDLE_STACK (stack < irqstack_end); | |
9766 | + bp = print_context_stack(tinfo, stack, bp, | |
9767 | + ops, data, irqstack_end); | |
9768 | /* | |
9769 | * We link to the next stack (which would be | |
9770 | * the process stack normally) the last | |
9771 | @@ -308,9 +345,7 @@ void dump_trace(struct task_struct *tsk, | |
9772 | /* | |
9773 | * This handles the process stack: | |
9774 | */ | |
9775 | - tinfo = task_thread_info(tsk); | |
9776 | - HANDLE_STACK (valid_stack_ptr(tinfo, stack)); | |
9777 | -#undef HANDLE_STACK | |
9778 | + bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); | |
9779 | put_cpu(); | |
9780 | } | |
9781 | EXPORT_SYMBOL(dump_trace); | |
9782 | @@ -333,10 +368,10 @@ static int print_trace_stack(void *data, | |
9783 | return 0; | |
9784 | } | |
9785 | ||
9786 | -static void print_trace_address(void *data, unsigned long addr) | |
9787 | +static void print_trace_address(void *data, unsigned long addr, int reliable) | |
9788 | { | |
9789 | touch_nmi_watchdog(); | |
9790 | - printk_address(addr); | |
9791 | + printk_address(addr, reliable); | |
9792 | } | |
9793 | ||
9794 | static const struct stacktrace_ops print_trace_ops = { | |
9795 | @@ -347,15 +382,17 @@ static const struct stacktrace_ops print | |
9796 | }; | |
9797 | ||
9798 | void | |
9799 | -show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack) | |
9800 | +show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, | |
9801 | + unsigned long bp) | |
9802 | { | |
9803 | printk("\nCall Trace:\n"); | |
9804 | - dump_trace(tsk, regs, stack, &print_trace_ops, NULL); | |
9805 | + dump_trace(tsk, regs, stack, bp, &print_trace_ops, NULL); | |
9806 | printk("\n"); | |
9807 | } | |
9808 | ||
9809 | static void | |
9810 | -_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp) | |
9811 | +_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp, | |
9812 | + unsigned long bp) | |
9813 | { | |
9814 | unsigned long *stack; | |
9815 | int i; | |
9816 | @@ -366,14 +403,14 @@ _show_stack(struct task_struct *tsk, str | |
9817 | // debugging aid: "show_stack(NULL, NULL);" prints the | |
9818 | // back trace for this cpu. | |
9819 | ||
9820 | - if (rsp == NULL) { | |
9821 | + if (sp == NULL) { | |
9822 | if (tsk) | |
9823 | - rsp = (unsigned long *)tsk->thread.rsp; | |
9824 | + sp = (unsigned long *)tsk->thread.sp; | |
9825 | else | |
9826 | - rsp = (unsigned long *)&rsp; | |
9827 | + sp = (unsigned long *)&sp; | |
9828 | } | |
9829 | ||
9830 | - stack = rsp; | |
9831 | + stack = sp; | |
9832 | for(i=0; i < kstack_depth_to_print; i++) { | |
9833 | if (stack >= irqstack && stack <= irqstack_end) { | |
9834 | if (stack == irqstack_end) { | |
9835 | @@ -389,12 +426,12 @@ _show_stack(struct task_struct *tsk, str | |
9836 | printk(" %016lx", *stack++); | |
9837 | touch_nmi_watchdog(); | |
9838 | } | |
9839 | - show_trace(tsk, regs, rsp); | |
9840 | + show_trace(tsk, regs, sp, bp); | |
9841 | } | |
9842 | ||
9843 | -void show_stack(struct task_struct *tsk, unsigned long * rsp) | |
9844 | +void show_stack(struct task_struct *tsk, unsigned long * sp) | |
9845 | { | |
9846 | - _show_stack(tsk, NULL, rsp); | |
9847 | + _show_stack(tsk, NULL, sp, 0); | |
9848 | } | |
9849 | ||
9850 | /* | |
9851 | @@ -403,13 +440,19 @@ void show_stack(struct task_struct *tsk, | |
9852 | void dump_stack(void) | |
9853 | { | |
9854 | unsigned long dummy; | |
9855 | + unsigned long bp = 0; | |
9856 | + | |
9857 | +#ifdef CONFIG_FRAME_POINTER | |
9858 | + if (!bp) | |
9859 | + asm("movq %%rbp, %0" : "=r" (bp):); | |
cc90b958 | 9860 | +#endif |
00e5a55c BS |
9861 | |
9862 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | |
9863 | current->pid, current->comm, print_tainted(), | |
9864 | init_utsname()->release, | |
9865 | (int)strcspn(init_utsname()->version, " "), | |
9866 | init_utsname()->version); | |
9867 | - show_trace(NULL, NULL, &dummy); | |
9868 | + show_trace(NULL, NULL, &dummy, bp); | |
9869 | } | |
9870 | ||
9871 | EXPORT_SYMBOL(dump_stack); | |
9872 | @@ -417,12 +460,15 @@ EXPORT_SYMBOL(dump_stack); | |
9873 | void show_registers(struct pt_regs *regs) | |
9874 | { | |
9875 | int i; | |
9876 | - int in_kernel = !user_mode(regs); | |
9877 | - unsigned long rsp; | |
9878 | + unsigned long sp; | |
9879 | const int cpu = smp_processor_id(); | |
9880 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; | |
9881 | + u8 *ip; | |
9882 | + unsigned int code_prologue = code_bytes * 43 / 64; | |
9883 | + unsigned int code_len = code_bytes; | |
9884 | ||
9885 | - rsp = regs->rsp; | |
9886 | + sp = regs->sp; | |
9887 | + ip = (u8 *) regs->ip - code_prologue; | |
9888 | printk("CPU %d ", cpu); | |
9889 | __show_regs(regs); | |
9890 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | |
9891 | @@ -432,45 +478,43 @@ void show_registers(struct pt_regs *regs | |
9892 | * When in-kernel, we also print out the stack and code at the | |
9893 | * time of the fault.. | |
9894 | */ | |
9895 | - if (in_kernel) { | |
9896 | + if (!user_mode(regs)) { | |
9897 | + unsigned char c; | |
9898 | printk("Stack: "); | |
9899 | - _show_stack(NULL, regs, (unsigned long*)rsp); | |
9900 | + _show_stack(NULL, regs, (unsigned long *)sp, regs->bp); | |
9901 | + printk("\n"); | |
9902 | ||
9903 | - printk("\nCode: "); | |
9904 | - if (regs->rip < PAGE_OFFSET) | |
9905 | - goto bad; | |
9906 | - | |
9907 | - for (i=0; i<20; i++) { | |
9908 | - unsigned char c; | |
9909 | - if (__get_user(c, &((unsigned char*)regs->rip)[i])) { | |
9910 | -bad: | |
9911 | + printk(KERN_EMERG "Code: "); | |
9912 | + if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { | |
9913 | + /* try starting at RIP */ | |
9914 | + ip = (u8 *) regs->ip; | |
9915 | + code_len = code_len - code_prologue + 1; | |
9916 | + } | |
9917 | + for (i = 0; i < code_len; i++, ip++) { | |
9918 | + if (ip < (u8 *)PAGE_OFFSET || | |
9919 | + probe_kernel_address(ip, c)) { | |
9920 | printk(" Bad RIP value."); | |
9921 | break; | |
9922 | } | |
9923 | - printk("%02x ", c); | |
9924 | + if (ip == (u8 *)regs->ip) | |
9925 | + printk("<%02x> ", c); | |
9926 | + else | |
9927 | + printk("%02x ", c); | |
9928 | } | |
9929 | } | |
9930 | printk("\n"); | |
9931 | } | |
9932 | ||
9933 | -int is_valid_bugaddr(unsigned long rip) | |
9934 | +int is_valid_bugaddr(unsigned long ip) | |
9935 | { | |
9936 | unsigned short ud2; | |
9937 | ||
9938 | - if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2))) | |
9939 | + if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2))) | |
9940 | return 0; | |
9941 | ||
9942 | return ud2 == 0x0b0f; | |
9943 | } | |
9944 | ||
9945 | -#ifdef CONFIG_BUG | |
9946 | -void out_of_line_bug(void) | |
9947 | -{ | |
9948 | - BUG(); | |
9949 | -} | |
9950 | -EXPORT_SYMBOL(out_of_line_bug); | |
9951 | -#endif | |
9952 | - | |
9953 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | |
9954 | static int die_owner = -1; | |
9955 | static unsigned int die_nest_count; | |
9956 | @@ -498,7 +542,7 @@ unsigned __kprobes long oops_begin(void) | |
9957 | return flags; | |
9958 | } | |
9959 | ||
9960 | -void __kprobes oops_end(unsigned long flags) | |
9961 | +void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | |
9962 | { | |
9963 | die_owner = -1; | |
9964 | bust_spinlocks(0); | |
9965 | @@ -507,12 +551,17 @@ void __kprobes oops_end(unsigned long fl | |
9966 | /* Nest count reaches zero, release the lock. */ | |
9967 | __raw_spin_unlock(&die_lock); | |
9968 | raw_local_irq_restore(flags); | |
9969 | + if (!regs) { | |
9970 | + oops_exit(); | |
9971 | + return; | |
cc90b958 | 9972 | + } |
00e5a55c BS |
9973 | if (panic_on_oops) |
9974 | panic("Fatal exception"); | |
9975 | oops_exit(); | |
9976 | + do_exit(signr); | |
9977 | } | |
9978 | ||
9979 | -void __kprobes __die(const char * str, struct pt_regs * regs, long err) | |
9980 | +int __kprobes __die(const char * str, struct pt_regs * regs, long err) | |
9981 | { | |
9982 | static int die_counter; | |
9983 | printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter); | |
9984 | @@ -526,15 +575,17 @@ void __kprobes __die(const char * str, s | |
9985 | printk("DEBUG_PAGEALLOC"); | |
9986 | #endif | |
9987 | printk("\n"); | |
9988 | - notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV); | |
9989 | + if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | |
9990 | + return 1; | |
9991 | show_registers(regs); | |
9992 | add_taint(TAINT_DIE); | |
9993 | /* Executive summary in case the oops scrolled away */ | |
9994 | printk(KERN_ALERT "RIP "); | |
9995 | - printk_address(regs->rip); | |
9996 | - printk(" RSP <%016lx>\n", regs->rsp); | |
9997 | + printk_address(regs->ip, 1); | |
9998 | + printk(" RSP <%016lx>\n", regs->sp); | |
9999 | if (kexec_should_crash(current)) | |
10000 | crash_kexec(regs); | |
10001 | + return 0; | |
10002 | } | |
10003 | ||
10004 | void die(const char * str, struct pt_regs * regs, long err) | |
10005 | @@ -542,11 +593,11 @@ void die(const char * str, struct pt_reg | |
10006 | unsigned long flags = oops_begin(); | |
10007 | ||
10008 | if (!user_mode(regs)) | |
10009 | - report_bug(regs->rip, regs); | |
10010 | + report_bug(regs->ip, regs); | |
10011 | ||
10012 | - __die(str, regs, err); | |
10013 | - oops_end(flags); | |
10014 | - do_exit(SIGSEGV); | |
10015 | + if (__die(str, regs, err)) | |
10016 | + regs = NULL; | |
10017 | + oops_end(flags, regs, SIGSEGV); | |
10018 | } | |
10019 | ||
10020 | #if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_SYSCTL) | |
10021 | @@ -564,10 +615,10 @@ void __kprobes die_nmi(char *str, struct | |
10022 | crash_kexec(regs); | |
10023 | if (do_panic || panic_on_oops) | |
10024 | panic("Non maskable interrupt"); | |
10025 | - oops_end(flags); | |
10026 | + oops_end(flags, NULL, SIGBUS); | |
10027 | nmi_exit(); | |
10028 | local_irq_enable(); | |
10029 | - do_exit(SIGSEGV); | |
10030 | + do_exit(SIGBUS); | |
10031 | } | |
10032 | #endif | |
10033 | ||
10034 | @@ -592,11 +643,14 @@ static void __kprobes do_trap(int trapnr | |
10035 | tsk->thread.trap_no = trapnr; | |
10036 | ||
10037 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && | |
10038 | - printk_ratelimit()) | |
10039 | + printk_ratelimit()) { | |
10040 | printk(KERN_INFO | |
10041 | - "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", | |
10042 | + "%s[%d] trap %s ip:%lx sp:%lx error:%lx", | |
10043 | tsk->comm, tsk->pid, str, | |
10044 | - regs->rip, regs->rsp, error_code); | |
10045 | + regs->ip, regs->sp, error_code); | |
10046 | + print_vma_addr(" in ", regs->ip); | |
10047 | + printk("\n"); | |
cc90b958 | 10048 | + } |
00e5a55c BS |
10049 | |
10050 | if (info) | |
10051 | force_sig_info(signr, info, tsk); | |
10052 | @@ -606,19 +660,12 @@ static void __kprobes do_trap(int trapnr | |
10053 | } | |
10054 | ||
10055 | ||
10056 | - /* kernel trap */ | |
10057 | - { | |
10058 | - const struct exception_table_entry *fixup; | |
10059 | - fixup = search_exception_tables(regs->rip); | |
10060 | - if (fixup) | |
10061 | - regs->rip = fixup->fixup; | |
10062 | - else { | |
10063 | - tsk->thread.error_code = error_code; | |
10064 | - tsk->thread.trap_no = trapnr; | |
10065 | - die(str, regs, error_code); | |
10066 | - } | |
10067 | - return; | |
10068 | + if (!fixup_exception(regs)) { | |
10069 | + tsk->thread.error_code = error_code; | |
10070 | + tsk->thread.trap_no = trapnr; | |
10071 | + die(str, regs, error_code); | |
10072 | } | |
10073 | + return; | |
10074 | } | |
10075 | ||
10076 | #define DO_ERROR(trapnr, signr, str, name) \ | |
10077 | @@ -647,10 +694,10 @@ asmlinkage void do_##name(struct pt_regs | |
10078 | do_trap(trapnr, signr, str, regs, error_code, &info); \ | |
10079 | } | |
10080 | ||
10081 | -DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip) | |
10082 | +DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) | |
10083 | DO_ERROR( 4, SIGSEGV, "overflow", overflow) | |
10084 | DO_ERROR( 5, SIGSEGV, "bounds", bounds) | |
10085 | -DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->rip) | |
10086 | +DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) | |
10087 | DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) | |
10088 | DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | |
10089 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | |
10090 | @@ -698,32 +745,28 @@ asmlinkage void __kprobes do_general_pro | |
10091 | tsk->thread.trap_no = 13; | |
10092 | ||
10093 | if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | |
10094 | - printk_ratelimit()) | |
10095 | + printk_ratelimit()) { | |
10096 | printk(KERN_INFO | |
10097 | - "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", | |
10098 | + "%s[%d] general protection ip:%lx sp:%lx error:%lx", | |
10099 | tsk->comm, tsk->pid, | |
10100 | - regs->rip, regs->rsp, error_code); | |
10101 | + regs->ip, regs->sp, error_code); | |
10102 | + print_vma_addr(" in ", regs->ip); | |
10103 | + printk("\n"); | |
10104 | + } | |
10105 | ||
10106 | force_sig(SIGSEGV, tsk); | |
10107 | return; | |
10108 | } | |
10109 | ||
10110 | - /* kernel gp */ | |
10111 | - { | |
10112 | - const struct exception_table_entry *fixup; | |
10113 | - fixup = search_exception_tables(regs->rip); | |
10114 | - if (fixup) { | |
10115 | - regs->rip = fixup->fixup; | |
10116 | - return; | |
10117 | - } | |
10118 | + if (fixup_exception(regs)) | |
10119 | + return; | |
10120 | ||
10121 | - tsk->thread.error_code = error_code; | |
10122 | - tsk->thread.trap_no = 13; | |
10123 | - if (notify_die(DIE_GPF, "general protection fault", regs, | |
10124 | - error_code, 13, SIGSEGV) == NOTIFY_STOP) | |
10125 | - return; | |
10126 | - die("general protection fault", regs, error_code); | |
10127 | - } | |
10128 | + tsk->thread.error_code = error_code; | |
10129 | + tsk->thread.trap_no = 13; | |
10130 | + if (notify_die(DIE_GPF, "general protection fault", regs, | |
10131 | + error_code, 13, SIGSEGV) == NOTIFY_STOP) | |
10132 | + return; | |
10133 | + die("general protection fault", regs, error_code); | |
10134 | } | |
10135 | ||
10136 | static __kprobes void | |
10137 | @@ -833,15 +876,15 @@ asmlinkage __kprobes struct pt_regs *syn | |
10138 | { | |
10139 | struct pt_regs *regs = eregs; | |
10140 | /* Did already sync */ | |
10141 | - if (eregs == (struct pt_regs *)eregs->rsp) | |
10142 | + if (eregs == (struct pt_regs *)eregs->sp) | |
10143 | ; | |
10144 | /* Exception from user space */ | |
10145 | else if (user_mode(eregs)) | |
10146 | regs = task_pt_regs(current); | |
10147 | /* Exception from kernel and interrupts are enabled. Move to | |
10148 | kernel process stack. */ | |
10149 | - else if (eregs->eflags & X86_EFLAGS_IF) | |
10150 | - regs = (struct pt_regs *)(eregs->rsp -= sizeof(struct pt_regs)); | |
10151 | + else if (eregs->flags & X86_EFLAGS_IF) | |
10152 | + regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); | |
10153 | if (eregs != regs) | |
10154 | *regs = *eregs; | |
10155 | return regs; | |
10156 | @@ -859,6 +902,12 @@ asmlinkage void __kprobes do_debug(struc | |
10157 | ||
10158 | get_debugreg(condition, 6); | |
10159 | ||
cc90b958 | 10160 | + /* |
00e5a55c | 10161 | + * The processor cleared BTF, so don't mark that we need it set. |
cc90b958 | 10162 | + */ |
00e5a55c BS |
10163 | + clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); |
10164 | + tsk->thread.debugctlmsr = 0; | |
10165 | + | |
10166 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | |
10167 | SIGTRAP) == NOTIFY_STOP) | |
10168 | return; | |
10169 | @@ -874,27 +923,14 @@ asmlinkage void __kprobes do_debug(struc | |
10170 | ||
10171 | tsk->thread.debugreg6 = condition; | |
10172 | ||
10173 | - /* Mask out spurious TF errors due to lazy TF clearing */ | |
cc90b958 BS |
10174 | + |
10175 | + /* | |
00e5a55c BS |
10176 | + * Single-stepping through TF: make sure we ignore any events in |
10177 | + * kernel space (but re-enable TF when returning to user mode). | |
cc90b958 | 10178 | + */ |
00e5a55c BS |
10179 | if (condition & DR_STEP) { |
10180 | - /* | |
10181 | - * The TF error should be masked out only if the current | |
10182 | - * process is not traced and if the TRAP flag has been set | |
10183 | - * previously by a tracing process (condition detected by | |
10184 | - * the PT_DTRACE flag); remember that the i386 TRAP flag | |
10185 | - * can be modified by the process itself in user mode, | |
10186 | - * allowing programs to debug themselves without the ptrace() | |
10187 | - * interface. | |
10188 | - */ | |
10189 | if (!user_mode(regs)) | |
10190 | goto clear_TF_reenable; | |
10191 | - /* | |
10192 | - * Was the TF flag set by a debugger? If so, clear it now, | |
10193 | - * so that register information is correct. | |
10194 | - */ | |
10195 | - if (tsk->ptrace & PT_DTRACE) { | |
10196 | - regs->eflags &= ~TF_MASK; | |
10197 | - tsk->ptrace &= ~PT_DTRACE; | |
10198 | - } | |
10199 | } | |
10200 | ||
10201 | /* Ok, finally something we can handle */ | |
10202 | @@ -903,7 +939,7 @@ asmlinkage void __kprobes do_debug(struc | |
10203 | info.si_signo = SIGTRAP; | |
10204 | info.si_errno = 0; | |
10205 | info.si_code = TRAP_BRKPT; | |
10206 | - info.si_addr = user_mode(regs) ? (void __user *)regs->rip : NULL; | |
10207 | + info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL; | |
10208 | force_sig_info(SIGTRAP, &info, tsk); | |
10209 | ||
10210 | clear_dr7: | |
10211 | @@ -913,18 +949,15 @@ clear_dr7: | |
10212 | ||
10213 | clear_TF_reenable: | |
10214 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | |
10215 | - regs->eflags &= ~TF_MASK; | |
10216 | + regs->flags &= ~X86_EFLAGS_TF; | |
10217 | preempt_conditional_cli(regs); | |
10218 | } | |
10219 | ||
10220 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | |
10221 | { | |
10222 | - const struct exception_table_entry *fixup; | |
10223 | - fixup = search_exception_tables(regs->rip); | |
10224 | - if (fixup) { | |
10225 | - regs->rip = fixup->fixup; | |
10226 | + if (fixup_exception(regs)) | |
10227 | return 1; | |
10228 | - } | |
cc90b958 | 10229 | + |
00e5a55c BS |
10230 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); |
10231 | /* Illegal floating point operation in the kernel */ | |
10232 | current->thread.trap_no = trapnr; | |
10233 | @@ -939,7 +972,7 @@ static int kernel_math_error(struct pt_r | |
10234 | */ | |
10235 | asmlinkage void do_coprocessor_error(struct pt_regs *regs) | |
10236 | { | |
10237 | - void __user *rip = (void __user *)(regs->rip); | |
10238 | + void __user *ip = (void __user *)(regs->ip); | |
10239 | struct task_struct * task; | |
10240 | siginfo_t info; | |
10241 | unsigned short cwd, swd; | |
10242 | @@ -959,7 +992,7 @@ asmlinkage void do_coprocessor_error(str | |
10243 | info.si_signo = SIGFPE; | |
10244 | info.si_errno = 0; | |
10245 | info.si_code = __SI_FAULT; | |
10246 | - info.si_addr = rip; | |
10247 | + info.si_addr = ip; | |
10248 | /* | |
10249 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | |
10250 | * status. 0x3f is the exception bits in these regs, 0x200 is the | |
10251 | @@ -1008,7 +1041,7 @@ asmlinkage void bad_intr(void) | |
10252 | ||
10253 | asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | |
10254 | { | |
10255 | - void __user *rip = (void __user *)(regs->rip); | |
10256 | + void __user *ip = (void __user *)(regs->ip); | |
10257 | struct task_struct * task; | |
10258 | siginfo_t info; | |
10259 | unsigned short mxcsr; | |
10260 | @@ -1028,7 +1061,7 @@ asmlinkage void do_simd_coprocessor_erro | |
10261 | info.si_signo = SIGFPE; | |
10262 | info.si_errno = 0; | |
10263 | info.si_code = __SI_FAULT; | |
10264 | - info.si_addr = rip; | |
10265 | + info.si_addr = ip; | |
10266 | /* | |
10267 | * The SIMD FPU exceptions are handled a little differently, as there | |
10268 | * is only a single status/control register. Thus, to determine which | |
10269 | @@ -1092,13 +1125,14 @@ asmlinkage void math_state_restore(void) | |
10270 | task_thread_info(me)->status |= TS_USEDFPU; | |
10271 | me->fpu_counter++; | |
10272 | } | |
10273 | +EXPORT_SYMBOL_GPL(math_state_restore); | |
10274 | ||
10275 | ||
10276 | /* | |
10277 | * NB. All these are "interrupt gates" (i.e. events_mask is set) because we | |
10278 | * specify <dpl>|4 in the second field. | |
10279 | */ | |
10280 | -static trap_info_t __cpuinitdata trap_table[] = { | |
10281 | +static const trap_info_t __cpuinitconst trap_table[] = { | |
10282 | { 0, 0|4, __KERNEL_CS, (unsigned long)divide_error }, | |
10283 | { 1, 0|4, __KERNEL_CS, (unsigned long)debug }, | |
10284 | { 3, 3|4, __KERNEL_CS, (unsigned long)int3 }, | |
10285 | @@ -1169,3 +1203,14 @@ static int __init kstack_setup(char *s) | |
10286 | return 0; | |
10287 | } | |
10288 | early_param("kstack", kstack_setup); | |
cc90b958 | 10289 | + |
cc90b958 | 10290 | + |
00e5a55c BS |
10291 | +static int __init code_bytes_setup(char *s) |
10292 | +{ | |
10293 | + code_bytes = simple_strtoul(s, NULL, 0); | |
10294 | + if (code_bytes > 8192) | |
10295 | + code_bytes = 8192; | |
10296 | + | |
10297 | + return 1; | |
10298 | +} | |
10299 | +__setup("code_bytes=", code_bytes_setup); | |
10300 | --- sle11-2009-05-14.orig/arch/x86/kernel/vsyscall_64-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
10301 | +++ sle11-2009-05-14/arch/x86/kernel/vsyscall_64-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
10302 | @@ -43,12 +43,7 @@ | |
10303 | #include <asm/vgtod.h> | |
10304 | ||
10305 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | |
10306 | -#define __syscall_clobber "r11","rcx","memory" | |
10307 | -#define __pa_vsymbol(x) \ | |
10308 | - ({unsigned long v; \ | |
10309 | - extern char __vsyscall_0; \ | |
10310 | - asm("" : "=r" (v) : "0" (x)); \ | |
10311 | - ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); }) | |
10312 | +#define __syscall_clobber "r11","cx","memory" | |
10313 | ||
10314 | /* | |
10315 | * vsyscall_gtod_data contains data that is : | |
10316 | @@ -102,7 +97,7 @@ static __always_inline void do_get_tz(st | |
10317 | static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) | |
10318 | { | |
10319 | int ret; | |
10320 | - asm volatile("vsysc2: syscall" | |
10321 | + asm volatile("syscall" | |
10322 | : "=a" (ret) | |
10323 | : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) | |
10324 | : __syscall_clobber ); | |
10325 | @@ -112,7 +107,7 @@ static __always_inline int gettimeofday( | |
10326 | static __always_inline long time_syscall(long *t) | |
10327 | { | |
10328 | long secs; | |
10329 | - asm volatile("vsysc1: syscall" | |
10330 | + asm volatile("syscall" | |
10331 | : "=a" (secs) | |
10332 | : "0" (__NR_time),"D" (t) : __syscall_clobber); | |
10333 | return secs; | |
10334 | @@ -190,7 +185,7 @@ time_t __vsyscall(1) vtime(time_t *t) | |
10335 | long __vsyscall(2) | |
10336 | vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) | |
10337 | { | |
10338 | - unsigned int dummy, p; | |
10339 | + unsigned int p; | |
10340 | unsigned long j = 0; | |
10341 | ||
10342 | /* Fast cache - only recompute value once per jiffies and avoid | |
10343 | @@ -205,7 +200,7 @@ vgetcpu(unsigned *cpu, unsigned *node, s | |
10344 | p = tcache->blob[1]; | |
10345 | } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { | |
10346 | /* Load per CPU data from RDTSCP */ | |
10347 | - rdtscp(dummy, dummy, p); | |
10348 | + native_read_tscp(&p); | |
10349 | } else { | |
10350 | /* Load per CPU data from GDT */ | |
10351 | asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); | |
10352 | @@ -228,42 +223,11 @@ long __vsyscall(3) venosys_1(void) | |
10353 | ||
10354 | #ifdef CONFIG_SYSCTL | |
10355 | ||
10356 | -#define SYSCALL 0x050f | |
10357 | -#define NOP2 0x9090 | |
10358 | - | |
10359 | -/* | |
10360 | - * NOP out syscall in vsyscall page when not needed. | |
10361 | - */ | |
10362 | -static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, | |
10363 | - void __user *buffer, size_t *lenp, loff_t *ppos) | |
10364 | +static int | |
10365 | +vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, | |
10366 | + void __user *buffer, size_t *lenp, loff_t *ppos) | |
10367 | { | |
10368 | - extern u16 vsysc1, vsysc2; | |
10369 | - u16 __iomem *map1; | |
10370 | - u16 __iomem *map2; | |
10371 | - int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | |
10372 | - if (!write) | |
10373 | - return ret; | |
10374 | - /* gcc has some trouble with __va(__pa()), so just do it this | |
10375 | - way. */ | |
10376 | - map1 = ioremap(__pa_vsymbol(&vsysc1), 2); | |
10377 | - if (!map1) | |
10378 | - return -ENOMEM; | |
10379 | - map2 = ioremap(__pa_vsymbol(&vsysc2), 2); | |
10380 | - if (!map2) { | |
10381 | - ret = -ENOMEM; | |
10382 | - goto out; | |
10383 | - } | |
10384 | - if (!vsyscall_gtod_data.sysctl_enabled) { | |
10385 | - writew(SYSCALL, map1); | |
10386 | - writew(SYSCALL, map2); | |
10387 | - } else { | |
10388 | - writew(NOP2, map1); | |
10389 | - writew(NOP2, map2); | |
10390 | - } | |
10391 | - iounmap(map2); | |
10392 | -out: | |
10393 | - iounmap(map1); | |
10394 | - return ret; | |
10395 | + return proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | |
10396 | } | |
10397 | ||
10398 | static ctl_table kernel_table2[] = { | |
10399 | @@ -279,7 +243,6 @@ static ctl_table kernel_root_table2[] = | |
10400 | .child = kernel_table2 }, | |
10401 | {} | |
10402 | }; | |
10403 | - | |
10404 | #endif | |
10405 | ||
10406 | /* Assume __initcall executes before all user space. Hopefully kmod | |
10407 | @@ -301,7 +264,7 @@ static void __cpuinit vsyscall_set_cpu(i | |
10408 | d |= cpu; | |
10409 | d |= (node & 0xf) << 12; | |
10410 | d |= (node >> 4) << 48; | |
10411 | - if (HYPERVISOR_update_descriptor(virt_to_machine(cpu_gdt(cpu) | |
10412 | + if (HYPERVISOR_update_descriptor(virt_to_machine(get_cpu_gdt_table(cpu) | |
10413 | + GDT_ENTRY_PER_CPU), | |
10414 | d)) | |
10415 | BUG(); | |
10416 | @@ -322,7 +285,7 @@ cpu_vsyscall_notifier(struct notifier_bl | |
10417 | return NOTIFY_DONE; | |
10418 | } | |
10419 | ||
10420 | -static void __init map_vsyscall(void) | |
10421 | +void __init map_vsyscall(void) | |
10422 | { | |
10423 | extern char __vsyscall_0; | |
10424 | unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); | |
10425 | @@ -338,7 +301,6 @@ static int __init vsyscall_init(void) | |
10426 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); | |
10427 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); | |
10428 | BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); | |
10429 | - map_vsyscall(); | |
10430 | #ifdef CONFIG_XEN | |
10431 | vsyscall_gtod_data.sysctl_enabled = 0; /* disable vgettimeofay() */ | |
10432 | if (boot_cpu_has(X86_FEATURE_RDTSCP)) | |
10433 | --- sle11-2009-05-14.orig/arch/x86/kernel/xen_entry_64.S 2009-05-14 10:56:29.000000000 +0200 | |
10434 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
10435 | @@ -1,36 +0,0 @@ | |
10436 | -/* | |
10437 | - * Copied from arch/xen/i386/kernel/entry.S | |
10438 | - */ | |
10439 | -/* Offsets into shared_info_t. */ | |
10440 | -#define evtchn_upcall_pending /* 0 */ | |
10441 | -#define evtchn_upcall_mask 1 | |
10442 | - | |
10443 | -#define sizeof_vcpu_shift 6 | |
10444 | - | |
10445 | -#ifdef CONFIG_SMP | |
10446 | -//#define preempt_disable(reg) incl threadinfo_preempt_count(reg) | |
10447 | -//#define preempt_enable(reg) decl threadinfo_preempt_count(reg) | |
10448 | -#define preempt_disable(reg) | |
10449 | -#define preempt_enable(reg) | |
10450 | -#define XEN_GET_VCPU_INFO(reg) preempt_disable(%rbp) ; \ | |
10451 | - movq %gs:pda_cpunumber,reg ; \ | |
10452 | - shl $32, reg ; \ | |
10453 | - shr $32-sizeof_vcpu_shift,reg ; \ | |
10454 | - addq HYPERVISOR_shared_info,reg | |
10455 | -#define XEN_PUT_VCPU_INFO(reg) preempt_enable(%rbp) ; \ | |
10456 | -#define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff | |
10457 | -#else | |
10458 | -#define XEN_GET_VCPU_INFO(reg) movq HYPERVISOR_shared_info,reg | |
10459 | -#define XEN_PUT_VCPU_INFO(reg) | |
10460 | -#define XEN_PUT_VCPU_INFO_fixup | |
10461 | -#endif | |
10462 | - | |
10463 | -#define XEN_LOCKED_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg) | |
10464 | -#define XEN_LOCKED_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg) | |
10465 | -#define XEN_BLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \ | |
10466 | - XEN_LOCKED_BLOCK_EVENTS(reg) ; \ | |
10467 | - XEN_PUT_VCPU_INFO(reg) | |
10468 | -#define XEN_UNBLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \ | |
10469 | - XEN_LOCKED_UNBLOCK_EVENTS(reg) ; \ | |
10470 | - XEN_PUT_VCPU_INFO(reg) | |
10471 | -#define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg) | |
10472 | --- sle11-2009-05-14.orig/arch/x86/mach-xen/setup.c 2009-02-16 16:17:21.000000000 +0100 | |
10473 | +++ sle11-2009-05-14/arch/x86/mach-xen/setup.c 2009-03-16 16:33:40.000000000 +0100 | |
10474 | @@ -161,15 +161,12 @@ void __init machine_specific_arch_setup( | |
10475 | ||
10476 | /* Do an early initialization of the fixmap area */ | |
10477 | { | |
10478 | - extern pte_t swapper_pg_pmd[PTRS_PER_PTE]; | |
10479 | + extern pte_t swapper_pg_fixmap[PTRS_PER_PTE]; | |
10480 | unsigned long addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE); | |
10481 | - pgd_t *pgd = (pgd_t *)xen_start_info->pt_base; | |
10482 | - pud_t *pud = pud_offset(pgd + pgd_index(addr), addr); | |
10483 | + pud_t *pud = pud_offset(swapper_pg_dir + pgd_index(addr), addr); | |
10484 | pmd_t *pmd = pmd_offset(pud, addr); | |
10485 | ||
10486 | - swapper_pg_dir = pgd; | |
10487 | - init_mm.pgd = pgd; | |
10488 | - make_lowmem_page_readonly(swapper_pg_pmd, XENFEAT_writable_page_tables); | |
10489 | - set_pmd(pmd, __pmd(__pa_symbol(swapper_pg_pmd) | _PAGE_TABLE)); | |
10490 | + make_lowmem_page_readonly(swapper_pg_fixmap, XENFEAT_writable_page_tables); | |
10491 | + set_pmd(pmd, __pmd(__pa_symbol(swapper_pg_fixmap) | _PAGE_TABLE)); | |
10492 | } | |
10493 | } | |
10494 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
10495 | +++ sle11-2009-05-14/arch/x86/mm/fault-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
10496 | @@ -0,0 +1,1025 @@ | |
10497 | +/* | |
10498 | + * Copyright (C) 1995 Linus Torvalds | |
10499 | + * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs. | |
10500 | + */ | |
cc90b958 | 10501 | + |
00e5a55c BS |
10502 | +#include <linux/signal.h> |
10503 | +#include <linux/sched.h> | |
10504 | +#include <linux/kernel.h> | |
10505 | +#include <linux/errno.h> | |
10506 | +#include <linux/string.h> | |
10507 | +#include <linux/types.h> | |
10508 | +#include <linux/ptrace.h> | |
10509 | +#include <linux/mman.h> | |
10510 | +#include <linux/mm.h> | |
10511 | +#include <linux/smp.h> | |
10512 | +#include <linux/interrupt.h> | |
10513 | +#include <linux/init.h> | |
10514 | +#include <linux/tty.h> | |
10515 | +#include <linux/vt_kern.h> /* For unblank_screen() */ | |
10516 | +#include <linux/compiler.h> | |
10517 | +#include <linux/highmem.h> | |
10518 | +#include <linux/bootmem.h> /* for max_low_pfn */ | |
10519 | +#include <linux/vmalloc.h> | |
10520 | +#include <linux/module.h> | |
10521 | +#include <linux/kprobes.h> | |
10522 | +#include <linux/uaccess.h> | |
10523 | +#include <linux/kdebug.h> | |
cc90b958 | 10524 | + |
00e5a55c BS |
10525 | +#include <asm/system.h> |
10526 | +#include <asm/desc.h> | |
10527 | +#include <asm/segment.h> | |
10528 | +#include <asm/pgalloc.h> | |
10529 | +#include <asm/smp.h> | |
10530 | +#include <asm/tlbflush.h> | |
10531 | +#include <asm/proto.h> | |
10532 | +#include <asm-generic/sections.h> | |
cc90b958 | 10533 | + |
00e5a55c BS |
10534 | +/* |
10535 | + * Page fault error code bits | |
10536 | + * bit 0 == 0 means no page found, 1 means protection fault | |
10537 | + * bit 1 == 0 means read, 1 means write | |
10538 | + * bit 2 == 0 means kernel, 1 means user-mode | |
10539 | + * bit 3 == 1 means use of reserved bit detected | |
10540 | + * bit 4 == 1 means fault was an instruction fetch | |
10541 | + */ | |
10542 | +#define PF_PROT (1<<0) | |
10543 | +#define PF_WRITE (1<<1) | |
10544 | +#define PF_USER (1<<2) | |
10545 | +#define PF_RSVD (1<<3) | |
10546 | +#define PF_INSTR (1<<4) | |
cc90b958 | 10547 | + |
00e5a55c | 10548 | +static inline int notify_page_fault(struct pt_regs *regs) |
cc90b958 | 10549 | +{ |
00e5a55c BS |
10550 | +#ifdef CONFIG_KPROBES |
10551 | + int ret = 0; | |
cc90b958 | 10552 | + |
00e5a55c | 10553 | + /* kprobe_running() needs smp_processor_id() */ |
cc90b958 | 10554 | +#ifdef CONFIG_X86_32 |
00e5a55c BS |
10555 | + if (!user_mode_vm(regs)) { |
10556 | +#else | |
10557 | + if (!user_mode(regs)) { | |
cc90b958 | 10558 | +#endif |
00e5a55c BS |
10559 | + preempt_disable(); |
10560 | + if (kprobe_running() && kprobe_fault_handler(regs, 14)) | |
10561 | + ret = 1; | |
10562 | + preempt_enable(); | |
cc90b958 | 10563 | + } |
00e5a55c BS |
10564 | + |
10565 | + return ret; | |
10566 | +#else | |
10567 | + return 0; | |
cc90b958 | 10568 | +#endif |
00e5a55c BS |
10569 | +} |
10570 | + | |
10571 | +/* | |
10572 | + * X86_32 | |
10573 | + * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. | |
10574 | + * Check that here and ignore it. | |
10575 | + * | |
10576 | + * X86_64 | |
10577 | + * Sometimes the CPU reports invalid exceptions on prefetch. | |
10578 | + * Check that here and ignore it. | |
10579 | + * | |
10580 | + * Opcode checker based on code by Richard Brunner | |
10581 | + */ | |
10582 | +static int is_prefetch(struct pt_regs *regs, unsigned long addr, | |
10583 | + unsigned long error_code) | |
10584 | +{ | |
10585 | + unsigned char *instr; | |
10586 | + int scan_more = 1; | |
10587 | + int prefetch = 0; | |
10588 | + unsigned char *max_instr; | |
cc90b958 BS |
10589 | + |
10590 | + /* | |
00e5a55c BS |
10591 | + * If it was a exec (instruction fetch) fault on NX page, then |
10592 | + * do not ignore the fault: | |
cc90b958 | 10593 | + */ |
00e5a55c BS |
10594 | + if (error_code & PF_INSTR) |
10595 | + return 0; | |
cc90b958 | 10596 | + |
00e5a55c BS |
10597 | + instr = (unsigned char *)convert_ip_to_linear(current, regs); |
10598 | + max_instr = instr + 15; | |
cc90b958 | 10599 | + |
00e5a55c BS |
10600 | + if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) |
10601 | + return 0; | |
cc90b958 | 10602 | + |
00e5a55c BS |
10603 | + while (scan_more && instr < max_instr) { |
10604 | + unsigned char opcode; | |
10605 | + unsigned char instr_hi; | |
10606 | + unsigned char instr_lo; | |
cc90b958 | 10607 | + |
00e5a55c BS |
10608 | + if (probe_kernel_address(instr, opcode)) |
10609 | + break; | |
cc90b958 | 10610 | + |
00e5a55c BS |
10611 | + instr_hi = opcode & 0xf0; |
10612 | + instr_lo = opcode & 0x0f; | |
10613 | + instr++; | |
cc90b958 | 10614 | + |
00e5a55c BS |
10615 | + switch (instr_hi) { |
10616 | + case 0x20: | |
10617 | + case 0x30: | |
10618 | + /* | |
10619 | + * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. | |
10620 | + * In X86_64 long mode, the CPU will signal invalid | |
10621 | + * opcode if some of these prefixes are present so | |
10622 | + * X86_64 will never get here anyway | |
10623 | + */ | |
10624 | + scan_more = ((instr_lo & 7) == 0x6); | |
10625 | + break; | |
10626 | +#ifdef CONFIG_X86_64 | |
10627 | + case 0x40: | |
10628 | + /* | |
10629 | + * In AMD64 long mode 0x40..0x4F are valid REX prefixes | |
10630 | + * Need to figure out under what instruction mode the | |
10631 | + * instruction was issued. Could check the LDT for lm, | |
10632 | + * but for now it's good enough to assume that long | |
10633 | + * mode only uses well known segments or kernel. | |
10634 | + */ | |
10635 | + scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS); | |
10636 | + break; | |
10637 | +#endif | |
10638 | + case 0x60: | |
10639 | + /* 0x64 thru 0x67 are valid prefixes in all modes. */ | |
10640 | + scan_more = (instr_lo & 0xC) == 0x4; | |
10641 | + break; | |
10642 | + case 0xF0: | |
10643 | + /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */ | |
10644 | + scan_more = !instr_lo || (instr_lo>>1) == 1; | |
10645 | + break; | |
10646 | + case 0x00: | |
10647 | + /* Prefetch instruction is 0x0F0D or 0x0F18 */ | |
10648 | + scan_more = 0; | |
10649 | + | |
10650 | + if (probe_kernel_address(instr, opcode)) | |
10651 | + break; | |
10652 | + prefetch = (instr_lo == 0xF) && | |
10653 | + (opcode == 0x0D || opcode == 0x18); | |
10654 | + break; | |
10655 | + default: | |
10656 | + scan_more = 0; | |
10657 | + break; | |
10658 | + } | |
cc90b958 | 10659 | + } |
00e5a55c BS |
10660 | + return prefetch; |
10661 | +} | |
cc90b958 | 10662 | + |
00e5a55c BS |
10663 | +static void force_sig_info_fault(int si_signo, int si_code, |
10664 | + unsigned long address, struct task_struct *tsk) | |
10665 | +{ | |
10666 | + siginfo_t info; | |
cc90b958 | 10667 | + |
00e5a55c BS |
10668 | + info.si_signo = si_signo; |
10669 | + info.si_errno = 0; | |
10670 | + info.si_code = si_code; | |
10671 | + info.si_addr = (void __user *)address; | |
10672 | + force_sig_info(si_signo, &info, tsk); | |
10673 | +} | |
cc90b958 | 10674 | + |
00e5a55c BS |
10675 | +#ifdef CONFIG_X86_64 |
10676 | +static int bad_address(void *p) | |
10677 | +{ | |
10678 | + unsigned long dummy; | |
10679 | + return probe_kernel_address((unsigned long *)p, dummy); | |
10680 | +} | |
10681 | +#endif | |
cc90b958 | 10682 | + |
00e5a55c BS |
10683 | +static void dump_pagetable(unsigned long address) |
10684 | +{ | |
10685 | +#ifdef CONFIG_X86_32 | |
10686 | + __typeof__(pte_val(__pte(0))) page; | |
cc90b958 | 10687 | + |
00e5a55c BS |
10688 | + page = read_cr3(); |
10689 | + page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; | |
10690 | +#ifdef CONFIG_X86_PAE | |
10691 | + printk("*pdpt = %016Lx ", page); | |
10692 | + if ((page & _PAGE_PRESENT) | |
10693 | + && mfn_to_local_pfn(page >> PAGE_SHIFT) < max_low_pfn) { | |
10694 | + page = mfn_to_pfn(page >> PAGE_SHIFT); | |
10695 | + page <<= PAGE_SHIFT; | |
10696 | + page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) | |
10697 | + & (PTRS_PER_PMD - 1)]; | |
10698 | + printk(KERN_CONT "*pde = %016Lx ", page); | |
10699 | + page &= ~_PAGE_NX; | |
10700 | + } | |
10701 | +#else | |
10702 | + printk("*pde = %08lx ", page); | |
10703 | +#endif | |
cc90b958 BS |
10704 | + |
10705 | + /* | |
00e5a55c BS |
10706 | + * We must not directly access the pte in the highpte |
10707 | + * case if the page table is located in highmem. | |
10708 | + * And let's rather not kmap-atomic the pte, just in case | |
10709 | + * it's allocated already. | |
cc90b958 | 10710 | + */ |
00e5a55c BS |
10711 | + if ((page & _PAGE_PRESENT) |
10712 | + && mfn_to_local_pfn(page >> PAGE_SHIFT) < max_low_pfn | |
10713 | + && !(page & _PAGE_PSE)) { | |
10714 | + page = mfn_to_pfn(page >> PAGE_SHIFT); | |
10715 | + page <<= PAGE_SHIFT; | |
10716 | + page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) | |
10717 | + & (PTRS_PER_PTE - 1)]; | |
10718 | + printk(KERN_CONT "*pte = %0*Lx ", sizeof(page)*2, (u64)page); | |
cc90b958 BS |
10719 | + } |
10720 | + | |
00e5a55c BS |
10721 | + printk(KERN_CONT "\n"); |
10722 | +#else /* CONFIG_X86_64 */ | |
10723 | + pgd_t *pgd; | |
10724 | + pud_t *pud; | |
10725 | + pmd_t *pmd; | |
10726 | + pte_t *pte; | |
cc90b958 | 10727 | + |
00e5a55c | 10728 | + pgd = (pgd_t *)read_cr3(); |
cc90b958 | 10729 | + |
00e5a55c BS |
10730 | + pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); |
10731 | + pgd += pgd_index(address); | |
10732 | + if (bad_address(pgd)) goto bad; | |
10733 | + printk("PGD %lx ", pgd_val(*pgd)); | |
10734 | + if (!pgd_present(*pgd)) goto ret; | |
cc90b958 | 10735 | + |
00e5a55c BS |
10736 | + pud = pud_offset(pgd, address); |
10737 | + if (bad_address(pud)) goto bad; | |
10738 | + printk(KERN_CONT "PUD %lx ", pud_val(*pud)); | |
10739 | + if (!pud_present(*pud) || pud_large(*pud)) | |
10740 | + goto ret; | |
cc90b958 | 10741 | + |
00e5a55c BS |
10742 | + pmd = pmd_offset(pud, address); |
10743 | + if (bad_address(pmd)) goto bad; | |
10744 | + printk(KERN_CONT "PMD %lx ", pmd_val(*pmd)); | |
10745 | + if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; | |
10746 | + | |
10747 | + pte = pte_offset_kernel(pmd, address); | |
10748 | + if (bad_address(pte)) goto bad; | |
10749 | + printk(KERN_CONT "PTE %lx", pte_val(*pte)); | |
10750 | +ret: | |
10751 | + printk(KERN_CONT "\n"); | |
10752 | + return; | |
10753 | +bad: | |
10754 | + printk("BAD\n"); | |
10755 | +#endif | |
10756 | +} | |
cc90b958 | 10757 | + |
00e5a55c BS |
10758 | +#ifdef CONFIG_X86_32 |
10759 | +static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) | |
10760 | +{ | |
10761 | + unsigned index = pgd_index(address); | |
10762 | + pgd_t *pgd_k; | |
10763 | + pud_t *pud, *pud_k; | |
10764 | + pmd_t *pmd, *pmd_k; | |
cc90b958 | 10765 | + |
00e5a55c BS |
10766 | + pgd += index; |
10767 | + pgd_k = init_mm.pgd + index; | |
cc90b958 | 10768 | + |
00e5a55c BS |
10769 | + if (!pgd_present(*pgd_k)) |
10770 | + return NULL; | |
cc90b958 BS |
10771 | + |
10772 | + /* | |
00e5a55c BS |
10773 | + * set_pgd(pgd, *pgd_k); here would be useless on PAE |
10774 | + * and redundant with the set_pmd() on non-PAE. As would | |
10775 | + * set_pud. | |
cc90b958 | 10776 | + */ |
cc90b958 | 10777 | + |
00e5a55c BS |
10778 | + pud = pud_offset(pgd, address); |
10779 | + pud_k = pud_offset(pgd_k, address); | |
10780 | + if (!pud_present(*pud_k)) | |
10781 | + return NULL; | |
cc90b958 | 10782 | + |
00e5a55c BS |
10783 | + pmd = pmd_offset(pud, address); |
10784 | + pmd_k = pmd_offset(pud_k, address); | |
10785 | + if (!pmd_present(*pmd_k)) | |
10786 | + return NULL; | |
10787 | + if (!pmd_present(*pmd)) { | |
10788 | + bool lazy = x86_read_percpu(xen_lazy_mmu); | |
cc90b958 | 10789 | + |
00e5a55c BS |
10790 | + x86_write_percpu(xen_lazy_mmu, false); |
10791 | +#if CONFIG_XEN_COMPAT > 0x030002 | |
10792 | + set_pmd(pmd, *pmd_k); | |
10793 | +#else | |
cc90b958 | 10794 | + /* |
00e5a55c BS |
10795 | + * When running on older Xen we must launder *pmd_k through |
10796 | + * pmd_val() to ensure that _PAGE_PRESENT is correctly set. | |
cc90b958 | 10797 | + */ |
00e5a55c BS |
10798 | + set_pmd(pmd, __pmd(pmd_val(*pmd_k))); |
10799 | +#endif | |
10800 | + x86_write_percpu(xen_lazy_mmu, lazy); | |
10801 | + } else | |
10802 | + BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); | |
10803 | + return pmd_k; | |
10804 | +} | |
10805 | +#endif | |
cc90b958 | 10806 | + |
00e5a55c BS |
10807 | +#ifdef CONFIG_X86_64 |
10808 | +static const char errata93_warning[] = | |
10809 | +KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" | |
10810 | +KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" | |
10811 | +KERN_ERR "******* Please consider a BIOS update.\n" | |
10812 | +KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; | |
10813 | +#endif | |
cc90b958 | 10814 | + |
00e5a55c BS |
10815 | +/* Workaround for K8 erratum #93 & buggy BIOS. |
10816 | + BIOS SMM functions are required to use a specific workaround | |
10817 | + to avoid corruption of the 64bit RIP register on C stepping K8. | |
10818 | + A lot of BIOS that didn't get tested properly miss this. | |
10819 | + The OS sees this as a page fault with the upper 32bits of RIP cleared. | |
10820 | + Try to work around it here. | |
10821 | + Note we only handle faults in kernel here. | |
10822 | + Does nothing for X86_32 | |
10823 | + */ | |
10824 | +static int is_errata93(struct pt_regs *regs, unsigned long address) | |
10825 | +{ | |
10826 | +#ifdef CONFIG_X86_64 | |
10827 | + static int warned; | |
10828 | + if (address != regs->ip) | |
10829 | + return 0; | |
10830 | + if ((address >> 32) != 0) | |
10831 | + return 0; | |
10832 | + address |= 0xffffffffUL << 32; | |
10833 | + if ((address >= (u64)_stext && address <= (u64)_etext) || | |
10834 | + (address >= MODULES_VADDR && address <= MODULES_END)) { | |
10835 | + if (!warned) { | |
10836 | + printk(errata93_warning); | |
10837 | + warned = 1; | |
cc90b958 | 10838 | + } |
00e5a55c BS |
10839 | + regs->ip = address; |
10840 | + return 1; | |
cc90b958 | 10841 | + } |
00e5a55c | 10842 | +#endif |
cc90b958 BS |
10843 | + return 0; |
10844 | +} | |
10845 | + | |
00e5a55c BS |
10846 | +/* |
10847 | + * Work around K8 erratum #100 K8 in compat mode occasionally jumps to illegal | |
10848 | + * addresses >4GB. We catch this in the page fault handler because these | |
10849 | + * addresses are not reachable. Just detect this case and return. Any code | |
10850 | + * segment in LDT is compatibility mode. | |
10851 | + */ | |
10852 | +static int is_errata100(struct pt_regs *regs, unsigned long address) | |
cc90b958 | 10853 | +{ |
00e5a55c BS |
10854 | +#ifdef CONFIG_X86_64 |
10855 | + if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && | |
10856 | + (address >> 32)) | |
10857 | + return 1; | |
10858 | +#endif | |
10859 | + return 0; | |
cc90b958 BS |
10860 | +} |
10861 | + | |
00e5a55c | 10862 | +void do_invalid_op(struct pt_regs *, unsigned long); |
cc90b958 | 10863 | + |
00e5a55c BS |
10864 | +static int is_f00f_bug(struct pt_regs *regs, unsigned long address) |
10865 | +{ | |
10866 | +#ifdef CONFIG_X86_F00F_BUG | |
10867 | + unsigned long nr; | |
cc90b958 | 10868 | + /* |
00e5a55c | 10869 | + * Pentium F0 0F C7 C8 bug workaround. |
cc90b958 | 10870 | + */ |
00e5a55c BS |
10871 | + if (boot_cpu_data.f00f_bug) { |
10872 | + nr = (address - idt_descr.address) >> 3; | |
cc90b958 | 10873 | + |
00e5a55c BS |
10874 | + if (nr == 6) { |
10875 | + do_invalid_op(regs, 0); | |
10876 | + return 1; | |
10877 | + } | |
cc90b958 | 10878 | + } |
00e5a55c BS |
10879 | +#endif |
10880 | + return 0; | |
10881 | +} | |
cc90b958 | 10882 | + |
00e5a55c BS |
10883 | +static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, |
10884 | + unsigned long address) | |
10885 | +{ | |
10886 | +#ifdef CONFIG_X86_32 | |
10887 | + if (!oops_may_print()) | |
10888 | + return; | |
10889 | +#endif | |
cc90b958 | 10890 | + |
00e5a55c BS |
10891 | +#ifdef CONFIG_X86_PAE |
10892 | + if (error_code & PF_INSTR) { | |
10893 | + unsigned int level; | |
10894 | + pte_t *pte = lookup_address(address, &level); | |
cc90b958 | 10895 | + |
00e5a55c BS |
10896 | + if (pte && pte_present(*pte) && !pte_exec(*pte)) |
10897 | + printk(KERN_CRIT "kernel tried to execute " | |
10898 | + "NX-protected page - exploit attempt? " | |
10899 | + "(uid: %d)\n", current->uid); | |
10900 | + } | |
10901 | +#endif | |
cc90b958 | 10902 | + |
00e5a55c BS |
10903 | + printk(KERN_ALERT "BUG: unable to handle kernel "); |
10904 | + if (address < PAGE_SIZE) | |
10905 | + printk(KERN_CONT "NULL pointer dereference"); | |
cc90b958 | 10906 | + else |
00e5a55c BS |
10907 | + printk(KERN_CONT "paging request"); |
10908 | +#ifdef CONFIG_X86_32 | |
10909 | + printk(KERN_CONT " at %08lx\n", address); | |
10910 | +#else | |
10911 | + printk(KERN_CONT " at %016lx\n", address); | |
10912 | +#endif | |
10913 | + printk(KERN_ALERT "IP:"); | |
10914 | + printk_address(regs->ip, 1); | |
10915 | + dump_pagetable(address); | |
cc90b958 BS |
10916 | +} |
10917 | + | |
00e5a55c BS |
10918 | +#ifdef CONFIG_X86_64 |
10919 | +static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, | |
10920 | + unsigned long error_code) | |
cc90b958 | 10921 | +{ |
00e5a55c BS |
10922 | + unsigned long flags = oops_begin(); |
10923 | + struct task_struct *tsk; | |
10924 | + | |
10925 | + printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", | |
10926 | + current->comm, address); | |
10927 | + dump_pagetable(address); | |
10928 | + tsk = current; | |
10929 | + tsk->thread.cr2 = address; | |
10930 | + tsk->thread.trap_no = 14; | |
10931 | + tsk->thread.error_code = error_code; | |
10932 | + if (__die("Bad pagetable", regs, error_code)) | |
10933 | + regs = NULL; | |
10934 | + oops_end(flags, regs, SIGKILL); | |
cc90b958 | 10935 | +} |
00e5a55c | 10936 | +#endif |
cc90b958 | 10937 | + |
00e5a55c | 10938 | +static int spurious_fault_check(unsigned long error_code, pte_t *pte) |
cc90b958 | 10939 | +{ |
00e5a55c BS |
10940 | + if ((error_code & PF_WRITE) && !pte_write(*pte)) |
10941 | + return 0; | |
10942 | + if ((error_code & PF_INSTR) && !pte_exec(*pte)) | |
10943 | + return 0; | |
cc90b958 | 10944 | + |
00e5a55c | 10945 | + return 1; |
cc90b958 | 10946 | +} |
cc90b958 | 10947 | + |
00e5a55c BS |
10948 | +/* |
10949 | + * Handle a spurious fault caused by a stale TLB entry. This allows | |
10950 | + * us to lazily refresh the TLB when increasing the permissions of a | |
10951 | + * kernel page (RO -> RW or NX -> X). Doing it eagerly is very | |
10952 | + * expensive since that implies doing a full cross-processor TLB | |
10953 | + * flush, even if no stale TLB entries exist on other processors. | |
10954 | + * There are no security implications to leaving a stale TLB when | |
10955 | + * increasing the permissions on a page. | |
10956 | + */ | |
10957 | +static int spurious_fault(unsigned long address, | |
10958 | + unsigned long error_code) | |
cc90b958 | 10959 | +{ |
00e5a55c BS |
10960 | + pgd_t *pgd; |
10961 | + pud_t *pud; | |
10962 | + pmd_t *pmd; | |
10963 | + pte_t *pte; | |
cc90b958 | 10964 | + |
00e5a55c BS |
10965 | + /* Reserved-bit violation or user access to kernel space? */ |
10966 | + if (error_code & (PF_USER | PF_RSVD)) | |
10967 | + return 0; | |
cc90b958 | 10968 | + |
00e5a55c BS |
10969 | + pgd = init_mm.pgd + pgd_index(address); |
10970 | + if (!pgd_present(*pgd)) | |
10971 | + return 0; | |
cc90b958 | 10972 | + |
00e5a55c BS |
10973 | + pud = pud_offset(pgd, address); |
10974 | + if (!pud_present(*pud)) | |
10975 | + return 0; | |
cc90b958 | 10976 | + |
00e5a55c BS |
10977 | + if (pud_large(*pud)) |
10978 | + return spurious_fault_check(error_code, (pte_t *) pud); | |
cc90b958 | 10979 | + |
00e5a55c BS |
10980 | + pmd = pmd_offset(pud, address); |
10981 | + if (!pmd_present(*pmd)) | |
10982 | + return 0; | |
cc90b958 | 10983 | + |
00e5a55c BS |
10984 | + if (pmd_large(*pmd)) |
10985 | + return spurious_fault_check(error_code, (pte_t *) pmd); | |
cc90b958 | 10986 | + |
00e5a55c BS |
10987 | + pte = pte_offset_kernel(pmd, address); |
10988 | + if (!pte_present(*pte)) | |
10989 | + return 0; | |
10990 | + | |
10991 | + return spurious_fault_check(error_code, pte); | |
cc90b958 | 10992 | +} |
cc90b958 | 10993 | + |
00e5a55c BS |
10994 | +/* |
10995 | + * X86_32 | |
10996 | + * Handle a fault on the vmalloc or module mapping area | |
10997 | + * | |
10998 | + * X86_64 | |
10999 | + * Handle a fault on the vmalloc area | |
11000 | + * | |
11001 | + * This assumes no large pages in there. | |
11002 | + */ | |
11003 | +static int vmalloc_fault(unsigned long address) | |
cc90b958 | 11004 | +{ |
00e5a55c BS |
11005 | +#ifdef CONFIG_X86_32 |
11006 | + unsigned long pgd_paddr; | |
11007 | + pmd_t *pmd_k; | |
11008 | + pte_t *pte_k; | |
11009 | + /* | |
11010 | + * Synchronize this task's top level page-table | |
11011 | + * with the 'reference' page table. | |
11012 | + * | |
11013 | + * Do _not_ use "current" here. We might be inside | |
11014 | + * an interrupt in the middle of a task switch.. | |
11015 | + */ | |
11016 | + pgd_paddr = read_cr3(); | |
11017 | + pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); | |
11018 | + if (!pmd_k) | |
11019 | + return -1; | |
11020 | + pte_k = pte_offset_kernel(pmd_k, address); | |
11021 | + if (!pte_present(*pte_k)) | |
11022 | + return -1; | |
11023 | + return 0; | |
11024 | +#else | |
11025 | + pgd_t *pgd, *pgd_ref; | |
11026 | + pud_t *pud, *pud_ref; | |
11027 | + pmd_t *pmd, *pmd_ref; | |
11028 | + pte_t *pte, *pte_ref; | |
cc90b958 | 11029 | + |
00e5a55c BS |
11030 | + /* Make sure we are in vmalloc area */ |
11031 | + if (!(address >= VMALLOC_START && address < VMALLOC_END)) | |
11032 | + return -1; | |
cc90b958 | 11033 | + |
00e5a55c BS |
11034 | + /* Copy kernel mappings over when needed. This can also |
11035 | + happen within a race in page table update. In the later | |
11036 | + case just flush. */ | |
cc90b958 | 11037 | + |
00e5a55c BS |
11038 | + /* On Xen the line below does not always work. Needs investigating! */ |
11039 | + /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/ | |
11040 | + pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK); | |
11041 | + pgd += pgd_index(address); | |
11042 | + pgd_ref = pgd_offset_k(address); | |
11043 | + if (pgd_none(*pgd_ref)) | |
11044 | + return -1; | |
11045 | + if (pgd_none(*pgd)) | |
11046 | + set_pgd(pgd, *pgd_ref); | |
11047 | + else | |
11048 | + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); | |
cc90b958 | 11049 | + |
00e5a55c BS |
11050 | + /* Below here mismatches are bugs because these lower tables |
11051 | + are shared */ | |
cc90b958 | 11052 | + |
00e5a55c BS |
11053 | + pud = pud_offset(pgd, address); |
11054 | + pud_ref = pud_offset(pgd_ref, address); | |
11055 | + if (pud_none(*pud_ref)) | |
11056 | + return -1; | |
11057 | + if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) | |
11058 | + BUG(); | |
11059 | + pmd = pmd_offset(pud, address); | |
11060 | + pmd_ref = pmd_offset(pud_ref, address); | |
11061 | + if (pmd_none(*pmd_ref)) | |
11062 | + return -1; | |
11063 | + if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) | |
11064 | + BUG(); | |
11065 | + pte_ref = pte_offset_kernel(pmd_ref, address); | |
11066 | + if (!pte_present(*pte_ref)) | |
11067 | + return -1; | |
11068 | + pte = pte_offset_kernel(pmd, address); | |
11069 | + /* Don't use pte_page here, because the mappings can point | |
11070 | + outside mem_map, and the NUMA hash lookup cannot handle | |
11071 | + that. */ | |
11072 | + if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) | |
11073 | + BUG(); | |
11074 | + return 0; | |
11075 | +#endif | |
cc90b958 | 11076 | +} |
cc90b958 | 11077 | + |
00e5a55c BS |
11078 | +int show_unhandled_signals = 1; |
11079 | + | |
11080 | +/* | |
11081 | + * This routine handles page faults. It determines the address, | |
11082 | + * and the problem, and then passes it off to one of the appropriate | |
11083 | + * routines. | |
11084 | + */ | |
11085 | +#ifdef CONFIG_X86_64 | |
11086 | +asmlinkage | |
11087 | +#endif | |
11088 | +void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) | |
cc90b958 | 11089 | +{ |
00e5a55c BS |
11090 | + struct task_struct *tsk; |
11091 | + struct mm_struct *mm; | |
11092 | + struct vm_area_struct *vma; | |
11093 | + unsigned long address; | |
11094 | + int write, si_code; | |
11095 | + int fault; | |
11096 | +#ifdef CONFIG_X86_64 | |
11097 | + unsigned long flags; | |
11098 | +#endif | |
cc90b958 | 11099 | + |
00e5a55c BS |
11100 | + /* |
11101 | + * We can fault from pretty much anywhere, with unknown IRQ state. | |
11102 | + */ | |
11103 | + trace_hardirqs_fixup(); | |
cc90b958 | 11104 | + |
00e5a55c BS |
11105 | + /* Set the "privileged fault" bit to something sane. */ |
11106 | + if (user_mode_vm(regs)) | |
11107 | + error_code |= PF_USER; | |
11108 | + else | |
11109 | + error_code &= ~PF_USER; | |
cc90b958 | 11110 | + |
00e5a55c BS |
11111 | + tsk = current; |
11112 | + mm = tsk->mm; | |
11113 | + prefetchw(&mm->mmap_sem); | |
11114 | + | |
11115 | + /* get the address */ | |
11116 | + address = read_cr2(); | |
11117 | + | |
11118 | + si_code = SEGV_MAPERR; | |
11119 | + | |
11120 | + if (notify_page_fault(regs)) | |
11121 | + return; | |
11122 | + | |
11123 | + /* | |
11124 | + * We fault-in kernel-space virtual memory on-demand. The | |
11125 | + * 'reference' page table is init_mm.pgd. | |
11126 | + * | |
11127 | + * NOTE! We MUST NOT take any locks for this case. We may | |
11128 | + * be in an interrupt or a critical region, and should | |
11129 | + * only copy the information from the master page table, | |
11130 | + * nothing more. | |
11131 | + * | |
11132 | + * This verifies that the fault happens in kernel space | |
11133 | + * (error_code & 4) == 0, and that the fault was not a | |
11134 | + * protection error (error_code & 9) == 0. | |
11135 | + */ | |
11136 | +#ifdef CONFIG_X86_32 | |
11137 | + if (unlikely(address >= TASK_SIZE)) { | |
11138 | +#else | |
11139 | + if (unlikely(address >= TASK_SIZE64)) { | |
11140 | +#endif | |
11141 | + /* Faults in hypervisor area can never be patched up. */ | |
11142 | +#if defined(CONFIG_X86_XEN) | |
11143 | + if (address >= hypervisor_virt_start) | |
11144 | + goto bad_area_nosemaphore; | |
11145 | +#elif defined(CONFIG_X86_64_XEN) | |
11146 | + if (address >= HYPERVISOR_VIRT_START | |
11147 | + && address < HYPERVISOR_VIRT_END) | |
11148 | + goto bad_area_nosemaphore; | |
11149 | +#endif | |
11150 | + if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && | |
11151 | + vmalloc_fault(address) >= 0) | |
11152 | + return; | |
cc90b958 | 11153 | + |
00e5a55c BS |
11154 | + /* Can handle a stale RO->RW TLB */ |
11155 | + if (spurious_fault(address, error_code)) | |
11156 | + return; | |
cc90b958 | 11157 | + |
00e5a55c BS |
11158 | + /* |
11159 | + * Don't take the mm semaphore here. If we fixup a prefetch | |
11160 | + * fault we could otherwise deadlock. | |
11161 | + */ | |
11162 | + goto bad_area_nosemaphore; | |
11163 | + } | |
cc90b958 | 11164 | + |
cc90b958 | 11165 | + |
00e5a55c BS |
11166 | +#ifdef CONFIG_X86_32 |
11167 | + /* It's safe to allow irq's after cr2 has been saved and the vmalloc | |
11168 | + fault has been handled. */ | |
11169 | + if (regs->flags & (X86_EFLAGS_IF|VM_MASK)) | |
11170 | + local_irq_enable(); | |
cc90b958 | 11171 | + |
00e5a55c BS |
11172 | + /* |
11173 | + * If we're in an interrupt, have no user context or are running in an | |
11174 | + * atomic region then we must not take the fault. | |
11175 | + */ | |
11176 | + if (in_atomic() || !mm) | |
11177 | + goto bad_area_nosemaphore; | |
11178 | +#else /* CONFIG_X86_64 */ | |
11179 | + if (likely(regs->flags & X86_EFLAGS_IF)) | |
11180 | + local_irq_enable(); | |
cc90b958 | 11181 | + |
00e5a55c BS |
11182 | + if (unlikely(error_code & PF_RSVD)) |
11183 | + pgtable_bad(address, regs, error_code); | |
cc90b958 BS |
11184 | + |
11185 | + /* | |
00e5a55c BS |
11186 | + * If we're in an interrupt, have no user context or are running in an |
11187 | + * atomic region then we must not take the fault. | |
cc90b958 | 11188 | + */ |
00e5a55c BS |
11189 | + if (unlikely(in_atomic() || !mm)) |
11190 | + goto bad_area_nosemaphore; | |
cc90b958 BS |
11191 | + |
11192 | + /* | |
00e5a55c BS |
11193 | + * User-mode registers count as a user access even for any |
11194 | + * potential system fault or CPU buglet. | |
cc90b958 | 11195 | + */ |
00e5a55c BS |
11196 | + if (user_mode_vm(regs)) |
11197 | + error_code |= PF_USER; | |
11198 | +again: | |
11199 | +#endif | |
11200 | + /* When running in the kernel we expect faults to occur only to | |
11201 | + * addresses in user space. All other faults represent errors in the | |
11202 | + * kernel and should generate an OOPS. Unfortunately, in the case of an | |
11203 | + * erroneous fault occurring in a code path which already holds mmap_sem | |
11204 | + * we will deadlock attempting to validate the fault against the | |
11205 | + * address space. Luckily the kernel only validly references user | |
11206 | + * space from well defined areas of code, which are listed in the | |
11207 | + * exceptions table. | |
11208 | + * | |
11209 | + * As the vast majority of faults will be valid we will only perform | |
11210 | + * the source reference check when there is a possibility of a deadlock. | |
11211 | + * Attempt to lock the address space, if we cannot we then validate the | |
11212 | + * source. If this is invalid we can skip the address space check, | |
11213 | + * thus avoiding the deadlock. | |
11214 | + */ | |
11215 | + if (!down_read_trylock(&mm->mmap_sem)) { | |
11216 | + if ((error_code & PF_USER) == 0 && | |
11217 | + !search_exception_tables(regs->ip)) | |
11218 | + goto bad_area_nosemaphore; | |
11219 | + down_read(&mm->mmap_sem); | |
11220 | + } | |
11221 | + | |
11222 | + vma = find_vma(mm, address); | |
11223 | + if (!vma) | |
11224 | + goto bad_area; | |
11225 | + if (vma->vm_start <= address) | |
11226 | + goto good_area; | |
11227 | + if (!(vma->vm_flags & VM_GROWSDOWN)) | |
11228 | + goto bad_area; | |
11229 | + if (error_code & PF_USER) { | |
11230 | + /* | |
11231 | + * Accessing the stack below %sp is always a bug. | |
11232 | + * The large cushion allows instructions like enter | |
11233 | + * and pusha to work. ("enter $65535,$31" pushes | |
11234 | + * 32 pointers and then decrements %sp by 65535.) | |
11235 | + */ | |
11236 | + if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) | |
11237 | + goto bad_area; | |
11238 | + } | |
11239 | + if (expand_stack(vma, address)) | |
11240 | + goto bad_area; | |
11241 | +/* | |
11242 | + * Ok, we have a good vm_area for this memory access, so | |
11243 | + * we can handle it.. | |
11244 | + */ | |
11245 | +good_area: | |
11246 | + si_code = SEGV_ACCERR; | |
11247 | + write = 0; | |
11248 | + switch (error_code & (PF_PROT|PF_WRITE)) { | |
11249 | + default: /* 3: write, present */ | |
11250 | + /* fall through */ | |
11251 | + case PF_WRITE: /* write, not present */ | |
11252 | + if (!(vma->vm_flags & VM_WRITE)) | |
11253 | + goto bad_area; | |
11254 | + write++; | |
11255 | + break; | |
11256 | + case PF_PROT: /* read, present */ | |
11257 | + goto bad_area; | |
11258 | + case 0: /* read, not present */ | |
11259 | + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) | |
11260 | + goto bad_area; | |
11261 | + } | |
cc90b958 | 11262 | + |
00e5a55c BS |
11263 | +#ifdef CONFIG_X86_32 |
11264 | +survive: | |
11265 | +#endif | |
cc90b958 | 11266 | + /* |
00e5a55c BS |
11267 | + * If for any reason at all we couldn't handle the fault, |
11268 | + * make sure we exit gracefully rather than endlessly redo | |
11269 | + * the fault. | |
cc90b958 | 11270 | + */ |
00e5a55c BS |
11271 | + fault = handle_mm_fault(mm, vma, address, write); |
11272 | + if (unlikely(fault & VM_FAULT_ERROR)) { | |
11273 | + if (fault & VM_FAULT_OOM) | |
11274 | + goto out_of_memory; | |
11275 | + else if (fault & VM_FAULT_SIGBUS) | |
11276 | + goto do_sigbus; | |
11277 | + BUG(); | |
11278 | + } | |
11279 | + if (fault & VM_FAULT_MAJOR) | |
11280 | + tsk->maj_flt++; | |
11281 | + else | |
11282 | + tsk->min_flt++; | |
cc90b958 | 11283 | + |
00e5a55c | 11284 | +#ifdef CONFIG_X86_32 |
cc90b958 | 11285 | + /* |
00e5a55c | 11286 | + * Did it hit the DOS screen memory VA from vm86 mode? |
cc90b958 | 11287 | + */ |
00e5a55c BS |
11288 | + if (v8086_mode(regs)) { |
11289 | + unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; | |
11290 | + if (bit < 32) | |
11291 | + tsk->thread.screen_bitmap |= 1 << bit; | |
11292 | + } | |
11293 | +#endif | |
11294 | + up_read(&mm->mmap_sem); | |
11295 | + return; | |
cc90b958 | 11296 | + |
00e5a55c BS |
11297 | +/* |
11298 | + * Something tried to access memory that isn't in our memory map.. | |
11299 | + * Fix it, but check if it's kernel or user first.. | |
11300 | + */ | |
11301 | +bad_area: | |
11302 | + up_read(&mm->mmap_sem); | |
cc90b958 | 11303 | + |
00e5a55c BS |
11304 | +bad_area_nosemaphore: |
11305 | + /* User mode accesses just cause a SIGSEGV */ | |
11306 | + if (error_code & PF_USER) { | |
11307 | + /* | |
11308 | + * It's possible to have interrupts off here. | |
11309 | + */ | |
11310 | + local_irq_enable(); | |
cc90b958 | 11311 | + |
00e5a55c BS |
11312 | + /* |
11313 | + * Valid to do another page fault here because this one came | |
11314 | + * from user space. | |
11315 | + */ | |
11316 | + if (is_prefetch(regs, address, error_code)) | |
11317 | + return; | |
cc90b958 | 11318 | + |
00e5a55c BS |
11319 | + if (is_errata100(regs, address)) |
11320 | + return; | |
cc90b958 | 11321 | + |
00e5a55c BS |
11322 | + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && |
11323 | + printk_ratelimit()) { | |
11324 | + printk( | |
11325 | +#ifdef CONFIG_X86_32 | |
11326 | + "%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx", | |
11327 | +#else | |
11328 | + "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx", | |
11329 | +#endif | |
11330 | + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, | |
11331 | + tsk->comm, task_pid_nr(tsk), address, regs->ip, | |
11332 | + regs->sp, error_code); | |
11333 | + print_vma_addr(" in ", regs->ip); | |
11334 | + printk("\n"); | |
11335 | + } | |
cc90b958 | 11336 | + |
00e5a55c BS |
11337 | + tsk->thread.cr2 = address; |
11338 | + /* Kernel addresses are always protection faults */ | |
11339 | + tsk->thread.error_code = error_code | (address >= TASK_SIZE); | |
11340 | + tsk->thread.trap_no = 14; | |
11341 | + force_sig_info_fault(SIGSEGV, si_code, address, tsk); | |
11342 | + return; | |
11343 | + } | |
cc90b958 | 11344 | + |
00e5a55c BS |
11345 | + if (is_f00f_bug(regs, address)) |
11346 | + return; | |
11347 | + | |
11348 | +no_context: | |
11349 | + /* Are we prepared to handle this kernel fault? */ | |
11350 | + if (fixup_exception(regs)) | |
11351 | + return; | |
11352 | + | |
11353 | + /* | |
11354 | + * X86_32 | |
11355 | + * Valid to do another page fault here, because if this fault | |
11356 | + * had been triggered by is_prefetch fixup_exception would have | |
11357 | + * handled it. | |
11358 | + * | |
11359 | + * X86_64 | |
11360 | + * Hall of shame of CPU/BIOS bugs. | |
11361 | + */ | |
11362 | + if (is_prefetch(regs, address, error_code)) | |
11363 | + return; | |
11364 | + | |
11365 | + if (is_errata93(regs, address)) | |
11366 | + return; | |
11367 | + | |
11368 | +/* | |
11369 | + * Oops. The kernel tried to access some bad page. We'll have to | |
11370 | + * terminate things with extreme prejudice. | |
11371 | + */ | |
11372 | +#ifdef CONFIG_X86_32 | |
11373 | + bust_spinlocks(1); | |
cc90b958 | 11374 | +#else |
00e5a55c | 11375 | + flags = oops_begin(); |
cc90b958 | 11376 | +#endif |
cc90b958 | 11377 | + |
00e5a55c | 11378 | + show_fault_oops(regs, error_code, address); |
cc90b958 | 11379 | + |
00e5a55c BS |
11380 | + tsk->thread.cr2 = address; |
11381 | + tsk->thread.trap_no = 14; | |
11382 | + tsk->thread.error_code = error_code; | |
cc90b958 | 11383 | + |
00e5a55c BS |
11384 | +#ifdef CONFIG_X86_32 |
11385 | + die("Oops", regs, error_code); | |
11386 | + bust_spinlocks(0); | |
11387 | + do_exit(SIGKILL); | |
11388 | +#else | |
11389 | + if (__die("Oops", regs, error_code)) | |
11390 | + regs = NULL; | |
11391 | + /* Executive summary in case the body of the oops scrolled away */ | |
11392 | + printk(KERN_EMERG "CR2: %016lx\n", address); | |
11393 | + oops_end(flags, regs, SIGKILL); | |
11394 | +#endif | |
11395 | + | |
11396 | +/* | |
11397 | + * We ran out of memory, or some other thing happened to us that made | |
11398 | + * us unable to handle the page fault gracefully. | |
11399 | + */ | |
11400 | +out_of_memory: | |
11401 | + up_read(&mm->mmap_sem); | |
11402 | + if (is_global_init(tsk)) { | |
11403 | + yield(); | |
11404 | +#ifdef CONFIG_X86_32 | |
11405 | + down_read(&mm->mmap_sem); | |
11406 | + goto survive; | |
11407 | +#else | |
11408 | + goto again; | |
cc90b958 | 11409 | +#endif |
cc90b958 | 11410 | + } |
cc90b958 | 11411 | + |
00e5a55c BS |
11412 | + printk("VM: killing process %s\n", tsk->comm); |
11413 | + if (error_code & PF_USER) | |
11414 | + do_group_exit(SIGKILL); | |
11415 | + goto no_context; | |
cc90b958 | 11416 | + |
00e5a55c BS |
11417 | +do_sigbus: |
11418 | + up_read(&mm->mmap_sem); | |
cc90b958 | 11419 | + |
00e5a55c BS |
11420 | + /* Kernel mode? Handle exceptions or die */ |
11421 | + if (!(error_code & PF_USER)) | |
11422 | + goto no_context; | |
11423 | +#ifdef CONFIG_X86_32 | |
11424 | + /* User space => ok to do another page fault */ | |
11425 | + if (is_prefetch(regs, address, error_code)) | |
11426 | + return; | |
cc90b958 | 11427 | +#endif |
00e5a55c BS |
11428 | + tsk->thread.cr2 = address; |
11429 | + tsk->thread.error_code = error_code; | |
11430 | + tsk->thread.trap_no = 14; | |
11431 | + force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); | |
cc90b958 BS |
11432 | +} |
11433 | + | |
00e5a55c BS |
11434 | +DEFINE_SPINLOCK(pgd_lock); |
11435 | +LIST_HEAD(pgd_list); | |
cc90b958 | 11436 | + |
00e5a55c | 11437 | +void vmalloc_sync_all(void) |
cc90b958 | 11438 | +{ |
00e5a55c BS |
11439 | +#ifdef CONFIG_X86_32 |
11440 | + /* | |
11441 | + * Note that races in the updates of insync and start aren't | |
11442 | + * problematic: insync can only get set bits added, and updates to | |
11443 | + * start are only improving performance (without affecting correctness | |
11444 | + * if undone). | |
11445 | + * XEN: To work on PAE, we need to iterate over PMDs rather than PGDs. | |
11446 | + * This change works just fine with 2-level paging too. | |
11447 | + */ | |
11448 | +#define sync_index(a) ((a) >> PMD_SHIFT) | |
11449 | + static DECLARE_BITMAP(insync, PTRS_PER_PGD*PTRS_PER_PMD); | |
11450 | + static unsigned long start = TASK_SIZE; | |
11451 | + unsigned long address; | |
cc90b958 | 11452 | + |
00e5a55c | 11453 | + if (SHARED_KERNEL_PMD) |
cc90b958 BS |
11454 | + return; |
11455 | + | |
00e5a55c BS |
11456 | + BUILD_BUG_ON(TASK_SIZE & ~PMD_MASK); |
11457 | + for (address = start; | |
11458 | + address < hypervisor_virt_start; | |
11459 | + address += PMD_SIZE) { | |
11460 | + if (!test_bit(sync_index(address), insync)) { | |
11461 | + unsigned long flags; | |
11462 | + struct page *page; | |
cc90b958 | 11463 | + |
00e5a55c BS |
11464 | + spin_lock_irqsave(&pgd_lock, flags); |
11465 | + /* XEN: failure path assumes non-empty pgd_list. */ | |
11466 | + if (unlikely(list_empty(&pgd_list))) { | |
11467 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
11468 | + return; | |
11469 | + } | |
11470 | + list_for_each_entry(page, &pgd_list, lru) { | |
11471 | + if (!vmalloc_sync_one(page_address(page), | |
11472 | + address)) | |
11473 | + break; | |
11474 | + } | |
11475 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
11476 | + if (!page) | |
11477 | + set_bit(sync_index(address), insync); | |
11478 | + } | |
11479 | + if (address == start && test_bit(sync_index(address), insync)) | |
11480 | + start = address + PMD_SIZE; | |
11481 | + } | |
11482 | +#else /* CONFIG_X86_64 */ | |
11483 | + /* | |
11484 | + * Note that races in the updates of insync and start aren't | |
11485 | + * problematic: insync can only get set bits added, and updates to | |
11486 | + * start are only improving performance (without affecting correctness | |
11487 | + * if undone). | |
11488 | + */ | |
11489 | + static DECLARE_BITMAP(insync, PTRS_PER_PGD); | |
11490 | + static unsigned long start = VMALLOC_START & PGDIR_MASK; | |
11491 | + unsigned long address; | |
cc90b958 | 11492 | + |
00e5a55c BS |
11493 | + for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { |
11494 | + if (!test_bit(pgd_index(address), insync)) { | |
11495 | + const pgd_t *pgd_ref = pgd_offset_k(address); | |
11496 | + unsigned long flags; | |
11497 | + struct page *page; | |
cc90b958 | 11498 | + |
00e5a55c BS |
11499 | + if (pgd_none(*pgd_ref)) |
11500 | + continue; | |
11501 | + spin_lock_irqsave(&pgd_lock, flags); | |
11502 | + list_for_each_entry(page, &pgd_list, lru) { | |
11503 | + pgd_t *pgd; | |
11504 | + pgd = (pgd_t *)page_address(page) + pgd_index(address); | |
11505 | + if (pgd_none(*pgd)) | |
11506 | + set_pgd(pgd, *pgd_ref); | |
11507 | + else | |
11508 | + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); | |
11509 | + } | |
11510 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
11511 | + set_bit(pgd_index(address), insync); | |
11512 | + } | |
11513 | + if (address == start) | |
11514 | + start = address + PGDIR_SIZE; | |
11515 | + } | |
11516 | + /* Check that there is no need to do the same for the modules area. */ | |
11517 | + BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); | |
11518 | + BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == | |
11519 | + (__START_KERNEL & PGDIR_MASK))); | |
cc90b958 | 11520 | +#endif |
00e5a55c BS |
11521 | +} |
11522 | --- sle11-2009-05-14.orig/arch/x86/mm/fault_32-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
11523 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
11524 | @@ -1,757 +0,0 @@ | |
11525 | -/* | |
11526 | - * linux/arch/i386/mm/fault.c | |
11527 | - * | |
11528 | - * Copyright (C) 1995 Linus Torvalds | |
11529 | - */ | |
11530 | - | |
11531 | -#include <linux/signal.h> | |
11532 | -#include <linux/sched.h> | |
11533 | -#include <linux/kernel.h> | |
11534 | -#include <linux/errno.h> | |
11535 | -#include <linux/string.h> | |
11536 | -#include <linux/types.h> | |
11537 | -#include <linux/ptrace.h> | |
11538 | -#include <linux/mman.h> | |
11539 | -#include <linux/mm.h> | |
11540 | -#include <linux/smp.h> | |
11541 | -#include <linux/interrupt.h> | |
11542 | -#include <linux/init.h> | |
11543 | -#include <linux/tty.h> | |
11544 | -#include <linux/vt_kern.h> /* For unblank_screen() */ | |
11545 | -#include <linux/highmem.h> | |
11546 | -#include <linux/bootmem.h> /* for max_low_pfn */ | |
11547 | -#include <linux/vmalloc.h> | |
11548 | -#include <linux/module.h> | |
11549 | -#include <linux/kprobes.h> | |
11550 | -#include <linux/uaccess.h> | |
11551 | -#include <linux/kdebug.h> | |
11552 | -#include <linux/kprobes.h> | |
11553 | - | |
11554 | -#include <asm/system.h> | |
11555 | -#include <asm/desc.h> | |
11556 | -#include <asm/segment.h> | |
11557 | - | |
11558 | -extern void die(const char *,struct pt_regs *,long); | |
11559 | - | |
11560 | -#ifdef CONFIG_KPROBES | |
11561 | -static inline int notify_page_fault(struct pt_regs *regs) | |
11562 | -{ | |
11563 | - int ret = 0; | |
11564 | - | |
11565 | - /* kprobe_running() needs smp_processor_id() */ | |
11566 | - if (!user_mode_vm(regs)) { | |
11567 | - preempt_disable(); | |
11568 | - if (kprobe_running() && kprobe_fault_handler(regs, 14)) | |
11569 | - ret = 1; | |
11570 | - preempt_enable(); | |
11571 | - } | |
11572 | - | |
11573 | - return ret; | |
11574 | -} | |
11575 | -#else | |
11576 | -static inline int notify_page_fault(struct pt_regs *regs) | |
11577 | -{ | |
11578 | - return 0; | |
11579 | -} | |
11580 | -#endif | |
11581 | - | |
11582 | -/* | |
11583 | - * Return EIP plus the CS segment base. The segment limit is also | |
11584 | - * adjusted, clamped to the kernel/user address space (whichever is | |
11585 | - * appropriate), and returned in *eip_limit. | |
11586 | - * | |
11587 | - * The segment is checked, because it might have been changed by another | |
11588 | - * task between the original faulting instruction and here. | |
11589 | - * | |
11590 | - * If CS is no longer a valid code segment, or if EIP is beyond the | |
11591 | - * limit, or if it is a kernel address when CS is not a kernel segment, | |
11592 | - * then the returned value will be greater than *eip_limit. | |
11593 | - * | |
11594 | - * This is slow, but is very rarely executed. | |
11595 | - */ | |
11596 | -static inline unsigned long get_segment_eip(struct pt_regs *regs, | |
11597 | - unsigned long *eip_limit) | |
11598 | -{ | |
11599 | - unsigned long eip = regs->eip; | |
11600 | - unsigned seg = regs->xcs & 0xffff; | |
11601 | - u32 seg_ar, seg_limit, base, *desc; | |
11602 | - | |
11603 | - /* Unlikely, but must come before segment checks. */ | |
11604 | - if (unlikely(regs->eflags & VM_MASK)) { | |
11605 | - base = seg << 4; | |
11606 | - *eip_limit = base + 0xffff; | |
11607 | - return base + (eip & 0xffff); | |
11608 | - } | |
11609 | - | |
11610 | - /* The standard kernel/user address space limit. */ | |
11611 | - *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg; | |
11612 | - | |
11613 | - /* By far the most common cases. */ | |
11614 | - if (likely(SEGMENT_IS_FLAT_CODE(seg))) | |
11615 | - return eip; | |
11616 | - | |
11617 | - /* Check the segment exists, is within the current LDT/GDT size, | |
11618 | - that kernel/user (ring 0..3) has the appropriate privilege, | |
11619 | - that it's a code segment, and get the limit. */ | |
11620 | - __asm__ ("larl %3,%0; lsll %3,%1" | |
11621 | - : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg)); | |
11622 | - if ((~seg_ar & 0x9800) || eip > seg_limit) { | |
11623 | - *eip_limit = 0; | |
11624 | - return 1; /* So that returned eip > *eip_limit. */ | |
11625 | - } | |
11626 | - | |
11627 | - /* Get the GDT/LDT descriptor base. | |
11628 | - When you look for races in this code remember that | |
11629 | - LDT and other horrors are only used in user space. */ | |
11630 | - if (seg & (1<<2)) { | |
11631 | - /* Must lock the LDT while reading it. */ | |
11632 | - mutex_lock(&current->mm->context.lock); | |
11633 | - desc = current->mm->context.ldt; | |
11634 | - desc = (void *)desc + (seg & ~7); | |
11635 | - } else { | |
11636 | - /* Must disable preemption while reading the GDT. */ | |
11637 | - desc = (u32 *)get_cpu_gdt_table(get_cpu()); | |
11638 | - desc = (void *)desc + (seg & ~7); | |
11639 | - } | |
11640 | - | |
11641 | - /* Decode the code segment base from the descriptor */ | |
11642 | - base = get_desc_base((unsigned long *)desc); | |
11643 | - | |
11644 | - if (seg & (1<<2)) { | |
11645 | - mutex_unlock(&current->mm->context.lock); | |
11646 | - } else | |
11647 | - put_cpu(); | |
11648 | - | |
11649 | - /* Adjust EIP and segment limit, and clamp at the kernel limit. | |
11650 | - It's legitimate for segments to wrap at 0xffffffff. */ | |
11651 | - seg_limit += base; | |
11652 | - if (seg_limit < *eip_limit && seg_limit >= base) | |
11653 | - *eip_limit = seg_limit; | |
11654 | - return eip + base; | |
11655 | -} | |
11656 | - | |
11657 | -/* | |
11658 | - * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. | |
11659 | - * Check that here and ignore it. | |
11660 | - */ | |
11661 | -static int __is_prefetch(struct pt_regs *regs, unsigned long addr) | |
11662 | -{ | |
11663 | - unsigned long limit; | |
11664 | - unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit); | |
11665 | - int scan_more = 1; | |
11666 | - int prefetch = 0; | |
11667 | - int i; | |
11668 | - | |
11669 | - for (i = 0; scan_more && i < 15; i++) { | |
11670 | - unsigned char opcode; | |
11671 | - unsigned char instr_hi; | |
11672 | - unsigned char instr_lo; | |
11673 | - | |
11674 | - if (instr > (unsigned char *)limit) | |
11675 | - break; | |
11676 | - if (probe_kernel_address(instr, opcode)) | |
11677 | - break; | |
11678 | - | |
11679 | - instr_hi = opcode & 0xf0; | |
11680 | - instr_lo = opcode & 0x0f; | |
11681 | - instr++; | |
11682 | - | |
11683 | - switch (instr_hi) { | |
11684 | - case 0x20: | |
11685 | - case 0x30: | |
11686 | - /* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */ | |
11687 | - scan_more = ((instr_lo & 7) == 0x6); | |
11688 | - break; | |
11689 | - | |
11690 | - case 0x60: | |
11691 | - /* 0x64 thru 0x67 are valid prefixes in all modes. */ | |
11692 | - scan_more = (instr_lo & 0xC) == 0x4; | |
11693 | - break; | |
11694 | - case 0xF0: | |
11695 | - /* 0xF0, 0xF2, and 0xF3 are valid prefixes */ | |
11696 | - scan_more = !instr_lo || (instr_lo>>1) == 1; | |
11697 | - break; | |
11698 | - case 0x00: | |
11699 | - /* Prefetch instruction is 0x0F0D or 0x0F18 */ | |
11700 | - scan_more = 0; | |
11701 | - if (instr > (unsigned char *)limit) | |
11702 | - break; | |
11703 | - if (probe_kernel_address(instr, opcode)) | |
11704 | - break; | |
11705 | - prefetch = (instr_lo == 0xF) && | |
11706 | - (opcode == 0x0D || opcode == 0x18); | |
11707 | - break; | |
11708 | - default: | |
11709 | - scan_more = 0; | |
11710 | - break; | |
11711 | - } | |
11712 | - } | |
11713 | - return prefetch; | |
11714 | -} | |
11715 | - | |
11716 | -static inline int is_prefetch(struct pt_regs *regs, unsigned long addr, | |
11717 | - unsigned long error_code) | |
11718 | -{ | |
11719 | - if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | |
11720 | - boot_cpu_data.x86 >= 6)) { | |
11721 | - /* Catch an obscure case of prefetch inside an NX page. */ | |
11722 | - if (nx_enabled && (error_code & 16)) | |
11723 | - return 0; | |
11724 | - return __is_prefetch(regs, addr); | |
11725 | - } | |
11726 | - return 0; | |
11727 | -} | |
11728 | - | |
11729 | -static noinline void force_sig_info_fault(int si_signo, int si_code, | |
11730 | - unsigned long address, struct task_struct *tsk) | |
11731 | -{ | |
11732 | - siginfo_t info; | |
11733 | - | |
11734 | - info.si_signo = si_signo; | |
11735 | - info.si_errno = 0; | |
11736 | - info.si_code = si_code; | |
11737 | - info.si_addr = (void __user *)address; | |
11738 | - force_sig_info(si_signo, &info, tsk); | |
11739 | -} | |
11740 | - | |
11741 | -fastcall void do_invalid_op(struct pt_regs *, unsigned long); | |
11742 | - | |
11743 | -#ifdef CONFIG_X86_PAE | |
11744 | -static void dump_fault_path(unsigned long address) | |
11745 | -{ | |
11746 | - unsigned long *p, page; | |
11747 | - unsigned long mfn; | |
11748 | - | |
11749 | - page = read_cr3(); | |
11750 | - p = (unsigned long *)__va(page); | |
11751 | - p += (address >> 30) * 2; | |
11752 | - printk(KERN_ALERT "%08lx -> *pde = %08lx:%08lx\n", page, p[1], p[0]); | |
11753 | - if (p[0] & _PAGE_PRESENT) { | |
11754 | - mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20); | |
11755 | - page = mfn_to_pfn(mfn) << PAGE_SHIFT; | |
11756 | - p = (unsigned long *)__va(page); | |
11757 | - address &= 0x3fffffff; | |
11758 | - p += (address >> 21) * 2; | |
11759 | - printk(KERN_ALERT "%08lx -> *pme = %08lx:%08lx\n", | |
11760 | - page, p[1], p[0]); | |
11761 | - mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20); | |
11762 | -#ifdef CONFIG_HIGHPTE | |
11763 | - if (mfn_to_pfn(mfn) >= highstart_pfn) | |
11764 | - return; | |
11765 | -#endif | |
11766 | - if ((p[0] & _PAGE_PRESENT) && !(p[0] & _PAGE_PSE)) { | |
11767 | - page = mfn_to_pfn(mfn) << PAGE_SHIFT; | |
11768 | - p = (unsigned long *) __va(page); | |
11769 | - address &= 0x001fffff; | |
11770 | - p += (address >> 12) * 2; | |
11771 | - printk(KERN_ALERT "%08lx -> *pte = %08lx:%08lx\n", | |
11772 | - page, p[1], p[0]); | |
11773 | - } | |
11774 | - } | |
11775 | -} | |
11776 | -#else | |
11777 | -static void dump_fault_path(unsigned long address) | |
11778 | -{ | |
11779 | - unsigned long page; | |
11780 | - | |
11781 | - page = read_cr3(); | |
11782 | - page = ((unsigned long *) __va(page))[address >> PGDIR_SHIFT]; | |
11783 | - printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page, | |
11784 | - machine_to_phys(page)); | |
11785 | - /* | |
11786 | - * We must not directly access the pte in the highpte | |
11787 | - * case if the page table is located in highmem. | |
11788 | - * And lets rather not kmap-atomic the pte, just in case | |
11789 | - * it's allocated already. | |
11790 | - */ | |
11791 | - if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn | |
11792 | - && (page & _PAGE_PRESENT) | |
11793 | - && !(page & _PAGE_PSE)) { | |
11794 | - page = machine_to_phys(page & PAGE_MASK); | |
11795 | - page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT) | |
11796 | - & (PTRS_PER_PTE - 1)]; | |
11797 | - printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page, | |
11798 | - machine_to_phys(page)); | |
11799 | - } | |
11800 | -} | |
11801 | -#endif | |
11802 | - | |
11803 | -static int spurious_fault(struct pt_regs *regs, | |
11804 | - unsigned long address, | |
11805 | - unsigned long error_code) | |
11806 | -{ | |
11807 | - pgd_t *pgd; | |
11808 | - pud_t *pud; | |
11809 | - pmd_t *pmd; | |
11810 | - pte_t *pte; | |
11811 | - | |
11812 | - /* Reserved-bit violation or user access to kernel space? */ | |
11813 | - if (error_code & 0x0c) | |
11814 | - return 0; | |
11815 | - | |
11816 | - pgd = init_mm.pgd + pgd_index(address); | |
11817 | - if (!pgd_present(*pgd)) | |
11818 | - return 0; | |
11819 | - | |
11820 | - pud = pud_offset(pgd, address); | |
11821 | - if (!pud_present(*pud)) | |
11822 | - return 0; | |
11823 | - | |
11824 | - pmd = pmd_offset(pud, address); | |
11825 | - if (!pmd_present(*pmd)) | |
11826 | - return 0; | |
11827 | - | |
11828 | - pte = pte_offset_kernel(pmd, address); | |
11829 | - if (!pte_present(*pte)) | |
11830 | - return 0; | |
11831 | - if ((error_code & 0x02) && !pte_write(*pte)) | |
11832 | - return 0; | |
11833 | -#ifdef CONFIG_X86_PAE | |
11834 | - if ((error_code & 0x10) && (__pte_val(*pte) & _PAGE_NX)) | |
11835 | - return 0; | |
11836 | -#endif | |
11837 | - | |
11838 | - return 1; | |
11839 | -} | |
11840 | - | |
11841 | -static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) | |
11842 | -{ | |
11843 | - unsigned index = pgd_index(address); | |
11844 | - pgd_t *pgd_k; | |
11845 | - pud_t *pud, *pud_k; | |
11846 | - pmd_t *pmd, *pmd_k; | |
11847 | - | |
11848 | - pgd += index; | |
11849 | - pgd_k = init_mm.pgd + index; | |
11850 | - | |
11851 | - if (!pgd_present(*pgd_k)) | |
11852 | - return NULL; | |
11853 | - | |
11854 | - /* | |
11855 | - * set_pgd(pgd, *pgd_k); here would be useless on PAE | |
11856 | - * and redundant with the set_pmd() on non-PAE. As would | |
11857 | - * set_pud. | |
11858 | - */ | |
11859 | - | |
11860 | - pud = pud_offset(pgd, address); | |
11861 | - pud_k = pud_offset(pgd_k, address); | |
11862 | - if (!pud_present(*pud_k)) | |
11863 | - return NULL; | |
11864 | - | |
11865 | - pmd = pmd_offset(pud, address); | |
11866 | - pmd_k = pmd_offset(pud_k, address); | |
11867 | - if (!pmd_present(*pmd_k)) | |
11868 | - return NULL; | |
11869 | - if (!pmd_present(*pmd)) { | |
11870 | - bool lazy = x86_read_percpu(xen_lazy_mmu); | |
11871 | - | |
11872 | - x86_write_percpu(xen_lazy_mmu, false); | |
11873 | -#if CONFIG_XEN_COMPAT > 0x030002 | |
11874 | - set_pmd(pmd, *pmd_k); | |
11875 | -#else | |
11876 | - /* | |
11877 | - * When running on older Xen we must launder *pmd_k through | |
11878 | - * pmd_val() to ensure that _PAGE_PRESENT is correctly set. | |
11879 | - */ | |
11880 | - set_pmd(pmd, __pmd(pmd_val(*pmd_k))); | |
11881 | -#endif | |
11882 | - x86_write_percpu(xen_lazy_mmu, lazy); | |
11883 | - } else | |
11884 | - BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); | |
11885 | - return pmd_k; | |
11886 | -} | |
11887 | - | |
11888 | -/* | |
11889 | - * Handle a fault on the vmalloc or module mapping area | |
11890 | - * | |
11891 | - * This assumes no large pages in there. | |
11892 | - */ | |
11893 | -static inline int vmalloc_fault(unsigned long address) | |
11894 | -{ | |
11895 | - unsigned long pgd_paddr; | |
11896 | - pmd_t *pmd_k; | |
11897 | - pte_t *pte_k; | |
11898 | - /* | |
11899 | - * Synchronize this task's top level page-table | |
11900 | - * with the 'reference' page table. | |
11901 | - * | |
11902 | - * Do _not_ use "current" here. We might be inside | |
11903 | - * an interrupt in the middle of a task switch.. | |
11904 | - */ | |
11905 | - pgd_paddr = read_cr3(); | |
11906 | - pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); | |
11907 | - if (!pmd_k) | |
11908 | - return -1; | |
11909 | - pte_k = pte_offset_kernel(pmd_k, address); | |
11910 | - if (!pte_present(*pte_k)) | |
11911 | - return -1; | |
11912 | - return 0; | |
11913 | -} | |
11914 | - | |
11915 | -int show_unhandled_signals = 1; | |
11916 | - | |
11917 | -/* | |
11918 | - * This routine handles page faults. It determines the address, | |
11919 | - * and the problem, and then passes it off to one of the appropriate | |
11920 | - * routines. | |
11921 | - * | |
11922 | - * error_code: | |
11923 | - * bit 0 == 0 means no page found, 1 means protection fault | |
11924 | - * bit 1 == 0 means read, 1 means write | |
11925 | - * bit 2 == 0 means kernel, 1 means user-mode | |
11926 | - * bit 3 == 1 means use of reserved bit detected | |
11927 | - * bit 4 == 1 means fault was an instruction fetch | |
11928 | - */ | |
11929 | -fastcall void __kprobes do_page_fault(struct pt_regs *regs, | |
11930 | - unsigned long error_code) | |
11931 | -{ | |
11932 | - struct task_struct *tsk; | |
11933 | - struct mm_struct *mm; | |
11934 | - struct vm_area_struct * vma; | |
11935 | - unsigned long address; | |
11936 | - int write, si_code; | |
11937 | - int fault; | |
11938 | - | |
11939 | - /* | |
11940 | - * We can fault from pretty much anywhere, with unknown IRQ state. | |
11941 | - */ | |
11942 | - trace_hardirqs_fixup(); | |
11943 | - | |
11944 | - /* get the address */ | |
11945 | - address = read_cr2(); | |
11946 | - | |
11947 | - /* Set the "privileged fault" bit to something sane. */ | |
11948 | - error_code &= ~4; | |
11949 | - error_code |= (regs->xcs & 2) << 1; | |
11950 | - if (regs->eflags & X86_EFLAGS_VM) | |
11951 | - error_code |= 4; | |
11952 | - | |
11953 | - tsk = current; | |
11954 | - | |
11955 | - si_code = SEGV_MAPERR; | |
11956 | - | |
11957 | - /* | |
11958 | - * We fault-in kernel-space virtual memory on-demand. The | |
11959 | - * 'reference' page table is init_mm.pgd. | |
11960 | - * | |
11961 | - * NOTE! We MUST NOT take any locks for this case. We may | |
11962 | - * be in an interrupt or a critical region, and should | |
11963 | - * only copy the information from the master page table, | |
11964 | - * nothing more. | |
11965 | - * | |
11966 | - * This verifies that the fault happens in kernel space | |
11967 | - * (error_code & 4) == 0, and that the fault was not a | |
11968 | - * protection error (error_code & 9) == 0. | |
11969 | - */ | |
11970 | - if (unlikely(address >= TASK_SIZE)) { | |
11971 | -#ifdef CONFIG_XEN | |
11972 | - /* Faults in hypervisor area can never be patched up. */ | |
11973 | - if (address >= hypervisor_virt_start) | |
11974 | - goto bad_area_nosemaphore; | |
11975 | -#endif | |
11976 | - if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) | |
11977 | - return; | |
11978 | - /* Can take a spurious fault if mapping changes R/O -> R/W. */ | |
11979 | - if (spurious_fault(regs, address, error_code)) | |
11980 | - return; | |
11981 | - if (notify_page_fault(regs)) | |
11982 | - return; | |
11983 | - /* | |
11984 | - * Don't take the mm semaphore here. If we fixup a prefetch | |
11985 | - * fault we could otherwise deadlock. | |
11986 | - */ | |
11987 | - goto bad_area_nosemaphore; | |
11988 | - } | |
11989 | - | |
11990 | - if (notify_page_fault(regs)) | |
11991 | - return; | |
11992 | - | |
11993 | - /* It's safe to allow irq's after cr2 has been saved and the vmalloc | |
11994 | - fault has been handled. */ | |
11995 | - if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) | |
11996 | - local_irq_enable(); | |
11997 | - | |
11998 | - mm = tsk->mm; | |
11999 | - | |
12000 | - /* | |
12001 | - * If we're in an interrupt, have no user context or are running in an | |
12002 | - * atomic region then we must not take the fault.. | |
12003 | - */ | |
12004 | - if (in_atomic() || !mm) | |
12005 | - goto bad_area_nosemaphore; | |
12006 | - | |
12007 | - /* When running in the kernel we expect faults to occur only to | |
12008 | - * addresses in user space. All other faults represent errors in the | |
12009 | - * kernel and should generate an OOPS. Unfortunately, in the case of an | |
12010 | - * erroneous fault occurring in a code path which already holds mmap_sem | |
12011 | - * we will deadlock attempting to validate the fault against the | |
12012 | - * address space. Luckily the kernel only validly references user | |
12013 | - * space from well defined areas of code, which are listed in the | |
12014 | - * exceptions table. | |
12015 | - * | |
12016 | - * As the vast majority of faults will be valid we will only perform | |
12017 | - * the source reference check when there is a possibility of a deadlock. | |
12018 | - * Attempt to lock the address space, if we cannot we then validate the | |
12019 | - * source. If this is invalid we can skip the address space check, | |
12020 | - * thus avoiding the deadlock. | |
12021 | - */ | |
12022 | - if (!down_read_trylock(&mm->mmap_sem)) { | |
12023 | - if ((error_code & 4) == 0 && | |
12024 | - !search_exception_tables(regs->eip)) | |
12025 | - goto bad_area_nosemaphore; | |
12026 | - down_read(&mm->mmap_sem); | |
12027 | - } | |
12028 | - | |
12029 | - vma = find_vma(mm, address); | |
12030 | - if (!vma) | |
12031 | - goto bad_area; | |
12032 | - if (vma->vm_start <= address) | |
12033 | - goto good_area; | |
12034 | - if (!(vma->vm_flags & VM_GROWSDOWN)) | |
12035 | - goto bad_area; | |
12036 | - if (error_code & 4) { | |
12037 | - /* | |
12038 | - * Accessing the stack below %esp is always a bug. | |
12039 | - * The large cushion allows instructions like enter | |
12040 | - * and pusha to work. ("enter $65535,$31" pushes | |
12041 | - * 32 pointers and then decrements %esp by 65535.) | |
12042 | - */ | |
12043 | - if (address + 65536 + 32 * sizeof(unsigned long) < regs->esp) | |
12044 | - goto bad_area; | |
12045 | - } | |
12046 | - if (expand_stack(vma, address)) | |
12047 | - goto bad_area; | |
12048 | -/* | |
12049 | - * Ok, we have a good vm_area for this memory access, so | |
12050 | - * we can handle it.. | |
12051 | - */ | |
12052 | -good_area: | |
12053 | - si_code = SEGV_ACCERR; | |
12054 | - write = 0; | |
12055 | - switch (error_code & 3) { | |
12056 | - default: /* 3: write, present */ | |
12057 | - /* fall through */ | |
12058 | - case 2: /* write, not present */ | |
12059 | - if (!(vma->vm_flags & VM_WRITE)) | |
12060 | - goto bad_area; | |
12061 | - write++; | |
12062 | - break; | |
12063 | - case 1: /* read, present */ | |
12064 | - goto bad_area; | |
12065 | - case 0: /* read, not present */ | |
12066 | - if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) | |
12067 | - goto bad_area; | |
12068 | - } | |
12069 | - | |
12070 | - survive: | |
12071 | - /* | |
12072 | - * If for any reason at all we couldn't handle the fault, | |
12073 | - * make sure we exit gracefully rather than endlessly redo | |
12074 | - * the fault. | |
12075 | - */ | |
12076 | - fault = handle_mm_fault(mm, vma, address, write); | |
12077 | - if (unlikely(fault & VM_FAULT_ERROR)) { | |
12078 | - if (fault & VM_FAULT_OOM) | |
12079 | - goto out_of_memory; | |
12080 | - else if (fault & VM_FAULT_SIGBUS) | |
12081 | - goto do_sigbus; | |
12082 | - BUG(); | |
12083 | - } | |
12084 | - if (fault & VM_FAULT_MAJOR) | |
12085 | - tsk->maj_flt++; | |
12086 | - else | |
12087 | - tsk->min_flt++; | |
12088 | - | |
12089 | - /* | |
12090 | - * Did it hit the DOS screen memory VA from vm86 mode? | |
12091 | - */ | |
12092 | - if (regs->eflags & VM_MASK) { | |
12093 | - unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; | |
12094 | - if (bit < 32) | |
12095 | - tsk->thread.screen_bitmap |= 1 << bit; | |
12096 | - } | |
12097 | - up_read(&mm->mmap_sem); | |
12098 | - return; | |
12099 | - | |
12100 | -/* | |
12101 | - * Something tried to access memory that isn't in our memory map.. | |
12102 | - * Fix it, but check if it's kernel or user first.. | |
12103 | - */ | |
12104 | -bad_area: | |
12105 | - up_read(&mm->mmap_sem); | |
12106 | - | |
12107 | -bad_area_nosemaphore: | |
12108 | - /* User mode accesses just cause a SIGSEGV */ | |
12109 | - if (error_code & 4) { | |
12110 | - /* | |
12111 | - * It's possible to have interrupts off here. | |
12112 | - */ | |
12113 | - local_irq_enable(); | |
12114 | - | |
12115 | - /* | |
12116 | - * Valid to do another page fault here because this one came | |
12117 | - * from user space. | |
12118 | - */ | |
12119 | - if (is_prefetch(regs, address, error_code)) | |
12120 | - return; | |
12121 | - | |
12122 | - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | |
12123 | - printk_ratelimit()) { | |
12124 | - printk("%s%s[%d]: segfault at %08lx eip %08lx " | |
12125 | - "esp %08lx error %lx\n", | |
12126 | - task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, | |
12127 | - tsk->comm, task_pid_nr(tsk), address, regs->eip, | |
12128 | - regs->esp, error_code); | |
12129 | - } | |
12130 | - tsk->thread.cr2 = address; | |
12131 | - /* Kernel addresses are always protection faults */ | |
12132 | - tsk->thread.error_code = error_code | (address >= TASK_SIZE); | |
12133 | - tsk->thread.trap_no = 14; | |
12134 | - force_sig_info_fault(SIGSEGV, si_code, address, tsk); | |
12135 | - return; | |
12136 | - } | |
12137 | - | |
12138 | -#ifdef CONFIG_X86_F00F_BUG | |
12139 | - /* | |
12140 | - * Pentium F0 0F C7 C8 bug workaround. | |
12141 | - */ | |
12142 | - if (boot_cpu_data.f00f_bug) { | |
12143 | - unsigned long nr; | |
12144 | - | |
12145 | - nr = (address - idt_descr.address) >> 3; | |
12146 | - | |
12147 | - if (nr == 6) { | |
12148 | - do_invalid_op(regs, 0); | |
12149 | - return; | |
12150 | - } | |
12151 | - } | |
12152 | -#endif | |
12153 | - | |
12154 | -no_context: | |
12155 | - /* Are we prepared to handle this kernel fault? */ | |
12156 | - if (fixup_exception(regs)) | |
12157 | - return; | |
12158 | - | |
12159 | - /* | |
12160 | - * Valid to do another page fault here, because if this fault | |
12161 | - * had been triggered by is_prefetch fixup_exception would have | |
12162 | - * handled it. | |
12163 | - */ | |
12164 | - if (is_prefetch(regs, address, error_code)) | |
12165 | - return; | |
12166 | - | |
12167 | -/* | |
12168 | - * Oops. The kernel tried to access some bad page. We'll have to | |
12169 | - * terminate things with extreme prejudice. | |
12170 | - */ | |
12171 | - | |
12172 | - bust_spinlocks(1); | |
12173 | - | |
12174 | - if (oops_may_print()) { | |
12175 | -#ifdef CONFIG_X86_PAE | |
12176 | - if (error_code & 16) { | |
12177 | - pte_t *pte = lookup_address(address); | |
12178 | - | |
12179 | - if (pte && pte_present(*pte) && !pte_exec_kernel(*pte)) | |
12180 | - printk(KERN_CRIT "kernel tried to execute " | |
12181 | - "NX-protected page - exploit attempt? " | |
12182 | - "(uid: %d)\n", current->uid); | |
12183 | - } | |
12184 | -#endif | |
12185 | - if (address < PAGE_SIZE) | |
12186 | - printk(KERN_ALERT "BUG: unable to handle kernel NULL " | |
12187 | - "pointer dereference"); | |
12188 | - else | |
12189 | - printk(KERN_ALERT "BUG: unable to handle kernel paging" | |
12190 | - " request"); | |
12191 | - printk(" at virtual address %08lx\n",address); | |
12192 | - printk(KERN_ALERT "printing eip: %08lx\n", regs->eip); | |
12193 | - dump_fault_path(address); | |
12194 | - } | |
12195 | - tsk->thread.cr2 = address; | |
12196 | - tsk->thread.trap_no = 14; | |
12197 | - tsk->thread.error_code = error_code; | |
12198 | - die("Oops", regs, error_code); | |
12199 | - bust_spinlocks(0); | |
12200 | - do_exit(SIGKILL); | |
12201 | - | |
12202 | -/* | |
12203 | - * We ran out of memory, or some other thing happened to us that made | |
12204 | - * us unable to handle the page fault gracefully. | |
12205 | - */ | |
12206 | -out_of_memory: | |
12207 | - up_read(&mm->mmap_sem); | |
12208 | - if (is_global_init(tsk)) { | |
12209 | - yield(); | |
12210 | - down_read(&mm->mmap_sem); | |
12211 | - goto survive; | |
12212 | - } | |
12213 | - printk("VM: killing process %s\n", tsk->comm); | |
12214 | - if (error_code & 4) | |
12215 | - do_group_exit(SIGKILL); | |
12216 | - goto no_context; | |
12217 | - | |
12218 | -do_sigbus: | |
12219 | - up_read(&mm->mmap_sem); | |
12220 | - | |
12221 | - /* Kernel mode? Handle exceptions or die */ | |
12222 | - if (!(error_code & 4)) | |
12223 | - goto no_context; | |
12224 | - | |
12225 | - /* User space => ok to do another page fault */ | |
12226 | - if (is_prefetch(regs, address, error_code)) | |
12227 | - return; | |
12228 | - | |
12229 | - tsk->thread.cr2 = address; | |
12230 | - tsk->thread.error_code = error_code; | |
12231 | - tsk->thread.trap_no = 14; | |
12232 | - force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); | |
12233 | -} | |
12234 | - | |
12235 | -void vmalloc_sync_all(void) | |
12236 | -{ | |
12237 | - /* | |
12238 | - * Note that races in the updates of insync and start aren't | |
12239 | - * problematic: insync can only get set bits added, and updates to | |
12240 | - * start are only improving performance (without affecting correctness | |
12241 | - * if undone). | |
12242 | - * XEN: To work on PAE, we need to iterate over PMDs rather than PGDs. | |
12243 | - * This change works just fine with 2-level paging too. | |
12244 | - */ | |
12245 | -#define sync_index(a) ((a) >> PMD_SHIFT) | |
12246 | - static DECLARE_BITMAP(insync, PTRS_PER_PGD*PTRS_PER_PMD); | |
12247 | - static unsigned long start = TASK_SIZE; | |
12248 | - unsigned long address; | |
12249 | - | |
12250 | - if (SHARED_KERNEL_PMD) | |
12251 | - return; | |
12252 | - | |
12253 | - BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); | |
12254 | - for (address = start; | |
12255 | - address >= TASK_SIZE && address < hypervisor_virt_start; | |
12256 | - address += 1UL << PMD_SHIFT) { | |
12257 | - if (!test_bit(sync_index(address), insync)) { | |
12258 | - unsigned long flags; | |
12259 | - struct page *page; | |
12260 | - | |
12261 | - spin_lock_irqsave(&pgd_lock, flags); | |
12262 | - /* XEN: failure path assumes non-empty pgd_list. */ | |
12263 | - if (unlikely(!pgd_list)) { | |
12264 | - spin_unlock_irqrestore(&pgd_lock, flags); | |
12265 | - return; | |
12266 | - } | |
12267 | - for (page = pgd_list; page; page = | |
12268 | - (struct page *)page->index) | |
12269 | - if (!vmalloc_sync_one(page_address(page), | |
12270 | - address)) { | |
12271 | - BUG_ON(page != pgd_list); | |
12272 | - break; | |
12273 | - } | |
12274 | - spin_unlock_irqrestore(&pgd_lock, flags); | |
12275 | - if (!page) | |
12276 | - set_bit(sync_index(address), insync); | |
12277 | - } | |
12278 | - if (address == start && test_bit(sync_index(address), insync)) | |
12279 | - start = address + (1UL << PMD_SHIFT); | |
12280 | - } | |
12281 | -} | |
12282 | --- sle11-2009-05-14.orig/arch/x86/mm/fault_64-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
12283 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
12284 | @@ -1,686 +0,0 @@ | |
12285 | -/* | |
12286 | - * linux/arch/x86-64/mm/fault.c | |
12287 | - * | |
12288 | - * Copyright (C) 1995 Linus Torvalds | |
12289 | - * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs. | |
12290 | - */ | |
12291 | - | |
12292 | -#include <linux/signal.h> | |
12293 | -#include <linux/sched.h> | |
12294 | -#include <linux/kernel.h> | |
12295 | -#include <linux/errno.h> | |
12296 | -#include <linux/string.h> | |
12297 | -#include <linux/types.h> | |
12298 | -#include <linux/ptrace.h> | |
12299 | -#include <linux/mman.h> | |
12300 | -#include <linux/mm.h> | |
12301 | -#include <linux/smp.h> | |
12302 | -#include <linux/interrupt.h> | |
12303 | -#include <linux/init.h> | |
12304 | -#include <linux/tty.h> | |
12305 | -#include <linux/vt_kern.h> /* For unblank_screen() */ | |
12306 | -#include <linux/compiler.h> | |
12307 | -#include <linux/vmalloc.h> | |
12308 | -#include <linux/module.h> | |
12309 | -#include <linux/kprobes.h> | |
12310 | -#include <linux/uaccess.h> | |
12311 | -#include <linux/kdebug.h> | |
12312 | -#include <linux/kprobes.h> | |
12313 | - | |
12314 | -#include <asm/system.h> | |
12315 | -#include <asm/pgalloc.h> | |
12316 | -#include <asm/smp.h> | |
12317 | -#include <asm/tlbflush.h> | |
12318 | -#include <asm/proto.h> | |
12319 | -#include <asm-generic/sections.h> | |
12320 | - | |
12321 | -/* Page fault error code bits */ | |
12322 | -#define PF_PROT (1<<0) /* or no page found */ | |
12323 | -#define PF_WRITE (1<<1) | |
12324 | -#define PF_USER (1<<2) | |
12325 | -#define PF_RSVD (1<<3) | |
12326 | -#define PF_INSTR (1<<4) | |
12327 | - | |
12328 | -#ifdef CONFIG_KPROBES | |
12329 | -static inline int notify_page_fault(struct pt_regs *regs) | |
12330 | -{ | |
12331 | - int ret = 0; | |
12332 | - | |
12333 | - /* kprobe_running() needs smp_processor_id() */ | |
12334 | - if (!user_mode(regs)) { | |
12335 | - preempt_disable(); | |
12336 | - if (kprobe_running() && kprobe_fault_handler(regs, 14)) | |
12337 | - ret = 1; | |
12338 | - preempt_enable(); | |
12339 | - } | |
12340 | - | |
12341 | - return ret; | |
12342 | -} | |
12343 | -#else | |
12344 | -static inline int notify_page_fault(struct pt_regs *regs) | |
12345 | -{ | |
12346 | - return 0; | |
12347 | -} | |
12348 | -#endif | |
12349 | - | |
12350 | -/* Sometimes the CPU reports invalid exceptions on prefetch. | |
12351 | - Check that here and ignore. | |
12352 | - Opcode checker based on code by Richard Brunner */ | |
12353 | -static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, | |
12354 | - unsigned long error_code) | |
12355 | -{ | |
12356 | - unsigned char *instr; | |
12357 | - int scan_more = 1; | |
12358 | - int prefetch = 0; | |
12359 | - unsigned char *max_instr; | |
12360 | - | |
12361 | - /* If it was a exec fault ignore */ | |
12362 | - if (error_code & PF_INSTR) | |
12363 | - return 0; | |
12364 | - | |
12365 | - instr = (unsigned char __user *)convert_rip_to_linear(current, regs); | |
12366 | - max_instr = instr + 15; | |
12367 | - | |
12368 | - if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) | |
12369 | - return 0; | |
12370 | - | |
12371 | - while (scan_more && instr < max_instr) { | |
12372 | - unsigned char opcode; | |
12373 | - unsigned char instr_hi; | |
12374 | - unsigned char instr_lo; | |
12375 | - | |
12376 | - if (probe_kernel_address(instr, opcode)) | |
12377 | - break; | |
12378 | - | |
12379 | - instr_hi = opcode & 0xf0; | |
12380 | - instr_lo = opcode & 0x0f; | |
12381 | - instr++; | |
12382 | - | |
12383 | - switch (instr_hi) { | |
12384 | - case 0x20: | |
12385 | - case 0x30: | |
12386 | - /* Values 0x26,0x2E,0x36,0x3E are valid x86 | |
12387 | - prefixes. In long mode, the CPU will signal | |
12388 | - invalid opcode if some of these prefixes are | |
12389 | - present so we will never get here anyway */ | |
12390 | - scan_more = ((instr_lo & 7) == 0x6); | |
12391 | - break; | |
12392 | - | |
12393 | - case 0x40: | |
12394 | - /* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes | |
12395 | - Need to figure out under what instruction mode the | |
12396 | - instruction was issued ... */ | |
12397 | - /* Could check the LDT for lm, but for now it's good | |
12398 | - enough to assume that long mode only uses well known | |
12399 | - segments or kernel. */ | |
12400 | - scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS); | |
12401 | - break; | |
12402 | - | |
12403 | - case 0x60: | |
12404 | - /* 0x64 thru 0x67 are valid prefixes in all modes. */ | |
12405 | - scan_more = (instr_lo & 0xC) == 0x4; | |
12406 | - break; | |
12407 | - case 0xF0: | |
12408 | - /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */ | |
12409 | - scan_more = !instr_lo || (instr_lo>>1) == 1; | |
12410 | - break; | |
12411 | - case 0x00: | |
12412 | - /* Prefetch instruction is 0x0F0D or 0x0F18 */ | |
12413 | - scan_more = 0; | |
12414 | - if (probe_kernel_address(instr, opcode)) | |
12415 | - break; | |
12416 | - prefetch = (instr_lo == 0xF) && | |
12417 | - (opcode == 0x0D || opcode == 0x18); | |
12418 | - break; | |
12419 | - default: | |
12420 | - scan_more = 0; | |
12421 | - break; | |
12422 | - } | |
12423 | - } | |
12424 | - return prefetch; | |
12425 | -} | |
12426 | - | |
12427 | -static int bad_address(void *p) | |
12428 | -{ | |
12429 | - unsigned long dummy; | |
12430 | - return probe_kernel_address((unsigned long *)p, dummy); | |
12431 | -} | |
12432 | - | |
12433 | -void dump_pagetable(unsigned long address) | |
12434 | -{ | |
12435 | - pgd_t *pgd; | |
12436 | - pud_t *pud; | |
12437 | - pmd_t *pmd; | |
12438 | - pte_t *pte; | |
12439 | - | |
12440 | - pgd = (pgd_t *)read_cr3(); | |
12441 | - | |
12442 | - pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); | |
12443 | - pgd += pgd_index(address); | |
12444 | - if (bad_address(pgd)) goto bad; | |
12445 | - printk("PGD %lx ", pgd_val(*pgd)); | |
12446 | - if (!pgd_present(*pgd)) goto ret; | |
12447 | - | |
12448 | - pud = pud_offset(pgd, address); | |
12449 | - if (bad_address(pud)) goto bad; | |
12450 | - printk("PUD %lx ", pud_val(*pud)); | |
12451 | - if (!pud_present(*pud)) goto ret; | |
12452 | - | |
12453 | - pmd = pmd_offset(pud, address); | |
12454 | - if (bad_address(pmd)) goto bad; | |
12455 | - printk("PMD %lx ", pmd_val(*pmd)); | |
12456 | - if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; | |
12457 | - | |
12458 | - pte = pte_offset_kernel(pmd, address); | |
12459 | - if (bad_address(pte)) goto bad; | |
12460 | - printk("PTE %lx", pte_val(*pte)); | |
12461 | -ret: | |
12462 | - printk("\n"); | |
12463 | - return; | |
12464 | -bad: | |
12465 | - printk("BAD\n"); | |
12466 | -} | |
12467 | - | |
12468 | -static const char errata93_warning[] = | |
12469 | -KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" | |
12470 | -KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" | |
12471 | -KERN_ERR "******* Please consider a BIOS update.\n" | |
12472 | -KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; | |
12473 | - | |
12474 | -/* Workaround for K8 erratum #93 & buggy BIOS. | |
12475 | - BIOS SMM functions are required to use a specific workaround | |
12476 | - to avoid corruption of the 64bit RIP register on C stepping K8. | |
12477 | - A lot of BIOS that didn't get tested properly miss this. | |
12478 | - The OS sees this as a page fault with the upper 32bits of RIP cleared. | |
12479 | - Try to work around it here. | |
12480 | - Note we only handle faults in kernel here. */ | |
12481 | - | |
12482 | -static int is_errata93(struct pt_regs *regs, unsigned long address) | |
12483 | -{ | |
12484 | - static int warned; | |
12485 | - if (address != regs->rip) | |
12486 | - return 0; | |
12487 | - if ((address >> 32) != 0) | |
12488 | - return 0; | |
12489 | - address |= 0xffffffffUL << 32; | |
12490 | - if ((address >= (u64)_stext && address <= (u64)_etext) || | |
12491 | - (address >= MODULES_VADDR && address <= MODULES_END)) { | |
12492 | - if (!warned) { | |
12493 | - printk(errata93_warning); | |
12494 | - warned = 1; | |
12495 | - } | |
12496 | - regs->rip = address; | |
12497 | - return 1; | |
12498 | - } | |
12499 | - return 0; | |
12500 | -} | |
12501 | - | |
12502 | -static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, | |
12503 | - unsigned long error_code) | |
12504 | -{ | |
12505 | - unsigned long flags = oops_begin(); | |
12506 | - struct task_struct *tsk; | |
12507 | - | |
12508 | - printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", | |
12509 | - current->comm, address); | |
12510 | - dump_pagetable(address); | |
12511 | - tsk = current; | |
12512 | - tsk->thread.cr2 = address; | |
12513 | - tsk->thread.trap_no = 14; | |
12514 | - tsk->thread.error_code = error_code; | |
12515 | - __die("Bad pagetable", regs, error_code); | |
12516 | - oops_end(flags); | |
12517 | - do_exit(SIGKILL); | |
12518 | -} | |
12519 | - | |
12520 | -/* | |
12521 | - * Handle a fault on the vmalloc area | |
12522 | - * | |
12523 | - * This assumes no large pages in there. | |
12524 | - */ | |
12525 | -static int vmalloc_fault(unsigned long address) | |
12526 | -{ | |
12527 | - pgd_t *pgd, *pgd_ref; | |
12528 | - pud_t *pud, *pud_ref; | |
12529 | - pmd_t *pmd, *pmd_ref; | |
12530 | - pte_t *pte, *pte_ref; | |
12531 | - | |
12532 | - /* Copy kernel mappings over when needed. This can also | |
12533 | - happen within a race in page table update. In the later | |
12534 | - case just flush. */ | |
12535 | - | |
12536 | - /* On Xen the line below does not always work. Needs investigating! */ | |
12537 | - /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/ | |
12538 | - pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK); | |
12539 | - pgd += pgd_index(address); | |
12540 | - pgd_ref = pgd_offset_k(address); | |
12541 | - if (pgd_none(*pgd_ref)) | |
12542 | - return -1; | |
12543 | - if (pgd_none(*pgd)) | |
12544 | - set_pgd(pgd, *pgd_ref); | |
12545 | - else | |
12546 | - BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); | |
12547 | - | |
12548 | - /* Below here mismatches are bugs because these lower tables | |
12549 | - are shared */ | |
12550 | - | |
12551 | - pud = pud_offset(pgd, address); | |
12552 | - pud_ref = pud_offset(pgd_ref, address); | |
12553 | - if (pud_none(*pud_ref)) | |
12554 | - return -1; | |
12555 | - if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) | |
12556 | - BUG(); | |
12557 | - pmd = pmd_offset(pud, address); | |
12558 | - pmd_ref = pmd_offset(pud_ref, address); | |
12559 | - if (pmd_none(*pmd_ref)) | |
12560 | - return -1; | |
12561 | - if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) | |
12562 | - BUG(); | |
12563 | - pte_ref = pte_offset_kernel(pmd_ref, address); | |
12564 | - if (!pte_present(*pte_ref)) | |
12565 | - return -1; | |
12566 | - pte = pte_offset_kernel(pmd, address); | |
12567 | - /* Don't use pte_page here, because the mappings can point | |
12568 | - outside mem_map, and the NUMA hash lookup cannot handle | |
12569 | - that. */ | |
12570 | - if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) | |
12571 | - BUG(); | |
12572 | - return 0; | |
12573 | -} | |
12574 | - | |
12575 | -int show_unhandled_signals = 1; | |
12576 | - | |
12577 | - | |
12578 | -#define MEM_VERBOSE 1 | |
12579 | - | |
12580 | -#ifdef MEM_VERBOSE | |
12581 | -#define MEM_LOG(_f, _a...) \ | |
12582 | - printk("fault.c:[%d]-> " _f "\n", \ | |
12583 | - __LINE__ , ## _a ) | |
12584 | -#else | |
12585 | -#define MEM_LOG(_f, _a...) ((void)0) | |
12586 | -#endif | |
12587 | - | |
12588 | -static int spurious_fault(struct pt_regs *regs, | |
12589 | - unsigned long address, | |
12590 | - unsigned long error_code) | |
12591 | -{ | |
12592 | - pgd_t *pgd; | |
12593 | - pud_t *pud; | |
12594 | - pmd_t *pmd; | |
12595 | - pte_t *pte; | |
12596 | - | |
12597 | -#ifdef CONFIG_XEN | |
12598 | - /* Faults in hypervisor area are never spurious. */ | |
12599 | - if ((address >= HYPERVISOR_VIRT_START) && | |
12600 | - (address < HYPERVISOR_VIRT_END)) | |
12601 | - return 0; | |
12602 | -#endif | |
12603 | - | |
12604 | - /* Reserved-bit violation or user access to kernel space? */ | |
12605 | - if (error_code & (PF_RSVD|PF_USER)) | |
12606 | - return 0; | |
12607 | - | |
12608 | - pgd = init_mm.pgd + pgd_index(address); | |
12609 | - if (!pgd_present(*pgd)) | |
12610 | - return 0; | |
cc90b958 | 12611 | - |
00e5a55c BS |
12612 | - pud = pud_offset(pgd, address); |
12613 | - if (!pud_present(*pud)) | |
12614 | - return 0; | |
12615 | - | |
12616 | - pmd = pmd_offset(pud, address); | |
12617 | - if (!pmd_present(*pmd)) | |
12618 | - return 0; | |
12619 | - | |
12620 | - pte = pte_offset_kernel(pmd, address); | |
12621 | - if (!pte_present(*pte)) | |
12622 | - return 0; | |
12623 | - if ((error_code & PF_WRITE) && !pte_write(*pte)) | |
12624 | - return 0; | |
12625 | - if ((error_code & PF_INSTR) && (__pte_val(*pte) & _PAGE_NX)) | |
12626 | - return 0; | |
12627 | - | |
12628 | - return 1; | |
cc90b958 BS |
12629 | -} |
12630 | - | |
00e5a55c BS |
12631 | -/* |
12632 | - * This routine handles page faults. It determines the address, | |
12633 | - * and the problem, and then passes it off to one of the appropriate | |
12634 | - * routines. | |
12635 | - */ | |
12636 | -asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | |
12637 | - unsigned long error_code) | |
cc90b958 | 12638 | -{ |
00e5a55c BS |
12639 | - struct task_struct *tsk; |
12640 | - struct mm_struct *mm; | |
12641 | - struct vm_area_struct * vma; | |
12642 | - unsigned long address; | |
12643 | - const struct exception_table_entry *fixup; | |
12644 | - int write, fault; | |
12645 | - unsigned long flags; | |
12646 | - siginfo_t info; | |
cc90b958 | 12647 | - |
00e5a55c BS |
12648 | - if (!user_mode(regs)) |
12649 | - error_code &= ~PF_USER; /* means kernel */ | |
12650 | - | |
12651 | - /* | |
12652 | - * We can fault from pretty much anywhere, with unknown IRQ state. | |
12653 | - */ | |
12654 | - trace_hardirqs_fixup(); | |
12655 | - | |
12656 | - tsk = current; | |
12657 | - mm = tsk->mm; | |
12658 | - prefetchw(&mm->mmap_sem); | |
12659 | - | |
12660 | - /* get the address */ | |
12661 | - address = read_cr2(); | |
12662 | - | |
12663 | - info.si_code = SEGV_MAPERR; | |
12664 | - | |
12665 | - | |
12666 | - /* | |
12667 | - * We fault-in kernel-space virtual memory on-demand. The | |
12668 | - * 'reference' page table is init_mm.pgd. | |
12669 | - * | |
12670 | - * NOTE! We MUST NOT take any locks for this case. We may | |
12671 | - * be in an interrupt or a critical region, and should | |
12672 | - * only copy the information from the master page table, | |
12673 | - * nothing more. | |
12674 | - * | |
12675 | - * This verifies that the fault happens in kernel space | |
12676 | - * (error_code & 4) == 0, and that the fault was not a | |
12677 | - * protection error (error_code & 9) == 0. | |
12678 | - */ | |
12679 | - if (unlikely(address >= TASK_SIZE64)) { | |
12680 | - /* | |
12681 | - * Don't check for the module range here: its PML4 | |
12682 | - * is always initialized because it's shared with the main | |
12683 | - * kernel text. Only vmalloc may need PML4 syncups. | |
12684 | - */ | |
12685 | - if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && | |
12686 | - ((address >= VMALLOC_START && address < VMALLOC_END))) { | |
12687 | - if (vmalloc_fault(address) >= 0) | |
12688 | - return; | |
12689 | - } | |
12690 | - /* Can take a spurious fault if mapping changes R/O -> R/W. */ | |
12691 | - if (spurious_fault(regs, address, error_code)) | |
12692 | - return; | |
12693 | - if (notify_page_fault(regs)) | |
12694 | - return; | |
12695 | - /* | |
12696 | - * Don't take the mm semaphore here. If we fixup a prefetch | |
12697 | - * fault we could otherwise deadlock. | |
12698 | - */ | |
12699 | - goto bad_area_nosemaphore; | |
cc90b958 | 12700 | - } |
cc90b958 | 12701 | - |
00e5a55c BS |
12702 | - if (notify_page_fault(regs)) |
12703 | - return; | |
cc90b958 | 12704 | - |
00e5a55c BS |
12705 | - if (likely(regs->eflags & X86_EFLAGS_IF)) |
12706 | - local_irq_enable(); | |
cc90b958 | 12707 | - |
00e5a55c BS |
12708 | - if (unlikely(error_code & PF_RSVD)) |
12709 | - pgtable_bad(address, regs, error_code); | |
cc90b958 | 12710 | - |
00e5a55c BS |
12711 | - /* |
12712 | - * If we're in an interrupt or have no user | |
12713 | - * context, we must not take the fault.. | |
12714 | - */ | |
12715 | - if (unlikely(in_atomic() || !mm)) | |
12716 | - goto bad_area_nosemaphore; | |
12717 | - | |
12718 | - /* | |
12719 | - * User-mode registers count as a user access even for any | |
12720 | - * potential system fault or CPU buglet. | |
12721 | - */ | |
12722 | - if (user_mode_vm(regs)) | |
12723 | - error_code |= PF_USER; | |
12724 | - | |
12725 | - again: | |
12726 | - /* When running in the kernel we expect faults to occur only to | |
12727 | - * addresses in user space. All other faults represent errors in the | |
12728 | - * kernel and should generate an OOPS. Unfortunately, in the case of an | |
12729 | - * erroneous fault occurring in a code path which already holds mmap_sem | |
12730 | - * we will deadlock attempting to validate the fault against the | |
12731 | - * address space. Luckily the kernel only validly references user | |
12732 | - * space from well defined areas of code, which are listed in the | |
12733 | - * exceptions table. | |
12734 | - * | |
12735 | - * As the vast majority of faults will be valid we will only perform | |
12736 | - * the source reference check when there is a possibility of a deadlock. | |
12737 | - * Attempt to lock the address space, if we cannot we then validate the | |
12738 | - * source. If this is invalid we can skip the address space check, | |
12739 | - * thus avoiding the deadlock. | |
12740 | - */ | |
12741 | - if (!down_read_trylock(&mm->mmap_sem)) { | |
12742 | - if ((error_code & PF_USER) == 0 && | |
12743 | - !search_exception_tables(regs->rip)) | |
12744 | - goto bad_area_nosemaphore; | |
12745 | - down_read(&mm->mmap_sem); | |
12746 | - } | |
12747 | - | |
12748 | - vma = find_vma(mm, address); | |
12749 | - if (!vma) | |
12750 | - goto bad_area; | |
12751 | - if (likely(vma->vm_start <= address)) | |
12752 | - goto good_area; | |
12753 | - if (!(vma->vm_flags & VM_GROWSDOWN)) | |
12754 | - goto bad_area; | |
12755 | - if (error_code & 4) { | |
12756 | - /* Allow userspace just enough access below the stack pointer | |
12757 | - * to let the 'enter' instruction work. | |
12758 | - */ | |
12759 | - if (address + 65536 + 32 * sizeof(unsigned long) < regs->rsp) | |
12760 | - goto bad_area; | |
12761 | - } | |
12762 | - if (expand_stack(vma, address)) | |
12763 | - goto bad_area; | |
12764 | -/* | |
12765 | - * Ok, we have a good vm_area for this memory access, so | |
12766 | - * we can handle it.. | |
12767 | - */ | |
12768 | -good_area: | |
12769 | - info.si_code = SEGV_ACCERR; | |
12770 | - write = 0; | |
12771 | - switch (error_code & (PF_PROT|PF_WRITE)) { | |
12772 | - default: /* 3: write, present */ | |
12773 | - /* fall through */ | |
12774 | - case PF_WRITE: /* write, not present */ | |
12775 | - if (!(vma->vm_flags & VM_WRITE)) | |
12776 | - goto bad_area; | |
12777 | - write++; | |
12778 | - break; | |
12779 | - case PF_PROT: /* read, present */ | |
12780 | - goto bad_area; | |
12781 | - case 0: /* read, not present */ | |
12782 | - if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) | |
12783 | - goto bad_area; | |
12784 | - } | |
12785 | - | |
12786 | - /* | |
12787 | - * If for any reason at all we couldn't handle the fault, | |
12788 | - * make sure we exit gracefully rather than endlessly redo | |
12789 | - * the fault. | |
12790 | - */ | |
12791 | - fault = handle_mm_fault(mm, vma, address, write); | |
12792 | - if (unlikely(fault & VM_FAULT_ERROR)) { | |
12793 | - if (fault & VM_FAULT_OOM) | |
12794 | - goto out_of_memory; | |
12795 | - else if (fault & VM_FAULT_SIGBUS) | |
12796 | - goto do_sigbus; | |
12797 | - BUG(); | |
12798 | - } | |
12799 | - if (fault & VM_FAULT_MAJOR) | |
12800 | - tsk->maj_flt++; | |
12801 | - else | |
12802 | - tsk->min_flt++; | |
12803 | - up_read(&mm->mmap_sem); | |
12804 | - return; | |
12805 | - | |
12806 | -/* | |
12807 | - * Something tried to access memory that isn't in our memory map.. | |
12808 | - * Fix it, but check if it's kernel or user first.. | |
12809 | - */ | |
12810 | -bad_area: | |
12811 | - up_read(&mm->mmap_sem); | |
12812 | - | |
12813 | -bad_area_nosemaphore: | |
12814 | - /* User mode accesses just cause a SIGSEGV */ | |
12815 | - if (error_code & PF_USER) { | |
12816 | - | |
12817 | - /* | |
12818 | - * It's possible to have interrupts off here. | |
12819 | - */ | |
12820 | - local_irq_enable(); | |
12821 | - | |
12822 | - if (is_prefetch(regs, address, error_code)) | |
12823 | - return; | |
12824 | - | |
12825 | - /* Work around K8 erratum #100 K8 in compat mode | |
12826 | - occasionally jumps to illegal addresses >4GB. We | |
12827 | - catch this here in the page fault handler because | |
12828 | - these addresses are not reachable. Just detect this | |
12829 | - case and return. Any code segment in LDT is | |
12830 | - compatibility mode. */ | |
12831 | - if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && | |
12832 | - (address >> 32)) | |
12833 | - return; | |
12834 | - | |
12835 | - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | |
12836 | - printk_ratelimit()) { | |
12837 | - printk( | |
12838 | - "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n", | |
12839 | - tsk->pid > 1 ? KERN_INFO : KERN_EMERG, | |
12840 | - tsk->comm, tsk->pid, address, regs->rip, | |
12841 | - regs->rsp, error_code); | |
12842 | - } | |
12843 | - | |
12844 | - tsk->thread.cr2 = address; | |
12845 | - /* Kernel addresses are always protection faults */ | |
12846 | - tsk->thread.error_code = error_code | (address >= TASK_SIZE); | |
12847 | - tsk->thread.trap_no = 14; | |
12848 | - info.si_signo = SIGSEGV; | |
12849 | - info.si_errno = 0; | |
12850 | - /* info.si_code has been set above */ | |
12851 | - info.si_addr = (void __user *)address; | |
12852 | - force_sig_info(SIGSEGV, &info, tsk); | |
12853 | - return; | |
12854 | - } | |
cc90b958 | 12855 | - |
00e5a55c BS |
12856 | -no_context: |
12857 | - | |
12858 | - /* Are we prepared to handle this kernel fault? */ | |
12859 | - fixup = search_exception_tables(regs->rip); | |
12860 | - if (fixup) { | |
12861 | - regs->rip = fixup->fixup; | |
12862 | - return; | |
12863 | - } | |
cc90b958 | 12864 | - |
00e5a55c BS |
12865 | - /* |
12866 | - * Hall of shame of CPU/BIOS bugs. | |
12867 | - */ | |
cc90b958 | 12868 | - |
00e5a55c BS |
12869 | - if (is_prefetch(regs, address, error_code)) |
12870 | - return; | |
cc90b958 | 12871 | - |
00e5a55c BS |
12872 | - if (is_errata93(regs, address)) |
12873 | - return; | |
cc90b958 | 12874 | - |
00e5a55c BS |
12875 | -/* |
12876 | - * Oops. The kernel tried to access some bad page. We'll have to | |
12877 | - * terminate things with extreme prejudice. | |
12878 | - */ | |
cc90b958 | 12879 | - |
00e5a55c | 12880 | - flags = oops_begin(); |
cc90b958 | 12881 | - |
00e5a55c BS |
12882 | - if (address < PAGE_SIZE) |
12883 | - printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); | |
12884 | - else | |
12885 | - printk(KERN_ALERT "Unable to handle kernel paging request"); | |
12886 | - printk(" at %016lx RIP: \n" KERN_ALERT,address); | |
12887 | - printk_address(regs->rip); | |
12888 | - dump_pagetable(address); | |
12889 | - tsk->thread.cr2 = address; | |
12890 | - tsk->thread.trap_no = 14; | |
12891 | - tsk->thread.error_code = error_code; | |
12892 | - __die("Oops", regs, error_code); | |
12893 | - /* Executive summary in case the body of the oops scrolled away */ | |
12894 | - printk(KERN_EMERG "CR2: %016lx\n", address); | |
12895 | - oops_end(flags); | |
12896 | - do_exit(SIGKILL); | |
cc90b958 | 12897 | - |
00e5a55c BS |
12898 | -/* |
12899 | - * We ran out of memory, or some other thing happened to us that made | |
12900 | - * us unable to handle the page fault gracefully. | |
12901 | - */ | |
12902 | -out_of_memory: | |
12903 | - up_read(&mm->mmap_sem); | |
12904 | - if (is_global_init(current)) { | |
12905 | - yield(); | |
12906 | - goto again; | |
12907 | - } | |
12908 | - printk("VM: killing process %s\n", tsk->comm); | |
12909 | - if (error_code & 4) | |
12910 | - do_group_exit(SIGKILL); | |
12911 | - goto no_context; | |
cc90b958 | 12912 | - |
00e5a55c BS |
12913 | -do_sigbus: |
12914 | - up_read(&mm->mmap_sem); | |
cc90b958 | 12915 | - |
00e5a55c BS |
12916 | - /* Kernel mode? Handle exceptions or die */ |
12917 | - if (!(error_code & PF_USER)) | |
12918 | - goto no_context; | |
cc90b958 | 12919 | - |
00e5a55c BS |
12920 | - tsk->thread.cr2 = address; |
12921 | - tsk->thread.error_code = error_code; | |
12922 | - tsk->thread.trap_no = 14; | |
12923 | - info.si_signo = SIGBUS; | |
12924 | - info.si_errno = 0; | |
12925 | - info.si_code = BUS_ADRERR; | |
12926 | - info.si_addr = (void __user *)address; | |
12927 | - force_sig_info(SIGBUS, &info, tsk); | |
12928 | - return; | |
12929 | -} | |
cc90b958 | 12930 | - |
00e5a55c BS |
12931 | -DEFINE_SPINLOCK(pgd_lock); |
12932 | -LIST_HEAD(pgd_list); | |
cc90b958 | 12933 | - |
00e5a55c BS |
12934 | -void vmalloc_sync_all(void) |
12935 | -{ | |
12936 | - /* Note that races in the updates of insync and start aren't | |
12937 | - problematic: | |
12938 | - insync can only get set bits added, and updates to start are only | |
12939 | - improving performance (without affecting correctness if undone). */ | |
12940 | - static DECLARE_BITMAP(insync, PTRS_PER_PGD); | |
12941 | - static unsigned long start = VMALLOC_START & PGDIR_MASK; | |
12942 | - unsigned long address; | |
cc90b958 | 12943 | - |
00e5a55c BS |
12944 | - for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { |
12945 | - if (!test_bit(pgd_index(address), insync)) { | |
12946 | - const pgd_t *pgd_ref = pgd_offset_k(address); | |
12947 | - struct page *page; | |
12948 | - | |
12949 | - if (pgd_none(*pgd_ref)) | |
12950 | - continue; | |
12951 | - spin_lock(&pgd_lock); | |
12952 | - list_for_each_entry(page, &pgd_list, lru) { | |
12953 | - pgd_t *pgd; | |
12954 | - pgd = (pgd_t *)page_address(page) + pgd_index(address); | |
12955 | - if (pgd_none(*pgd)) | |
12956 | - set_pgd(pgd, *pgd_ref); | |
12957 | - else | |
12958 | - BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); | |
12959 | - } | |
12960 | - spin_unlock(&pgd_lock); | |
12961 | - set_bit(pgd_index(address), insync); | |
cc90b958 | 12962 | - } |
00e5a55c BS |
12963 | - if (address == start) |
12964 | - start = address + PGDIR_SIZE; | |
cc90b958 | 12965 | - } |
00e5a55c BS |
12966 | - /* Check that there is no need to do the same for the modules area. */ |
12967 | - BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); | |
12968 | - BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == | |
12969 | - (__START_KERNEL & PGDIR_MASK))); | |
12970 | -} | |
12971 | --- sle11-2009-05-14.orig/arch/x86/mm/highmem_32-xen.c 2009-02-16 16:17:21.000000000 +0100 | |
12972 | +++ sle11-2009-05-14/arch/x86/mm/highmem_32-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
12973 | @@ -18,6 +18,49 @@ void kunmap(struct page *page) | |
12974 | kunmap_high(page); | |
12975 | } | |
cc90b958 | 12976 | |
00e5a55c | 12977 | +static void debug_kmap_atomic_prot(enum km_type type) |
cc90b958 | 12978 | +{ |
00e5a55c BS |
12979 | +#ifdef CONFIG_DEBUG_HIGHMEM |
12980 | + static unsigned warn_count = 10; | |
cc90b958 | 12981 | + |
00e5a55c BS |
12982 | + if (unlikely(warn_count == 0)) |
12983 | + return; | |
cc90b958 | 12984 | + |
00e5a55c BS |
12985 | + if (unlikely(in_interrupt())) { |
12986 | + if (in_irq()) { | |
12987 | + if (type != KM_IRQ0 && type != KM_IRQ1 && | |
12988 | + type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ && | |
12989 | + type != KM_BOUNCE_READ) { | |
12990 | + WARN_ON(1); | |
12991 | + warn_count--; | |
12992 | + } | |
12993 | + } else if (!irqs_disabled()) { /* softirq */ | |
12994 | + if (type != KM_IRQ0 && type != KM_IRQ1 && | |
12995 | + type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 && | |
12996 | + type != KM_SKB_SUNRPC_DATA && | |
12997 | + type != KM_SKB_DATA_SOFTIRQ && | |
12998 | + type != KM_BOUNCE_READ) { | |
12999 | + WARN_ON(1); | |
13000 | + warn_count--; | |
13001 | + } | |
13002 | + } | |
13003 | + } | |
cc90b958 | 13004 | + |
00e5a55c BS |
13005 | + if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ || |
13006 | + type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) { | |
13007 | + if (!irqs_disabled()) { | |
13008 | + WARN_ON(1); | |
13009 | + warn_count--; | |
13010 | + } | |
13011 | + } else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) { | |
13012 | + if (irq_count() == 0 && !irqs_disabled()) { | |
13013 | + WARN_ON(1); | |
13014 | + warn_count--; | |
13015 | + } | |
13016 | + } | |
13017 | +#endif | |
cc90b958 | 13018 | +} |
00e5a55c BS |
13019 | + |
13020 | /* | |
13021 | * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because | |
13022 | * no global lock is needed and because the kmap code must perform a global TLB | |
13023 | @@ -37,6 +80,8 @@ void *kmap_atomic_prot(struct page *page | |
13024 | if (!PageHighMem(page)) | |
13025 | return page_address(page); | |
cc90b958 | 13026 | |
00e5a55c BS |
13027 | + debug_kmap_atomic_prot(type); |
13028 | + | |
13029 | idx = type + KM_TYPE_NR*smp_processor_id(); | |
13030 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | |
13031 | BUG_ON(!pte_none(*(kmap_pte-idx))); | |
13032 | --- sle11-2009-05-14.orig/arch/x86/mm/hypervisor.c 2009-05-06 10:23:43.000000000 +0200 | |
13033 | +++ sle11-2009-05-14/arch/x86/mm/hypervisor.c 2009-05-14 11:18:39.000000000 +0200 | |
13034 | @@ -869,15 +869,11 @@ int xen_limit_pages_to_max_mfn( | |
cc90b958 | 13035 | } |
00e5a55c | 13036 | EXPORT_SYMBOL_GPL(xen_limit_pages_to_max_mfn); |
cc90b958 | 13037 | |
00e5a55c BS |
13038 | -#ifdef __i386__ |
13039 | -int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b) | |
13040 | +int write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc) | |
cc90b958 | 13041 | { |
00e5a55c BS |
13042 | - __u32 *lp = (__u32 *)((char *)ldt + entry * 8); |
13043 | - maddr_t mach_lp = arbitrary_virt_to_machine(lp); | |
13044 | - return HYPERVISOR_update_descriptor( | |
13045 | - mach_lp, (u64)entry_a | ((u64)entry_b<<32)); | |
13046 | + maddr_t mach_lp = arbitrary_virt_to_machine(ldt + entry); | |
13047 | + return HYPERVISOR_update_descriptor(mach_lp, *(const u64*)desc); | |
cc90b958 | 13048 | } |
00e5a55c | 13049 | -#endif |
cc90b958 | 13050 | |
00e5a55c | 13051 | #define MAX_BATCHED_FULL_PTES 32 |
cc90b958 | 13052 | |
00e5a55c BS |
13053 | --- sle11-2009-05-14.orig/arch/x86/mm/init_32-xen.c 2009-02-16 16:18:36.000000000 +0100 |
13054 | +++ sle11-2009-05-14/arch/x86/mm/init_32-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
13055 | @@ -27,13 +27,13 @@ | |
13056 | #include <linux/bootmem.h> | |
13057 | #include <linux/slab.h> | |
13058 | #include <linux/proc_fs.h> | |
13059 | -#include <linux/efi.h> | |
13060 | #include <linux/memory_hotplug.h> | |
13061 | #include <linux/initrd.h> | |
13062 | #include <linux/cpumask.h> | |
13063 | #include <linux/dma-mapping.h> | |
13064 | #include <linux/scatterlist.h> | |
cc90b958 | 13065 | |
00e5a55c BS |
13066 | +#include <asm/asm.h> |
13067 | #include <asm/processor.h> | |
13068 | #include <asm/system.h> | |
13069 | #include <asm/uaccess.h> | |
13070 | @@ -42,18 +42,22 @@ | |
13071 | #include <asm/fixmap.h> | |
13072 | #include <asm/e820.h> | |
13073 | #include <asm/apic.h> | |
13074 | +#include <asm/bugs.h> | |
13075 | #include <asm/tlb.h> | |
13076 | #include <asm/tlbflush.h> | |
13077 | +#include <asm/pgalloc.h> | |
13078 | #include <asm/sections.h> | |
13079 | #include <asm/hypervisor.h> | |
13080 | #include <asm/swiotlb.h> | |
13081 | +#include <asm/setup.h> | |
13082 | +#include <asm/cacheflush.h> | |
13083 | ||
13084 | unsigned int __VMALLOC_RESERVE = 128 << 20; | |
13085 | ||
13086 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | |
13087 | unsigned long highstart_pfn, highend_pfn; | |
13088 | ||
13089 | -static int noinline do_test_wp_bit(void); | |
13090 | +static noinline int do_test_wp_bit(void); | |
13091 | ||
13092 | /* | |
13093 | * Creates a middle page table and puts a pointer to it in the | |
13094 | @@ -64,17 +68,16 @@ static pmd_t * __init one_md_table_init( | |
cc90b958 | 13095 | { |
00e5a55c BS |
13096 | pud_t *pud; |
13097 | pmd_t *pmd_table; | |
13098 | - | |
13099 | + | |
13100 | #ifdef CONFIG_X86_PAE | |
13101 | if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) { | |
13102 | pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); | |
cc90b958 | 13103 | |
00e5a55c BS |
13104 | - paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); |
13105 | + paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); | |
13106 | make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables); | |
13107 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | |
13108 | pud = pud_offset(pgd, 0); | |
13109 | - if (pmd_table != pmd_offset(pud, 0)) | |
13110 | - BUG(); | |
13111 | + BUG_ON(pmd_table != pmd_offset(pud, 0)); | |
13112 | } | |
13113 | #endif | |
13114 | pud = pud_offset(pgd, 0); | |
13115 | @@ -85,7 +88,7 @@ static pmd_t * __init one_md_table_init( | |
cc90b958 | 13116 | |
00e5a55c BS |
13117 | /* |
13118 | * Create a page table and place a pointer to it in a middle page | |
13119 | - * directory entry. | |
13120 | + * directory entry: | |
13121 | */ | |
13122 | static pte_t * __init one_page_table_init(pmd_t *pmd) | |
13123 | { | |
13124 | @@ -99,9 +102,10 @@ static pte_t * __init one_page_table_ini | |
13125 | #ifdef CONFIG_DEBUG_PAGEALLOC | |
13126 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); | |
13127 | #endif | |
13128 | - if (!page_table) | |
13129 | + if (!page_table) { | |
13130 | page_table = | |
13131 | (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); | |
13132 | + } | |
13133 | ||
13134 | paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT); | |
13135 | make_lowmem_page_readonly(page_table, | |
13136 | @@ -114,22 +118,21 @@ static pte_t * __init one_page_table_ini | |
13137 | } | |
13138 | ||
13139 | /* | |
13140 | - * This function initializes a certain range of kernel virtual memory | |
13141 | + * This function initializes a certain range of kernel virtual memory | |
13142 | * with new bootmem page tables, everywhere page tables are missing in | |
13143 | * the given range. | |
13144 | - */ | |
cc90b958 | 13145 | - |
00e5a55c BS |
13146 | -/* |
13147 | - * NOTE: The pagetables are allocated contiguous on the physical space | |
13148 | - * so we can cache the place of the first one and move around without | |
13149 | + * | |
13150 | + * NOTE: The pagetables are allocated contiguous on the physical space | |
13151 | + * so we can cache the place of the first one and move around without | |
13152 | * checking the pgd every time. | |
13153 | */ | |
13154 | -static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) | |
13155 | +static void __init | |
13156 | +page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) | |
13157 | { | |
13158 | - pgd_t *pgd; | |
13159 | - pmd_t *pmd; | |
13160 | int pgd_idx, pmd_idx; | |
13161 | unsigned long vaddr; | |
13162 | + pgd_t *pgd; | |
13163 | + pmd_t *pmd; | |
13164 | ||
13165 | vaddr = start; | |
13166 | pgd_idx = pgd_index(vaddr); | |
13167 | @@ -139,7 +142,8 @@ static void __init page_table_range_init | |
13168 | for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { | |
13169 | pmd = one_md_table_init(pgd); | |
13170 | pmd = pmd + pmd_index(vaddr); | |
13171 | - for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { | |
13172 | + for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); | |
13173 | + pmd++, pmd_idx++) { | |
13174 | if (vaddr < hypervisor_virt_start) | |
13175 | one_page_table_init(pmd); | |
13176 | ||
13177 | @@ -157,17 +161,17 @@ static inline int is_kernel_text(unsigne | |
13178 | } | |
13179 | ||
13180 | /* | |
13181 | - * This maps the physical memory to kernel virtual address space, a total | |
13182 | - * of max_low_pfn pages, by creating page tables starting from address | |
13183 | - * PAGE_OFFSET. | |
13184 | + * This maps the physical memory to kernel virtual address space, a total | |
13185 | + * of max_low_pfn pages, by creating page tables starting from address | |
13186 | + * PAGE_OFFSET: | |
13187 | */ | |
13188 | static void __init kernel_physical_mapping_init(pgd_t *pgd_base) | |
13189 | { | |
13190 | + int pgd_idx, pmd_idx, pte_ofs; | |
13191 | unsigned long pfn; | |
13192 | pgd_t *pgd; | |
13193 | pmd_t *pmd; | |
13194 | pte_t *pte; | |
13195 | - int pgd_idx, pmd_idx, pte_ofs; | |
13196 | ||
13197 | unsigned long max_ram_pfn = xen_start_info->nr_pages; | |
13198 | if (max_ram_pfn > max_low_pfn) | |
13199 | @@ -195,36 +199,49 @@ static void __init kernel_physical_mappi | |
13200 | if (pfn >= max_low_pfn) | |
13201 | continue; | |
13202 | pmd += pmd_idx; | |
13203 | - for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { | |
13204 | - unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET; | |
13205 | - if (address >= hypervisor_virt_start) | |
13206 | + for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; | |
13207 | + pmd++, pmd_idx++) { | |
13208 | + unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET; | |
13209 | + | |
13210 | + if (addr >= hypervisor_virt_start) | |
13211 | continue; | |
13212 | ||
13213 | - /* Map with big pages if possible, otherwise create normal page tables. */ | |
13214 | + /* | |
13215 | + * Map with big pages if possible, otherwise | |
13216 | + * create normal page tables: | |
13217 | + */ | |
13218 | if (cpu_has_pse) { | |
13219 | - unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; | |
13220 | - if (is_kernel_text(address) || is_kernel_text(address2)) | |
13221 | - set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); | |
13222 | - else | |
13223 | - set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); | |
13224 | + unsigned int addr2; | |
13225 | + pgprot_t prot = PAGE_KERNEL_LARGE; | |
13226 | + | |
13227 | + addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + | |
13228 | + PAGE_OFFSET + PAGE_SIZE-1; | |
13229 | + | |
13230 | + if (is_kernel_text(addr) || | |
13231 | + is_kernel_text(addr2)) | |
13232 | + prot = PAGE_KERNEL_LARGE_EXEC; | |
13233 | + | |
13234 | + set_pmd(pmd, pfn_pmd(pfn, prot)); | |
13235 | ||
13236 | pfn += PTRS_PER_PTE; | |
13237 | - } else { | |
13238 | - pte = one_page_table_init(pmd); | |
13239 | + continue; | |
13240 | + } | |
13241 | + pte = one_page_table_init(pmd); | |
13242 | + | |
13243 | + for (pte += pte_ofs; | |
13244 | + pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; | |
13245 | + pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) { | |
13246 | + pgprot_t prot = PAGE_KERNEL; | |
13247 | + | |
13248 | + /* XEN: Only map initial RAM allocation. */ | |
13249 | + if ((pfn >= max_ram_pfn) || pte_present(*pte)) | |
13250 | + continue; | |
13251 | + if (is_kernel_text(addr)) | |
13252 | + prot = PAGE_KERNEL_EXEC; | |
13253 | ||
13254 | - for (pte += pte_ofs; | |
13255 | - pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; | |
13256 | - pte++, pfn++, pte_ofs++, address += PAGE_SIZE) { | |
13257 | - /* XEN: Only map initial RAM allocation. */ | |
13258 | - if ((pfn >= max_ram_pfn) || pte_present(*pte)) | |
cc90b958 | 13259 | - continue; |
00e5a55c BS |
13260 | - if (is_kernel_text(address)) |
13261 | - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); | |
cc90b958 | 13262 | - else |
00e5a55c | 13263 | - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); |
cc90b958 | 13264 | - } |
00e5a55c BS |
13265 | - pte_ofs = 0; |
13266 | + set_pte(pte, pfn_pte(pfn, prot)); | |
13267 | } | |
13268 | + pte_ofs = 0; | |
13269 | } | |
13270 | pmd_idx = 0; | |
13271 | } | |
13272 | @@ -245,57 +262,23 @@ static inline int page_kills_ppro(unsign | |
13273 | ||
13274 | #endif | |
13275 | ||
13276 | -int page_is_ram(unsigned long pagenr) | |
cc90b958 | 13277 | -{ |
00e5a55c BS |
13278 | - int i; |
13279 | - unsigned long addr, end; | |
cc90b958 | 13280 | - |
00e5a55c BS |
13281 | - if (efi_enabled) { |
13282 | - efi_memory_desc_t *md; | |
13283 | - void *p; | |
cc90b958 | 13284 | - |
00e5a55c BS |
13285 | - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
13286 | - md = p; | |
13287 | - if (!is_available_memory(md)) | |
13288 | - continue; | |
13289 | - addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT; | |
13290 | - end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT; | |
cc90b958 | 13291 | - |
00e5a55c BS |
13292 | - if ((pagenr >= addr) && (pagenr < end)) |
13293 | - return 1; | |
13294 | - } | |
13295 | - return 0; | |
cc90b958 | 13296 | - } |
cc90b958 | 13297 | - |
00e5a55c | 13298 | - for (i = 0; i < e820.nr_map; i++) { |
cc90b958 | 13299 | - |
00e5a55c BS |
13300 | - if (e820.map[i].type != E820_RAM) /* not usable memory */ |
13301 | - continue; | |
13302 | - /* | |
13303 | - * !!!FIXME!!! Some BIOSen report areas as RAM that | |
13304 | - * are not. Notably the 640->1Mb area. We need a sanity | |
13305 | - * check here. | |
13306 | - */ | |
13307 | - addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT; | |
13308 | - end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT; | |
13309 | - if ((pagenr >= addr) && (pagenr < end)) | |
13310 | - return 1; | |
cc90b958 | 13311 | - } |
00e5a55c | 13312 | - return 0; |
cc90b958 | 13313 | -} |
00e5a55c BS |
13314 | - |
13315 | #ifdef CONFIG_HIGHMEM | |
13316 | pte_t *kmap_pte; | |
13317 | pgprot_t kmap_prot; | |
cc90b958 | 13318 | |
00e5a55c BS |
13319 | -#define kmap_get_fixmap_pte(vaddr) \ |
13320 | - pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr)) | |
13321 | +static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr) | |
13322 | +{ | |
13323 | + return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), | |
13324 | + vaddr), vaddr), vaddr); | |
13325 | +} | |
cc90b958 | 13326 | |
00e5a55c | 13327 | static void __init kmap_init(void) |
cc90b958 | 13328 | { |
00e5a55c BS |
13329 | unsigned long kmap_vstart; |
13330 | ||
13331 | - /* cache the first kmap pte */ | |
13332 | + /* | |
13333 | + * Cache the first kmap pte: | |
13334 | + */ | |
13335 | kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); | |
13336 | kmap_pte = kmap_get_fixmap_pte(kmap_vstart); | |
13337 | ||
13338 | @@ -304,11 +287,11 @@ static void __init kmap_init(void) | |
13339 | ||
13340 | static void __init permanent_kmaps_init(pgd_t *pgd_base) | |
cc90b958 | 13341 | { |
00e5a55c BS |
13342 | + unsigned long vaddr; |
13343 | pgd_t *pgd; | |
13344 | pud_t *pud; | |
13345 | pmd_t *pmd; | |
13346 | pte_t *pte; | |
13347 | - unsigned long vaddr; | |
13348 | ||
13349 | vaddr = PKMAP_BASE; | |
13350 | page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); | |
13351 | @@ -317,7 +300,7 @@ static void __init permanent_kmaps_init( | |
13352 | pud = pud_offset(pgd, vaddr); | |
13353 | pmd = pmd_offset(pud, vaddr); | |
13354 | pte = pte_offset_kernel(pmd, vaddr); | |
13355 | - pkmap_page_table = pte; | |
13356 | + pkmap_page_table = pte; | |
cc90b958 BS |
13357 | } |
13358 | ||
00e5a55c BS |
13359 | static void __meminit free_new_highpage(struct page *page, int pfn) |
13360 | @@ -337,7 +320,8 @@ void __init add_one_highpage_init(struct | |
13361 | SetPageReserved(page); | |
13362 | } | |
13363 | ||
13364 | -static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn) | |
13365 | +static int __meminit | |
13366 | +add_one_highpage_hotplug(struct page *page, unsigned long pfn) | |
cc90b958 | 13367 | { |
00e5a55c BS |
13368 | free_new_highpage(page, pfn); |
13369 | totalram_pages++; | |
13370 | @@ -345,6 +329,7 @@ static int __meminit add_one_highpage_ho | |
13371 | max_mapnr = max(pfn, max_mapnr); | |
13372 | #endif | |
13373 | num_physpages++; | |
cc90b958 | 13374 | + |
00e5a55c | 13375 | return 0; |
cc90b958 | 13376 | } |
00e5a55c BS |
13377 | |
13378 | @@ -352,7 +337,7 @@ static int __meminit add_one_highpage_ho | |
13379 | * Not currently handling the NUMA case. | |
13380 | * Assuming single node and all memory that | |
13381 | * has been added dynamically that would be | |
13382 | - * onlined here is in HIGHMEM | |
13383 | + * onlined here is in HIGHMEM. | |
13384 | */ | |
13385 | void __meminit online_page(struct page *page) | |
cc90b958 | 13386 | { |
00e5a55c BS |
13387 | @@ -360,13 +345,11 @@ void __meminit online_page(struct page * |
13388 | add_one_highpage_hotplug(page, page_to_pfn(page)); | |
cc90b958 BS |
13389 | } |
13390 | ||
00e5a55c BS |
13391 | - |
13392 | -#ifdef CONFIG_NUMA | |
13393 | -extern void set_highmem_pages_init(int); | |
13394 | -#else | |
13395 | +#ifndef CONFIG_NUMA | |
13396 | static void __init set_highmem_pages_init(int bad_ppro) | |
cc90b958 | 13397 | { |
00e5a55c | 13398 | int pfn; |
cc90b958 | 13399 | + |
00e5a55c BS |
13400 | for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) { |
13401 | /* | |
13402 | * Holes under sparsemem might not have no mem_map[]: | |
13403 | @@ -376,23 +359,18 @@ static void __init set_highmem_pages_ini | |
13404 | } | |
13405 | totalram_pages += totalhigh_pages; | |
cc90b958 | 13406 | } |
00e5a55c BS |
13407 | -#endif /* CONFIG_FLATMEM */ |
13408 | +#endif /* !CONFIG_NUMA */ | |
cc90b958 | 13409 | |
00e5a55c BS |
13410 | #else |
13411 | -#define kmap_init() do { } while (0) | |
13412 | -#define permanent_kmaps_init(pgd_base) do { } while (0) | |
13413 | -#define set_highmem_pages_init(bad_ppro) do { } while (0) | |
13414 | +# define kmap_init() do { } while (0) | |
13415 | +# define permanent_kmaps_init(pgd_base) do { } while (0) | |
13416 | +# define set_highmem_pages_init(bad_ppro) do { } while (0) | |
13417 | #endif /* CONFIG_HIGHMEM */ | |
cc90b958 | 13418 | |
00e5a55c BS |
13419 | -unsigned long long __PAGE_KERNEL = _PAGE_KERNEL; |
13420 | +pteval_t __PAGE_KERNEL = _PAGE_KERNEL; | |
13421 | EXPORT_SYMBOL(__PAGE_KERNEL); | |
13422 | -unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; | |
13423 | ||
13424 | -#ifdef CONFIG_NUMA | |
13425 | -extern void __init remap_numa_kva(void); | |
13426 | -#else | |
13427 | -#define remap_numa_kva() do {} while (0) | |
13428 | -#endif | |
13429 | +pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; | |
13430 | ||
13431 | pgd_t *swapper_pg_dir; | |
13432 | ||
13433 | @@ -410,9 +388,8 @@ static void __init xen_pagetable_setup_d | |
13434 | * the boot process. | |
13435 | * | |
13436 | * If we're booting on native hardware, this will be a pagetable | |
13437 | - * constructed in arch/i386/kernel/head.S, and not running in PAE mode | |
13438 | - * (even if we'll end up running in PAE). The root of the pagetable | |
13439 | - * will be swapper_pg_dir. | |
13440 | + * constructed in arch/x86/kernel/head_32.S. The root of the | |
13441 | + * pagetable will be swapper_pg_dir. | |
13442 | * | |
13443 | * If we're booting paravirtualized under a hypervisor, then there are | |
13444 | * more options: we may already be running PAE, and the pagetable may | |
13445 | @@ -424,10 +401,10 @@ static void __init xen_pagetable_setup_d | |
13446 | * be partially populated, and so it avoids stomping on any existing | |
13447 | * mappings. | |
cc90b958 | 13448 | */ |
00e5a55c BS |
13449 | -static void __init pagetable_init (void) |
13450 | +static void __init pagetable_init(void) | |
cc90b958 | 13451 | { |
00e5a55c BS |
13452 | - unsigned long vaddr, end; |
13453 | pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base; | |
13454 | + unsigned long vaddr, end; | |
cc90b958 | 13455 | |
00e5a55c | 13456 | xen_pagetable_setup_start(pgd_base); |
cc90b958 | 13457 | |
00e5a55c BS |
13458 | @@ -449,34 +426,36 @@ static void __init pagetable_init (void) |
13459 | * Fixed mappings, only the page table structure has to be | |
13460 | * created - mappings will be set by set_fixmap(): | |
13461 | */ | |
13462 | + early_ioremap_clear(); | |
13463 | vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; | |
13464 | end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; | |
13465 | page_table_range_init(vaddr, end, pgd_base); | |
13466 | + early_ioremap_reset(); | |
cc90b958 | 13467 | |
00e5a55c | 13468 | permanent_kmaps_init(pgd_base); |
cc90b958 | 13469 | |
00e5a55c BS |
13470 | xen_pagetable_setup_done(pgd_base); |
13471 | } | |
cc90b958 | 13472 | |
00e5a55c BS |
13473 | -#if defined(CONFIG_HIBERNATION) || defined(CONFIG_ACPI) |
13474 | +#if defined(CONFIG_ACPI_SLEEP) && !defined(CONFIG_XEN) | |
13475 | /* | |
13476 | - * Swap suspend & friends need this for resume because things like the intel-agp | |
13477 | + * ACPI suspend needs this for resume, because things like the intel-agp | |
13478 | * driver might have split up a kernel 4MB mapping. | |
13479 | */ | |
13480 | -char __nosavedata swsusp_pg_dir[PAGE_SIZE] | |
13481 | - __attribute__ ((aligned (PAGE_SIZE))); | |
13482 | +char swsusp_pg_dir[PAGE_SIZE] | |
13483 | + __attribute__ ((aligned(PAGE_SIZE))); | |
cc90b958 | 13484 | |
00e5a55c | 13485 | static inline void save_pg_dir(void) |
cc90b958 | 13486 | { |
00e5a55c BS |
13487 | memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE); |
13488 | } | |
cc90b958 | 13489 | -#else |
00e5a55c BS |
13490 | +#else /* !CONFIG_ACPI_SLEEP */ |
13491 | static inline void save_pg_dir(void) | |
13492 | { | |
cc90b958 | 13493 | } |
00e5a55c BS |
13494 | -#endif |
13495 | +#endif /* !CONFIG_ACPI_SLEEP */ | |
cc90b958 | 13496 | |
00e5a55c BS |
13497 | -void zap_low_mappings (void) |
13498 | +void zap_low_mappings(void) | |
13499 | { | |
13500 | int i; | |
13501 | ||
13502 | @@ -488,22 +467,24 @@ void zap_low_mappings (void) | |
13503 | * Note that "pgd_clear()" doesn't do it for | |
13504 | * us, because pgd_clear() is a no-op on i386. | |
13505 | */ | |
13506 | - for (i = 0; i < USER_PTRS_PER_PGD; i++) | |
13507 | + for (i = 0; i < USER_PTRS_PER_PGD; i++) { | |
13508 | #if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN) | |
13509 | set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); | |
13510 | #else | |
13511 | set_pgd(swapper_pg_dir+i, __pgd(0)); | |
13512 | #endif | |
cc90b958 | 13513 | + } |
00e5a55c BS |
13514 | flush_tlb_all(); |
13515 | } | |
13516 | ||
13517 | -int nx_enabled = 0; | |
13518 | +int nx_enabled; | |
cc90b958 | 13519 | + |
00e5a55c BS |
13520 | +pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX; |
13521 | +EXPORT_SYMBOL_GPL(__supported_pte_mask); | |
13522 | ||
13523 | #ifdef CONFIG_X86_PAE | |
13524 | ||
13525 | -static int disable_nx __initdata = 0; | |
13526 | -u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; | |
13527 | -EXPORT_SYMBOL_GPL(__supported_pte_mask); | |
13528 | +static int disable_nx __initdata; | |
13529 | ||
13530 | /* | |
13531 | * noexec = on|off | |
13532 | @@ -520,11 +501,14 @@ static int __init noexec_setup(char *str | |
13533 | __supported_pte_mask |= _PAGE_NX; | |
13534 | disable_nx = 0; | |
13535 | } | |
13536 | - } else if (!strcmp(str,"off")) { | |
13537 | - disable_nx = 1; | |
13538 | - __supported_pte_mask &= ~_PAGE_NX; | |
13539 | - } else | |
13540 | - return -EINVAL; | |
13541 | + } else { | |
13542 | + if (!strcmp(str, "off")) { | |
13543 | + disable_nx = 1; | |
13544 | + __supported_pte_mask &= ~_PAGE_NX; | |
13545 | + } else { | |
13546 | + return -EINVAL; | |
cc90b958 | 13547 | + } |
cc90b958 | 13548 | + } |
00e5a55c BS |
13549 | |
13550 | return 0; | |
13551 | } | |
13552 | @@ -536,6 +520,7 @@ static void __init set_nx(void) | |
13553 | ||
13554 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | |
13555 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | |
cc90b958 | 13556 | + |
00e5a55c BS |
13557 | if ((v[3] & (1 << 20)) && !disable_nx) { |
13558 | rdmsr(MSR_EFER, l, h); | |
13559 | l |= EFER_NX; | |
13560 | @@ -545,35 +530,6 @@ static void __init set_nx(void) | |
13561 | } | |
13562 | } | |
13563 | } | |
13564 | - | |
13565 | -/* | |
13566 | - * Enables/disables executability of a given kernel page and | |
13567 | - * returns the previous setting. | |
13568 | - */ | |
13569 | -int __init set_kernel_exec(unsigned long vaddr, int enable) | |
13570 | -{ | |
13571 | - pte_t *pte; | |
13572 | - int ret = 1; | |
13573 | - | |
13574 | - if (!nx_enabled) | |
13575 | - goto out; | |
13576 | - | |
13577 | - pte = lookup_address(vaddr); | |
13578 | - BUG_ON(!pte); | |
13579 | - | |
13580 | - if (!pte_exec_kernel(*pte)) | |
13581 | - ret = 0; | |
13582 | - | |
13583 | - if (enable) | |
13584 | - pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32)); | |
13585 | - else | |
13586 | - pte->pte_high |= 1 << (_PAGE_BIT_NX - 32); | |
13587 | - pte_update_defer(&init_mm, vaddr, pte); | |
13588 | - __flush_tlb_all(); | |
13589 | -out: | |
13590 | - return ret; | |
13591 | -} | |
13592 | - | |
13593 | #endif | |
13594 | ||
13595 | /* | |
13596 | @@ -590,21 +546,10 @@ void __init paging_init(void) | |
13597 | #ifdef CONFIG_X86_PAE | |
13598 | set_nx(); | |
13599 | if (nx_enabled) | |
13600 | - printk("NX (Execute Disable) protection: active\n"); | |
13601 | + printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | |
13602 | #endif | |
13603 | - | |
13604 | pagetable_init(); | |
13605 | ||
13606 | -#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN) | |
13607 | - /* | |
13608 | - * We will bail out later - printk doesn't work right now so | |
13609 | - * the user would just see a hanging kernel. | |
13610 | - * when running as xen domain we are already in PAE mode at | |
13611 | - * this point. | |
13612 | - */ | |
13613 | - if (cpu_has_pae) | |
13614 | - set_in_cr4(X86_CR4_PAE); | |
13615 | -#endif | |
13616 | __flush_tlb_all(); | |
13617 | ||
13618 | kmap_init(); | |
13619 | @@ -631,10 +576,10 @@ void __init paging_init(void) | |
13620 | * used to involve black magic jumps to work around some nasty CPU bugs, | |
13621 | * but fortunately the switch to using exceptions got rid of all that. | |
13622 | */ | |
13623 | - | |
13624 | static void __init test_wp_bit(void) | |
13625 | { | |
13626 | - printk("Checking if this processor honours the WP bit even in supervisor mode... "); | |
13627 | + printk(KERN_INFO | |
13628 | + "Checking if this processor honours the WP bit even in supervisor mode..."); | |
13629 | ||
13630 | /* Any page-aligned address will do, the test is non-destructive */ | |
13631 | __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY); | |
13632 | @@ -642,23 +587,22 @@ static void __init test_wp_bit(void) | |
13633 | clear_fixmap(FIX_WP_TEST); | |
13634 | ||
13635 | if (!boot_cpu_data.wp_works_ok) { | |
13636 | - printk("No.\n"); | |
13637 | + printk(KERN_CONT "No.\n"); | |
13638 | #ifdef CONFIG_X86_WP_WORKS_OK | |
13639 | - panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!"); | |
13640 | + panic( | |
13641 | + "This kernel doesn't support CPU's with broken WP. Recompile it for a 386!"); | |
13642 | #endif | |
13643 | } else { | |
13644 | - printk("Ok.\n"); | |
13645 | + printk(KERN_CONT "Ok.\n"); | |
13646 | } | |
13647 | } | |
13648 | ||
13649 | -static struct kcore_list kcore_mem, kcore_vmalloc; | |
13650 | +static struct kcore_list kcore_mem, kcore_vmalloc; | |
13651 | ||
13652 | void __init mem_init(void) | |
13653 | { | |
13654 | - extern int ppro_with_ram_bug(void); | |
13655 | int codesize, reservedpages, datasize, initsize; | |
13656 | - int tmp; | |
13657 | - int bad_ppro; | |
13658 | + int tmp, bad_ppro; | |
13659 | unsigned long pfn; | |
13660 | ||
13661 | #if defined(CONFIG_SWIOTLB) | |
13662 | @@ -668,19 +612,19 @@ void __init mem_init(void) | |
13663 | #ifdef CONFIG_FLATMEM | |
13664 | BUG_ON(!mem_map); | |
13665 | #endif | |
13666 | - | |
13667 | bad_ppro = ppro_with_ram_bug(); | |
13668 | ||
13669 | #ifdef CONFIG_HIGHMEM | |
13670 | /* check that fixmap and pkmap do not overlap */ | |
13671 | - if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { | |
13672 | - printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n"); | |
13673 | + if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { | |
13674 | + printk(KERN_ERR | |
13675 | + "fixmap and kmap areas overlap - this will crash\n"); | |
13676 | printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", | |
13677 | - PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START); | |
13678 | + PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE, | |
13679 | + FIXADDR_START); | |
13680 | BUG(); | |
13681 | } | |
13682 | #endif | |
13683 | - | |
13684 | /* this will put all low memory onto the freelists */ | |
13685 | totalram_pages += free_all_bootmem(); | |
13686 | /* XEN: init and count low-mem pages outside initial allocation. */ | |
13687 | @@ -693,7 +637,7 @@ void __init mem_init(void) | |
13688 | reservedpages = 0; | |
13689 | for (tmp = 0; tmp < max_low_pfn; tmp++) | |
13690 | /* | |
13691 | - * Only count reserved RAM pages | |
13692 | + * Only count reserved RAM pages: | |
13693 | */ | |
13694 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) | |
13695 | reservedpages++; | |
13696 | @@ -704,11 +648,12 @@ void __init mem_init(void) | |
13697 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; | |
13698 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; | |
13699 | ||
13700 | - kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); | |
13701 | - kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, | |
13702 | + kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); | |
13703 | + kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, | |
13704 | VMALLOC_END-VMALLOC_START); | |
13705 | ||
13706 | - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", | |
13707 | + printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " | |
13708 | + "%dk reserved, %dk data, %dk init, %ldk highmem)\n", | |
13709 | (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), | |
13710 | num_physpages << (PAGE_SHIFT-10), | |
13711 | codesize >> 10, | |
13712 | @@ -719,54 +664,53 @@ void __init mem_init(void) | |
13713 | ); | |
13714 | ||
13715 | #if 1 /* double-sanity-check paranoia */ | |
13716 | - printk("virtual kernel memory layout:\n" | |
13717 | - " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" | |
13718 | + printk(KERN_INFO "virtual kernel memory layout:\n" | |
13719 | + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" | |
13720 | #ifdef CONFIG_HIGHMEM | |
13721 | - " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" | |
13722 | + " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" | |
13723 | #endif | |
13724 | - " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" | |
13725 | - " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" | |
13726 | - " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" | |
13727 | - " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" | |
13728 | - " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", | |
13729 | - FIXADDR_START, FIXADDR_TOP, | |
13730 | - (FIXADDR_TOP - FIXADDR_START) >> 10, | |
13731 | + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" | |
13732 | + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" | |
13733 | + " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" | |
13734 | + " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" | |
13735 | + " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", | |
13736 | + FIXADDR_START, FIXADDR_TOP, | |
13737 | + (FIXADDR_TOP - FIXADDR_START) >> 10, | |
13738 | ||
13739 | #ifdef CONFIG_HIGHMEM | |
13740 | - PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, | |
13741 | - (LAST_PKMAP*PAGE_SIZE) >> 10, | |
13742 | + PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, | |
13743 | + (LAST_PKMAP*PAGE_SIZE) >> 10, | |
13744 | #endif | |
13745 | ||
13746 | - VMALLOC_START, VMALLOC_END, | |
13747 | - (VMALLOC_END - VMALLOC_START) >> 20, | |
13748 | + VMALLOC_START, VMALLOC_END, | |
13749 | + (VMALLOC_END - VMALLOC_START) >> 20, | |
13750 | ||
13751 | - (unsigned long)__va(0), (unsigned long)high_memory, | |
13752 | - ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, | |
13753 | + (unsigned long)__va(0), (unsigned long)high_memory, | |
13754 | + ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, | |
13755 | ||
13756 | - (unsigned long)&__init_begin, (unsigned long)&__init_end, | |
13757 | - ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10, | |
13758 | + (unsigned long)&__init_begin, (unsigned long)&__init_end, | |
13759 | + ((unsigned long)&__init_end - | |
13760 | + (unsigned long)&__init_begin) >> 10, | |
13761 | ||
13762 | - (unsigned long)&_etext, (unsigned long)&_edata, | |
13763 | - ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, | |
13764 | + (unsigned long)&_etext, (unsigned long)&_edata, | |
13765 | + ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, | |
13766 | ||
13767 | - (unsigned long)&_text, (unsigned long)&_etext, | |
13768 | - ((unsigned long)&_etext - (unsigned long)&_text) >> 10); | |
13769 | + (unsigned long)&_text, (unsigned long)&_etext, | |
13770 | + ((unsigned long)&_etext - (unsigned long)&_text) >> 10); | |
13771 | ||
13772 | #ifdef CONFIG_HIGHMEM | |
13773 | - BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START); | |
13774 | - BUG_ON(VMALLOC_END > PKMAP_BASE); | |
13775 | + BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); | |
13776 | + BUG_ON(VMALLOC_END > PKMAP_BASE); | |
13777 | #endif | |
13778 | - BUG_ON(VMALLOC_START > VMALLOC_END); | |
13779 | - BUG_ON((unsigned long)high_memory > VMALLOC_START); | |
13780 | + BUG_ON(VMALLOC_START > VMALLOC_END); | |
13781 | + BUG_ON((unsigned long)high_memory > VMALLOC_START); | |
13782 | #endif /* double-sanity-check paranoia */ | |
13783 | ||
13784 | -#ifdef CONFIG_X86_PAE | |
13785 | - if (!cpu_has_pae) | |
13786 | - panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); | |
13787 | -#endif | |
13788 | if (boot_cpu_data.wp_works_ok < 0) | |
13789 | test_wp_bit(); | |
13790 | ||
13791 | + cpa_init(); | |
cc90b958 | 13792 | + |
00e5a55c BS |
13793 | /* |
13794 | * Subtle. SMP is doing it's boot stuff late (because it has to | |
13795 | * fork idle threads) - but it also needs low mappings for the | |
13796 | @@ -790,49 +734,35 @@ int arch_add_memory(int nid, u64 start, | |
13797 | ||
13798 | return __add_pages(zone, start_pfn, nr_pages); | |
13799 | } | |
13800 | - | |
13801 | #endif | |
13802 | ||
13803 | -struct kmem_cache *pmd_cache; | |
13804 | - | |
13805 | -void __init pgtable_cache_init(void) | |
13806 | -{ | |
13807 | - if (PTRS_PER_PMD > 1) | |
13808 | - pmd_cache = kmem_cache_create("pmd", | |
13809 | - PTRS_PER_PMD*sizeof(pmd_t), | |
13810 | - PTRS_PER_PMD*sizeof(pmd_t), | |
13811 | - SLAB_PANIC, | |
13812 | - pmd_ctor); | |
13813 | -} | |
13814 | - | |
13815 | /* | |
13816 | * This function cannot be __init, since exceptions don't work in that | |
13817 | * section. Put this after the callers, so that it cannot be inlined. | |
13818 | */ | |
13819 | -static int noinline do_test_wp_bit(void) | |
13820 | +static noinline int do_test_wp_bit(void) | |
13821 | { | |
13822 | char tmp_reg; | |
13823 | int flag; | |
13824 | ||
13825 | __asm__ __volatile__( | |
13826 | - " movb %0,%1 \n" | |
13827 | - "1: movb %1,%0 \n" | |
13828 | - " xorl %2,%2 \n" | |
13829 | + " movb %0, %1 \n" | |
13830 | + "1: movb %1, %0 \n" | |
13831 | + " xorl %2, %2 \n" | |
13832 | "2: \n" | |
13833 | - ".section __ex_table,\"a\"\n" | |
13834 | - " .align 4 \n" | |
13835 | - " .long 1b,2b \n" | |
13836 | - ".previous \n" | |
13837 | + _ASM_EXTABLE(1b,2b) | |
13838 | :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)), | |
13839 | "=q" (tmp_reg), | |
13840 | "=r" (flag) | |
13841 | :"2" (1) | |
13842 | :"memory"); | |
13843 | - | |
cc90b958 | 13844 | + |
00e5a55c BS |
13845 | return flag; |
13846 | } | |
13847 | ||
13848 | #ifdef CONFIG_DEBUG_RODATA | |
13849 | +const int rodata_test_data = 0xC3; | |
13850 | +EXPORT_SYMBOL_GPL(rodata_test_data); | |
13851 | ||
13852 | void mark_rodata_ro(void) | |
13853 | { | |
13854 | @@ -845,32 +775,58 @@ void mark_rodata_ro(void) | |
13855 | if (num_possible_cpus() <= 1) | |
13856 | #endif | |
13857 | { | |
13858 | - change_page_attr(virt_to_page(start), | |
13859 | - size >> PAGE_SHIFT, PAGE_KERNEL_RX); | |
13860 | - printk("Write protecting the kernel text: %luk\n", size >> 10); | |
13861 | + set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | |
13862 | + printk(KERN_INFO "Write protecting the kernel text: %luk\n", | |
13863 | + size >> 10); | |
cc90b958 | 13864 | + |
00e5a55c BS |
13865 | +#ifdef CONFIG_CPA_DEBUG |
13866 | + printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n", | |
13867 | + start, start+size); | |
13868 | + set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT); | |
cc90b958 | 13869 | + |
00e5a55c BS |
13870 | + printk(KERN_INFO "Testing CPA: write protecting again\n"); |
13871 | + set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); | |
cc90b958 | 13872 | +#endif |
00e5a55c BS |
13873 | } |
13874 | #endif | |
13875 | start += size; | |
13876 | size = (unsigned long)__end_rodata - start; | |
13877 | - change_page_attr(virt_to_page(start), | |
13878 | - size >> PAGE_SHIFT, PAGE_KERNEL_RO); | |
13879 | - printk("Write protecting the kernel read-only data: %luk\n", | |
13880 | - size >> 10); | |
13881 | + set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | |
13882 | + printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", | |
13883 | + size >> 10); | |
13884 | + rodata_test(); | |
cc90b958 | 13885 | + |
00e5a55c BS |
13886 | +#ifdef CONFIG_CPA_DEBUG |
13887 | + printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size); | |
13888 | + set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); | |
13889 | ||
13890 | - /* | |
13891 | - * change_page_attr() requires a global_flush_tlb() call after it. | |
13892 | - * We do this after the printk so that if something went wrong in the | |
13893 | - * change, the printk gets out at least to give a better debug hint | |
13894 | - * of who is the culprit. | |
13895 | - */ | |
13896 | - global_flush_tlb(); | |
13897 | + printk(KERN_INFO "Testing CPA: write protecting again\n"); | |
13898 | + set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | |
13899 | +#endif | |
13900 | } | |
13901 | #endif | |
13902 | ||
13903 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | |
13904 | { | |
13905 | +#ifdef CONFIG_DEBUG_PAGEALLOC | |
cc90b958 | 13906 | + /* |
00e5a55c BS |
13907 | + * If debugging page accesses then do not free this memory but |
13908 | + * mark them not present - any buggy init-section access will | |
13909 | + * create a kernel page fault: | |
cc90b958 | 13910 | + */ |
00e5a55c BS |
13911 | + printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", |
13912 | + begin, PAGE_ALIGN(end)); | |
13913 | + set_memory_np(begin, (end - begin) >> PAGE_SHIFT); | |
13914 | +#else | |
13915 | unsigned long addr; | |
13916 | ||
13917 | + /* | |
13918 | + * We just marked the kernel text read only above, now that | |
13919 | + * we are going to free part of that, we need to make that | |
13920 | + * writeable first. | |
13921 | + */ | |
13922 | + set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); | |
cc90b958 | 13923 | + |
00e5a55c BS |
13924 | for (addr = begin; addr < end; addr += PAGE_SIZE) { |
13925 | ClearPageReserved(virt_to_page(addr)); | |
13926 | init_page_count(virt_to_page(addr)); | |
13927 | @@ -879,6 +835,7 @@ void free_init_pages(char *what, unsigne | |
13928 | totalram_pages++; | |
13929 | } | |
13930 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); | |
cc90b958 | 13931 | +#endif |
00e5a55c BS |
13932 | } |
13933 | ||
13934 | void free_initmem(void) | |
13935 | @@ -894,4 +851,3 @@ void free_initrd_mem(unsigned long start | |
13936 | free_init_pages("initrd memory", start, end); | |
13937 | } | |
13938 | #endif | |
13939 | - | |
13940 | --- sle11-2009-05-14.orig/arch/x86/mm/init_64-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
13941 | +++ sle11-2009-05-14/arch/x86/mm/init_64-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
13942 | @@ -46,14 +46,13 @@ | |
13943 | #include <asm/proto.h> | |
13944 | #include <asm/smp.h> | |
13945 | #include <asm/sections.h> | |
13946 | +#include <asm/kdebug.h> | |
13947 | +#include <asm/numa.h> | |
13948 | +#include <asm/cacheflush.h> | |
13949 | ||
13950 | #include <xen/features.h> | |
13951 | ||
13952 | -#ifndef Dprintk | |
13953 | -#define Dprintk(x...) | |
13954 | -#endif | |
13955 | - | |
13956 | -const struct dma_mapping_ops* dma_ops; | |
13957 | +const struct dma_mapping_ops *dma_ops; | |
13958 | EXPORT_SYMBOL(dma_ops); | |
13959 | ||
13960 | #if CONFIG_XEN_COMPAT <= 0x030002 | |
13961 | @@ -80,7 +79,21 @@ extern pte_t level1_fixmap_pgt[PTRS_PER_ | |
13962 | (((mfn_to_pfn((addr) >> PAGE_SHIFT)) << PAGE_SHIFT) + \ | |
13963 | __START_KERNEL_map))) | |
13964 | ||
13965 | -static void __meminit early_make_page_readonly(void *va, unsigned int feature) | |
13966 | +pmd_t *__init early_get_pmd(unsigned long va) | |
13967 | +{ | |
13968 | + unsigned long addr; | |
13969 | + unsigned long *page = (unsigned long *)init_level4_pgt; | |
cc90b958 | 13970 | + |
00e5a55c BS |
13971 | + addr = page[pgd_index(va)]; |
13972 | + addr_to_page(addr, page); | |
cc90b958 | 13973 | + |
00e5a55c BS |
13974 | + addr = page[pud_index(va)]; |
13975 | + addr_to_page(addr, page); | |
cc90b958 | 13976 | + |
00e5a55c | 13977 | + return (pmd_t *)&page[pmd_index(va)]; |
cc90b958 BS |
13978 | +} |
13979 | + | |
00e5a55c BS |
13980 | +void __meminit early_make_page_readonly(void *va, unsigned int feature) |
13981 | { | |
13982 | unsigned long addr, _va = (unsigned long)va; | |
13983 | pte_t pte, *ptep; | |
13984 | @@ -107,76 +120,6 @@ static void __meminit early_make_page_re | |
13985 | BUG(); | |
13986 | } | |
13987 | ||
13988 | -static void __make_page_readonly(void *va) | |
13989 | -{ | |
13990 | - pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep; | |
13991 | - unsigned long addr = (unsigned long) va; | |
13992 | - | |
13993 | - pgd = pgd_offset_k(addr); | |
13994 | - pud = pud_offset(pgd, addr); | |
13995 | - pmd = pmd_offset(pud, addr); | |
13996 | - ptep = pte_offset_kernel(pmd, addr); | |
13997 | - | |
13998 | - pte.pte = ptep->pte & ~_PAGE_RW; | |
13999 | - if (HYPERVISOR_update_va_mapping(addr, pte, 0)) | |
14000 | - xen_l1_entry_update(ptep, pte); /* fallback */ | |
14001 | - | |
14002 | - if ((addr >= VMALLOC_START) && (addr < VMALLOC_END)) | |
14003 | - __make_page_readonly(__va(pte_pfn(pte) << PAGE_SHIFT)); | |
14004 | -} | |
14005 | - | |
14006 | -static void __make_page_writable(void *va) | |
14007 | -{ | |
14008 | - pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep; | |
14009 | - unsigned long addr = (unsigned long) va; | |
14010 | - | |
14011 | - pgd = pgd_offset_k(addr); | |
14012 | - pud = pud_offset(pgd, addr); | |
14013 | - pmd = pmd_offset(pud, addr); | |
14014 | - ptep = pte_offset_kernel(pmd, addr); | |
14015 | - | |
14016 | - pte.pte = ptep->pte | _PAGE_RW; | |
14017 | - if (HYPERVISOR_update_va_mapping(addr, pte, 0)) | |
14018 | - xen_l1_entry_update(ptep, pte); /* fallback */ | |
14019 | - | |
14020 | - if ((addr >= VMALLOC_START) && (addr < VMALLOC_END)) | |
14021 | - __make_page_writable(__va(pte_pfn(pte) << PAGE_SHIFT)); | |
14022 | -} | |
14023 | - | |
14024 | -void make_page_readonly(void *va, unsigned int feature) | |
14025 | -{ | |
14026 | - if (!xen_feature(feature)) | |
14027 | - __make_page_readonly(va); | |
14028 | -} | |
14029 | - | |
14030 | -void make_page_writable(void *va, unsigned int feature) | |
14031 | -{ | |
14032 | - if (!xen_feature(feature)) | |
14033 | - __make_page_writable(va); | |
14034 | -} | |
14035 | - | |
14036 | -void make_pages_readonly(void *va, unsigned nr, unsigned int feature) | |
14037 | -{ | |
14038 | - if (xen_feature(feature)) | |
14039 | - return; | |
14040 | - | |
14041 | - while (nr-- != 0) { | |
14042 | - __make_page_readonly(va); | |
14043 | - va = (void*)((unsigned long)va + PAGE_SIZE); | |
14044 | - } | |
14045 | -} | |
14046 | - | |
14047 | -void make_pages_writable(void *va, unsigned nr, unsigned int feature) | |
14048 | -{ | |
14049 | - if (xen_feature(feature)) | |
14050 | - return; | |
14051 | - | |
14052 | - while (nr-- != 0) { | |
14053 | - __make_page_writable(va); | |
14054 | - va = (void*)((unsigned long)va + PAGE_SIZE); | |
14055 | - } | |
14056 | -} | |
14057 | - | |
14058 | /* | |
14059 | * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the | |
14060 | * physical space so we can cache the place of the first one and move | |
14061 | @@ -187,22 +130,26 @@ void show_mem(void) | |
14062 | { | |
14063 | long i, total = 0, reserved = 0; | |
14064 | long shared = 0, cached = 0; | |
14065 | - pg_data_t *pgdat; | |
14066 | struct page *page; | |
14067 | + pg_data_t *pgdat; | |
14068 | ||
14069 | printk(KERN_INFO "Mem-info:\n"); | |
14070 | show_free_areas(); | |
14071 | - printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); | |
14072 | + printk(KERN_INFO "Free swap: %6ldkB\n", | |
14073 | + nr_swap_pages << (PAGE_SHIFT-10)); | |
14074 | ||
14075 | for_each_online_pgdat(pgdat) { | |
14076 | - for (i = 0; i < pgdat->node_spanned_pages; ++i) { | |
14077 | - /* this loop can take a while with 256 GB and 4k pages | |
14078 | - so update the NMI watchdog */ | |
14079 | - if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) { | |
14080 | + for (i = 0; i < pgdat->node_spanned_pages; ++i) { | |
14081 | + /* | |
14082 | + * This loop can take a while with 256 GB and | |
14083 | + * 4k pages so defer the NMI watchdog: | |
14084 | + */ | |
14085 | + if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) | |
14086 | touch_nmi_watchdog(); | |
14087 | - } | |
cc90b958 | 14088 | + |
00e5a55c BS |
14089 | if (!pfn_valid(pgdat->node_start_pfn + i)) |
14090 | continue; | |
cc90b958 | 14091 | + |
00e5a55c BS |
14092 | page = pfn_to_page(pgdat->node_start_pfn + i); |
14093 | total++; | |
14094 | if (PageReserved(page)) | |
14095 | @@ -211,58 +158,67 @@ void show_mem(void) | |
14096 | cached++; | |
14097 | else if (page_count(page)) | |
14098 | shared += page_count(page) - 1; | |
14099 | - } | |
cc90b958 | 14100 | + } |
00e5a55c BS |
14101 | } |
14102 | - printk(KERN_INFO "%lu pages of RAM\n", total); | |
14103 | - printk(KERN_INFO "%lu reserved pages\n",reserved); | |
14104 | - printk(KERN_INFO "%lu pages shared\n",shared); | |
14105 | - printk(KERN_INFO "%lu pages swap cached\n",cached); | |
14106 | + printk(KERN_INFO "%lu pages of RAM\n", total); | |
14107 | + printk(KERN_INFO "%lu reserved pages\n", reserved); | |
14108 | + printk(KERN_INFO "%lu pages shared\n", shared); | |
14109 | + printk(KERN_INFO "%lu pages swap cached\n", cached); | |
14110 | } | |
14111 | ||
14112 | +static unsigned long __meminitdata table_start; | |
14113 | +static unsigned long __meminitdata table_end; | |
14114 | ||
14115 | static __init void *spp_getpage(void) | |
14116 | -{ | |
14117 | +{ | |
14118 | void *ptr; | |
cc90b958 | 14119 | + |
00e5a55c BS |
14120 | if (after_bootmem) |
14121 | - ptr = (void *) get_zeroed_page(GFP_ATOMIC); | |
14122 | + ptr = (void *) get_zeroed_page(GFP_ATOMIC); | |
14123 | else if (start_pfn < table_end) { | |
14124 | ptr = __va(start_pfn << PAGE_SHIFT); | |
14125 | start_pfn++; | |
14126 | memset(ptr, 0, PAGE_SIZE); | |
14127 | } else | |
14128 | ptr = alloc_bootmem_pages(PAGE_SIZE); | |
14129 | - if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) | |
14130 | - panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":""); | |
14131 | ||
14132 | - Dprintk("spp_getpage %p\n", ptr); | |
14133 | + if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) { | |
14134 | + panic("set_pte_phys: cannot allocate page data %s\n", | |
14135 | + after_bootmem ? "after bootmem" : ""); | |
cc90b958 BS |
14136 | + } |
14137 | + | |
00e5a55c | 14138 | + pr_debug("spp_getpage %p\n", ptr); |
cc90b958 | 14139 | + |
00e5a55c BS |
14140 | return ptr; |
14141 | -} | |
cc90b958 | 14142 | +} |
00e5a55c BS |
14143 | |
14144 | #define pgd_offset_u(address) (__user_pgd(init_level4_pgt) + pgd_index(address)) | |
14145 | #define pud_offset_u(address) (level3_user_pgt + pud_index(address)) | |
14146 | ||
14147 | -static __init void set_pte_phys(unsigned long vaddr, | |
14148 | - unsigned long phys, pgprot_t prot, int user_mode) | |
14149 | +static __init void | |
14150 | +set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot, int user_mode) | |
14151 | { | |
14152 | pgd_t *pgd; | |
14153 | pud_t *pud; | |
14154 | pmd_t *pmd; | |
14155 | pte_t *pte, new_pte; | |
14156 | ||
14157 | - Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys); | |
14158 | + pr_debug("set_pte_phys %lx to %lx\n", vaddr, phys); | |
14159 | ||
14160 | pgd = (user_mode ? pgd_offset_u(vaddr) : pgd_offset_k(vaddr)); | |
14161 | if (pgd_none(*pgd)) { | |
14162 | - printk("PGD FIXMAP MISSING, it should be setup in head.S!\n"); | |
14163 | + printk(KERN_ERR | |
14164 | + "PGD FIXMAP MISSING, it should be setup in head.S!\n"); | |
14165 | return; | |
14166 | } | |
14167 | pud = (user_mode ? pud_offset_u(vaddr) : pud_offset(pgd, vaddr)); | |
14168 | if (pud_none(*pud)) { | |
14169 | - pmd = (pmd_t *) spp_getpage(); | |
14170 | + pmd = (pmd_t *) spp_getpage(); | |
14171 | make_page_readonly(pmd, XENFEAT_writable_page_tables); | |
14172 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER)); | |
14173 | if (pmd != pmd_offset(pud, 0)) { | |
14174 | - printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0)); | |
14175 | + printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n", | |
14176 | + pmd, pmd_offset(pud, 0)); | |
14177 | return; | |
14178 | } | |
14179 | } | |
14180 | @@ -272,7 +228,7 @@ static __init void set_pte_phys(unsigned | |
14181 | make_page_readonly(pte, XENFEAT_writable_page_tables); | |
14182 | set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER)); | |
14183 | if (pte != pte_offset_kernel(pmd, 0)) { | |
14184 | - printk("PAGETABLE BUG #02!\n"); | |
14185 | + printk(KERN_ERR "PAGETABLE BUG #02!\n"); | |
14186 | return; | |
14187 | } | |
14188 | } | |
14189 | @@ -294,30 +250,30 @@ static __init void set_pte_phys(unsigned | |
14190 | __flush_tlb_one(vaddr); | |
14191 | } | |
14192 | ||
14193 | -static __init void set_pte_phys_ma(unsigned long vaddr, | |
14194 | - unsigned long phys, pgprot_t prot) | |
14195 | +static __init void | |
14196 | +set_pte_phys_ma(unsigned long vaddr, unsigned long phys, pgprot_t prot) | |
14197 | { | |
14198 | pgd_t *pgd; | |
14199 | pud_t *pud; | |
14200 | pmd_t *pmd; | |
14201 | pte_t *pte, new_pte; | |
14202 | ||
14203 | - Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys); | |
14204 | + pr_debug("set_pte_phys_ma %lx to %lx\n", vaddr, phys); | |
14205 | ||
14206 | pgd = pgd_offset_k(vaddr); | |
14207 | if (pgd_none(*pgd)) { | |
14208 | - printk("PGD FIXMAP MISSING, it should be setup in head.S!\n"); | |
14209 | + printk(KERN_ERR | |
14210 | + "PGD FIXMAP MISSING, it should be setup in head.S!\n"); | |
14211 | return; | |
14212 | } | |
14213 | pud = pud_offset(pgd, vaddr); | |
14214 | if (pud_none(*pud)) { | |
14215 | - | |
14216 | - pmd = (pmd_t *) spp_getpage(); | |
14217 | + pmd = (pmd_t *) spp_getpage(); | |
14218 | make_page_readonly(pmd, XENFEAT_writable_page_tables); | |
14219 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER)); | |
14220 | if (pmd != pmd_offset(pud, 0)) { | |
14221 | - printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0)); | |
14222 | - return; | |
14223 | + printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n", | |
14224 | + pmd, pmd_offset(pud, 0)); | |
14225 | } | |
14226 | } | |
14227 | pmd = pmd_offset(pud, vaddr); | |
14228 | @@ -326,7 +282,7 @@ static __init void set_pte_phys_ma(unsig | |
14229 | make_page_readonly(pte, XENFEAT_writable_page_tables); | |
14230 | set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER)); | |
14231 | if (pte != pte_offset_kernel(pmd, 0)) { | |
14232 | - printk("PAGETABLE BUG #02!\n"); | |
14233 | + printk(KERN_ERR "PAGETABLE BUG #02!\n"); | |
14234 | return; | |
14235 | } | |
14236 | } | |
14237 | @@ -350,14 +306,44 @@ static __init void set_pte_phys_ma(unsig | |
14238 | __flush_tlb_one(vaddr); | |
14239 | } | |
14240 | ||
14241 | +#ifndef CONFIG_XEN | |
14242 | +/* | |
14243 | + * The head.S code sets up the kernel high mapping: | |
14244 | + * | |
14245 | + * from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text) | |
14246 | + * | |
14247 | + * phys_addr holds the negative offset to the kernel, which is added | |
14248 | + * to the compile time generated pmds. This results in invalid pmds up | |
14249 | + * to the point where we hit the physaddr 0 mapping. | |
14250 | + * | |
14251 | + * We limit the mappings to the region from _text to _end. _end is | |
14252 | + * rounded up to the 2MB boundary. This catches the invalid pmds as | |
14253 | + * well, as they are located before _text: | |
14254 | + */ | |
14255 | +void __init cleanup_highmap(void) | |
cc90b958 | 14256 | +{ |
00e5a55c BS |
14257 | + unsigned long vaddr = __START_KERNEL_map; |
14258 | + unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1; | |
14259 | + pmd_t *pmd = level2_kernel_pgt; | |
14260 | + pmd_t *last_pmd = pmd + PTRS_PER_PMD; | |
cc90b958 | 14261 | + |
00e5a55c BS |
14262 | + for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) { |
14263 | + if (!pmd_present(*pmd)) | |
14264 | + continue; | |
14265 | + if (vaddr < (unsigned long) _text || vaddr > end) | |
14266 | + set_pmd(pmd, __pmd(0)); | |
14267 | + } | |
cc90b958 | 14268 | +} |
00e5a55c | 14269 | +#endif |
cc90b958 | 14270 | + |
00e5a55c BS |
14271 | /* NOTE: this is meant to be run only at boot */ |
14272 | -void __init | |
14273 | -__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot) | |
14274 | +void __init | |
14275 | +__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot) | |
14276 | { | |
14277 | unsigned long address = __fix_to_virt(idx); | |
14278 | ||
14279 | if (idx >= __end_of_fixed_addresses) { | |
14280 | - printk("Invalid __set_fixmap\n"); | |
14281 | + printk(KERN_ERR "Invalid __set_fixmap\n"); | |
14282 | return; | |
14283 | } | |
14284 | switch (idx) { | |
14285 | @@ -375,16 +361,14 @@ __set_fixmap (enum fixed_addresses idx, | |
14286 | } | |
14287 | } | |
14288 | ||
14289 | -unsigned long __meminitdata table_start, table_end; | |
14290 | - | |
14291 | static __meminit void *alloc_static_page(unsigned long *phys) | |
14292 | { | |
14293 | unsigned long va = (start_pfn << PAGE_SHIFT) + __START_KERNEL_map; | |
14294 | ||
14295 | if (after_bootmem) { | |
14296 | void *adr = (void *)get_zeroed_page(GFP_ATOMIC); | |
14297 | - | |
14298 | *phys = __pa(adr); | |
cc90b958 | 14299 | + |
00e5a55c BS |
14300 | return adr; |
14301 | } | |
14302 | ||
14303 | @@ -396,7 +380,7 @@ static __meminit void *alloc_static_page | |
14304 | ||
14305 | #define PTE_SIZE PAGE_SIZE | |
14306 | ||
14307 | -static inline int make_readonly(unsigned long paddr) | |
14308 | +static inline int __meminit make_readonly(unsigned long paddr) | |
14309 | { | |
14310 | extern char __vsyscall_0; | |
14311 | int readonly = 0; | |
14312 | @@ -430,33 +414,38 @@ static inline int make_readonly(unsigned | |
14313 | /* Must run before zap_low_mappings */ | |
14314 | __meminit void *early_ioremap(unsigned long addr, unsigned long size) | |
14315 | { | |
14316 | - unsigned long vaddr; | |
14317 | pmd_t *pmd, *last_pmd; | |
14318 | + unsigned long vaddr; | |
14319 | int i, pmds; | |
14320 | ||
14321 | pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; | |
14322 | vaddr = __START_KERNEL_map; | |
14323 | pmd = level2_kernel_pgt; | |
14324 | last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1; | |
cc90b958 | 14325 | + |
00e5a55c BS |
14326 | for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { |
14327 | for (i = 0; i < pmds; i++) { | |
14328 | if (pmd_present(pmd[i])) | |
14329 | - goto next; | |
14330 | + goto continue_outer_loop; | |
14331 | } | |
14332 | vaddr += addr & ~PMD_MASK; | |
14333 | addr &= PMD_MASK; | |
cc90b958 | 14334 | + |
00e5a55c BS |
14335 | for (i = 0; i < pmds; i++, addr += PMD_SIZE) |
14336 | - set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE)); | |
14337 | - __flush_tlb(); | |
14338 | + set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); | |
14339 | + __flush_tlb_all(); | |
cc90b958 | 14340 | + |
00e5a55c BS |
14341 | return (void *)vaddr; |
14342 | - next: | |
14343 | +continue_outer_loop: | |
14344 | ; | |
14345 | } | |
14346 | printk("early_ioremap(0x%lx, %lu) failed\n", addr, size); | |
14347 | return NULL; | |
14348 | } | |
cc90b958 | 14349 | |
00e5a55c BS |
14350 | -/* To avoid virtual aliases later */ |
14351 | +/* | |
14352 | + * To avoid virtual aliases later: | |
14353 | + */ | |
14354 | __meminit void early_iounmap(void *addr, unsigned long size) | |
14355 | { | |
14356 | unsigned long vaddr; | |
14357 | @@ -466,9 +455,11 @@ __meminit void early_iounmap(void *addr, | |
14358 | vaddr = (unsigned long)addr; | |
14359 | pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; | |
14360 | pmd = level2_kernel_pgt + pmd_index(vaddr); | |
cc90b958 | 14361 | + |
00e5a55c BS |
14362 | for (i = 0; i < pmds; i++) |
14363 | pmd_clear(pmd + i); | |
14364 | - __flush_tlb(); | |
cc90b958 | 14365 | + |
00e5a55c BS |
14366 | + __flush_tlb_all(); |
14367 | } | |
14368 | #endif | |
14369 | ||
14370 | @@ -517,18 +508,19 @@ phys_pmd_init(pmd_t *pmd_page, unsigned | |
14371 | static void __meminit | |
14372 | phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) | |
14373 | { | |
14374 | - pmd_t *pmd = pmd_offset(pud,0); | |
14375 | + pmd_t *pmd = pmd_offset(pud, 0); | |
14376 | spin_lock(&init_mm.page_table_lock); | |
14377 | phys_pmd_init(pmd, address, end); | |
14378 | spin_unlock(&init_mm.page_table_lock); | |
14379 | __flush_tlb_all(); | |
14380 | } | |
14381 | ||
14382 | -static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) | |
14383 | -{ | |
14384 | +static void __meminit | |
14385 | +phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) | |
cc90b958 | 14386 | +{ |
00e5a55c BS |
14387 | int i = pud_index(addr); |
14388 | ||
14389 | - for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) { | |
14390 | + for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) { | |
14391 | unsigned long pmd_phys; | |
14392 | pud_t *pud = pud_page + pud_index(addr); | |
14393 | pmd_t *pmd; | |
14394 | @@ -550,8 +542,8 @@ static void __meminit phys_pud_init(pud_ | |
14395 | ||
14396 | early_make_page_readonly(pmd, XENFEAT_writable_page_tables); | |
14397 | } | |
14398 | - __flush_tlb(); | |
14399 | -} | |
14400 | + __flush_tlb_all(); | |
cc90b958 | 14401 | +} |
00e5a55c BS |
14402 | |
14403 | void __init xen_init_pt(void) | |
14404 | { | |
14405 | @@ -632,6 +624,7 @@ void __init xen_init_pt(void) | |
14406 | static void __init extend_init_mapping(unsigned long tables_space) | |
14407 | { | |
14408 | unsigned long va = __START_KERNEL_map; | |
14409 | + unsigned long start = start_pfn; | |
14410 | unsigned long phys, addr, *pte_page; | |
14411 | pmd_t *pmd; | |
14412 | pte_t *pte, new_pte; | |
14413 | @@ -682,6 +675,10 @@ static void __init extend_init_mapping(u | |
14414 | BUG(); | |
14415 | va += PAGE_SIZE; | |
14416 | } | |
cc90b958 | 14417 | + |
00e5a55c BS |
14418 | + if (start_pfn > start) |
14419 | + reserve_early(start << PAGE_SHIFT, | |
14420 | + start_pfn << PAGE_SHIFT, "INITMAP"); | |
14421 | } | |
14422 | ||
14423 | static void __init find_early_table_space(unsigned long end) | |
14424 | @@ -706,7 +703,7 @@ static void __init find_early_table_spac | |
14425 | (table_start << PAGE_SHIFT) + tables); | |
14426 | } | |
14427 | ||
14428 | -static void xen_finish_init_mapping(void) | |
14429 | +static void __init xen_finish_init_mapping(void) | |
14430 | { | |
14431 | unsigned long i, start, end; | |
14432 | ||
14433 | @@ -738,13 +735,6 @@ static void xen_finish_init_mapping(void | |
14434 | /* Allocate pte's for initial fixmaps from 'start_pfn' allocator. */ | |
14435 | table_end = ~0UL; | |
14436 | ||
14437 | - /* | |
14438 | - * Prefetch pte's for the bt_ioremap() area. It gets used before the | |
14439 | - * boot-time allocator is online, so allocate-on-demand would fail. | |
14440 | - */ | |
14441 | - for (i = FIX_BTMAP_END; i <= FIX_BTMAP_BEGIN; i++) | |
14442 | - __set_fixmap(i, 0, __pgprot(0)); | |
14443 | - | |
14444 | /* Switch to the real shared_info page, and clear the dummy page. */ | |
14445 | set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); | |
14446 | HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); | |
14447 | @@ -764,20 +754,23 @@ static void xen_finish_init_mapping(void | |
14448 | table_end = start_pfn; | |
14449 | } | |
14450 | ||
14451 | -/* Setup the direct mapping of the physical memory at PAGE_OFFSET. | |
14452 | - This runs before bootmem is initialized and gets pages directly from the | |
14453 | - physical memory. To access them they are temporarily mapped. */ | |
14454 | +/* | |
14455 | + * Setup the direct mapping of the physical memory at PAGE_OFFSET. | |
14456 | + * This runs before bootmem is initialized and gets pages directly from | |
14457 | + * the physical memory. To access them they are temporarily mapped. | |
14458 | + */ | |
14459 | void __init_refok init_memory_mapping(unsigned long start, unsigned long end) | |
14460 | -{ | |
cc90b958 | 14461 | +{ |
00e5a55c BS |
14462 | unsigned long next; |
14463 | ||
14464 | - Dprintk("init_memory_mapping\n"); | |
14465 | + pr_debug("init_memory_mapping\n"); | |
14466 | ||
14467 | - /* | |
14468 | + /* | |
14469 | * Find space for the kernel direct mapping tables. | |
14470 | - * Later we should allocate these tables in the local node of the memory | |
14471 | - * mapped. Unfortunately this is done currently before the nodes are | |
14472 | - * discovered. | |
14473 | + * | |
14474 | + * Later we should allocate these tables in the local node of the | |
14475 | + * memory mapped. Unfortunately this is done currently before the | |
14476 | + * nodes are discovered. | |
14477 | */ | |
14478 | if (!after_bootmem) | |
14479 | find_early_table_space(end); | |
14480 | @@ -786,8 +779,8 @@ void __init_refok init_memory_mapping(un | |
14481 | end = (unsigned long)__va(end); | |
14482 | ||
14483 | for (; start < end; start = next) { | |
14484 | - unsigned long pud_phys; | |
14485 | pgd_t *pgd = pgd_offset_k(start); | |
14486 | + unsigned long pud_phys; | |
14487 | pud_t *pud; | |
14488 | ||
14489 | if (after_bootmem) | |
14490 | @@ -795,8 +788,8 @@ void __init_refok init_memory_mapping(un | |
14491 | else | |
14492 | pud = alloc_static_page(&pud_phys); | |
14493 | next = start + PGDIR_SIZE; | |
14494 | - if (next > end) | |
14495 | - next = end; | |
14496 | + if (next > end) | |
14497 | + next = end; | |
14498 | phys_pud_init(pud, __pa(start), __pa(next)); | |
14499 | if (!after_bootmem) { | |
14500 | early_make_page_readonly(pud, XENFEAT_writable_page_tables); | |
14501 | @@ -810,12 +803,17 @@ void __init_refok init_memory_mapping(un | |
14502 | } | |
14503 | ||
14504 | __flush_tlb_all(); | |
cc90b958 | 14505 | + |
00e5a55c BS |
14506 | + if (!after_bootmem) |
14507 | + reserve_early(table_start << PAGE_SHIFT, | |
14508 | + table_end << PAGE_SHIFT, "PGTABLE"); | |
14509 | } | |
14510 | ||
14511 | #ifndef CONFIG_NUMA | |
14512 | void __init paging_init(void) | |
14513 | { | |
14514 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | |
cc90b958 | 14515 | + |
00e5a55c BS |
14516 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
14517 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; | |
14518 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | |
14519 | @@ -829,40 +827,6 @@ void __init paging_init(void) | |
14520 | } | |
14521 | #endif | |
14522 | ||
14523 | -/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches | |
14524 | - from the CPU leading to inconsistent cache lines. address and size | |
14525 | - must be aligned to 2MB boundaries. | |
14526 | - Does nothing when the mapping doesn't exist. */ | |
14527 | -void __init clear_kernel_mapping(unsigned long address, unsigned long size) | |
14528 | -{ | |
14529 | - unsigned long end = address + size; | |
14530 | - | |
14531 | - BUG_ON(address & ~LARGE_PAGE_MASK); | |
14532 | - BUG_ON(size & ~LARGE_PAGE_MASK); | |
14533 | - | |
14534 | - for (; address < end; address += LARGE_PAGE_SIZE) { | |
14535 | - pgd_t *pgd = pgd_offset_k(address); | |
14536 | - pud_t *pud; | |
14537 | - pmd_t *pmd; | |
14538 | - if (pgd_none(*pgd)) | |
14539 | - continue; | |
14540 | - pud = pud_offset(pgd, address); | |
14541 | - if (pud_none(*pud)) | |
14542 | - continue; | |
14543 | - pmd = pmd_offset(pud, address); | |
14544 | - if (!pmd || pmd_none(*pmd)) | |
14545 | - continue; | |
14546 | - if (0 == (__pmd_val(*pmd) & _PAGE_PSE)) { | |
14547 | - /* Could handle this, but it should not happen currently. */ | |
14548 | - printk(KERN_ERR | |
14549 | - "clear_kernel_mapping: mapping has been split. will leak memory\n"); | |
14550 | - pmd_ERROR(*pmd); | |
14551 | - } | |
14552 | - set_pmd(pmd, __pmd(0)); | |
14553 | - } | |
14554 | - __flush_tlb_all(); | |
14555 | -} | |
14556 | - | |
14557 | /* | |
14558 | * Memory hotplug specific functions | |
14559 | */ | |
14560 | @@ -888,16 +852,12 @@ int arch_add_memory(int nid, u64 start, | |
14561 | unsigned long nr_pages = size >> PAGE_SHIFT; | |
14562 | int ret; | |
14563 | ||
14564 | - init_memory_mapping(start, (start + size -1)); | |
14565 | + init_memory_mapping(start, start + size-1); | |
14566 | ||
14567 | ret = __add_pages(zone, start_pfn, nr_pages); | |
14568 | - if (ret) | |
14569 | - goto error; | |
14570 | + WARN_ON(1); | |
14571 | ||
14572 | return ret; | |
14573 | -error: | |
14574 | - printk("%s: Problem encountered in __add_pages!\n", __func__); | |
14575 | - return ret; | |
14576 | } | |
14577 | EXPORT_SYMBOL_GPL(arch_add_memory); | |
14578 | ||
14579 | @@ -911,36 +871,8 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to | |
14580 | ||
14581 | #endif /* CONFIG_MEMORY_HOTPLUG */ | |
14582 | ||
14583 | -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE | |
14584 | -/* | |
14585 | - * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance, | |
14586 | - * just online the pages. | |
14587 | - */ | |
14588 | -int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages) | |
14589 | -{ | |
14590 | - int err = -EIO; | |
14591 | - unsigned long pfn; | |
14592 | - unsigned long total = 0, mem = 0; | |
14593 | - for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { | |
14594 | - if (pfn_valid(pfn)) { | |
14595 | - online_page(pfn_to_page(pfn)); | |
14596 | - err = 0; | |
14597 | - mem++; | |
14598 | - } | |
14599 | - total++; | |
14600 | - } | |
14601 | - if (!err) { | |
14602 | - z->spanned_pages += total; | |
14603 | - z->present_pages += mem; | |
14604 | - z->zone_pgdat->node_spanned_pages += total; | |
14605 | - z->zone_pgdat->node_present_pages += mem; | |
14606 | - } | |
14607 | - return err; | |
14608 | -} | |
14609 | -#endif | |
14610 | - | |
14611 | -static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, | |
14612 | - kcore_vsyscall; | |
14613 | +static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, | |
14614 | + kcore_modules, kcore_vsyscall; | |
14615 | ||
14616 | void __init mem_init(void) | |
14617 | { | |
14618 | @@ -949,8 +881,7 @@ void __init mem_init(void) | |
14619 | ||
14620 | pci_iommu_alloc(); | |
14621 | ||
14622 | - /* clear the zero-page */ | |
14623 | - memset(empty_zero_page, 0, PAGE_SIZE); | |
14624 | + /* clear_bss() already clears the empty_zero_page */ | |
14625 | ||
14626 | reservedpages = 0; | |
14627 | ||
14628 | @@ -968,7 +899,6 @@ void __init mem_init(void) | |
14629 | } | |
14630 | reservedpages = end_pfn - totalram_pages - | |
14631 | absent_pages_in_range(0, end_pfn); | |
14632 | - | |
14633 | after_bootmem = 1; | |
14634 | ||
14635 | codesize = (unsigned long) &_etext - (unsigned long) &_text; | |
14636 | @@ -976,46 +906,64 @@ void __init mem_init(void) | |
14637 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; | |
14638 | ||
14639 | /* Register memory areas for /proc/kcore */ | |
14640 | - kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); | |
14641 | - kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, | |
14642 | + kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); | |
14643 | + kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, | |
14644 | VMALLOC_END-VMALLOC_START); | |
14645 | kclist_add(&kcore_kernel, &_stext, _end - _stext); | |
14646 | kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN); | |
14647 | - kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, | |
14648 | + kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, | |
14649 | VSYSCALL_END - VSYSCALL_START); | |
14650 | ||
14651 | - printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", | |
14652 | + printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " | |
14653 | + "%ldk reserved, %ldk data, %ldk init)\n", | |
14654 | (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), | |
14655 | end_pfn << (PAGE_SHIFT-10), | |
14656 | codesize >> 10, | |
14657 | reservedpages << (PAGE_SHIFT-10), | |
14658 | datasize >> 10, | |
14659 | initsize >> 10); | |
cc90b958 | 14660 | + |
00e5a55c BS |
14661 | + cpa_init(); |
14662 | } | |
14663 | ||
14664 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | |
14665 | { | |
14666 | - unsigned long addr; | |
14667 | + unsigned long addr = begin; | |
14668 | ||
14669 | - if (begin >= end) | |
14670 | + if (addr >= end) | |
14671 | return; | |
14672 | ||
14673 | + /* | |
14674 | + * If debugging page accesses then do not free this memory but | |
14675 | + * mark them not present - any buggy init-section access will | |
14676 | + * create a kernel page fault: | |
14677 | + */ | |
14678 | +#ifdef CONFIG_DEBUG_PAGEALLOC | |
14679 | + printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", | |
14680 | + begin, PAGE_ALIGN(end)); | |
14681 | + set_memory_np(begin, (end - begin) >> PAGE_SHIFT); | |
14682 | +#else | |
14683 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); | |
14684 | - for (addr = begin; addr < end; addr += PAGE_SIZE) { | |
cc90b958 | 14685 | + |
00e5a55c BS |
14686 | + for (; addr < end; addr += PAGE_SIZE) { |
14687 | ClearPageReserved(virt_to_page(addr)); | |
14688 | init_page_count(virt_to_page(addr)); | |
14689 | memset((void *)(addr & ~(PAGE_SIZE-1)), | |
14690 | POISON_FREE_INITMEM, PAGE_SIZE); | |
14691 | if (addr >= __START_KERNEL_map) { | |
14692 | /* make_readonly() reports all kernel addresses. */ | |
14693 | - __make_page_writable(__va(__pa(addr))); | |
14694 | - change_page_attr_addr(addr, 1, __pgprot(0)); | |
14695 | + if (HYPERVISOR_update_va_mapping((unsigned long)__va(__pa(addr)), | |
14696 | + pfn_pte(__pa(addr) >> PAGE_SHIFT, | |
14697 | + PAGE_KERNEL), | |
14698 | + 0)) | |
14699 | + BUG(); | |
14700 | + if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) | |
14701 | + BUG(); | |
14702 | } | |
14703 | free_page(addr); | |
14704 | totalram_pages++; | |
14705 | } | |
14706 | - if (addr > __START_KERNEL_map) | |
14707 | - global_flush_tlb(); | |
cc90b958 | 14708 | +#endif |
cc90b958 BS |
14709 | } |
14710 | ||
00e5a55c BS |
14711 | void free_initmem(void) |
14712 | @@ -1026,6 +974,8 @@ void free_initmem(void) | |
cc90b958 | 14713 | } |
cc90b958 | 14714 | |
00e5a55c BS |
14715 | #ifdef CONFIG_DEBUG_RODATA |
14716 | +const int rodata_test_data = 0xC3; | |
14717 | +EXPORT_SYMBOL_GPL(rodata_test_data); | |
14718 | ||
14719 | void mark_rodata_ro(void) | |
cc90b958 | 14720 | { |
00e5a55c BS |
14721 | @@ -1047,18 +997,27 @@ void mark_rodata_ro(void) |
14722 | if (end <= start) | |
14723 | return; | |
cc90b958 | 14724 | |
00e5a55c | 14725 | - change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO); |
cc90b958 | 14726 | |
00e5a55c BS |
14727 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
14728 | (end - start) >> 10); | |
14729 | + set_memory_ro(start, (end - start) >> PAGE_SHIFT); | |
cc90b958 | 14730 | |
00e5a55c BS |
14731 | /* |
14732 | - * change_page_attr_addr() requires a global_flush_tlb() call after it. | |
14733 | - * We do this after the printk so that if something went wrong in the | |
14734 | - * change, the printk gets out at least to give a better debug hint | |
14735 | - * of who is the culprit. | |
14736 | + * The rodata section (but not the kernel text!) should also be | |
14737 | + * not-executable. | |
14738 | */ | |
14739 | - global_flush_tlb(); | |
14740 | + start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; | |
14741 | + set_memory_nx(start, (end - start) >> PAGE_SHIFT); | |
14742 | + | |
14743 | + rodata_test(); | |
14744 | + | |
14745 | +#ifdef CONFIG_CPA_DEBUG | |
14746 | + printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end); | |
14747 | + set_memory_rw(start, (end-start) >> PAGE_SHIFT); | |
14748 | + | |
14749 | + printk(KERN_INFO "Testing CPA: again\n"); | |
14750 | + set_memory_ro(start, (end-start) >> PAGE_SHIFT); | |
14751 | +#endif | |
cc90b958 | 14752 | } |
00e5a55c | 14753 | #endif |
cc90b958 | 14754 | |
00e5a55c BS |
14755 | @@ -1069,17 +1028,21 @@ void free_initrd_mem(unsigned long start |
14756 | } | |
14757 | #endif | |
cc90b958 | 14758 | |
00e5a55c BS |
14759 | -void __init reserve_bootmem_generic(unsigned long phys, unsigned len) |
14760 | -{ | |
14761 | +void __init reserve_bootmem_generic(unsigned long phys, unsigned len) | |
14762 | +{ | |
14763 | #ifdef CONFIG_NUMA | |
14764 | int nid = phys_to_nid(phys); | |
14765 | #endif | |
14766 | unsigned long pfn = phys >> PAGE_SHIFT; | |
14767 | + | |
14768 | if (pfn >= end_pfn) { | |
14769 | - /* This can happen with kdump kernels when accessing firmware | |
14770 | - tables. */ | |
14771 | + /* | |
14772 | + * This can happen with kdump kernels when accessing | |
14773 | + * firmware tables: | |
14774 | + */ | |
14775 | if (pfn < end_pfn_map) | |
14776 | return; | |
14777 | + | |
14778 | printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n", | |
14779 | phys, len); | |
14780 | return; | |
14781 | @@ -1087,9 +1050,9 @@ void __init reserve_bootmem_generic(unsi | |
cc90b958 | 14782 | |
00e5a55c BS |
14783 | /* Should check here against the e820 map to avoid double free */ |
14784 | #ifdef CONFIG_NUMA | |
14785 | - reserve_bootmem_node(NODE_DATA(nid), phys, len); | |
14786 | -#else | |
14787 | - reserve_bootmem(phys, len); | |
14788 | + reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT); | |
14789 | +#else | |
14790 | + reserve_bootmem(phys, len, BOOTMEM_DEFAULT); | |
14791 | #endif | |
14792 | #ifndef CONFIG_XEN | |
14793 | if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { | |
14794 | @@ -1099,46 +1062,49 @@ void __init reserve_bootmem_generic(unsi | |
14795 | #endif | |
14796 | } | |
cc90b958 | 14797 | |
00e5a55c BS |
14798 | -int kern_addr_valid(unsigned long addr) |
14799 | -{ | |
14800 | +int kern_addr_valid(unsigned long addr) | |
14801 | +{ | |
14802 | unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; | |
14803 | - pgd_t *pgd; | |
14804 | - pud_t *pud; | |
14805 | - pmd_t *pmd; | |
14806 | - pte_t *pte; | |
14807 | + pgd_t *pgd; | |
14808 | + pud_t *pud; | |
14809 | + pmd_t *pmd; | |
14810 | + pte_t *pte; | |
cc90b958 | 14811 | |
00e5a55c BS |
14812 | if (above != 0 && above != -1UL) |
14813 | - return 0; | |
14814 | - | |
14815 | + return 0; | |
14816 | + | |
14817 | pgd = pgd_offset_k(addr); | |
14818 | if (pgd_none(*pgd)) | |
14819 | return 0; | |
cc90b958 | 14820 | |
00e5a55c BS |
14821 | pud = pud_offset(pgd, addr); |
14822 | if (pud_none(*pud)) | |
14823 | - return 0; | |
14824 | + return 0; | |
cc90b958 | 14825 | |
00e5a55c BS |
14826 | pmd = pmd_offset(pud, addr); |
14827 | if (pmd_none(*pmd)) | |
14828 | return 0; | |
14829 | + | |
14830 | if (pmd_large(*pmd)) | |
14831 | return pfn_valid(pmd_pfn(*pmd)); | |
cc90b958 | 14832 | |
00e5a55c BS |
14833 | pte = pte_offset_kernel(pmd, addr); |
14834 | if (pte_none(*pte)) | |
14835 | return 0; | |
14836 | + | |
14837 | return pfn_valid(pte_pfn(*pte)); | |
14838 | } | |
cc90b958 | 14839 | |
00e5a55c BS |
14840 | -/* A pseudo VMA to allow ptrace access for the vsyscall page. This only |
14841 | - covers the 64bit vsyscall page now. 32bit has a real VMA now and does | |
14842 | - not need special handling anymore. */ | |
14843 | - | |
14844 | +/* | |
14845 | + * A pseudo VMA to allow ptrace access for the vsyscall page. This only | |
14846 | + * covers the 64bit vsyscall page now. 32bit has a real VMA now and does | |
14847 | + * not need special handling anymore: | |
14848 | + */ | |
14849 | static struct vm_area_struct gate_vma = { | |
14850 | - .vm_start = VSYSCALL_START, | |
14851 | - .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT), | |
14852 | - .vm_page_prot = PAGE_READONLY_EXEC, | |
14853 | - .vm_flags = VM_READ | VM_EXEC | |
14854 | + .vm_start = VSYSCALL_START, | |
14855 | + .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES * PAGE_SIZE), | |
14856 | + .vm_page_prot = PAGE_READONLY_EXEC, | |
14857 | + .vm_flags = VM_READ | VM_EXEC | |
14858 | }; | |
14859 | ||
14860 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | |
14861 | @@ -1153,14 +1119,17 @@ struct vm_area_struct *get_gate_vma(stru | |
14862 | int in_gate_area(struct task_struct *task, unsigned long addr) | |
cc90b958 | 14863 | { |
00e5a55c BS |
14864 | struct vm_area_struct *vma = get_gate_vma(task); |
14865 | + | |
14866 | if (!vma) | |
14867 | return 0; | |
14868 | + | |
14869 | return (addr >= vma->vm_start) && (addr < vma->vm_end); | |
cc90b958 | 14870 | } |
cc90b958 | 14871 | |
00e5a55c BS |
14872 | -/* Use this when you have no reliable task/vma, typically from interrupt |
14873 | - * context. It is less reliable than using the task's vma and may give | |
14874 | - * false positives. | |
14875 | +/* | |
14876 | + * Use this when you have no reliable task/vma, typically from interrupt | |
14877 | + * context. It is less reliable than using the task's vma and may give | |
14878 | + * false positives: | |
14879 | */ | |
14880 | int in_gate_area_no_task(unsigned long addr) | |
14881 | { | |
14882 | @@ -1180,8 +1149,8 @@ const char *arch_vma_name(struct vm_area | |
14883 | /* | |
14884 | * Initialise the sparsemem vmemmap using huge-pages at the PMD level. | |
14885 | */ | |
14886 | -int __meminit vmemmap_populate(struct page *start_page, | |
14887 | - unsigned long size, int node) | |
14888 | +int __meminit | |
14889 | +vmemmap_populate(struct page *start_page, unsigned long size, int node) | |
14890 | { | |
14891 | unsigned long addr = (unsigned long)start_page; | |
14892 | unsigned long end = (unsigned long)(start_page + size); | |
14893 | @@ -1196,6 +1165,7 @@ int __meminit vmemmap_populate(struct pa | |
14894 | pgd = vmemmap_pgd_populate(addr, node); | |
14895 | if (!pgd) | |
14896 | return -ENOMEM; | |
14897 | + | |
14898 | pud = vmemmap_pud_populate(pgd, addr, node); | |
14899 | if (!pud) | |
14900 | return -ENOMEM; | |
14901 | @@ -1203,20 +1173,22 @@ int __meminit vmemmap_populate(struct pa | |
14902 | pmd = pmd_offset(pud, addr); | |
14903 | if (pmd_none(*pmd)) { | |
14904 | pte_t entry; | |
14905 | - void *p = vmemmap_alloc_block(PMD_SIZE, node); | |
14906 | + void *p; | |
14907 | + | |
14908 | + p = vmemmap_alloc_block(PMD_SIZE, node); | |
14909 | if (!p) | |
14910 | return -ENOMEM; | |
cc90b958 | 14911 | |
00e5a55c BS |
14912 | - entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); |
14913 | - mk_pte_huge(entry); | |
14914 | - set_pmd(pmd, __pmd(pte_val(entry))); | |
14915 | + entry = pfn_pte(__pa(p) >> PAGE_SHIFT, | |
14916 | + PAGE_KERNEL_LARGE); | |
14917 | + set_pmd(pmd, __pmd_ma(__pte_val(entry))); | |
cc90b958 | 14918 | |
00e5a55c BS |
14919 | printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n", |
14920 | addr, addr + PMD_SIZE - 1, p, node); | |
14921 | - } else | |
14922 | + } else { | |
14923 | vmemmap_verify((pte_t *)pmd, node, addr, next); | |
14924 | + } | |
14925 | } | |
14926 | - | |
14927 | return 0; | |
cc90b958 | 14928 | } |
00e5a55c BS |
14929 | #endif |
14930 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
14931 | +++ sle11-2009-05-14/arch/x86/mm/ioremap-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
14932 | @@ -0,0 +1,687 @@ | |
14933 | +/* | |
14934 | + * Re-map IO memory to kernel address space so that we can access it. | |
14935 | + * This is needed for high PCI addresses that aren't mapped in the | |
14936 | + * 640k-1MB IO memory area on PC's | |
14937 | + * | |
14938 | + * (C) Copyright 1995 1996 Linus Torvalds | |
14939 | + */ | |
14940 | + | |
14941 | +#include <linux/bootmem.h> | |
14942 | +#include <linux/init.h> | |
14943 | +#include <linux/io.h> | |
14944 | +#include <linux/module.h> | |
14945 | +#include <linux/pfn.h> | |
14946 | +#include <linux/slab.h> | |
14947 | +#include <linux/vmalloc.h> | |
14948 | + | |
14949 | +#include <asm/cacheflush.h> | |
14950 | +#include <asm/e820.h> | |
14951 | +#include <asm/fixmap.h> | |
14952 | +#include <asm/pgtable.h> | |
14953 | +#include <asm/tlbflush.h> | |
14954 | +#include <asm/pgalloc.h> | |
14955 | + | |
14956 | +enum ioremap_mode { | |
14957 | + IOR_MODE_UNCACHED, | |
14958 | + IOR_MODE_CACHED, | |
14959 | +}; | |
14960 | + | |
14961 | +#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN) | |
14962 | + | |
14963 | +unsigned long __phys_addr(unsigned long x) | |
cc90b958 | 14964 | +{ |
00e5a55c BS |
14965 | + if (x >= __START_KERNEL_map) |
14966 | + return x - __START_KERNEL_map + phys_base; | |
14967 | + return x - PAGE_OFFSET; | |
14968 | +} | |
14969 | +EXPORT_SYMBOL(__phys_addr); | |
14970 | + | |
14971 | +#endif | |
14972 | + | |
14973 | +static int direct_remap_area_pte_fn(pte_t *pte, | |
14974 | + struct page *pmd_page, | |
14975 | + unsigned long address, | |
14976 | + void *data) | |
14977 | +{ | |
14978 | + mmu_update_t **v = (mmu_update_t **)data; | |
14979 | + | |
14980 | + BUG_ON(!pte_none(*pte)); | |
14981 | + | |
14982 | + (*v)->ptr = ((u64)pfn_to_mfn(page_to_pfn(pmd_page)) << | |
14983 | + PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK); | |
14984 | + (*v)++; | |
14985 | + | |
14986 | + return 0; | |
14987 | +} | |
14988 | + | |
14989 | +static int __direct_remap_pfn_range(struct mm_struct *mm, | |
14990 | + unsigned long address, | |
14991 | + unsigned long mfn, | |
14992 | + unsigned long size, | |
14993 | + pgprot_t prot, | |
14994 | + domid_t domid) | |
14995 | +{ | |
14996 | + int rc; | |
14997 | + unsigned long i, start_address; | |
14998 | + mmu_update_t *u, *v, *w; | |
14999 | + | |
15000 | + u = v = w = (mmu_update_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); | |
15001 | + if (u == NULL) | |
15002 | + return -ENOMEM; | |
15003 | + | |
15004 | + start_address = address; | |
15005 | + | |
15006 | + flush_cache_all(); | |
15007 | + | |
15008 | + for (i = 0; i < size; i += PAGE_SIZE) { | |
15009 | + if ((v - u) == (PAGE_SIZE / sizeof(mmu_update_t))) { | |
15010 | + /* Flush a full batch after filling in the PTE ptrs. */ | |
15011 | + rc = apply_to_page_range(mm, start_address, | |
15012 | + address - start_address, | |
15013 | + direct_remap_area_pte_fn, &w); | |
15014 | + if (rc) | |
15015 | + goto out; | |
15016 | + rc = -EFAULT; | |
15017 | + if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) | |
15018 | + goto out; | |
15019 | + v = w = u; | |
15020 | + start_address = address; | |
15021 | + } | |
15022 | + | |
15023 | + /* | |
15024 | + * Fill in the machine address: PTE ptr is done later by | |
15025 | + * apply_to_page_range(). | |
15026 | + */ | |
15027 | + v->val = __pte_val(pfn_pte_ma(mfn, prot)) | _PAGE_IO; | |
15028 | + | |
15029 | + mfn++; | |
15030 | + address += PAGE_SIZE; | |
15031 | + v++; | |
15032 | + } | |
15033 | + | |
15034 | + if (v != u) { | |
15035 | + /* Final batch. */ | |
15036 | + rc = apply_to_page_range(mm, start_address, | |
15037 | + address - start_address, | |
15038 | + direct_remap_area_pte_fn, &w); | |
15039 | + if (rc) | |
15040 | + goto out; | |
15041 | + rc = -EFAULT; | |
15042 | + if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)) | |
15043 | + goto out; | |
15044 | + } | |
15045 | + | |
15046 | + rc = 0; | |
15047 | + | |
15048 | + out: | |
15049 | + flush_tlb_all(); | |
15050 | + | |
15051 | + free_page((unsigned long)u); | |
15052 | + | |
15053 | + return rc; | |
15054 | +} | |
15055 | + | |
15056 | +int direct_remap_pfn_range(struct vm_area_struct *vma, | |
15057 | + unsigned long address, | |
15058 | + unsigned long mfn, | |
15059 | + unsigned long size, | |
15060 | + pgprot_t prot, | |
15061 | + domid_t domid) | |
15062 | +{ | |
15063 | + if (xen_feature(XENFEAT_auto_translated_physmap)) | |
15064 | + return remap_pfn_range(vma, address, mfn, size, prot); | |
15065 | + | |
15066 | + if (domid == DOMID_SELF) | |
15067 | + return -EINVAL; | |
cc90b958 | 15068 | + |
00e5a55c BS |
15069 | + vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; |
15070 | + | |
15071 | + vma->vm_mm->context.has_foreign_mappings = 1; | |
15072 | + | |
15073 | + return __direct_remap_pfn_range( | |
15074 | + vma->vm_mm, address, mfn, size, prot, domid); | |
cc90b958 | 15075 | +} |
00e5a55c | 15076 | +EXPORT_SYMBOL(direct_remap_pfn_range); |
cc90b958 | 15077 | + |
00e5a55c BS |
15078 | +int direct_kernel_remap_pfn_range(unsigned long address, |
15079 | + unsigned long mfn, | |
15080 | + unsigned long size, | |
15081 | + pgprot_t prot, | |
15082 | + domid_t domid) | |
cc90b958 | 15083 | +{ |
00e5a55c BS |
15084 | + return __direct_remap_pfn_range( |
15085 | + &init_mm, address, mfn, size, prot, domid); | |
15086 | +} | |
15087 | +EXPORT_SYMBOL(direct_kernel_remap_pfn_range); | |
cc90b958 | 15088 | + |
00e5a55c BS |
15089 | +static int lookup_pte_fn( |
15090 | + pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) | |
15091 | +{ | |
15092 | + uint64_t *ptep = (uint64_t *)data; | |
15093 | + if (ptep) | |
15094 | + *ptep = ((uint64_t)pfn_to_mfn(page_to_pfn(pmd_page)) << | |
15095 | + PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK); | |
15096 | + return 0; | |
cc90b958 BS |
15097 | +} |
15098 | + | |
00e5a55c BS |
15099 | +int create_lookup_pte_addr(struct mm_struct *mm, |
15100 | + unsigned long address, | |
15101 | + uint64_t *ptep) | |
cc90b958 | 15102 | +{ |
00e5a55c BS |
15103 | + return apply_to_page_range(mm, address, PAGE_SIZE, |
15104 | + lookup_pte_fn, ptep); | |
15105 | +} | |
cc90b958 | 15106 | + |
00e5a55c BS |
15107 | +EXPORT_SYMBOL(create_lookup_pte_addr); |
15108 | + | |
15109 | +static int noop_fn( | |
15110 | + pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) | |
15111 | +{ | |
15112 | + return 0; | |
cc90b958 BS |
15113 | +} |
15114 | + | |
00e5a55c BS |
15115 | +int touch_pte_range(struct mm_struct *mm, |
15116 | + unsigned long address, | |
15117 | + unsigned long size) | |
cc90b958 | 15118 | +{ |
00e5a55c BS |
15119 | + return apply_to_page_range(mm, address, size, noop_fn, NULL); |
15120 | +} | |
cc90b958 | 15121 | + |
00e5a55c BS |
15122 | +EXPORT_SYMBOL(touch_pte_range); |
15123 | + | |
15124 | +#ifdef CONFIG_X86_32 | |
15125 | +int page_is_ram(unsigned long pagenr) | |
15126 | +{ | |
15127 | + unsigned long addr, end; | |
15128 | + int i; | |
15129 | + | |
15130 | +#ifndef CONFIG_XEN | |
15131 | + /* | |
15132 | + * A special case is the first 4Kb of memory; | |
15133 | + * This is a BIOS owned area, not kernel ram, but generally | |
15134 | + * not listed as such in the E820 table. | |
15135 | + */ | |
15136 | + if (pagenr == 0) | |
15137 | + return 0; | |
15138 | + | |
15139 | + /* | |
15140 | + * Second special case: Some BIOSen report the PC BIOS | |
15141 | + * area (640->1Mb) as ram even though it is not. | |
15142 | + */ | |
15143 | + if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) && | |
15144 | + pagenr < (BIOS_END >> PAGE_SHIFT)) | |
15145 | + return 0; | |
15146 | +#endif | |
15147 | + | |
15148 | + for (i = 0; i < e820.nr_map; i++) { | |
15149 | + /* | |
15150 | + * Not usable memory: | |
15151 | + */ | |
15152 | + if (e820.map[i].type != E820_RAM) | |
15153 | + continue; | |
15154 | + addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT; | |
15155 | + end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT; | |
15156 | + | |
15157 | + | |
15158 | + if ((pagenr >= addr) && (pagenr < end)) | |
15159 | + return 1; | |
15160 | + } | |
15161 | + return 0; | |
cc90b958 | 15162 | +} |
00e5a55c | 15163 | +#endif |
cc90b958 | 15164 | + |
00e5a55c BS |
15165 | +/* |
15166 | + * Fix up the linear direct mapping of the kernel to avoid cache attribute | |
15167 | + * conflicts. | |
15168 | + */ | |
15169 | +static int ioremap_change_attr(unsigned long vaddr, unsigned long size, | |
15170 | + enum ioremap_mode mode) | |
15171 | +{ | |
15172 | + unsigned long nrpages = size >> PAGE_SHIFT; | |
15173 | + int err; | |
15174 | + | |
15175 | + switch (mode) { | |
15176 | + case IOR_MODE_UNCACHED: | |
15177 | + default: | |
15178 | + err = set_memory_uc(vaddr, nrpages); | |
15179 | + break; | |
15180 | + case IOR_MODE_CACHED: | |
15181 | + err = set_memory_wb(vaddr, nrpages); | |
15182 | + break; | |
15183 | + } | |
15184 | + | |
15185 | + return err; | |
15186 | +} | |
cc90b958 | 15187 | + |
cc90b958 | 15188 | +/* |
00e5a55c BS |
15189 | + * Remap an arbitrary physical address space into the kernel virtual |
15190 | + * address space. Needed when the kernel wants to access high addresses | |
15191 | + * directly. | |
cc90b958 | 15192 | + * |
00e5a55c BS |
15193 | + * NOTE! We need to allow non-page-aligned mappings too: we will obviously |
15194 | + * have to convert them into an offset in a page-aligned mapping, but the | |
15195 | + * caller shouldn't need to know that small detail. | |
cc90b958 | 15196 | + */ |
00e5a55c BS |
15197 | +static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, |
15198 | + enum ioremap_mode mode) | |
15199 | +{ | |
15200 | + unsigned long mfn, offset, last_addr, vaddr; | |
15201 | + struct vm_struct *area; | |
15202 | + pgprot_t prot; | |
15203 | + domid_t domid = DOMID_IO; | |
cc90b958 | 15204 | + |
00e5a55c BS |
15205 | + /* Don't allow wraparound or zero size */ |
15206 | + last_addr = phys_addr + size - 1; | |
15207 | + if (!size || last_addr < phys_addr) | |
15208 | + return NULL; | |
cc90b958 | 15209 | + |
00e5a55c BS |
15210 | + /* |
15211 | + * Don't remap the low PCI/ISA area, it's always mapped.. | |
15212 | + */ | |
15213 | + if (is_initial_xendomain() && last_addr < ISA_END_ADDRESS) | |
15214 | + return (__force void __iomem *)isa_bus_to_virt((unsigned long)phys_addr); | |
cc90b958 | 15215 | + |
00e5a55c BS |
15216 | + /* |
15217 | + * Don't allow anybody to remap normal RAM that we're using.. | |
15218 | + */ | |
15219 | + for (mfn = PFN_DOWN(phys_addr); mfn < PFN_UP(last_addr); mfn++) { | |
15220 | + unsigned long pfn = mfn_to_local_pfn(mfn); | |
15221 | + | |
15222 | + if (pfn >= max_pfn) | |
15223 | + continue; | |
15224 | + | |
15225 | + domid = DOMID_SELF; | |
15226 | + | |
15227 | + if (pfn >= max_pfn_mapped) /* bogus */ | |
15228 | + continue; | |
cc90b958 | 15229 | + |
00e5a55c BS |
15230 | + if (pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) |
15231 | + return NULL; | |
15232 | + } | |
cc90b958 | 15233 | + |
00e5a55c BS |
15234 | + switch (mode) { |
15235 | + case IOR_MODE_UNCACHED: | |
15236 | + default: | |
15237 | + /* | |
15238 | + * FIXME: we will use UC MINUS for now, as video fb drivers | |
15239 | + * depend on it. Upcoming ioremap_wc() will fix this behavior. | |
15240 | + */ | |
15241 | + prot = PAGE_KERNEL_UC_MINUS; | |
15242 | + break; | |
15243 | + case IOR_MODE_CACHED: | |
15244 | + prot = PAGE_KERNEL; | |
15245 | + break; | |
15246 | + } | |
cc90b958 | 15247 | + |
00e5a55c BS |
15248 | + /* |
15249 | + * Mappings have to be page-aligned | |
15250 | + */ | |
15251 | + offset = phys_addr & ~PAGE_MASK; | |
15252 | + phys_addr &= PAGE_MASK; | |
15253 | + size = PAGE_ALIGN(last_addr+1) - phys_addr; | |
cc90b958 | 15254 | + |
00e5a55c BS |
15255 | + /* |
15256 | + * Ok, go for it.. | |
15257 | + */ | |
15258 | + area = get_vm_area(size, VM_IOREMAP | (mode << 20)); | |
15259 | + if (!area) | |
15260 | + return NULL; | |
15261 | + area->phys_addr = phys_addr; | |
15262 | + vaddr = (unsigned long) area->addr; | |
15263 | + if (__direct_remap_pfn_range(&init_mm, vaddr, PFN_DOWN(phys_addr), | |
15264 | + size, prot, domid)) { | |
15265 | + free_vm_area(area); | |
15266 | + return NULL; | |
15267 | + } | |
cc90b958 | 15268 | + |
00e5a55c BS |
15269 | + if (ioremap_change_attr(vaddr, size, mode) < 0) { |
15270 | + iounmap((void __iomem *) vaddr); | |
15271 | + return NULL; | |
15272 | + } | |
15273 | + | |
15274 | + return (void __iomem *) (vaddr + offset); | |
cc90b958 BS |
15275 | +} |
15276 | + | |
00e5a55c BS |
15277 | +/** |
15278 | + * ioremap_nocache - map bus memory into CPU space | |
15279 | + * @offset: bus address of the memory | |
15280 | + * @size: size of the resource to map | |
15281 | + * | |
15282 | + * ioremap_nocache performs a platform specific sequence of operations to | |
15283 | + * make bus memory CPU accessible via the readb/readw/readl/writeb/ | |
15284 | + * writew/writel functions and the other mmio helpers. The returned | |
15285 | + * address is not guaranteed to be usable directly as a virtual | |
15286 | + * address. | |
15287 | + * | |
15288 | + * This version of ioremap ensures that the memory is marked uncachable | |
15289 | + * on the CPU as well as honouring existing caching rules from things like | |
15290 | + * the PCI bus. Note that there are other caches and buffers on many | |
15291 | + * busses. In particular driver authors should read up on PCI writes | |
15292 | + * | |
15293 | + * It's useful if some control registers are in such an area and | |
15294 | + * write combining or read caching is not desirable: | |
15295 | + * | |
15296 | + * Must be freed with iounmap. | |
15297 | + */ | |
15298 | +void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) | |
cc90b958 | 15299 | +{ |
00e5a55c | 15300 | + return __ioremap(phys_addr, size, IOR_MODE_UNCACHED); |
cc90b958 | 15301 | +} |
00e5a55c | 15302 | +EXPORT_SYMBOL(ioremap_nocache); |
cc90b958 | 15303 | + |
00e5a55c | 15304 | +void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) |
cc90b958 | 15305 | +{ |
00e5a55c BS |
15306 | + return __ioremap(phys_addr, size, IOR_MODE_CACHED); |
15307 | +} | |
15308 | +EXPORT_SYMBOL(ioremap_cache); | |
cc90b958 | 15309 | + |
00e5a55c BS |
15310 | +/** |
15311 | + * iounmap - Free a IO remapping | |
15312 | + * @addr: virtual address from ioremap_* | |
15313 | + * | |
15314 | + * Caller must ensure there is only one unmapping for the same pointer. | |
15315 | + */ | |
15316 | +void iounmap(volatile void __iomem *addr) | |
15317 | +{ | |
15318 | + struct vm_struct *p, *o; | |
cc90b958 | 15319 | + |
00e5a55c BS |
15320 | + if ((void __force *)addr <= high_memory) |
15321 | + return; | |
cc90b958 | 15322 | + |
00e5a55c BS |
15323 | + /* |
15324 | + * __ioremap special-cases the PCI/ISA range by not instantiating a | |
15325 | + * vm_area and by simply returning an address into the kernel mapping | |
15326 | + * of ISA space. So handle that here. | |
15327 | + */ | |
15328 | + if ((unsigned long)addr >= fix_to_virt(FIX_ISAMAP_BEGIN)) | |
15329 | + return; | |
cc90b958 | 15330 | + |
00e5a55c BS |
15331 | + addr = (volatile void __iomem *) |
15332 | + (PAGE_MASK & (unsigned long __force)addr); | |
cc90b958 | 15333 | + |
00e5a55c BS |
15334 | + /* Use the vm area unlocked, assuming the caller |
15335 | + ensures there isn't another iounmap for the same address | |
15336 | + in parallel. Reuse of the virtual address is prevented by | |
15337 | + leaving it in the global lists until we're done with it. | |
15338 | + cpa takes care of the direct mappings. */ | |
15339 | + read_lock(&vmlist_lock); | |
15340 | + for (p = vmlist; p; p = p->next) { | |
15341 | + if (p->addr == addr) | |
cc90b958 | 15342 | + break; |
00e5a55c BS |
15343 | + } |
15344 | + read_unlock(&vmlist_lock); | |
cc90b958 | 15345 | + |
00e5a55c BS |
15346 | + if (!p) { |
15347 | + printk(KERN_ERR "iounmap: bad address %p\n", addr); | |
15348 | + dump_stack(); | |
15349 | + return; | |
15350 | + } | |
cc90b958 | 15351 | + |
00e5a55c BS |
15352 | + if ((p->flags >> 20) != IOR_MODE_CACHED) { |
15353 | + unsigned long n = get_vm_area_size(p) >> PAGE_SHIFT; | |
15354 | + unsigned long mfn = p->phys_addr; | |
15355 | + unsigned long va = (unsigned long)addr; | |
cc90b958 | 15356 | + |
00e5a55c BS |
15357 | + for (; n > 0; n--, mfn++, va += PAGE_SIZE) |
15358 | + if (mfn_to_local_pfn(mfn) < max_pfn) | |
15359 | + set_memory_wb(va, 1); | |
15360 | + } | |
cc90b958 | 15361 | + |
00e5a55c BS |
15362 | + /* Finally remove it */ |
15363 | + o = remove_vm_area((void *)addr); | |
15364 | + BUG_ON(p != o || o == NULL); | |
15365 | + kfree(p); | |
cc90b958 | 15366 | +} |
00e5a55c | 15367 | +EXPORT_SYMBOL(iounmap); |
cc90b958 | 15368 | + |
00e5a55c BS |
15369 | +int __initdata early_ioremap_debug; |
15370 | + | |
15371 | +static int __init early_ioremap_debug_setup(char *str) | |
cc90b958 | 15372 | +{ |
00e5a55c BS |
15373 | + early_ioremap_debug = 1; |
15374 | + | |
15375 | + return 0; | |
cc90b958 | 15376 | +} |
00e5a55c | 15377 | +early_param("early_ioremap_debug", early_ioremap_debug_setup); |
cc90b958 | 15378 | + |
00e5a55c BS |
15379 | +static __initdata int after_paging_init; |
15380 | +static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] | |
15381 | + __attribute__((aligned(PAGE_SIZE))); | |
15382 | + | |
15383 | +#ifdef CONFIG_X86_32 | |
15384 | +static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) | |
cc90b958 | 15385 | +{ |
00e5a55c BS |
15386 | + /* Don't assume we're using swapper_pg_dir at this point */ |
15387 | + pgd_t *base = __va(read_cr3()); | |
15388 | + pgd_t *pgd = &base[pgd_index(addr)]; | |
15389 | + pud_t *pud = pud_offset(pgd, addr); | |
15390 | + pmd_t *pmd = pmd_offset(pud, addr); | |
15391 | + | |
15392 | + return pmd; | |
cc90b958 | 15393 | +} |
00e5a55c BS |
15394 | +#else |
15395 | +#define early_ioremap_pmd early_get_pmd | |
15396 | +#define make_lowmem_page_readonly early_make_page_readonly | |
15397 | +#define make_lowmem_page_writable make_page_writable | |
15398 | +#endif | |
cc90b958 | 15399 | + |
00e5a55c | 15400 | +static inline pte_t * __init early_ioremap_pte(unsigned long addr) |
cc90b958 | 15401 | +{ |
00e5a55c BS |
15402 | + return &bm_pte[pte_index(addr)]; |
15403 | +} | |
cc90b958 | 15404 | + |
00e5a55c BS |
15405 | +void __init early_ioremap_init(void) |
15406 | +{ | |
15407 | + pmd_t *pmd; | |
15408 | + | |
15409 | + if (early_ioremap_debug) | |
15410 | + printk(KERN_INFO "early_ioremap_init()\n"); | |
15411 | + | |
15412 | + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); | |
15413 | + memset(bm_pte, 0, sizeof(bm_pte)); | |
15414 | + make_lowmem_page_readonly(bm_pte, XENFEAT_writable_page_tables); | |
15415 | + pmd_populate_kernel(&init_mm, pmd, bm_pte); | |
15416 | + | |
15417 | + /* | |
15418 | + * The boot-ioremap range spans multiple pmds, for which | |
15419 | + * we are not prepared: | |
15420 | + */ | |
15421 | + if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { | |
15422 | + WARN_ON(1); | |
15423 | + printk(KERN_WARNING "pmd %p != %p\n", | |
15424 | + pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))); | |
15425 | + printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", | |
15426 | + fix_to_virt(FIX_BTMAP_BEGIN)); | |
15427 | + printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n", | |
15428 | + fix_to_virt(FIX_BTMAP_END)); | |
cc90b958 | 15429 | + |
00e5a55c BS |
15430 | + printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END); |
15431 | + printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n", | |
15432 | + FIX_BTMAP_BEGIN); | |
15433 | + } | |
15434 | +} | |
cc90b958 | 15435 | + |
00e5a55c BS |
15436 | +#ifdef CONFIG_X86_32 |
15437 | +void __init early_ioremap_clear(void) | |
15438 | +{ | |
15439 | + pmd_t *pmd; | |
cc90b958 | 15440 | + |
00e5a55c BS |
15441 | + if (early_ioremap_debug) |
15442 | + printk(KERN_INFO "early_ioremap_clear()\n"); | |
cc90b958 | 15443 | + |
00e5a55c BS |
15444 | + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); |
15445 | + pmd_clear(pmd); | |
15446 | + make_lowmem_page_writable(bm_pte, XENFEAT_writable_page_tables); | |
15447 | + /* paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT); */ | |
15448 | + __flush_tlb_all(); | |
15449 | +} | |
cc90b958 | 15450 | + |
00e5a55c BS |
15451 | +void __init early_ioremap_reset(void) |
15452 | +{ | |
15453 | + enum fixed_addresses idx; | |
15454 | + unsigned long addr, phys; | |
15455 | + pte_t *pte; | |
15456 | + | |
15457 | + after_paging_init = 1; | |
15458 | + for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) { | |
15459 | + addr = fix_to_virt(idx); | |
15460 | + pte = early_ioremap_pte(addr); | |
15461 | + if (pte_present(*pte)) { | |
15462 | + phys = __pte_val(*pte) & PAGE_MASK; | |
15463 | + set_fixmap(idx, phys); | |
cc90b958 BS |
15464 | + } |
15465 | + } | |
15466 | +} | |
00e5a55c BS |
15467 | +#endif /* CONFIG_X86_32 */ |
15468 | + | |
15469 | +static void __init __early_set_fixmap(enum fixed_addresses idx, | |
15470 | + unsigned long phys, pgprot_t flags) | |
15471 | +{ | |
15472 | + unsigned long addr = __fix_to_virt(idx); | |
15473 | + pte_t *pte; | |
15474 | + | |
15475 | + if (idx >= __end_of_fixed_addresses) { | |
15476 | + BUG(); | |
15477 | + return; | |
15478 | + } | |
15479 | + pte = early_ioremap_pte(addr); | |
15480 | + if (pgprot_val(flags)) | |
15481 | + set_pte(pte, pfn_pte_ma(phys >> PAGE_SHIFT, flags)); | |
15482 | + else | |
15483 | + pte_clear(NULL, addr, pte); | |
15484 | + __flush_tlb_one(addr); | |
15485 | +} | |
15486 | + | |
15487 | +static inline void __init early_set_fixmap(enum fixed_addresses idx, | |
15488 | + unsigned long phys) | |
15489 | +{ | |
15490 | + if (after_paging_init) | |
15491 | + set_fixmap(idx, phys); | |
15492 | + else | |
15493 | + __early_set_fixmap(idx, phys, PAGE_KERNEL); | |
15494 | +} | |
15495 | + | |
15496 | +static inline void __init early_clear_fixmap(enum fixed_addresses idx) | |
15497 | +{ | |
15498 | + if (after_paging_init) | |
15499 | + clear_fixmap(idx); | |
15500 | + else | |
15501 | + __early_set_fixmap(idx, 0, __pgprot(0)); | |
15502 | +} | |
15503 | + | |
15504 | + | |
15505 | +int __initdata early_ioremap_nested; | |
15506 | + | |
15507 | +static int __init check_early_ioremap_leak(void) | |
15508 | +{ | |
15509 | + if (!early_ioremap_nested) | |
15510 | + return 0; | |
15511 | + | |
15512 | + printk(KERN_WARNING | |
15513 | + "Debug warning: early ioremap leak of %d areas detected.\n", | |
15514 | + early_ioremap_nested); | |
15515 | + printk(KERN_WARNING | |
15516 | + "please boot with early_ioremap_debug and report the dmesg.\n"); | |
15517 | + WARN_ON(1); | |
15518 | + | |
15519 | + return 1; | |
15520 | +} | |
15521 | +late_initcall(check_early_ioremap_leak); | |
15522 | + | |
15523 | +void __init *early_ioremap(unsigned long phys_addr, unsigned long size) | |
15524 | +{ | |
15525 | + unsigned long offset, last_addr; | |
15526 | + unsigned int nrpages, nesting; | |
15527 | + enum fixed_addresses idx0, idx; | |
15528 | + | |
15529 | + WARN_ON(system_state != SYSTEM_BOOTING); | |
15530 | + | |
15531 | + nesting = early_ioremap_nested; | |
15532 | + if (early_ioremap_debug) { | |
15533 | + printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ", | |
15534 | + phys_addr, size, nesting); | |
15535 | + dump_stack(); | |
15536 | + } | |
15537 | + | |
15538 | + /* Don't allow wraparound or zero size */ | |
15539 | + last_addr = phys_addr + size - 1; | |
15540 | + if (!size || last_addr < phys_addr) { | |
15541 | + WARN_ON(1); | |
15542 | + return NULL; | |
15543 | + } | |
15544 | + | |
15545 | + if (nesting >= FIX_BTMAPS_NESTING) { | |
15546 | + WARN_ON(1); | |
15547 | + return NULL; | |
15548 | + } | |
15549 | + early_ioremap_nested++; | |
15550 | + /* | |
15551 | + * Mappings have to be page-aligned | |
15552 | + */ | |
15553 | + offset = phys_addr & ~PAGE_MASK; | |
15554 | + phys_addr &= PAGE_MASK; | |
15555 | + size = PAGE_ALIGN(last_addr) - phys_addr; | |
15556 | + | |
15557 | + /* | |
15558 | + * Mappings have to fit in the FIX_BTMAP area. | |
15559 | + */ | |
15560 | + nrpages = size >> PAGE_SHIFT; | |
15561 | + if (nrpages > NR_FIX_BTMAPS) { | |
15562 | + WARN_ON(1); | |
15563 | + return NULL; | |
15564 | + } | |
15565 | + | |
15566 | + /* | |
15567 | + * Ok, go for it.. | |
15568 | + */ | |
15569 | + idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting; | |
15570 | + idx = idx0; | |
15571 | + while (nrpages > 0) { | |
15572 | + early_set_fixmap(idx, phys_addr); | |
15573 | + phys_addr += PAGE_SIZE; | |
15574 | + --idx; | |
15575 | + --nrpages; | |
15576 | + } | |
15577 | + if (early_ioremap_debug) | |
15578 | + printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); | |
15579 | + | |
15580 | + return (void *) (offset + fix_to_virt(idx0)); | |
15581 | +} | |
15582 | + | |
15583 | +void __init early_iounmap(void *addr, unsigned long size) | |
15584 | +{ | |
15585 | + unsigned long virt_addr; | |
15586 | + unsigned long offset; | |
15587 | + unsigned int nrpages; | |
15588 | + enum fixed_addresses idx; | |
15589 | + unsigned int nesting; | |
15590 | + | |
15591 | + nesting = --early_ioremap_nested; | |
15592 | + WARN_ON(nesting < 0); | |
15593 | + | |
15594 | + if (early_ioremap_debug) { | |
15595 | + printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, | |
15596 | + size, nesting); | |
15597 | + dump_stack(); | |
15598 | + } | |
15599 | + | |
15600 | + virt_addr = (unsigned long)addr; | |
15601 | + if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) { | |
15602 | + WARN_ON(1); | |
15603 | + return; | |
15604 | + } | |
15605 | + offset = virt_addr & ~PAGE_MASK; | |
15606 | + nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; | |
15607 | + | |
15608 | + idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting; | |
15609 | + while (nrpages > 0) { | |
15610 | + early_clear_fixmap(idx); | |
15611 | + --idx; | |
15612 | + --nrpages; | |
15613 | + } | |
15614 | +} | |
15615 | + | |
15616 | +void __this_fixmap_does_not_exist(void) | |
15617 | +{ | |
15618 | + WARN_ON(1); | |
15619 | +} | |
15620 | --- sle11-2009-05-14.orig/arch/x86/mm/ioremap_32-xen.c 2009-02-16 16:17:21.000000000 +0100 | |
15621 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
15622 | @@ -1,445 +0,0 @@ | |
cc90b958 | 15623 | -/* |
00e5a55c BS |
15624 | - * arch/i386/mm/ioremap.c |
15625 | - * | |
15626 | - * Re-map IO memory to kernel address space so that we can access it. | |
15627 | - * This is needed for high PCI addresses that aren't mapped in the | |
15628 | - * 640k-1MB IO memory area on PC's | |
15629 | - * | |
15630 | - * (C) Copyright 1995 1996 Linus Torvalds | |
cc90b958 | 15631 | - */ |
cc90b958 | 15632 | - |
00e5a55c BS |
15633 | -#include <linux/vmalloc.h> |
15634 | -#include <linux/init.h> | |
15635 | -#include <linux/slab.h> | |
15636 | -#include <linux/module.h> | |
15637 | -#include <linux/io.h> | |
15638 | -#include <linux/sched.h> | |
15639 | -#include <asm/fixmap.h> | |
15640 | -#include <asm/cacheflush.h> | |
15641 | -#include <asm/tlbflush.h> | |
15642 | -#include <asm/pgtable.h> | |
15643 | -#include <asm/pgalloc.h> | |
cc90b958 | 15644 | - |
00e5a55c BS |
15645 | -#define ISA_START_ADDRESS 0x0 |
15646 | -#define ISA_END_ADDRESS 0x100000 | |
cc90b958 | 15647 | - |
00e5a55c BS |
15648 | -static int direct_remap_area_pte_fn(pte_t *pte, |
15649 | - struct page *pmd_page, | |
15650 | - unsigned long address, | |
15651 | - void *data) | |
cc90b958 | 15652 | -{ |
00e5a55c | 15653 | - mmu_update_t **v = (mmu_update_t **)data; |
cc90b958 | 15654 | - |
00e5a55c | 15655 | - BUG_ON(!pte_none(*pte)); |
cc90b958 | 15656 | - |
00e5a55c BS |
15657 | - (*v)->ptr = ((u64)pfn_to_mfn(page_to_pfn(pmd_page)) << |
15658 | - PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK); | |
15659 | - (*v)++; | |
cc90b958 | 15660 | - |
00e5a55c | 15661 | - return 0; |
cc90b958 BS |
15662 | -} |
15663 | - | |
00e5a55c BS |
15664 | -static int __direct_remap_pfn_range(struct mm_struct *mm, |
15665 | - unsigned long address, | |
15666 | - unsigned long mfn, | |
15667 | - unsigned long size, | |
15668 | - pgprot_t prot, | |
15669 | - domid_t domid) | |
cc90b958 | 15670 | -{ |
00e5a55c BS |
15671 | - int rc; |
15672 | - unsigned long i, start_address; | |
15673 | - mmu_update_t *u, *v, *w; | |
cc90b958 | 15674 | - |
00e5a55c BS |
15675 | - u = v = w = (mmu_update_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); |
15676 | - if (u == NULL) | |
15677 | - return -ENOMEM; | |
cc90b958 | 15678 | - |
00e5a55c | 15679 | - start_address = address; |
cc90b958 | 15680 | - |
00e5a55c | 15681 | - flush_cache_all(); |
cc90b958 | 15682 | - |
00e5a55c BS |
15683 | - for (i = 0; i < size; i += PAGE_SIZE) { |
15684 | - if ((v - u) == (PAGE_SIZE / sizeof(mmu_update_t))) { | |
15685 | - /* Flush a full batch after filling in the PTE ptrs. */ | |
15686 | - rc = apply_to_page_range(mm, start_address, | |
15687 | - address - start_address, | |
15688 | - direct_remap_area_pte_fn, &w); | |
15689 | - if (rc) | |
15690 | - goto out; | |
15691 | - rc = -EFAULT; | |
15692 | - if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) | |
15693 | - goto out; | |
15694 | - v = w = u; | |
15695 | - start_address = address; | |
15696 | - } | |
cc90b958 | 15697 | - |
00e5a55c BS |
15698 | - /* |
15699 | - * Fill in the machine address: PTE ptr is done later by | |
15700 | - * apply_to_page_range(). | |
15701 | - */ | |
15702 | - v->val = __pte_val(pfn_pte_ma(mfn, prot)) | _PAGE_IO; | |
cc90b958 | 15703 | - |
00e5a55c BS |
15704 | - mfn++; |
15705 | - address += PAGE_SIZE; | |
15706 | - v++; | |
15707 | - } | |
cc90b958 | 15708 | - |
00e5a55c BS |
15709 | - if (v != u) { |
15710 | - /* Final batch. */ | |
15711 | - rc = apply_to_page_range(mm, start_address, | |
15712 | - address - start_address, | |
15713 | - direct_remap_area_pte_fn, &w); | |
15714 | - if (rc) | |
15715 | - goto out; | |
15716 | - rc = -EFAULT; | |
15717 | - if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)) | |
15718 | - goto out; | |
cc90b958 | 15719 | - } |
cc90b958 | 15720 | - |
00e5a55c | 15721 | - rc = 0; |
cc90b958 | 15722 | - |
00e5a55c BS |
15723 | - out: |
15724 | - flush_tlb_all(); | |
cc90b958 | 15725 | - |
00e5a55c | 15726 | - free_page((unsigned long)u); |
cc90b958 | 15727 | - |
00e5a55c | 15728 | - return rc; |
cc90b958 BS |
15729 | -} |
15730 | - | |
00e5a55c BS |
15731 | -int direct_remap_pfn_range(struct vm_area_struct *vma, |
15732 | - unsigned long address, | |
15733 | - unsigned long mfn, | |
15734 | - unsigned long size, | |
15735 | - pgprot_t prot, | |
15736 | - domid_t domid) | |
cc90b958 | 15737 | -{ |
00e5a55c BS |
15738 | - if (xen_feature(XENFEAT_auto_translated_physmap)) |
15739 | - return remap_pfn_range(vma, address, mfn, size, prot); | |
cc90b958 | 15740 | - |
00e5a55c BS |
15741 | - if (domid == DOMID_SELF) |
15742 | - return -EINVAL; | |
cc90b958 | 15743 | - |
00e5a55c | 15744 | - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; |
cc90b958 | 15745 | - |
00e5a55c | 15746 | - vma->vm_mm->context.has_foreign_mappings = 1; |
cc90b958 | 15747 | - |
00e5a55c BS |
15748 | - return __direct_remap_pfn_range( |
15749 | - vma->vm_mm, address, mfn, size, prot, domid); | |
cc90b958 | 15750 | -} |
00e5a55c | 15751 | -EXPORT_SYMBOL(direct_remap_pfn_range); |
cc90b958 | 15752 | - |
00e5a55c BS |
15753 | -int direct_kernel_remap_pfn_range(unsigned long address, |
15754 | - unsigned long mfn, | |
15755 | - unsigned long size, | |
15756 | - pgprot_t prot, | |
15757 | - domid_t domid) | |
cc90b958 | 15758 | -{ |
00e5a55c BS |
15759 | - return __direct_remap_pfn_range( |
15760 | - &init_mm, address, mfn, size, prot, domid); | |
cc90b958 | 15761 | -} |
00e5a55c | 15762 | -EXPORT_SYMBOL(direct_kernel_remap_pfn_range); |
cc90b958 | 15763 | - |
00e5a55c BS |
15764 | -static int lookup_pte_fn( |
15765 | - pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) | |
cc90b958 | 15766 | -{ |
00e5a55c BS |
15767 | - uint64_t *ptep = (uint64_t *)data; |
15768 | - if (ptep) | |
15769 | - *ptep = ((uint64_t)pfn_to_mfn(page_to_pfn(pmd_page)) << | |
15770 | - PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK); | |
15771 | - return 0; | |
cc90b958 | 15772 | -} |
cc90b958 | 15773 | - |
00e5a55c BS |
15774 | -int create_lookup_pte_addr(struct mm_struct *mm, |
15775 | - unsigned long address, | |
15776 | - uint64_t *ptep) | |
cc90b958 | 15777 | -{ |
00e5a55c BS |
15778 | - return apply_to_page_range(mm, address, PAGE_SIZE, |
15779 | - lookup_pte_fn, ptep); | |
cc90b958 | 15780 | -} |
cc90b958 | 15781 | - |
00e5a55c | 15782 | -EXPORT_SYMBOL(create_lookup_pte_addr); |
cc90b958 | 15783 | - |
00e5a55c BS |
15784 | -static int noop_fn( |
15785 | - pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) | |
cc90b958 | 15786 | -{ |
00e5a55c | 15787 | - return 0; |
cc90b958 BS |
15788 | -} |
15789 | - | |
00e5a55c BS |
15790 | -int touch_pte_range(struct mm_struct *mm, |
15791 | - unsigned long address, | |
15792 | - unsigned long size) | |
cc90b958 | 15793 | -{ |
00e5a55c BS |
15794 | - return apply_to_page_range(mm, address, size, noop_fn, NULL); |
15795 | -} | |
cc90b958 | 15796 | - |
00e5a55c | 15797 | -EXPORT_SYMBOL(touch_pte_range); |
cc90b958 | 15798 | - |
00e5a55c BS |
15799 | -/* |
15800 | - * Does @address reside within a non-highmem page that is local to this virtual | |
15801 | - * machine (i.e., not an I/O page, nor a memory page belonging to another VM). | |
15802 | - * See the comment that accompanies mfn_to_local_pfn() in page.h to understand | |
15803 | - * why this works. | |
15804 | - */ | |
15805 | -static inline int is_local_lowmem(unsigned long address) | |
cc90b958 | 15806 | -{ |
00e5a55c BS |
15807 | - extern unsigned long max_low_pfn; |
15808 | - return (mfn_to_local_pfn(address >> PAGE_SHIFT) < max_low_pfn); | |
cc90b958 BS |
15809 | -} |
15810 | - | |
cc90b958 | 15811 | -/* |
00e5a55c | 15812 | - * Generic mapping function (not visible outside): |
cc90b958 | 15813 | - */ |
cc90b958 | 15814 | - |
00e5a55c BS |
15815 | -/* |
15816 | - * Remap an arbitrary physical address space into the kernel virtual | |
15817 | - * address space. Needed when the kernel wants to access high addresses | |
15818 | - * directly. | |
15819 | - * | |
15820 | - * NOTE! We need to allow non-page-aligned mappings too: we will obviously | |
15821 | - * have to convert them into an offset in a page-aligned mapping, but the | |
15822 | - * caller shouldn't need to know that small detail. | |
15823 | - */ | |
15824 | -void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) | |
15825 | -{ | |
15826 | - void __iomem * addr; | |
15827 | - struct vm_struct * area; | |
15828 | - unsigned long offset, last_addr; | |
15829 | - pgprot_t prot; | |
15830 | - domid_t domid = DOMID_IO; | |
cc90b958 | 15831 | - |
00e5a55c BS |
15832 | - /* Don't allow wraparound or zero size */ |
15833 | - last_addr = phys_addr + size - 1; | |
15834 | - if (!size || last_addr < phys_addr) | |
15835 | - return NULL; | |
cc90b958 | 15836 | - |
00e5a55c BS |
15837 | - /* |
15838 | - * Don't remap the low PCI/ISA area, it's always mapped.. | |
15839 | - */ | |
15840 | - if (is_initial_xendomain() && | |
15841 | - phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) | |
15842 | - return (void __iomem *) isa_bus_to_virt(phys_addr); | |
cc90b958 | 15843 | - |
00e5a55c BS |
15844 | - /* |
15845 | - * Don't allow anybody to remap normal RAM that we're using.. | |
15846 | - */ | |
15847 | - if (is_local_lowmem(phys_addr)) { | |
15848 | - char *t_addr, *t_end; | |
15849 | - struct page *page; | |
cc90b958 | 15850 | - |
00e5a55c BS |
15851 | - t_addr = bus_to_virt(phys_addr); |
15852 | - t_end = t_addr + (size - 1); | |
15853 | - | |
15854 | - for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) | |
15855 | - if(!PageReserved(page)) | |
15856 | - return NULL; | |
cc90b958 | 15857 | - |
00e5a55c BS |
15858 | - domid = DOMID_SELF; |
15859 | - } | |
cc90b958 | 15860 | - |
00e5a55c | 15861 | - prot = __pgprot(_KERNPG_TABLE | flags); |
cc90b958 BS |
15862 | - |
15863 | - /* | |
00e5a55c | 15864 | - * Mappings have to be page-aligned |
cc90b958 | 15865 | - */ |
00e5a55c BS |
15866 | - offset = phys_addr & ~PAGE_MASK; |
15867 | - phys_addr &= PAGE_MASK; | |
15868 | - size = PAGE_ALIGN(last_addr+1) - phys_addr; | |
cc90b958 | 15869 | - |
00e5a55c BS |
15870 | - /* |
15871 | - * Ok, go for it.. | |
15872 | - */ | |
15873 | - area = get_vm_area(size, VM_IOREMAP | (flags << 20)); | |
15874 | - if (!area) | |
15875 | - return NULL; | |
15876 | - area->phys_addr = phys_addr; | |
15877 | - addr = (void __iomem *) area->addr; | |
15878 | - if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr, | |
15879 | - phys_addr>>PAGE_SHIFT, | |
15880 | - size, prot, domid)) { | |
15881 | - vunmap((void __force *) addr); | |
15882 | - return NULL; | |
15883 | - } | |
15884 | - return (void __iomem *) (offset + (char __iomem *)addr); | |
cc90b958 | 15885 | -} |
00e5a55c | 15886 | -EXPORT_SYMBOL(__ioremap); |
cc90b958 | 15887 | - |
00e5a55c BS |
15888 | -/** |
15889 | - * ioremap_nocache - map bus memory into CPU space | |
15890 | - * @offset: bus address of the memory | |
15891 | - * @size: size of the resource to map | |
15892 | - * | |
15893 | - * ioremap_nocache performs a platform specific sequence of operations to | |
15894 | - * make bus memory CPU accessible via the readb/readw/readl/writeb/ | |
15895 | - * writew/writel functions and the other mmio helpers. The returned | |
15896 | - * address is not guaranteed to be usable directly as a virtual | |
15897 | - * address. | |
15898 | - * | |
15899 | - * This version of ioremap ensures that the memory is marked uncachable | |
15900 | - * on the CPU as well as honouring existing caching rules from things like | |
15901 | - * the PCI bus. Note that there are other caches and buffers on many | |
15902 | - * busses. In particular driver authors should read up on PCI writes | |
15903 | - * | |
15904 | - * It's useful if some control registers are in such an area and | |
15905 | - * write combining or read caching is not desirable: | |
15906 | - * | |
15907 | - * Must be freed with iounmap. | |
cc90b958 | 15908 | - */ |
cc90b958 | 15909 | - |
00e5a55c | 15910 | -void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) |
cc90b958 | 15911 | -{ |
00e5a55c BS |
15912 | - unsigned long last_addr; |
15913 | - void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD); | |
15914 | - if (!p) | |
15915 | - return p; | |
cc90b958 | 15916 | - |
00e5a55c BS |
15917 | - /* Guaranteed to be > phys_addr, as per __ioremap() */ |
15918 | - last_addr = phys_addr + size - 1; | |
cc90b958 | 15919 | - |
00e5a55c BS |
15920 | - if (is_local_lowmem(last_addr)) { |
15921 | - struct page *ppage = virt_to_page(bus_to_virt(phys_addr)); | |
15922 | - unsigned long npages; | |
cc90b958 | 15923 | - |
00e5a55c | 15924 | - phys_addr &= PAGE_MASK; |
cc90b958 | 15925 | - |
00e5a55c BS |
15926 | - /* This might overflow and become zero.. */ |
15927 | - last_addr = PAGE_ALIGN(last_addr); | |
cc90b958 | 15928 | - |
00e5a55c BS |
15929 | - /* .. but that's ok, because modulo-2**n arithmetic will make |
15930 | - * the page-aligned "last - first" come out right. | |
15931 | - */ | |
15932 | - npages = (last_addr - phys_addr) >> PAGE_SHIFT; | |
cc90b958 | 15933 | - |
00e5a55c BS |
15934 | - if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { |
15935 | - iounmap(p); | |
15936 | - p = NULL; | |
15937 | - } | |
15938 | - global_flush_tlb(); | |
15939 | - } | |
cc90b958 | 15940 | - |
00e5a55c | 15941 | - return p; |
cc90b958 | 15942 | -} |
00e5a55c | 15943 | -EXPORT_SYMBOL(ioremap_nocache); |
cc90b958 | 15944 | - |
00e5a55c BS |
15945 | -/** |
15946 | - * iounmap - Free a IO remapping | |
15947 | - * @addr: virtual address from ioremap_* | |
15948 | - * | |
15949 | - * Caller must ensure there is only one unmapping for the same pointer. | |
15950 | - */ | |
15951 | -void iounmap(volatile void __iomem *addr) | |
cc90b958 | 15952 | -{ |
00e5a55c | 15953 | - struct vm_struct *p, *o; |
cc90b958 | 15954 | - |
00e5a55c BS |
15955 | - if ((void __force *)addr <= high_memory) |
15956 | - return; | |
cc90b958 | 15957 | - |
cc90b958 | 15958 | - /* |
00e5a55c BS |
15959 | - * __ioremap special-cases the PCI/ISA range by not instantiating a |
15960 | - * vm_area and by simply returning an address into the kernel mapping | |
15961 | - * of ISA space. So handle that here. | |
cc90b958 | 15962 | - */ |
00e5a55c BS |
15963 | - if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN)) |
15964 | - return; | |
cc90b958 | 15965 | - |
00e5a55c BS |
15966 | - addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr); |
15967 | - | |
15968 | - /* Use the vm area unlocked, assuming the caller | |
15969 | - ensures there isn't another iounmap for the same address | |
15970 | - in parallel. Reuse of the virtual address is prevented by | |
15971 | - leaving it in the global lists until we're done with it. | |
15972 | - cpa takes care of the direct mappings. */ | |
15973 | - read_lock(&vmlist_lock); | |
15974 | - for (p = vmlist; p; p = p->next) { | |
15975 | - if (p->addr == addr) | |
15976 | - break; | |
15977 | - } | |
15978 | - read_unlock(&vmlist_lock); | |
cc90b958 | 15979 | - |
00e5a55c BS |
15980 | - if (!p) { |
15981 | - printk("iounmap: bad address %p\n", addr); | |
15982 | - dump_stack(); | |
15983 | - return; | |
15984 | - } | |
cc90b958 | 15985 | - |
00e5a55c BS |
15986 | - /* Reset the direct mapping. Can block */ |
15987 | - if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) { | |
15988 | - change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)), | |
15989 | - get_vm_area_size(p) >> PAGE_SHIFT, | |
15990 | - PAGE_KERNEL); | |
15991 | - global_flush_tlb(); | |
15992 | - } | |
cc90b958 | 15993 | - |
00e5a55c BS |
15994 | - /* Finally remove it */ |
15995 | - o = remove_vm_area((void *)addr); | |
15996 | - BUG_ON(p != o || o == NULL); | |
15997 | - kfree(p); | |
15998 | -} | |
15999 | -EXPORT_SYMBOL(iounmap); | |
16000 | - | |
16001 | -void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) | |
cc90b958 | 16002 | -{ |
00e5a55c BS |
16003 | - unsigned long offset, last_addr; |
16004 | - unsigned int nrpages; | |
16005 | - enum fixed_addresses idx; | |
cc90b958 | 16006 | - |
00e5a55c BS |
16007 | - /* Don't allow wraparound or zero size */ |
16008 | - last_addr = phys_addr + size - 1; | |
16009 | - if (!size || last_addr < phys_addr) | |
16010 | - return NULL; | |
cc90b958 | 16011 | - |
00e5a55c BS |
16012 | - /* |
16013 | - * Don't remap the low PCI/ISA area, it's always mapped.. | |
16014 | - */ | |
16015 | - if (is_initial_xendomain() && | |
16016 | - phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) | |
16017 | - return isa_bus_to_virt(phys_addr); | |
cc90b958 | 16018 | - |
00e5a55c BS |
16019 | - /* |
16020 | - * Mappings have to be page-aligned | |
16021 | - */ | |
16022 | - offset = phys_addr & ~PAGE_MASK; | |
16023 | - phys_addr &= PAGE_MASK; | |
16024 | - size = PAGE_ALIGN(last_addr) - phys_addr; | |
cc90b958 | 16025 | - |
00e5a55c BS |
16026 | - /* |
16027 | - * Mappings have to fit in the FIX_BTMAP area. | |
16028 | - */ | |
16029 | - nrpages = size >> PAGE_SHIFT; | |
16030 | - if (nrpages > NR_FIX_BTMAPS) | |
16031 | - return NULL; | |
cc90b958 | 16032 | - |
00e5a55c BS |
16033 | - /* |
16034 | - * Ok, go for it.. | |
16035 | - */ | |
16036 | - idx = FIX_BTMAP_BEGIN; | |
16037 | - while (nrpages > 0) { | |
16038 | - set_fixmap(idx, phys_addr); | |
16039 | - phys_addr += PAGE_SIZE; | |
16040 | - --idx; | |
16041 | - --nrpages; | |
16042 | - } | |
16043 | - return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN)); | |
cc90b958 BS |
16044 | -} |
16045 | - | |
00e5a55c BS |
16046 | -void __init bt_iounmap(void *addr, unsigned long size) |
16047 | -{ | |
16048 | - unsigned long virt_addr; | |
16049 | - unsigned long offset; | |
16050 | - unsigned int nrpages; | |
16051 | - enum fixed_addresses idx; | |
cc90b958 | 16052 | - |
00e5a55c BS |
16053 | - virt_addr = (unsigned long)addr; |
16054 | - if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) | |
16055 | - return; | |
16056 | - if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN)) | |
16057 | - return; | |
16058 | - offset = virt_addr & ~PAGE_MASK; | |
16059 | - nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; | |
cc90b958 | 16060 | - |
00e5a55c BS |
16061 | - idx = FIX_BTMAP_BEGIN; |
16062 | - while (nrpages > 0) { | |
16063 | - clear_fixmap(idx); | |
16064 | - --idx; | |
16065 | - --nrpages; | |
16066 | - } | |
16067 | -} | |
16068 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
16069 | +++ sle11-2009-05-14/arch/x86/mm/pageattr-xen.c 2009-03-16 16:37:14.000000000 +0100 | |
16070 | @@ -0,0 +1,1413 @@ | |
16071 | +/* | |
16072 | + * Copyright 2002 Andi Kleen, SuSE Labs. | |
16073 | + * Thanks to Ben LaHaise for precious feedback. | |
16074 | + */ | |
16075 | +#include <linux/highmem.h> | |
16076 | +#include <linux/bootmem.h> | |
16077 | +#include <linux/module.h> | |
16078 | +#include <linux/sched.h> | |
16079 | +#include <linux/slab.h> | |
16080 | +#include <linux/mm.h> | |
16081 | +#include <linux/interrupt.h> | |
16082 | + | |
16083 | +#include <asm/e820.h> | |
16084 | +#include <asm/processor.h> | |
16085 | +#include <asm/tlbflush.h> | |
16086 | +#include <asm/sections.h> | |
16087 | +#include <asm/uaccess.h> | |
16088 | +#include <asm/pgalloc.h> | |
16089 | +#include <asm/proto.h> | |
16090 | +#include <asm/mmu_context.h> | |
16091 | + | |
16092 | +#ifndef CONFIG_X86_64 | |
16093 | +#define TASK_SIZE64 TASK_SIZE | |
16094 | +#endif | |
16095 | + | |
16096 | +static void _pin_lock(struct mm_struct *mm, int lock) { | |
16097 | + if (lock) | |
16098 | + spin_lock(&mm->page_table_lock); | |
16099 | +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS | |
16100 | + /* While mm->page_table_lock protects us against insertions and | |
16101 | + * removals of higher level page table pages, it doesn't protect | |
16102 | + * against updates of pte-s. Such updates, however, require the | |
16103 | + * pte pages to be in consistent state (unpinned+writable or | |
16104 | + * pinned+readonly). The pinning and attribute changes, however | |
16105 | + * cannot be done atomically, which is why such updates must be | |
16106 | + * prevented from happening concurrently. | |
16107 | + * Note that no pte lock can ever elsewhere be acquired nesting | |
16108 | + * with an already acquired one in the same mm, or with the mm's | |
16109 | + * page_table_lock already acquired, as that would break in the | |
16110 | + * non-split case (where all these are actually resolving to the | |
16111 | + * one page_table_lock). Thus acquiring all of them here is not | |
16112 | + * going to result in dead locks, and the order of acquires | |
16113 | + * doesn't matter. | |
16114 | + */ | |
16115 | + { | |
16116 | + pgd_t *pgd = mm->pgd; | |
16117 | + unsigned g; | |
16118 | + | |
16119 | + for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) { | |
16120 | + pud_t *pud; | |
16121 | + unsigned u; | |
16122 | + | |
16123 | + if (pgd_none(*pgd)) | |
16124 | + continue; | |
16125 | + pud = pud_offset(pgd, 0); | |
16126 | + for (u = 0; u < PTRS_PER_PUD; u++, pud++) { | |
16127 | + pmd_t *pmd; | |
16128 | + unsigned m; | |
16129 | + | |
16130 | + if (pud_none(*pud)) | |
16131 | + continue; | |
16132 | + pmd = pmd_offset(pud, 0); | |
16133 | + for (m = 0; m < PTRS_PER_PMD; m++, pmd++) { | |
16134 | + spinlock_t *ptl; | |
16135 | + | |
16136 | + if (pmd_none(*pmd)) | |
16137 | + continue; | |
16138 | + ptl = pte_lockptr(0, pmd); | |
16139 | + if (lock) | |
16140 | + spin_lock(ptl); | |
16141 | + else | |
16142 | + spin_unlock(ptl); | |
16143 | + } | |
16144 | + } | |
16145 | + } | |
16146 | + } | |
16147 | +#endif | |
16148 | + if (!lock) | |
16149 | + spin_unlock(&mm->page_table_lock); | |
16150 | +} | |
16151 | +#define pin_lock(mm) _pin_lock(mm, 1) | |
16152 | +#define pin_unlock(mm) _pin_lock(mm, 0) | |
16153 | + | |
16154 | +#define PIN_BATCH sizeof(void *) | |
16155 | +static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl); | |
16156 | + | |
16157 | +static inline unsigned int pgd_walk_set_prot(struct page *page, pgprot_t flags, | |
16158 | + unsigned int cpu, unsigned int seq) | |
16159 | +{ | |
16160 | + unsigned long pfn = page_to_pfn(page); | |
16161 | + | |
16162 | + if (PageHighMem(page)) { | |
16163 | + if (pgprot_val(flags) & _PAGE_RW) | |
16164 | + ClearPagePinned(page); | |
16165 | + else | |
16166 | + SetPagePinned(page); | |
16167 | + } else { | |
16168 | + MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq, | |
16169 | + (unsigned long)__va(pfn << PAGE_SHIFT), | |
16170 | + pfn_pte(pfn, flags), 0); | |
16171 | + if (unlikely(++seq == PIN_BATCH)) { | |
16172 | + if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu), | |
16173 | + PIN_BATCH, NULL))) | |
16174 | + BUG(); | |
16175 | + seq = 0; | |
16176 | + } | |
16177 | + } | |
16178 | + | |
16179 | + return seq; | |
16180 | +} | |
16181 | + | |
16182 | +static void pgd_walk(pgd_t *pgd_base, pgprot_t flags) | |
16183 | +{ | |
16184 | + pgd_t *pgd = pgd_base; | |
16185 | + pud_t *pud; | |
16186 | + pmd_t *pmd; | |
16187 | + int g,u,m; | |
16188 | + unsigned int cpu, seq; | |
16189 | + multicall_entry_t *mcl; | |
16190 | + | |
16191 | + if (xen_feature(XENFEAT_auto_translated_physmap)) | |
16192 | + return; | |
16193 | + | |
16194 | + cpu = get_cpu(); | |
16195 | + | |
16196 | + /* | |
16197 | + * Cannot iterate up to USER_PTRS_PER_PGD on x86-64 as these pagetables | |
16198 | + * may not be the 'current' task's pagetables (e.g., current may be | |
16199 | + * 32-bit, but the pagetables may be for a 64-bit task). | |
16200 | + * Subtracting 1 from TASK_SIZE64 means the loop limit is correct | |
16201 | + * regardless of whether TASK_SIZE64 is a multiple of PGDIR_SIZE. | |
16202 | + */ | |
16203 | + for (g = 0, seq = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) { | |
16204 | + if (pgd_none(*pgd)) | |
16205 | + continue; | |
16206 | + pud = pud_offset(pgd, 0); | |
16207 | + if (PTRS_PER_PUD > 1) /* not folded */ | |
16208 | + seq = pgd_walk_set_prot(virt_to_page(pud),flags,cpu,seq); | |
16209 | + for (u = 0; u < PTRS_PER_PUD; u++, pud++) { | |
16210 | + if (pud_none(*pud)) | |
16211 | + continue; | |
16212 | + pmd = pmd_offset(pud, 0); | |
16213 | + if (PTRS_PER_PMD > 1) /* not folded */ | |
16214 | + seq = pgd_walk_set_prot(virt_to_page(pmd),flags,cpu,seq); | |
16215 | + for (m = 0; m < PTRS_PER_PMD; m++, pmd++) { | |
16216 | + if (pmd_none(*pmd)) | |
16217 | + continue; | |
16218 | + seq = pgd_walk_set_prot(pmd_page(*pmd),flags,cpu,seq); | |
16219 | + } | |
16220 | + } | |
16221 | + } | |
16222 | + | |
16223 | + mcl = per_cpu(pb_mcl, cpu); | |
16224 | +#ifdef CONFIG_X86_64 | |
16225 | + if (unlikely(seq > PIN_BATCH - 2)) { | |
16226 | + if (unlikely(HYPERVISOR_multicall_check(mcl, seq, NULL))) | |
16227 | + BUG(); | |
16228 | + seq = 0; | |
16229 | + } | |
16230 | + MULTI_update_va_mapping(mcl + seq, | |
16231 | + (unsigned long)__user_pgd(pgd_base), | |
16232 | + pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags), | |
16233 | + 0); | |
16234 | + MULTI_update_va_mapping(mcl + seq + 1, | |
16235 | + (unsigned long)pgd_base, | |
16236 | + pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags), | |
16237 | + UVMF_TLB_FLUSH); | |
16238 | + if (unlikely(HYPERVISOR_multicall_check(mcl, seq + 2, NULL))) | |
16239 | + BUG(); | |
16240 | +#else | |
16241 | + if (likely(seq != 0)) { | |
16242 | + MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq, | |
16243 | + (unsigned long)pgd_base, | |
16244 | + pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags), | |
16245 | + UVMF_TLB_FLUSH); | |
16246 | + if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu), | |
16247 | + seq + 1, NULL))) | |
16248 | + BUG(); | |
16249 | + } else if(HYPERVISOR_update_va_mapping((unsigned long)pgd_base, | |
16250 | + pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags), | |
16251 | + UVMF_TLB_FLUSH)) | |
16252 | + BUG(); | |
16253 | +#endif | |
16254 | + | |
16255 | + put_cpu(); | |
16256 | +} | |
16257 | + | |
16258 | +static void __pgd_pin(pgd_t *pgd) | |
16259 | +{ | |
16260 | + pgd_walk(pgd, PAGE_KERNEL_RO); | |
16261 | + kmap_flush_unused(); | |
16262 | + xen_pgd_pin(__pa(pgd)); /* kernel */ | |
16263 | +#ifdef CONFIG_X86_64 | |
16264 | + xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */ | |
16265 | +#endif | |
16266 | + SetPagePinned(virt_to_page(pgd)); | |
16267 | +} | |
16268 | + | |
16269 | +static void __pgd_unpin(pgd_t *pgd) | |
16270 | +{ | |
16271 | + xen_pgd_unpin(__pa(pgd)); | |
16272 | +#ifdef CONFIG_X86_64 | |
16273 | + xen_pgd_unpin(__pa(__user_pgd(pgd))); | |
16274 | +#endif | |
16275 | + pgd_walk(pgd, PAGE_KERNEL); | |
16276 | + ClearPagePinned(virt_to_page(pgd)); | |
16277 | +} | |
16278 | + | |
16279 | +void pgd_test_and_unpin(pgd_t *pgd) | |
16280 | +{ | |
16281 | + if (PagePinned(virt_to_page(pgd))) | |
16282 | + __pgd_unpin(pgd); | |
16283 | +} | |
16284 | + | |
16285 | +void mm_pin(struct mm_struct *mm) | |
16286 | +{ | |
16287 | + if (xen_feature(XENFEAT_writable_page_tables)) | |
16288 | + return; | |
16289 | + | |
16290 | + pin_lock(mm); | |
16291 | + __pgd_pin(mm->pgd); | |
16292 | + pin_unlock(mm); | |
16293 | +} | |
16294 | + | |
16295 | +void mm_unpin(struct mm_struct *mm) | |
16296 | +{ | |
16297 | + if (xen_feature(XENFEAT_writable_page_tables)) | |
16298 | + return; | |
16299 | + | |
16300 | + pin_lock(mm); | |
16301 | + __pgd_unpin(mm->pgd); | |
16302 | + pin_unlock(mm); | |
16303 | +} | |
16304 | + | |
16305 | +void mm_pin_all(void) | |
16306 | +{ | |
16307 | + struct page *page; | |
16308 | + unsigned long flags; | |
16309 | + | |
16310 | + if (xen_feature(XENFEAT_writable_page_tables)) | |
16311 | + return; | |
16312 | + | |
16313 | + /* | |
16314 | + * Allow uninterrupted access to the pgd_list. Also protects | |
16315 | + * __pgd_pin() by disabling preemption. | |
16316 | + * All other CPUs must be at a safe point (e.g., in stop_machine | |
16317 | + * or offlined entirely). | |
16318 | + */ | |
16319 | + spin_lock_irqsave(&pgd_lock, flags); | |
16320 | + list_for_each_entry(page, &pgd_list, lru) { | |
16321 | + if (!PagePinned(page)) | |
16322 | + __pgd_pin((pgd_t *)page_address(page)); | |
16323 | + } | |
16324 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
16325 | +} | |
16326 | + | |
16327 | +void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) | |
16328 | +{ | |
16329 | + if (!PagePinned(virt_to_page(mm->pgd))) | |
16330 | + mm_pin(mm); | |
16331 | +} | |
16332 | + | |
16333 | +void arch_exit_mmap(struct mm_struct *mm) | |
16334 | +{ | |
16335 | + struct task_struct *tsk = current; | |
16336 | + | |
16337 | + task_lock(tsk); | |
16338 | + | |
16339 | + /* | |
16340 | + * We aggressively remove defunct pgd from cr3. We execute unmap_vmas() | |
16341 | + * *much* faster this way, as no tlb flushes means bigger wrpt batches. | |
16342 | + */ | |
16343 | + if (tsk->active_mm == mm) { | |
16344 | + tsk->active_mm = &init_mm; | |
16345 | + atomic_inc(&init_mm.mm_count); | |
16346 | + | |
16347 | + switch_mm(mm, &init_mm, tsk); | |
16348 | + | |
16349 | + atomic_dec(&mm->mm_count); | |
16350 | + BUG_ON(atomic_read(&mm->mm_count) == 0); | |
16351 | + } | |
16352 | + | |
16353 | + task_unlock(tsk); | |
16354 | + | |
16355 | + if (PagePinned(virt_to_page(mm->pgd)) | |
16356 | + && atomic_read(&mm->mm_count) == 1 | |
16357 | + && !mm->context.has_foreign_mappings) | |
16358 | + mm_unpin(mm); | |
16359 | +} | |
16360 | + | |
16361 | +static void _pte_free(struct page *page, unsigned int order) | |
16362 | +{ | |
16363 | + BUG_ON(order); | |
16364 | + __pte_free(page); | |
16365 | +} | |
16366 | + | |
16367 | +pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | |
16368 | +{ | |
16369 | + struct page *pte; | |
16370 | + | |
16371 | +#ifdef CONFIG_HIGHPTE | |
16372 | + pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); | |
16373 | +#else | |
16374 | + pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); | |
16375 | +#endif | |
16376 | + if (pte) { | |
16377 | + pgtable_page_ctor(pte); | |
16378 | + SetPageForeign(pte, _pte_free); | |
16379 | + init_page_count(pte); | |
16380 | + } | |
16381 | + return pte; | |
16382 | +} | |
16383 | + | |
16384 | +void __pte_free(pgtable_t pte) | |
16385 | +{ | |
16386 | + if (!PageHighMem(pte)) { | |
16387 | + unsigned long va = (unsigned long)page_address(pte); | |
16388 | + unsigned int level; | |
16389 | + pte_t *ptep = lookup_address(va, &level); | |
16390 | + | |
16391 | + BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep)); | |
16392 | + if (!pte_write(*ptep) | |
16393 | + && HYPERVISOR_update_va_mapping(va, | |
16394 | + mk_pte(pte, PAGE_KERNEL), | |
16395 | + 0)) | |
16396 | + BUG(); | |
16397 | + } else | |
16398 | +#ifdef CONFIG_HIGHPTE | |
16399 | + ClearPagePinned(pte); | |
16400 | +#else | |
16401 | + BUG(); | |
16402 | +#endif | |
16403 | + | |
16404 | + ClearPageForeign(pte); | |
16405 | + init_page_count(pte); | |
16406 | + pgtable_page_dtor(pte); | |
16407 | + __free_page(pte); | |
16408 | +} | |
16409 | + | |
16410 | +#if PAGETABLE_LEVELS >= 3 | |
16411 | +static void _pmd_free(struct page *page, unsigned int order) | |
16412 | +{ | |
16413 | + BUG_ON(order); | |
16414 | + __pmd_free(page); | |
16415 | +} | |
16416 | + | |
16417 | +pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) | |
16418 | +{ | |
16419 | + struct page *pmd; | |
16420 | + | |
16421 | + pmd = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); | |
16422 | + if (!pmd) | |
16423 | + return NULL; | |
16424 | + SetPageForeign(pmd, _pmd_free); | |
16425 | + init_page_count(pmd); | |
16426 | + return page_address(pmd); | |
16427 | +} | |
16428 | + | |
16429 | +void __pmd_free(pgtable_t pmd) | |
16430 | +{ | |
16431 | + unsigned long va = (unsigned long)page_address(pmd); | |
16432 | + unsigned int level; | |
16433 | + pte_t *ptep = lookup_address(va, &level); | |
16434 | + | |
16435 | + BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep)); | |
16436 | + if (!pte_write(*ptep) | |
16437 | + && HYPERVISOR_update_va_mapping(va, mk_pte(pmd, PAGE_KERNEL), 0)) | |
16438 | + BUG(); | |
16439 | + | |
16440 | + ClearPageForeign(pmd); | |
16441 | + init_page_count(pmd); | |
16442 | + __free_page(pmd); | |
16443 | +} | |
16444 | +#endif | |
16445 | + | |
16446 | +/* blktap and gntdev need this, as otherwise they would implicitly (and | |
16447 | + * needlessly, as they never use it) reference init_mm. */ | |
16448 | +pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *vma, | |
16449 | + unsigned long addr, pte_t *ptep, int full) | |
16450 | +{ | |
16451 | + return ptep_get_and_clear_full(vma->vm_mm, addr, ptep, full); | |
16452 | +} | |
16453 | +EXPORT_SYMBOL_GPL(xen_ptep_get_and_clear_full); | |
16454 | + | |
16455 | +/* | |
16456 | + * The current flushing context - we pass it instead of 5 arguments: | |
16457 | + */ | |
16458 | +struct cpa_data { | |
16459 | + unsigned long vaddr; | |
16460 | + pgprot_t mask_set; | |
16461 | + pgprot_t mask_clr; | |
16462 | + int numpages; | |
16463 | + int flushtlb; | |
16464 | + unsigned long pfn; | |
16465 | +}; | |
16466 | + | |
16467 | +#ifdef CONFIG_X86_64 | |
16468 | + | |
16469 | +static inline unsigned long highmap_start_pfn(void) | |
16470 | +{ | |
16471 | + return __pa(_text) >> PAGE_SHIFT; | |
16472 | +} | |
16473 | + | |
16474 | +static inline unsigned long highmap_end_pfn(void) | |
16475 | +{ | |
16476 | + return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; | |
16477 | +} | |
16478 | + | |
16479 | +#endif | |
16480 | + | |
16481 | +#ifdef CONFIG_DEBUG_PAGEALLOC | |
16482 | +# define debug_pagealloc 1 | |
16483 | +#else | |
16484 | +# define debug_pagealloc 0 | |
16485 | +#endif | |
16486 | + | |
16487 | +static inline int | |
16488 | +within(unsigned long addr, unsigned long start, unsigned long end) | |
16489 | +{ | |
16490 | + return addr >= start && addr < end; | |
16491 | +} | |
16492 | + | |
16493 | +/* | |
16494 | + * Flushing functions | |
16495 | + */ | |
16496 | + | |
16497 | +/** | |
16498 | + * clflush_cache_range - flush a cache range with clflush | |
16499 | + * @vaddr: virtual start address | |
16500 | + * @size: number of bytes to flush | |
16501 | + * | |
16502 | + * clflush is an unordered instruction which needs fencing with mfence | |
16503 | + * to avoid ordering issues. | |
16504 | + */ | |
16505 | +void clflush_cache_range(void *vaddr, unsigned int size) | |
16506 | +{ | |
16507 | + void *vend = vaddr + size - 1; | |
16508 | + | |
16509 | + mb(); | |
16510 | + | |
16511 | + for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size) | |
16512 | + clflush(vaddr); | |
16513 | + /* | |
16514 | + * Flush any possible final partial cacheline: | |
16515 | + */ | |
16516 | + clflush(vend); | |
16517 | + | |
16518 | + mb(); | |
16519 | +} | |
16520 | + | |
16521 | +static void __cpa_flush_all(void *arg) | |
16522 | +{ | |
16523 | + unsigned long cache = (unsigned long)arg; | |
16524 | + | |
16525 | + /* | |
16526 | + * Flush all to work around Errata in early athlons regarding | |
16527 | + * large page flushing. | |
16528 | + */ | |
16529 | + __flush_tlb_all(); | |
16530 | + /* wbinvd exists since the 486: test the CPU family, not the model */ | |
16531 | + if (cache && boot_cpu_data.x86 >= 4) | |
16532 | + wbinvd(); | |
16533 | +} | |
16534 | + | |
16535 | +static void cpa_flush_all(unsigned long cache) | |
16536 | +{ | |
16537 | + BUG_ON(irqs_disabled()); | |
16538 | + | |
16539 | + on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1); | |
16540 | +} | |
16541 | + | |
16542 | +static void __cpa_flush_range(void *arg) | |
16543 | +{ | |
16544 | + /* | |
16545 | + * We could optimize that further and do individual per page | |
16546 | + * tlb invalidates for a low number of pages. Caveat: we must | |
16547 | + * flush the high aliases on 64bit as well. | |
16548 | + */ | |
16549 | + __flush_tlb_all(); | |
16550 | +} | |
16551 | + | |
16552 | +static void cpa_flush_range(unsigned long start, int numpages, int cache) | |
16553 | +{ | |
16554 | + unsigned int i, level; | |
16555 | + unsigned long addr; | |
16556 | + | |
16557 | + BUG_ON(irqs_disabled()); | |
16558 | + WARN_ON(PAGE_ALIGN(start) != start); | |
16559 | + | |
16560 | + on_each_cpu(__cpa_flush_range, NULL, 1, 1); | |
16561 | + | |
16562 | + if (!cache) | |
16563 | + return; | |
16564 | + | |
16565 | + /* | |
16566 | + * We only need to flush on one CPU, | |
16567 | + * clflush is a MESI-coherent instruction that | |
16568 | + * will cause all other CPUs to flush the same | |
16569 | + * cachelines: | |
16570 | + */ | |
16571 | + for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) { | |
16572 | + pte_t *pte = lookup_address(addr, &level); | |
16573 | + | |
16574 | + /* | |
16575 | + * Only flush present addresses: | |
16576 | + */ | |
16577 | + if (pte && (__pte_val(*pte) & _PAGE_PRESENT)) | |
16578 | + clflush_cache_range((void *) addr, PAGE_SIZE); | |
16579 | + } | |
16580 | +} | |
16581 | + | |
16582 | +/* | |
16583 | + * Certain areas of memory on x86 require very specific protection flags, | |
16584 | + * for example the BIOS area or kernel text. Callers don't always get this | |
16585 | + * right (again, ioremap() on BIOS memory is not uncommon) so this function | |
16586 | + * checks and fixes these known static required protection bits. | |
16587 | + */ | |
16588 | +static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |
16589 | + unsigned long pfn) | |
16590 | +{ | |
16591 | + pgprot_t forbidden = __pgprot(0); | |
16592 | + | |
16593 | +#ifndef CONFIG_XEN | |
16594 | + /* | |
16595 | + * The BIOS area between 640k and 1Mb needs to be executable for | |
16596 | + * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. | |
16597 | + */ | |
16598 | + if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) | |
16599 | + pgprot_val(forbidden) |= _PAGE_NX; | |
16600 | +#endif | |
16601 | + | |
16602 | + /* | |
16603 | + * The kernel text needs to be executable for obvious reasons | |
16604 | + * Does not cover __inittext since that is gone later on. On | |
16605 | + * 64bit we do not enforce !NX on the low mapping | |
16606 | + */ | |
16607 | + if (within(address, (unsigned long)_text, (unsigned long)_etext)) | |
16608 | + pgprot_val(forbidden) |= _PAGE_NX; | |
16609 | + | |
16610 | + /* | |
16611 | + * The .rodata section needs to be read-only. Using the pfn | |
16612 | + * catches all aliases. | |
16613 | + */ | |
16614 | + if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, | |
16615 | + __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) | |
16616 | + pgprot_val(forbidden) |= _PAGE_RW; | |
16617 | + | |
16618 | + prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); | |
16619 | + | |
16620 | + return prot; | |
16621 | +} | |
16622 | + | |
16623 | +/* | |
16624 | + * Lookup the page table entry for a virtual address. Return a pointer | |
16625 | + * to the entry and the level of the mapping. | |
16626 | + * | |
16627 | + * Note: We return pud and pmd either when the entry is marked large | |
16628 | + * or when the present bit is not set. Otherwise we would return a | |
16629 | + * pointer to a nonexisting mapping. | |
16630 | + */ | |
16631 | +pte_t *lookup_address(unsigned long address, unsigned int *level) | |
16632 | +{ | |
16633 | + pgd_t *pgd = pgd_offset_k(address); | |
16634 | + pud_t *pud; | |
16635 | + pmd_t *pmd; | |
16636 | + | |
16637 | + *level = PG_LEVEL_NONE; | |
16638 | + | |
16639 | + if (pgd_none(*pgd)) | |
16640 | + return NULL; | |
16641 | + | |
16642 | + pud = pud_offset(pgd, address); | |
16643 | + if (pud_none(*pud)) | |
16644 | + return NULL; | |
16645 | + | |
16646 | + *level = PG_LEVEL_1G; | |
16647 | + if (pud_large(*pud) || !pud_present(*pud)) | |
16648 | + return (pte_t *)pud; | |
16649 | + | |
16650 | + pmd = pmd_offset(pud, address); | |
16651 | + if (pmd_none(*pmd)) | |
16652 | + return NULL; | |
16653 | + | |
16654 | + *level = PG_LEVEL_2M; | |
16655 | + if (pmd_large(*pmd) || !pmd_present(*pmd)) | |
16656 | + return (pte_t *)pmd; | |
16657 | + | |
16658 | + *level = PG_LEVEL_4K; | |
16659 | + | |
16660 | + return pte_offset_kernel(pmd, address); | |
16661 | +} | |
16662 | + | |
16663 | +/* | |
16664 | + * Set the new pmd in all the pgds we know about: | |
16665 | + */ | |
16666 | +static void __set_pmd_pte(pte_t *kpte, unsigned long address, | |
16667 | + unsigned int level, pte_t pte) | |
16668 | +{ | |
16669 | + /* change init_mm */ | |
16670 | + switch(level) { | |
16671 | + case PG_LEVEL_2M: | |
16672 | + xen_l2_entry_update((pmd_t *)kpte, __pmd_ma(__pte_val(pte))); | |
16673 | + break; | |
16674 | +#ifdef CONFIG_X86_64 | |
16675 | + case PG_LEVEL_1G: | |
16676 | + xen_l3_entry_update((pud_t *)kpte, __pud_ma(__pte_val(pte))); | |
16677 | + break; | |
16678 | +#endif | |
16679 | + default: | |
16680 | + BUG(); | |
16681 | + } | |
16682 | +#ifdef CONFIG_X86_32 | |
16683 | + if (!SHARED_KERNEL_PMD) { | |
16684 | + struct page *page; | |
16685 | + | |
16686 | + list_for_each_entry(page, &pgd_list, lru) { | |
16687 | + pgd_t *pgd; | |
16688 | + pud_t *pud; | |
16689 | + pmd_t *pmd; | |
16690 | + | |
16691 | + pgd = (pgd_t *)page_address(page) + pgd_index(address); | |
16692 | + pud = pud_offset(pgd, address); | |
16693 | + pmd = pmd_offset(pud, address); | |
16694 | + xen_l2_entry_update(pmd, __pmd_ma(__pte_val(pte))); | |
16695 | + } | |
16696 | + } | |
16697 | +#endif | |
16698 | +} | |
16699 | + | |
16700 | +static int | |
16701 | +try_preserve_large_page(pte_t *kpte, unsigned long address, | |
16702 | + struct cpa_data *cpa) | |
16703 | +{ | |
16704 | + unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; | |
16705 | + pte_t new_pte, old_pte, *tmp; | |
16706 | + pgprot_t old_prot, new_prot; | |
16707 | + int i, do_split = 1; | |
16708 | + unsigned int level; | |
16709 | + | |
16710 | + spin_lock_irqsave(&pgd_lock, flags); | |
16711 | + /* | |
16712 | + * Check for races, another CPU might have split this page | |
16713 | + * up already: | |
16714 | + */ | |
16715 | + tmp = lookup_address(address, &level); | |
16716 | + if (tmp != kpte) | |
16717 | + goto out_unlock; | |
16718 | + | |
16719 | + switch (level) { | |
16720 | + case PG_LEVEL_2M: | |
16721 | + psize = PMD_PAGE_SIZE; | |
16722 | + pmask = PMD_PAGE_MASK; | |
16723 | + break; | |
16724 | +#ifdef CONFIG_X86_64 | |
16725 | + case PG_LEVEL_1G: | |
16726 | + psize = PUD_PAGE_SIZE; | |
16727 | + pmask = PUD_PAGE_MASK; | |
16728 | + break; | |
16729 | +#endif | |
16730 | + default: | |
16731 | + do_split = -EINVAL; | |
16732 | + goto out_unlock; | |
16733 | + } | |
16734 | + | |
16735 | + /* | |
16736 | + * Calculate the number of pages, which fit into this large | |
16737 | + * page starting at address: | |
16738 | + */ | |
16739 | + nextpage_addr = (address + psize) & pmask; | |
16740 | + numpages = (nextpage_addr - address) >> PAGE_SHIFT; | |
16741 | + if (numpages < cpa->numpages) | |
16742 | + cpa->numpages = numpages; | |
16743 | + | |
16744 | + /* | |
16745 | + * We are safe now. Check whether the new pgprot is the same: | |
16746 | + */ | |
16747 | + old_pte = *kpte; | |
16748 | + old_prot = new_prot = pte_pgprot(old_pte); | |
16749 | + | |
16750 | + pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); | |
16751 | + pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); | |
16752 | + | |
16753 | + /* | |
16754 | + * old_pte points to the large page base address. So we need | |
16755 | + * to add the offset of the virtual address: | |
16756 | + */ | |
16757 | + pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT); | |
16758 | + cpa->pfn = pfn; | |
16759 | + | |
16760 | + new_prot = static_protections(new_prot, address, pfn); | |
16761 | + | |
16762 | + /* | |
16763 | + * We need to check the full range, whether | |
16764 | + * static_protections() requires a different pgprot for one of | |
16765 | + * the pages in the range we try to preserve: | |
16766 | + */ | |
16767 | + if (pfn < max_mapnr) { | |
16768 | + addr = address + PAGE_SIZE; | |
16769 | + for (i = 1; i < cpa->numpages && ++pfn < max_mapnr; | |
16770 | + i++, addr += PAGE_SIZE) { | |
16771 | + pgprot_t chk_prot = static_protections(new_prot, addr, pfn); | |
16772 | + | |
16773 | + if (pgprot_val(chk_prot) != pgprot_val(new_prot)) | |
16774 | + goto out_unlock; | |
16775 | + } | |
16776 | + } | |
16777 | + | |
16778 | + /* | |
16779 | + * If there are no changes, return. cpa->numpages has been updated | |
16780 | + * above: | |
16781 | + */ | |
16782 | + if (pgprot_val(new_prot) == pgprot_val(old_prot)) { | |
16783 | + do_split = 0; | |
16784 | + goto out_unlock; | |
16785 | + } | |
16786 | + | |
16787 | + /* | |
16788 | + * We need to change the attributes. Check, whether we can | |
16789 | + * change the large page in one go. We request a split, when | |
16790 | + * the address is not aligned or the number of pages is | |
16791 | + * smaller than the number of pages in the large page. Note | |
16792 | + * that we limited the number of possible pages already to | |
16793 | + * the number of pages in the large page. | |
16794 | + */ | |
16795 | + if (address == (nextpage_addr - psize) && cpa->numpages == numpages) { | |
16796 | + /* | |
16797 | + * The address is aligned and the number of pages | |
16798 | + * covers the full page. | |
16799 | + */ | |
16800 | + new_pte = pfn_pte_ma(__pte_mfn(old_pte), canon_pgprot(new_prot)); | |
16801 | + __set_pmd_pte(kpte, address, level, new_pte); | |
16802 | + cpa->flushtlb = 1; | |
16803 | + do_split = 0; | |
16804 | + } | |
16805 | + | |
16806 | +out_unlock: | |
16807 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
16808 | + | |
16809 | + return do_split; | |
16810 | +} | |
16811 | + | |
16812 | +static LIST_HEAD(page_pool); | |
16813 | +static unsigned long pool_size, pool_pages, pool_low; | |
16814 | +static unsigned long pool_used, pool_failed; | |
16815 | + | |
16816 | +static void cpa_fill_pool(struct page **ret) | |
16817 | +{ | |
16818 | + gfp_t gfp = GFP_KERNEL; | |
16819 | + unsigned long flags; | |
16820 | + struct page *p; | |
16821 | + | |
16822 | + /* | |
16823 | + * Avoid recursion (on debug-pagealloc) and also signal | |
16824 | + * our priority to get to these pagetables: | |
16825 | + */ | |
16826 | + if (current->flags & PF_MEMALLOC) | |
16827 | + return; | |
16828 | + current->flags |= PF_MEMALLOC; | |
16829 | + | |
16830 | + /* | |
16831 | + * Allocate atomically from atomic contexts: | |
16832 | + */ | |
16833 | + if (in_atomic() || irqs_disabled() || debug_pagealloc) | |
16834 | + gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; | |
16835 | + | |
16836 | + while (pool_pages < pool_size || (ret && !*ret)) { | |
16837 | + p = alloc_pages(gfp, 0); | |
16838 | + if (!p) { | |
16839 | + pool_failed++; | |
16840 | + break; | |
16841 | + } | |
16842 | + /* | |
16843 | + * If the call site needs a page right now, provide it: | |
16844 | + */ | |
16845 | + if (ret && !*ret) { | |
16846 | + *ret = p; | |
16847 | + continue; | |
16848 | + } | |
16849 | + spin_lock_irqsave(&pgd_lock, flags); | |
16850 | + list_add(&p->lru, &page_pool); | |
16851 | + pool_pages++; | |
16852 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
16853 | + } | |
16854 | + | |
16855 | + current->flags &= ~PF_MEMALLOC; | |
16856 | +} | |
16857 | + | |
16858 | +#define SHIFT_MB (20 - PAGE_SHIFT) | |
16859 | +#define ROUND_MB_GB ((1 << 10) - 1) | |
16860 | +#define SHIFT_MB_GB 10 | |
16861 | +#define POOL_PAGES_PER_GB 16 | |
16862 | + | |
16863 | +void __init cpa_init(void) | |
16864 | +{ | |
16865 | + struct sysinfo si; | |
16866 | + unsigned long gb; | |
16867 | + | |
16868 | + si_meminfo(&si); | |
16869 | + /* | |
16870 | + * Calculate the number of pool pages: | |
16871 | + * | |
16872 | + * Convert totalram (nr of pages) to MiB and round to the next | |
16873 | + * GiB. Shift MiB to GiB and multiply the result by | |
16874 | + * POOL_PAGES_PER_GB: | |
16875 | + */ | |
16876 | + if (debug_pagealloc) { | |
16877 | + gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB; | |
16878 | + pool_size = POOL_PAGES_PER_GB * gb; | |
16879 | + } else { | |
16880 | + pool_size = 1; | |
16881 | + } | |
16882 | + pool_low = pool_size; | |
16883 | + | |
16884 | + cpa_fill_pool(NULL); | |
16885 | + printk(KERN_DEBUG | |
16886 | + "CPA: page pool initialized %lu of %lu pages preallocated\n", | |
16887 | + pool_pages, pool_size); | |
16888 | +} | |
16889 | + | |
16890 | +static int split_large_page(pte_t *kpte, unsigned long address) | |
16891 | +{ | |
16892 | + unsigned long flags, mfn, mfninc = 1; | |
16893 | + unsigned int i, level; | |
16894 | + pte_t *pbase, *tmp; | |
16895 | + pgprot_t ref_prot; | |
16896 | + struct page *base; | |
16897 | + | |
16898 | + /* | |
16899 | + * Get a page from the pool. The pool list is protected by the | |
16900 | + * pgd_lock, which we have to take anyway for the split | |
16901 | + * operation: | |
16902 | + */ | |
16903 | + spin_lock_irqsave(&pgd_lock, flags); | |
16904 | + if (list_empty(&page_pool)) { | |
16905 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
16906 | + base = NULL; | |
16907 | + cpa_fill_pool(&base); | |
16908 | + if (!base) | |
16909 | + return -ENOMEM; | |
16910 | + spin_lock_irqsave(&pgd_lock, flags); | |
16911 | + } else { | |
16912 | + base = list_first_entry(&page_pool, struct page, lru); | |
16913 | + list_del(&base->lru); | |
16914 | + pool_pages--; | |
16915 | + | |
16916 | + if (pool_pages < pool_low) | |
16917 | + pool_low = pool_pages; | |
16918 | + } | |
16919 | + | |
16920 | + /* | |
16921 | + * Check for races, another CPU might have split this page | |
16922 | + * up for us already: | |
16923 | + */ | |
16924 | + tmp = lookup_address(address, &level); | |
16925 | + if (tmp != kpte) | |
16926 | + goto out_unlock; | |
16927 | + | |
16928 | + pbase = (pte_t *)page_address(base); | |
16929 | +#ifdef CONFIG_X86_32 | |
16930 | + paravirt_alloc_pt(&init_mm, page_to_pfn(base)); | |
16931 | +#endif | |
16932 | + ref_prot = pte_pgprot(pte_clrhuge(*kpte)); | |
16933 | + | |
16934 | +#ifdef CONFIG_X86_64 | |
16935 | + if (level == PG_LEVEL_1G) { | |
16936 | + mfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; | |
16937 | + pgprot_val(ref_prot) |= _PAGE_PSE; | |
16938 | + } | |
16939 | +#endif | |
16940 | + | |
16941 | + /* | |
16942 | + * Get the target mfn from the original entry: | |
16943 | + */ | |
16944 | + mfn = __pte_mfn(*kpte); | |
16945 | + for (i = 0; i < PTRS_PER_PTE; i++, mfn += mfninc) | |
16946 | + set_pte(&pbase[i], pfn_pte_ma(mfn, ref_prot)); | |
16947 | + | |
16948 | + /* | |
16949 | + * Install the new, split up pagetable. Important details here: | |
16950 | + * | |
16951 | + * On Intel the NX bit of all levels must be cleared to make a | |
16952 | + * page executable. See section 4.13.2 of Intel 64 and IA-32 | |
16953 | + * Architectures Software Developer's Manual). | |
16954 | + * | |
16955 | + * Mark the entry present. The current mapping might be | |
16956 | + * set to not present, which we preserved above. | |
16957 | + */ | |
16958 | + if (!xen_feature(XENFEAT_writable_page_tables) && | |
16959 | + HYPERVISOR_update_va_mapping((unsigned long)pbase, | |
16960 | + mk_pte(base, PAGE_KERNEL_RO), 0)) | |
16961 | + BUG(); | |
16962 | + ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte))); | |
16963 | + pgprot_val(ref_prot) |= _PAGE_PRESENT; | |
16964 | + __set_pmd_pte(kpte, address, level, mk_pte(base, ref_prot)); | |
16965 | + base = NULL; | |
cc90b958 | 16966 | + |
00e5a55c BS |
16967 | +out_unlock: |
16968 | + /* | |
16969 | + * If we dropped out via the lookup_address check under | |
16970 | + * pgd_lock then stick the page back into the pool: | |
16971 | + */ | |
16972 | + if (base) { | |
16973 | + list_add(&base->lru, &page_pool); | |
16974 | + pool_pages++; | |
16975 | + } else | |
16976 | + pool_used++; | |
16977 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
cc90b958 | 16978 | + |
00e5a55c BS |
16979 | + return 0; |
16980 | +} | |
16981 | + | |
16982 | +static int __change_page_attr(struct cpa_data *cpa, int primary) | |
cc90b958 | 16983 | +{ |
00e5a55c BS |
16984 | + unsigned long address = cpa->vaddr; |
16985 | + int do_split, err; | |
16986 | + unsigned int level; | |
16987 | + pte_t *kpte, old_pte; | |
cc90b958 | 16988 | + |
00e5a55c BS |
16989 | +repeat: |
16990 | + kpte = lookup_address(address, &level); | |
16991 | + if (!kpte) | |
16992 | + return primary ? -EINVAL : 0; | |
16993 | + | |
16994 | + old_pte = *kpte; | |
16995 | + if (!__pte_val(old_pte)) { | |
16996 | + if (!primary) | |
16997 | + return 0; | |
16998 | + printk(KERN_WARNING "CPA: called for zero pte. " | |
16999 | + "vaddr = %lx cpa->vaddr = %lx\n", address, | |
17000 | + cpa->vaddr); | |
17001 | + WARN_ON(1); | |
17002 | + return -EINVAL; | |
17003 | + } | |
17004 | + | |
17005 | + if (level == PG_LEVEL_4K) { | |
17006 | + pte_t new_pte; | |
17007 | + pgprot_t new_prot = pte_pgprot(old_pte); | |
17008 | + unsigned long mfn = __pte_mfn(old_pte); | |
17009 | + | |
17010 | + pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); | |
17011 | + pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); | |
17012 | + | |
17013 | + new_prot = static_protections(new_prot, address, | |
17014 | + mfn_to_local_pfn(mfn)); | |
17015 | + | |
17016 | + /* | |
17017 | + * We need to keep the mfn from the existing PTE, | |
17018 | + * after all we're only going to change its attributes | |
17019 | + * not the memory it points to | |
17020 | + */ | |
17021 | + new_pte = pfn_pte_ma(mfn, canon_pgprot(new_prot)); | |
17022 | + cpa->pfn = mfn_to_local_pfn(mfn); | |
17023 | + /* | |
17024 | + * Do we really change anything ? | |
17025 | + */ | |
17026 | + if (__pte_val(old_pte) != __pte_val(new_pte)) { | |
17027 | + set_pte_atomic(kpte, new_pte); | |
17028 | + cpa->flushtlb = 1; | |
17029 | + } | |
17030 | + cpa->numpages = 1; | |
17031 | + return 0; | |
17032 | + } | |
17033 | + | |
17034 | + /* | |
17035 | + * Check, whether we can keep the large page intact | |
17036 | + * and just change the pte: | |
17037 | + */ | |
17038 | + do_split = try_preserve_large_page(kpte, address, cpa); | |
17039 | + /* | |
17040 | + * When the range fits into the existing large page, | |
17041 | + * return. cpa->numpages and cpa->flushtlb have been updated in | |
17042 | + * try_preserve_large_page: | |
17043 | + */ | |
17044 | + if (do_split <= 0) | |
17045 | + return do_split; | |
17046 | + | |
17047 | + /* | |
17048 | + * We have to split the large page: | |
17049 | + */ | |
17050 | + err = split_large_page(kpte, address); | |
17051 | + if (!err) { | |
17052 | + cpa->flushtlb = 1; | |
17053 | + goto repeat; | |
17054 | + } | |
17055 | + | |
17056 | + return err; | |
17057 | +} | |
17058 | + | |
17059 | +static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias); | |
17060 | + | |
17061 | +static int cpa_process_alias(struct cpa_data *cpa) | |
17062 | +{ | |
17063 | + struct cpa_data alias_cpa; | |
17064 | + int ret = 0; | |
17065 | + | |
17066 | + if (cpa->pfn > max_pfn_mapped) | |
17067 | + return 0; | |
17068 | + | |
17069 | + /* | |
17070 | + * No need to redo, when the primary call touched the direct | |
17071 | + * mapping already: | |
17072 | + */ | |
17073 | + if (!within(cpa->vaddr, PAGE_OFFSET, | |
17074 | + PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) { | |
17075 | + | |
17076 | + alias_cpa = *cpa; | |
17077 | + alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); | |
17078 | + | |
17079 | + ret = __change_page_attr_set_clr(&alias_cpa, 0); | |
17080 | + } | |
17081 | + | |
17082 | +#ifdef CONFIG_X86_64 | |
17083 | + if (ret) | |
17084 | + return ret; | |
17085 | + /* | |
17086 | + * No need to redo, when the primary call touched the high | |
17087 | + * mapping already: | |
17088 | + */ | |
17089 | + if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end)) | |
17090 | + return 0; | |
17091 | + | |
17092 | + /* | |
17093 | + * If the physical address is inside the kernel map, we need | |
17094 | + * to touch the high mapped kernel as well: | |
17095 | + */ | |
17096 | + if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) | |
17097 | + return 0; | |
17098 | + | |
17099 | + alias_cpa = *cpa; | |
17100 | + alias_cpa.vaddr = | |
17101 | + (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map; | |
17102 | + | |
17103 | + /* | |
17104 | + * The high mapping range is imprecise, so ignore the return value. | |
17105 | + */ | |
17106 | + __change_page_attr_set_clr(&alias_cpa, 0); | |
17107 | +#endif | |
17108 | + return ret; | |
17109 | +} | |
17110 | + | |
17111 | +static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) | |
17112 | +{ | |
17113 | + int ret, numpages = cpa->numpages; | |
17114 | + | |
17115 | + while (numpages) { | |
17116 | + /* | |
17117 | + * Store the remaining nr of pages for the large page | |
17118 | + * preservation check. | |
17119 | + */ | |
17120 | + cpa->numpages = numpages; | |
17121 | + | |
17122 | + ret = __change_page_attr(cpa, checkalias); | |
17123 | + if (ret) | |
17124 | + return ret; | |
17125 | + | |
17126 | + if (checkalias) { | |
17127 | + ret = cpa_process_alias(cpa); | |
17128 | + if (ret) | |
17129 | + return ret; | |
17130 | + } | |
17131 | + | |
17132 | + /* | |
17133 | + * Adjust the number of pages with the result of the | |
17134 | + * CPA operation. Either a large page has been | |
17135 | + * preserved or a single page update happened. | |
17136 | + */ | |
17137 | + BUG_ON(cpa->numpages > numpages); | |
17138 | + numpages -= cpa->numpages; | |
17139 | + cpa->vaddr += cpa->numpages * PAGE_SIZE; | |
17140 | + } | |
17141 | + return 0; | |
17142 | +} | |
17143 | + | |
17144 | +static inline int cache_attr(pgprot_t attr) | |
17145 | +{ | |
17146 | + return pgprot_val(attr) & | |
17147 | + (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD); | |
cc90b958 BS |
17148 | +} |
17149 | + | |
00e5a55c BS |
17150 | +static int change_page_attr_set_clr(unsigned long addr, int numpages, |
17151 | + pgprot_t mask_set, pgprot_t mask_clr) | |
17152 | +{ | |
17153 | + struct cpa_data cpa; | |
17154 | + int ret, cache, checkalias; | |
17155 | + | |
17156 | + /* | |
17157 | + * Check, if we are requested to change a not supported | |
17158 | + * feature: | |
17159 | + */ | |
17160 | + mask_set = canon_pgprot(mask_set); | |
17161 | + mask_clr = canon_pgprot(mask_clr); | |
17162 | + if (!pgprot_val(mask_set) && !pgprot_val(mask_clr)) | |
17163 | + return 0; | |
17164 | + | |
17165 | + /* Ensure we are PAGE_SIZE aligned */ | |
17166 | + if (addr & ~PAGE_MASK) { | |
17167 | + addr &= PAGE_MASK; | |
17168 | + /* | |
17169 | + * People should not be passing in unaligned addresses: | |
17170 | + */ | |
17171 | + WARN_ON_ONCE(1); | |
17172 | + } | |
17173 | + | |
17174 | + cpa.vaddr = addr; | |
17175 | + cpa.numpages = numpages; | |
17176 | + cpa.mask_set = mask_set; | |
17177 | + cpa.mask_clr = mask_clr; | |
17178 | + cpa.flushtlb = 0; | |
17179 | + | |
17180 | + /* No alias checking for _NX bit modifications */ | |
17181 | + checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; | |
17182 | + | |
17183 | + ret = __change_page_attr_set_clr(&cpa, checkalias); | |
17184 | + | |
17185 | + /* | |
17186 | + * Check whether we really changed something: | |
17187 | + */ | |
17188 | + if (!cpa.flushtlb) | |
17189 | + goto out; | |
17190 | + | |
17191 | + /* | |
17192 | + * No need to flush, when we did not set any of the caching | |
17193 | + * attributes: | |
17194 | + */ | |
17195 | + cache = cache_attr(mask_set); | |
17196 | + | |
17197 | + /* | |
17198 | + * On success we use clflush, when the CPU supports it to | |
17199 | + * avoid the wbinvd. If the CPU does not support it and in the | |
17200 | + * error case we fall back to cpa_flush_all (which uses | |
17201 | + * wbinvd): | |
17202 | + */ | |
17203 | + if (!ret && cpu_has_clflush) | |
17204 | + cpa_flush_range(addr, numpages, cache); | |
17205 | + else | |
17206 | + cpa_flush_all(cache); | |
17207 | + | |
17208 | +out: | |
17209 | + cpa_fill_pool(NULL); | |
cc90b958 | 17210 | + |
00e5a55c BS |
17211 | + return ret; |
17212 | +} | |
cc90b958 | 17213 | + |
00e5a55c BS |
17214 | +static inline int change_page_attr_set(unsigned long addr, int numpages, |
17215 | + pgprot_t mask) | |
cc90b958 | 17216 | +{ |
00e5a55c | 17217 | + return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0)); |
cc90b958 BS |
17218 | +} |
17219 | + | |
00e5a55c BS |
17220 | +static inline int change_page_attr_clear(unsigned long addr, int numpages, |
17221 | + pgprot_t mask) | |
cc90b958 | 17222 | +{ |
00e5a55c | 17223 | + return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask); |
cc90b958 BS |
17224 | +} |
17225 | + | |
00e5a55c | 17226 | +int set_memory_uc(unsigned long addr, int numpages) |
cc90b958 | 17227 | +{ |
00e5a55c BS |
17228 | + return change_page_attr_set(addr, numpages, |
17229 | + __pgprot(_PAGE_PCD)); | |
cc90b958 | 17230 | +} |
00e5a55c | 17231 | +EXPORT_SYMBOL(set_memory_uc); |
cc90b958 | 17232 | + |
00e5a55c | 17233 | +int set_memory_wb(unsigned long addr, int numpages) |
cc90b958 | 17234 | +{ |
00e5a55c BS |
17235 | + return change_page_attr_clear(addr, numpages, |
17236 | + __pgprot(_PAGE_PCD | _PAGE_PWT)); | |
cc90b958 | 17237 | +} |
00e5a55c | 17238 | +EXPORT_SYMBOL(set_memory_wb); |
cc90b958 | 17239 | + |
00e5a55c | 17240 | +int set_memory_x(unsigned long addr, int numpages) |
cc90b958 | 17241 | +{ |
00e5a55c | 17242 | + return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX)); |
cc90b958 | 17243 | +} |
00e5a55c | 17244 | +EXPORT_SYMBOL(set_memory_x); |
cc90b958 | 17245 | + |
00e5a55c | 17246 | +int set_memory_nx(unsigned long addr, int numpages) |
cc90b958 | 17247 | +{ |
00e5a55c | 17248 | + return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX)); |
cc90b958 | 17249 | +} |
00e5a55c | 17250 | +EXPORT_SYMBOL(set_memory_nx); |
cc90b958 | 17251 | + |
00e5a55c | 17252 | +int set_memory_ro(unsigned long addr, int numpages) |
cc90b958 | 17253 | +{ |
00e5a55c | 17254 | + return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW)); |
cc90b958 | 17255 | +} |
cc90b958 | 17256 | + |
00e5a55c | 17257 | +int set_memory_rw(unsigned long addr, int numpages) |
cc90b958 | 17258 | +{ |
00e5a55c | 17259 | + return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW)); |
cc90b958 BS |
17260 | +} |
17261 | + | |
00e5a55c | 17262 | +int set_memory_np(unsigned long addr, int numpages) |
cc90b958 | 17263 | +{ |
00e5a55c | 17264 | + return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT)); |
cc90b958 BS |
17265 | +} |
17266 | + | |
00e5a55c | 17267 | +int set_pages_uc(struct page *page, int numpages) |
cc90b958 | 17268 | +{ |
00e5a55c | 17269 | + unsigned long addr = (unsigned long)page_address(page); |
cc90b958 | 17270 | + |
00e5a55c | 17271 | + return set_memory_uc(addr, numpages); |
cc90b958 | 17272 | +} |
00e5a55c | 17273 | +EXPORT_SYMBOL(set_pages_uc); |
cc90b958 | 17274 | + |
00e5a55c | 17275 | +int set_pages_wb(struct page *page, int numpages) |
cc90b958 | 17276 | +{ |
00e5a55c | 17277 | + unsigned long addr = (unsigned long)page_address(page); |
cc90b958 | 17278 | + |
00e5a55c | 17279 | + return set_memory_wb(addr, numpages); |
cc90b958 | 17280 | +} |
00e5a55c | 17281 | +EXPORT_SYMBOL(set_pages_wb); |
cc90b958 | 17282 | + |
00e5a55c | 17283 | +int set_pages_x(struct page *page, int numpages) |
cc90b958 | 17284 | +{ |
00e5a55c BS |
17285 | + unsigned long addr = (unsigned long)page_address(page); |
17286 | + | |
17287 | + return set_memory_x(addr, numpages); | |
cc90b958 | 17288 | +} |
00e5a55c | 17289 | +EXPORT_SYMBOL(set_pages_x); |
cc90b958 | 17290 | + |
00e5a55c | 17291 | +int set_pages_nx(struct page *page, int numpages) |
cc90b958 | 17292 | +{ |
00e5a55c BS |
17293 | + unsigned long addr = (unsigned long)page_address(page); |
17294 | + | |
17295 | + return set_memory_nx(addr, numpages); | |
cc90b958 | 17296 | +} |
00e5a55c | 17297 | +EXPORT_SYMBOL(set_pages_nx); |
cc90b958 | 17298 | + |
00e5a55c | 17299 | +int set_pages_ro(struct page *page, int numpages) |
cc90b958 | 17300 | +{ |
00e5a55c BS |
17301 | + unsigned long addr = (unsigned long)page_address(page); |
17302 | + | |
17303 | + return set_memory_ro(addr, numpages); | |
cc90b958 BS |
17304 | +} |
17305 | + | |
00e5a55c | 17306 | +int set_pages_rw(struct page *page, int numpages) |
cc90b958 | 17307 | +{ |
00e5a55c BS |
17308 | + unsigned long addr = (unsigned long)page_address(page); |
17309 | + | |
17310 | + return set_memory_rw(addr, numpages); | |
cc90b958 BS |
17311 | +} |
17312 | + | |
00e5a55c BS |
17313 | +#ifdef CONFIG_DEBUG_PAGEALLOC |
17314 | + | |
17315 | +static int __set_pages_p(struct page *page, int numpages) | |
cc90b958 | 17316 | +{ |
00e5a55c BS |
17317 | + struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page), |
17318 | + .numpages = numpages, | |
17319 | + .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), | |
17320 | + .mask_clr = __pgprot(0)}; | |
17321 | + | |
17322 | + return __change_page_attr_set_clr(&cpa, 1); | |
cc90b958 BS |
17323 | +} |
17324 | + | |
00e5a55c | 17325 | +static int __set_pages_np(struct page *page, int numpages) |
cc90b958 | 17326 | +{ |
00e5a55c BS |
17327 | + struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page), |
17328 | + .numpages = numpages, | |
17329 | + .mask_set = __pgprot(0), | |
17330 | + .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)}; | |
17331 | + | |
17332 | + return __change_page_attr_set_clr(&cpa, 1); | |
cc90b958 BS |
17333 | +} |
17334 | + | |
00e5a55c | 17335 | +void kernel_map_pages(struct page *page, int numpages, int enable) |
cc90b958 | 17336 | +{ |
00e5a55c BS |
17337 | + if (PageHighMem(page)) |
17338 | + return; | |
17339 | + if (!enable) { | |
17340 | + debug_check_no_locks_freed(page_address(page), | |
17341 | + numpages * PAGE_SIZE); | |
17342 | + } | |
17343 | + | |
17344 | + /* | |
17345 | + * If page allocator is not up yet then do not call c_p_a(): | |
17346 | + */ | |
17347 | + if (!debug_pagealloc_enabled) | |
17348 | + return; | |
17349 | + | |
17350 | + /* | |
17351 | + * The return value is ignored as the calls cannot fail. | |
17352 | + * Large pages are kept enabled at boot time, and are | |
17353 | + * split up quickly with DEBUG_PAGEALLOC. If a splitup | |
17354 | + * fails here (due to temporary memory shortage) no damage | |
17355 | + * is done because we just keep the largepage intact up | |
17356 | + * to the next attempt when it will likely be split up: | |
17357 | + */ | |
17358 | + if (enable) | |
17359 | + __set_pages_p(page, numpages); | |
17360 | + else | |
17361 | + __set_pages_np(page, numpages); | |
17362 | + | |
17363 | + /* | |
17364 | + * We should perform an IPI and flush all tlbs, | |
17365 | + * but that can deadlock, so flush only the current CPU: | |
17366 | + */ | |
17367 | + __flush_tlb_all(); | |
cc90b958 | 17368 | + |
00e5a55c BS |
17369 | + /* |
17370 | + * Try to refill the page pool here. We can do this only after | |
17371 | + * the tlb flush. | |
17372 | + */ | |
17373 | + cpa_fill_pool(NULL); | |
cc90b958 | 17374 | +} |
cc90b958 | 17375 | + |
00e5a55c | 17376 | +#ifdef CONFIG_HIBERNATION |
cc90b958 | 17377 | + |
00e5a55c | 17378 | +bool kernel_page_present(struct page *page) |
cc90b958 | 17379 | +{ |
00e5a55c BS |
17380 | + unsigned int level; |
17381 | + pte_t *pte; | |
cc90b958 | 17382 | + |
00e5a55c BS |
17383 | + if (PageHighMem(page)) |
17384 | + return false; | |
17385 | + | |
17386 | + pte = lookup_address((unsigned long)page_address(page), &level); | |
17387 | + return (__pte_val(*pte) & _PAGE_PRESENT); | |
cc90b958 | 17388 | +} |
cc90b958 | 17389 | + |
00e5a55c BS |
17390 | +#endif /* CONFIG_HIBERNATION */ |
17391 | + | |
17392 | +#endif /* CONFIG_DEBUG_PAGEALLOC */ | |
cc90b958 | 17393 | + |
00e5a55c BS |
17394 | +static inline int in_secondary_range(unsigned long va) |
17395 | +{ | |
cc90b958 | 17396 | +#ifdef CONFIG_X86_64 |
00e5a55c | 17397 | + return va >= VMALLOC_START && va < VMALLOC_END; |
cc90b958 | 17398 | +#else |
00e5a55c | 17399 | + return va >= (unsigned long)high_memory; |
cc90b958 | 17400 | +#endif |
cc90b958 BS |
17401 | +} |
17402 | + | |
00e5a55c | 17403 | +static void __make_page_readonly(unsigned long va) |
cc90b958 | 17404 | +{ |
00e5a55c BS |
17405 | + pte_t *pte; |
17406 | + unsigned int level; | |
cc90b958 | 17407 | + |
00e5a55c BS |
17408 | + pte = lookup_address(va, &level); |
17409 | + BUG_ON(!pte || level != PG_LEVEL_4K); | |
17410 | + if (HYPERVISOR_update_va_mapping(va, pte_wrprotect(*pte), 0)) | |
17411 | + BUG(); | |
17412 | + if (in_secondary_range(va)) { | |
17413 | + unsigned long pfn = pte_pfn(*pte); | |
cc90b958 | 17414 | + |
00e5a55c BS |
17415 | +#ifdef CONFIG_HIGHMEM |
17416 | + if (pfn >= highstart_pfn) | |
17417 | + kmap_flush_unused(); /* flush stale writable kmaps */ | |
17418 | + else | |
17419 | +#endif | |
17420 | + __make_page_readonly((unsigned long)__va(pfn << PAGE_SHIFT)); | |
17421 | + } | |
cc90b958 BS |
17422 | +} |
17423 | + | |
00e5a55c | 17424 | +static void __make_page_writable(unsigned long va) |
cc90b958 | 17425 | +{ |
00e5a55c BS |
17426 | + pte_t *pte; |
17427 | + unsigned int level; | |
cc90b958 | 17428 | + |
00e5a55c BS |
17429 | + pte = lookup_address(va, &level); |
17430 | + BUG_ON(!pte || level != PG_LEVEL_4K); | |
17431 | + if (HYPERVISOR_update_va_mapping(va, pte_mkwrite(*pte), 0)) | |
17432 | + BUG(); | |
17433 | + if (in_secondary_range(va)) { | |
17434 | + unsigned long pfn = pte_pfn(*pte); | |
cc90b958 | 17435 | + |
00e5a55c BS |
17436 | +#ifdef CONFIG_HIGHMEM |
17437 | + if (pfn < highstart_pfn) | |
17438 | +#endif | |
17439 | + __make_page_writable((unsigned long)__va(pfn << PAGE_SHIFT)); | |
17440 | + } | |
cc90b958 BS |
17441 | +} |
17442 | + | |
00e5a55c | 17443 | +void make_page_readonly(void *va, unsigned int feature) |
cc90b958 | 17444 | +{ |
00e5a55c BS |
17445 | + if (!xen_feature(feature)) |
17446 | + __make_page_readonly((unsigned long)va); | |
cc90b958 BS |
17447 | +} |
17448 | + | |
00e5a55c | 17449 | +void make_page_writable(void *va, unsigned int feature) |
cc90b958 | 17450 | +{ |
00e5a55c BS |
17451 | + if (!xen_feature(feature)) |
17452 | + __make_page_writable((unsigned long)va); | |
cc90b958 BS |
17453 | +} |
17454 | + | |
00e5a55c | 17455 | +void make_pages_readonly(void *va, unsigned int nr, unsigned int feature) |
cc90b958 | 17456 | +{ |
00e5a55c | 17457 | + unsigned long addr; |
cc90b958 | 17458 | + |
00e5a55c BS |
17459 | + if (xen_feature(feature)) |
17460 | + return; | |
cc90b958 | 17461 | + |
00e5a55c BS |
17462 | + for (addr = (unsigned long)va; nr--; addr += PAGE_SIZE) |
17463 | + __make_page_readonly(addr); | |
cc90b958 BS |
17464 | +} |
17465 | + | |
00e5a55c | 17466 | +void make_pages_writable(void *va, unsigned int nr, unsigned int feature) |
cc90b958 | 17467 | +{ |
00e5a55c BS |
17468 | + unsigned long addr; |
17469 | + | |
17470 | + if (xen_feature(feature)) | |
17471 | + return; | |
17472 | + | |
17473 | + for (addr = (unsigned long)va; nr--; addr += PAGE_SIZE) | |
17474 | + __make_page_writable(addr); | |
cc90b958 | 17475 | +} |
cc90b958 | 17476 | + |
cc90b958 | 17477 | +/* |
00e5a55c BS |
17478 | + * The testcases use internal knowledge of the implementation that shouldn't |
17479 | + * be exposed to the rest of the kernel. Include these directly here. | |
cc90b958 | 17480 | + */ |
00e5a55c BS |
17481 | +#ifdef CONFIG_CPA_DEBUG |
17482 | +#include "pageattr-test.c" | |
17483 | +#endif | |
17484 | --- sle11-2009-05-14.orig/arch/x86/mm/pageattr_64-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
17485 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
17486 | @@ -1,542 +0,0 @@ | |
17487 | -/* | |
17488 | - * Copyright 2002 Andi Kleen, SuSE Labs. | |
17489 | - * Thanks to Ben LaHaise for precious feedback. | |
17490 | - */ | |
17491 | - | |
17492 | -#include <linux/mm.h> | |
17493 | -#include <linux/sched.h> | |
17494 | -#include <linux/highmem.h> | |
17495 | -#include <linux/module.h> | |
17496 | -#include <linux/slab.h> | |
17497 | -#include <asm/uaccess.h> | |
17498 | -#include <asm/processor.h> | |
17499 | -#include <asm/tlbflush.h> | |
17500 | -#include <asm/io.h> | |
17501 | - | |
17502 | -#ifdef CONFIG_XEN | |
17503 | -#include <asm/pgalloc.h> | |
17504 | -#include <asm/mmu_context.h> | |
17505 | - | |
17506 | -static void _pin_lock(struct mm_struct *mm, int lock) { | |
17507 | - if (lock) | |
17508 | - spin_lock(&mm->page_table_lock); | |
17509 | -#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS | |
17510 | - /* While mm->page_table_lock protects us against insertions and | |
17511 | - * removals of higher level page table pages, it doesn't protect | |
17512 | - * against updates of pte-s. Such updates, however, require the | |
17513 | - * pte pages to be in consistent state (unpinned+writable or | |
17514 | - * pinned+readonly). The pinning and attribute changes, however | |
17515 | - * cannot be done atomically, which is why such updates must be | |
17516 | - * prevented from happening concurrently. | |
17517 | - * Note that no pte lock can ever elsewhere be acquired nesting | |
17518 | - * with an already acquired one in the same mm, or with the mm's | |
17519 | - * page_table_lock already acquired, as that would break in the | |
17520 | - * non-split case (where all these are actually resolving to the | |
17521 | - * one page_table_lock). Thus acquiring all of them here is not | |
17522 | - * going to result in dead locks, and the order of acquires | |
17523 | - * doesn't matter. | |
17524 | - */ | |
17525 | - { | |
17526 | - pgd_t *pgd = mm->pgd; | |
17527 | - unsigned g; | |
17528 | - | |
17529 | - for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) { | |
17530 | - pud_t *pud; | |
17531 | - unsigned u; | |
17532 | - | |
17533 | - if (pgd_none(*pgd)) | |
17534 | - continue; | |
17535 | - pud = pud_offset(pgd, 0); | |
17536 | - for (u = 0; u < PTRS_PER_PUD; u++, pud++) { | |
17537 | - pmd_t *pmd; | |
17538 | - unsigned m; | |
17539 | - | |
17540 | - if (pud_none(*pud)) | |
17541 | - continue; | |
17542 | - pmd = pmd_offset(pud, 0); | |
17543 | - for (m = 0; m < PTRS_PER_PMD; m++, pmd++) { | |
17544 | - spinlock_t *ptl; | |
17545 | - | |
17546 | - if (pmd_none(*pmd)) | |
17547 | - continue; | |
17548 | - ptl = pte_lockptr(0, pmd); | |
17549 | - if (lock) | |
17550 | - spin_lock(ptl); | |
17551 | - else | |
17552 | - spin_unlock(ptl); | |
17553 | - } | |
17554 | - } | |
17555 | - } | |
17556 | - } | |
17557 | -#endif | |
17558 | - if (!lock) | |
17559 | - spin_unlock(&mm->page_table_lock); | |
17560 | -} | |
17561 | -#define pin_lock(mm) _pin_lock(mm, 1) | |
17562 | -#define pin_unlock(mm) _pin_lock(mm, 0) | |
17563 | - | |
17564 | -#define PIN_BATCH 8 | |
17565 | -static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl); | |
17566 | - | |
17567 | -static inline unsigned int pgd_walk_set_prot(void *pt, pgprot_t flags, | |
17568 | - unsigned int cpu, unsigned int seq) | |
17569 | -{ | |
17570 | - struct page *page = virt_to_page(pt); | |
17571 | - unsigned long pfn = page_to_pfn(page); | |
17572 | - | |
17573 | - MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq, | |
17574 | - (unsigned long)__va(pfn << PAGE_SHIFT), | |
17575 | - pfn_pte(pfn, flags), 0); | |
17576 | - if (unlikely(++seq == PIN_BATCH)) { | |
17577 | - if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu), | |
17578 | - PIN_BATCH, NULL))) | |
17579 | - BUG(); | |
17580 | - seq = 0; | |
17581 | - } | |
17582 | - | |
17583 | - return seq; | |
17584 | -} | |
17585 | - | |
17586 | -static void pgd_walk(pgd_t *pgd_base, pgprot_t flags) | |
17587 | -{ | |
17588 | - pgd_t *pgd = pgd_base; | |
17589 | - pud_t *pud; | |
17590 | - pmd_t *pmd; | |
17591 | - pte_t *pte; | |
17592 | - int g,u,m; | |
17593 | - unsigned int cpu, seq; | |
17594 | - multicall_entry_t *mcl; | |
17595 | - | |
17596 | - cpu = get_cpu(); | |
17597 | - | |
17598 | - /* | |
17599 | - * Cannot iterate up to USER_PTRS_PER_PGD as these pagetables may not | |
17600 | - * be the 'current' task's pagetables (e.g., current may be 32-bit, | |
17601 | - * but the pagetables may be for a 64-bit task). | |
17602 | - * Subtracting 1 from TASK_SIZE64 means the loop limit is correct | |
17603 | - * regardless of whether TASK_SIZE64 is a multiple of PGDIR_SIZE. | |
17604 | - */ | |
17605 | - for (g = 0, seq = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) { | |
17606 | - if (pgd_none(*pgd)) | |
17607 | - continue; | |
17608 | - pud = pud_offset(pgd, 0); | |
17609 | - if (PTRS_PER_PUD > 1) /* not folded */ | |
17610 | - seq = pgd_walk_set_prot(pud,flags,cpu,seq); | |
17611 | - for (u = 0; u < PTRS_PER_PUD; u++, pud++) { | |
17612 | - if (pud_none(*pud)) | |
17613 | - continue; | |
17614 | - pmd = pmd_offset(pud, 0); | |
17615 | - if (PTRS_PER_PMD > 1) /* not folded */ | |
17616 | - seq = pgd_walk_set_prot(pmd,flags,cpu,seq); | |
17617 | - for (m = 0; m < PTRS_PER_PMD; m++, pmd++) { | |
17618 | - if (pmd_none(*pmd)) | |
17619 | - continue; | |
17620 | - pte = pte_offset_kernel(pmd,0); | |
17621 | - seq = pgd_walk_set_prot(pte,flags,cpu,seq); | |
17622 | - } | |
17623 | - } | |
17624 | - } | |
17625 | - | |
17626 | - mcl = per_cpu(pb_mcl, cpu); | |
17627 | - if (unlikely(seq > PIN_BATCH - 2)) { | |
17628 | - if (unlikely(HYPERVISOR_multicall_check(mcl, seq, NULL))) | |
17629 | - BUG(); | |
17630 | - seq = 0; | |
17631 | - } | |
17632 | - MULTI_update_va_mapping(mcl + seq, | |
17633 | - (unsigned long)__user_pgd(pgd_base), | |
17634 | - pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags), | |
17635 | - 0); | |
17636 | - MULTI_update_va_mapping(mcl + seq + 1, | |
17637 | - (unsigned long)pgd_base, | |
17638 | - pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags), | |
17639 | - UVMF_TLB_FLUSH); | |
17640 | - if (unlikely(HYPERVISOR_multicall_check(mcl, seq + 2, NULL))) | |
17641 | - BUG(); | |
17642 | - | |
17643 | - put_cpu(); | |
17644 | -} | |
17645 | - | |
17646 | -static void __pgd_pin(pgd_t *pgd) | |
17647 | -{ | |
17648 | - pgd_walk(pgd, PAGE_KERNEL_RO); | |
17649 | - xen_pgd_pin(__pa(pgd)); /* kernel */ | |
17650 | - xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */ | |
17651 | - SetPagePinned(virt_to_page(pgd)); | |
17652 | -} | |
17653 | - | |
17654 | -static void __pgd_unpin(pgd_t *pgd) | |
17655 | -{ | |
17656 | - xen_pgd_unpin(__pa(pgd)); | |
17657 | - xen_pgd_unpin(__pa(__user_pgd(pgd))); | |
17658 | - pgd_walk(pgd, PAGE_KERNEL); | |
17659 | - ClearPagePinned(virt_to_page(pgd)); | |
17660 | -} | |
17661 | - | |
17662 | -void pgd_test_and_unpin(pgd_t *pgd) | |
17663 | -{ | |
17664 | - if (PagePinned(virt_to_page(pgd))) | |
17665 | - __pgd_unpin(pgd); | |
17666 | -} | |
17667 | - | |
17668 | -void mm_pin(struct mm_struct *mm) | |
17669 | -{ | |
17670 | - if (xen_feature(XENFEAT_writable_page_tables)) | |
17671 | - return; | |
17672 | - | |
17673 | - pin_lock(mm); | |
17674 | - __pgd_pin(mm->pgd); | |
17675 | - pin_unlock(mm); | |
17676 | -} | |
17677 | - | |
17678 | -void mm_unpin(struct mm_struct *mm) | |
17679 | -{ | |
17680 | - if (xen_feature(XENFEAT_writable_page_tables)) | |
17681 | - return; | |
17682 | - | |
17683 | - pin_lock(mm); | |
17684 | - __pgd_unpin(mm->pgd); | |
17685 | - pin_unlock(mm); | |
17686 | -} | |
17687 | - | |
17688 | -void mm_pin_all(void) | |
17689 | -{ | |
17690 | - struct page *page; | |
17691 | - unsigned long flags; | |
17692 | - | |
17693 | - if (xen_feature(XENFEAT_writable_page_tables)) | |
17694 | - return; | |
17695 | - | |
17696 | - /* | |
17697 | - * Allow uninterrupted access to the pgd_list. Also protects | |
17698 | - * __pgd_pin() by disabling preemption. | |
17699 | - * All other CPUs must be at a safe point (e.g., in stop_machine | |
17700 | - * or offlined entirely). | |
17701 | - */ | |
17702 | - spin_lock_irqsave(&pgd_lock, flags); | |
17703 | - list_for_each_entry(page, &pgd_list, lru) { | |
17704 | - if (!PagePinned(page)) | |
17705 | - __pgd_pin((pgd_t *)page_address(page)); | |
17706 | - } | |
17707 | - spin_unlock_irqrestore(&pgd_lock, flags); | |
17708 | -} | |
17709 | - | |
17710 | -void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) | |
17711 | -{ | |
17712 | - if (!PagePinned(virt_to_page(mm->pgd))) | |
17713 | - mm_pin(mm); | |
17714 | -} | |
17715 | - | |
17716 | -void arch_exit_mmap(struct mm_struct *mm) | |
17717 | -{ | |
17718 | - struct task_struct *tsk = current; | |
17719 | - | |
17720 | - task_lock(tsk); | |
17721 | - | |
17722 | - /* | |
17723 | - * We aggressively remove defunct pgd from cr3. We execute unmap_vmas() | |
17724 | - * *much* faster this way, as no tlb flushes means bigger wrpt batches. | |
17725 | - */ | |
17726 | - if (tsk->active_mm == mm) { | |
17727 | - tsk->active_mm = &init_mm; | |
17728 | - atomic_inc(&init_mm.mm_count); | |
17729 | - | |
17730 | - switch_mm(mm, &init_mm, tsk); | |
17731 | - | |
17732 | - atomic_dec(&mm->mm_count); | |
17733 | - BUG_ON(atomic_read(&mm->mm_count) == 0); | |
17734 | - } | |
17735 | - | |
17736 | - task_unlock(tsk); | |
17737 | - | |
17738 | - if (PagePinned(virt_to_page(mm->pgd)) | |
17739 | - && (atomic_read(&mm->mm_count) == 1) | |
17740 | - && !mm->context.has_foreign_mappings) | |
17741 | - mm_unpin(mm); | |
17742 | -} | |
17743 | - | |
17744 | -static void _pte_free(struct page *page, unsigned int order) | |
17745 | -{ | |
17746 | - BUG_ON(order); | |
17747 | - pte_free(page); | |
17748 | -} | |
17749 | - | |
17750 | -struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) | |
17751 | -{ | |
17752 | - struct page *pte; | |
17753 | - | |
17754 | - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); | |
17755 | - if (pte) { | |
17756 | - SetPageForeign(pte, _pte_free); | |
17757 | - init_page_count(pte); | |
17758 | - } | |
17759 | - return pte; | |
17760 | -} | |
17761 | - | |
17762 | -void pte_free(struct page *pte) | |
17763 | -{ | |
17764 | - unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT); | |
17765 | - | |
17766 | - if (!pte_write(*virt_to_ptep(va))) | |
17767 | - if (HYPERVISOR_update_va_mapping( | |
17768 | - va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0)) | |
17769 | - BUG(); | |
17770 | - | |
17771 | - ClearPageForeign(pte); | |
17772 | - init_page_count(pte); | |
17773 | - | |
17774 | - __free_page(pte); | |
17775 | -} | |
17776 | -#endif /* CONFIG_XEN */ | |
17777 | - | |
17778 | -pte_t *lookup_address(unsigned long address) | |
17779 | -{ | |
17780 | - pgd_t *pgd = pgd_offset_k(address); | |
17781 | - pud_t *pud; | |
17782 | - pmd_t *pmd; | |
17783 | - pte_t *pte; | |
17784 | - if (pgd_none(*pgd)) | |
17785 | - return NULL; | |
17786 | - pud = pud_offset(pgd, address); | |
17787 | - if (!pud_present(*pud)) | |
17788 | - return NULL; | |
17789 | - pmd = pmd_offset(pud, address); | |
17790 | - if (!pmd_present(*pmd)) | |
17791 | - return NULL; | |
17792 | - if (pmd_large(*pmd)) | |
17793 | - return (pte_t *)pmd; | |
17794 | - pte = pte_offset_kernel(pmd, address); | |
17795 | - if (pte && !pte_present(*pte)) | |
17796 | - pte = NULL; | |
17797 | - return pte; | |
17798 | -} | |
17799 | - | |
17800 | -static struct page *split_large_page(unsigned long address, pgprot_t prot, | |
17801 | - pgprot_t ref_prot) | |
17802 | -{ | |
17803 | - int i; | |
17804 | - unsigned long addr; | |
17805 | - struct page *base = alloc_pages(GFP_KERNEL, 0); | |
17806 | - pte_t *pbase; | |
17807 | - if (!base) | |
17808 | - return NULL; | |
17809 | - /* | |
17810 | - * page_private is used to track the number of entries in | |
17811 | - * the page table page have non standard attributes. | |
17812 | - */ | |
17813 | - SetPagePrivate(base); | |
17814 | - page_private(base) = 0; | |
17815 | - | |
17816 | - address = __pa(address); | |
17817 | - addr = address & LARGE_PAGE_MASK; | |
17818 | - pbase = (pte_t *)page_address(base); | |
17819 | - for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { | |
17820 | - pbase[i] = pfn_pte(addr >> PAGE_SHIFT, | |
17821 | - addr == address ? prot : ref_prot); | |
17822 | - } | |
17823 | - return base; | |
17824 | -} | |
17825 | - | |
17826 | -void clflush_cache_range(void *adr, int size) | |
cc90b958 | 17827 | -{ |
00e5a55c BS |
17828 | - int i; |
17829 | - for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size) | |
17830 | - clflush(adr+i); | |
cc90b958 | 17831 | -} |
00e5a55c BS |
17832 | - |
17833 | -static void flush_kernel_map(void *arg) | |
17834 | -{ | |
17835 | - struct list_head *l = (struct list_head *)arg; | |
17836 | - struct page *pg; | |
17837 | - | |
17838 | - /* When clflush is available always use it because it is | |
17839 | - much cheaper than WBINVD. */ | |
17840 | - /* clflush is still broken. Disable for now. */ | |
17841 | - if (1 || !cpu_has_clflush) | |
17842 | - asm volatile("wbinvd" ::: "memory"); | |
17843 | - else list_for_each_entry(pg, l, lru) { | |
17844 | - void *adr = page_address(pg); | |
17845 | - clflush_cache_range(adr, PAGE_SIZE); | |
17846 | - } | |
17847 | - __flush_tlb_all(); | |
17848 | -} | |
17849 | - | |
17850 | -static inline void flush_map(struct list_head *l) | |
17851 | -{ | |
17852 | - on_each_cpu(flush_kernel_map, l, 1, 1); | |
17853 | -} | |
17854 | - | |
17855 | -static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */ | |
17856 | - | |
17857 | -static inline void save_page(struct page *fpage) | |
17858 | -{ | |
17859 | - if (!test_and_set_bit(PG_arch_1, &fpage->flags)) | |
17860 | - list_add(&fpage->lru, &deferred_pages); | |
17861 | -} | |
17862 | - | |
17863 | -/* | |
17864 | - * No more special protections in this 2/4MB area - revert to a | |
17865 | - * large page again. | |
17866 | - */ | |
17867 | -static void revert_page(unsigned long address, pgprot_t ref_prot) | |
17868 | -{ | |
17869 | - pgd_t *pgd; | |
17870 | - pud_t *pud; | |
17871 | - pmd_t *pmd; | |
17872 | - pte_t large_pte; | |
17873 | - unsigned long pfn; | |
17874 | - | |
17875 | - pgd = pgd_offset_k(address); | |
17876 | - BUG_ON(pgd_none(*pgd)); | |
17877 | - pud = pud_offset(pgd,address); | |
17878 | - BUG_ON(pud_none(*pud)); | |
17879 | - pmd = pmd_offset(pud, address); | |
17880 | - BUG_ON(__pmd_val(*pmd) & _PAGE_PSE); | |
17881 | - pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT; | |
17882 | - large_pte = pfn_pte(pfn, ref_prot); | |
17883 | - large_pte = pte_mkhuge(large_pte); | |
17884 | - set_pte((pte_t *)pmd, large_pte); | |
17885 | -} | |
17886 | - | |
17887 | -static int | |
17888 | -__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |
17889 | - pgprot_t ref_prot) | |
17890 | -{ | |
17891 | - pte_t *kpte; | |
17892 | - struct page *kpte_page; | |
17893 | - pgprot_t ref_prot2; | |
17894 | - | |
17895 | - kpte = lookup_address(address); | |
17896 | - if (!kpte) return 0; | |
17897 | - kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); | |
17898 | - BUG_ON(PageLRU(kpte_page)); | |
17899 | - BUG_ON(PageCompound(kpte_page)); | |
17900 | - if (pgprot_val(prot) != pgprot_val(ref_prot)) { | |
17901 | - if (!pte_huge(*kpte)) { | |
17902 | - set_pte(kpte, pfn_pte(pfn, prot)); | |
17903 | - } else { | |
17904 | - /* | |
17905 | - * split_large_page will take the reference for this | |
17906 | - * change_page_attr on the split page. | |
17907 | - */ | |
17908 | - struct page *split; | |
17909 | - ref_prot2 = pte_pgprot(pte_clrhuge(*kpte)); | |
17910 | - split = split_large_page(address, prot, ref_prot2); | |
17911 | - if (!split) | |
17912 | - return -ENOMEM; | |
17913 | - pgprot_val(ref_prot2) &= ~_PAGE_NX; | |
17914 | - set_pte(kpte, mk_pte(split, ref_prot2)); | |
17915 | - kpte_page = split; | |
17916 | - } | |
17917 | - page_private(kpte_page)++; | |
17918 | - } else if (!pte_huge(*kpte)) { | |
17919 | - set_pte(kpte, pfn_pte(pfn, ref_prot)); | |
17920 | - BUG_ON(page_private(kpte_page) == 0); | |
17921 | - page_private(kpte_page)--; | |
17922 | - } else | |
17923 | - BUG(); | |
17924 | - | |
17925 | - /* on x86-64 the direct mapping set at boot is not using 4k pages */ | |
17926 | - /* | |
17927 | - * ..., but the XEN guest kernels (currently) do: | |
17928 | - * If the pte was reserved, it means it was created at boot | |
17929 | - * time (not via split_large_page) and in turn we must not | |
17930 | - * replace it with a large page. | |
17931 | - */ | |
17932 | -#ifndef CONFIG_XEN | |
17933 | - BUG_ON(PageReserved(kpte_page)); | |
17934 | -#else | |
17935 | - if (PageReserved(kpte_page)) | |
17936 | - return 0; | |
17937 | -#endif | |
17938 | - | |
17939 | - save_page(kpte_page); | |
17940 | - if (page_private(kpte_page) == 0) | |
17941 | - revert_page(address, ref_prot); | |
17942 | - return 0; | |
17943 | -} | |
17944 | - | |
17945 | -/* | |
17946 | - * Change the page attributes of an page in the linear mapping. | |
17947 | - * | |
17948 | - * This should be used when a page is mapped with a different caching policy | |
17949 | - * than write-back somewhere - some CPUs do not like it when mappings with | |
17950 | - * different caching policies exist. This changes the page attributes of the | |
17951 | - * in kernel linear mapping too. | |
17952 | - * | |
17953 | - * The caller needs to ensure that there are no conflicting mappings elsewhere. | |
17954 | - * This function only deals with the kernel linear map. | |
17955 | - * | |
17956 | - * Caller must call global_flush_tlb() after this. | |
17957 | - */ | |
17958 | -int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) | |
17959 | -{ | |
17960 | - int err = 0, kernel_map = 0; | |
17961 | - int i; | |
17962 | - | |
17963 | - if (address >= __START_KERNEL_map | |
17964 | - && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) { | |
17965 | - address = (unsigned long)__va(__pa(address)); | |
17966 | - kernel_map = 1; | |
17967 | - } | |
17968 | - | |
17969 | - down_write(&init_mm.mmap_sem); | |
17970 | - for (i = 0; i < numpages; i++, address += PAGE_SIZE) { | |
17971 | - unsigned long pfn = __pa(address) >> PAGE_SHIFT; | |
17972 | - | |
17973 | - if (!kernel_map || pte_present(pfn_pte(0, prot))) { | |
17974 | - err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); | |
17975 | - if (err) | |
17976 | - break; | |
17977 | - } | |
17978 | - /* Handle kernel mapping too which aliases part of the | |
17979 | - * lowmem */ | |
17980 | - if (__pa(address) < KERNEL_TEXT_SIZE) { | |
17981 | - unsigned long addr2; | |
17982 | - pgprot_t prot2; | |
17983 | - addr2 = __START_KERNEL_map + __pa(address); | |
17984 | - /* Make sure the kernel mappings stay executable */ | |
17985 | - prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot))); | |
17986 | - err = __change_page_attr(addr2, pfn, prot2, | |
17987 | - PAGE_KERNEL_EXEC); | |
17988 | - } | |
17989 | - } | |
17990 | - up_write(&init_mm.mmap_sem); | |
17991 | - return err; | |
17992 | -} | |
17993 | - | |
17994 | -/* Don't call this for MMIO areas that may not have a mem_map entry */ | |
17995 | -int change_page_attr(struct page *page, int numpages, pgprot_t prot) | |
17996 | -{ | |
17997 | - unsigned long addr = (unsigned long)page_address(page); | |
17998 | - return change_page_attr_addr(addr, numpages, prot); | |
17999 | -} | |
18000 | - | |
18001 | -void global_flush_tlb(void) | |
18002 | -{ | |
18003 | - struct page *pg, *next; | |
18004 | - struct list_head l; | |
18005 | - | |
18006 | - /* | |
18007 | - * Write-protect the semaphore, to exclude two contexts | |
18008 | - * doing a list_replace_init() call in parallel and to | |
18009 | - * exclude new additions to the deferred_pages list: | |
18010 | - */ | |
18011 | - down_write(&init_mm.mmap_sem); | |
18012 | - list_replace_init(&deferred_pages, &l); | |
18013 | - up_write(&init_mm.mmap_sem); | |
18014 | - | |
18015 | - flush_map(&l); | |
18016 | - | |
18017 | - list_for_each_entry_safe(pg, next, &l, lru) { | |
18018 | - list_del(&pg->lru); | |
18019 | - clear_bit(PG_arch_1, &pg->flags); | |
18020 | - if (page_private(pg) != 0) | |
18021 | - continue; | |
18022 | - ClearPagePrivate(pg); | |
18023 | - __free_page(pg); | |
18024 | - } | |
18025 | -} | |
18026 | - | |
18027 | -EXPORT_SYMBOL(change_page_attr); | |
18028 | -EXPORT_SYMBOL(global_flush_tlb); | |
18029 | --- sle11-2009-05-14.orig/arch/x86/mm/pgtable_32-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
18030 | +++ sle11-2009-05-14/arch/x86/mm/pgtable_32-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
18031 | @@ -29,8 +29,6 @@ | |
18032 | #include <xen/features.h> | |
18033 | #include <asm/hypervisor.h> | |
18034 | ||
18035 | -static void pgd_test_and_unpin(pgd_t *pgd); | |
18036 | - | |
18037 | void show_mem(void) | |
18038 | { | |
18039 | int total = 0, reserved = 0; | |
18040 | @@ -167,53 +165,6 @@ pte_t *pte_alloc_one_kernel(struct mm_st | |
18041 | return pte; | |
18042 | } | |
cc90b958 | 18043 | |
00e5a55c | 18044 | -static void _pte_free(struct page *page, unsigned int order) |
cc90b958 | 18045 | -{ |
00e5a55c BS |
18046 | - BUG_ON(order); |
18047 | - pte_free(page); | |
cc90b958 | 18048 | -} |
00e5a55c BS |
18049 | - |
18050 | -struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) | |
18051 | -{ | |
18052 | - struct page *pte; | |
18053 | - | |
18054 | -#ifdef CONFIG_HIGHPTE | |
18055 | - pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); | |
cc90b958 | 18056 | -#else |
00e5a55c | 18057 | - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); |
cc90b958 | 18058 | -#endif |
00e5a55c BS |
18059 | - if (pte) { |
18060 | - SetPageForeign(pte, _pte_free); | |
18061 | - init_page_count(pte); | |
18062 | - } | |
18063 | - return pte; | |
18064 | -} | |
18065 | - | |
18066 | -void pte_free(struct page *pte) | |
18067 | -{ | |
18068 | - unsigned long pfn = page_to_pfn(pte); | |
18069 | - | |
18070 | - if (!PageHighMem(pte)) { | |
18071 | - unsigned long va = (unsigned long)__va(pfn << PAGE_SHIFT); | |
18072 | - | |
18073 | - if (!pte_write(*virt_to_ptep(va))) | |
18074 | - if (HYPERVISOR_update_va_mapping( | |
18075 | - va, pfn_pte(pfn, PAGE_KERNEL), 0)) | |
18076 | - BUG(); | |
18077 | - } else | |
18078 | - ClearPagePinned(pte); | |
18079 | - | |
18080 | - ClearPageForeign(pte); | |
18081 | - init_page_count(pte); | |
18082 | - | |
18083 | - __free_page(pte); | |
18084 | -} | |
18085 | - | |
18086 | -void pmd_ctor(struct kmem_cache *cache, void *pmd) | |
18087 | -{ | |
18088 | - memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); | |
18089 | -} | |
18090 | - | |
cc90b958 | 18091 | /* |
00e5a55c BS |
18092 | * List of all pgd's needed for non-PAE so it can invalidate entries |
18093 | * in both cached and uncached pgd's; not needed for PAE since the | |
18094 | @@ -224,224 +175,191 @@ void pmd_ctor(struct kmem_cache *cache, | |
18095 | * vmalloc faults work because attached pagetables are never freed. | |
18096 | * -- wli | |
cc90b958 | 18097 | */ |
00e5a55c BS |
18098 | -DEFINE_SPINLOCK(pgd_lock); |
18099 | -struct page *pgd_list; | |
18100 | - | |
18101 | static inline void pgd_list_add(pgd_t *pgd) | |
18102 | { | |
18103 | struct page *page = virt_to_page(pgd); | |
18104 | - page->index = (unsigned long)pgd_list; | |
18105 | - if (pgd_list) | |
18106 | - set_page_private(pgd_list, (unsigned long)&page->index); | |
18107 | - pgd_list = page; | |
18108 | - set_page_private(page, (unsigned long)&pgd_list); | |
18109 | + | |
18110 | + list_add(&page->lru, &pgd_list); | |
18111 | } | |
cc90b958 | 18112 | |
00e5a55c BS |
18113 | static inline void pgd_list_del(pgd_t *pgd) |
18114 | { | |
18115 | - struct page *next, **pprev, *page = virt_to_page(pgd); | |
18116 | - next = (struct page *)page->index; | |
18117 | - pprev = (struct page **)page_private(page); | |
18118 | - *pprev = next; | |
18119 | - if (next) | |
18120 | - set_page_private(next, (unsigned long)pprev); | |
18121 | -} | |
18122 | + struct page *page = virt_to_page(pgd); | |
cc90b958 | 18123 | |
00e5a55c BS |
18124 | + list_del(&page->lru); |
18125 | +} | |
cc90b958 | 18126 | |
00e5a55c BS |
18127 | +#define UNSHARED_PTRS_PER_PGD \ |
18128 | + (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) | |
18129 | ||
18130 | -#if (PTRS_PER_PMD == 1) | |
18131 | -/* Non-PAE pgd constructor */ | |
18132 | -static void pgd_ctor(void *pgd) | |
18133 | +static void pgd_ctor(void *p) | |
18134 | { | |
18135 | + pgd_t *pgd = p; | |
18136 | unsigned long flags; | |
18137 | ||
18138 | - /* !PAE, no pagetable sharing */ | |
18139 | + pgd_test_and_unpin(pgd); | |
cc90b958 | 18140 | + |
00e5a55c BS |
18141 | + /* Clear usermode parts of PGD */ |
18142 | memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); | |
18143 | ||
18144 | spin_lock_irqsave(&pgd_lock, flags); | |
18145 | ||
18146 | - /* must happen under lock */ | |
18147 | - clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, | |
18148 | - swapper_pg_dir + USER_PTRS_PER_PGD, | |
18149 | - KERNEL_PGD_PTRS); | |
18150 | - | |
18151 | - paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, | |
18152 | - __pa(swapper_pg_dir) >> PAGE_SHIFT, | |
18153 | - USER_PTRS_PER_PGD, | |
18154 | - KERNEL_PGD_PTRS); | |
18155 | - pgd_list_add(pgd); | |
18156 | - spin_unlock_irqrestore(&pgd_lock, flags); | |
18157 | -} | |
18158 | -#else /* PTRS_PER_PMD > 1 */ | |
18159 | -/* PAE pgd constructor */ | |
18160 | -static void pgd_ctor(void *pgd) | |
18161 | -{ | |
18162 | - /* PAE, kernel PMD may be shared */ | |
18163 | - | |
18164 | - if (SHARED_KERNEL_PMD) { | |
18165 | - clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, | |
18166 | + /* If the pgd points to a shared pagetable level (either the | |
18167 | + ptes in non-PAE, or shared PMD in PAE), then just copy the | |
18168 | + references from swapper_pg_dir. */ | |
18169 | + if (PAGETABLE_LEVELS == 2 || | |
18170 | + (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) { | |
18171 | + clone_pgd_range(pgd + USER_PTRS_PER_PGD, | |
18172 | swapper_pg_dir + USER_PTRS_PER_PGD, | |
18173 | KERNEL_PGD_PTRS); | |
18174 | - } else { | |
18175 | - memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); | |
18176 | + paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, | |
18177 | + __pa(swapper_pg_dir) >> PAGE_SHIFT, | |
18178 | + USER_PTRS_PER_PGD, | |
18179 | + KERNEL_PGD_PTRS); | |
18180 | } | |
cc90b958 | 18181 | + |
00e5a55c BS |
18182 | + /* list required to sync kernel mapping updates */ |
18183 | + if (PAGETABLE_LEVELS == 2) | |
18184 | + pgd_list_add(pgd); | |
cc90b958 | 18185 | + |
00e5a55c BS |
18186 | + spin_unlock_irqrestore(&pgd_lock, flags); |
18187 | } | |
18188 | -#endif /* PTRS_PER_PMD */ | |
cc90b958 | 18189 | |
00e5a55c BS |
18190 | static void pgd_dtor(void *pgd) |
18191 | { | |
18192 | unsigned long flags; /* can be called from interrupt context */ | |
cc90b958 | 18193 | |
00e5a55c BS |
18194 | - if (SHARED_KERNEL_PMD) |
18195 | - return; | |
cc90b958 | 18196 | - |
00e5a55c BS |
18197 | - paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); |
18198 | - spin_lock_irqsave(&pgd_lock, flags); | |
18199 | - pgd_list_del(pgd); | |
18200 | - spin_unlock_irqrestore(&pgd_lock, flags); | |
18201 | + if (!SHARED_KERNEL_PMD) { | |
18202 | + spin_lock_irqsave(&pgd_lock, flags); | |
18203 | + pgd_list_del(pgd); | |
18204 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
18205 | + } | |
cc90b958 | 18206 | |
00e5a55c BS |
18207 | pgd_test_and_unpin(pgd); |
18208 | } | |
18209 | ||
18210 | -#define UNSHARED_PTRS_PER_PGD \ | |
18211 | - (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) | |
18212 | - | |
18213 | -/* If we allocate a pmd for part of the kernel address space, then | |
18214 | - make sure its initialized with the appropriate kernel mappings. | |
18215 | - Otherwise use a cached zeroed pmd. */ | |
18216 | -static pmd_t *pmd_cache_alloc(int idx) | |
18217 | +#ifdef CONFIG_X86_PAE | |
cc90b958 | 18218 | +/* |
00e5a55c BS |
18219 | + * Mop up any pmd pages which may still be attached to the pgd. |
18220 | + * Normally they will be freed by munmap/exit_mmap, but any pmd we | |
18221 | + * preallocate which never got a corresponding vma will need to be | |
18222 | + * freed manually. | |
cc90b958 | 18223 | + */ |
00e5a55c | 18224 | +static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) |
cc90b958 | 18225 | { |
00e5a55c BS |
18226 | - pmd_t *pmd; |
18227 | + int i; | |
18228 | ||
18229 | - if (idx >= USER_PTRS_PER_PGD) { | |
18230 | - pmd = (pmd_t *)__get_free_page(GFP_KERNEL); | |
18231 | + for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) { | |
18232 | + pgd_t pgd = pgdp[i]; | |
18233 | ||
18234 | -#ifndef CONFIG_XEN | |
18235 | - if (pmd) | |
18236 | - memcpy(pmd, | |
18237 | - (void *)pgd_page_vaddr(swapper_pg_dir[idx]), | |
18238 | - sizeof(pmd_t) * PTRS_PER_PMD); | |
18239 | -#endif | |
18240 | - } else | |
18241 | - pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); | |
18242 | + if (__pgd_val(pgd) != 0) { | |
18243 | + pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); | |
18244 | ||
18245 | - return pmd; | |
18246 | -} | |
18247 | + pgdp[i] = xen_make_pgd(0); | |
18248 | ||
18249 | -static void pmd_cache_free(pmd_t *pmd, int idx) | |
18250 | -{ | |
18251 | - if (idx >= USER_PTRS_PER_PGD) { | |
18252 | - make_lowmem_page_writable(pmd, XENFEAT_writable_page_tables); | |
18253 | - memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); | |
18254 | - free_page((unsigned long)pmd); | |
18255 | - } else | |
18256 | - kmem_cache_free(pmd_cache, pmd); | |
18257 | + paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT); | |
18258 | + pmd_free(mm, pmd); | |
18259 | + } | |
18260 | + } | |
cc90b958 BS |
18261 | } |
18262 | ||
00e5a55c BS |
18263 | -pgd_t *pgd_alloc(struct mm_struct *mm) |
18264 | +/* | |
18265 | + * In PAE mode, we need to do a cr3 reload (=tlb flush) when | |
18266 | + * updating the top-level pagetable entries to guarantee the | |
18267 | + * processor notices the update. Since this is expensive, and | |
18268 | + * all 4 top-level entries are used almost immediately in a | |
18269 | + * new process's life, we just pre-populate them here. | |
18270 | + * | |
18271 | + * Also, if we're in a paravirt environment where the kernel pmd is | |
18272 | + * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate | |
18273 | + * and initialize the kernel pmds here. | |
18274 | + */ | |
18275 | +static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) | |
18276 | { | |
18277 | + pud_t *pud; | |
18278 | + pmd_t *pmds[UNSHARED_PTRS_PER_PGD]; | |
18279 | + unsigned long addr, flags; | |
18280 | int i; | |
18281 | - pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor); | |
18282 | - pmd_t **pmds = NULL; | |
18283 | - unsigned long flags; | |
18284 | - | |
18285 | - pgd_test_and_unpin(pgd); | |
18286 | - | |
18287 | - if (PTRS_PER_PMD == 1 || !pgd) | |
18288 | - return pgd; | |
18289 | - | |
18290 | -#ifdef CONFIG_XEN | |
18291 | - if (!SHARED_KERNEL_PMD) { | |
18292 | - /* | |
18293 | - * We can race save/restore (if we sleep during a GFP_KERNEL memory | |
18294 | - * allocation). We therefore store virtual addresses of pmds as they | |
18295 | - * do not change across save/restore, and poke the machine addresses | |
18296 | - * into the pgdir under the pgd_lock. | |
18297 | - */ | |
18298 | - pmds = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL); | |
18299 | - if (!pmds) { | |
18300 | - quicklist_free(0, pgd_dtor, pgd); | |
18301 | - return NULL; | |
18302 | - } | |
18303 | - } | |
18304 | -#endif | |
18305 | ||
18306 | - /* Allocate pmds, remember virtual addresses. */ | |
18307 | - for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { | |
18308 | - pmd_t *pmd = pmd_cache_alloc(i); | |
18309 | - | |
18310 | - if (!pmd) | |
18311 | + /* | |
18312 | + * We can race save/restore (if we sleep during a GFP_KERNEL memory | |
18313 | + * allocation). We therefore store virtual addresses of pmds as they | |
18314 | + * do not change across save/restore, and poke the machine addresses | |
18315 | + * into the pgdir under the pgd_lock. | |
18316 | + */ | |
18317 | + for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; i++, addr += PUD_SIZE) { | |
18318 | + pmds[i] = pmd_alloc_one(mm, addr); | |
18319 | + if (!pmds[i]) | |
18320 | goto out_oom; | |
18321 | - | |
18322 | - paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); | |
18323 | - if (pmds) | |
18324 | - pmds[i] = pmd; | |
18325 | - else | |
18326 | - set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); | |
18327 | } | |
cc90b958 | 18328 | |
00e5a55c BS |
18329 | -#ifdef CONFIG_XEN |
18330 | - if (SHARED_KERNEL_PMD) | |
18331 | - return pgd; | |
18332 | - | |
18333 | spin_lock_irqsave(&pgd_lock, flags); | |
cc90b958 | 18334 | |
00e5a55c BS |
18335 | /* Protect against save/restore: move below 4GB under pgd_lock. */ |
18336 | - if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) { | |
18337 | - int rc = xen_create_contiguous_region( | |
18338 | - (unsigned long)pgd, 0, 32); | |
18339 | - if (rc) { | |
18340 | - spin_unlock_irqrestore(&pgd_lock, flags); | |
18341 | - goto out_oom; | |
18342 | - } | |
18343 | + if (!xen_feature(XENFEAT_pae_pgdir_above_4gb) | |
18344 | + && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) { | |
18345 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
18346 | +out_oom: | |
18347 | + while (i--) | |
18348 | + pmd_free(mm, pmds[i]); | |
18349 | + return 0; | |
18350 | } | |
cc90b958 | 18351 | |
00e5a55c BS |
18352 | /* Copy kernel pmd contents and write-protect the new pmds. */ |
18353 | - for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { | |
18354 | - memcpy(pmds[i], | |
18355 | - (void *)pgd_page_vaddr(swapper_pg_dir[i]), | |
18356 | - sizeof(pmd_t) * PTRS_PER_PMD); | |
18357 | - make_lowmem_page_readonly( | |
18358 | - pmds[i], XENFEAT_writable_page_tables); | |
18359 | - } | |
18360 | + pud = pud_offset(pgd, 0); | |
18361 | + for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; | |
18362 | + i++, pud++, addr += PUD_SIZE) { | |
18363 | + if (i >= USER_PTRS_PER_PGD) { | |
18364 | + memcpy(pmds[i], | |
18365 | + (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), | |
18366 | + sizeof(pmd_t) * PTRS_PER_PMD); | |
18367 | + make_lowmem_page_readonly( | |
18368 | + pmds[i], XENFEAT_writable_page_tables); | |
18369 | + } | |
cc90b958 | 18370 | |
00e5a55c BS |
18371 | - /* It is safe to poke machine addresses of pmds under the pmd_lock. */ |
18372 | - for (i = 0; i < PTRS_PER_PGD; i++) | |
18373 | - set_pgd(&pgd[i], __pgd(1 + __pa(pmds[i]))); | |
18374 | + /* It is safe to poke machine addresses of pmds under the pgd_lock. */ | |
18375 | + pud_populate(mm, pud, pmds[i]); | |
18376 | + } | |
18377 | ||
18378 | - /* Ensure this pgd gets picked up and pinned on save/restore. */ | |
18379 | + /* List required to sync kernel mapping updates and | |
18380 | + * to pin/unpin on save/restore. */ | |
18381 | pgd_list_add(pgd); | |
18382 | ||
18383 | spin_unlock_irqrestore(&pgd_lock, flags); | |
18384 | ||
18385 | - kfree(pmds); | |
18386 | -#endif | |
18387 | + return 1; | |
18388 | +} | |
18389 | +#else /* !CONFIG_X86_PAE */ | |
18390 | +/* No need to prepopulate any pagetable entries in non-PAE modes. */ | |
18391 | +static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) | |
18392 | +{ | |
18393 | + return 1; | |
18394 | +} | |
18395 | ||
18396 | - return pgd; | |
18397 | +static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) | |
18398 | +{ | |
18399 | +} | |
18400 | +#endif /* CONFIG_X86_PAE */ | |
18401 | ||
18402 | -out_oom: | |
18403 | - if (!pmds) { | |
18404 | - for (i--; i >= 0; i--) { | |
18405 | - pgd_t pgdent = pgd[i]; | |
18406 | - void* pmd = (void *)__va(pgd_val(pgdent)-1); | |
18407 | - paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | |
18408 | - pmd_cache_free(pmd, i); | |
18409 | - } | |
18410 | - } else { | |
18411 | - for (i--; i >= 0; i--) { | |
18412 | - paravirt_release_pd(__pa(pmds[i]) >> PAGE_SHIFT); | |
18413 | - pmd_cache_free(pmds[i], i); | |
18414 | - } | |
18415 | - kfree(pmds); | |
18416 | +pgd_t *pgd_alloc(struct mm_struct *mm) | |
18417 | +{ | |
18418 | + pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | |
cc90b958 | 18419 | + |
00e5a55c BS |
18420 | + /* so that alloc_pd can use it */ |
18421 | + mm->pgd = pgd; | |
18422 | + if (pgd) | |
18423 | + pgd_ctor(pgd); | |
18424 | + | |
18425 | + if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { | |
18426 | + free_page((unsigned long)pgd); | |
18427 | + pgd = NULL; | |
18428 | } | |
18429 | - quicklist_free(0, pgd_dtor, pgd); | |
18430 | - return NULL; | |
18431 | + | |
18432 | + return pgd; | |
cc90b958 BS |
18433 | } |
18434 | ||
00e5a55c BS |
18435 | -void pgd_free(pgd_t *pgd) |
18436 | +void pgd_free(struct mm_struct *mm, pgd_t *pgd) | |
18437 | { | |
18438 | - int i; | |
cc90b958 | 18439 | - |
00e5a55c BS |
18440 | /* |
18441 | * After this the pgd should not be pinned for the duration of this | |
18442 | * function's execution. We should never sleep and thus never race: | |
18443 | @@ -450,39 +368,43 @@ void pgd_free(pgd_t *pgd) | |
18444 | * 2. The machine addresses in PGD entries will not become invalid | |
18445 | * due to a concurrent save/restore. | |
18446 | */ | |
18447 | - pgd_test_and_unpin(pgd); | |
18448 | + pgd_dtor(pgd); | |
cc90b958 | 18449 | |
00e5a55c BS |
18450 | - /* in the PAE case user pgd entries are overwritten before usage */ |
18451 | - if (PTRS_PER_PMD > 1) { | |
18452 | - for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { | |
18453 | - pgd_t pgdent = pgd[i]; | |
18454 | - void* pmd = (void *)__va(pgd_val(pgdent)-1); | |
18455 | - paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | |
18456 | - pmd_cache_free(pmd, i); | |
18457 | - } | |
18458 | + if (PTRS_PER_PMD > 1 && !xen_feature(XENFEAT_pae_pgdir_above_4gb)) | |
18459 | + xen_destroy_contiguous_region((unsigned long)pgd, 0); | |
cc90b958 | 18460 | |
00e5a55c BS |
18461 | - if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) |
18462 | - xen_destroy_contiguous_region((unsigned long)pgd, 0); | |
18463 | - } | |
18464 | + pgd_mop_up_pmds(mm, pgd); | |
18465 | + free_page((unsigned long)pgd); | |
18466 | +} | |
cc90b958 | 18467 | |
00e5a55c BS |
18468 | - /* in the non-PAE case, free_pgtables() clears user pgd entries */ |
18469 | - quicklist_free(0, pgd_dtor, pgd); | |
18470 | +void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) | |
cc90b958 | 18471 | +{ |
00e5a55c BS |
18472 | + pgtable_page_dtor(pte); |
18473 | + paravirt_release_pt(page_to_pfn(pte)); | |
18474 | + tlb_remove_page(tlb, pte); | |
18475 | } | |
cc90b958 | 18476 | |
00e5a55c BS |
18477 | -void check_pgt_cache(void) |
18478 | +#ifdef CONFIG_X86_PAE | |
cc90b958 | 18479 | + |
00e5a55c BS |
18480 | +void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) |
18481 | { | |
18482 | - quicklist_trim(0, pgd_dtor, 25, 16); | |
18483 | + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | |
18484 | + tlb_remove_page(tlb, virt_to_page(pmd)); | |
18485 | } | |
18486 | ||
cc90b958 | 18487 | +#endif |
00e5a55c BS |
18488 | + |
18489 | void make_lowmem_page_readonly(void *va, unsigned int feature) | |
18490 | { | |
18491 | pte_t *pte; | |
18492 | + unsigned int level; | |
18493 | int rc; | |
18494 | ||
18495 | if (xen_feature(feature)) | |
18496 | return; | |
cc90b958 | 18497 | |
00e5a55c BS |
18498 | - pte = virt_to_ptep(va); |
18499 | + pte = lookup_address((unsigned long)va, &level); | |
18500 | + BUG_ON(!pte || level != PG_LEVEL_4K || !pte_present(*pte)); | |
18501 | rc = HYPERVISOR_update_va_mapping( | |
18502 | (unsigned long)va, pte_wrprotect(*pte), 0); | |
18503 | BUG_ON(rc); | |
18504 | @@ -491,313 +413,15 @@ void make_lowmem_page_readonly(void *va, | |
18505 | void make_lowmem_page_writable(void *va, unsigned int feature) | |
18506 | { | |
18507 | pte_t *pte; | |
18508 | + unsigned int level; | |
18509 | int rc; | |
cc90b958 | 18510 | |
00e5a55c BS |
18511 | if (xen_feature(feature)) |
18512 | return; | |
cc90b958 | 18513 | |
00e5a55c BS |
18514 | - pte = virt_to_ptep(va); |
18515 | + pte = lookup_address((unsigned long)va, &level); | |
18516 | + BUG_ON(!pte || level != PG_LEVEL_4K || !pte_present(*pte)); | |
18517 | rc = HYPERVISOR_update_va_mapping( | |
18518 | (unsigned long)va, pte_mkwrite(*pte), 0); | |
18519 | BUG_ON(rc); | |
18520 | } | |
cc90b958 | 18521 | - |
00e5a55c | 18522 | -void make_page_readonly(void *va, unsigned int feature) |
cc90b958 | 18523 | -{ |
00e5a55c BS |
18524 | - pte_t *pte; |
18525 | - int rc; | |
18526 | - | |
18527 | - if (xen_feature(feature)) | |
18528 | - return; | |
18529 | - | |
18530 | - pte = virt_to_ptep(va); | |
18531 | - rc = HYPERVISOR_update_va_mapping( | |
18532 | - (unsigned long)va, pte_wrprotect(*pte), 0); | |
18533 | - if (rc) /* fallback? */ | |
18534 | - xen_l1_entry_update(pte, pte_wrprotect(*pte)); | |
18535 | - if ((unsigned long)va >= (unsigned long)high_memory) { | |
18536 | - unsigned long pfn = pte_pfn(*pte); | |
18537 | -#ifdef CONFIG_HIGHMEM | |
18538 | - if (pfn >= highstart_pfn) | |
18539 | - kmap_flush_unused(); /* flush stale writable kmaps */ | |
18540 | - else | |
18541 | -#endif | |
18542 | - make_lowmem_page_readonly( | |
18543 | - phys_to_virt(pfn << PAGE_SHIFT), feature); | |
18544 | - } | |
cc90b958 BS |
18545 | -} |
18546 | - | |
00e5a55c BS |
18547 | -void make_page_writable(void *va, unsigned int feature) |
18548 | -{ | |
18549 | - pte_t *pte; | |
18550 | - int rc; | |
cc90b958 | 18551 | - |
00e5a55c BS |
18552 | - if (xen_feature(feature)) |
18553 | - return; | |
cc90b958 | 18554 | - |
00e5a55c BS |
18555 | - pte = virt_to_ptep(va); |
18556 | - rc = HYPERVISOR_update_va_mapping( | |
18557 | - (unsigned long)va, pte_mkwrite(*pte), 0); | |
18558 | - if (rc) /* fallback? */ | |
18559 | - xen_l1_entry_update(pte, pte_mkwrite(*pte)); | |
18560 | - if ((unsigned long)va >= (unsigned long)high_memory) { | |
18561 | - unsigned long pfn = pte_pfn(*pte); | |
18562 | -#ifdef CONFIG_HIGHMEM | |
18563 | - if (pfn < highstart_pfn) | |
18564 | -#endif | |
18565 | - make_lowmem_page_writable( | |
18566 | - phys_to_virt(pfn << PAGE_SHIFT), feature); | |
18567 | - } | |
18568 | -} | |
18569 | - | |
18570 | -void make_pages_readonly(void *va, unsigned int nr, unsigned int feature) | |
18571 | -{ | |
18572 | - if (xen_feature(feature)) | |
18573 | - return; | |
18574 | - | |
18575 | - while (nr-- != 0) { | |
18576 | - make_page_readonly(va, feature); | |
18577 | - va = (void *)((unsigned long)va + PAGE_SIZE); | |
18578 | - } | |
18579 | -} | |
18580 | - | |
18581 | -void make_pages_writable(void *va, unsigned int nr, unsigned int feature) | |
18582 | -{ | |
18583 | - if (xen_feature(feature)) | |
18584 | - return; | |
18585 | - | |
18586 | - while (nr-- != 0) { | |
18587 | - make_page_writable(va, feature); | |
18588 | - va = (void *)((unsigned long)va + PAGE_SIZE); | |
18589 | - } | |
18590 | -} | |
18591 | - | |
18592 | -static void _pin_lock(struct mm_struct *mm, int lock) { | |
18593 | - if (lock) | |
18594 | - spin_lock(&mm->page_table_lock); | |
18595 | -#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS | |
18596 | - /* While mm->page_table_lock protects us against insertions and | |
18597 | - * removals of higher level page table pages, it doesn't protect | |
18598 | - * against updates of pte-s. Such updates, however, require the | |
18599 | - * pte pages to be in consistent state (unpinned+writable or | |
18600 | - * pinned+readonly). The pinning and attribute changes, however | |
18601 | - * cannot be done atomically, which is why such updates must be | |
18602 | - * prevented from happening concurrently. | |
18603 | - * Note that no pte lock can ever elsewhere be acquired nesting | |
18604 | - * with an already acquired one in the same mm, or with the mm's | |
18605 | - * page_table_lock already acquired, as that would break in the | |
18606 | - * non-split case (where all these are actually resolving to the | |
18607 | - * one page_table_lock). Thus acquiring all of them here is not | |
18608 | - * going to result in dead locks, and the order of acquires | |
18609 | - * doesn't matter. | |
18610 | - */ | |
18611 | - { | |
18612 | - pgd_t *pgd = mm->pgd; | |
18613 | - unsigned g; | |
18614 | - | |
18615 | - for (g = 0; g < USER_PTRS_PER_PGD; g++, pgd++) { | |
18616 | - pud_t *pud; | |
18617 | - unsigned u; | |
18618 | - | |
18619 | - if (pgd_none(*pgd)) | |
18620 | - continue; | |
18621 | - pud = pud_offset(pgd, 0); | |
18622 | - for (u = 0; u < PTRS_PER_PUD; u++, pud++) { | |
18623 | - pmd_t *pmd; | |
18624 | - unsigned m; | |
18625 | - | |
18626 | - if (pud_none(*pud)) | |
18627 | - continue; | |
18628 | - pmd = pmd_offset(pud, 0); | |
18629 | - for (m = 0; m < PTRS_PER_PMD; m++, pmd++) { | |
18630 | - spinlock_t *ptl; | |
18631 | - | |
18632 | - if (pmd_none(*pmd)) | |
18633 | - continue; | |
18634 | - ptl = pte_lockptr(0, pmd); | |
18635 | - if (lock) | |
18636 | - spin_lock(ptl); | |
18637 | - else | |
18638 | - spin_unlock(ptl); | |
18639 | - } | |
18640 | - } | |
18641 | - } | |
18642 | - } | |
18643 | -#endif | |
18644 | - if (!lock) | |
18645 | - spin_unlock(&mm->page_table_lock); | |
18646 | -} | |
18647 | -#define pin_lock(mm) _pin_lock(mm, 1) | |
18648 | -#define pin_unlock(mm) _pin_lock(mm, 0) | |
cc90b958 | 18649 | - |
00e5a55c BS |
18650 | -#define PIN_BATCH 4 |
18651 | -static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl); | |
cc90b958 | 18652 | - |
00e5a55c BS |
18653 | -static inline unsigned int pgd_walk_set_prot(struct page *page, pgprot_t flags, |
18654 | - unsigned int cpu, unsigned seq) | |
18655 | -{ | |
18656 | - unsigned long pfn = page_to_pfn(page); | |
cc90b958 | 18657 | - |
00e5a55c BS |
18658 | - if (PageHighMem(page)) { |
18659 | - if (pgprot_val(flags) & _PAGE_RW) | |
18660 | - ClearPagePinned(page); | |
18661 | - else | |
18662 | - SetPagePinned(page); | |
18663 | - } else { | |
18664 | - MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq, | |
18665 | - (unsigned long)__va(pfn << PAGE_SHIFT), | |
18666 | - pfn_pte(pfn, flags), 0); | |
18667 | - if (unlikely(++seq == PIN_BATCH)) { | |
18668 | - if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu), | |
18669 | - PIN_BATCH, NULL))) | |
18670 | - BUG(); | |
18671 | - seq = 0; | |
18672 | - } | |
18673 | - } | |
cc90b958 | 18674 | - |
00e5a55c BS |
18675 | - return seq; |
18676 | -} | |
cc90b958 | 18677 | - |
00e5a55c BS |
18678 | -static void pgd_walk(pgd_t *pgd_base, pgprot_t flags) |
18679 | -{ | |
18680 | - pgd_t *pgd = pgd_base; | |
18681 | - pud_t *pud; | |
18682 | - pmd_t *pmd; | |
18683 | - int g, u, m; | |
18684 | - unsigned int cpu, seq; | |
cc90b958 | 18685 | - |
00e5a55c BS |
18686 | - if (xen_feature(XENFEAT_auto_translated_physmap)) |
18687 | - return; | |
cc90b958 | 18688 | - |
00e5a55c | 18689 | - cpu = get_cpu(); |
cc90b958 | 18690 | - |
00e5a55c BS |
18691 | - for (g = 0, seq = 0; g < USER_PTRS_PER_PGD; g++, pgd++) { |
18692 | - if (pgd_none(*pgd)) | |
18693 | - continue; | |
18694 | - pud = pud_offset(pgd, 0); | |
18695 | - if (PTRS_PER_PUD > 1) /* not folded */ | |
18696 | - seq = pgd_walk_set_prot(virt_to_page(pud),flags,cpu,seq); | |
18697 | - for (u = 0; u < PTRS_PER_PUD; u++, pud++) { | |
18698 | - if (pud_none(*pud)) | |
18699 | - continue; | |
18700 | - pmd = pmd_offset(pud, 0); | |
18701 | - if (PTRS_PER_PMD > 1) /* not folded */ | |
18702 | - seq = pgd_walk_set_prot(virt_to_page(pmd),flags,cpu,seq); | |
18703 | - for (m = 0; m < PTRS_PER_PMD; m++, pmd++) { | |
18704 | - if (pmd_none(*pmd)) | |
18705 | - continue; | |
18706 | - seq = pgd_walk_set_prot(pmd_page(*pmd),flags,cpu,seq); | |
18707 | - } | |
18708 | - } | |
18709 | - } | |
cc90b958 | 18710 | - |
00e5a55c BS |
18711 | - if (likely(seq != 0)) { |
18712 | - MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq, | |
18713 | - (unsigned long)pgd_base, | |
18714 | - pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags), | |
18715 | - UVMF_TLB_FLUSH); | |
18716 | - if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu), | |
18717 | - seq + 1, NULL))) | |
18718 | - BUG(); | |
18719 | - } else if(HYPERVISOR_update_va_mapping((unsigned long)pgd_base, | |
18720 | - pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags), | |
18721 | - UVMF_TLB_FLUSH)) | |
18722 | - BUG(); | |
cc90b958 | 18723 | - |
00e5a55c | 18724 | - put_cpu(); |
cc90b958 BS |
18725 | -} |
18726 | - | |
00e5a55c | 18727 | -static void __pgd_pin(pgd_t *pgd) |
cc90b958 | 18728 | -{ |
00e5a55c BS |
18729 | - pgd_walk(pgd, PAGE_KERNEL_RO); |
18730 | - kmap_flush_unused(); | |
18731 | - xen_pgd_pin(__pa(pgd)); | |
18732 | - SetPagePinned(virt_to_page(pgd)); | |
cc90b958 BS |
18733 | -} |
18734 | - | |
00e5a55c BS |
18735 | -static void __pgd_unpin(pgd_t *pgd) |
18736 | -{ | |
18737 | - xen_pgd_unpin(__pa(pgd)); | |
18738 | - pgd_walk(pgd, PAGE_KERNEL); | |
18739 | - ClearPagePinned(virt_to_page(pgd)); | |
18740 | -} | |
cc90b958 | 18741 | - |
00e5a55c BS |
18742 | -static void pgd_test_and_unpin(pgd_t *pgd) |
18743 | -{ | |
18744 | - if (PagePinned(virt_to_page(pgd))) | |
18745 | - __pgd_unpin(pgd); | |
18746 | -} | |
cc90b958 | 18747 | - |
00e5a55c BS |
18748 | -void mm_pin(struct mm_struct *mm) |
18749 | -{ | |
18750 | - if (xen_feature(XENFEAT_writable_page_tables)) | |
18751 | - return; | |
18752 | - pin_lock(mm); | |
18753 | - __pgd_pin(mm->pgd); | |
18754 | - pin_unlock(mm); | |
18755 | -} | |
cc90b958 | 18756 | - |
00e5a55c BS |
18757 | -void mm_unpin(struct mm_struct *mm) |
18758 | -{ | |
18759 | - if (xen_feature(XENFEAT_writable_page_tables)) | |
18760 | - return; | |
18761 | - pin_lock(mm); | |
18762 | - __pgd_unpin(mm->pgd); | |
18763 | - pin_unlock(mm); | |
18764 | -} | |
cc90b958 | 18765 | - |
00e5a55c BS |
18766 | -void mm_pin_all(void) |
18767 | -{ | |
18768 | - struct page *page; | |
18769 | - unsigned long flags; | |
cc90b958 | 18770 | - |
00e5a55c BS |
18771 | - if (xen_feature(XENFEAT_writable_page_tables)) |
18772 | - return; | |
cc90b958 | 18773 | - |
00e5a55c BS |
18774 | - /* |
18775 | - * Allow uninterrupted access to the pgd_list. Also protects | |
18776 | - * __pgd_pin() by disabling preemption. | |
18777 | - * All other CPUs must be at a safe point (e.g., in stop_machine | |
18778 | - * or offlined entirely). | |
18779 | - */ | |
18780 | - spin_lock_irqsave(&pgd_lock, flags); | |
18781 | - for (page = pgd_list; page; page = (struct page *)page->index) { | |
18782 | - if (!PagePinned(page)) | |
18783 | - __pgd_pin((pgd_t *)page_address(page)); | |
18784 | - } | |
18785 | - spin_unlock_irqrestore(&pgd_lock, flags); | |
cc90b958 BS |
18786 | -} |
18787 | - | |
00e5a55c | 18788 | -void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) |
cc90b958 | 18789 | -{ |
00e5a55c BS |
18790 | - if (!PagePinned(virt_to_page(mm->pgd))) |
18791 | - mm_pin(mm); | |
cc90b958 BS |
18792 | -} |
18793 | - | |
00e5a55c BS |
18794 | -void arch_exit_mmap(struct mm_struct *mm) |
18795 | -{ | |
18796 | - struct task_struct *tsk = current; | |
cc90b958 | 18797 | - |
00e5a55c | 18798 | - task_lock(tsk); |
cc90b958 | 18799 | - |
00e5a55c BS |
18800 | - /* |
18801 | - * We aggressively remove defunct pgd from cr3. We execute unmap_vmas() | |
18802 | - * *much* faster this way, as no tlb flushes means bigger wrpt batches. | |
18803 | - */ | |
18804 | - if (tsk->active_mm == mm) { | |
18805 | - tsk->active_mm = &init_mm; | |
18806 | - atomic_inc(&init_mm.mm_count); | |
cc90b958 | 18807 | - |
00e5a55c | 18808 | - switch_mm(mm, &init_mm, tsk); |
cc90b958 | 18809 | - |
00e5a55c BS |
18810 | - atomic_dec(&mm->mm_count); |
18811 | - BUG_ON(atomic_read(&mm->mm_count) == 0); | |
18812 | - } | |
cc90b958 | 18813 | - |
00e5a55c | 18814 | - task_unlock(tsk); |
cc90b958 | 18815 | - |
00e5a55c BS |
18816 | - if (PagePinned(virt_to_page(mm->pgd)) && |
18817 | - (atomic_read(&mm->mm_count) == 1) && | |
18818 | - !mm->context.has_foreign_mappings) | |
18819 | - mm_unpin(mm); | |
18820 | -} | |
18821 | --- sle11-2009-05-14.orig/arch/x86/pci/irq-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
18822 | +++ sle11-2009-05-14/arch/x86/pci/irq-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
18823 | @@ -204,6 +204,7 @@ static int pirq_ali_get(struct pci_dev * | |
18824 | { | |
18825 | static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 }; | |
18826 | ||
18827 | + WARN_ON_ONCE(pirq >= 16); | |
18828 | return irqmap[read_config_nybble(router, 0x48, pirq-1)]; | |
18829 | } | |
18830 | ||
18831 | @@ -211,7 +212,8 @@ static int pirq_ali_set(struct pci_dev * | |
18832 | { | |
18833 | static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 }; | |
18834 | unsigned int val = irqmap[irq]; | |
18835 | - | |
18836 | + | |
18837 | + WARN_ON_ONCE(pirq >= 16); | |
18838 | if (val) { | |
18839 | write_config_nybble(router, 0x48, pirq-1, val); | |
18840 | return 1; | |
18841 | @@ -261,12 +263,16 @@ static int pirq_via_set(struct pci_dev * | |
18842 | static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq) | |
18843 | { | |
18844 | static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 }; | |
18845 | + | |
18846 | + WARN_ON_ONCE(pirq >= 5); | |
18847 | return read_config_nybble(router, 0x55, pirqmap[pirq-1]); | |
18848 | } | |
18849 | ||
18850 | static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) | |
18851 | { | |
18852 | static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 }; | |
18853 | + | |
18854 | + WARN_ON_ONCE(pirq >= 5); | |
18855 | write_config_nybble(router, 0x55, pirqmap[pirq-1], irq); | |
18856 | return 1; | |
18857 | } | |
18858 | @@ -279,12 +285,16 @@ static int pirq_via586_set(struct pci_de | |
18859 | static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq) | |
18860 | { | |
18861 | static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; | |
18862 | + | |
18863 | + WARN_ON_ONCE(pirq >= 4); | |
18864 | return read_config_nybble(router,0x43, pirqmap[pirq-1]); | |
18865 | } | |
18866 | ||
18867 | static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) | |
18868 | { | |
18869 | static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; | |
18870 | + | |
18871 | + WARN_ON_ONCE(pirq >= 4); | |
18872 | write_config_nybble(router, 0x43, pirqmap[pirq-1], irq); | |
18873 | return 1; | |
18874 | } | |
18875 | @@ -423,6 +433,7 @@ static int pirq_sis_set(struct pci_dev * | |
18876 | ||
18877 | static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq) | |
18878 | { | |
18879 | + WARN_ON_ONCE(pirq >= 9); | |
18880 | if (pirq > 8) { | |
18881 | printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); | |
18882 | return 0; | |
18883 | @@ -432,6 +443,7 @@ static int pirq_vlsi_get(struct pci_dev | |
18884 | ||
18885 | static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) | |
18886 | { | |
18887 | + WARN_ON_ONCE(pirq >= 9); | |
18888 | if (pirq > 8) { | |
18889 | printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); | |
18890 | return 0; | |
18891 | @@ -453,14 +465,14 @@ static int pirq_vlsi_set(struct pci_dev | |
18892 | */ | |
18893 | static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq) | |
18894 | { | |
18895 | - outb_p(pirq, 0xc00); | |
18896 | + outb(pirq, 0xc00); | |
18897 | return inb(0xc01) & 0xf; | |
18898 | } | |
18899 | ||
18900 | static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) | |
18901 | { | |
18902 | - outb_p(pirq, 0xc00); | |
18903 | - outb_p(irq, 0xc01); | |
18904 | + outb(pirq, 0xc00); | |
18905 | + outb(irq, 0xc01); | |
18906 | return 1; | |
18907 | } | |
18908 | ||
18909 | @@ -575,6 +587,10 @@ static __init int intel_router_probe(str | |
18910 | case PCI_DEVICE_ID_INTEL_ICH9_4: | |
18911 | case PCI_DEVICE_ID_INTEL_ICH9_5: | |
18912 | case PCI_DEVICE_ID_INTEL_TOLAPAI_0: | |
18913 | + case PCI_DEVICE_ID_INTEL_ICH10_0: | |
18914 | + case PCI_DEVICE_ID_INTEL_ICH10_1: | |
18915 | + case PCI_DEVICE_ID_INTEL_ICH10_2: | |
18916 | + case PCI_DEVICE_ID_INTEL_ICH10_3: | |
18917 | r->name = "PIIX/ICH"; | |
18918 | r->get = pirq_piix_get; | |
18919 | r->set = pirq_piix_set; | |
18920 | --- sle11-2009-05-14.orig/arch/x86/vdso/Makefile 2008-11-25 12:35:54.000000000 +0100 | |
18921 | +++ sle11-2009-05-14/arch/x86/vdso/Makefile 2009-03-16 16:33:40.000000000 +0100 | |
18922 | @@ -66,6 +66,7 @@ vdso32.so-$(VDSO32-y) += int80 | |
18923 | vdso32.so-$(CONFIG_COMPAT) += syscall | |
18924 | vdso32.so-$(VDSO32-y) += sysenter | |
18925 | xen-vdso32-$(subst 1,$(CONFIG_COMPAT),$(shell expr $(CONFIG_XEN_COMPAT)0 '<' 0x0302000)) += int80 | |
18926 | +xen-vdso32-$(CONFIG_X86_32) += syscall | |
18927 | vdso32.so-$(CONFIG_XEN) += $(xen-vdso32-y) | |
18928 | ||
18929 | vdso32-images = $(vdso32.so-y:%=vdso32-%.so) | |
18930 | --- sle11-2009-05-14.orig/arch/x86/vdso/vdso32/syscall.S 2009-05-14 10:56:29.000000000 +0200 | |
18931 | +++ sle11-2009-05-14/arch/x86/vdso/vdso32/syscall.S 2009-03-16 16:33:40.000000000 +0100 | |
18932 | @@ -19,8 +19,10 @@ __kernel_vsyscall: | |
18933 | .Lpush_ebp: | |
18934 | movl %ecx, %ebp | |
18935 | syscall | |
18936 | +#ifndef CONFIG_XEN | |
18937 | movl $__USER32_DS, %ecx | |
18938 | movl %ecx, %ss | |
18939 | +#endif | |
18940 | movl %ebp, %ecx | |
18941 | popl %ebp | |
18942 | .Lpop_ebp: | |
18943 | --- sle11-2009-05-14.orig/arch/x86/vdso/vdso32.S 2009-05-14 10:56:29.000000000 +0200 | |
18944 | +++ sle11-2009-05-14/arch/x86/vdso/vdso32.S 2009-03-16 16:33:40.000000000 +0100 | |
18945 | @@ -19,4 +19,16 @@ vdso32_sysenter_start: | |
18946 | .incbin "arch/x86/vdso/vdso32-sysenter.so" | |
18947 | vdso32_sysenter_end: | |
18948 | ||
18949 | +#if defined(CONFIG_X86_64_XEN) && CONFIG_XEN_COMPAT < 0x030200 | |
18950 | + .globl vdso32_int80_start, vdso32_int80_end | |
18951 | +vdso32_int80_start: | |
18952 | + .incbin "arch/x86/vdso/vdso32-int80.so" | |
18953 | +vdso32_int80_end: | |
18954 | +#elif defined(CONFIG_X86_XEN) | |
18955 | + .globl vdso32_syscall_start, vdso32_syscall_end | |
18956 | +vdso32_syscall_start: | |
18957 | + .incbin "arch/x86/vdso/vdso32-syscall.so" | |
18958 | +vdso32_syscall_end: | |
18959 | +#endif | |
18960 | + | |
18961 | __FINIT | |
18962 | --- sle11-2009-05-14.orig/arch/x86/vdso/vdso32-setup.c 2008-11-25 12:35:53.000000000 +0100 | |
18963 | +++ sle11-2009-05-14/arch/x86/vdso/vdso32-setup.c 2009-03-16 16:33:40.000000000 +0100 | |
18964 | @@ -26,10 +26,6 @@ | |
18965 | #include <asm/vdso.h> | |
18966 | #include <asm/proto.h> | |
18967 | ||
18968 | -#ifdef CONFIG_XEN | |
18969 | -#include <xen/interface/callback.h> | |
cc90b958 | 18970 | -#endif |
cc90b958 | 18971 | - |
00e5a55c BS |
18972 | enum { |
18973 | VDSO_DISABLED = 0, | |
18974 | VDSO_ENABLED = 1, | |
18975 | @@ -229,7 +225,6 @@ static inline void map_compat_vdso(int m | |
18976 | ||
18977 | void enable_sep_cpu(void) | |
18978 | { | |
18979 | -#ifndef CONFIG_XEN | |
18980 | int cpu = get_cpu(); | |
18981 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | |
18982 | ||
18983 | @@ -244,35 +239,6 @@ void enable_sep_cpu(void) | |
18984 | wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0); | |
18985 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0); | |
18986 | put_cpu(); | |
18987 | -#else | |
18988 | - extern asmlinkage void ia32pv_sysenter_target(void); | |
18989 | - static struct callback_register sysenter = { | |
18990 | - .type = CALLBACKTYPE_sysenter, | |
18991 | - .address = { __KERNEL_CS, (unsigned long)ia32pv_sysenter_target }, | |
18992 | - }; | |
cc90b958 | 18993 | - |
00e5a55c BS |
18994 | - if (!boot_cpu_has(X86_FEATURE_SEP)) |
18995 | - return; | |
cc90b958 | 18996 | - |
00e5a55c | 18997 | - get_cpu(); |
cc90b958 | 18998 | - |
00e5a55c BS |
18999 | - if (xen_feature(XENFEAT_supervisor_mode_kernel)) |
19000 | - sysenter.address.eip = (unsigned long)ia32_sysenter_target; | |
cc90b958 | 19001 | - |
00e5a55c BS |
19002 | - switch (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter)) { |
19003 | - case 0: | |
19004 | - break; | |
19005 | -#if CONFIG_XEN_COMPAT < 0x030200 | |
19006 | - case -ENOSYS: | |
19007 | - sysenter.type = CALLBACKTYPE_sysenter_deprecated; | |
19008 | - if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) == 0) | |
19009 | - break; | |
19010 | -#endif | |
19011 | - default: | |
19012 | - clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability); | |
19013 | - break; | |
19014 | - } | |
19015 | -#endif | |
19016 | } | |
19017 | ||
19018 | static struct vm_area_struct gate_vma; | |
19019 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
19020 | +++ sle11-2009-05-14/arch/x86/vdso/vdso32-setup-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
19021 | @@ -0,0 +1,506 @@ | |
19022 | +/* | |
19023 | + * (C) Copyright 2002 Linus Torvalds | |
19024 | + * Portions based on the vdso-randomization code from exec-shield: | |
19025 | + * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar | |
19026 | + * | |
19027 | + * This file contains the needed initializations to support sysenter. | |
19028 | + */ | |
19029 | + | |
19030 | +#include <linux/init.h> | |
19031 | +#include <linux/smp.h> | |
19032 | +#include <linux/thread_info.h> | |
19033 | +#include <linux/sched.h> | |
19034 | +#include <linux/gfp.h> | |
19035 | +#include <linux/string.h> | |
19036 | +#include <linux/elf.h> | |
19037 | +#include <linux/mm.h> | |
19038 | +#include <linux/err.h> | |
19039 | +#include <linux/module.h> | |
19040 | + | |
19041 | +#include <asm/cpufeature.h> | |
19042 | +#include <asm/msr.h> | |
19043 | +#include <asm/pgtable.h> | |
19044 | +#include <asm/unistd.h> | |
19045 | +#include <asm/elf.h> | |
19046 | +#include <asm/tlbflush.h> | |
19047 | +#include <asm/vdso.h> | |
19048 | +#include <asm/proto.h> | |
19049 | + | |
19050 | +#include <xen/interface/callback.h> | |
19051 | + | |
19052 | +enum { | |
19053 | + VDSO_DISABLED = 0, | |
19054 | + VDSO_ENABLED = 1, | |
19055 | + VDSO_COMPAT = 2, | |
19056 | +}; | |
19057 | + | |
19058 | +#ifdef CONFIG_COMPAT_VDSO | |
19059 | +#define VDSO_DEFAULT VDSO_COMPAT | |
19060 | +#else | |
19061 | +#define VDSO_DEFAULT VDSO_ENABLED | |
19062 | +#endif | |
19063 | + | |
19064 | +#ifdef CONFIG_X86_64 | |
19065 | +#define vdso_enabled sysctl_vsyscall32 | |
19066 | +#define arch_setup_additional_pages syscall32_setup_pages | |
19067 | +#endif | |
19068 | + | |
19069 | +/* | |
19070 | + * This is the difference between the prelinked addresses in the vDSO images | |
19071 | + * and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO | |
19072 | + * in the user address space. | |
19073 | + */ | |
19074 | +#define VDSO_ADDR_ADJUST (VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK) | |
19075 | + | |
19076 | +/* | |
19077 | + * Should the kernel map a VDSO page into processes and pass its | |
19078 | + * address down to glibc upon exec()? | |
19079 | + */ | |
19080 | +unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT; | |
19081 | + | |
19082 | +static int __init vdso_setup(char *s) | |
19083 | +{ | |
19084 | + vdso_enabled = simple_strtoul(s, NULL, 0); | |
19085 | + | |
19086 | + return 1; | |
19087 | +} | |
19088 | + | |
19089 | +/* | |
19090 | + * For consistency, the argument vdso32=[012] affects the 32-bit vDSO | |
19091 | + * behavior on both 64-bit and 32-bit kernels. | |
19092 | + * On 32-bit kernels, vdso=[012] means the same thing. | |
19093 | + */ | |
19094 | +__setup("vdso32=", vdso_setup); | |
19095 | + | |
19096 | +#ifdef CONFIG_X86_32 | |
19097 | +__setup_param("vdso=", vdso32_setup, vdso_setup, 0); | |
19098 | + | |
19099 | +EXPORT_SYMBOL_GPL(vdso_enabled); | |
19100 | +#endif | |
19101 | + | |
19102 | +static __init void reloc_symtab(Elf32_Ehdr *ehdr, | |
19103 | + unsigned offset, unsigned size) | |
19104 | +{ | |
19105 | + Elf32_Sym *sym = (void *)ehdr + offset; | |
19106 | + unsigned nsym = size / sizeof(*sym); | |
19107 | + unsigned i; | |
19108 | + | |
19109 | + for(i = 0; i < nsym; i++, sym++) { | |
19110 | + if (sym->st_shndx == SHN_UNDEF || | |
19111 | + sym->st_shndx == SHN_ABS) | |
19112 | + continue; /* skip */ | |
19113 | + | |
19114 | + if (sym->st_shndx > SHN_LORESERVE) { | |
19115 | + printk(KERN_INFO "VDSO: unexpected st_shndx %x\n", | |
19116 | + sym->st_shndx); | |
19117 | + continue; | |
19118 | + } | |
19119 | + | |
19120 | + switch(ELF_ST_TYPE(sym->st_info)) { | |
19121 | + case STT_OBJECT: | |
19122 | + case STT_FUNC: | |
19123 | + case STT_SECTION: | |
19124 | + case STT_FILE: | |
19125 | + sym->st_value += VDSO_ADDR_ADJUST; | |
19126 | + } | |
19127 | + } | |
19128 | +} | |
19129 | + | |
19130 | +static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) | |
19131 | +{ | |
19132 | + Elf32_Dyn *dyn = (void *)ehdr + offset; | |
19133 | + | |
19134 | + for(; dyn->d_tag != DT_NULL; dyn++) | |
19135 | + switch(dyn->d_tag) { | |
19136 | + case DT_PLTGOT: | |
19137 | + case DT_HASH: | |
19138 | + case DT_STRTAB: | |
19139 | + case DT_SYMTAB: | |
19140 | + case DT_RELA: | |
19141 | + case DT_INIT: | |
19142 | + case DT_FINI: | |
19143 | + case DT_REL: | |
19144 | + case DT_DEBUG: | |
19145 | + case DT_JMPREL: | |
19146 | + case DT_VERSYM: | |
19147 | + case DT_VERDEF: | |
19148 | + case DT_VERNEED: | |
19149 | + case DT_ADDRRNGLO ... DT_ADDRRNGHI: | |
19150 | + /* definitely pointers needing relocation */ | |
19151 | + dyn->d_un.d_ptr += VDSO_ADDR_ADJUST; | |
19152 | + break; | |
19153 | + | |
19154 | + case DT_ENCODING ... OLD_DT_LOOS-1: | |
19155 | + case DT_LOOS ... DT_HIOS-1: | |
19156 | + /* Tags above DT_ENCODING are pointers if | |
19157 | + they're even */ | |
19158 | + if (dyn->d_tag >= DT_ENCODING && | |
19159 | + (dyn->d_tag & 1) == 0) | |
19160 | + dyn->d_un.d_ptr += VDSO_ADDR_ADJUST; | |
19161 | + break; | |
19162 | + | |
19163 | + case DT_VERDEFNUM: | |
19164 | + case DT_VERNEEDNUM: | |
19165 | + case DT_FLAGS_1: | |
19166 | + case DT_RELACOUNT: | |
19167 | + case DT_RELCOUNT: | |
19168 | + case DT_VALRNGLO ... DT_VALRNGHI: | |
19169 | + /* definitely not pointers */ | |
19170 | + break; | |
19171 | + | |
19172 | + case OLD_DT_LOOS ... DT_LOOS-1: | |
19173 | + case DT_HIOS ... DT_VALRNGLO-1: | |
19174 | + default: | |
19175 | + if (dyn->d_tag > DT_ENCODING) | |
19176 | + printk(KERN_INFO "VDSO: unexpected DT_tag %x\n", | |
19177 | + dyn->d_tag); | |
19178 | + break; | |
19179 | + } | |
19180 | +} | |
19181 | + | |
19182 | +static __init void relocate_vdso(Elf32_Ehdr *ehdr) | |
19183 | +{ | |
19184 | + Elf32_Phdr *phdr; | |
19185 | + Elf32_Shdr *shdr; | |
19186 | + int i; | |
19187 | + | |
19188 | + BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || | |
19189 | + !elf_check_arch_ia32(ehdr) || | |
19190 | + ehdr->e_type != ET_DYN); | |
19191 | + | |
19192 | + ehdr->e_entry += VDSO_ADDR_ADJUST; | |
19193 | + | |
19194 | + /* rebase phdrs */ | |
19195 | + phdr = (void *)ehdr + ehdr->e_phoff; | |
19196 | + for (i = 0; i < ehdr->e_phnum; i++) { | |
19197 | + phdr[i].p_vaddr += VDSO_ADDR_ADJUST; | |
19198 | + | |
19199 | + /* relocate dynamic stuff */ | |
19200 | + if (phdr[i].p_type == PT_DYNAMIC) | |
19201 | + reloc_dyn(ehdr, phdr[i].p_offset); | |
19202 | + } | |
19203 | + | |
19204 | + /* rebase sections */ | |
19205 | + shdr = (void *)ehdr + ehdr->e_shoff; | |
19206 | + for(i = 0; i < ehdr->e_shnum; i++) { | |
19207 | + if (!(shdr[i].sh_flags & SHF_ALLOC)) | |
19208 | + continue; | |
19209 | + | |
19210 | + shdr[i].sh_addr += VDSO_ADDR_ADJUST; | |
19211 | + | |
19212 | + if (shdr[i].sh_type == SHT_SYMTAB || | |
19213 | + shdr[i].sh_type == SHT_DYNSYM) | |
19214 | + reloc_symtab(ehdr, shdr[i].sh_offset, | |
19215 | + shdr[i].sh_size); | |
19216 | + } | |
19217 | +} | |
19218 | + | |
19219 | +/* | |
19220 | + * These symbols are defined by vdso32.S to mark the bounds | |
19221 | + * of the ELF DSO images included therein. | |
19222 | + */ | |
19223 | +extern const char vdso32_default_start, vdso32_default_end; | |
19224 | +extern const char vdso32_sysenter_start, vdso32_sysenter_end; | |
19225 | +static struct page *vdso32_pages[1]; | |
19226 | + | |
19227 | +#ifdef CONFIG_X86_64 | |
19228 | + | |
19229 | +#if CONFIG_XEN_COMPAT < 0x030200 | |
19230 | +static int use_int80 = 1; | |
19231 | +#endif | |
19232 | +static int use_sysenter __read_mostly = -1; | |
19233 | + | |
19234 | +#define vdso32_sysenter() (use_sysenter > 0) | |
19235 | + | |
19236 | +/* May not be __init: called during resume */ | |
19237 | +void syscall32_cpu_init(void) | |
19238 | +{ | |
19239 | + static const struct callback_register cstar = { | |
19240 | + .type = CALLBACKTYPE_syscall32, | |
19241 | + .address = (unsigned long)ia32_cstar_target | |
19242 | + }; | |
19243 | + static const struct callback_register sysenter = { | |
19244 | + .type = CALLBACKTYPE_sysenter, | |
19245 | + .address = (unsigned long)ia32_sysenter_target | |
19246 | + }; | |
19247 | + | |
19248 | + if ((HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0) || | |
19249 | + (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) < 0)) | |
19250 | +#if CONFIG_XEN_COMPAT < 0x030200 | |
19251 | + return; | |
19252 | + use_int80 = 0; | |
19253 | +#else | |
19254 | + BUG(); | |
19255 | +#endif | |
19256 | + | |
19257 | + if (use_sysenter < 0) | |
19258 | + use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL); | |
19259 | +} | |
19260 | + | |
19261 | +#define compat_uses_vma 1 | |
19262 | + | |
19263 | +static inline void map_compat_vdso(int map) | |
19264 | +{ | |
19265 | +} | |
19266 | + | |
19267 | +#else /* CONFIG_X86_32 */ | |
19268 | + | |
19269 | +#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) | |
19270 | + | |
19271 | +extern asmlinkage void ia32pv_cstar_target(void); | |
19272 | +static const struct callback_register __cpuinitconst cstar = { | |
19273 | + .type = CALLBACKTYPE_syscall32, | |
19274 | + .address = { __KERNEL_CS, (unsigned long)ia32pv_cstar_target }, | |
19275 | +}; | |
19276 | + | |
19277 | +void __cpuinit enable_sep_cpu(void) | |
19278 | +{ | |
19279 | + extern asmlinkage void ia32pv_sysenter_target(void); | |
19280 | + static struct callback_register __cpuinitdata sysenter = { | |
19281 | + .type = CALLBACKTYPE_sysenter, | |
19282 | + .address = { __KERNEL_CS, (unsigned long)ia32pv_sysenter_target }, | |
19283 | + }; | |
19284 | + | |
19285 | + if (boot_cpu_has(X86_FEATURE_SYSCALL)) { | |
19286 | + if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) != 0) | |
19287 | + BUG(); | |
19288 | + return; | |
19289 | + } | |
19290 | + | |
19291 | + if (!boot_cpu_has(X86_FEATURE_SEP)) | |
19292 | + return; | |
19293 | + | |
19294 | + if (xen_feature(XENFEAT_supervisor_mode_kernel)) | |
19295 | + sysenter.address.eip = (unsigned long)ia32_sysenter_target; | |
19296 | + | |
19297 | + switch (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter)) { | |
19298 | + case 0: | |
19299 | + break; | |
19300 | +#if CONFIG_XEN_COMPAT < 0x030200 | |
19301 | + case -ENOSYS: | |
19302 | + sysenter.type = CALLBACKTYPE_sysenter_deprecated; | |
19303 | + if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) == 0) | |
19304 | + break; | |
19305 | +#endif | |
19306 | + default: | |
19307 | + setup_clear_cpu_cap(X86_FEATURE_SEP); | |
19308 | + break; | |
19309 | + } | |
19310 | +} | |
19311 | + | |
19312 | +static struct vm_area_struct gate_vma; | |
19313 | + | |
19314 | +static int __init gate_vma_init(void) | |
19315 | +{ | |
19316 | + gate_vma.vm_mm = NULL; | |
19317 | + gate_vma.vm_start = FIXADDR_USER_START; | |
19318 | + gate_vma.vm_end = FIXADDR_USER_END; | |
19319 | + gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; | |
19320 | + gate_vma.vm_page_prot = __P101; | |
19321 | + /* | |
19322 | + * Make sure the vDSO gets into every core dump. | |
19323 | + * Dumping its contents makes post-mortem fully interpretable later | |
19324 | + * without matching up the same kernel and hardware config to see | |
19325 | + * what PC values meant. | |
19326 | + */ | |
19327 | + gate_vma.vm_flags |= VM_ALWAYSDUMP; | |
19328 | + return 0; | |
19329 | +} | |
19330 | + | |
19331 | +#define compat_uses_vma 0 | |
19332 | + | |
19333 | +static void map_compat_vdso(int map) | |
19334 | +{ | |
19335 | + static int vdso_mapped; | |
19336 | + | |
19337 | + if (map == vdso_mapped) | |
19338 | + return; | |
19339 | + | |
19340 | + vdso_mapped = map; | |
19341 | + | |
19342 | + __set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT, | |
19343 | + map ? PAGE_READONLY_EXEC : PAGE_NONE); | |
19344 | + | |
19345 | + /* flush stray tlbs */ | |
19346 | + flush_tlb_all(); | |
19347 | +} | |
19348 | + | |
19349 | +#endif /* CONFIG_X86_64 */ | |
19350 | + | |
19351 | +int __init sysenter_setup(void) | |
19352 | +{ | |
19353 | + void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); | |
19354 | + const void *vsyscall; | |
19355 | + size_t vsyscall_len; | |
19356 | + | |
19357 | + vdso32_pages[0] = virt_to_page(syscall_page); | |
19358 | + | |
19359 | +#ifdef CONFIG_X86_32 | |
19360 | + gate_vma_init(); | |
19361 | + | |
19362 | + printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); | |
19363 | +#endif | |
19364 | + | |
19365 | +#if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT < 0x030200 | |
19366 | + if (use_int80) { | |
19367 | + extern const char vdso32_int80_start, vdso32_int80_end; | |
19368 | + | |
19369 | + vsyscall = &vdso32_int80_start; | |
19370 | + vsyscall_len = &vdso32_int80_end - &vdso32_int80_start; | |
19371 | + } else | |
19372 | +#elif defined(CONFIG_X86_32) | |
19373 | + if (boot_cpu_has(X86_FEATURE_SYSCALL) | |
19374 | + && (boot_cpu_data.x86_vendor != X86_VENDOR_AMD | |
19375 | + || HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) != 0)) | |
19376 | + setup_clear_cpu_cap(X86_FEATURE_SYSCALL); | |
19377 | + barrier(); /* until clear_bit()'s constraints are correct ... */ | |
19378 | + if (boot_cpu_has(X86_FEATURE_SYSCALL)) { | |
19379 | + extern const char vdso32_syscall_start, vdso32_syscall_end; | |
19380 | + | |
19381 | + vsyscall = &vdso32_syscall_start; | |
19382 | + vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start; | |
19383 | + } else | |
19384 | +#endif | |
19385 | + if (!vdso32_sysenter()) { | |
19386 | + vsyscall = &vdso32_default_start; | |
19387 | + vsyscall_len = &vdso32_default_end - &vdso32_default_start; | |
19388 | + } else { | |
19389 | + vsyscall = &vdso32_sysenter_start; | |
19390 | + vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start; | |
19391 | + } | |
19392 | + | |
19393 | + memcpy(syscall_page, vsyscall, vsyscall_len); | |
19394 | + relocate_vdso(syscall_page); | |
19395 | + | |
19396 | + return 0; | |
19397 | +} | |
19398 | + | |
19399 | +/* Setup a VMA at program startup for the vsyscall page */ | |
19400 | +int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | |
19401 | +{ | |
19402 | + struct mm_struct *mm = current->mm; | |
19403 | + unsigned long addr; | |
19404 | + int ret = 0; | |
19405 | + bool compat; | |
19406 | + | |
19407 | + down_write(&mm->mmap_sem); | |
19408 | + | |
19409 | + /* Test compat mode once here, in case someone | |
19410 | + changes it via sysctl */ | |
19411 | + compat = (vdso_enabled == VDSO_COMPAT); | |
19412 | + | |
19413 | + map_compat_vdso(compat); | |
19414 | + | |
19415 | + if (compat) | |
19416 | + addr = VDSO_HIGH_BASE; | |
19417 | + else { | |
19418 | + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); | |
19419 | + if (IS_ERR_VALUE(addr)) { | |
19420 | + ret = addr; | |
19421 | + goto up_fail; | |
19422 | + } | |
19423 | + } | |
19424 | + | |
19425 | + if (compat_uses_vma || !compat) { | |
19426 | + /* | |
19427 | + * MAYWRITE to allow gdb to COW and set breakpoints | |
19428 | + * | |
19429 | + * Make sure the vDSO gets into every core dump. | |
19430 | + * Dumping its contents makes post-mortem fully | |
19431 | + * interpretable later without matching up the same | |
19432 | + * kernel and hardware config to see what PC values | |
19433 | + * meant. | |
19434 | + */ | |
19435 | + ret = install_special_mapping(mm, addr, PAGE_SIZE, | |
19436 | + VM_READ|VM_EXEC| | |
19437 | + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | |
19438 | + VM_ALWAYSDUMP, | |
19439 | + vdso32_pages); | |
19440 | + | |
19441 | + if (ret) | |
19442 | + goto up_fail; | |
19443 | + } | |
19444 | + | |
19445 | + current->mm->context.vdso = (void *)addr; | |
19446 | + current_thread_info()->sysenter_return = | |
19447 | + VDSO32_SYMBOL(addr, SYSENTER_RETURN); | |
19448 | + | |
19449 | + up_fail: | |
19450 | + up_write(&mm->mmap_sem); | |
19451 | + | |
19452 | + return ret; | |
19453 | +} | |
19454 | + | |
19455 | +#ifdef CONFIG_X86_64 | |
19456 | + | |
19457 | +/* | |
19458 | + * This must be done early in case we have an initrd containing 32-bit | |
19459 | + * binaries (e.g., hotplug). This could be pushed upstream. | |
19460 | + */ | |
19461 | +core_initcall(sysenter_setup); | |
19462 | + | |
19463 | +#ifdef CONFIG_SYSCTL | |
19464 | +/* Register vsyscall32 into the ABI table */ | |
19465 | +#include <linux/sysctl.h> | |
19466 | + | |
19467 | +static ctl_table abi_table2[] = { | |
19468 | + { | |
19469 | + .procname = "vsyscall32", | |
19470 | + .data = &sysctl_vsyscall32, | |
19471 | + .maxlen = sizeof(int), | |
19472 | + .mode = 0644, | |
19473 | + .proc_handler = proc_dointvec | |
19474 | + }, | |
19475 | + {} | |
19476 | +}; | |
19477 | + | |
19478 | +static ctl_table abi_root_table2[] = { | |
19479 | + { | |
19480 | + .ctl_name = CTL_ABI, | |
19481 | + .procname = "abi", | |
19482 | + .mode = 0555, | |
19483 | + .child = abi_table2 | |
19484 | + }, | |
19485 | + {} | |
19486 | +}; | |
19487 | + | |
19488 | +static __init int ia32_binfmt_init(void) | |
19489 | +{ | |
19490 | + register_sysctl_table(abi_root_table2); | |
19491 | + return 0; | |
19492 | +} | |
19493 | +__initcall(ia32_binfmt_init); | |
19494 | +#endif | |
19495 | + | |
19496 | +#else /* CONFIG_X86_32 */ | |
19497 | + | |
19498 | +const char *arch_vma_name(struct vm_area_struct *vma) | |
19499 | +{ | |
19500 | + if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) | |
19501 | + return "[vdso]"; | |
19502 | + return NULL; | |
19503 | +} | |
19504 | + | |
19505 | +struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | |
19506 | +{ | |
19507 | + struct mm_struct *mm = tsk->mm; | |
19508 | + | |
19509 | + /* Check to see if this task was created in compat vdso mode */ | |
19510 | + if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) | |
19511 | + return &gate_vma; | |
19512 | + return NULL; | |
19513 | +} | |
19514 | + | |
19515 | +int in_gate_area(struct task_struct *task, unsigned long addr) | |
19516 | +{ | |
19517 | + const struct vm_area_struct *vma = get_gate_vma(task); | |
19518 | + | |
19519 | + return vma && addr >= vma->vm_start && addr < vma->vm_end; | |
19520 | +} | |
19521 | + | |
19522 | +int in_gate_area_no_task(unsigned long addr) | |
19523 | +{ | |
19524 | + return 0; | |
19525 | +} | |
19526 | + | |
19527 | +#endif /* CONFIG_X86_64 */ | |
19528 | --- sle11-2009-05-14.orig/drivers/pci/msi-xen.c 2009-02-16 16:18:36.000000000 +0100 | |
19529 | +++ sle11-2009-05-14/drivers/pci/msi-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
19530 | @@ -43,6 +43,53 @@ struct msi_pirq_entry { | |
19531 | int entry_nr; | |
19532 | }; | |
19533 | ||
19534 | +/* Arch hooks */ | |
19535 | + | |
19536 | +int __attribute__ ((weak)) | |
19537 | +arch_msi_check_device(struct pci_dev *dev, int nvec, int type) | |
19538 | +{ | |
19539 | + return 0; | |
19540 | +} | |
19541 | + | |
19542 | +#ifndef CONFIG_XEN | |
19543 | +int __attribute__ ((weak)) | |
19544 | +arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *entry) | |
19545 | +{ | |
19546 | + return 0; | |
19547 | +} | |
19548 | + | |
19549 | +int __attribute__ ((weak)) | |
19550 | +arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |
19551 | +{ | |
19552 | + struct msi_desc *entry; | |
19553 | + int ret; | |
19554 | + | |
19555 | + list_for_each_entry(entry, &dev->msi_list, list) { | |
19556 | + ret = arch_setup_msi_irq(dev, entry); | |
19557 | + if (ret) | |
19558 | + return ret; | |
19559 | + } | |
19560 | + | |
19561 | + return 0; | |
19562 | +} | |
19563 | + | |
19564 | +void __attribute__ ((weak)) arch_teardown_msi_irq(unsigned int irq) | |
19565 | +{ | |
19566 | + return; | |
19567 | +} | |
19568 | + | |
19569 | +void __attribute__ ((weak)) | |
19570 | +arch_teardown_msi_irqs(struct pci_dev *dev) | |
19571 | +{ | |
19572 | + struct msi_desc *entry; | |
19573 | + | |
19574 | + list_for_each_entry(entry, &dev->msi_list, list) { | |
19575 | + if (entry->irq != 0) | |
19576 | + arch_teardown_msi_irq(entry->irq); | |
19577 | + } | |
19578 | +} | |
19579 | +#endif | |
19580 | + | |
19581 | static void msi_set_enable(struct pci_dev *dev, int enable) | |
19582 | { | |
19583 | int pos; | |
19584 | @@ -270,7 +317,6 @@ static void pci_intx_for_msi(struct pci_ | |
19585 | pci_intx(dev, enable); | |
19586 | } | |
19587 | ||
19588 | -#ifdef CONFIG_PM | |
19589 | static void __pci_restore_msi_state(struct pci_dev *dev) | |
19590 | { | |
19591 | int pirq; | |
19592 | @@ -328,7 +374,7 @@ void pci_restore_msi_state(struct pci_de | |
19593 | __pci_restore_msi_state(dev); | |
19594 | __pci_restore_msix_state(dev); | |
19595 | } | |
19596 | -#endif /* CONFIG_PM */ | |
19597 | +EXPORT_SYMBOL_GPL(pci_restore_msi_state); | |
19598 | ||
19599 | /** | |
19600 | * msi_capability_init - configure device's MSI capability structure | |
19601 | @@ -755,51 +801,3 @@ void pci_msi_init_pci_dev(struct pci_dev | |
19602 | INIT_LIST_HEAD(&dev->msi_list); | |
19603 | #endif | |
19604 | } | |
cc90b958 | 19605 | - |
cc90b958 | 19606 | - |
00e5a55c | 19607 | -/* Arch hooks */ |
cc90b958 | 19608 | - |
00e5a55c BS |
19609 | -int __attribute__ ((weak)) |
19610 | -arch_msi_check_device(struct pci_dev* dev, int nvec, int type) | |
cc90b958 | 19611 | -{ |
00e5a55c | 19612 | - return 0; |
cc90b958 BS |
19613 | -} |
19614 | - | |
00e5a55c BS |
19615 | -#ifndef CONFIG_XEN |
19616 | -int __attribute__ ((weak)) | |
19617 | -arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *entry) | |
cc90b958 | 19618 | -{ |
00e5a55c | 19619 | - return 0; |
cc90b958 BS |
19620 | -} |
19621 | - | |
00e5a55c BS |
19622 | -int __attribute__ ((weak)) |
19623 | -arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |
cc90b958 | 19624 | -{ |
00e5a55c BS |
19625 | - struct msi_desc *entry; |
19626 | - int ret; | |
cc90b958 | 19627 | - |
00e5a55c BS |
19628 | - list_for_each_entry(entry, &dev->msi_list, list) { |
19629 | - ret = arch_setup_msi_irq(dev, entry); | |
19630 | - if (ret) | |
19631 | - return ret; | |
19632 | - } | |
cc90b958 | 19633 | - |
00e5a55c | 19634 | - return 0; |
cc90b958 BS |
19635 | -} |
19636 | - | |
00e5a55c | 19637 | -void __attribute__ ((weak)) arch_teardown_msi_irq(unsigned int irq) |
cc90b958 | 19638 | -{ |
00e5a55c | 19639 | - return; |
cc90b958 BS |
19640 | -} |
19641 | - | |
00e5a55c BS |
19642 | -void __attribute__ ((weak)) |
19643 | -arch_teardown_msi_irqs(struct pci_dev *dev) | |
cc90b958 | 19644 | -{ |
00e5a55c | 19645 | - struct msi_desc *entry; |
cc90b958 | 19646 | - |
00e5a55c BS |
19647 | - list_for_each_entry(entry, &dev->msi_list, list) { |
19648 | - if (entry->irq != 0) | |
19649 | - arch_teardown_msi_irq(entry->irq); | |
19650 | - } | |
cc90b958 | 19651 | -} |
cc90b958 | 19652 | -#endif |
00e5a55c BS |
19653 | --- sle11-2009-05-14.orig/drivers/pci/pci.c 2009-05-14 10:56:29.000000000 +0200 |
19654 | +++ sle11-2009-05-14/drivers/pci/pci.c 2009-03-16 16:33:40.000000000 +0100 | |
19655 | @@ -353,7 +353,12 @@ pci_find_parent_resource(const struct pc | |
19656 | * Restore the BAR values for a given device, so as to make it | |
19657 | * accessible by its driver. | |
19658 | */ | |
19659 | +#ifndef CONFIG_XEN | |
19660 | static void | |
19661 | +#else | |
19662 | +EXPORT_SYMBOL_GPL(pci_restore_bars); | |
19663 | +void | |
19664 | +#endif | |
19665 | pci_restore_bars(struct pci_dev *dev) | |
19666 | { | |
19667 | int i, numres; | |
19668 | --- sle11-2009-05-14.orig/drivers/xen/balloon/sysfs.c 2009-03-04 11:25:55.000000000 +0100 | |
19669 | +++ sle11-2009-05-14/drivers/xen/balloon/sysfs.c 2009-03-16 16:33:40.000000000 +0100 | |
19670 | @@ -108,7 +108,7 @@ static struct attribute_group balloon_in | |
19671 | }; | |
19672 | ||
19673 | static struct sysdev_class balloon_sysdev_class = { | |
19674 | - set_kset_name(BALLOON_CLASS_NAME), | |
19675 | + .name = BALLOON_CLASS_NAME, | |
19676 | }; | |
19677 | ||
19678 | static struct sys_device balloon_sysdev; | |
19679 | --- sle11-2009-05-14.orig/drivers/xen/blkback/blkback.c 2009-02-16 16:18:36.000000000 +0100 | |
19680 | +++ sle11-2009-05-14/drivers/xen/blkback/blkback.c 2009-03-16 16:33:40.000000000 +0100 | |
19681 | @@ -148,7 +148,7 @@ static void unplug_queue(blkif_t *blkif) | |
19682 | return; | |
19683 | if (blkif->plug->unplug_fn) | |
19684 | blkif->plug->unplug_fn(blkif->plug); | |
19685 | - blk_put_queue(blkif->plug); | |
19686 | + kobject_put(&blkif->plug->kobj); | |
19687 | blkif->plug = NULL; | |
19688 | } | |
19689 | ||
19690 | @@ -159,7 +159,8 @@ static void plug_queue(blkif_t *blkif, s | |
19691 | if (q == blkif->plug) | |
19692 | return; | |
19693 | unplug_queue(blkif); | |
19694 | - blk_get_queue(q); | |
19695 | + WARN_ON(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)); | |
19696 | + kobject_get(&q->kobj); | |
19697 | blkif->plug = q; | |
19698 | } | |
19699 | ||
19700 | --- sle11-2009-05-14.orig/drivers/xen/blkfront/blkfront.c 2009-02-16 16:18:36.000000000 +0100 | |
19701 | +++ sle11-2009-05-14/drivers/xen/blkfront/blkfront.c 2009-03-24 10:12:53.000000000 +0100 | |
19702 | @@ -713,7 +713,6 @@ static irqreturn_t blkif_int(int irq, vo | |
19703 | RING_IDX i, rp; | |
19704 | unsigned long flags; | |
19705 | struct blkfront_info *info = (struct blkfront_info *)dev_id; | |
19706 | - int uptodate; | |
19707 | ||
19708 | spin_lock_irqsave(&blkif_io_lock, flags); | |
19709 | ||
19710 | @@ -738,13 +737,13 @@ static irqreturn_t blkif_int(int irq, vo | |
19711 | ||
19712 | ADD_ID_TO_FREELIST(info, id); | |
19713 | ||
19714 | - uptodate = (bret->status == BLKIF_RSP_OKAY); | |
19715 | + ret = bret->status == BLKIF_RSP_OKAY ? 0 : -EIO; | |
19716 | switch (bret->operation) { | |
19717 | case BLKIF_OP_WRITE_BARRIER: | |
19718 | if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { | |
19719 | printk("blkfront: %s: write barrier op failed\n", | |
19720 | info->gd->disk_name); | |
19721 | - uptodate = -EOPNOTSUPP; | |
19722 | + ret = -EOPNOTSUPP; | |
19723 | info->feature_barrier = 0; | |
19724 | xlvbd_barrier(info); | |
19725 | } | |
19726 | @@ -755,10 +754,8 @@ static irqreturn_t blkif_int(int irq, vo | |
19727 | DPRINTK("Bad return from blkdev data " | |
19728 | "request: %x\n", bret->status); | |
19729 | ||
19730 | - ret = end_that_request_first(req, uptodate, | |
19731 | - req->hard_nr_sectors); | |
19732 | + ret = __blk_end_request(req, ret, blk_rq_bytes(req)); | |
19733 | BUG_ON(ret); | |
19734 | - end_that_request_last(req, uptodate); | |
19735 | break; | |
19736 | default: | |
19737 | BUG(); | |
19738 | --- sle11-2009-05-14.orig/drivers/xen/blktap/blktap.c 2009-04-20 11:37:50.000000000 +0200 | |
19739 | +++ sle11-2009-05-14/drivers/xen/blktap/blktap.c 2009-04-20 11:38:54.000000000 +0200 | |
19740 | @@ -331,8 +331,8 @@ static pte_t blktap_clear_pte(struct vm_ | |
19741 | * if vm_file is NULL (meaning mmap failed and we have nothing to do) | |
19742 | */ | |
19743 | if (uvaddr < uvstart || vma->vm_file == NULL) | |
19744 | - return ptep_get_and_clear_full(vma->vm_mm, uvaddr, | |
19745 | - ptep, is_fullmm); | |
19746 | + return xen_ptep_get_and_clear_full(vma, uvaddr, ptep, | |
19747 | + is_fullmm); | |
19748 | ||
19749 | info = vma->vm_file->private_data; | |
19750 | priv = vma->vm_private_data; | |
19751 | @@ -379,8 +379,8 @@ static pte_t blktap_clear_pte(struct vm_ | |
19752 | BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap)); | |
19753 | ||
19754 | /* USING SHADOW PAGE TABLES. */ | |
19755 | - copy = ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep, | |
19756 | - is_fullmm); | |
19757 | + copy = xen_ptep_get_and_clear_full(vma, uvaddr, ptep, | |
19758 | + is_fullmm); | |
19759 | } | |
19760 | ||
19761 | if (count) { | |
19762 | --- sle11-2009-05-14.orig/drivers/xen/core/Makefile 2009-05-14 10:56:29.000000000 +0200 | |
19763 | +++ sle11-2009-05-14/drivers/xen/core/Makefile 2009-03-16 16:33:40.000000000 +0100 | |
19764 | @@ -10,5 +10,6 @@ obj-$(CONFIG_SYS_HYPERVISOR) += hypervis | |
19765 | obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o | |
19766 | obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o | |
19767 | obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o | |
19768 | +obj-$(CONFIG_X86_SMP) += spinlock.o | |
19769 | obj-$(CONFIG_KEXEC) += machine_kexec.o | |
19770 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o | |
19771 | --- sle11-2009-05-14.orig/drivers/xen/core/evtchn.c 2009-03-04 11:25:55.000000000 +0100 | |
19772 | +++ sle11-2009-05-14/drivers/xen/core/evtchn.c 2009-03-16 16:33:40.000000000 +0100 | |
19773 | @@ -194,7 +194,7 @@ static inline unsigned int cpu_from_evtc | |
19774 | ||
19775 | /* Upcall to generic IRQ layer. */ | |
19776 | #ifdef CONFIG_X86 | |
19777 | -extern fastcall unsigned int do_IRQ(struct pt_regs *regs); | |
19778 | +extern unsigned int do_IRQ(struct pt_regs *regs); | |
19779 | void __init xen_init_IRQ(void); | |
19780 | void __init init_IRQ(void) | |
19781 | { | |
19782 | @@ -203,13 +203,11 @@ void __init init_IRQ(void) | |
19783 | } | |
19784 | #if defined (__i386__) | |
19785 | static inline void exit_idle(void) {} | |
19786 | -#define IRQ_REG orig_eax | |
19787 | #elif defined (__x86_64__) | |
19788 | #include <asm/idle.h> | |
19789 | -#define IRQ_REG orig_rax | |
19790 | #endif | |
19791 | #define do_IRQ(irq, regs) do { \ | |
19792 | - (regs)->IRQ_REG = ~(irq); \ | |
19793 | + (regs)->orig_ax = ~(irq); \ | |
19794 | do_IRQ((regs)); \ | |
19795 | } while (0) | |
19796 | #endif | |
19797 | @@ -670,13 +668,12 @@ static void set_affinity_irq(unsigned in | |
19798 | int resend_irq_on_evtchn(unsigned int irq) | |
19799 | { | |
19800 | int masked, evtchn = evtchn_from_irq(irq); | |
19801 | - shared_info_t *s = HYPERVISOR_shared_info; | |
19802 | ||
19803 | if (!VALID_EVTCHN(evtchn)) | |
19804 | return 1; | |
19805 | ||
19806 | masked = test_and_set_evtchn_mask(evtchn); | |
19807 | - synch_set_bit(evtchn, s->evtchn_pending); | |
19808 | + set_evtchn(evtchn); | |
19809 | if (!masked) | |
19810 | unmask_evtchn(evtchn); | |
19811 | ||
19812 | @@ -969,6 +966,43 @@ void disable_all_local_evtchn(void) | |
19813 | synch_set_bit(i, &s->evtchn_mask[0]); | |
19814 | } | |
19815 | ||
19816 | +/* Clear an irq's pending state, in preparation for polling on it. */ | |
19817 | +void xen_clear_irq_pending(int irq) | |
19818 | +{ | |
19819 | + int evtchn = evtchn_from_irq(irq); | |
cc90b958 | 19820 | + |
00e5a55c BS |
19821 | + if (VALID_EVTCHN(evtchn)) |
19822 | + clear_evtchn(evtchn); | |
19823 | +} | |
cc90b958 | 19824 | + |
00e5a55c BS |
19825 | +/* Set an irq's pending state, to avoid blocking on it. */ |
19826 | +void xen_set_irq_pending(int irq) | |
19827 | +{ | |
19828 | + int evtchn = evtchn_from_irq(irq); | |
cc90b958 | 19829 | + |
00e5a55c BS |
19830 | + if (VALID_EVTCHN(evtchn)) |
19831 | + set_evtchn(evtchn); | |
19832 | +} | |
cc90b958 | 19833 | + |
00e5a55c BS |
19834 | +/* Test an irq's pending state. */ |
19835 | +int xen_test_irq_pending(int irq) | |
19836 | +{ | |
19837 | + int evtchn = evtchn_from_irq(irq); | |
cc90b958 | 19838 | + |
00e5a55c BS |
19839 | + return VALID_EVTCHN(evtchn) && test_evtchn(evtchn); |
19840 | +} | |
cc90b958 | 19841 | + |
00e5a55c BS |
19842 | +/* Poll waiting for an irq to become pending. In the usual case, the |
19843 | + irq will be disabled so it won't deliver an interrupt. */ | |
19844 | +void xen_poll_irq(int irq) | |
19845 | +{ | |
19846 | + evtchn_port_t evtchn = evtchn_from_irq(irq); | |
cc90b958 | 19847 | + |
00e5a55c BS |
19848 | + if (VALID_EVTCHN(evtchn) |
19849 | + && HYPERVISOR_poll_no_timeout(&evtchn, 1)) | |
19850 | + BUG(); | |
19851 | +} | |
cc90b958 | 19852 | + |
00e5a55c BS |
19853 | static void restore_cpu_virqs(unsigned int cpu) |
19854 | { | |
19855 | struct evtchn_bind_virq bind_virq; | |
19856 | @@ -1022,8 +1056,8 @@ static void restore_cpu_ipis(unsigned in | |
19857 | bind_evtchn_to_cpu(evtchn, cpu); | |
19858 | ||
19859 | /* Ready for use. */ | |
19860 | - unmask_evtchn(evtchn); | |
19861 | - | |
19862 | + if (!(irq_desc[irq].status & IRQ_DISABLED)) | |
19863 | + unmask_evtchn(evtchn); | |
19864 | } | |
19865 | } | |
19866 | ||
19867 | --- sle11-2009-05-14.orig/drivers/xen/core/hypervisor_sysfs.c 2008-12-15 11:27:22.000000000 +0100 | |
19868 | +++ sle11-2009-05-14/drivers/xen/core/hypervisor_sysfs.c 2009-03-16 16:33:40.000000000 +0100 | |
19869 | @@ -50,7 +50,7 @@ static int __init hypervisor_subsys_init | |
19870 | if (!is_running_on_xen()) | |
19871 | return -ENODEV; | |
19872 | ||
19873 | - hypervisor_subsys.kobj.ktype = &hyp_sysfs_kobj_type; | |
19874 | + hypervisor_kobj->ktype = &hyp_sysfs_kobj_type; | |
19875 | return 0; | |
19876 | } | |
19877 | ||
19878 | --- sle11-2009-05-14.orig/drivers/xen/core/smpboot.c 2009-02-16 16:18:36.000000000 +0100 | |
19879 | +++ sle11-2009-05-14/drivers/xen/core/smpboot.c 2009-03-16 16:33:40.000000000 +0100 | |
19880 | @@ -135,6 +135,10 @@ static int __cpuinit xen_smp_intr_init(u | |
19881 | goto fail; | |
19882 | per_cpu(callfunc_irq, cpu) = rc; | |
19883 | ||
19884 | + rc = xen_spinlock_init(cpu); | |
19885 | + if (rc < 0) | |
19886 | + goto fail; | |
cc90b958 | 19887 | + |
00e5a55c BS |
19888 | if ((cpu != 0) && ((rc = local_setup_timer(cpu)) != 0)) |
19889 | goto fail; | |
19890 | ||
19891 | @@ -145,6 +149,7 @@ static int __cpuinit xen_smp_intr_init(u | |
19892 | unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); | |
19893 | if (per_cpu(callfunc_irq, cpu) >= 0) | |
19894 | unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); | |
19895 | + xen_spinlock_cleanup(cpu); | |
19896 | return rc; | |
19897 | } | |
19898 | ||
19899 | @@ -156,6 +161,7 @@ static void xen_smp_intr_exit(unsigned i | |
19900 | ||
19901 | unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); | |
19902 | unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); | |
19903 | + xen_spinlock_cleanup(cpu); | |
19904 | } | |
19905 | #endif | |
19906 | ||
19907 | @@ -208,36 +214,25 @@ static void __cpuinit cpu_initialize_con | |
19908 | smp_trap_init(ctxt.trap_ctxt); | |
19909 | ||
19910 | ctxt.ldt_ents = 0; | |
19911 | - ctxt.gdt_ents = GDT_SIZE / 8; | |
19912 | - | |
19913 | -#ifdef __i386__ | |
19914 | ctxt.gdt_frames[0] = virt_to_mfn(get_cpu_gdt_table(cpu)); | |
19915 | + ctxt.gdt_ents = GDT_SIZE / 8; | |
19916 | ||
19917 | ctxt.user_regs.cs = __KERNEL_CS; | |
19918 | - ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs); | |
19919 | + ctxt.user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); | |
19920 | ||
19921 | ctxt.kernel_ss = __KERNEL_DS; | |
19922 | - ctxt.kernel_sp = idle->thread.esp0; | |
19923 | + ctxt.kernel_sp = idle->thread.sp0; | |
19924 | ||
19925 | - ctxt.event_callback_cs = __KERNEL_CS; | |
19926 | ctxt.event_callback_eip = (unsigned long)hypervisor_callback; | |
19927 | - ctxt.failsafe_callback_cs = __KERNEL_CS; | |
19928 | ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; | |
19929 | +#ifdef __i386__ | |
19930 | + ctxt.event_callback_cs = __KERNEL_CS; | |
19931 | + ctxt.failsafe_callback_cs = __KERNEL_CS; | |
19932 | ||
19933 | ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); | |
19934 | ||
19935 | ctxt.user_regs.fs = __KERNEL_PERCPU; | |
19936 | #else /* __x86_64__ */ | |
19937 | - ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[cpu].address); | |
19938 | - | |
19939 | - ctxt.user_regs.cs = __KERNEL_CS; | |
19940 | - ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); | |
19941 | - | |
19942 | - ctxt.kernel_ss = __KERNEL_DS; | |
19943 | - ctxt.kernel_sp = idle->thread.rsp0; | |
19944 | - | |
19945 | - ctxt.event_callback_eip = (unsigned long)hypervisor_callback; | |
19946 | - ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; | |
19947 | ctxt.syscall_callback_eip = (unsigned long)system_call; | |
19948 | ||
19949 | ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); | |
19950 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
19951 | +++ sle11-2009-05-14/drivers/xen/core/spinlock.c 2009-03-16 16:33:40.000000000 +0100 | |
19952 | @@ -0,0 +1,161 @@ | |
19953 | +/* | |
19954 | + * Xen spinlock functions | |
19955 | + * | |
19956 | + * See arch/x86/xen/smp.c for copyright and credits for derived | |
19957 | + * portions of this file. | |
19958 | + */ | |
cc90b958 | 19959 | + |
00e5a55c BS |
19960 | +#include <linux/init.h> |
19961 | +#include <linux/irq.h> | |
19962 | +#include <linux/kernel.h> | |
19963 | +#include <linux/kernel_stat.h> | |
19964 | +#include <linux/module.h> | |
19965 | +#include <xen/evtchn.h> | |
cc90b958 | 19966 | + |
00e5a55c | 19967 | +extern irqreturn_t smp_reschedule_interrupt(int, void *); |
cc90b958 | 19968 | + |
00e5a55c BS |
19969 | +static DEFINE_PER_CPU(int, spinlock_irq) = -1; |
19970 | +static char spinlock_name[NR_CPUS][15]; | |
cc90b958 | 19971 | + |
00e5a55c BS |
19972 | +struct spinning { |
19973 | + raw_spinlock_t *lock; | |
19974 | + unsigned int ticket; | |
19975 | + struct spinning *prev; | |
19976 | +}; | |
19977 | +static DEFINE_PER_CPU(struct spinning *, spinning); | |
cc90b958 | 19978 | +/* |
00e5a55c BS |
19979 | + * Protect removal of objects: Addition can be done lockless, and even |
19980 | + * removal itself doesn't need protection - what needs to be prevented is | |
19981 | + * removed objects going out of scope (as they're allocated on the stack). | |
cc90b958 | 19982 | + */ |
00e5a55c BS |
19983 | +static DEFINE_PER_CPU(raw_rwlock_t, spinning_rm_lock) = __RAW_RW_LOCK_UNLOCKED; |
19984 | + | |
19985 | +int __cpuinit xen_spinlock_init(unsigned int cpu) | |
cc90b958 | 19986 | +{ |
00e5a55c BS |
19987 | + int rc; |
19988 | + | |
19989 | + sprintf(spinlock_name[cpu], "spinlock%u", cpu); | |
19990 | + rc = bind_ipi_to_irqhandler(SPIN_UNLOCK_VECTOR, | |
19991 | + cpu, | |
19992 | + smp_reschedule_interrupt, | |
19993 | + IRQF_DISABLED|IRQF_NOBALANCING, | |
19994 | + spinlock_name[cpu], | |
19995 | + NULL); | |
19996 | + if (rc < 0) | |
19997 | + return rc; | |
19998 | + | |
19999 | + disable_irq(rc); /* make sure it's never delivered */ | |
20000 | + per_cpu(spinlock_irq, cpu) = rc; | |
20001 | + | |
20002 | + return 0; | |
cc90b958 BS |
20003 | +} |
20004 | + | |
00e5a55c | 20005 | +void __cpuinit xen_spinlock_cleanup(unsigned int cpu) |
cc90b958 | 20006 | +{ |
00e5a55c BS |
20007 | + if (per_cpu(spinlock_irq, cpu) >= 0) |
20008 | + unbind_from_irqhandler(per_cpu(spinlock_irq, cpu), NULL); | |
20009 | + per_cpu(spinlock_irq, cpu) = -1; | |
cc90b958 BS |
20010 | +} |
20011 | + | |
00e5a55c BS |
20012 | +int xen_spin_wait(raw_spinlock_t *lock, unsigned int token) |
20013 | +{ | |
20014 | + int rc = 0, irq = __get_cpu_var(spinlock_irq); | |
20015 | + raw_rwlock_t *rm_lock; | |
20016 | + unsigned long flags; | |
20017 | + struct spinning spinning; | |
cc90b958 | 20018 | + |
00e5a55c BS |
20019 | + /* If kicker interrupt not initialized yet, just spin. */ |
20020 | + if (unlikely(irq < 0)) | |
20021 | + return 0; | |
cc90b958 | 20022 | + |
00e5a55c | 20023 | + token >>= TICKET_SHIFT; |
cc90b958 | 20024 | + |
00e5a55c BS |
20025 | + /* announce we're spinning */ |
20026 | + spinning.ticket = token; | |
20027 | + spinning.lock = lock; | |
20028 | + spinning.prev = __get_cpu_var(spinning); | |
20029 | + smp_wmb(); | |
20030 | + __get_cpu_var(spinning) = &spinning; | |
cc90b958 | 20031 | + |
00e5a55c BS |
20032 | + /* clear pending */ |
20033 | + xen_clear_irq_pending(irq); | |
cc90b958 | 20034 | + |
00e5a55c BS |
20035 | + do { |
20036 | + /* Check again to make sure it didn't become free while | |
20037 | + * we weren't looking. */ | |
20038 | + if ((lock->slock & ((1U << TICKET_SHIFT) - 1)) == token) { | |
20039 | + /* If we interrupted another spinlock while it was | |
20040 | + * blocking, make sure it doesn't block (again) | |
20041 | + * without rechecking the lock. */ | |
20042 | + if (spinning.prev) | |
20043 | + xen_set_irq_pending(irq); | |
20044 | + rc = 1; | |
20045 | + break; | |
20046 | + } | |
cc90b958 | 20047 | + |
00e5a55c BS |
20048 | + /* block until irq becomes pending */ |
20049 | + xen_poll_irq(irq); | |
20050 | + } while (!xen_test_irq_pending(irq)); | |
cc90b958 | 20051 | + |
00e5a55c BS |
20052 | + /* Leave the irq pending so that any interrupted blocker will |
20053 | + * re-check. */ | |
20054 | + kstat_this_cpu.irqs[irq] += !rc; | |
cc90b958 | 20055 | + |
00e5a55c BS |
20056 | + /* announce we're done */ |
20057 | + __get_cpu_var(spinning) = spinning.prev; | |
20058 | + rm_lock = &__get_cpu_var(spinning_rm_lock); | |
20059 | + raw_local_irq_save(flags); | |
20060 | + __raw_write_lock(rm_lock); | |
20061 | + __raw_write_unlock(rm_lock); | |
20062 | + raw_local_irq_restore(flags); | |
cc90b958 | 20063 | + |
00e5a55c BS |
20064 | + return rc; |
20065 | +} | |
20066 | +EXPORT_SYMBOL(xen_spin_wait); | |
cc90b958 | 20067 | + |
00e5a55c | 20068 | +unsigned int xen_spin_adjust(raw_spinlock_t *lock, unsigned int token) |
cc90b958 | 20069 | +{ |
00e5a55c | 20070 | + return token;//todo |
cc90b958 | 20071 | +} |
00e5a55c | 20072 | +EXPORT_SYMBOL(xen_spin_adjust); |
cc90b958 | 20073 | + |
00e5a55c BS |
20074 | +int xen_spin_wait_flags(raw_spinlock_t *lock, unsigned int *token, |
20075 | + unsigned int flags) | |
cc90b958 | 20076 | +{ |
00e5a55c | 20077 | + return xen_spin_wait(lock, *token);//todo |
cc90b958 | 20078 | +} |
00e5a55c | 20079 | +EXPORT_SYMBOL(xen_spin_wait_flags); |
cc90b958 | 20080 | + |
00e5a55c BS |
20081 | +void xen_spin_kick(raw_spinlock_t *lock, unsigned int token) |
20082 | +{ | |
20083 | + unsigned int cpu; | |
cc90b958 | 20084 | + |
00e5a55c BS |
20085 | + token &= (1U << TICKET_SHIFT) - 1; |
20086 | + for_each_online_cpu(cpu) { | |
20087 | + raw_rwlock_t *rm_lock; | |
20088 | + unsigned long flags; | |
20089 | + struct spinning *spinning; | |
cc90b958 | 20090 | + |
00e5a55c BS |
20091 | + if (cpu == raw_smp_processor_id()) |
20092 | + continue; | |
cc90b958 | 20093 | + |
00e5a55c BS |
20094 | + rm_lock = &per_cpu(spinning_rm_lock, cpu); |
20095 | + raw_local_irq_save(flags); | |
20096 | + __raw_read_lock(rm_lock); | |
cc90b958 | 20097 | + |
00e5a55c BS |
20098 | + spinning = per_cpu(spinning, cpu); |
20099 | + smp_rmb(); | |
20100 | + if (spinning | |
20101 | + && (spinning->lock != lock || spinning->ticket != token)) | |
20102 | + spinning = NULL; | |
cc90b958 | 20103 | + |
00e5a55c BS |
20104 | + __raw_read_unlock(rm_lock); |
20105 | + raw_local_irq_restore(flags); | |
cc90b958 | 20106 | + |
00e5a55c BS |
20107 | + if (unlikely(spinning)) { |
20108 | + notify_remote_via_irq(per_cpu(spinlock_irq, cpu)); | |
20109 | + return; | |
20110 | + } | |
20111 | + } | |
20112 | +} | |
20113 | +EXPORT_SYMBOL(xen_spin_kick); | |
20114 | --- sle11-2009-05-14.orig/drivers/xen/core/xen_sysfs.c 2008-12-15 11:27:22.000000000 +0100 | |
20115 | +++ sle11-2009-05-14/drivers/xen/core/xen_sysfs.c 2009-03-16 16:33:40.000000000 +0100 | |
20116 | @@ -29,12 +29,12 @@ HYPERVISOR_ATTR_RO(type); | |
cc90b958 | 20117 | |
00e5a55c BS |
20118 | static int __init xen_sysfs_type_init(void) |
20119 | { | |
20120 | - return sysfs_create_file(&hypervisor_subsys.kobj, &type_attr.attr); | |
20121 | + return sysfs_create_file(hypervisor_kobj, &type_attr.attr); | |
cc90b958 | 20122 | } |
cc90b958 | 20123 | |
00e5a55c BS |
20124 | static void xen_sysfs_type_destroy(void) |
20125 | { | |
20126 | - sysfs_remove_file(&hypervisor_subsys.kobj, &type_attr.attr); | |
20127 | + sysfs_remove_file(hypervisor_kobj, &type_attr.attr); | |
20128 | } | |
cc90b958 | 20129 | |
00e5a55c BS |
20130 | /* xen version attributes */ |
20131 | @@ -90,13 +90,12 @@ static struct attribute_group version_gr | |
cc90b958 | 20132 | |
00e5a55c | 20133 | static int __init xen_sysfs_version_init(void) |
cc90b958 | 20134 | { |
00e5a55c BS |
20135 | - return sysfs_create_group(&hypervisor_subsys.kobj, |
20136 | - &version_group); | |
20137 | + return sysfs_create_group(hypervisor_kobj, &version_group); | |
cc90b958 BS |
20138 | } |
20139 | ||
00e5a55c | 20140 | static void xen_sysfs_version_destroy(void) |
cc90b958 | 20141 | { |
00e5a55c BS |
20142 | - sysfs_remove_group(&hypervisor_subsys.kobj, &version_group); |
20143 | + sysfs_remove_group(hypervisor_kobj, &version_group); | |
cc90b958 BS |
20144 | } |
20145 | ||
00e5a55c BS |
20146 | /* UUID */ |
20147 | @@ -126,12 +125,12 @@ HYPERVISOR_ATTR_RO(uuid); | |
cc90b958 | 20148 | |
00e5a55c | 20149 | static int __init xen_sysfs_uuid_init(void) |
cc90b958 | 20150 | { |
00e5a55c BS |
20151 | - return sysfs_create_file(&hypervisor_subsys.kobj, &uuid_attr.attr); |
20152 | + return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr); | |
cc90b958 BS |
20153 | } |
20154 | ||
00e5a55c | 20155 | static void xen_sysfs_uuid_destroy(void) |
cc90b958 | 20156 | { |
00e5a55c BS |
20157 | - sysfs_remove_file(&hypervisor_subsys.kobj, &uuid_attr.attr); |
20158 | + sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr); | |
cc90b958 BS |
20159 | } |
20160 | ||
00e5a55c BS |
20161 | /* xen compilation attributes */ |
20162 | @@ -204,14 +203,12 @@ static struct attribute_group xen_compil | |
cc90b958 | 20163 | |
00e5a55c BS |
20164 | int __init static xen_compilation_init(void) |
20165 | { | |
20166 | - return sysfs_create_group(&hypervisor_subsys.kobj, | |
20167 | - &xen_compilation_group); | |
20168 | + return sysfs_create_group(hypervisor_kobj, &xen_compilation_group); | |
cc90b958 BS |
20169 | } |
20170 | ||
00e5a55c | 20171 | static void xen_compilation_destroy(void) |
cc90b958 | 20172 | { |
00e5a55c BS |
20173 | - sysfs_remove_group(&hypervisor_subsys.kobj, |
20174 | - &xen_compilation_group); | |
20175 | + sysfs_remove_group(hypervisor_kobj, &xen_compilation_group); | |
20176 | } | |
cc90b958 | 20177 | |
00e5a55c BS |
20178 | /* xen properties info */ |
20179 | @@ -325,14 +322,12 @@ static struct attribute_group xen_proper | |
cc90b958 | 20180 | |
00e5a55c BS |
20181 | static int __init xen_properties_init(void) |
20182 | { | |
20183 | - return sysfs_create_group(&hypervisor_subsys.kobj, | |
20184 | - &xen_properties_group); | |
20185 | + return sysfs_create_group(hypervisor_kobj, &xen_properties_group); | |
20186 | } | |
cc90b958 | 20187 | |
00e5a55c BS |
20188 | static void xen_properties_destroy(void) |
20189 | { | |
20190 | - sysfs_remove_group(&hypervisor_subsys.kobj, | |
20191 | - &xen_properties_group); | |
20192 | + sysfs_remove_group(hypervisor_kobj, &xen_properties_group); | |
20193 | } | |
cc90b958 | 20194 | |
00e5a55c BS |
20195 | #ifdef CONFIG_KEXEC |
20196 | @@ -350,13 +345,12 @@ HYPERVISOR_ATTR_RO(vmcoreinfo); | |
cc90b958 | 20197 | |
00e5a55c BS |
20198 | static int __init xen_sysfs_vmcoreinfo_init(void) |
20199 | { | |
20200 | - return sysfs_create_file(&hypervisor_subsys.kobj, | |
20201 | - &vmcoreinfo_attr.attr); | |
20202 | + return sysfs_create_file(hypervisor_kobj, &vmcoreinfo_attr.attr); | |
20203 | } | |
cc90b958 | 20204 | |
00e5a55c BS |
20205 | static void xen_sysfs_vmcoreinfo_destroy(void) |
20206 | { | |
20207 | - sysfs_remove_file(&hypervisor_subsys.kobj, &vmcoreinfo_attr.attr); | |
20208 | + sysfs_remove_file(hypervisor_kobj, &vmcoreinfo_attr.attr); | |
20209 | } | |
cc90b958 | 20210 | |
cc90b958 | 20211 | #endif |
00e5a55c BS |
20212 | --- sle11-2009-05-14.orig/drivers/xen/gntdev/gntdev.c 2009-03-04 11:28:34.000000000 +0100 |
20213 | +++ sle11-2009-05-14/drivers/xen/gntdev/gntdev.c 2009-03-16 16:33:40.000000000 +0100 | |
20214 | @@ -782,7 +782,7 @@ static pte_t gntdev_clear_pte(struct vm_ | |
20215 | op.status); | |
20216 | } else { | |
20217 | /* USING SHADOW PAGE TABLES. */ | |
20218 | - copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm); | |
20219 | + copy = xen_ptep_get_and_clear_full(vma, addr, ptep, is_fullmm); | |
20220 | } | |
cc90b958 | 20221 | |
00e5a55c BS |
20222 | /* Finally, we unmap the grant from kernel space. */ |
20223 | @@ -810,7 +810,7 @@ static pte_t gntdev_clear_pte(struct vm_ | |
20224 | >> PAGE_SHIFT, INVALID_P2M_ENTRY); | |
cc90b958 | 20225 | |
00e5a55c BS |
20226 | } else { |
20227 | - copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm); | |
20228 | + copy = xen_ptep_get_and_clear_full(vma, addr, ptep, is_fullmm); | |
20229 | } | |
cc90b958 | 20230 | |
00e5a55c BS |
20231 | return copy; |
20232 | --- sle11-2009-05-14.orig/drivers/xen/scsifront/scsifront.c 2009-02-16 16:18:36.000000000 +0100 | |
20233 | +++ sle11-2009-05-14/drivers/xen/scsifront/scsifront.c 2009-03-16 16:33:40.000000000 +0100 | |
20234 | @@ -260,19 +260,19 @@ static int map_data_for_request(struct v | |
20235 | return -ENOMEM; | |
20236 | } | |
cc90b958 | 20237 | |
00e5a55c BS |
20238 | - if (sc->use_sg) { |
20239 | + if (scsi_bufflen(sc)) { | |
20240 | /* quoted scsi_lib.c/scsi_req_map_sg . */ | |
20241 | - struct scatterlist *sg, *sgl = (struct scatterlist *)sc->request_buffer; | |
20242 | - unsigned int data_len = sc->request_bufflen; | |
20243 | + struct scatterlist *sg, *sgl = scsi_sglist(sc); | |
20244 | + unsigned int data_len = scsi_bufflen(sc); | |
cc90b958 | 20245 | |
00e5a55c BS |
20246 | - nr_pages = (sc->request_bufflen + sgl->offset + PAGE_SIZE - 1) >> PAGE_SHIFT; |
20247 | + nr_pages = (data_len + sgl->offset + PAGE_SIZE - 1) >> PAGE_SHIFT; | |
20248 | if (nr_pages > VSCSIIF_SG_TABLESIZE) { | |
20249 | printk(KERN_ERR "scsifront: Unable to map request_buffer for command!\n"); | |
20250 | ref_cnt = (-E2BIG); | |
20251 | goto big_to_sg; | |
20252 | } | |
cc90b958 | 20253 | |
00e5a55c BS |
20254 | - for_each_sg (sgl, sg, sc->use_sg, i) { |
20255 | + for_each_sg (sgl, sg, scsi_sg_count(sc), i) { | |
20256 | page = sg_page(sg); | |
20257 | off = sg->offset; | |
20258 | len = sg->length; | |
20259 | @@ -306,45 +306,6 @@ static int map_data_for_request(struct v | |
20260 | ref_cnt++; | |
20261 | } | |
20262 | } | |
20263 | - } else if (sc->request_bufflen) { | |
20264 | - unsigned long end = ((unsigned long)sc->request_buffer | |
20265 | - + sc->request_bufflen + PAGE_SIZE - 1) >> PAGE_SHIFT; | |
20266 | - unsigned long start = (unsigned long)sc->request_buffer >> PAGE_SHIFT; | |
20267 | - | |
20268 | - page = virt_to_page(sc->request_buffer); | |
20269 | - nr_pages = end - start; | |
20270 | - len = sc->request_bufflen; | |
20271 | - | |
20272 | - if (nr_pages > VSCSIIF_SG_TABLESIZE) { | |
20273 | - ref_cnt = (-E2BIG); | |
20274 | - goto big_to_sg; | |
20275 | - } | |
20276 | - | |
20277 | - buffer_pfn = page_to_phys(page) >> PAGE_SHIFT; | |
20278 | - | |
20279 | - off = offset_in_page((unsigned long)sc->request_buffer); | |
20280 | - for (i = 0; i < nr_pages; i++) { | |
20281 | - bytes = PAGE_SIZE - off; | |
20282 | - | |
20283 | - if (bytes > len) | |
20284 | - bytes = len; | |
20285 | - | |
20286 | - ref = gnttab_claim_grant_reference(&gref_head); | |
20287 | - BUG_ON(ref == -ENOSPC); | |
20288 | - | |
20289 | - gnttab_grant_foreign_access_ref(ref, info->dev->otherend_id, | |
20290 | - buffer_pfn, write); | |
cc90b958 | 20291 | - |
00e5a55c BS |
20292 | - info->shadow[id].gref[i] = ref; |
20293 | - ring_req->seg[i].gref = ref; | |
20294 | - ring_req->seg[i].offset = (uint16_t)off; | |
20295 | - ring_req->seg[i].length = (uint16_t)bytes; | |
20296 | - | |
20297 | - buffer_pfn++; | |
20298 | - len -= bytes; | |
20299 | - off = 0; | |
20300 | - ref_cnt++; | |
20301 | - } | |
20302 | } | |
cc90b958 | 20303 | |
00e5a55c BS |
20304 | big_to_sg: |
20305 | --- sle11-2009-05-14.orig/drivers/xen/xenoprof/xenoprofile.c 2009-03-11 15:39:38.000000000 +0100 | |
20306 | +++ sle11-2009-05-14/drivers/xen/xenoprof/xenoprofile.c 2009-03-16 16:33:40.000000000 +0100 | |
20307 | @@ -78,7 +78,7 @@ static int xenoprof_resume(struct sys_de | |
cc90b958 | 20308 | |
00e5a55c BS |
20309 | |
20310 | static struct sysdev_class oprofile_sysclass = { | |
20311 | - set_kset_name("oprofile"), | |
20312 | + .name = "oprofile", | |
20313 | .resume = xenoprof_resume, | |
20314 | .suspend = xenoprof_suspend | |
20315 | }; | |
20316 | --- sle11-2009-05-14.orig/include/asm-x86/e820.h 2009-05-14 10:56:29.000000000 +0200 | |
20317 | +++ sle11-2009-05-14/include/asm-x86/e820.h 2009-03-16 16:33:40.000000000 +0100 | |
20318 | @@ -127,7 +127,11 @@ extern char *memory_setup(void); | |
20319 | #endif /* __KERNEL__ */ | |
20320 | #endif /* __ASSEMBLY__ */ | |
20321 | ||
20322 | +#ifndef CONFIG_XEN | |
20323 | #define ISA_START_ADDRESS 0xa0000 | |
20324 | +#else | |
20325 | +#define ISA_START_ADDRESS 0 | |
cc90b958 | 20326 | +#endif |
00e5a55c BS |
20327 | #define ISA_END_ADDRESS 0x100000 |
20328 | #define is_ISA_range(s, e) ((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS) | |
cc90b958 | 20329 | |
00e5a55c BS |
20330 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/agp.h 2009-02-16 16:18:36.000000000 +0100 |
20331 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/agp.h 2009-03-16 16:33:40.000000000 +0100 | |
20332 | @@ -13,18 +13,13 @@ | |
20333 | * page. This avoids data corruption on some CPUs. | |
20334 | */ | |
cc90b958 | 20335 | |
00e5a55c BS |
20336 | -/* |
20337 | - * Caller's responsibility to call global_flush_tlb() for performance | |
20338 | - * reasons | |
20339 | - */ | |
20340 | #define map_page_into_agp(page) ( \ | |
20341 | xen_create_contiguous_region((unsigned long)page_address(page), 0, 32) \ | |
20342 | - ?: change_page_attr(page, 1, PAGE_KERNEL_NOCACHE)) | |
20343 | + ?: set_pages_uc(page, 1)) | |
20344 | #define unmap_page_from_agp(page) ( \ | |
20345 | xen_destroy_contiguous_region((unsigned long)page_address(page), 0), \ | |
20346 | /* only a fallback: xen_destroy_contiguous_region uses PAGE_KERNEL */ \ | |
20347 | - change_page_attr(page, 1, PAGE_KERNEL)) | |
20348 | -#define flush_agp_mappings() global_flush_tlb() | |
20349 | + set_pages_wb(page, 1)) | |
20350 | ||
20351 | /* | |
20352 | * Could use CLFLUSH here if the cpu supports it. But then it would | |
20353 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/desc.h 2009-02-16 16:18:36.000000000 +0100 | |
20354 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/desc.h 2009-03-16 16:33:40.000000000 +0100 | |
20355 | @@ -1,5 +1,404 @@ | |
20356 | +#ifndef _ASM_DESC_H_ | |
20357 | +#define _ASM_DESC_H_ | |
cc90b958 | 20358 | + |
00e5a55c BS |
20359 | +#ifndef __ASSEMBLY__ |
20360 | +#include <asm/desc_defs.h> | |
20361 | +#include <asm/ldt.h> | |
20362 | +#include <asm/mmu.h> | |
20363 | +#include <linux/smp.h> | |
cc90b958 | 20364 | + |
00e5a55c BS |
20365 | +static inline void fill_ldt(struct desc_struct *desc, |
20366 | + const struct user_desc *info) | |
20367 | +{ | |
20368 | + desc->limit0 = info->limit & 0x0ffff; | |
20369 | + desc->base0 = info->base_addr & 0x0000ffff; | |
cc90b958 | 20370 | + |
00e5a55c BS |
20371 | + desc->base1 = (info->base_addr & 0x00ff0000) >> 16; |
20372 | + desc->type = (info->read_exec_only ^ 1) << 1; | |
20373 | + desc->type |= info->contents << 2; | |
20374 | + desc->s = 1; | |
20375 | + desc->dpl = 0x3; | |
20376 | + desc->p = info->seg_not_present ^ 1; | |
20377 | + desc->limit = (info->limit & 0xf0000) >> 16; | |
20378 | + desc->avl = info->useable; | |
20379 | + desc->d = info->seg_32bit; | |
20380 | + desc->g = info->limit_in_pages; | |
20381 | + desc->base2 = (info->base_addr & 0xff000000) >> 24; | |
20382 | +} | |
cc90b958 | 20383 | + |
00e5a55c BS |
20384 | +#ifndef CONFIG_X86_NO_IDT |
20385 | +extern struct desc_ptr idt_descr; | |
20386 | +extern gate_desc idt_table[]; | |
20387 | +#endif | |
cc90b958 | 20388 | + |
00e5a55c BS |
20389 | +#ifdef CONFIG_X86_64 |
20390 | +extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; | |
20391 | +extern struct desc_ptr cpu_gdt_descr[]; | |
20392 | +/* the cpu gdt accessor */ | |
20393 | +#define get_cpu_gdt_table(x) ((struct desc_struct *)cpu_gdt_descr[x].address) | |
cc90b958 | 20394 | + |
00e5a55c BS |
20395 | +static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func, |
20396 | + unsigned dpl, unsigned ist, unsigned seg) | |
20397 | +{ | |
20398 | + gate->offset_low = PTR_LOW(func); | |
20399 | + gate->segment = __KERNEL_CS; | |
20400 | + gate->ist = ist; | |
20401 | + gate->p = 1; | |
20402 | + gate->dpl = dpl; | |
20403 | + gate->zero0 = 0; | |
20404 | + gate->zero1 = 0; | |
20405 | + gate->type = type; | |
20406 | + gate->offset_middle = PTR_MIDDLE(func); | |
20407 | + gate->offset_high = PTR_HIGH(func); | |
20408 | +} | |
cc90b958 | 20409 | + |
00e5a55c BS |
20410 | +#else |
20411 | +struct gdt_page { | |
20412 | + struct desc_struct gdt[GDT_ENTRIES]; | |
20413 | +} __attribute__((aligned(PAGE_SIZE))); | |
20414 | +DECLARE_PER_CPU(struct gdt_page, gdt_page); | |
cc90b958 | 20415 | + |
00e5a55c BS |
20416 | +static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) |
20417 | +{ | |
20418 | + return per_cpu(gdt_page, cpu).gdt; | |
20419 | +} | |
cc90b958 | 20420 | + |
00e5a55c BS |
20421 | +static inline void pack_gate(gate_desc *gate, unsigned char type, |
20422 | + unsigned long base, unsigned dpl, unsigned flags, unsigned short seg) | |
cc90b958 | 20423 | + |
00e5a55c BS |
20424 | +{ |
20425 | + gate->a = (seg << 16) | (base & 0xffff); | |
20426 | + gate->b = (base & 0xffff0000) | | |
20427 | + (((0x80 | type | (dpl << 5)) & 0xff) << 8); | |
20428 | +} | |
cc90b958 | 20429 | + |
00e5a55c | 20430 | +#endif |
cc90b958 | 20431 | + |
00e5a55c BS |
20432 | +static inline int desc_empty(const void *ptr) |
20433 | +{ | |
20434 | + const u32 *desc = ptr; | |
20435 | + return !(desc[0] | desc[1]); | |
20436 | +} | |
cc90b958 | 20437 | + |
00e5a55c BS |
20438 | +#ifndef CONFIG_XEN |
20439 | +#define load_TR_desc() native_load_tr_desc() | |
20440 | +#define load_gdt(dtr) native_load_gdt(dtr) | |
20441 | +#define load_idt(dtr) native_load_idt(dtr) | |
20442 | +#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr)) | |
20443 | +#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt)) | |
cc90b958 | 20444 | + |
00e5a55c BS |
20445 | +#define store_gdt(dtr) native_store_gdt(dtr) |
20446 | +#define store_idt(dtr) native_store_idt(dtr) | |
20447 | +#define store_tr(tr) (tr = native_store_tr()) | |
20448 | +#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) | |
cc90b958 | 20449 | + |
00e5a55c BS |
20450 | +#define load_TLS(t, cpu) native_load_tls(t, cpu) |
20451 | +#define set_ldt native_set_ldt | |
cc90b958 | 20452 | + |
00e5a55c BS |
20453 | +#define write_ldt_entry(dt, entry, desc) \ |
20454 | + native_write_ldt_entry(dt, entry, desc) | |
20455 | +#define write_gdt_entry(dt, entry, desc, type) \ | |
20456 | + native_write_gdt_entry(dt, entry, desc, type) | |
20457 | +#define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g) | |
cc90b958 | 20458 | + |
00e5a55c BS |
20459 | +static inline void native_write_idt_entry(gate_desc *idt, int entry, |
20460 | + const gate_desc *gate) | |
cc90b958 | 20461 | +{ |
00e5a55c | 20462 | + memcpy(&idt[entry], gate, sizeof(*gate)); |
cc90b958 BS |
20463 | +} |
20464 | + | |
00e5a55c BS |
20465 | +static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, |
20466 | + const void *desc) | |
cc90b958 | 20467 | +{ |
00e5a55c | 20468 | + memcpy(&ldt[entry], desc, 8); |
cc90b958 BS |
20469 | +} |
20470 | + | |
00e5a55c BS |
20471 | +static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry, |
20472 | + const void *desc, int type) | |
20473 | +{ | |
20474 | + unsigned int size; | |
20475 | + switch (type) { | |
20476 | + case DESC_TSS: | |
20477 | + size = sizeof(tss_desc); | |
20478 | + break; | |
20479 | + case DESC_LDT: | |
20480 | + size = sizeof(ldt_desc); | |
20481 | + break; | |
20482 | + default: | |
20483 | + size = sizeof(struct desc_struct); | |
20484 | + break; | |
20485 | + } | |
20486 | + memcpy(&gdt[entry], desc, size); | |
20487 | +} | |
20488 | +#endif | |
cc90b958 | 20489 | + |
00e5a55c BS |
20490 | +static inline void pack_descriptor(struct desc_struct *desc, unsigned long base, |
20491 | + unsigned long limit, unsigned char type, | |
20492 | + unsigned char flags) | |
cc90b958 | 20493 | +{ |
00e5a55c BS |
20494 | + desc->a = ((base & 0xffff) << 16) | (limit & 0xffff); |
20495 | + desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) | | |
20496 | + (limit & 0x000f0000) | ((type & 0xff) << 8) | | |
20497 | + ((flags & 0xf) << 20); | |
20498 | + desc->p = 1; | |
cc90b958 BS |
20499 | +} |
20500 | + | |
00e5a55c BS |
20501 | + |
20502 | +#ifndef CONFIG_XEN | |
20503 | +static inline void set_tssldt_descriptor(void *d, unsigned long addr, | |
20504 | + unsigned type, unsigned size) | |
cc90b958 | 20505 | +{ |
00e5a55c BS |
20506 | +#ifdef CONFIG_X86_64 |
20507 | + struct ldttss_desc64 *desc = d; | |
20508 | + memset(desc, 0, sizeof(*desc)); | |
20509 | + desc->limit0 = size & 0xFFFF; | |
20510 | + desc->base0 = PTR_LOW(addr); | |
20511 | + desc->base1 = PTR_MIDDLE(addr) & 0xFF; | |
20512 | + desc->type = type; | |
20513 | + desc->p = 1; | |
20514 | + desc->limit1 = (size >> 16) & 0xF; | |
20515 | + desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF; | |
20516 | + desc->base3 = PTR_HIGH(addr); | |
cc90b958 | 20517 | +#else |
00e5a55c BS |
20518 | + |
20519 | + pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0); | |
cc90b958 | 20520 | +#endif |
cc90b958 BS |
20521 | +} |
20522 | + | |
00e5a55c | 20523 | +static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) |
cc90b958 | 20524 | +{ |
00e5a55c BS |
20525 | + struct desc_struct *d = get_cpu_gdt_table(cpu); |
20526 | + tss_desc tss; | |
20527 | + | |
20528 | + /* | |
20529 | + * sizeof(unsigned long) coming from an extra "long" at the end | |
20530 | + * of the iobitmap. See tss_struct definition in processor.h | |
20531 | + * | |
20532 | + * -1? seg base+limit should be pointing to the address of the | |
20533 | + * last valid byte | |
20534 | + */ | |
20535 | + set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS, | |
20536 | + IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1); | |
20537 | + write_gdt_entry(d, entry, &tss, DESC_TSS); | |
cc90b958 BS |
20538 | +} |
20539 | + | |
00e5a55c BS |
20540 | +#define set_tss_desc(cpu, addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) |
20541 | + | |
20542 | +static inline void native_set_ldt(const void *addr, unsigned int entries) | |
cc90b958 | 20543 | +{ |
00e5a55c BS |
20544 | + if (likely(entries == 0)) |
20545 | + __asm__ __volatile__("lldt %w0"::"q" (0)); | |
20546 | + else { | |
20547 | + unsigned cpu = smp_processor_id(); | |
20548 | + ldt_desc ldt; | |
20549 | + | |
20550 | + set_tssldt_descriptor(&ldt, (unsigned long)addr, | |
20551 | + DESC_LDT, entries * sizeof(ldt) - 1); | |
20552 | + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, | |
20553 | + &ldt, DESC_LDT); | |
20554 | + __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); | |
20555 | + } | |
cc90b958 | 20556 | +} |
cc90b958 | 20557 | + |
00e5a55c | 20558 | +static inline void native_load_tr_desc(void) |
cc90b958 | 20559 | +{ |
00e5a55c | 20560 | + asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); |
cc90b958 | 20561 | +} |
cc90b958 | 20562 | + |
00e5a55c | 20563 | +static inline void native_load_gdt(const struct desc_ptr *dtr) |
cc90b958 | 20564 | +{ |
00e5a55c | 20565 | + asm volatile("lgdt %0"::"m" (*dtr)); |
cc90b958 BS |
20566 | +} |
20567 | + | |
00e5a55c | 20568 | +static inline void native_load_idt(const struct desc_ptr *dtr) |
cc90b958 | 20569 | +{ |
00e5a55c | 20570 | + asm volatile("lidt %0"::"m" (*dtr)); |
cc90b958 | 20571 | +} |
cc90b958 | 20572 | + |
00e5a55c | 20573 | +static inline void native_store_gdt(struct desc_ptr *dtr) |
cc90b958 | 20574 | +{ |
00e5a55c | 20575 | + asm volatile("sgdt %0":"=m" (*dtr)); |
cc90b958 | 20576 | +} |
cc90b958 | 20577 | + |
00e5a55c | 20578 | +static inline void native_store_idt(struct desc_ptr *dtr) |
cc90b958 | 20579 | +{ |
00e5a55c | 20580 | + asm volatile("sidt %0":"=m" (*dtr)); |
cc90b958 BS |
20581 | +} |
20582 | + | |
00e5a55c | 20583 | +static inline unsigned long native_store_tr(void) |
cc90b958 | 20584 | +{ |
00e5a55c BS |
20585 | + unsigned long tr; |
20586 | + asm volatile("str %0":"=r" (tr)); | |
20587 | + return tr; | |
cc90b958 BS |
20588 | +} |
20589 | + | |
00e5a55c | 20590 | +static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) |
cc90b958 | 20591 | +{ |
00e5a55c BS |
20592 | + unsigned int i; |
20593 | + struct desc_struct *gdt = get_cpu_gdt_table(cpu); | |
cc90b958 | 20594 | + |
00e5a55c BS |
20595 | + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) |
20596 | + gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; | |
cc90b958 | 20597 | +} |
00e5a55c BS |
20598 | +#else |
20599 | +#define load_TLS(t, cpu) xen_load_tls(t, cpu) | |
20600 | +#define set_ldt xen_set_ldt | |
20601 | + | |
20602 | +extern int write_ldt_entry(struct desc_struct *ldt, int entry, | |
20603 | + const void *desc); | |
20604 | +extern int write_gdt_entry(struct desc_struct *gdt, int entry, | |
20605 | + const void *desc, int type); | |
20606 | + | |
20607 | +static inline void xen_load_tls(struct thread_struct *t, unsigned int cpu) | |
cc90b958 | 20608 | +{ |
00e5a55c BS |
20609 | + unsigned int i; |
20610 | + struct desc_struct *gdt = get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN; | |
20611 | + | |
20612 | + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) | |
20613 | + if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]), | |
20614 | + *(u64 *)&t->tls_array[i])) | |
20615 | + BUG(); | |
cc90b958 | 20616 | +} |
00e5a55c | 20617 | +#endif |
cc90b958 | 20618 | + |
00e5a55c BS |
20619 | +#define _LDT_empty(info) (\ |
20620 | + (info)->base_addr == 0 && \ | |
20621 | + (info)->limit == 0 && \ | |
20622 | + (info)->contents == 0 && \ | |
20623 | + (info)->read_exec_only == 1 && \ | |
20624 | + (info)->seg_32bit == 0 && \ | |
20625 | + (info)->limit_in_pages == 0 && \ | |
20626 | + (info)->seg_not_present == 1 && \ | |
20627 | + (info)->useable == 0) | |
20628 | + | |
20629 | +#ifdef CONFIG_X86_64 | |
20630 | +#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0)) | |
20631 | +#else | |
20632 | +#define LDT_empty(info) (_LDT_empty(info)) | |
20633 | +#endif | |
20634 | + | |
20635 | +static inline void clear_LDT(void) | |
cc90b958 | 20636 | +{ |
00e5a55c BS |
20637 | + set_ldt(NULL, 0); |
20638 | +} | |
cc90b958 | 20639 | + |
00e5a55c BS |
20640 | +/* |
20641 | + * load one particular LDT into the current CPU | |
20642 | + */ | |
20643 | +static inline void load_LDT_nolock(mm_context_t *pc) | |
20644 | +{ | |
20645 | + set_ldt(pc->ldt, pc->size); | |
cc90b958 | 20646 | +} |
00e5a55c BS |
20647 | + |
20648 | +static inline void load_LDT(mm_context_t *pc) | |
cc90b958 | 20649 | +{ |
00e5a55c BS |
20650 | + preempt_disable(); |
20651 | + load_LDT_nolock(pc); | |
20652 | + preempt_enable(); | |
cc90b958 BS |
20653 | +} |
20654 | + | |
00e5a55c BS |
20655 | +static inline unsigned long get_desc_base(const struct desc_struct *desc) |
20656 | +{ | |
20657 | + return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); | |
20658 | +} | |
cc90b958 | 20659 | + |
00e5a55c | 20660 | +static inline unsigned long get_desc_limit(const struct desc_struct *desc) |
cc90b958 | 20661 | +{ |
00e5a55c | 20662 | + return desc->limit0 | (desc->limit << 16); |
cc90b958 BS |
20663 | +} |
20664 | + | |
00e5a55c BS |
20665 | +#ifndef CONFIG_X86_NO_IDT |
20666 | +static inline void _set_gate(int gate, unsigned type, void *addr, | |
20667 | + unsigned dpl, unsigned ist, unsigned seg) | |
20668 | +{ | |
20669 | + gate_desc s; | |
20670 | + pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg); | |
20671 | + /* | |
20672 | + * does not need to be atomic because it is only done once at | |
20673 | + * setup time | |
20674 | + */ | |
20675 | + write_idt_entry(idt_table, gate, &s); | |
20676 | +} | |
cc90b958 | 20677 | + |
00e5a55c BS |
20678 | +/* |
20679 | + * This needs to use 'idt_table' rather than 'idt', and | |
20680 | + * thus use the _nonmapped_ version of the IDT, as the | |
20681 | + * Pentium F0 0F bugfix can have resulted in the mapped | |
20682 | + * IDT being write-protected. | |
20683 | + */ | |
20684 | +static inline void set_intr_gate(unsigned int n, void *addr) | |
cc90b958 | 20685 | +{ |
00e5a55c BS |
20686 | + BUG_ON((unsigned)n > 0xFF); |
20687 | + _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); | |
20688 | +} | |
cc90b958 | 20689 | + |
00e5a55c BS |
20690 | +/* |
20691 | + * This routine sets up an interrupt gate at directory privilege level 3. | |
20692 | + */ | |
20693 | +static inline void set_system_intr_gate(unsigned int n, void *addr) | |
20694 | +{ | |
20695 | + BUG_ON((unsigned)n > 0xFF); | |
20696 | + _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS); | |
20697 | +} | |
cc90b958 | 20698 | + |
00e5a55c BS |
20699 | +static inline void set_trap_gate(unsigned int n, void *addr) |
20700 | +{ | |
20701 | + BUG_ON((unsigned)n > 0xFF); | |
20702 | + _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS); | |
20703 | +} | |
cc90b958 | 20704 | + |
00e5a55c BS |
20705 | +static inline void set_system_gate(unsigned int n, void *addr) |
20706 | +{ | |
20707 | + BUG_ON((unsigned)n > 0xFF); | |
20708 | #ifdef CONFIG_X86_32 | |
20709 | -# include "desc_32.h" | |
20710 | + _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS); | |
20711 | +#else | |
20712 | + _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS); | |
20713 | +#endif | |
cc90b958 | 20714 | +} |
cc90b958 | 20715 | + |
00e5a55c BS |
20716 | +static inline void set_task_gate(unsigned int n, unsigned int gdt_entry) |
20717 | +{ | |
20718 | + BUG_ON((unsigned)n > 0xFF); | |
20719 | + _set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3)); | |
20720 | +} | |
cc90b958 | 20721 | + |
00e5a55c | 20722 | +static inline void set_intr_gate_ist(int n, void *addr, unsigned ist) |
cc90b958 | 20723 | +{ |
00e5a55c BS |
20724 | + BUG_ON((unsigned)n > 0xFF); |
20725 | + _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS); | |
cc90b958 BS |
20726 | +} |
20727 | + | |
00e5a55c BS |
20728 | +static inline void set_system_gate_ist(int n, void *addr, unsigned ist) |
20729 | +{ | |
20730 | + BUG_ON((unsigned)n > 0xFF); | |
20731 | + _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); | |
20732 | +} | |
20733 | +#endif | |
cc90b958 | 20734 | + |
cc90b958 | 20735 | #else |
00e5a55c BS |
20736 | -# include "desc_64.h" |
20737 | +/* | |
20738 | + * GET_DESC_BASE reads the descriptor base of the specified segment. | |
20739 | + * | |
20740 | + * Args: | |
20741 | + * idx - descriptor index | |
20742 | + * gdt - GDT pointer | |
20743 | + * base - 32bit register to which the base will be written | |
20744 | + * lo_w - lo word of the "base" register | |
20745 | + * lo_b - lo byte of the "base" register | |
20746 | + * hi_b - hi byte of the low word of the "base" register | |
20747 | + * | |
20748 | + * Example: | |
20749 | + * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) | |
20750 | + * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax. | |
20751 | + */ | |
20752 | +#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \ | |
20753 | + movb idx*8+4(gdt), lo_b; \ | |
20754 | + movb idx*8+7(gdt), hi_b; \ | |
20755 | + shll $16, base; \ | |
20756 | + movw idx*8+2(gdt), lo_w; | |
20757 | + | |
20758 | + | |
20759 | +#endif /* __ASSEMBLY__ */ | |
20760 | + | |
20761 | #endif | |
20762 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/desc_32.h 2008-12-15 11:27:22.000000000 +0100 | |
20763 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
20764 | @@ -1,262 +0,0 @@ | |
20765 | -#ifndef __ARCH_DESC_H | |
20766 | -#define __ARCH_DESC_H | |
cc90b958 | 20767 | - |
00e5a55c BS |
20768 | -#include <asm/ldt.h> |
20769 | -#include <asm/segment.h> | |
cc90b958 | 20770 | - |
00e5a55c | 20771 | -#ifndef __ASSEMBLY__ |
cc90b958 | 20772 | - |
00e5a55c BS |
20773 | -#include <linux/preempt.h> |
20774 | -#include <linux/smp.h> | |
cc90b958 | 20775 | - |
00e5a55c | 20776 | -#include <asm/mmu.h> |
cc90b958 | 20777 | - |
00e5a55c BS |
20778 | -struct Xgt_desc_struct { |
20779 | - unsigned short size; | |
20780 | - unsigned long address __attribute__((packed)); | |
20781 | - unsigned short pad; | |
20782 | -} __attribute__ ((packed)); | |
cc90b958 | 20783 | - |
00e5a55c BS |
20784 | -struct gdt_page |
20785 | -{ | |
20786 | - struct desc_struct gdt[GDT_ENTRIES]; | |
20787 | -} __attribute__((aligned(PAGE_SIZE))); | |
20788 | -DECLARE_PER_CPU(struct gdt_page, gdt_page); | |
cc90b958 | 20789 | - |
00e5a55c | 20790 | -static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) |
cc90b958 | 20791 | -{ |
00e5a55c | 20792 | - return per_cpu(gdt_page, cpu).gdt; |
cc90b958 BS |
20793 | -} |
20794 | - | |
00e5a55c BS |
20795 | -extern struct Xgt_desc_struct idt_descr; |
20796 | -extern struct desc_struct idt_table[]; | |
20797 | -extern void set_intr_gate(unsigned int irq, void * addr); | |
cc90b958 | 20798 | - |
00e5a55c BS |
20799 | -static inline void pack_descriptor(__u32 *a, __u32 *b, |
20800 | - unsigned long base, unsigned long limit, unsigned char type, unsigned char flags) | |
20801 | -{ | |
20802 | - *a = ((base & 0xffff) << 16) | (limit & 0xffff); | |
20803 | - *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) | | |
20804 | - (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20); | |
20805 | -} | |
cc90b958 | 20806 | - |
00e5a55c BS |
20807 | -static inline void pack_gate(__u32 *a, __u32 *b, |
20808 | - unsigned long base, unsigned short seg, unsigned char type, unsigned char flags) | |
cc90b958 | 20809 | -{ |
00e5a55c BS |
20810 | - *a = (seg << 16) | (base & 0xffff); |
20811 | - *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff); | |
cc90b958 BS |
20812 | -} |
20813 | - | |
00e5a55c BS |
20814 | -#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */ |
20815 | -#define DESCTYPE_TSS 0x89 /* present, system, DPL-0, 32-bit TSS */ | |
20816 | -#define DESCTYPE_TASK 0x85 /* present, system, DPL-0, task gate */ | |
20817 | -#define DESCTYPE_INT 0x8e /* present, system, DPL-0, interrupt gate */ | |
20818 | -#define DESCTYPE_TRAP 0x8f /* present, system, DPL-0, trap gate */ | |
20819 | -#define DESCTYPE_DPL3 0x60 /* DPL-3 */ | |
20820 | -#define DESCTYPE_S 0x10 /* !system */ | |
cc90b958 | 20821 | - |
00e5a55c BS |
20822 | -#ifndef CONFIG_XEN |
20823 | -#define load_TR_desc() native_load_tr_desc() | |
20824 | -#define load_gdt(dtr) native_load_gdt(dtr) | |
20825 | -#define load_idt(dtr) native_load_idt(dtr) | |
20826 | -#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr)) | |
20827 | -#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt)) | |
20828 | - | |
20829 | -#define store_gdt(dtr) native_store_gdt(dtr) | |
20830 | -#define store_idt(dtr) native_store_idt(dtr) | |
20831 | -#define store_tr(tr) (tr = native_store_tr()) | |
20832 | -#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) | |
20833 | - | |
20834 | -#define load_TLS(t, cpu) native_load_tls(t, cpu) | |
20835 | -#define set_ldt native_set_ldt | |
20836 | - | |
20837 | -#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | |
20838 | -#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | |
20839 | -#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | |
20840 | - | |
20841 | -static inline void write_dt_entry(struct desc_struct *dt, | |
20842 | - int entry, u32 entry_low, u32 entry_high) | |
cc90b958 | 20843 | -{ |
00e5a55c BS |
20844 | - dt[entry].a = entry_low; |
20845 | - dt[entry].b = entry_high; | |
cc90b958 BS |
20846 | -} |
20847 | - | |
00e5a55c | 20848 | -static inline void native_set_ldt(const void *addr, unsigned int entries) |
cc90b958 | 20849 | -{ |
00e5a55c BS |
20850 | - if (likely(entries == 0)) |
20851 | - __asm__ __volatile__("lldt %w0"::"q" (0)); | |
20852 | - else { | |
20853 | - unsigned cpu = smp_processor_id(); | |
20854 | - __u32 a, b; | |
cc90b958 | 20855 | - |
00e5a55c BS |
20856 | - pack_descriptor(&a, &b, (unsigned long)addr, |
20857 | - entries * sizeof(struct desc_struct) - 1, | |
20858 | - DESCTYPE_LDT, 0); | |
20859 | - write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); | |
20860 | - __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); | |
20861 | - } | |
cc90b958 BS |
20862 | -} |
20863 | - | |
cc90b958 | 20864 | - |
00e5a55c BS |
20865 | -static inline void native_load_tr_desc(void) |
20866 | -{ | |
20867 | - asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); | |
20868 | -} | |
cc90b958 | 20869 | - |
00e5a55c BS |
20870 | -static inline void native_load_gdt(const struct Xgt_desc_struct *dtr) |
20871 | -{ | |
20872 | - asm volatile("lgdt %0"::"m" (*dtr)); | |
20873 | -} | |
cc90b958 | 20874 | - |
00e5a55c BS |
20875 | -static inline void native_load_idt(const struct Xgt_desc_struct *dtr) |
20876 | -{ | |
20877 | - asm volatile("lidt %0"::"m" (*dtr)); | |
20878 | -} | |
cc90b958 | 20879 | - |
00e5a55c BS |
20880 | -static inline void native_store_gdt(struct Xgt_desc_struct *dtr) |
20881 | -{ | |
20882 | - asm ("sgdt %0":"=m" (*dtr)); | |
20883 | -} | |
cc90b958 | 20884 | - |
00e5a55c BS |
20885 | -static inline void native_store_idt(struct Xgt_desc_struct *dtr) |
20886 | -{ | |
20887 | - asm ("sidt %0":"=m" (*dtr)); | |
20888 | -} | |
cc90b958 | 20889 | - |
00e5a55c BS |
20890 | -static inline unsigned long native_store_tr(void) |
20891 | -{ | |
20892 | - unsigned long tr; | |
20893 | - asm ("str %0":"=r" (tr)); | |
20894 | - return tr; | |
20895 | -} | |
cc90b958 | 20896 | - |
00e5a55c | 20897 | -static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) |
cc90b958 | 20898 | -{ |
00e5a55c BS |
20899 | - unsigned int i; |
20900 | - struct desc_struct *gdt = get_cpu_gdt_table(cpu); | |
20901 | - | |
20902 | - for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) | |
20903 | - gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; | |
cc90b958 | 20904 | -} |
00e5a55c BS |
20905 | -#else |
20906 | -#define load_TLS(t, cpu) xen_load_tls(t, cpu) | |
20907 | -#define set_ldt xen_set_ldt | |
cc90b958 | 20908 | - |
00e5a55c BS |
20909 | -extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); |
20910 | -extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b); | |
cc90b958 | 20911 | - |
00e5a55c BS |
20912 | -static inline void xen_load_tls(struct thread_struct *t, unsigned int cpu) |
20913 | -{ | |
20914 | - unsigned int i; | |
20915 | - struct desc_struct *gdt = get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN; | |
cc90b958 | 20916 | - |
00e5a55c BS |
20917 | - for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) |
20918 | - if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]), | |
20919 | - *(u64 *)&t->tls_array[i])) | |
20920 | - BUG(); | |
20921 | -} | |
20922 | -#endif | |
20923 | - | |
20924 | -#ifndef CONFIG_X86_NO_IDT | |
20925 | -static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg) | |
cc90b958 | 20926 | -{ |
00e5a55c BS |
20927 | - __u32 a, b; |
20928 | - pack_gate(&a, &b, (unsigned long)addr, seg, type, 0); | |
20929 | - write_idt_entry(idt_table, gate, a, b); | |
cc90b958 | 20930 | -} |
00e5a55c | 20931 | -#endif |
cc90b958 | 20932 | - |
00e5a55c BS |
20933 | -#ifndef CONFIG_X86_NO_TSS |
20934 | -static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr) | |
cc90b958 | 20935 | -{ |
00e5a55c BS |
20936 | - __u32 a, b; |
20937 | - pack_descriptor(&a, &b, (unsigned long)addr, | |
20938 | - offsetof(struct tss_struct, __cacheline_filler) - 1, | |
20939 | - DESCTYPE_TSS, 0); | |
20940 | - write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b); | |
cc90b958 | 20941 | -} |
00e5a55c | 20942 | -#endif |
cc90b958 | 20943 | - |
cc90b958 | 20944 | - |
00e5a55c | 20945 | -#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) |
cc90b958 | 20946 | - |
00e5a55c BS |
20947 | -#define LDT_entry_a(info) \ |
20948 | - ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) | |
cc90b958 | 20949 | - |
00e5a55c BS |
20950 | -#define LDT_entry_b(info) \ |
20951 | - (((info)->base_addr & 0xff000000) | \ | |
20952 | - (((info)->base_addr & 0x00ff0000) >> 16) | \ | |
20953 | - ((info)->limit & 0xf0000) | \ | |
20954 | - (((info)->read_exec_only ^ 1) << 9) | \ | |
20955 | - ((info)->contents << 10) | \ | |
20956 | - (((info)->seg_not_present ^ 1) << 15) | \ | |
20957 | - ((info)->seg_32bit << 22) | \ | |
20958 | - ((info)->limit_in_pages << 23) | \ | |
20959 | - ((info)->useable << 20) | \ | |
20960 | - 0x7000) | |
cc90b958 | 20961 | - |
00e5a55c BS |
20962 | -#define LDT_empty(info) (\ |
20963 | - (info)->base_addr == 0 && \ | |
20964 | - (info)->limit == 0 && \ | |
20965 | - (info)->contents == 0 && \ | |
20966 | - (info)->read_exec_only == 1 && \ | |
20967 | - (info)->seg_32bit == 0 && \ | |
20968 | - (info)->limit_in_pages == 0 && \ | |
20969 | - (info)->seg_not_present == 1 && \ | |
20970 | - (info)->useable == 0 ) | |
20971 | - | |
20972 | -static inline void clear_LDT(void) | |
20973 | -{ | |
20974 | - set_ldt(NULL, 0); | |
20975 | -} | |
cc90b958 BS |
20976 | - |
20977 | -/* | |
00e5a55c | 20978 | - * load one particular LDT into the current CPU |
cc90b958 | 20979 | - */ |
00e5a55c BS |
20980 | -static inline void load_LDT_nolock(mm_context_t *pc) |
20981 | -{ | |
20982 | - set_ldt(pc->ldt, pc->size); | |
20983 | -} | |
cc90b958 | 20984 | - |
00e5a55c BS |
20985 | -static inline void load_LDT(mm_context_t *pc) |
20986 | -{ | |
20987 | - preempt_disable(); | |
20988 | - load_LDT_nolock(pc); | |
20989 | - preempt_enable(); | |
20990 | -} | |
20991 | - | |
20992 | -static inline unsigned long get_desc_base(unsigned long *desc) | |
20993 | -{ | |
20994 | - unsigned long base; | |
20995 | - base = ((desc[0] >> 16) & 0x0000ffff) | | |
20996 | - ((desc[1] << 16) & 0x00ff0000) | | |
20997 | - (desc[1] & 0xff000000); | |
20998 | - return base; | |
20999 | -} | |
cc90b958 | 21000 | - |
00e5a55c | 21001 | -#else /* __ASSEMBLY__ */ |
cc90b958 | 21002 | - |
00e5a55c BS |
21003 | -/* |
21004 | - * GET_DESC_BASE reads the descriptor base of the specified segment. | |
21005 | - * | |
21006 | - * Args: | |
21007 | - * idx - descriptor index | |
21008 | - * gdt - GDT pointer | |
21009 | - * base - 32bit register to which the base will be written | |
21010 | - * lo_w - lo word of the "base" register | |
21011 | - * lo_b - lo byte of the "base" register | |
21012 | - * hi_b - hi byte of the low word of the "base" register | |
21013 | - * | |
21014 | - * Example: | |
21015 | - * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) | |
21016 | - * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax. | |
21017 | - */ | |
21018 | -#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \ | |
21019 | - movb idx*8+4(gdt), lo_b; \ | |
21020 | - movb idx*8+7(gdt), hi_b; \ | |
21021 | - shll $16, base; \ | |
21022 | - movw idx*8+2(gdt), lo_w; | |
21023 | - | |
21024 | -#endif /* !__ASSEMBLY__ */ | |
cc90b958 | 21025 | - |
cc90b958 | 21026 | -#endif |
00e5a55c BS |
21027 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/desc_64.h 2009-02-16 16:18:36.000000000 +0100 |
21028 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
21029 | @@ -1,228 +0,0 @@ | |
21030 | -/* Written 2000 by Andi Kleen */ | |
21031 | -#ifndef __ARCH_DESC_H | |
21032 | -#define __ARCH_DESC_H | |
cc90b958 | 21033 | - |
00e5a55c BS |
21034 | -#include <linux/threads.h> |
21035 | -#include <asm/ldt.h> | |
cc90b958 | 21036 | - |
00e5a55c | 21037 | -#ifndef __ASSEMBLY__ |
cc90b958 | 21038 | - |
00e5a55c BS |
21039 | -#include <linux/string.h> |
21040 | -#include <linux/smp.h> | |
21041 | -#include <asm/desc_defs.h> | |
cc90b958 | 21042 | - |
00e5a55c BS |
21043 | -#include <asm/segment.h> |
21044 | -#include <asm/mmu.h> | |
cc90b958 | 21045 | - |
00e5a55c | 21046 | -extern struct desc_ptr idt_descr, cpu_gdt_descr[NR_CPUS]; |
cc90b958 | 21047 | - |
00e5a55c | 21048 | -extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; |
cc90b958 | 21049 | - |
00e5a55c BS |
21050 | -#define load_TR_desc() asm volatile("ltr %w0"::"r" (GDT_ENTRY_TSS*8)) |
21051 | -#define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8)) | |
cc90b958 | 21052 | - |
00e5a55c BS |
21053 | -static inline void clear_LDT(void) |
21054 | -{ | |
21055 | - int cpu = get_cpu(); | |
cc90b958 | 21056 | - |
00e5a55c BS |
21057 | - /* |
21058 | - * NB. We load the default_ldt for lcall7/27 handling on demand, as | |
21059 | - * it slows down context switching. Noone uses it anyway. | |
21060 | - */ | |
21061 | - cpu = cpu; /* XXX avoid compiler warning */ | |
21062 | - xen_set_ldt(NULL, 0); | |
21063 | - put_cpu(); | |
21064 | -} | |
21065 | - | |
21066 | -#ifndef CONFIG_X86_NO_TSS | |
21067 | -static inline unsigned long __store_tr(void) | |
cc90b958 | 21068 | -{ |
00e5a55c BS |
21069 | - unsigned long tr; |
21070 | - | |
21071 | - asm volatile ("str %w0":"=r" (tr)); | |
21072 | - return tr; | |
cc90b958 BS |
21073 | -} |
21074 | - | |
00e5a55c BS |
21075 | -#define store_tr(tr) (tr) = __store_tr() |
21076 | -#endif | |
21077 | - | |
21078 | -/* | |
21079 | - * This is the ldt that every process will get unless we need | |
21080 | - * something other than this. | |
21081 | - */ | |
21082 | -extern struct desc_struct default_ldt[]; | |
21083 | -#ifndef CONFIG_X86_NO_IDT | |
21084 | -extern struct gate_struct idt_table[]; | |
21085 | -#endif | |
21086 | -extern struct desc_ptr cpu_gdt_descr[]; | |
21087 | - | |
21088 | -/* the cpu gdt accessor */ | |
21089 | -#define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address) | |
21090 | - | |
21091 | -#ifndef CONFIG_XEN | |
21092 | -static inline void load_gdt(const struct desc_ptr *ptr) | |
cc90b958 | 21093 | -{ |
00e5a55c | 21094 | - asm volatile("lgdt %w0"::"m" (*ptr)); |
cc90b958 BS |
21095 | -} |
21096 | - | |
00e5a55c | 21097 | -static inline void store_gdt(struct desc_ptr *ptr) |
cc90b958 | 21098 | -{ |
00e5a55c | 21099 | - asm("sgdt %w0":"=m" (*ptr)); |
cc90b958 | 21100 | -} |
00e5a55c | 21101 | -#endif |
cc90b958 | 21102 | - |
00e5a55c BS |
21103 | -static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist) |
21104 | -{ | |
21105 | - struct gate_struct s; | |
21106 | - s.offset_low = PTR_LOW(func); | |
21107 | - s.segment = __KERNEL_CS; | |
21108 | - s.ist = ist; | |
21109 | - s.p = 1; | |
21110 | - s.dpl = dpl; | |
21111 | - s.zero0 = 0; | |
21112 | - s.zero1 = 0; | |
21113 | - s.type = type; | |
21114 | - s.offset_middle = PTR_MIDDLE(func); | |
21115 | - s.offset_high = PTR_HIGH(func); | |
21116 | - /* does not need to be atomic because it is only done once at setup time */ | |
21117 | - memcpy(adr, &s, 16); | |
21118 | -} | |
cc90b958 | 21119 | - |
00e5a55c BS |
21120 | -#ifndef CONFIG_X86_NO_IDT |
21121 | -static inline void set_intr_gate(int nr, void *func) | |
21122 | -{ | |
21123 | - BUG_ON((unsigned)nr > 0xFF); | |
21124 | - _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0); | |
21125 | -} | |
21126 | - | |
21127 | -static inline void set_intr_gate_ist(int nr, void *func, unsigned ist) | |
21128 | -{ | |
21129 | - BUG_ON((unsigned)nr > 0xFF); | |
21130 | - _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist); | |
21131 | -} | |
21132 | - | |
21133 | -static inline void set_system_gate(int nr, void *func) | |
21134 | -{ | |
21135 | - BUG_ON((unsigned)nr > 0xFF); | |
21136 | - _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); | |
21137 | -} | |
cc90b958 | 21138 | - |
00e5a55c BS |
21139 | -static inline void set_system_gate_ist(int nr, void *func, unsigned ist) |
21140 | -{ | |
21141 | - _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist); | |
21142 | -} | |
cc90b958 | 21143 | - |
00e5a55c | 21144 | -static inline void load_idt(const struct desc_ptr *ptr) |
cc90b958 | 21145 | -{ |
00e5a55c | 21146 | - asm volatile("lidt %w0"::"m" (*ptr)); |
cc90b958 BS |
21147 | -} |
21148 | - | |
00e5a55c | 21149 | -static inline void store_idt(struct desc_ptr *dtr) |
cc90b958 | 21150 | -{ |
00e5a55c | 21151 | - asm("sidt %w0":"=m" (*dtr)); |
cc90b958 | 21152 | -} |
00e5a55c | 21153 | -#endif |
cc90b958 | 21154 | - |
00e5a55c BS |
21155 | -static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, |
21156 | - unsigned size) | |
21157 | -{ | |
21158 | - struct ldttss_desc d; | |
21159 | - memset(&d,0,sizeof(d)); | |
21160 | - d.limit0 = size & 0xFFFF; | |
21161 | - d.base0 = PTR_LOW(tss); | |
21162 | - d.base1 = PTR_MIDDLE(tss) & 0xFF; | |
21163 | - d.type = type; | |
21164 | - d.p = 1; | |
21165 | - d.limit1 = (size >> 16) & 0xF; | |
21166 | - d.base2 = (PTR_MIDDLE(tss) >> 8) & 0xFF; | |
21167 | - d.base3 = PTR_HIGH(tss); | |
21168 | - memcpy(ptr, &d, 16); | |
21169 | -} | |
cc90b958 | 21170 | - |
00e5a55c BS |
21171 | -#ifndef CONFIG_X86_NO_TSS |
21172 | -static inline void set_tss_desc(unsigned cpu, void *addr) | |
cc90b958 BS |
21173 | -{ |
21174 | - /* | |
00e5a55c BS |
21175 | - * sizeof(unsigned long) coming from an extra "long" at the end |
21176 | - * of the iobitmap. See tss_struct definition in processor.h | |
21177 | - * | |
21178 | - * -1? seg base+limit should be pointing to the address of the | |
21179 | - * last valid byte | |
cc90b958 | 21180 | - */ |
00e5a55c BS |
21181 | - set_tssldt_descriptor(&cpu_gdt(cpu)[GDT_ENTRY_TSS], |
21182 | - (unsigned long)addr, DESC_TSS, | |
21183 | - IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1); | |
21184 | -} | |
21185 | -#endif | |
cc90b958 | 21186 | - |
00e5a55c BS |
21187 | -static inline void set_ldt_desc(unsigned cpu, void *addr, int size) |
21188 | -{ | |
21189 | - set_tssldt_descriptor(&cpu_gdt(cpu)[GDT_ENTRY_LDT], (unsigned long)addr, | |
21190 | - DESC_LDT, size * 8 - 1); | |
cc90b958 BS |
21191 | -} |
21192 | - | |
00e5a55c BS |
21193 | -#define LDT_entry_a(info) \ |
21194 | - ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) | |
21195 | -/* Don't allow setting of the lm bit. It is useless anyways because | |
21196 | - 64bit system calls require __USER_CS. */ | |
21197 | -#define LDT_entry_b(info) \ | |
21198 | - (((info)->base_addr & 0xff000000) | \ | |
21199 | - (((info)->base_addr & 0x00ff0000) >> 16) | \ | |
21200 | - ((info)->limit & 0xf0000) | \ | |
21201 | - (((info)->read_exec_only ^ 1) << 9) | \ | |
21202 | - ((info)->contents << 10) | \ | |
21203 | - (((info)->seg_not_present ^ 1) << 15) | \ | |
21204 | - ((info)->seg_32bit << 22) | \ | |
21205 | - ((info)->limit_in_pages << 23) | \ | |
21206 | - ((info)->useable << 20) | \ | |
21207 | - /* ((info)->lm << 21) | */ \ | |
21208 | - 0x7000) | |
cc90b958 | 21209 | - |
00e5a55c BS |
21210 | -#define LDT_empty(info) (\ |
21211 | - (info)->base_addr == 0 && \ | |
21212 | - (info)->limit == 0 && \ | |
21213 | - (info)->contents == 0 && \ | |
21214 | - (info)->read_exec_only == 1 && \ | |
21215 | - (info)->seg_32bit == 0 && \ | |
21216 | - (info)->limit_in_pages == 0 && \ | |
21217 | - (info)->seg_not_present == 1 && \ | |
21218 | - (info)->useable == 0 && \ | |
21219 | - (info)->lm == 0) | |
cc90b958 | 21220 | - |
00e5a55c BS |
21221 | -static inline void load_TLS(struct thread_struct *t, unsigned int cpu) |
21222 | -{ | |
21223 | - unsigned int i; | |
21224 | - u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN); | |
cc90b958 | 21225 | - |
00e5a55c BS |
21226 | - for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) |
21227 | - if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]), | |
21228 | - t->tls_array[i])) | |
21229 | - BUG(); | |
21230 | -} | |
cc90b958 | 21231 | - |
00e5a55c BS |
21232 | -/* |
21233 | - * load one particular LDT into the current CPU | |
21234 | - */ | |
21235 | -static inline void load_LDT_nolock (mm_context_t *pc, int cpu) | |
21236 | -{ | |
21237 | - void *segments = pc->ldt; | |
21238 | - int count = pc->size; | |
cc90b958 | 21239 | - |
00e5a55c BS |
21240 | - if (likely(!count)) |
21241 | - segments = NULL; | |
cc90b958 | 21242 | - |
00e5a55c BS |
21243 | - xen_set_ldt(segments, count); |
21244 | -} | |
cc90b958 | 21245 | - |
00e5a55c BS |
21246 | -static inline void load_LDT(mm_context_t *pc) |
21247 | -{ | |
21248 | - int cpu = get_cpu(); | |
21249 | - load_LDT_nolock(pc, cpu); | |
21250 | - put_cpu(); | |
21251 | -} | |
cc90b958 | 21252 | - |
00e5a55c | 21253 | -extern struct desc_ptr idt_descr; |
cc90b958 | 21254 | - |
00e5a55c BS |
21255 | -#endif /* !__ASSEMBLY__ */ |
21256 | - | |
21257 | -#endif | |
21258 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/dma-mapping_32.h 2009-02-16 16:18:36.000000000 +0100 | |
21259 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/dma-mapping_32.h 2009-03-16 16:33:40.000000000 +0100 | |
21260 | @@ -84,23 +84,13 @@ dma_sync_single_range_for_device(struct | |
21261 | dma_sync_single_for_device(dev, dma_handle+offset, size, direction); | |
21262 | } | |
cc90b958 | 21263 | |
00e5a55c BS |
21264 | -static inline void |
21265 | +extern void | |
21266 | dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, | |
21267 | - enum dma_data_direction direction) | |
21268 | -{ | |
21269 | - if (swiotlb) | |
21270 | - swiotlb_sync_sg_for_cpu(dev,sg,nelems,direction); | |
21271 | - flush_write_buffers(); | |
21272 | -} | |
21273 | + enum dma_data_direction direction); | |
cc90b958 | 21274 | |
00e5a55c BS |
21275 | -static inline void |
21276 | +extern void | |
21277 | dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, | |
21278 | - enum dma_data_direction direction) | |
21279 | -{ | |
21280 | - if (swiotlb) | |
21281 | - swiotlb_sync_sg_for_device(dev,sg,nelems,direction); | |
21282 | - flush_write_buffers(); | |
21283 | -} | |
21284 | + enum dma_data_direction direction); | |
cc90b958 | 21285 | |
00e5a55c BS |
21286 | extern int |
21287 | dma_mapping_error(dma_addr_t dma_addr); | |
21288 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-02-16 16:17:21.000000000 +0100 | |
21289 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-03-16 16:33:40.000000000 +0100 | |
21290 | @@ -64,7 +64,7 @@ enum fixed_addresses { | |
21291 | #endif | |
21292 | #ifdef CONFIG_X86_VISWS_APIC | |
21293 | FIX_CO_CPU, /* Cobalt timer */ | |
21294 | - FIX_CO_APIC, /* Cobalt APIC Redirection Table */ | |
21295 | + FIX_CO_APIC, /* Cobalt APIC Redirection Table */ | |
21296 | FIX_LI_PCIA, /* Lithium PCI Bridge A */ | |
21297 | FIX_LI_PCIB, /* Lithium PCI Bridge B */ | |
21298 | #endif | |
21299 | @@ -73,7 +73,7 @@ enum fixed_addresses { | |
21300 | #endif | |
21301 | #ifdef CONFIG_X86_CYCLONE_TIMER | |
21302 | FIX_CYCLONE_TIMER, /*cyclone timer register*/ | |
21303 | -#endif | |
21304 | +#endif | |
21305 | #ifdef CONFIG_HIGHMEM | |
21306 | FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ | |
21307 | FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, | |
21308 | @@ -93,11 +93,23 @@ enum fixed_addresses { | |
21309 | FIX_ISAMAP_END, | |
21310 | FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1, | |
21311 | __end_of_permanent_fixed_addresses, | |
21312 | - /* temporary boot-time mappings, used before ioremap() is functional */ | |
21313 | -#define NR_FIX_BTMAPS 16 | |
21314 | - FIX_BTMAP_END = __end_of_permanent_fixed_addresses, | |
21315 | - FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1, | |
21316 | + /* | |
21317 | + * 256 temporary boot-time mappings, used by early_ioremap(), | |
21318 | + * before ioremap() is functional. | |
21319 | + * | |
21320 | + * We round it up to the next 512 pages boundary so that we | |
21321 | + * can have a single pgd entry and a single pte table: | |
21322 | + */ | |
21323 | +#define NR_FIX_BTMAPS 64 | |
21324 | +#define FIX_BTMAPS_NESTING 4 | |
21325 | + FIX_BTMAP_END = | |
21326 | + __end_of_permanent_fixed_addresses + 512 - | |
21327 | + (__end_of_permanent_fixed_addresses & 511), | |
21328 | + FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1, | |
21329 | FIX_WP_TEST, | |
21330 | +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT | |
21331 | + FIX_OHCI1394_BASE, | |
21332 | +#endif | |
21333 | __end_of_fixed_addresses | |
21334 | }; | |
cc90b958 | 21335 | |
00e5a55c BS |
21336 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-02-16 16:17:21.000000000 +0100 |
21337 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-03-16 16:33:40.000000000 +0100 | |
21338 | @@ -15,6 +15,7 @@ | |
21339 | #include <asm/apicdef.h> | |
21340 | #include <asm/page.h> | |
21341 | #include <asm/vsyscall.h> | |
21342 | +#include <asm/efi.h> | |
21343 | #include <asm/acpi.h> | |
cc90b958 | 21344 | |
00e5a55c BS |
21345 | /* |
21346 | @@ -46,6 +47,10 @@ enum fixed_addresses { | |
21347 | FIX_IO_APIC_BASE_0, | |
21348 | FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, | |
21349 | #endif | |
21350 | +#ifdef CONFIG_EFI | |
21351 | + FIX_EFI_IO_MAP_LAST_PAGE, | |
21352 | + FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE+MAX_EFI_IO_PAGES-1, | |
cc90b958 | 21353 | +#endif |
00e5a55c BS |
21354 | #ifdef CONFIG_ACPI |
21355 | FIX_ACPI_BEGIN, | |
21356 | FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, | |
21357 | @@ -55,10 +60,22 @@ enum fixed_addresses { | |
21358 | FIX_ISAMAP_END, | |
21359 | FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1, | |
21360 | __end_of_permanent_fixed_addresses, | |
21361 | - /* temporary boot-time mappings, used before ioremap() is functional */ | |
21362 | -#define NR_FIX_BTMAPS 16 | |
21363 | - FIX_BTMAP_END = __end_of_permanent_fixed_addresses, | |
21364 | - FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1, | |
21365 | + /* | |
21366 | + * 256 temporary boot-time mappings, used by early_ioremap(), | |
21367 | + * before ioremap() is functional. | |
21368 | + * | |
21369 | + * We round it up to the next 512 pages boundary so that we | |
21370 | + * can have a single pgd entry and a single pte table: | |
21371 | + */ | |
21372 | +#define NR_FIX_BTMAPS 64 | |
21373 | +#define FIX_BTMAPS_NESTING 4 | |
21374 | + FIX_BTMAP_END = | |
21375 | + __end_of_permanent_fixed_addresses + 512 - | |
21376 | + (__end_of_permanent_fixed_addresses & 511), | |
21377 | + FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1, | |
21378 | +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT | |
21379 | + FIX_OHCI1394_BASE, | |
cc90b958 | 21380 | +#endif |
00e5a55c BS |
21381 | __end_of_fixed_addresses |
21382 | }; | |
21383 | ||
21384 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/highmem.h 2009-02-16 16:17:21.000000000 +0100 | |
21385 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/highmem.h 2009-03-16 16:33:40.000000000 +0100 | |
21386 | @@ -37,11 +37,6 @@ extern pte_t *pkmap_page_table; | |
21387 | * easily, subsequent pte tables have to be allocated in one physical | |
21388 | * chunk of RAM. | |
21389 | */ | |
21390 | -#ifdef CONFIG_X86_PAE | |
21391 | -#define LAST_PKMAP 512 | |
21392 | -#else | |
21393 | -#define LAST_PKMAP 1024 | |
21394 | -#endif | |
21395 | /* | |
21396 | * Ordering is: | |
21397 | * | |
21398 | @@ -57,13 +52,12 @@ extern pte_t *pkmap_page_table; | |
21399 | * VMALLOC_START | |
21400 | * high_memory | |
21401 | */ | |
21402 | -#define PKMAP_BASE ( (FIXADDR_BOOT_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK ) | |
21403 | #define LAST_PKMAP_MASK (LAST_PKMAP-1) | |
21404 | #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) | |
21405 | #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) | |
21406 | ||
21407 | -extern void * FASTCALL(kmap_high(struct page *page)); | |
21408 | -extern void FASTCALL(kunmap_high(struct page *page)); | |
21409 | +extern void *kmap_high(struct page *page); | |
21410 | +extern void kunmap_high(struct page *page); | |
21411 | ||
21412 | void *kmap(struct page *page); | |
21413 | void kunmap(struct page *page); | |
21414 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/hypervisor.h 2009-02-16 16:18:36.000000000 +0100 | |
21415 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/hypervisor.h 2009-03-16 16:33:40.000000000 +0100 | |
21416 | @@ -264,6 +264,25 @@ HYPERVISOR_poll( | |
21417 | return rc; | |
21418 | } | |
21419 | ||
21420 | +static inline int __must_check | |
21421 | +HYPERVISOR_poll_no_timeout( | |
21422 | + evtchn_port_t *ports, unsigned int nr_ports) | |
21423 | +{ | |
21424 | + int rc; | |
21425 | + struct sched_poll sched_poll = { | |
21426 | + .nr_ports = nr_ports | |
21427 | + }; | |
21428 | + set_xen_guest_handle(sched_poll.ports, ports); | |
cc90b958 | 21429 | + |
00e5a55c BS |
21430 | + rc = HYPERVISOR_sched_op(SCHEDOP_poll, &sched_poll); |
21431 | +#if CONFIG_XEN_COMPAT <= 0x030002 | |
21432 | + if (rc == -ENOSYS) | |
21433 | + rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); | |
cc90b958 BS |
21434 | +#endif |
21435 | + | |
00e5a55c BS |
21436 | + return rc; |
21437 | +} | |
cc90b958 | 21438 | + |
00e5a55c BS |
21439 | #ifdef CONFIG_XEN |
21440 | ||
21441 | static inline void | |
21442 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/irqflags.h 2009-02-16 16:18:36.000000000 +0100 | |
21443 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/irqflags.h 2009-03-16 16:33:40.000000000 +0100 | |
21444 | @@ -1,5 +1,247 @@ | |
21445 | -#ifdef CONFIG_X86_32 | |
21446 | -# include "irqflags_32.h" | |
21447 | +#ifndef _X86_IRQFLAGS_H_ | |
21448 | +#define _X86_IRQFLAGS_H_ | |
cc90b958 | 21449 | + |
00e5a55c | 21450 | +#include <asm/processor-flags.h> |
cc90b958 BS |
21451 | + |
21452 | +#ifndef __ASSEMBLY__ | |
cc90b958 | 21453 | +/* |
00e5a55c BS |
21454 | + * The use of 'barrier' in the following reflects their use as local-lock |
21455 | + * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following | |
21456 | + * critical operations are executed. All critical operations must complete | |
21457 | + * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also | |
21458 | + * includes these barriers, for example. | |
cc90b958 | 21459 | + */ |
cc90b958 | 21460 | + |
00e5a55c | 21461 | +#define xen_save_fl(void) (current_vcpu_info()->evtchn_upcall_mask) |
cc90b958 | 21462 | + |
00e5a55c BS |
21463 | +#define xen_restore_fl(f) \ |
21464 | +do { \ | |
21465 | + vcpu_info_t *_vcpu; \ | |
21466 | + barrier(); \ | |
21467 | + _vcpu = current_vcpu_info(); \ | |
21468 | + if ((_vcpu->evtchn_upcall_mask = (f)) == 0) { \ | |
21469 | + barrier(); /* unmask then check (avoid races) */\ | |
21470 | + if (unlikely(_vcpu->evtchn_upcall_pending)) \ | |
21471 | + force_evtchn_callback(); \ | |
21472 | + } \ | |
21473 | +} while (0) | |
cc90b958 | 21474 | + |
00e5a55c BS |
21475 | +#define xen_irq_disable() \ |
21476 | +do { \ | |
21477 | + current_vcpu_info()->evtchn_upcall_mask = 1; \ | |
21478 | + barrier(); \ | |
21479 | +} while (0) | |
cc90b958 | 21480 | + |
00e5a55c BS |
21481 | +#define xen_irq_enable() \ |
21482 | +do { \ | |
21483 | + vcpu_info_t *_vcpu; \ | |
21484 | + barrier(); \ | |
21485 | + _vcpu = current_vcpu_info(); \ | |
21486 | + _vcpu->evtchn_upcall_mask = 0; \ | |
21487 | + barrier(); /* unmask then check (avoid races) */ \ | |
21488 | + if (unlikely(_vcpu->evtchn_upcall_pending)) \ | |
21489 | + force_evtchn_callback(); \ | |
21490 | +} while (0) | |
cc90b958 | 21491 | + |
00e5a55c | 21492 | +void xen_safe_halt(void); |
cc90b958 | 21493 | + |
00e5a55c | 21494 | +void xen_halt(void); |
cc90b958 | 21495 | + |
00e5a55c | 21496 | +#define __raw_local_save_flags() xen_save_fl() |
cc90b958 | 21497 | + |
00e5a55c | 21498 | +#define raw_local_irq_restore(flags) xen_restore_fl(flags) |
cc90b958 | 21499 | + |
00e5a55c | 21500 | +#define raw_local_irq_disable() xen_irq_disable() |
cc90b958 | 21501 | + |
00e5a55c | 21502 | +#define raw_local_irq_enable() xen_irq_enable() |
cc90b958 BS |
21503 | + |
21504 | +/* | |
00e5a55c BS |
21505 | + * Used in the idle loop; sti takes one instruction cycle |
21506 | + * to complete: | |
cc90b958 | 21507 | + */ |
00e5a55c | 21508 | +static inline void raw_safe_halt(void) |
cc90b958 | 21509 | +{ |
00e5a55c | 21510 | + xen_safe_halt(); |
cc90b958 BS |
21511 | +} |
21512 | + | |
cc90b958 | 21513 | +/* |
00e5a55c BS |
21514 | + * Used when interrupts are already enabled or to |
21515 | + * shutdown the processor: | |
21516 | + */ | |
21517 | +static inline void halt(void) | |
21518 | +{ | |
21519 | + xen_halt(); | |
21520 | +} | |
cc90b958 BS |
21521 | + |
21522 | +/* | |
00e5a55c | 21523 | + * For spinlocks, etc: |
cc90b958 | 21524 | + */ |
00e5a55c | 21525 | +#define __raw_local_irq_save() \ |
cc90b958 | 21526 | +({ \ |
00e5a55c BS |
21527 | + unsigned long flags = __raw_local_save_flags(); \ |
21528 | + \ | |
21529 | + raw_local_irq_disable(); \ | |
21530 | + \ | |
21531 | + flags; \ | |
cc90b958 | 21532 | +}) |
00e5a55c BS |
21533 | #else |
21534 | -# include "irqflags_64.h" | |
cc90b958 | 21535 | + |
00e5a55c BS |
21536 | +/* Offsets into shared_info_t. */ |
21537 | +#define evtchn_upcall_pending /* 0 */ | |
21538 | +#define evtchn_upcall_mask 1 | |
cc90b958 | 21539 | + |
00e5a55c | 21540 | +#define sizeof_vcpu_shift 6 |
cc90b958 | 21541 | + |
00e5a55c BS |
21542 | +#ifdef CONFIG_X86_64 |
21543 | +# define __REG_si %rsi | |
21544 | +# define __CPU_num %gs:pda_cpunumber | |
21545 | +#else | |
21546 | +# define __REG_si %esi | |
21547 | +# define __CPU_num TI_cpu(%ebp) | |
21548 | +#endif | |
cc90b958 | 21549 | + |
00e5a55c BS |
21550 | +#ifdef CONFIG_SMP |
21551 | +#define GET_VCPU_INFO movl __CPU_num,%esi ; \ | |
21552 | + shl $sizeof_vcpu_shift,%esi ; \ | |
21553 | + add HYPERVISOR_shared_info,__REG_si | |
21554 | +#else | |
21555 | +#define GET_VCPU_INFO mov HYPERVISOR_shared_info,__REG_si | |
21556 | +#endif | |
cc90b958 | 21557 | + |
00e5a55c BS |
21558 | +#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(__REG_si) |
21559 | +#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(__REG_si) | |
21560 | +#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(__REG_si) | |
21561 | +#define DISABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \ | |
21562 | + __DISABLE_INTERRUPTS | |
21563 | +#define ENABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \ | |
21564 | + __ENABLE_INTERRUPTS | |
cc90b958 | 21565 | + |
00e5a55c BS |
21566 | +#ifndef CONFIG_X86_64 |
21567 | +#define INTERRUPT_RETURN iret | |
21568 | +#define ENABLE_INTERRUPTS_SYSCALL_RET __ENABLE_INTERRUPTS ; \ | |
21569 | +sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \ | |
21570 | + __TEST_PENDING ; \ | |
21571 | + jnz 14f /* process more events if necessary... */ ; \ | |
21572 | + movl PT_ESI(%esp), %esi ; \ | |
21573 | + sysexit ; \ | |
21574 | +14: __DISABLE_INTERRUPTS ; \ | |
21575 | + TRACE_IRQS_OFF ; \ | |
21576 | +sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \ | |
21577 | + push %esp ; \ | |
21578 | + call evtchn_do_upcall ; \ | |
21579 | + add $4,%esp ; \ | |
21580 | + jmp ret_from_intr | |
21581 | +#endif | |
cc90b958 | 21582 | + |
00e5a55c BS |
21583 | + |
21584 | +#endif /* __ASSEMBLY__ */ | |
21585 | + | |
21586 | +#ifndef __ASSEMBLY__ | |
21587 | +#define raw_local_save_flags(flags) \ | |
21588 | + do { (flags) = __raw_local_save_flags(); } while (0) | |
21589 | + | |
21590 | +#define raw_local_irq_save(flags) \ | |
21591 | + do { (flags) = __raw_local_irq_save(); } while (0) | |
21592 | + | |
21593 | +static inline int raw_irqs_disabled_flags(unsigned long flags) | |
cc90b958 | 21594 | +{ |
00e5a55c | 21595 | + return (flags != 0); |
cc90b958 BS |
21596 | +} |
21597 | + | |
00e5a55c | 21598 | +#define raw_irqs_disabled() \ |
cc90b958 | 21599 | +({ \ |
00e5a55c BS |
21600 | + unsigned long flags = __raw_local_save_flags(); \ |
21601 | + \ | |
21602 | + raw_irqs_disabled_flags(flags); \ | |
cc90b958 BS |
21603 | +}) |
21604 | + | |
00e5a55c BS |
21605 | +/* |
21606 | + * makes the traced hardirq state match with the machine state | |
21607 | + * | |
21608 | + * should be a rarely used function, only in places where its | |
21609 | + * otherwise impossible to know the irq state, like in traps. | |
21610 | + */ | |
21611 | +static inline void trace_hardirqs_fixup_flags(unsigned long flags) | |
21612 | +{ | |
21613 | + if (raw_irqs_disabled_flags(flags)) | |
21614 | + trace_hardirqs_off(); | |
21615 | + else | |
21616 | + trace_hardirqs_on(); | |
21617 | +} | |
cc90b958 | 21618 | + |
00e5a55c BS |
21619 | +#define trace_hardirqs_fixup() \ |
21620 | + trace_hardirqs_fixup_flags(__raw_local_save_flags()) | |
cc90b958 | 21621 | + |
00e5a55c | 21622 | +#else |
cc90b958 | 21623 | + |
00e5a55c BS |
21624 | +#ifdef CONFIG_X86_64 |
21625 | +/* | |
21626 | + * Currently paravirt can't handle swapgs nicely when we | |
21627 | + * don't have a stack we can rely on (such as a user space | |
21628 | + * stack). So we either find a way around these or just fault | |
21629 | + * and emulate if a guest tries to call swapgs directly. | |
21630 | + * | |
21631 | + * Either way, this is a good way to document that we don't | |
21632 | + * have a reliable stack. x86_64 only. | |
21633 | + */ | |
21634 | +#define SWAPGS_UNSAFE_STACK swapgs | |
21635 | +#define ARCH_TRACE_IRQS_ON call trace_hardirqs_on_thunk | |
21636 | +#define ARCH_TRACE_IRQS_OFF call trace_hardirqs_off_thunk | |
21637 | +#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk | |
21638 | +#define ARCH_LOCKDEP_SYS_EXIT_IRQ \ | |
21639 | + TRACE_IRQS_ON; \ | |
21640 | + ENABLE_INTERRUPTS(CLBR_NONE); \ | |
21641 | + SAVE_REST; \ | |
21642 | + LOCKDEP_SYS_EXIT; \ | |
21643 | + RESTORE_REST; \ | |
21644 | + __DISABLE_INTERRUPTS; \ | |
21645 | + TRACE_IRQS_OFF; | |
21646 | + | |
21647 | +#else | |
21648 | +#define ARCH_TRACE_IRQS_ON \ | |
21649 | + pushl %eax; \ | |
21650 | + pushl %ecx; \ | |
21651 | + pushl %edx; \ | |
21652 | + call trace_hardirqs_on; \ | |
21653 | + popl %edx; \ | |
21654 | + popl %ecx; \ | |
21655 | + popl %eax; | |
21656 | + | |
21657 | +#define ARCH_TRACE_IRQS_OFF \ | |
21658 | + pushl %eax; \ | |
21659 | + pushl %ecx; \ | |
21660 | + pushl %edx; \ | |
21661 | + call trace_hardirqs_off; \ | |
21662 | + popl %edx; \ | |
21663 | + popl %ecx; \ | |
21664 | + popl %eax; | |
21665 | + | |
21666 | +#define ARCH_LOCKDEP_SYS_EXIT \ | |
21667 | + pushl %eax; \ | |
21668 | + pushl %ecx; \ | |
21669 | + pushl %edx; \ | |
21670 | + call lockdep_sys_exit; \ | |
21671 | + popl %edx; \ | |
21672 | + popl %ecx; \ | |
21673 | + popl %eax; | |
cc90b958 | 21674 | + |
00e5a55c BS |
21675 | +#define ARCH_LOCKDEP_SYS_EXIT_IRQ |
21676 | +#endif | |
cc90b958 | 21677 | + |
00e5a55c BS |
21678 | +#ifdef CONFIG_TRACE_IRQFLAGS |
21679 | +# define TRACE_IRQS_ON ARCH_TRACE_IRQS_ON | |
21680 | +# define TRACE_IRQS_OFF ARCH_TRACE_IRQS_OFF | |
21681 | +#else | |
21682 | +# define TRACE_IRQS_ON | |
21683 | +# define TRACE_IRQS_OFF | |
21684 | +#endif | |
21685 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
21686 | +# define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT | |
21687 | +# define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ | |
21688 | +# else | |
21689 | +# define LOCKDEP_SYS_EXIT | |
21690 | +# define LOCKDEP_SYS_EXIT_IRQ | |
21691 | +# endif | |
cc90b958 | 21692 | + |
00e5a55c BS |
21693 | +#endif /* __ASSEMBLY__ */ |
21694 | #endif | |
21695 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/irqflags_32.h 2009-02-16 16:18:36.000000000 +0100 | |
21696 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
21697 | @@ -1,212 +0,0 @@ | |
cc90b958 | 21698 | -/* |
00e5a55c | 21699 | - * include/asm-i386/irqflags.h |
cc90b958 | 21700 | - * |
00e5a55c BS |
21701 | - * IRQ flags handling |
21702 | - * | |
21703 | - * This file gets included from lowlevel asm headers too, to provide | |
21704 | - * wrapped versions of the local_irq_*() APIs, based on the | |
21705 | - * raw_local_irq_*() functions from the lowlevel headers. | |
cc90b958 | 21706 | - */ |
00e5a55c BS |
21707 | -#ifndef _ASM_IRQFLAGS_H |
21708 | -#define _ASM_IRQFLAGS_H | |
cc90b958 | 21709 | - |
00e5a55c BS |
21710 | -#ifndef __ASSEMBLY__ |
21711 | -#define xen_save_fl(void) (current_vcpu_info()->evtchn_upcall_mask) | |
cc90b958 | 21712 | - |
00e5a55c BS |
21713 | -#define xen_restore_fl(f) \ |
21714 | -do { \ | |
21715 | - vcpu_info_t *_vcpu; \ | |
21716 | - barrier(); \ | |
21717 | - _vcpu = current_vcpu_info(); \ | |
21718 | - if ((_vcpu->evtchn_upcall_mask = (f)) == 0) { \ | |
21719 | - barrier(); /* unmask then check (avoid races) */\ | |
21720 | - if (unlikely(_vcpu->evtchn_upcall_pending)) \ | |
21721 | - force_evtchn_callback(); \ | |
21722 | - } \ | |
21723 | -} while (0) | |
cc90b958 | 21724 | - |
00e5a55c BS |
21725 | -#define xen_irq_disable() \ |
21726 | -do { \ | |
21727 | - current_vcpu_info()->evtchn_upcall_mask = 1; \ | |
21728 | - barrier(); \ | |
21729 | -} while (0) | |
cc90b958 | 21730 | - |
00e5a55c BS |
21731 | -#define xen_irq_enable() \ |
21732 | -do { \ | |
21733 | - vcpu_info_t *_vcpu; \ | |
21734 | - barrier(); \ | |
21735 | - _vcpu = current_vcpu_info(); \ | |
21736 | - _vcpu->evtchn_upcall_mask = 0; \ | |
21737 | - barrier(); /* unmask then check (avoid races) */ \ | |
21738 | - if (unlikely(_vcpu->evtchn_upcall_pending)) \ | |
21739 | - force_evtchn_callback(); \ | |
21740 | -} while (0) | |
cc90b958 | 21741 | - |
00e5a55c BS |
21742 | -void xen_safe_halt(void); |
21743 | - | |
21744 | -void xen_halt(void); | |
21745 | - | |
21746 | -/* | |
21747 | - * The use of 'barrier' in the following reflects their use as local-lock | |
21748 | - * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following | |
21749 | - * critical operations are executed. All critical operations must complete | |
21750 | - * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also | |
21751 | - * includes these barriers, for example. | |
21752 | - */ | |
21753 | - | |
21754 | -#define __raw_local_save_flags() xen_save_fl() | |
21755 | - | |
21756 | -#define raw_local_irq_restore(flags) xen_restore_fl(flags) | |
21757 | - | |
21758 | -#define raw_local_irq_disable() xen_irq_disable() | |
21759 | - | |
21760 | -#define raw_local_irq_enable() xen_irq_enable() | |
cc90b958 | 21761 | - |
cc90b958 | 21762 | -/* |
00e5a55c BS |
21763 | - * Used in the idle loop; sti takes one instruction cycle |
21764 | - * to complete: | |
cc90b958 | 21765 | - */ |
00e5a55c BS |
21766 | -static inline void raw_safe_halt(void) |
21767 | -{ | |
21768 | - xen_safe_halt(); | |
21769 | -} | |
cc90b958 BS |
21770 | - |
21771 | -/* | |
00e5a55c BS |
21772 | - * Used when interrupts are already enabled or to |
21773 | - * shutdown the processor: | |
21774 | - */ | |
21775 | -static inline void halt(void) | |
21776 | -{ | |
21777 | - xen_halt(); | |
21778 | -} | |
21779 | - | |
21780 | -/* | |
21781 | - * For spinlocks, etc: | |
cc90b958 | 21782 | - */ |
00e5a55c BS |
21783 | -#define __raw_local_irq_save() \ |
21784 | -({ \ | |
21785 | - unsigned long flags = __raw_local_save_flags(); \ | |
21786 | - \ | |
21787 | - raw_local_irq_disable(); \ | |
21788 | - \ | |
21789 | - flags; \ | |
21790 | -}) | |
21791 | - | |
21792 | -#else | |
21793 | -/* Offsets into shared_info_t. */ | |
21794 | -#define evtchn_upcall_pending /* 0 */ | |
21795 | -#define evtchn_upcall_mask 1 | |
21796 | - | |
21797 | -#define sizeof_vcpu_shift 6 | |
cc90b958 | 21798 | - |
cc90b958 | 21799 | -#ifdef CONFIG_SMP |
00e5a55c BS |
21800 | -#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \ |
21801 | - shl $sizeof_vcpu_shift,%esi ; \ | |
21802 | - addl HYPERVISOR_shared_info,%esi | |
21803 | -#else | |
21804 | -#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi | |
cc90b958 | 21805 | -#endif |
cc90b958 | 21806 | - |
00e5a55c BS |
21807 | -#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) |
21808 | -#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) | |
21809 | -#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) | |
21810 | -#define DISABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \ | |
21811 | - __DISABLE_INTERRUPTS | |
21812 | -#define ENABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \ | |
21813 | - __ENABLE_INTERRUPTS | |
21814 | -#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \ | |
21815 | -sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \ | |
21816 | - __TEST_PENDING ; \ | |
21817 | - jnz 14f /* process more events if necessary... */ ; \ | |
21818 | - movl PT_ESI(%esp), %esi ; \ | |
21819 | - sysexit ; \ | |
21820 | -14: __DISABLE_INTERRUPTS ; \ | |
21821 | - TRACE_IRQS_OFF ; \ | |
21822 | -sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \ | |
21823 | - push %esp ; \ | |
21824 | - call evtchn_do_upcall ; \ | |
21825 | - add $4,%esp ; \ | |
21826 | - jmp ret_from_intr | |
21827 | -#define INTERRUPT_RETURN iret | |
21828 | -#endif /* __ASSEMBLY__ */ | |
21829 | - | |
21830 | -#ifndef __ASSEMBLY__ | |
21831 | -#define raw_local_save_flags(flags) \ | |
21832 | - do { (flags) = __raw_local_save_flags(); } while (0) | |
21833 | - | |
21834 | -#define raw_local_irq_save(flags) \ | |
21835 | - do { (flags) = __raw_local_irq_save(); } while (0) | |
21836 | - | |
21837 | -static inline int raw_irqs_disabled_flags(unsigned long flags) | |
21838 | -{ | |
21839 | - return (flags != 0); | |
21840 | -} | |
21841 | - | |
21842 | -#define raw_irqs_disabled() \ | |
21843 | -({ \ | |
21844 | - unsigned long flags = __raw_local_save_flags(); \ | |
21845 | - \ | |
21846 | - raw_irqs_disabled_flags(flags); \ | |
21847 | -}) | |
cc90b958 BS |
21848 | - |
21849 | -/* | |
00e5a55c BS |
21850 | - * makes the traced hardirq state match with the machine state |
21851 | - * | |
21852 | - * should be a rarely used function, only in places where its | |
21853 | - * otherwise impossible to know the irq state, like in traps. | |
21854 | - */ | |
21855 | -static inline void trace_hardirqs_fixup_flags(unsigned long flags) | |
21856 | -{ | |
21857 | - if (raw_irqs_disabled_flags(flags)) | |
21858 | - trace_hardirqs_off(); | |
21859 | - else | |
21860 | - trace_hardirqs_on(); | |
21861 | -} | |
21862 | - | |
21863 | -#define trace_hardirqs_fixup() \ | |
21864 | - trace_hardirqs_fixup_flags(__raw_local_save_flags()) | |
21865 | -#endif /* __ASSEMBLY__ */ | |
21866 | - | |
21867 | -/* | |
21868 | - * Do the CPU's IRQ-state tracing from assembly code. We call a | |
21869 | - * C function, so save all the C-clobbered registers: | |
cc90b958 | 21870 | - */ |
00e5a55c | 21871 | -#ifdef CONFIG_TRACE_IRQFLAGS |
cc90b958 | 21872 | - |
00e5a55c BS |
21873 | -# define TRACE_IRQS_ON \ |
21874 | - pushl %eax; \ | |
21875 | - pushl %ecx; \ | |
21876 | - pushl %edx; \ | |
21877 | - call trace_hardirqs_on; \ | |
21878 | - popl %edx; \ | |
21879 | - popl %ecx; \ | |
21880 | - popl %eax; | |
21881 | - | |
21882 | -# define TRACE_IRQS_OFF \ | |
21883 | - pushl %eax; \ | |
21884 | - pushl %ecx; \ | |
21885 | - pushl %edx; \ | |
21886 | - call trace_hardirqs_off; \ | |
21887 | - popl %edx; \ | |
21888 | - popl %ecx; \ | |
21889 | - popl %eax; | |
21890 | - | |
21891 | -#else | |
21892 | -# define TRACE_IRQS_ON | |
21893 | -# define TRACE_IRQS_OFF | |
cc90b958 BS |
21894 | -#endif |
21895 | - | |
00e5a55c BS |
21896 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC |
21897 | -# define LOCKDEP_SYS_EXIT \ | |
21898 | - pushl %eax; \ | |
21899 | - pushl %ecx; \ | |
21900 | - pushl %edx; \ | |
21901 | - call lockdep_sys_exit; \ | |
21902 | - popl %edx; \ | |
21903 | - popl %ecx; \ | |
21904 | - popl %eax; | |
cc90b958 | 21905 | -#else |
00e5a55c | 21906 | -# define LOCKDEP_SYS_EXIT |
cc90b958 BS |
21907 | -#endif |
21908 | - | |
00e5a55c BS |
21909 | -#endif |
21910 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/irqflags_64.h 2009-02-16 16:18:36.000000000 +0100 | |
21911 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
21912 | @@ -1,178 +0,0 @@ | |
cc90b958 | 21913 | -/* |
00e5a55c BS |
21914 | - * include/asm-x86_64/irqflags.h |
21915 | - * | |
21916 | - * IRQ flags handling | |
21917 | - * | |
21918 | - * This file gets included from lowlevel asm headers too, to provide | |
21919 | - * wrapped versions of the local_irq_*() APIs, based on the | |
21920 | - * raw_local_irq_*() functions from the lowlevel headers. | |
cc90b958 | 21921 | - */ |
00e5a55c BS |
21922 | -#ifndef _ASM_IRQFLAGS_H |
21923 | -#define _ASM_IRQFLAGS_H | |
21924 | -#include <asm/processor-flags.h> | |
cc90b958 | 21925 | - |
00e5a55c BS |
21926 | -#ifndef __ASSEMBLY__ |
21927 | -/* | |
21928 | - * Interrupt control: | |
21929 | - */ | |
cc90b958 | 21930 | - |
00e5a55c BS |
21931 | -/* |
21932 | - * The use of 'barrier' in the following reflects their use as local-lock | |
21933 | - * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following | |
21934 | - * critical operations are executed. All critical operations must complete | |
21935 | - * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also | |
21936 | - * includes these barriers, for example. | |
21937 | - */ | |
cc90b958 | 21938 | - |
00e5a55c | 21939 | -#define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask) |
cc90b958 | 21940 | - |
00e5a55c BS |
21941 | -#define raw_local_save_flags(flags) \ |
21942 | - do { (flags) = __raw_local_save_flags(); } while (0) | |
cc90b958 | 21943 | - |
00e5a55c BS |
21944 | -#define raw_local_irq_restore(x) \ |
21945 | -do { \ | |
21946 | - vcpu_info_t *_vcpu; \ | |
21947 | - barrier(); \ | |
21948 | - _vcpu = current_vcpu_info(); \ | |
21949 | - if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ | |
21950 | - barrier(); /* unmask then check (avoid races) */ \ | |
21951 | - if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ | |
21952 | - force_evtchn_callback(); \ | |
21953 | - } \ | |
21954 | -} while (0) | |
21955 | - | |
21956 | -#ifdef CONFIG_X86_VSMP | |
cc90b958 BS |
21957 | - |
21958 | -/* | |
00e5a55c | 21959 | - * Interrupt control for the VSMP architecture: |
cc90b958 | 21960 | - */ |
cc90b958 | 21961 | - |
00e5a55c | 21962 | -static inline void raw_local_irq_disable(void) |
cc90b958 | 21963 | -{ |
00e5a55c | 21964 | - unsigned long flags = __raw_local_save_flags(); |
cc90b958 | 21965 | - |
00e5a55c | 21966 | - raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); |
cc90b958 BS |
21967 | -} |
21968 | - | |
00e5a55c | 21969 | -static inline void raw_local_irq_enable(void) |
cc90b958 | 21970 | -{ |
00e5a55c | 21971 | - unsigned long flags = __raw_local_save_flags(); |
cc90b958 | 21972 | - |
00e5a55c | 21973 | - raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); |
cc90b958 BS |
21974 | -} |
21975 | - | |
00e5a55c | 21976 | -static inline int raw_irqs_disabled_flags(unsigned long flags) |
cc90b958 | 21977 | -{ |
00e5a55c | 21978 | - return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC); |
cc90b958 BS |
21979 | -} |
21980 | - | |
00e5a55c | 21981 | -#else /* CONFIG_X86_VSMP */ |
cc90b958 | 21982 | - |
00e5a55c BS |
21983 | -#define raw_local_irq_disable() \ |
21984 | -do { \ | |
21985 | - current_vcpu_info()->evtchn_upcall_mask = 1; \ | |
21986 | - barrier(); \ | |
21987 | -} while (0) | |
cc90b958 | 21988 | - |
00e5a55c BS |
21989 | -#define raw_local_irq_enable() \ |
21990 | -do { \ | |
21991 | - vcpu_info_t *_vcpu; \ | |
21992 | - barrier(); \ | |
21993 | - _vcpu = current_vcpu_info(); \ | |
21994 | - _vcpu->evtchn_upcall_mask = 0; \ | |
21995 | - barrier(); /* unmask then check (avoid races) */ \ | |
21996 | - if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ | |
21997 | - force_evtchn_callback(); \ | |
21998 | -} while (0) | |
21999 | - | |
22000 | -static inline int raw_irqs_disabled_flags(unsigned long flags) | |
22001 | -{ | |
22002 | - return (flags != 0); | |
22003 | -} | |
22004 | - | |
22005 | -#endif | |
cc90b958 BS |
22006 | - |
22007 | -/* | |
00e5a55c | 22008 | - * For spinlocks, etc.: |
cc90b958 | 22009 | - */ |
cc90b958 | 22010 | - |
00e5a55c BS |
22011 | -#define __raw_local_irq_save() \ |
22012 | -({ \ | |
22013 | - unsigned long flags = __raw_local_save_flags(); \ | |
22014 | - \ | |
22015 | - raw_local_irq_disable(); \ | |
22016 | - \ | |
22017 | - flags; \ | |
22018 | -}) | |
cc90b958 | 22019 | - |
00e5a55c BS |
22020 | -#define raw_local_irq_save(flags) \ |
22021 | - do { (flags) = __raw_local_irq_save(); } while (0) | |
cc90b958 | 22022 | - |
00e5a55c BS |
22023 | -#define raw_irqs_disabled() \ |
22024 | -({ \ | |
22025 | - unsigned long flags = __raw_local_save_flags(); \ | |
22026 | - \ | |
22027 | - raw_irqs_disabled_flags(flags); \ | |
22028 | -}) | |
cc90b958 BS |
22029 | - |
22030 | -/* | |
00e5a55c BS |
22031 | - * makes the traced hardirq state match with the machine state |
22032 | - * | |
22033 | - * should be a rarely used function, only in places where its | |
22034 | - * otherwise impossible to know the irq state, like in traps. | |
cc90b958 | 22035 | - */ |
00e5a55c BS |
22036 | -static inline void trace_hardirqs_fixup_flags(unsigned long flags) |
22037 | -{ | |
22038 | - if (raw_irqs_disabled_flags(flags)) | |
22039 | - trace_hardirqs_off(); | |
22040 | - else | |
22041 | - trace_hardirqs_on(); | |
22042 | -} | |
cc90b958 | 22043 | - |
00e5a55c BS |
22044 | -#define trace_hardirqs_fixup() \ |
22045 | - trace_hardirqs_fixup_flags(__raw_local_save_flags()) | |
22046 | -/* | |
22047 | - * Used in the idle loop; sti takes one instruction cycle | |
22048 | - * to complete: | |
22049 | - */ | |
22050 | -void xen_safe_halt(void); | |
22051 | -static inline void raw_safe_halt(void) | |
22052 | -{ | |
22053 | - xen_safe_halt(); | |
22054 | -} | |
cc90b958 | 22055 | - |
00e5a55c BS |
22056 | -/* |
22057 | - * Used when interrupts are already enabled or to | |
22058 | - * shutdown the processor: | |
22059 | - */ | |
22060 | -void xen_halt(void); | |
22061 | -static inline void halt(void) | |
22062 | -{ | |
22063 | - xen_halt(); | |
22064 | -} | |
cc90b958 | 22065 | - |
00e5a55c BS |
22066 | -#else /* __ASSEMBLY__: */ |
22067 | -# ifdef CONFIG_TRACE_IRQFLAGS | |
22068 | -# define TRACE_IRQS_ON call trace_hardirqs_on_thunk | |
22069 | -# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk | |
22070 | -# else | |
22071 | -# define TRACE_IRQS_ON | |
22072 | -# define TRACE_IRQS_OFF | |
22073 | -# endif | |
22074 | -# ifdef CONFIG_DEBUG_LOCK_ALLOC | |
22075 | -# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk | |
22076 | -# define LOCKDEP_SYS_EXIT_IRQ \ | |
22077 | - TRACE_IRQS_ON; \ | |
22078 | - sti; \ | |
22079 | - SAVE_REST; \ | |
22080 | - LOCKDEP_SYS_EXIT; \ | |
22081 | - RESTORE_REST; \ | |
22082 | - cli; \ | |
22083 | - TRACE_IRQS_OFF; | |
22084 | -# else | |
22085 | -# define LOCKDEP_SYS_EXIT | |
22086 | -# define LOCKDEP_SYS_EXIT_IRQ | |
22087 | -# endif | |
22088 | -#endif | |
cc90b958 | 22089 | - |
00e5a55c BS |
22090 | -#endif |
22091 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/maddr_32.h 2009-02-16 16:17:21.000000000 +0100 | |
22092 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/maddr_32.h 2009-03-16 16:33:40.000000000 +0100 | |
22093 | @@ -1,6 +1,7 @@ | |
22094 | #ifndef _I386_MADDR_H | |
22095 | #define _I386_MADDR_H | |
22096 | ||
22097 | +#include <asm/bug.h> | |
22098 | #include <xen/features.h> | |
22099 | #include <xen/interface/xen.h> | |
22100 | ||
22101 | @@ -151,25 +152,9 @@ static inline paddr_t pte_machine_to_phy | |
22102 | phys = (phys << PAGE_SHIFT) | (machine & ~PHYSICAL_PAGE_MASK); | |
22103 | return phys; | |
22104 | } | |
22105 | -#endif | |
cc90b958 | 22106 | - |
00e5a55c BS |
22107 | -#ifdef CONFIG_X86_PAE |
22108 | -#define __pte_ma(x) ((pte_t) { (x), (maddr_t)(x) >> 32 } ) | |
22109 | -extern unsigned long long __supported_pte_mask; | |
22110 | -static inline pte_t pfn_pte_ma(unsigned long page_nr, pgprot_t pgprot) | |
22111 | -{ | |
22112 | - pte_t pte; | |
cc90b958 | 22113 | - |
00e5a55c BS |
22114 | - pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) | \ |
22115 | - (pgprot_val(pgprot) >> 32); | |
22116 | - pte.pte_high &= (__supported_pte_mask >> 32); | |
22117 | - pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & \ | |
22118 | - __supported_pte_mask; | |
22119 | - return pte; | |
22120 | -} | |
22121 | #else | |
22122 | -#define __pte_ma(x) ((pte_t) { (x) } ) | |
22123 | -#define pfn_pte_ma(pfn, prot) __pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) | |
22124 | +#define pte_phys_to_machine phys_to_machine | |
22125 | +#define pte_machine_to_phys machine_to_phys | |
22126 | #endif | |
22127 | ||
22128 | #else /* !CONFIG_XEN */ | |
22129 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/maddr_64.h 2009-05-14 10:56:29.000000000 +0200 | |
22130 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/maddr_64.h 2009-03-16 16:33:40.000000000 +0100 | |
22131 | @@ -1,6 +1,7 @@ | |
22132 | #ifndef _X86_64_MADDR_H | |
22133 | #define _X86_64_MADDR_H | |
22134 | ||
22135 | +#include <asm/bug.h> | |
22136 | #include <xen/features.h> | |
22137 | #include <xen/interface/xen.h> | |
22138 | ||
22139 | @@ -16,6 +17,7 @@ typedef unsigned long maddr_t; | |
22140 | #ifdef CONFIG_XEN | |
22141 | ||
22142 | extern unsigned long *phys_to_machine_mapping; | |
22143 | +extern unsigned long max_mapnr; | |
22144 | ||
22145 | #undef machine_to_phys_mapping | |
22146 | extern unsigned long *machine_to_phys_mapping; | |
22147 | @@ -25,7 +27,7 @@ static inline unsigned long pfn_to_mfn(u | |
22148 | { | |
22149 | if (xen_feature(XENFEAT_auto_translated_physmap)) | |
22150 | return pfn; | |
22151 | - BUG_ON(end_pfn && pfn >= end_pfn); | |
22152 | + BUG_ON(max_mapnr && pfn >= max_mapnr); | |
22153 | return phys_to_machine_mapping[pfn] & ~FOREIGN_FRAME_BIT; | |
22154 | } | |
22155 | ||
22156 | @@ -33,7 +35,7 @@ static inline int phys_to_machine_mappin | |
22157 | { | |
22158 | if (xen_feature(XENFEAT_auto_translated_physmap)) | |
22159 | return 1; | |
22160 | - BUG_ON(end_pfn && pfn >= end_pfn); | |
22161 | + BUG_ON(max_mapnr && pfn >= max_mapnr); | |
22162 | return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); | |
22163 | } | |
22164 | ||
22165 | @@ -45,7 +47,7 @@ static inline unsigned long mfn_to_pfn(u | |
22166 | return mfn; | |
22167 | ||
22168 | if (unlikely((mfn >> machine_to_phys_order) != 0)) | |
22169 | - return end_pfn; | |
22170 | + return max_mapnr; | |
22171 | ||
22172 | /* The array access can fail (e.g., device space beyond end of RAM). */ | |
22173 | asm ( | |
22174 | @@ -60,7 +62,7 @@ static inline unsigned long mfn_to_pfn(u | |
22175 | " .quad 1b,3b\n" | |
22176 | ".previous" | |
22177 | : "=r" (pfn) | |
22178 | - : "m" (machine_to_phys_mapping[mfn]), "m" (end_pfn) ); | |
22179 | + : "m" (machine_to_phys_mapping[mfn]), "m" (max_mapnr) ); | |
22180 | ||
22181 | return pfn; | |
22182 | } | |
22183 | @@ -88,16 +90,16 @@ static inline unsigned long mfn_to_pfn(u | |
22184 | static inline unsigned long mfn_to_local_pfn(unsigned long mfn) | |
22185 | { | |
22186 | unsigned long pfn = mfn_to_pfn(mfn); | |
22187 | - if ((pfn < end_pfn) | |
22188 | + if ((pfn < max_mapnr) | |
22189 | && !xen_feature(XENFEAT_auto_translated_physmap) | |
22190 | && (phys_to_machine_mapping[pfn] != mfn)) | |
22191 | - return end_pfn; /* force !pfn_valid() */ | |
22192 | + return max_mapnr; /* force !pfn_valid() */ | |
22193 | return pfn; | |
22194 | } | |
22195 | ||
22196 | static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |
22197 | { | |
22198 | - BUG_ON(end_pfn && pfn >= end_pfn); | |
22199 | + BUG_ON(max_mapnr && pfn >= max_mapnr); | |
22200 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | |
22201 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | |
22202 | return; | |
22203 | @@ -135,9 +137,6 @@ static inline paddr_t pte_machine_to_phy | |
22204 | return phys; | |
22205 | } | |
22206 | ||
22207 | -#define __pte_ma(x) ((pte_t) { (x) } ) | |
22208 | -#define pfn_pte_ma(pfn, prot) __pte_ma((((pfn) << PAGE_SHIFT) | pgprot_val(prot)) & __supported_pte_mask) | |
cc90b958 | 22209 | - |
00e5a55c BS |
22210 | #else /* !CONFIG_XEN */ |
22211 | ||
22212 | #define pfn_to_mfn(pfn) (pfn) | |
22213 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-02-16 16:17:21.000000000 +0100 | |
22214 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-03-16 16:33:40.000000000 +0100 | |
22215 | @@ -51,8 +51,6 @@ static inline void __prepare_arch_switch | |
22216 | : : "r" (0) ); | |
22217 | } | |
22218 | ||
22219 | -void leave_mm(unsigned long cpu); | |
22220 | - | |
22221 | static inline void switch_mm(struct mm_struct *prev, | |
22222 | struct mm_struct *next, | |
22223 | struct task_struct *tsk) | |
22224 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-02-16 16:17:21.000000000 +0100 | |
22225 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-03-16 16:33:40.000000000 +0100 | |
22226 | @@ -62,12 +62,6 @@ extern void mm_pin(struct mm_struct *mm) | |
22227 | extern void mm_unpin(struct mm_struct *mm); | |
22228 | void mm_pin_all(void); | |
22229 | ||
22230 | -static inline void load_cr3(pgd_t *pgd) | |
22231 | -{ | |
22232 | - asm volatile("movq %0,%%cr3" :: "r" (phys_to_machine(__pa(pgd))) : | |
22233 | - "memory"); | |
22234 | -} | |
22235 | - | |
22236 | static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, | |
22237 | struct task_struct *tsk) | |
22238 | { | |
22239 | @@ -97,7 +91,7 @@ static inline void switch_mm(struct mm_s | |
22240 | op++; | |
22241 | ||
22242 | if (unlikely(next->context.ldt != prev->context.ldt)) { | |
22243 | - /* load_LDT_nolock(&next->context, cpu) */ | |
22244 | + /* load_LDT_nolock(&next->context) */ | |
22245 | op->cmd = MMUEXT_SET_LDT; | |
22246 | op->arg1.linear_addr = (unsigned long)next->context.ldt; | |
22247 | op->arg2.nr_ents = next->context.size; | |
22248 | @@ -110,7 +104,7 @@ static inline void switch_mm(struct mm_s | |
22249 | else { | |
22250 | write_pda(mmu_state, TLBSTATE_OK); | |
22251 | if (read_pda(active_mm) != next) | |
22252 | - out_of_line_bug(); | |
22253 | + BUG(); | |
22254 | if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { | |
22255 | /* We were in lazy tlb mode and leave_mm disabled | |
22256 | * tlb flush IPI delivery. We must reload CR3 | |
22257 | @@ -118,7 +112,7 @@ static inline void switch_mm(struct mm_s | |
22258 | */ | |
22259 | load_cr3(next->pgd); | |
22260 | xen_new_user_pt(__pa(__user_pgd(next->pgd))); | |
22261 | - load_LDT_nolock(&next->context, cpu); | |
22262 | + load_LDT_nolock(&next->context); | |
22263 | } | |
22264 | } | |
22265 | #endif | |
22266 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/page.h 2009-02-16 16:18:36.000000000 +0100 | |
22267 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/page.h 2009-03-16 16:33:40.000000000 +0100 | |
22268 | @@ -1,13 +1,231 @@ | |
22269 | +#ifndef _ASM_X86_PAGE_H | |
22270 | +#define _ASM_X86_PAGE_H | |
22271 | + | |
22272 | +#include <linux/const.h> | |
22273 | + | |
22274 | +/* PAGE_SHIFT determines the page size */ | |
22275 | +#define PAGE_SHIFT 12 | |
22276 | +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) | |
22277 | +#define PAGE_MASK (~(PAGE_SIZE-1)) | |
22278 | + | |
22279 | #ifdef __KERNEL__ | |
22280 | -# ifdef CONFIG_X86_32 | |
22281 | -# include "page_32.h" | |
22282 | -# else | |
22283 | -# include "page_64.h" | |
22284 | -# endif | |
22285 | + | |
22286 | +/* | |
22287 | + * Need to repeat this here in order to not include pgtable.h (which in turn | |
22288 | + * depends on definitions made here), but to be able to use the symbolics | |
22289 | + * below. The preprocessor will warn if the two definitions aren't identical. | |
22290 | + */ | |
22291 | +#define _PAGE_BIT_PRESENT 0 | |
22292 | +#define _PAGE_PRESENT (_AC(1, L)<<_PAGE_BIT_PRESENT) | |
22293 | +#define _PAGE_BIT_IO 9 | |
22294 | +#define _PAGE_IO (_AC(1, L)<<_PAGE_BIT_IO) | |
22295 | + | |
22296 | +#define PHYSICAL_PAGE_MASK (~(_AT(phys_addr_t, PAGE_SIZE) - 1) & __PHYSICAL_MASK) | |
22297 | +#define PTE_MASK _AT(pteval_t, PHYSICAL_PAGE_MASK) | |
22298 | + | |
22299 | +#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) | |
22300 | +#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) | |
22301 | + | |
22302 | +#define HPAGE_SHIFT PMD_SHIFT | |
22303 | +#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) | |
22304 | +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) | |
22305 | +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) | |
22306 | + | |
22307 | +/* to align the pointer to the (next) page boundary */ | |
22308 | +#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) | |
22309 | + | |
22310 | +#define __PHYSICAL_MASK _AT(phys_addr_t, (_AC(1,ULL) << __PHYSICAL_MASK_SHIFT) - 1) | |
22311 | +#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1) | |
22312 | + | |
22313 | +#ifndef __ASSEMBLY__ | |
22314 | +#include <linux/types.h> | |
22315 | +#endif | |
22316 | + | |
22317 | +#ifdef CONFIG_X86_64 | |
22318 | +#include <asm/page_64.h> | |
22319 | +#define max_pfn_mapped end_pfn_map | |
22320 | +#else | |
22321 | +#include <asm/page_32.h> | |
22322 | +#define max_pfn_mapped max_low_pfn | |
22323 | +#endif /* CONFIG_X86_64 */ | |
22324 | + | |
22325 | +#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) | |
22326 | + | |
22327 | +#define VM_DATA_DEFAULT_FLAGS \ | |
22328 | + (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ | |
22329 | + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) | |
22330 | + | |
22331 | + | |
22332 | +#ifndef __ASSEMBLY__ | |
22333 | + | |
22334 | +extern int page_is_ram(unsigned long pagenr); | |
22335 | + | |
22336 | +struct page; | |
22337 | + | |
22338 | +static inline void clear_user_page(void *page, unsigned long vaddr, | |
22339 | + struct page *pg) | |
22340 | +{ | |
22341 | + clear_page(page); | |
22342 | +} | |
22343 | + | |
22344 | +static inline void copy_user_page(void *to, void *from, unsigned long vaddr, | |
22345 | + struct page *topage) | |
22346 | +{ | |
22347 | + copy_page(to, from); | |
22348 | +} | |
22349 | + | |
22350 | +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ | |
22351 | + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) | |
22352 | +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE | |
22353 | + | |
22354 | +typedef struct { pgprotval_t pgprot; } pgprot_t; | |
22355 | + | |
22356 | +#define pgprot_val(x) ((x).pgprot) | |
22357 | +#define __pgprot(x) ((pgprot_t) { (x) } ) | |
22358 | + | |
22359 | +#include <asm/maddr.h> | |
22360 | + | |
22361 | +typedef struct { pgdval_t pgd; } pgd_t; | |
22362 | + | |
22363 | +#define __pgd_ma(x) ((pgd_t) { (x) } ) | |
22364 | +static inline pgd_t xen_make_pgd(pgdval_t val) | |
22365 | +{ | |
22366 | + if (val & _PAGE_PRESENT) | |
22367 | + val = pte_phys_to_machine(val); | |
22368 | + return (pgd_t) { val }; | |
22369 | +} | |
22370 | + | |
22371 | +#define __pgd_val(x) ((x).pgd) | |
22372 | +static inline pgdval_t xen_pgd_val(pgd_t pgd) | |
22373 | +{ | |
22374 | + pgdval_t ret = __pgd_val(pgd); | |
22375 | +#if PAGETABLE_LEVELS == 2 && CONFIG_XEN_COMPAT <= 0x030002 | |
22376 | + if (ret) | |
22377 | + ret = machine_to_phys(ret) | _PAGE_PRESENT; | |
22378 | +#else | |
22379 | + if (ret & _PAGE_PRESENT) | |
22380 | + ret = pte_machine_to_phys(ret); | |
22381 | +#endif | |
22382 | + return ret; | |
22383 | +} | |
22384 | + | |
22385 | +#if PAGETABLE_LEVELS >= 3 | |
22386 | +#if PAGETABLE_LEVELS == 4 | |
22387 | +typedef struct { pudval_t pud; } pud_t; | |
22388 | + | |
22389 | +#define __pud_ma(x) ((pud_t) { (x) } ) | |
22390 | +static inline pud_t xen_make_pud(pudval_t val) | |
22391 | +{ | |
22392 | + if (val & _PAGE_PRESENT) | |
22393 | + val = pte_phys_to_machine(val); | |
22394 | + return (pud_t) { val }; | |
22395 | +} | |
22396 | + | |
22397 | +#define __pud_val(x) ((x).pud) | |
22398 | +static inline pudval_t xen_pud_val(pud_t pud) | |
22399 | +{ | |
22400 | + pudval_t ret = __pud_val(pud); | |
22401 | + if (ret & _PAGE_PRESENT) | |
22402 | + ret = pte_machine_to_phys(ret); | |
22403 | + return ret; | |
22404 | +} | |
22405 | +#else /* PAGETABLE_LEVELS == 3 */ | |
22406 | +#include <asm-generic/pgtable-nopud.h> | |
22407 | + | |
22408 | +#define __pud_val(x) __pgd_val((x).pgd) | |
22409 | +static inline pudval_t xen_pud_val(pud_t pud) | |
22410 | +{ | |
22411 | + return xen_pgd_val(pud.pgd); | |
22412 | +} | |
22413 | +#endif /* PAGETABLE_LEVELS == 4 */ | |
22414 | + | |
22415 | +typedef struct { pmdval_t pmd; } pmd_t; | |
22416 | + | |
22417 | +#define __pmd_ma(x) ((pmd_t) { (x) } ) | |
22418 | +static inline pmd_t xen_make_pmd(pmdval_t val) | |
22419 | +{ | |
22420 | + if (val & _PAGE_PRESENT) | |
22421 | + val = pte_phys_to_machine(val); | |
22422 | + return (pmd_t) { val }; | |
22423 | +} | |
22424 | + | |
22425 | +#define __pmd_val(x) ((x).pmd) | |
22426 | +static inline pmdval_t xen_pmd_val(pmd_t pmd) | |
22427 | +{ | |
22428 | + pmdval_t ret = __pmd_val(pmd); | |
22429 | +#if CONFIG_XEN_COMPAT <= 0x030002 | |
22430 | + if (ret) | |
22431 | + ret = pte_machine_to_phys(ret) | _PAGE_PRESENT; | |
22432 | #else | |
22433 | -# ifdef __i386__ | |
22434 | -# include "page_32.h" | |
22435 | -# else | |
22436 | -# include "page_64.h" | |
22437 | -# endif | |
22438 | + if (ret & _PAGE_PRESENT) | |
22439 | + ret = pte_machine_to_phys(ret); | |
22440 | +#endif | |
22441 | + return ret; | |
22442 | +} | |
22443 | +#else /* PAGETABLE_LEVELS == 2 */ | |
22444 | +#include <asm-generic/pgtable-nopmd.h> | |
22445 | + | |
22446 | +#define __pmd_ma(x) ((pmd_t) { .pud.pgd = __pgd_ma(x) } ) | |
22447 | +#define __pmd_val(x) __pgd_val((x).pud.pgd) | |
22448 | +static inline pmdval_t xen_pmd_val(pmd_t pmd) | |
22449 | +{ | |
22450 | + return xen_pgd_val(pmd.pud.pgd); | |
22451 | +} | |
22452 | +#endif /* PAGETABLE_LEVELS >= 3 */ | |
22453 | + | |
22454 | +#define __pte_ma(x) ((pte_t) { .pte = (x) } ) | |
22455 | +static inline pte_t xen_make_pte(pteval_t val) | |
22456 | +{ | |
22457 | + if ((val & (_PAGE_PRESENT|_PAGE_IO)) == _PAGE_PRESENT) | |
22458 | + val = pte_phys_to_machine(val); | |
22459 | + return (pte_t) { .pte = val }; | |
22460 | +} | |
22461 | + | |
22462 | +#define __pte_val(x) ((x).pte) | |
22463 | +static inline pteval_t xen_pte_val(pte_t pte) | |
22464 | +{ | |
22465 | + pteval_t ret = __pte_val(pte); | |
22466 | + if ((pte.pte_low & (_PAGE_PRESENT|_PAGE_IO)) == _PAGE_PRESENT) | |
22467 | + ret = pte_machine_to_phys(ret); | |
22468 | + return ret; | |
22469 | +} | |
22470 | + | |
22471 | +#define pgd_val(x) xen_pgd_val(x) | |
22472 | +#define __pgd(x) xen_make_pgd(x) | |
22473 | + | |
22474 | +#ifndef __PAGETABLE_PUD_FOLDED | |
22475 | +#define pud_val(x) xen_pud_val(x) | |
22476 | +#define __pud(x) xen_make_pud(x) | |
22477 | +#endif | |
22478 | + | |
22479 | +#ifndef __PAGETABLE_PMD_FOLDED | |
22480 | +#define pmd_val(x) xen_pmd_val(x) | |
22481 | +#define __pmd(x) xen_make_pmd(x) | |
22482 | #endif | |
22483 | + | |
22484 | +#define pte_val(x) xen_pte_val(x) | |
22485 | +#define __pte(x) xen_make_pte(x) | |
22486 | + | |
22487 | +#define __pa(x) __phys_addr((unsigned long)(x)) | |
22488 | +/* __pa_symbol should be used for C visible symbols. | |
22489 | + This seems to be the official gcc blessed way to do such arithmetic. */ | |
22490 | +#define __pa_symbol(x) __pa(__phys_reloc_hide((unsigned long)(x))) | |
22491 | + | |
22492 | +#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) | |
22493 | + | |
22494 | +#define __boot_va(x) __va(x) | |
22495 | +#define __boot_pa(x) __pa(x) | |
22496 | + | |
22497 | +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) | |
22498 | +#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) | |
22499 | +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) | |
22500 | + | |
22501 | +#endif /* __ASSEMBLY__ */ | |
22502 | + | |
22503 | +#include <asm-generic/memory_model.h> | |
22504 | +#include <asm-generic/page.h> | |
22505 | + | |
22506 | +#define __HAVE_ARCH_GATE_AREA 1 | |
22507 | + | |
22508 | +#endif /* __KERNEL__ */ | |
22509 | +#endif /* _ASM_X86_PAGE_H */ | |
22510 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/page_64.h 2009-02-16 16:18:36.000000000 +0100 | |
22511 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/page_64.h 2009-03-16 16:33:40.000000000 +0100 | |
22512 | @@ -1,37 +1,9 @@ | |
22513 | #ifndef _X86_64_PAGE_H | |
22514 | #define _X86_64_PAGE_H | |
22515 | ||
22516 | -/* #include <linux/string.h> */ | |
22517 | -#ifndef __ASSEMBLY__ | |
22518 | -#include <linux/kernel.h> | |
22519 | -#include <linux/types.h> | |
22520 | -#include <asm/bug.h> | |
cc90b958 | 22521 | -#endif |
00e5a55c BS |
22522 | -#include <linux/const.h> |
22523 | -#include <xen/interface/xen.h> | |
cc90b958 BS |
22524 | - |
22525 | -/* | |
00e5a55c BS |
22526 | - * Need to repeat this here in order to not include pgtable.h (which in turn |
22527 | - * depends on definitions made here), but to be able to use the symbolic | |
22528 | - * below. The preprocessor will warn if the two definitions aren't identical. | |
cc90b958 | 22529 | - */ |
00e5a55c BS |
22530 | -#define _PAGE_PRESENT 0x001 |
22531 | -#define _PAGE_IO 0x200 | |
cc90b958 | 22532 | - |
00e5a55c BS |
22533 | -/* PAGE_SHIFT determines the page size */ |
22534 | -#define PAGE_SHIFT 12 | |
22535 | -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) | |
22536 | -#define PAGE_MASK (~(PAGE_SIZE-1)) | |
cc90b958 | 22537 | - |
00e5a55c BS |
22538 | -/* See Documentation/x86_64/mm.txt for a description of the memory map. */ |
22539 | -#define __PHYSICAL_MASK_SHIFT 46 | |
22540 | -#define __PHYSICAL_MASK ((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1) | |
22541 | -#define __VIRTUAL_MASK_SHIFT 48 | |
22542 | -#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1) | |
cc90b958 | 22543 | - |
00e5a55c BS |
22544 | -#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK) |
22545 | +#define PAGETABLE_LEVELS 4 | |
22546 | ||
22547 | -#define THREAD_ORDER 1 | |
22548 | +#define THREAD_ORDER 1 | |
22549 | #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) | |
22550 | #define CURRENT_MASK (~(THREAD_SIZE-1)) | |
22551 | ||
22552 | @@ -51,106 +23,10 @@ | |
22553 | #define MCE_STACK 5 | |
22554 | #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ | |
22555 | ||
22556 | -#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) | |
22557 | -#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT) | |
cc90b958 | 22558 | - |
00e5a55c BS |
22559 | -#define HPAGE_SHIFT PMD_SHIFT |
22560 | -#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) | |
22561 | -#define HPAGE_MASK (~(HPAGE_SIZE - 1)) | |
22562 | -#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) | |
cc90b958 | 22563 | - |
00e5a55c BS |
22564 | -#ifdef __KERNEL__ |
22565 | -#ifndef __ASSEMBLY__ | |
cc90b958 | 22566 | - |
00e5a55c | 22567 | -extern unsigned long end_pfn; |
cc90b958 | 22568 | - |
00e5a55c | 22569 | -#include <asm/maddr.h> |
cc90b958 | 22570 | - |
00e5a55c BS |
22571 | -void clear_page(void *); |
22572 | -void copy_page(void *, void *); | |
22573 | - | |
22574 | -#define clear_user_page(page, vaddr, pg) clear_page(page) | |
22575 | -#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) | |
22576 | - | |
22577 | -#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ | |
22578 | - alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) | |
22579 | -#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE | |
cc90b958 BS |
22580 | - |
22581 | -/* | |
00e5a55c | 22582 | - * These are used to make use of C type-checking.. |
cc90b958 | 22583 | - */ |
00e5a55c BS |
22584 | -typedef struct { unsigned long pte; } pte_t; |
22585 | -typedef struct { unsigned long pmd; } pmd_t; | |
22586 | -typedef struct { unsigned long pud; } pud_t; | |
22587 | -typedef struct { unsigned long pgd; } pgd_t; | |
22588 | -#define PTE_MASK PHYSICAL_PAGE_MASK | |
cc90b958 | 22589 | - |
00e5a55c | 22590 | -typedef struct { unsigned long pgprot; } pgprot_t; |
cc90b958 | 22591 | - |
00e5a55c BS |
22592 | -#define __pte_val(x) ((x).pte) |
22593 | -#define pte_val(x) ((__pte_val(x) & (_PAGE_PRESENT|_PAGE_IO)) \ | |
22594 | - == _PAGE_PRESENT ? \ | |
22595 | - pte_machine_to_phys(__pte_val(x)) : \ | |
22596 | - __pte_val(x)) | |
cc90b958 | 22597 | - |
00e5a55c BS |
22598 | -#define __pmd_val(x) ((x).pmd) |
22599 | -static inline unsigned long pmd_val(pmd_t x) | |
22600 | -{ | |
22601 | - unsigned long ret = __pmd_val(x); | |
22602 | -#if CONFIG_XEN_COMPAT <= 0x030002 | |
22603 | - if (ret) ret = pte_machine_to_phys(ret) | _PAGE_PRESENT; | |
22604 | -#else | |
22605 | - if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); | |
22606 | -#endif | |
22607 | - return ret; | |
22608 | -} | |
cc90b958 | 22609 | - |
00e5a55c BS |
22610 | -#define __pud_val(x) ((x).pud) |
22611 | -static inline unsigned long pud_val(pud_t x) | |
22612 | -{ | |
22613 | - unsigned long ret = __pud_val(x); | |
22614 | - if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); | |
22615 | - return ret; | |
22616 | -} | |
cc90b958 | 22617 | - |
00e5a55c BS |
22618 | -#define __pgd_val(x) ((x).pgd) |
22619 | -static inline unsigned long pgd_val(pgd_t x) | |
22620 | -{ | |
22621 | - unsigned long ret = __pgd_val(x); | |
22622 | - if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); | |
22623 | - return ret; | |
22624 | -} | |
cc90b958 | 22625 | - |
00e5a55c | 22626 | -#define pgprot_val(x) ((x).pgprot) |
cc90b958 | 22627 | - |
00e5a55c BS |
22628 | -static inline pte_t __pte(unsigned long x) |
22629 | -{ | |
22630 | - if ((x & (_PAGE_PRESENT|_PAGE_IO)) == _PAGE_PRESENT) | |
22631 | - x = pte_phys_to_machine(x); | |
22632 | - return ((pte_t) { (x) }); | |
22633 | -} | |
22634 | - | |
22635 | -static inline pmd_t __pmd(unsigned long x) | |
22636 | -{ | |
22637 | - if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x); | |
22638 | - return ((pmd_t) { (x) }); | |
22639 | -} | |
22640 | - | |
22641 | -static inline pud_t __pud(unsigned long x) | |
22642 | -{ | |
22643 | - if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x); | |
22644 | - return ((pud_t) { (x) }); | |
22645 | -} | |
cc90b958 | 22646 | - |
00e5a55c | 22647 | -static inline pgd_t __pgd(unsigned long x) |
cc90b958 | 22648 | -{ |
00e5a55c BS |
22649 | - if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x); |
22650 | - return ((pgd_t) { (x) }); | |
cc90b958 BS |
22651 | -} |
22652 | - | |
00e5a55c BS |
22653 | -#define __pgprot(x) ((pgprot_t) { (x) } ) |
22654 | +#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) | |
22655 | +#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) | |
22656 | ||
22657 | -#endif /* !__ASSEMBLY__ */ | |
22658 | +#define __PAGE_OFFSET _AC(0xffff880000000000, UL) | |
22659 | ||
22660 | #define __PHYSICAL_START CONFIG_PHYSICAL_START | |
22661 | #define __KERNEL_ALIGN 0x200000 | |
22662 | @@ -166,52 +42,58 @@ static inline pgd_t __pgd(unsigned long | |
22663 | ||
22664 | #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) | |
22665 | #define __START_KERNEL_map _AC(0xffffffff80000000, UL) | |
22666 | -#define __PAGE_OFFSET _AC(0xffff880000000000, UL) | |
22667 | ||
22668 | #if CONFIG_XEN_COMPAT <= 0x030002 | |
22669 | #undef LOAD_OFFSET | |
22670 | #define LOAD_OFFSET 0 | |
22671 | #endif | |
22672 | ||
22673 | -/* to align the pointer to the (next) page boundary */ | |
22674 | -#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) | |
cc90b958 | 22675 | - |
00e5a55c BS |
22676 | -#define KERNEL_TEXT_SIZE (40*1024*1024) |
22677 | -#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL) | |
22678 | +/* See Documentation/x86_64/mm.txt for a description of the memory map. */ | |
22679 | +#define __PHYSICAL_MASK_SHIFT 46 | |
22680 | +#define __VIRTUAL_MASK_SHIFT 48 | |
22681 | ||
22682 | -#define PAGE_OFFSET __PAGE_OFFSET | |
22683 | +/* | |
22684 | + * Kernel image size is limited to 128 MB (see level2_kernel_pgt in | |
22685 | + * arch/x86/kernel/head_64.S), and it is mapped here: | |
22686 | + */ | |
22687 | +#define KERNEL_IMAGE_SIZE (128*1024*1024) | |
22688 | +#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL) | |
22689 | ||
22690 | #ifndef __ASSEMBLY__ | |
22691 | +void clear_page(void *page); | |
22692 | +void copy_page(void *to, void *from); | |
22693 | + | |
22694 | +extern unsigned long end_pfn; | |
22695 | +extern unsigned long end_pfn_map; | |
22696 | + | |
22697 | static inline unsigned long __phys_addr(unsigned long x) | |
22698 | { | |
22699 | - return x - (x >= __START_KERNEL_map ? __START_KERNEL_map : PAGE_OFFSET); | |
22700 | + return x - (x >= __START_KERNEL_map ? __START_KERNEL_map : __PAGE_OFFSET); | |
22701 | } | |
22702 | -#endif | |
22703 | ||
22704 | -#define __pa(x) __phys_addr((unsigned long)(x)) | |
22705 | -#define __pa_symbol(x) __phys_addr((unsigned long)(x)) | |
22706 | +#define __phys_reloc_hide(x) (x) | |
22707 | ||
22708 | -#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) | |
22709 | -#define __boot_va(x) __va(x) | |
22710 | -#define __boot_pa(x) __pa(x) | |
22711 | -#ifdef CONFIG_FLATMEM | |
22712 | -#define pfn_valid(pfn) ((pfn) < end_pfn) | |
22713 | -#endif | |
22714 | +/* | |
22715 | + * These are used to make use of C type-checking.. | |
22716 | + */ | |
22717 | +typedef unsigned long pteval_t; | |
22718 | +typedef unsigned long pmdval_t; | |
22719 | +typedef unsigned long pudval_t; | |
22720 | +typedef unsigned long pgdval_t; | |
22721 | +typedef unsigned long pgprotval_t; | |
22722 | +typedef unsigned long phys_addr_t; | |
22723 | ||
22724 | -#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) | |
22725 | -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) | |
22726 | -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) | |
22727 | - | |
22728 | -#define VM_DATA_DEFAULT_FLAGS \ | |
22729 | - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ | |
22730 | - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) | |
22731 | +typedef struct page *pgtable_t; | |
22732 | + | |
22733 | +typedef union { pteval_t pte; unsigned int pte_low; } pte_t; | |
22734 | ||
22735 | -#define __HAVE_ARCH_GATE_AREA 1 | |
22736 | #define vmemmap ((struct page *)VMEMMAP_START) | |
22737 | ||
22738 | -#include <asm-generic/memory_model.h> | |
22739 | -#include <asm-generic/page.h> | |
22740 | +#endif /* !__ASSEMBLY__ */ | |
22741 | + | |
22742 | +#ifdef CONFIG_FLATMEM | |
22743 | +#define pfn_valid(pfn) ((pfn) < max_mapnr) | |
22744 | +#endif | |
22745 | ||
22746 | -#endif /* __KERNEL__ */ | |
22747 | ||
22748 | #endif /* _X86_64_PAGE_H */ | |
22749 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pci.h 2009-02-16 16:18:36.000000000 +0100 | |
22750 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pci.h 2009-03-16 16:33:40.000000000 +0100 | |
22751 | @@ -71,6 +71,7 @@ extern int pci_mmap_page_range(struct pc | |
22752 | ||
22753 | ||
22754 | #ifdef CONFIG_PCI | |
22755 | +extern void early_quirks(void); | |
22756 | static inline void pci_dma_burst_advice(struct pci_dev *pdev, | |
22757 | enum pci_dma_burst_strategy *strat, | |
22758 | unsigned long *strategy_parameter) | |
22759 | @@ -78,9 +79,10 @@ static inline void pci_dma_burst_advice( | |
22760 | *strat = PCI_DMA_BURST_INFINITY; | |
22761 | *strategy_parameter = ~0UL; | |
22762 | } | |
22763 | +#else | |
22764 | +static inline void early_quirks(void) { } | |
22765 | #endif | |
22766 | ||
22767 | - | |
22768 | #endif /* __KERNEL__ */ | |
22769 | ||
22770 | #ifdef CONFIG_X86_32 | |
22771 | @@ -95,6 +97,19 @@ static inline void pci_dma_burst_advice( | |
22772 | /* generic pci stuff */ | |
22773 | #include <asm-generic/pci.h> | |
22774 | ||
22775 | +#ifdef CONFIG_NUMA | |
22776 | +/* Returns the node based on pci bus */ | |
22777 | +static inline int __pcibus_to_node(struct pci_bus *bus) | |
22778 | +{ | |
22779 | + struct pci_sysdata *sd = bus->sysdata; | |
22780 | + | |
22781 | + return sd->node; | |
22782 | +} | |
22783 | ||
22784 | +static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus) | |
22785 | +{ | |
22786 | + return node_to_cpumask(__pcibus_to_node(bus)); | |
22787 | +} | |
22788 | +#endif | |
22789 | ||
22790 | #endif | |
22791 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgalloc_32.h 2009-02-16 16:17:21.000000000 +0100 | |
22792 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgalloc_32.h 2009-03-16 16:33:40.000000000 +0100 | |
22793 | @@ -3,69 +3,109 @@ | |
22794 | ||
22795 | #include <linux/threads.h> | |
22796 | #include <linux/mm.h> /* for struct page */ | |
22797 | +#include <linux/pagemap.h> | |
22798 | +#include <asm/tlb.h> | |
22799 | +#include <asm-generic/tlb.h> | |
22800 | #include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */ | |
22801 | ||
22802 | #define paravirt_alloc_pt(mm, pfn) do { } while (0) | |
22803 | -#define paravirt_alloc_pd(pfn) do { } while (0) | |
22804 | -#define paravirt_alloc_pd(pfn) do { } while (0) | |
22805 | +#define paravirt_alloc_pd(mm, pfn) do { } while (0) | |
22806 | #define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0) | |
22807 | #define paravirt_release_pt(pfn) do { } while (0) | |
22808 | #define paravirt_release_pd(pfn) do { } while (0) | |
22809 | ||
22810 | -#define pmd_populate_kernel(mm, pmd, pte) \ | |
22811 | -do { \ | |
22812 | - paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT); \ | |
22813 | - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ | |
22814 | -} while (0) | |
22815 | - | |
22816 | -#define pmd_populate(mm, pmd, pte) \ | |
22817 | -do { \ | |
22818 | - unsigned long pfn = page_to_pfn(pte); \ | |
22819 | - paravirt_alloc_pt(mm, pfn); \ | |
22820 | - if (PagePinned(virt_to_page((mm)->pgd))) { \ | |
22821 | - if (!PageHighMem(pte)) \ | |
22822 | - BUG_ON(HYPERVISOR_update_va_mapping( \ | |
22823 | - (unsigned long)__va(pfn << PAGE_SHIFT), \ | |
22824 | - pfn_pte(pfn, PAGE_KERNEL_RO), 0)); \ | |
22825 | - else if (!test_and_set_bit(PG_pinned, &pte->flags)) \ | |
22826 | - kmap_flush_unused(); \ | |
22827 | - set_pmd(pmd, \ | |
22828 | - __pmd(_PAGE_TABLE + ((paddr_t)pfn << PAGE_SHIFT))); \ | |
22829 | - } else \ | |
22830 | - *(pmd) = __pmd(_PAGE_TABLE + ((paddr_t)pfn << PAGE_SHIFT)); \ | |
22831 | -} while (0) | |
22832 | +static inline void pmd_populate_kernel(struct mm_struct *mm, | |
22833 | + pmd_t *pmd, pte_t *pte) | |
22834 | +{ | |
22835 | + paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT); | |
22836 | + set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); | |
22837 | +} | |
22838 | + | |
22839 | +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) | |
22840 | +{ | |
22841 | + unsigned long pfn = page_to_pfn(pte); | |
22842 | + | |
22843 | + paravirt_alloc_pt(mm, pfn); | |
22844 | + if (PagePinned(virt_to_page(mm->pgd))) { | |
22845 | + if (!PageHighMem(pte)) | |
22846 | + BUG_ON(HYPERVISOR_update_va_mapping( | |
22847 | + (unsigned long)__va(pfn << PAGE_SHIFT), | |
22848 | + pfn_pte(pfn, PAGE_KERNEL_RO), 0)); | |
22849 | + else if (!test_and_set_bit(PG_pinned, &pte->flags)) | |
22850 | + kmap_flush_unused(); | |
22851 | + set_pmd(pmd, __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); | |
22852 | + } else | |
22853 | + *pmd = __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE); | |
22854 | +} | |
22855 | +#define pmd_pgtable(pmd) pmd_page(pmd) | |
22856 | ||
22857 | /* | |
22858 | * Allocate and free page tables. | |
22859 | */ | |
22860 | +extern void pgd_test_and_unpin(pgd_t *); | |
22861 | extern pgd_t *pgd_alloc(struct mm_struct *); | |
22862 | -extern void pgd_free(pgd_t *pgd); | |
22863 | +extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); | |
22864 | ||
22865 | extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long); | |
22866 | -extern struct page *pte_alloc_one(struct mm_struct *, unsigned long); | |
22867 | +extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long); | |
22868 | ||
22869 | -static inline void pte_free_kernel(pte_t *pte) | |
22870 | +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) | |
22871 | { | |
22872 | make_lowmem_page_writable(pte, XENFEAT_writable_page_tables); | |
22873 | free_page((unsigned long)pte); | |
22874 | } | |
22875 | ||
22876 | -extern void pte_free(struct page *pte); | |
22877 | +extern void __pte_free(pgtable_t); | |
22878 | +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) | |
22879 | +{ | |
22880 | + __pte_free(pte); | |
22881 | +} | |
22882 | + | |
22883 | ||
22884 | -#define __pte_free_tlb(tlb,pte) \ | |
22885 | -do { \ | |
22886 | - paravirt_release_pt(page_to_pfn(pte)); \ | |
22887 | - tlb_remove_page((tlb),(pte)); \ | |
cc90b958 | 22888 | -} while (0) |
00e5a55c BS |
22889 | +extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte); |
22890 | ||
22891 | #ifdef CONFIG_X86_PAE | |
22892 | /* | |
22893 | * In the PAE case we free the pmds as part of the pgd. | |
22894 | */ | |
22895 | -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) | |
22896 | -#define pmd_free(x) do { } while (0) | |
22897 | -#define __pmd_free_tlb(tlb,x) do { } while (0) | |
22898 | -#define pud_populate(mm, pmd, pte) BUG() | |
cc90b958 | 22899 | -#endif |
00e5a55c BS |
22900 | +extern pmd_t *pmd_alloc_one(struct mm_struct *, unsigned long); |
22901 | + | |
22902 | +extern void __pmd_free(pgtable_t); | |
22903 | +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) | |
22904 | +{ | |
22905 | + BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); | |
22906 | + __pmd_free(virt_to_page(pmd)); | |
22907 | +} | |
22908 | + | |
22909 | +extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); | |
22910 | + | |
22911 | +static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) | |
22912 | +{ | |
22913 | + struct page *page = virt_to_page(pmd); | |
22914 | + unsigned long pfn = page_to_pfn(page); | |
22915 | + | |
22916 | + paravirt_alloc_pd(mm, pfn); | |
22917 | + | |
22918 | + /* Note: almost everything apart from _PAGE_PRESENT is | |
22919 | + reserved at the pmd (PDPT) level. */ | |
22920 | + if (PagePinned(virt_to_page(mm->pgd))) { | |
22921 | + BUG_ON(PageHighMem(page)); | |
22922 | + BUG_ON(HYPERVISOR_update_va_mapping( | |
22923 | + (unsigned long)__va(pfn << PAGE_SHIFT), | |
22924 | + pfn_pte(pfn, PAGE_KERNEL_RO), 0)); | |
22925 | + set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); | |
22926 | + } else | |
22927 | + *pudp = __pud(__pa(pmd) | _PAGE_PRESENT); | |
22928 | + | |
22929 | + /* | |
22930 | + * According to Intel App note "TLBs, Paging-Structure Caches, | |
22931 | + * and Their Invalidation", April 2007, document 317080-001, | |
22932 | + * section 8.1: in PAE mode we explicitly have to flush the | |
22933 | + * TLB via cr3 if the top-level pgd is changed... | |
22934 | + */ | |
22935 | + if (mm == current->active_mm) | |
22936 | + xen_tlb_flush(); | |
22937 | +} | |
22938 | +#endif /* CONFIG_X86_PAE */ | |
22939 | ||
22940 | #endif /* _I386_PGALLOC_H */ | |
22941 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgalloc_64.h 2009-02-16 16:18:36.000000000 +0100 | |
22942 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgalloc_64.h 2009-03-16 16:33:40.000000000 +0100 | |
22943 | @@ -6,30 +6,13 @@ | |
22944 | #include <linux/mm.h> | |
22945 | #include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */ | |
22946 | ||
22947 | -#include <xen/features.h> | |
22948 | -void make_page_readonly(void *va, unsigned int feature); | |
22949 | -void make_page_writable(void *va, unsigned int feature); | |
22950 | -void make_pages_readonly(void *va, unsigned int nr, unsigned int feature); | |
22951 | -void make_pages_writable(void *va, unsigned int nr, unsigned int feature); | |
22952 | +pmd_t *early_get_pmd(unsigned long va); | |
22953 | +void early_make_page_readonly(void *va, unsigned int feature); | |
22954 | ||
22955 | #define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD) | |
22956 | ||
22957 | -static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) | |
cc90b958 | 22958 | -{ |
00e5a55c | 22959 | - set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte))); |
cc90b958 BS |
22960 | -} |
22961 | - | |
00e5a55c | 22962 | -static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) |
cc90b958 | 22963 | -{ |
00e5a55c BS |
22964 | - if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) { |
22965 | - BUG_ON(HYPERVISOR_update_va_mapping( | |
22966 | - (unsigned long)__va(page_to_pfn(pte) << PAGE_SHIFT), | |
22967 | - pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0)); | |
22968 | - set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT))); | |
22969 | - } else { | |
22970 | - *(pmd) = __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)); | |
22971 | - } | |
cc90b958 | 22972 | -} |
00e5a55c BS |
22973 | +#define pmd_populate_kernel(mm, pmd, pte) \ |
22974 | + set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte))) | |
22975 | ||
22976 | static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) | |
22977 | { | |
22978 | @@ -63,53 +46,58 @@ static inline void pgd_populate(struct m | |
22979 | } | |
22980 | } | |
22981 | ||
22982 | -extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr); | |
22983 | -extern void pte_free(struct page *pte); | |
22984 | +#define pmd_pgtable(pmd) pmd_page(pmd) | |
22985 | ||
22986 | -static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) | |
22987 | +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) | |
22988 | { | |
22989 | - struct page *pg; | |
cc90b958 | 22990 | - |
00e5a55c BS |
22991 | - pg = pte_alloc_one(mm, addr); |
22992 | - return pg ? page_address(pg) : NULL; | |
22993 | + if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) { | |
22994 | + BUG_ON(HYPERVISOR_update_va_mapping( | |
22995 | + (unsigned long)__va(page_to_pfn(pte) << PAGE_SHIFT), | |
22996 | + pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0)); | |
22997 | + set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT))); | |
22998 | + } else { | |
22999 | + *(pmd) = __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)); | |
23000 | + } | |
23001 | } | |
23002 | ||
23003 | -static inline void pmd_free(pmd_t *pmd) | |
23004 | +extern void __pmd_free(pgtable_t); | |
23005 | +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) | |
23006 | { | |
23007 | BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); | |
23008 | - pte_free(virt_to_page(pmd)); | |
23009 | + __pmd_free(virt_to_page(pmd)); | |
23010 | } | |
23011 | ||
23012 | +extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr); | |
23013 | + | |
23014 | static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) | |
23015 | { | |
23016 | - struct page *pg; | |
cc90b958 | 23017 | - |
00e5a55c BS |
23018 | - pg = pte_alloc_one(mm, addr); |
23019 | - return pg ? page_address(pg) : NULL; | |
23020 | + return (pud_t *)pmd_alloc_one(mm, addr); | |
23021 | } | |
23022 | ||
23023 | -static inline void pud_free(pud_t *pud) | |
23024 | +static inline void pud_free(struct mm_struct *mm, pud_t *pud) | |
23025 | { | |
23026 | BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); | |
23027 | - pte_free(virt_to_page(pud)); | |
23028 | + __pmd_free(virt_to_page(pud)); | |
23029 | } | |
23030 | ||
23031 | static inline void pgd_list_add(pgd_t *pgd) | |
23032 | { | |
23033 | struct page *page = virt_to_page(pgd); | |
23034 | + unsigned long flags; | |
23035 | ||
23036 | - spin_lock(&pgd_lock); | |
23037 | + spin_lock_irqsave(&pgd_lock, flags); | |
23038 | list_add(&page->lru, &pgd_list); | |
23039 | - spin_unlock(&pgd_lock); | |
23040 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
23041 | } | |
23042 | ||
23043 | static inline void pgd_list_del(pgd_t *pgd) | |
23044 | { | |
23045 | struct page *page = virt_to_page(pgd); | |
23046 | + unsigned long flags; | |
23047 | ||
23048 | - spin_lock(&pgd_lock); | |
23049 | + spin_lock_irqsave(&pgd_lock, flags); | |
23050 | list_del(&page->lru); | |
23051 | - spin_unlock(&pgd_lock); | |
23052 | + spin_unlock_irqrestore(&pgd_lock, flags); | |
23053 | } | |
23054 | ||
23055 | extern void pgd_test_and_unpin(pgd_t *); | |
23056 | @@ -145,7 +133,7 @@ static inline pgd_t *pgd_alloc(struct mm | |
23057 | return pgd; | |
23058 | } | |
23059 | ||
23060 | -static inline void pgd_free(pgd_t *pgd) | |
23061 | +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) | |
23062 | { | |
23063 | pgd_test_and_unpin(pgd); | |
23064 | pgd_list_del(pgd); | |
23065 | @@ -161,17 +149,30 @@ static inline pte_t *pte_alloc_one_kerne | |
23066 | return pte; | |
23067 | } | |
23068 | ||
23069 | +extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr); | |
23070 | + | |
23071 | /* Should really implement gc for free page table pages. This could be | |
23072 | done with a reference count in struct page. */ | |
23073 | ||
23074 | -static inline void pte_free_kernel(pte_t *pte) | |
23075 | +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) | |
23076 | { | |
23077 | BUG_ON((unsigned long)pte & (PAGE_SIZE-1)); | |
23078 | make_page_writable(pte, XENFEAT_writable_page_tables); | |
23079 | free_page((unsigned long)pte); | |
23080 | } | |
23081 | ||
23082 | -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) | |
23083 | +extern void __pte_free(pgtable_t); | |
23084 | +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) | |
23085 | +{ | |
23086 | + __pte_free(pte); | |
23087 | +} | |
23088 | + | |
23089 | +#define __pte_free_tlb(tlb,pte) \ | |
23090 | +do { \ | |
23091 | + pgtable_page_dtor((pte)); \ | |
23092 | + tlb_remove_page((tlb), (pte)); \ | |
23093 | +} while (0) | |
23094 | + | |
23095 | #define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) | |
23096 | #define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) | |
23097 | ||
23098 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable.h 2009-02-16 16:18:36.000000000 +0100 | |
23099 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable.h 2009-03-16 16:33:40.000000000 +0100 | |
23100 | @@ -1,5 +1,467 @@ | |
23101 | +#ifndef _ASM_X86_PGTABLE_H | |
23102 | +#define _ASM_X86_PGTABLE_H | |
23103 | + | |
23104 | +#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1) | |
23105 | +#define FIRST_USER_ADDRESS 0 | |
23106 | + | |
23107 | +#define _PAGE_BIT_PRESENT 0 | |
23108 | +#define _PAGE_BIT_RW 1 | |
23109 | +#define _PAGE_BIT_USER 2 | |
23110 | +#define _PAGE_BIT_PWT 3 | |
23111 | +#define _PAGE_BIT_PCD 4 | |
23112 | +#define _PAGE_BIT_ACCESSED 5 | |
23113 | +#define _PAGE_BIT_DIRTY 6 | |
23114 | +#define _PAGE_BIT_FILE 6 | |
23115 | +#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ | |
23116 | +#define _PAGE_BIT_PAT 7 /* on 4KB pages */ | |
23117 | +#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ | |
23118 | +#define _PAGE_BIT_IO 9 /* Mapped page is I/O or foreign and | |
23119 | + * has no associated page struct. */ | |
23120 | +#define _PAGE_BIT_UNUSED2 10 /* available for programmer */ | |
23121 | +#define _PAGE_BIT_UNUSED3 11 | |
23122 | +#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ | |
23123 | +#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ | |
23124 | + | |
23125 | +/* | |
23126 | + * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a | |
23127 | + * sign-extended value on 32-bit with all 1's in the upper word, | |
23128 | + * which preserves the upper pte values on 64-bit ptes: | |
23129 | + */ | |
23130 | +#define _PAGE_PRESENT (_AC(1, L)<<_PAGE_BIT_PRESENT) | |
23131 | +#define _PAGE_RW (_AC(1, L)<<_PAGE_BIT_RW) | |
23132 | +#define _PAGE_USER (_AC(1, L)<<_PAGE_BIT_USER) | |
23133 | +#define _PAGE_PWT (_AC(1, L)<<_PAGE_BIT_PWT) | |
23134 | +#define _PAGE_PCD (_AC(1, L)<<_PAGE_BIT_PCD) | |
23135 | +#define _PAGE_ACCESSED (_AC(1, L)<<_PAGE_BIT_ACCESSED) | |
23136 | +#define _PAGE_DIRTY (_AC(1, L)<<_PAGE_BIT_DIRTY) | |
23137 | +#define _PAGE_PSE (_AC(1, L)<<_PAGE_BIT_PSE) /* 2MB page */ | |
23138 | +#define _PAGE_GLOBAL (_AC(1, L)<<_PAGE_BIT_GLOBAL) /* Global TLB entry */ | |
23139 | +#define _PAGE_IO (_AC(1, L)<<_PAGE_BIT_IO) | |
23140 | +#define _PAGE_UNUSED2 (_AC(1, L)<<_PAGE_BIT_UNUSED2) | |
23141 | +#define _PAGE_UNUSED3 (_AC(1, L)<<_PAGE_BIT_UNUSED3) | |
23142 | +#define _PAGE_PAT (_AC(1, L)<<_PAGE_BIT_PAT) | |
23143 | +#define _PAGE_PAT_LARGE (_AC(1, L)<<_PAGE_BIT_PAT_LARGE) | |
23144 | + | |
23145 | +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | |
23146 | +#define _PAGE_NX (_AC(1, ULL) << _PAGE_BIT_NX) | |
23147 | +#else | |
23148 | +#define _PAGE_NX 0 | |
23149 | +#endif | |
23150 | + | |
23151 | +/* If _PAGE_PRESENT is clear, we use these: */ | |
23152 | +#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, saved PTE; unset:swap */ | |
23153 | +#define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE; | |
23154 | + pte_present gives true */ | |
23155 | + | |
23156 | +#ifndef __ASSEMBLY__ | |
23157 | +#if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT <= 0x030002 | |
23158 | +extern unsigned int __kernel_page_user; | |
23159 | +#else | |
23160 | +#define __kernel_page_user 0 | |
23161 | +#endif | |
23162 | +#endif | |
23163 | + | |
23164 | +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) | |
23165 | +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user) | |
23166 | + | |
23167 | +#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_IO) | |
23168 | + | |
23169 | +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) | |
23170 | +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) | |
23171 | + | |
23172 | +#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) | |
23173 | +#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) | |
23174 | +#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) | |
23175 | +#define PAGE_COPY PAGE_COPY_NOEXEC | |
23176 | +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) | |
23177 | +#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) | |
23178 | + | |
23179 | +#ifdef CONFIG_X86_32 | |
23180 | +#define _PAGE_KERNEL_EXEC \ | |
23181 | + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) | |
23182 | +#define _PAGE_KERNEL (_PAGE_KERNEL_EXEC | _PAGE_NX) | |
23183 | + | |
23184 | +#ifndef __ASSEMBLY__ | |
23185 | +extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC; | |
23186 | +#endif /* __ASSEMBLY__ */ | |
23187 | +#else | |
23188 | +#define __PAGE_KERNEL_EXEC \ | |
23189 | + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | __kernel_page_user) | |
23190 | +#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) | |
23191 | +#endif | |
23192 | + | |
23193 | +#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) | |
23194 | +#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) | |
23195 | +#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT) | |
23196 | +#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) | |
23197 | +#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) | |
23198 | +#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) | |
23199 | +#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) | |
23200 | +#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) | |
23201 | +#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) | |
23202 | + | |
23203 | +/* | |
23204 | + * We don't support GLOBAL page in xenolinux64 | |
23205 | + */ | |
23206 | +#define MAKE_GLOBAL(x) __pgprot((x)) | |
23207 | + | |
23208 | +#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL) | |
23209 | +#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO) | |
23210 | +#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC) | |
23211 | +#define PAGE_KERNEL_RX MAKE_GLOBAL(__PAGE_KERNEL_RX) | |
23212 | +#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE) | |
23213 | +#define PAGE_KERNEL_UC_MINUS MAKE_GLOBAL(__PAGE_KERNEL_UC_MINUS) | |
23214 | +#define PAGE_KERNEL_EXEC_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE) | |
23215 | +#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE) | |
23216 | +#define PAGE_KERNEL_LARGE_EXEC MAKE_GLOBAL(__PAGE_KERNEL_LARGE_EXEC) | |
23217 | +#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL) | |
23218 | +#define PAGE_KERNEL_VSYSCALL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE) | |
23219 | + | |
23220 | +/* xwr */ | |
23221 | +#define __P000 PAGE_NONE | |
23222 | +#define __P001 PAGE_READONLY | |
23223 | +#define __P010 PAGE_COPY | |
23224 | +#define __P011 PAGE_COPY | |
23225 | +#define __P100 PAGE_READONLY_EXEC | |
23226 | +#define __P101 PAGE_READONLY_EXEC | |
23227 | +#define __P110 PAGE_COPY_EXEC | |
23228 | +#define __P111 PAGE_COPY_EXEC | |
23229 | + | |
23230 | +#define __S000 PAGE_NONE | |
23231 | +#define __S001 PAGE_READONLY | |
23232 | +#define __S010 PAGE_SHARED | |
23233 | +#define __S011 PAGE_SHARED | |
23234 | +#define __S100 PAGE_READONLY_EXEC | |
23235 | +#define __S101 PAGE_READONLY_EXEC | |
23236 | +#define __S110 PAGE_SHARED_EXEC | |
23237 | +#define __S111 PAGE_SHARED_EXEC | |
23238 | + | |
23239 | +#ifndef __ASSEMBLY__ | |
23240 | + | |
23241 | +/* | |
23242 | + * ZERO_PAGE is a global shared page that is always zero: used | |
23243 | + * for zero-mapped memory areas etc. | |
23244 | + */ | |
23245 | +extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; | |
23246 | +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) | |
23247 | + | |
23248 | +extern spinlock_t pgd_lock; | |
23249 | +extern struct list_head pgd_list; | |
23250 | + | |
23251 | +/* | |
23252 | + * The following only work if pte_present() is true. | |
23254 | + * Undefined behaviour if not. | |
23254 | + */ | |
23255 | +static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; } | |
23256 | +static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; } | |
23257 | +static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; } | |
23258 | +static inline int pte_file(pte_t pte) { return __pte_val(pte) & _PAGE_FILE; } | |
23259 | +static inline int pte_huge(pte_t pte) { return __pte_val(pte) & _PAGE_PSE; } | |
23260 | +static inline int pte_global(pte_t pte) { return 0; } | |
23261 | +static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); } | |
23262 | + | |
23263 | +static inline int pmd_large(pmd_t pte) { | |
23264 | + return (__pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) == | |
23265 | + (_PAGE_PSE|_PAGE_PRESENT); | |
23266 | +} | |
23267 | + | |
23268 | +static inline pte_t pte_mkclean(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); } | |
23269 | +static inline pte_t pte_mkold(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); } | |
23270 | +static inline pte_t pte_wrprotect(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_RW); } | |
23271 | +static inline pte_t pte_mkexec(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_NX); } | |
23272 | +static inline pte_t pte_mkdirty(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_DIRTY); } | |
23273 | +static inline pte_t pte_mkyoung(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_ACCESSED); } | |
23274 | +static inline pte_t pte_mkwrite(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_RW); } | |
23275 | +static inline pte_t pte_mkhuge(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_PSE); } | |
23276 | +static inline pte_t pte_clrhuge(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE); } | |
23277 | +static inline pte_t pte_mkglobal(pte_t pte) { return pte; } | |
23278 | +static inline pte_t pte_clrglobal(pte_t pte) { return pte; } | |
23279 | + | |
23280 | +extern pteval_t __supported_pte_mask; | |
23281 | + | |
23282 | +static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) | |
23283 | +{ | |
23284 | + return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | | |
23285 | + pgprot_val(pgprot)) & __supported_pte_mask); | |
23286 | +} | |
23287 | + | |
23288 | +static inline pte_t pfn_pte_ma(unsigned long page_nr, pgprot_t pgprot) | |
23289 | +{ | |
23290 | + return __pte_ma((((phys_addr_t)page_nr << PAGE_SHIFT) | | |
23291 | + pgprot_val(pgprot)) & __supported_pte_mask); | |
23292 | +} | |
23293 | + | |
23294 | +static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) | |
23295 | +{ | |
23296 | + return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) | | |
23297 | + pgprot_val(pgprot)) & __supported_pte_mask); | |
23298 | +} | |
23299 | + | |
23300 | +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) | |
23301 | +{ | |
23302 | + pteval_t val = pte_val(pte); | |
23303 | + | |
23304 | + val &= _PAGE_CHG_MASK; | |
23305 | + val |= pgprot_val(newprot) & __supported_pte_mask; | |
23306 | + | |
23307 | + return __pte(val); | |
23308 | +} | |
23309 | + | |
23310 | +#define pte_pgprot(x) __pgprot(pte_val(x) & (0xfff | _PAGE_NX)) | |
23311 | + | |
23312 | +#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) | |
23313 | + | |
23314 | +#define set_pte(ptep, pte) xen_set_pte(ptep, pte) | |
23315 | +#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte) | |
23316 | + | |
23317 | +#define set_pte_atomic(ptep, pte) \ | |
23318 | + xen_set_pte_atomic(ptep, pte) | |
23319 | + | |
23320 | +#define set_pmd(pmdp, pmd) xen_set_pmd(pmdp, pmd) | |
23321 | + | |
23322 | +#ifndef __PAGETABLE_PUD_FOLDED | |
23323 | +#define set_pgd(pgdp, pgd) xen_set_pgd(pgdp, pgd) | |
23324 | +#define pgd_clear(pgd) xen_pgd_clear(pgd) | |
23325 | +#endif | |
23326 | + | |
23327 | +#ifndef set_pud | |
23328 | +# define set_pud(pudp, pud) xen_set_pud(pudp, pud) | |
23329 | +#endif | |
23330 | + | |
23331 | +#ifndef __PAGETABLE_PMD_FOLDED | |
23332 | +#define pud_clear(pud) xen_pud_clear(pud) | |
23333 | +#endif | |
23334 | + | |
23335 | +#define pte_clear(mm, addr, ptep) xen_pte_clear(mm, addr, ptep) | |
23336 | +#define pmd_clear(pmd) xen_pmd_clear(pmd) | |
23337 | + | |
23338 | +#define pte_update(mm, addr, ptep) do { } while (0) | |
23339 | +#define pte_update_defer(mm, addr, ptep) do { } while (0) | |
23340 | + | |
23341 | +#endif /* __ASSEMBLY__ */ | |
23342 | + | |
23343 | #ifdef CONFIG_X86_32 | |
23344 | # include "pgtable_32.h" | |
23345 | #else | |
23346 | # include "pgtable_64.h" | |
23347 | #endif | |
23348 | + | |
23349 | +#ifndef __ASSEMBLY__ | |
23350 | + | |
23351 | +enum { | |
23352 | + PG_LEVEL_NONE, | |
23353 | + PG_LEVEL_4K, | |
23354 | + PG_LEVEL_2M, | |
23355 | + PG_LEVEL_1G, | |
23356 | +}; | |
23357 | + | |
23358 | +/* | |
23359 | + * Helper function that returns the kernel pagetable entry controlling | |
23360 | + * the virtual address 'address'. NULL means no pagetable entry present. | |
23361 | + * NOTE: the return type is pte_t but if the pmd is PSE then we return it | |
23362 | + * as a pte too. | |
23363 | + */ | |
23364 | +extern pte_t *lookup_address(unsigned long address, unsigned int *level); | |
23365 | + | |
23366 | +/* local pte updates need not use xchg for locking */ | |
23367 | +static inline pte_t xen_local_ptep_get_and_clear(pte_t *ptep, pte_t res) | |
23368 | +{ | |
23369 | + xen_set_pte(ptep, __pte(0)); | |
23370 | + return res; | |
23371 | +} | |
23372 | + | |
23373 | +static inline void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | |
23374 | + pte_t *ptep , pte_t pte) | |
23375 | +{ | |
23376 | + if ((mm != current->mm && mm != &init_mm) || | |
23377 | + HYPERVISOR_update_va_mapping(addr, pte, 0)) | |
23378 | + xen_set_pte(ptep, pte); | |
23379 | +} | |
23380 | + | |
23381 | +static inline void xen_pte_clear(struct mm_struct *mm, unsigned long addr, | |
23382 | + pte_t *ptep) | |
23383 | +{ | |
23384 | + if ((mm != current->mm && mm != &init_mm) | |
23385 | + || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) | |
23386 | + __xen_pte_clear(ptep); | |
23387 | +} | |
23388 | + | |
23389 | +#ifndef CONFIG_PARAVIRT | |
23390 | +/* | |
23391 | + * Rules for using pte_update - it must be called after any PTE update which | |
23392 | + * has not been done using the set_pte / clear_pte interfaces. It is used by | |
23393 | + * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE | |
23394 | + * updates should either be sets, clears, or set_pte_atomic for P->P | |
23395 | + * transitions, which means this hook should only be called for user PTEs. | |
23396 | + * This hook implies a P->P protection or access change has taken place, which | |
23397 | + * requires a subsequent TLB flush. The notification can optionally be delayed | |
23398 | + * until the TLB flush event by using the pte_update_defer form of the | |
23400 | + * interface, but care must be taken to ensure that the flush happens while | |
23400 | + * still holding the same page table lock so that the shadow and primary pages | |
23401 | + * do not become out of sync on SMP. | |
23402 | + */ | |
23403 | +#define pte_update(mm, addr, ptep) do { } while (0) | |
23404 | +#define pte_update_defer(mm, addr, ptep) do { } while (0) | |
23405 | +#endif | |
23406 | + | |
23407 | +/* | |
23408 | + * We only update the dirty/accessed state if we set | |
23409 | + * the dirty bit by hand in the kernel, since the hardware | |
23410 | + * will do the accessed bit for us, and we don't want to | |
23412 | + * race with other CPUs that might be updating the dirty | |
23412 | + * bit at the same time. | |
23413 | + */ | |
23414 | +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS | |
23415 | +#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ | |
23416 | +({ \ | |
23417 | + int __changed = !pte_same(*(ptep), entry); \ | |
23418 | + if (__changed && (dirty)) { \ | |
23419 | + if ( likely((vma)->vm_mm == current->mm) ) { \ | |
23420 | + BUG_ON(HYPERVISOR_update_va_mapping(address, \ | |
23421 | + entry, \ | |
23422 | + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ | |
23423 | + UVMF_INVLPG|UVMF_MULTI)); \ | |
23424 | + } else { \ | |
23425 | + xen_l1_entry_update(ptep, entry); \ | |
23426 | + flush_tlb_page(vma, address); \ | |
23427 | + } \ | |
23428 | + } \ | |
23429 | + __changed; \ | |
23430 | +}) | |
23431 | + | |
23432 | +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG | |
23433 | +#define ptep_test_and_clear_young(vma, addr, ptep) ({ \ | |
23434 | + int __ret = 0; \ | |
23435 | + if (pte_young(*(ptep))) \ | |
23436 | + __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \ | |
23437 | + &(ptep)->pte); \ | |
23438 | + if (__ret) \ | |
23439 | + pte_update((vma)->vm_mm, addr, ptep); \ | |
23440 | + __ret; \ | |
23441 | +}) | |
23442 | + | |
23443 | +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH | |
23444 | +#define ptep_clear_flush_young(vma, address, ptep) \ | |
23445 | +({ \ | |
23446 | + pte_t __pte = *(ptep); \ | |
23447 | + int __young = pte_young(__pte); \ | |
23448 | + __pte = pte_mkold(__pte); \ | |
23449 | + if (PagePinned(virt_to_page((vma)->vm_mm->pgd))) \ | |
23450 | + (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \ | |
23451 | + else if (__young) \ | |
23452 | + (ptep)->pte_low = __pte.pte_low; \ | |
23453 | + __young; \ | |
23454 | +}) | |
23455 | + | |
23456 | +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH | |
23457 | +#define ptep_clear_flush(vma, addr, ptep) \ | |
23458 | +({ \ | |
23459 | + pte_t *__ptep = (ptep); \ | |
23460 | + pte_t __res = *__ptep; \ | |
23461 | + if (!pte_none(__res) && \ | |
23462 | + ((vma)->vm_mm != current->mm || \ | |
23463 | + HYPERVISOR_update_va_mapping(addr, __pte(0), \ | |
23464 | + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ | |
23465 | + UVMF_INVLPG|UVMF_MULTI))) { \ | |
23466 | + __xen_pte_clear(__ptep); \ | |
23467 | + flush_tlb_page(vma, addr); \ | |
23468 | + } \ | |
23469 | + __res; \ | |
23470 | +}) | |
23471 | + | |
23472 | +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR | |
23473 | +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |
23474 | +{ | |
23475 | + pte_t pte = *ptep; | |
23476 | + if (!pte_none(pte) | |
23477 | + && (mm != &init_mm | |
23478 | + || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) { | |
23479 | + pte = xen_ptep_get_and_clear(ptep, pte); | |
23480 | + pte_update(mm, addr, ptep); | |
23481 | + } | |
23482 | + return pte; | |
23483 | +} | |
23484 | + | |
23485 | +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL | |
23486 | +#define ptep_get_and_clear_full(mm, addr, ptep, full) \ | |
23487 | + ((full) ? ({ \ | |
23488 | + pte_t *__ptep = (ptep); \ | |
23489 | + pte_t __res = *__ptep; \ | |
23490 | + if (!PagePinned(virt_to_page((mm)->pgd))) \ | |
23491 | + __xen_pte_clear(__ptep); \ | |
23492 | + else if (!pte_none(__res)) \ | |
23493 | + xen_l1_entry_update(__ptep, __pte(0)); \ | |
23494 | + __res; \ | |
23495 | + }) : \ | |
23496 | + ptep_get_and_clear(mm, addr, ptep)) | |
23497 | + | |
23498 | +pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *, unsigned long, pte_t *, int); | |
23499 | + | |
23500 | +#define __HAVE_ARCH_PTEP_SET_WRPROTECT | |
23501 | +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |
23502 | +{ | |
23503 | + pte_t pte = *ptep; | |
23504 | + if (pte_write(pte)) | |
23505 | + set_pte_at(mm, addr, ptep, pte_wrprotect(pte)); | |
23506 | +} | |
23507 | + | |
23508 | +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \ | |
23509 | + xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) | |
23510 | + | |
23511 | +#define arbitrary_virt_to_machine(va) \ | |
23512 | +({ \ | |
23513 | + unsigned int __lvl; \ | |
23514 | + pte_t *__ptep = lookup_address((unsigned long)(va), &__lvl); \ | |
23515 | + BUG_ON(!__ptep || __lvl != PG_LEVEL_4K || !pte_present(*__ptep));\ | |
23516 | + (((maddr_t)pte_mfn(*__ptep) << PAGE_SHIFT) \ | |
23517 | + | ((unsigned long)(va) & (PAGE_SIZE - 1))); \ | |
23518 | +}) | |
23519 | + | |
23520 | +#ifdef CONFIG_HIGHPTE | |
23521 | +#include <asm/io.h> | |
23522 | +struct page *kmap_atomic_to_page(void *); | |
23523 | +#define ptep_to_machine(ptep) \ | |
23524 | +({ \ | |
23525 | + pte_t *__ptep = (ptep); \ | |
23526 | + page_to_phys(kmap_atomic_to_page(__ptep)) \ | |
23527 | + | ((unsigned long)__ptep & (PAGE_SIZE - 1)); \ | |
23528 | +}) | |
23529 | +#else | |
23530 | +#define ptep_to_machine(ptep) virt_to_machine(ptep) | |
23531 | +#endif | |
23532 | + | |
23533 | +#include <asm-generic/pgtable.h> | |
23534 | + | |
23535 | +#include <xen/features.h> | |
23536 | +void make_page_readonly(void *va, unsigned int feature); | |
23537 | +void make_page_writable(void *va, unsigned int feature); | |
23538 | +void make_pages_readonly(void *va, unsigned int nr, unsigned int feature); | |
23539 | +void make_pages_writable(void *va, unsigned int nr, unsigned int feature); | |
23540 | + | |
23541 | +struct vm_area_struct; | |
23542 | + | |
23543 | +int direct_remap_pfn_range(struct vm_area_struct *vma, | |
23544 | + unsigned long address, | |
23545 | + unsigned long mfn, | |
23546 | + unsigned long size, | |
23547 | + pgprot_t prot, | |
23548 | + domid_t domid); | |
23549 | +int direct_kernel_remap_pfn_range(unsigned long address, | |
23550 | + unsigned long mfn, | |
23551 | + unsigned long size, | |
23552 | + pgprot_t prot, | |
23553 | + domid_t domid); | |
23554 | +int create_lookup_pte_addr(struct mm_struct *mm, | |
23555 | + unsigned long address, | |
23556 | + uint64_t *ptep); | |
23557 | +int touch_pte_range(struct mm_struct *mm, | |
23558 | + unsigned long address, | |
23559 | + unsigned long size); | |
23560 | + | |
23561 | +int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd, | |
23562 | + unsigned long addr, unsigned long end, pgprot_t newprot, | |
23563 | + int dirty_accountable); | |
23564 | + | |
23565 | +#endif /* __ASSEMBLY__ */ | |
23566 | + | |
23567 | +#endif /* _ASM_X86_PGTABLE_H */ | |
23568 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-02-16 16:17:21.000000000 +0100 | |
23569 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-03-16 16:33:40.000000000 +0100 | |
23570 | @@ -18,16 +18,18 @@ | |
23571 | printk("%s:%d: bad pgd %p(%016Lx pfn %08Lx).\n", __FILE__, __LINE__, \ | |
23572 | &(e), __pgd_val(e), (pgd_val(e) & PTE_MASK) >> PAGE_SHIFT) | |
23573 | ||
23574 | -#define pud_none(pud) 0 | |
23575 | -#define pud_bad(pud) 0 | |
23576 | -#define pud_present(pud) 1 | |
23577 | ||
cc90b958 | 23578 | -/* |
00e5a55c | 23579 | - * All present pages with !NX bit are kernel-executable: |
cc90b958 | 23580 | - */ |
00e5a55c BS |
23581 | -static inline int pte_exec_kernel(pte_t pte) |
23582 | +static inline int pud_none(pud_t pud) | |
23583 | +{ | |
23584 | + return __pud_val(pud) == 0; | |
23585 | +} | |
23586 | +static inline int pud_bad(pud_t pud) | |
23587 | { | |
23588 | - return !(__pte_val(pte) & _PAGE_NX); | |
23589 | + return (__pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0; | |
23590 | +} | |
23591 | +static inline int pud_present(pud_t pud) | |
23592 | +{ | |
23593 | + return __pud_val(pud) & _PAGE_PRESENT; | |
23594 | } | |
23595 | ||
23596 | /* Rules for using set_pte: the pte being assigned *must* be | |
23597 | @@ -44,14 +46,6 @@ static inline void xen_set_pte(pte_t *pt | |
23598 | ptep->pte_low = pte.pte_low; | |
23599 | } | |
23600 | ||
23601 | -static inline void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | |
23602 | - pte_t *ptep , pte_t pte) | |
23603 | -{ | |
23604 | - if ((mm != current->mm && mm != &init_mm) || | |
23605 | - HYPERVISOR_update_va_mapping(addr, pte, 0)) | |
23606 | - xen_set_pte(ptep, pte); | |
cc90b958 BS |
23607 | -} |
23608 | - | |
00e5a55c BS |
23609 | static inline void xen_set_pte_atomic(pte_t *ptep, pte_t pte) |
23610 | { | |
23611 | set_64bit((unsigned long long *)(ptep),__pte_val(pte)); | |
23612 | @@ -70,14 +64,11 @@ static inline void xen_set_pud(pud_t *pu | |
23613 | * entry, so clear the bottom half first and enforce ordering with a compiler | |
23614 | * barrier. | |
23615 | */ | |
23616 | -static inline void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |
23617 | +static inline void __xen_pte_clear(pte_t *ptep) | |
23618 | { | |
23619 | - if ((mm != current->mm && mm != &init_mm) | |
23620 | - || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { | |
23621 | - ptep->pte_low = 0; | |
23622 | - smp_wmb(); | |
23623 | - ptep->pte_high = 0; | |
23624 | - } | |
23625 | + ptep->pte_low = 0; | |
23626 | + smp_wmb(); | |
23627 | + ptep->pte_high = 0; | |
23628 | } | |
23629 | ||
23630 | static inline void xen_pmd_clear(pmd_t *pmd) | |
23631 | @@ -85,21 +76,25 @@ static inline void xen_pmd_clear(pmd_t * | |
23632 | xen_l2_entry_update(pmd, __pmd(0)); | |
23633 | } | |
23634 | ||
23635 | -#define set_pte(ptep, pte) xen_set_pte(ptep, pte) | |
23636 | -#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte) | |
23637 | -#define set_pte_atomic(ptep, pte) xen_set_pte_atomic(ptep, pte) | |
23638 | -#define set_pmd(pmdp, pmd) xen_set_pmd(pmdp, pmd) | |
23639 | -#define set_pud(pudp, pud) xen_set_pud(pudp, pud) | |
23640 | -#define pte_clear(mm, addr, ptep) xen_pte_clear(mm, addr, ptep) | |
23641 | -#define pmd_clear(pmd) xen_pmd_clear(pmd) | |
23642 | +static inline void pud_clear(pud_t *pudp) | |
23643 | +{ | |
23644 | + pgdval_t pgd; | |
23645 | + | |
23646 | + set_pud(pudp, __pud(0)); | |
23647 | ||
23648 | -/* | |
23649 | - * Pentium-II erratum A13: in PAE mode we explicitly have to flush | |
23650 | - * the TLB via cr3 if the top-level pgd is changed... | |
23651 | - * We do not let the generic code free and clear pgd entries due to | |
23652 | - * this erratum. | |
23653 | - */ | |
23654 | -static inline void pud_clear (pud_t * pud) { } | |
23655 | + /* | |
23656 | + * According to Intel App note "TLBs, Paging-Structure Caches, | |
23657 | + * and Their Invalidation", April 2007, document 317080-001, | |
23658 | + * section 8.1: in PAE mode we explicitly have to flush the | |
23659 | + * TLB via cr3 if the top-level pgd is changed... | |
23660 | + * | |
23661 | + * Make sure the pud entry we're updating is within the | |
23662 | + * current pgd to avoid unnecessary TLB flushes. | |
23663 | + */ | |
23664 | + pgd = read_cr3(); | |
23665 | + if (__pa(pudp) >= pgd && __pa(pudp) < (pgd + sizeof(pgd_t)*PTRS_PER_PGD)) | |
23666 | + xen_tlb_flush(); | |
23667 | +} | |
23668 | ||
23669 | #define pud_page(pud) \ | |
23670 | ((struct page *) __va(pud_val(pud) & PAGE_MASK)) | |
23671 | @@ -128,24 +123,6 @@ static inline pte_t xen_ptep_get_and_cle | |
23672 | #define xen_ptep_get_and_clear(xp, pte) xen_local_ptep_get_and_clear(xp, pte) | |
23673 | #endif | |
23674 | ||
23675 | -#define __HAVE_ARCH_PTEP_CLEAR_FLUSH | |
23676 | -#define ptep_clear_flush(vma, addr, ptep) \ | |
23677 | -({ \ | |
23678 | - pte_t *__ptep = (ptep); \ | |
23679 | - pte_t __res = *__ptep; \ | |
23680 | - if (!pte_none(__res) && \ | |
23681 | - ((vma)->vm_mm != current->mm || \ | |
23682 | - HYPERVISOR_update_va_mapping(addr, __pte(0), \ | |
23683 | - (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ | |
23684 | - UVMF_INVLPG|UVMF_MULTI))) { \ | |
23685 | - __ptep->pte_low = 0; \ | |
23686 | - smp_wmb(); \ | |
23687 | - __ptep->pte_high = 0; \ | |
23688 | - flush_tlb_page(vma, addr); \ | |
23689 | - } \ | |
23690 | - __res; \ | |
23691 | -}) | |
cc90b958 | 23692 | - |
00e5a55c BS |
23693 | #define __HAVE_ARCH_PTE_SAME |
23694 | static inline int pte_same(pte_t a, pte_t b) | |
23695 | { | |
23696 | @@ -168,26 +145,12 @@ static inline int pte_none(pte_t pte) | |
23697 | mfn_to_local_pfn(__pte_mfn(_pte)) : \ | |
23698 | __pte_mfn(_pte)) | |
23699 | ||
23700 | -extern unsigned long long __supported_pte_mask; | |
cc90b958 | 23701 | - |
00e5a55c | 23702 | -static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) |
cc90b958 | 23703 | -{ |
00e5a55c BS |
23704 | - return __pte((((unsigned long long)page_nr << PAGE_SHIFT) | |
23705 | - pgprot_val(pgprot)) & __supported_pte_mask); | |
cc90b958 BS |
23706 | -} |
23707 | - | |
00e5a55c | 23708 | -static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) |
cc90b958 | 23709 | -{ |
00e5a55c BS |
23710 | - return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | |
23711 | - pgprot_val(pgprot)) & __supported_pte_mask); | |
cc90b958 | 23712 | -} |
cc90b958 | 23713 | - |
00e5a55c BS |
23714 | /* |
23715 | * Bits 0, 6 and 7 are taken in the low part of the pte, | |
23716 | * put the 32 bits of offset into the high part. | |
23717 | */ | |
23718 | #define pte_to_pgoff(pte) ((pte).pte_high) | |
23719 | -#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) }) | |
23720 | +#define pgoff_to_pte(off) ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } }) | |
23721 | #define PTE_FILE_MAX_BITS 32 | |
23722 | ||
23723 | /* Encode and de-code a swap entry */ | |
23724 | @@ -195,8 +158,6 @@ static inline pmd_t pfn_pmd(unsigned lon | |
23725 | #define __swp_offset(x) ((x).val >> 5) | |
23726 | #define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) | |
23727 | #define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high }) | |
23728 | -#define __swp_entry_to_pte(x) ((pte_t){ 0, (x).val }) | |
cc90b958 | 23729 | - |
00e5a55c BS |
23730 | -#define __pmd_free_tlb(tlb, x) do { } while (0) |
23731 | +#define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } }) | |
23732 | ||
23733 | #endif /* _I386_PGTABLE_3LEVEL_H */ | |
23734 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-02-16 16:18:36.000000000 +0100 | |
23735 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-16 16:33:40.000000000 +0100 | |
23736 | @@ -1,8 +1,6 @@ | |
23737 | #ifndef _I386_PGTABLE_H | |
23738 | #define _I386_PGTABLE_H | |
23739 | ||
23740 | -#include <asm/hypervisor.h> | |
cc90b958 | 23741 | - |
00e5a55c BS |
23742 | /* |
23743 | * The Linux memory management assumes a three-level page table setup. On | |
23744 | * the i386, we use that, but "fold" the mid level into the top-level page | |
23745 | @@ -25,20 +23,10 @@ | |
23746 | ||
23747 | struct vm_area_struct; | |
23748 | ||
23749 | -/* | |
23750 | - * ZERO_PAGE is a global shared page that is always zero: used | |
23751 | - * for zero-mapped memory areas etc.. | |
23752 | - */ | |
23753 | -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) | |
23754 | -extern unsigned long empty_zero_page[1024]; | |
23755 | extern pgd_t *swapper_pg_dir; | |
23756 | -extern struct kmem_cache *pmd_cache; | |
23757 | -extern spinlock_t pgd_lock; | |
23758 | -extern struct page *pgd_list; | |
23759 | -void check_pgt_cache(void); | |
23760 | ||
23761 | -void pmd_ctor(struct kmem_cache *, void *); | |
23762 | -void pgtable_cache_init(void); | |
23763 | +static inline void pgtable_cache_init(void) { } | |
23764 | +static inline void check_pgt_cache(void) { } | |
23765 | void paging_init(void); | |
23766 | ||
23767 | ||
23768 | @@ -58,16 +46,9 @@ void paging_init(void); | |
23769 | #define PGDIR_SIZE (1UL << PGDIR_SHIFT) | |
23770 | #define PGDIR_MASK (~(PGDIR_SIZE-1)) | |
23771 | ||
23772 | -#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) | |
23773 | -#define FIRST_USER_ADDRESS 0 | |
cc90b958 | 23774 | - |
00e5a55c BS |
23775 | #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) |
23776 | #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) | |
23777 | ||
23778 | -#define TWOLEVEL_PGDIR_SHIFT 22 | |
23779 | -#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT) | |
23780 | -#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS) | |
cc90b958 | 23781 | - |
00e5a55c BS |
23782 | /* Just any arbitrary offset to the start of the vmalloc VM area: the |
23783 | * current 8MB value just means that there will be a 8MB "hole" after the | |
23784 | * physical memory until the kernel virtual memory starts. That means that | |
23785 | @@ -78,121 +59,19 @@ void paging_init(void); | |
23786 | #define VMALLOC_OFFSET (8*1024*1024) | |
23787 | #define VMALLOC_START (((unsigned long) high_memory + \ | |
23788 | 2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1)) | |
23789 | -#ifdef CONFIG_HIGHMEM | |
23790 | -# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) | |
23791 | -#else | |
23792 | -# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) | |
23793 | -#endif | |
cc90b958 | 23794 | - |
cc90b958 | 23795 | -/* |
00e5a55c BS |
23796 | - * _PAGE_PSE set in the page directory entry just means that |
23797 | - * the page directory entry points directly to a 4MB-aligned block of | |
23798 | - * memory. | |
cc90b958 | 23799 | - */ |
00e5a55c BS |
23800 | -#define _PAGE_BIT_PRESENT 0 |
23801 | -#define _PAGE_BIT_RW 1 | |
23802 | -#define _PAGE_BIT_USER 2 | |
23803 | -#define _PAGE_BIT_PWT 3 | |
23804 | -#define _PAGE_BIT_PCD 4 | |
23805 | -#define _PAGE_BIT_ACCESSED 5 | |
23806 | -#define _PAGE_BIT_DIRTY 6 | |
23807 | -#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page, Pentium+, if present.. */ | |
23808 | -#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ | |
23809 | -/*#define _PAGE_BIT_UNUSED1 9*/ /* available for programmer */ | |
23810 | -#define _PAGE_BIT_UNUSED2 10 | |
23811 | -#define _PAGE_BIT_UNUSED3 11 | |
23812 | -#define _PAGE_BIT_NX 63 | |
cc90b958 | 23813 | - |
00e5a55c BS |
23814 | -#define _PAGE_PRESENT 0x001 |
23815 | -#define _PAGE_RW 0x002 | |
23816 | -#define _PAGE_USER 0x004 | |
23817 | -#define _PAGE_PWT 0x008 | |
23818 | -#define _PAGE_PCD 0x010 | |
23819 | -#define _PAGE_ACCESSED 0x020 | |
23820 | -#define _PAGE_DIRTY 0x040 | |
23821 | -#define _PAGE_PSE 0x080 /* 4 MB (or 2MB) page, Pentium+, if present.. */ | |
23822 | -#define _PAGE_GLOBAL 0x100 /* Global TLB entry PPro+ */ | |
23823 | -/*#define _PAGE_UNUSED1 0x200*/ /* available for programmer */ | |
23824 | -#define _PAGE_UNUSED2 0x400 | |
23825 | -#define _PAGE_UNUSED3 0x800 | |
cc90b958 | 23826 | - |
00e5a55c BS |
23827 | -/* If _PAGE_PRESENT is clear, we use these: */ |
23828 | -#define _PAGE_FILE 0x040 /* nonlinear file mapping, saved PTE; unset:swap */ | |
23829 | -#define _PAGE_PROTNONE 0x080 /* if the user mapped it with PROT_NONE; | |
23830 | - pte_present gives true */ | |
23831 | #ifdef CONFIG_X86_PAE | |
23832 | -#define _PAGE_NX (1ULL<<_PAGE_BIT_NX) | |
23833 | +#define LAST_PKMAP 512 | |
23834 | #else | |
23835 | -#define _PAGE_NX 0 | |
23836 | +#define LAST_PKMAP 1024 | |
23837 | #endif | |
23838 | ||
23839 | -/* Mapped page is I/O or foreign and has no associated page struct. */ | |
23840 | -#define _PAGE_IO 0x200 | |
23841 | +#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK) | |
23842 | ||
23843 | -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) | |
23844 | -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | |
23845 | -#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_IO) | |
cc90b958 | 23846 | - |
00e5a55c BS |
23847 | -#define PAGE_NONE \ |
23848 | - __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) | |
23849 | -#define PAGE_SHARED \ | |
23850 | - __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) | |
cc90b958 | 23851 | - |
00e5a55c BS |
23852 | -#define PAGE_SHARED_EXEC \ |
23853 | - __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) | |
23854 | -#define PAGE_COPY_NOEXEC \ | |
23855 | - __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) | |
23856 | -#define PAGE_COPY_EXEC \ | |
23857 | - __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) | |
23858 | -#define PAGE_COPY \ | |
23859 | - PAGE_COPY_NOEXEC | |
23860 | -#define PAGE_READONLY \ | |
23861 | - __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) | |
23862 | -#define PAGE_READONLY_EXEC \ | |
23863 | - __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) | |
cc90b958 | 23864 | - |
00e5a55c BS |
23865 | -#define _PAGE_KERNEL \ |
23866 | - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX) | |
23867 | -#define _PAGE_KERNEL_EXEC \ | |
23868 | - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) | |
23869 | - | |
23870 | -extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC; | |
23871 | -#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) | |
23872 | -#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) | |
23873 | -#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD) | |
23874 | -#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) | |
23875 | -#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) | |
23876 | - | |
23877 | -#define PAGE_KERNEL __pgprot(__PAGE_KERNEL) | |
23878 | -#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) | |
23879 | -#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) | |
23880 | -#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) | |
23881 | -#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) | |
23882 | -#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) | |
23883 | -#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) | |
cc90b958 BS |
23884 | - |
23885 | -/* | |
00e5a55c BS |
23886 | - * The i386 can't do page protection for execute, and considers that |
23887 | - * the same are read. Also, write permissions imply read permissions. | |
23888 | - * This is the closest we can get.. | |
cc90b958 | 23889 | - */ |
00e5a55c BS |
23890 | -#define __P000 PAGE_NONE |
23891 | -#define __P001 PAGE_READONLY | |
23892 | -#define __P010 PAGE_COPY | |
23893 | -#define __P011 PAGE_COPY | |
23894 | -#define __P100 PAGE_READONLY_EXEC | |
23895 | -#define __P101 PAGE_READONLY_EXEC | |
23896 | -#define __P110 PAGE_COPY_EXEC | |
23897 | -#define __P111 PAGE_COPY_EXEC | |
cc90b958 | 23898 | - |
00e5a55c BS |
23899 | -#define __S000 PAGE_NONE |
23900 | -#define __S001 PAGE_READONLY | |
23901 | -#define __S010 PAGE_SHARED | |
23902 | -#define __S011 PAGE_SHARED | |
23903 | -#define __S100 PAGE_READONLY_EXEC | |
23904 | -#define __S101 PAGE_READONLY_EXEC | |
23905 | -#define __S110 PAGE_SHARED_EXEC | |
23906 | -#define __S111 PAGE_SHARED_EXEC | |
23907 | +#ifdef CONFIG_HIGHMEM | |
23908 | +# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) | |
23909 | +#else | |
23910 | +# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) | |
23911 | +#endif | |
23912 | ||
23913 | /* | |
23914 | * Define this if things work differently on an i386 and an i486: | |
23915 | @@ -221,28 +100,6 @@ extern unsigned long pg0[]; | |
23916 | ||
23917 | #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) | |
23918 | ||
cc90b958 | 23919 | -/* |
00e5a55c BS |
23920 | - * The following only work if pte_present() is true. |
23921 | - * Undefined behaviour if not.. | |
cc90b958 | 23922 | - */ |
00e5a55c BS |
23923 | -static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } |
23924 | -static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } | |
23925 | -static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } | |
23926 | -static inline int pte_huge(pte_t pte) { return (pte).pte_low & _PAGE_PSE; } | |
cc90b958 | 23927 | - |
00e5a55c BS |
23928 | -/* |
23929 | - * The following only works if pte_present() is not true. | |
23930 | - */ | |
23931 | -static inline int pte_file(pte_t pte) { return (pte).pte_low & _PAGE_FILE; } | |
cc90b958 | 23932 | - |
00e5a55c BS |
23933 | -static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; } |
23934 | -static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; } | |
23935 | -static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; } | |
23936 | -static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } | |
23937 | -static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } | |
23938 | -static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } | |
23939 | -static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; } | |
cc90b958 | 23940 | - |
00e5a55c BS |
23941 | #ifdef CONFIG_X86_PAE |
23942 | # include <asm/pgtable-3level.h> | |
23943 | #else | |
23944 | @@ -250,111 +107,6 @@ static inline pte_t pte_mkhuge(pte_t pte | |
23945 | #endif | |
23946 | ||
23947 | /* | |
23948 | - * Rules for using pte_update - it must be called after any PTE update which | |
23949 | - * has not been done using the set_pte / clear_pte interfaces. It is used by | |
23950 | - * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE | |
23951 | - * updates should either be sets, clears, or set_pte_atomic for P->P | |
23952 | - * transitions, which means this hook should only be called for user PTEs. | |
23953 | - * This hook implies a P->P protection or access change has taken place, which | |
23954 | - * requires a subsequent TLB flush. The notification can optionally be delayed | |
23955 | - * until the TLB flush event by using the pte_update_defer form of the | |
23956 | - * interface, but care must be taken to assure that the flush happens while | |
23957 | - * still holding the same page table lock so that the shadow and primary pages | |
23958 | - * do not become out of sync on SMP. | |
23959 | - */ | |
23960 | -#define pte_update(mm, addr, ptep) do { } while (0) | |
23961 | -#define pte_update_defer(mm, addr, ptep) do { } while (0) | |
cc90b958 | 23962 | - |
00e5a55c BS |
23963 | -/* local pte updates need not use xchg for locking */ |
23964 | -static inline pte_t xen_local_ptep_get_and_clear(pte_t *ptep, pte_t res) | |
23965 | -{ | |
23966 | - xen_set_pte(ptep, __pte(0)); | |
23967 | - return res; | |
23968 | -} | |
cc90b958 BS |
23969 | - |
23970 | -/* | |
00e5a55c BS |
23971 | - * We only update the dirty/accessed state if we set |
23972 | - * the dirty bit by hand in the kernel, since the hardware | |
23973 | - * will do the accessed bit for us, and we don't want to | |
23974 | - * race with other CPU's that might be updating the dirty | |
23975 | - * bit at the same time. | |
cc90b958 | 23976 | - */ |
00e5a55c BS |
23977 | -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS |
23978 | -#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ | |
23979 | -({ \ | |
23980 | - int __changed = !pte_same(*(ptep), entry); \ | |
23981 | - if (__changed && (dirty)) { \ | |
23982 | - if ( likely((vma)->vm_mm == current->mm) ) { \ | |
23983 | - BUG_ON(HYPERVISOR_update_va_mapping(address, \ | |
23984 | - entry, \ | |
23985 | - (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ | |
23986 | - UVMF_INVLPG|UVMF_MULTI)); \ | |
23987 | - } else { \ | |
23988 | - xen_l1_entry_update(ptep, entry); \ | |
23989 | - flush_tlb_page(vma, address); \ | |
23990 | - } \ | |
23991 | - } \ | |
23992 | - __changed; \ | |
23993 | -}) | |
cc90b958 | 23994 | - |
00e5a55c BS |
23995 | -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG |
23996 | -#define ptep_test_and_clear_young(vma, addr, ptep) ({ \ | |
23997 | - int __ret = 0; \ | |
23998 | - if (pte_young(*(ptep))) \ | |
23999 | - __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \ | |
24000 | - &(ptep)->pte_low); \ | |
24001 | - if (__ret) \ | |
24002 | - pte_update((vma)->vm_mm, addr, ptep); \ | |
24003 | - __ret; \ | |
24004 | -}) | |
24005 | - | |
24006 | -#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH | |
24007 | -#define ptep_clear_flush_young(vma, address, ptep) \ | |
24008 | -({ \ | |
24009 | - pte_t __pte = *(ptep); \ | |
24010 | - int __young = pte_young(__pte); \ | |
24011 | - __pte = pte_mkold(__pte); \ | |
24012 | - if (PagePinned(virt_to_page((vma)->vm_mm->pgd))) \ | |
24013 | - (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \ | |
24014 | - else if (__young) \ | |
24015 | - (ptep)->pte_low = __pte.pte_low; \ | |
24016 | - __young; \ | |
24017 | -}) | |
24018 | - | |
24019 | -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR | |
24020 | -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |
cc90b958 | 24021 | -{ |
00e5a55c BS |
24022 | - pte_t pte = *ptep; |
24023 | - if (!pte_none(pte) | |
24024 | - && (mm != &init_mm | |
24025 | - || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) { | |
24026 | - pte = xen_ptep_get_and_clear(ptep, pte); | |
24027 | - pte_update(mm, addr, ptep); | |
24028 | - } | |
24029 | - return pte; | |
cc90b958 BS |
24030 | -} |
24031 | - | |
00e5a55c BS |
24032 | -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL |
24033 | -#define ptep_get_and_clear_full(mm, addr, ptep, full) \ | |
24034 | - ((full) ? ({ \ | |
24035 | - pte_t __res = *(ptep); \ | |
24036 | - if (PagePinned(virt_to_page((mm)->pgd))) \ | |
24037 | - xen_l1_entry_update(ptep, __pte(0)); \ | |
24038 | - else \ | |
24039 | - *(ptep) = __pte(0); \ | |
24040 | - __res; \ | |
24041 | - }) : \ | |
24042 | - ptep_get_and_clear(mm, addr, ptep)) | |
24043 | - | |
24044 | -#define __HAVE_ARCH_PTEP_SET_WRPROTECT | |
24045 | -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |
cc90b958 | 24046 | -{ |
00e5a55c BS |
24047 | - pte_t pte = *ptep; |
24048 | - if (pte_write(pte)) | |
24049 | - set_pte_at(mm, addr, ptep, pte_wrprotect(pte)); | |
cc90b958 BS |
24050 | -} |
24051 | - | |
cc90b958 | 24052 | -/* |
00e5a55c BS |
24053 | * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); |
24054 | * | |
24055 | * dst - pointer to pgd range anwhere on a pgd page | |
24056 | @@ -383,26 +135,6 @@ static inline void clone_pgd_range(pgd_t | |
24057 | ||
24058 | #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) | |
24059 | ||
24060 | -static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) | |
24061 | -{ | |
24062 | - /* | |
24063 | - * Since this might change the present bit (which controls whether | |
24064 | - * a pte_t object has undergone p2m translation), we must use | |
24065 | - * pte_val() on the input pte and __pte() for the return value. | |
24066 | - */ | |
24067 | - paddr_t pteval = pte_val(pte); | |
cc90b958 | 24068 | - |
00e5a55c BS |
24069 | - pteval &= _PAGE_CHG_MASK; |
24070 | - pteval |= pgprot_val(newprot); | |
24071 | -#ifdef CONFIG_X86_PAE | |
24072 | - pteval &= __supported_pte_mask; | |
24073 | -#endif | |
24074 | - return __pte(pteval); | |
24075 | -} | |
cc90b958 | 24076 | - |
00e5a55c BS |
24077 | -#define pmd_large(pmd) \ |
24078 | -((__pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT)) | |
cc90b958 | 24079 | - |
00e5a55c BS |
24080 | /* |
24081 | * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] | |
24082 | * | |
24083 | @@ -424,6 +156,8 @@ static inline pte_t pte_modify(pte_t pte | |
24084 | */ | |
24085 | #define pgd_offset_k(address) pgd_offset(&init_mm, address) | |
24086 | ||
24087 | +static inline int pud_large(pud_t pud) { return 0; } | |
24088 | + | |
24089 | /* | |
24090 | * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] | |
24091 | * | |
24092 | @@ -449,26 +183,6 @@ static inline pte_t pte_modify(pte_t pte | |
24093 | #define pmd_page_vaddr(pmd) \ | |
24094 | ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) | |
24095 | ||
24096 | -/* | |
24097 | - * Helper function that returns the kernel pagetable entry controlling | |
24098 | - * the virtual address 'address'. NULL means no pagetable entry present. | |
24099 | - * NOTE: the return type is pte_t but if the pmd is PSE then we return it | |
24100 | - * as a pte too. | |
24101 | - */ | |
24102 | -extern pte_t *lookup_address(unsigned long address); | |
cc90b958 BS |
24103 | - |
24104 | -/* | |
00e5a55c BS |
24105 | - * Make a given kernel text page executable/non-executable. |
24106 | - * Returns the previous executability setting of that page (which | |
24107 | - * is used to restore the previous state). Used by the SMP bootup code. | |
24108 | - * NOTE: this is an __init function for security reasons. | |
cc90b958 | 24109 | - */ |
00e5a55c BS |
24110 | -#ifdef CONFIG_X86_PAE |
24111 | - extern int set_kernel_exec(unsigned long vaddr, int enable); | |
24112 | -#else | |
24113 | - static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;} | |
cc90b958 | 24114 | -#endif |
cc90b958 | 24115 | - |
00e5a55c BS |
24116 | #if defined(CONFIG_HIGHPTE) |
24117 | #define pte_offset_map(dir, address) \ | |
24118 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) | |
24119 | @@ -496,72 +210,22 @@ extern pte_t *lookup_address(unsigned lo | |
24120 | */ | |
24121 | #define update_mmu_cache(vma,address,pte) do { } while (0) | |
24122 | ||
24123 | -#include <xen/features.h> | |
24124 | void make_lowmem_page_readonly(void *va, unsigned int feature); | |
24125 | void make_lowmem_page_writable(void *va, unsigned int feature); | |
24126 | -void make_page_readonly(void *va, unsigned int feature); | |
24127 | -void make_page_writable(void *va, unsigned int feature); | |
24128 | -void make_pages_readonly(void *va, unsigned int nr, unsigned int feature); | |
24129 | -void make_pages_writable(void *va, unsigned int nr, unsigned int feature); | |
cc90b958 | 24130 | - |
00e5a55c BS |
24131 | -#define virt_to_ptep(va) \ |
24132 | -({ \ | |
24133 | - pte_t *__ptep = lookup_address((unsigned long)(va)); \ | |
24134 | - BUG_ON(!__ptep || !pte_present(*__ptep)); \ | |
24135 | - __ptep; \ | |
24136 | -}) | |
cc90b958 | 24137 | - |
00e5a55c BS |
24138 | -#define arbitrary_virt_to_machine(va) \ |
24139 | - (((maddr_t)pte_mfn(*virt_to_ptep(va)) << PAGE_SHIFT) \ | |
24140 | - | ((unsigned long)(va) & (PAGE_SIZE - 1))) | |
cc90b958 | 24141 | - |
00e5a55c BS |
24142 | -#ifdef CONFIG_HIGHPTE |
24143 | -#include <asm/io.h> | |
24144 | -struct page *kmap_atomic_to_page(void *); | |
24145 | -#define ptep_to_machine(ptep) \ | |
24146 | -({ \ | |
24147 | - pte_t *__ptep = (ptep); \ | |
24148 | - page_to_phys(kmap_atomic_to_page(__ptep)) \ | |
24149 | - | ((unsigned long)__ptep & (PAGE_SIZE - 1)); \ | |
24150 | -}) | |
24151 | -#else | |
24152 | -#define ptep_to_machine(ptep) virt_to_machine(ptep) | |
cc90b958 | 24153 | -#endif |
00e5a55c BS |
24154 | |
24155 | #endif /* !__ASSEMBLY__ */ | |
24156 | ||
24157 | +/* | |
24158 | + * kern_addr_valid() is (1) for FLATMEM and (0) for | |
24159 | + * SPARSEMEM and DISCONTIGMEM | |
24160 | + */ | |
24161 | #ifdef CONFIG_FLATMEM | |
24162 | #define kern_addr_valid(addr) (1) | |
24163 | -#endif /* CONFIG_FLATMEM */ | |
cc90b958 | 24164 | - |
00e5a55c BS |
24165 | -int direct_remap_pfn_range(struct vm_area_struct *vma, |
24166 | - unsigned long address, | |
24167 | - unsigned long mfn, | |
24168 | - unsigned long size, | |
24169 | - pgprot_t prot, | |
24170 | - domid_t domid); | |
24171 | -int direct_kernel_remap_pfn_range(unsigned long address, | |
24172 | - unsigned long mfn, | |
24173 | - unsigned long size, | |
24174 | - pgprot_t prot, | |
24175 | - domid_t domid); | |
24176 | -int create_lookup_pte_addr(struct mm_struct *mm, | |
24177 | - unsigned long address, | |
24178 | - uint64_t *ptep); | |
24179 | -int touch_pte_range(struct mm_struct *mm, | |
24180 | - unsigned long address, | |
24181 | - unsigned long size); | |
cc90b958 | 24182 | - |
00e5a55c BS |
24183 | -int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd, |
24184 | - unsigned long addr, unsigned long end, pgprot_t newprot, | |
24185 | - int dirty_accountable); | |
cc90b958 | 24186 | - |
00e5a55c BS |
24187 | -#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \ |
24188 | - xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) | |
24189 | +#else | |
24190 | +#define kern_addr_valid(kaddr) (0) | |
24191 | +#endif | |
24192 | ||
24193 | #define io_remap_pfn_range(vma,from,pfn,size,prot) \ | |
24194 | direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO) | |
24195 | ||
24196 | -#include <asm-generic/pgtable.h> | |
cc90b958 | 24197 | - |
00e5a55c BS |
24198 | #endif /* _I386_PGTABLE_H */ |
24199 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-02-16 16:18:36.000000000 +0100 | |
24200 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-16 16:33:40.000000000 +0100 | |
24201 | @@ -13,49 +13,26 @@ | |
24202 | #include <linux/threads.h> | |
24203 | #include <linux/sched.h> | |
24204 | #include <asm/pda.h> | |
24205 | -#ifdef CONFIG_XEN | |
24206 | -#include <asm/hypervisor.h> | |
24207 | ||
24208 | +#ifdef CONFIG_XEN | |
24209 | extern pud_t level3_user_pgt[512]; | |
24210 | ||
24211 | extern void xen_init_pt(void); | |
cc90b958 | 24212 | - |
00e5a55c | 24213 | -extern pte_t *lookup_address(unsigned long address); |
cc90b958 | 24214 | - |
00e5a55c BS |
24215 | -#define virt_to_ptep(va) \ |
24216 | -({ \ | |
24217 | - pte_t *__ptep = lookup_address((unsigned long)(va)); \ | |
24218 | - BUG_ON(!__ptep || !pte_present(*__ptep)); \ | |
24219 | - __ptep; \ | |
24220 | -}) | |
cc90b958 | 24221 | - |
00e5a55c BS |
24222 | -#define arbitrary_virt_to_machine(va) \ |
24223 | - (((maddr_t)pte_mfn(*virt_to_ptep(va)) << PAGE_SHIFT) \ | |
24224 | - | ((unsigned long)(va) & (PAGE_SIZE - 1))) | |
cc90b958 | 24225 | - |
00e5a55c BS |
24226 | -#define ptep_to_machine(ptep) virt_to_machine(ptep) |
24227 | #endif | |
24228 | ||
24229 | extern pud_t level3_kernel_pgt[512]; | |
24230 | extern pud_t level3_ident_pgt[512]; | |
24231 | extern pmd_t level2_kernel_pgt[512]; | |
24232 | extern pgd_t init_level4_pgt[]; | |
24233 | -extern unsigned long __supported_pte_mask; | |
24234 | ||
24235 | #define swapper_pg_dir init_level4_pgt | |
24236 | ||
24237 | extern void paging_init(void); | |
24238 | -extern void clear_kernel_mapping(unsigned long addr, unsigned long size); | |
24239 | - | |
24240 | -/* | |
24241 | - * ZERO_PAGE is a global shared page that is always zero: used | |
24242 | - * for zero-mapped memory areas etc.. | |
24243 | - */ | |
24244 | -extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; | |
24245 | -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) | |
24246 | ||
24247 | #endif /* !__ASSEMBLY__ */ | |
24248 | ||
24249 | +#define SHARED_KERNEL_PMD 1 | |
24250 | + | |
24251 | /* | |
24252 | * PGDIR_SHIFT determines what a top-level page table entry can map | |
24253 | */ | |
24254 | @@ -98,31 +75,63 @@ extern unsigned long empty_zero_page[PAG | |
24255 | #define pgd_none(x) (!__pgd_val(x)) | |
24256 | #define pud_none(x) (!__pud_val(x)) | |
24257 | ||
24258 | -static inline void set_pte(pte_t *dst, pte_t val) | |
24259 | +struct mm_struct; | |
24260 | + | |
24261 | +#define __xen_pte_clear(ptep) xen_set_pte(ptep, __pte(0)) | |
24262 | + | |
24263 | +static inline void xen_set_pte(pte_t *ptep, pte_t pte) | |
24264 | +{ | |
24265 | + *ptep = pte; | |
24266 | +} | |
24267 | + | |
24268 | +static inline void xen_set_pte_atomic(pte_t *ptep, pte_t pte) | |
24269 | { | |
24270 | - *dst = val; | |
24271 | + xen_set_pte(ptep, pte); | |
24272 | } | |
24273 | ||
24274 | -#define set_pmd(pmdptr, pmdval) xen_l2_entry_update(pmdptr, (pmdval)) | |
24275 | -#define set_pud(pudptr, pudval) xen_l3_entry_update(pudptr, (pudval)) | |
24276 | -#define set_pgd(pgdptr, pgdval) xen_l4_entry_update(pgdptr, (pgdval)) | |
24277 | +#ifdef CONFIG_SMP | |
24278 | +static inline pte_t xen_ptep_get_and_clear(pte_t *xp, pte_t ret) | |
24279 | +{ | |
24280 | + return __pte_ma(xchg(&xp->pte, 0)); | |
24281 | +} | |
24282 | +#else | |
24283 | +#define xen_ptep_get_and_clear(xp, pte) xen_local_ptep_get_and_clear(xp, pte) | |
24284 | +#endif | |
24285 | ||
24286 | -static inline void pud_clear (pud_t * pud) | |
24287 | +static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd) | |
24288 | { | |
24289 | - set_pud(pud, __pud(0)); | |
24290 | + xen_l2_entry_update(pmdp, pmd); | |
24291 | +} | |
24292 | + | |
24293 | +static inline void xen_pmd_clear(pmd_t *pmd) | |
24294 | +{ | |
24295 | + xen_set_pmd(pmd, xen_make_pmd(0)); | |
24296 | +} | |
24297 | + | |
24298 | +static inline void xen_set_pud(pud_t *pudp, pud_t pud) | |
24299 | +{ | |
24300 | + xen_l3_entry_update(pudp, pud); | |
24301 | +} | |
24302 | + | |
24303 | +static inline void xen_pud_clear(pud_t *pud) | |
24304 | +{ | |
24305 | + xen_set_pud(pud, xen_make_pud(0)); | |
24306 | } | |
24307 | ||
24308 | #define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD) | |
24309 | ||
24310 | -static inline void pgd_clear (pgd_t * pgd) | |
24311 | +static inline void xen_set_pgd(pgd_t *pgdp, pgd_t pgd) | |
24312 | { | |
24313 | - set_pgd(pgd, __pgd(0)); | |
24314 | - set_pgd(__user_pgd(pgd), __pgd(0)); | |
24315 | + xen_l4_entry_update(pgdp, pgd); | |
24316 | } | |
24317 | ||
24318 | -#define pte_same(a, b) ((a).pte == (b).pte) | |
24319 | +static inline void xen_pgd_clear(pgd_t * pgd) | |
24320 | +{ | |
24321 | + xen_set_pgd(pgd, xen_make_pgd(0)); | |
24322 | + xen_set_pgd(__user_pgd(pgd), xen_make_pgd(0)); | |
24323 | +} | |
24324 | ||
24325 | -#define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK)) | |
24326 | +#define pte_same(a, b) ((a).pte == (b).pte) | |
24327 | ||
24328 | #endif /* !__ASSEMBLY__ */ | |
24329 | ||
24330 | @@ -133,8 +142,6 @@ static inline void pgd_clear (pgd_t * pg | |
24331 | #define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) | |
24332 | #define PGDIR_MASK (~(PGDIR_SIZE-1)) | |
24333 | ||
24334 | -#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1) | |
24335 | -#define FIRST_USER_ADDRESS 0 | |
24336 | ||
24337 | #define MAXMEM _AC(0x3fffffffffff, UL) | |
24338 | #define VMALLOC_START _AC(0xffffc20000000000, UL) | |
24339 | @@ -144,105 +151,6 @@ static inline void pgd_clear (pgd_t * pg | |
24340 | #define MODULES_END _AC(0xfffffffffff00000, UL) | |
24341 | #define MODULES_LEN (MODULES_END - MODULES_VADDR) | |
24342 | ||
24343 | -#define _PAGE_BIT_PRESENT 0 | |
24344 | -#define _PAGE_BIT_RW 1 | |
24345 | -#define _PAGE_BIT_USER 2 | |
24346 | -#define _PAGE_BIT_PWT 3 | |
24347 | -#define _PAGE_BIT_PCD 4 | |
24348 | -#define _PAGE_BIT_ACCESSED 5 | |
24349 | -#define _PAGE_BIT_DIRTY 6 | |
24350 | -#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ | |
24351 | -#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ | |
24352 | -#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ | |
24353 | - | |
24354 | -#define _PAGE_PRESENT 0x001 | |
24355 | -#define _PAGE_RW 0x002 | |
24356 | -#define _PAGE_USER 0x004 | |
24357 | -#define _PAGE_PWT 0x008 | |
24358 | -#define _PAGE_PCD 0x010 | |
24359 | -#define _PAGE_ACCESSED 0x020 | |
24360 | -#define _PAGE_DIRTY 0x040 | |
24361 | -#define _PAGE_PSE 0x080 /* 2MB page */ | |
24362 | -#define _PAGE_FILE 0x040 /* nonlinear file mapping, saved PTE; unset:swap */ | |
24363 | -#define _PAGE_GLOBAL 0x100 /* Global TLB entry */ | |
cc90b958 | 24364 | - |
00e5a55c BS |
24365 | -#define _PAGE_PROTNONE 0x080 /* If not present */ |
24366 | -#define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX) | |
cc90b958 | 24367 | - |
00e5a55c BS |
24368 | -/* Mapped page is I/O or foreign and has no associated page struct. */ |
24369 | -#define _PAGE_IO 0x200 | |
cc90b958 | 24370 | - |
00e5a55c BS |
24371 | -#ifndef __ASSEMBLY__ |
24372 | -#if CONFIG_XEN_COMPAT <= 0x030002 | |
24373 | -extern unsigned int __kernel_page_user; | |
24374 | -#else | |
24375 | -#define __kernel_page_user 0 | |
24376 | -#endif | |
24377 | -#endif | |
cc90b958 | 24378 | - |
00e5a55c BS |
24379 | -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) |
24380 | -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user) | |
cc90b958 | 24381 | - |
00e5a55c BS |
24382 | -#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_IO) |
24383 | - | |
24384 | -#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) | |
24385 | -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) | |
24386 | -#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) | |
24387 | -#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) | |
24388 | -#define PAGE_COPY PAGE_COPY_NOEXEC | |
24389 | -#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) | |
24390 | -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) | |
24391 | -#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) | |
24392 | -#define __PAGE_KERNEL \ | |
24393 | - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | __kernel_page_user) | |
24394 | -#define __PAGE_KERNEL_EXEC \ | |
24395 | - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | __kernel_page_user) | |
24396 | -#define __PAGE_KERNEL_NOCACHE \ | |
24397 | - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX | __kernel_page_user) | |
24398 | -#define __PAGE_KERNEL_RO \ | |
24399 | - (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | __kernel_page_user) | |
24400 | -#define __PAGE_KERNEL_VSYSCALL \ | |
24401 | - (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) | |
24402 | -#define __PAGE_KERNEL_VSYSCALL_NOCACHE \ | |
24403 | - (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD) | |
24404 | -#define __PAGE_KERNEL_LARGE \ | |
24405 | - (__PAGE_KERNEL | _PAGE_PSE) | |
24406 | -#define __PAGE_KERNEL_LARGE_EXEC \ | |
24407 | - (__PAGE_KERNEL_EXEC | _PAGE_PSE) | |
cc90b958 BS |
24408 | - |
24409 | -/* | |
00e5a55c | 24410 | - * We don't support GLOBAL page in xenolinux64 |
cc90b958 | 24411 | - */ |
00e5a55c | 24412 | -#define MAKE_GLOBAL(x) __pgprot((x)) |
cc90b958 | 24413 | - |
00e5a55c BS |
24414 | -#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL) |
24415 | -#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC) | |
24416 | -#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO) | |
24417 | -#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE) | |
24418 | -#define PAGE_KERNEL_VSYSCALL32 __pgprot(__PAGE_KERNEL_VSYSCALL) | |
24419 | -#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL) | |
24420 | -#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE) | |
24421 | -#define PAGE_KERNEL_VSYSCALL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE) | |
cc90b958 | 24422 | - |
00e5a55c BS |
24423 | -/* xwr */ |
24424 | -#define __P000 PAGE_NONE | |
24425 | -#define __P001 PAGE_READONLY | |
24426 | -#define __P010 PAGE_COPY | |
24427 | -#define __P011 PAGE_COPY | |
24428 | -#define __P100 PAGE_READONLY_EXEC | |
24429 | -#define __P101 PAGE_READONLY_EXEC | |
24430 | -#define __P110 PAGE_COPY_EXEC | |
24431 | -#define __P111 PAGE_COPY_EXEC | |
cc90b958 | 24432 | - |
00e5a55c BS |
24433 | -#define __S000 PAGE_NONE |
24434 | -#define __S001 PAGE_READONLY | |
24435 | -#define __S010 PAGE_SHARED | |
24436 | -#define __S011 PAGE_SHARED | |
24437 | -#define __S100 PAGE_READONLY_EXEC | |
24438 | -#define __S101 PAGE_READONLY_EXEC | |
24439 | -#define __S110 PAGE_SHARED_EXEC | |
24440 | -#define __S111 PAGE_SHARED_EXEC | |
cc90b958 | 24441 | - |
00e5a55c BS |
24442 | #ifndef __ASSEMBLY__ |
24443 | ||
24444 | static inline unsigned long pgd_bad(pgd_t pgd) | |
24445 | @@ -260,119 +168,26 @@ static inline unsigned long pmd_bad(pmd_ | |
24446 | return __pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); | |
24447 | } | |
24448 | ||
24449 | -#define set_pte_at(_mm,addr,ptep,pteval) do { \ | |
24450 | - if (((_mm) != current->mm && (_mm) != &init_mm) || \ | |
24451 | - HYPERVISOR_update_va_mapping((addr), (pteval), 0)) \ | |
24452 | - set_pte((ptep), (pteval)); \ | |
24453 | -} while (0) | |
cc90b958 | 24454 | - |
00e5a55c BS |
24455 | #define pte_none(x) (!(x).pte) |
24456 | #define pte_present(x) ((x).pte & (_PAGE_PRESENT | _PAGE_PROTNONE)) | |
24457 | -#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) | |
24458 | ||
24459 | -#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) | |
24460 | +#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) /* FIXME: is this right? */ | |
24461 | ||
24462 | #define __pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT) | |
24463 | #define pte_mfn(_pte) ((_pte).pte & _PAGE_PRESENT ? \ | |
24464 | __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte))) | |
24465 | -#define pte_pfn(_pte) ((_pte).pte & _PAGE_IO ? end_pfn : \ | |
24466 | +#define pte_pfn(_pte) ((_pte).pte & _PAGE_IO ? max_mapnr : \ | |
24467 | (_pte).pte & _PAGE_PRESENT ? \ | |
24468 | mfn_to_local_pfn(__pte_mfn(_pte)) : \ | |
24469 | __pte_mfn(_pte)) | |
24470 | ||
24471 | #define pte_page(x) pfn_to_page(pte_pfn(x)) | |
24472 | ||
24473 | -static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) | |
24474 | -{ | |
24475 | - unsigned long pte = page_nr << PAGE_SHIFT; | |
24476 | - pte |= pgprot_val(pgprot); | |
24477 | - pte &= __supported_pte_mask; | |
24478 | - return __pte(pte); | |
24479 | -} | |
cc90b958 | 24480 | - |
00e5a55c BS |
24481 | -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
24482 | -{ | |
24483 | - pte_t pte = *ptep; | |
24484 | - if (!pte_none(pte)) { | |
24485 | - if ((mm != &init_mm) || | |
24486 | - HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) | |
24487 | - pte = __pte_ma(xchg(&ptep->pte, 0)); | |
24488 | - } | |
24489 | - return pte; | |
24490 | -} | |
cc90b958 | 24491 | - |
00e5a55c BS |
24492 | -static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) |
24493 | -{ | |
24494 | - if (full) { | |
24495 | - pte_t pte = *ptep; | |
24496 | - if (PagePinned(virt_to_page(mm->pgd))) | |
24497 | - xen_l1_entry_update(ptep, __pte(0)); | |
24498 | - else | |
24499 | - *ptep = __pte(0); | |
24500 | - return pte; | |
24501 | - } | |
24502 | - return ptep_get_and_clear(mm, addr, ptep); | |
24503 | -} | |
cc90b958 | 24504 | - |
00e5a55c BS |
24505 | -#define ptep_clear_flush(vma, addr, ptep) \ |
24506 | -({ \ | |
24507 | - pte_t *__ptep = (ptep); \ | |
24508 | - pte_t __res = *__ptep; \ | |
24509 | - if (!pte_none(__res) && \ | |
24510 | - ((vma)->vm_mm != current->mm || \ | |
24511 | - HYPERVISOR_update_va_mapping(addr, __pte(0), \ | |
24512 | - (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ | |
24513 | - UVMF_INVLPG|UVMF_MULTI))) { \ | |
24514 | - __ptep->pte = 0; \ | |
24515 | - flush_tlb_page(vma, addr); \ | |
24516 | - } \ | |
24517 | - __res; \ | |
24518 | -}) | |
cc90b958 | 24519 | - |
00e5a55c BS |
24520 | -/* |
24521 | - * The following only work if pte_present() is true. | |
24522 | - * Undefined behaviour if not.. | |
24523 | - */ | |
24524 | -#define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT) | |
24525 | -static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; } | |
24526 | -static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; } | |
24527 | -static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; } | |
24528 | -static inline int pte_file(pte_t pte) { return __pte_val(pte) & _PAGE_FILE; } | |
24529 | -static inline int pte_huge(pte_t pte) { return __pte_val(pte) & _PAGE_PSE; } | |
cc90b958 | 24530 | - |
00e5a55c BS |
24531 | -static inline pte_t pte_mkclean(pte_t pte) { __pte_val(pte) &= ~_PAGE_DIRTY; return pte; } |
24532 | -static inline pte_t pte_mkold(pte_t pte) { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } | |
24533 | -static inline pte_t pte_wrprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_RW; return pte; } | |
24534 | -static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) &= ~_PAGE_NX; return pte; } | |
24535 | -static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; } | |
24536 | -static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; } | |
24537 | -static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; } | |
24538 | -static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; } | |
24539 | -static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; } | |
cc90b958 | 24540 | - |
00e5a55c BS |
24541 | -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) |
24542 | -{ | |
24543 | - if (!pte_young(*ptep)) | |
24544 | - return 0; | |
24545 | - return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte); | |
24546 | -} | |
cc90b958 | 24547 | - |
00e5a55c | 24548 | -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
cc90b958 | 24549 | -{ |
00e5a55c BS |
24550 | - pte_t pte = *ptep; |
24551 | - if (pte_write(pte)) | |
24552 | - set_pte_at(mm, addr, ptep, pte_wrprotect(pte)); | |
cc90b958 BS |
24553 | -} |
24554 | - | |
00e5a55c BS |
24555 | /* |
24556 | * Macro to mark a page protection value as "uncacheable". | |
24557 | */ | |
24558 | #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) | |
24559 | ||
24560 | -static inline int pmd_large(pmd_t pte) { | |
24561 | - return (__pmd_val(pte) & __LARGE_PTE) == __LARGE_PTE; | |
24562 | -} | |
cc90b958 | 24563 | - |
00e5a55c BS |
24564 | |
24565 | /* | |
24566 | * Conversion functions: convert a page and protection to a page entry, | |
24567 | @@ -388,6 +203,7 @@ static inline int pmd_large(pmd_t pte) { | |
24568 | #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr)) | |
24569 | #define pgd_offset_k(address) (init_level4_pgt + pgd_index(address)) | |
24570 | #define pgd_present(pgd) (__pgd_val(pgd) & _PAGE_PRESENT) | |
24571 | +static inline int pgd_large(pgd_t pgd) { return 0; } | |
24572 | #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) | |
24573 | ||
24574 | /* PUD - Level3 access */ | |
24575 | @@ -398,6 +214,12 @@ static inline int pmd_large(pmd_t pte) { | |
24576 | #define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address)) | |
24577 | #define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT) | |
24578 | ||
24579 | +static inline int pud_large(pud_t pte) | |
24580 | +{ | |
24581 | + return (__pud_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) == | |
24582 | + (_PAGE_PSE|_PAGE_PRESENT); | |
24583 | +} | |
24584 | + | |
24585 | /* PMD - Level 2 access */ | |
24586 | #define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK)) | |
24587 | #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) | |
24588 | @@ -413,36 +235,18 @@ static inline int pmd_large(pmd_t pte) { | |
24589 | #else | |
24590 | #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT) | |
24591 | #endif | |
24592 | -#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) | |
24593 | #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) | |
24594 | #define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) | |
24595 | ||
24596 | #define pte_to_pgoff(pte) ((__pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) | |
24597 | -#define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE }) | |
24598 | +#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | _PAGE_FILE }) | |
24599 | #define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT | |
24600 | ||
24601 | /* PTE - Level 1 access. */ | |
24602 | ||
24603 | /* page, protection -> pte */ | |
24604 | #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) | |
24605 | -#define mk_pte_huge(entry) (__pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE) | |
24606 | ||
24607 | -/* Change flags of a PTE */ | |
24608 | -static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) | |
cc90b958 | 24609 | -{ |
00e5a55c BS |
24610 | - /* |
24611 | - * Since this might change the present bit (which controls whether | |
24612 | - * a pte_t object has undergone p2m translation), we must use | |
24613 | - * pte_val() on the input pte and __pte() for the return value. | |
24614 | - */ | |
24615 | - unsigned long pteval = pte_val(pte); | |
cc90b958 | 24616 | - |
00e5a55c BS |
24617 | - pteval &= _PAGE_CHG_MASK; |
24618 | - pteval |= pgprot_val(newprot); | |
24619 | - pteval &= __supported_pte_mask; | |
24620 | - return __pte(pteval); | |
24621 | -} | |
cc90b958 | 24622 | - |
00e5a55c BS |
24623 | #define pte_index(address) \ |
24624 | (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) | |
24625 | #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ | |
24626 | @@ -456,101 +260,21 @@ static inline pte_t pte_modify(pte_t pte | |
24627 | ||
24628 | #define update_mmu_cache(vma,address,pte) do { } while (0) | |
24629 | ||
24630 | -/* | |
24631 | - * Rules for using ptep_establish: the pte MUST be a user pte, and | |
24632 | - * must be a present->present transition. | |
24633 | - */ | |
24634 | -#define __HAVE_ARCH_PTEP_ESTABLISH | |
24635 | -#define ptep_establish(vma, address, ptep, pteval) \ | |
24636 | - do { \ | |
24637 | - if ( likely((vma)->vm_mm == current->mm) ) { \ | |
24638 | - BUG_ON(HYPERVISOR_update_va_mapping(address, \ | |
24639 | - pteval, \ | |
24640 | - (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ | |
24641 | - UVMF_INVLPG|UVMF_MULTI)); \ | |
24642 | - } else { \ | |
24643 | - xen_l1_entry_update(ptep, pteval); \ | |
24644 | - flush_tlb_page(vma, address); \ | |
24645 | - } \ | |
24646 | - } while (0) | |
cc90b958 | 24647 | - |
00e5a55c BS |
24648 | -/* We only update the dirty/accessed state if we set |
24649 | - * the dirty bit by hand in the kernel, since the hardware | |
24650 | - * will do the accessed bit for us, and we don't want to | |
24651 | - * race with other CPU's that might be updating the dirty | |
24652 | - * bit at the same time. */ | |
24653 | -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS | |
24654 | -#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ | |
24655 | -({ \ | |
24656 | - int __changed = !pte_same(*(ptep), entry); \ | |
24657 | - if (__changed && (dirty)) \ | |
24658 | - ptep_establish(vma, address, ptep, entry); \ | |
24659 | - __changed; \ | |
24660 | -}) | |
cc90b958 | 24661 | - |
00e5a55c BS |
24662 | -#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH |
24663 | -#define ptep_clear_flush_young(vma, address, ptep) \ | |
24664 | -({ \ | |
24665 | - pte_t __pte = *(ptep); \ | |
24666 | - int __young = pte_young(__pte); \ | |
24667 | - __pte = pte_mkold(__pte); \ | |
24668 | - if (PagePinned(virt_to_page((vma)->vm_mm->pgd))) \ | |
24669 | - (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \ | |
24670 | - else if (__young) \ | |
24671 | - set_pte(ptep, __pte); \ | |
24672 | - __young; \ | |
24673 | -}) | |
cc90b958 | 24674 | - |
00e5a55c BS |
24675 | /* Encode and de-code a swap entry */ |
24676 | #define __swp_type(x) (((x).val >> 1) & 0x3f) | |
24677 | #define __swp_offset(x) ((x).val >> 8) | |
24678 | #define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) | |
24679 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { __pte_val(pte) }) | |
24680 | -#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) | |
cc90b958 | 24681 | - |
00e5a55c BS |
24682 | -extern spinlock_t pgd_lock; |
24683 | -extern struct list_head pgd_list; | |
24684 | +#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) | |
24685 | ||
24686 | extern int kern_addr_valid(unsigned long addr); | |
cc90b958 | 24687 | - |
00e5a55c | 24688 | -#define DOMID_LOCAL (0xFFFFU) |
cc90b958 | 24689 | - |
00e5a55c | 24690 | -struct vm_area_struct; |
cc90b958 | 24691 | - |
00e5a55c BS |
24692 | -int direct_remap_pfn_range(struct vm_area_struct *vma, |
24693 | - unsigned long address, | |
24694 | - unsigned long mfn, | |
24695 | - unsigned long size, | |
24696 | - pgprot_t prot, | |
24697 | - domid_t domid); | |
cc90b958 | 24698 | - |
00e5a55c BS |
24699 | -int direct_kernel_remap_pfn_range(unsigned long address, |
24700 | - unsigned long mfn, | |
24701 | - unsigned long size, | |
24702 | - pgprot_t prot, | |
24703 | - domid_t domid); | |
cc90b958 | 24704 | - |
00e5a55c BS |
24705 | -int create_lookup_pte_addr(struct mm_struct *mm, |
24706 | - unsigned long address, | |
24707 | - uint64_t *ptep); | |
cc90b958 | 24708 | - |
00e5a55c BS |
24709 | -int touch_pte_range(struct mm_struct *mm, |
24710 | - unsigned long address, | |
24711 | - unsigned long size); | |
24712 | - | |
24713 | -int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd, | |
24714 | - unsigned long addr, unsigned long end, pgprot_t newprot, | |
24715 | - int dirty_accountable); | |
24716 | - | |
24717 | -#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \ | |
24718 | - xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) | |
24719 | - | |
24720 | -pte_t *lookup_address(unsigned long addr); | |
24721 | +extern void cleanup_highmap(void); | |
24722 | ||
24723 | #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ | |
24724 | direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO) | |
24725 | ||
24726 | #define HAVE_ARCH_UNMAPPED_AREA | |
24727 | +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN | |
24728 | ||
24729 | #define pgtable_cache_init() do { } while (0) | |
24730 | #define check_pgt_cache() do { } while (0) | |
24731 | @@ -563,13 +287,7 @@ pte_t *lookup_address(unsigned long addr | |
24732 | #define kc_offset_to_vaddr(o) \ | |
24733 | (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o)) | |
24734 | ||
24735 | -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG | |
24736 | -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR | |
24737 | -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL | |
24738 | -#define __HAVE_ARCH_PTEP_CLEAR_FLUSH | |
24739 | -#define __HAVE_ARCH_PTEP_SET_WRPROTECT | |
24740 | #define __HAVE_ARCH_PTE_SAME | |
24741 | -#include <asm-generic/pgtable.h> | |
24742 | #endif /* !__ASSEMBLY__ */ | |
24743 | ||
24744 | #endif /* _X86_64_PGTABLE_H */ | |
24745 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/processor.h 2009-02-16 16:18:36.000000000 +0100 | |
24746 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/processor.h 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
24747 | @@ -1,5 +1,793 @@ |
24748 | +#ifndef __ASM_X86_PROCESSOR_H | |
24749 | +#define __ASM_X86_PROCESSOR_H | |
24750 | + | |
24751 | +#include <asm/processor-flags.h> | |
24752 | + | |
24753 | +/* migration helpers, for KVM - will be removed in 2.6.25: */ | |
24754 | +#include <asm/vm86.h> | |
24755 | +#define Xgt_desc_struct desc_ptr | |
24756 | + | |
24757 | +/* Forward declaration, a strange C thing */ | |
24758 | +struct task_struct; | |
24759 | +struct mm_struct; | |
24760 | + | |
24761 | +#include <asm/vm86.h> | |
24762 | +#include <asm/math_emu.h> | |
24763 | +#include <asm/segment.h> | |
24764 | +#include <asm/types.h> | |
24765 | +#include <asm/sigcontext.h> | |
24766 | +#include <asm/current.h> | |
24767 | +#include <asm/cpufeature.h> | |
24768 | +#include <asm/system.h> | |
24769 | +#include <asm/page.h> | |
24770 | +#include <asm/percpu.h> | |
24771 | +#include <asm/msr.h> | |
24772 | +#include <asm/desc_defs.h> | |
24773 | +#include <asm/nops.h> | |
24774 | +#include <linux/personality.h> | |
24775 | +#include <linux/cpumask.h> | |
24776 | +#include <linux/cache.h> | |
24777 | +#include <linux/threads.h> | |
24778 | +#include <linux/init.h> | |
24779 | +#include <xen/interface/physdev.h> | |
24780 | + | |
24781 | +/* | |
24782 | + * Default implementation of macro that returns current | |
24783 | + * instruction pointer ("program counter"). | |
24784 | + */ | |
24785 | +static inline void *current_text_addr(void) | |
24786 | +{ | |
24787 | + void *pc; | |
24788 | + asm volatile("mov $1f,%0\n1:":"=r" (pc)); | |
24789 | + return pc; | |
24790 | +} | |
24791 | + | |
24792 | +#ifdef CONFIG_X86_VSMP | |
24793 | +#define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) | |
24794 | +#define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) | |
24795 | +#else | |
24796 | +#define ARCH_MIN_TASKALIGN 16 | |
24797 | +#define ARCH_MIN_MMSTRUCT_ALIGN 0 | |
24798 | +#endif | |
24799 | + | |
24800 | +/* | |
24801 | + * CPU type and hardware bug flags. Kept separately for each CPU. | |
24802 | + * Members of this structure are referenced in head.S, so think twice | |
24803 | + * before touching them. [mj] | |
24804 | + */ | |
24805 | + | |
24806 | +struct cpuinfo_x86 { | |
24807 | + __u8 x86; /* CPU family */ | |
24808 | + __u8 x86_vendor; /* CPU vendor */ | |
24809 | + __u8 x86_model; | |
24810 | + __u8 x86_mask; | |
24811 | +#ifdef CONFIG_X86_32 | |
24812 | + char wp_works_ok; /* It doesn't on 386's */ | |
24813 | + char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */ | |
24814 | + char hard_math; | |
24815 | + char rfu; | |
24816 | + char fdiv_bug; | |
24817 | + char f00f_bug; | |
24818 | + char coma_bug; | |
24819 | + char pad0; | |
24820 | +#else | |
24821 | + /* number of 4K pages in DTLB/ITLB combined(in pages)*/ | |
24822 | + int x86_tlbsize; | |
24823 | + __u8 x86_virt_bits, x86_phys_bits; | |
24824 | + /* cpuid returned core id bits */ | |
24825 | + __u8 x86_coreid_bits; | |
24826 | + /* Max extended CPUID function supported */ | |
24827 | + __u32 extended_cpuid_level; | |
24828 | +#endif | |
24829 | + int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ | |
24830 | + __u32 x86_capability[NCAPINTS]; | |
24831 | + char x86_vendor_id[16]; | |
24832 | + char x86_model_id[64]; | |
24833 | + int x86_cache_size; /* in KB - valid for CPUS which support this | |
24834 | + call */ | |
24835 | + int x86_cache_alignment; /* In bytes */ | |
24836 | + int x86_power; | |
24837 | + unsigned long loops_per_jiffy; | |
24838 | +#ifdef CONFIG_SMP | |
24839 | + cpumask_t llc_shared_map; /* cpus sharing the last level cache */ | |
24840 | +#endif | |
24841 | + u16 x86_max_cores; /* cpuid returned max cores value */ | |
24842 | + u16 apicid; | |
24843 | + u16 x86_clflush_size; | |
24844 | +#ifdef CONFIG_SMP | |
24845 | + u16 booted_cores; /* number of cores as seen by OS */ | |
24846 | + u16 phys_proc_id; /* Physical processor id. */ | |
24847 | + u16 cpu_core_id; /* Core id */ | |
24848 | + u16 cpu_index; /* index into per_cpu list */ | |
24849 | +#endif | |
24850 | +} __attribute__((__aligned__(SMP_CACHE_BYTES))); | |
24851 | + | |
24852 | +#define X86_VENDOR_INTEL 0 | |
24853 | +#define X86_VENDOR_CYRIX 1 | |
24854 | +#define X86_VENDOR_AMD 2 | |
24855 | +#define X86_VENDOR_UMC 3 | |
24856 | +#define X86_VENDOR_NEXGEN 4 | |
24857 | +#define X86_VENDOR_CENTAUR 5 | |
24858 | +#define X86_VENDOR_TRANSMETA 7 | |
24859 | +#define X86_VENDOR_NSC 8 | |
24860 | +#define X86_VENDOR_NUM 9 | |
24861 | +#define X86_VENDOR_UNKNOWN 0xff | |
24862 | + | |
24863 | +/* | |
24864 | + * capabilities of CPUs | |
24865 | + */ | |
24866 | +extern struct cpuinfo_x86 boot_cpu_data; | |
24867 | +extern struct cpuinfo_x86 new_cpu_data; | |
24868 | +extern __u32 cleared_cpu_caps[NCAPINTS]; | |
24869 | + | |
24870 | +#ifdef CONFIG_SMP | |
24871 | +DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); | |
24872 | +#define cpu_data(cpu) per_cpu(cpu_info, cpu) | |
24873 | +#define current_cpu_data cpu_data(smp_processor_id()) | |
24874 | +#else | |
24875 | +#define cpu_data(cpu) boot_cpu_data | |
24876 | +#define current_cpu_data boot_cpu_data | |
24877 | +#endif | |
24878 | + | |
24879 | +void cpu_detect(struct cpuinfo_x86 *c); | |
24880 | + | |
24881 | +extern void identify_cpu(struct cpuinfo_x86 *); | |
24882 | +extern void identify_boot_cpu(void); | |
24883 | +extern void identify_secondary_cpu(struct cpuinfo_x86 *); | |
24884 | +extern void print_cpu_info(struct cpuinfo_x86 *); | |
24885 | +extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); | |
24886 | +extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); | |
24887 | +extern unsigned short num_cache_leaves; | |
24888 | + | |
24889 | +#if defined(CONFIG_X86_HT) || defined(CONFIG_X86_64) | |
24890 | +extern void detect_ht(struct cpuinfo_x86 *c); | |
24891 | +#else | |
24892 | +static inline void detect_ht(struct cpuinfo_x86 *c) {} | |
24893 | +#endif | |
24894 | + | |
24895 | +static inline void xen_cpuid(unsigned int *eax, unsigned int *ebx, | |
24896 | + unsigned int *ecx, unsigned int *edx) | |
24897 | +{ | |
24898 | + /* ecx is often an input as well as an output. */ | |
24899 | + __asm__(XEN_CPUID | |
24900 | + : "=a" (*eax), | |
24901 | + "=b" (*ebx), | |
24902 | + "=c" (*ecx), | |
24903 | + "=d" (*edx) | |
24904 | + : "0" (*eax), "2" (*ecx)); | |
24905 | +} | |
24906 | + | |
24907 | +static inline void load_cr3(pgd_t *pgdir) | |
24908 | +{ | |
24909 | + write_cr3(__pa(pgdir)); | |
24910 | +} | |
24911 | + | |
24912 | +#ifndef CONFIG_X86_NO_TSS | |
24913 | +#ifdef CONFIG_X86_32 | |
24914 | +/* This is the TSS defined by the hardware. */ | |
24915 | +struct x86_hw_tss { | |
24916 | + unsigned short back_link, __blh; | |
24917 | + unsigned long sp0; | |
24918 | + unsigned short ss0, __ss0h; | |
24919 | + unsigned long sp1; | |
24920 | + unsigned short ss1, __ss1h; /* ss1 caches MSR_IA32_SYSENTER_CS */ | |
24921 | + unsigned long sp2; | |
24922 | + unsigned short ss2, __ss2h; | |
24923 | + unsigned long __cr3; | |
24924 | + unsigned long ip; | |
24925 | + unsigned long flags; | |
24926 | + unsigned long ax, cx, dx, bx; | |
24927 | + unsigned long sp, bp, si, di; | |
24928 | + unsigned short es, __esh; | |
24929 | + unsigned short cs, __csh; | |
24930 | + unsigned short ss, __ssh; | |
24931 | + unsigned short ds, __dsh; | |
24932 | + unsigned short fs, __fsh; | |
24933 | + unsigned short gs, __gsh; | |
24934 | + unsigned short ldt, __ldth; | |
24935 | + unsigned short trace, io_bitmap_base; | |
24936 | +} __attribute__((packed)); | |
24937 | +extern struct tss_struct doublefault_tss; | |
24938 | +#else | |
24939 | +struct x86_hw_tss { | |
24940 | + u32 reserved1; | |
24941 | + u64 sp0; | |
24942 | + u64 sp1; | |
24943 | + u64 sp2; | |
24944 | + u64 reserved2; | |
24945 | + u64 ist[7]; | |
24946 | + u32 reserved3; | |
24947 | + u32 reserved4; | |
24948 | + u16 reserved5; | |
24949 | + u16 io_bitmap_base; | |
24950 | +} __attribute__((packed)) ____cacheline_aligned; | |
24951 | +#endif | |
24952 | +#endif /* CONFIG_X86_NO_TSS */ | |
24953 | + | |
24954 | +/* | |
24955 | + * Size of io_bitmap. | |
24956 | + */ | |
24957 | +#define IO_BITMAP_BITS 65536 | |
24958 | +#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) | |
24959 | +#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) | |
24960 | +#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) | |
24961 | +#define INVALID_IO_BITMAP_OFFSET 0x8000 | |
24962 | +#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000 | |
24963 | + | |
24964 | +#ifndef CONFIG_X86_NO_TSS | |
24965 | +struct tss_struct { | |
24966 | + struct x86_hw_tss x86_tss; | |
24967 | + | |
24968 | + /* | |
24969 | + * The extra 1 is there because the CPU will access an | |
24970 | + * additional byte beyond the end of the IO permission | |
24971 | + * bitmap. The extra byte must be all 1 bits, and must | |
24972 | + * be within the limit. | |
24973 | + */ | |
24974 | + unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; | |
24975 | + /* | |
24976 | + * Cache the current maximum and the last task that used the bitmap: | |
24977 | + */ | |
24978 | + unsigned long io_bitmap_max; | |
24979 | + struct thread_struct *io_bitmap_owner; | |
24980 | + /* | |
24981 | + * pads the TSS to be cacheline-aligned (size is 0x100) | |
24982 | + */ | |
24983 | + unsigned long __cacheline_filler[35]; | |
24984 | + /* | |
24985 | + * .. and then another 0x100 bytes for emergency kernel stack | |
24986 | + */ | |
24987 | + unsigned long stack[64]; | |
24988 | +} __attribute__((packed)); | |
24989 | + | |
24990 | +DECLARE_PER_CPU(struct tss_struct, init_tss); | |
24991 | + | |
24992 | +/* Save the original ist values for checking stack pointers during debugging */ | |
24993 | +struct orig_ist { | |
24994 | + unsigned long ist[7]; | |
24995 | +}; | |
24996 | +#endif /* CONFIG_X86_NO_TSS */ | |
24997 | + | |
24998 | +#define MXCSR_DEFAULT 0x1f80 | |
24999 | + | |
25000 | +struct i387_fsave_struct { | |
25001 | + u32 cwd; | |
25002 | + u32 swd; | |
25003 | + u32 twd; | |
25004 | + u32 fip; | |
25005 | + u32 fcs; | |
25006 | + u32 foo; | |
25007 | + u32 fos; | |
25008 | + u32 st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ | |
25009 | + u32 status; /* software status information */ | |
25010 | +}; | |
25011 | + | |
25012 | +struct i387_fxsave_struct { | |
25013 | + u16 cwd; | |
25014 | + u16 swd; | |
25015 | + u16 twd; | |
25016 | + u16 fop; | |
25017 | + union { | |
25018 | + struct { | |
25019 | + u64 rip; | |
25020 | + u64 rdp; | |
25021 | + }; | |
25022 | + struct { | |
25023 | + u32 fip; | |
25024 | + u32 fcs; | |
25025 | + u32 foo; | |
25026 | + u32 fos; | |
25027 | + }; | |
25028 | + }; | |
25029 | + u32 mxcsr; | |
25030 | + u32 mxcsr_mask; | |
25031 | + u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ | |
25032 | + u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ | |
25033 | + u32 padding[24]; | |
25034 | +} __attribute__((aligned(16))); | |
25035 | + | |
25036 | +struct i387_soft_struct { | |
25037 | + u32 cwd; | |
25038 | + u32 swd; | |
25039 | + u32 twd; | |
25040 | + u32 fip; | |
25041 | + u32 fcs; | |
25042 | + u32 foo; | |
25043 | + u32 fos; | |
25044 | + u32 st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ | |
25045 | + u8 ftop, changed, lookahead, no_update, rm, alimit; | |
25046 | + struct info *info; | |
25047 | + u32 entry_eip; | |
25048 | +}; | |
25049 | + | |
25050 | +union i387_union { | |
25051 | + struct i387_fsave_struct fsave; | |
25052 | + struct i387_fxsave_struct fxsave; | |
25053 | + struct i387_soft_struct soft; | |
25054 | +}; | |
25055 | + | |
25056 | +#ifdef CONFIG_X86_32 | |
25057 | +DECLARE_PER_CPU(u8, cpu_llc_id); | |
25058 | +#elif !defined(CONFIG_X86_NO_TSS) | |
25059 | +DECLARE_PER_CPU(struct orig_ist, orig_ist); | |
25060 | +#endif | |
25061 | + | |
25062 | +extern void print_cpu_info(struct cpuinfo_x86 *); | |
25063 | +extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); | |
25064 | +extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); | |
25065 | +extern unsigned short num_cache_leaves; | |
25066 | + | |
25067 | +struct thread_struct { | |
25068 | +/* cached TLS descriptors. */ | |
25069 | + struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; | |
25070 | + unsigned long sp0; | |
25071 | + unsigned long sp; | |
25072 | +#ifdef CONFIG_X86_32 | |
25073 | + unsigned long sysenter_cs; | |
25074 | +#else | |
25075 | + unsigned long usersp; /* Copy from PDA */ | |
25076 | + unsigned short es, ds, fsindex, gsindex; | |
25077 | +#endif | |
25078 | + unsigned long ip; | |
25079 | + unsigned long fs; | |
25080 | + unsigned long gs; | |
25081 | +/* Hardware debugging registers */ | |
25082 | + unsigned long debugreg0; | |
25083 | + unsigned long debugreg1; | |
25084 | + unsigned long debugreg2; | |
25085 | + unsigned long debugreg3; | |
25086 | + unsigned long debugreg6; | |
25087 | + unsigned long debugreg7; | |
25088 | +/* fault info */ | |
25089 | + unsigned long cr2, trap_no, error_code; | |
25090 | +/* floating point info */ | |
25091 | + union i387_union i387 __attribute__((aligned(16))); | |
25092 | +#ifdef CONFIG_X86_32 | |
25093 | +/* virtual 86 mode info */ | |
25094 | + struct vm86_struct __user *vm86_info; | |
25095 | + unsigned long screen_bitmap; | |
25096 | + unsigned long v86flags, v86mask, saved_sp0; | |
25097 | + unsigned int saved_fs, saved_gs; | |
25098 | +#endif | |
25099 | +/* IO permissions */ | |
25100 | + unsigned long *io_bitmap_ptr; | |
25101 | + unsigned long iopl; | |
25102 | +/* max allowed port in the bitmap, in bytes: */ | |
25103 | + unsigned io_bitmap_max; | |
25104 | +/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ | |
25105 | + unsigned long debugctlmsr; | |
25106 | +/* Debug Store - if not 0 points to a DS Save Area configuration; | |
25107 | + * goes into MSR_IA32_DS_AREA */ | |
25108 | + unsigned long ds_area_msr; | |
25109 | +}; | |
25110 | + | |
25111 | +static inline unsigned long xen_get_debugreg(int regno) | |
25112 | +{ | |
25113 | + return HYPERVISOR_get_debugreg(regno); | |
25114 | +} | |
25115 | + | |
25116 | +static inline void xen_set_debugreg(int regno, unsigned long value) | |
25117 | +{ | |
25118 | + WARN_ON(HYPERVISOR_set_debugreg(regno, value)); | |
25119 | +} | |
25120 | + | |
25121 | +/* | |
25122 | + * Set IOPL bits in EFLAGS from given mask | |
25123 | + */ | |
25124 | +static inline void xen_set_iopl_mask(unsigned mask) | |
25125 | +{ | |
25126 | + struct physdev_set_iopl set_iopl; | |
25127 | + | |
25128 | + /* Force the change at ring 0. */ | |
25129 | + set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; | |
25130 | + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl)); | |
25131 | +} | |
25132 | + | |
25133 | +#ifndef CONFIG_X86_NO_TSS | |
25134 | +static inline void native_load_sp0(struct tss_struct *tss, | |
25135 | + struct thread_struct *thread) | |
25136 | +{ | |
25137 | + tss->x86_tss.sp0 = thread->sp0; | |
25138 | +#ifdef CONFIG_X86_32 | |
25139 | + /* Only happens when SEP is enabled, no need to test "SEP"arately */ | |
25140 | + if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { | |
25141 | + tss->x86_tss.ss1 = thread->sysenter_cs; | |
25142 | + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | |
25143 | + } | |
25144 | +#endif | |
25145 | +} | |
25146 | +#else | |
25147 | +#define xen_load_sp0(tss, thread) do { \ | |
25148 | + if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->sp0)) \ | |
25149 | + BUG(); \ | |
25150 | +} while (0) | |
25151 | +#endif | |
25152 | + | |
25153 | +#define __cpuid xen_cpuid | |
25154 | +#define paravirt_enabled() 0 | |
25155 | + | |
25156 | +/* | |
25157 | + * These special macros can be used to get or set a debugging register | |
25158 | + */ | |
25159 | +#define get_debugreg(var, register) \ | |
25160 | + (var) = xen_get_debugreg(register) | |
25161 | +#define set_debugreg(value, register) \ | |
25162 | + xen_set_debugreg(register, value) | |
25163 | + | |
25164 | +#define load_sp0 xen_load_sp0 | |
25165 | + | |
25166 | +#define set_iopl_mask xen_set_iopl_mask | |
25167 | + | |
25168 | +/* | |
25169 | + * Save the cr4 feature set we're using (ie | |
25170 | + * Pentium 4MB enable and PPro Global page | |
25171 | + * enable), so that any CPU's that boot up | |
25172 | + * after us can get the correct flags. | |
25173 | + */ | |
25174 | +extern unsigned long mmu_cr4_features; | |
25175 | + | |
25176 | +static inline void set_in_cr4(unsigned long mask) | |
25177 | +{ | |
25178 | + unsigned long cr4; /* same width as mask and mmu_cr4_features */ | |
25179 | + mmu_cr4_features |= mask; /* remembered for CPUs that boot later */ | |
25180 | + cr4 = read_cr4(); | |
25181 | + cr4 |= mask; | |
25182 | + write_cr4(cr4); | |
25183 | +} | |
25184 | + | |
25185 | +static inline void clear_in_cr4(unsigned long mask) | |
25186 | +{ | |
25187 | + unsigned long cr4; /* same width as mask and mmu_cr4_features */ | |
25188 | + mmu_cr4_features &= ~mask; /* remembered for CPUs that boot later */ | |
25189 | + cr4 = read_cr4(); | |
25190 | + cr4 &= ~mask; | |
25191 | + write_cr4(cr4); | |
25192 | +} | |
25193 | + | |
25194 | +struct microcode_header { | |
25195 | + unsigned int hdrver; | |
25196 | + unsigned int rev; | |
25197 | + unsigned int date; | |
25198 | + unsigned int sig; | |
25199 | + unsigned int cksum; | |
25200 | + unsigned int ldrver; | |
25201 | + unsigned int pf; | |
25202 | + unsigned int datasize; | |
25203 | + unsigned int totalsize; | |
25204 | + unsigned int reserved[3]; | |
25205 | +}; | |
25206 | + | |
25207 | +struct microcode { | |
25208 | + struct microcode_header hdr; | |
25209 | + unsigned int bits[0]; | |
25210 | +}; | |
25211 | + | |
25212 | +typedef struct microcode microcode_t; | |
25213 | +typedef struct microcode_header microcode_header_t; | |
25214 | + | |
25215 | +/* microcode format is extended from prescott processors */ | |
25216 | +struct extended_signature { | |
25217 | + unsigned int sig; | |
25218 | + unsigned int pf; | |
25219 | + unsigned int cksum; | |
25220 | +}; | |
25221 | + | |
25222 | +struct extended_sigtable { | |
25223 | + unsigned int count; | |
25224 | + unsigned int cksum; | |
25225 | + unsigned int reserved[3]; | |
25226 | + struct extended_signature sigs[0]; | |
25227 | +}; | |
25228 | + | |
25229 | +typedef struct { | |
25230 | + unsigned long seg; | |
25231 | +} mm_segment_t; | |
25232 | + | |
25233 | + | |
25234 | +/* | |
25235 | + * create a kernel thread without removing it from tasklists | |
25236 | + */ | |
25237 | +extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); | |
25238 | + | |
25239 | +/* Free all resources held by a thread. */ | |
25240 | +extern void release_thread(struct task_struct *); | |
25241 | + | |
25242 | +/* Prepare to copy thread state - unlazy all lazy status */ | |
25243 | +extern void prepare_to_copy(struct task_struct *tsk); | |
25244 | + | |
25245 | +unsigned long get_wchan(struct task_struct *p); | |
25246 | + | |
25247 | +/* | |
25248 | + * Generic CPUID function | |
25249 | + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx | |
25250 | + * resulting in stale register contents being returned. | |
25251 | + */ | |
25252 | +static inline void cpuid(unsigned int op, | |
25253 | + unsigned int *eax, unsigned int *ebx, | |
25254 | + unsigned int *ecx, unsigned int *edx) | |
25255 | +{ | |
25256 | + *eax = op; | |
25257 | + *ecx = 0; | |
25258 | + __cpuid(eax, ebx, ecx, edx); | |
25259 | +} | |
25260 | + | |
25261 | +/* Some CPUID calls want 'count' to be placed in ecx */ | |
25262 | +static inline void cpuid_count(unsigned int op, int count, | |
25263 | + unsigned int *eax, unsigned int *ebx, | |
25264 | + unsigned int *ecx, unsigned int *edx) | |
25265 | +{ | |
25266 | + *eax = op; | |
25267 | + *ecx = count; | |
25268 | + __cpuid(eax, ebx, ecx, edx); | |
25269 | +} | |
25270 | + | |
25271 | +/* | |
25272 | + * CPUID functions returning a single datum | |
25273 | + */ | |
25274 | +static inline unsigned int cpuid_eax(unsigned int op) | |
25275 | +{ | |
25276 | + unsigned int eax, ebx, ecx, edx; | |
25277 | + | |
25278 | + cpuid(op, &eax, &ebx, &ecx, &edx); | |
25279 | + return eax; | |
25280 | +} | |
25281 | +static inline unsigned int cpuid_ebx(unsigned int op) | |
25282 | +{ | |
25283 | + unsigned int eax, ebx, ecx, edx; | |
25284 | + | |
25285 | + cpuid(op, &eax, &ebx, &ecx, &edx); | |
25286 | + return ebx; | |
25287 | +} | |
25288 | +static inline unsigned int cpuid_ecx(unsigned int op) | |
25289 | +{ | |
25290 | + unsigned int eax, ebx, ecx, edx; | |
25291 | + | |
25292 | + cpuid(op, &eax, &ebx, &ecx, &edx); | |
25293 | + return ecx; | |
25294 | +} | |
25295 | +static inline unsigned int cpuid_edx(unsigned int op) | |
25296 | +{ | |
25297 | + unsigned int eax, ebx, ecx, edx; | |
25298 | + | |
25299 | + cpuid(op, &eax, &ebx, &ecx, &edx); | |
25300 | + return edx; | |
25301 | +} | |
25302 | + | |
25303 | +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ | |
25304 | +static inline void rep_nop(void) | |
25305 | +{ | |
25306 | + __asm__ __volatile__("rep;nop": : :"memory"); | |
25307 | +} | |
25308 | + | |
25309 | +/* Stop speculative execution */ | |
25310 | +static inline void sync_core(void) | |
25311 | +{ | |
25312 | + int tmp; | |
25313 | + asm volatile("cpuid" : "=a" (tmp) : "0" (1) | |
25314 | + : "ebx", "ecx", "edx", "memory"); | |
25315 | +} | |
25316 | + | |
25317 | +#define cpu_relax() rep_nop() | |
25318 | + | |
25319 | +static inline void __monitor(const void *eax, unsigned long ecx, | |
25320 | + unsigned long edx) | |
25321 | +{ | |
25322 | + /* "monitor %eax,%ecx,%edx;" */ | |
25323 | + asm volatile( | |
25324 | + ".byte 0x0f,0x01,0xc8;" | |
25325 | + : :"a" (eax), "c" (ecx), "d"(edx)); | |
25326 | +} | |
25327 | + | |
25328 | +static inline void __mwait(unsigned long eax, unsigned long ecx) | |
25329 | +{ | |
25330 | + /* "mwait %eax,%ecx;" */ | |
25331 | + asm volatile( | |
25332 | + ".byte 0x0f,0x01,0xc9;" | |
25333 | + : :"a" (eax), "c" (ecx)); | |
25334 | +} | |
25335 | + | |
25336 | +static inline void __sti_mwait(unsigned long eax, unsigned long ecx) | |
25337 | +{ | |
25338 | + /* "sti; mwait %eax,%ecx;" */ | |
25339 | + asm volatile( | |
25340 | + "sti; .byte 0x0f,0x01,0xc9;" | |
25341 | + : :"a" (eax), "c" (ecx)); | |
25342 | +} | |
25343 | + | |
25344 | +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); | |
25345 | + | |
25346 | +extern int force_mwait; | |
25347 | + | |
25348 | +extern void select_idle_routine(const struct cpuinfo_x86 *c); | |
25349 | + | |
25350 | +extern unsigned long boot_option_idle_override; | |
25351 | + | |
25352 | +extern void enable_sep_cpu(void); | |
25353 | +extern int sysenter_setup(void); | |
25354 | + | |
25355 | +/* Defined in head.S */ | |
25356 | +extern struct desc_ptr early_gdt_descr; | |
25357 | + | |
25358 | +extern void cpu_set_gdt(int); | |
25359 | +extern void switch_to_new_gdt(void); | |
25360 | +extern void cpu_init(void); | |
25361 | +extern void init_gdt(int cpu); | |
25362 | + | |
25363 | +/* from system description table in BIOS. Mostly for MCA use, but | |
25364 | + * others may find it useful. */ | |
25365 | +extern unsigned int machine_id; | |
25366 | +extern unsigned int machine_submodel_id; | |
25367 | +extern unsigned int BIOS_revision; | |
25368 | + | |
25369 | +/* Boot loader type from the setup header */ | |
25370 | +extern int bootloader_type; | |
25371 | + | |
25372 | +extern char ignore_fpu_irq; | |
25373 | +#define cache_line_size() (boot_cpu_data.x86_cache_alignment) | |
25374 | + | |
25375 | +#define HAVE_ARCH_PICK_MMAP_LAYOUT 1 | |
25376 | +#define ARCH_HAS_PREFETCHW | |
25377 | +#define ARCH_HAS_SPINLOCK_PREFETCH | |
25378 | + | |
25379 | +#ifdef CONFIG_X86_32 | |
25380 | +#define BASE_PREFETCH ASM_NOP4 | |
25381 | +#define ARCH_HAS_PREFETCH | |
25382 | +#else | |
25383 | +#define BASE_PREFETCH "prefetcht0 (%1)" | |
25384 | +#endif | |
25385 | + | |
25386 | +/* Prefetch instructions for Pentium III and AMD Athlon */ | |
25387 | +/* It's not worth caring about 3dnow! prefetches for the K6 | |
25388 | + because they are microcoded there and very slow. | |
25389 | + However, we don't do prefetches for pre-XP Athlons currently. | |
25390 | + That should be fixed. */ | |
25391 | +static inline void prefetch(const void *x) | |
25392 | +{ | |
25393 | + alternative_input(BASE_PREFETCH, | |
25394 | + "prefetchnta (%1)", | |
25395 | + X86_FEATURE_XMM, | |
25396 | + "r" (x)); | |
25397 | +} | |
25398 | + | |
25399 | +/* 3dnow! prefetch to get an exclusive cache line. Useful for | |
25400 | + spinlocks to avoid one state transition in the cache coherency protocol. */ | |
25401 | +static inline void prefetchw(const void *x) | |
25402 | +{ | |
25403 | + alternative_input(BASE_PREFETCH, | |
25404 | + "prefetchw (%1)", | |
25405 | + X86_FEATURE_3DNOW, | |
25406 | + "r" (x)); | |
25407 | +} | |
25408 | + | |
25409 | +#define spin_lock_prefetch(x) prefetchw(x) | |
25410 | #ifdef CONFIG_X86_32 | |
25411 | -# include "processor_32.h" | |
25412 | +/* | |
25413 | + * User space process size: 3GB (default). | |
25414 | + */ | |
25415 | +#define TASK_SIZE (PAGE_OFFSET) | |
25416 | +#define STACK_TOP TASK_SIZE | |
25417 | +#define STACK_TOP_MAX STACK_TOP | |
25418 | + | |
25419 | +#define INIT_THREAD { \ | |
25420 | + .sp0 = sizeof(init_stack) + (long)&init_stack, \ | |
25421 | + .vm86_info = NULL, \ | |
25422 | + .sysenter_cs = __KERNEL_CS, \ | |
25423 | + .io_bitmap_ptr = NULL, \ | |
25424 | + .fs = __KERNEL_PERCPU, \ | |
25425 | +} | |
25426 | + | |
25427 | +/* | |
25428 | + * Note that the .io_bitmap member must be extra-big. This is because | |
25429 | + * the CPU will access an additional byte beyond the end of the IO | |
25430 | + * permission bitmap. The extra byte must be all 1 bits, and must | |
25431 | + * be within the limit. | |
25432 | + */ | |
25433 | +#define INIT_TSS { \ | |
25434 | + .x86_tss = { \ | |
25435 | + .sp0 = sizeof(init_stack) + (long)&init_stack, \ | |
25436 | + .ss0 = __KERNEL_DS, \ | |
25437 | + .ss1 = __KERNEL_CS, \ | |
25438 | + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ | |
25439 | + }, \ | |
25440 | + .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \ | |
25441 | +} | |
25442 | + | |
25443 | +#define start_thread(regs, new_eip, new_esp) do { \ | |
25444 | + __asm__("movl %0,%%gs": :"r" (0)); /* zero %gs */ \ | |
25445 | + (regs)->fs = 0; \ | |
25446 | + set_fs(USER_DS); \ | |
25447 | + (regs)->ds = __USER_DS; \ | |
25448 | + (regs)->es = __USER_DS; \ | |
25449 | + (regs)->ss = __USER_DS; \ | |
25450 | + (regs)->cs = __USER_CS; \ | |
25451 | + (regs)->ip = (new_eip); \ | |
25452 | + (regs)->sp = (new_esp); \ | |
25453 | +} while (0) | |
25454 | + | |
25455 | + | |
25456 | +extern unsigned long thread_saved_pc(struct task_struct *tsk); | |
25457 | + | |
25458 | +#define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long)) | |
25459 | +#define KSTK_TOP(info) \ | |
25460 | +({ \ | |
25461 | + unsigned long *__ptr = (unsigned long *)(info); \ | |
25462 | + (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \ | |
25463 | +}) | |
25464 | + | |
25465 | +/* | |
25466 | + * The below -8 is to reserve 8 bytes on top of the ring0 stack. | |
25467 | + * This is necessary to guarantee that the entire "struct pt_regs" | |
25468 | + * is accessible even if the CPU hasn't stored the SS/ESP registers | |
25469 | + * on the stack (interrupt gate does not save these registers | |
25470 | + * when switching to the same priv ring). | |
25471 | + * Therefore beware: accessing the ss/esp fields of the | |
25472 | + * "struct pt_regs" is possible, but they may contain the | |
25473 | + * completely wrong values. | |
25474 | + */ | |
25475 | +#define task_pt_regs(task) \ | |
25476 | +({ \ | |
25477 | + struct pt_regs *__regs__; \ | |
25478 | + __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ | |
25479 | + __regs__ - 1; \ | |
25480 | +}) | |
25481 | + | |
25482 | +#define KSTK_ESP(task) (task_pt_regs(task)->sp) | |
25483 | + | |
25484 | #else | |
25485 | -# include "processor_64.h" | |
25486 | +/* | |
25487 | + * User space process size. 47bits minus one guard page. | |
25488 | + */ | |
25489 | +#define TASK_SIZE64 (0x800000000000UL - 4096) | |
25490 | + | |
25491 | +/* User space process size for 32-bit (TIF_IA32) tasks: 3GB when the | |
25492 | + * personality has ADDR_LIMIT_3GB set, otherwise 0xFFFFe000. | |
25493 | + */ | |
25494 | +#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ | |
25495 | + 0xc0000000 : 0xFFFFe000) | |
25496 | + | |
25497 | +#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \ | |
25498 | + IA32_PAGE_OFFSET : TASK_SIZE64) | |
25499 | +#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \ | |
25500 | + IA32_PAGE_OFFSET : TASK_SIZE64) | |
25501 | + | |
25502 | +#define STACK_TOP TASK_SIZE | |
25503 | +#define STACK_TOP_MAX TASK_SIZE64 | |
25504 | + | |
25505 | +#define INIT_THREAD { \ | |
25506 | + .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ | |
25507 | +} | |
25508 | + | |
25509 | +#define INIT_TSS { \ | |
25510 | + .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ | |
25511 | +} | |
25512 | + | |
25513 | +#define start_thread(regs, new_rip, new_rsp) do { \ | |
25514 | + asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0)); \ | |
25515 | + load_gs_index(0); \ | |
25516 | + (regs)->ip = (new_rip); \ | |
25517 | + (regs)->sp = (new_rsp); \ | |
25518 | + write_pda(oldrsp, (new_rsp)); \ | |
25519 | + (regs)->cs = __USER_CS; \ | |
25520 | + (regs)->ss = __USER_DS; \ | |
25521 | + (regs)->flags = 0x200; \ | |
25522 | + set_fs(USER_DS); \ | |
25523 | +} while (0) | |
25524 | + | |
25525 | +/* | |
25526 | + * Return saved PC of a blocked thread. | |
25527 | + * What is this good for? it will be always the scheduler or ret_from_fork. | |
25528 | + */ | |
25529 | +#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8)) | |
25530 | + | |
25531 | +#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) | |
25532 | +#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */ | |
25533 | +#endif /* CONFIG_X86_64 */ | |
25534 | + | |
25535 | +/* This decides where the kernel will search for a free chunk of vm | |
25536 | + * space during mmap's. | |
25537 | + */ | |
25538 | +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) | |
25539 | + | |
25540 | +#define KSTK_EIP(task) (task_pt_regs(task)->ip) | |
25541 | + | |
25542 | #endif | |
00e5a55c BS |
25543 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/processor_32.h 2009-02-16 16:18:36.000000000 +0100 |
25544 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
25545 | @@ -1,751 +0,0 @@ | |
25546 | -/* | |
25547 | - * include/asm-i386/processor.h | |
25548 | - * | |
25549 | - * Copyright (C) 1994 Linus Torvalds | |
25550 | - */ | |
25551 | - | |
25552 | -#ifndef __ASM_I386_PROCESSOR_H | |
25553 | -#define __ASM_I386_PROCESSOR_H | |
25554 | - | |
25555 | -#include <asm/vm86.h> | |
25556 | -#include <asm/math_emu.h> | |
25557 | -#include <asm/segment.h> | |
25558 | -#include <asm/page.h> | |
25559 | -#include <asm/types.h> | |
25560 | -#include <asm/sigcontext.h> | |
25561 | -#include <asm/cpufeature.h> | |
25562 | -#include <asm/msr.h> | |
25563 | -#include <asm/system.h> | |
25564 | -#include <linux/cache.h> | |
25565 | -#include <linux/threads.h> | |
25566 | -#include <asm/percpu.h> | |
25567 | -#include <linux/cpumask.h> | |
25568 | -#include <linux/init.h> | |
25569 | -#include <asm/processor-flags.h> | |
25570 | -#include <xen/interface/physdev.h> | |
25571 | - | |
25572 | -/* flag for disabling the tsc */ | |
25573 | -#define tsc_disable 0 | |
25574 | - | |
25575 | -struct desc_struct { | |
25576 | - unsigned long a,b; | |
25577 | -}; | |
25578 | - | |
25579 | -#define desc_empty(desc) \ | |
25580 | - (!((desc)->a | (desc)->b)) | |
25581 | - | |
25582 | -#define desc_equal(desc1, desc2) \ | |
25583 | - (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b)) | |
25584 | -/* | |
25585 | - * Default implementation of macro that returns current | |
25586 | - * instruction pointer ("program counter"). | |
25587 | - */ | |
25588 | -#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; }) | |
25589 | - | |
25590 | -/* | |
25591 | - * CPU type and hardware bug flags. Kept separately for each CPU. | |
25592 | - * Members of this structure are referenced in head.S, so think twice | |
25593 | - * before touching them. [mj] | |
25594 | - */ | |
25595 | - | |
25596 | -struct cpuinfo_x86 { | |
25597 | - __u8 x86; /* CPU family */ | |
25598 | - __u8 x86_vendor; /* CPU vendor */ | |
25599 | - __u8 x86_model; | |
25600 | - __u8 x86_mask; | |
25601 | - char wp_works_ok; /* It doesn't on 386's */ | |
25602 | - char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */ | |
25603 | - char hard_math; | |
25604 | - char rfu; | |
25605 | - int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ | |
25606 | - unsigned long x86_capability[NCAPINTS]; | |
25607 | - char x86_vendor_id[16]; | |
25608 | - char x86_model_id[64]; | |
25609 | - int x86_cache_size; /* in KB - valid for CPUS which support this | |
25610 | - call */ | |
25611 | - int x86_cache_alignment; /* In bytes */ | |
25612 | - char fdiv_bug; | |
25613 | - char f00f_bug; | |
25614 | - char coma_bug; | |
25615 | - char pad0; | |
25616 | - int x86_power; | |
25617 | - unsigned long loops_per_jiffy; | |
25618 | -#ifdef CONFIG_SMP | |
25619 | - cpumask_t llc_shared_map; /* cpus sharing the last level cache */ | |
25620 | -#endif | |
25621 | - unsigned char x86_max_cores; /* cpuid returned max cores value */ | |
25622 | - unsigned char apicid; | |
25623 | - unsigned short x86_clflush_size; | |
25624 | -#ifdef CONFIG_SMP | |
25625 | - unsigned char booted_cores; /* number of cores as seen by OS */ | |
25626 | - __u8 phys_proc_id; /* Physical processor id. */ | |
25627 | - __u8 cpu_core_id; /* Core id */ | |
25628 | - __u8 cpu_index; /* index into per_cpu list */ | |
25629 | -#endif | |
25630 | -} __attribute__((__aligned__(SMP_CACHE_BYTES))); | |
25631 | - | |
25632 | -#define X86_VENDOR_INTEL 0 | |
25633 | -#define X86_VENDOR_CYRIX 1 | |
25634 | -#define X86_VENDOR_AMD 2 | |
25635 | -#define X86_VENDOR_UMC 3 | |
25636 | -#define X86_VENDOR_NEXGEN 4 | |
25637 | -#define X86_VENDOR_CENTAUR 5 | |
25638 | -#define X86_VENDOR_TRANSMETA 7 | |
25639 | -#define X86_VENDOR_NSC 8 | |
25640 | -#define X86_VENDOR_NUM 9 | |
25641 | -#define X86_VENDOR_UNKNOWN 0xff | |
25642 | - | |
25643 | -/* | |
25644 | - * capabilities of CPUs | |
25645 | - */ | |
25646 | - | |
25647 | -extern struct cpuinfo_x86 boot_cpu_data; | |
25648 | -extern struct cpuinfo_x86 new_cpu_data; | |
25649 | -#ifndef CONFIG_X86_NO_TSS | |
25650 | -extern struct tss_struct doublefault_tss; | |
25651 | -DECLARE_PER_CPU(struct tss_struct, init_tss); | |
25652 | -#endif | |
25653 | - | |
25654 | -#ifdef CONFIG_SMP | |
25655 | -DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); | |
25656 | -#define cpu_data(cpu) per_cpu(cpu_info, cpu) | |
25657 | -#define current_cpu_data cpu_data(smp_processor_id()) | |
25658 | -#else | |
25659 | -#define cpu_data(cpu) boot_cpu_data | |
25660 | -#define current_cpu_data boot_cpu_data | |
25661 | -#endif | |
25662 | - | |
25663 | -/* | |
25664 | - * the following now lives in the per cpu area: | |
25665 | - * extern int cpu_llc_id[NR_CPUS]; | |
25666 | - */ | |
25667 | -DECLARE_PER_CPU(u8, cpu_llc_id); | |
25668 | -extern char ignore_fpu_irq; | |
25669 | - | |
25670 | -void __init cpu_detect(struct cpuinfo_x86 *c); | |
25671 | - | |
25672 | -extern void identify_boot_cpu(void); | |
25673 | -extern void identify_secondary_cpu(struct cpuinfo_x86 *); | |
25674 | -extern void print_cpu_info(struct cpuinfo_x86 *); | |
25675 | -extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); | |
25676 | -extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); | |
25677 | -extern unsigned short num_cache_leaves; | |
25678 | - | |
25679 | -#ifdef CONFIG_X86_HT | |
25680 | -extern void detect_ht(struct cpuinfo_x86 *c); | |
25681 | -#else | |
25682 | -static inline void detect_ht(struct cpuinfo_x86 *c) {} | |
25683 | -#endif | |
25684 | - | |
25685 | -static inline void xen_cpuid(unsigned int *eax, unsigned int *ebx, | |
25686 | - unsigned int *ecx, unsigned int *edx) | |
25687 | -{ | |
25688 | - /* ecx is often an input as well as an output. */ | |
25689 | - __asm__(XEN_CPUID | |
25690 | - : "=a" (*eax), | |
25691 | - "=b" (*ebx), | |
25692 | - "=c" (*ecx), | |
25693 | - "=d" (*edx) | |
25694 | - : "0" (*eax), "2" (*ecx)); | |
25695 | -} | |
25696 | - | |
25697 | -#define load_cr3(pgdir) write_cr3(__pa(pgdir)) | |
25698 | - | |
25699 | -/* | |
25700 | - * Save the cr4 feature set we're using (ie | |
25701 | - * Pentium 4MB enable and PPro Global page | |
25702 | - * enable), so that any CPU's that boot up | |
25703 | - * after us can get the correct flags. | |
25704 | - */ | |
25705 | -extern unsigned long mmu_cr4_features; | |
25706 | - | |
25707 | -static inline void set_in_cr4 (unsigned long mask) | |
25708 | -{ | |
25709 | - unsigned cr4; | |
25710 | - mmu_cr4_features |= mask; | |
25711 | - cr4 = read_cr4(); | |
25712 | - cr4 |= mask; | |
25713 | - write_cr4(cr4); | |
25714 | -} | |
25715 | - | |
25716 | -static inline void clear_in_cr4 (unsigned long mask) | |
25717 | -{ | |
25718 | - unsigned cr4; | |
25719 | - mmu_cr4_features &= ~mask; | |
25720 | - cr4 = read_cr4(); | |
25721 | - cr4 &= ~mask; | |
25722 | - write_cr4(cr4); | |
25723 | -} | |
25724 | - | |
25725 | -/* Stop speculative execution */ | |
25726 | -static inline void sync_core(void) | |
25727 | -{ | |
25728 | - int tmp; | |
25729 | - asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory"); | |
25730 | -} | |
25731 | - | |
25732 | -static inline void __monitor(const void *eax, unsigned long ecx, | |
25733 | - unsigned long edx) | |
25734 | -{ | |
25735 | - /* "monitor %eax,%ecx,%edx;" */ | |
25736 | - asm volatile( | |
25737 | - ".byte 0x0f,0x01,0xc8;" | |
25738 | - : :"a" (eax), "c" (ecx), "d"(edx)); | |
25739 | -} | |
25740 | - | |
25741 | -static inline void __mwait(unsigned long eax, unsigned long ecx) | |
25742 | -{ | |
25743 | - /* "mwait %eax,%ecx;" */ | |
25744 | - asm volatile( | |
25745 | - ".byte 0x0f,0x01,0xc9;" | |
25746 | - : :"a" (eax), "c" (ecx)); | |
25747 | -} | |
25748 | - | |
25749 | -extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); | |
25750 | - | |
25751 | -/* from system description table in BIOS. Mostly for MCA use, but | |
25752 | -others may find it useful. */ | |
25753 | -extern unsigned int machine_id; | |
25754 | -extern unsigned int machine_submodel_id; | |
25755 | -extern unsigned int BIOS_revision; | |
25756 | -extern unsigned int mca_pentium_flag; | |
25757 | - | |
25758 | -/* Boot loader type from the setup header */ | |
25759 | -extern int bootloader_type; | |
25760 | - | |
25761 | -/* | |
25762 | - * User space process size: 3GB (default). | |
25763 | - */ | |
25764 | -#define TASK_SIZE (PAGE_OFFSET) | |
25765 | - | |
25766 | -/* This decides where the kernel will search for a free chunk of vm | |
25767 | - * space during mmap's. | |
25768 | - */ | |
25769 | -#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) | |
25770 | - | |
25771 | -#define HAVE_ARCH_PICK_MMAP_LAYOUT | |
25772 | - | |
25773 | -extern void hard_disable_TSC(void); | |
25774 | -extern void disable_TSC(void); | |
25775 | -extern void hard_enable_TSC(void); | |
25776 | - | |
25777 | -/* | |
25778 | - * Size of io_bitmap. | |
25779 | - */ | |
25780 | -#define IO_BITMAP_BITS 65536 | |
25781 | -#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) | |
25782 | -#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) | |
25783 | -#ifndef CONFIG_X86_NO_TSS | |
25784 | -#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) | |
25785 | -#endif | |
25786 | -#define INVALID_IO_BITMAP_OFFSET 0x8000 | |
25787 | -#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000 | |
25788 | - | |
25789 | -struct i387_fsave_struct { | |
25790 | - long cwd; | |
25791 | - long swd; | |
25792 | - long twd; | |
25793 | - long fip; | |
25794 | - long fcs; | |
25795 | - long foo; | |
25796 | - long fos; | |
25797 | - long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ | |
25798 | - long status; /* software status information */ | |
25799 | -}; | |
25800 | - | |
25801 | -struct i387_fxsave_struct { | |
25802 | - unsigned short cwd; | |
25803 | - unsigned short swd; | |
25804 | - unsigned short twd; | |
25805 | - unsigned short fop; | |
25806 | - long fip; | |
25807 | - long fcs; | |
25808 | - long foo; | |
25809 | - long fos; | |
25810 | - long mxcsr; | |
25811 | - long mxcsr_mask; | |
25812 | - long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ | |
25813 | - long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ | |
25814 | - long padding[56]; | |
25815 | -} __attribute__ ((aligned (16))); | |
25816 | - | |
25817 | -struct i387_soft_struct { | |
25818 | - long cwd; | |
25819 | - long swd; | |
25820 | - long twd; | |
25821 | - long fip; | |
25822 | - long fcs; | |
25823 | - long foo; | |
25824 | - long fos; | |
25825 | - long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ | |
25826 | - unsigned char ftop, changed, lookahead, no_update, rm, alimit; | |
25827 | - struct info *info; | |
25828 | - unsigned long entry_eip; | |
25829 | -}; | |
25830 | - | |
25831 | -union i387_union { | |
25832 | - struct i387_fsave_struct fsave; | |
25833 | - struct i387_fxsave_struct fxsave; | |
25834 | - struct i387_soft_struct soft; | |
25835 | -}; | |
25836 | - | |
25837 | -typedef struct { | |
25838 | - unsigned long seg; | |
25839 | -} mm_segment_t; | |
25840 | - | |
25841 | -struct thread_struct; | |
25842 | - | |
25843 | -#ifndef CONFIG_X86_NO_TSS | |
25844 | -/* This is the TSS defined by the hardware. */ | |
25845 | -struct i386_hw_tss { | |
25846 | - unsigned short back_link,__blh; | |
25847 | - unsigned long esp0; | |
25848 | - unsigned short ss0,__ss0h; | |
25849 | - unsigned long esp1; | |
25850 | - unsigned short ss1,__ss1h; /* ss1 is used to cache MSR_IA32_SYSENTER_CS */ | |
25851 | - unsigned long esp2; | |
25852 | - unsigned short ss2,__ss2h; | |
25853 | - unsigned long __cr3; | |
25854 | - unsigned long eip; | |
25855 | - unsigned long eflags; | |
25856 | - unsigned long eax,ecx,edx,ebx; | |
25857 | - unsigned long esp; | |
25858 | - unsigned long ebp; | |
25859 | - unsigned long esi; | |
25860 | - unsigned long edi; | |
25861 | - unsigned short es, __esh; | |
25862 | - unsigned short cs, __csh; | |
25863 | - unsigned short ss, __ssh; | |
25864 | - unsigned short ds, __dsh; | |
25865 | - unsigned short fs, __fsh; | |
25866 | - unsigned short gs, __gsh; | |
25867 | - unsigned short ldt, __ldth; | |
25868 | - unsigned short trace, io_bitmap_base; | |
25869 | -} __attribute__((packed)); | |
25870 | - | |
25871 | -struct tss_struct { | |
25872 | - struct i386_hw_tss x86_tss; | |
25873 | - | |
25874 | - /* | |
25875 | - * The extra 1 is there because the CPU will access an | |
25876 | - * additional byte beyond the end of the IO permission | |
25877 | - * bitmap. The extra byte must be all 1 bits, and must | |
25878 | - * be within the limit. | |
25879 | - */ | |
25880 | - unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; | |
25881 | - /* | |
25882 | - * Cache the current maximum and the last task that used the bitmap: | |
25883 | - */ | |
25884 | - unsigned long io_bitmap_max; | |
25885 | - struct thread_struct *io_bitmap_owner; | |
25886 | - /* | |
25887 | - * pads the TSS to be cacheline-aligned (size is 0x100) | |
25888 | - */ | |
25889 | - unsigned long __cacheline_filler[35]; | |
25890 | - /* | |
25891 | - * .. and then another 0x100 bytes for emergency kernel stack | |
25892 | - */ | |
25893 | - unsigned long stack[64]; | |
25894 | -} __attribute__((packed)); | |
25895 | -#endif | |
25896 | - | |
25897 | -#define ARCH_MIN_TASKALIGN 16 | |
25898 | - | |
25899 | -struct thread_struct { | |
25900 | -/* cached TLS descriptors. */ | |
25901 | - struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; | |
25902 | - unsigned long esp0; | |
25903 | - unsigned long sysenter_cs; | |
25904 | - unsigned long eip; | |
25905 | - unsigned long esp; | |
25906 | - unsigned long fs; | |
25907 | - unsigned long gs; | |
25908 | -/* Hardware debugging registers */ | |
25909 | - unsigned long debugreg[8]; /* %%db0-7 debug registers */ | |
25910 | -/* fault info */ | |
25911 | - unsigned long cr2, trap_no, error_code; | |
25912 | -/* floating point info */ | |
25913 | - union i387_union i387; | |
25914 | -/* virtual 86 mode info */ | |
25915 | - struct vm86_struct __user * vm86_info; | |
25916 | - unsigned long screen_bitmap; | |
25917 | - unsigned long v86flags, v86mask, saved_esp0; | |
25918 | - unsigned int saved_fs, saved_gs; | |
25919 | -/* IO permissions */ | |
25920 | - unsigned long *io_bitmap_ptr; | |
25921 | - unsigned long iopl; | |
25922 | -/* max allowed port in the bitmap, in bytes: */ | |
25923 | - unsigned long io_bitmap_max; | |
25924 | -}; | |
25925 | - | |
25926 | -#define INIT_THREAD { \ | |
25927 | - .esp0 = sizeof(init_stack) + (long)&init_stack, \ | |
25928 | - .vm86_info = NULL, \ | |
25929 | - .sysenter_cs = __KERNEL_CS, \ | |
25930 | - .io_bitmap_ptr = NULL, \ | |
25931 | - .fs = __KERNEL_PERCPU, \ | |
25932 | -} | |
25933 | - | |
25934 | -/* | |
25935 | - * Note that the .io_bitmap member must be extra-big. This is because | |
25936 | - * the CPU will access an additional byte beyond the end of the IO | |
25937 | - * permission bitmap. The extra byte must be all 1 bits, and must | |
25938 | - * be within the limit. | |
25939 | - */ | |
25940 | -#define INIT_TSS { \ | |
25941 | - .x86_tss = { \ | |
25942 | - .esp0 = sizeof(init_stack) + (long)&init_stack, \ | |
25943 | - .ss0 = __KERNEL_DS, \ | |
25944 | - .ss1 = __KERNEL_CS, \ | |
25945 | - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ | |
25946 | - }, \ | |
25947 | - .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \ | |
25948 | -} | |
25949 | - | |
25950 | -#define start_thread(regs, new_eip, new_esp) do { \ | |
25951 | - __asm__("movl %0,%%gs": :"r" (0)); \ | |
25952 | - regs->xfs = 0; \ | |
25953 | - set_fs(USER_DS); \ | |
25954 | - regs->xds = __USER_DS; \ | |
25955 | - regs->xes = __USER_DS; \ | |
25956 | - regs->xss = __USER_DS; \ | |
25957 | - regs->xcs = __USER_CS; \ | |
25958 | - regs->eip = new_eip; \ | |
25959 | - regs->esp = new_esp; \ | |
25960 | -} while (0) | |
25961 | - | |
25962 | -/* Forward declaration, a strange C thing */ | |
25963 | -struct task_struct; | |
25964 | -struct mm_struct; | |
25965 | - | |
25966 | -/* Free all resources held by a thread. */ | |
25967 | -extern void release_thread(struct task_struct *); | |
25968 | - | |
25969 | -/* Prepare to copy thread state - unlazy all lazy status */ | |
25970 | -extern void prepare_to_copy(struct task_struct *tsk); | |
25971 | - | |
25972 | -/* | |
25973 | - * create a kernel thread without removing it from tasklists | |
25974 | - */ | |
25975 | -extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); | |
25976 | - | |
25977 | -extern unsigned long thread_saved_pc(struct task_struct *tsk); | |
25978 | -void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack); | |
25979 | - | |
25980 | -unsigned long get_wchan(struct task_struct *p); | |
25981 | - | |
25982 | -#define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long)) | |
25983 | -#define KSTK_TOP(info) \ | |
25984 | -({ \ | |
25985 | - unsigned long *__ptr = (unsigned long *)(info); \ | |
25986 | - (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \ | |
25987 | -}) | |
25988 | - | |
25989 | -/* | |
25990 | - * The below -8 is to reserve 8 bytes on top of the ring0 stack. | |
25991 | - * This is necessary to guarantee that the entire "struct pt_regs" | |
25992 | - * is accessable even if the CPU haven't stored the SS/ESP registers | |
25993 | - * on the stack (interrupt gate does not save these registers | |
25994 | - * when switching to the same priv ring). | |
25995 | - * Therefore beware: accessing the xss/esp fields of the | |
25996 | - * "struct pt_regs" is possible, but they may contain the | |
25997 | - * completely wrong values. | |
25998 | - */ | |
25999 | -#define task_pt_regs(task) \ | |
26000 | -({ \ | |
26001 | - struct pt_regs *__regs__; \ | |
26002 | - __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ | |
26003 | - __regs__ - 1; \ | |
26004 | -}) | |
26005 | - | |
26006 | -#define KSTK_EIP(task) (task_pt_regs(task)->eip) | |
26007 | -#define KSTK_ESP(task) (task_pt_regs(task)->esp) | |
26008 | - | |
26009 | - | |
26010 | -struct microcode_header { | |
26011 | - unsigned int hdrver; | |
26012 | - unsigned int rev; | |
26013 | - unsigned int date; | |
26014 | - unsigned int sig; | |
26015 | - unsigned int cksum; | |
26016 | - unsigned int ldrver; | |
26017 | - unsigned int pf; | |
26018 | - unsigned int datasize; | |
26019 | - unsigned int totalsize; | |
26020 | - unsigned int reserved[3]; | |
26021 | -}; | |
26022 | - | |
26023 | -struct microcode { | |
26024 | - struct microcode_header hdr; | |
26025 | - unsigned int bits[0]; | |
26026 | -}; | |
26027 | - | |
26028 | -typedef struct microcode microcode_t; | |
26029 | -typedef struct microcode_header microcode_header_t; | |
26030 | - | |
26031 | -/* microcode format is extended from prescott processors */ | |
26032 | -struct extended_signature { | |
26033 | - unsigned int sig; | |
26034 | - unsigned int pf; | |
26035 | - unsigned int cksum; | |
26036 | -}; | |
26037 | - | |
26038 | -struct extended_sigtable { | |
26039 | - unsigned int count; | |
26040 | - unsigned int cksum; | |
26041 | - unsigned int reserved[3]; | |
26042 | - struct extended_signature sigs[0]; | |
26043 | -}; | |
26044 | - | |
26045 | -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ | |
26046 | -static inline void rep_nop(void) | |
26047 | -{ | |
26048 | - __asm__ __volatile__("rep;nop": : :"memory"); | |
26049 | -} | |
26050 | - | |
26051 | -#define cpu_relax() rep_nop() | |
26052 | - | |
26053 | -#ifndef CONFIG_X86_NO_TSS | |
26054 | -static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread) | |
26055 | -{ | |
26056 | - tss->x86_tss.esp0 = thread->esp0; | |
26057 | - /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | |
26058 | - if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { | |
26059 | - tss->x86_tss.ss1 = thread->sysenter_cs; | |
26060 | - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | |
26061 | - } | |
26062 | -} | |
26063 | -#else | |
26064 | -#define xen_load_esp0(tss, thread) do { \ | |
26065 | - if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \ | |
26066 | - BUG(); \ | |
26067 | -} while (0) | |
26068 | -#endif | |
26069 | - | |
26070 | - | |
26071 | -static inline unsigned long xen_get_debugreg(int regno) | |
26072 | -{ | |
26073 | - return HYPERVISOR_get_debugreg(regno); | |
26074 | -} | |
26075 | - | |
26076 | -static inline void xen_set_debugreg(int regno, unsigned long value) | |
26077 | -{ | |
26078 | - WARN_ON(HYPERVISOR_set_debugreg(regno, value)); | |
26079 | -} | |
26080 | - | |
26081 | -/* | |
26082 | - * Set IOPL bits in EFLAGS from given mask | |
26083 | - */ | |
26084 | -static inline void xen_set_iopl_mask(unsigned mask) | |
26085 | -{ | |
26086 | - struct physdev_set_iopl set_iopl; | |
26087 | - | |
26088 | - /* Force the change at ring 0. */ | |
26089 | - set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; | |
26090 | - WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl)); | |
26091 | -} | |
26092 | - | |
26093 | - | |
26094 | -#define paravirt_enabled() 0 | |
26095 | -#define __cpuid xen_cpuid | |
26096 | - | |
26097 | -#define load_esp0 xen_load_esp0 | |
26098 | - | |
26099 | -/* | |
26100 | - * These special macros can be used to get or set a debugging register | |
26101 | - */ | |
26102 | -#define get_debugreg(var, register) \ | |
26103 | - (var) = xen_get_debugreg(register) | |
26104 | -#define set_debugreg(value, register) \ | |
26105 | - xen_set_debugreg(register, value) | |
26106 | - | |
26107 | -#define set_iopl_mask xen_set_iopl_mask | |
26108 | - | |
26109 | -/* | |
26110 | - * Generic CPUID function | |
26111 | - * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx | |
26112 | - * resulting in stale register contents being returned. | |
26113 | - */ | |
26114 | -static inline void cpuid(unsigned int op, | |
26115 | - unsigned int *eax, unsigned int *ebx, | |
26116 | - unsigned int *ecx, unsigned int *edx) | |
26117 | -{ | |
26118 | - *eax = op; | |
26119 | - *ecx = 0; | |
26120 | - __cpuid(eax, ebx, ecx, edx); | |
26121 | -} | |
26122 | - | |
26123 | -/* Some CPUID calls want 'count' to be placed in ecx */ | |
26124 | -static inline void cpuid_count(unsigned int op, int count, | |
26125 | - unsigned int *eax, unsigned int *ebx, | |
26126 | - unsigned int *ecx, unsigned int *edx) | |
26127 | -{ | |
26128 | - *eax = op; | |
26129 | - *ecx = count; | |
26130 | - __cpuid(eax, ebx, ecx, edx); | |
26131 | -} | |
26132 | - | |
26133 | -/* | |
26134 | - * CPUID functions returning a single datum | |
26135 | - */ | |
26136 | -static inline unsigned int cpuid_eax(unsigned int op) | |
26137 | -{ | |
26138 | - unsigned int eax, ebx, ecx, edx; | |
26139 | - | |
26140 | - cpuid(op, &eax, &ebx, &ecx, &edx); | |
26141 | - return eax; | |
26142 | -} | |
26143 | -static inline unsigned int cpuid_ebx(unsigned int op) | |
26144 | -{ | |
26145 | - unsigned int eax, ebx, ecx, edx; | |
26146 | - | |
26147 | - cpuid(op, &eax, &ebx, &ecx, &edx); | |
26148 | - return ebx; | |
26149 | -} | |
26150 | -static inline unsigned int cpuid_ecx(unsigned int op) | |
26151 | -{ | |
26152 | - unsigned int eax, ebx, ecx, edx; | |
26153 | - | |
26154 | - cpuid(op, &eax, &ebx, &ecx, &edx); | |
26155 | - return ecx; | |
26156 | -} | |
26157 | -static inline unsigned int cpuid_edx(unsigned int op) | |
26158 | -{ | |
26159 | - unsigned int eax, ebx, ecx, edx; | |
26160 | - | |
26161 | - cpuid(op, &eax, &ebx, &ecx, &edx); | |
26162 | - return edx; | |
26163 | -} | |
26164 | - | |
26165 | -/* generic versions from gas */ | |
26166 | -#define GENERIC_NOP1 ".byte 0x90\n" | |
26167 | -#define GENERIC_NOP2 ".byte 0x89,0xf6\n" | |
26168 | -#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n" | |
26169 | -#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n" | |
26170 | -#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4 | |
26171 | -#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n" | |
26172 | -#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n" | |
26173 | -#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7 | |
26174 | - | |
26175 | -/* Opteron nops */ | |
26176 | -#define K8_NOP1 GENERIC_NOP1 | |
26177 | -#define K8_NOP2 ".byte 0x66,0x90\n" | |
26178 | -#define K8_NOP3 ".byte 0x66,0x66,0x90\n" | |
26179 | -#define K8_NOP4 ".byte 0x66,0x66,0x66,0x90\n" | |
26180 | -#define K8_NOP5 K8_NOP3 K8_NOP2 | |
26181 | -#define K8_NOP6 K8_NOP3 K8_NOP3 | |
26182 | -#define K8_NOP7 K8_NOP4 K8_NOP3 | |
26183 | -#define K8_NOP8 K8_NOP4 K8_NOP4 | |
26184 | - | |
26185 | -/* K7 nops */ | |
26186 | -/* uses eax dependencies (arbitary choice) */ | |
26187 | -#define K7_NOP1 GENERIC_NOP1 | |
26188 | -#define K7_NOP2 ".byte 0x8b,0xc0\n" | |
26189 | -#define K7_NOP3 ".byte 0x8d,0x04,0x20\n" | |
26190 | -#define K7_NOP4 ".byte 0x8d,0x44,0x20,0x00\n" | |
26191 | -#define K7_NOP5 K7_NOP4 ASM_NOP1 | |
26192 | -#define K7_NOP6 ".byte 0x8d,0x80,0,0,0,0\n" | |
26193 | -#define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n" | |
26194 | -#define K7_NOP8 K7_NOP7 ASM_NOP1 | |
26195 | - | |
26196 | -/* P6 nops */ | |
26197 | -/* uses eax dependencies (Intel-recommended choice) */ | |
26198 | -#define P6_NOP1 GENERIC_NOP1 | |
26199 | -#define P6_NOP2 ".byte 0x66,0x90\n" | |
26200 | -#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n" | |
26201 | -#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n" | |
26202 | -#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n" | |
26203 | -#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n" | |
26204 | -#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n" | |
26205 | -#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n" | |
26206 | - | |
26207 | -#ifdef CONFIG_MK8 | |
26208 | -#define ASM_NOP1 K8_NOP1 | |
26209 | -#define ASM_NOP2 K8_NOP2 | |
26210 | -#define ASM_NOP3 K8_NOP3 | |
26211 | -#define ASM_NOP4 K8_NOP4 | |
26212 | -#define ASM_NOP5 K8_NOP5 | |
26213 | -#define ASM_NOP6 K8_NOP6 | |
26214 | -#define ASM_NOP7 K8_NOP7 | |
26215 | -#define ASM_NOP8 K8_NOP8 | |
26216 | -#elif defined(CONFIG_MK7) | |
26217 | -#define ASM_NOP1 K7_NOP1 | |
26218 | -#define ASM_NOP2 K7_NOP2 | |
26219 | -#define ASM_NOP3 K7_NOP3 | |
26220 | -#define ASM_NOP4 K7_NOP4 | |
26221 | -#define ASM_NOP5 K7_NOP5 | |
26222 | -#define ASM_NOP6 K7_NOP6 | |
26223 | -#define ASM_NOP7 K7_NOP7 | |
26224 | -#define ASM_NOP8 K7_NOP8 | |
26225 | -#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \ | |
26226 | - defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \ | |
26227 | - defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4) | |
26228 | -#define ASM_NOP1 P6_NOP1 | |
26229 | -#define ASM_NOP2 P6_NOP2 | |
26230 | -#define ASM_NOP3 P6_NOP3 | |
26231 | -#define ASM_NOP4 P6_NOP4 | |
26232 | -#define ASM_NOP5 P6_NOP5 | |
26233 | -#define ASM_NOP6 P6_NOP6 | |
26234 | -#define ASM_NOP7 P6_NOP7 | |
26235 | -#define ASM_NOP8 P6_NOP8 | |
26236 | -#else | |
26237 | -#define ASM_NOP1 GENERIC_NOP1 | |
26238 | -#define ASM_NOP2 GENERIC_NOP2 | |
26239 | -#define ASM_NOP3 GENERIC_NOP3 | |
26240 | -#define ASM_NOP4 GENERIC_NOP4 | |
26241 | -#define ASM_NOP5 GENERIC_NOP5 | |
26242 | -#define ASM_NOP6 GENERIC_NOP6 | |
26243 | -#define ASM_NOP7 GENERIC_NOP7 | |
26244 | -#define ASM_NOP8 GENERIC_NOP8 | |
26245 | -#endif | |
26246 | - | |
26247 | -#define ASM_NOP_MAX 8 | |
26248 | - | |
26249 | -/* Prefetch instructions for Pentium III and AMD Athlon */ | |
26250 | -/* It's not worth to care about 3dnow! prefetches for the K6 | |
26251 | - because they are microcoded there and very slow. | |
26252 | - However we don't do prefetches for pre XP Athlons currently | |
26253 | - That should be fixed. */ | |
26254 | -#define ARCH_HAS_PREFETCH | |
26255 | -static inline void prefetch(const void *x) | |
26256 | -{ | |
26257 | - alternative_input(ASM_NOP4, | |
26258 | - "prefetchnta (%1)", | |
26259 | - X86_FEATURE_XMM, | |
26260 | - "r" (x)); | |
26261 | -} | |
26262 | - | |
26263 | -#define ARCH_HAS_PREFETCH | |
26264 | -#define ARCH_HAS_PREFETCHW | |
26265 | -#define ARCH_HAS_SPINLOCK_PREFETCH | |
26266 | - | |
26267 | -/* 3dnow! prefetch to get an exclusive cache line. Useful for | |
26268 | - spinlocks to avoid one state transition in the cache coherency protocol. */ | |
26269 | -static inline void prefetchw(const void *x) | |
26270 | -{ | |
26271 | - alternative_input(ASM_NOP4, | |
26272 | - "prefetchw (%1)", | |
26273 | - X86_FEATURE_3DNOW, | |
26274 | - "r" (x)); | |
26275 | -} | |
26276 | -#define spin_lock_prefetch(x) prefetchw(x) | |
26277 | - | |
26278 | -extern void select_idle_routine(const struct cpuinfo_x86 *c); | |
26279 | - | |
26280 | -#define cache_line_size() (boot_cpu_data.x86_cache_alignment) | |
26281 | - | |
26282 | -extern unsigned long boot_option_idle_override; | |
26283 | -extern void enable_sep_cpu(void); | |
26284 | -extern int sysenter_setup(void); | |
26285 | - | |
26286 | -/* Defined in head.S */ | |
26287 | -extern struct Xgt_desc_struct early_gdt_descr; | |
26288 | - | |
26289 | -extern void cpu_set_gdt(int); | |
26290 | -extern void switch_to_new_gdt(void); | |
26291 | -extern void cpu_init(void); | |
26292 | -extern void init_gdt(int cpu); | |
26293 | - | |
26294 | -extern int force_mwait; | |
26295 | - | |
26296 | -#endif /* __ASM_I386_PROCESSOR_H */ | |
26297 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/processor_64.h 2009-02-16 16:18:36.000000000 +0100 | |
26298 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
26299 | @@ -1,461 +0,0 @@ | |
26300 | -/* | |
26301 | - * include/asm-x86_64/processor.h | |
26302 | - * | |
26303 | - * Copyright (C) 1994 Linus Torvalds | |
26304 | - */ | |
26305 | - | |
26306 | -#ifndef __ASM_X86_64_PROCESSOR_H | |
26307 | -#define __ASM_X86_64_PROCESSOR_H | |
26308 | - | |
26309 | -#include <asm/segment.h> | |
26310 | -#include <asm/page.h> | |
26311 | -#include <asm/types.h> | |
26312 | -#include <asm/sigcontext.h> | |
26313 | -#include <asm/cpufeature.h> | |
26314 | -#include <linux/threads.h> | |
26315 | -#include <asm/msr.h> | |
26316 | -#include <asm/current.h> | |
26317 | -#include <asm/system.h> | |
26318 | -#include <asm/mmsegment.h> | |
26319 | -#include <asm/percpu.h> | |
26320 | -#include <linux/personality.h> | |
26321 | -#include <linux/cpumask.h> | |
26322 | -#include <asm/processor-flags.h> | |
26323 | - | |
26324 | -#define TF_MASK 0x00000100 | |
26325 | -#define IF_MASK 0x00000200 | |
26326 | -#define IOPL_MASK 0x00003000 | |
26327 | -#define NT_MASK 0x00004000 | |
26328 | -#define VM_MASK 0x00020000 | |
26329 | -#define AC_MASK 0x00040000 | |
26330 | -#define VIF_MASK 0x00080000 /* virtual interrupt flag */ | |
26331 | -#define VIP_MASK 0x00100000 /* virtual interrupt pending */ | |
26332 | -#define ID_MASK 0x00200000 | |
26333 | - | |
26334 | -#define desc_empty(desc) \ | |
26335 | - (!((desc)->a | (desc)->b)) | |
26336 | - | |
26337 | -#define desc_equal(desc1, desc2) \ | |
26338 | - (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b)) | |
26339 | - | |
26340 | -/* | |
26341 | - * Default implementation of macro that returns current | |
26342 | - * instruction pointer ("program counter"). | |
26343 | - */ | |
26344 | -#define current_text_addr() ({ void *pc; asm volatile("leaq 1f(%%rip),%0\n1:":"=r"(pc)); pc; }) | |
26345 | - | |
26346 | -/* | |
26347 | - * CPU type and hardware bug flags. Kept separately for each CPU. | |
26348 | - */ | |
26349 | - | |
26350 | -struct cpuinfo_x86 { | |
26351 | - __u8 x86; /* CPU family */ | |
26352 | - __u8 x86_vendor; /* CPU vendor */ | |
26353 | - __u8 x86_model; | |
26354 | - __u8 x86_mask; | |
26355 | - int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ | |
26356 | - __u32 x86_capability[NCAPINTS]; | |
26357 | - char x86_vendor_id[16]; | |
26358 | - char x86_model_id[64]; | |
26359 | - int x86_cache_size; /* in KB */ | |
26360 | - int x86_clflush_size; | |
26361 | - int x86_cache_alignment; | |
26362 | - int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/ | |
26363 | - __u8 x86_virt_bits, x86_phys_bits; | |
26364 | - __u8 x86_max_cores; /* cpuid returned max cores value */ | |
26365 | - __u32 x86_power; | |
26366 | - __u32 extended_cpuid_level; /* Max extended CPUID function supported */ | |
26367 | - unsigned long loops_per_jiffy; | |
26368 | -#ifdef CONFIG_SMP | |
26369 | - cpumask_t llc_shared_map; /* cpus sharing the last level cache */ | |
26370 | -#endif | |
26371 | - __u8 apicid; | |
26372 | -#ifdef CONFIG_SMP | |
26373 | - __u8 booted_cores; /* number of cores as seen by OS */ | |
26374 | - __u8 phys_proc_id; /* Physical Processor id. */ | |
26375 | - __u8 cpu_core_id; /* Core id. */ | |
26376 | - __u8 cpu_index; /* index into per_cpu list */ | |
26377 | -#endif | |
26378 | -} ____cacheline_aligned; | |
26379 | - | |
26380 | -#define X86_VENDOR_INTEL 0 | |
26381 | -#define X86_VENDOR_CYRIX 1 | |
26382 | -#define X86_VENDOR_AMD 2 | |
26383 | -#define X86_VENDOR_UMC 3 | |
26384 | -#define X86_VENDOR_NEXGEN 4 | |
26385 | -#define X86_VENDOR_CENTAUR 5 | |
26386 | -#define X86_VENDOR_TRANSMETA 7 | |
26387 | -#define X86_VENDOR_NUM 8 | |
26388 | -#define X86_VENDOR_UNKNOWN 0xff | |
26389 | - | |
26390 | -#ifdef CONFIG_SMP | |
26391 | -DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); | |
26392 | -#define cpu_data(cpu) per_cpu(cpu_info, cpu) | |
26393 | -#define current_cpu_data cpu_data(smp_processor_id()) | |
26394 | -#else | |
26395 | -#define cpu_data(cpu) boot_cpu_data | |
26396 | -#define current_cpu_data boot_cpu_data | |
26397 | -#endif | |
26398 | - | |
26399 | -extern char ignore_irq13; | |
26400 | - | |
26401 | -extern void identify_cpu(struct cpuinfo_x86 *); | |
26402 | -extern void print_cpu_info(struct cpuinfo_x86 *); | |
26403 | -extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); | |
26404 | -extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); | |
26405 | -extern unsigned short num_cache_leaves; | |
26406 | - | |
26407 | -/* | |
26408 | - * Save the cr4 feature set we're using (ie | |
26409 | - * Pentium 4MB enable and PPro Global page | |
26410 | - * enable), so that any CPU's that boot up | |
26411 | - * after us can get the correct flags. | |
26412 | - */ | |
26413 | -extern unsigned long mmu_cr4_features; | |
26414 | - | |
26415 | -static inline void set_in_cr4 (unsigned long mask) | |
26416 | -{ | |
26417 | - mmu_cr4_features |= mask; | |
26418 | - __asm__("movq %%cr4,%%rax\n\t" | |
26419 | - "orq %0,%%rax\n\t" | |
26420 | - "movq %%rax,%%cr4\n" | |
26421 | - : : "irg" (mask) | |
26422 | - :"ax"); | |
26423 | -} | |
26424 | - | |
26425 | -static inline void clear_in_cr4 (unsigned long mask) | |
26426 | -{ | |
26427 | - mmu_cr4_features &= ~mask; | |
26428 | - __asm__("movq %%cr4,%%rax\n\t" | |
26429 | - "andq %0,%%rax\n\t" | |
26430 | - "movq %%rax,%%cr4\n" | |
26431 | - : : "irg" (~mask) | |
26432 | - :"ax"); | |
26433 | -} | |
26434 | - | |
26435 | - | |
26436 | -/* | |
26437 | - * User space process size. 47bits minus one guard page. | |
26438 | - */ | |
26439 | -#define TASK_SIZE64 (0x800000000000UL - 4096) | |
26440 | - | |
26441 | -/* This decides where the kernel will search for a free chunk of vm | |
26442 | - * space during mmap's. | |
26443 | - */ | |
26444 | -#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000) | |
26445 | - | |
26446 | -#define TASK_SIZE (test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64) | |
26447 | -#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? IA32_PAGE_OFFSET : TASK_SIZE64) | |
26448 | - | |
26449 | -#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE/3) | |
26450 | - | |
26451 | -/* | |
26452 | - * Size of io_bitmap. | |
26453 | - */ | |
26454 | -#define IO_BITMAP_BITS 65536 | |
26455 | -#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) | |
26456 | -#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) | |
26457 | -#ifndef CONFIG_X86_NO_TSS | |
26458 | -#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) | |
26459 | -#endif | |
26460 | -#define INVALID_IO_BITMAP_OFFSET 0x8000 | |
26461 | - | |
26462 | -struct i387_fxsave_struct { | |
26463 | - u16 cwd; | |
26464 | - u16 swd; | |
26465 | - u16 twd; | |
26466 | - u16 fop; | |
26467 | - u64 rip; | |
26468 | - u64 rdp; | |
26469 | - u32 mxcsr; | |
26470 | - u32 mxcsr_mask; | |
26471 | - u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ | |
26472 | - u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ | |
26473 | - u32 padding[24]; | |
26474 | -} __attribute__ ((aligned (16))); | |
26475 | - | |
26476 | -union i387_union { | |
26477 | - struct i387_fxsave_struct fxsave; | |
26478 | -}; | |
26479 | - | |
26480 | -#ifndef CONFIG_X86_NO_TSS | |
26481 | -struct tss_struct { | |
26482 | - u32 reserved1; | |
26483 | - u64 rsp0; | |
26484 | - u64 rsp1; | |
26485 | - u64 rsp2; | |
26486 | - u64 reserved2; | |
26487 | - u64 ist[7]; | |
26488 | - u32 reserved3; | |
26489 | - u32 reserved4; | |
26490 | - u16 reserved5; | |
26491 | - u16 io_bitmap_base; | |
26492 | - /* | |
26493 | - * The extra 1 is there because the CPU will access an | |
26494 | - * additional byte beyond the end of the IO permission | |
26495 | - * bitmap. The extra byte must be all 1 bits, and must | |
26496 | - * be within the limit. Thus we have: | |
26497 | - * | |
26498 | - * 128 bytes, the bitmap itself, for ports 0..0x3ff | |
26499 | - * 8 bytes, for an extra "long" of ~0UL | |
26500 | - */ | |
26501 | - unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; | |
26502 | -} __attribute__((packed)) ____cacheline_aligned; | |
26503 | - | |
26504 | -DECLARE_PER_CPU(struct tss_struct,init_tss); | |
26505 | -#endif | |
26506 | - | |
26507 | - | |
26508 | -extern struct cpuinfo_x86 boot_cpu_data; | |
26509 | -#ifndef CONFIG_X86_NO_TSS | |
26510 | -/* Save the original ist values for checking stack pointers during debugging */ | |
26511 | -struct orig_ist { | |
26512 | - unsigned long ist[7]; | |
26513 | -}; | |
26514 | -DECLARE_PER_CPU(struct orig_ist, orig_ist); | |
26515 | -#endif | |
26516 | - | |
26517 | -#ifdef CONFIG_X86_VSMP | |
26518 | -#define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) | |
26519 | -#define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) | |
26520 | -#else | |
26521 | -#define ARCH_MIN_TASKALIGN 16 | |
26522 | -#define ARCH_MIN_MMSTRUCT_ALIGN 0 | |
26523 | -#endif | |
26524 | - | |
26525 | -struct thread_struct { | |
26526 | - unsigned long rsp0; | |
26527 | - unsigned long rsp; | |
26528 | - unsigned long userrsp; /* Copy from PDA */ | |
26529 | - unsigned long fs; | |
26530 | - unsigned long gs; | |
26531 | - unsigned short es, ds, fsindex, gsindex; | |
26532 | -/* Hardware debugging registers */ | |
26533 | - unsigned long debugreg0; | |
26534 | - unsigned long debugreg1; | |
26535 | - unsigned long debugreg2; | |
26536 | - unsigned long debugreg3; | |
26537 | - unsigned long debugreg6; | |
26538 | - unsigned long debugreg7; | |
26539 | -/* fault info */ | |
26540 | - unsigned long cr2, trap_no, error_code; | |
26541 | -/* floating point info */ | |
26542 | - union i387_union i387 __attribute__((aligned(16))); | |
26543 | -/* IO permissions. the bitmap could be moved into the GDT, that would make | |
26544 | - switch faster for a limited number of ioperm using tasks. -AK */ | |
26545 | - int ioperm; | |
26546 | - unsigned long *io_bitmap_ptr; | |
26547 | - unsigned io_bitmap_max; | |
26548 | -/* cached TLS descriptors. */ | |
26549 | - u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; | |
26550 | - unsigned int iopl; | |
26551 | -} __attribute__((aligned(16))); | |
26552 | - | |
26553 | -#define INIT_THREAD { \ | |
26554 | - .rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \ | |
26555 | -} | |
26556 | - | |
26557 | -#ifndef CONFIG_X86_NO_TSS | |
26558 | -#define INIT_TSS { \ | |
26559 | - .rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \ | |
26560 | -} | |
26561 | -#endif | |
26562 | - | |
26563 | -#define INIT_MMAP \ | |
26564 | -{ &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL } | |
26565 | - | |
26566 | -#define start_thread(regs,new_rip,new_rsp) do { \ | |
26567 | - asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0)); \ | |
26568 | - load_gs_index(0); \ | |
26569 | - (regs)->rip = (new_rip); \ | |
26570 | - (regs)->rsp = (new_rsp); \ | |
26571 | - write_pda(oldrsp, (new_rsp)); \ | |
26572 | - (regs)->cs = __USER_CS; \ | |
26573 | - (regs)->ss = __USER_DS; \ | |
26574 | - (regs)->eflags = 0x200; \ | |
26575 | - set_fs(USER_DS); \ | |
26576 | -} while(0) | |
26577 | - | |
26578 | -#define get_debugreg(var, register) \ | |
26579 | - var = HYPERVISOR_get_debugreg(register) | |
26580 | -#define set_debugreg(value, register) do { \ | |
26581 | - if (HYPERVISOR_set_debugreg(register, value)) \ | |
26582 | - BUG(); \ | |
26583 | -} while (0) | |
26584 | - | |
26585 | -struct task_struct; | |
26586 | -struct mm_struct; | |
26587 | - | |
26588 | -/* Free all resources held by a thread. */ | |
26589 | -extern void release_thread(struct task_struct *); | |
26590 | - | |
26591 | -/* Prepare to copy thread state - unlazy all lazy status */ | |
26592 | -extern void prepare_to_copy(struct task_struct *tsk); | |
cc90b958 BS |
26593 | - |
26594 | -/* | |
00e5a55c | 26595 | - * create a kernel thread without removing it from tasklists |
cc90b958 | 26596 | - */ |
00e5a55c | 26597 | -extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); |
cc90b958 | 26598 | - |
00e5a55c BS |
26599 | -/* |
26600 | - * Return saved PC of a blocked thread. | |
26601 | - * What is this good for? it will be always the scheduler or ret_from_fork. | |
26602 | - */ | |
26603 | -#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.rsp - 8)) | |
cc90b958 | 26604 | - |
00e5a55c BS |
26605 | -extern unsigned long get_wchan(struct task_struct *p); |
26606 | -#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.rsp0 - 1) | |
26607 | -#define KSTK_EIP(tsk) (task_pt_regs(tsk)->rip) | |
26608 | -#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */ | |
cc90b958 | 26609 | - |
cc90b958 | 26610 | - |
00e5a55c BS |
26611 | -struct microcode_header { |
26612 | - unsigned int hdrver; | |
26613 | - unsigned int rev; | |
26614 | - unsigned int date; | |
26615 | - unsigned int sig; | |
26616 | - unsigned int cksum; | |
26617 | - unsigned int ldrver; | |
26618 | - unsigned int pf; | |
26619 | - unsigned int datasize; | |
26620 | - unsigned int totalsize; | |
26621 | - unsigned int reserved[3]; | |
26622 | -}; | |
cc90b958 | 26623 | - |
00e5a55c BS |
26624 | -struct microcode { |
26625 | - struct microcode_header hdr; | |
26626 | - unsigned int bits[0]; | |
26627 | -}; | |
cc90b958 | 26628 | - |
00e5a55c BS |
26629 | -typedef struct microcode microcode_t; |
26630 | -typedef struct microcode_header microcode_header_t; | |
cc90b958 | 26631 | - |
00e5a55c BS |
26632 | -/* microcode format is extended from prescott processors */ |
26633 | -struct extended_signature { | |
26634 | - unsigned int sig; | |
26635 | - unsigned int pf; | |
26636 | - unsigned int cksum; | |
26637 | -}; | |
cc90b958 | 26638 | - |
00e5a55c BS |
26639 | -struct extended_sigtable { |
26640 | - unsigned int count; | |
26641 | - unsigned int cksum; | |
26642 | - unsigned int reserved[3]; | |
26643 | - struct extended_signature sigs[0]; | |
26644 | -}; | |
cc90b958 | 26645 | - |
cc90b958 | 26646 | - |
00e5a55c BS |
26647 | -#if defined(CONFIG_MPSC) || defined(CONFIG_MCORE2) |
26648 | -#define ASM_NOP1 P6_NOP1 | |
26649 | -#define ASM_NOP2 P6_NOP2 | |
26650 | -#define ASM_NOP3 P6_NOP3 | |
26651 | -#define ASM_NOP4 P6_NOP4 | |
26652 | -#define ASM_NOP5 P6_NOP5 | |
26653 | -#define ASM_NOP6 P6_NOP6 | |
26654 | -#define ASM_NOP7 P6_NOP7 | |
26655 | -#define ASM_NOP8 P6_NOP8 | |
cc90b958 | 26656 | -#else |
00e5a55c BS |
26657 | -#define ASM_NOP1 K8_NOP1 |
26658 | -#define ASM_NOP2 K8_NOP2 | |
26659 | -#define ASM_NOP3 K8_NOP3 | |
26660 | -#define ASM_NOP4 K8_NOP4 | |
26661 | -#define ASM_NOP5 K8_NOP5 | |
26662 | -#define ASM_NOP6 K8_NOP6 | |
26663 | -#define ASM_NOP7 K8_NOP7 | |
26664 | -#define ASM_NOP8 K8_NOP8 | |
cc90b958 BS |
26665 | -#endif |
26666 | - | |
00e5a55c BS |
26667 | -/* Opteron nops */ |
26668 | -#define K8_NOP1 ".byte 0x90\n" | |
26669 | -#define K8_NOP2 ".byte 0x66,0x90\n" | |
26670 | -#define K8_NOP3 ".byte 0x66,0x66,0x90\n" | |
26671 | -#define K8_NOP4 ".byte 0x66,0x66,0x66,0x90\n" | |
26672 | -#define K8_NOP5 K8_NOP3 K8_NOP2 | |
26673 | -#define K8_NOP6 K8_NOP3 K8_NOP3 | |
26674 | -#define K8_NOP7 K8_NOP4 K8_NOP3 | |
26675 | -#define K8_NOP8 K8_NOP4 K8_NOP4 | |
cc90b958 | 26676 | - |
00e5a55c BS |
26677 | -/* P6 nops */ |
26678 | -/* uses eax dependencies (Intel-recommended choice) */ | |
26679 | -#define P6_NOP1 ".byte 0x90\n" | |
26680 | -#define P6_NOP2 ".byte 0x66,0x90\n" | |
26681 | -#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n" | |
26682 | -#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n" | |
26683 | -#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n" | |
26684 | -#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n" | |
26685 | -#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n" | |
26686 | -#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n" | |
cc90b958 | 26687 | - |
00e5a55c | 26688 | -#define ASM_NOP_MAX 8 |
cc90b958 | 26689 | - |
00e5a55c BS |
26690 | -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ |
26691 | -static inline void rep_nop(void) | |
26692 | -{ | |
26693 | - __asm__ __volatile__("rep;nop": : :"memory"); | |
26694 | -} | |
cc90b958 | 26695 | - |
00e5a55c BS |
26696 | -/* Stop speculative execution */ |
26697 | -static inline void sync_core(void) | |
26698 | -{ | |
26699 | - int tmp; | |
26700 | - asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory"); | |
26701 | -} | |
cc90b958 | 26702 | - |
00e5a55c BS |
26703 | -#define ARCH_HAS_PREFETCHW 1 |
26704 | -static inline void prefetchw(void *x) | |
26705 | -{ | |
26706 | - alternative_input("prefetcht0 (%1)", | |
26707 | - "prefetchw (%1)", | |
26708 | - X86_FEATURE_3DNOW, | |
26709 | - "r" (x)); | |
26710 | -} | |
cc90b958 | 26711 | - |
00e5a55c | 26712 | -#define ARCH_HAS_SPINLOCK_PREFETCH 1 |
cc90b958 | 26713 | - |
00e5a55c | 26714 | -#define spin_lock_prefetch(x) prefetchw(x) |
cc90b958 | 26715 | - |
00e5a55c | 26716 | -#define cpu_relax() rep_nop() |
cc90b958 | 26717 | - |
00e5a55c BS |
26718 | -static inline void __monitor(const void *eax, unsigned long ecx, |
26719 | - unsigned long edx) | |
26720 | -{ | |
26721 | - /* "monitor %eax,%ecx,%edx;" */ | |
26722 | - asm volatile( | |
26723 | - ".byte 0x0f,0x01,0xc8;" | |
26724 | - : :"a" (eax), "c" (ecx), "d"(edx)); | |
26725 | -} | |
cc90b958 | 26726 | - |
00e5a55c BS |
26727 | -static inline void __mwait(unsigned long eax, unsigned long ecx) |
26728 | -{ | |
26729 | - /* "mwait %eax,%ecx;" */ | |
26730 | - asm volatile( | |
26731 | - ".byte 0x0f,0x01,0xc9;" | |
26732 | - : :"a" (eax), "c" (ecx)); | |
26733 | -} | |
cc90b958 | 26734 | - |
00e5a55c BS |
26735 | -static inline void __sti_mwait(unsigned long eax, unsigned long ecx) |
26736 | -{ | |
26737 | - /* "mwait %eax,%ecx;" */ | |
26738 | - asm volatile( | |
26739 | - "sti; .byte 0x0f,0x01,0xc9;" | |
26740 | - : :"a" (eax), "c" (ecx)); | |
26741 | -} | |
cc90b958 | 26742 | - |
00e5a55c | 26743 | -extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); |
cc90b958 | 26744 | - |
00e5a55c BS |
26745 | -#define stack_current() \ |
26746 | -({ \ | |
26747 | - struct thread_info *ti; \ | |
26748 | - asm("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ | |
26749 | - ti->task; \ | |
26750 | -}) | |
26751 | - | |
26752 | -#define cache_line_size() (boot_cpu_data.x86_cache_alignment) | |
26753 | - | |
26754 | -extern unsigned long boot_option_idle_override; | |
26755 | -/* Boot loader type from the setup header */ | |
26756 | -extern int bootloader_type; | |
cc90b958 | 26757 | - |
00e5a55c | 26758 | -#define HAVE_ARCH_PICK_MMAP_LAYOUT 1 |
cc90b958 | 26759 | - |
00e5a55c BS |
26760 | -#endif /* __ASM_X86_64_PROCESSOR_H */ |
26761 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/segment.h 2009-02-16 16:18:36.000000000 +0100 | |
26762 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/segment.h 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
26763 | @@ -1,5 +1,204 @@ |
26764 | +#ifndef _ASM_X86_SEGMENT_H_ | |
26765 | +#define _ASM_X86_SEGMENT_H_ | |
26766 | + | |
26767 | +/* Simple and small GDT entries for booting only */ | |
26768 | + | |
26769 | +#define GDT_ENTRY_BOOT_CS 2 | |
26770 | +#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8) | |
26771 | + | |
26772 | +#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1) | |
26773 | +#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8) | |
26774 | + | |
26775 | +#define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2) | |
26776 | +#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8) | |
26777 | + | |
26778 | #ifdef CONFIG_X86_32 | |
26779 | -# include "segment_32.h" | |
26780 | +/* | |
26781 | + * The layout of the per-CPU GDT under Linux: | |
26782 | + * | |
26783 | + * 0 - null | |
26784 | + * 1 - reserved | |
26785 | + * 2 - reserved | |
26786 | + * 3 - reserved | |
26787 | + * | |
26788 | + * 4 - unused <==== new cacheline | |
26789 | + * 5 - unused | |
26790 | + * | |
26791 | + * ------- start of TLS (Thread-Local Storage) segments: | |
26792 | + * | |
26793 | + * 6 - TLS segment #1 [ glibc's TLS segment ] | |
26794 | + * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] | |
26795 | + * 8 - TLS segment #3 | |
26796 | + * 9 - reserved | |
26797 | + * 10 - reserved | |
26798 | + * 11 - reserved | |
26799 | + * | |
26800 | + * ------- start of kernel segments: | |
26801 | + * | |
26802 | + * 12 - kernel code segment <==== new cacheline | |
26803 | + * 13 - kernel data segment | |
26804 | + * 14 - default user CS | |
26805 | + * 15 - default user DS | |
26806 | + * 16 - TSS | |
26807 | + * 17 - LDT | |
26808 | + * 18 - PNPBIOS support (16->32 gate) | |
26809 | + * 19 - PNPBIOS support | |
26810 | + * 20 - PNPBIOS support | |
26811 | + * 21 - PNPBIOS support | |
26812 | + * 22 - PNPBIOS support | |
26813 | + * 23 - APM BIOS support | |
26814 | + * 24 - APM BIOS support | |
26815 | + * 25 - APM BIOS support | |
26816 | + * | |
26817 | + * 26 - ESPFIX small SS | |
26818 | + * 27 - per-cpu [ offset to per-cpu data area ] | |
26819 | + * 28 - unused | |
26820 | + * 29 - unused | |
26821 | + * 30 - unused | |
26822 | + * 31 - TSS for double fault handler | |
26823 | + */ | |
26824 | +#define GDT_ENTRY_TLS_MIN 6 | |
26825 | +#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) | |
26826 | + | |
26827 | +#define GDT_ENTRY_DEFAULT_USER_CS 14 | |
26828 | +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3) | |
26829 | + | |
26830 | +#define GDT_ENTRY_DEFAULT_USER_DS 15 | |
26831 | +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3) | |
26832 | + | |
26833 | +#define GDT_ENTRY_KERNEL_BASE 12 | |
26834 | + | |
26835 | +#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) | |
26836 | +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) | |
26837 | + | |
26838 | +#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) | |
26839 | +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) | |
26840 | + | |
26841 | +#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4) | |
26842 | +#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5) | |
26843 | + | |
26844 | +#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 6) | |
26845 | +#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 11) | |
26846 | + | |
26847 | +#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) | |
26848 | +#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) | |
26849 | + | |
26850 | +#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) | |
26851 | +#ifdef CONFIG_SMP | |
26852 | +#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) | |
26853 | #else | |
26854 | -# include "../../segment_64.h" | |
26855 | +#define __KERNEL_PERCPU 0 | |
26856 | +#endif | |
26857 | + | |
26858 | +#define GDT_ENTRY_DOUBLEFAULT_TSS 31 | |
26859 | + | |
26860 | +/* | |
26861 | + * The GDT has 32 entries | |
26862 | + */ | |
26863 | +#define GDT_ENTRIES 32 | |
26864 | + | |
26865 | +/* The PnP BIOS entries in the GDT */ | |
26866 | +#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0) | |
26867 | +#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1) | |
26868 | +#define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2) | |
26869 | +#define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3) | |
26870 | +#define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4) | |
26871 | + | |
26872 | +/* The PnP BIOS selectors */ | |
26873 | +#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */ | |
26874 | +#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */ | |
26875 | +#define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */ | |
26876 | +#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */ | |
26877 | +#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */ | |
26878 | + | |
26879 | +/* Bottom two bits of selector give the ring privilege level */ | |
26880 | +#define SEGMENT_RPL_MASK 0x3 | |
26881 | +/* Bit 2 is table indicator (LDT/GDT) */ | |
26882 | +#define SEGMENT_TI_MASK 0x4 | |
26883 | + | |
26884 | +/* User mode is privilege level 3 */ | |
26885 | +#define USER_RPL 0x3 | |
26886 | +/* LDT segment has TI set, GDT has it cleared */ | |
26887 | +#define SEGMENT_LDT 0x4 | |
26888 | +#define SEGMENT_GDT 0x0 | |
26889 | + | |
26890 | +/* | |
26891 | + * Matching rules for certain types of segments. | |
26892 | + */ | |
26893 | + | |
26894 | +/* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */ | |
26895 | +#define SEGMENT_IS_KERNEL_CODE(x) (((x) & ~3) == GDT_ENTRY_KERNEL_CS * 8 \ | |
26896 | + || ((x) & ~3) == (FLAT_KERNEL_CS & ~3)) | |
26897 | + | |
26898 | +/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ | |
26899 | +#define SEGMENT_IS_FLAT_CODE(x) (((x) & ~0x13) == GDT_ENTRY_KERNEL_CS * 8 \ | |
26900 | + || ((x) & ~3) == (FLAT_KERNEL_CS & ~3) \ | |
26901 | + || ((x) & ~3) == (FLAT_USER_CS & ~3)) | |
26902 | + | |
26903 | +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ | |
26904 | +#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) | |
26905 | + | |
26906 | +#define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) | |
26907 | + | |
26908 | +#else | |
26909 | +#include <asm/cache.h> | |
26910 | + | |
26911 | +#define __KERNEL_CS 0x10 | |
26912 | +#define __KERNEL_DS 0x18 | |
26913 | + | |
26914 | +#define __KERNEL32_CS 0x08 | |
26915 | + | |
26916 | +/* | |
26917 | + * we cannot use the same code segment descriptor for user and kernel | |
26918 | + * -- not even in the long flat mode, because of different DPL /kkeil | |
26919 | + * The segment offset needs to contain a RPL. Grr. -AK | |
26920 | + * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) | |
26921 | + */ | |
26922 | + | |
26923 | +#define __USER32_CS 0x23 /* 4*8+3 */ | |
26924 | +#define __USER_DS 0x2b /* 5*8+3 */ | |
26925 | +#define __USER_CS 0x33 /* 6*8+3 */ | |
26926 | +#define __USER32_DS __USER_DS | |
26927 | + | |
26928 | +#define GDT_ENTRY_TSS 8 /* needs two entries */ | |
26929 | +#define GDT_ENTRY_LDT 10 /* needs two entries */ | |
26930 | +#define GDT_ENTRY_TLS_MIN 12 | |
26931 | +#define GDT_ENTRY_TLS_MAX 14 | |
26932 | + | |
26933 | +#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */ | |
26934 | +#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3) | |
26935 | + | |
26936 | +/* TLS indexes for 64bit - hardcoded in arch_prctl */ | |
26937 | +#define FS_TLS 0 | |
26938 | +#define GS_TLS 1 | |
26939 | + | |
26940 | +#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) | |
26941 | +#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) | |
26942 | + | |
26943 | +#define GDT_ENTRIES 16 | |
26944 | + | |
26945 | +#endif | |
26946 | + | |
26947 | +/* User mode is privilege level 3 */ | |
26948 | +#define USER_RPL 0x3 | |
26949 | +/* LDT segment has TI set, GDT has it cleared */ | |
26950 | +#define SEGMENT_LDT 0x4 | |
26951 | +#define SEGMENT_GDT 0x0 | |
26952 | + | |
26953 | +/* Bottom two bits of selector give the ring privilege level */ | |
26954 | +#define SEGMENT_RPL_MASK 0x3 | |
26955 | +/* Bit 2 is table indicator (LDT/GDT) */ | |
26956 | +#define SEGMENT_TI_MASK 0x4 | |
26957 | + | |
26958 | +#define IDT_ENTRIES 256 | |
26959 | +#define GDT_SIZE (GDT_ENTRIES * 8) | |
26960 | +#define GDT_ENTRY_TLS_ENTRIES 3 | |
26961 | +#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) | |
26962 | + | |
26963 | +#ifdef __KERNEL__ | |
26964 | +#ifndef __ASSEMBLY__ | |
26965 | +extern const char early_idt_handlers[IDT_ENTRIES][10]; | |
26966 | +#endif | |
26967 | +#endif | |
26968 | + | |
26969 | #endif | |
00e5a55c BS |
26970 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/segment_32.h 2008-12-15 11:27:22.000000000 +0100 |
26971 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
26972 | @@ -1,150 +0,0 @@ | |
26973 | -#ifndef _ASM_SEGMENT_H | |
26974 | -#define _ASM_SEGMENT_H | |
26975 | - | |
26976 | -/* | |
26977 | - * The layout of the per-CPU GDT under Linux: | |
26978 | - * | |
26979 | - * 0 - null | |
26980 | - * 1 - reserved | |
26981 | - * 2 - reserved | |
26982 | - * 3 - reserved | |
26983 | - * | |
26984 | - * 4 - unused <==== new cacheline | |
26985 | - * 5 - unused | |
26986 | - * | |
26987 | - * ------- start of TLS (Thread-Local Storage) segments: | |
26988 | - * | |
26989 | - * 6 - TLS segment #1 [ glibc's TLS segment ] | |
26990 | - * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] | |
26991 | - * 8 - TLS segment #3 | |
26992 | - * 9 - reserved | |
26993 | - * 10 - reserved | |
26994 | - * 11 - reserved | |
26995 | - * | |
26996 | - * ------- start of kernel segments: | |
26997 | - * | |
26998 | - * 12 - kernel code segment <==== new cacheline | |
26999 | - * 13 - kernel data segment | |
27000 | - * 14 - default user CS | |
27001 | - * 15 - default user DS | |
27002 | - * 16 - TSS | |
27003 | - * 17 - LDT | |
27004 | - * 18 - PNPBIOS support (16->32 gate) | |
27005 | - * 19 - PNPBIOS support | |
27006 | - * 20 - PNPBIOS support | |
27007 | - * 21 - PNPBIOS support | |
27008 | - * 22 - PNPBIOS support | |
27009 | - * 23 - APM BIOS support | |
27010 | - * 24 - APM BIOS support | |
27011 | - * 25 - APM BIOS support | |
27012 | - * | |
27013 | - * 26 - ESPFIX small SS | |
27014 | - * 27 - per-cpu [ offset to per-cpu data area ] | |
27015 | - * 28 - unused | |
27016 | - * 29 - unused | |
27017 | - * 30 - unused | |
27018 | - * 31 - TSS for double fault handler | |
27019 | - */ | |
27020 | -#define GDT_ENTRY_TLS_ENTRIES 3 | |
27021 | -#define GDT_ENTRY_TLS_MIN 6 | |
27022 | -#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) | |
27023 | - | |
27024 | -#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) | |
27025 | - | |
27026 | -#define GDT_ENTRY_DEFAULT_USER_CS 14 | |
27027 | -#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3) | |
27028 | - | |
27029 | -#define GDT_ENTRY_DEFAULT_USER_DS 15 | |
27030 | -#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3) | |
27031 | - | |
27032 | -#define GDT_ENTRY_KERNEL_BASE 12 | |
27033 | - | |
27034 | -#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) | |
27035 | -#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) | |
27036 | - | |
27037 | -#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) | |
27038 | -#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) | |
27039 | - | |
27040 | -#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4) | |
27041 | -#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5) | |
27042 | - | |
27043 | -#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 6) | |
27044 | -#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 11) | |
27045 | - | |
27046 | -#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) | |
27047 | -#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) | |
27048 | - | |
27049 | -#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) | |
27050 | -#ifdef CONFIG_SMP | |
27051 | -#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) | |
27052 | -#else | |
27053 | -#define __KERNEL_PERCPU 0 | |
27054 | -#endif | |
27055 | - | |
27056 | -#define GDT_ENTRY_DOUBLEFAULT_TSS 31 | |
27057 | - | |
27058 | -/* | |
27059 | - * The GDT has 32 entries | |
27060 | - */ | |
27061 | -#define GDT_ENTRIES 32 | |
27062 | -#define GDT_SIZE (GDT_ENTRIES * 8) | |
27063 | - | |
27064 | -/* Simple and small GDT entries for booting only */ | |
27065 | - | |
27066 | -#define GDT_ENTRY_BOOT_CS 2 | |
27067 | -#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8) | |
27068 | - | |
27069 | -#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1) | |
27070 | -#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8) | |
27071 | - | |
27072 | -/* The PnP BIOS entries in the GDT */ | |
27073 | -#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0) | |
27074 | -#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1) | |
27075 | -#define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2) | |
27076 | -#define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3) | |
27077 | -#define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4) | |
27078 | - | |
27079 | -/* The PnP BIOS selectors */ | |
27080 | -#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */ | |
27081 | -#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */ | |
27082 | -#define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */ | |
27083 | -#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */ | |
27084 | -#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */ | |
27085 | - | |
27086 | -/* | |
27087 | - * The interrupt descriptor table has room for 256 idt's, | |
27088 | - * the global descriptor table is dependent on the number | |
27089 | - * of tasks we can have.. | |
27090 | - */ | |
27091 | -#define IDT_ENTRIES 256 | |
27092 | - | |
27093 | -/* Bottom two bits of selector give the ring privilege level */ | |
27094 | -#define SEGMENT_RPL_MASK 0x3 | |
27095 | -/* Bit 2 is table indicator (LDT/GDT) */ | |
27096 | -#define SEGMENT_TI_MASK 0x4 | |
27097 | - | |
27098 | -/* User mode is privilege level 3 */ | |
27099 | -#define USER_RPL 0x3 | |
27100 | -/* LDT segment has TI set, GDT has it cleared */ | |
27101 | -#define SEGMENT_LDT 0x4 | |
27102 | -#define SEGMENT_GDT 0x0 | |
27103 | - | |
27104 | -#define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) | |
27105 | - | |
27106 | -/* | |
27107 | - * Matching rules for certain types of segments. | |
27108 | - */ | |
27109 | - | |
27110 | -/* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */ | |
27111 | -#define SEGMENT_IS_KERNEL_CODE(x) (((x) & ~3) == GDT_ENTRY_KERNEL_CS * 8 \ | |
27112 | - || ((x) & ~3) == (FLAT_KERNEL_CS & ~3)) | |
27113 | - | |
27114 | -/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ | |
27115 | -#define SEGMENT_IS_FLAT_CODE(x) (((x) & ~0x13) == GDT_ENTRY_KERNEL_CS * 8 \ | |
27116 | - || ((x) & ~3) == (FLAT_KERNEL_CS & ~3) \ | |
27117 | - || ((x) & ~3) == (FLAT_USER_CS & ~3)) | |
27118 | - | |
27119 | -/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ | |
27120 | -#define SEGMENT_IS_PNP_CODE(x) (((x) & ~0x0b) == GDT_ENTRY_PNPBIOS_BASE * 8) | |
27121 | - | |
27122 | -#endif | |
27123 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/smp_32.h 2009-02-16 16:18:36.000000000 +0100 | |
27124 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/smp_32.h 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
27125 | @@ -1,56 +1,51 @@ |
27126 | #ifndef __ASM_SMP_H | |
27127 | #define __ASM_SMP_H | |
27128 | ||
27129 | +#ifndef __ASSEMBLY__ | |
27130 | +#include <linux/cpumask.h> | |
27131 | +#include <linux/init.h> | |
27132 | + | |
27133 | /* | |
27134 | * We need the APIC definitions automatically as part of 'smp.h' | |
27135 | */ | |
27136 | -#ifndef __ASSEMBLY__ | |
27137 | -#include <linux/kernel.h> | |
27138 | -#include <linux/threads.h> | |
27139 | -#include <linux/cpumask.h> | |
27140 | +#ifdef CONFIG_X86_LOCAL_APIC | |
27141 | +# include <asm/mpspec.h> | |
27142 | +# include <asm/apic.h> | |
27143 | +# ifdef CONFIG_X86_IO_APIC | |
27144 | +# include <asm/io_apic.h> | |
27145 | +# endif | |
27146 | #endif | |
27147 | ||
27148 | -#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__) | |
27149 | -#include <linux/bitops.h> | |
27150 | -#include <asm/mpspec.h> | |
27151 | -#include <asm/apic.h> | |
27152 | -#ifdef CONFIG_X86_IO_APIC | |
27153 | -#include <asm/io_apic.h> | |
27154 | -#endif | |
27155 | -#endif | |
27156 | +#define cpu_callout_map cpu_possible_map | |
27157 | +#define cpu_callin_map cpu_possible_map | |
27158 | ||
27159 | -#define BAD_APICID 0xFFu | |
27160 | -#ifdef CONFIG_SMP | |
27161 | -#ifndef __ASSEMBLY__ | |
27162 | +extern int smp_num_siblings; | |
27163 | +extern unsigned int num_processors; | |
27164 | ||
27165 | -/* | |
27166 | - * Private routines/data | |
27167 | - */ | |
27168 | - | |
27169 | extern void smp_alloc_memory(void); | |
27170 | -extern int pic_mode; | |
27171 | -extern int smp_num_siblings; | |
27172 | -DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); | |
27173 | -DECLARE_PER_CPU(cpumask_t, cpu_core_map); | |
27174 | +extern void lock_ipi_call_lock(void); | |
27175 | +extern void unlock_ipi_call_lock(void); | |
27176 | ||
27177 | extern void (*mtrr_hook) (void); | |
27178 | extern void zap_low_mappings (void); | |
27179 | -extern void lock_ipi_call_lock(void); | |
27180 | -extern void unlock_ipi_call_lock(void); | |
27181 | ||
27182 | -#define MAX_APICID 256 | |
27183 | -extern u8 __initdata x86_cpu_to_apicid_init[]; | |
27184 | -extern void *x86_cpu_to_apicid_ptr; | |
27185 | +DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); | |
27186 | +DECLARE_PER_CPU(cpumask_t, cpu_core_map); | |
27187 | +DECLARE_PER_CPU(u8, cpu_llc_id); | |
27188 | DECLARE_PER_CPU(u8, x86_cpu_to_apicid); | |
27189 | ||
27190 | -#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) | |
27191 | - | |
27192 | #ifdef CONFIG_HOTPLUG_CPU | |
27193 | extern void cpu_exit_clear(void); | |
27194 | extern void cpu_uninit(void); | |
27195 | #endif | |
27196 | ||
27197 | +#ifdef CONFIG_SMP | |
27198 | + | |
27199 | #ifndef CONFIG_XEN | |
27200 | + | |
27201 | +/* Globals due to paravirt */ | |
27202 | +extern void set_cpu_sibling_map(int cpu); | |
27203 | + | |
27204 | struct smp_ops | |
27205 | { | |
27206 | void (*smp_prepare_boot_cpu)(void); | |
27207 | @@ -104,11 +99,11 @@ void native_smp_prepare_cpus(unsigned in | |
27208 | int native_cpu_up(unsigned int cpunum); | |
27209 | void native_smp_cpus_done(unsigned int max_cpus); | |
27210 | ||
27211 | -#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ | |
27212 | -do { } while (0) | |
27213 | - | |
27214 | -#else | |
27215 | +#ifndef CONFIG_PARAVIRT | |
27216 | +#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0) | |
27217 | +#endif | |
27218 | ||
27219 | +#else /* CONFIG_XEN */ | |
27220 | ||
27221 | void xen_smp_send_stop(void); | |
27222 | void xen_smp_send_reschedule(int cpu); | |
27223 | @@ -120,7 +115,12 @@ int xen_smp_call_function_mask(cpumask_t | |
27224 | #define smp_send_reschedule xen_smp_send_reschedule | |
27225 | #define smp_call_function_mask xen_smp_call_function_mask | |
27226 | ||
27227 | -#endif | |
27228 | +extern void prefill_possible_map(void); | |
27229 | + | |
27230 | +#endif /* CONFIG_XEN */ | |
27231 | + | |
27232 | +extern int __cpu_disable(void); | |
27233 | +extern void __cpu_die(unsigned int cpu); | |
27234 | ||
27235 | /* | |
27236 | * This function is needed by all SMP systems. It must _always_ be valid | |
27237 | @@ -130,64 +130,49 @@ int xen_smp_call_function_mask(cpumask_t | |
27238 | DECLARE_PER_CPU(int, cpu_number); | |
27239 | #define raw_smp_processor_id() (x86_read_percpu(cpu_number)) | |
27240 | ||
27241 | -extern cpumask_t cpu_possible_map; | |
27242 | -#define cpu_callin_map cpu_possible_map | |
27243 | +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) | |
27244 | + | |
27245 | +#define safe_smp_processor_id() smp_processor_id() | |
27246 | ||
27247 | /* We don't mark CPUs online until __cpu_up(), so we need another measure */ | |
27248 | static inline int num_booting_cpus(void) | |
27249 | { | |
27250 | - return cpus_weight(cpu_possible_map); | |
27251 | + return cpus_weight(cpu_callout_map); | |
27252 | } | |
27253 | ||
27254 | -#define safe_smp_processor_id() smp_processor_id() | |
27255 | -extern int __cpu_disable(void); | |
27256 | -extern void __cpu_die(unsigned int cpu); | |
27257 | -extern void prefill_possible_map(void); | |
27258 | -extern unsigned int num_processors; | |
27259 | - | |
27260 | -#endif /* !__ASSEMBLY__ */ | |
27261 | - | |
27262 | #else /* CONFIG_SMP */ | |
27263 | ||
27264 | #define safe_smp_processor_id() 0 | |
27265 | #define cpu_physical_id(cpu) boot_cpu_physical_apicid | |
27266 | ||
27267 | -#define NO_PROC_ID 0xFF /* No processor magic marker */ | |
27268 | - | |
27269 | -#endif /* CONFIG_SMP */ | |
27270 | - | |
27271 | -#ifndef __ASSEMBLY__ | |
27272 | +#endif /* !CONFIG_SMP */ | |
27273 | ||
27274 | #ifdef CONFIG_X86_LOCAL_APIC | |
27275 | ||
27276 | -#ifdef APIC_DEFINITION | |
27277 | +static __inline int logical_smp_processor_id(void) | |
27278 | +{ | |
27279 | + /* we don't want to mark this access volatile - bad code generation */ | |
27280 | + return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); | |
27281 | +} | |
27282 | + | |
27283 | +# ifdef APIC_DEFINITION | |
27284 | extern int hard_smp_processor_id(void); | |
27285 | -#else | |
27286 | -#include <mach_apicdef.h> | |
27287 | +# else | |
27288 | +# include <mach_apicdef.h> | |
27289 | static inline int hard_smp_processor_id(void) | |
27290 | { | |
27291 | /* we don't want to mark this access volatile - bad code generation */ | |
27292 | - return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); | |
27293 | + return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID)); | |
27294 | } | |
27295 | -#endif /* APIC_DEFINITION */ | |
27296 | +# endif /* APIC_DEFINITION */ | |
27297 | ||
27298 | #else /* CONFIG_X86_LOCAL_APIC */ | |
27299 | ||
27300 | -#ifndef CONFIG_SMP | |
27301 | -#define hard_smp_processor_id() 0 | |
27302 | -#endif | |
27303 | +# ifndef CONFIG_SMP | |
27304 | +# define hard_smp_processor_id() 0 | |
27305 | +# endif | |
27306 | ||
27307 | #endif /* CONFIG_X86_LOCAL_APIC */ | |
27308 | ||
27309 | -extern u8 apicid_2_node[]; | |
27310 | - | |
27311 | -#ifdef CONFIG_X86_LOCAL_APIC | |
27312 | -static __inline int logical_smp_processor_id(void) | |
27313 | -{ | |
27314 | - /* we don't want to mark this access volatile - bad code generation */ | |
27315 | - return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); | |
27316 | -} | |
27317 | -#endif | |
27318 | -#endif | |
27319 | - | |
27320 | +#endif /* !ASSEMBLY */ | |
27321 | #endif | |
00e5a55c BS |
27322 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/smp_64.h 2009-02-16 16:18:36.000000000 +0100 |
27323 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/smp_64.h 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
27324 | @@ -1,139 +1,103 @@ |
27325 | #ifndef __ASM_SMP_H | |
27326 | #define __ASM_SMP_H | |
27327 | ||
27328 | -/* | |
27329 | - * We need the APIC definitions automatically as part of 'smp.h' | |
27330 | - */ | |
27331 | -#include <linux/threads.h> | |
27332 | #include <linux/cpumask.h> | |
27333 | -#include <linux/bitops.h> | |
27334 | #include <linux/init.h> | |
27335 | -extern int disable_apic; | |
27336 | ||
27337 | #ifdef CONFIG_X86_LOCAL_APIC | |
27338 | -#include <asm/mpspec.h> | |
27339 | +/* | |
27340 | + * We need the APIC definitions automatically as part of 'smp.h' | |
27341 | + */ | |
27342 | #include <asm/apic.h> | |
27343 | #ifdef CONFIG_X86_IO_APIC | |
27344 | #include <asm/io_apic.h> | |
27345 | #endif | |
27346 | -#include <asm/thread_info.h> | |
27347 | +#include <asm/mpspec.h> | |
27348 | #endif | |
27349 | - | |
27350 | -#ifdef CONFIG_SMP | |
27351 | - | |
27352 | #include <asm/pda.h> | |
27353 | +#include <asm/thread_info.h> | |
27354 | ||
27355 | -struct pt_regs; | |
27356 | - | |
27357 | -extern cpumask_t cpu_present_mask; | |
27358 | -extern cpumask_t cpu_possible_map; | |
27359 | -extern cpumask_t cpu_online_map; | |
27360 | extern cpumask_t cpu_initialized; | |
27361 | ||
27362 | -/* | |
27363 | - * Private routines/data | |
27364 | - */ | |
27365 | - | |
27366 | +extern int smp_num_siblings; | |
27367 | +extern unsigned int num_processors; | |
27368 | + | |
27369 | extern void smp_alloc_memory(void); | |
27370 | -extern volatile unsigned long smp_invalidate_needed; | |
27371 | extern void lock_ipi_call_lock(void); | |
27372 | extern void unlock_ipi_call_lock(void); | |
27373 | -extern int smp_num_siblings; | |
27374 | -extern void smp_send_reschedule(int cpu); | |
27375 | + | |
27376 | extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *), | |
27377 | void *info, int wait); | |
27378 | ||
27379 | -/* | |
27380 | - * cpu_sibling_map and cpu_core_map now live | |
27381 | - * in the per cpu area | |
27382 | - * | |
27383 | - * extern cpumask_t cpu_sibling_map[NR_CPUS]; | |
27384 | - * extern cpumask_t cpu_core_map[NR_CPUS]; | |
27385 | - */ | |
27386 | DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); | |
27387 | DECLARE_PER_CPU(cpumask_t, cpu_core_map); | |
27388 | -DECLARE_PER_CPU(u8, cpu_llc_id); | |
27389 | - | |
27390 | -#define SMP_TRAMPOLINE_BASE 0x6000 | |
27391 | +DECLARE_PER_CPU(u16, cpu_llc_id); | |
27392 | +DECLARE_PER_CPU(u16, x86_cpu_to_apicid); | |
27393 | +DECLARE_PER_CPU(u16, x86_bios_cpu_apicid); | |
27394 | ||
27395 | -/* | |
27396 | - * On x86 all CPUs are mapped 1:1 to the APIC space. | |
27397 | - * This simplifies scheduling and IPI sending and | |
27398 | - * compresses data structures. | |
27399 | - */ | |
27400 | - | |
27401 | -static inline int num_booting_cpus(void) | |
27402 | +#ifdef CONFIG_X86_LOCAL_APIC | |
27403 | +static inline int cpu_present_to_apicid(int mps_cpu) | |
27404 | { | |
27405 | - return cpus_weight(cpu_possible_map); | |
27406 | + if (cpu_present(mps_cpu)) | |
27407 | + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); | |
27408 | + else | |
27409 | + return BAD_APICID; | |
27410 | } | |
27411 | +#endif | |
27412 | ||
27413 | -#define raw_smp_processor_id() read_pda(cpunumber) | |
27414 | +#ifdef CONFIG_SMP | |
27415 | + | |
27416 | +#define SMP_TRAMPOLINE_BASE 0x6000 | |
27417 | ||
27418 | extern int __cpu_disable(void); | |
27419 | extern void __cpu_die(unsigned int cpu); | |
27420 | extern void prefill_possible_map(void); | |
27421 | -extern unsigned num_processors; | |
27422 | extern unsigned __cpuinitdata disabled_cpus; | |
27423 | ||
27424 | -#define NO_PROC_ID 0xFF /* No processor magic marker */ | |
27425 | - | |
27426 | -#endif /* CONFIG_SMP */ | |
27427 | +#define raw_smp_processor_id() read_pda(cpunumber) | |
27428 | +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) | |
27429 | ||
27430 | -#define safe_smp_processor_id() smp_processor_id() | |
27431 | - | |
27432 | -#ifdef CONFIG_X86_LOCAL_APIC | |
27433 | -static inline int hard_smp_processor_id(void) | |
27434 | -{ | |
27435 | - /* we don't want to mark this access volatile - bad code generation */ | |
27436 | - return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); | |
27437 | -} | |
27438 | -#endif | |
27439 | +#define stack_smp_processor_id() \ | |
27440 | + ({ \ | |
27441 | + struct thread_info *ti; \ | |
27442 | + __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ | |
27443 | + ti->cpu; \ | |
27444 | +}) | |
27445 | ||
27446 | /* | |
27447 | - * Some lowlevel functions might want to know about | |
27448 | - * the real APIC ID <-> CPU # mapping. | |
27449 | + * On x86 all CPUs are mapped 1:1 to the APIC space. This simplifies | |
27450 | + * scheduling and IPI sending and compresses data structures. | |
27451 | */ | |
27452 | -extern u8 __initdata x86_cpu_to_apicid_init[]; | |
27453 | -extern void *x86_cpu_to_apicid_ptr; | |
27454 | -DECLARE_PER_CPU(u8, x86_cpu_to_apicid); /* physical ID */ | |
27455 | -extern u8 bios_cpu_apicid[]; | |
27456 | - | |
27457 | -#ifdef CONFIG_X86_LOCAL_APIC | |
27458 | -static inline int cpu_present_to_apicid(int mps_cpu) | |
27459 | +static inline int num_booting_cpus(void) | |
27460 | { | |
27461 | - if (mps_cpu < NR_CPUS) | |
27462 | - return (int)bios_cpu_apicid[mps_cpu]; | |
27463 | - else | |
27464 | - return BAD_APICID; | |
27465 | + return cpus_weight(cpu_possible_map); | |
27466 | } | |
27467 | -#endif | |
27468 | ||
27469 | -#ifndef CONFIG_SMP | |
27470 | +extern void smp_send_reschedule(int cpu); | |
27471 | + | |
27472 | +#else /* CONFIG_SMP */ | |
27473 | + | |
27474 | +extern unsigned int boot_cpu_id; | |
27475 | +#define cpu_physical_id(cpu) boot_cpu_id | |
27476 | #define stack_smp_processor_id() 0 | |
27477 | -#define cpu_logical_map(x) (x) | |
27478 | -#else | |
27479 | -#include <asm/thread_info.h> | |
27480 | -#define stack_smp_processor_id() \ | |
27481 | -({ \ | |
27482 | - struct thread_info *ti; \ | |
27483 | - __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ | |
27484 | - ti->cpu; \ | |
27485 | -}) | |
27486 | -#endif | |
27487 | + | |
27488 | +#endif /* !CONFIG_SMP */ | |
27489 | + | |
27490 | +#define safe_smp_processor_id() smp_processor_id() | |
27491 | ||
27492 | #ifdef CONFIG_X86_LOCAL_APIC | |
27493 | static __inline int logical_smp_processor_id(void) | |
27494 | { | |
27495 | /* we don't want to mark this access volatile - bad code generation */ | |
27496 | - return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); | |
27497 | + return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); | |
27498 | +} | |
27499 | + | |
27500 | +static inline int hard_smp_processor_id(void) | |
27501 | +{ | |
27502 | + /* we don't want to mark this access volatile - bad code generation */ | |
27503 | + return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID)); | |
27504 | } | |
27505 | #endif | |
27506 | ||
27507 | -#ifdef CONFIG_SMP | |
27508 | -#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) | |
27509 | -#else | |
27510 | -extern unsigned int boot_cpu_id; | |
27511 | -#define cpu_physical_id(cpu) boot_cpu_id | |
27512 | -#endif /* !CONFIG_SMP */ | |
27513 | #endif | |
27514 | ||
00e5a55c BS |
27515 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 |
27516 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/spinlock.h 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
27517 | @@ -0,0 +1,333 @@ |
27518 | +#ifndef _X86_SPINLOCK_H_ | |
27519 | +#define _X86_SPINLOCK_H_ | |
27520 | + | |
27521 | +#include <asm/atomic.h> | |
27522 | +#include <asm/rwlock.h> | |
27523 | +#include <asm/page.h> | |
27524 | +#include <asm/processor.h> | |
27525 | +#include <linux/compiler.h> | |
27526 | + | |
27527 | +/* | |
27528 | + * Your basic SMP spinlocks, allowing only a single CPU anywhere | |
27529 | + * | |
27530 | + * Simple spin lock operations. There are two variants, one clears IRQ's | |
27531 | + * on the local processor, one does not. | |
27532 | + * | |
27533 | + * These are fair FIFO ticket locks, which support up to 2^16 CPUs | |
27534 | + * (8-bit tickets when NR_CPUS < 256, 16-bit tickets otherwise). | |
27535 | + * | |
27536 | + * (the type definitions are in asm/spinlock_types.h) | |
27537 | + */ | |
27538 | + | |
27539 | +#ifdef CONFIG_X86_32 | |
27540 | +# define LOCK_PTR_REG "a" | |
27541 | +# define REG_PTR_MODE "k" | |
27542 | +#else | |
27543 | +# define LOCK_PTR_REG "D" | |
27544 | +# define REG_PTR_MODE "q" | |
27545 | +#endif | |
27546 | + | |
27547 | +#if defined(CONFIG_X86_32) && \ | |
27548 | + (defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)) | |
27549 | +/* | |
27550 | + * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock | |
27551 | + * (PPro errata 66, 92) | |
27552 | + */ | |
27553 | +# define UNLOCK_LOCK_PREFIX LOCK_PREFIX | |
27554 | +#else | |
27555 | +# define UNLOCK_LOCK_PREFIX | |
27556 | +#endif | |
27557 | + | |
27558 | +int xen_spinlock_init(unsigned int cpu); | |
27559 | +void xen_spinlock_cleanup(unsigned int cpu); | |
27560 | +extern int xen_spin_wait(raw_spinlock_t *, unsigned int token); | |
27561 | +extern int xen_spin_wait_flags(raw_spinlock_t *, unsigned int *token, | |
27562 | + unsigned int flags); | |
27563 | +extern unsigned int xen_spin_adjust(raw_spinlock_t *, unsigned int token); | |
27564 | +extern void xen_spin_kick(raw_spinlock_t *, unsigned int token); | |
27565 | + | |
27566 | +/* | |
27567 | + * Ticket locks are conceptually two parts, one indicating the current head of | |
27568 | + * the queue, and the other indicating the current tail. The lock is acquired | |
27569 | + * by atomically noting the tail and incrementing it by one (thus adding | |
27570 | + * ourself to the queue and noting our position), then waiting until the head | |
27571 | + * becomes equal to the initial value of the tail. | |
27572 | + * | |
27573 | + * We use an xadd covering *both* parts of the lock, to increment the tail and | |
27574 | + * also load the position of the head, which takes care of memory ordering | |
27575 | + * issues and should be optimal for the uncontended case. Note the tail must be | |
27576 | + * in the high part, because a wide xadd increment of the low part would carry | |
27577 | + * up and contaminate the high part. | |
27578 | + * | |
27579 | + * With fewer than 2^8 possible CPUs, we can use x86's partial registers to | |
27580 | + * save some instructions and make the code more elegant. There really isn't | |
27581 | + * much between them in performance though, especially as locks are out of line. | |
27582 | + */ | |
27583 | +#if (NR_CPUS < 256) | |
27584 | +#define TICKET_SHIFT 8 | |
27585 | +#define __raw_spin_lock_preamble \ | |
27586 | + asm(LOCK_PREFIX "xaddw %w0, %2\n\t" \ | |
27587 | + "cmpb %h0, %b0\n\t" \ | |
27588 | + "sete %1" \ | |
27589 | + : "=&Q" (token), "=qm" (free), "+m" (lock->slock) \ | |
27590 | + : "0" (0x0100) \ | |
27591 | + : "memory", "cc") | |
27592 | +#define __raw_spin_lock_body \ | |
27593 | + asm("1:\t" \ | |
27594 | + "cmpb %h0, %b0\n\t" \ | |
27595 | + "je 2f\n\t" \ | |
27596 | + "decl %1\n\t" \ | |
27597 | + "jz 2f\n\t" \ | |
27598 | + "rep ; nop\n\t" \ | |
27599 | + "movb %2, %b0\n\t" \ | |
27600 | + /* don't need lfence here, because loads are in-order */ \ | |
27601 | + "jmp 1b\n" \ | |
27602 | + "2:" \ | |
27603 | + : "+Q" (token), "+g" (count) \ | |
27604 | + : "m" (lock->slock) \ | |
27605 | + : "memory", "cc") | |
27606 | + | |
27607 | + | |
27608 | +static inline int __raw_spin_trylock(raw_spinlock_t *lock) | |
27609 | +{ | |
27610 | + int tmp, new; | |
27611 | + | |
27612 | + asm("movzwl %2, %0\n\t" | |
27613 | + "cmpb %h0, %b0\n\t" | |
27614 | + "leal 0x100(%" REG_PTR_MODE "0), %1\n\t" | |
27615 | + "jne 1f\n\t" | |
27616 | + LOCK_PREFIX "cmpxchgw %w1, %2\n\t" | |
27617 | + "1:\t" | |
27618 | + "sete %b1\n\t" | |
27619 | + "movzbl %b1, %0\n\t" | |
27620 | + : "=&a" (tmp), "=&q" (new), "+m" (lock->slock) | |
27621 | + : | |
27622 | + : "memory", "cc"); | |
27623 | + | |
27624 | + return tmp; | |
27625 | +} | |
27626 | + | |
27627 | +static inline void __raw_spin_unlock(raw_spinlock_t *lock) | |
27628 | +{ | |
27629 | + unsigned int token; | |
27630 | + unsigned char kick; | |
27631 | + | |
27632 | + asm(UNLOCK_LOCK_PREFIX "incb %2\n\t" | |
27633 | + "movzwl %2, %0\n\t" | |
27634 | + "cmpb %h0, %b0\n\t" | |
27635 | + "setne %1" | |
27636 | + : "=&Q" (token), "=qm" (kick), "+m" (lock->slock) | |
27637 | + : | |
27638 | + : "memory", "cc"); | |
27639 | + if (kick) | |
27640 | + xen_spin_kick(lock, token); | |
27641 | +} | |
27642 | +#else | |
27643 | +#define TICKET_SHIFT 16 | |
27644 | +#define __raw_spin_lock_preamble \ | |
27645 | + do { \ | |
27646 | + unsigned int tmp; \ | |
27647 | + asm(LOCK_PREFIX "xaddl %0, %2\n\t" /* tail++, token = old slock */ \ | |
27648 | + "shldl $16, %0, %3\n\t" /* low half of tmp = old tail (our ticket) */ \ | |
27649 | + "cmpw %w3, %w0\n\t" /* our ticket vs. old head */ \ | |
27650 | + "sete %1" /* free = (head == our ticket) */ \ | |
27651 | + : "=&r" (token), "=qm" (free), "+m" (lock->slock), \ | |
27652 | + "=&g" (tmp) \ | |
27653 | + : "0" (0x00010000) \ | |
27654 | + : "memory", "cc"); \ | |
27655 | + } while (0) | |
27656 | +#define __raw_spin_lock_body \ | |
27657 | + do { \ | |
27658 | + unsigned int tmp; \ | |
27659 | + asm("shldl $16, %0, %2\n" \ | |
27660 | + "1:\t" \ | |
27661 | + "cmpw %w2, %w0\n\t" \ | |
27662 | + "je 2f\n\t" \ | |
27663 | + "decl %1\n\t" \ | |
27664 | + "jz 2f\n\t" \ | |
27665 | + "rep ; nop\n\t" \ | |
27666 | + "movw %3, %w0\n\t" \ | |
27667 | + /* don't need lfence here, because loads are in-order */ \ | |
27668 | + "jmp 1b\n" \ | |
27669 | + "2:" \ | |
27670 | + : "+r" (token), "+g" (count), "=&g" (tmp) \ | |
27671 | + : "m" (lock->slock) \ | |
27672 | + : "memory", "cc"); \ | |
27673 | + } while (0) | |
27674 | + | |
00e5a55c BS |
27675 | +static inline int __raw_spin_trylock(raw_spinlock_t *lock) |
27676 | +{ | |
27677 | + int tmp; | |
27678 | + int new; | |
27679 | + | |
27680 | + asm("movl %2, %0\n\t" | |
27681 | + "movl %0, %1\n\t" | |
27682 | + "roll $16, %0\n\t" | |
27683 | + "cmpl %0, %1\n\t" | |
27684 | + "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t" | |
27685 | + "jne 1f\n\t" | |
27686 | + LOCK_PREFIX "cmpxchgl %1, %2\n" | |
27687 | + "1:\t" | |
27688 | + "sete %b1\n\t" | |
27689 | + "movzbl %b1, %0\n\t" | |
27690 | + : "=&a" (tmp), "=&q" (new), "+m" (lock->slock) | |
27691 | + : | |
27692 | + : "memory", "cc"); | |
27693 | + | |
27694 | + return tmp; | |
27695 | +} | |
27696 | + | |
27697 | +static inline void __raw_spin_unlock(raw_spinlock_t *lock) | |
27698 | +{ | |
27699 | + unsigned int token, tmp; | |
27700 | + bool kick; | |
27701 | + | |
27702 | + asm(UNLOCK_LOCK_PREFIX "incw %2\n\t" | |
27703 | + "movl %2, %0\n\t" | |
27704 | + "shldl $16, %0, %3\n\t" | |
27705 | + "cmpw %w3, %w0\n\t" | |
27706 | + "setne %1" | |
27707 | + : "=&r" (token), "=qm" (kick), "+m" (lock->slock), "=&r" (tmp) | |
27708 | + : | |
27709 | + : "memory", "cc"); | |
27710 | + if (kick) | |
27711 | + xen_spin_kick(lock, token); | |
27712 | +} | |
27713 | +#endif | |
27714 | + | |
27715 | +static inline int __raw_spin_is_locked(raw_spinlock_t *lock) | |
27716 | +{ | |
27717 | + int tmp = *(volatile signed int *)(&(lock)->slock); | |
27718 | + | |
27719 | + return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1)); | |
27720 | +} | |
27721 | + | |
27722 | +static inline int __raw_spin_is_contended(raw_spinlock_t *lock) | |
27723 | +{ | |
27724 | + int tmp = *(volatile signed int *)(&(lock)->slock); | |
27725 | + | |
27726 | + return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1; | |
27727 | +} | |
27728 | + | |
27729 | +static inline void __raw_spin_lock(raw_spinlock_t *lock) | |
27730 | +{ | |
27731 | + unsigned int token, count; | |
27732 | + bool free; | |
27733 | + | |
27734 | + __raw_spin_lock_preamble; | |
27735 | + if (unlikely(!free)) | |
27736 | + token = xen_spin_adjust(lock, token); | |
27737 | + do { | |
27738 | + count = 1 << 10; | |
27739 | + __raw_spin_lock_body; | |
27740 | + } while (unlikely(!count) && !xen_spin_wait(lock, token)); | |
27741 | +} | |
27742 | + | |
27743 | +static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, | |
27744 | + unsigned long flags) | |
27745 | +{ | |
27746 | + unsigned int token, count; | |
27747 | + bool free; | |
27748 | + | |
27749 | + __raw_spin_lock_preamble; | |
27750 | + if (unlikely(!free)) | |
27751 | + token = xen_spin_adjust(lock, token); | |
27752 | + do { | |
27753 | + count = 1 << 10; | |
27754 | + __raw_spin_lock_body; | |
27755 | + } while (unlikely(!count) && !xen_spin_wait_flags(lock, &token, flags)); | |
27756 | +} | |
27757 | + | |
27758 | +static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) | |
27759 | +{ | |
27760 | + while (__raw_spin_is_locked(lock)) | |
27761 | + cpu_relax(); | |
27762 | +} | |
27763 | + | |
27764 | +/* | |
27765 | + * Read-write spinlocks, allowing multiple readers | |
27766 | + * but only one writer. | |
27767 | + * | |
27768 | + * NOTE! it is quite common to have readers in interrupts | |
27769 | + * but no interrupt writers. For those circumstances we | |
27770 | + * can "mix" irq-safe locks - any writer needs to get a | |
27771 | + * irq-safe write-lock, but readers can get non-irqsafe | |
27772 | + * read-locks. | |
27773 | + * | |
27774 | + * On x86, we implement read-write locks as a 32-bit counter | |
27775 | + * with the high bit (sign) being the "contended" bit. | |
27776 | + */ | |
27777 | + | |
27778 | +/** | |
27779 | + * read_can_lock - would read_trylock() succeed? | |
27780 | + * @lock: the rwlock in question. | |
27781 | + */ | |
27782 | +static inline int __raw_read_can_lock(raw_rwlock_t *lock) | |
cc90b958 | 27783 | +{ |
00e5a55c BS |
27784 | + return (int)(lock)->lock > 0; |
27785 | +} | |
cc90b958 | 27786 | + |
00e5a55c BS |
27787 | +/** |
27788 | + * write_can_lock - would write_trylock() succeed? | |
27789 | + * @lock: the rwlock in question. | |
27790 | + */ | |
27791 | +static inline int __raw_write_can_lock(raw_rwlock_t *lock) | |
27792 | +{ | |
27793 | + return (lock)->lock == RW_LOCK_BIAS; | |
cc90b958 BS |
27794 | +} |
27795 | + | |
00e5a55c | 27796 | +static inline void __raw_read_lock(raw_rwlock_t *rw) |
cc90b958 | 27797 | +{ |
00e5a55c BS |
27798 | + asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" |
27799 | + "jns 1f\n" | |
27800 | + "call __read_lock_failed\n\t" | |
27801 | + "1:\n" | |
27802 | + ::LOCK_PTR_REG (rw) : "memory"); | |
27803 | +} | |
cc90b958 | 27804 | + |
00e5a55c BS |
27805 | +static inline void __raw_write_lock(raw_rwlock_t *rw) |
27806 | +{ | |
27807 | + asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t" | |
27808 | + "jz 1f\n" | |
27809 | + "call __write_lock_failed\n\t" | |
27810 | + "1:\n" | |
27811 | + ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory"); | |
cc90b958 | 27812 | +} |
cc90b958 | 27813 | + |
00e5a55c | 27814 | +static inline int __raw_read_trylock(raw_rwlock_t *lock) |
cc90b958 | 27815 | +{ |
00e5a55c | 27816 | + atomic_t *count = (atomic_t *)lock; |
cc90b958 | 27817 | + |
00e5a55c BS |
27818 | + atomic_dec(count); |
27819 | + if (atomic_read(count) >= 0) | |
27820 | + return 1; | |
27821 | + atomic_inc(count); | |
27822 | + return 0; | |
cc90b958 BS |
27823 | +} |
27824 | + | |
00e5a55c | 27825 | +static inline int __raw_write_trylock(raw_rwlock_t *lock) |
cc90b958 | 27826 | +{ |
00e5a55c | 27827 | + atomic_t *count = (atomic_t *)lock; |
cc90b958 | 27828 | + |
00e5a55c BS |
27829 | + if (atomic_sub_and_test(RW_LOCK_BIAS, count)) |
27830 | + return 1; | |
27831 | + atomic_add(RW_LOCK_BIAS, count); | |
27832 | + return 0; | |
cc90b958 BS |
27833 | +} |
27834 | + | |
00e5a55c | 27835 | +static inline void __raw_read_unlock(raw_rwlock_t *rw) |
cc90b958 | 27836 | +{ |
00e5a55c BS |
27837 | + asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); |
27838 | +} | |
cc90b958 | 27839 | + |
00e5a55c BS |
27840 | +static inline void __raw_write_unlock(raw_rwlock_t *rw) |
27841 | +{ | |
27842 | + asm volatile(LOCK_PREFIX "addl %1, %0" | |
27843 | + : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); | |
27844 | +} | |
27845 | + | |
27846 | +#define _raw_spin_relax(lock) cpu_relax() | |
27847 | +#define _raw_read_relax(lock) cpu_relax() | |
27848 | +#define _raw_write_relax(lock) cpu_relax() | |
27849 | + | |
27850 | +#endif | |
27851 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/system.h 2009-02-16 16:18:36.000000000 +0100 | |
27852 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/system.h 2009-03-16 16:33:40.000000000 +0100 | |
27853 | @@ -1,5 +1,393 @@ | |
27854 | +#ifndef _ASM_X86_SYSTEM_H_ | |
27855 | +#define _ASM_X86_SYSTEM_H_ | |
27856 | + | |
27857 | +#include <asm/asm.h> | |
27858 | +#include <asm/segment.h> | |
27859 | +#include <asm/cpufeature.h> | |
27860 | +#include <asm/cmpxchg.h> | |
27861 | +#include <asm/nops.h> | |
27862 | +#include <asm/hypervisor.h> | |
27863 | + | |
27864 | +#include <linux/kernel.h> | |
27865 | +#include <linux/irqflags.h> | |
27866 | + | |
27867 | +/* entries in ARCH_DLINFO: */ | |
27868 | +#ifdef CONFIG_IA32_EMULATION | |
27869 | +# define AT_VECTOR_SIZE_ARCH 2 | |
27870 | +#else | |
27871 | +# define AT_VECTOR_SIZE_ARCH 1 | |
27872 | +#endif | |
27873 | + | |
27874 | +#ifdef CONFIG_X86_32 | |
27875 | + | |
27876 | +struct task_struct; /* one of the stranger aspects of C forward declarations */ | |
27877 | +struct task_struct *__switch_to(struct task_struct *prev, | |
27878 | + struct task_struct *next); | |
27879 | + | |
27880 | +/* | |
27881 | + * Saving eflags is important. It switches not only IOPL between tasks, | |
27882 | + * it also protects other tasks from NT leaking through sysenter etc. | |
27883 | + */ | |
27884 | +#define switch_to(prev, next, last) do { \ | |
27885 | + unsigned long esi, edi; \ | |
27886 | + asm volatile("pushfl\n\t" /* Save flags */ \ | |
27887 | + "pushl %%ebp\n\t" \ | |
27888 | + "movl %%esp,%0\n\t" /* save ESP */ \ | |
27889 | + "movl %5,%%esp\n\t" /* restore ESP */ \ | |
27890 | + "movl $1f,%1\n\t" /* save EIP */ \ | |
27891 | + "pushl %6\n\t" /* restore EIP */ \ | |
27892 | + "jmp __switch_to\n" \ | |
27893 | + "1:\t" \ | |
27894 | + "popl %%ebp\n\t" \ | |
27895 | + "popfl" \ | |
27896 | + :"=m" (prev->thread.sp), "=m" (prev->thread.ip), \ | |
27897 | + "=a" (last), "=S" (esi), "=D" (edi) \ | |
27898 | + :"m" (next->thread.sp), "m" (next->thread.ip), \ | |
27899 | + "2" (prev), "d" (next)); \ | |
27900 | +} while (0) | |
27901 | + | |
27902 | +/* | |
27903 | + * disable hlt during certain critical i/o operations | |
27904 | + */ | |
27905 | +#define HAVE_DISABLE_HLT | |
27906 | +#else | |
27907 | +#define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t" | |
27908 | +#define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" | |
27909 | + | |
27910 | +/* frame pointer must be last for get_wchan */ | |
27911 | +#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" | |
27912 | +#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" | |
27913 | + | |
27914 | +#define __EXTRA_CLOBBER \ | |
27915 | + , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ | |
27916 | + "r12", "r13", "r14", "r15" | |
27917 | + | |
27918 | +/* Save and restore flags to handle NT leaking between tasks */ | |
27919 | +#define switch_to(prev, next, last) \ | |
27920 | + asm volatile(SAVE_CONTEXT \ | |
27921 | + "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ | |
27922 | + "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ | |
27923 | + "call __switch_to\n\t" \ | |
27924 | + ".globl thread_return\n" \ | |
27925 | + "thread_return:\n\t" \ | |
27926 | + "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ | |
27927 | + "movq %P[thread_info](%%rsi),%%r8\n\t" \ | |
27928 | + LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ | |
27929 | + "movq %%rax,%%rdi\n\t" \ | |
27930 | + "jc ret_from_fork\n\t" \ | |
27931 | + RESTORE_CONTEXT \ | |
27932 | + : "=a" (last) \ | |
27933 | + : [next] "S" (next), [prev] "D" (prev), \ | |
27934 | + [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ | |
27935 | + [ti_flags] "i" (offsetof(struct thread_info, flags)), \ | |
27936 | + [tif_fork] "i" (TIF_FORK), \ | |
27937 | + [thread_info] "i" (offsetof(struct task_struct, stack)), \ | |
27938 | + [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ | |
27939 | + : "memory", "cc" __EXTRA_CLOBBER) | |
27940 | +#endif | |
27941 | + | |
27942 | +#ifdef __KERNEL__ | |
27943 | +#define _set_base(addr, base) do { unsigned long __pr; \ | |
27944 | +__asm__ __volatile__ ("movw %%dx,%1\n\t" \ | |
27945 | + "rorl $16,%%edx\n\t" \ | |
27946 | + "movb %%dl,%2\n\t" \ | |
27947 | + "movb %%dh,%3" \ | |
27948 | + :"=&d" (__pr) \ | |
27949 | + :"m" (*((addr)+2)), \ | |
27950 | + "m" (*((addr)+4)), \ | |
27951 | + "m" (*((addr)+7)), \ | |
27952 | + "0" (base) \ | |
27953 | + ); } while (0) | |
27954 | + | |
27955 | +#define _set_limit(addr, limit) do { unsigned long __lr; \ | |
27956 | +__asm__ __volatile__ ("movw %%dx,%1\n\t" \ | |
27957 | + "rorl $16,%%edx\n\t" \ | |
27958 | + "movb %2,%%dh\n\t" \ | |
27959 | + "andb $0xf0,%%dh\n\t" \ | |
27960 | + "orb %%dh,%%dl\n\t" \ | |
27961 | + "movb %%dl,%2" \ | |
27962 | + :"=&d" (__lr) \ | |
27963 | + :"m" (*(addr)), \ | |
27964 | + "m" (*((addr)+6)), \ | |
27965 | + "0" (limit) \ | |
27966 | + ); } while (0) | |
27967 | + | |
27968 | +#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base)) | |
27969 | +#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1)) | |
27970 | + | |
27971 | +extern void load_gs_index(unsigned); | |
27972 | + | |
27973 | +/* | |
27974 | + * Load a segment. Fall back on loading the zero | |
27975 | + * segment if something goes wrong.. | |
27976 | + */ | |
27977 | +#define loadsegment(seg, value) \ | |
27978 | + asm volatile("\n" \ | |
27979 | + "1:\t" \ | |
27980 | + "movl %k0,%%" #seg "\n" \ | |
27981 | + "2:\n" \ | |
27982 | + ".section .fixup,\"ax\"\n" \ | |
27983 | + "3:\t" \ | |
27984 | + "movl %k1, %%" #seg "\n\t" \ | |
27985 | + "jmp 2b\n" \ | |
27986 | + ".previous\n" \ | |
27987 | + _ASM_EXTABLE(1b,3b) \ | |
27988 | + : :"r" (value), "r" (0)) | |
27989 | + | |
27990 | + | |
27991 | +/* | |
27992 | + * Save a segment register away | |
27993 | + */ | |
27994 | +#define savesegment(seg, value) \ | |
27995 | + asm volatile("mov %%" #seg ",%0":"=rm" (value)) | |
27996 | + | |
27997 | +static inline unsigned long get_limit(unsigned long segment) | |
27998 | +{ | |
27999 | + unsigned long __limit; | |
28000 | + __asm__("lsll %1,%0" | |
28001 | + :"=r" (__limit):"r" (segment)); | |
28002 | + return __limit+1; | |
cc90b958 BS |
28003 | +} |
28004 | + | |
00e5a55c | 28005 | +static inline void xen_clts(void) |
cc90b958 | 28006 | +{ |
00e5a55c | 28007 | + HYPERVISOR_fpu_taskswitch(0); |
cc90b958 BS |
28008 | +} |
28009 | + | |
00e5a55c | 28010 | +static inline void xen_stts(void) |
cc90b958 | 28011 | +{ |
00e5a55c | 28012 | + HYPERVISOR_fpu_taskswitch(1); |
cc90b958 BS |
28013 | +} |
28014 | + | |
28015 | +/* | |
00e5a55c BS |
28016 | + * Volatile isn't enough to prevent the compiler from reordering the |
28017 | + * read/write functions for the control registers and messing everything up. | |
28018 | + * A memory clobber would solve the problem, but would prevent reordering of | |
28019 | + * all loads stores around it, which can hurt performance. Solution is to | |
28020 | + * use a variable and mimic reads and writes to it to enforce serialization | |
cc90b958 | 28021 | + */ |
00e5a55c | 28022 | +static unsigned long __force_order; |
cc90b958 | 28023 | + |
00e5a55c | 28024 | +static inline unsigned long xen_read_cr0(void) |
cc90b958 | 28025 | +{ |
00e5a55c BS |
28026 | + unsigned long val; |
28027 | + asm volatile("mov %%cr0,%0\n\t" :"=r" (val), "=m" (__force_order)); | |
28028 | + return val; | |
cc90b958 BS |
28029 | +} |
28030 | + | |
00e5a55c | 28031 | +static inline void xen_write_cr0(unsigned long val) |
cc90b958 | 28032 | +{ |
00e5a55c | 28033 | + asm volatile("mov %0,%%cr0": :"r" (val), "m" (__force_order)); |
cc90b958 BS |
28034 | +} |
28035 | + | |
00e5a55c BS |
28036 | +#define xen_read_cr2() (current_vcpu_info()->arch.cr2) |
28037 | +#define xen_write_cr2(val) ((void)(current_vcpu_info()->arch.cr2 = (val))) | |
28038 | + | |
28039 | +static inline unsigned long xen_read_cr3(void) | |
cc90b958 | 28040 | +{ |
00e5a55c BS |
28041 | + unsigned long val; |
28042 | + asm volatile("mov %%cr3,%0\n\t" :"=r" (val), "=m" (__force_order)); | |
28043 | +#ifdef CONFIG_X86_32 | |
28044 | + return mfn_to_pfn(xen_cr3_to_pfn(val)) << PAGE_SHIFT; | |
28045 | +#else | |
28046 | + return machine_to_phys(val); | |
28047 | +#endif | |
cc90b958 BS |
28048 | +} |
28049 | + | |
00e5a55c | 28050 | +static inline void xen_write_cr3(unsigned long val) |
cc90b958 | 28051 | +{ |
00e5a55c BS |
28052 | +#ifdef CONFIG_X86_32 |
28053 | + val = xen_pfn_to_cr3(pfn_to_mfn(val >> PAGE_SHIFT)); | |
28054 | +#else | |
28055 | + val = phys_to_machine(val); | |
28056 | +#endif | |
28057 | + asm volatile("mov %0,%%cr3": :"r" (val), "m" (__force_order)); | |
cc90b958 BS |
28058 | +} |
28059 | + | |
00e5a55c | 28060 | +static inline unsigned long xen_read_cr4(void) |
cc90b958 | 28061 | +{ |
00e5a55c BS |
28062 | + unsigned long val; |
28063 | + asm volatile("mov %%cr4,%0\n\t" :"=r" (val), "=m" (__force_order)); | |
28064 | + return val; | |
28065 | +} | |
cc90b958 | 28066 | + |
00e5a55c BS |
28067 | +#define xen_read_cr4_safe() xen_read_cr4() |
28068 | + | |
28069 | +static inline void xen_write_cr4(unsigned long val) | |
28070 | +{ | |
28071 | + asm volatile("mov %0,%%cr4": :"r" (val), "m" (__force_order)); | |
28072 | +} | |
28073 | + | |
28074 | +#ifdef CONFIG_X86_64 | |
28075 | +static inline unsigned long xen_read_cr8(void) | |
28076 | +{ | |
cc90b958 BS |
28077 | + return 0; |
28078 | +} | |
28079 | + | |
00e5a55c | 28080 | +static inline void xen_write_cr8(unsigned long val) |
cc90b958 | 28081 | +{ |
00e5a55c BS |
28082 | + BUG_ON(val); |
28083 | +} | |
28084 | +#endif | |
cc90b958 | 28085 | + |
00e5a55c BS |
28086 | +static inline void xen_wbinvd(void) |
28087 | +{ | |
28088 | + asm volatile("wbinvd": : :"memory"); | |
cc90b958 | 28089 | +} |
00e5a55c BS |
28090 | +#define read_cr0() (xen_read_cr0()) |
28091 | +#define write_cr0(x) (xen_write_cr0(x)) | |
28092 | +#define read_cr2() (xen_read_cr2()) | |
28093 | +#define write_cr2(x) (xen_write_cr2(x)) | |
28094 | +#define read_cr3() (xen_read_cr3()) | |
28095 | +#define write_cr3(x) (xen_write_cr3(x)) | |
28096 | +#define read_cr4() (xen_read_cr4()) | |
28097 | +#define read_cr4_safe() (xen_read_cr4_safe()) | |
28098 | +#define write_cr4(x) (xen_write_cr4(x)) | |
28099 | +#define wbinvd() (xen_wbinvd()) | |
28100 | +#ifdef CONFIG_X86_64 | |
28101 | +#define read_cr8() (xen_read_cr8()) | |
28102 | +#define write_cr8(x) (xen_write_cr8(x)) | |
28103 | +#endif | |
cc90b958 | 28104 | + |
00e5a55c BS |
28105 | +/* Clear the 'TS' bit */ |
28106 | +#define clts() (xen_clts()) | |
28107 | +#define stts() (xen_stts()) | |
28108 | + | |
28109 | +#endif /* __KERNEL__ */ | |
28110 | + | |
28111 | +static inline void clflush(volatile void *__p) | |
cc90b958 | 28112 | +{ |
00e5a55c | 28113 | + asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); |
cc90b958 BS |
28114 | +} |
28115 | + | |
00e5a55c BS |
28116 | +#define nop() __asm__ __volatile__ ("nop") |
28117 | + | |
28118 | +void disable_hlt(void); | |
28119 | +void enable_hlt(void); | |
28120 | + | |
28121 | +extern int es7000_plat; | |
28122 | +void cpu_idle_wait(void); | |
28123 | + | |
28124 | +extern unsigned long arch_align_stack(unsigned long sp); | |
28125 | +extern void free_init_pages(char *what, unsigned long begin, unsigned long end); | |
28126 | + | |
28127 | +void default_idle(void); | |
28128 | + | |
28129 | +/* | |
28130 | + * Force strict CPU ordering. | |
28131 | + * And yes, this is required on UP too when we're talking | |
28132 | + * to devices. | |
28133 | + */ | |
28134 | #ifdef CONFIG_X86_32 | |
28135 | -# include "system_32.h" | |
28136 | +/* | |
28137 | + * For now, "wmb()" doesn't actually do anything, as all | |
28138 | + * Intel CPU's follow what Intel calls a *Processor Order*, | |
28139 | + * in which all writes are seen in the program order even | |
28140 | + * outside the CPU. | |
28141 | + * | |
28142 | + * I expect future Intel CPU's to have a weaker ordering, | |
28143 | + * but I'd also expect them to finally get their act together | |
28144 | + * and add some real memory barriers if so. | |
28145 | + * | |
28146 | + * Some non intel clones support out of order store. wmb() ceases to be a | |
28147 | + * nop for these. | |
28148 | + */ | |
28149 | +#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) | |
28150 | +#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) | |
28151 | +#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) | |
28152 | +#else | |
28153 | +#define mb() asm volatile("mfence":::"memory") | |
28154 | +#define rmb() asm volatile("lfence":::"memory") | |
28155 | +#define wmb() asm volatile("sfence" ::: "memory") | |
28156 | +#endif | |
28157 | + | |
28158 | +/** | |
28159 | + * read_barrier_depends - Flush all pending reads that subsequent reads | |
28160 | + * depend on. | |
28161 | + * | |
28162 | + * No data-dependent reads from memory-like regions are ever reordered | |
28163 | + * over this barrier. All reads preceding this primitive are guaranteed | |
28164 | + * to access memory (but not necessarily other CPUs' caches) before any | |
28165 | + * reads following this primitive that depend on the data returned by | |
28166 | + * any of the preceding reads. This primitive is much lighter weight than | |
28167 | + * rmb() on most CPUs, and is never heavier weight than is | |
28168 | + * rmb(). | |
28169 | + * | |
28170 | + * These ordering constraints are respected by both the local CPU | |
28171 | + * and the compiler. | |
28172 | + * | |
28173 | + * Ordering is not guaranteed by anything other than these primitives, | |
28174 | + * not even by data dependencies. See the documentation for | |
28175 | + * memory_barrier() for examples and URLs to more information. | |
28176 | + * | |
28177 | + * For example, the following code would force ordering (the initial | |
28178 | + * value of "a" is zero, "b" is one, and "p" is "&a"): | |
28179 | + * | |
28180 | + * <programlisting> | |
28181 | + * CPU 0 CPU 1 | |
28182 | + * | |
28183 | + * b = 2; | |
28184 | + * memory_barrier(); | |
28185 | + * p = &b; q = p; | |
28186 | + * read_barrier_depends(); | |
28187 | + * d = *q; | |
28188 | + * </programlisting> | |
28189 | + * | |
28190 | + * because the read of "*q" depends on the read of "p" and these | |
28191 | + * two reads are separated by a read_barrier_depends(). However, | |
28192 | + * the following code, with the same initial values for "a" and "b": | |
28193 | + * | |
28194 | + * <programlisting> | |
28195 | + * CPU 0 CPU 1 | |
28196 | + * | |
28197 | + * a = 2; | |
28198 | + * memory_barrier(); | |
28199 | + * b = 3; y = b; | |
28200 | + * read_barrier_depends(); | |
28201 | + * x = a; | |
28202 | + * </programlisting> | |
28203 | + * | |
28204 | + * does not enforce ordering, since there is no data dependency between | |
28205 | + * the read of "a" and the read of "b". Therefore, on some CPUs, such | |
28206 | + * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() | |
28207 | + * in cases like this where there are no data dependencies. | |
28208 | + **/ | |
28209 | + | |
28210 | +#define read_barrier_depends() do { } while (0) | |
28211 | + | |
28212 | +#ifdef CONFIG_SMP | |
28213 | +#define smp_mb() mb() | |
28214 | +#ifdef CONFIG_X86_PPRO_FENCE | |
28215 | +# define smp_rmb() rmb() | |
28216 | #else | |
28217 | -# include "system_64.h" | |
28218 | +# define smp_rmb() barrier() | |
28219 | +#endif | |
28220 | +#ifdef CONFIG_X86_OOSTORE | |
28221 | +# define smp_wmb() wmb() | |
28222 | +#else | |
28223 | +# define smp_wmb() barrier() | |
28224 | +#endif | |
28225 | +#define smp_read_barrier_depends() read_barrier_depends() | |
28226 | +#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) | |
28227 | +#else | |
28228 | +#define smp_mb() barrier() | |
28229 | +#define smp_rmb() barrier() | |
28230 | +#define smp_wmb() barrier() | |
28231 | +#define smp_read_barrier_depends() do { } while (0) | |
28232 | +#define set_mb(var, value) do { var = value; barrier(); } while (0) | |
28233 | +#endif | |
28234 | + | |
28235 | +/* | |
28236 | + * Stop RDTSC speculation. This is needed when you need to use RDTSC | |
28237 | + * (or get_cycles or vread that possibly accesses the TSC) in a defined | |
28238 | + * code region. | |
28239 | + * | |
28240 | + * (Could use an alternative three way for this if there was one.) | |
28241 | + */ | |
28242 | +static inline void rdtsc_barrier(void) | |
cc90b958 | 28243 | +{ |
00e5a55c BS |
28244 | + alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); |
28245 | + alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); | |
cc90b958 BS |
28246 | +} |
28247 | + | |
00e5a55c BS |
28248 | #endif |
28249 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/system_32.h 2009-02-16 16:18:36.000000000 +0100 | |
28250 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
cc90b958 BS |
28251 | @@ -1,312 +0,0 @@ |
28252 | -#ifndef __ASM_SYSTEM_H | |
28253 | -#define __ASM_SYSTEM_H | |
28254 | - | |
28255 | -#include <linux/kernel.h> | |
28256 | -#include <asm/segment.h> | |
28257 | -#include <asm/cpufeature.h> | |
28258 | -#include <asm/cmpxchg.h> | |
28259 | -#include <asm/synch_bitops.h> | |
28260 | -#include <asm/hypervisor.h> | |
28261 | - | |
28262 | -#ifdef __KERNEL__ | |
28263 | -#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */ | |
28264 | - | |
28265 | -struct task_struct; /* one of the stranger aspects of C forward declarations.. */ | |
28266 | -extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next)); | |
28267 | - | |
28268 | -/* | |
28269 | - * Saving eflags is important. It switches not only IOPL between tasks, | |
28270 | - * it also protects other tasks from NT leaking through sysenter etc. | |
28271 | - */ | |
28272 | -#define switch_to(prev,next,last) do { \ | |
28273 | - unsigned long esi,edi; \ | |
28274 | - asm volatile("pushfl\n\t" /* Save flags */ \ | |
28275 | - "pushl %%ebp\n\t" \ | |
28276 | - "movl %%esp,%0\n\t" /* save ESP */ \ | |
28277 | - "movl %5,%%esp\n\t" /* restore ESP */ \ | |
28278 | - "movl $1f,%1\n\t" /* save EIP */ \ | |
28279 | - "pushl %6\n\t" /* restore EIP */ \ | |
28280 | - "jmp __switch_to\n" \ | |
28281 | - "1:\t" \ | |
28282 | - "popl %%ebp\n\t" \ | |
28283 | - "popfl" \ | |
28284 | - :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \ | |
28285 | - "=a" (last),"=S" (esi),"=D" (edi) \ | |
28286 | - :"m" (next->thread.esp),"m" (next->thread.eip), \ | |
28287 | - "2" (prev), "d" (next)); \ | |
28288 | -} while (0) | |
28289 | - | |
28290 | -#define _set_base(addr,base) do { unsigned long __pr; \ | |
28291 | -__asm__ __volatile__ ("movw %%dx,%1\n\t" \ | |
28292 | - "rorl $16,%%edx\n\t" \ | |
28293 | - "movb %%dl,%2\n\t" \ | |
28294 | - "movb %%dh,%3" \ | |
28295 | - :"=&d" (__pr) \ | |
28296 | - :"m" (*((addr)+2)), \ | |
28297 | - "m" (*((addr)+4)), \ | |
28298 | - "m" (*((addr)+7)), \ | |
28299 | - "0" (base) \ | |
28300 | - ); } while(0) | |
28301 | - | |
28302 | -#define _set_limit(addr,limit) do { unsigned long __lr; \ | |
28303 | -__asm__ __volatile__ ("movw %%dx,%1\n\t" \ | |
28304 | - "rorl $16,%%edx\n\t" \ | |
28305 | - "movb %2,%%dh\n\t" \ | |
28306 | - "andb $0xf0,%%dh\n\t" \ | |
28307 | - "orb %%dh,%%dl\n\t" \ | |
28308 | - "movb %%dl,%2" \ | |
28309 | - :"=&d" (__lr) \ | |
28310 | - :"m" (*(addr)), \ | |
28311 | - "m" (*((addr)+6)), \ | |
28312 | - "0" (limit) \ | |
28313 | - ); } while(0) | |
28314 | - | |
28315 | -#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) ) | |
28316 | -#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1) ) | |
28317 | - | |
28318 | -/* | |
28319 | - * Load a segment. Fall back on loading the zero | |
28320 | - * segment if something goes wrong.. | |
28321 | - */ | |
28322 | -#define loadsegment(seg,value) \ | |
28323 | - asm volatile("\n" \ | |
28324 | - "1:\t" \ | |
28325 | - "mov %0,%%" #seg "\n" \ | |
28326 | - "2:\n" \ | |
28327 | - ".section .fixup,\"ax\"\n" \ | |
28328 | - "3:\t" \ | |
28329 | - "pushl $0\n\t" \ | |
28330 | - "popl %%" #seg "\n\t" \ | |
28331 | - "jmp 2b\n" \ | |
28332 | - ".previous\n" \ | |
28333 | - ".section __ex_table,\"a\"\n\t" \ | |
28334 | - ".align 4\n\t" \ | |
28335 | - ".long 1b,3b\n" \ | |
28336 | - ".previous" \ | |
28337 | - : :"rm" (value)) | |
28338 | - | |
28339 | -/* | |
28340 | - * Save a segment register away | |
28341 | - */ | |
28342 | -#define savesegment(seg, value) \ | |
28343 | - asm volatile("mov %%" #seg ",%0":"=rm" (value)) | |
28344 | - | |
28345 | -static inline void xen_clts(void) | |
28346 | -{ | |
28347 | - HYPERVISOR_fpu_taskswitch(0); | |
28348 | -} | |
28349 | - | |
28350 | -static inline unsigned long xen_read_cr0(void) | |
28351 | -{ | |
28352 | - unsigned long val; | |
28353 | - asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); | |
28354 | - return val; | |
28355 | -} | |
28356 | - | |
28357 | -static inline void xen_write_cr0(unsigned long val) | |
28358 | -{ | |
28359 | - asm volatile("movl %0,%%cr0": :"r" (val)); | |
28360 | -} | |
28361 | - | |
28362 | -#define xen_read_cr2() (current_vcpu_info()->arch.cr2) | |
28363 | - | |
28364 | -static inline void xen_write_cr2(unsigned long val) | |
28365 | -{ | |
28366 | - asm volatile("movl %0,%%cr2": :"r" (val)); | |
28367 | -} | |
28368 | - | |
28369 | -static inline unsigned long xen_read_cr3(void) | |
28370 | -{ | |
28371 | - unsigned long val; | |
28372 | - asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); | |
28373 | - return mfn_to_pfn(xen_cr3_to_pfn(val)) << PAGE_SHIFT; | |
28374 | -} | |
28375 | - | |
28376 | -static inline void xen_write_cr3(unsigned long val) | |
28377 | -{ | |
28378 | - val = xen_pfn_to_cr3(pfn_to_mfn(val >> PAGE_SHIFT)); | |
28379 | - asm volatile("movl %0,%%cr3": :"r" (val)); | |
28380 | -} | |
28381 | - | |
28382 | -static inline unsigned long xen_read_cr4(void) | |
28383 | -{ | |
28384 | - unsigned long val; | |
28385 | - asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); | |
28386 | - return val; | |
28387 | -} | |
28388 | - | |
28389 | -static inline unsigned long xen_read_cr4_safe(void) | |
28390 | -{ | |
28391 | - unsigned long val; | |
28392 | - /* This could fault if %cr4 does not exist */ | |
28393 | - asm volatile("1: movl %%cr4, %0 \n" | |
28394 | - "2: \n" | |
28395 | - ".section __ex_table,\"a\" \n" | |
28396 | - ".long 1b,2b \n" | |
28397 | - ".previous \n" | |
28398 | - : "=r" (val): "0" (0)); | |
28399 | - return val; | |
28400 | -} | |
28401 | - | |
28402 | -static inline void xen_write_cr4(unsigned long val) | |
28403 | -{ | |
28404 | - asm volatile("movl %0,%%cr4": :"r" (val)); | |
28405 | -} | |
28406 | - | |
28407 | -static inline void xen_wbinvd(void) | |
28408 | -{ | |
28409 | - asm volatile("wbinvd": : :"memory"); | |
28410 | -} | |
28411 | - | |
28412 | -static inline void clflush(volatile void *__p) | |
28413 | -{ | |
28414 | - asm volatile("clflush %0" : "+m" (*(char __force *)__p)); | |
28415 | -} | |
28416 | - | |
28417 | -#define read_cr0() (xen_read_cr0()) | |
28418 | -#define write_cr0(x) (xen_write_cr0(x)) | |
28419 | -#define read_cr2() (xen_read_cr2()) | |
28420 | -#define write_cr2(x) (xen_write_cr2(x)) | |
28421 | -#define read_cr3() (xen_read_cr3()) | |
28422 | -#define write_cr3(x) (xen_write_cr3(x)) | |
28423 | -#define read_cr4() (xen_read_cr4()) | |
28424 | -#define read_cr4_safe() (xen_read_cr4_safe()) | |
28425 | -#define write_cr4(x) (xen_write_cr4(x)) | |
28426 | -#define wbinvd() (xen_wbinvd()) | |
28427 | - | |
28428 | -/* Clear the 'TS' bit */ | |
28429 | -#define clts() (xen_clts()) | |
28430 | - | |
28431 | -/* Set the 'TS' bit */ | |
28432 | -#define stts() (HYPERVISOR_fpu_taskswitch(1)) | |
28433 | - | |
28434 | -#endif /* __KERNEL__ */ | |
28435 | - | |
28436 | -static inline unsigned long get_limit(unsigned long segment) | |
28437 | -{ | |
28438 | - unsigned long __limit; | |
28439 | - __asm__("lsll %1,%0" | |
28440 | - :"=r" (__limit):"r" (segment)); | |
28441 | - return __limit+1; | |
28442 | -} | |
28443 | - | |
28444 | -#define nop() __asm__ __volatile__ ("nop") | |
28445 | - | |
28446 | -/* | |
28447 | - * Force strict CPU ordering. | |
28448 | - * And yes, this is required on UP too when we're talking | |
28449 | - * to devices. | |
28450 | - * | |
28451 | - * For now, "wmb()" doesn't actually do anything, as all | |
28452 | - * Intel CPU's follow what Intel calls a *Processor Order*, | |
28453 | - * in which all writes are seen in the program order even | |
28454 | - * outside the CPU. | |
28455 | - * | |
28456 | - * I expect future Intel CPU's to have a weaker ordering, | |
28457 | - * but I'd also expect them to finally get their act together | |
28458 | - * and add some real memory barriers if so. | |
28459 | - * | |
28460 | - * Some non intel clones support out of order store. wmb() ceases to be a | |
28461 | - * nop for these. | |
28462 | - */ | |
28463 | - | |
28464 | - | |
28465 | -#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) | |
28466 | -#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) | |
28467 | -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) | |
28468 | - | |
28469 | -/** | |
28470 | - * read_barrier_depends - Flush all pending reads that subsequents reads | |
28471 | - * depend on. | |
28472 | - * | |
28473 | - * No data-dependent reads from memory-like regions are ever reordered | |
28474 | - * over this barrier. All reads preceding this primitive are guaranteed | |
28475 | - * to access memory (but not necessarily other CPUs' caches) before any | |
28476 | - * reads following this primitive that depend on the data return by | |
28477 | - * any of the preceding reads. This primitive is much lighter weight than | |
28478 | - * rmb() on most CPUs, and is never heavier weight than is | |
28479 | - * rmb(). | |
28480 | - * | |
28481 | - * These ordering constraints are respected by both the local CPU | |
28482 | - * and the compiler. | |
28483 | - * | |
28484 | - * Ordering is not guaranteed by anything other than these primitives, | |
28485 | - * not even by data dependencies. See the documentation for | |
28486 | - * memory_barrier() for examples and URLs to more information. | |
28487 | - * | |
28488 | - * For example, the following code would force ordering (the initial | |
28489 | - * value of "a" is zero, "b" is one, and "p" is "&a"): | |
28490 | - * | |
28491 | - * <programlisting> | |
28492 | - * CPU 0 CPU 1 | |
28493 | - * | |
28494 | - * b = 2; | |
28495 | - * memory_barrier(); | |
28496 | - * p = &b; q = p; | |
28497 | - * read_barrier_depends(); | |
28498 | - * d = *q; | |
28499 | - * </programlisting> | |
28500 | - * | |
28501 | - * because the read of "*q" depends on the read of "p" and these | |
28502 | - * two reads are separated by a read_barrier_depends(). However, | |
28503 | - * the following code, with the same initial values for "a" and "b": | |
28504 | - * | |
28505 | - * <programlisting> | |
28506 | - * CPU 0 CPU 1 | |
28507 | - * | |
28508 | - * a = 2; | |
28509 | - * memory_barrier(); | |
28510 | - * b = 3; y = b; | |
28511 | - * read_barrier_depends(); | |
28512 | - * x = a; | |
28513 | - * </programlisting> | |
28514 | - * | |
28515 | - * does not enforce ordering, since there is no data dependency between | |
28516 | - * the read of "a" and the read of "b". Therefore, on some CPUs, such | |
28517 | - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() | |
28518 | - * in cases like this where there are no data dependencies. | |
28519 | - **/ | |
28520 | - | |
28521 | -#define read_barrier_depends() do { } while(0) | |
28522 | - | |
28523 | -#ifdef CONFIG_SMP | |
28524 | -#define smp_mb() mb() | |
28525 | -#ifdef CONFIG_X86_PPRO_FENCE | |
28526 | -# define smp_rmb() rmb() | |
28527 | -#else | |
28528 | -# define smp_rmb() barrier() | |
28529 | -#endif | |
28530 | -#ifdef CONFIG_X86_OOSTORE | |
28531 | -# define smp_wmb() wmb() | |
28532 | -#else | |
28533 | -# define smp_wmb() barrier() | |
28534 | -#endif | |
28535 | -#define smp_read_barrier_depends() read_barrier_depends() | |
28536 | -#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) | |
28537 | -#else | |
28538 | -#define smp_mb() barrier() | |
28539 | -#define smp_rmb() barrier() | |
28540 | -#define smp_wmb() barrier() | |
28541 | -#define smp_read_barrier_depends() do { } while(0) | |
28542 | -#define set_mb(var, value) do { var = value; barrier(); } while (0) | |
28543 | -#endif | |
28544 | - | |
28545 | -#include <linux/irqflags.h> | |
28546 | - | |
28547 | -/* | |
28548 | - * disable hlt during certain critical i/o operations | |
28549 | - */ | |
28550 | -#define HAVE_DISABLE_HLT | |
28551 | -void disable_hlt(void); | |
28552 | -void enable_hlt(void); | |
28553 | - | |
28554 | -extern int es7000_plat; | |
28555 | -void cpu_idle_wait(void); | |
28556 | - | |
28557 | -extern unsigned long arch_align_stack(unsigned long sp); | |
28558 | -extern void free_init_pages(char *what, unsigned long begin, unsigned long end); | |
28559 | - | |
28560 | -void default_idle(void); | |
28561 | -void __show_registers(struct pt_regs *, int all); | |
28562 | - | |
28563 | -#endif | |
00e5a55c BS |
28564 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/system_64.h 2009-02-16 16:18:36.000000000 +0100 |
28565 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/system_64.h 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
28566 | @@ -1,122 +1,9 @@ |
28567 | #ifndef __ASM_SYSTEM_H | |
28568 | #define __ASM_SYSTEM_H | |
28569 | ||
28570 | -#include <linux/kernel.h> | |
28571 | #include <asm/segment.h> | |
28572 | #include <asm/cmpxchg.h> | |
28573 | ||
28574 | -#include <asm/synch_bitops.h> | |
28575 | -#include <asm/hypervisor.h> | |
28576 | -#include <xen/interface/arch-x86_64.h> | |
28577 | - | |
28578 | -#ifdef __KERNEL__ | |
28579 | - | |
28580 | -/* entries in ARCH_DLINFO: */ | |
28581 | -#ifdef CONFIG_IA32_EMULATION | |
28582 | -# define AT_VECTOR_SIZE_ARCH 2 | |
28583 | -#else | |
28584 | -# define AT_VECTOR_SIZE_ARCH 1 | |
28585 | -#endif | |
28586 | - | |
28587 | -#define __SAVE(reg,offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t" | |
28588 | -#define __RESTORE(reg,offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" | |
28589 | - | |
28590 | -/* frame pointer must be last for get_wchan */ | |
28591 | -#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" | |
28592 | -#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\n\t" | |
28593 | - | |
28594 | -#define __EXTRA_CLOBBER \ | |
28595 | - ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" | |
28596 | - | |
28597 | -/* Save restore flags to clear handle leaking NT */ | |
28598 | -#define switch_to(prev,next,last) \ | |
28599 | - asm volatile(SAVE_CONTEXT \ | |
28600 | - "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ | |
28601 | - "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ | |
28602 | - "call __switch_to\n\t" \ | |
28603 | - ".globl thread_return\n" \ | |
28604 | - "thread_return:\n\t" \ | |
28605 | - "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ | |
28606 | - "movq %P[thread_info](%%rsi),%%r8\n\t" \ | |
28607 | - LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ | |
28608 | - "movq %%rax,%%rdi\n\t" \ | |
28609 | - "jc ret_from_fork\n\t" \ | |
28610 | - RESTORE_CONTEXT \ | |
28611 | - : "=a" (last) \ | |
28612 | - : [next] "S" (next), [prev] "D" (prev), \ | |
28613 | - [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \ | |
28614 | - [ti_flags] "i" (offsetof(struct thread_info, flags)),\ | |
28615 | - [tif_fork] "i" (TIF_FORK), \ | |
28616 | - [thread_info] "i" (offsetof(struct task_struct, stack)), \ | |
28617 | - [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ | |
28618 | - : "memory", "cc" __EXTRA_CLOBBER) | |
28619 | - | |
28620 | -extern void load_gs_index(unsigned); | |
28621 | - | |
28622 | -/* | |
28623 | - * Load a segment. Fall back on loading the zero | |
28624 | - * segment if something goes wrong.. | |
28625 | - */ | |
28626 | -#define loadsegment(seg,value) \ | |
28627 | - asm volatile("\n" \ | |
28628 | - "1:\t" \ | |
28629 | - "movl %k0,%%" #seg "\n" \ | |
28630 | - "2:\n" \ | |
28631 | - ".section .fixup,\"ax\"\n" \ | |
28632 | - "3:\t" \ | |
28633 | - "movl %1,%%" #seg "\n\t" \ | |
28634 | - "jmp 2b\n" \ | |
28635 | - ".previous\n" \ | |
28636 | - ".section __ex_table,\"a\"\n\t" \ | |
28637 | - ".align 8\n\t" \ | |
28638 | - ".quad 1b,3b\n" \ | |
28639 | - ".previous" \ | |
28640 | - : :"r" (value), "r" (0)) | |
28641 | - | |
28642 | -/* | |
28643 | - * Clear and set 'TS' bit respectively | |
28644 | - */ | |
28645 | -#define clts() (HYPERVISOR_fpu_taskswitch(0)) | |
28646 | - | |
00e5a55c BS |
28647 | -static inline unsigned long read_cr0(void) |
28648 | -{ | |
28649 | - unsigned long cr0; | |
28650 | - asm volatile("movq %%cr0,%0" : "=r" (cr0)); | |
28651 | - return cr0; | |
28652 | -} | |
28653 | - | |
28654 | -static inline void write_cr0(unsigned long val) | |
28655 | -{ | |
28656 | - asm volatile("movq %0,%%cr0" :: "r" (val)); | |
28657 | -} | |
28658 | - | |
28659 | -#define read_cr2() current_vcpu_info()->arch.cr2 | |
28660 | - | |
28661 | -#define write_cr2(val) ((void)(current_vcpu_info()->arch.cr2 = (val))) | |
28662 | - | |
28663 | -#define read_cr3() ({ \ | |
28664 | - unsigned long __dummy; \ | |
28665 | - asm volatile("movq %%cr3,%0" : "=r" (__dummy)); \ | |
28666 | - machine_to_phys(__dummy); \ | |
28667 | -}) | |
28668 | - | |
28669 | -static inline void write_cr3(unsigned long val) | |
28670 | -{ | |
28671 | - val = phys_to_machine(val); | |
28672 | - asm volatile("movq %0,%%cr3" :: "r" (val) : "memory"); | |
28673 | -} | |
28674 | - | |
28675 | -static inline unsigned long read_cr4(void) | |
28676 | -{ | |
28677 | - unsigned long cr4; | |
28678 | - asm volatile("movq %%cr4,%0" : "=r" (cr4)); | |
28679 | - return cr4; | |
28680 | -} | |
28681 | - | |
28682 | -static inline void write_cr4(unsigned long val) | |
28683 | -{ | |
28684 | - asm volatile("movq %0,%%cr4" :: "r" (val) : "memory"); | |
28685 | -} | |
28686 | ||
28687 | static inline unsigned long read_cr8(void) | |
28688 | { | |
28689 | @@ -128,52 +15,6 @@ static inline void write_cr8(unsigned lo | |
28690 | BUG_ON(val); | |
28691 | } | |
28692 | ||
28693 | -#define stts() (HYPERVISOR_fpu_taskswitch(1)) | |
28694 | - | |
28695 | -#define wbinvd() \ | |
28696 | - __asm__ __volatile__ ("wbinvd": : :"memory") | |
28697 | - | |
28698 | -#endif /* __KERNEL__ */ | |
28699 | - | |
28700 | -static inline void clflush(volatile void *__p) | |
28701 | -{ | |
28702 | - asm volatile("clflush %0" : "+m" (*(char __force *)__p)); | |
28703 | -} | |
28704 | - | |
28705 | -#define nop() __asm__ __volatile__ ("nop") | |
28706 | - | |
28707 | -#ifdef CONFIG_SMP | |
28708 | -#define smp_mb() mb() | |
28709 | -#define smp_rmb() barrier() | |
28710 | -#define smp_wmb() barrier() | |
28711 | -#define smp_read_barrier_depends() do {} while(0) | |
28712 | -#else | |
28713 | -#define smp_mb() barrier() | |
28714 | -#define smp_rmb() barrier() | |
28715 | -#define smp_wmb() barrier() | |
28716 | -#define smp_read_barrier_depends() do {} while(0) | |
28717 | -#endif | |
28718 | - | |
28719 | - | |
28720 | -/* | |
28721 | - * Force strict CPU ordering. | |
28722 | - * And yes, this is required on UP too when we're talking | |
28723 | - * to devices. | |
28724 | - */ | |
28725 | -#define mb() asm volatile("mfence":::"memory") | |
28726 | -#define rmb() asm volatile("lfence":::"memory") | |
28727 | -#define wmb() asm volatile("sfence" ::: "memory") | |
28728 | - | |
28729 | -#define read_barrier_depends() do {} while(0) | |
28730 | -#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) | |
28731 | - | |
28732 | -#define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0) | |
28733 | - | |
28734 | #include <linux/irqflags.h> | |
28735 | ||
28736 | -void cpu_idle_wait(void); | |
28737 | - | |
28738 | -extern unsigned long arch_align_stack(unsigned long sp); | |
28739 | -extern void free_init_pages(char *what, unsigned long begin, unsigned long end); | |
28740 | - | |
28741 | #endif | |
28742 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/tlbflush.h 2009-02-16 16:18:36.000000000 +0100 | |
28743 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/tlbflush.h 2009-03-16 16:33:40.000000000 +0100 | |
28744 | @@ -1,5 +1,106 @@ | |
28745 | +#ifndef _ASM_X86_TLBFLUSH_H | |
28746 | +#define _ASM_X86_TLBFLUSH_H | |
cc90b958 | 28747 | + |
00e5a55c BS |
28748 | +#include <linux/mm.h> |
28749 | +#include <linux/sched.h> | |
cc90b958 | 28750 | + |
00e5a55c BS |
28751 | +#include <asm/processor.h> |
28752 | +#include <asm/system.h> | |
cc90b958 | 28753 | + |
00e5a55c BS |
28754 | +#define __flush_tlb() xen_tlb_flush() |
28755 | +#define __flush_tlb_global() xen_tlb_flush() | |
28756 | +#define __flush_tlb_single(addr) xen_invlpg(addr) | |
28757 | +#define __flush_tlb_all() xen_tlb_flush() | |
28758 | +#define __flush_tlb_one(addr) xen_invlpg(addr) | |
cc90b958 | 28759 | + |
00e5a55c BS |
28760 | #ifdef CONFIG_X86_32 |
28761 | -# include "tlbflush_32.h" | |
28762 | +# define TLB_FLUSH_ALL 0xffffffff | |
28763 | #else | |
28764 | -# include "tlbflush_64.h" | |
28765 | +# define TLB_FLUSH_ALL -1ULL | |
28766 | #endif | |
cc90b958 BS |
28767 | + |
28768 | +/* | |
00e5a55c BS |
28769 | + * TLB flushing: |
28770 | + * | |
28771 | + * - flush_tlb() flushes the current mm struct TLBs | |
28772 | + * - flush_tlb_all() flushes all processes TLBs | |
28773 | + * - flush_tlb_mm(mm) flushes the specified mm context TLB's | |
28774 | + * - flush_tlb_page(vma, vmaddr) flushes one page | |
28775 | + * - flush_tlb_range(vma, start, end) flushes a range of pages | |
28776 | + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages | |
28777 | + * | |
28778 | + * ..but the i386 has somewhat limited tlb flushing capabilities, | |
28779 | + * and page-granular flushes are available only on i486 and up. | |
28780 | + * | |
28781 | + * x86-64 can only flush individual pages or full VMs. For a range flush | |
28782 | + * we always do the full VM. Might be worth trying if for a small | |
28783 | + * range a few INVLPGs in a row are a win. | |
cc90b958 | 28784 | + */ |
cc90b958 | 28785 | + |
00e5a55c | 28786 | +#ifndef CONFIG_SMP |
cc90b958 | 28787 | + |
00e5a55c BS |
28788 | +#define flush_tlb() __flush_tlb() |
28789 | +#define flush_tlb_all() __flush_tlb_all() | |
28790 | +#define local_flush_tlb() __flush_tlb() | |
cc90b958 | 28791 | + |
00e5a55c | 28792 | +static inline void flush_tlb_mm(struct mm_struct *mm) |
cc90b958 | 28793 | +{ |
00e5a55c BS |
28794 | + if (mm == current->active_mm) |
28795 | + __flush_tlb(); | |
cc90b958 | 28796 | +} |
cc90b958 | 28797 | + |
00e5a55c BS |
28798 | +static inline void flush_tlb_page(struct vm_area_struct *vma, |
28799 | + unsigned long addr) | |
cc90b958 | 28800 | +{ |
00e5a55c BS |
28801 | + if (vma->vm_mm == current->active_mm) |
28802 | + __flush_tlb_one(addr); | |
cc90b958 | 28803 | +} |
cc90b958 | 28804 | + |
00e5a55c BS |
28805 | +static inline void flush_tlb_range(struct vm_area_struct *vma, |
28806 | + unsigned long start, unsigned long end) | |
cc90b958 | 28807 | +{ |
00e5a55c BS |
28808 | + if (vma->vm_mm == current->active_mm) |
28809 | + __flush_tlb(); | |
cc90b958 BS |
28810 | +} |
28811 | + | |
00e5a55c | 28812 | +#else /* SMP */ |
cc90b958 | 28813 | + |
00e5a55c | 28814 | +#include <asm/smp.h> |
cc90b958 | 28815 | + |
00e5a55c | 28816 | +#define local_flush_tlb() __flush_tlb() |
cc90b958 | 28817 | + |
00e5a55c BS |
28818 | +#define flush_tlb_all xen_tlb_flush_all |
28819 | +#define flush_tlb_current_task() xen_tlb_flush_mask(&current->mm->cpu_vm_mask) | |
28820 | +#define flush_tlb_mm(mm) xen_tlb_flush_mask(&(mm)->cpu_vm_mask) | |
28821 | +#define flush_tlb_page(vma, va) xen_invlpg_mask(&(vma)->vm_mm->cpu_vm_mask, va) | |
cc90b958 | 28822 | + |
00e5a55c | 28823 | +#define flush_tlb() flush_tlb_current_task() |
cc90b958 | 28824 | + |
00e5a55c BS |
28825 | +static inline void flush_tlb_range(struct vm_area_struct *vma, |
28826 | + unsigned long start, unsigned long end) | |
28827 | +{ | |
28828 | + flush_tlb_mm(vma->vm_mm); | |
28829 | +} | |
cc90b958 | 28830 | + |
00e5a55c BS |
28831 | +#define TLBSTATE_OK 1 |
28832 | +#define TLBSTATE_LAZY 2 | |
28833 | + | |
28834 | +#ifdef CONFIG_X86_32 | |
28835 | +struct tlb_state | |
28836 | +{ | |
28837 | + struct mm_struct *active_mm; | |
28838 | + int state; | |
28839 | + char __cacheline_padding[L1_CACHE_BYTES-8]; | |
28840 | +}; | |
28841 | +DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); | |
cc90b958 BS |
28842 | +#endif |
28843 | + | |
00e5a55c BS |
28844 | +#endif /* SMP */ |
28845 | + | |
28846 | +static inline void flush_tlb_kernel_range(unsigned long start, | |
28847 | + unsigned long end) | |
cc90b958 | 28848 | +{ |
00e5a55c | 28849 | + flush_tlb_all(); |
cc90b958 BS |
28850 | +} |
28851 | + | |
00e5a55c BS |
28852 | +#endif /* _ASM_X86_TLBFLUSH_H */ |
28853 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/tlbflush_32.h 2009-02-16 16:18:36.000000000 +0100 | |
28854 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
cc90b958 BS |
28855 | @@ -1,99 +0,0 @@ |
28856 | -#ifndef _I386_TLBFLUSH_H | |
28857 | -#define _I386_TLBFLUSH_H | |
28858 | - | |
28859 | -#include <linux/mm.h> | |
28860 | -#include <asm/processor.h> | |
28861 | - | |
28862 | -#define __flush_tlb() xen_tlb_flush() | |
28863 | -#define __flush_tlb_global() xen_tlb_flush() | |
28864 | -#define __flush_tlb_all() xen_tlb_flush() | |
28865 | - | |
28866 | -#define cpu_has_invlpg (boot_cpu_data.x86 > 3) | |
28867 | - | |
28868 | -#define __flush_tlb_single(addr) xen_invlpg(addr) | |
28869 | - | |
28870 | -#define __flush_tlb_one(addr) __flush_tlb_single(addr) | |
28871 | - | |
28872 | -/* | |
28873 | - * TLB flushing: | |
28874 | - * | |
28875 | - * - flush_tlb() flushes the current mm struct TLBs | |
28876 | - * - flush_tlb_all() flushes all processes TLBs | |
28877 | - * - flush_tlb_mm(mm) flushes the specified mm context TLB's | |
28878 | - * - flush_tlb_page(vma, vmaddr) flushes one page | |
28879 | - * - flush_tlb_range(vma, start, end) flushes a range of pages | |
28880 | - * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages | |
28881 | - * | |
28882 | - * ..but the i386 has somewhat limited tlb flushing capabilities, | |
28883 | - * and page-granular flushes are available only on i486 and up. | |
28884 | - */ | |
28885 | - | |
28886 | -#define TLB_FLUSH_ALL 0xffffffff | |
28887 | - | |
28888 | - | |
28889 | -#ifndef CONFIG_SMP | |
28890 | - | |
28891 | -#include <linux/sched.h> | |
28892 | - | |
28893 | -#define flush_tlb() __flush_tlb() | |
28894 | -#define flush_tlb_all() __flush_tlb_all() | |
28895 | -#define local_flush_tlb() __flush_tlb() | |
28896 | - | |
28897 | -static inline void flush_tlb_mm(struct mm_struct *mm) | |
28898 | -{ | |
28899 | - if (mm == current->active_mm) | |
28900 | - __flush_tlb(); | |
28901 | -} | |
28902 | - | |
28903 | -static inline void flush_tlb_page(struct vm_area_struct *vma, | |
28904 | - unsigned long addr) | |
28905 | -{ | |
28906 | - if (vma->vm_mm == current->active_mm) | |
28907 | - __flush_tlb_one(addr); | |
28908 | -} | |
28909 | - | |
28910 | -static inline void flush_tlb_range(struct vm_area_struct *vma, | |
28911 | - unsigned long start, unsigned long end) | |
28912 | -{ | |
28913 | - if (vma->vm_mm == current->active_mm) | |
28914 | - __flush_tlb(); | |
28915 | -} | |
28916 | - | |
28917 | -#else /* SMP */ | |
28918 | - | |
28919 | -#include <asm/smp.h> | |
28920 | - | |
28921 | -#define local_flush_tlb() \ | |
28922 | - __flush_tlb() | |
28923 | - | |
28924 | -#define flush_tlb_all xen_tlb_flush_all | |
28925 | -#define flush_tlb_current_task() xen_tlb_flush_mask(&current->mm->cpu_vm_mask) | |
28926 | -#define flush_tlb_mm(mm) xen_tlb_flush_mask(&(mm)->cpu_vm_mask) | |
28927 | -#define flush_tlb_page(vma, va) xen_invlpg_mask(&(vma)->vm_mm->cpu_vm_mask, va) | |
28928 | - | |
28929 | -#define flush_tlb() flush_tlb_current_task() | |
28930 | - | |
28931 | -static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end) | |
28932 | -{ | |
28933 | - flush_tlb_mm(vma->vm_mm); | |
28934 | -} | |
28935 | - | |
28936 | -#define TLBSTATE_OK 1 | |
28937 | -#define TLBSTATE_LAZY 2 | |
28938 | - | |
28939 | -struct tlb_state | |
28940 | -{ | |
28941 | - struct mm_struct *active_mm; | |
28942 | - int state; | |
28943 | - char __cacheline_padding[L1_CACHE_BYTES-8]; | |
28944 | -}; | |
28945 | -DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); | |
28946 | -#endif /* SMP */ | |
28947 | - | |
28948 | -static inline void flush_tlb_kernel_range(unsigned long start, | |
28949 | - unsigned long end) | |
28950 | -{ | |
28951 | - flush_tlb_all(); | |
28952 | -} | |
28953 | - | |
28954 | -#endif /* _I386_TLBFLUSH_H */ | |
00e5a55c BS |
28955 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/tlbflush_64.h 2009-02-16 16:18:36.000000000 +0100 |
28956 | +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
cc90b958 BS |
28957 | @@ -1,97 +0,0 @@ |
28958 | -#ifndef _X8664_TLBFLUSH_H | |
28959 | -#define _X8664_TLBFLUSH_H | |
28960 | - | |
28961 | -#include <linux/mm.h> | |
28962 | -#include <linux/sched.h> | |
28963 | -#include <asm/processor.h> | |
28964 | -#include <asm/system.h> | |
28965 | - | |
28966 | -#define __flush_tlb() xen_tlb_flush() | |
28967 | - | |
28968 | -/* | |
28969 | - * Global pages have to be flushed a bit differently. Not a real | |
28970 | - * performance problem because this does not happen often. | |
28971 | - */ | |
28972 | -#define __flush_tlb_global() xen_tlb_flush() | |
28973 | - | |
28974 | -#define __flush_tlb_all() __flush_tlb_global() | |
28975 | - | |
28976 | -#define __flush_tlb_one(addr) xen_invlpg((unsigned long)addr) | |
28977 | - | |
28978 | - | |
28979 | -/* | |
28980 | - * TLB flushing: | |
28981 | - * | |
28982 | - * - flush_tlb() flushes the current mm struct TLBs | |
28983 | - * - flush_tlb_all() flushes all processes TLBs | |
28984 | - * - flush_tlb_mm(mm) flushes the specified mm context TLB's | |
28985 | - * - flush_tlb_page(vma, vmaddr) flushes one page | |
28986 | - * - flush_tlb_range(vma, start, end) flushes a range of pages | |
28987 | - * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages | |
28988 | - * | |
28989 | - * x86-64 can only flush individual pages or full VMs. For a range flush | |
28990 | - * we always do the full VM. Might be worth trying if for a small | |
28991 | - * range a few INVLPGs in a row are a win. | |
28992 | - */ | |
28993 | - | |
28994 | -#ifndef CONFIG_SMP | |
28995 | - | |
28996 | -#define flush_tlb() __flush_tlb() | |
28997 | -#define flush_tlb_all() __flush_tlb_all() | |
28998 | -#define local_flush_tlb() __flush_tlb() | |
28999 | - | |
29000 | -static inline void flush_tlb_mm(struct mm_struct *mm) | |
29001 | -{ | |
29002 | - if (mm == current->active_mm) | |
29003 | - __flush_tlb(); | |
29004 | -} | |
29005 | - | |
29006 | -static inline void flush_tlb_page(struct vm_area_struct *vma, | |
29007 | - unsigned long addr) | |
29008 | -{ | |
29009 | - if (vma->vm_mm == current->active_mm) | |
29010 | - __flush_tlb_one(addr); | |
29011 | -} | |
29012 | - | |
29013 | -static inline void flush_tlb_range(struct vm_area_struct *vma, | |
29014 | - unsigned long start, unsigned long end) | |
29015 | -{ | |
29016 | - if (vma->vm_mm == current->active_mm) | |
29017 | - __flush_tlb(); | |
29018 | -} | |
29019 | - | |
29020 | -#else | |
29021 | - | |
29022 | -#include <asm/smp.h> | |
29023 | - | |
29024 | -#define local_flush_tlb() \ | |
29025 | - __flush_tlb() | |
29026 | - | |
29027 | -#define flush_tlb_all xen_tlb_flush_all | |
29028 | -#define flush_tlb_current_task() xen_tlb_flush_mask(&current->mm->cpu_vm_mask) | |
29029 | -#define flush_tlb_mm(mm) xen_tlb_flush_mask(&(mm)->cpu_vm_mask) | |
29030 | -#define flush_tlb_page(vma, va) xen_invlpg_mask(&(vma)->vm_mm->cpu_vm_mask, va) | |
29031 | - | |
29032 | -#define flush_tlb() flush_tlb_current_task() | |
29033 | - | |
29034 | -static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end) | |
29035 | -{ | |
29036 | - flush_tlb_mm(vma->vm_mm); | |
29037 | -} | |
29038 | - | |
29039 | -#define TLBSTATE_OK 1 | |
29040 | -#define TLBSTATE_LAZY 2 | |
29041 | - | |
29042 | -/* Roughly an IPI every 20MB with 4k pages for freeing page table | |
29043 | - ranges. Cost is about 42k of memory for each CPU. */ | |
29044 | -#define ARCH_FREE_PTE_NR 5350 | |
29045 | - | |
29046 | -#endif | |
29047 | - | |
29048 | -static inline void flush_tlb_kernel_range(unsigned long start, | |
29049 | - unsigned long end) | |
29050 | -{ | |
29051 | - flush_tlb_all(); | |
29052 | -} | |
29053 | - | |
29054 | -#endif /* _X8664_TLBFLUSH_H */ | |
00e5a55c BS |
29055 | --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/irq_vectors.h 2009-05-14 10:56:29.000000000 +0200 |
29056 | +++ sle11-2009-05-14/include/asm-x86/mach-xen/irq_vectors.h 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
29057 | @@ -82,7 +82,8 @@ |
29058 | ||
29059 | #define RESCHEDULE_VECTOR 0 | |
29060 | #define CALL_FUNCTION_VECTOR 1 | |
29061 | -#define NR_IPIS 2 | |
29062 | +#define SPIN_UNLOCK_VECTOR 2 | |
29063 | +#define NR_IPIS 3 | |
29064 | ||
29065 | /* | |
29066 | * The maximum number of vectors supported by i386 processors | |
00e5a55c BS |
29067 | --- sle11-2009-05-14.orig/include/asm-x86/mmu.h 2009-02-16 16:18:36.000000000 +0100 |
29068 | +++ sle11-2009-05-14/include/asm-x86/mmu.h 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
29069 | @@ -23,7 +23,7 @@ typedef struct { |
29070 | void *vdso; | |
29071 | } mm_context_t; | |
29072 | ||
29073 | -#ifdef CONFIG_SMP | |
29074 | +#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) | |
29075 | void leave_mm(int cpu); | |
29076 | #else | |
29077 | static inline void leave_mm(int cpu) | |
00e5a55c BS |
29078 | --- sle11-2009-05-14.orig/include/asm-x86/ptrace.h 2009-05-14 10:56:29.000000000 +0200 |
29079 | +++ sle11-2009-05-14/include/asm-x86/ptrace.h 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
29080 | @@ -249,7 +249,9 @@ extern void user_enable_single_step(stru |
29081 | extern void user_disable_single_step(struct task_struct *); | |
29082 | ||
29083 | extern void user_enable_block_step(struct task_struct *); | |
29084 | -#ifdef CONFIG_X86_DEBUGCTLMSR | |
29085 | +#if defined(CONFIG_XEN) | |
29086 | +#define arch_has_block_step() (0) | |
29087 | +#elif defined(CONFIG_X86_DEBUGCTLMSR) | |
29088 | #define arch_has_block_step() (1) | |
29089 | #else | |
29090 | #define arch_has_block_step() (boot_cpu_data.x86 >= 6) | |
00e5a55c BS |
29091 | --- sle11-2009-05-14.orig/include/asm-x86/thread_info.h 2009-02-16 16:17:21.000000000 +0100 |
29092 | +++ sle11-2009-05-14/include/asm-x86/thread_info.h 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 | 29093 | @@ -94,6 +94,9 @@ struct thread_info { |
cc90b958 BS |
29094 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ |
29095 | #define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ | |
00e5a55c | 29096 | #define TIF_PERFMON_CTXSW 28 /* perfmon needs ctxsw calls */ |
cc90b958 BS |
29097 | +#ifdef CONFIG_X86_XEN |
29098 | +#define TIF_CSTAR 31 /* cstar-based syscall (special handling) */ | |
29099 | +#endif | |
29100 | ||
29101 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | |
00e5a55c | 29102 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) |
cc90b958 | 29103 | @@ -118,6 +121,7 @@ struct thread_info { |
cc90b958 | 29104 | #define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) |
00e5a55c BS |
29105 | #define _TIF_PERFMON_WORK (1 << TIF_PERFMON_WORK) |
29106 | #define _TIF_PERFMON_CTXSW (1 << TIF_PERFMON_CTXSW) | |
cc90b958 BS |
29107 | +#define _TIF_CSTAR (1 << TIF_CSTAR) |
29108 | ||
29109 | /* work to do in syscall_trace_enter() */ | |
29110 | #define _TIF_WORK_SYSCALL_ENTRY \ | |
29111 | @@ -147,12 +151,12 @@ struct thread_info { | |
29112 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \ | |
29113 | _TIF_NOTSC|_TIF_PERFMON_CTXSW) | |
29114 | ||
29115 | -#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW | |
29116 | -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) | |
29117 | #else | |
29118 | -#define _TIF_WORK_CTXSW_NEXT (_TIF_NOTSC | _TIF_DEBUG) | |
29119 | -#define _TIF_WORK_CTXSW_PREV (_TIF_NOTSC) | |
29120 | +#define _TIF_WORK_CTXSW (_TIF_NOTSC \ | |
29121 | + /*todo | _TIF_DEBUGCTLMSR | _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS*/) | |
29122 | #endif | |
29123 | +#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW | |
29124 | +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) | |
29125 | ||
29126 | #define PREEMPT_ACTIVE 0x10000000 | |
29127 | ||
00e5a55c BS |
29128 | --- sle11-2009-05-14.orig/include/asm-x86/time.h 2009-05-14 10:56:29.000000000 +0200 |
29129 | +++ sle11-2009-05-14/include/asm-x86/time.h 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
29130 | @@ -58,4 +58,10 @@ static inline int native_set_wallclock(u |
29131 | ||
29132 | extern unsigned long __init calibrate_cpu(void); | |
29133 | ||
29134 | +#ifdef CONFIG_XEN | |
29135 | +extern int xen_independent_wallclock(void); | |
29136 | +extern unsigned long xen_read_persistent_clock(void); | |
29137 | +extern int xen_update_persistent_clock(void); | |
29138 | +#endif | |
29139 | + | |
29140 | #endif | |
00e5a55c BS |
29141 | --- sle11-2009-05-14.orig/include/linux/page-flags.h 2009-02-16 16:17:21.000000000 +0100 |
29142 | +++ sle11-2009-05-14/include/linux/page-flags.h 2009-03-16 16:33:40.000000000 +0100 | |
29143 | @@ -102,8 +102,8 @@ enum pageflags { | |
cc90b958 BS |
29144 | PG_foreign, /* Page is owned by foreign allocator. */ |
29145 | PG_pinned, /* Cannot alias with PG_owner_priv_1 since | |
29146 | * bad_page() checks include this bit. | |
29147 | - * Also cannot use PG_arch_1 since that now | |
29148 | - * has a different purpose on x86. */ | |
29149 | + * Should not use PG_arch_1 as that may have | |
29150 | + * a different purpose elsewhere. */ | |
29151 | #endif | |
29152 | __NR_PAGEFLAGS, | |
29153 | ||
00e5a55c BS |
29154 | --- sle11-2009-05-14.orig/include/linux/pci.h 2008-12-15 11:27:22.000000000 +0100 |
29155 | +++ sle11-2009-05-14/include/linux/pci.h 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
29156 | @@ -644,6 +644,9 @@ int pcie_set_readrq(struct pci_dev *dev, |
29157 | void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); | |
29158 | int __must_check pci_assign_resource(struct pci_dev *dev, int i); | |
29159 | int pci_select_bars(struct pci_dev *dev, unsigned long flags); | |
29160 | +#ifdef CONFIG_XEN | |
29161 | +void pci_restore_bars(struct pci_dev *); | |
29162 | +#endif | |
29163 | ||
29164 | /* ROM control related routines */ | |
29165 | void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size); | |
00e5a55c BS |
29166 | --- sle11-2009-05-14.orig/include/xen/evtchn.h 2009-03-04 11:28:34.000000000 +0100 |
29167 | +++ sle11-2009-05-14/include/xen/evtchn.h 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
29168 | @@ -130,12 +130,37 @@ static inline void clear_evtchn(int port |
29169 | synch_clear_bit(port, s->evtchn_pending); | |
29170 | } | |
29171 | ||
29172 | +static inline void set_evtchn(int port) | |
29173 | +{ | |
29174 | + shared_info_t *s = HYPERVISOR_shared_info; | |
29175 | + synch_set_bit(port, s->evtchn_pending); | |
29176 | +} | |
29177 | + | |
29178 | +static inline int test_evtchn(int port) | |
29179 | +{ | |
29180 | + shared_info_t *s = HYPERVISOR_shared_info; | |
29181 | + return synch_test_bit(port, s->evtchn_pending); | |
29182 | +} | |
29183 | + | |
29184 | static inline void notify_remote_via_evtchn(int port) | |
29185 | { | |
29186 | struct evtchn_send send = { .port = port }; | |
29187 | VOID(HYPERVISOR_event_channel_op(EVTCHNOP_send, &send)); | |
29188 | } | |
29189 | ||
29190 | +/* Clear an irq's pending state, in preparation for polling on it. */ | |
29191 | +void xen_clear_irq_pending(int irq); | |
29192 | + | |
29193 | +/* Set an irq's pending state, to avoid blocking on it. */ | |
29194 | +void xen_set_irq_pending(int irq); | |
29195 | + | |
29196 | +/* Test an irq's pending state. */ | |
29197 | +int xen_test_irq_pending(int irq); | |
29198 | + | |
29199 | +/* Poll waiting for an irq to become pending. In the usual case, the | |
29200 | + irq will be disabled so it won't deliver an interrupt. */ | |
29201 | +void xen_poll_irq(int irq); | |
29202 | + | |
29203 | /* | |
29204 | * Use these to access the event channel underlying the IRQ handle returned | |
29205 | * by bind_*_to_irqhandler(). | |
00e5a55c BS |
29206 | --- sle11-2009-05-14.orig/kernel/sysctl_check.c 2009-02-16 16:18:36.000000000 +0100 |
29207 | +++ sle11-2009-05-14/kernel/sysctl_check.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
29208 | @@ -899,7 +899,7 @@ static const struct trans_ctl_table tran |
29209 | }; | |
29210 | ||
29211 | #ifdef CONFIG_XEN | |
29212 | -static struct trans_ctl_table trans_xen_table[] = { | |
29213 | +static const struct trans_ctl_table trans_xen_table[] = { | |
29214 | { CTL_XEN_INDEPENDENT_WALLCLOCK, "independent_wallclock" }, | |
29215 | { CTL_XEN_PERMITTED_CLOCK_JITTER, "permitted_clock_jitter" }, | |
29216 | {} | |
00e5a55c BS |
29217 | --- sle11-2009-05-14.orig/lib/swiotlb-xen.c 2009-02-16 16:18:36.000000000 +0100 |
29218 | +++ sle11-2009-05-14/lib/swiotlb-xen.c 2009-03-16 16:33:40.000000000 +0100 | |
cc90b958 BS |
29219 | @@ -30,7 +30,6 @@ |
29220 | #include <asm/gnttab_dma.h> | |
29221 | ||
29222 | int swiotlb; | |
29223 | -EXPORT_SYMBOL(swiotlb); | |
29224 | ||
29225 | #define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1))) | |
29226 | ||
29227 | @@ -289,6 +288,15 @@ __sync_single(struct phys_addr buffer, c | |
29228 | } | |
29229 | } | |
29230 | ||
29231 | +static inline unsigned int is_span_boundary(unsigned int index, | |
29232 | + unsigned int nslots, | |
29233 | + unsigned long offset_slots, | |
29234 | + unsigned long max_slots) | |
29235 | +{ | |
29236 | + unsigned long offset = (offset_slots + index) & (max_slots - 1); | |
29237 | + return offset + nslots > max_slots; | |
29238 | +} | |
29239 | + | |
29240 | /* | |
29241 | * Allocates bounce buffer and returns its kernel virtual address. | |
29242 | */ | |
29243 | @@ -300,6 +308,15 @@ map_single(struct device *hwdev, struct | |
29244 | unsigned int nslots, stride, index, wrap; | |
29245 | struct phys_addr slot_buf; | |
29246 | int i; | |
29247 | + unsigned long mask; | |
29248 | + unsigned long offset_slots; | |
29249 | + unsigned long max_slots; | |
29250 | + | |
29251 | + mask = dma_get_seg_boundary(hwdev); | |
29252 | + offset_slots = -IO_TLB_SEGSIZE; | |
29253 | + max_slots = mask + 1 | |
29254 | + ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT | |
29255 | + : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); | |
29256 | ||
29257 | /* | |
29258 | * For mappings greater than a page, we limit the stride (and | |
29259 | @@ -319,12 +336,21 @@ map_single(struct device *hwdev, struct | |
29260 | */ | |
29261 | spin_lock_irqsave(&io_tlb_lock, flags); | |
29262 | { | |
29263 | - wrap = index = ALIGN(io_tlb_index, stride); | |
29264 | - | |
29265 | + index = ALIGN(io_tlb_index, stride); | |
29266 | if (index >= iotlb_nslabs) | |
29267 | - wrap = index = 0; | |
29268 | + index = 0; | |
29269 | + wrap = index; | |
29270 | ||
29271 | do { | |
29272 | + while (is_span_boundary(index, nslots, offset_slots, | |
29273 | + max_slots)) { | |
29274 | + index += stride; | |
29275 | + if (index >= iotlb_nslabs) | |
29276 | + index = 0; | |
29277 | + if (index == wrap) | |
29278 | + goto not_found; | |
29279 | + } | |
29280 | + | |
29281 | /* | |
29282 | * If we find a slot that indicates we have 'nslots' | |
29283 | * number of contiguous buffers, we allocate the | |
29284 | @@ -359,6 +385,7 @@ map_single(struct device *hwdev, struct | |
29285 | index = 0; | |
29286 | } while (index != wrap); | |
29287 | ||
29288 | + not_found: | |
29289 | spin_unlock_irqrestore(&io_tlb_lock, flags); | |
29290 | return NULL; | |
29291 | } |