5 Acked-by: Jeff Mahoney <jeffm@suse.com>
6 Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches.py
8 --- sle11-2009-05-14.orig/arch/x86/Kconfig 2009-03-16 16:33:40.000000000 +0100
9 +++ sle11-2009-05-14/arch/x86/Kconfig 2009-03-16 16:38:05.000000000 +0100
10 @@ -28,7 +28,7 @@ config X86
11 select HAVE_DYNAMIC_FTRACE
13 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) && !XEN
14 - select HAVE_ARCH_KGDB if !X86_VOYAGER
15 + select HAVE_ARCH_KGDB if !X86_VOYAGER && !XEN
16 select HAVE_ARCH_TRACEHOOK
17 select HAVE_GENERIC_DMA_COHERENT if X86_32
18 select HAVE_EFFICIENT_UNALIGNED_ACCESS
19 @@ -486,6 +486,7 @@ config PARAVIRT_DEBUG
25 This option adds a kernel parameter 'memtest', which allows memtest
27 @@ -1007,7 +1008,7 @@ config X86_PAE
29 bool "Enable 1GB pages for kernel pagetables" if EMBEDDED
32 + depends on X86_64 && !XEN
34 Allow the kernel linear mapping to use 1GB pages on CPUs that
35 support it. This can improve the kernel's performance a tiny bit by
36 @@ -1349,8 +1350,7 @@ source kernel/Kconfig.hz
39 bool "kexec system call"
40 - depends on X86_BIOS_REBOOT
41 - depends on !XEN_UNPRIVILEGED_GUEST
42 + depends on X86_BIOS_REBOOT || (XEN && !XEN_UNPRIVILEGED_GUEST)
44 kexec is a system call that implements the ability to shutdown your
45 current kernel, and to start another kernel. It is like a reboot
46 @@ -1948,6 +1948,4 @@ source "crypto/Kconfig"
48 source "arch/x86/kvm/Kconfig"
50 -source "drivers/xen/Kconfig"
53 --- sle11-2009-05-14.orig/arch/x86/ia32/ia32entry-xen.S 2009-03-16 16:33:40.000000000 +0100
54 +++ sle11-2009-05-14/arch/x86/ia32/ia32entry-xen.S 2009-03-16 16:38:05.000000000 +0100
55 @@ -129,12 +129,14 @@ sysenter_tracesys:
59 - movq $-ENOSYS,RAX(%rsp) /* really needed? */
60 + movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
61 movq %rsp,%rdi /* &pt_regs -> arg1 */
62 call syscall_trace_enter
63 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
66 + cmpl $(IA32_NR_syscalls-1),%eax
67 + ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
70 ENDPROC(ia32_sysenter_target)
71 @@ -200,13 +202,15 @@ cstar_tracesys:
75 - movq $-ENOSYS,RAX(%rsp) /* really needed? */
76 + movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
77 movq %rsp,%rdi /* &pt_regs -> arg1 */
78 call syscall_trace_enter
79 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
82 movl RSP-ARGOFFSET(%rsp), %r8d
83 + cmpl $(IA32_NR_syscalls-1),%eax
84 + ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
86 END(ia32_cstar_target)
88 @@ -264,7 +268,7 @@ ENTRY(ia32_syscall)
91 cmpl $(IA32_NR_syscalls-1),%eax
93 + ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
95 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
97 @@ -274,7 +278,7 @@ ia32_sysret:
101 - movq $-ENOSYS,RAX(%rsp) /* really needed? */
102 + movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
103 movq %rsp,%rdi /* &pt_regs -> arg1 */
104 call syscall_trace_enter
105 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
106 @@ -365,7 +369,7 @@ ia32_sys_call_table:
109 .quad compat_sys_stime /* stime */ /* 25 */
110 - .quad sys32_ptrace /* ptrace */
111 + .quad compat_sys_ptrace /* ptrace */
113 .quad sys_fstat /* (old)fstat */
115 --- sle11-2009-05-14.orig/arch/x86/kernel/Makefile 2009-03-16 16:33:40.000000000 +0100
116 +++ sle11-2009-05-14/arch/x86/kernel/Makefile 2009-03-16 16:38:05.000000000 +0100
117 @@ -122,8 +122,7 @@ ifeq ($(CONFIG_X86_64),y)
119 obj-$(CONFIG_XEN) += nmi_64.o
120 time_64-$(CONFIG_XEN) += time_32.o
121 - pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o
124 -disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \
125 - smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o
126 +disabled-obj-$(CONFIG_XEN) := crash.o early-quirks.o hpet.o i8253.o i8259_$(BITS).o \
127 + pci-swiotlb_64.o reboot.o smpboot.o tlb_$(BITS).o tsc_$(BITS).o tsc_sync.o vsmp_64.o
128 --- sle11-2009-05-14.orig/arch/x86/kernel/acpi/Makefile 2008-12-01 11:11:08.000000000 +0100
129 +++ sle11-2009-05-14/arch/x86/kernel/acpi/Makefile 2009-03-16 16:38:05.000000000 +0100
130 @@ -15,4 +15,4 @@ $(obj)/wakeup_rm.o: $(obj)/realmode/w
131 $(obj)/realmode/wakeup.bin: FORCE
132 $(Q)$(MAKE) $(build)=$(obj)/realmode
134 -disabled-obj-$(CONFIG_XEN) := cstate.o wakeup_$(BITS).o
135 +disabled-obj-$(CONFIG_XEN) := cstate.o wakeup_%.o
136 --- sle11-2009-05-14.orig/arch/x86/kernel/acpi/boot.c 2009-03-16 16:33:40.000000000 +0100
137 +++ sle11-2009-05-14/arch/x86/kernel/acpi/boot.c 2009-03-16 16:38:05.000000000 +0100
138 @@ -251,19 +251,23 @@ static int __init acpi_parse_madt(struct
140 static void __cpuinit acpi_register_lapic(int id, u8 enabled)
143 unsigned int ver = 0;
153 if (boot_cpu_physical_apicid != -1U)
154 ver = apic_version[boot_cpu_physical_apicid];
157 generic_processor_info(id, ver);
162 @@ -774,6 +778,7 @@ static int __init acpi_parse_fadt(struct
163 * returns 0 on success, < 0 on error
167 static void __init acpi_register_lapic_address(unsigned long address)
169 mp_lapic_addr = address;
170 @@ -787,6 +792,9 @@ static void __init acpi_register_lapic_a
175 +#define acpi_register_lapic_address(address)
178 static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
180 --- sle11-2009-05-14.orig/arch/x86/kernel/acpi/sleep-xen.c 2009-03-16 16:33:40.000000000 +0100
181 +++ sle11-2009-05-14/arch/x86/kernel/acpi/sleep-xen.c 2009-03-16 16:38:05.000000000 +0100
183 #include <linux/dmi.h>
184 #include <linux/cpumask.h>
186 -#include <asm/smp.h>
187 +#include "realmode/wakeup.h"
190 #ifndef CONFIG_ACPI_PV_SLEEP
191 -/* address in low memory of the wakeup routine. */
192 -unsigned long acpi_wakeup_address = 0;
193 +unsigned long acpi_wakeup_address;
194 unsigned long acpi_realmode_flags;
195 -extern char wakeup_start, wakeup_end;
197 -extern unsigned long acpi_copy_wakeup_routine(unsigned long);
198 +/* address in low memory of the wakeup routine. */
199 +static unsigned long acpi_realmode;
202 +static char temp_stack[10240];
207 @@ -26,17 +30,69 @@ extern unsigned long acpi_copy_wakeup_ro
209 * Create an identity mapped page table and copy the wakeup routine to
212 + * Note that this is too late to change acpi_wakeup_address.
214 int acpi_save_state_mem(void)
216 #ifndef CONFIG_ACPI_PV_SLEEP
217 - if (!acpi_wakeup_address) {
218 - printk(KERN_ERR "Could not allocate memory during boot, S3 disabled\n");
219 + struct wakeup_header *header;
221 + if (!acpi_realmode) {
222 + printk(KERN_ERR "Could not allocate memory during boot, "
226 - memcpy((void *)acpi_wakeup_address, &wakeup_start,
227 - &wakeup_end - &wakeup_start);
228 - acpi_copy_wakeup_routine(acpi_wakeup_address);
229 + memcpy((void *)acpi_realmode, &wakeup_code_start, WAKEUP_SIZE);
231 + header = (struct wakeup_header *)(acpi_realmode + HEADER_OFFSET);
232 + if (header->signature != 0x51ee1111) {
233 + printk(KERN_ERR "wakeup header does not match\n");
237 + header->video_mode = saved_video_mode;
239 + header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
240 + /* GDT[0]: GDT self-pointer */
241 + header->wakeup_gdt[0] =
242 + (u64)(sizeof(header->wakeup_gdt) - 1) +
243 + ((u64)(acpi_wakeup_address +
244 + ((char *)&header->wakeup_gdt - (char *)acpi_realmode))
246 + /* GDT[1]: real-mode-like code segment */
247 + header->wakeup_gdt[1] = (0x009bULL << 40) +
248 + ((u64)acpi_wakeup_address << 16) + 0xffff;
249 + /* GDT[2]: real-mode-like data segment */
250 + header->wakeup_gdt[2] = (0x0093ULL << 40) +
251 + ((u64)acpi_wakeup_address << 16) + 0xffff;
253 +#ifndef CONFIG_64BIT
254 + store_gdt((struct desc_ptr *)&header->pmode_gdt);
256 + header->pmode_efer_low = nx_enabled;
257 + if (header->pmode_efer_low & 1) {
258 + /* This is strange, why not save efer, always? */
259 + rdmsr(MSR_EFER, header->pmode_efer_low,
260 + header->pmode_efer_high);
262 +#endif /* !CONFIG_64BIT */
264 + header->pmode_cr0 = read_cr0();
265 + header->pmode_cr4 = read_cr4();
266 + header->realmode_flags = acpi_realmode_flags;
267 + header->real_magic = 0x12345678;
269 +#ifndef CONFIG_64BIT
270 + header->pmode_entry = (u32)&wakeup_pmode_return;
271 + header->pmode_cr3 = (u32)(swsusp_pg_dir - __PAGE_OFFSET);
272 + saved_magic = 0x12345678;
273 +#else /* CONFIG_64BIT */
274 + header->trampoline_segment = setup_trampoline() >> 4;
275 + init_rsp = (unsigned long)temp_stack + 4096;
276 + initial_code = (unsigned long)wakeup_long64;
277 + saved_magic = 0x123456789abcdef0;
278 +#endif /* CONFIG_64BIT */
282 @@ -61,15 +117,20 @@ void acpi_restore_state_mem(void)
283 void __init acpi_reserve_bootmem(void)
285 #ifndef CONFIG_ACPI_PV_SLEEP
286 - if ((&wakeup_end - &wakeup_start) > PAGE_SIZE*2) {
287 + if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
289 "ACPI: Wakeup code way too big, S3 disabled.\n");
293 - acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
294 - if (!acpi_wakeup_address)
295 + acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE);
297 + if (!acpi_realmode) {
298 printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
302 + acpi_wakeup_address = virt_to_phys((void *)acpi_realmode);
306 --- sle11-2009-05-14.orig/arch/x86/kernel/cpu/common-xen.c 2009-03-16 16:33:40.000000000 +0100
307 +++ sle11-2009-05-14/arch/x86/kernel/cpu/common-xen.c 2009-03-16 16:38:05.000000000 +0100
309 #include <linux/module.h>
310 #include <linux/percpu.h>
311 #include <linux/bootmem.h>
312 -#include <asm/semaphore.h>
313 #include <asm/processor.h>
314 #include <asm/i387.h>
317 #include <asm/mmu_context.h>
318 #include <asm/mtrr.h>
320 +#include <asm/pat.h>
321 #ifdef CONFIG_X86_LOCAL_APIC
322 #include <asm/mpspec.h>
323 #include <asm/apic.h>
324 @@ -69,9 +69,9 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuin
325 static int cachesize_override __cpuinitdata = -1;
326 static int disable_x86_serial_nr __cpuinitdata = 1;
328 -struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
329 +struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
331 -static void __cpuinit default_init(struct cpuinfo_x86 * c)
332 +static void __cpuinit default_init(struct cpuinfo_x86 *c)
334 /* Not much we can do here... */
335 /* Check if at least it has cpuid */
336 @@ -88,11 +88,11 @@ static struct cpu_dev __cpuinitdata defa
337 .c_init = default_init,
338 .c_vendor = "Unknown",
340 -static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu;
341 +static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
343 static int __init cachesize_setup(char *str)
345 - get_option (&str, &cachesize_override);
346 + get_option(&str, &cachesize_override);
349 __setup("cachesize=", cachesize_setup);
350 @@ -114,12 +114,12 @@ int __cpuinit get_model_name(struct cpui
351 /* Intel chips right-justify this string for some dumb reason;
352 undo that brain damage */
353 p = q = &c->x86_model_id[0];
354 - while ( *p == ' ' )
362 - while ( q <= &c->x86_model_id[48] )
363 + while (q <= &c->x86_model_id[48])
364 *q++ = '\0'; /* Zero-pad the rest */
367 @@ -137,7 +137,7 @@ void __cpuinit display_cacheinfo(struct
368 cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
369 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
370 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
371 - c->x86_cache_size=(ecx>>24)+(edx>>24);
372 + c->x86_cache_size = (ecx>>24)+(edx>>24);
375 if (n < 0x80000006) /* Some chips just has a large L1. */
376 @@ -145,16 +145,16 @@ void __cpuinit display_cacheinfo(struct
378 ecx = cpuid_ecx(0x80000006);
382 /* do processor-specific cache resizing */
383 if (this_cpu->c_size_cache)
384 - l2size = this_cpu->c_size_cache(c,l2size);
385 + l2size = this_cpu->c_size_cache(c, l2size);
387 /* Allow user to override all this if necessary. */
388 if (cachesize_override != -1)
389 l2size = cachesize_override;
393 return; /* Again, no L2 cache is possible */
395 c->x86_cache_size = l2size;
396 @@ -163,16 +163,19 @@ void __cpuinit display_cacheinfo(struct
400 -/* Naming convention should be: <Name> [(<Codename>)] */
401 -/* This table only is used unless init_<vendor>() below doesn't set it; */
402 -/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
404 + * Naming convention should be: <Name> [(<Codename>)]
405 + * This table only is used unless init_<vendor>() below doesn't set it;
406 + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
410 /* Look up CPU names by table lookup. */
411 static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
413 struct cpu_model_info *info;
415 - if ( c->x86_model >= 16 )
416 + if (c->x86_model >= 16)
417 return NULL; /* Range check */
420 @@ -197,9 +200,9 @@ static void __cpuinit get_cpu_vendor(str
422 for (i = 0; i < X86_VENDOR_NUM; i++) {
424 - if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
425 - (cpu_devs[i]->c_ident[1] &&
426 - !strcmp(v,cpu_devs[i]->c_ident[1]))) {
427 + if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
428 + (cpu_devs[i]->c_ident[1] &&
429 + !strcmp(v, cpu_devs[i]->c_ident[1]))) {
432 this_cpu = cpu_devs[i];
433 @@ -217,7 +220,7 @@ static void __cpuinit get_cpu_vendor(str
437 -static int __init x86_fxsr_setup(char * s)
438 +static int __init x86_fxsr_setup(char *s)
440 setup_clear_cpu_cap(X86_FEATURE_FXSR);
441 setup_clear_cpu_cap(X86_FEATURE_XMM);
442 @@ -226,7 +229,7 @@ static int __init x86_fxsr_setup(char *
443 __setup("nofxsr", x86_fxsr_setup);
446 -static int __init x86_sep_setup(char * s)
447 +static int __init x86_sep_setup(char *s)
449 setup_clear_cpu_cap(X86_FEATURE_SEP);
451 @@ -315,12 +318,15 @@ static void __cpuinit early_get_cap(stru
455 -/* Do minimum CPU detection early.
456 - Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
457 - The others are not touched to avoid unwanted side effects.
459 - WARNING: this function is only called on the BP. Don't add code here
460 - that is supposed to run on all CPUs. */
462 + * Do minimum CPU detection early.
463 + * Fields really needed: vendor, cpuid_level, family, model, mask,
465 + * The others are not touched to avoid unwanted side effects.
467 + * WARNING: this function is only called on the BP. Don't add code here
468 + * that is supposed to run on all CPUs.
470 static void __init early_cpu_detect(void)
472 struct cpuinfo_x86 *c = &boot_cpu_data;
473 @@ -335,19 +341,14 @@ static void __init early_cpu_detect(void
475 get_cpu_vendor(c, 1);
477 - switch (c->x86_vendor) {
478 - case X86_VENDOR_AMD:
481 - case X86_VENDOR_INTEL:
482 - early_init_intel(c);
485 + if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
486 + cpu_devs[c->x86_vendor]->c_early_init)
487 + cpu_devs[c->x86_vendor]->c_early_init(c);
492 -static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
493 +static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
497 @@ -358,13 +359,12 @@ static void __cpuinit generic_identify(s
498 (unsigned int *)&c->x86_vendor_id[0],
499 (unsigned int *)&c->x86_vendor_id[8],
500 (unsigned int *)&c->x86_vendor_id[4]);
503 get_cpu_vendor(c, 0);
504 /* Initialize the standard set of capabilities */
505 /* Note that the vendor-specific code below might override */
507 /* Intel-defined flags: level 0x00000001 */
508 - if ( c->cpuid_level >= 0x00000001 ) {
509 + if (c->cpuid_level >= 0x00000001) {
510 u32 capability, excap;
511 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
512 c->x86_capability[0] = capability;
513 @@ -376,12 +376,14 @@ static void __cpuinit generic_identify(s
515 c->x86_model += ((tfms >> 16) & 0xF) << 4;
516 c->x86_mask = tfms & 15;
517 + c->initial_apicid = (ebx >> 24) & 0xFF;
519 - c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
520 + c->apicid = phys_pkg_id(c->initial_apicid, 0);
521 + c->phys_proc_id = c->initial_apicid;
523 - c->apicid = (ebx >> 24) & 0xFF;
524 + c->apicid = c->initial_apicid;
526 - if (c->x86_capability[0] & (1<<19))
527 + if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
528 c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
530 /* Have CPUID level 0 only - unheard of */
531 @@ -390,33 +392,30 @@ static void __cpuinit generic_identify(s
533 /* AMD-defined flags: level 0x80000001 */
534 xlvl = cpuid_eax(0x80000000);
535 - if ( (xlvl & 0xffff0000) == 0x80000000 ) {
536 - if ( xlvl >= 0x80000001 ) {
537 + if ((xlvl & 0xffff0000) == 0x80000000) {
538 + if (xlvl >= 0x80000001) {
539 c->x86_capability[1] = cpuid_edx(0x80000001);
540 c->x86_capability[6] = cpuid_ecx(0x80000001);
542 - if ( xlvl >= 0x80000004 )
543 + if (xlvl >= 0x80000004)
544 get_model_name(c); /* Default name */
547 init_scattered_cpuid_features(c);
550 -#ifdef CONFIG_X86_HT
551 - c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
555 static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
557 - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
558 + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
559 /* Disable processor serial number */
560 - unsigned long lo,hi;
561 - rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
562 + unsigned long lo, hi;
563 + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
565 - wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
566 + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
567 printk(KERN_NOTICE "CPU serial number disabled.\n");
568 - clear_bit(X86_FEATURE_PN, c->x86_capability);
569 + clear_cpu_cap(c, X86_FEATURE_PN);
571 /* Disabling the serial number may affect the cpuid level */
572 c->cpuid_level = cpuid_eax(0);
573 @@ -451,9 +450,11 @@ void __cpuinit identify_cpu(struct cpuin
574 memset(&c->x86_capability, 0, sizeof c->x86_capability);
576 if (!have_cpuid_p()) {
577 - /* First of all, decide if this is a 486 or higher */
578 - /* It's a 486 if we can modify the AC flag */
579 - if ( flag_is_changeable_p(X86_EFLAGS_AC) )
581 + * First of all, decide if this is a 486 or higher
582 + * It's a 486 if we can modify the AC flag
584 + if (flag_is_changeable_p(X86_EFLAGS_AC))
588 @@ -486,10 +487,10 @@ void __cpuinit identify_cpu(struct cpuin
591 /* If the model name is still unset, do table lookup. */
592 - if ( !c->x86_model_id[0] ) {
593 + if (!c->x86_model_id[0]) {
595 p = table_lookup_model(c);
598 strcpy(c->x86_model_id, p);
601 @@ -503,9 +504,9 @@ void __cpuinit identify_cpu(struct cpuin
602 * common between the CPUs. The first time this routine gets
603 * executed, c == &boot_cpu_data.
605 - if ( c != &boot_cpu_data ) {
606 + if (c != &boot_cpu_data) {
607 /* AND the already accumulated flags with these */
608 - for ( i = 0 ; i < NCAPINTS ; i++ )
609 + for (i = 0 ; i < NCAPINTS ; i++)
610 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
613 @@ -549,7 +550,7 @@ void __cpuinit detect_ht(struct cpuinfo_
615 if (smp_num_siblings == 1) {
616 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
617 - } else if (smp_num_siblings > 1 ) {
618 + } else if (smp_num_siblings > 1) {
620 if (smp_num_siblings > NR_CPUS) {
621 printk(KERN_WARNING "CPU: Unsupported number of the "
622 @@ -559,7 +560,7 @@ void __cpuinit detect_ht(struct cpuinfo_
625 index_msb = get_count_order(smp_num_siblings);
626 - c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
627 + c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
629 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
631 @@ -570,7 +571,7 @@ void __cpuinit detect_ht(struct cpuinfo_
633 core_bits = get_count_order(c->x86_max_cores);
635 - c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
636 + c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
637 ((1 << core_bits) - 1);
639 if (c->x86_max_cores > 1)
640 @@ -604,7 +605,7 @@ void __cpuinit print_cpu_info(struct cpu
642 printk("%s", c->x86_model_id);
644 - if (c->x86_mask || c->cpuid_level >= 0)
645 + if (c->x86_mask || c->cpuid_level >= 0)
646 printk(" stepping %02x\n", c->x86_mask);
649 @@ -623,24 +624,17 @@ __setup("clearcpuid=", setup_disablecpui
651 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
653 -/* This is hacky. :)
654 - * We're emulating future behavior.
655 - * In the future, the cpu-specific init functions will be called implicitly
656 - * via the magic of initcalls.
657 - * They will insert themselves into the cpu_devs structure.
658 - * Then, when cpu_init() is called, we can just iterate over that array.
660 void __init early_cpu_init(void)
666 - centaur_init_cpu();
667 - transmeta_init_cpu();
670 + struct cpu_vendor_dev *cvdev;
672 + for (cvdev = __x86cpuvendor_start ;
673 + cvdev < __x86cpuvendor_end ;
675 + cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
678 + validate_pat_support(&boot_cpu_data);
681 /* Make sure %fs is initialized properly in idle threads */
682 @@ -685,7 +679,7 @@ void __cpuinit cpu_init(void)
683 int cpu = smp_processor_id();
684 struct task_struct *curr = current;
685 #ifndef CONFIG_X86_NO_TSS
686 - struct tss_struct * t = &per_cpu(init_tss, cpu);
687 + struct tss_struct *t = &per_cpu(init_tss, cpu);
689 struct thread_struct *thread = &curr->thread;
691 @@ -738,7 +732,7 @@ void __cpuinit cpu_init(void)
692 mxcsr_feature_mask_init();
695 -#ifdef CONFIG_HOTPLUG_CPU
696 +#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN)
697 void __cpuinit cpu_uninit(void)
699 int cpu = raw_smp_processor_id();
700 --- sle11-2009-05-14.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2009-03-16 16:33:40.000000000 +0100
701 +++ sle11-2009-05-14/arch/x86/kernel/cpu/mtrr/main-xen.c 2009-03-16 16:38:05.000000000 +0100
702 @@ -35,6 +35,8 @@ struct mtrr_ops *mtrr_if = &generic_mtrr
703 unsigned int num_var_ranges;
704 unsigned int mtrr_usage_table[MAX_VAR_RANGES];
708 static void __init set_num_var_ranges(void)
710 struct xen_platform_op op;
711 @@ -162,8 +164,144 @@ mtrr_del(int reg, unsigned long base, un
712 EXPORT_SYMBOL(mtrr_add);
713 EXPORT_SYMBOL(mtrr_del);
716 + * Returns the effective MTRR type for the region
718 + * - 0xFE - when the range is "not entirely covered" by _any_ var range MTRR
719 + * - 0xFF - when MTRR is not enabled
721 +u8 mtrr_type_lookup(u64 start, u64 end)
724 + u64 start_mfn, end_mfn, base_mfn, top_mfn;
725 + u8 prev_match, curr_match;
726 + struct xen_platform_op op;
728 + if (!is_initial_xendomain())
729 + return MTRR_TYPE_WRBACK;
731 + if (!num_var_ranges)
734 + start_mfn = start >> PAGE_SHIFT;
735 + /* Make end inclusive end, instead of exclusive */
736 + end_mfn = --end >> PAGE_SHIFT;
738 + /* Look in fixed ranges. Just return the type as per start */
739 + if (start_mfn < 0x100) {
741 + op.cmd = XENPF_read_memtype;
742 + op.u.read_memtype.reg = ???;
743 + error = HYPERVISOR_platform_op(&op);
745 + return op.u.read_memtype.type;
747 + return MTRR_TYPE_UNCACHABLE;
751 + * Look in variable ranges
752 + * Look of multiple ranges matching this address and pick type
753 + * as per MTRR precedence
756 + for (i = 0; i < num_var_ranges; ++i) {
757 + op.cmd = XENPF_read_memtype;
758 + op.u.read_memtype.reg = i;
759 + error = HYPERVISOR_platform_op(&op);
761 + if (error || !op.u.read_memtype.nr_mfns)
764 + base_mfn = op.u.read_memtype.mfn;
765 + top_mfn = base_mfn + op.u.read_memtype.nr_mfns - 1;
767 + if (base_mfn > end_mfn || start_mfn > top_mfn) {
771 + if (base_mfn > start_mfn || end_mfn > top_mfn) {
775 + curr_match = op.u.read_memtype.type;
776 + if (prev_match == 0xFF) {
777 + prev_match = curr_match;
781 + if (prev_match == MTRR_TYPE_UNCACHABLE ||
782 + curr_match == MTRR_TYPE_UNCACHABLE) {
783 + return MTRR_TYPE_UNCACHABLE;
786 + if ((prev_match == MTRR_TYPE_WRBACK &&
787 + curr_match == MTRR_TYPE_WRTHROUGH) ||
788 + (prev_match == MTRR_TYPE_WRTHROUGH &&
789 + curr_match == MTRR_TYPE_WRBACK)) {
790 + prev_match = MTRR_TYPE_WRTHROUGH;
791 + curr_match = MTRR_TYPE_WRTHROUGH;
794 + if (prev_match != curr_match) {
795 + return MTRR_TYPE_UNCACHABLE;
800 + if (start >= (1ULL<<32) && (end < tom2))
801 + return MTRR_TYPE_WRBACK;
804 + if (prev_match != 0xFF)
808 + op.cmd = XENPF_read_def_memtype;
809 + error = HYPERVISOR_platform_op(&op);
811 + return op.u.read_def_memtype.type;
813 + return MTRR_TYPE_UNCACHABLE;
817 + * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
818 + * for memory >4GB. Check for that here.
819 + * Note this won't check if the MTRRs < 4GB where the magic bit doesn't
820 + * apply to are wrong, but so far we don't know of any such case in the wild.
822 +#define Tom2Enabled (1U << 21)
823 +#define Tom2ForceMemTypeWB (1U << 22)
825 +int __init amd_special_default_mtrr(void)
829 + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
831 + if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
833 + /* In case some hypervisor doesn't pass SYSCFG through */
834 + if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
837 + * Memory between 4GB and top of mem is forced WB by this magic bit.
838 + * Reserved before K8RevF, but should be zero there.
840 + if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
841 + (Tom2Enabled | Tom2ForceMemTypeWB))
846 void __init mtrr_bp_init(void)
848 + if (amd_special_default_mtrr()) {
850 + rdmsrl(MSR_K8_TOP_MEM2, tom2);
851 + tom2 &= 0xffffff8000000ULL;
855 void mtrr_ap_init(void)
856 --- sle11-2009-05-14.orig/arch/x86/kernel/e820_32-xen.c 2009-03-16 16:33:40.000000000 +0100
857 +++ sle11-2009-05-14/arch/x86/kernel/e820_32-xen.c 2009-03-16 16:38:05.000000000 +0100
858 @@ -469,7 +469,7 @@ int __init sanitize_e820_map(struct e820
859 * thinkpad 560x, for example, does not cooperate with the memory
862 -int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
863 +int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
866 /* Only one memory region (or negative)? Ignore it */
867 @@ -480,33 +480,17 @@ int __init copy_e820_map(struct e820entr
871 - unsigned long long start = biosmap->addr;
872 - unsigned long long size = biosmap->size;
873 - unsigned long long end = start + size;
874 - unsigned long type = biosmap->type;
875 + u64 start = biosmap->addr;
876 + u64 size = biosmap->size;
877 + u64 end = start + size;
878 + u32 type = biosmap->type;
880 /* Overflow in 64 bits? Ignore the memory map. */
886 - * Some BIOSes claim RAM in the 640k - 1M region.
887 - * Not right. Fix it up.
889 - if (type == E820_RAM) {
890 - if (start < 0x100000ULL && end > 0xA0000ULL) {
891 - if (start < 0xA0000ULL)
892 - add_memory_region(start, 0xA0000ULL-start, type);
893 - if (end <= 0x100000ULL)
895 - start = 0x100000ULL;
896 - size = end - start;
900 add_memory_region(start, size, type);
901 - } while (biosmap++,--nr_map);
902 + } while (biosmap++, --nr_map);
905 if (is_initial_xendomain()) {
906 @@ -528,7 +512,7 @@ int __init copy_e820_map(struct e820entr
908 * Find the highest page frame number we have available
910 -void __init find_max_pfn(void)
911 +void __init propagate_e820_map(void)
915 @@ -814,7 +798,7 @@ static int __init parse_memmap(char *arg
916 * size before original memory map is
920 + propagate_e820_map();
921 saved_max_pfn = max_pfn;
924 --- sle11-2009-05-14.orig/arch/x86/kernel/e820_64-xen.c 2009-03-16 16:33:40.000000000 +0100
925 +++ sle11-2009-05-14/arch/x86/kernel/e820_64-xen.c 2009-03-16 16:38:05.000000000 +0100
926 @@ -40,11 +40,11 @@ struct e820map machine_e820;
927 unsigned long end_pfn;
930 - * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
931 - * The direct mapping extends to end_pfn_map, so that we can directly access
932 + * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
933 + * The direct mapping extends to max_pfn_mapped, so that we can directly access
934 * apertures, ACPI and other tables without having to play with fixmaps.
936 -unsigned long end_pfn_map;
937 +unsigned long max_pfn_mapped;
940 * Last pfn which the user wants to use.
941 @@ -63,8 +63,8 @@ struct early_res {
942 static struct early_res early_res[MAX_EARLY_RES] __initdata = {
944 { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
946 - { SMP_TRAMPOLINE_BASE, SMP_TRAMPOLINE_BASE + 2*PAGE_SIZE, "SMP_TRAMPOLINE" },
947 +#ifdef CONFIG_X86_TRAMPOLINE
948 + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
952 @@ -89,19 +89,47 @@ void __init reserve_early(unsigned long
953 strncpy(r->name, name, sizeof(r->name) - 1);
956 -void __init early_res_to_bootmem(void)
957 +void __init free_early(unsigned long start, unsigned long end)
959 + struct early_res *r;
962 + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
964 + if (start == r->start && end == r->end)
967 + if (i >= MAX_EARLY_RES || !early_res[i].end)
968 + panic("free_early on not reserved area: %lx-%lx!", start, end);
970 + for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
973 + memmove(&early_res[i], &early_res[i + 1],
974 + (j - 1 - i) * sizeof(struct early_res));
976 + early_res[j - 1].end = 0;
979 +void __init early_res_to_bootmem(unsigned long start, unsigned long end)
982 + unsigned long final_start, final_end;
983 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
984 struct early_res *r = &early_res[i];
985 - printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i,
986 - r->start, r->end - 1, r->name);
987 - reserve_bootmem_generic(r->start, r->end - r->start);
988 + final_start = max(start, r->start);
989 + final_end = min(end, r->end);
990 + if (final_start >= final_end)
992 + printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
993 + final_start, final_end - 1, r->name);
994 + reserve_bootmem_generic(final_start, final_end - final_start);
998 /* Check for already reserved areas */
999 -static inline int bad_addr(unsigned long *addrp, unsigned long size)
1000 +static inline int __init
1001 +bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
1004 unsigned long addr = *addrp, last;
1005 @@ -111,7 +139,7 @@ again:
1006 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
1007 struct early_res *r = &early_res[i];
1008 if (last >= r->start && addr < r->end) {
1009 - *addrp = addr = r->end;
1010 + *addrp = addr = round_up(r->end, align);
1014 @@ -119,6 +147,40 @@ again:
1018 +/* Check for already reserved areas */
1019 +static inline int __init
1020 +bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
1023 + unsigned long addr = *addrp, last;
1024 + unsigned long size = *sizep;
1027 + last = addr + size;
1028 + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
1029 + struct early_res *r = &early_res[i];
1030 + if (last > r->start && addr < r->start) {
1031 + size = r->start - addr;
1035 + if (last > r->end && addr < r->end) {
1036 + addr = round_up(r->end, align);
1037 + size = last - addr;
1041 + if (last <= r->end && addr >= r->start) {
1053 * This function checks if any part of the range <start,end> is mapped
1055 @@ -194,26 +256,27 @@ int __init e820_all_mapped(unsigned long
1056 * Find a free area with specified alignment in a specific range.
1058 unsigned long __init find_e820_area(unsigned long start, unsigned long end,
1059 - unsigned size, unsigned long align)
1060 + unsigned long size, unsigned long align)
1063 - unsigned long mask = ~(align - 1);
1065 for (i = 0; i < e820.nr_map; i++) {
1066 struct e820entry *ei = &e820.map[i];
1067 - unsigned long addr = ei->addr, last;
1068 + unsigned long addr, last;
1069 + unsigned long ei_last;
1071 if (ei->type != E820_RAM)
1073 + addr = round_up(ei->addr, align);
1074 + ei_last = ei->addr + ei->size;
1077 - if (addr > ei->addr + ei->size)
1078 + addr = round_up(start, align);
1079 + if (addr >= ei_last)
1081 - while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
1082 + while (bad_addr(&addr, size, align) && addr+size <= ei_last)
1084 - addr = (addr + align - 1) & mask;
1086 - if (last > ei->addr + ei->size)
1087 + if (last > ei_last)
1091 @@ -223,6 +286,40 @@ unsigned long __init find_e820_area(unsi
1095 + * Find next free range after *start
1097 +unsigned long __init find_e820_area_size(unsigned long start,
1098 + unsigned long *sizep,
1099 + unsigned long align)
1103 + for (i = 0; i < e820.nr_map; i++) {
1104 + struct e820entry *ei = &e820.map[i];
1105 + unsigned long addr, last;
1106 + unsigned long ei_last;
1108 + if (ei->type != E820_RAM)
1110 + addr = round_up(ei->addr, align);
1111 + ei_last = ei->addr + ei->size;
1113 + addr = round_up(start, align);
1114 + if (addr >= ei_last)
1116 + *sizep = ei_last - addr;
1117 + while (bad_addr_size(&addr, sizep, align) &&
1118 + addr + *sizep <= ei_last)
1120 + last = addr + *sizep;
1121 + if (last > ei_last)
1129 * Find the highest page frame number we have available
1131 unsigned long __init e820_end_of_ram(void)
1132 @@ -231,31 +328,29 @@ unsigned long __init e820_end_of_ram(voi
1134 end_pfn = find_max_pfn_with_active_regions();
1136 - if (end_pfn > end_pfn_map)
1137 - end_pfn_map = end_pfn;
1138 - if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
1139 - end_pfn_map = MAXMEM>>PAGE_SHIFT;
1140 + if (end_pfn > max_pfn_mapped)
1141 + max_pfn_mapped = end_pfn;
1142 + if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
1143 + max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
1144 if (end_pfn > end_user_pfn)
1145 end_pfn = end_user_pfn;
1146 - if (end_pfn > end_pfn_map)
1147 - end_pfn = end_pfn_map;
1148 + if (end_pfn > max_pfn_mapped)
1149 + end_pfn = max_pfn_mapped;
1151 - printk(KERN_INFO "end_pfn_map = %lu\n", end_pfn_map);
1152 + printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
1157 * Mark e820 reserved areas as busy for the resource manager.
1159 -void __init e820_reserve_resources(struct e820entry *e820, int nr_map,
1160 - struct resource *code_resource,
1161 - struct resource *data_resource,
1162 - struct resource *bss_resource)
1163 +void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
1166 + struct resource *res;
1168 + res = alloc_bootmem_low(sizeof(struct resource) * nr_map);
1169 for (i = 0; i < nr_map; i++) {
1170 - struct resource *res;
1171 - res = alloc_bootmem_low(sizeof(struct resource));
1172 switch (e820[i].type) {
1173 case E820_RAM: res->name = "System RAM"; break;
1174 case E820_ACPI: res->name = "ACPI Tables"; break;
1175 @@ -265,26 +360,8 @@ void __init e820_reserve_resources(struc
1176 res->start = e820[i].addr;
1177 res->end = res->start + e820[i].size - 1;
1178 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1179 - request_resource(&iomem_resource, res);
1180 - if (e820[i].type == E820_RAM) {
1182 - * We don't know which RAM region contains kernel data,
1183 - * so we try it repeatedly and let the resource manager
1187 - request_resource(res, code_resource);
1188 - request_resource(res, data_resource);
1189 - request_resource(res, bss_resource);
1191 -#ifdef CONFIG_KEXEC
1192 - if (crashk_res.start != crashk_res.end)
1193 - request_resource(res, &crashk_res);
1195 - xen_machine_kexec_register_resources(res);
1199 + insert_resource(&iomem_resource, res);
1204 @@ -338,9 +415,9 @@ static int __init e820_find_active_regio
1205 if (*ei_startpfn >= *ei_endpfn)
1208 - /* Check if end_pfn_map should be updated */
1209 - if (ei->type != E820_RAM && *ei_endpfn > end_pfn_map)
1210 - end_pfn_map = *ei_endpfn;
1211 + /* Check if max_pfn_mapped should be updated */
1212 + if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
1213 + max_pfn_mapped = *ei_endpfn;
1215 /* Skip if map is outside the node */
1216 if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
1217 @@ -667,10 +744,10 @@ static int __init copy_e820_map(struct e
1221 - unsigned long start = biosmap->addr;
1222 - unsigned long size = biosmap->size;
1223 - unsigned long end = start + size;
1224 - unsigned long type = biosmap->type;
1225 + u64 start = biosmap->addr;
1226 + u64 size = biosmap->size;
1227 + u64 end = start + size;
1228 + u32 type = biosmap->type;
1230 /* Overflow in 64 bits? Ignore the memory map. */
1232 @@ -801,7 +878,7 @@ static int __init parse_memmap_opt(char
1233 saved_max_pfn = e820_end_of_ram();
1234 remove_all_active_ranges();
1237 + max_pfn_mapped = 0;
1241 --- sle11-2009-05-14.orig/arch/x86/kernel/early_printk-xen.c 2009-03-16 16:33:40.000000000 +0100
1242 +++ sle11-2009-05-14/arch/x86/kernel/early_printk-xen.c 2009-03-16 16:38:05.000000000 +0100
1246 static int max_ypos = 25, max_xpos = 80;
1247 -static int current_ypos = 25, current_xpos = 0;
1248 +static int current_ypos = 25, current_xpos;
1250 static void early_vga_write(struct console *con, const char *str, unsigned n)
1252 @@ -108,12 +108,12 @@ static __init void early_serial_init(cha
1256 - if (!strncmp(s,"0x",2)) {
1257 + if (!strncmp(s, "0x", 2)) {
1258 early_serial_base = simple_strtoul(s, &e, 16);
1260 static int bases[] = { 0x3f8, 0x2f8 };
1262 - if (!strncmp(s,"ttyS",4))
1263 + if (!strncmp(s, "ttyS", 4))
1265 port = simple_strtoul(s, &e, 10);
1266 if (port > 1 || s == e)
1267 @@ -223,7 +223,7 @@ static struct console simnow_console = {
1269 /* Direct interface for emergencies */
1270 static struct console *early_console = &early_vga_console;
1271 -static int early_console_initialized = 0;
1272 +static int early_console_initialized;
1274 void early_printk(const char *fmt, ...)
1276 @@ -231,9 +231,9 @@ void early_printk(const char *fmt, ...)
1281 - n = vscnprintf(buf,512,fmt,ap);
1282 - early_console->write(early_console,buf,n);
1283 + va_start(ap, fmt);
1284 + n = vscnprintf(buf, 512, fmt, ap);
1285 + early_console->write(early_console, buf, n);
1289 @@ -259,16 +259,16 @@ static int __init setup_early_printk(cha
1290 early_console = &early_serial_console;
1291 } else if (!strncmp(buf, "vga", 3)) {
1293 - && boot_params.screen_info.orig_video_isVGA == 1) {
1294 + && boot_params.screen_info.orig_video_isVGA == 1) {
1295 max_xpos = boot_params.screen_info.orig_video_cols;
1296 max_ypos = boot_params.screen_info.orig_video_lines;
1297 current_ypos = boot_params.screen_info.orig_y;
1299 early_console = &early_vga_console;
1300 - } else if (!strncmp(buf, "simnow", 6)) {
1301 - simnow_init(buf + 6);
1302 - early_console = &simnow_console;
1304 + } else if (!strncmp(buf, "simnow", 6)) {
1305 + simnow_init(buf + 6);
1306 + early_console = &simnow_console;
1309 } else if (!strncmp(buf, "xen", 3)) {
1310 early_console = &xenboot_console;
1311 --- sle11-2009-05-14.orig/arch/x86/kernel/entry_32-xen.S 2009-05-14 11:18:32.000000000 +0200
1312 +++ sle11-2009-05-14/arch/x86/kernel/entry_32-xen.S 2009-03-16 16:38:05.000000000 +0100
1315 - * linux/arch/i386/entry.S
1317 * Copyright (C) 1991, 1992 Linus Torvalds
1320 #include <asm/desc.h>
1321 #include <asm/percpu.h>
1322 #include <asm/dwarf2.h>
1323 +#include <asm/processor-flags.h>
1324 #include "irq_vectors.h"
1325 #include <xen/interface/xen.h>
1329 #define nr_syscalls ((syscall_table_size)/4)
1331 -CF_MASK = 0x00000001
1332 -TF_MASK = 0x00000100
1333 -IF_MASK = 0x00000200
1334 -DF_MASK = 0x00000400
1335 -NT_MASK = 0x00004000
1336 -VM_MASK = 0x00020000
1337 /* Pseudo-eflags. */
1338 NMI_MASK = 0x80000000
1340 @@ -87,7 +81,7 @@ NMI_MASK = 0x80000000
1342 .macro TRACE_IRQS_IRET
1343 #ifdef CONFIG_TRACE_IRQFLAGS
1344 - testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off?
1345 + testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off?
1349 @@ -249,7 +243,7 @@ ret_from_intr:
1351 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
1352 movb PT_CS(%esp), %al
1353 - andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
1354 + andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
1355 cmpl $USER_RPL, %eax
1356 jb resume_kernel # not returning to v8086 or userspace
1358 @@ -258,6 +252,7 @@ ENTRY(resume_userspace)
1359 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
1360 # setting need_resched or sigpending
1361 # between sampling and the iret
1363 movl TI_flags(%ebp), %ecx
1364 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
1365 # int/exception return?
1366 @@ -274,7 +269,7 @@ need_resched:
1367 movl TI_flags(%ebp), %ecx # need_resched set ?
1368 testb $_TIF_NEED_RESCHED, %cl
1370 - testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ?
1371 + testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
1373 call preempt_schedule_irq
1375 @@ -299,10 +294,10 @@ ENTRY(ia32_sysenter_target)
1376 movl SYSENTER_stack_sp0(%esp),%esp
1379 - * No need to follow this irqs on/off section: the syscall
1380 - * disabled irqs and here we enable it straight after entry:
1381 + * Interrupts are disabled here, but we can't trace it until
1382 + * enough kernel state to call TRACE_IRQS_OFF can be called - but
1383 + * we immediately enable interrupts at that point anyway.
1385 - ENABLE_INTERRUPTS(CLBR_NONE)
1387 CFI_ADJUST_CFA_OFFSET 4
1388 /*CFI_REL_OFFSET ss, 0*/
1389 @@ -310,6 +305,7 @@ sysenter_past_esp:
1390 CFI_ADJUST_CFA_OFFSET 4
1391 CFI_REL_OFFSET esp, 0
1393 + orl $X86_EFLAGS_IF, (%esp)
1394 CFI_ADJUST_CFA_OFFSET 4
1396 CFI_ADJUST_CFA_OFFSET 4
1397 @@ -323,6 +319,11 @@ sysenter_past_esp:
1398 CFI_ADJUST_CFA_OFFSET 4
1399 CFI_REL_OFFSET eip, 0
1402 + CFI_ADJUST_CFA_OFFSET 4
1404 + ENABLE_INTERRUPTS(CLBR_NONE)
1407 * Load the potential sixth argument from user stack.
1408 * Careful about security.
1409 @@ -330,14 +331,12 @@ sysenter_past_esp:
1410 cmpl $__PAGE_OFFSET-3,%ebp
1413 + movl %ebp,PT_EBP(%esp)
1414 .section __ex_table,"a"
1416 .long 1b,syscall_fault
1420 - CFI_ADJUST_CFA_OFFSET 4
1422 GET_THREAD_INFO(%ebp)
1424 jnz syscall_trace_entry
1425 @@ -414,7 +413,7 @@ syscall_exit:
1426 # setting need_resched or sigpending
1427 # between sampling and the iret
1429 - testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
1430 + testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
1432 orl $_TIF_SINGLESTEP,TI_flags(%ebp)
1434 @@ -430,7 +429,7 @@ restore_all:
1435 # See comments in process.c:copy_thread() for details.
1436 movb PT_OLDSS(%esp), %ah
1437 movb PT_CS(%esp), %al
1438 - andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
1439 + andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
1440 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
1442 je ldt_ss # returning to user-space with LDT SS
1443 @@ -438,7 +437,7 @@ restore_nocheck:
1446 movl PT_EFLAGS(%esp), %eax
1447 - testl $(VM_MASK|NMI_MASK), %eax
1448 + testl $(X86_EFLAGS_VM|NMI_MASK), %eax
1451 shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
1452 @@ -456,7 +455,7 @@ restore_nocheck_notrace:
1455 .section .fixup,"ax"
1458 pushl $0 # no error code
1459 pushl $do_iret_error
1461 @@ -560,7 +559,7 @@ work_resched:
1462 work_notifysig: # deal with pending signals and
1463 # notify-resume requests
1465 - testl $VM_MASK, PT_EFLAGS(%esp)
1466 + testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
1468 jne work_notifysig_v86 # returning to kernel-space or
1470 @@ -617,9 +616,6 @@ END(syscall_exit_work)
1472 RING0_INT_FRAME # can't unwind into user space anyway
1474 - pushl %eax # save orig_eax
1475 - CFI_ADJUST_CFA_OFFSET 4
1477 GET_THREAD_INFO(%ebp)
1478 movl $-EFAULT,PT_EAX(%esp)
1479 jmp resume_userspace
1480 --- sle11-2009-05-14.orig/arch/x86/kernel/entry_64-xen.S 2009-03-16 16:33:40.000000000 +0100
1481 +++ sle11-2009-05-14/arch/x86/kernel/entry_64-xen.S 2009-03-16 16:38:05.000000000 +0100
1482 @@ -338,19 +338,17 @@ badsys:
1483 /* Do syscall tracing */
1486 - movq $-ENOSYS,RAX(%rsp)
1487 + movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
1488 FIXUP_TOP_OF_STACK %rdi
1490 call syscall_trace_enter
1491 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
1493 cmpq $__NR_syscall_max,%rax
1494 - movq $-ENOSYS,%rcx
1497 + ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
1498 movq %r10,%rcx /* fixup for C */
1499 call *sys_call_table(,%rax,8)
1500 -1: movq %rax,RAX-ARGOFFSET(%rsp)
1501 + movq %rax,RAX-ARGOFFSET(%rsp)
1502 /* Use IRET because user could have changed frame */
1505 --- sle11-2009-05-14.orig/arch/x86/kernel/genapic_64-xen.c 2009-03-16 16:33:40.000000000 +0100
1506 +++ sle11-2009-05-14/arch/x86/kernel/genapic_64-xen.c 2009-03-16 16:38:05.000000000 +0100
1508 #include <linux/kernel.h>
1509 #include <linux/ctype.h>
1510 #include <linux/init.h>
1511 +#include <linux/hardirq.h>
1513 #include <asm/smp.h>
1514 #include <asm/ipi.h>
1516 #include <acpi/acpi_bus.h>
1519 -/* which logical CPU number maps to which CPU (physical APIC ID) */
1521 -u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata
1522 - = { [0 ... NR_CPUS-1] = BAD_APICID };
1523 -void *x86_cpu_to_apicid_early_ptr;
1525 -DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
1526 -EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
1527 +DEFINE_PER_CPU(int, x2apic_extra_bits);
1530 struct genapic __read_mostly *genapic = &apic_flat;
1532 +static enum uv_system_type uv_system_type;
1534 extern struct genapic apic_xen;
1535 struct genapic __read_mostly *genapic = &apic_xen;
1536 @@ -47,6 +43,9 @@ struct genapic __read_mostly *genapic =
1537 void __init setup_apic_routing(void)
1540 + if (uv_system_type == UV_NON_UNIQUE_APIC)
1541 + genapic = &apic_x2apic_uv_x;
1545 * Quirk: some x86_64 machines can only use physical APIC mode
1546 @@ -59,7 +58,7 @@ void __init setup_apic_routing(void)
1550 - if (cpus_weight(cpu_possible_map) <= 8)
1551 + if (num_possible_cpus() <= 8)
1552 genapic = &apic_flat;
1554 genapic = &apic_physflat;
1555 @@ -85,3 +84,41 @@ void send_IPI_self(int vector)
1556 xen_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
1560 +int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
1563 + if (!strcmp(oem_id, "SGI")) {
1564 + if (!strcmp(oem_table_id, "UVL"))
1565 + uv_system_type = UV_LEGACY_APIC;
1566 + else if (!strcmp(oem_table_id, "UVX"))
1567 + uv_system_type = UV_X2APIC;
1568 + else if (!strcmp(oem_table_id, "UVH"))
1569 + uv_system_type = UV_NON_UNIQUE_APIC;
1576 +unsigned int read_apic_id(void)
1580 + WARN_ON(preemptible() && num_online_cpus() > 1);
1581 + id = apic_read(APIC_ID);
1582 + if (uv_system_type >= UV_X2APIC)
1583 + id |= __get_cpu_var(x2apic_extra_bits);
1587 +enum uv_system_type get_uv_system_type(void)
1589 + return uv_system_type;
1592 +int is_uv_system(void)
1594 + return uv_system_type != UV_NONE;
1597 --- sle11-2009-05-14.orig/arch/x86/kernel/genapic_xen_64.c 2008-12-15 11:27:22.000000000 +0100
1598 +++ sle11-2009-05-14/arch/x86/kernel/genapic_xen_64.c 2009-03-16 16:38:05.000000000 +0100
1599 @@ -72,9 +72,7 @@ static cpumask_t xen_target_cpus(void)
1601 static cpumask_t xen_vector_allocation_domain(int cpu)
1603 - cpumask_t domain = CPU_MASK_NONE;
1604 - cpu_set(cpu, domain);
1606 + return cpumask_of_cpu(cpu);
1610 --- sle11-2009-05-14.orig/arch/x86/kernel/head64-xen.c 2009-03-16 16:33:40.000000000 +0100
1611 +++ sle11-2009-05-14/arch/x86/kernel/head64-xen.c 2009-03-16 16:38:05.000000000 +0100
1613 #include <linux/string.h>
1614 #include <linux/percpu.h>
1615 #include <linux/start_kernel.h>
1616 +#include <linux/io.h>
1617 #include <linux/module.h>
1619 #include <asm/processor.h>
1621 #include <asm/sections.h>
1622 #include <asm/kdebug.h>
1623 #include <asm/e820.h>
1624 +#include <asm/bios_ebda.h>
1626 unsigned long start_pfn;
1628 @@ -75,34 +77,75 @@ EXPORT_SYMBOL(machine_to_phys_mapping);
1629 unsigned int machine_to_phys_order;
1630 EXPORT_SYMBOL(machine_to_phys_order);
1632 -#define EBDA_ADDR_POINTER 0x40E
1633 +#define BIOS_LOWMEM_KILOBYTES 0x413
1635 -static __init void reserve_ebda(void)
1637 + * The BIOS places the EBDA/XBDA at the top of conventional
1638 + * memory, and usually decreases the reported amount of
1639 + * conventional memory (int 0x12) too. This also contains a
1640 + * workaround for Dell systems that neglect to reserve EBDA.
1641 + * The same workaround also avoids a problem with the AMD768MPX
1642 + * chipset: reserve a page before VGA to prevent PCI prefetch
1643 + * into it (errata #56). Usually the page is reserved anyways,
1644 + * unless you have no PS/2 mouse plugged in.
1646 +static void __init reserve_ebda_region(void)
1649 - unsigned ebda_addr, ebda_size;
1650 + unsigned int lowmem, ebda_addr;
1653 - * there is a real-mode segmented pointer pointing to the
1654 - * 4K EBDA area at 0x40E
1656 - ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
1660 + /* To determine the position of the EBDA and the */
1661 + /* end of conventional memory, we need to look at */
1662 + /* the BIOS data area. In a paravirtual environment */
1663 + /* that area is absent. We'll just have to assume */
1664 + /* that the paravirt case can handle memory setup */
1665 + /* correctly, without our help. */
1666 + if (paravirt_enabled())
1669 - ebda_size = *(unsigned short *)__va(ebda_addr);
1670 + /* end of low (conventional) memory */
1671 + lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
1674 + /* start of EBDA area */
1675 + ebda_addr = get_bios_ebda();
1677 + /* Fixup: bios puts an EBDA in the top 64K segment */
1678 + /* of conventional memory, but does not adjust lowmem. */
1679 + if ((lowmem - ebda_addr) <= 0x10000)
1680 + lowmem = ebda_addr;
1682 + /* Fixup: bios does not report an EBDA at all. */
1683 + /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
1684 + if ((ebda_addr == 0) && (lowmem >= 0x9f000))
1687 + /* Paranoia: should never happen, but... */
1688 + if ((lowmem == 0) || (lowmem >= 0x100000))
1691 - /* Round EBDA up to pages */
1692 - if (ebda_size == 0)
1695 - ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
1696 - if (ebda_size > 64*1024)
1697 - ebda_size = 64*1024;
1698 + /* reserve all memory between lowmem and the 1MB mark */
1699 + reserve_early(lowmem, 0x100000, "BIOS reserved");
1703 - reserve_early(ebda_addr, ebda_addr + ebda_size, "EBDA");
1704 +static void __init reserve_setup_data(void)
1707 + struct setup_data *data;
1708 + unsigned long pa_data;
1711 + if (boot_params.hdr.version < 0x0209)
1713 + pa_data = boot_params.hdr.setup_data;
1715 + data = early_ioremap(pa_data, sizeof(*data));
1716 + sprintf(buf, "setup data %x", data->type);
1717 + reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
1718 + pa_data = data->next;
1719 + early_iounmap(data, sizeof(*data));
1724 @@ -112,6 +155,19 @@ void __init x86_64_start_kernel(char * r
1725 unsigned long machine_to_phys_nr_ents;
1729 + * Build-time sanity checks on the kernel image and module
1730 + * area mappings. (these are purely build-time and produce no code)
1732 + BUILD_BUG_ON(MODULES_VADDR < KERNEL_IMAGE_START);
1733 + BUILD_BUG_ON(MODULES_VADDR-KERNEL_IMAGE_START < KERNEL_IMAGE_SIZE);
1734 + BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
1735 + BUILD_BUG_ON((KERNEL_IMAGE_START & ~PMD_MASK) != 0);
1736 + BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
1737 + BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
1738 + BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
1739 + (__START_KERNEL & PGDIR_MASK)));
1741 xen_setup_features();
1743 xen_start_info = (struct start_info *)real_mode_data;
1744 @@ -140,7 +196,7 @@ void __init x86_64_start_kernel(char * r
1745 /* Cleanup the over mapped high alias */
1748 - for (i = 0; i < IDT_ENTRIES; i++) {
1749 + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
1750 #ifdef CONFIG_EARLY_PRINTK
1751 set_intr_gate(i, &early_idt_handlers[i]);
1753 @@ -163,7 +219,8 @@ void __init x86_64_start_kernel(char * r
1754 reserve_early(round_up(__pa_symbol(&_end), PAGE_SIZE),
1755 start_pfn << PAGE_SHIFT, "Xen provided");
1758 + reserve_ebda_region();
1759 + reserve_setup_data();
1762 * At this point everything still needed from the boot loader
1763 --- sle11-2009-05-14.orig/arch/x86/kernel/head_32-xen.S 2009-03-16 16:33:40.000000000 +0100
1764 +++ sle11-2009-05-14/arch/x86/kernel/head_32-xen.S 2009-03-16 16:38:05.000000000 +0100
1765 @@ -69,7 +69,7 @@ ENTRY(startup_32)
1766 cld # gcc2 wants the direction flag cleared at all times
1768 pushl $0 # fake return address for unwinder
1770 + jmp i386_start_kernel
1772 #define HYPERCALL_PAGE_OFFSET 0x1000
1773 .org HYPERCALL_PAGE_OFFSET
1774 --- sle11-2009-05-14.orig/arch/x86/kernel/init_task-xen.c 2009-03-16 16:33:40.000000000 +0100
1775 +++ sle11-2009-05-14/arch/x86/kernel/init_task-xen.c 2009-03-16 16:38:05.000000000 +0100
1777 #include <asm/desc.h>
1779 static struct fs_struct init_fs = INIT_FS;
1780 -static struct files_struct init_files = INIT_FILES;
1781 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
1782 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
1783 #ifdef CONFIG_X86_XEN
1784 --- sle11-2009-05-14.orig/arch/x86/kernel/io_apic_32-xen.c 2009-03-16 16:33:40.000000000 +0100
1785 +++ sle11-2009-05-14/arch/x86/kernel/io_apic_32-xen.c 2009-03-16 16:38:05.000000000 +0100
1786 @@ -88,6 +88,16 @@ int sis_apic_bug = -1;
1788 int nr_ioapic_registers[MAX_IO_APICS];
1790 +/* I/O APIC entries */
1791 +struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
1794 +/* MP IRQ source entries */
1795 +struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
1797 +/* # of MP IRQ source entries */
1798 +int mp_irq_entries;
1800 static int disable_timer_pin_1 __initdata;
1803 @@ -863,10 +873,7 @@ static int __init find_isa_irq_pin(int i
1804 for (i = 0; i < mp_irq_entries; i++) {
1805 int lbus = mp_irqs[i].mpc_srcbus;
1807 - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
1808 - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
1809 - mp_bus_id_to_type[lbus] == MP_BUS_MCA
1811 + if (test_bit(lbus, mp_bus_not_pci) &&
1812 (mp_irqs[i].mpc_irqtype == type) &&
1813 (mp_irqs[i].mpc_srcbusirq == irq))
1815 @@ -882,10 +889,7 @@ static int __init find_isa_irq_apic(int
1816 for (i = 0; i < mp_irq_entries; i++) {
1817 int lbus = mp_irqs[i].mpc_srcbus;
1819 - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
1820 - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
1821 - mp_bus_id_to_type[lbus] == MP_BUS_MCA
1823 + if (test_bit(lbus, mp_bus_not_pci) &&
1824 (mp_irqs[i].mpc_irqtype == type) &&
1825 (mp_irqs[i].mpc_srcbusirq == irq))
1827 @@ -926,7 +930,7 @@ int IO_APIC_get_PCI_irq_vector(int bus,
1828 mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
1831 - if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
1832 + if (!test_bit(lbus, mp_bus_not_pci) &&
1833 !mp_irqs[i].mpc_irqtype &&
1835 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
1836 @@ -977,6 +981,7 @@ void __init setup_ioapic_dest(void)
1837 #endif /* !CONFIG_XEN */
1840 +#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
1842 * EISA Edge/Level control register, ELCR
1844 @@ -990,6 +995,13 @@ static int EISA_ELCR(unsigned int irq)
1845 "Broken MPtable reports ISA irq %d\n", irq);
1850 +/* ISA interrupts are always polarity zero edge triggered,
1851 + * when listed as conforming in the MP table. */
1853 +#define default_ISA_trigger(idx) (0)
1854 +#define default_ISA_polarity(idx) (0)
1856 /* EISA interrupts are always polarity zero and can be edge or level
1857 * trigger depending on the ELCR value. If an interrupt is listed as
1858 @@ -997,13 +1009,7 @@ static int EISA_ELCR(unsigned int irq)
1859 * be read in from the ELCR */
1861 #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
1862 -#define default_EISA_polarity(idx) (0)
1864 -/* ISA interrupts are always polarity zero edge triggered,
1865 - * when listed as conforming in the MP table. */
1867 -#define default_ISA_trigger(idx) (0)
1868 -#define default_ISA_polarity(idx) (0)
1869 +#define default_EISA_polarity(idx) default_ISA_polarity(idx)
1871 /* PCI interrupts are always polarity one level triggered,
1872 * when listed as conforming in the MP table. */
1873 @@ -1015,7 +1021,7 @@ static int EISA_ELCR(unsigned int irq)
1874 * when listed as conforming in the MP table. */
1876 #define default_MCA_trigger(idx) (1)
1877 -#define default_MCA_polarity(idx) (0)
1878 +#define default_MCA_polarity(idx) default_ISA_polarity(idx)
1880 static int MPBIOS_polarity(int idx)
1882 @@ -1029,35 +1035,9 @@ static int MPBIOS_polarity(int idx)
1884 case 0: /* conforms, ie. bus-type dependent polarity */
1886 - switch (mp_bus_id_to_type[bus])
1888 - case MP_BUS_ISA: /* ISA pin */
1890 - polarity = default_ISA_polarity(idx);
1893 - case MP_BUS_EISA: /* EISA pin */
1895 - polarity = default_EISA_polarity(idx);
1898 - case MP_BUS_PCI: /* PCI pin */
1900 - polarity = default_PCI_polarity(idx);
1903 - case MP_BUS_MCA: /* MCA pin */
1905 - polarity = default_MCA_polarity(idx);
1910 - printk(KERN_WARNING "broken BIOS!!\n");
1915 + polarity = test_bit(bus, mp_bus_not_pci)?
1916 + default_ISA_polarity(idx):
1917 + default_PCI_polarity(idx);
1920 case 1: /* high active */
1921 @@ -1098,11 +1078,15 @@ static int MPBIOS_trigger(int idx)
1923 case 0: /* conforms, ie. bus-type dependent */
1925 + trigger = test_bit(bus, mp_bus_not_pci)?
1926 + default_ISA_trigger(idx):
1927 + default_PCI_trigger(idx);
1928 +#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
1929 switch (mp_bus_id_to_type[bus])
1931 case MP_BUS_ISA: /* ISA pin */
1933 - trigger = default_ISA_trigger(idx);
1934 + /* set before the switch */
1937 case MP_BUS_EISA: /* EISA pin */
1938 @@ -1112,7 +1096,7 @@ static int MPBIOS_trigger(int idx)
1940 case MP_BUS_PCI: /* PCI pin */
1942 - trigger = default_PCI_trigger(idx);
1943 + /* set before the switch */
1946 case MP_BUS_MCA: /* MCA pin */
1947 @@ -1127,6 +1111,7 @@ static int MPBIOS_trigger(int idx)
1955 @@ -1176,39 +1161,22 @@ static int pin_2_irq(int idx, int apic,
1956 if (mp_irqs[idx].mpc_dstirq != pin)
1957 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
1959 - switch (mp_bus_id_to_type[bus])
1961 - case MP_BUS_ISA: /* ISA pin */
1965 - irq = mp_irqs[idx].mpc_srcbusirq;
1968 - case MP_BUS_PCI: /* PCI pin */
1971 - * PCI IRQs are mapped in order
1975 - irq += nr_ioapic_registers[i++];
1979 - * For MPS mode, so far only needed by ES7000 platform
1981 - if (ioapic_renumber_irq)
1982 - irq = ioapic_renumber_irq(apic, irq);
1983 + if (test_bit(bus, mp_bus_not_pci))
1984 + irq = mp_irqs[idx].mpc_srcbusirq;
1987 + * PCI IRQs are mapped in order
1991 + irq += nr_ioapic_registers[i++];
1998 - printk(KERN_ERR "unknown bus type %d.\n",bus);
2003 + * For MPS mode, so far only needed by ES7000 platform
2005 + if (ioapic_renumber_irq)
2006 + irq = ioapic_renumber_irq(apic, irq);
2010 @@ -1314,7 +1282,6 @@ static void __init setup_IO_APIC_irqs(vo
2012 struct IO_APIC_route_entry entry;
2013 int apic, pin, idx, irq, first_notcon = 1, vector;
2014 - unsigned long flags;
2016 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
2018 @@ -1380,9 +1347,7 @@ static void __init setup_IO_APIC_irqs(vo
2019 if (!apic && (irq < 16))
2020 disable_8259A_irq(irq);
2022 - spin_lock_irqsave(&ioapic_lock, flags);
2023 - __ioapic_write_entry(apic, pin, entry);
2024 - spin_unlock_irqrestore(&ioapic_lock, flags);
2025 + ioapic_write_entry(apic, pin, entry);
2029 @@ -1577,8 +1542,8 @@ void /*__init*/ print_local_APIC(void *
2031 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
2032 smp_processor_id(), hard_smp_processor_id());
2033 - v = apic_read(APIC_ID);
2034 - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
2035 + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
2036 + GET_APIC_ID(read_apic_id()));
2037 v = apic_read(APIC_LVR);
2038 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
2039 ver = GET_APIC_VERSION(v);
2040 @@ -1791,7 +1756,7 @@ void disable_IO_APIC(void)
2041 entry.delivery_mode = dest_ExtINT; /* ExtInt */
2043 entry.dest.physical.physical_dest =
2044 - GET_APIC_ID(apic_read(APIC_ID));
2045 + GET_APIC_ID(read_apic_id());
2048 * Add it to the IO-APIC irq-routing table:
2049 @@ -2090,8 +2055,7 @@ static inline void init_IO_APIC_traps(vo
2050 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2052 for (irq = 0; irq < NR_IRQS ; irq++) {
2054 - if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
2055 + if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
2057 * Hmm.. We don't have an entry for this,
2058 * so default to an old-fashioned 8259
2059 @@ -2166,7 +2130,7 @@ static void __init setup_nmi(void)
2060 * cycles as some i82489DX-based boards have glue logic that keeps the
2061 * 8259A interrupt line asserted until INTA. --macro
2063 -static inline void unlock_ExtINT_logic(void)
2064 +static inline void __init unlock_ExtINT_logic(void)
2067 struct IO_APIC_route_entry entry0, entry1;
2068 @@ -2218,8 +2182,6 @@ static inline void unlock_ExtINT_logic(v
2069 ioapic_write_entry(apic, pin, entry0);
2072 -int timer_uses_ioapic_pin_0;
2075 * This code may look a bit paranoid, but it's supposed to cooperate with
2076 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
2077 @@ -2259,9 +2221,6 @@ static inline void __init check_timer(vo
2078 pin2 = ioapic_i8259.pin;
2079 apic2 = ioapic_i8259.apic;
2082 - timer_uses_ioapic_pin_0 = 1;
2084 printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
2085 vector, apic1, pin1, apic2, pin2);
2087 @@ -2555,6 +2514,7 @@ void destroy_irq(unsigned int irq)
2088 dynamic_irq_cleanup(irq);
2090 spin_lock_irqsave(&vector_lock, flags);
2091 + clear_bit(irq_vector[irq], used_vectors);
2092 irq_vector[irq] = 0;
2093 spin_unlock_irqrestore(&vector_lock, flags);
2095 @@ -2871,7 +2831,6 @@ int __init io_apic_get_redir_entries (in
2096 int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
2098 struct IO_APIC_route_entry entry;
2099 - unsigned long flags;
2101 if (!IO_APIC_IRQ(irq)) {
2102 printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
2103 @@ -2912,9 +2871,7 @@ int io_apic_set_pci_routing (int ioapic,
2104 if (!ioapic && (irq < 16))
2105 disable_8259A_irq(irq);
2107 - spin_lock_irqsave(&ioapic_lock, flags);
2108 - __ioapic_write_entry(ioapic, pin, entry);
2109 - spin_unlock_irqrestore(&ioapic_lock, flags);
2110 + ioapic_write_entry(ioapic, pin, entry);
2114 --- sle11-2009-05-14.orig/arch/x86/kernel/io_apic_64-xen.c 2009-03-16 16:33:40.000000000 +0100
2115 +++ sle11-2009-05-14/arch/x86/kernel/io_apic_64-xen.c 2009-03-16 16:38:05.000000000 +0100
2117 #include <asm/smp.h>
2118 #include <asm/desc.h>
2119 #include <asm/proto.h>
2120 -#include <asm/mach_apic.h>
2121 #include <asm/acpi.h>
2122 #include <asm/dma.h>
2123 #include <asm/nmi.h>
2124 #include <asm/msidef.h>
2125 #include <asm/hypertransport.h>
2127 +#include <mach_ipi.h>
2128 +#include <mach_apic.h>
2133 @@ -101,6 +103,16 @@ DEFINE_SPINLOCK(vector_lock);
2135 int nr_ioapic_registers[MAX_IO_APICS];
2137 +/* I/O APIC entries */
2138 +struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
2141 +/* MP IRQ source entries */
2142 +struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
2144 +/* # of MP IRQ source entries */
2145 +int mp_irq_entries;
2148 * Rough estimation of how many shared IRQs there are, can
2149 * be changed anytime.
2150 @@ -181,11 +193,10 @@ static inline void io_apic_modify(unsign
2151 writel(value, &io_apic->data);
2154 -static int io_apic_level_ack_pending(unsigned int irq)
2155 +static bool io_apic_level_ack_pending(unsigned int irq)
2157 struct irq_pin_list *entry;
2158 unsigned long flags;
2161 spin_lock_irqsave(&ioapic_lock, flags);
2162 entry = irq_2_pin + irq;
2163 @@ -198,13 +209,17 @@ static int io_apic_level_ack_pending(uns
2165 reg = io_apic_read(entry->apic, 0x10 + pin*2);
2166 /* Is the remote IRR bit set? */
2167 - pending |= (reg >> 14) & 1;
2168 + if ((reg >> 14) & 1) {
2169 + spin_unlock_irqrestore(&ioapic_lock, flags);
2174 entry = irq_2_pin + entry->next;
2176 spin_unlock_irqrestore(&ioapic_lock, flags);
2183 @@ -762,7 +777,7 @@ static void __clear_irq_vector(int irq)
2184 per_cpu(vector_irq, cpu)[vector] = -1;
2187 - cfg->domain = CPU_MASK_NONE;
2188 + cpus_clear(cfg->domain);
2191 void __setup_vector_irq(int cpu)
2192 @@ -899,9 +914,8 @@ static void __init setup_IO_APIC_irqs(vo
2193 static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
2195 struct IO_APIC_route_entry entry;
2196 - unsigned long flags;
2198 - memset(&entry,0,sizeof(entry));
2199 + memset(&entry, 0, sizeof(entry));
2201 disable_8259A_irq(0);
2203 @@ -929,10 +943,7 @@ static void __init setup_ExtINT_IRQ0_pin
2205 * Add it to the IO-APIC irq-routing table:
2207 - spin_lock_irqsave(&ioapic_lock, flags);
2208 - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
2209 - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
2210 - spin_unlock_irqrestore(&ioapic_lock, flags);
2211 + ioapic_write_entry(apic, pin, entry);
2213 enable_8259A_irq(0);
2215 @@ -1061,8 +1072,7 @@ void __apicdebuginit print_local_APIC(vo
2217 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
2218 smp_processor_id(), hard_smp_processor_id());
2219 - v = apic_read(APIC_ID);
2220 - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
2221 + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
2222 v = apic_read(APIC_LVR);
2223 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
2224 ver = GET_APIC_VERSION(v);
2225 @@ -1260,7 +1270,7 @@ void disable_IO_APIC(void)
2226 entry.dest_mode = 0; /* Physical */
2227 entry.delivery_mode = dest_ExtINT; /* ExtInt */
2229 - entry.dest = GET_APIC_ID(apic_read(APIC_ID));
2230 + entry.dest = GET_APIC_ID(read_apic_id());
2233 * Add it to the IO-APIC irq-routing table:
2234 @@ -1353,9 +1363,7 @@ static int ioapic_retrigger_irq(unsigned
2235 unsigned long flags;
2237 spin_lock_irqsave(&vector_lock, flags);
2239 - cpu_set(first_cpu(cfg->domain), mask);
2241 + mask = cpumask_of_cpu(first_cpu(cfg->domain));
2242 send_IPI_mask(mask, cfg->vector);
2243 spin_unlock_irqrestore(&vector_lock, flags);
2245 @@ -1519,8 +1527,7 @@ static inline void init_IO_APIC_traps(vo
2246 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2248 for (irq = 0; irq < NR_IRQS ; irq++) {
2250 - if (IO_APIC_IRQ(tmp) && !irq_cfg[tmp].vector) {
2251 + if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
2253 * Hmm.. We don't have an entry for this,
2254 * so default to an old-fashioned 8259
2255 @@ -1597,22 +1604,19 @@ static void __init setup_nmi(void)
2256 * cycles as some i82489DX-based boards have glue logic that keeps the
2257 * 8259A interrupt line asserted until INTA. --macro
2259 -static inline void unlock_ExtINT_logic(void)
2260 +static inline void __init unlock_ExtINT_logic(void)
2263 struct IO_APIC_route_entry entry0, entry1;
2264 unsigned char save_control, save_freq_select;
2265 - unsigned long flags;
2267 pin = find_isa_irq_pin(8, mp_INT);
2268 apic = find_isa_irq_apic(8, mp_INT);
2272 - spin_lock_irqsave(&ioapic_lock, flags);
2273 - *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
2274 - *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
2275 - spin_unlock_irqrestore(&ioapic_lock, flags);
2276 + entry0 = ioapic_read_entry(apic, pin);
2278 clear_IO_APIC_pin(apic, pin);
2280 memset(&entry1, 0, sizeof(entry1));
2281 @@ -1625,10 +1629,7 @@ static inline void unlock_ExtINT_logic(v
2285 - spin_lock_irqsave(&ioapic_lock, flags);
2286 - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
2287 - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
2288 - spin_unlock_irqrestore(&ioapic_lock, flags);
2289 + ioapic_write_entry(apic, pin, entry1);
2291 save_control = CMOS_READ(RTC_CONTROL);
2292 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2293 @@ -1647,10 +1648,7 @@ static inline void unlock_ExtINT_logic(v
2294 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2295 clear_IO_APIC_pin(apic, pin);
2297 - spin_lock_irqsave(&ioapic_lock, flags);
2298 - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
2299 - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
2300 - spin_unlock_irqrestore(&ioapic_lock, flags);
2301 + ioapic_write_entry(apic, pin, entry0);
2305 @@ -2327,7 +2325,6 @@ static struct resource * __init ioapic_s
2309 - memset(mem, 0, n);
2310 mem += sizeof(struct resource) * nr_ioapics;
2312 for (i = 0; i < nr_ioapics; i++) {
2313 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
2314 +++ sle11-2009-05-14/arch/x86/kernel/ipi-xen.c 2009-03-16 16:38:05.000000000 +0100
2316 +#include <linux/cpumask.h>
2317 +#include <linux/interrupt.h>
2318 +#include <linux/init.h>
2320 +#include <linux/mm.h>
2321 +#include <linux/delay.h>
2322 +#include <linux/spinlock.h>
2323 +#include <linux/kernel_stat.h>
2324 +#include <linux/mc146818rtc.h>
2325 +#include <linux/cache.h>
2326 +#include <linux/interrupt.h>
2327 +#include <linux/cpu.h>
2328 +#include <linux/module.h>
2330 +#include <asm/smp.h>
2331 +#include <asm/mtrr.h>
2332 +#include <asm/tlbflush.h>
2333 +#include <asm/mmu_context.h>
2334 +#include <asm/apic.h>
2335 +#include <asm/proto.h>
2337 +#ifdef CONFIG_X86_32
2339 +#include <mach_apic.h>
2341 + * the following functions deal with sending IPIs between CPUs.
2343 + * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
2346 +static inline int __prepare_ICR(unsigned int shortcut, int vector)
2348 + unsigned int icr = shortcut | APIC_DEST_LOGICAL;
2352 + icr |= APIC_DM_FIXED | vector;
2355 + icr |= APIC_DM_NMI;
2361 +static inline int __prepare_ICR2(unsigned int mask)
2363 + return SET_APIC_DEST_FIELD(mask);
2366 +#include <xen/evtchn.h>
2368 +DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
2370 +static inline void __send_IPI_one(unsigned int cpu, int vector)
2372 + int irq = per_cpu(ipi_to_irq, cpu)[vector];
2374 + notify_remote_via_irq(irq);
2378 +void __send_IPI_shortcut(unsigned int shortcut, int vector)
2382 + * Subtle. In the case of the 'never do double writes' workaround
2383 + * we have to lock out interrupts to be safe. As we don't care
2384 + * of the value read we use an atomic rmw access to avoid costly
2385 + * cli/sti. Otherwise we use an even cheaper single atomic write
2393 + apic_wait_icr_idle();
2396 + * No need to touch the target chip field
2398 + cfg = __prepare_ICR(shortcut, vector);
2401 + * Send the IPI. The write to APIC_ICR fires this off.
2403 + apic_write_around(APIC_ICR, cfg);
2407 + switch (shortcut) {
2408 + case APIC_DEST_SELF:
2409 + __send_IPI_one(smp_processor_id(), vector);
2411 + case APIC_DEST_ALLBUT:
2412 + for_each_online_cpu(cpu)
2413 + if (cpu != smp_processor_id())
2414 + __send_IPI_one(cpu, vector);
2417 + printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
2424 +void send_IPI_self(int vector)
2426 + __send_IPI_shortcut(APIC_DEST_SELF, vector);
2431 + * This is used to send an IPI with no shorthand notation (the destination is
2432 + * specified in bits 56 to 63 of the ICR).
2434 +static inline void __send_IPI_dest_field(unsigned long mask, int vector)
2436 + unsigned long cfg;
2441 + if (unlikely(vector == NMI_VECTOR))
2442 + safe_apic_wait_icr_idle();
2444 + apic_wait_icr_idle();
2447 + * prepare target chip field
2449 + cfg = __prepare_ICR2(mask);
2450 + apic_write_around(APIC_ICR2, cfg);
2455 + cfg = __prepare_ICR(0, vector);
2458 + * Send the IPI. The write to APIC_ICR fires this off.
2460 + apic_write_around(APIC_ICR, cfg);
2465 + * This is only used on smaller machines.
2467 +void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
2470 + unsigned long mask = cpus_addr(cpumask)[0];
2475 + unsigned long flags;
2477 + local_irq_save(flags);
2479 + WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
2480 + __send_IPI_dest_field(mask, vector);
2482 + cpus_andnot(mask, cpumask, cpu_online_map);
2483 + WARN_ON(!cpus_empty(mask));
2484 + for_each_online_cpu(cpu)
2485 + if (cpu_isset(cpu, cpumask))
2486 + __send_IPI_one(cpu, vector);
2488 + local_irq_restore(flags);
2491 +void send_IPI_mask_sequence(cpumask_t mask, int vector)
2494 + unsigned long flags;
2495 + unsigned int query_cpu;
2498 + * Hack. The clustered APIC addressing mode doesn't allow us to send
2499 + * to an arbitrary mask, so I do a unicasts to each CPU instead. This
2500 + * should be modified to do 1 message per cluster ID - mbligh
2503 + local_irq_save(flags);
2504 + for_each_possible_cpu(query_cpu) {
2505 + if (cpu_isset(query_cpu, mask)) {
2506 + __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
2510 + local_irq_restore(flags);
2512 + send_IPI_mask_bitmask(mask, vector);
2516 +/* must come after the send_IPI functions above for inlining */
2517 +#include <mach_ipi.h>
2520 +static int convert_apicid_to_cpu(int apic_id)
2524 + for_each_possible_cpu(i) {
2525 + if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
2531 +int safe_smp_processor_id(void)
2533 + int apicid, cpuid;
2535 + if (!boot_cpu_has(X86_FEATURE_APIC))
2538 + apicid = hard_smp_processor_id();
2539 + if (apicid == BAD_APICID)
2542 + cpuid = convert_apicid_to_cpu(apicid);
2544 + return cpuid >= 0 ? cpuid : 0;
2548 --- sle11-2009-05-14.orig/arch/x86/kernel/irq_32-xen.c 2009-03-16 16:33:40.000000000 +0100
2549 +++ sle11-2009-05-14/arch/x86/kernel/irq_32-xen.c 2009-03-16 16:38:05.000000000 +0100
2550 @@ -79,7 +79,7 @@ unsigned int do_IRQ(struct pt_regs *regs
2552 if (unlikely((unsigned)irq >= NR_IRQS)) {
2553 printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
2554 - __FUNCTION__, irq);
2559 @@ -134,7 +134,7 @@ unsigned int do_IRQ(struct pt_regs *regs
2560 : "=a" (arg1), "=d" (arg2), "=b" (bx)
2561 : "0" (irq), "1" (desc), "2" (isp),
2562 "D" (desc->handle_irq)
2564 + : "memory", "cc", "ecx"
2568 @@ -190,8 +190,6 @@ void irq_ctx_exit(int cpu)
2569 hardirq_ctx[cpu] = NULL;
2572 -extern asmlinkage void __do_softirq(void);
2574 asmlinkage void do_softirq(void)
2576 unsigned long flags;
2577 --- sle11-2009-05-14.orig/arch/x86/kernel/machine_kexec_64.c 2009-03-16 16:33:40.000000000 +0100
2578 +++ sle11-2009-05-14/arch/x86/kernel/machine_kexec_64.c 2009-03-16 16:38:05.000000000 +0100
2579 @@ -120,8 +120,6 @@ int __init machine_kexec_setup_resources
2583 -void machine_kexec_register_resources(struct resource *res) { ; }
2585 #else /* CONFIG_XEN */
2587 #define x__pmd(x) __pmd(x)
2588 --- sle11-2009-05-14.orig/arch/x86/kernel/microcode-xen.c 2009-03-16 16:33:40.000000000 +0100
2589 +++ sle11-2009-05-14/arch/x86/kernel/microcode-xen.c 2009-03-16 16:38:05.000000000 +0100
2590 @@ -162,7 +162,7 @@ static int request_microcode(void)
2591 c->x86, c->x86_model, c->x86_mask);
2592 error = request_firmware(&firmware, name, µcode_pdev->dev);
2594 - pr_debug("ucode data file %s load failed\n", name);
2595 + pr_debug("microcode: ucode data file %s load failed\n", name);
2599 --- sle11-2009-05-14.orig/arch/x86/kernel/mmconf-fam10h_64.c 2009-05-14 10:56:29.000000000 +0200
2600 +++ sle11-2009-05-14/arch/x86/kernel/mmconf-fam10h_64.c 2009-03-16 16:38:05.000000000 +0100
2601 @@ -219,6 +219,16 @@ void __cpuinit fam10h_check_enable_mmcfg
2602 val |= fam10h_pci_mmconf_base | (8 << FAM10H_MMIO_CONF_BUSRANGE_SHIFT) |
2603 FAM10H_MMIO_CONF_ENABLE;
2604 wrmsrl(address, val);
2610 + rdmsrl(address, val2);
2612 + pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF;
2617 static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d)
2618 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
2619 +++ sle11-2009-05-14/arch/x86/kernel/mpparse-xen.c 2009-03-16 16:38:05.000000000 +0100
2622 + * Intel Multiprocessor Specification 1.1 and 1.4
2623 + * compliant MP-table parsing routines.
2625 + * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
2626 + * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
2627 + * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de>
2630 +#include <linux/mm.h>
2631 +#include <linux/init.h>
2632 +#include <linux/delay.h>
2633 +#include <linux/bootmem.h>
2634 +#include <linux/kernel_stat.h>
2635 +#include <linux/mc146818rtc.h>
2636 +#include <linux/bitops.h>
2637 +#include <linux/acpi.h>
2638 +#include <linux/module.h>
2640 +#include <asm/smp.h>
2641 +#include <asm/mtrr.h>
2642 +#include <asm/mpspec.h>
2643 +#include <asm/pgalloc.h>
2644 +#include <asm/io_apic.h>
2645 +#include <asm/proto.h>
2646 +#include <asm/acpi.h>
2647 +#include <asm/bios_ebda.h>
2649 +#include <mach_apic.h>
2650 +#ifdef CONFIG_X86_32
2651 +#include <mach_apicdef.h>
2652 +#include <mach_mpparse.h>
2655 +/* Have we found an MP table */
2656 +int smp_found_config;
2659 + * Various Linux-internal data structures created from the
2662 +#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
2663 +int mp_bus_id_to_type[MAX_MP_BUSSES];
2666 +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
2667 +int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 };
2669 +static int mp_current_pci_id;
2674 + * Intel MP BIOS table parsing routines:
2678 + * Checksum an MP configuration block.
2681 +static int __init mpf_checksum(unsigned char *mp, int len)
2688 + return sum & 0xFF;
2691 +#ifdef CONFIG_X86_NUMAQ
2693 + * Have to match translation table entries to main table entries by counter
2694 + * hence the mpc_record variable .... can't see a less disgusting way of
2698 +static int mpc_record;
2699 +static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
2703 +static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
2707 + char *bootup_cpu = "";
2709 + if (!(m->mpc_cpuflag & CPU_ENABLED)) {
2713 +#ifdef CONFIG_X86_NUMAQ
2714 + apicid = mpc_apic_id(m, translation_table[mpc_record]);
2716 + apicid = m->mpc_apicid;
2718 + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
2719 + bootup_cpu = " (Bootup-CPU)";
2720 + boot_cpu_physical_apicid = m->mpc_apicid;
2723 + printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
2724 + generic_processor_info(apicid, m->mpc_apicver);
2725 +#else /* CONFIG_XEN */
2730 +static void __init MP_bus_info(struct mpc_config_bus *m)
2734 + memcpy(str, m->mpc_bustype, 6);
2737 +#ifdef CONFIG_X86_NUMAQ
2738 + mpc_oem_bus_info(m, str, translation_table[mpc_record]);
2740 + Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
2743 +#if MAX_MP_BUSSES < 256
2744 + if (m->mpc_busid >= MAX_MP_BUSSES) {
2745 + printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
2746 + " is too large, max. supported is %d\n",
2747 + m->mpc_busid, str, MAX_MP_BUSSES - 1);
2752 + if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
2753 + set_bit(m->mpc_busid, mp_bus_not_pci);
2754 +#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
2755 + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
2757 + } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
2758 +#ifdef CONFIG_X86_NUMAQ
2759 + mpc_oem_pci_bus(m, translation_table[mpc_record]);
2761 + clear_bit(m->mpc_busid, mp_bus_not_pci);
2762 + mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
2763 + mp_current_pci_id++;
2764 +#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
2765 + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
2766 + } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
2767 + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
2768 + } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) {
2769 + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
2772 + printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
2775 +#ifdef CONFIG_X86_IO_APIC
2777 +static int bad_ioapic(unsigned long address)
2779 + if (nr_ioapics >= MAX_IO_APICS) {
2780 + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
2781 + "(found %d)\n", MAX_IO_APICS, nr_ioapics);
2782 + panic("Recompile kernel with bigger MAX_IO_APICS!\n");
2785 + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
2786 + " found in table, skipping!\n");
2792 +static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
2794 + if (!(m->mpc_flags & MPC_APIC_USABLE))
2797 + printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
2798 + m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
2800 + if (bad_ioapic(m->mpc_apicaddr))
2803 + mp_ioapics[nr_ioapics] = *m;
2807 +static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
2809 + mp_irqs[mp_irq_entries] = *m;
2810 + Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
2811 + " IRQ %02x, APIC ID %x, APIC INT %02x\n",
2812 + m->mpc_irqtype, m->mpc_irqflag & 3,
2813 + (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
2814 + m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
2815 + if (++mp_irq_entries == MAX_IRQ_SOURCES)
2816 + panic("Max # of irq sources exceeded!!\n");
2821 +static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
2823 + Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
2824 + " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
2825 + m->mpc_irqtype, m->mpc_irqflag & 3,
2826 + (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
2827 + m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
2830 +#ifdef CONFIG_X86_NUMAQ
2831 +static void __init MP_translation_info(struct mpc_config_translation *m)
2834 + "Translation: record %d, type %d, quad %d, global %d, local %d\n",
2835 + mpc_record, m->trans_type, m->trans_quad, m->trans_global,
2838 + if (mpc_record >= MAX_MPC_ENTRY)
2839 + printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
2841 + translation_table[mpc_record] = m; /* stash this for later */
2842 + if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
2843 + node_set_online(m->trans_quad);
2847 + * Read/parse the MPC oem tables
2850 +static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
2851 + unsigned short oemsize)
2853 + int count = sizeof(*oemtable); /* the header size */
2854 + unsigned char *oemptr = ((unsigned char *)oemtable) + count;
2857 + printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
2859 + if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
2860 + printk(KERN_WARNING
2861 + "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
2862 + oemtable->oem_signature[0], oemtable->oem_signature[1],
2863 + oemtable->oem_signature[2], oemtable->oem_signature[3]);
2866 + if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
2867 + printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
2870 + while (count < oemtable->oem_length) {
2871 + switch (*oemptr) {
2872 + case MP_TRANSLATION:
2874 + struct mpc_config_translation *m =
2875 + (struct mpc_config_translation *)oemptr;
2876 + MP_translation_info(m);
2877 + oemptr += sizeof(*m);
2878 + count += sizeof(*m);
2884 + printk(KERN_WARNING
2885 + "Unrecognised OEM table entry type! - %d\n",
2893 +static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
2896 + if (strncmp(oem, "IBM NUMA", 8))
2897 + printk("Warning! May not be a NUMA-Q system!\n");
2898 + if (mpc->mpc_oemptr)
2899 + smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
2900 + mpc->mpc_oemsize);
2902 +#endif /* CONFIG_X86_NUMAQ */
2905 + * Read/parse the MPC
2908 +static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
2912 + int count = sizeof(*mpc);
2913 + unsigned char *mpt = ((unsigned char *)mpc) + count;
2915 + if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
2916 + printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
2917 + mpc->mpc_signature[0], mpc->mpc_signature[1],
2918 + mpc->mpc_signature[2], mpc->mpc_signature[3]);
2921 + if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) {
2922 + printk(KERN_ERR "MPTABLE: checksum error!\n");
2925 + if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) {
2926 + printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
2930 + if (!mpc->mpc_lapic) {
2931 + printk(KERN_ERR "MPTABLE: null local APIC address!\n");
2934 + memcpy(oem, mpc->mpc_oem, 8);
2936 + printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem);
2938 + memcpy(str, mpc->mpc_productid, 12);
2940 + printk("Product ID: %s ", str);
2942 +#ifdef CONFIG_X86_32
2943 + mps_oem_check(mpc, oem, str);
2945 + printk(KERN_INFO "MPTABLE: Product ID: %s ", str);
2947 + printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic);
2949 + /* save the local APIC address, it might be non-default */
2951 + mp_lapic_addr = mpc->mpc_lapic;
2957 + * Now process the configuration blocks.
2959 +#ifdef CONFIG_X86_NUMAQ
2962 + while (count < mpc->mpc_length) {
2964 + case MP_PROCESSOR:
2966 + struct mpc_config_processor *m =
2967 + (struct mpc_config_processor *)mpt;
2968 + /* ACPI may have already provided this data */
2970 + MP_processor_info(m);
2971 + mpt += sizeof(*m);
2972 + count += sizeof(*m);
2977 + struct mpc_config_bus *m =
2978 + (struct mpc_config_bus *)mpt;
2980 + mpt += sizeof(*m);
2981 + count += sizeof(*m);
2986 +#ifdef CONFIG_X86_IO_APIC
2987 + struct mpc_config_ioapic *m =
2988 + (struct mpc_config_ioapic *)mpt;
2989 + MP_ioapic_info(m);
2991 + mpt += sizeof(struct mpc_config_ioapic);
2992 + count += sizeof(struct mpc_config_ioapic);
2997 +#ifdef CONFIG_X86_IO_APIC
2998 + struct mpc_config_intsrc *m =
2999 + (struct mpc_config_intsrc *)mpt;
3001 + MP_intsrc_info(m);
3003 + mpt += sizeof(struct mpc_config_intsrc);
3004 + count += sizeof(struct mpc_config_intsrc);
3009 + struct mpc_config_lintsrc *m =
3010 + (struct mpc_config_lintsrc *)mpt;
3011 + MP_lintsrc_info(m);
3012 + mpt += sizeof(*m);
3013 + count += sizeof(*m);
3017 + /* wrong mptable */
3018 + printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
3019 + printk(KERN_ERR "type %x\n", *mpt);
3020 + print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
3021 + 1, mpc, mpc->mpc_length, 1);
3022 + count = mpc->mpc_length;
3025 +#ifdef CONFIG_X86_NUMAQ
3029 + setup_apic_routing();
3030 + if (!num_processors)
3031 + printk(KERN_ERR "MPTABLE: no processors registered!\n");
3032 + return num_processors;
3035 +#ifdef CONFIG_X86_IO_APIC
3037 +static int __init ELCR_trigger(unsigned int irq)
3039 + unsigned int port;
3041 + port = 0x4d0 + (irq >> 3);
3042 + return (inb(port) >> (irq & 7)) & 1;
3045 +static void __init construct_default_ioirq_mptable(int mpc_default_type)
3047 + struct mpc_config_intsrc intsrc;
3049 + int ELCR_fallback = 0;
3051 + intsrc.mpc_type = MP_INTSRC;
3052 + intsrc.mpc_irqflag = 0; /* conforming */
3053 + intsrc.mpc_srcbus = 0;
3054 + intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
3056 + intsrc.mpc_irqtype = mp_INT;
3059 + * If true, we have an ISA/PCI system with no IRQ entries
3060 + * in the MP table. To prevent the PCI interrupts from being set up
3061 + * incorrectly, we try to use the ELCR. The sanity check to see if
3062 + * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
3063 + * never be level sensitive, so we simply see if the ELCR agrees.
3064 + * If it does, we assume it's valid.
3066 + if (mpc_default_type == 5) {
3067 + printk(KERN_INFO "ISA/PCI bus type with no IRQ information... "
3068 + "falling back to ELCR\n");
3070 + if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) ||
3072 + printk(KERN_ERR "ELCR contains invalid data... "
3073 + "not using ELCR\n");
3076 + "Using ELCR to identify PCI interrupts\n");
3077 + ELCR_fallback = 1;
3081 + for (i = 0; i < 16; i++) {
3082 + switch (mpc_default_type) {
3084 + if (i == 0 || i == 13)
3085 + continue; /* IRQ0 & IRQ13 not connected */
3086 + /* fall through */
3089 + continue; /* IRQ2 is never connected */
3092 + if (ELCR_fallback) {
3094 + * If the ELCR indicates a level-sensitive interrupt, we
3095 + * copy that information over to the MP table in the
3096 + * irqflag field (level sensitive, active high polarity).
3098 + if (ELCR_trigger(i))
3099 + intsrc.mpc_irqflag = 13;
3101 + intsrc.mpc_irqflag = 0;
3104 + intsrc.mpc_srcbusirq = i;
3105 + intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
3106 + MP_intsrc_info(&intsrc);
3109 + intsrc.mpc_irqtype = mp_ExtINT;
3110 + intsrc.mpc_srcbusirq = 0;
3111 + intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
3112 + MP_intsrc_info(&intsrc);
3117 +static inline void __init construct_default_ISA_mptable(int mpc_default_type)
3119 + struct mpc_config_processor processor;
3120 + struct mpc_config_bus bus;
3121 +#ifdef CONFIG_X86_IO_APIC
3122 + struct mpc_config_ioapic ioapic;
3124 + struct mpc_config_lintsrc lintsrc;
3125 + int linttypes[2] = { mp_ExtINT, mp_NMI };
3129 + * local APIC has default address
3131 + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
3134 + * 2 CPUs, numbered 0 & 1.
3136 + processor.mpc_type = MP_PROCESSOR;
3137 + /* Either an integrated APIC or a discrete 82489DX. */
3138 + processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
3139 + processor.mpc_cpuflag = CPU_ENABLED;
3140 + processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
3141 + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
3142 + processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
3143 + processor.mpc_reserved[0] = 0;
3144 + processor.mpc_reserved[1] = 0;
3145 + for (i = 0; i < 2; i++) {
3146 + processor.mpc_apicid = i;
3147 + MP_processor_info(&processor);
3150 + bus.mpc_type = MP_BUS;
3151 + bus.mpc_busid = 0;
3152 + switch (mpc_default_type) {
3154 + printk(KERN_ERR "???\nUnknown standard configuration %d\n",
3155 + mpc_default_type);
3156 + /* fall through */
3159 + memcpy(bus.mpc_bustype, "ISA ", 6);
3164 + memcpy(bus.mpc_bustype, "EISA ", 6);
3168 + memcpy(bus.mpc_bustype, "MCA ", 6);
3170 + MP_bus_info(&bus);
3171 + if (mpc_default_type > 4) {
3172 + bus.mpc_busid = 1;
3173 + memcpy(bus.mpc_bustype, "PCI ", 6);
3174 + MP_bus_info(&bus);
3177 +#ifdef CONFIG_X86_IO_APIC
3178 + ioapic.mpc_type = MP_IOAPIC;
3179 + ioapic.mpc_apicid = 2;
3180 + ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
3181 + ioapic.mpc_flags = MPC_APIC_USABLE;
3182 + ioapic.mpc_apicaddr = 0xFEC00000;
3183 + MP_ioapic_info(&ioapic);
3186 + * We set up most of the low 16 IO-APIC pins according to MPS rules.
3188 + construct_default_ioirq_mptable(mpc_default_type);
3190 + lintsrc.mpc_type = MP_LINTSRC;
3191 + lintsrc.mpc_irqflag = 0; /* conforming */
3192 + lintsrc.mpc_srcbusid = 0;
3193 + lintsrc.mpc_srcbusirq = 0;
3194 + lintsrc.mpc_destapic = MP_APIC_ALL;
3195 + for (i = 0; i < 2; i++) {
3196 + lintsrc.mpc_irqtype = linttypes[i];
3197 + lintsrc.mpc_destapiclint = i;
3198 + MP_lintsrc_info(&lintsrc);
3202 +static struct intel_mp_floating *mpf_found;
3205 + * Scan the memory blocks for an SMP configuration block.
3207 +static void __init __get_smp_config(unsigned early)
3209 + struct intel_mp_floating *mpf = mpf_found;
3211 + if (acpi_lapic && early)
3214 + * ACPI supports both logical (e.g. Hyper-Threading) and physical
3215 + * processors, where MPS only supports physical.
3217 + if (acpi_lapic && acpi_ioapic) {
3218 + printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
3221 + } else if (acpi_lapic)
3222 + printk(KERN_INFO "Using ACPI for processor (LAPIC) "
3223 + "configuration information\n");
3225 + printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
3226 + mpf->mpf_specification);
3227 +#ifdef CONFIG_X86_32
3228 + if (mpf->mpf_feature2 & (1 << 7)) {
3229 + printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
3232 + printk(KERN_INFO " Virtual Wire compatibility mode.\n");
3237 + * Now see if we need to read further.
3239 + if (mpf->mpf_feature1 != 0) {
3242 + * local APIC has default address
3244 + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
3248 + printk(KERN_INFO "Default MP configuration #%d\n",
3249 + mpf->mpf_feature1);
3250 + construct_default_ISA_mptable(mpf->mpf_feature1);
3252 + } else if (mpf->mpf_physptr) {
3255 + * Read the physical hardware table. Anything here will
3256 + * override the defaults.
3258 + if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr), early)) {
3259 + smp_found_config = 0;
3261 + "BIOS bug, MP table errors detected!...\n");
3262 + printk(KERN_ERR "... disabling SMP support. "
3263 + "(tell your hw vendor)\n");
3269 +#ifdef CONFIG_X86_IO_APIC
3271 + * If there are no explicit MP IRQ entries, then we are
3272 + * broken. We set up most of the low 16 IO-APIC pins to
3273 + * ISA defaults and hope it will work.
3275 + if (!mp_irq_entries) {
3276 + struct mpc_config_bus bus;
3278 + printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
3279 + "using default mptable. "
3280 + "(tell your hw vendor)\n");
3282 + bus.mpc_type = MP_BUS;
3283 + bus.mpc_busid = 0;
3284 + memcpy(bus.mpc_bustype, "ISA ", 6);
3285 + MP_bus_info(&bus);
3287 + construct_default_ioirq_mptable(0);
3294 + printk(KERN_INFO "Processors: %d\n", num_processors);
3296 + * Only use the first configuration found.
3300 +void __init early_get_smp_config(void)
3302 + __get_smp_config(1);
3305 +void __init get_smp_config(void)
3307 + __get_smp_config(0);
3310 +static int __init smp_scan_config(unsigned long base, unsigned long length,
3313 + unsigned int *bp = isa_bus_to_virt(base);
3314 + struct intel_mp_floating *mpf;
3316 + Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length);
3317 + BUILD_BUG_ON(sizeof(*mpf) != 16);
3319 + while (length > 0) {
3320 + mpf = (struct intel_mp_floating *)bp;
3321 + if ((*bp == SMP_MAGIC_IDENT) &&
3322 + (mpf->mpf_length == 1) &&
3323 + !mpf_checksum((unsigned char *)bp, 16) &&
3324 + ((mpf->mpf_specification == 1)
3325 + || (mpf->mpf_specification == 4))) {
3327 + smp_found_config = 1;
3329 +#ifdef CONFIG_X86_32
3331 + printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
3332 + mpf, virt_to_phys(mpf));
3333 + reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
3335 + if (mpf->mpf_physptr) {
3337 + * We cannot access to MPC table to compute
3338 + * table size yet, as only few megabytes from
3339 + * the bottom is mapped now.
3340 + * PC-9800's MPC table places on the very last
3341 + * of physical memory; so that simply reserving
3342 + * PAGE_SIZE from mpg->mpf_physptr yields BUG()
3343 + * in reserve_bootmem.
3345 + unsigned long size = PAGE_SIZE;
3346 + unsigned long end = max_low_pfn * PAGE_SIZE;
3347 + if (mpf->mpf_physptr + size > end)
3348 + size = end - mpf->mpf_physptr;
3349 + reserve_bootmem(mpf->mpf_physptr, size,
3353 + printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
3354 + mpf, ((void *)bp - isa_bus_to_virt(base)) + base);
3356 +#elif !defined(CONFIG_XEN)
3360 + reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
3361 + if (mpf->mpf_physptr)
3362 + reserve_bootmem_generic(mpf->mpf_physptr,
3373 +static void __init __find_smp_config(unsigned reserve)
3376 + unsigned int address;
3380 + * FIXME: Linux assumes you have 640K of base ram..
3381 + * this continues the error...
3383 + * 1) Scan the bottom 1K for a signature
3384 + * 2) Scan the top 1K of base RAM
3385 + * 3) Scan the 64K of bios
3387 + if (smp_scan_config(0x0, 0x400, reserve) ||
3388 + smp_scan_config(639 * 0x400, 0x400, reserve) ||
3389 + smp_scan_config(0xF0000, 0x10000, reserve))
3392 + * If it is an SMP machine we should know now, unless the
3393 + * configuration is in an EISA/MCA bus machine with an
3394 + * extended bios data area.
3396 + * there is a real-mode segmented pointer pointing to the
3397 + * 4K EBDA area at 0x40E, calculate and scan it here.
3399 + * NOTE! There are Linux loaders that will corrupt the EBDA
3400 + * area, and as such this kind of SMP config may be less
3401 + * trustworthy, simply because the SMP table may have been
3402 + * stomped on during early boot. These loaders are buggy and
3403 + * should be fixed.
3405 + * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
3409 + address = get_bios_ebda();
3411 + smp_scan_config(address, 0x400, reserve);
3415 +void __init early_find_smp_config(void)
3417 + __find_smp_config(0);
3420 +void __init find_smp_config(void)
3422 + __find_smp_config(1);
3425 +/* --------------------------------------------------------------------------
3426 + ACPI-based MP Configuration
3427 + -------------------------------------------------------------------------- */
3430 + * Keep this outside and initialized to 0, for !CONFIG_ACPI builds:
3436 +#ifdef CONFIG_X86_IO_APIC
3438 +#define MP_ISA_BUS 0
3440 +extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
3442 +static int mp_find_ioapic(int gsi)
3446 + /* Find the IOAPIC that manages this GSI. */
3447 + for (i = 0; i < nr_ioapics; i++) {
3448 + if ((gsi >= mp_ioapic_routing[i].gsi_base)
3449 + && (gsi <= mp_ioapic_routing[i].gsi_end))
3453 + printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
3457 +static u8 __init uniq_ioapic_id(u8 id)
3459 +#ifdef CONFIG_X86_32
3460 + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
3461 + !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
3462 + return io_apic_get_unique_id(nr_ioapics, id);
3467 + DECLARE_BITMAP(used, 256);
3468 + bitmap_zero(used, 256);
3469 + for (i = 0; i < nr_ioapics; i++) {
3470 + struct mpc_config_ioapic *ia = &mp_ioapics[i];
3471 + __set_bit(ia->mpc_apicid, used);
3473 + if (!test_bit(id, used))
3475 + return find_first_zero_bit(used, 256);
3479 +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
3483 + if (bad_ioapic(address))
3488 + mp_ioapics[idx].mpc_type = MP_IOAPIC;
3489 + mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
3490 + mp_ioapics[idx].mpc_apicaddr = address;
3493 + set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
3495 + mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
3496 +#ifdef CONFIG_X86_32
3497 + mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
3499 + mp_ioapics[idx].mpc_apicver = 0;
3502 + * Build basic GSI lookup table to facilitate gsi->io_apic lookups
3503 + * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
3505 + mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
3506 + mp_ioapic_routing[idx].gsi_base = gsi_base;
3507 + mp_ioapic_routing[idx].gsi_end = gsi_base +
3508 + io_apic_get_redir_entries(idx);
3510 + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
3511 + "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
3512 + mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
3513 + mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
3518 +void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
3520 + struct mpc_config_intsrc intsrc;
3525 + * Convert 'gsi' to 'ioapic.pin'.
3527 + ioapic = mp_find_ioapic(gsi);
3530 + pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
3533 + * TBD: This check is for faulty timer entries, where the override
3534 + * erroneously sets the trigger to level, resulting in a HUGE
3535 + * increase of timer interrupts!
3537 + if ((bus_irq == 0) && (trigger == 3))
3540 + intsrc.mpc_type = MP_INTSRC;
3541 + intsrc.mpc_irqtype = mp_INT;
3542 + intsrc.mpc_irqflag = (trigger << 2) | polarity;
3543 + intsrc.mpc_srcbus = MP_ISA_BUS;
3544 + intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
3545 + intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
3546 + intsrc.mpc_dstirq = pin; /* INTIN# */
3548 + MP_intsrc_info(&intsrc);
3551 +void __init mp_config_acpi_legacy_irqs(void)
3553 + struct mpc_config_intsrc intsrc;
3557 +#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
3559 + * Fabricate the legacy ISA bus (bus #31).
3561 + mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
3563 + set_bit(MP_ISA_BUS, mp_bus_not_pci);
3564 + Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
3567 + * Older generations of ES7000 have no legacy identity mappings
3569 + if (es7000_plat == 1)
3573 + * Locate the IOAPIC that manages the ISA IRQs (0-15).
3575 + ioapic = mp_find_ioapic(0);
3579 + intsrc.mpc_type = MP_INTSRC;
3580 + intsrc.mpc_irqflag = 0; /* Conforming */
3581 + intsrc.mpc_srcbus = MP_ISA_BUS;
3582 +#ifdef CONFIG_X86_IO_APIC
3583 + intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
3586 + * Use the default configuration for the IRQs 0-15. Unless
3587 + * overridden by (MADT) interrupt source override entries.
3589 + for (i = 0; i < 16; i++) {
3592 + for (idx = 0; idx < mp_irq_entries; idx++) {
3593 + struct mpc_config_intsrc *irq = mp_irqs + idx;
3595 + /* Do we already have a mapping for this ISA IRQ? */
3596 + if (irq->mpc_srcbus == MP_ISA_BUS
3597 + && irq->mpc_srcbusirq == i)
3600 + /* Do we already have a mapping for this IOAPIC pin */
3601 + if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
3602 + (irq->mpc_dstirq == i))
3606 + if (idx != mp_irq_entries) {
3607 + printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
3608 + continue; /* IRQ already used */
3611 + intsrc.mpc_irqtype = mp_INT;
3612 + intsrc.mpc_srcbusirq = i; /* Identity mapped */
3613 + intsrc.mpc_dstirq = i;
3615 + MP_intsrc_info(&intsrc);
3619 +int mp_register_gsi(u32 gsi, int triggering, int polarity)
3623 +#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
3624 +#define MAX_GSI_NUM 4096
3625 +#define IRQ_COMPRESSION_START 64
3627 + static int pci_irq = IRQ_COMPRESSION_START;
3629 + * Mapping between Global System Interrupts, which
3630 + * represent all possible interrupts, and IRQs
3631 + * assigned to actual devices.
3633 + static int gsi_to_irq[MAX_GSI_NUM];
3636 + if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
3640 + /* Don't set up the ACPI SCI because it's already set up */
3641 + if (acpi_gbl_FADT.sci_interrupt == gsi)
3644 + ioapic = mp_find_ioapic(gsi);
3646 + printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
3650 + ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
3652 +#ifndef CONFIG_X86_32
3653 + if (ioapic_renumber_irq)
3654 + gsi = ioapic_renumber_irq(ioapic, gsi);
3658 + * Avoid pin reprogramming. PRTs typically include entries
3659 + * with redundant pin->gsi mappings (but unique PCI devices);
3660 + * we only program the IOAPIC on the first.
3662 + if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
3663 + printk(KERN_ERR "Invalid reference to IOAPIC pin "
3664 + "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
3668 + if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
3669 + Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
3670 + mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
3671 +#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
3672 + return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
3678 + set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
3679 +#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
3681 + * For GSI >= 64, use IRQ compression
3683 + if ((gsi >= IRQ_COMPRESSION_START)
3684 + && (triggering == ACPI_LEVEL_SENSITIVE)) {
3686 + * For PCI devices assign IRQs in order, avoiding gaps
3687 + * due to unused I/O APIC pins.
3690 + if (gsi < MAX_GSI_NUM) {
3692 + * Retain the VIA chipset work-around (gsi > 15), but
3693 + * avoid a problem where the 8254 timer (IRQ0) is setup
3694 + * via an override (so it's not on pin 0 of the ioapic),
3695 + * and at the same time, the pin 0 interrupt is a PCI
3696 + * type. The gsi > 15 test could cause these two pins
3697 + * to be shared as IRQ0, and they are not shareable.
3698 + * So test for this condition, and if necessary, avoid
3699 + * the pin collision.
3703 + * Don't assign IRQ used by ACPI SCI
3705 + if (gsi == acpi_gbl_FADT.sci_interrupt)
3707 + gsi_to_irq[irq] = gsi;
3709 + printk(KERN_ERR "GSI %u is too high\n", gsi);
3714 + io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
3715 + triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
3716 + polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
3720 +#endif /* CONFIG_X86_IO_APIC */
3721 +#endif /* CONFIG_ACPI */
3722 --- sle11-2009-05-14.orig/arch/x86/kernel/mpparse_32-xen.c 2009-03-16 16:33:40.000000000 +0100
3723 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
3726 - * Intel Multiprocessor Specification 1.1 and 1.4
3727 - * compliant MP-table parsing routines.
3729 - * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
3730 - * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
3733 - * Erich Boleyn : MP v1.4 and additional changes.
3734 - * Alan Cox : Added EBDA scanning
3735 - * Ingo Molnar : various cleanups and rewrites
3736 - * Maciej W. Rozycki: Bits for default MP configurations
3737 - * Paul Diefenbaugh: Added full ACPI support
3740 -#include <linux/mm.h>
3741 -#include <linux/init.h>
3742 -#include <linux/acpi.h>
3743 -#include <linux/delay.h>
3744 -#include <linux/bootmem.h>
3745 -#include <linux/kernel_stat.h>
3746 -#include <linux/mc146818rtc.h>
3747 -#include <linux/bitops.h>
3749 -#include <asm/smp.h>
3750 -#include <asm/acpi.h>
3751 -#include <asm/mtrr.h>
3752 -#include <asm/mpspec.h>
3753 -#include <asm/io_apic.h>
3755 -#include <mach_apic.h>
3756 -#include <mach_apicdef.h>
3757 -#include <mach_mpparse.h>
3758 -#include <bios_ebda.h>
3760 -/* Have we found an MP table */
3761 -int smp_found_config;
3762 -unsigned int __cpuinitdata maxcpus = NR_CPUS;
3765 - * Various Linux-internal data structures created from the
3768 -int apic_version [MAX_APICS];
3769 -int mp_bus_id_to_type [MAX_MP_BUSSES];
3770 -int mp_bus_id_to_node [MAX_MP_BUSSES];
3771 -int mp_bus_id_to_local [MAX_MP_BUSSES];
3772 -int quad_local_to_mp_bus_id [NR_CPUS/4][4];
3773 -int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
3774 -static int mp_current_pci_id;
3776 -/* I/O APIC entries */
3777 -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
3779 -/* # of MP IRQ source entries */
3780 -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
3782 -/* MP IRQ source entries */
3783 -int mp_irq_entries;
3788 -unsigned long mp_lapic_addr;
3790 -unsigned int def_to_bigsmp = 0;
3792 -/* Processor that is doing the boot up */
3793 -unsigned int boot_cpu_physical_apicid = -1U;
3794 -/* Internal processor count */
3795 -unsigned int num_processors;
3797 -/* Bitmask of physically existing CPUs */
3798 -physid_mask_t phys_cpu_present_map;
3800 -u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
3803 - * Intel MP BIOS table parsing routines:
3808 - * Checksum an MP configuration block.
3811 -static int __init mpf_checksum(unsigned char *mp, int len)
3818 - return sum & 0xFF;
3822 - * Have to match translation table entries to main table entries by counter
3823 - * hence the mpc_record variable .... can't see a less disgusting way of
3827 -static int mpc_record;
3828 -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata;
3831 -static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
3834 - physid_mask_t phys_cpu;
3836 - if (!(m->mpc_cpuflag & CPU_ENABLED))
3839 - apicid = mpc_apic_id(m, translation_table[mpc_record]);
3841 - if (m->mpc_featureflag&(1<<0))
3842 - Dprintk(" Floating point unit present.\n");
3843 - if (m->mpc_featureflag&(1<<7))
3844 - Dprintk(" Machine Exception supported.\n");
3845 - if (m->mpc_featureflag&(1<<8))
3846 - Dprintk(" 64 bit compare & exchange supported.\n");
3847 - if (m->mpc_featureflag&(1<<9))
3848 - Dprintk(" Internal APIC present.\n");
3849 - if (m->mpc_featureflag&(1<<11))
3850 - Dprintk(" SEP present.\n");
3851 - if (m->mpc_featureflag&(1<<12))
3852 - Dprintk(" MTRR present.\n");
3853 - if (m->mpc_featureflag&(1<<13))
3854 - Dprintk(" PGE present.\n");
3855 - if (m->mpc_featureflag&(1<<14))
3856 - Dprintk(" MCA present.\n");
3857 - if (m->mpc_featureflag&(1<<15))
3858 - Dprintk(" CMOV present.\n");
3859 - if (m->mpc_featureflag&(1<<16))
3860 - Dprintk(" PAT present.\n");
3861 - if (m->mpc_featureflag&(1<<17))
3862 - Dprintk(" PSE present.\n");
3863 - if (m->mpc_featureflag&(1<<18))
3864 - Dprintk(" PSN present.\n");
3865 - if (m->mpc_featureflag&(1<<19))
3866 - Dprintk(" Cache Line Flush Instruction present.\n");
3868 - if (m->mpc_featureflag&(1<<21))
3869 - Dprintk(" Debug Trace and EMON Store present.\n");
3870 - if (m->mpc_featureflag&(1<<22))
3871 - Dprintk(" ACPI Thermal Throttle Registers present.\n");
3872 - if (m->mpc_featureflag&(1<<23))
3873 - Dprintk(" MMX present.\n");
3874 - if (m->mpc_featureflag&(1<<24))
3875 - Dprintk(" FXSR present.\n");
3876 - if (m->mpc_featureflag&(1<<25))
3877 - Dprintk(" XMM present.\n");
3878 - if (m->mpc_featureflag&(1<<26))
3879 - Dprintk(" Willamette New Instructions present.\n");
3880 - if (m->mpc_featureflag&(1<<27))
3881 - Dprintk(" Self Snoop present.\n");
3882 - if (m->mpc_featureflag&(1<<28))
3883 - Dprintk(" HT present.\n");
3884 - if (m->mpc_featureflag&(1<<29))
3885 - Dprintk(" Thermal Monitor present.\n");
3886 - /* 30, 31 Reserved */
3889 - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
3890 - Dprintk(" Bootup CPU\n");
3891 - boot_cpu_physical_apicid = m->mpc_apicid;
3894 - ver = m->mpc_apicver;
3897 - * Validate version
3900 - printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
3901 - "fixing up to 0x10. (tell your hw vendor)\n",
3905 - apic_version[m->mpc_apicid] = ver;
3907 - phys_cpu = apicid_to_cpu_present(apicid);
3908 - physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
3910 - if (num_processors >= NR_CPUS) {
3911 - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
3912 - " Processor ignored.\n", NR_CPUS);
3916 - if (num_processors >= maxcpus) {
3917 - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
3918 - " Processor ignored.\n", maxcpus);
3922 - cpu_set(num_processors, cpu_possible_map);
3926 - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
3927 - * but we need to work other dependencies like SMP_SUSPEND etc
3928 - * before this can be done without some confusion.
3929 - * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
3930 - * - Ashok Raj <ashok.raj@intel.com>
3932 - if (num_processors > 8) {
3933 - switch (boot_cpu_data.x86_vendor) {
3934 - case X86_VENDOR_INTEL:
3935 - if (!APIC_XAPIC(ver)) {
3936 - def_to_bigsmp = 0;
3939 - /* If P4 and above fall through */
3940 - case X86_VENDOR_AMD:
3941 - def_to_bigsmp = 1;
3944 - bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
3947 -static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
3951 -#endif /* CONFIG_XEN */
3953 -static void __init MP_bus_info (struct mpc_config_bus *m)
3957 - memcpy(str, m->mpc_bustype, 6);
3960 - mpc_oem_bus_info(m, str, translation_table[mpc_record]);
3962 -#if MAX_MP_BUSSES < 256
3963 - if (m->mpc_busid >= MAX_MP_BUSSES) {
3964 - printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
3965 - " is too large, max. supported is %d\n",
3966 - m->mpc_busid, str, MAX_MP_BUSSES - 1);
3971 - if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
3972 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
3973 - } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
3974 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
3975 - } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
3976 - mpc_oem_pci_bus(m, translation_table[mpc_record]);
3977 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
3978 - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
3979 - mp_current_pci_id++;
3980 - } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
3981 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
3983 - printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
3987 -static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
3989 - if (!(m->mpc_flags & MPC_APIC_USABLE))
3992 - printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
3993 - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
3994 - if (nr_ioapics >= MAX_IO_APICS) {
3995 - printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
3996 - MAX_IO_APICS, nr_ioapics);
3997 - panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
3999 - if (!m->mpc_apicaddr) {
4000 - printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
4001 - " found in MP table, skipping!\n");
4004 - mp_ioapics[nr_ioapics] = *m;
4008 -static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
4010 - mp_irqs [mp_irq_entries] = *m;
4011 - Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
4012 - " IRQ %02x, APIC ID %x, APIC INT %02x\n",
4013 - m->mpc_irqtype, m->mpc_irqflag & 3,
4014 - (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
4015 - m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
4016 - if (++mp_irq_entries == MAX_IRQ_SOURCES)
4017 - panic("Max # of irq sources exceeded!!\n");
4020 -static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
4022 - Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
4023 - " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
4024 - m->mpc_irqtype, m->mpc_irqflag & 3,
4025 - (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
4026 - m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
4029 -#ifdef CONFIG_X86_NUMAQ
4030 -static void __init MP_translation_info (struct mpc_config_translation *m)
4032 - printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
4034 - if (mpc_record >= MAX_MPC_ENTRY)
4035 - printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
4037 - translation_table[mpc_record] = m; /* stash this for later */
4038 - if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
4039 - node_set_online(m->trans_quad);
4043 - * Read/parse the MPC oem tables
4046 -static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
4047 - unsigned short oemsize)
4049 - int count = sizeof (*oemtable); /* the header size */
4050 - unsigned char *oemptr = ((unsigned char *)oemtable)+count;
4053 - printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
4054 - if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
4056 - printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
4057 - oemtable->oem_signature[0],
4058 - oemtable->oem_signature[1],
4059 - oemtable->oem_signature[2],
4060 - oemtable->oem_signature[3]);
4063 - if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
4065 - printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
4068 - while (count < oemtable->oem_length) {
4069 - switch (*oemptr) {
4070 - case MP_TRANSLATION:
4072 - struct mpc_config_translation *m=
4073 - (struct mpc_config_translation *)oemptr;
4074 - MP_translation_info(m);
4075 - oemptr += sizeof(*m);
4076 - count += sizeof(*m);
4082 - printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
4089 -static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
4092 - if (strncmp(oem, "IBM NUMA", 8))
4093 - printk("Warning! May not be a NUMA-Q system!\n");
4094 - if (mpc->mpc_oemptr)
4095 - smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
4096 - mpc->mpc_oemsize);
4098 -#endif /* CONFIG_X86_NUMAQ */
4101 - * Read/parse the MPC
4104 -static int __init smp_read_mpc(struct mp_config_table *mpc)
4108 - int count=sizeof(*mpc);
4109 - unsigned char *mpt=((unsigned char *)mpc)+count;
4111 - if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
4112 - printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
4113 - *(u32 *)mpc->mpc_signature);
4116 - if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
4117 - printk(KERN_ERR "SMP mptable: checksum error!\n");
4120 - if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
4121 - printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
4125 - if (!mpc->mpc_lapic) {
4126 - printk(KERN_ERR "SMP mptable: null local APIC address!\n");
4129 - memcpy(oem,mpc->mpc_oem,8);
4131 - printk(KERN_INFO "OEM ID: %s ",oem);
4133 - memcpy(str,mpc->mpc_productid,12);
4135 - printk("Product ID: %s ",str);
4137 - mps_oem_check(mpc, oem, str);
4139 - printk("APIC at: 0x%X\n", mpc->mpc_lapic);
4142 - * Save the local APIC address (it might be non-default) -- but only
4143 - * if we're not using ACPI.
4146 - mp_lapic_addr = mpc->mpc_lapic;
4149 - * Now process the configuration blocks.
4152 - while (count < mpc->mpc_length) {
4154 - case MP_PROCESSOR:
4156 - struct mpc_config_processor *m=
4157 - (struct mpc_config_processor *)mpt;
4158 - /* ACPI may have already provided this data */
4160 - MP_processor_info(m);
4161 - mpt += sizeof(*m);
4162 - count += sizeof(*m);
4167 - struct mpc_config_bus *m=
4168 - (struct mpc_config_bus *)mpt;
4170 - mpt += sizeof(*m);
4171 - count += sizeof(*m);
4176 - struct mpc_config_ioapic *m=
4177 - (struct mpc_config_ioapic *)mpt;
4178 - MP_ioapic_info(m);
4180 - count+=sizeof(*m);
4185 - struct mpc_config_intsrc *m=
4186 - (struct mpc_config_intsrc *)mpt;
4188 - MP_intsrc_info(m);
4190 - count+=sizeof(*m);
4195 - struct mpc_config_lintsrc *m=
4196 - (struct mpc_config_lintsrc *)mpt;
4197 - MP_lintsrc_info(m);
4199 - count+=sizeof(*m);
4204 - count = mpc->mpc_length;
4210 - setup_apic_routing();
4211 - if (!num_processors)
4212 - printk(KERN_ERR "SMP mptable: no processors registered!\n");
4213 - return num_processors;
4216 -static int __init ELCR_trigger(unsigned int irq)
4218 - unsigned int port;
4220 - port = 0x4d0 + (irq >> 3);
4221 - return (inb(port) >> (irq & 7)) & 1;
4224 -static void __init construct_default_ioirq_mptable(int mpc_default_type)
4226 - struct mpc_config_intsrc intsrc;
4228 - int ELCR_fallback = 0;
4230 - intsrc.mpc_type = MP_INTSRC;
4231 - intsrc.mpc_irqflag = 0; /* conforming */
4232 - intsrc.mpc_srcbus = 0;
4233 - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
4235 - intsrc.mpc_irqtype = mp_INT;
4238 - * If true, we have an ISA/PCI system with no IRQ entries
4239 - * in the MP table. To prevent the PCI interrupts from being set up
4240 - * incorrectly, we try to use the ELCR. The sanity check to see if
4241 - * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
4242 - * never be level sensitive, so we simply see if the ELCR agrees.
4243 - * If it does, we assume it's valid.
4245 - if (mpc_default_type == 5) {
4246 - printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
4248 - if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
4249 - printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
4251 - printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
4252 - ELCR_fallback = 1;
4256 - for (i = 0; i < 16; i++) {
4257 - switch (mpc_default_type) {
4259 - if (i == 0 || i == 13)
4260 - continue; /* IRQ0 & IRQ13 not connected */
4261 - /* fall through */
4264 - continue; /* IRQ2 is never connected */
4267 - if (ELCR_fallback) {
4269 - * If the ELCR indicates a level-sensitive interrupt, we
4270 - * copy that information over to the MP table in the
4271 - * irqflag field (level sensitive, active high polarity).
4273 - if (ELCR_trigger(i))
4274 - intsrc.mpc_irqflag = 13;
4276 - intsrc.mpc_irqflag = 0;
4279 - intsrc.mpc_srcbusirq = i;
4280 - intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
4281 - MP_intsrc_info(&intsrc);
4284 - intsrc.mpc_irqtype = mp_ExtINT;
4285 - intsrc.mpc_srcbusirq = 0;
4286 - intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
4287 - MP_intsrc_info(&intsrc);
4290 -static inline void __init construct_default_ISA_mptable(int mpc_default_type)
4292 - struct mpc_config_processor processor;
4293 - struct mpc_config_bus bus;
4294 - struct mpc_config_ioapic ioapic;
4295 - struct mpc_config_lintsrc lintsrc;
4296 - int linttypes[2] = { mp_ExtINT, mp_NMI };
4300 - * local APIC has default address
4302 - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
4305 - * 2 CPUs, numbered 0 & 1.
4307 - processor.mpc_type = MP_PROCESSOR;
4308 - /* Either an integrated APIC or a discrete 82489DX. */
4309 - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
4310 - processor.mpc_cpuflag = CPU_ENABLED;
4311 - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
4312 - (boot_cpu_data.x86_model << 4) |
4313 - boot_cpu_data.x86_mask;
4314 - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
4315 - processor.mpc_reserved[0] = 0;
4316 - processor.mpc_reserved[1] = 0;
4317 - for (i = 0; i < 2; i++) {
4318 - processor.mpc_apicid = i;
4319 - MP_processor_info(&processor);
4322 - bus.mpc_type = MP_BUS;
4323 - bus.mpc_busid = 0;
4324 - switch (mpc_default_type) {
4327 - printk(KERN_ERR "Unknown standard configuration %d\n",
4328 - mpc_default_type);
4329 - /* fall through */
4332 - memcpy(bus.mpc_bustype, "ISA ", 6);
4337 - memcpy(bus.mpc_bustype, "EISA ", 6);
4341 - memcpy(bus.mpc_bustype, "MCA ", 6);
4343 - MP_bus_info(&bus);
4344 - if (mpc_default_type > 4) {
4345 - bus.mpc_busid = 1;
4346 - memcpy(bus.mpc_bustype, "PCI ", 6);
4347 - MP_bus_info(&bus);
4350 - ioapic.mpc_type = MP_IOAPIC;
4351 - ioapic.mpc_apicid = 2;
4352 - ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
4353 - ioapic.mpc_flags = MPC_APIC_USABLE;
4354 - ioapic.mpc_apicaddr = 0xFEC00000;
4355 - MP_ioapic_info(&ioapic);
4358 - * We set up most of the low 16 IO-APIC pins according to MPS rules.
4360 - construct_default_ioirq_mptable(mpc_default_type);
4362 - lintsrc.mpc_type = MP_LINTSRC;
4363 - lintsrc.mpc_irqflag = 0; /* conforming */
4364 - lintsrc.mpc_srcbusid = 0;
4365 - lintsrc.mpc_srcbusirq = 0;
4366 - lintsrc.mpc_destapic = MP_APIC_ALL;
4367 - for (i = 0; i < 2; i++) {
4368 - lintsrc.mpc_irqtype = linttypes[i];
4369 - lintsrc.mpc_destapiclint = i;
4370 - MP_lintsrc_info(&lintsrc);
4374 -static struct intel_mp_floating *mpf_found;
4377 - * Scan the memory blocks for an SMP configuration block.
4379 -void __init get_smp_config (void)
4381 - struct intel_mp_floating *mpf = mpf_found;
4384 - * ACPI supports both logical (e.g. Hyper-Threading) and physical
4385 - * processors, where MPS only supports physical.
4387 - if (acpi_lapic && acpi_ioapic) {
4388 - printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
4391 - else if (acpi_lapic)
4392 - printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
4394 - printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
4395 - if (mpf->mpf_feature2 & (1<<7)) {
4396 - printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
4399 - printk(KERN_INFO " Virtual Wire compatibility mode.\n");
4404 - * Now see if we need to read further.
4406 - if (mpf->mpf_feature1 != 0) {
4408 - printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
4409 - construct_default_ISA_mptable(mpf->mpf_feature1);
4411 - } else if (mpf->mpf_physptr) {
4414 - * Read the physical hardware table. Anything here will
4415 - * override the defaults.
4417 - if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
4418 - smp_found_config = 0;
4419 - printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
4420 - printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
4424 - * If there are no explicit MP IRQ entries, then we are
4425 - * broken. We set up most of the low 16 IO-APIC pins to
4426 - * ISA defaults and hope it will work.
4428 - if (!mp_irq_entries) {
4429 - struct mpc_config_bus bus;
4431 - printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
4433 - bus.mpc_type = MP_BUS;
4434 - bus.mpc_busid = 0;
4435 - memcpy(bus.mpc_bustype, "ISA ", 6);
4436 - MP_bus_info(&bus);
4438 - construct_default_ioirq_mptable(0);
4444 - printk(KERN_INFO "Processors: %d\n", num_processors);
4446 - * Only use the first configuration found.
4450 -static int __init smp_scan_config (unsigned long base, unsigned long length)
4452 - unsigned long *bp = isa_bus_to_virt(base);
4453 - struct intel_mp_floating *mpf;
4455 - printk(KERN_INFO "Scan SMP from %p for %ld bytes.\n", bp,length);
4456 - if (sizeof(*mpf) != 16)
4457 - printk("Error: MPF size\n");
4459 - while (length > 0) {
4460 - mpf = (struct intel_mp_floating *)bp;
4461 - if ((*bp == SMP_MAGIC_IDENT) &&
4462 - (mpf->mpf_length == 1) &&
4463 - !mpf_checksum((unsigned char *)bp, 16) &&
4464 - ((mpf->mpf_specification == 1)
4465 - || (mpf->mpf_specification == 4)) ) {
4467 - smp_found_config = 1;
4469 - printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
4470 - mpf, virt_to_phys(mpf));
4471 - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
4473 - if (mpf->mpf_physptr) {
4475 - * We cannot access to MPC table to compute
4476 - * table size yet, as only few megabytes from
4477 - * the bottom is mapped now.
4478 - * PC-9800's MPC table places on the very last
4479 - * of physical memory; so that simply reserving
4480 - * PAGE_SIZE from mpg->mpf_physptr yields BUG()
4481 - * in reserve_bootmem.
4483 - unsigned long size = PAGE_SIZE;
4484 - unsigned long end = max_low_pfn * PAGE_SIZE;
4485 - if (mpf->mpf_physptr + size > end)
4486 - size = end - mpf->mpf_physptr;
4487 - reserve_bootmem(mpf->mpf_physptr, size,
4491 - printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
4492 - mpf, ((void *)bp - isa_bus_to_virt(base)) + base);
4504 -void __init find_smp_config (void)
4507 - unsigned int address;
4511 - * FIXME: Linux assumes you have 640K of base ram..
4512 - * this continues the error...
4514 - * 1) Scan the bottom 1K for a signature
4515 - * 2) Scan the top 1K of base RAM
4516 - * 3) Scan the 64K of bios
4518 - if (smp_scan_config(0x0,0x400) ||
4519 - smp_scan_config(639*0x400,0x400) ||
4520 - smp_scan_config(0xF0000,0x10000))
4523 - * If it is an SMP machine we should know now, unless the
4524 - * configuration is in an EISA/MCA bus machine with an
4525 - * extended bios data area.
4527 - * there is a real-mode segmented pointer pointing to the
4528 - * 4K EBDA area at 0x40E, calculate and scan it here.
4530 - * NOTE! There are Linux loaders that will corrupt the EBDA
4531 - * area, and as such this kind of SMP config may be less
4532 - * trustworthy, simply because the SMP table may have been
4533 - * stomped on during early boot. These loaders are buggy and
4534 - * should be fixed.
4536 - * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
4540 - address = get_bios_ebda();
4542 - smp_scan_config(address, 0x400);
4548 -/* --------------------------------------------------------------------------
4549 - ACPI-based MP Configuration
4550 - -------------------------------------------------------------------------- */
4554 -void __init mp_register_lapic_address(u64 address)
4557 - mp_lapic_addr = (unsigned long) address;
4559 - set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
4561 - if (boot_cpu_physical_apicid == -1U)
4562 - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
4564 - Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
4568 -void __cpuinit mp_register_lapic (u8 id, u8 enabled)
4570 - struct mpc_config_processor processor;
4573 - if (MAX_APICS - id <= 0) {
4574 - printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
4579 - if (id == boot_cpu_physical_apicid)
4583 - processor.mpc_type = MP_PROCESSOR;
4584 - processor.mpc_apicid = id;
4585 - processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
4586 - processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
4587 - processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
4588 - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
4589 - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
4590 - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
4591 - processor.mpc_reserved[0] = 0;
4592 - processor.mpc_reserved[1] = 0;
4595 - MP_processor_info(&processor);
4598 -#ifdef CONFIG_X86_IO_APIC
4600 -#define MP_ISA_BUS 0
4601 -#define MP_MAX_IOAPIC_PIN 127
4603 -static struct mp_ioapic_routing {
4607 - u32 pin_programmed[4];
4608 -} mp_ioapic_routing[MAX_IO_APICS];
4610 -static int mp_find_ioapic (int gsi)
4614 - /* Find the IOAPIC that manages this GSI. */
4615 - for (i = 0; i < nr_ioapics; i++) {
4616 - if ((gsi >= mp_ioapic_routing[i].gsi_base)
4617 - && (gsi <= mp_ioapic_routing[i].gsi_end))
4621 - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
4626 -void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
4631 - if (nr_ioapics >= MAX_IO_APICS) {
4632 - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
4633 - "(found %d)\n", MAX_IO_APICS, nr_ioapics);
4634 - panic("Recompile kernel with bigger MAX_IO_APICS!\n");
4637 - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
4638 - " found in MADT table, skipping!\n");
4642 - idx = nr_ioapics++;
4644 - mp_ioapics[idx].mpc_type = MP_IOAPIC;
4645 - mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
4646 - mp_ioapics[idx].mpc_apicaddr = address;
4649 - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
4651 - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
4652 - && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
4653 - tmpid = io_apic_get_unique_id(idx, id);
4656 - if (tmpid == -1) {
4660 - mp_ioapics[idx].mpc_apicid = tmpid;
4661 - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
4664 - * Build basic GSI lookup table to facilitate gsi->io_apic lookups
4665 - * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
4667 - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
4668 - mp_ioapic_routing[idx].gsi_base = gsi_base;
4669 - mp_ioapic_routing[idx].gsi_end = gsi_base +
4670 - io_apic_get_redir_entries(idx);
4672 - printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
4673 - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
4674 - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
4675 - mp_ioapic_routing[idx].gsi_base,
4676 - mp_ioapic_routing[idx].gsi_end);
4680 -mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
4682 - struct mpc_config_intsrc intsrc;
4687 - * Convert 'gsi' to 'ioapic.pin'.
4689 - ioapic = mp_find_ioapic(gsi);
4692 - pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
4695 - * TBD: This check is for faulty timer entries, where the override
4696 - * erroneously sets the trigger to level, resulting in a HUGE
4697 - * increase of timer interrupts!
4699 - if ((bus_irq == 0) && (trigger == 3))
4702 - intsrc.mpc_type = MP_INTSRC;
4703 - intsrc.mpc_irqtype = mp_INT;
4704 - intsrc.mpc_irqflag = (trigger << 2) | polarity;
4705 - intsrc.mpc_srcbus = MP_ISA_BUS;
4706 - intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
4707 - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
4708 - intsrc.mpc_dstirq = pin; /* INTIN# */
4710 - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
4711 - intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
4712 - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
4713 - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
4715 - mp_irqs[mp_irq_entries] = intsrc;
4716 - if (++mp_irq_entries == MAX_IRQ_SOURCES)
4717 - panic("Max # of irq sources exceeded!\n");
4720 -void __init mp_config_acpi_legacy_irqs (void)
4722 - struct mpc_config_intsrc intsrc;
4727 - * Fabricate the legacy ISA bus (bus #31).
4729 - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
4730 - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
4733 - * Older generations of ES7000 have no legacy identity mappings
4735 - if (es7000_plat == 1)
4739 - * Locate the IOAPIC that manages the ISA IRQs (0-15).
4741 - ioapic = mp_find_ioapic(0);
4745 - intsrc.mpc_type = MP_INTSRC;
4746 - intsrc.mpc_irqflag = 0; /* Conforming */
4747 - intsrc.mpc_srcbus = MP_ISA_BUS;
4748 - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
4751 - * Use the default configuration for the IRQs 0-15. Unless
4752 - * overridden by (MADT) interrupt source override entries.
4754 - for (i = 0; i < 16; i++) {
4757 - for (idx = 0; idx < mp_irq_entries; idx++) {
4758 - struct mpc_config_intsrc *irq = mp_irqs + idx;
4760 - /* Do we already have a mapping for this ISA IRQ? */
4761 - if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
4764 - /* Do we already have a mapping for this IOAPIC pin */
4765 - if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
4766 - (irq->mpc_dstirq == i))
4770 - if (idx != mp_irq_entries) {
4771 - printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
4772 - continue; /* IRQ already used */
4775 - intsrc.mpc_irqtype = mp_INT;
4776 - intsrc.mpc_srcbusirq = i; /* Identity mapped */
4777 - intsrc.mpc_dstirq = i;
4779 - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
4780 - "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
4781 - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
4782 - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
4783 - intsrc.mpc_dstirq);
4785 - mp_irqs[mp_irq_entries] = intsrc;
4786 - if (++mp_irq_entries == MAX_IRQ_SOURCES)
4787 - panic("Max # of irq sources exceeded!\n");
4791 -#define MAX_GSI_NUM 4096
4792 -#define IRQ_COMPRESSION_START 64
4794 -int mp_register_gsi(u32 gsi, int triggering, int polarity)
4797 - int ioapic_pin = 0;
4799 - static int pci_irq = IRQ_COMPRESSION_START;
4801 - * Mapping between Global System Interrupts, which
4802 - * represent all possible interrupts, and IRQs
4803 - * assigned to actual devices.
4805 - static int gsi_to_irq[MAX_GSI_NUM];
4807 - /* Don't set up the ACPI SCI because it's already set up */
4808 - if (acpi_gbl_FADT.sci_interrupt == gsi)
4811 - ioapic = mp_find_ioapic(gsi);
4813 - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
4817 - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
4819 - if (ioapic_renumber_irq)
4820 - gsi = ioapic_renumber_irq(ioapic, gsi);
4823 - * Avoid pin reprogramming. PRTs typically include entries
4824 - * with redundant pin->gsi mappings (but unique PCI devices);
4825 - * we only program the IOAPIC on the first.
4827 - bit = ioapic_pin % 32;
4828 - idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
4830 - printk(KERN_ERR "Invalid reference to IOAPIC pin "
4831 - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
4835 - if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
4836 - Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
4837 - mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
4838 - return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
4841 - mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
4844 - * For GSI >= 64, use IRQ compression
4846 - if ((gsi >= IRQ_COMPRESSION_START)
4847 - && (triggering == ACPI_LEVEL_SENSITIVE)) {
4849 - * For PCI devices assign IRQs in order, avoiding gaps
4850 - * due to unused I/O APIC pins.
4853 - if (gsi < MAX_GSI_NUM) {
4855 - * Retain the VIA chipset work-around (gsi > 15), but
4856 - * avoid a problem where the 8254 timer (IRQ0) is setup
4857 - * via an override (so it's not on pin 0 of the ioapic),
4858 - * and at the same time, the pin 0 interrupt is a PCI
4859 - * type. The gsi > 15 test could cause these two pins
4860 - * to be shared as IRQ0, and they are not shareable.
4861 - * So test for this condition, and if necessary, avoid
4862 - * the pin collision.
4864 - if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
4867 - * Don't assign IRQ used by ACPI SCI
4869 - if (gsi == acpi_gbl_FADT.sci_interrupt)
4871 - gsi_to_irq[irq] = gsi;
4873 - printk(KERN_ERR "GSI %u is too high\n", gsi);
4878 - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
4879 - triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
4880 - polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
4884 -#endif /* CONFIG_X86_IO_APIC */
4885 -#endif /* CONFIG_ACPI */
4886 --- sle11-2009-05-14.orig/arch/x86/kernel/mpparse_64-xen.c 2009-03-16 16:33:40.000000000 +0100
4887 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
4890 - * Intel Multiprocessor Specification 1.1 and 1.4
4891 - * compliant MP-table parsing routines.
4893 - * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
4894 - * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
4897 - * Erich Boleyn : MP v1.4 and additional changes.
4898 - * Alan Cox : Added EBDA scanning
4899 - * Ingo Molnar : various cleanups and rewrites
4900 - * Maciej W. Rozycki: Bits for default MP configurations
4901 - * Paul Diefenbaugh: Added full ACPI support
4904 -#include <linux/mm.h>
4905 -#include <linux/init.h>
4906 -#include <linux/delay.h>
4907 -#include <linux/bootmem.h>
4908 -#include <linux/kernel_stat.h>
4909 -#include <linux/mc146818rtc.h>
4910 -#include <linux/acpi.h>
4911 -#include <linux/module.h>
4913 -#include <asm/smp.h>
4914 -#include <asm/mtrr.h>
4915 -#include <asm/mpspec.h>
4916 -#include <asm/pgalloc.h>
4917 -#include <asm/io_apic.h>
4918 -#include <asm/proto.h>
4919 -#include <asm/acpi.h>
4921 -/* Have we found an MP table */
4922 -int smp_found_config;
4925 - * Various Linux-internal data structures created from the
4928 -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
4929 -int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
4931 -static int mp_current_pci_id = 0;
4932 -/* I/O APIC entries */
4933 -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
4935 -/* # of MP IRQ source entries */
4936 -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
4938 -/* MP IRQ source entries */
4939 -int mp_irq_entries;
4942 -unsigned long mp_lapic_addr = 0;
4946 -/* Processor that is doing the boot up */
4947 -unsigned int boot_cpu_id = -1U;
4948 -EXPORT_SYMBOL(boot_cpu_id);
4950 -/* Internal processor count */
4951 -unsigned int num_processors;
4953 -unsigned disabled_cpus __cpuinitdata;
4955 -/* Bitmask of physically existing CPUs */
4956 -physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
4959 -u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
4960 - = { [0 ... NR_CPUS-1] = BAD_APICID };
4961 -void *x86_bios_cpu_apicid_early_ptr;
4963 -DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
4964 -EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
4968 - * Intel MP BIOS table parsing routines:
4972 - * Checksum an MP configuration block.
4975 -static int __init mpf_checksum(unsigned char *mp, int len)
4982 - return sum & 0xFF;
4986 -static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
4989 - cpumask_t tmp_map;
4990 - char *bootup_cpu = "";
4992 - if (!(m->mpc_cpuflag & CPU_ENABLED)) {
4996 - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
4997 - bootup_cpu = " (Bootup-CPU)";
4998 - boot_cpu_id = m->mpc_apicid;
5001 - printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
5003 - if (num_processors >= NR_CPUS) {
5004 - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
5005 - " Processor ignored.\n", NR_CPUS);
5010 - cpus_complement(tmp_map, cpu_present_map);
5011 - cpu = first_cpu(tmp_map);
5013 - physid_set(m->mpc_apicid, phys_cpu_present_map);
5014 - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
5016 - * x86_bios_cpu_apicid is required to have processors listed
5017 - * in same order as logical cpu numbers. Hence the first
5018 - * entry is BSP, and so on.
5022 - /* are we being called early in kernel startup? */
5023 - if (x86_cpu_to_apicid_early_ptr) {
5024 - u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
5025 - u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
5027 - cpu_to_apicid[cpu] = m->mpc_apicid;
5028 - bios_cpu_apicid[cpu] = m->mpc_apicid;
5030 - per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
5031 - per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid;
5034 - cpu_set(cpu, cpu_possible_map);
5035 - cpu_set(cpu, cpu_present_map);
5038 -static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
5042 -#endif /* CONFIG_XEN */
5044 -static void __init MP_bus_info (struct mpc_config_bus *m)
5048 - memcpy(str, m->mpc_bustype, 6);
5050 - Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
5052 - if (strncmp(str, "ISA", 3) == 0) {
5053 - set_bit(m->mpc_busid, mp_bus_not_pci);
5054 - } else if (strncmp(str, "PCI", 3) == 0) {
5055 - clear_bit(m->mpc_busid, mp_bus_not_pci);
5056 - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
5057 - mp_current_pci_id++;
5059 - printk(KERN_ERR "Unknown bustype %s\n", str);
5063 -static int bad_ioapic(unsigned long address)
5065 - if (nr_ioapics >= MAX_IO_APICS) {
5066 - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
5067 - "(found %d)\n", MAX_IO_APICS, nr_ioapics);
5068 - panic("Recompile kernel with bigger MAX_IO_APICS!\n");
5071 - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
5072 - " found in table, skipping!\n");
5078 -static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
5080 - if (!(m->mpc_flags & MPC_APIC_USABLE))
5083 - printk("I/O APIC #%d at 0x%X.\n",
5084 - m->mpc_apicid, m->mpc_apicaddr);
5086 - if (bad_ioapic(m->mpc_apicaddr))
5089 - mp_ioapics[nr_ioapics] = *m;
5093 -static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
5095 - mp_irqs [mp_irq_entries] = *m;
5096 - Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
5097 - " IRQ %02x, APIC ID %x, APIC INT %02x\n",
5098 - m->mpc_irqtype, m->mpc_irqflag & 3,
5099 - (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
5100 - m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
5101 - if (++mp_irq_entries >= MAX_IRQ_SOURCES)
5102 - panic("Max # of irq sources exceeded!!\n");
5105 -static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
5107 - Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
5108 - " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
5109 - m->mpc_irqtype, m->mpc_irqflag & 3,
5110 - (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
5111 - m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
5115 - * Read/parse the MPC
5118 -static int __init smp_read_mpc(struct mp_config_table *mpc)
5121 - int count=sizeof(*mpc);
5122 - unsigned char *mpt=((unsigned char *)mpc)+count;
5124 - if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
5125 - printk("MPTABLE: bad signature [%c%c%c%c]!\n",
5126 - mpc->mpc_signature[0],
5127 - mpc->mpc_signature[1],
5128 - mpc->mpc_signature[2],
5129 - mpc->mpc_signature[3]);
5132 - if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
5133 - printk("MPTABLE: checksum error!\n");
5136 - if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
5137 - printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
5141 - if (!mpc->mpc_lapic) {
5142 - printk(KERN_ERR "MPTABLE: null local APIC address!\n");
5145 - memcpy(str,mpc->mpc_oem,8);
5147 - printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
5149 - memcpy(str,mpc->mpc_productid,12);
5151 - printk("MPTABLE: Product ID: %s ",str);
5153 - printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
5155 - /* save the local APIC address, it might be non-default */
5157 - mp_lapic_addr = mpc->mpc_lapic;
5160 - * Now process the configuration blocks.
5162 - while (count < mpc->mpc_length) {
5164 - case MP_PROCESSOR:
5166 - struct mpc_config_processor *m=
5167 - (struct mpc_config_processor *)mpt;
5169 - MP_processor_info(m);
5170 - mpt += sizeof(*m);
5171 - count += sizeof(*m);
5176 - struct mpc_config_bus *m=
5177 - (struct mpc_config_bus *)mpt;
5179 - mpt += sizeof(*m);
5180 - count += sizeof(*m);
5185 - struct mpc_config_ioapic *m=
5186 - (struct mpc_config_ioapic *)mpt;
5187 - MP_ioapic_info(m);
5188 - mpt += sizeof(*m);
5189 - count += sizeof(*m);
5194 - struct mpc_config_intsrc *m=
5195 - (struct mpc_config_intsrc *)mpt;
5197 - MP_intsrc_info(m);
5198 - mpt += sizeof(*m);
5199 - count += sizeof(*m);
5204 - struct mpc_config_lintsrc *m=
5205 - (struct mpc_config_lintsrc *)mpt;
5206 - MP_lintsrc_info(m);
5207 - mpt += sizeof(*m);
5208 - count += sizeof(*m);
5213 - setup_apic_routing();
5214 - if (!num_processors)
5215 - printk(KERN_ERR "MPTABLE: no processors registered!\n");
5216 - return num_processors;
5219 -static int __init ELCR_trigger(unsigned int irq)
5221 - unsigned int port;
5223 - port = 0x4d0 + (irq >> 3);
5224 - return (inb(port) >> (irq & 7)) & 1;
5227 -static void __init construct_default_ioirq_mptable(int mpc_default_type)
5229 - struct mpc_config_intsrc intsrc;
5231 - int ELCR_fallback = 0;
5233 - intsrc.mpc_type = MP_INTSRC;
5234 - intsrc.mpc_irqflag = 0; /* conforming */
5235 - intsrc.mpc_srcbus = 0;
5236 - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
5238 - intsrc.mpc_irqtype = mp_INT;
5241 - * If true, we have an ISA/PCI system with no IRQ entries
5242 - * in the MP table. To prevent the PCI interrupts from being set up
5243 - * incorrectly, we try to use the ELCR. The sanity check to see if
5244 - * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
5245 - * never be level sensitive, so we simply see if the ELCR agrees.
5246 - * If it does, we assume it's valid.
5248 - if (mpc_default_type == 5) {
5249 - printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
5251 - if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
5252 - printk(KERN_ERR "ELCR contains invalid data... not using ELCR\n");
5254 - printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
5255 - ELCR_fallback = 1;
5259 - for (i = 0; i < 16; i++) {
5260 - switch (mpc_default_type) {
5262 - if (i == 0 || i == 13)
5263 - continue; /* IRQ0 & IRQ13 not connected */
5264 - /* fall through */
5267 - continue; /* IRQ2 is never connected */
5270 - if (ELCR_fallback) {
5272 - * If the ELCR indicates a level-sensitive interrupt, we
5273 - * copy that information over to the MP table in the
5274 - * irqflag field (level sensitive, active high polarity).
5276 - if (ELCR_trigger(i))
5277 - intsrc.mpc_irqflag = 13;
5279 - intsrc.mpc_irqflag = 0;
5282 - intsrc.mpc_srcbusirq = i;
5283 - intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
5284 - MP_intsrc_info(&intsrc);
5287 - intsrc.mpc_irqtype = mp_ExtINT;
5288 - intsrc.mpc_srcbusirq = 0;
5289 - intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
5290 - MP_intsrc_info(&intsrc);
5293 -static inline void __init construct_default_ISA_mptable(int mpc_default_type)
5295 - struct mpc_config_processor processor;
5296 - struct mpc_config_bus bus;
5297 - struct mpc_config_ioapic ioapic;
5298 - struct mpc_config_lintsrc lintsrc;
5299 - int linttypes[2] = { mp_ExtINT, mp_NMI };
5303 - * local APIC has default address
5305 - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
5308 - * 2 CPUs, numbered 0 & 1.
5310 - processor.mpc_type = MP_PROCESSOR;
5311 - processor.mpc_apicver = 0;
5312 - processor.mpc_cpuflag = CPU_ENABLED;
5313 - processor.mpc_cpufeature = 0;
5314 - processor.mpc_featureflag = 0;
5315 - processor.mpc_reserved[0] = 0;
5316 - processor.mpc_reserved[1] = 0;
5317 - for (i = 0; i < 2; i++) {
5318 - processor.mpc_apicid = i;
5319 - MP_processor_info(&processor);
5322 - bus.mpc_type = MP_BUS;
5323 - bus.mpc_busid = 0;
5324 - switch (mpc_default_type) {
5326 - printk(KERN_ERR "???\nUnknown standard configuration %d\n",
5327 - mpc_default_type);
5328 - /* fall through */
5331 - memcpy(bus.mpc_bustype, "ISA ", 6);
5334 - MP_bus_info(&bus);
5335 - if (mpc_default_type > 4) {
5336 - bus.mpc_busid = 1;
5337 - memcpy(bus.mpc_bustype, "PCI ", 6);
5338 - MP_bus_info(&bus);
5341 - ioapic.mpc_type = MP_IOAPIC;
5342 - ioapic.mpc_apicid = 2;
5343 - ioapic.mpc_apicver = 0;
5344 - ioapic.mpc_flags = MPC_APIC_USABLE;
5345 - ioapic.mpc_apicaddr = 0xFEC00000;
5346 - MP_ioapic_info(&ioapic);
5349 - * We set up most of the low 16 IO-APIC pins according to MPS rules.
5351 - construct_default_ioirq_mptable(mpc_default_type);
5353 - lintsrc.mpc_type = MP_LINTSRC;
5354 - lintsrc.mpc_irqflag = 0; /* conforming */
5355 - lintsrc.mpc_srcbusid = 0;
5356 - lintsrc.mpc_srcbusirq = 0;
5357 - lintsrc.mpc_destapic = MP_APIC_ALL;
5358 - for (i = 0; i < 2; i++) {
5359 - lintsrc.mpc_irqtype = linttypes[i];
5360 - lintsrc.mpc_destapiclint = i;
5361 - MP_lintsrc_info(&lintsrc);
5365 -static struct intel_mp_floating *mpf_found;
5368 - * Scan the memory blocks for an SMP configuration block.
5370 -void __init get_smp_config (void)
5372 - struct intel_mp_floating *mpf = mpf_found;
5375 - * ACPI supports both logical (e.g. Hyper-Threading) and physical
5376 - * processors, where MPS only supports physical.
5378 - if (acpi_lapic && acpi_ioapic) {
5379 - printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
5382 - else if (acpi_lapic)
5383 - printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
5385 - printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
5388 - * Now see if we need to read further.
5390 - if (mpf->mpf_feature1 != 0) {
5392 - printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
5393 - construct_default_ISA_mptable(mpf->mpf_feature1);
5395 - } else if (mpf->mpf_physptr) {
5398 - * Read the physical hardware table. Anything here will
5399 - * override the defaults.
5401 - if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
5402 - smp_found_config = 0;
5403 - printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
5404 - printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
5408 - * If there are no explicit MP IRQ entries, then we are
5409 - * broken. We set up most of the low 16 IO-APIC pins to
5410 - * ISA defaults and hope it will work.
5412 - if (!mp_irq_entries) {
5413 - struct mpc_config_bus bus;
5415 - printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
5417 - bus.mpc_type = MP_BUS;
5418 - bus.mpc_busid = 0;
5419 - memcpy(bus.mpc_bustype, "ISA ", 6);
5420 - MP_bus_info(&bus);
5422 - construct_default_ioirq_mptable(0);
5428 - printk(KERN_INFO "Processors: %d\n", num_processors);
5430 - * Only use the first configuration found.
5434 -static int __init smp_scan_config (unsigned long base, unsigned long length)
5436 - extern void __bad_mpf_size(void);
5437 - unsigned int *bp = isa_bus_to_virt(base);
5438 - struct intel_mp_floating *mpf;
5440 - Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
5441 - if (sizeof(*mpf) != 16)
5444 - while (length > 0) {
5445 - mpf = (struct intel_mp_floating *)bp;
5446 - if ((*bp == SMP_MAGIC_IDENT) &&
5447 - (mpf->mpf_length == 1) &&
5448 - !mpf_checksum((unsigned char *)bp, 16) &&
5449 - ((mpf->mpf_specification == 1)
5450 - || (mpf->mpf_specification == 4)) ) {
5452 - smp_found_config = 1;
5462 -void __init find_smp_config(void)
5464 - unsigned int address;
5467 - * FIXME: Linux assumes you have 640K of base ram..
5468 - * this continues the error...
5470 - * 1) Scan the bottom 1K for a signature
5471 - * 2) Scan the top 1K of base RAM
5472 - * 3) Scan the 64K of bios
5474 - if (smp_scan_config(0x0,0x400) ||
5475 - smp_scan_config(639*0x400,0x400) ||
5476 - smp_scan_config(0xF0000,0x10000))
5479 - * If it is an SMP machine we should know now.
5481 - * there is a real-mode segmented pointer pointing to the
5482 - * 4K EBDA area at 0x40E, calculate and scan it here.
5484 - * NOTE! There are Linux loaders that will corrupt the EBDA
5485 - * area, and as such this kind of SMP config may be less
5486 - * trustworthy, simply because the SMP table may have been
5487 - * stomped on during early boot. These loaders are buggy and
5488 - * should be fixed.
5491 - address = *(unsigned short *)phys_to_virt(0x40E);
5493 - if (smp_scan_config(address, 0x1000))
5496 - /* If we have come this far, we did not find an MP table */
5497 - printk(KERN_INFO "No mptable found.\n");
5500 -/* --------------------------------------------------------------------------
5501 - ACPI-based MP Configuration
5502 - -------------------------------------------------------------------------- */
5506 -void __init mp_register_lapic_address(u64 address)
5509 - mp_lapic_addr = (unsigned long) address;
5510 - set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
5511 - if (boot_cpu_id == -1U)
5512 - boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
5516 -void __cpuinit mp_register_lapic (u8 id, u8 enabled)
5518 - struct mpc_config_processor processor;
5521 - if (id == boot_cpu_id)
5525 - processor.mpc_type = MP_PROCESSOR;
5526 - processor.mpc_apicid = id;
5527 - processor.mpc_apicver = 0;
5528 - processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
5529 - processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
5530 - processor.mpc_cpufeature = 0;
5531 - processor.mpc_featureflag = 0;
5532 - processor.mpc_reserved[0] = 0;
5533 - processor.mpc_reserved[1] = 0;
5536 - MP_processor_info(&processor);
5539 -#define MP_ISA_BUS 0
5540 -#define MP_MAX_IOAPIC_PIN 127
5542 -static struct mp_ioapic_routing {
5546 - u32 pin_programmed[4];
5547 -} mp_ioapic_routing[MAX_IO_APICS];
5549 -static int mp_find_ioapic(int gsi)
5553 - /* Find the IOAPIC that manages this GSI. */
5554 - for (i = 0; i < nr_ioapics; i++) {
5555 - if ((gsi >= mp_ioapic_routing[i].gsi_start)
5556 - && (gsi <= mp_ioapic_routing[i].gsi_end))
5560 - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
5564 -static u8 uniq_ioapic_id(u8 id)
5567 - DECLARE_BITMAP(used, 256);
5568 - bitmap_zero(used, 256);
5569 - for (i = 0; i < nr_ioapics; i++) {
5570 - struct mpc_config_ioapic *ia = &mp_ioapics[i];
5571 - __set_bit(ia->mpc_apicid, used);
5573 - if (!test_bit(id, used))
5575 - return find_first_zero_bit(used, 256);
5578 -void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
5582 - if (bad_ioapic(address))
5587 - mp_ioapics[idx].mpc_type = MP_IOAPIC;
5588 - mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
5589 - mp_ioapics[idx].mpc_apicaddr = address;
5592 - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
5594 - mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
5595 - mp_ioapics[idx].mpc_apicver = 0;
5598 - * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
5599 - * and to prevent reprogramming of IOAPIC pins (PCI IRQs).
5601 - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
5602 - mp_ioapic_routing[idx].gsi_start = gsi_base;
5603 - mp_ioapic_routing[idx].gsi_end = gsi_base +
5604 - io_apic_get_redir_entries(idx);
5606 - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
5607 - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
5608 - mp_ioapics[idx].mpc_apicaddr,
5609 - mp_ioapic_routing[idx].gsi_start,
5610 - mp_ioapic_routing[idx].gsi_end);
5616 -mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
5618 - struct mpc_config_intsrc intsrc;
5623 - * Convert 'gsi' to 'ioapic.pin'.
5625 - ioapic = mp_find_ioapic(gsi);
5628 - pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
5631 - * TBD: This check is for faulty timer entries, where the override
5632 - * erroneously sets the trigger to level, resulting in a HUGE
5633 - * increase of timer interrupts!
5635 - if ((bus_irq == 0) && (trigger == 3))
5638 - intsrc.mpc_type = MP_INTSRC;
5639 - intsrc.mpc_irqtype = mp_INT;
5640 - intsrc.mpc_irqflag = (trigger << 2) | polarity;
5641 - intsrc.mpc_srcbus = MP_ISA_BUS;
5642 - intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
5643 - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
5644 - intsrc.mpc_dstirq = pin; /* INTIN# */
5646 - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
5647 - intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
5648 - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
5649 - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
5651 - mp_irqs[mp_irq_entries] = intsrc;
5652 - if (++mp_irq_entries == MAX_IRQ_SOURCES)
5653 - panic("Max # of irq sources exceeded!\n");
5656 -void __init mp_config_acpi_legacy_irqs(void)
5658 - struct mpc_config_intsrc intsrc;
5663 - * Fabricate the legacy ISA bus (bus #31).
5665 - set_bit(MP_ISA_BUS, mp_bus_not_pci);
5668 - * Locate the IOAPIC that manages the ISA IRQs (0-15).
5670 - ioapic = mp_find_ioapic(0);
5674 - intsrc.mpc_type = MP_INTSRC;
5675 - intsrc.mpc_irqflag = 0; /* Conforming */
5676 - intsrc.mpc_srcbus = MP_ISA_BUS;
5677 - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
5680 - * Use the default configuration for the IRQs 0-15. Unless
5681 - * overridden by (MADT) interrupt source override entries.
5683 - for (i = 0; i < 16; i++) {
5686 - for (idx = 0; idx < mp_irq_entries; idx++) {
5687 - struct mpc_config_intsrc *irq = mp_irqs + idx;
5689 - /* Do we already have a mapping for this ISA IRQ? */
5690 - if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
5693 - /* Do we already have a mapping for this IOAPIC pin */
5694 - if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
5695 - (irq->mpc_dstirq == i))
5699 - if (idx != mp_irq_entries) {
5700 - printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
5701 - continue; /* IRQ already used */
5704 - intsrc.mpc_irqtype = mp_INT;
5705 - intsrc.mpc_srcbusirq = i; /* Identity mapped */
5706 - intsrc.mpc_dstirq = i;
5708 - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
5709 - "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
5710 - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
5711 - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
5712 - intsrc.mpc_dstirq);
5714 - mp_irqs[mp_irq_entries] = intsrc;
5715 - if (++mp_irq_entries == MAX_IRQ_SOURCES)
5716 - panic("Max # of irq sources exceeded!\n");
5720 -int mp_register_gsi(u32 gsi, int triggering, int polarity)
5723 - int ioapic_pin = 0;
5726 - if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
5729 - /* Don't set up the ACPI SCI because it's already set up */
5730 - if (acpi_gbl_FADT.sci_interrupt == gsi)
5733 - ioapic = mp_find_ioapic(gsi);
5735 - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
5739 - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
5742 - * Avoid pin reprogramming. PRTs typically include entries
5743 - * with redundant pin->gsi mappings (but unique PCI devices);
5744 - * we only program the IOAPIC on the first.
5746 - bit = ioapic_pin % 32;
5747 - idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
5749 - printk(KERN_ERR "Invalid reference to IOAPIC pin "
5750 - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
5754 - if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
5755 - Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
5756 - mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
5760 - mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
5762 - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
5763 - triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
5764 - polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
5767 -#endif /*CONFIG_ACPI*/
5768 --- sle11-2009-05-14.orig/arch/x86/kernel/pci-dma-xen.c 2009-03-16 16:33:40.000000000 +0100
5769 +++ sle11-2009-05-14/arch/x86/kernel/pci-dma-xen.c 2009-03-16 16:38:05.000000000 +0100
5772 - * Dynamic DMA mapping support.
5774 - * On i386 there is no hardware dynamic DMA address translation,
5775 - * so consistent alloc/free are merely page allocation/freeing.
5776 - * The rest of the dynamic DMA mapping interface is implemented
5780 -#include <linux/types.h>
5781 -#include <linux/mm.h>
5782 -#include <linux/string.h>
5783 +#include <linux/dma-mapping.h>
5784 +#include <linux/dmar.h>
5785 +#include <linux/bootmem.h>
5786 #include <linux/pci.h>
5787 -#include <linux/module.h>
5788 -#include <linux/version.h>
5789 -#include <asm/io.h>
5790 -#include <xen/balloon.h>
5791 -#include <xen/gnttab.h>
5792 -#include <asm/swiotlb.h>
5793 -#include <asm/tlbflush.h>
5794 -#include <asm/swiotlb_32.h>
5795 -#include <asm/gnttab_dma.h>
5796 -#include <asm/bug.h>
5799 -#include <asm/iommu.h>
5800 +#include <asm/proto.h>
5801 +#include <asm/dma.h>
5802 +#include <asm/gart.h>
5803 +#include <asm/calgary.h>
5805 +int forbid_dac __read_mostly;
5806 +EXPORT_SYMBOL(forbid_dac);
5808 +const struct dma_mapping_ops *dma_ops;
5809 +EXPORT_SYMBOL(dma_ops);
5811 +static int iommu_sac_force __read_mostly;
5813 +#ifdef CONFIG_IOMMU_DEBUG
5814 +int panic_on_overflow __read_mostly = 1;
5815 +int force_iommu __read_mostly = 1;
5817 +int panic_on_overflow __read_mostly = 0;
5818 +int force_iommu __read_mostly = 0;
5821 int iommu_merge __read_mostly = 0;
5822 -EXPORT_SYMBOL(iommu_merge);
5824 -dma_addr_t bad_dma_address __read_mostly;
5825 -EXPORT_SYMBOL(bad_dma_address);
5826 +int no_iommu __read_mostly;
5827 +/* Set this to 1 if there is a HW IOMMU in the system */
5828 +int iommu_detected __read_mostly = 0;
5830 /* This tells the BIO block layer to assume merging. Default to off
5831 because we cannot guarantee merging later. */
5832 int iommu_bio_merge __read_mostly = 0;
5833 EXPORT_SYMBOL(iommu_bio_merge);
5835 -int force_iommu __read_mostly= 0;
5836 +dma_addr_t bad_dma_address __read_mostly = 0;
5837 +EXPORT_SYMBOL(bad_dma_address);
5839 -__init int iommu_setup(char *p)
5843 +/* Dummy device used for NULL arguments (normally ISA). Better would
5844 + be probably a smaller DMA mask, but this is bug-to-bug compatible
5846 +struct device fallback_dev = {
5847 + .bus_id = "fallback device",
5848 + .coherent_dma_mask = DMA_32BIT_MASK,
5849 + .dma_mask = &fallback_dev.coherent_dma_mask,
5852 -void __init pci_iommu_alloc(void)
5853 +int dma_set_mask(struct device *dev, u64 mask)
5855 -#ifdef CONFIG_SWIOTLB
5856 - pci_swiotlb_init();
5859 + if (!dev->dma_mask || !dma_supported(dev, mask))
5862 + *dev->dma_mask = mask;
5864 -static int __init pci_iommu_init(void)
5869 +EXPORT_SYMBOL(dma_set_mask);
5871 -/* Must execute after PCI subsystem */
5872 -fs_initcall(pci_iommu_init);
5875 -struct dma_coherent_mem {
5880 - unsigned long *bitmap;
5883 -#define IOMMU_BUG_ON(test) \
5885 - if (unlikely(test)) { \
5886 - printk(KERN_ALERT "Fatal DMA error! " \
5887 - "Please use 'swiotlb=force'\n"); \
5891 +#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
5892 +static __initdata void *dma32_bootmem_ptr;
5893 +static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
5895 -static int check_pages_physically_contiguous(unsigned long pfn,
5896 - unsigned int offset,
5898 +static int __init parse_dma32_size_opt(char *p)
5900 - unsigned long next_mfn;
5904 - next_mfn = pfn_to_mfn(pfn);
5905 - nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
5907 - for (i = 1; i < nr_pages; i++) {
5908 - if (pfn_to_mfn(++pfn) != ++next_mfn)
5914 + dma32_bootmem_size = memparse(p, &p);
5917 +early_param("dma32_size", parse_dma32_size_opt);
5919 -int range_straddles_page_boundary(paddr_t p, size_t size)
5920 +void __init dma32_reserve_bootmem(void)
5922 - unsigned long pfn = p >> PAGE_SHIFT;
5923 - unsigned int offset = p & ~PAGE_MASK;
5924 + unsigned long size, align;
5925 + if (end_pfn <= MAX_DMA32_PFN)
5928 - return ((offset + size > PAGE_SIZE) &&
5929 - !check_pages_physically_contiguous(pfn, offset, size));
5930 + align = 64ULL<<20;
5931 + size = round_up(dma32_bootmem_size, align);
5932 + dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
5933 + __pa(MAX_DMA_ADDRESS));
5934 + if (dma32_bootmem_ptr)
5935 + dma32_bootmem_size = size;
5937 + dma32_bootmem_size = 0;
5941 -dma_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
5942 - enum dma_data_direction direction)
5943 +static void __init dma32_free_bootmem(void)
5948 + if (end_pfn <= MAX_DMA32_PFN)
5951 - BUG_ON(!valid_dma_direction(direction));
5952 - WARN_ON(nents == 0 || sgl->length == 0);
5953 + if (!dma32_bootmem_ptr)
5957 - rc = swiotlb_map_sg(hwdev, sgl, nents, direction);
5959 - struct scatterlist *sg;
5961 - for_each_sg(sgl, sg, nents, i) {
5962 - BUG_ON(!sg_page(sg));
5964 - gnttab_dma_map_page(sg_page(sg)) + sg->offset;
5965 - sg->dma_length = sg->length;
5966 - IOMMU_BUG_ON(address_needs_mapping(
5967 - hwdev, sg->dma_address));
5968 - IOMMU_BUG_ON(range_straddles_page_boundary(
5969 - page_to_pseudophys(sg_page(sg)) + sg->offset,
5974 + for_each_online_node(node)
5975 + free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr),
5976 + dma32_bootmem_size);
5978 - flush_write_buffers();
5980 + dma32_bootmem_ptr = NULL;
5981 + dma32_bootmem_size = 0;
5983 -EXPORT_SYMBOL(dma_map_sg);
5985 +#define dma32_free_bootmem() ((void)0)
5989 -dma_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
5990 - enum dma_data_direction direction)
5993 +static const struct dma_mapping_ops swiotlb_dma_ops = {
5994 + .mapping_error = swiotlb_dma_mapping_error,
5995 + .map_single = swiotlb_map_single_phys,
5996 + .unmap_single = swiotlb_unmap_single,
5997 + .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
5998 + .sync_single_for_device = swiotlb_sync_single_for_device,
5999 + .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
6000 + .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
6001 + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
6002 + .sync_sg_for_device = swiotlb_sync_sg_for_device,
6003 + .map_sg = swiotlb_map_sg,
6004 + .unmap_sg = swiotlb_unmap_sg,
6005 + .dma_supported = swiotlb_dma_supported
6008 - BUG_ON(!valid_dma_direction(direction));
6010 - swiotlb_unmap_sg(hwdev, sgl, nents, direction);
6012 - struct scatterlist *sg;
6013 +void __init pci_iommu_alloc(void)
6015 + /* free the range so iommu could get some range less than 4G */
6016 + dma32_free_bootmem();
6018 + * The order of these functions is important for
6019 + * fall-back/fail-over reasons
6021 +#ifdef CONFIG_GART_IOMMU
6022 + gart_iommu_hole_init();
6025 - for_each_sg(sgl, sg, nents, i)
6026 - gnttab_dma_unmap_page(sg->dma_address);
6029 -EXPORT_SYMBOL(dma_unmap_sg);
6030 +#ifdef CONFIG_CALGARY_IOMMU
6034 -#ifdef CONFIG_HIGHMEM
6036 -dma_map_page(struct device *dev, struct page *page, unsigned long offset,
6037 - size_t size, enum dma_data_direction direction)
6039 - dma_addr_t dma_addr;
6040 + detect_intel_iommu();
6042 - BUG_ON(!valid_dma_direction(direction));
6043 +#ifdef CONFIG_SWIOTLB
6046 - dma_addr = swiotlb_map_page(
6047 - dev, page, offset, size, direction);
6049 - dma_addr = gnttab_dma_map_page(page) + offset;
6050 - IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr));
6051 + printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
6052 + dma_ops = &swiotlb_dma_ops;
6058 -EXPORT_SYMBOL(dma_map_page);
6061 -dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
6062 - enum dma_data_direction direction)
6064 + * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
6067 +static __init int iommu_setup(char *p)
6069 - BUG_ON(!valid_dma_direction(direction));
6071 - swiotlb_unmap_page(dev, dma_address, size, direction);
6073 - gnttab_dma_unmap_page(dma_address);
6075 -EXPORT_SYMBOL(dma_unmap_page);
6076 -#endif /* CONFIG_HIGHMEM */
6080 -dma_mapping_error(dma_addr_t dma_addr)
6083 - return swiotlb_dma_mapping_error(dma_addr);
6086 -EXPORT_SYMBOL(dma_mapping_error);
6091 -dma_supported(struct device *dev, u64 mask)
6094 - return swiotlb_dma_supported(dev, mask);
6096 - * By default we'll BUG when an infeasible DMA is requested, and
6097 - * request swiotlb=force (see IOMMU_BUG_ON).
6101 -EXPORT_SYMBOL(dma_supported);
6103 + if (!strncmp(p, "off", 3))
6105 + /* gart_parse_options has more force support */
6106 + if (!strncmp(p, "force", 5))
6108 + if (!strncmp(p, "noforce", 7)) {
6113 -void *dma_alloc_coherent(struct device *dev, size_t size,
6114 - dma_addr_t *dma_handle, gfp_t gfp)
6117 - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
6118 - unsigned int order = get_order(size);
6119 - unsigned long vstart;
6121 + if (!strncmp(p, "biomerge", 8)) {
6122 + iommu_bio_merge = 4096;
6126 + if (!strncmp(p, "panic", 5))
6127 + panic_on_overflow = 1;
6128 + if (!strncmp(p, "nopanic", 7))
6129 + panic_on_overflow = 0;
6130 + if (!strncmp(p, "merge", 5)) {
6134 + if (!strncmp(p, "nomerge", 7))
6136 + if (!strncmp(p, "forcesac", 8))
6137 + iommu_sac_force = 1;
6138 + if (!strncmp(p, "allowdac", 8))
6140 + if (!strncmp(p, "nodac", 5))
6142 + if (!strncmp(p, "usedac", 6)) {
6146 +#ifdef CONFIG_SWIOTLB
6147 + if (!strncmp(p, "soft", 4))
6151 - /* ignore region specifiers */
6152 - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
6153 +#ifdef CONFIG_GART_IOMMU
6154 + gart_parse_options(p);
6158 - int page = bitmap_find_free_region(mem->bitmap, mem->size,
6161 - *dma_handle = mem->device_base + (page << PAGE_SHIFT);
6162 - ret = mem->virt_base + (page << PAGE_SHIFT);
6163 - memset(ret, 0, size);
6166 - if (mem->flags & DMA_MEMORY_EXCLUSIVE)
6168 +#ifdef CONFIG_CALGARY_IOMMU
6169 + if (!strncmp(p, "calgary", 7))
6171 +#endif /* CONFIG_CALGARY_IOMMU */
6173 + p += strcspn(p, ",");
6179 +early_param("iommu", iommu_setup);
6181 - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
6184 - vstart = __get_free_pages(gfp, order);
6185 - ret = (void *)vstart;
6186 +static int check_pages_physically_contiguous(unsigned long pfn,
6187 + unsigned int offset,
6190 + unsigned long next_mfn;
6194 - if (dev != NULL && dev->coherent_dma_mask)
6195 - mask = dev->coherent_dma_mask;
6197 - mask = 0xffffffff;
6198 + next_mfn = pfn_to_mfn(pfn);
6199 + nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
6201 - if (ret != NULL) {
6202 - if (xen_create_contiguous_region(vstart, order,
6203 - fls64(mask)) != 0) {
6204 - free_pages(vstart, order);
6207 - memset(ret, 0, size);
6208 - *dma_handle = virt_to_bus(ret);
6209 + for (i = 1; i < nr_pages; i++) {
6210 + if (pfn_to_mfn(++pfn) != ++next_mfn)
6216 -EXPORT_SYMBOL(dma_alloc_coherent);
6218 -void dma_free_coherent(struct device *dev, size_t size,
6219 - void *vaddr, dma_addr_t dma_handle)
6220 +int range_straddles_page_boundary(paddr_t p, size_t size)
6222 - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
6223 - int order = get_order(size);
6225 - WARN_ON(irqs_disabled()); /* for portability */
6226 - if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
6227 - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
6228 + unsigned long pfn = p >> PAGE_SHIFT;
6229 + unsigned int offset = p & ~PAGE_MASK;
6231 - bitmap_release_region(mem->bitmap, page, order);
6233 - xen_destroy_contiguous_region((unsigned long)vaddr, order);
6234 - free_pages((unsigned long)vaddr, order);
6236 + return ((offset + size > PAGE_SIZE) &&
6237 + !check_pages_physically_contiguous(pfn, offset, size));
6239 -EXPORT_SYMBOL(dma_free_coherent);
6241 -#ifdef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
6242 +#ifdef CONFIG_X86_32
6243 int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
6244 dma_addr_t device_addr, size_t size, int flags)
6246 @@ -327,8 +295,8 @@ EXPORT_SYMBOL(dma_declare_coherent_memor
6247 void dma_release_declared_memory(struct device *dev)
6249 struct dma_coherent_mem *mem = dev->dma_mem;
6255 dev->dma_mem = NULL;
6256 iounmap(mem->virt_base);
6257 @@ -341,8 +309,10 @@ void *dma_mark_declared_memory_occupied(
6258 dma_addr_t device_addr, size_t size)
6260 struct dma_coherent_mem *mem = dev->dma_mem;
6261 - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
6263 + int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
6265 + pages >>= PAGE_SHIFT;
6268 return ERR_PTR(-EINVAL);
6269 @@ -354,103 +324,270 @@ void *dma_mark_declared_memory_occupied(
6270 return mem->virt_base + (pos << PAGE_SHIFT);
6272 EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
6273 -#endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */
6275 -#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
6276 -/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
6279 -EXPORT_SYMBOL(forbid_dac);
6281 -static __devinit void via_no_dac(struct pci_dev *dev)
6282 +static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
6283 + dma_addr_t *dma_handle, void **ret)
6285 - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
6286 - printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
6288 + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
6289 + int order = get_order(size);
6292 + int page = bitmap_find_free_region(mem->bitmap, mem->size,
6295 + *dma_handle = mem->device_base + (page << PAGE_SHIFT);
6296 + *ret = mem->virt_base + (page << PAGE_SHIFT);
6297 + memset(*ret, 0, size);
6299 + if (mem->flags & DMA_MEMORY_EXCLUSIVE)
6302 + return (mem != NULL);
6304 -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
6306 -static int check_iommu(char *s)
6307 +static int dma_release_coherent(struct device *dev, int order, void *vaddr)
6309 - if (!strcmp(s, "usedac")) {
6311 + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
6313 + if (mem && vaddr >= mem->virt_base && vaddr <
6314 + (mem->virt_base + (mem->size << PAGE_SHIFT))) {
6315 + int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
6317 + bitmap_release_region(mem->bitmap, page, order);
6322 -__setup("iommu=", check_iommu);
6324 +#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0)
6325 +#define dma_release_coherent(dev, order, vaddr) (0)
6326 +#endif /* CONFIG_X86_32 */
6328 +int dma_supported(struct device *dev, u64 mask)
6331 + if (mask > 0xffffffff && forbid_dac > 0) {
6332 + printk(KERN_INFO "PCI: Disallowing DAC for device %s\n",
6339 -dma_map_single(struct device *dev, void *ptr, size_t size,
6340 - enum dma_data_direction direction)
6341 + if (dma_ops->dma_supported)
6342 + return dma_ops->dma_supported(dev, mask);
6344 + /* Copied from i386. Doesn't make much sense, because it will
6345 + only work for pci_alloc_coherent.
6346 + The caller just has to use GFP_DMA in this case. */
6347 + if (mask < DMA_24BIT_MASK)
6350 + /* Tell the device to use SAC when IOMMU force is on. This
6351 + allows the driver to use cheaper accesses in some cases.
6353 + Problem with this is that if we overflow the IOMMU area and
6354 + return DAC as fallback address the device may not handle it
6357 + As a special case some controllers have a 39bit address
6358 + mode that is as efficient as 32bit (aic79xx). Don't force
6359 + SAC for these. Assume all masks <= 40 bits are of this
6360 + type. Normally this doesn't make any difference, but gives
6361 + more gentle handling of IOMMU overflow. */
6362 + if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
6363 + printk(KERN_INFO "%s: Force SAC with mask %Lx\n",
6364 + dev->bus_id, mask);
6370 +EXPORT_SYMBOL(dma_supported);
6372 +/* Allocate DMA memory on node near device */
6373 +static struct page *
6374 +dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
6379 - BUG_ON(!valid_dma_direction(direction));
6380 - WARN_ON(size == 0);
6381 + node = dev_to_node(dev);
6384 - dma = swiotlb_map_single(dev, ptr, size, direction);
6386 - dma = gnttab_dma_map_page(virt_to_page(ptr)) +
6387 - offset_in_page(ptr);
6388 - IOMMU_BUG_ON(range_straddles_page_boundary(__pa(ptr), size));
6389 - IOMMU_BUG_ON(address_needs_mapping(dev, dma));
6392 - flush_write_buffers();
6395 -EXPORT_SYMBOL(dma_map_single);
6398 -dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
6399 - enum dma_data_direction direction)
6401 - BUG_ON(!valid_dma_direction(direction));
6403 - swiotlb_unmap_single(dev, dma_addr, size, direction);
6405 - gnttab_dma_unmap_page(dma_addr);
6406 + return alloc_pages_node(node, gfp, order);
6410 + * Allocate memory for a coherent mapping.
6413 +dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
6416 + void *memory = NULL;
6417 + struct page *page;
6418 + unsigned long dma_mask = 0;
6420 + unsigned int order = get_order(size);
6422 + /* ignore region specifiers */
6423 + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
6425 + if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
6429 + dev = &fallback_dev;
6432 + dma_mask = dev->coherent_dma_mask;
6433 + if (dma_mask == 0)
6434 + dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK;
6436 + /* Device not DMA able */
6437 + if (dev->dma_mask == NULL)
6440 + /* Don't invoke OOM killer or retry in lower 16MB DMA zone */
6441 + if (gfp & __GFP_DMA)
6445 + gfp &= ~(__GFP_DMA | __GFP_DMA32);
6447 +#ifdef CONFIG_X86_64
6448 + /* Why <=? Even when the mask is smaller than 4GB it is often
6449 + larger than 16MB and in this case we have a chance of
6450 + finding fitting memory in the next higher zone first. If
6451 + not retry with true GFP_DMA. -AK */
6452 + if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
6458 + page = dma_alloc_pages(dev,
6459 + noretry ? gfp | __GFP_NORETRY : gfp, order);
6466 + dma_addr_t bus = page_to_phys(page);
6467 + memory = page_address(page);
6468 + high = (bus + size) >= dma_mask;
6470 + if (force_iommu && !(gfp & GFP_DMA))
6473 + free_pages((unsigned long)memory, order);
6475 + /* Don't use the 16MB ZONE_DMA unless absolutely
6476 + needed. It's better to use remapping first. */
6477 + if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
6478 + gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
6482 + /* Let low level make its own zone decisions */
6483 + gfp &= ~(GFP_DMA32|GFP_DMA);
6485 + if (dma_ops->alloc_coherent)
6486 + return dma_ops->alloc_coherent(dev, size,
6491 + memset(memory, 0, size);
6493 + *dma_handle = bus;
6498 + if (dma_ops->alloc_coherent) {
6499 + free_pages((unsigned long)memory, order);
6500 + gfp &= ~(GFP_DMA|GFP_DMA32);
6501 + return dma_ops->alloc_coherent(dev, size, dma_handle, gfp);
6504 + if (dma_ops->map_simple) {
6505 + *dma_handle = dma_ops->map_simple(dev, virt_to_bus(memory),
6507 + PCI_DMA_BIDIRECTIONAL);
6508 + if (*dma_handle != bad_dma_address)
6512 + memory = page_address(page);
6513 + if (xen_create_contiguous_region((unsigned long)memory, order,
6514 + fls64(dma_mask)) == 0) {
6515 + memset(memory, 0, size);
6516 + *dma_handle = virt_to_bus(memory);
6521 + if (panic_on_overflow)
6522 + panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",
6523 + (unsigned long)size);
6524 + free_pages((unsigned long)memory, order);
6527 -EXPORT_SYMBOL(dma_unmap_single);
6528 +EXPORT_SYMBOL(dma_alloc_coherent);
6531 -dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
6532 - enum dma_data_direction direction)
6534 + * Unmap coherent memory.
6535 + * The caller must ensure that the device has finished accessing the mapping.
6537 +void dma_free_coherent(struct device *dev, size_t size,
6538 + void *vaddr, dma_addr_t bus)
6541 - swiotlb_sync_single_for_cpu(dev, dma_handle, size, direction);
6542 + int order = get_order(size);
6543 + WARN_ON(irqs_disabled()); /* for portability */
6544 + if (dma_release_coherent(dev, order, vaddr))
6547 + if (dma_ops->unmap_single)
6548 + dma_ops->unmap_single(dev, bus, size, 0);
6550 + xen_destroy_contiguous_region((unsigned long)vaddr, order);
6551 + free_pages((unsigned long)vaddr, order);
6553 -EXPORT_SYMBOL(dma_sync_single_for_cpu);
6554 +EXPORT_SYMBOL(dma_free_coherent);
6557 -dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
6558 - enum dma_data_direction direction)
6559 +static int __init pci_iommu_init(void)
6562 - swiotlb_sync_single_for_device(dev, dma_handle, size, direction);
6563 +#ifdef CONFIG_CALGARY_IOMMU
6564 + calgary_iommu_init();
6567 + intel_iommu_init();
6569 +#ifdef CONFIG_GART_IOMMU
6570 + gart_iommu_init();
6576 -EXPORT_SYMBOL(dma_sync_single_for_device);
6579 -dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
6580 - enum dma_data_direction direction)
6581 +void pci_iommu_shutdown(void)
6584 - swiotlb_sync_sg_for_cpu(dev,sg,nelems,direction);
6585 - flush_write_buffers();
6586 + gart_iommu_shutdown();
6588 -EXPORT_SYMBOL(dma_sync_sg_for_cpu);
6589 +/* Must execute after PCI subsystem */
6590 +fs_initcall(pci_iommu_init);
6593 +/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
6596 -dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
6597 - enum dma_data_direction direction)
6598 +static __devinit void via_no_dac(struct pci_dev *dev)
6601 - swiotlb_sync_sg_for_device(dev,sg,nelems,direction);
6602 - flush_write_buffers();
6603 + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
6604 + printk(KERN_INFO "PCI: VIA PCI bridge detected."
6605 + "Disabling DAC.\n");
6609 -EXPORT_SYMBOL(dma_sync_sg_for_device);
6610 +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
6612 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
6613 +++ sle11-2009-05-14/arch/x86/kernel/pci-nommu-xen.c 2009-03-16 16:38:05.000000000 +0100
6615 +#include <linux/dma-mapping.h>
6616 +#include <linux/dmar.h>
6617 +#include <linux/bootmem.h>
6618 +#include <linux/pci.h>
6620 +#include <xen/gnttab.h>
6622 +#include <asm/proto.h>
6623 +#include <asm/dma.h>
6624 +#include <asm/swiotlb.h>
6625 +#include <asm/tlbflush.h>
6626 +#include <asm/gnttab_dma.h>
6627 +#include <asm/bug.h>
6629 +#define IOMMU_BUG_ON(test) \
6631 + if (unlikely(test)) { \
6632 + printk(KERN_ALERT "Fatal DMA error! " \
6633 + "Please use 'swiotlb=force'\n"); \
6639 +gnttab_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
6643 + struct scatterlist *sg;
6645 + WARN_ON(nents == 0 || sgl->length == 0);
6647 + for_each_sg(sgl, sg, nents, i) {
6648 + BUG_ON(!sg_page(sg));
6650 + gnttab_dma_map_page(sg_page(sg)) + sg->offset;
6651 + sg->dma_length = sg->length;
6652 + IOMMU_BUG_ON(address_needs_mapping(
6653 + hwdev, sg->dma_address));
6654 + IOMMU_BUG_ON(range_straddles_page_boundary(
6655 + page_to_pseudophys(sg_page(sg)) + sg->offset,
6663 +gnttab_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
6667 + struct scatterlist *sg;
6669 + for_each_sg(sgl, sg, nents, i)
6670 + gnttab_dma_unmap_page(sg->dma_address);
6674 +gnttab_map_single(struct device *dev, phys_addr_t paddr, size_t size,
6679 + WARN_ON(size == 0);
6681 + dma = gnttab_dma_map_page(pfn_to_page(paddr >> PAGE_SHIFT)) +
6682 + offset_in_page(paddr);
6683 + IOMMU_BUG_ON(range_straddles_page_boundary(paddr, size));
6684 + IOMMU_BUG_ON(address_needs_mapping(dev, dma));
6690 +gnttab_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
6693 + gnttab_dma_unmap_page(dma_addr);
6696 +static int nommu_mapping_error(dma_addr_t dma_addr)
6698 + return (dma_addr == bad_dma_address);
6701 +static const struct dma_mapping_ops nommu_dma_ops = {
6702 + .map_single = gnttab_map_single,
6703 + .unmap_single = gnttab_unmap_single,
6704 + .map_sg = gnttab_map_sg,
6705 + .unmap_sg = gnttab_unmap_sg,
6706 + .dma_supported = swiotlb_dma_supported,
6707 + .mapping_error = nommu_mapping_error
6710 +void __init no_iommu_init(void)
6715 + force_iommu = 0; /* no HW IOMMU */
6716 + dma_ops = &nommu_dma_ops;
6718 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
6719 +++ sle11-2009-05-14/arch/x86/kernel/process-xen.c 2009-03-16 16:38:05.000000000 +0100
6721 +#include <linux/errno.h>
6722 +#include <linux/kernel.h>
6723 +#include <linux/mm.h>
6724 +#include <linux/smp.h>
6725 +#include <linux/slab.h>
6726 +#include <linux/sched.h>
6727 +#include <linux/module.h>
6728 +#include <linux/pm.h>
6730 +struct kmem_cache *task_xstate_cachep;
6732 +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
6735 + if (src->thread.xstate) {
6736 + dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
6738 + if (!dst->thread.xstate)
6740 + WARN_ON((unsigned long)dst->thread.xstate & 15);
6741 + memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
6746 +void free_thread_xstate(struct task_struct *tsk)
6748 + if (tsk->thread.xstate) {
6749 + kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
6750 + tsk->thread.xstate = NULL;
6754 +void free_thread_info(struct thread_info *ti)
6756 + free_thread_xstate(ti->task);
6757 + free_pages((unsigned long)ti, get_order(THREAD_SIZE));
6760 +void arch_task_cache_init(void)
6762 + task_xstate_cachep =
6763 + kmem_cache_create("task_xstate", xstate_size,
6764 + __alignof__(union thread_xstate),
6765 + SLAB_PANIC, NULL);
6768 +static void do_nothing(void *unused)
6773 + * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
6774 + * pm_idle and update to new pm_idle value. Required while changing pm_idle
6775 + * handler on SMP systems.
6777 + * Caller must have changed pm_idle to the new value before the call. Old
6778 + * pm_idle value will not be used by any CPU after the return of this function.
6780 +void cpu_idle_wait(void)
6783 + /* kick all the CPUs so that they exit out of pm_idle */
6784 + smp_call_function(do_nothing, NULL, 0, 1);
6786 +EXPORT_SYMBOL_GPL(cpu_idle_wait);
6790 + * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
6791 + * which can obviate IPI to trigger checking of need_resched.
6792 + * We execute MONITOR against need_resched and enter optimized wait state
6793 + * through MWAIT. Whenever someone changes need_resched, we would be woken
6794 + * up from MWAIT (without an IPI).
6796 + * New with Core Duo processors, MWAIT can take some hints based on CPU
6799 +void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
6801 + if (!need_resched()) {
6802 + __monitor((void *)¤t_thread_info()->flags, 0, 0);
6804 + if (!need_resched())
6809 +/* Default MONITOR/MWAIT with no hints, used for default C1 state */
6810 +static void mwait_idle(void)
6812 + if (!need_resched()) {
6813 + __monitor((void *)¤t_thread_info()->flags, 0, 0);
6815 + if (!need_resched())
6816 + __sti_mwait(0, 0);
6818 + local_irq_enable();
6820 + local_irq_enable();
6825 + * On SMP it's slightly faster (but much more power-consuming!)
6826 + * to poll the ->work.need_resched flag instead of waiting for the
6827 + * cross-CPU IPI to arrive. Use this option with caution.
6829 +static void poll_idle(void)
6831 + local_irq_enable();
6837 + * mwait selection logic:
6839 + * It depends on the CPU. For AMD CPUs that support MWAIT this is
6840 + * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
6841 + * then depend on a clock divisor and current Pstate of the core. If
6842 + * all cores of a processor are in halt state (C1) the processor can
6843 + * enter the C1E (C1 enhanced) state. If mwait is used this will never
6846 + * idle=mwait overrides this decision and forces the usage of mwait.
6848 +static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
6853 + if (c->x86_vendor == X86_VENDOR_AMD) {
6864 +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
6867 + static int selected;
6871 +#ifdef CONFIG_X86_SMP
6872 + if (pm_idle == poll_idle && smp_num_siblings > 1) {
6873 + printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
6874 + " performance may degrade.\n");
6877 + if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
6879 + * Skip, if setup has overridden idle.
6880 + * One CPU supports mwait => All CPUs supports mwait
6883 + printk(KERN_INFO "using mwait in idle threads.\n");
6884 + pm_idle = mwait_idle;
6891 +static int __init idle_setup(char *str)
6893 + if (!strcmp(str, "poll")) {
6894 + printk("using polling idle threads.\n");
6895 + pm_idle = poll_idle;
6898 + else if (!strcmp(str, "mwait"))
6904 + boot_option_idle_override = 1;
6907 +early_param("idle", idle_setup);
6909 --- sle11-2009-05-14.orig/arch/x86/kernel/process_32-xen.c 2009-03-16 16:33:40.000000000 +0100
6910 +++ sle11-2009-05-14/arch/x86/kernel/process_32-xen.c 2009-03-16 16:38:05.000000000 +0100
6912 #include <linux/personality.h>
6913 #include <linux/tick.h>
6914 #include <linux/percpu.h>
6915 +#include <linux/prctl.h>
6917 #include <asm/uaccess.h>
6918 #include <asm/pgtable.h>
6920 #include <asm/processor.h>
6921 #include <asm/i387.h>
6922 #include <asm/desc.h>
6923 -#include <asm/vm86.h>
6924 #ifdef CONFIG_MATH_EMULATION
6925 #include <asm/math_emu.h>
6927 @@ -102,16 +102,6 @@ void enable_hlt(void)
6929 EXPORT_SYMBOL(enable_hlt);
6932 - * On SMP it's slightly faster (but much more power-consuming!)
6933 - * to poll the ->work.need_resched flag instead of waiting for the
6934 - * cross-CPU IPI to arrive. Use this option with caution.
6936 -static void poll_idle(void)
6941 static void xen_idle(void)
6943 current_thread_info()->status &= ~TS_POLLING;
6944 @@ -121,20 +111,10 @@ static void xen_idle(void)
6948 - local_irq_disable();
6949 - if (!need_resched()) {
6954 - t0n = ktime_to_ns(t0);
6955 + if (!need_resched())
6956 safe_halt(); /* enables interrupts racelessly */
6957 - local_irq_disable();
6959 - t1n = ktime_to_ns(t1);
6960 - sched_clock_idle_wakeup_event(t1n - t0n);
6962 - local_irq_enable();
6964 + local_irq_enable();
6965 current_thread_info()->status |= TS_POLLING;
6967 #ifdef CONFIG_APM_MODULE
6968 @@ -142,7 +122,6 @@ EXPORT_SYMBOL(default_idle);
6971 #ifdef CONFIG_HOTPLUG_CPU
6972 -extern cpumask_t cpu_initialized;
6973 static inline void play_dead(void)
6976 @@ -187,6 +166,7 @@ void cpu_idle(void)
6977 if (cpu_is_offline(cpu))
6980 + local_irq_disable();
6981 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
6984 @@ -197,44 +177,6 @@ void cpu_idle(void)
6988 -static void do_nothing(void *unused)
6993 - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
6994 - * pm_idle and update to new pm_idle value. Required while changing pm_idle
6995 - * handler on SMP systems.
6997 - * Caller must have changed pm_idle to the new value before the call. Old
6998 - * pm_idle value will not be used by any CPU after the return of this function.
7000 -void cpu_idle_wait(void)
7003 - /* kick all the CPUs so that they exit out of pm_idle */
7004 - smp_call_function(do_nothing, NULL, 0, 1);
7006 -EXPORT_SYMBOL_GPL(cpu_idle_wait);
7008 -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
7012 -static int __init idle_setup(char *str)
7014 - if (!strcmp(str, "poll")) {
7015 - printk("using polling idle threads.\n");
7016 - pm_idle = poll_idle;
7021 - boot_option_idle_override = 1;
7024 -early_param("idle", idle_setup);
7026 void __show_registers(struct pt_regs *regs, int all)
7028 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
7029 @@ -260,7 +202,7 @@ void __show_registers(struct pt_regs *re
7030 init_utsname()->version);
7032 printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
7033 - 0xffff & regs->cs, regs->ip, regs->flags,
7034 + (u16)regs->cs, regs->ip, regs->flags,
7035 smp_processor_id());
7036 print_symbol("EIP is at %s\n", regs->ip);
7038 @@ -269,8 +211,7 @@ void __show_registers(struct pt_regs *re
7039 printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
7040 regs->si, regs->di, regs->bp, sp);
7041 printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
7042 - regs->ds & 0xffff, regs->es & 0xffff,
7043 - regs->fs & 0xffff, gs, ss);
7044 + (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
7048 @@ -367,6 +308,7 @@ void flush_thread(void)
7050 * Forget coprocessor state..
7052 + tsk->fpu_counter = 0;
7056 @@ -437,11 +379,30 @@ int copy_thread(int nr, unsigned long cl
7060 -#ifdef CONFIG_SECCOMP
7062 +start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
7064 + __asm__("movl %0, %%gs" :: "r"(0));
7067 + regs->ds = __USER_DS;
7068 + regs->es = __USER_DS;
7069 + regs->ss = __USER_DS;
7070 + regs->cs = __USER_CS;
7071 + regs->ip = new_ip;
7072 + regs->sp = new_sp;
7074 + * Free the old FP and other extended state
7076 + free_thread_xstate(current);
7078 +EXPORT_SYMBOL_GPL(start_thread);
7080 static void hard_disable_TSC(void)
7082 write_cr4(read_cr4() | X86_CR4_TSD);
7085 void disable_TSC(void)
7088 @@ -453,11 +414,47 @@ void disable_TSC(void)
7093 static void hard_enable_TSC(void)
7095 write_cr4(read_cr4() & ~X86_CR4_TSD);
7097 -#endif /* CONFIG_SECCOMP */
7099 +static void enable_TSC(void)
7101 + preempt_disable();
7102 + if (test_and_clear_thread_flag(TIF_NOTSC))
7104 + * Must flip the CPU state synchronously with
7105 + * TIF_NOTSC in the current running context.
7107 + hard_enable_TSC();
7111 +int get_tsc_mode(unsigned long adr)
7115 + if (test_thread_flag(TIF_NOTSC))
7116 + val = PR_TSC_SIGSEGV;
7118 + val = PR_TSC_ENABLE;
7120 + return put_user(val, (unsigned int __user *)adr);
7123 +int set_tsc_mode(unsigned int val)
7125 + if (val == PR_TSC_SIGSEGV)
7127 + else if (val == PR_TSC_ENABLE)
7135 static noinline void
7136 __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
7137 @@ -473,12 +470,12 @@ __switch_to_xtra(struct task_struct *pre
7138 /* we clear debugctl to make sure DS
7139 * is not in use when we change it */
7141 - wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
7142 + update_debugctlmsr(0);
7143 wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
7146 if (next->debugctlmsr != debugctl)
7147 - wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0);
7148 + update_debugctlmsr(next->debugctlmsr);
7150 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
7151 set_debugreg(next->debugreg0, 0);
7152 @@ -490,7 +487,6 @@ __switch_to_xtra(struct task_struct *pre
7153 set_debugreg(next->debugreg7, 7);
7156 -#ifdef CONFIG_SECCOMP
7157 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
7158 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
7159 /* prev and next are different */
7160 @@ -499,7 +495,6 @@ __switch_to_xtra(struct task_struct *pre
7167 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
7168 @@ -637,7 +632,7 @@ struct task_struct * __switch_to(struct
7170 /* we're going to use this soon, after a few expensive things */
7171 if (next_p->fpu_counter > 5)
7172 - prefetch(&next->i387.fxsave);
7173 + prefetch(next->xstate);
7176 * Now maybe handle debug registers
7177 @@ -658,8 +653,11 @@ struct task_struct * __switch_to(struct
7178 /* If the task has used fpu the last 5 timeslices, just do a full
7179 * restore of the math state immediately to avoid the trap; the
7180 * chances of needing FPU soon are obviously high now
7182 + * tsk_used_math() checks prevent calling math_state_restore(),
7183 + * which can sleep in the case of !tsk_used_math()
7185 - if (next_p->fpu_counter > 5)
7186 + if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
7187 math_state_restore();
7190 --- sle11-2009-05-14.orig/arch/x86/kernel/process_64-xen.c 2009-03-16 16:33:40.000000000 +0100
7191 +++ sle11-2009-05-14/arch/x86/kernel/process_64-xen.c 2009-03-16 16:38:05.000000000 +0100
7193 #include <linux/kprobes.h>
7194 #include <linux/kdebug.h>
7195 #include <linux/tick.h>
7196 +#include <linux/prctl.h>
7198 #include <asm/uaccess.h>
7199 #include <asm/pgtable.h>
7200 @@ -102,17 +103,6 @@ void exit_idle(void)
7205 - * On SMP it's slightly faster (but much more power-consuming!)
7206 - * to poll the ->need_resched flag instead of waiting for the
7207 - * cross-CPU IPI to arrive. Use this option with caution.
7209 -static void poll_idle(void)
7211 - local_irq_enable();
7215 static void xen_idle(void)
7217 current_thread_info()->status &= ~TS_POLLING;
7218 @@ -121,20 +111,10 @@ static void xen_idle(void)
7219 * test NEED_RESCHED:
7222 - local_irq_disable();
7223 - if (!need_resched()) {
7228 - t0n = ktime_to_ns(t0);
7229 + if (!need_resched())
7230 safe_halt(); /* enables interrupts racelessly */
7231 - local_irq_disable();
7233 - t1n = ktime_to_ns(t1);
7234 - sched_clock_idle_wakeup_event(t1n - t0n);
7236 - local_irq_enable();
7238 + local_irq_enable();
7239 current_thread_info()->status |= TS_POLLING;
7242 @@ -195,45 +175,6 @@ void cpu_idle(void)
7246 -static void do_nothing(void *unused)
7251 - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
7252 - * pm_idle and update to new pm_idle value. Required while changing pm_idle
7253 - * handler on SMP systems.
7255 - * Caller must have changed pm_idle to the new value before the call. Old
7256 - * pm_idle value will not be used by any CPU after the return of this function.
7258 -void cpu_idle_wait(void)
7261 - /* kick all the CPUs so that they exit out of pm_idle */
7262 - smp_call_function(do_nothing, NULL, 0, 1);
7264 -EXPORT_SYMBOL_GPL(cpu_idle_wait);
7266 -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
7270 -static int __init idle_setup(char *str)
7272 - if (!strcmp(str, "poll")) {
7273 - printk("using polling idle threads.\n");
7274 - pm_idle = poll_idle;
7275 - } else if (!strcmp(str, "mwait"))
7280 - boot_option_idle_override = 1;
7283 -early_param("idle", idle_setup);
7285 /* Prints also some state that isn't saved in the pt_regs */
7286 void __show_regs(struct pt_regs * regs)
7288 @@ -360,6 +301,7 @@ void flush_thread(void)
7290 * Forget coprocessor state..
7292 + tsk->fpu_counter = 0;
7296 @@ -472,6 +414,83 @@ out:
7301 +start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
7303 + asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
7305 + regs->ip = new_ip;
7306 + regs->sp = new_sp;
7307 + write_pda(oldrsp, new_sp);
7308 + regs->cs = __USER_CS;
7309 + regs->ss = __USER_DS;
7310 + regs->flags = 0x200;
7313 + * Free the old FP and other extended state
7315 + free_thread_xstate(current);
7317 +EXPORT_SYMBOL_GPL(start_thread);
7319 +static void hard_disable_TSC(void)
7321 + write_cr4(read_cr4() | X86_CR4_TSD);
7324 +void disable_TSC(void)
7326 + preempt_disable();
7327 + if (!test_and_set_thread_flag(TIF_NOTSC))
7329 + * Must flip the CPU state synchronously with
7330 + * TIF_NOTSC in the current running context.
7332 + hard_disable_TSC();
7336 +static void hard_enable_TSC(void)
7338 + write_cr4(read_cr4() & ~X86_CR4_TSD);
7341 +static void enable_TSC(void)
7343 + preempt_disable();
7344 + if (test_and_clear_thread_flag(TIF_NOTSC))
7346 + * Must flip the CPU state synchronously with
7347 + * TIF_NOTSC in the current running context.
7349 + hard_enable_TSC();
7353 +int get_tsc_mode(unsigned long adr)
7357 + if (test_thread_flag(TIF_NOTSC))
7358 + val = PR_TSC_SIGSEGV;
7360 + val = PR_TSC_ENABLE;
7362 + return put_user(val, (unsigned int __user *)adr);
7365 +int set_tsc_mode(unsigned int val)
7367 + if (val == PR_TSC_SIGSEGV)
7369 + else if (val == PR_TSC_ENABLE)
7378 * This special macro can be used to load a debugging register
7380 @@ -491,12 +510,12 @@ static inline void __switch_to_xtra(stru
7381 /* we clear debugctl to make sure DS
7382 * is not in use when we change it */
7384 - wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
7385 + update_debugctlmsr(0);
7386 wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
7389 if (next->debugctlmsr != debugctl)
7390 - wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
7391 + update_debugctlmsr(next->debugctlmsr);
7393 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
7395 @@ -508,6 +527,15 @@ static inline void __switch_to_xtra(stru
7399 + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
7400 + test_tsk_thread_flag(next_p, TIF_NOTSC)) {
7401 + /* prev and next are different */
7402 + if (test_tsk_thread_flag(next_p, TIF_NOTSC))
7403 + hard_disable_TSC();
7405 + hard_enable_TSC();
7409 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
7410 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
7411 @@ -547,7 +575,7 @@ __switch_to(struct task_struct *prev_p,
7413 /* we're going to use this soon, after a few expensive things */
7414 if (next_p->fpu_counter>5)
7415 - prefetch(&next->i387.fxsave);
7416 + prefetch(next->xstate);
7419 * This is basically '__unlazy_fpu', except that we queue a
7420 @@ -680,8 +708,11 @@ __switch_to(struct task_struct *prev_p,
7421 /* If the task has used fpu the last 5 timeslices, just do a full
7422 * restore of the math state immediately to avoid the trap; the
7423 * chances of needing FPU soon are obviously high now
7425 + * tsk_used_math() checks prevent calling math_state_restore(),
7426 + * which can sleep in the case of !tsk_used_math()
7428 - if (next_p->fpu_counter>5)
7429 + if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
7430 math_state_restore();
7433 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
7434 +++ sle11-2009-05-14/arch/x86/kernel/setup-xen.c 2009-03-16 16:38:05.000000000 +0100
7436 +#include <linux/kernel.h>
7437 +#include <linux/module.h>
7438 +#include <linux/init.h>
7439 +#include <linux/bootmem.h>
7440 +#include <linux/percpu.h>
7441 +#include <asm/smp.h>
7442 +#include <asm/percpu.h>
7443 +#include <asm/sections.h>
7444 +#include <asm/processor.h>
7445 +#include <asm/setup.h>
7446 +#include <asm/topology.h>
7447 +#include <asm/mpspec.h>
7448 +#include <asm/apicdef.h>
7450 +#ifdef CONFIG_X86_LOCAL_APIC
7451 +unsigned int num_processors;
7452 +unsigned disabled_cpus __cpuinitdata;
7453 +/* Processor that is doing the boot up */
7454 +unsigned int boot_cpu_physical_apicid = -1U;
7455 +EXPORT_SYMBOL(boot_cpu_physical_apicid);
7457 +DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
7458 +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
7460 +/* Bitmask of physically existing CPUs */
7461 +physid_mask_t phys_cpu_present_map;
7464 +#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
7466 + * Copy data used in early init routines from the initial arrays to the
7467 + * per cpu data areas. These arrays then become expendable and the
7468 + * *_early_ptr's are zeroed indicating that the static arrays are gone.
7470 +static void __init setup_per_cpu_maps(void)
7475 + for_each_possible_cpu(cpu) {
7476 + per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
7477 + per_cpu(x86_bios_cpu_apicid, cpu) =
7478 + x86_bios_cpu_apicid_init[cpu];
7480 + per_cpu(x86_cpu_to_node_map, cpu) =
7481 + x86_cpu_to_node_map_init[cpu];
7485 + /* indicate the early static arrays will soon be gone */
7486 + x86_cpu_to_apicid_early_ptr = NULL;
7487 + x86_bios_cpu_apicid_early_ptr = NULL;
7489 + x86_cpu_to_node_map_early_ptr = NULL;
7494 +#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
7495 +cpumask_t *cpumask_of_cpu_map __read_mostly;
7496 +EXPORT_SYMBOL(cpumask_of_cpu_map);
7498 +/* requires nr_cpu_ids to be initialized */
7499 +static void __init setup_cpumask_of_cpu(void)
7503 + /* alloc_bootmem zeroes memory */
7504 + cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
7505 + for (i = 0; i < nr_cpu_ids; i++)
7506 + cpu_set(i, cpumask_of_cpu_map[i]);
7509 +static inline void setup_cpumask_of_cpu(void) { }
7512 +#ifdef CONFIG_X86_32
7514 + * Great future not-so-futuristic plan: make i386 and x86_64 do it
7517 +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
7518 +EXPORT_SYMBOL(__per_cpu_offset);
7522 + * Great future plan:
7523 + * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
7524 + * Always point %gs to its beginning
7526 +void __init setup_per_cpu_areas(void)
7528 + int i, highest_cpu = 0;
7529 + unsigned long size;
7531 +#ifdef CONFIG_HOTPLUG_CPU
7532 + prefill_possible_map();
7535 + /* Copy section for each CPU (we discard the original) */
7536 + size = PERCPU_ENOUGH_ROOM;
7537 + printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
7540 + for_each_possible_cpu(i) {
7542 +#ifndef CONFIG_NEED_MULTIPLE_NODES
7543 + ptr = alloc_bootmem_pages(size);
7545 + int node = early_cpu_to_node(i);
7546 + if (!node_online(node) || !NODE_DATA(node)) {
7547 + ptr = alloc_bootmem_pages(size);
7549 + "cpu %d has no node or node-local memory\n", i);
7552 + ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
7555 + panic("Cannot allocate cpu data for CPU %d\n", i);
7556 +#ifdef CONFIG_X86_64
7557 + cpu_pda(i)->data_offset = ptr - __per_cpu_start;
7559 + __per_cpu_offset[i] = ptr - __per_cpu_start;
7561 + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
7566 + nr_cpu_ids = highest_cpu + 1;
7567 + printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
7569 + /* Setup percpu data maps */
7570 + setup_per_cpu_maps();
7572 + /* Setup cpumask_of_cpu map */
7573 + setup_cpumask_of_cpu();
7577 --- sle11-2009-05-14.orig/arch/x86/kernel/setup64-xen.c 2009-03-16 16:33:40.000000000 +0100
7578 +++ sle11-2009-05-14/arch/x86/kernel/setup64-xen.c 2009-03-16 16:38:05.000000000 +0100
7580 #include <linux/bootmem.h>
7581 #include <linux/bitops.h>
7582 #include <linux/module.h>
7583 +#include <linux/kgdb.h>
7584 #include <asm/pda.h>
7585 #include <asm/pgtable.h>
7586 #include <asm/processor.h>
7588 #include <asm/proto.h>
7589 #include <asm/sections.h>
7590 #include <asm/setup.h>
7591 +#include <asm/genapic.h>
7593 #include <asm/hypervisor.h>
7595 @@ -81,8 +83,8 @@ int force_personality32 = 0;
7596 Control non executable heap for 32bit processes.
7597 To control the stack too use noexec=off
7599 -on PROT_READ does not imply PROT_EXEC for 32bit processes
7600 -off PROT_READ implies PROT_EXEC (default)
7601 +on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
7602 +off PROT_READ implies PROT_EXEC
7604 static int __init nonx32_setup(char *str)
7606 @@ -94,85 +96,6 @@ static int __init nonx32_setup(char *str
7608 __setup("noexec32=", nonx32_setup);
7611 - * Copy data used in early init routines from the initial arrays to the
7612 - * per cpu data areas. These arrays then become expendable and the
7613 - * *_early_ptr's are zeroed indicating that the static arrays are gone.
7615 -static void __init setup_per_cpu_maps(void)
7620 - for_each_possible_cpu(cpu) {
7622 - if (per_cpu_offset(cpu)) {
7624 - per_cpu(x86_cpu_to_apicid, cpu) =
7625 - x86_cpu_to_apicid_init[cpu];
7626 - per_cpu(x86_bios_cpu_apicid, cpu) =
7627 - x86_bios_cpu_apicid_init[cpu];
7629 - per_cpu(x86_cpu_to_node_map, cpu) =
7630 - x86_cpu_to_node_map_init[cpu];
7635 - printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
7640 - /* indicate the early static arrays will soon be gone */
7641 - x86_cpu_to_apicid_early_ptr = NULL;
7642 - x86_bios_cpu_apicid_early_ptr = NULL;
7644 - x86_cpu_to_node_map_early_ptr = NULL;
7650 - * Great future plan:
7651 - * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
7652 - * Always point %gs to its beginning
7654 -void __init setup_per_cpu_areas(void)
7657 - unsigned long size;
7659 -#ifdef CONFIG_HOTPLUG_CPU
7660 - prefill_possible_map();
7663 - /* Copy section for each CPU (we discard the original) */
7664 - size = PERCPU_ENOUGH_ROOM;
7666 - printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
7667 - for_each_cpu_mask (i, cpu_possible_map) {
7669 -#ifndef CONFIG_NEED_MULTIPLE_NODES
7670 - ptr = alloc_bootmem_pages(size);
7672 - int node = early_cpu_to_node(i);
7674 - if (!node_online(node) || !NODE_DATA(node))
7675 - ptr = alloc_bootmem_pages(size);
7677 - ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
7680 - panic("Cannot allocate cpu data for CPU %d\n", i);
7681 - cpu_pda(i)->data_offset = ptr - __per_cpu_start;
7682 - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
7685 - /* setup percpu data maps early */
7686 - setup_per_cpu_maps();
7690 static void __init_refok switch_pt(int cpu)
7692 @@ -410,6 +333,17 @@ void __cpuinit cpu_init (void)
7694 load_LDT(&init_mm.context);
7698 + * If the kgdb is connected no debug regs should be altered. This
7699 + * is only applicable when KGDB and a KGDB I/O module are built
7700 + * into the kernel and you are using early debugging with
7701 + * kgdbwait. KGDB will control the kernel HW breakpoint registers.
7703 + if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
7704 + arch_kgdb_ops.correct_hw_break();
7708 * Clear all 6 debug registers:
7710 @@ -420,10 +354,17 @@ void __cpuinit cpu_init (void)
7711 set_debugreg(0UL, 3);
7712 set_debugreg(0UL, 6);
7713 set_debugreg(0UL, 7);
7715 + /* If the kgdb is connected no debug regs should be altered. */
7721 asm ("pushfq; popq %0" : "=rm" (kernel_eflags));
7722 if (raw_irqs_disabled())
7723 kernel_eflags &= ~X86_EFLAGS_IF;
7725 + if (is_uv_system())
7728 --- sle11-2009-05-14.orig/arch/x86/kernel/setup_32-xen.c 2009-03-16 16:33:40.000000000 +0100
7729 +++ sle11-2009-05-14/arch/x86/kernel/setup_32-xen.c 2009-03-16 16:38:05.000000000 +0100
7731 #include <linux/efi.h>
7732 #include <linux/init.h>
7733 #include <linux/edd.h>
7734 +#include <linux/iscsi_ibft.h>
7735 #include <linux/nodemask.h>
7736 #include <linux/kernel.h>
7737 #include <linux/percpu.h>
7739 #include <linux/pfn.h>
7740 #include <linux/pci.h>
7741 #include <linux/init_ohci1394_dma.h>
7742 +#include <linux/kvm_para.h>
7744 #include <video/edid.h>
7747 #include <xen/firmware.h>
7748 #include <xen/xencons.h>
7749 #include <setup_arch.h>
7750 -#include <bios_ebda.h>
7751 +#include <asm/bios_ebda.h>
7752 #include <asm/cacheflush.h>
7753 +#include <asm/processor.h>
7756 #include <xen/interface/kexec.h>
7757 @@ -136,7 +139,12 @@ static struct resource standard_io_resou
7763 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
7765 + .name = "keyboard",
7768 .flags = IORESOURCE_BUSY | IORESOURCE_IO
7770 .name = "dma page reg",
7771 @@ -166,6 +174,8 @@ struct cpuinfo_x86 new_cpu_data __cpuini
7772 struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
7773 EXPORT_SYMBOL(boot_cpu_data);
7775 +unsigned int def_to_bigsmp;
7777 #ifndef CONFIG_X86_PAE
7778 unsigned long mmu_cr4_features;
7780 @@ -204,7 +214,7 @@ EXPORT_SYMBOL(ist_info);
7781 extern void early_cpu_init(void);
7782 extern int root_mountflags;
7784 -unsigned long saved_videomode;
7785 +unsigned long saved_video_mode;
7787 #define RAMDISK_IMAGE_START_MASK 0x07FF
7788 #define RAMDISK_PROMPT_FLAG 0x8000
7789 @@ -259,7 +269,7 @@ static inline void copy_edd(void)
7793 -int __initdata user_defined_memmap = 0;
7794 +int __initdata user_defined_memmap;
7797 * "mem=nopentium" disables the 4MB page tables.
7798 @@ -420,20 +430,59 @@ unsigned long __init find_max_low_pfn(vo
7802 +#define BIOS_LOWMEM_KILOBYTES 0x413
7805 - * workaround for Dell systems that neglect to reserve EBDA
7806 + * The BIOS places the EBDA/XBDA at the top of conventional
7807 + * memory, and usually decreases the reported amount of
7808 + * conventional memory (int 0x12) too. This also contains a
7809 + * workaround for Dell systems that neglect to reserve EBDA.
7810 + * The same workaround also avoids a problem with the AMD768MPX
7811 + * chipset: reserve a page before VGA to prevent PCI prefetch
7812 + * into it (errata #56). Usually the page is reserved anyways,
7813 + * unless you have no PS/2 mouse plugged in.
7815 static void __init reserve_ebda_region(void)
7817 - unsigned int addr;
7818 - addr = get_bios_ebda();
7820 - reserve_bootmem(addr, PAGE_SIZE, BOOTMEM_DEFAULT);
7821 + unsigned int lowmem, ebda_addr;
7823 + /* To determine the position of the EBDA and the */
7824 + /* end of conventional memory, we need to look at */
7825 + /* the BIOS data area. In a paravirtual environment */
7826 + /* that area is absent. We'll just have to assume */
7827 + /* that the paravirt case can handle memory setup */
7828 + /* correctly, without our help. */
7829 + if (paravirt_enabled())
7832 + /* end of low (conventional) memory */
7833 + lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
7836 + /* start of EBDA area */
7837 + ebda_addr = get_bios_ebda();
7839 + /* Fixup: bios puts an EBDA in the top 64K segment */
7840 + /* of conventional memory, but does not adjust lowmem. */
7841 + if ((lowmem - ebda_addr) <= 0x10000)
7842 + lowmem = ebda_addr;
7844 + /* Fixup: bios does not report an EBDA at all. */
7845 + /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
7846 + if ((ebda_addr == 0) && (lowmem >= 0x9f000))
7849 + /* Paranoia: should never happen, but... */
7850 + if ((lowmem == 0) || (lowmem >= 0x100000))
7853 + /* reserve all memory between lowmem and the 1MB mark */
7854 + reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT);
7858 #ifndef CONFIG_NEED_MULTIPLE_NODES
7859 -void __init setup_bootmem_allocator(void);
7860 +static void __init setup_bootmem_allocator(void);
7861 static unsigned long __init setup_memory(void)
7864 @@ -469,7 +518,7 @@ static unsigned long __init setup_memory
7868 -void __init zone_sizes_init(void)
7869 +static void __init zone_sizes_init(void)
7871 unsigned long max_zone_pfns[MAX_NR_ZONES];
7872 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
7873 @@ -521,10 +570,16 @@ static void __init reserve_crashkernel(v
7874 (unsigned long)(crash_size >> 20),
7875 (unsigned long)(crash_base >> 20),
7876 (unsigned long)(total_mem >> 20));
7878 + if (reserve_bootmem(crash_base, crash_size,
7879 + BOOTMEM_EXCLUSIVE) < 0) {
7880 + printk(KERN_INFO "crashkernel reservation "
7881 + "failed - memory is in use\n");
7885 crashk_res.start = crash_base;
7886 crashk_res.end = crash_base + crash_size - 1;
7887 - reserve_bootmem(crash_base, crash_size,
7890 printk(KERN_INFO "crashkernel reservation failed - "
7891 "you have to specify a base address\n");
7892 @@ -658,16 +713,9 @@ void __init setup_bootmem_allocator(void
7894 reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
7896 - /* reserve EBDA region, it's a 4K region */
7897 + /* reserve EBDA region */
7898 reserve_ebda_region();
7900 - /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
7901 - PCI prefetch into it (errata #56). Usually the page is reserved anyways,
7902 - unless you have no PS/2 mouse plugged in. */
7903 - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
7904 - boot_cpu_data.x86 == 6)
7905 - reserve_bootmem(0xa0000 - 4096, 4096, BOOTMEM_DEFAULT);
7909 * But first pinch a few for the stack/trampoline stuff
7910 @@ -689,6 +737,8 @@ void __init setup_bootmem_allocator(void
7913 reserve_crashkernel();
7915 + reserve_ibft_region();
7919 @@ -724,6 +774,18 @@ char * __init __attribute__((weak)) memo
7920 return machine_specific_memory_setup();
7925 + * In the golden day, when everything among i386 and x86_64 will be
7926 + * integrated, this will not live here
7928 +void *x86_cpu_to_node_map_early_ptr;
7929 +int x86_cpu_to_node_map_init[NR_CPUS] = {
7930 + [0 ... NR_CPUS-1] = NUMA_NO_NODE
7932 +DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
7936 * Determine if we were loaded by an EFI loader. If so, then we have also been
7937 * passed the efi memmap, systab, etc., so we should use these data structures
7938 @@ -773,7 +835,7 @@ void __init setup_arch(char **cmdline_p)
7940 apm_info.bios = boot_params.apm_bios_info;
7941 ist_info = boot_params.ist_info;
7942 - saved_videomode = boot_params.hdr.vid_mode;
7943 + saved_video_mode = boot_params.hdr.vid_mode;
7944 if( boot_params.sys_desc_table.length != 0 ) {
7945 set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
7946 machine_id = boot_params.sys_desc_table.table[0];
7947 @@ -840,15 +902,19 @@ void __init setup_arch(char **cmdline_p)
7950 /* update e820 for memory not covered by WB MTRRs */
7952 + propagate_e820_map();
7955 if (mtrr_trim_uncached_memory(max_pfn))
7957 + propagate_e820_map();
7960 max_low_pfn = setup_memory();
7962 +#ifdef CONFIG_KVM_CLOCK
7968 * Must be after max_low_pfn is determined, and before kernel
7969 @@ -856,6 +922,7 @@ void __init setup_arch(char **cmdline_p)
7976 * NOTE: before this point _nobody_ is allowed to allocate
7977 @@ -977,6 +1044,18 @@ void __init setup_arch(char **cmdline_p)
7981 +#if defined(CONFIG_X86_SMP) && !defined(CONFIG_XEN)
7983 + * setup to use the early static init tables during kernel startup
7984 + * X86_SMP will exclude sub-arches that don't deal well with it.
7986 + x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
7987 + x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
7989 + x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
7993 #ifdef CONFIG_X86_GENERICARCH
7994 generic_apic_probe();
7996 --- sle11-2009-05-14.orig/arch/x86/kernel/setup_64-xen.c 2009-03-16 16:33:40.000000000 +0100
7997 +++ sle11-2009-05-14/arch/x86/kernel/setup_64-xen.c 2009-03-16 16:38:05.000000000 +0100
7999 #include <linux/crash_dump.h>
8000 #include <linux/root_dev.h>
8001 #include <linux/pci.h>
8002 +#include <asm/pci-direct.h>
8003 #include <linux/efi.h>
8004 #include <linux/acpi.h>
8005 #include <linux/kallsyms.h>
8006 #include <linux/edd.h>
8007 +#include <linux/iscsi_ibft.h>
8008 #include <linux/mmzone.h>
8009 #include <linux/kexec.h>
8010 #include <linux/cpufreq.h>
8011 #include <linux/dmi.h>
8012 #include <linux/dma-mapping.h>
8013 #include <linux/ctype.h>
8014 +#include <linux/sort.h>
8015 #include <linux/uaccess.h>
8016 #include <linux/init_ohci1394_dma.h>
8017 +#include <linux/kvm_para.h>
8019 #include <asm/mtrr.h>
8020 #include <asm/uaccess.h>
8022 #include <asm/mmu_context.h>
8023 #include <asm/proto.h>
8024 #include <asm/setup.h>
8025 -#include <asm/mach_apic.h>
8026 #include <asm/numa.h>
8027 #include <asm/sections.h>
8028 #include <asm/dmi.h>
8030 #include <asm/mce.h>
8032 #include <asm/topology.h>
8033 +#include <asm/pat.h>
8035 +#include <mach_apic.h>
8037 #include <linux/percpu.h>
8038 #include <xen/interface/physdev.h>
8039 @@ -149,7 +155,7 @@ extern int root_mountflags;
8041 char __initdata command_line[COMMAND_LINE_SIZE];
8043 -struct resource standard_io_resources[] = {
8044 +static struct resource standard_io_resources[] = {
8045 { .name = "dma1", .start = 0x00, .end = 0x1f,
8046 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
8047 { .name = "pic1", .start = 0x20, .end = 0x21,
8048 @@ -158,7 +164,9 @@ struct resource standard_io_resources[]
8049 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
8050 { .name = "timer1", .start = 0x50, .end = 0x53,
8051 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
8052 - { .name = "keyboard", .start = 0x60, .end = 0x6f,
8053 + { .name = "keyboard", .start = 0x60, .end = 0x60,
8054 + .flags = IORESOURCE_BUSY | IORESOURCE_IO },
8055 + { .name = "keyboard", .start = 0x64, .end = 0x64,
8056 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
8057 { .name = "dma page reg", .start = 0x80, .end = 0x8f,
8058 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
8059 @@ -224,8 +232,10 @@ contig_initmem_init(unsigned long start_
8060 e820_register_active_regions(0, start_pfn, end_pfn);
8062 free_bootmem_with_active_regions(0, xen_start_info->nr_pages);
8063 + early_res_to_bootmem(0, xen_start_info->nr_pages<<PAGE_SHIFT);
8065 free_bootmem_with_active_regions(0, end_pfn);
8066 + early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
8068 reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
8070 @@ -290,6 +300,7 @@ static void __init reserve_crashkernel(v
8071 (unsigned long)(total_mem >> 20));
8072 crashk_res.start = crash_base;
8073 crashk_res.end = crash_base + crash_size - 1;
8074 + insert_resource(&iomem_resource, &crashk_res);
8078 @@ -306,6 +317,40 @@ void __attribute__((weak)) __init memory
8079 machine_specific_memory_setup();
8082 +static void __init parse_setup_data(void)
8084 + struct setup_data *data;
8085 + unsigned long pa_data;
8087 + if (boot_params.hdr.version < 0x0209)
8089 + pa_data = boot_params.hdr.setup_data;
8091 + data = early_ioremap(pa_data, PAGE_SIZE);
8092 + switch (data->type) {
8096 +#ifndef CONFIG_DEBUG_BOOT_PARAMS
8097 + free_early(pa_data, pa_data+sizeof(*data)+data->len);
8099 + pa_data = data->next;
8100 + early_iounmap(data, PAGE_SIZE);
8104 +#ifdef CONFIG_PCI_MMCONFIG
8105 +extern void __cpuinit fam10h_check_enable_mmcfg(void);
8106 +extern void __init check_enable_amd_mmconf_dmi(void);
8108 +void __cpuinit fam10h_check_enable_mmcfg(void)
8111 +void __init check_enable_amd_mmconf_dmi(void)
8117 * setup_arch - architecture-specific boot-time initializations
8119 @@ -389,6 +434,8 @@ void __init setup_arch(char **cmdline_p)
8120 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
8121 *cmdline_p = command_line;
8123 + parse_setup_data();
8125 parse_early_param();
8127 #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
8128 @@ -398,6 +445,13 @@ void __init setup_arch(char **cmdline_p)
8130 finish_e820_parsing();
8133 + /* after parse_early_param, so could debug it */
8134 + insert_resource(&iomem_resource, &code_resource);
8135 + insert_resource(&iomem_resource, &data_resource);
8136 + insert_resource(&iomem_resource, &bss_resource);
8139 early_gart_iommu_check();
8141 e820_register_active_regions(0, 0, -1UL);
8142 @@ -420,15 +474,23 @@ void __init setup_arch(char **cmdline_p)
8146 - init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
8147 + max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT));
8155 if (is_initial_xendomain())
8160 +#ifdef CONFIG_KVM_CLOCK
8164 #if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
8165 /* setup to use the early static init tables during kernel startup */
8166 x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
8167 @@ -459,9 +521,9 @@ void __init setup_arch(char **cmdline_p)
8168 contig_initmem_init(0, end_pfn);
8171 - early_res_to_bootmem();
8174 + dma32_reserve_bootmem();
8176 #ifdef CONFIG_ACPI_SLEEP
8178 * Reserve low memory region for sleep support.
8179 @@ -487,16 +549,17 @@ void __init setup_arch(char **cmdline_p)
8180 unsigned long end_of_mem = end_pfn << PAGE_SHIFT;
8182 if (ramdisk_end <= end_of_mem) {
8184 - reserve_bootmem_generic(ramdisk_image, ramdisk_size);
8187 + * don't need to reserve again, already reserved early
8188 + * in x86_64_start_kernel, and early_res_to_bootmem
8189 + * convert that to reserved in bootmem
8191 initrd_start = ramdisk_image + PAGE_OFFSET;
8192 initrd_end = initrd_start+ramdisk_size;
8194 initrd_below_start_ok = 1;
8197 - /* Assumes everything on node 0 */
8198 free_bootmem(ramdisk_image, ramdisk_size);
8199 printk(KERN_ERR "initrd extends beyond end of memory "
8200 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
8201 @@ -506,6 +569,9 @@ void __init setup_arch(char **cmdline_p)
8204 reserve_crashkernel();
8206 + reserve_ibft_region();
8210 #ifdef CONFIG_X86_LOCAL_APIC
8211 @@ -633,16 +699,16 @@ void __init setup_arch(char **cmdline_p)
8212 prefill_possible_map();
8218 * We trust e820 completely. No explicit ROM probing in memory.
8221 if (is_initial_xendomain())
8222 - e820_reserve_resources(machine_e820.map, machine_e820.nr_map,
8223 - &code_resource, &data_resource, &bss_resource);
8224 + e820_reserve_resources(machine_e820.map, machine_e820.nr_map);
8226 - e820_reserve_resources(e820.map, e820.nr_map,
8227 - &code_resource, &data_resource, &bss_resource);
8228 + e820_reserve_resources(e820.map, e820.nr_map);
8229 e820_mark_nosave_regions();
8232 @@ -690,6 +756,9 @@ void __init setup_arch(char **cmdline_p)
8235 #endif /* !CONFIG_XEN */
8237 + /* do this before identify_cpu for boot cpu */
8238 + check_enable_amd_mmconf_dmi();
8242 @@ -786,9 +855,9 @@ static void __cpuinit amd_detect_cmp(str
8243 bits = c->x86_coreid_bits;
8245 /* Low order bits define the core id (index of core in socket) */
8246 - c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
8247 - /* Convert the APIC ID into the socket ID */
8248 - c->phys_proc_id = phys_pkg_id(bits);
8249 + c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
8250 + /* Convert the initial APIC ID into the socket ID */
8251 + c->phys_proc_id = c->initial_apicid >> bits;
8254 node = c->phys_proc_id;
8255 @@ -805,7 +874,7 @@ static void __cpuinit amd_detect_cmp(str
8256 If that doesn't result in a usable node fall back to the
8257 path for the previous case. */
8259 - int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits);
8260 + int ht_nodeid = c->initial_apicid;
8262 if (ht_nodeid >= 0 &&
8263 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
8264 @@ -913,7 +982,7 @@ static void __cpuinit init_amd(struct cp
8266 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
8267 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
8268 - clear_bit(0*32+31, (unsigned long *)&c->x86_capability);
8269 + clear_cpu_cap(c, 0*32+31);
8271 /* On C+ stepping K8 rep microcode works well for copy/memset */
8272 level = cpuid_eax(1);
8273 @@ -955,9 +1024,25 @@ static void __cpuinit init_amd(struct cp
8274 /* MFENCE stops RDTSC speculation */
8275 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
8277 + if (c->x86 == 0x10)
8278 + fam10h_check_enable_mmcfg();
8281 if (amd_apic_timer_broken())
8282 disable_apic_timer = 1;
8284 + if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
8285 + unsigned long long tseg;
8288 + * Split up direct mapping around the TSEG SMM area.
8289 + * Don't do it for gbpages because there seems very little
8290 + * benefit in doing so.
8292 + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
8293 + (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
8294 + set_memory_4k((unsigned long)__va(tseg), 1);
8299 @@ -1051,7 +1136,7 @@ static void __cpuinit early_init_intel(s
8301 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
8302 (c->x86 == 0x6 && c->x86_model >= 0x0e))
8303 - set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
8304 + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
8307 static void __cpuinit init_intel(struct cpuinfo_x86 *c)
8308 @@ -1094,9 +1179,6 @@ static void __cpuinit init_intel(struct
8311 c->x86_cache_alignment = c->x86_clflush_size * 2;
8312 - if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
8313 - (c->x86 == 0x6 && c->x86_model >= 0x0e))
8314 - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
8316 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
8317 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
8318 @@ -1105,6 +1187,32 @@ static void __cpuinit init_intel(struct
8322 +static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
8324 + if (c->x86 == 0x6 && c->x86_model >= 0xf)
8325 + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
8328 +static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
8333 + n = c->extended_cpuid_level;
8334 + if (n >= 0x80000008) {
8335 + unsigned eax = cpuid_eax(0x80000008);
8336 + c->x86_virt_bits = (eax >> 8) & 0xff;
8337 + c->x86_phys_bits = eax & 0xff;
8340 + if (c->x86 == 0x6 && c->x86_model >= 0xf) {
8341 + c->x86_cache_alignment = c->x86_clflush_size * 2;
8342 + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
8343 + set_cpu_cap(c, X86_FEATURE_REP_GOOD);
8345 + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
8348 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
8350 char *v = c->x86_vendor_id;
8351 @@ -1113,6 +1221,8 @@ static void __cpuinit get_cpu_vendor(str
8352 c->x86_vendor = X86_VENDOR_AMD;
8353 else if (!strcmp(v, "GenuineIntel"))
8354 c->x86_vendor = X86_VENDOR_INTEL;
8355 + else if (!strcmp(v, "CentaurHauls"))
8356 + c->x86_vendor = X86_VENDOR_CENTAUR;
8358 c->x86_vendor = X86_VENDOR_UNKNOWN;
8360 @@ -1160,15 +1270,16 @@ static void __cpuinit early_identify_cpu
8361 c->x86 += (tfms >> 20) & 0xff;
8363 c->x86_model += ((tfms >> 16) & 0xF) << 4;
8364 - if (c->x86_capability[0] & (1<<19))
8365 + if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
8366 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
8368 /* Have CPUID level 0 only - unheard of */
8372 + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
8374 - c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
8375 + c->phys_proc_id = c->initial_apicid;
8377 /* AMD-defined flags: level 0x80000001 */
8378 xlvl = cpuid_eax(0x80000000);
8379 @@ -1201,8 +1312,12 @@ static void __cpuinit early_identify_cpu
8380 case X86_VENDOR_INTEL:
8381 early_init_intel(c);
8383 + case X86_VENDOR_CENTAUR:
8384 + early_init_centaur(c);
8388 + validate_pat_support(c);
8392 @@ -1237,6 +1352,10 @@ void __cpuinit identify_cpu(struct cpuin
8396 + case X86_VENDOR_CENTAUR:
8400 case X86_VENDOR_UNKNOWN:
8402 display_cacheinfo(c);
8403 @@ -1266,14 +1385,24 @@ void __cpuinit identify_cpu(struct cpuin
8405 select_idle_routine(c);
8407 - if (c != &boot_cpu_data)
8410 numa_add_cpu(smp_processor_id());
8415 +void __cpuinit identify_boot_cpu(void)
8417 + identify_cpu(&boot_cpu_data);
8420 +void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
8422 + BUG_ON(c == &boot_cpu_data);
8427 static __init int setup_noclflush(char *arg)
8429 setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
8430 @@ -1302,123 +1431,3 @@ static __init int setup_disablecpuid(cha
8433 __setup("clearcpuid=", setup_disablecpuid);
8436 - * Get CPU information for use by the procfs.
8439 -static int show_cpuinfo(struct seq_file *m, void *v)
8441 - struct cpuinfo_x86 *c = v;
8445 - cpu = c->cpu_index;
8448 - seq_printf(m, "processor\t: %u\n"
8449 - "vendor_id\t: %s\n"
8450 - "cpu family\t: %d\n"
8452 - "model name\t: %s\n",
8454 - c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
8456 - (int)c->x86_model,
8457 - c->x86_model_id[0] ? c->x86_model_id : "unknown");
8459 - if (c->x86_mask || c->cpuid_level >= 0)
8460 - seq_printf(m, "stepping\t: %d\n", c->x86_mask);
8462 - seq_printf(m, "stepping\t: unknown\n");
8464 - if (cpu_has(c, X86_FEATURE_TSC)) {
8465 - unsigned int freq = cpufreq_quick_get((unsigned)cpu);
8469 - seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
8470 - freq / 1000, (freq % 1000));
8474 - if (c->x86_cache_size >= 0)
8475 - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
8478 - if (smp_num_siblings * c->x86_max_cores > 1) {
8479 - seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
8480 - seq_printf(m, "siblings\t: %d\n",
8481 - cpus_weight(per_cpu(cpu_core_map, cpu)));
8482 - seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
8483 - seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
8489 - "fpu_exception\t: yes\n"
8490 - "cpuid level\t: %d\n"
8495 - for (i = 0; i < 32*NCAPINTS; i++)
8496 - if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
8497 - seq_printf(m, " %s", x86_cap_flags[i]);
8499 - seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
8500 - c->loops_per_jiffy/(500000/HZ),
8501 - (c->loops_per_jiffy/(5000/HZ)) % 100);
8503 - if (c->x86_tlbsize > 0)
8504 - seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
8505 - seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
8506 - seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
8508 - seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
8509 - c->x86_phys_bits, c->x86_virt_bits);
8511 - seq_printf(m, "power management:");
8512 - for (i = 0; i < 32; i++) {
8513 - if (c->x86_power & (1 << i)) {
8514 - if (i < ARRAY_SIZE(x86_power_flags) &&
8515 - x86_power_flags[i])
8516 - seq_printf(m, "%s%s",
8517 - x86_power_flags[i][0]?" ":"",
8518 - x86_power_flags[i]);
8520 - seq_printf(m, " [%d]", i);
8524 - seq_printf(m, "\n\n");
8529 -static void *c_start(struct seq_file *m, loff_t *pos)
8531 - if (*pos == 0) /* just in case, cpu 0 is not the first */
8532 - *pos = first_cpu(cpu_online_map);
8533 - if ((*pos) < NR_CPUS && cpu_online(*pos))
8534 - return &cpu_data(*pos);
8538 -static void *c_next(struct seq_file *m, void *v, loff_t *pos)
8540 - *pos = next_cpu(*pos, cpu_online_map);
8541 - return c_start(m, pos);
8544 -static void c_stop(struct seq_file *m, void *v)
8548 -const struct seq_operations cpuinfo_op = {
8552 - .show = show_cpuinfo,
8554 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
8555 +++ sle11-2009-05-14/arch/x86/kernel/smp-xen.c 2009-03-16 16:38:05.000000000 +0100
8558 + * Intel SMP support routines.
8560 + * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
8561 + * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
8562 + * (c) 2002,2003 Andi Kleen, SuSE Labs.
8564 + * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
8566 + * This code is released under the GNU General Public License version 2 or
8570 +#include <linux/init.h>
8572 +#include <linux/mm.h>
8573 +#include <linux/delay.h>
8574 +#include <linux/spinlock.h>
8575 +#include <linux/kernel_stat.h>
8576 +#include <linux/mc146818rtc.h>
8577 +#include <linux/cache.h>
8578 +#include <linux/interrupt.h>
8579 +#include <linux/cpu.h>
8581 +#include <asm/mtrr.h>
8582 +#include <asm/tlbflush.h>
8583 +#include <asm/mmu_context.h>
8584 +#include <asm/proto.h>
8585 +#include <mach_ipi.h>
8586 +#include <xen/evtchn.h>
8588 + * Some notes on x86 processor bugs affecting SMP operation:
8590 + * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
8591 + * The Linux implications for SMP are handled as follows:
8593 + * Pentium III / [Xeon]
8594 + * None of the E1AP-E3AP errata are visible to the user.
8596 + * E1AP. see PII A1AP
8597 + * E2AP. see PII A2AP
8598 + * E3AP. see PII A3AP
8600 + * Pentium II / [Xeon]
8601 + * None of the A1AP-A3AP errata are visible to the user.
8603 + * A1AP. see PPro 1AP
8604 + * A2AP. see PPro 2AP
8605 + * A3AP. see PPro 7AP
8608 + * None of 1AP-9AP errata are visible to the normal user,
8609 + * except occasional delivery of 'spurious interrupt' as trap #15.
8610 + * This is very rare and a non-problem.
8612 + * 1AP. Linux maps APIC as non-cacheable
8613 + * 2AP. worked around in hardware
8614 + * 3AP. fixed in C0 and above steppings microcode update.
8615 + * Linux does not use excessive STARTUP_IPIs.
8616 + * 4AP. worked around in hardware
8617 + * 5AP. symmetric IO mode (normal Linux operation) not affected.
8618 + * 'noapic' mode has vector 0xf filled out properly.
8619 + * 6AP. 'noapic' mode might be affected - fixed in later steppings
8620 + * 7AP. We do not assume writes to the LVT deassering IRQs
8621 + * 8AP. We do not enable low power mode (deep sleep) during MP bootup
8622 + * 9AP. We do not use mixed mode
8625 + * There is a marginal case where REP MOVS on 100MHz SMP
8626 + * machines with B stepping processors can fail. XXX should provide
8627 + * an L1cache=Writethrough or L1cache=off option.
8629 + * B stepping CPUs may hang. There are hardware work arounds
8630 + * for this. We warn about it in case your board doesn't have the work
8631 + * arounds. Basically that's so I can tell anyone with a B stepping
8632 + * CPU and SMP problems "tough".
8634 + * Specific items [From Pentium Processor Specification Update]
8636 + * 1AP. Linux doesn't use remote read
8637 + * 2AP. Linux doesn't trust APIC errors
8638 + * 3AP. We work around this
8639 + * 4AP. Linux never generated 3 interrupts of the same priority
8640 + * to cause a lost local interrupt.
8641 + * 5AP. Remote read is never used
8642 + * 6AP. not affected - worked around in hardware
8643 + * 7AP. not affected - worked around in hardware
8644 + * 8AP. worked around in hardware - we get explicit CS errors if not
8645 + * 9AP. only 'noapic' mode affected. Might generate spurious
8646 + * interrupts, we log only the first one and count the
8648 + * 10AP. not affected - worked around in hardware
8649 + * 11AP. Linux reads the APIC between writes to avoid this, as per
8650 + * the documentation. Make sure you preserve this as it affects
8651 + * the C stepping chips too.
8652 + * 12AP. not affected - worked around in hardware
8653 + * 13AP. not affected - worked around in hardware
8654 + * 14AP. we always deassert INIT during bootup
8655 + * 15AP. not affected - worked around in hardware
8656 + * 16AP. not affected - worked around in hardware
8657 + * 17AP. not affected - worked around in hardware
8658 + * 18AP. not affected - worked around in hardware
8659 + * 19AP. not affected - worked around in BIOS
8661 + * If this sounds worrying believe me these bugs are either ___RARE___,
8662 + * or are signal timing bugs worked around in hardware and there's
8663 + * about nothing of note with C stepping upwards.
8667 + * this function sends a 'reschedule' IPI to another CPU.
8668 + * it goes straight through and wastes no time serializing
8669 + * anything. Worst case is that we lose a reschedule ...
8671 +void xen_smp_send_reschedule(int cpu)
8673 + if (unlikely(cpu_is_offline(cpu))) {
8677 + send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
8681 + * Structure and data for smp_call_function(). This is designed to minimise
8682 + * static memory requirements. It also looks cleaner.
8684 +static DEFINE_SPINLOCK(call_lock);
8686 +struct call_data_struct {
8687 + void (*func) (void *info);
8690 + atomic_t finished;
8694 +void lock_ipi_call_lock(void)
8696 + spin_lock_irq(&call_lock);
8699 +void unlock_ipi_call_lock(void)
8701 + spin_unlock_irq(&call_lock);
8704 +static struct call_data_struct *call_data;
8706 +static void __smp_call_function(void (*func) (void *info), void *info,
8707 + int nonatomic, int wait)
8709 + struct call_data_struct data;
8710 + int cpus = num_online_cpus() - 1;
8717 + atomic_set(&data.started, 0);
8720 + atomic_set(&data.finished, 0);
8722 + call_data = &data;
8725 + /* Send a message to all other CPUs and wait for them to respond */
8726 + send_IPI_allbutself(CALL_FUNCTION_VECTOR);
8728 + /* Wait for response */
8729 + while (atomic_read(&data.started) != cpus)
8733 + while (atomic_read(&data.finished) != cpus)
8739 + * smp_call_function_mask(): Run a function on a set of other CPUs.
8740 + * @mask: The set of cpus to run on. Must not include the current cpu.
8741 + * @func: The function to run. This must be fast and non-blocking.
8742 + * @info: An arbitrary pointer to pass to the function.
8743 + * @wait: If true, wait (atomically) until function has completed on other CPUs.
8745 + * Returns 0 on success, else a negative status code.
8747 + * If @wait is true, then returns once @func has returned; otherwise
8748 + * it returns just before the target cpu calls @func.
8750 + * You must not call this function with disabled interrupts or from a
8751 + * hardware interrupt handler or from a bottom half handler.
8754 +xen_smp_call_function_mask(cpumask_t mask,
8755 + void (*func)(void *), void *info,
8758 + struct call_data_struct data;
8759 + cpumask_t allbutself;
8762 + /* Can deadlock when called with interrupts disabled */
8763 + WARN_ON(irqs_disabled());
8765 + /* Holding any lock stops cpus from going down. */
8766 + spin_lock(&call_lock);
8768 + allbutself = cpu_online_map;
8769 + cpu_clear(smp_processor_id(), allbutself);
8771 + cpus_and(mask, mask, allbutself);
8772 + cpus = cpus_weight(mask);
8775 + spin_unlock(&call_lock);
8781 + atomic_set(&data.started, 0);
8784 + atomic_set(&data.finished, 0);
8786 + call_data = &data;
8789 + /* Send a message to other CPUs */
8790 + if (cpus_equal(mask, allbutself) &&
8791 + cpus_equal(cpu_online_map, cpu_callout_map))
8792 + send_IPI_allbutself(CALL_FUNCTION_VECTOR);
8794 + send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
8796 + /* Wait for response */
8797 + while (atomic_read(&data.started) != cpus)
8801 + while (atomic_read(&data.finished) != cpus)
8803 + spin_unlock(&call_lock);
8808 +static void stop_this_cpu(void *dummy)
8810 + local_irq_disable();
8812 + * Remove this CPU:
8814 + cpu_clear(smp_processor_id(), cpu_online_map);
8815 + disable_all_local_evtchn();
8816 + if (hlt_works(smp_processor_id()))
8822 + * this function calls the 'stop' function on all other CPUs in the system.
8825 +void xen_smp_send_stop(void)
8828 + unsigned long flags;
8830 + /* Don't deadlock on the call lock in panic */
8831 + nolock = !spin_trylock(&call_lock);
8832 + local_irq_save(flags);
8833 + __smp_call_function(stop_this_cpu, NULL, 0, 0);
8835 + spin_unlock(&call_lock);
8836 + disable_all_local_evtchn();
8837 + local_irq_restore(flags);
8841 + * Reschedule call back. Nothing to do,
8842 + * all the work is done automatically when
8843 + * we return from the interrupt.
8845 +irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
8847 +#ifdef CONFIG_X86_32
8848 + __get_cpu_var(irq_stat).irq_resched_count++;
8850 + add_pda(irq_resched_count, 1);
8852 + return IRQ_HANDLED;
8855 +irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
8857 + void (*func) (void *info) = call_data->func;
8858 + void *info = call_data->info;
8859 + int wait = call_data->wait;
8862 + * Notify initiating CPU that I've grabbed the data and am
8863 + * about to execute the function
8866 + atomic_inc(&call_data->started);
8868 + * At this point the info structure may be out of scope unless wait==1
8872 +#ifdef CONFIG_X86_32
8873 + __get_cpu_var(irq_stat).irq_call_count++;
8875 + add_pda(irq_call_count, 1);
8881 + atomic_inc(&call_data->finished);
8884 + return IRQ_HANDLED;
8886 --- sle11-2009-05-14.orig/arch/x86/kernel/smp_32-xen.c 2009-03-16 16:33:40.000000000 +0100
8887 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
8890 - * Intel SMP support routines.
8892 - * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
8893 - * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
8895 - * This code is released under the GNU General Public License version 2 or
8899 -#include <linux/init.h>
8901 -#include <linux/mm.h>
8902 -#include <linux/delay.h>
8903 -#include <linux/spinlock.h>
8904 -#include <linux/kernel_stat.h>
8905 -#include <linux/mc146818rtc.h>
8906 -#include <linux/cache.h>
8907 -#include <linux/interrupt.h>
8908 -#include <linux/cpu.h>
8909 -#include <linux/module.h>
8911 -#include <asm/mtrr.h>
8912 -#include <asm/tlbflush.h>
8913 -#include <asm/mmu_context.h>
8915 -#include <mach_apic.h>
8917 -#include <xen/evtchn.h>
8920 - * Some notes on x86 processor bugs affecting SMP operation:
8922 - * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
8923 - * The Linux implications for SMP are handled as follows:
8925 - * Pentium III / [Xeon]
8926 - * None of the E1AP-E3AP errata are visible to the user.
8928 - * E1AP. see PII A1AP
8929 - * E2AP. see PII A2AP
8930 - * E3AP. see PII A3AP
8932 - * Pentium II / [Xeon]
8933 - * None of the A1AP-A3AP errata are visible to the user.
8935 - * A1AP. see PPro 1AP
8936 - * A2AP. see PPro 2AP
8937 - * A3AP. see PPro 7AP
8940 - * None of 1AP-9AP errata are visible to the normal user,
8941 - * except occasional delivery of 'spurious interrupt' as trap #15.
8942 - * This is very rare and a non-problem.
8944 - * 1AP. Linux maps APIC as non-cacheable
8945 - * 2AP. worked around in hardware
8946 - * 3AP. fixed in C0 and above steppings microcode update.
8947 - * Linux does not use excessive STARTUP_IPIs.
8948 - * 4AP. worked around in hardware
8949 - * 5AP. symmetric IO mode (normal Linux operation) not affected.
8950 - * 'noapic' mode has vector 0xf filled out properly.
8951 - * 6AP. 'noapic' mode might be affected - fixed in later steppings
8952 - * 7AP. We do not assume writes to the LVT deassering IRQs
8953 - * 8AP. We do not enable low power mode (deep sleep) during MP bootup
8954 - * 9AP. We do not use mixed mode
8957 - * There is a marginal case where REP MOVS on 100MHz SMP
8958 - * machines with B stepping processors can fail. XXX should provide
8959 - * an L1cache=Writethrough or L1cache=off option.
8961 - * B stepping CPUs may hang. There are hardware work arounds
8962 - * for this. We warn about it in case your board doesn't have the work
8963 - * arounds. Basically that's so I can tell anyone with a B stepping
8964 - * CPU and SMP problems "tough".
8966 - * Specific items [From Pentium Processor Specification Update]
8968 - * 1AP. Linux doesn't use remote read
8969 - * 2AP. Linux doesn't trust APIC errors
8970 - * 3AP. We work around this
8971 - * 4AP. Linux never generated 3 interrupts of the same priority
8972 - * to cause a lost local interrupt.
8973 - * 5AP. Remote read is never used
8974 - * 6AP. not affected - worked around in hardware
8975 - * 7AP. not affected - worked around in hardware
8976 - * 8AP. worked around in hardware - we get explicit CS errors if not
8977 - * 9AP. only 'noapic' mode affected. Might generate spurious
8978 - * interrupts, we log only the first one and count the
8980 - * 10AP. not affected - worked around in hardware
8981 - * 11AP. Linux reads the APIC between writes to avoid this, as per
8982 - * the documentation. Make sure you preserve this as it affects
8983 - * the C stepping chips too.
8984 - * 12AP. not affected - worked around in hardware
8985 - * 13AP. not affected - worked around in hardware
8986 - * 14AP. we always deassert INIT during bootup
8987 - * 15AP. not affected - worked around in hardware
8988 - * 16AP. not affected - worked around in hardware
8989 - * 17AP. not affected - worked around in hardware
8990 - * 18AP. not affected - worked around in hardware
8991 - * 19AP. not affected - worked around in BIOS
8993 - * If this sounds worrying believe me these bugs are either ___RARE___,
8994 - * or are signal timing bugs worked around in hardware and there's
8995 - * about nothing of note with C stepping upwards.
8998 -DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
9001 - * the following functions deal with sending IPIs between CPUs.
9003 - * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
9006 -static inline int __prepare_ICR (unsigned int shortcut, int vector)
9008 - unsigned int icr = shortcut | APIC_DEST_LOGICAL;
9012 - icr |= APIC_DM_FIXED | vector;
9015 - icr |= APIC_DM_NMI;
9021 -static inline int __prepare_ICR2 (unsigned int mask)
9023 - return SET_APIC_DEST_FIELD(mask);
9026 -DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
9028 -static inline void __send_IPI_one(unsigned int cpu, int vector)
9030 - int irq = per_cpu(ipi_to_irq, cpu)[vector];
9032 - notify_remote_via_irq(irq);
9035 -void __send_IPI_shortcut(unsigned int shortcut, int vector)
9039 - switch (shortcut) {
9040 - case APIC_DEST_SELF:
9041 - __send_IPI_one(smp_processor_id(), vector);
9043 - case APIC_DEST_ALLBUT:
9044 - for (cpu = 0; cpu < NR_CPUS; ++cpu) {
9045 - if (cpu == smp_processor_id())
9047 - if (cpu_isset(cpu, cpu_online_map)) {
9048 - __send_IPI_one(cpu, vector);
9053 - printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
9059 -void send_IPI_self(int vector)
9061 - __send_IPI_shortcut(APIC_DEST_SELF, vector);
9065 - * This is only used on smaller machines.
9067 -void send_IPI_mask_bitmask(cpumask_t mask, int vector)
9069 - unsigned long flags;
9072 - local_irq_save(flags);
9073 - WARN_ON(cpus_addr(mask)[0] & ~cpus_addr(cpu_online_map)[0]);
9075 - for (cpu = 0; cpu < NR_CPUS; ++cpu) {
9076 - if (cpu_isset(cpu, mask)) {
9077 - __send_IPI_one(cpu, vector);
9081 - local_irq_restore(flags);
9084 -void send_IPI_mask_sequence(cpumask_t mask, int vector)
9087 - send_IPI_mask_bitmask(mask, vector);
9090 -#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
9094 - * Smarter SMP flushing macros.
9095 - * c/o Linus Torvalds.
9097 - * These mean you can really definitely utterly forget about
9098 - * writing to user space from interrupts. (Its not allowed anyway).
9100 - * Optimizations Manfred Spraul <manfred@colorfullife.com>
9103 -static cpumask_t flush_cpumask;
9104 -static struct mm_struct * flush_mm;
9105 -static unsigned long flush_va;
9106 -static DEFINE_SPINLOCK(tlbstate_lock);
9109 - * We cannot call mmdrop() because we are in interrupt context,
9110 - * instead update mm->cpu_vm_mask.
9112 - * We need to reload %cr3 since the page tables may be going
9113 - * away from under us..
9115 -void leave_mm(int cpu)
9117 - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
9119 - cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
9120 - load_cr3(swapper_pg_dir);
9122 -EXPORT_SYMBOL_GPL(leave_mm);
9126 - * The flush IPI assumes that a thread switch happens in this order:
9127 - * [cpu0: the cpu that switches]
9128 - * 1) switch_mm() either 1a) or 1b)
9129 - * 1a) thread switch to a different mm
9130 - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
9131 - * Stop ipi delivery for the old mm. This is not synchronized with
9132 - * the other cpus, but smp_invalidate_interrupt ignore flush ipis
9133 - * for the wrong mm, and in the worst case we perform a superfluous
9135 - * 1a2) set cpu_tlbstate to TLBSTATE_OK
9136 - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
9137 - * was in lazy tlb mode.
9138 - * 1a3) update cpu_tlbstate[].active_mm
9139 - * Now cpu0 accepts tlb flushes for the new mm.
9140 - * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
9141 - * Now the other cpus will send tlb flush ipis.
9142 - * 1a4) change cr3.
9143 - * 1b) thread switch without mm change
9144 - * cpu_tlbstate[].active_mm is correct, cpu0 already handles
9146 - * 1b1) set cpu_tlbstate to TLBSTATE_OK
9147 - * 1b2) test_and_set the cpu bit in cpu_vm_mask.
9148 - * Atomically set the bit [other cpus will start sending flush ipis],
9149 - * and test the bit.
9150 - * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
9151 - * 2) switch %%esp, ie current
9153 - * The interrupt must handle 2 special cases:
9154 - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
9155 - * - the cpu performs speculative tlb reads, i.e. even if the cpu only
9156 - * runs in kernel space, the cpu could load tlb entries for user space
9159 - * The good news is that cpu_tlbstate is local to each cpu, no
9160 - * write/read ordering problems.
9166 - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
9167 - * 2) Leave the mm if we are in the lazy tlb mode.
9170 -irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
9172 - unsigned long cpu;
9176 - if (!cpu_isset(cpu, flush_cpumask))
9179 - * This was a BUG() but until someone can quote me the
9180 - * line from the intel manual that guarantees an IPI to
9181 - * multiple CPUs is retried _only_ on the erroring CPUs
9182 - * its staying as a return
9187 - if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
9188 - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
9189 - if (flush_va == TLB_FLUSH_ALL)
9190 - local_flush_tlb();
9192 - __flush_tlb_one(flush_va);
9196 - smp_mb__before_clear_bit();
9197 - cpu_clear(cpu, flush_cpumask);
9198 - smp_mb__after_clear_bit();
9200 - put_cpu_no_resched();
9201 - __get_cpu_var(irq_stat).irq_tlb_count++;
9203 - return IRQ_HANDLED;
9206 -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
9209 - cpumask_t cpumask = *cpumaskp;
9212 - * A couple of (to be removed) sanity checks:
9214 - * - current CPU must not be in mask
9215 - * - mask must exist :)
9217 - BUG_ON(cpus_empty(cpumask));
9218 - BUG_ON(cpu_isset(smp_processor_id(), cpumask));
9221 -#ifdef CONFIG_HOTPLUG_CPU
9222 - /* If a CPU which we ran on has gone down, OK. */
9223 - cpus_and(cpumask, cpumask, cpu_online_map);
9224 - if (unlikely(cpus_empty(cpumask)))
9229 - * i'm not happy about this global shared spinlock in the
9230 - * MM hot path, but we'll see how contended it is.
9231 - * AK: x86-64 has a faster method that could be ported.
9233 - spin_lock(&tlbstate_lock);
9237 - cpus_or(flush_cpumask, cpumask, flush_cpumask);
9239 - * We have to send the IPI only to
9242 - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
9244 - while (!cpus_empty(flush_cpumask))
9245 - /* nothing. lockup detection does not belong here */
9250 - spin_unlock(&tlbstate_lock);
9253 -void flush_tlb_current_task(void)
9255 - struct mm_struct *mm = current->mm;
9256 - cpumask_t cpu_mask;
9258 - preempt_disable();
9259 - cpu_mask = mm->cpu_vm_mask;
9260 - cpu_clear(smp_processor_id(), cpu_mask);
9262 - local_flush_tlb();
9263 - if (!cpus_empty(cpu_mask))
9264 - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
9268 -void flush_tlb_mm (struct mm_struct * mm)
9270 - cpumask_t cpu_mask;
9272 - preempt_disable();
9273 - cpu_mask = mm->cpu_vm_mask;
9274 - cpu_clear(smp_processor_id(), cpu_mask);
9276 - if (current->active_mm == mm) {
9278 - local_flush_tlb();
9280 - leave_mm(smp_processor_id());
9282 - if (!cpus_empty(cpu_mask))
9283 - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
9288 -void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
9290 - struct mm_struct *mm = vma->vm_mm;
9291 - cpumask_t cpu_mask;
9293 - preempt_disable();
9294 - cpu_mask = mm->cpu_vm_mask;
9295 - cpu_clear(smp_processor_id(), cpu_mask);
9297 - if (current->active_mm == mm) {
9299 - __flush_tlb_one(va);
9301 - leave_mm(smp_processor_id());
9304 - if (!cpus_empty(cpu_mask))
9305 - flush_tlb_others(cpu_mask, mm, va);
9309 -EXPORT_SYMBOL(flush_tlb_page);
9311 -static void do_flush_tlb_all(void* info)
9313 - unsigned long cpu = smp_processor_id();
9315 - __flush_tlb_all();
9316 - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
9320 -void flush_tlb_all(void)
9322 - on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
9328 - * this function sends a 'reschedule' IPI to another CPU.
9329 - * it goes straight through and wastes no time serializing
9330 - * anything. Worst case is that we lose a reschedule ...
9332 -void xen_smp_send_reschedule(int cpu)
9334 - WARN_ON(cpu_is_offline(cpu));
9335 - send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
9339 - * Structure and data for smp_call_function(). This is designed to minimise
9340 - * static memory requirements. It also looks cleaner.
9342 -static DEFINE_SPINLOCK(call_lock);
9344 -struct call_data_struct {
9345 - void (*func) (void *info);
9348 - atomic_t finished;
9352 -void lock_ipi_call_lock(void)
9354 - spin_lock_irq(&call_lock);
9357 -void unlock_ipi_call_lock(void)
9359 - spin_unlock_irq(&call_lock);
9362 -static struct call_data_struct *call_data;
9364 -static void __smp_call_function(void (*func) (void *info), void *info,
9365 - int nonatomic, int wait)
9367 - struct call_data_struct data;
9368 - int cpus = num_online_cpus() - 1;
9375 - atomic_set(&data.started, 0);
9378 - atomic_set(&data.finished, 0);
9380 - call_data = &data;
9383 - /* Send a message to all other CPUs and wait for them to respond */
9384 - send_IPI_allbutself(CALL_FUNCTION_VECTOR);
9386 - /* Wait for response */
9387 - while (atomic_read(&data.started) != cpus)
9391 - while (atomic_read(&data.finished) != cpus)
9397 - * smp_call_function_mask(): Run a function on a set of other CPUs.
9398 - * @mask: The set of cpus to run on. Must not include the current cpu.
9399 - * @func: The function to run. This must be fast and non-blocking.
9400 - * @info: An arbitrary pointer to pass to the function.
9401 - * @wait: If true, wait (atomically) until function has completed on other CPUs.
9403 - * Returns 0 on success, else a negative status code.
9405 - * If @wait is true, then returns once @func has returned; otherwise
9406 - * it returns just before the target cpu calls @func.
9408 - * You must not call this function with disabled interrupts or from a
9409 - * hardware interrupt handler or from a bottom half handler.
9412 -xen_smp_call_function_mask(cpumask_t mask,
9413 - void (*func)(void *), void *info,
9416 - struct call_data_struct data;
9417 - cpumask_t allbutself;
9420 - /* Can deadlock when called with interrupts disabled */
9421 - WARN_ON(irqs_disabled());
9423 - /* Holding any lock stops cpus from going down. */
9424 - spin_lock(&call_lock);
9426 - allbutself = cpu_online_map;
9427 - cpu_clear(smp_processor_id(), allbutself);
9429 - cpus_and(mask, mask, allbutself);
9430 - cpus = cpus_weight(mask);
9433 - spin_unlock(&call_lock);
9439 - atomic_set(&data.started, 0);
9442 - atomic_set(&data.finished, 0);
9444 - call_data = &data;
9447 - /* Send a message to other CPUs */
9448 - if (cpus_equal(mask, allbutself))
9449 - send_IPI_allbutself(CALL_FUNCTION_VECTOR);
9451 - send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
9453 - /* Wait for response */
9454 - while (atomic_read(&data.started) != cpus)
9458 - while (atomic_read(&data.finished) != cpus)
9460 - spin_unlock(&call_lock);
9465 -static void stop_this_cpu (void * dummy)
9467 - local_irq_disable();
9469 - * Remove this CPU:
9471 - cpu_clear(smp_processor_id(), cpu_online_map);
9472 - disable_all_local_evtchn();
9473 - if (cpu_data(smp_processor_id()).hlt_works_ok)
9479 - * this function calls the 'stop' function on all other CPUs in the system.
9482 -void xen_smp_send_stop(void)
9484 - /* Don't deadlock on the call lock in panic */
9485 - int nolock = !spin_trylock(&call_lock);
9486 - unsigned long flags;
9488 - local_irq_save(flags);
9489 - __smp_call_function(stop_this_cpu, NULL, 0, 0);
9491 - spin_unlock(&call_lock);
9492 - disable_all_local_evtchn();
9493 - local_irq_restore(flags);
9497 - * Reschedule call back. Nothing to do,
9498 - * all the work is done automatically when
9499 - * we return from the interrupt.
9501 -irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
9503 - __get_cpu_var(irq_stat).irq_resched_count++;
9505 - return IRQ_HANDLED;
9508 -#include <linux/kallsyms.h>
9509 -irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
9511 - void (*func) (void *info) = call_data->func;
9512 - void *info = call_data->info;
9513 - int wait = call_data->wait;
9516 - * Notify initiating CPU that I've grabbed the data and am
9517 - * about to execute the function
9520 - atomic_inc(&call_data->started);
9522 - * At this point the info structure may be out of scope unless wait==1
9526 - __get_cpu_var(irq_stat).irq_call_count++;
9531 - atomic_inc(&call_data->finished);
9534 - return IRQ_HANDLED;
9536 --- sle11-2009-05-14.orig/arch/x86/kernel/smp_64-xen.c 2009-03-16 16:33:40.000000000 +0100
9537 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
9540 - * Intel SMP support routines.
9542 - * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
9543 - * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
9544 - * (c) 2002,2003 Andi Kleen, SuSE Labs.
9546 - * This code is released under the GNU General Public License version 2 or
9550 -#include <linux/init.h>
9552 -#include <linux/mm.h>
9553 -#include <linux/delay.h>
9554 -#include <linux/spinlock.h>
9555 -#include <linux/smp.h>
9556 -#include <linux/kernel_stat.h>
9557 -#include <linux/mc146818rtc.h>
9558 -#include <linux/interrupt.h>
9560 -#include <asm/mtrr.h>
9561 -#include <asm/pgalloc.h>
9562 -#include <asm/tlbflush.h>
9563 -#include <asm/mach_apic.h>
9564 -#include <asm/mmu_context.h>
9565 -#include <asm/proto.h>
9566 -#include <asm/apicdef.h>
9567 -#include <asm/idle.h>
9569 -#include <xen/evtchn.h>
9574 - * Smarter SMP flushing macros.
9575 - * c/o Linus Torvalds.
9577 - * These mean you can really definitely utterly forget about
9578 - * writing to user space from interrupts. (Its not allowed anyway).
9580 - * Optimizations Manfred Spraul <manfred@colorfullife.com>
9582 - * More scalable flush, from Andi Kleen
9584 - * To avoid global state use 8 different call vectors.
9585 - * Each CPU uses a specific vector to trigger flushes on other
9586 - * CPUs. Depending on the received vector the target CPUs look into
9587 - * the right per cpu variable for the flush data.
9589 - * With more than 8 CPUs they are hashed to the 8 available
9590 - * vectors. The limited global vector space forces us to this right now.
9591 - * In future when interrupts are split into per CPU domains this could be
9592 - * fixed, at the cost of triggering multiple IPIs in some cases.
9595 -union smp_flush_state {
9597 - cpumask_t flush_cpumask;
9598 - struct mm_struct *flush_mm;
9599 - unsigned long flush_va;
9600 - spinlock_t tlbstate_lock;
9602 - char pad[SMP_CACHE_BYTES];
9603 -} ____cacheline_aligned;
9605 -/* State is put into the per CPU data section, but padded
9606 - to a full cache line because other CPUs can access it and we don't
9607 - want false sharing in the per cpu data segment. */
9608 -static DEFINE_PER_CPU(union smp_flush_state, flush_state);
9611 - * We cannot call mmdrop() because we are in interrupt context,
9612 - * instead update mm->cpu_vm_mask.
9614 -void leave_mm(int cpu)
9616 - if (read_pda(mmu_state) == TLBSTATE_OK)
9618 - cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
9619 - load_cr3(swapper_pg_dir);
9621 -EXPORT_SYMBOL_GPL(leave_mm);
9625 - * The flush IPI assumes that a thread switch happens in this order:
9626 - * [cpu0: the cpu that switches]
9627 - * 1) switch_mm() either 1a) or 1b)
9628 - * 1a) thread switch to a different mm
9629 - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
9630 - * Stop ipi delivery for the old mm. This is not synchronized with
9631 - * the other cpus, but smp_invalidate_interrupt ignore flush ipis
9632 - * for the wrong mm, and in the worst case we perform a superfluous
9634 - * 1a2) set cpu mmu_state to TLBSTATE_OK
9635 - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
9636 - * was in lazy tlb mode.
9637 - * 1a3) update cpu active_mm
9638 - * Now cpu0 accepts tlb flushes for the new mm.
9639 - * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
9640 - * Now the other cpus will send tlb flush ipis.
9641 - * 1a4) change cr3.
9642 - * 1b) thread switch without mm change
9643 - * cpu active_mm is correct, cpu0 already handles
9645 - * 1b1) set cpu mmu_state to TLBSTATE_OK
9646 - * 1b2) test_and_set the cpu bit in cpu_vm_mask.
9647 - * Atomically set the bit [other cpus will start sending flush ipis],
9648 - * and test the bit.
9649 - * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
9650 - * 2) switch %%esp, ie current
9652 - * The interrupt must handle 2 special cases:
9653 - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
9654 - * - the cpu performs speculative tlb reads, i.e. even if the cpu only
9655 - * runs in kernel space, the cpu could load tlb entries for user space
9658 - * The good news is that cpu mmu_state is local to each cpu, no
9659 - * write/read ordering problems.
9665 - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
9666 - * 2) Leave the mm if we are in the lazy tlb mode.
9668 - * Interrupts are disabled.
9671 -asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
9675 - union smp_flush_state *f;
9677 - cpu = smp_processor_id();
9679 - * orig_rax contains the negated interrupt vector.
9680 - * Use that to determine where the sender put the data.
9682 - sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
9683 - f = &per_cpu(flush_state, sender);
9685 - if (!cpu_isset(cpu, f->flush_cpumask))
9688 - * This was a BUG() but until someone can quote me the
9689 - * line from the intel manual that guarantees an IPI to
9690 - * multiple CPUs is retried _only_ on the erroring CPUs
9691 - * its staying as a return
9696 - if (f->flush_mm == read_pda(active_mm)) {
9697 - if (read_pda(mmu_state) == TLBSTATE_OK) {
9698 - if (f->flush_va == TLB_FLUSH_ALL)
9699 - local_flush_tlb();
9701 - __flush_tlb_one(f->flush_va);
9707 - cpu_clear(cpu, f->flush_cpumask);
9708 - add_pda(irq_tlb_count, 1);
9711 -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
9715 - union smp_flush_state *f;
9716 - cpumask_t cpumask = *cpumaskp;
9718 - /* Caller has disabled preemption */
9719 - sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
9720 - f = &per_cpu(flush_state, sender);
9723 - * Could avoid this lock when
9724 - * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
9725 - * probably not worth checking this for a cache-hot lock.
9727 - spin_lock(&f->tlbstate_lock);
9731 - cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
9734 - * We have to send the IPI only to
9737 - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
9739 - while (!cpus_empty(f->flush_cpumask))
9742 - f->flush_mm = NULL;
9744 - spin_unlock(&f->tlbstate_lock);
9747 -int __cpuinit init_smp_flush(void)
9751 - for_each_cpu_mask(i, cpu_possible_map) {
9752 - spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
9756 -core_initcall(init_smp_flush);
9758 -void flush_tlb_current_task(void)
9760 - struct mm_struct *mm = current->mm;
9761 - cpumask_t cpu_mask;
9763 - preempt_disable();
9764 - cpu_mask = mm->cpu_vm_mask;
9765 - cpu_clear(smp_processor_id(), cpu_mask);
9767 - local_flush_tlb();
9768 - if (!cpus_empty(cpu_mask))
9769 - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
9773 -void flush_tlb_mm (struct mm_struct * mm)
9775 - cpumask_t cpu_mask;
9777 - preempt_disable();
9778 - cpu_mask = mm->cpu_vm_mask;
9779 - cpu_clear(smp_processor_id(), cpu_mask);
9781 - if (current->active_mm == mm) {
9783 - local_flush_tlb();
9785 - leave_mm(smp_processor_id());
9787 - if (!cpus_empty(cpu_mask))
9788 - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
9793 -void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
9795 - struct mm_struct *mm = vma->vm_mm;
9796 - cpumask_t cpu_mask;
9798 - preempt_disable();
9799 - cpu_mask = mm->cpu_vm_mask;
9800 - cpu_clear(smp_processor_id(), cpu_mask);
9802 - if (current->active_mm == mm) {
9804 - __flush_tlb_one(va);
9806 - leave_mm(smp_processor_id());
9809 - if (!cpus_empty(cpu_mask))
9810 - flush_tlb_others(cpu_mask, mm, va);
9815 -static void do_flush_tlb_all(void* info)
9817 - unsigned long cpu = smp_processor_id();
9819 - __flush_tlb_all();
9820 - if (read_pda(mmu_state) == TLBSTATE_LAZY)
9824 -void flush_tlb_all(void)
9826 - on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
9831 - * this function sends a 'reschedule' IPI to another CPU.
9832 - * it goes straight through and wastes no time serializing
9833 - * anything. Worst case is that we lose a reschedule ...
9836 -void smp_send_reschedule(int cpu)
9838 - send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
9842 - * Structure and data for smp_call_function(). This is designed to minimise
9843 - * static memory requirements. It also looks cleaner.
9845 -static DEFINE_SPINLOCK(call_lock);
9847 -struct call_data_struct {
9848 - void (*func) (void *info);
9851 - atomic_t finished;
9855 -static struct call_data_struct * call_data;
9857 -void lock_ipi_call_lock(void)
9859 - spin_lock_irq(&call_lock);
9862 -void unlock_ipi_call_lock(void)
9864 - spin_unlock_irq(&call_lock);
9868 - * this function sends a 'generic call function' IPI to all other CPU
9869 - * of the system defined in the mask.
9871 -static int __smp_call_function_mask(cpumask_t mask,
9872 - void (*func)(void *), void *info,
9875 - struct call_data_struct data;
9876 - cpumask_t allbutself;
9879 - allbutself = cpu_online_map;
9880 - cpu_clear(smp_processor_id(), allbutself);
9882 - cpus_and(mask, mask, allbutself);
9883 - cpus = cpus_weight(mask);
9890 - atomic_set(&data.started, 0);
9893 - atomic_set(&data.finished, 0);
9895 - call_data = &data;
9898 - /* Send a message to other CPUs */
9899 - if (cpus_equal(mask, allbutself))
9900 - send_IPI_allbutself(CALL_FUNCTION_VECTOR);
9902 - send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
9904 - /* Wait for response */
9905 - while (atomic_read(&data.started) != cpus)
9911 - while (atomic_read(&data.finished) != cpus)
9917 - * smp_call_function_mask(): Run a function on a set of other CPUs.
9918 - * @mask: The set of cpus to run on. Must not include the current cpu.
9919 - * @func: The function to run. This must be fast and non-blocking.
9920 - * @info: An arbitrary pointer to pass to the function.
9921 - * @wait: If true, wait (atomically) until function has completed on other CPUs.
9923 - * Returns 0 on success, else a negative status code.
9925 - * If @wait is true, then returns once @func has returned; otherwise
9926 - * it returns just before the target cpu calls @func.
9928 - * You must not call this function with disabled interrupts or from a
9929 - * hardware interrupt handler or from a bottom half handler.
9931 -int smp_call_function_mask(cpumask_t mask,
9932 - void (*func)(void *), void *info,
9937 - /* Can deadlock when called with interrupts disabled */
9938 - WARN_ON(irqs_disabled());
9940 - spin_lock(&call_lock);
9941 - ret = __smp_call_function_mask(mask, func, info, wait);
9942 - spin_unlock(&call_lock);
9945 -EXPORT_SYMBOL(smp_call_function_mask);
9948 - * smp_call_function_single - Run a function on a specific CPU
9949 - * @func: The function to run. This must be fast and non-blocking.
9950 - * @info: An arbitrary pointer to pass to the function.
9951 - * @nonatomic: Currently unused.
9952 - * @wait: If true, wait until function has completed on other CPUs.
9954 - * Retrurns 0 on success, else a negative status code.
9956 - * Does not return until the remote CPU is nearly ready to execute <func>
9957 - * or is or has executed.
9960 -int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
9961 - int nonatomic, int wait)
9963 - /* prevent preemption and reschedule on another processor */
9964 - int ret, me = get_cpu();
9966 - /* Can deadlock when called with interrupts disabled */
9967 - WARN_ON(irqs_disabled());
9970 - local_irq_disable();
9972 - local_irq_enable();
9977 - ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
9982 -EXPORT_SYMBOL(smp_call_function_single);
9985 - * smp_call_function - run a function on all other CPUs.
9986 - * @func: The function to run. This must be fast and non-blocking.
9987 - * @info: An arbitrary pointer to pass to the function.
9988 - * @nonatomic: currently unused.
9989 - * @wait: If true, wait (atomically) until function has completed on other
9992 - * Returns 0 on success, else a negative status code. Does not return until
9993 - * remote CPUs are nearly ready to execute func or are or have executed.
9995 - * You must not call this function with disabled interrupts or from a
9996 - * hardware interrupt handler or from a bottom half handler.
9997 - * Actually there are a few legal cases, like panic.
9999 -int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
10002 - return smp_call_function_mask(cpu_online_map, func, info, wait);
10004 -EXPORT_SYMBOL(smp_call_function);
10006 -static void stop_this_cpu(void *dummy)
10008 - local_irq_disable();
10010 - * Remove this CPU:
10012 - cpu_clear(smp_processor_id(), cpu_online_map);
10013 - disable_all_local_evtchn();
10018 -void smp_send_stop(void)
10021 - unsigned long flags;
10023 -#ifndef CONFIG_XEN
10024 - if (reboot_force)
10028 - /* Don't deadlock on the call lock in panic */
10029 - nolock = !spin_trylock(&call_lock);
10030 - local_irq_save(flags);
10031 - __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0);
10033 - spin_unlock(&call_lock);
10034 - disable_all_local_evtchn();
10035 - local_irq_restore(flags);
10039 - * Reschedule call back. Nothing to do,
10040 - * all the work is done automatically when
10041 - * we return from the interrupt.
10043 -#ifndef CONFIG_XEN
10044 -asmlinkage void smp_reschedule_interrupt(void)
10046 -asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx)
10049 -#ifndef CONFIG_XEN
10052 - add_pda(irq_resched_count, 1);
10054 - return IRQ_HANDLED;
10058 -#ifndef CONFIG_XEN
10059 -asmlinkage void smp_call_function_interrupt(void)
10061 -asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx)
10064 - void (*func) (void *info) = call_data->func;
10065 - void *info = call_data->info;
10066 - int wait = call_data->wait;
10068 -#ifndef CONFIG_XEN
10072 - * Notify initiating CPU that I've grabbed the data and am
10073 - * about to execute the function
10076 - atomic_inc(&call_data->started);
10078 - * At this point the info structure may be out of scope unless wait==1
10083 - add_pda(irq_call_count, 1);
10087 - atomic_inc(&call_data->finished);
10090 - return IRQ_HANDLED;
10093 --- sle11-2009-05-14.orig/arch/x86/kernel/time_32-xen.c 2009-03-24 10:12:48.000000000 +0100
10094 +++ sle11-2009-05-14/arch/x86/kernel/time_32-xen.c 2009-03-24 10:13:09.000000000 +0100
10095 @@ -699,8 +699,6 @@ int xen_update_persistent_clock(void)
10099 -extern void (*late_time_init)(void);
10101 /* Dynamically-mapped IRQ. */
10102 DEFINE_PER_CPU(int, timer_irq);
10104 --- sle11-2009-05-14.orig/arch/x86/kernel/traps_32-xen.c 2009-03-16 16:33:40.000000000 +0100
10105 +++ sle11-2009-05-14/arch/x86/kernel/traps_32-xen.c 2009-03-16 16:38:05.000000000 +0100
10107 * 'Traps.c' handles hardware traps and faults after we have saved some
10108 * state in 'asm.s'.
10110 -#include <linux/sched.h>
10111 +#include <linux/interrupt.h>
10112 +#include <linux/kallsyms.h>
10113 +#include <linux/spinlock.h>
10114 +#include <linux/highmem.h>
10115 +#include <linux/kprobes.h>
10116 +#include <linux/uaccess.h>
10117 +#include <linux/utsname.h>
10118 +#include <linux/kdebug.h>
10119 #include <linux/kernel.h>
10120 +#include <linux/module.h>
10121 +#include <linux/ptrace.h>
10122 #include <linux/string.h>
10123 +#include <linux/unwind.h>
10124 +#include <linux/delay.h>
10125 #include <linux/errno.h>
10126 +#include <linux/kexec.h>
10127 +#include <linux/sched.h>
10128 #include <linux/timer.h>
10129 -#include <linux/mm.h>
10130 #include <linux/init.h>
10131 -#include <linux/delay.h>
10132 -#include <linux/spinlock.h>
10133 -#include <linux/interrupt.h>
10134 -#include <linux/highmem.h>
10135 -#include <linux/kallsyms.h>
10136 -#include <linux/ptrace.h>
10137 -#include <linux/utsname.h>
10138 -#include <linux/kprobes.h>
10139 -#include <linux/kexec.h>
10140 -#include <linux/unwind.h>
10141 -#include <linux/uaccess.h>
10142 -#include <linux/nmi.h>
10143 #include <linux/bug.h>
10144 +#include <linux/nmi.h>
10145 +#include <linux/mm.h>
10148 #include <linux/ioport.h>
10149 @@ -43,21 +45,18 @@
10150 #include <linux/edac.h>
10153 +#include <asm/arch_hooks.h>
10154 +#include <asm/stacktrace.h>
10155 #include <asm/processor.h>
10156 -#include <asm/system.h>
10157 -#include <asm/io.h>
10158 -#include <asm/atomic.h>
10159 #include <asm/debugreg.h>
10160 +#include <asm/atomic.h>
10161 +#include <asm/system.h>
10162 +#include <asm/unwind.h>
10163 #include <asm/desc.h>
10164 #include <asm/i387.h>
10165 #include <asm/nmi.h>
10166 -#include <asm/unwind.h>
10167 #include <asm/smp.h>
10168 -#include <asm/arch_hooks.h>
10169 -#include <linux/kdebug.h>
10170 -#include <asm/stacktrace.h>
10172 -#include <linux/module.h>
10173 +#include <asm/io.h>
10175 #include "mach_traps.h"
10177 @@ -71,7 +70,7 @@ EXPORT_SYMBOL_GPL(used_vectors);
10178 asmlinkage int system_call(void);
10180 /* Do we ignore FPU interrupts ? */
10181 -char ignore_fpu_irq = 0;
10182 +char ignore_fpu_irq;
10184 #ifndef CONFIG_X86_NO_IDT
10186 @@ -113,12 +112,13 @@ static unsigned int code_bytes = 64;
10187 void printk_address(unsigned long address, int reliable)
10189 #ifdef CONFIG_KALLSYMS
10190 - unsigned long offset = 0, symsize;
10191 + char namebuf[KSYM_NAME_LEN];
10192 + unsigned long offset = 0;
10193 + unsigned long symsize;
10194 const char *symname;
10196 - char *delim = ":";
10197 - char namebuf[128];
10198 char reliab[4] = "";
10199 + char *delim = ":";
10202 symname = kallsyms_lookup(address, &symsize, &offset,
10203 &modname, namebuf);
10204 @@ -146,13 +146,14 @@ static inline int valid_stack_ptr(struct
10206 /* The form of the top of the frame on the stack */
10207 struct stack_frame {
10208 - struct stack_frame *next_frame;
10209 - unsigned long return_address;
10210 + struct stack_frame *next_frame;
10211 + unsigned long return_address;
10214 -static inline unsigned long print_context_stack(struct thread_info *tinfo,
10215 - unsigned long *stack, unsigned long bp,
10216 - const struct stacktrace_ops *ops, void *data)
10217 +static inline unsigned long
10218 +print_context_stack(struct thread_info *tinfo,
10219 + unsigned long *stack, unsigned long bp,
10220 + const struct stacktrace_ops *ops, void *data)
10222 struct stack_frame *frame = (struct stack_frame *)bp;
10224 @@ -174,7 +175,7 @@ static inline unsigned long print_contex
10228 -#define MSG(msg) ops->warning(data, msg)
10229 +#define MSG(msg) ops->warning(data, msg)
10231 void dump_trace(struct task_struct *task, struct pt_regs *regs,
10232 unsigned long *stack, unsigned long bp,
10233 @@ -185,6 +186,7 @@ void dump_trace(struct task_struct *task
10236 unsigned long dummy;
10239 if (task != current)
10240 stack = (unsigned long *)task->thread.sp;
10241 @@ -194,7 +196,7 @@ void dump_trace(struct task_struct *task
10243 if (task == current) {
10244 /* Grab bp right from our regs */
10245 - asm ("movl %%ebp, %0" : "=r" (bp) : );
10246 + asm("movl %%ebp, %0" : "=r" (bp) :);
10248 /* bp is the last reg pushed by switch_to */
10249 bp = *(unsigned long *) task->thread.sp;
10250 @@ -204,15 +206,18 @@ void dump_trace(struct task_struct *task
10253 struct thread_info *context;
10255 context = (struct thread_info *)
10256 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
10257 bp = print_context_stack(context, stack, bp, ops, data);
10258 - /* Should be after the line below, but somewhere
10259 - in early boot context comes out corrupted and we
10260 - can't reference it -AK */
10262 + * Should be after the line below, but somewhere
10263 + * in early boot context comes out corrupted and we
10264 + * can't reference it:
10266 if (ops->stack(data, "IRQ") < 0)
10268 - stack = (unsigned long*)context->previous_esp;
10269 + stack = (unsigned long *)context->previous_esp;
10272 touch_nmi_watchdog();
10273 @@ -251,15 +256,15 @@ static void print_trace_address(void *da
10276 static const struct stacktrace_ops print_trace_ops = {
10277 - .warning = print_trace_warning,
10278 - .warning_symbol = print_trace_warning_symbol,
10279 - .stack = print_trace_stack,
10280 - .address = print_trace_address,
10281 + .warning = print_trace_warning,
10282 + .warning_symbol = print_trace_warning_symbol,
10283 + .stack = print_trace_stack,
10284 + .address = print_trace_address,
10288 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
10289 - unsigned long *stack, unsigned long bp, char *log_lvl)
10290 + unsigned long *stack, unsigned long bp, char *log_lvl)
10292 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
10293 printk("%s =======================\n", log_lvl);
10294 @@ -271,21 +276,22 @@ void show_trace(struct task_struct *task
10295 show_trace_log_lvl(task, regs, stack, bp, "");
10298 -static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
10299 - unsigned long *sp, unsigned long bp, char *log_lvl)
10301 +show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
10302 + unsigned long *sp, unsigned long bp, char *log_lvl)
10304 unsigned long *stack;
10309 - sp = (unsigned long*)task->thread.sp;
10310 + sp = (unsigned long *)task->thread.sp;
10312 sp = (unsigned long *)&sp;
10316 - for(i = 0; i < kstack_depth_to_print; i++) {
10317 + for (i = 0; i < kstack_depth_to_print; i++) {
10318 if (kstack_end(stack))
10320 if (i && ((i % 8) == 0))
10321 @@ -293,6 +299,7 @@ static void show_stack_log_lvl(struct ta
10322 printk("%08lx ", *stack++);
10324 printk("\n%sCall Trace:\n", log_lvl);
10326 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
10329 @@ -307,8 +314,8 @@ void show_stack(struct task_struct *task
10331 void dump_stack(void)
10333 - unsigned long stack;
10334 unsigned long bp = 0;
10335 + unsigned long stack;
10337 #ifdef CONFIG_FRAME_POINTER
10339 @@ -320,6 +327,7 @@ void dump_stack(void)
10340 init_utsname()->release,
10341 (int)strcspn(init_utsname()->version, " "),
10342 init_utsname()->version);
10344 show_trace(current, NULL, &stack, bp);
10347 @@ -331,6 +339,7 @@ void show_registers(struct pt_regs *regs
10350 __show_registers(regs, 0);
10352 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
10353 TASK_COMM_LEN, current->comm, task_pid_nr(current),
10354 current_thread_info(), current, task_thread_info(current));
10355 @@ -339,10 +348,10 @@ void show_registers(struct pt_regs *regs
10356 * time of the fault..
10358 if (!user_mode_vm(regs)) {
10360 unsigned int code_prologue = code_bytes * 43 / 64;
10361 unsigned int code_len = code_bytes;
10365 printk("\n" KERN_EMERG "Stack: ");
10366 	show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
10367 @@ -369,7 +378,7 @@ void show_registers(struct pt_regs *regs
10374 int is_valid_bugaddr(unsigned long ip)
10376 @@ -385,10 +394,10 @@ int is_valid_bugaddr(unsigned long ip)
10378 static int die_counter;
10380 -int __kprobes __die(const char * str, struct pt_regs * regs, long err)
10381 +int __kprobes __die(const char *str, struct pt_regs *regs, long err)
10383 - unsigned long sp;
10385 + unsigned long sp;
10387 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
10388 #ifdef CONFIG_PREEMPT
10389 @@ -403,8 +412,8 @@ int __kprobes __die(const char * str, st
10392 if (notify_die(DIE_OOPS, str, regs, err,
10393 - current->thread.trap_no, SIGSEGV) !=
10395 + current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) {
10397 show_registers(regs);
10398 /* Executive summary in case the oops scrolled away */
10399 	sp = (unsigned long) (&regs->sp);
10400 @@ -416,17 +425,18 @@ int __kprobes __die(const char * str, st
10401 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
10402 print_symbol("%s", regs->ip);
10403 printk(" SS:ESP %04x:%08lx\n", ss, sp);
10414 - * This is gone through when something in the kernel has done something bad and
10415 - * is about to be terminated.
10416 + * This is gone through when something in the kernel has done something bad
10417 + * and is about to be terminated:
10419 -void die(const char * str, struct pt_regs * regs, long err)
10420 +void die(const char *str, struct pt_regs *regs, long err)
10423 raw_spinlock_t lock;
10424 @@ -448,8 +458,9 @@ void die(const char * str, struct pt_reg
10425 die.lock_owner = smp_processor_id();
10426 die.lock_owner_depth = 0;
10430 raw_local_irq_save(flags);
10433 if (++die.lock_owner_depth < 3) {
10434 report_bug(regs->ip, regs);
10435 @@ -482,19 +493,20 @@ void die(const char * str, struct pt_reg
10439 -static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
10440 +static inline void
10441 +die_if_kernel(const char *str, struct pt_regs *regs, long err)
10443 if (!user_mode_vm(regs))
10444 die(str, regs, err);
10447 -static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
10448 - struct pt_regs * regs, long error_code,
10450 +static void __kprobes
10451 +do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs *regs,
10452 + long error_code, siginfo_t *info)
10454 struct task_struct *tsk = current;
10456 - if (regs->flags & VM_MASK) {
10457 + if (regs->flags & X86_VM_MASK) {
10461 @@ -503,109 +515,112 @@ static void __kprobes do_trap(int trapnr
10462 if (!user_mode(regs))
10467 - * We want error_code and trap_no set for userspace faults and
10468 - * kernelspace faults which result in die(), but not
10469 - * kernelspace faults which are fixed up. die() gives the
10470 - * process no chance to handle the signal and notice the
10471 - * kernel fault information, so that won't result in polluting
10472 - * the information about previously queued, but not yet
10473 - * delivered, faults. See also do_general_protection below.
10475 - tsk->thread.error_code = error_code;
10476 - tsk->thread.trap_no = trapnr;
10479 + * We want error_code and trap_no set for userspace faults and
10480 + * kernelspace faults which result in die(), but not
10481 + * kernelspace faults which are fixed up. die() gives the
10482 + * process no chance to handle the signal and notice the
10483 + * kernel fault information, so that won't result in polluting
10484 + * the information about previously queued, but not yet
10485 + * delivered, faults. See also do_general_protection below.
10487 + tsk->thread.error_code = error_code;
10488 + tsk->thread.trap_no = trapnr;
10491 - force_sig_info(signr, info, tsk);
10493 - force_sig(signr, tsk);
10497 + force_sig_info(signr, info, tsk);
10499 + force_sig(signr, tsk);
10503 - if (!fixup_exception(regs)) {
10504 - tsk->thread.error_code = error_code;
10505 - tsk->thread.trap_no = trapnr;
10506 - die(str, regs, error_code);
10510 + if (!fixup_exception(regs)) {
10511 + tsk->thread.error_code = error_code;
10512 + tsk->thread.trap_no = trapnr;
10513 + die(str, regs, error_code);
10518 - int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr);
10519 - if (ret) goto trap_signal;
10523 + if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
10524 + error_code, trapnr))
10525 + goto trap_signal;
10529 -#define DO_ERROR(trapnr, signr, str, name) \
10530 -void do_##name(struct pt_regs * regs, long error_code) \
10532 - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10533 - == NOTIFY_STOP) \
10535 - do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
10538 -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
10539 -void do_##name(struct pt_regs * regs, long error_code) \
10541 - siginfo_t info; \
10543 - local_irq_enable(); \
10544 - info.si_signo = signr; \
10545 - info.si_errno = 0; \
10546 - info.si_code = sicode; \
10547 - info.si_addr = (void __user *)siaddr; \
10548 - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10549 - == NOTIFY_STOP) \
10551 - do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
10554 -#define DO_VM86_ERROR(trapnr, signr, str, name) \
10555 -void do_##name(struct pt_regs * regs, long error_code) \
10557 - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10558 - == NOTIFY_STOP) \
10560 - do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
10563 -#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
10564 -void do_##name(struct pt_regs * regs, long error_code) \
10566 - siginfo_t info; \
10567 - info.si_signo = signr; \
10568 - info.si_errno = 0; \
10569 - info.si_code = sicode; \
10570 - info.si_addr = (void __user *)siaddr; \
10571 - trace_hardirqs_fixup(); \
10572 - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10573 - == NOTIFY_STOP) \
10575 - do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
10576 +#define DO_ERROR(trapnr, signr, str, name) \
10577 +void do_##name(struct pt_regs *regs, long error_code) \
10579 + trace_hardirqs_fixup(); \
10580 + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10581 + == NOTIFY_STOP) \
10583 + do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
10586 +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
10587 +void do_##name(struct pt_regs *regs, long error_code) \
10589 + siginfo_t info; \
10591 + local_irq_enable(); \
10592 + info.si_signo = signr; \
10593 + info.si_errno = 0; \
10594 + info.si_code = sicode; \
10595 + info.si_addr = (void __user *)siaddr; \
10596 + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10597 + == NOTIFY_STOP) \
10599 + do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
10602 +#define DO_VM86_ERROR(trapnr, signr, str, name) \
10603 +void do_##name(struct pt_regs *regs, long error_code) \
10605 + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10606 + == NOTIFY_STOP) \
10608 + do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
10611 +#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
10612 +void do_##name(struct pt_regs *regs, long error_code) \
10614 + siginfo_t info; \
10615 + info.si_signo = signr; \
10616 + info.si_errno = 0; \
10617 + info.si_code = sicode; \
10618 + info.si_addr = (void __user *)siaddr; \
10619 + trace_hardirqs_fixup(); \
10620 + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10621 + == NOTIFY_STOP) \
10623 + do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
10626 -DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
10627 +DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
10628 #ifndef CONFIG_KPROBES
10629 -DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
10630 +DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
10632 -DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
10633 -DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
10634 -DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
10635 -DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
10636 +DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
10637 +DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
10638 +DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
10639 +DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
10640 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
10641 DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
10642 DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
10643 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
10644 -DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
10645 +DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
10647 void __kprobes do_general_protection(struct pt_regs * regs,
10650 - if (regs->flags & VM_MASK)
10651 + struct thread_struct *thread;
10653 +	thread = &current->thread;
10655 + if (regs->flags & X86_VM_MASK)
10658 if (!user_mode(regs))
10659 @@ -613,6 +628,7 @@ void __kprobes do_general_protection(str
10661 current->thread.error_code = error_code;
10662 current->thread.trap_no = 13;
10664 if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
10665 printk_ratelimit()) {
10667 @@ -642,22 +658,25 @@ gp_in_kernel:
10671 -static __kprobes void
10672 -mem_parity_error(unsigned char reason, struct pt_regs * regs)
10673 +static notrace __kprobes void
10674 +mem_parity_error(unsigned char reason, struct pt_regs *regs)
10676 - printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
10677 - "CPU %d.\n", reason, smp_processor_id());
10678 - printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
10679 + printk(KERN_EMERG
10680 + "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
10681 + reason, smp_processor_id());
10683 + printk(KERN_EMERG
10684 + "You have some hardware problem, likely on the PCI bus.\n");
10686 #if defined(CONFIG_EDAC)
10687 - if(edac_handler_set()) {
10688 + if (edac_handler_set()) {
10689 edac_atomic_assert_error();
10694 if (panic_on_unrecovered_nmi)
10695 - panic("NMI: Not continuing");
10696 + panic("NMI: Not continuing");
10698 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
10700 @@ -665,8 +684,8 @@ mem_parity_error(unsigned char reason, s
10701 clear_mem_error(reason);
10704 -static __kprobes void
10705 -io_check_error(unsigned char reason, struct pt_regs * regs)
10706 +static notrace __kprobes void
10707 +io_check_error(unsigned char reason, struct pt_regs *regs)
10709 printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
10710 show_registers(regs);
10711 @@ -675,38 +694,43 @@ io_check_error(unsigned char reason, str
10712 clear_io_check_error(reason);
10715 -static __kprobes void
10716 -unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
10717 +static notrace __kprobes void
10718 +unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
10720 + if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
10723 - /* Might actually be able to figure out what the guilty party
10727 + * Might actually be able to figure out what the guilty party
10735 - printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
10736 - "CPU %d.\n", reason, smp_processor_id());
10737 + printk(KERN_EMERG
10738 + "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
10739 + reason, smp_processor_id());
10741 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
10742 if (panic_on_unrecovered_nmi)
10743 - panic("NMI: Not continuing");
10744 + panic("NMI: Not continuing");
10746 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
10749 static DEFINE_SPINLOCK(nmi_print_lock);
10751 -void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
10752 +void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg)
10754 - if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
10756 + if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP)
10759 spin_lock(&nmi_print_lock);
10761 * We are in trouble anyway, lets at least try
10762 - * to get a message out.
10763 + * to get a message out:
10766 printk(KERN_EMERG "%s", msg);
10767 @@ -717,9 +741,10 @@ void __kprobes die_nmi(struct pt_regs *r
10768 spin_unlock(&nmi_print_lock);
10771 - /* If we are in kernel we are probably nested up pretty bad
10772 - * and might aswell get out now while we still can.
10775 + * If we are in kernel we are probably nested up pretty bad
10776 + * and might aswell get out now while we still can:
10778 if (!user_mode_vm(regs)) {
10779 current->thread.trap_no = 2;
10781 @@ -728,14 +753,14 @@ void __kprobes die_nmi(struct pt_regs *r
10785 -static __kprobes void default_do_nmi(struct pt_regs * regs)
10786 +static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
10788 unsigned char reason = 0;
10790 - /* Only the BSP gets external NMIs from the system. */
10791 + /* Only the BSP gets external NMIs from the system: */
10792 if (!smp_processor_id())
10793 reason = get_nmi_reason();
10796 if (!(reason & 0xc0)) {
10797 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
10799 @@ -748,8 +773,10 @@ static __kprobes void default_do_nmi(str
10800 if (nmi_watchdog_tick(regs, reason))
10802 if (!do_nmi_callback(regs, smp_processor_id()))
10804 unknown_nmi_error(reason, regs);
10806 + unknown_nmi_error(reason, regs);
10811 @@ -761,14 +788,14 @@ static __kprobes void default_do_nmi(str
10812 io_check_error(reason, regs);
10814 * Reassert NMI in case it became active meanwhile
10815 - * as it's edge-triggered.
10816 + * as it's edge-triggered:
10821 static int ignore_nmis;
10823 -__kprobes void do_nmi(struct pt_regs * regs, long error_code)
10824 +notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
10828 @@ -804,9 +831,12 @@ void __kprobes do_int3(struct pt_regs *r
10829 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
10832 - /* This is an interrupt gate, because kprobes wants interrupts
10833 - disabled. Normal trap handlers don't. */
10835 + * This is an interrupt gate, because kprobes wants interrupts
10836 + * disabled. Normal trap handlers don't.
10838 restore_interrupts(regs);
10840 do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
10843 @@ -821,7 +851,7 @@ void __kprobes do_int3(struct pt_regs *r
10844 * from user space. Such code must not hold kernel locks (since it
10845 * can equally take a page fault), therefore it is safe to call
10846 * force_sig_info even though that claims and releases locks.
10849 * Code in ./signal.c ensures that the debug control register
10850 * is restored before we deliver any signal, and therefore that
10851 * user code runs with the correct debug control register even though
10852 @@ -833,10 +863,10 @@ void __kprobes do_int3(struct pt_regs *r
10853 * find every occurrence of the TF bit that could be saved away even
10856 -void __kprobes do_debug(struct pt_regs * regs, long error_code)
10857 +void __kprobes do_debug(struct pt_regs *regs, long error_code)
10859 - unsigned int condition;
10860 struct task_struct *tsk = current;
10861 + unsigned int condition;
10863 trace_hardirqs_fixup();
10865 @@ -861,7 +891,7 @@ void __kprobes do_debug(struct pt_regs *
10869 - if (regs->flags & VM_MASK)
10870 + if (regs->flags & X86_VM_MASK)
10873 /* Save debug status register where ptrace can see it */
10874 @@ -884,7 +914,8 @@ void __kprobes do_debug(struct pt_regs *
10875 /* Ok, finally something we can handle */
10876 send_sigtrap(tsk, regs, error_code);
10878 - /* Disable additional traps. They'll be re-enabled when
10880 + * Disable additional traps. They'll be re-enabled when
10881 * the signal is delivered.
10884 @@ -897,7 +928,7 @@ debug_vm86:
10887 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
10888 - regs->flags &= ~TF_MASK;
10889 + regs->flags &= ~X86_EFLAGS_TF;
10893 @@ -908,9 +939,10 @@ clear_TF_reenable:
10895 void math_error(void __user *ip)
10897 - struct task_struct * task;
10898 + struct task_struct *task;
10899 + unsigned short cwd;
10900 + unsigned short swd;
10902 - unsigned short cwd, swd;
10905 * Save the info for the exception handler and clear the error.
10906 @@ -936,36 +968,36 @@ void math_error(void __user *ip)
10907 cwd = get_fpu_cwd(task);
10908 swd = get_fpu_swd(task);
10909 switch (swd & ~cwd & 0x3f) {
10910 - case 0x000: /* No unmasked exception */
10912 - default: /* Multiple exceptions */
10914 - case 0x001: /* Invalid Op */
10916 - * swd & 0x240 == 0x040: Stack Underflow
10917 - * swd & 0x240 == 0x240: Stack Overflow
10918 - * User must clear the SF bit (0x40) if set
10920 - info.si_code = FPE_FLTINV;
10922 - case 0x002: /* Denormalize */
10923 - case 0x010: /* Underflow */
10924 - info.si_code = FPE_FLTUND;
10926 - case 0x004: /* Zero Divide */
10927 - info.si_code = FPE_FLTDIV;
10929 - case 0x008: /* Overflow */
10930 - info.si_code = FPE_FLTOVF;
10932 - case 0x020: /* Precision */
10933 - info.si_code = FPE_FLTRES;
10935 + case 0x000: /* No unmasked exception */
10937 + default: /* Multiple exceptions */
10939 + case 0x001: /* Invalid Op */
10941 + * swd & 0x240 == 0x040: Stack Underflow
10942 + * swd & 0x240 == 0x240: Stack Overflow
10943 + * User must clear the SF bit (0x40) if set
10945 + info.si_code = FPE_FLTINV;
10947 + case 0x002: /* Denormalize */
10948 + case 0x010: /* Underflow */
10949 + info.si_code = FPE_FLTUND;
10951 + case 0x004: /* Zero Divide */
10952 + info.si_code = FPE_FLTDIV;
10954 + case 0x008: /* Overflow */
10955 + info.si_code = FPE_FLTOVF;
10957 + case 0x020: /* Precision */
10958 + info.si_code = FPE_FLTRES;
10961 force_sig_info(SIGFPE, &info, task);
10964 -void do_coprocessor_error(struct pt_regs * regs, long error_code)
10965 +void do_coprocessor_error(struct pt_regs *regs, long error_code)
10967 ignore_fpu_irq = 1;
10968 math_error((void __user *)regs->ip);
10969 @@ -973,9 +1005,9 @@ void do_coprocessor_error(struct pt_regs
10971 static void simd_math_error(void __user *ip)
10973 - struct task_struct * task;
10975 + struct task_struct *task;
10976 unsigned short mxcsr;
10980 * Save the info for the exception handler and clear the error.
10981 @@ -996,84 +1028,82 @@ static void simd_math_error(void __user
10983 mxcsr = get_fpu_mxcsr(task);
10984 switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
10988 - case 0x001: /* Invalid Op */
10989 - info.si_code = FPE_FLTINV;
10991 - case 0x002: /* Denormalize */
10992 - case 0x010: /* Underflow */
10993 - info.si_code = FPE_FLTUND;
10995 - case 0x004: /* Zero Divide */
10996 - info.si_code = FPE_FLTDIV;
10998 - case 0x008: /* Overflow */
10999 - info.si_code = FPE_FLTOVF;
11001 - case 0x020: /* Precision */
11002 - info.si_code = FPE_FLTRES;
11007 + case 0x001: /* Invalid Op */
11008 + info.si_code = FPE_FLTINV;
11010 + case 0x002: /* Denormalize */
11011 + case 0x010: /* Underflow */
11012 + info.si_code = FPE_FLTUND;
11014 + case 0x004: /* Zero Divide */
11015 + info.si_code = FPE_FLTDIV;
11017 + case 0x008: /* Overflow */
11018 + info.si_code = FPE_FLTOVF;
11020 + case 0x020: /* Precision */
11021 + info.si_code = FPE_FLTRES;
11024 force_sig_info(SIGFPE, &info, task);
11027 -void do_simd_coprocessor_error(struct pt_regs * regs,
11029 +void do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
11032 /* Handle SIMD FPU exceptions on PIII+ processors. */
11033 ignore_fpu_irq = 1;
11034 simd_math_error((void __user *)regs->ip);
11037 - * Handle strange cache flush from user space exception
11038 - * in all other cases. This is undocumented behaviour.
11040 - if (regs->flags & VM_MASK) {
11041 - handle_vm86_fault((struct kernel_vm86_regs *)regs,
11045 - current->thread.trap_no = 19;
11046 - current->thread.error_code = error_code;
11047 - die_if_kernel("cache flush denied", regs, error_code);
11048 - force_sig(SIGSEGV, current);
11052 + * Handle strange cache flush from user space exception
11053 + * in all other cases. This is undocumented behaviour.
11055 + if (regs->flags & X86_VM_MASK) {
11056 + handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code);
11059 + current->thread.trap_no = 19;
11060 + current->thread.error_code = error_code;
11061 + die_if_kernel("cache flush denied", regs, error_code);
11062 + force_sig(SIGSEGV, current);
11066 -void do_spurious_interrupt_bug(struct pt_regs * regs,
11068 +void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
11071 /* No need to warn about this any longer. */
11072 - printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
11073 + printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
11077 -unsigned long patch_espfix_desc(unsigned long uesp,
11078 - unsigned long kesp)
11079 +unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
11081 struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
11082 unsigned long base = (kesp - uesp) & -THREAD_SIZE;
11083 unsigned long new_kesp = kesp - base;
11084 unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
11085 __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
11087 /* Set up base for espfix segment */
11088 - desc &= 0x00f0ff0000000000ULL;
11089 - desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
11090 + desc &= 0x00f0ff0000000000ULL;
11091 + desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
11092 ((((__u64)base) << 32) & 0xff00000000000000ULL) |
11093 ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
11094 (lim_pages & 0xffff);
11095 *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
11102 - * 'math_state_restore()' saves the current math information in the
11103 + * 'math_state_restore()' saves the current math information in the
11104 * old math state array, and gets the new ones from the current task
11106 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
11107 @@ -1087,9 +1117,22 @@ asmlinkage void math_state_restore(void)
11108 struct thread_info *thread = current_thread_info();
11109 struct task_struct *tsk = thread->task;
11111 + if (!tsk_used_math(tsk)) {
11112 + local_irq_enable();
11114 + * does a slab alloc which can sleep
11116 + if (init_fpu(tsk)) {
11118 + * ran out of memory!
11120 + do_group_exit(SIGKILL);
11123 + local_irq_disable();
11126 /* NB. 'clts' is done for us by Xen during virtual trap. */
11127 - if (!tsk_used_math(tsk))
11130 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
11131 tsk->fpu_counter++;
11132 @@ -1100,15 +1143,15 @@ EXPORT_SYMBOL_GPL(math_state_restore);
11134 asmlinkage void math_emulate(long arg)
11136 - printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n");
11137 - printk(KERN_EMERG "killing %s.\n",current->comm);
11138 - force_sig(SIGFPE,current);
11139 + printk(KERN_EMERG
11140 + "math-emulation not enabled and no coprocessor found.\n");
11141 + printk(KERN_EMERG "killing %s.\n", current->comm);
11142 + force_sig(SIGFPE, current);
11146 #endif /* CONFIG_MATH_EMULATION */
11150 * NB. All these are "trap gates" (i.e. events_mask isn't set) except
11151 * for those that specify <dpl>|4 in the second field.
11152 @@ -1146,25 +1189,21 @@ void __init trap_init(void)
11154 printk("HYPERVISOR_set_trap_table failed: error %d\n", ret);
11157 - * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
11158 - * Generate a build-time error if the alignment is wrong.
11160 - BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
11161 if (cpu_has_fxsr) {
11162 printk(KERN_INFO "Enabling fast FPU save and restore... ");
11163 set_in_cr4(X86_CR4_OSFXSR);
11167 - printk(KERN_INFO "Enabling unmasked SIMD FPU exception "
11170 + "Enabling unmasked SIMD FPU exception support... ");
11171 set_in_cr4(X86_CR4_OSXMMEXCPT);
11175 + init_thread_xstate();
11177 - * Should be a barrier for any external CPU state.
11178 + * Should be a barrier for any external CPU state:
11182 @@ -1183,6 +1222,7 @@ void __cpuinit smp_trap_init(trap_info_t
11183 static int __init kstack_setup(char *s)
11185 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
11189 __setup("kstack=", kstack_setup);
11190 --- sle11-2009-05-14.orig/arch/x86/kernel/traps_64-xen.c 2009-03-16 16:33:40.000000000 +0100
11191 +++ sle11-2009-05-14/arch/x86/kernel/traps_64-xen.c 2009-03-16 16:38:05.000000000 +0100
11193 #include <linux/kdebug.h>
11194 #include <linux/utsname.h>
11196 +#include <mach_traps.h>
11198 #if defined(CONFIG_EDAC)
11199 #include <linux/edac.h>
11201 @@ -601,10 +603,16 @@ void die(const char * str, struct pt_reg
11204 #if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_SYSCTL)
11205 -void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
11206 +notrace __kprobes void
11207 +die_nmi(char *str, struct pt_regs *regs, int do_panic)
11209 - unsigned long flags = oops_begin();
11210 + unsigned long flags;
11212 + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) ==
11216 + flags = oops_begin();
11218 * We are in trouble anyway, lets at least try
11219 * to get a message out.
11220 @@ -769,7 +777,7 @@ asmlinkage void __kprobes do_general_pro
11221 die("general protection fault", regs, error_code);
11224 -static __kprobes void
11225 +static notrace __kprobes void
11226 mem_parity_error(unsigned char reason, struct pt_regs * regs)
11228 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
11229 @@ -792,7 +800,7 @@ mem_parity_error(unsigned char reason, s
11230 clear_mem_error(reason);
11233 -static __kprobes void
11234 +static notrace __kprobes void
11235 io_check_error(unsigned char reason, struct pt_regs * regs)
11237 printk("NMI: IOCK error (debug interrupt?)\n");
11238 @@ -802,9 +810,11 @@ io_check_error(unsigned char reason, str
11239 clear_io_check_error(reason);
11242 -static __kprobes void
11243 +static notrace __kprobes void
11244 unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
11246 + if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
11248 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
11250 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
11251 @@ -817,7 +827,7 @@ unknown_nmi_error(unsigned char reason,
11253 /* Runs on IST stack. This code must keep interrupts off all the time.
11254 Nested NMIs are prevented by the CPU. */
11255 -asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs)
11256 +asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
11258 unsigned char reason = 0;
11260 @@ -1117,11 +1127,25 @@ asmlinkage void __attribute__((weak)) mc
11261 asmlinkage void math_state_restore(void)
11263 struct task_struct *me = current;
11265 + if (!used_math()) {
11266 + local_irq_enable();
11268 + * does a slab alloc which can sleep
11270 + if (init_fpu(me)) {
11272 + * ran out of memory!
11274 + do_group_exit(SIGKILL);
11277 + local_irq_disable();
11280 /* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */
11282 - if (!used_math())
11284 - restore_fpu_checking(&me->thread.i387.fxsave);
11285 + restore_fpu_checking(&me->thread.xstate->fxsave);
11286 task_thread_info(me)->status |= TS_USEDFPU;
11289 @@ -1168,6 +1192,10 @@ void __init trap_init(void)
11290 printk("HYPERVISOR_set_trap_table failed: error %d\n", ret);
11293 + * initialize the per thread extended state:
11295 + init_thread_xstate();
11297 * Should be a barrier for any external CPU state.
11300 --- sle11-2009-05-14.orig/arch/x86/kernel/vsyscall_64-xen.c 2009-03-16 16:33:40.000000000 +0100
11301 +++ sle11-2009-05-14/arch/x86/kernel/vsyscall_64-xen.c 2009-03-16 16:38:05.000000000 +0100
11302 @@ -216,7 +216,7 @@ vgetcpu(unsigned *cpu, unsigned *node, s
11306 -long __vsyscall(3) venosys_1(void)
11307 +static long __vsyscall(3) venosys_1(void)
11311 --- sle11-2009-05-14.orig/arch/x86/mm/fault-xen.c 2009-03-16 16:33:40.000000000 +0100
11312 +++ sle11-2009-05-14/arch/x86/mm/fault-xen.c 2009-03-16 16:38:05.000000000 +0100
11313 @@ -510,6 +510,11 @@ static int vmalloc_fault(unsigned long a
11314 unsigned long pgd_paddr;
11318 + /* Make sure we are in vmalloc area */
11319 + if (!(address >= VMALLOC_START && address < VMALLOC_END))
11323 * Synchronize this task's top level page-table
11324 * with the 'reference' page table.
11325 @@ -670,7 +675,7 @@ void __kprobes do_page_fault(struct pt_r
11326 #ifdef CONFIG_X86_32
11327 /* It's safe to allow irq's after cr2 has been saved and the vmalloc
11328 fault has been handled. */
11329 - if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
11330 + if (regs->flags & (X86_EFLAGS_IF | X86_VM_MASK))
11331 local_irq_enable();
11334 @@ -1017,9 +1022,5 @@ void vmalloc_sync_all(void)
11335 if (address == start)
11336 start = address + PGDIR_SIZE;
11338 - /* Check that there is no need to do the same for the modules area. */
11339 - BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
11340 - BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
11341 - (__START_KERNEL & PGDIR_MASK)));
11344 --- sle11-2009-05-14.orig/arch/x86/mm/highmem_32-xen.c 2009-03-16 16:33:40.000000000 +0100
11345 +++ sle11-2009-05-14/arch/x86/mm/highmem_32-xen.c 2009-03-16 16:38:05.000000000 +0100
11346 @@ -200,6 +200,8 @@ EXPORT_SYMBOL(kmap);
11347 EXPORT_SYMBOL(kunmap);
11348 EXPORT_SYMBOL(kmap_atomic);
11349 EXPORT_SYMBOL(kunmap_atomic);
11350 +#ifdef CONFIG_HIGHPTE
11351 EXPORT_SYMBOL(kmap_atomic_to_page);
11353 EXPORT_SYMBOL(clear_highpage);
11354 EXPORT_SYMBOL(copy_highpage);
11355 --- sle11-2009-05-14.orig/arch/x86/mm/init_32-xen.c 2009-03-16 16:33:40.000000000 +0100
11356 +++ sle11-2009-05-14/arch/x86/mm/init_32-xen.c 2009-03-16 16:38:05.000000000 +0100
11359 - * linux/arch/i386/mm/init.c
11361 * Copyright (C) 1995 Linus Torvalds
11364 #include <linux/init.h>
11365 #include <linux/highmem.h>
11366 #include <linux/pagemap.h>
11367 +#include <linux/pci.h>
11368 #include <linux/pfn.h>
11369 #include <linux/poison.h>
11370 #include <linux/bootmem.h>
11373 unsigned int __VMALLOC_RESERVE = 128 << 20;
11375 +unsigned long max_pfn_mapped;
11377 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
11378 unsigned long highstart_pfn, highend_pfn;
11380 @@ -73,7 +75,7 @@ static pmd_t * __init one_md_table_init(
11381 if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) {
11382 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
11384 - paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
11385 + paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
11386 make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
11387 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
11388 pud = pud_offset(pgd, 0);
11389 @@ -107,7 +109,7 @@ static pte_t * __init one_page_table_ini
11390 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
11393 - paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
11394 + paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
11395 make_lowmem_page_readonly(page_table,
11396 XENFEAT_writable_page_tables);
11397 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
11398 @@ -209,8 +211,13 @@ static void __init kernel_physical_mappi
11400 * Map with big pages if possible, otherwise
11401 * create normal page tables:
11403 + * Don't use a large page for the first 2/4MB of memory
11404 + * because there are often fixed size MTRRs in there
11405 + * and overlapping MTRRs into large pages can cause
11408 - if (cpu_has_pse) {
11409 + if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) {
11410 unsigned int addr2;
11411 pgprot_t prot = PAGE_KERNEL_LARGE;
11413 @@ -224,6 +231,7 @@ static void __init kernel_physical_mappi
11414 set_pmd(pmd, pfn_pmd(pfn, prot));
11416 pfn += PTRS_PER_PTE;
11417 + max_pfn_mapped = pfn;
11420 pte = one_page_table_init(pmd);
11421 @@ -241,6 +249,7 @@ static void __init kernel_physical_mappi
11423 set_pte(pte, pfn_pte(pfn, prot));
11425 + max_pfn_mapped = pfn;
11429 @@ -262,6 +271,25 @@ static inline int page_kills_ppro(unsign
11434 + * devmem_is_allowed() checks to see if /dev/mem access to a certain address
11435 + * is valid. The argument is a physical page number.
11438 + * On x86, access has to be given to the first megabyte of ram because that area
11439 + * contains bios code and data regions used by X and dosemu and similar apps.
11440 + * Access has to be given to non-kernel-ram areas as well, these contain the PCI
11441 + * mmio resources as well as potential bios/acpi data regions.
11443 +int devmem_is_allowed(unsigned long pagenr)
11445 + if (pagenr <= 256)
11447 + if (mfn_to_local_pfn(pagenr) >= max_pfn)
11452 #ifdef CONFIG_HIGHMEM
11454 pgprot_t kmap_prot;
11455 @@ -303,48 +331,18 @@ static void __init permanent_kmaps_init(
11456 pkmap_page_table = pte;
11459 -static void __meminit free_new_highpage(struct page *page, int pfn)
11461 - init_page_count(page);
11462 - if (pfn < xen_start_info->nr_pages)
11463 - __free_page(page);
11464 - totalhigh_pages++;
11467 void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
11469 if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
11470 ClearPageReserved(page);
11471 - free_new_highpage(page, pfn);
11472 + init_page_count(page);
11473 + if (pfn < xen_start_info->nr_pages)
11474 + __free_page(page);
11475 + totalhigh_pages++;
11477 SetPageReserved(page);
11480 -static int __meminit
11481 -add_one_highpage_hotplug(struct page *page, unsigned long pfn)
11483 - free_new_highpage(page, pfn);
11484 - totalram_pages++;
11485 -#ifdef CONFIG_FLATMEM
11486 - max_mapnr = max(pfn, max_mapnr);
11494 - * Not currently handling the NUMA case.
11495 - * Assuming single node and all memory that
11496 - * has been added dynamically that would be
11497 - * onlined here is in HIGHMEM.
11499 -void __meminit online_page(struct page *page)
11501 - ClearPageReserved(page);
11502 - add_one_highpage_hotplug(page, page_to_pfn(page));
11505 #ifndef CONFIG_NUMA
11506 static void __init set_highmem_pages_init(int bad_ppro)
11508 @@ -459,15 +457,13 @@ void zap_low_mappings(void)
11515 * Zap initial low-memory mappings.
11517 * Note that "pgd_clear()" doesn't do it for
11518 * us, because pgd_clear() is a no-op on i386.
11520 - for (i = 0; i < USER_PTRS_PER_PGD; i++) {
11521 + for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
11522 #if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
11523 set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
11525 @@ -572,9 +568,9 @@ void __init paging_init(void)
11528 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
11529 - * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
11530 - * used to involve black magic jumps to work around some nasty CPU bugs,
11531 - * but fortunately the switch to using exceptions got rid of all that.
11532 + * and also on some strange 486's. All 586+'s are OK. This used to involve
11533 + * black magic jumps to work around some nasty CPU bugs, but fortunately the
11534 + * switch to using exceptions got rid of all that.
11536 static void __init test_wp_bit(void)
11538 @@ -605,9 +601,7 @@ void __init mem_init(void)
11542 -#if defined(CONFIG_SWIOTLB)
11545 + pci_iommu_alloc();
11547 #ifdef CONFIG_FLATMEM
11549 @@ -710,16 +704,8 @@ void __init mem_init(void)
11555 - * Subtle. SMP is doing it's boot stuff late (because it has to
11556 - * fork idle threads) - but it also needs low mappings for the
11557 - * protected-mode entry to work. We zap these entries only after
11558 - * the WP-bit has been tested.
11560 -#ifndef CONFIG_SMP
11562 zap_low_mappings();
11565 SetPagePinned(virt_to_page(init_mm.pgd));
11567 @@ -769,25 +755,17 @@ void mark_rodata_ro(void)
11568 unsigned long start = PFN_ALIGN(_text);
11569 unsigned long size = PFN_ALIGN(_etext) - start;
11571 -#ifndef CONFIG_KPROBES
11572 -#ifdef CONFIG_HOTPLUG_CPU
11573 - /* It must still be possible to apply SMP alternatives. */
11574 - if (num_possible_cpus() <= 1)
11577 - set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
11578 - printk(KERN_INFO "Write protecting the kernel text: %luk\n",
11580 + set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
11581 + printk(KERN_INFO "Write protecting the kernel text: %luk\n",
11584 #ifdef CONFIG_CPA_DEBUG
11585 - printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
11586 - start, start+size);
11587 - set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
11588 + printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
11589 + start, start+size);
11590 + set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
11592 - printk(KERN_INFO "Testing CPA: write protecting again\n");
11593 - set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
11596 + printk(KERN_INFO "Testing CPA: write protecting again\n");
11597 + set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
11600 size = (unsigned long)__end_rodata - start;
11601 --- sle11-2009-05-14.orig/arch/x86/mm/init_64-xen.c 2009-03-16 16:33:40.000000000 +0100
11602 +++ sle11-2009-05-14/arch/x86/mm/init_64-xen.c 2009-03-16 16:38:05.000000000 +0100
11605 #include <xen/features.h>
11607 -const struct dma_mapping_ops *dma_ops;
11608 -EXPORT_SYMBOL(dma_ops);
11610 #if CONFIG_XEN_COMPAT <= 0x030002
11611 unsigned int __kernel_page_user;
11612 EXPORT_SYMBOL(__kernel_page_user);
11613 @@ -68,6 +65,28 @@ extern unsigned long start_pfn;
11614 extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD];
11615 extern pte_t level1_fixmap_pgt[PTRS_PER_PTE];
11617 +#ifndef CONFIG_XEN
11618 +int direct_gbpages __meminitdata
11619 +#ifdef CONFIG_DIRECT_GBPAGES
11624 +static int __init parse_direct_gbpages_off(char *arg)
11626 + direct_gbpages = 0;
11629 +early_param("nogbpages", parse_direct_gbpages_off);
11631 +static int __init parse_direct_gbpages_on(char *arg)
11633 + direct_gbpages = 1;
11636 +early_param("gbpages", parse_direct_gbpages_on);
11640 * Use this until direct mapping is established, i.e. before __va() is
11641 * available in init_memory_mapping().
11642 @@ -135,9 +154,6 @@ void show_mem(void)
11644 printk(KERN_INFO "Mem-info:\n");
11646 - printk(KERN_INFO "Free swap: %6ldkB\n",
11647 - nr_swap_pages << (PAGE_SHIFT-10));
11649 for_each_online_pgdat(pgdat) {
11650 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
11652 @@ -328,7 +344,7 @@ void __init cleanup_highmap(void)
11653 pmd_t *last_pmd = pmd + PTRS_PER_PMD;
11655 for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) {
11656 - if (!pmd_present(*pmd))
11657 + if (pmd_none(*pmd))
11659 if (vaddr < (unsigned long) _text || vaddr > end)
11660 set_pmd(pmd, __pmd(0));
11661 @@ -337,8 +353,7 @@ void __init cleanup_highmap(void)
11664 /* NOTE: this is meant to be run only at boot */
11666 -__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
11667 +void __init __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
11669 unsigned long address = __fix_to_virt(idx);
11671 @@ -463,7 +478,7 @@ __meminit void early_iounmap(void *addr,
11675 -static void __meminit
11676 +static unsigned long __meminit
11677 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
11679 int i = pmd_index(address);
11680 @@ -503,21 +518,26 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
11681 set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
11687 -static void __meminit
11688 +static unsigned long __meminit
11689 phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
11691 pmd_t *pmd = pmd_offset(pud, 0);
11692 + unsigned long last_map_addr;
11694 spin_lock(&init_mm.page_table_lock);
11695 - phys_pmd_init(pmd, address, end);
11696 + last_map_addr = phys_pmd_init(pmd, address, end);
11697 spin_unlock(&init_mm.page_table_lock);
11699 + return last_map_addr;
11702 -static void __meminit
11703 +static unsigned long __meminit
11704 phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
11706 + unsigned long last_map_addr = end;
11707 int i = pud_index(addr);
11709 for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
11710 @@ -529,7 +549,15 @@ phys_pud_init(pud_t *pud_page, unsigned
11713 if (__pud_val(*pud)) {
11714 - phys_pmd_update(pud, addr, end);
11715 + if (!pud_large(*pud))
11716 + last_map_addr = phys_pmd_update(pud, addr, end);
11720 + if (direct_gbpages) {
11721 + set_pte((pte_t *)pud,
11722 + pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
11723 + last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
11727 @@ -537,12 +565,14 @@ phys_pud_init(pud_t *pud_page, unsigned
11729 spin_lock(&init_mm.page_table_lock);
11730 *pud = __pud(pmd_phys | _KERNPG_TABLE);
11731 - phys_pmd_init(pmd, addr, end);
11732 + last_map_addr = phys_pmd_init(pmd, addr, end);
11733 spin_unlock(&init_mm.page_table_lock);
11735 early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
11739 + return last_map_addr >> PAGE_SHIFT;
11742 void __init xen_init_pt(void)
11743 @@ -754,16 +784,138 @@ static void __init xen_finish_init_mappi
11744 table_end = start_pfn;
11747 +static void __init init_gbpages(void)
11749 +#ifndef CONFIG_XEN
11750 + if (direct_gbpages && cpu_has_gbpages)
11751 + printk(KERN_INFO "Using GB pages for direct mapping\n");
11753 + direct_gbpages = 0;
11757 +#ifdef CONFIG_MEMTEST_BOOTPARAM
11759 +static void __init memtest(unsigned long start_phys, unsigned long size,
11760 + unsigned pattern)
11763 + unsigned long *start;
11764 + unsigned long start_bad;
11765 + unsigned long last_bad;
11766 + unsigned long val;
11767 + unsigned long start_phys_aligned;
11768 + unsigned long count;
11769 + unsigned long incr;
11771 + switch (pattern) {
11779 + val = 0x5555555555555555UL;
11782 + val = 0xaaaaaaaaaaaaaaaaUL;
11788 + incr = sizeof(unsigned long);
11789 + start_phys_aligned = ALIGN(start_phys, incr);
11790 + count = (size - (start_phys_aligned - start_phys))/incr;
11791 + start = __va(start_phys_aligned);
11795 + for (i = 0; i < count; i++)
11797 + for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
11798 + if (*start != val) {
11799 + if (start_phys_aligned == last_bad + incr) {
11800 + last_bad += incr;
11803 + printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
11804 + val, start_bad, last_bad + incr);
11805 + reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
11807 + start_bad = last_bad = start_phys_aligned;
11812 + printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
11813 + val, start_bad, last_bad + incr);
11814 + reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
11819 +static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE;
11821 +static int __init parse_memtest(char *arg)
11824 + memtest_pattern = simple_strtoul(arg, NULL, 0);
11828 +early_param("memtest", parse_memtest);
11830 +static void __init early_memtest(unsigned long start, unsigned long end)
11832 + u64 t_start, t_size;
11833 + unsigned pattern;
11835 + if (!memtest_pattern)
11838 + printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
11839 + for (pattern = 0; pattern < memtest_pattern; pattern++) {
11842 + while (t_start < end) {
11843 + t_start = find_e820_area_size(t_start, &t_size, 1);
11846 + if (t_start >= end)
11848 + if (t_start + t_size > end)
11849 + t_size = end - t_start;
11851 + printk(KERN_CONT "\n %016llx - %016llx pattern %d",
11852 + (unsigned long long)t_start,
11853 + (unsigned long long)t_start + t_size, pattern);
11855 + memtest(t_start, t_size, pattern);
11857 + t_start += t_size;
11860 + printk(KERN_CONT "\n");
11863 +static void __init early_memtest(unsigned long start, unsigned long end)
11869 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
11870 * This runs before bootmem is initialized and gets pages directly from
11871 * the physical memory. To access them they are temporarily mapped.
11873 -void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
11874 +unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end)
11876 - unsigned long next;
11877 + unsigned long next, last_map_addr = end;
11878 + unsigned long start_phys = start, end_phys = end;
11880 - pr_debug("init_memory_mapping\n");
11881 + printk(KERN_INFO "init_memory_mapping\n");
11884 * Find space for the kernel direct mapping tables.
11885 @@ -772,8 +924,10 @@ void __init_refok init_memory_mapping(un
11886 * memory mapped. Unfortunately this is done currently before the
11887 * nodes are discovered.
11889 - if (!after_bootmem)
11890 + if (!after_bootmem) {
11892 find_early_table_space(end);
11895 start = (unsigned long)__va(start);
11896 end = (unsigned long)__va(end);
11897 @@ -790,7 +944,7 @@ void __init_refok init_memory_mapping(un
11898 next = start + PGDIR_SIZE;
11901 - phys_pud_init(pud, __pa(start), __pa(next));
11902 + last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
11903 if (!after_bootmem) {
11904 early_make_page_readonly(pud, XENFEAT_writable_page_tables);
11905 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
11906 @@ -807,6 +961,11 @@ void __init_refok init_memory_mapping(un
11907 if (!after_bootmem)
11908 reserve_early(table_start << PAGE_SHIFT,
11909 table_end << PAGE_SHIFT, "PGTABLE");
11911 + if (!after_bootmem)
11912 + early_memtest(start_phys, end_phys);
11914 + return last_map_addr;
11917 #ifndef CONFIG_NUMA
11918 @@ -830,15 +989,6 @@ void __init paging_init(void)
11920 * Memory hotplug specific functions
11922 -void online_page(struct page *page)
11924 - ClearPageReserved(page);
11925 - init_page_count(page);
11926 - __free_page(page);
11927 - totalram_pages++;
11931 #ifdef CONFIG_MEMORY_HOTPLUG
11933 * Memory is added always to NORMAL zone. This means you will never get
11934 @@ -848,11 +998,13 @@ int arch_add_memory(int nid, u64 start,
11936 struct pglist_data *pgdat = NODE_DATA(nid);
11937 struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
11938 - unsigned long start_pfn = start >> PAGE_SHIFT;
11939 + unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT;
11940 unsigned long nr_pages = size >> PAGE_SHIFT;
11943 - init_memory_mapping(start, start + size-1);
11944 + last_mapped_pfn = init_memory_mapping(start, start + size-1);
11945 + if (last_mapped_pfn > max_pfn_mapped)
11946 + max_pfn_mapped = last_mapped_pfn;
11948 ret = __add_pages(zone, start_pfn, nr_pages);
11950 @@ -871,6 +1023,26 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to
11952 #endif /* CONFIG_MEMORY_HOTPLUG */
11955 + * devmem_is_allowed() checks to see if /dev/mem access to a certain address
11956 + * is valid. The argument is a physical page number.
11959 + * On x86, access has to be given to the first megabyte of ram because that area
11960 + * contains bios code and data regions used by X and dosemu and similar apps.
11961 + * Access has to be given to non-kernel-ram areas as well, these contain the PCI
11962 + * mmio resources as well as potential bios/acpi data regions.
11964 +int devmem_is_allowed(unsigned long pagenr)
11966 + if (pagenr <= 256)
11968 + if (mfn_to_local_pfn(pagenr) >= max_pfn)
11974 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
11975 kcore_modules, kcore_vsyscall;
11977 @@ -979,24 +1151,7 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
11979 void mark_rodata_ro(void)
11981 - unsigned long start = (unsigned long)_stext, end;
11983 -#ifdef CONFIG_HOTPLUG_CPU
11984 - /* It must still be possible to apply SMP alternatives. */
11985 - if (num_possible_cpus() > 1)
11986 - start = (unsigned long)_etext;
11989 -#ifdef CONFIG_KPROBES
11990 - start = (unsigned long)__start_rodata;
11993 - end = (unsigned long)__end_rodata;
11994 - start = (start + PAGE_SIZE - 1) & PAGE_MASK;
11995 - end &= PAGE_MASK;
11996 - if (end <= start)
11999 + unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
12001 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
12002 (end - start) >> 10);
12003 @@ -1019,6 +1174,7 @@ void mark_rodata_ro(void)
12004 set_memory_ro(start, (end-start) >> PAGE_SHIFT);
12010 #ifdef CONFIG_BLK_DEV_INITRD
12011 @@ -1031,7 +1187,7 @@ void free_initrd_mem(unsigned long start
12012 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
12015 - int nid = phys_to_nid(phys);
12016 + int nid, next_nid;
12018 unsigned long pfn = phys >> PAGE_SHIFT;
12020 @@ -1040,7 +1196,7 @@ void __init reserve_bootmem_generic(unsi
12021 * This can happen with kdump kernels when accessing
12024 - if (pfn < end_pfn_map)
12025 + if (pfn < max_pfn_mapped)
12028 printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
12029 @@ -1050,10 +1206,16 @@ void __init reserve_bootmem_generic(unsi
12031 /* Should check here against the e820 map to avoid double free */
12033 - reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
12034 + nid = phys_to_nid(phys);
12035 + next_nid = phys_to_nid(phys + len - 1);
12036 + if (nid == next_nid)
12037 + reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
12039 + reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
12041 reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
12045 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
12046 dma_reserve += len / PAGE_SIZE;
12047 @@ -1149,6 +1311,10 @@ const char *arch_vma_name(struct vm_area
12049 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
12051 +static long __meminitdata addr_start, addr_end;
12052 +static void __meminitdata *p_start, *p_end;
12053 +static int __meminitdata node_start;
12056 vmemmap_populate(struct page *start_page, unsigned long size, int node)
12058 @@ -1183,12 +1349,32 @@ vmemmap_populate(struct page *start_page
12059 PAGE_KERNEL_LARGE);
12060 set_pmd(pmd, __pmd_ma(__pte_val(entry)));
12062 - printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
12063 - addr, addr + PMD_SIZE - 1, p, node);
12064 + /* check to see if we have contiguous blocks */
12065 + if (p_end != p || node_start != node) {
12067 + printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
12068 + addr_start, addr_end-1, p_start, p_end-1, node_start);
12069 + addr_start = addr;
12070 + node_start = node;
12073 + addr_end = addr + PMD_SIZE;
12074 + p_end = p + PMD_SIZE;
12076 vmemmap_verify((pte_t *)pmd, node, addr, next);
12082 +void __meminit vmemmap_populate_print_last(void)
12085 + printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
12086 + addr_start, addr_end-1, p_start, p_end-1, node_start);
12093 --- sle11-2009-05-14.orig/arch/x86/mm/ioremap-xen.c 2009-03-16 16:33:40.000000000 +0100
12094 +++ sle11-2009-05-14/arch/x86/mm/ioremap-xen.c 2009-03-16 16:38:05.000000000 +0100
12095 @@ -20,14 +20,11 @@
12096 #include <asm/pgtable.h>
12097 #include <asm/tlbflush.h>
12098 #include <asm/pgalloc.h>
12099 +#include <asm/pat.h>
12101 -enum ioremap_mode {
12102 - IOR_MODE_UNCACHED,
12106 -#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
12107 +#ifdef CONFIG_X86_64
12109 +#ifndef CONFIG_XEN
12110 unsigned long __phys_addr(unsigned long x)
12112 if (x >= __START_KERNEL_map)
12113 @@ -35,6 +32,19 @@ unsigned long __phys_addr(unsigned long
12114 return x - PAGE_OFFSET;
12116 EXPORT_SYMBOL(__phys_addr);
12119 +static inline int phys_addr_valid(unsigned long addr)
12121 + return addr < (1UL << boot_cpu_data.x86_phys_bits);
12126 +static inline int phys_addr_valid(unsigned long addr)
12133 @@ -92,7 +102,8 @@ static int __direct_remap_pfn_range(stru
12134 * Fill in the machine address: PTE ptr is done later by
12135 * apply_to_page_range().
12137 - v->val = __pte_val(pfn_pte_ma(mfn, prot)) | _PAGE_IO;
12138 + pgprot_val(prot) |= _PAGE_IO;
12139 + v->val = __pte_val(pte_mkspecial(pfn_pte_ma(mfn, prot)));
12142 address += PAGE_SIZE;
12143 @@ -189,10 +200,9 @@ int touch_pte_range(struct mm_struct *mm
12145 EXPORT_SYMBOL(touch_pte_range);
12147 -#ifdef CONFIG_X86_32
12148 int page_is_ram(unsigned long pagenr)
12150 - unsigned long addr, end;
12151 + resource_size_t addr, end;
12155 @@ -228,31 +238,51 @@ int page_is_ram(unsigned long pagenr)
12162 * Fix up the linear direct mapping of the kernel to avoid cache attribute
12165 static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
12166 - enum ioremap_mode mode)
12167 + unsigned long prot_val)
12169 unsigned long nrpages = size >> PAGE_SHIFT;
12173 - case IOR_MODE_UNCACHED:
12174 + switch (prot_val) {
12175 + case _PAGE_CACHE_UC:
12177 - err = set_memory_uc(vaddr, nrpages);
12178 + err = _set_memory_uc(vaddr, nrpages);
12180 + case _PAGE_CACHE_WC:
12181 + err = _set_memory_wc(vaddr, nrpages);
12183 - case IOR_MODE_CACHED:
12184 - err = set_memory_wb(vaddr, nrpages);
12185 + case _PAGE_CACHE_WB:
12186 + err = _set_memory_wb(vaddr, nrpages);
12193 +int ioremap_check_change_attr(unsigned long mfn, unsigned long size,
12194 + unsigned long prot_val)
12196 + unsigned long sz;
12199 + for (sz = rc = 0; sz < size && !rc; ++mfn, sz += PAGE_SIZE) {
12200 + unsigned long pfn = mfn_to_local_pfn(mfn);
12202 + if (pfn >= max_pfn_mapped)
12204 + rc = ioremap_change_attr((unsigned long)__va(pfn << PAGE_SHIFT),
12205 + PAGE_SIZE, prot_val);
12212 * Remap an arbitrary physical address space into the kernel virtual
12213 * address space. Needed when the kernel wants to access high addresses
12214 @@ -262,12 +292,15 @@ static int ioremap_change_attr(unsigned
12215 * have to convert them into an offset in a page-aligned mapping, but the
12216 * caller shouldn't need to know that small detail.
12218 -static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
12219 - enum ioremap_mode mode)
12220 +static void __iomem *__ioremap_caller(resource_size_t phys_addr,
12221 + unsigned long size, unsigned long prot_val, void *caller)
12223 - unsigned long mfn, offset, last_addr, vaddr;
12224 + unsigned long mfn, offset, vaddr;
12225 + resource_size_t last_addr;
12226 struct vm_struct *area;
12227 + unsigned long new_prot_val;
12230 domid_t domid = DOMID_IO;
12232 /* Don't allow wraparound or zero size */
12233 @@ -275,6 +308,13 @@ static void __iomem *__ioremap(resource_
12234 if (!size || last_addr < phys_addr)
12237 + if (!phys_addr_valid(phys_addr)) {
12238 + printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
12239 + (unsigned long long)phys_addr);
12245 * Don't remap the low PCI/ISA area, it's always mapped..
12247 @@ -287,55 +327,86 @@ static void __iomem *__ioremap(resource_
12248 for (mfn = PFN_DOWN(phys_addr); mfn < PFN_UP(last_addr); mfn++) {
12249 unsigned long pfn = mfn_to_local_pfn(mfn);
12251 - if (pfn >= max_pfn)
12253 + if (pfn_valid(pfn)) {
12254 + if (!PageReserved(pfn_to_page(pfn)))
12256 + domid = DOMID_SELF;
12259 + WARN_ON_ONCE(domid == DOMID_SELF);
12261 - domid = DOMID_SELF;
12263 + * Mappings have to be page-aligned
12265 + offset = phys_addr & ~PAGE_MASK;
12266 + phys_addr &= PAGE_MASK;
12267 + size = PAGE_ALIGN(last_addr+1) - phys_addr;
12269 - if (pfn >= max_pfn_mapped) /* bogus */
12271 + retval = reserve_memtype(phys_addr, phys_addr + size,
12272 + prot_val, &new_prot_val);
12274 + pr_debug("Warning: reserve_memtype returned %d\n", retval);
12278 - if (pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
12279 + if (prot_val != new_prot_val) {
12281 + * Do not fallback to certain memory types with certain
12282 + * requested type:
12283 + * - request is uc-, return cannot be write-back
12284 + * - request is uc-, return cannot be write-combine
12285 + * - request is write-combine, return cannot be write-back
12287 + if ((prot_val == _PAGE_CACHE_UC_MINUS &&
12288 + (new_prot_val == _PAGE_CACHE_WB ||
12289 + new_prot_val == _PAGE_CACHE_WC)) ||
12290 + (prot_val == _PAGE_CACHE_WC &&
12291 + new_prot_val == _PAGE_CACHE_WB)) {
12293 + "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
12294 + (unsigned long long)phys_addr,
12295 + (unsigned long long)(phys_addr + size),
12296 + prot_val, new_prot_val);
12297 + free_memtype(phys_addr, phys_addr + size);
12300 + prot_val = new_prot_val;
12304 - case IOR_MODE_UNCACHED:
12305 + switch (prot_val) {
12306 + case _PAGE_CACHE_UC:
12309 - * FIXME: we will use UC MINUS for now, as video fb drivers
12310 - * depend on it. Upcoming ioremap_wc() will fix this behavior.
12312 + prot = PAGE_KERNEL_NOCACHE;
12314 + case _PAGE_CACHE_UC_MINUS:
12315 prot = PAGE_KERNEL_UC_MINUS;
12317 - case IOR_MODE_CACHED:
12318 + case _PAGE_CACHE_WC:
12319 + prot = PAGE_KERNEL_WC;
12321 + case _PAGE_CACHE_WB:
12322 prot = PAGE_KERNEL;
12327 - * Mappings have to be page-aligned
12329 - offset = phys_addr & ~PAGE_MASK;
12330 - phys_addr &= PAGE_MASK;
12331 - size = PAGE_ALIGN(last_addr+1) - phys_addr;
12336 - area = get_vm_area(size, VM_IOREMAP | (mode << 20));
12337 + area = get_vm_area_caller(size, VM_IOREMAP, caller);
12340 area->phys_addr = phys_addr;
12341 vaddr = (unsigned long) area->addr;
12342 if (__direct_remap_pfn_range(&init_mm, vaddr, PFN_DOWN(phys_addr),
12343 size, prot, domid)) {
12344 + free_memtype(phys_addr, phys_addr + size);
12345 free_vm_area(area);
12349 - if (ioremap_change_attr(vaddr, size, mode) < 0) {
12350 - iounmap((void __iomem *) vaddr);
12351 + if (ioremap_change_attr(vaddr, size, prot_val) < 0) {
12352 + free_memtype(phys_addr, phys_addr + size);
12353 + vunmap(area->addr);
12357 @@ -365,16 +436,72 @@ static void __iomem *__ioremap(resource_
12359 void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
12361 - return __ioremap(phys_addr, size, IOR_MODE_UNCACHED);
12363 + * Ideally, this should be:
12364 + * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
12366 + * Till we fix all X drivers to use ioremap_wc(), we will use
12369 + unsigned long val = _PAGE_CACHE_UC_MINUS;
12371 + return __ioremap_caller(phys_addr, size, val,
12372 + __builtin_return_address(0));
12374 EXPORT_SYMBOL(ioremap_nocache);
12377 + * ioremap_wc - map memory into CPU space write combined
12378 + * @offset: bus address of the memory
12379 + * @size: size of the resource to map
12381 + * This version of ioremap ensures that the memory is marked write combining.
12382 + * Write combining allows faster writes to some hardware devices.
12384 + * Must be freed with iounmap.
12386 +void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
12388 + if (pat_wc_enabled)
12389 + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
12390 + __builtin_return_address(0));
12392 + return ioremap_nocache(phys_addr, size);
12394 +EXPORT_SYMBOL(ioremap_wc);
12396 void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
12398 - return __ioremap(phys_addr, size, IOR_MODE_CACHED);
12399 + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB,
12400 + __builtin_return_address(0));
12402 EXPORT_SYMBOL(ioremap_cache);
12404 +#ifndef CONFIG_XEN
12405 +static void __iomem *ioremap_default(resource_size_t phys_addr,
12406 + unsigned long size)
12408 + unsigned long flags;
12413 + * - WB for WB-able memory and no other conflicting mappings
12414 + * - UC_MINUS for non-WB-able memory with no other conflicting mappings
12415 + * - Inherit from confliting mappings otherwise
12417 + err = reserve_memtype(phys_addr, phys_addr + size, -1, &flags);
12421 + ret = (void *) __ioremap_caller(phys_addr, size, flags,
12422 + __builtin_return_address(0));
12424 + free_memtype(phys_addr, phys_addr + size);
12425 + return (void __iomem *)ret;
12430 * iounmap - Free a IO remapping
12431 * @addr: virtual address from ioremap_*
12432 @@ -417,15 +544,7 @@ void iounmap(volatile void __iomem *addr
12436 - if ((p->flags >> 20) != IOR_MODE_CACHED) {
12437 - unsigned long n = get_vm_area_size(p) >> PAGE_SHIFT;
12438 - unsigned long mfn = p->phys_addr;
12439 - unsigned long va = (unsigned long)addr;
12441 - for (; n > 0; n--, mfn++, va += PAGE_SIZE)
12442 - if (mfn_to_local_pfn(mfn) < max_pfn)
12443 - set_memory_wb(va, 1);
12445 + free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
12447 /* Finally remove it */
12448 o = remove_vm_area((void *)addr);
12449 @@ -434,6 +553,37 @@ void iounmap(volatile void __iomem *addr
12451 EXPORT_SYMBOL(iounmap);
12453 +#ifndef CONFIG_XEN
12455 + * Convert a physical pointer to a virtual kernel pointer for /dev/mem
12458 +void *xlate_dev_mem_ptr(unsigned long phys)
12461 + unsigned long start = phys & PAGE_MASK;
12463 + /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
12464 + if (page_is_ram(start >> PAGE_SHIFT))
12465 + return __va(phys);
12467 + addr = (void *)ioremap_default(start, PAGE_SIZE);
12469 + addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
12474 +void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
12476 + if (page_is_ram(phys >> PAGE_SHIFT))
12479 + iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
12484 int __initdata early_ioremap_debug;
12486 static int __init early_ioremap_debug_setup(char *str)
12487 @@ -445,8 +595,8 @@ static int __init early_ioremap_debug_se
12488 early_param("early_ioremap_debug", early_ioremap_debug_setup);
12490 static __initdata int after_paging_init;
12491 -static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
12492 - __attribute__((aligned(PAGE_SIZE)));
12493 +static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
12494 + __section(.bss.page_aligned);
12496 #ifdef CONFIG_X86_32
12497 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
12498 @@ -461,8 +611,8 @@ static inline pmd_t * __init early_iorem
12501 #define early_ioremap_pmd early_get_pmd
12502 +#undef make_lowmem_page_readonly
12503 #define make_lowmem_page_readonly early_make_page_readonly
12504 -#define make_lowmem_page_writable make_page_writable
12507 static inline pte_t * __init early_ioremap_pte(unsigned long addr)
12508 @@ -512,7 +662,7 @@ void __init early_ioremap_clear(void)
12509 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
12511 make_lowmem_page_writable(bm_pte, XENFEAT_writable_page_tables);
12512 - /* paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT); */
12513 + /* paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT); */
12517 @@ -654,10 +804,11 @@ void __init early_iounmap(void *addr, un
12518 unsigned long offset;
12519 unsigned int nrpages;
12520 enum fixed_addresses idx;
12521 - unsigned int nesting;
12524 nesting = --early_ioremap_nested;
12525 - WARN_ON(nesting < 0);
12526 + if (WARN_ON(nesting < 0))
12529 if (early_ioremap_debug) {
12530 printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
12531 --- sle11-2009-05-14.orig/arch/x86/mm/pageattr-xen.c 2009-03-16 16:37:14.000000000 +0100
12532 +++ sle11-2009-05-14/arch/x86/mm/pageattr-xen.c 2009-03-16 16:38:05.000000000 +0100
12534 #include <linux/slab.h>
12535 #include <linux/mm.h>
12536 #include <linux/interrupt.h>
12537 +#include <linux/seq_file.h>
12538 +#include <linux/debugfs.h>
12540 #include <asm/e820.h>
12541 #include <asm/processor.h>
12542 @@ -17,370 +19,7 @@
12543 #include <asm/uaccess.h>
12544 #include <asm/pgalloc.h>
12545 #include <asm/proto.h>
12546 -#include <asm/mmu_context.h>
12548 -#ifndef CONFIG_X86_64
12549 -#define TASK_SIZE64 TASK_SIZE
12552 -static void _pin_lock(struct mm_struct *mm, int lock) {
12554 - spin_lock(&mm->page_table_lock);
12555 -#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
12556 - /* While mm->page_table_lock protects us against insertions and
12557 - * removals of higher level page table pages, it doesn't protect
12558 - * against updates of pte-s. Such updates, however, require the
12559 - * pte pages to be in consistent state (unpinned+writable or
12560 - * pinned+readonly). The pinning and attribute changes, however
12561 - * cannot be done atomically, which is why such updates must be
12562 - * prevented from happening concurrently.
12563 - * Note that no pte lock can ever elsewhere be acquired nesting
12564 - * with an already acquired one in the same mm, or with the mm's
12565 - * page_table_lock already acquired, as that would break in the
12566 - * non-split case (where all these are actually resolving to the
12567 - * one page_table_lock). Thus acquiring all of them here is not
12568 - * going to result in dead locks, and the order of acquires
12569 - * doesn't matter.
12572 - pgd_t *pgd = mm->pgd;
12575 - for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
12579 - if (pgd_none(*pgd))
12581 - pud = pud_offset(pgd, 0);
12582 - for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
12586 - if (pud_none(*pud))
12588 - pmd = pmd_offset(pud, 0);
12589 - for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
12592 - if (pmd_none(*pmd))
12594 - ptl = pte_lockptr(0, pmd);
12598 - spin_unlock(ptl);
12605 - spin_unlock(&mm->page_table_lock);
12607 -#define pin_lock(mm) _pin_lock(mm, 1)
12608 -#define pin_unlock(mm) _pin_lock(mm, 0)
12610 -#define PIN_BATCH sizeof(void *)
12611 -static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl);
12613 -static inline unsigned int pgd_walk_set_prot(struct page *page, pgprot_t flags,
12614 - unsigned int cpu, unsigned int seq)
12616 - unsigned long pfn = page_to_pfn(page);
12618 - if (PageHighMem(page)) {
12619 - if (pgprot_val(flags) & _PAGE_RW)
12620 - ClearPagePinned(page);
12622 - SetPagePinned(page);
12624 - MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
12625 - (unsigned long)__va(pfn << PAGE_SHIFT),
12626 - pfn_pte(pfn, flags), 0);
12627 - if (unlikely(++seq == PIN_BATCH)) {
12628 - if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
12629 - PIN_BATCH, NULL)))
12638 -static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
12640 - pgd_t *pgd = pgd_base;
12644 - unsigned int cpu, seq;
12645 - multicall_entry_t *mcl;
12647 - if (xen_feature(XENFEAT_auto_translated_physmap))
12653 - * Cannot iterate up to USER_PTRS_PER_PGD on x86-64 as these pagetables
12654 - * may not be the 'current' task's pagetables (e.g., current may be
12655 - * 32-bit, but the pagetables may be for a 64-bit task).
12656 - * Subtracting 1 from TASK_SIZE64 means the loop limit is correct
12657 - * regardless of whether TASK_SIZE64 is a multiple of PGDIR_SIZE.
12659 - for (g = 0, seq = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
12660 - if (pgd_none(*pgd))
12662 - pud = pud_offset(pgd, 0);
12663 - if (PTRS_PER_PUD > 1) /* not folded */
12664 - seq = pgd_walk_set_prot(virt_to_page(pud),flags,cpu,seq);
12665 - for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
12666 - if (pud_none(*pud))
12668 - pmd = pmd_offset(pud, 0);
12669 - if (PTRS_PER_PMD > 1) /* not folded */
12670 - seq = pgd_walk_set_prot(virt_to_page(pmd),flags,cpu,seq);
12671 - for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
12672 - if (pmd_none(*pmd))
12674 - seq = pgd_walk_set_prot(pmd_page(*pmd),flags,cpu,seq);
12679 - mcl = per_cpu(pb_mcl, cpu);
12680 -#ifdef CONFIG_X86_64
12681 - if (unlikely(seq > PIN_BATCH - 2)) {
12682 - if (unlikely(HYPERVISOR_multicall_check(mcl, seq, NULL)))
12686 - MULTI_update_va_mapping(mcl + seq,
12687 - (unsigned long)__user_pgd(pgd_base),
12688 - pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags),
12690 - MULTI_update_va_mapping(mcl + seq + 1,
12691 - (unsigned long)pgd_base,
12692 - pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
12694 - if (unlikely(HYPERVISOR_multicall_check(mcl, seq + 2, NULL)))
12697 - if (likely(seq != 0)) {
12698 - MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
12699 - (unsigned long)pgd_base,
12700 - pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
12702 - if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
12705 - } else if(HYPERVISOR_update_va_mapping((unsigned long)pgd_base,
12706 - pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
12714 -static void __pgd_pin(pgd_t *pgd)
12716 - pgd_walk(pgd, PAGE_KERNEL_RO);
12717 - kmap_flush_unused();
12718 - xen_pgd_pin(__pa(pgd)); /* kernel */
12719 -#ifdef CONFIG_X86_64
12720 - xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */
12722 - SetPagePinned(virt_to_page(pgd));
12725 -static void __pgd_unpin(pgd_t *pgd)
12727 - xen_pgd_unpin(__pa(pgd));
12728 -#ifdef CONFIG_X86_64
12729 - xen_pgd_unpin(__pa(__user_pgd(pgd)));
12731 - pgd_walk(pgd, PAGE_KERNEL);
12732 - ClearPagePinned(virt_to_page(pgd));
12735 -void pgd_test_and_unpin(pgd_t *pgd)
12737 - if (PagePinned(virt_to_page(pgd)))
12738 - __pgd_unpin(pgd);
12741 -void mm_pin(struct mm_struct *mm)
12743 - if (xen_feature(XENFEAT_writable_page_tables))
12747 - __pgd_pin(mm->pgd);
12751 -void mm_unpin(struct mm_struct *mm)
12753 - if (xen_feature(XENFEAT_writable_page_tables))
12757 - __pgd_unpin(mm->pgd);
12761 -void mm_pin_all(void)
12763 - struct page *page;
12764 - unsigned long flags;
12766 - if (xen_feature(XENFEAT_writable_page_tables))
12770 - * Allow uninterrupted access to the pgd_list. Also protects
12771 - * __pgd_pin() by disabling preemption.
12772 - * All other CPUs must be at a safe point (e.g., in stop_machine
12773 - * or offlined entirely).
12775 - spin_lock_irqsave(&pgd_lock, flags);
12776 - list_for_each_entry(page, &pgd_list, lru) {
12777 - if (!PagePinned(page))
12778 - __pgd_pin((pgd_t *)page_address(page));
12780 - spin_unlock_irqrestore(&pgd_lock, flags);
12783 -void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
12785 - if (!PagePinned(virt_to_page(mm->pgd)))
12789 -void arch_exit_mmap(struct mm_struct *mm)
12791 - struct task_struct *tsk = current;
12796 - * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
12797 - * *much* faster this way, as no tlb flushes means bigger wrpt batches.
12799 - if (tsk->active_mm == mm) {
12800 - tsk->active_mm = &init_mm;
12801 - atomic_inc(&init_mm.mm_count);
12803 - switch_mm(mm, &init_mm, tsk);
12805 - atomic_dec(&mm->mm_count);
12806 - BUG_ON(atomic_read(&mm->mm_count) == 0);
12809 - task_unlock(tsk);
12811 - if (PagePinned(virt_to_page(mm->pgd))
12812 - && atomic_read(&mm->mm_count) == 1
12813 - && !mm->context.has_foreign_mappings)
12817 -static void _pte_free(struct page *page, unsigned int order)
12820 - __pte_free(page);
12823 -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
12825 - struct page *pte;
12827 -#ifdef CONFIG_HIGHPTE
12828 - pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
12830 - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
12833 - pgtable_page_ctor(pte);
12834 - SetPageForeign(pte, _pte_free);
12835 - init_page_count(pte);
12840 -void __pte_free(pgtable_t pte)
12842 - if (!PageHighMem(pte)) {
12843 - unsigned long va = (unsigned long)page_address(pte);
12844 - unsigned int level;
12845 - pte_t *ptep = lookup_address(va, &level);
12847 - BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
12848 - if (!pte_write(*ptep)
12849 - && HYPERVISOR_update_va_mapping(va,
12850 - mk_pte(pte, PAGE_KERNEL),
12854 -#ifdef CONFIG_HIGHPTE
12855 - ClearPagePinned(pte);
12860 - ClearPageForeign(pte);
12861 - init_page_count(pte);
12862 - pgtable_page_dtor(pte);
12863 - __free_page(pte);
12866 -#if PAGETABLE_LEVELS >= 3
12867 -static void _pmd_free(struct page *page, unsigned int order)
12870 - __pmd_free(page);
12873 -pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
12875 - struct page *pmd;
12877 - pmd = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
12880 - SetPageForeign(pmd, _pmd_free);
12881 - init_page_count(pmd);
12882 - return page_address(pmd);
12885 -void __pmd_free(pgtable_t pmd)
12887 - unsigned long va = (unsigned long)page_address(pmd);
12888 - unsigned int level;
12889 - pte_t *ptep = lookup_address(va, &level);
12891 - BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
12892 - if (!pte_write(*ptep)
12893 - && HYPERVISOR_update_va_mapping(va, mk_pte(pmd, PAGE_KERNEL), 0))
12896 - ClearPageForeign(pmd);
12897 - init_page_count(pmd);
12898 - __free_page(pmd);
12902 -/* blktap and gntdev need this, as otherwise they would implicitly (and
12903 - * needlessly, as they never use it) reference init_mm. */
12904 -pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *vma,
12905 - unsigned long addr, pte_t *ptep, int full)
12907 - return ptep_get_and_clear_full(vma->vm_mm, addr, ptep, full);
12909 -EXPORT_SYMBOL_GPL(xen_ptep_get_and_clear_full);
12910 +#include <asm/pat.h>
12913 * The current flushing context - we pass it instead of 5 arguments:
12914 @@ -392,6 +31,7 @@ struct cpa_data {
12918 + unsigned force_split : 1;
12921 #ifdef CONFIG_X86_64
12922 @@ -637,6 +277,9 @@ try_preserve_large_page(pte_t *kpte, uns
12923 int i, do_split = 1;
12924 unsigned int level;
12926 + if (cpa->force_split)
12929 spin_lock_irqsave(&pgd_lock, flags);
12931 * Check for races, another CPU might have split this page
12932 @@ -856,9 +499,7 @@ static int split_large_page(pte_t *kpte,
12935 pbase = (pte_t *)page_address(base);
12936 -#ifdef CONFIG_X86_32
12937 - paravirt_alloc_pt(&init_mm, page_to_pfn(base));
12939 + paravirt_alloc_pte(&init_mm, page_to_pfn(base));
12940 ref_prot = pte_pgprot(pte_clrhuge(*kpte));
12942 #ifdef CONFIG_X86_64
12943 @@ -919,7 +560,7 @@ static int __change_page_attr(struct cpa
12945 kpte = lookup_address(address, &level);
12947 - return primary ? -EINVAL : 0;
12951 if (!__pte_val(old_pte)) {
12952 @@ -1078,7 +719,8 @@ static inline int cache_attr(pgprot_t at
12955 static int change_page_attr_set_clr(unsigned long addr, int numpages,
12956 - pgprot_t mask_set, pgprot_t mask_clr)
12957 + pgprot_t mask_set, pgprot_t mask_clr,
12960 struct cpa_data cpa;
12961 int ret, cache, checkalias;
12962 @@ -1089,7 +731,7 @@ static int change_page_attr_set_clr(unsi
12964 mask_set = canon_pgprot(mask_set);
12965 mask_clr = canon_pgprot(mask_clr);
12966 - if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
12967 + if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
12970 /* Ensure we are PAGE_SIZE aligned */
12971 @@ -1106,6 +748,7 @@ static int change_page_attr_set_clr(unsi
12972 cpa.mask_set = mask_set;
12973 cpa.mask_clr = mask_clr;
12975 + cpa.force_split = force_split;
12977 /* No alias checking for _NX bit modifications */
12978 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
12979 @@ -1144,26 +787,67 @@ out:
12980 static inline int change_page_attr_set(unsigned long addr, int numpages,
12983 - return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
12984 + return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0);
12987 static inline int change_page_attr_clear(unsigned long addr, int numpages,
12990 - return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
12991 + return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
12994 -int set_memory_uc(unsigned long addr, int numpages)
12995 +int _set_memory_uc(unsigned long addr, int numpages)
12998 + * for now UC MINUS. see comments in ioremap_nocache()
13000 return change_page_attr_set(addr, numpages,
13001 - __pgprot(_PAGE_PCD));
13002 + __pgprot(_PAGE_CACHE_UC_MINUS));
13005 +int set_memory_uc(unsigned long addr, int numpages)
13008 + * for now UC MINUS. see comments in ioremap_nocache()
13010 + if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
13011 + _PAGE_CACHE_UC_MINUS, NULL))
13014 + return _set_memory_uc(addr, numpages);
13016 EXPORT_SYMBOL(set_memory_uc);
13018 -int set_memory_wb(unsigned long addr, int numpages)
13019 +int _set_memory_wc(unsigned long addr, int numpages)
13021 + return change_page_attr_set(addr, numpages,
13022 + __pgprot(_PAGE_CACHE_WC));
13025 +int set_memory_wc(unsigned long addr, int numpages)
13027 + if (!pat_wc_enabled)
13028 + return set_memory_uc(addr, numpages);
13030 + if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
13031 + _PAGE_CACHE_WC, NULL))
13034 + return _set_memory_wc(addr, numpages);
13036 +EXPORT_SYMBOL(set_memory_wc);
13038 +int _set_memory_wb(unsigned long addr, int numpages)
13040 return change_page_attr_clear(addr, numpages,
13041 - __pgprot(_PAGE_PCD | _PAGE_PWT));
13042 + __pgprot(_PAGE_CACHE_MASK));
13045 +int set_memory_wb(unsigned long addr, int numpages)
13047 + free_memtype(addr, addr + numpages * PAGE_SIZE);
13049 + return _set_memory_wb(addr, numpages);
13051 EXPORT_SYMBOL(set_memory_wb);
13053 @@ -1194,6 +878,12 @@ int set_memory_np(unsigned long addr, in
13054 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
13057 +int set_memory_4k(unsigned long addr, int numpages)
13059 + return change_page_attr_set_clr(addr, numpages, __pgprot(0),
13063 int set_pages_uc(struct page *page, int numpages)
13065 unsigned long addr = (unsigned long)page_address(page);
13066 @@ -1303,6 +993,45 @@ void kernel_map_pages(struct page *page,
13067 cpa_fill_pool(NULL);
13070 +#ifdef CONFIG_DEBUG_FS
13071 +static int dpa_show(struct seq_file *m, void *v)
13073 + seq_puts(m, "DEBUG_PAGEALLOC\n");
13074 + seq_printf(m, "pool_size : %lu\n", pool_size);
13075 + seq_printf(m, "pool_pages : %lu\n", pool_pages);
13076 + seq_printf(m, "pool_low : %lu\n", pool_low);
13077 + seq_printf(m, "pool_used : %lu\n", pool_used);
13078 + seq_printf(m, "pool_failed : %lu\n", pool_failed);
13083 +static int dpa_open(struct inode *inode, struct file *filp)
13085 + return single_open(filp, dpa_show, NULL);
13088 +static const struct file_operations dpa_fops = {
13089 + .open = dpa_open,
13090 + .read = seq_read,
13091 + .llseek = seq_lseek,
13092 + .release = single_release,
13095 +static int __init debug_pagealloc_proc_init(void)
13097 + struct dentry *de;
13099 + de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL,
13106 +__initcall(debug_pagealloc_proc_init);
13109 #ifdef CONFIG_HIBERNATION
13111 bool kernel_page_present(struct page *page)
13112 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
13113 +++ sle11-2009-05-14/arch/x86/mm/pat-xen.c 2009-03-16 16:38:05.000000000 +0100
13116 + * Handle caching attributes in page tables (PAT)
13118 + * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
13119 + * Suresh B Siddha <suresh.b.siddha@intel.com>
13121 + * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
13124 +#include <linux/mm.h>
13125 +#include <linux/kernel.h>
13126 +#include <linux/gfp.h>
13127 +#include <linux/fs.h>
13128 +#include <linux/bootmem.h>
13130 +#include <asm/msr.h>
13131 +#include <asm/tlbflush.h>
13132 +#include <asm/processor.h>
13133 +#include <asm/page.h>
13134 +#include <asm/pgtable.h>
13135 +#include <asm/pat.h>
13136 +#include <asm/e820.h>
13137 +#include <asm/cacheflush.h>
13138 +#include <asm/fcntl.h>
13139 +#include <asm/mtrr.h>
13140 +#include <asm/io.h>
13142 +#ifdef CONFIG_X86_PAT
13143 +int __read_mostly pat_wc_enabled = 1;
13145 +void __cpuinit pat_disable(char *reason)
13147 + pat_wc_enabled = 0;
13148 + printk(KERN_INFO "%s\n", reason);
13151 +static int __init nopat(char *str)
13153 + pat_disable("PAT support disabled.");
13156 +early_param("nopat", nopat);
13159 +static u64 __read_mostly boot_pat_state;
13162 + PAT_UC = 0, /* uncached */
13163 + PAT_WC = 1, /* Write combining */
13164 + PAT_WT = 4, /* Write Through */
13165 + PAT_WP = 5, /* Write Protected */
13166 + PAT_WB = 6, /* Write Back (default) */
13167 + PAT_UC_MINUS = 7, /* UC, but can be overriden by MTRR */
13170 +#define PAT(x,y) ((u64)PAT_ ## y << ((x)*8))
13172 +void pat_init(void)
13176 + if (!pat_wc_enabled)
13179 + /* Paranoia check. */
13180 + if (!cpu_has_pat) {
13181 + printk(KERN_ERR "PAT enabled, but CPU feature cleared\n");
13183 + * Panic if this happens on the secondary CPU, and we
13184 + * switched to PAT on the boot CPU. We have no way to
13187 + BUG_ON(boot_pat_state);
13190 +#ifndef CONFIG_XEN
13191 + /* Set PWT to Write-Combining. All other bits stay the same */
13193 + * PTE encoding used in Linux:
13198 + * 000 WB _PAGE_CACHE_WB
13199 + * 001 WC _PAGE_CACHE_WC
13200 + * 010 UC- _PAGE_CACHE_UC_MINUS
13201 + * 011 UC _PAGE_CACHE_UC
13204 + pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) |
13205 + PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC);
13207 + /* Boot CPU check */
13208 + if (!boot_pat_state)
13209 + rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
13211 + wrmsrl(MSR_IA32_CR_PAT, pat);
13214 + * PAT settings are part of the hypervisor interface, and their
13215 + * assignment cannot be changed.
13217 + rdmsrl(MSR_IA32_CR_PAT, pat);
13218 + if (!boot_pat_state)
13219 + boot_pat_state = pat;
13221 + printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
13222 + smp_processor_id(), boot_pat_state, pat);
13227 +static char *cattr_name(unsigned long flags)
13229 + switch (flags & _PAGE_CACHE_MASK) {
13230 + case _PAGE_CACHE_UC: return "uncached";
13231 + case _PAGE_CACHE_UC_MINUS: return "uncached-minus";
13232 + case _PAGE_CACHE_WB: return "write-back";
13233 + case _PAGE_CACHE_WC: return "write-combining";
13234 + case _PAGE_CACHE_WP: return "write-protected";
13235 + case _PAGE_CACHE_WT: return "write-through";
13236 + default: return "broken";
13241 + * The global memtype list keeps track of memory type for specific
13242 + * physical memory areas. Conflicting memory types in different
13243 + * mappings can cause CPU cache corruption. To avoid this we keep track.
13245 + * The list is sorted based on starting address and can contain multiple
13246 + * entries for each address (this allows reference counting for overlapping
13247 + * areas). All the aliases have the same cache attributes of course.
13248 + * Zero attributes are represented as holes.
13250 + * Currently the data structure is a list because the number of mappings
13251 + * are expected to be relatively small. If this should be a problem
13252 + * it could be changed to a rbtree or similar.
13254 + * memtype_lock protects the whole list.
13260 + unsigned long type;
13261 + struct list_head nd;
13264 +static LIST_HEAD(memtype_list);
13265 +static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
13268 + * Does intersection of PAT memory type and MTRR memory type and returns
13269 + * the resulting memory type as PAT understands it.
13270 + * (Type in pat and mtrr will not have same value)
13271 + * The intersection is based on "Effective Memory Type" tables in IA-32
13274 +static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
13275 + unsigned long *ret_prot)
13277 + unsigned long pat_type;
13280 + pat_type = prot & _PAGE_CACHE_MASK;
13281 + prot &= (~_PAGE_CACHE_MASK);
13284 + * We return the PAT request directly for types where PAT takes
13285 + * precedence with respect to MTRR and for UC_MINUS.
13286 + * Consistency checks with other PAT requests is done later
13287 + * while going through memtype list.
13289 + if (pat_type == _PAGE_CACHE_WC) {
13290 + *ret_prot = prot | _PAGE_CACHE_WC;
13292 + } else if (pat_type == _PAGE_CACHE_UC_MINUS) {
13293 + *ret_prot = prot | _PAGE_CACHE_UC_MINUS;
13295 + } else if (pat_type == _PAGE_CACHE_UC) {
13296 + *ret_prot = prot | _PAGE_CACHE_UC;
13301 + * Look for MTRR hint to get the effective type in case where PAT
13302 + * request is for WB.
13304 + mtrr_type = mtrr_type_lookup(start, end);
13306 + if (mtrr_type == MTRR_TYPE_UNCACHABLE) {
13307 + *ret_prot = prot | _PAGE_CACHE_UC;
13308 + } else if (mtrr_type == MTRR_TYPE_WRCOMB) {
13309 + *ret_prot = prot | _PAGE_CACHE_WC;
13311 + *ret_prot = prot | _PAGE_CACHE_WB;
13318 + * req_type typically has one of the:
13319 + * - _PAGE_CACHE_WB
13320 + * - _PAGE_CACHE_WC
13321 + * - _PAGE_CACHE_UC_MINUS
13322 + * - _PAGE_CACHE_UC
13324 + * req_type will have a special case value '-1', when requester want to inherit
13325 + * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
13327 + * If ret_type is NULL, function will return an error if it cannot reserve the
13328 + * region with req_type. If ret_type is non-null, function will return
13329 + * available type in ret_type in case of no error. In case of any error
13330 + * it will return a negative return value.
13332 +int reserve_memtype(u64 start, u64 end, unsigned long req_type,
13333 + unsigned long *ret_type)
13335 + struct memtype *new_entry = NULL;
13336 + struct memtype *parse;
13337 + unsigned long actual_type;
13340 + /* Only track when pat_wc_enabled */
13341 + if (!pat_wc_enabled) {
13342 + /* This is identical to page table setting without PAT */
13344 + if (req_type == -1) {
13345 + *ret_type = _PAGE_CACHE_WB;
13347 + *ret_type = req_type;
13353 + /* Low ISA region is always mapped WB in page table. No need to track */
13354 + if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) {
13356 + *ret_type = _PAGE_CACHE_WB;
13361 + if (req_type == -1) {
13363 + * Call mtrr_lookup to get the type hint. This is an
13364 + * optimization for /dev/mem mmap'ers into WB memory (BIOS
13365 + * tools and ACPI tools). Use WB request for WB memory and use
13366 + * UC_MINUS otherwise.
13368 + u8 mtrr_type = mtrr_type_lookup(start, end);
13370 + if (mtrr_type == MTRR_TYPE_WRBACK) {
13371 + req_type = _PAGE_CACHE_WB;
13372 + actual_type = _PAGE_CACHE_WB;
13374 + req_type = _PAGE_CACHE_UC_MINUS;
13375 + actual_type = _PAGE_CACHE_UC_MINUS;
13378 + req_type &= _PAGE_CACHE_MASK;
13379 + err = pat_x_mtrr_type(start, end, req_type, &actual_type);
13384 + *ret_type = actual_type;
13389 + new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
13393 + new_entry->start = start;
13394 + new_entry->end = end;
13395 + new_entry->type = actual_type;
13398 + *ret_type = actual_type;
13400 + spin_lock(&memtype_lock);
13402 + /* Search for existing mapping that overlaps the current range */
13403 + list_for_each_entry(parse, &memtype_list, nd) {
13404 + struct memtype *saved_ptr;
13406 + if (parse->start >= end) {
13407 + pr_debug("New Entry\n");
13408 + list_add(&new_entry->nd, parse->nd.prev);
13409 + new_entry = NULL;
13413 + if (start <= parse->start && end >= parse->start) {
13414 + if (actual_type != parse->type && ret_type) {
13415 + actual_type = parse->type;
13416 + *ret_type = actual_type;
13417 + new_entry->type = actual_type;
13420 + if (actual_type != parse->type) {
13422 + KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
13423 + current->comm, current->pid,
13425 + cattr_name(actual_type),
13426 + cattr_name(parse->type));
13431 + saved_ptr = parse;
13433 + * Check to see whether the request overlaps more
13434 + * than one entry in the list
13436 + list_for_each_entry_continue(parse, &memtype_list, nd) {
13437 + if (end <= parse->start) {
13441 + if (actual_type != parse->type) {
13443 + KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
13444 + current->comm, current->pid,
13446 + cattr_name(actual_type),
13447 + cattr_name(parse->type));
13457 + pr_debug("Overlap at 0x%Lx-0x%Lx\n",
13458 + saved_ptr->start, saved_ptr->end);
13459 + /* No conflict. Go ahead and add this new entry */
13460 + list_add(&new_entry->nd, saved_ptr->nd.prev);
13461 + new_entry = NULL;
13465 + if (start < parse->end) {
13466 + if (actual_type != parse->type && ret_type) {
13467 + actual_type = parse->type;
13468 + *ret_type = actual_type;
13469 + new_entry->type = actual_type;
13472 + if (actual_type != parse->type) {
13474 + KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
13475 + current->comm, current->pid,
13477 + cattr_name(actual_type),
13478 + cattr_name(parse->type));
13483 + saved_ptr = parse;
13485 + * Check to see whether the request overlaps more
13486 + * than one entry in the list
13488 + list_for_each_entry_continue(parse, &memtype_list, nd) {
13489 + if (end <= parse->start) {
13493 + if (actual_type != parse->type) {
13495 + KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
13496 + current->comm, current->pid,
13498 + cattr_name(actual_type),
13499 + cattr_name(parse->type));
13509 + pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
13510 + saved_ptr->start, saved_ptr->end);
13511 + /* No conflict. Go ahead and add this new entry */
13512 + list_add(&new_entry->nd, &saved_ptr->nd);
13513 + new_entry = NULL;
13520 + "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
13521 + start, end, cattr_name(new_entry->type),
13522 + cattr_name(req_type));
13523 + kfree(new_entry);
13524 + spin_unlock(&memtype_lock);
13529 + /* No conflict. Not yet added to the list. Add to the tail */
13530 + list_add_tail(&new_entry->nd, &memtype_list);
13531 + pr_debug("New Entry\n");
13536 + "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
13537 + start, end, cattr_name(actual_type),
13538 + cattr_name(req_type), cattr_name(*ret_type));
13541 + "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
13542 + start, end, cattr_name(actual_type),
13543 + cattr_name(req_type));
13546 + spin_unlock(&memtype_lock);
13550 +int free_memtype(u64 start, u64 end)
13552 + struct memtype *ml;
13553 + int err = -EINVAL;
13555 + /* Only track when pat_wc_enabled */
13556 + if (!pat_wc_enabled) {
13560 + /* Low ISA region is always mapped WB. No need to track */
13561 + if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) {
13565 + spin_lock(&memtype_lock);
13566 + list_for_each_entry(ml, &memtype_list, nd) {
13567 + if (ml->start == start && ml->end == end) {
13568 + list_del(&ml->nd);
13574 + spin_unlock(&memtype_lock);
13577 + printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n",
13578 + current->comm, current->pid, start, end);
13581 + pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end);
13587 + * /dev/mem mmap interface. The memtype used for mapping varies:
13588 + * - Use UC for mappings with O_SYNC flag
13589 + * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
13590 + * inherit the memtype from existing mapping.
13591 + * - Else use UC_MINUS memtype (for backward compatibility with existing
13594 +pgprot_t phys_mem_access_prot(struct file *file, unsigned long mfn,
13595 + unsigned long size, pgprot_t vma_prot)
13600 +#ifdef CONFIG_NONPROMISC_DEVMEM
13601 +/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/
13602 +static inline int range_is_allowed(unsigned long mfn, unsigned long size)
13607 +static inline int range_is_allowed(unsigned long mfn, unsigned long size)
13609 + u64 from = ((u64)mfn) << PAGE_SHIFT;
13610 + u64 to = from + size;
13611 + u64 cursor = from;
13613 + while (cursor < to) {
13614 + if (!devmem_is_allowed(mfn)) {
13616 + "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
13617 + current->comm, from, to);
13620 + cursor += PAGE_SIZE;
13625 +#endif /* CONFIG_NONPROMISC_DEVMEM */
13627 +int phys_mem_access_prot_allowed(struct file *file, unsigned long mfn,
13628 + unsigned long size, pgprot_t *vma_prot)
13630 + u64 addr = (u64)mfn << PAGE_SHIFT;
13631 + unsigned long flags = _PAGE_CACHE_UC_MINUS;
13634 + if (!range_is_allowed(mfn, size))
13637 + if (file->f_flags & O_SYNC) {
13638 + flags = _PAGE_CACHE_UC;
13641 +#ifndef CONFIG_X86_32
13642 +#ifndef CONFIG_XEN /* Xen sets correct MTRR type on non-RAM for us. */
13644 + * On the PPro and successors, the MTRRs are used to set
13645 + * memory types for physical addresses outside main memory,
13646 + * so blindly setting UC or PWT on those pages is wrong.
13647 + * For Pentiums and earlier, the surround logic should disable
13648 + * caching for the high addresses through the KEN pin, but
13649 + * we maintain the tradition of paranoia in this code.
13651 + if (!pat_wc_enabled &&
13652 + ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
13653 + test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
13654 + test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
13655 + test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) &&
13656 + (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
13657 + flags = _PAGE_CACHE_UC;
13663 + * With O_SYNC, we can only take UC mapping. Fail if we cannot.
13664 + * Without O_SYNC, we want to get
13665 + * - WB for WB-able memory and no other conflicting mappings
13666 + * - UC_MINUS for non-WB-able memory with no other conflicting mappings
13667 + * - Inherit from confliting mappings otherwise
13669 + if (flags != _PAGE_CACHE_UC_MINUS) {
13670 + retval = reserve_memtype(addr, addr + size, flags, NULL);
13672 + retval = reserve_memtype(addr, addr + size, -1, &flags);
13678 + if (ioremap_check_change_attr(mfn, size, flags) < 0) {
13679 + free_memtype(addr, addr + size);
13681 + "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
13682 + current->comm, current->pid,
13683 + cattr_name(flags),
13684 + addr, addr + size);
13688 + *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
13693 +void map_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot)
13695 + u64 addr = (u64)mfn << PAGE_SHIFT;
13696 + unsigned long flags;
13697 + unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
13699 + reserve_memtype(addr, addr + size, want_flags, &flags);
13700 + if (flags != want_flags) {
13702 + "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n",
13703 + current->comm, current->pid,
13704 + cattr_name(want_flags),
13705 + addr, (unsigned long long)(addr + size),
13706 + cattr_name(flags));
13710 +void unmap_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot)
13712 + u64 addr = (u64)mfn << PAGE_SHIFT;
13714 + free_memtype(addr, addr + size);
13717 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
13718 +++ sle11-2009-05-14/arch/x86/mm/pgtable-xen.c 2009-03-16 16:38:05.000000000 +0100
13720 +#include <linux/mm.h>
13721 +#include <linux/module.h>
13722 +#include <xen/features.h>
13723 +#include <asm/pgalloc.h>
13724 +#include <asm/pgtable.h>
13725 +#include <asm/tlb.h>
13726 +#include <asm/hypervisor.h>
13727 +#include <asm/mmu_context.h>
13729 +pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
13731 + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
13733 + make_lowmem_page_readonly(pte, XENFEAT_writable_page_tables);
13737 +static void _pte_free(struct page *page, unsigned int order)
13740 + __pte_free(page);
13743 +pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
13745 + struct page *pte;
13747 +#ifdef CONFIG_HIGHPTE
13748 + pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
13750 + pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
13753 + pgtable_page_ctor(pte);
13754 + SetPageForeign(pte, _pte_free);
13755 + init_page_count(pte);
13760 +void __pte_free(pgtable_t pte)
13762 + if (!PageHighMem(pte)) {
13763 + unsigned long va = (unsigned long)page_address(pte);
13764 + unsigned int level;
13765 + pte_t *ptep = lookup_address(va, &level);
13767 + BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
13768 + if (!pte_write(*ptep)
13769 + && HYPERVISOR_update_va_mapping(va,
13770 + mk_pte(pte, PAGE_KERNEL),
13774 +#ifdef CONFIG_HIGHPTE
13775 + ClearPagePinned(pte);
13780 + ClearPageForeign(pte);
13781 + init_page_count(pte);
13782 + pgtable_page_dtor(pte);
13783 + __free_page(pte);
13786 +void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
13788 + pgtable_page_dtor(pte);
13789 + paravirt_release_pte(page_to_pfn(pte));
13790 + tlb_remove_page(tlb, pte);
13793 +#if PAGETABLE_LEVELS > 2
13794 +static void _pmd_free(struct page *page, unsigned int order)
13797 + __pmd_free(page);
13800 +pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
13802 + struct page *pmd;
13804 + pmd = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
13807 + SetPageForeign(pmd, _pmd_free);
13808 + init_page_count(pmd);
13809 + return page_address(pmd);
13812 +void __pmd_free(pgtable_t pmd)
13814 + unsigned long va = (unsigned long)page_address(pmd);
13815 + unsigned int level;
13816 + pte_t *ptep = lookup_address(va, &level);
13818 + BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
13819 + if (!pte_write(*ptep)
13820 + && HYPERVISOR_update_va_mapping(va, mk_pte(pmd, PAGE_KERNEL), 0))
13823 + ClearPageForeign(pmd);
13824 + init_page_count(pmd);
13825 + __free_page(pmd);
13828 +void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
13830 + paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
13831 + tlb_remove_page(tlb, virt_to_page(pmd));
13834 +#if PAGETABLE_LEVELS > 3
13835 +void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
13837 + paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
13838 + tlb_remove_page(tlb, virt_to_page(pud));
13840 +#endif /* PAGETABLE_LEVELS > 3 */
13841 +#endif /* PAGETABLE_LEVELS > 2 */
13843 +#ifndef CONFIG_X86_64
13844 +#define TASK_SIZE64 TASK_SIZE
13847 +static void _pin_lock(struct mm_struct *mm, int lock) {
13849 + spin_lock(&mm->page_table_lock);
13850 +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
13851 + /* While mm->page_table_lock protects us against insertions and
13852 + * removals of higher level page table pages, it doesn't protect
13853 + * against updates of pte-s. Such updates, however, require the
13854 + * pte pages to be in consistent state (unpinned+writable or
13855 + * pinned+readonly). The pinning and attribute changes, however
13856 + * cannot be done atomically, which is why such updates must be
13857 + * prevented from happening concurrently.
13858 + * Note that no pte lock can ever elsewhere be acquired nesting
13859 + * with an already acquired one in the same mm, or with the mm's
13860 + * page_table_lock already acquired, as that would break in the
13861 + * non-split case (where all these are actually resolving to the
13862 + * one page_table_lock). Thus acquiring all of them here is not
13863 + * going to result in dead locks, and the order of acquires
13864 + * doesn't matter.
13867 + pgd_t *pgd = mm->pgd;
13870 + for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
13874 + if (pgd_none(*pgd))
13876 + pud = pud_offset(pgd, 0);
13877 + for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
13881 + if (pud_none(*pud))
13883 + pmd = pmd_offset(pud, 0);
13884 + for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
13887 + if (pmd_none(*pmd))
13889 + ptl = pte_lockptr(0, pmd);
13893 + spin_unlock(ptl);
13900 + spin_unlock(&mm->page_table_lock);
13902 +#define pin_lock(mm) _pin_lock(mm, 1)
13903 +#define pin_unlock(mm) _pin_lock(mm, 0)
13905 +#define PIN_BATCH sizeof(void *)
13906 +static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl);
13908 +static inline unsigned int pgd_walk_set_prot(struct page *page, pgprot_t flags,
13909 + unsigned int cpu, unsigned int seq)
13911 + unsigned long pfn = page_to_pfn(page);
13913 + if (PageHighMem(page)) {
13914 + if (pgprot_val(flags) & _PAGE_RW)
13915 + ClearPagePinned(page);
13917 + SetPagePinned(page);
13919 + MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
13920 + (unsigned long)__va(pfn << PAGE_SHIFT),
13921 + pfn_pte(pfn, flags), 0);
13922 + if (unlikely(++seq == PIN_BATCH)) {
13923 + if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
13924 + PIN_BATCH, NULL)))
13933 +static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
13935 + pgd_t *pgd = pgd_base;
13939 + unsigned int cpu, seq;
13940 + multicall_entry_t *mcl;
13942 + if (xen_feature(XENFEAT_auto_translated_physmap))
13948 + * Cannot iterate up to USER_PTRS_PER_PGD on x86-64 as these pagetables
13949 + * may not be the 'current' task's pagetables (e.g., current may be
13950 + * 32-bit, but the pagetables may be for a 64-bit task).
13951 + * Subtracting 1 from TASK_SIZE64 means the loop limit is correct
13952 + * regardless of whether TASK_SIZE64 is a multiple of PGDIR_SIZE.
13954 + for (g = 0, seq = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
13955 + if (pgd_none(*pgd))
13957 + pud = pud_offset(pgd, 0);
13958 + if (PTRS_PER_PUD > 1) /* not folded */
13959 + seq = pgd_walk_set_prot(virt_to_page(pud),flags,cpu,seq);
13960 + for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
13961 + if (pud_none(*pud))
13963 + pmd = pmd_offset(pud, 0);
13964 + if (PTRS_PER_PMD > 1) /* not folded */
13965 + seq = pgd_walk_set_prot(virt_to_page(pmd),flags,cpu,seq);
13966 + for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
13967 + if (pmd_none(*pmd))
13969 + seq = pgd_walk_set_prot(pmd_page(*pmd),flags,cpu,seq);
13974 + mcl = per_cpu(pb_mcl, cpu);
13975 +#ifdef CONFIG_X86_64
13976 + if (unlikely(seq > PIN_BATCH - 2)) {
13977 + if (unlikely(HYPERVISOR_multicall_check(mcl, seq, NULL)))
13981 + MULTI_update_va_mapping(mcl + seq,
13982 + (unsigned long)__user_pgd(pgd_base),
13983 + pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags),
13985 + MULTI_update_va_mapping(mcl + seq + 1,
13986 + (unsigned long)pgd_base,
13987 + pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
13989 + if (unlikely(HYPERVISOR_multicall_check(mcl, seq + 2, NULL)))
13992 + if (likely(seq != 0)) {
13993 + MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
13994 + (unsigned long)pgd_base,
13995 + pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
13997 + if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
14000 + } else if(HYPERVISOR_update_va_mapping((unsigned long)pgd_base,
14001 + pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
14009 +static void __pgd_pin(pgd_t *pgd)
14011 + pgd_walk(pgd, PAGE_KERNEL_RO);
14012 + kmap_flush_unused();
14013 + xen_pgd_pin(__pa(pgd)); /* kernel */
14014 +#ifdef CONFIG_X86_64
14015 + xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */
14017 + SetPagePinned(virt_to_page(pgd));
14020 +static void __pgd_unpin(pgd_t *pgd)
14022 + xen_pgd_unpin(__pa(pgd));
14023 +#ifdef CONFIG_X86_64
14024 + xen_pgd_unpin(__pa(__user_pgd(pgd)));
14026 + pgd_walk(pgd, PAGE_KERNEL);
14027 + ClearPagePinned(virt_to_page(pgd));
14030 +static void pgd_test_and_unpin(pgd_t *pgd)
14032 + if (PagePinned(virt_to_page(pgd)))
14033 + __pgd_unpin(pgd);
14036 +void mm_pin(struct mm_struct *mm)
14038 + if (xen_feature(XENFEAT_writable_page_tables))
14042 + __pgd_pin(mm->pgd);
14046 +void mm_unpin(struct mm_struct *mm)
14048 + if (xen_feature(XENFEAT_writable_page_tables))
14052 + __pgd_unpin(mm->pgd);
14056 +void mm_pin_all(void)
14058 + struct page *page;
14059 + unsigned long flags;
14061 + if (xen_feature(XENFEAT_writable_page_tables))
14065 + * Allow uninterrupted access to the pgd_list. Also protects
14066 + * __pgd_pin() by disabling preemption.
14067 + * All other CPUs must be at a safe point (e.g., in stop_machine
14068 + * or offlined entirely).
14070 + spin_lock_irqsave(&pgd_lock, flags);
14071 + list_for_each_entry(page, &pgd_list, lru) {
14072 + if (!PagePinned(page))
14073 + __pgd_pin((pgd_t *)page_address(page));
14075 + spin_unlock_irqrestore(&pgd_lock, flags);
14078 +void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
14080 + if (!PagePinned(virt_to_page(mm->pgd)))
14084 +void arch_exit_mmap(struct mm_struct *mm)
14086 + struct task_struct *tsk = current;
14091 + * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
14092 + * *much* faster this way, as no tlb flushes means bigger wrpt batches.
14094 + if (tsk->active_mm == mm) {
14095 + tsk->active_mm = &init_mm;
14096 + atomic_inc(&init_mm.mm_count);
14098 + switch_mm(mm, &init_mm, tsk);
14100 + atomic_dec(&mm->mm_count);
14101 + BUG_ON(atomic_read(&mm->mm_count) == 0);
14104 + task_unlock(tsk);
14106 + if (PagePinned(virt_to_page(mm->pgd))
14107 + && atomic_read(&mm->mm_count) == 1
14108 + && !mm->context.has_foreign_mappings)
14112 +static inline void pgd_list_add(pgd_t *pgd)
14114 + struct page *page = virt_to_page(pgd);
14116 + list_add(&page->lru, &pgd_list);
14119 +static inline void pgd_list_del(pgd_t *pgd)
14121 + struct page *page = virt_to_page(pgd);
14123 + list_del(&page->lru);
14126 +#define UNSHARED_PTRS_PER_PGD \
14127 + (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
14129 +static void pgd_ctor(void *p)
14132 + unsigned long flags;
14134 + pgd_test_and_unpin(pgd);
14136 + /* Clear usermode parts of PGD */
14137 + memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
14139 + spin_lock_irqsave(&pgd_lock, flags);
14141 + /* If the pgd points to a shared pagetable level (either the
14142 + ptes in non-PAE, or shared PMD in PAE), then just copy the
14143 + references from swapper_pg_dir. */
14144 + if (PAGETABLE_LEVELS == 2 ||
14145 + (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
14146 + PAGETABLE_LEVELS == 4) {
14147 + clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
14148 + swapper_pg_dir + KERNEL_PGD_BOUNDARY,
14149 + KERNEL_PGD_PTRS);
14150 + paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
14151 + __pa(swapper_pg_dir) >> PAGE_SHIFT,
14152 + KERNEL_PGD_BOUNDARY,
14153 + KERNEL_PGD_PTRS);
14156 +#ifdef CONFIG_X86_64
14157 + /* set level3_user_pgt for vsyscall area */
14158 + __user_pgd(pgd)[pgd_index(VSYSCALL_START)] =
14159 + __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
14162 +#ifndef CONFIG_X86_PAE
14163 + /* list required to sync kernel mapping updates */
14164 + if (!SHARED_KERNEL_PMD)
14165 + pgd_list_add(pgd);
14168 + spin_unlock_irqrestore(&pgd_lock, flags);
14171 +static void pgd_dtor(void *pgd)
14173 + unsigned long flags; /* can be called from interrupt context */
14175 + if (!SHARED_KERNEL_PMD) {
14176 + spin_lock_irqsave(&pgd_lock, flags);
14177 + pgd_list_del(pgd);
14178 + spin_unlock_irqrestore(&pgd_lock, flags);
14181 + pgd_test_and_unpin(pgd);
14185 + * List of all pgd's needed for non-PAE so it can invalidate entries
14186 + * in both cached and uncached pgd's; not needed for PAE since the
14187 + * kernel pmd is shared. If PAE were not to share the pmd a similar
14188 + * tactic would be needed. This is essentially codepath-based locking
14189 + * against pageattr.c; it is the unique case in which a valid change
14190 + * of kernel pagetables can't be lazily synchronized by vmalloc faults.
14191 + * vmalloc faults work because attached pagetables are never freed.
14195 +#ifdef CONFIG_X86_PAE
14197 + * Mop up any pmd pages which may still be attached to the pgd.
14198 + * Normally they will be freed by munmap/exit_mmap, but any pmd we
14199 + * preallocate which never got a corresponding vma will need to be
14200 + * freed manually.
14202 +static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
14206 + for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
14207 + pgd_t pgd = pgdp[i];
14209 + if (__pgd_val(pgd) != 0) {
14210 + pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
14212 + pgdp[i] = xen_make_pgd(0);
14214 + paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
14215 + pmd_free(mm, pmd);
14219 + if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
14220 + xen_destroy_contiguous_region((unsigned long)pgdp, 0);
14224 + * In PAE mode, we need to do a cr3 reload (=tlb flush) when
14225 + * updating the top-level pagetable entries to guarantee the
14226 + * processor notices the update. Since this is expensive, and
14227 + * all 4 top-level entries are used almost immediately in a
14228 + * new process's life, we just pre-populate them here.
14230 + * Also, if we're in a paravirt environment where the kernel pmd is
14231 + * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
14232 + * and initialize the kernel pmds here.
14234 +static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
14237 + pmd_t *pmds[UNSHARED_PTRS_PER_PGD];
14238 + unsigned long addr, flags;
14242 + * We can race save/restore (if we sleep during a GFP_KERNEL memory
14243 + * allocation). We therefore store virtual addresses of pmds as they
14244 + * do not change across save/restore, and poke the machine addresses
14245 + * into the pgdir under the pgd_lock.
14247 + for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; i++, addr += PUD_SIZE) {
14248 + pmds[i] = pmd_alloc_one(mm, addr);
14253 + spin_lock_irqsave(&pgd_lock, flags);
14255 + /* Protect against save/restore: move below 4GB under pgd_lock. */
14256 + if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)
14257 + && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) {
14258 + spin_unlock_irqrestore(&pgd_lock, flags);
14261 + pmd_free(mm, pmds[i]);
14265 + /* Copy kernel pmd contents and write-protect the new pmds. */
14266 + pud = pud_offset(pgd, 0);
14267 + for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
14268 + i++, pud++, addr += PUD_SIZE) {
14269 + if (i >= KERNEL_PGD_BOUNDARY) {
14271 + (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
14272 + sizeof(pmd_t) * PTRS_PER_PMD);
14273 + make_lowmem_page_readonly(
14274 + pmds[i], XENFEAT_writable_page_tables);
14277 + /* It is safe to poke machine addresses of pmds under the pgd_lock. */
14278 + pud_populate(mm, pud, pmds[i]);
14281 + /* List required to sync kernel mapping updates and
14282 + * to pin/unpin on save/restore. */
14283 + pgd_list_add(pgd);
14285 + spin_unlock_irqrestore(&pgd_lock, flags);
14290 +void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
14292 + struct page *page = virt_to_page(pmd);
14293 + unsigned long pfn = page_to_pfn(page);
14295 + paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
14297 + /* Note: almost everything apart from _PAGE_PRESENT is
14298 + reserved at the pmd (PDPT) level. */
14299 + if (PagePinned(virt_to_page(mm->pgd))) {
14300 + BUG_ON(PageHighMem(page));
14301 + BUG_ON(HYPERVISOR_update_va_mapping(
14302 + (unsigned long)__va(pfn << PAGE_SHIFT),
14303 + pfn_pte(pfn, PAGE_KERNEL_RO), 0));
14304 + set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
14306 + *pudp = __pud(__pa(pmd) | _PAGE_PRESENT);
14309 + * According to Intel App note "TLBs, Paging-Structure Caches,
14310 + * and Their Invalidation", April 2007, document 317080-001,
14311 + * section 8.1: in PAE mode we explicitly have to flush the
14312 + * TLB via cr3 if the top-level pgd is changed...
14314 + if (mm == current->active_mm)
14317 +#else /* !CONFIG_X86_PAE */
14318 +/* No need to prepopulate any pagetable entries in non-PAE modes. */
14319 +static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
14324 +static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
14327 +#endif /* CONFIG_X86_PAE */
14329 +#ifdef CONFIG_X86_64
14330 +/* We allocate two contiguous pages for kernel and user. */
14331 +#define PGD_ORDER 1
14333 +#define PGD_ORDER 0
14336 +pgd_t *pgd_alloc(struct mm_struct *mm)
14338 + pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
14340 + /* so that alloc_pd can use it */
14345 + if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
14346 + free_pages((unsigned long)pgd, PGD_ORDER);
14353 +void pgd_free(struct mm_struct *mm, pgd_t *pgd)
14356 + * After this the pgd should not be pinned for the duration of this
14357 + * function's execution. We should never sleep and thus never race:
14358 + * 1. User pmds will not become write-protected under our feet due
14359 + * to a concurrent mm_pin_all().
14360 + * 2. The machine addresses in PGD entries will not become invalid
14361 + * due to a concurrent save/restore.
14365 + pgd_mop_up_pmds(mm, pgd);
14366 + free_pages((unsigned long)pgd, PGD_ORDER);
14369 +/* blktap and gntdev need this, as otherwise they would implicitly (and
14370 + * needlessly, as they never use it) reference init_mm. */
14371 +pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *vma,
14372 + unsigned long addr, pte_t *ptep, int full)
14374 + return ptep_get_and_clear_full(vma->vm_mm, addr, ptep, full);
14376 +EXPORT_SYMBOL_GPL(xen_ptep_get_and_clear_full);
14378 +int ptep_set_access_flags(struct vm_area_struct *vma,
14379 + unsigned long address, pte_t *ptep,
14380 + pte_t entry, int dirty)
14382 + int changed = !pte_same(*ptep, entry);
14384 + if (changed && dirty) {
14385 + if (likely(vma->vm_mm == current->mm)) {
14386 + if (HYPERVISOR_update_va_mapping(address,
14388 + (unsigned long)vma->vm_mm->cpu_vm_mask.bits|
14389 + UVMF_INVLPG|UVMF_MULTI))
14392 + xen_l1_entry_update(ptep, entry);
14393 + flush_tlb_page(vma, address);
14400 +int ptep_test_and_clear_young(struct vm_area_struct *vma,
14401 + unsigned long addr, pte_t *ptep)
14405 + if (pte_young(*ptep))
14406 + ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
14410 + pte_update(vma->vm_mm, addr, ptep);
14415 +int ptep_clear_flush_young(struct vm_area_struct *vma,
14416 + unsigned long address, pte_t *ptep)
14418 + pte_t pte = *ptep;
14419 + int young = pte_young(pte);
14421 + pte = pte_mkold(pte);
14422 + if (PagePinned(virt_to_page(vma->vm_mm->pgd)))
14423 + ptep_set_access_flags(vma, address, ptep, pte, young);
14425 + ptep->pte_low = pte.pte_low;
14429 --- sle11-2009-05-14.orig/arch/x86/mm/pgtable_32-xen.c 2009-03-16 16:33:40.000000000 +0100
14430 +++ sle11-2009-05-14/arch/x86/mm/pgtable_32-xen.c 2009-03-16 16:38:05.000000000 +0100
14433 - * linux/arch/i386/mm/pgtable.c
14436 #include <linux/sched.h>
14437 #include <linux/kernel.h>
14438 #include <linux/errno.h>
14439 @@ -41,7 +37,6 @@ void show_mem(void)
14441 printk(KERN_INFO "Mem-info:\n");
14443 - printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
14444 for_each_online_pgdat(pgdat) {
14445 pgdat_resize_lock(pgdat, &flags);
14446 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
14447 @@ -157,243 +152,6 @@ void __init reserve_top_address(unsigned
14448 __VMALLOC_RESERVE += reserve;
14451 -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
14453 - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
14455 - make_lowmem_page_readonly(pte, XENFEAT_writable_page_tables);
14460 - * List of all pgd's needed for non-PAE so it can invalidate entries
14461 - * in both cached and uncached pgd's; not needed for PAE since the
14462 - * kernel pmd is shared. If PAE were not to share the pmd a similar
14463 - * tactic would be needed. This is essentially codepath-based locking
14464 - * against pageattr.c; it is the unique case in which a valid change
14465 - * of kernel pagetables can't be lazily synchronized by vmalloc faults.
14466 - * vmalloc faults work because attached pagetables are never freed.
14469 -static inline void pgd_list_add(pgd_t *pgd)
14471 - struct page *page = virt_to_page(pgd);
14473 - list_add(&page->lru, &pgd_list);
14476 -static inline void pgd_list_del(pgd_t *pgd)
14478 - struct page *page = virt_to_page(pgd);
14480 - list_del(&page->lru);
14483 -#define UNSHARED_PTRS_PER_PGD \
14484 - (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
14486 -static void pgd_ctor(void *p)
14489 - unsigned long flags;
14491 - pgd_test_and_unpin(pgd);
14493 - /* Clear usermode parts of PGD */
14494 - memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
14496 - spin_lock_irqsave(&pgd_lock, flags);
14498 - /* If the pgd points to a shared pagetable level (either the
14499 - ptes in non-PAE, or shared PMD in PAE), then just copy the
14500 - references from swapper_pg_dir. */
14501 - if (PAGETABLE_LEVELS == 2 ||
14502 - (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
14503 - clone_pgd_range(pgd + USER_PTRS_PER_PGD,
14504 - swapper_pg_dir + USER_PTRS_PER_PGD,
14505 - KERNEL_PGD_PTRS);
14506 - paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
14507 - __pa(swapper_pg_dir) >> PAGE_SHIFT,
14508 - USER_PTRS_PER_PGD,
14509 - KERNEL_PGD_PTRS);
14512 - /* list required to sync kernel mapping updates */
14513 - if (PAGETABLE_LEVELS == 2)
14514 - pgd_list_add(pgd);
14516 - spin_unlock_irqrestore(&pgd_lock, flags);
14519 -static void pgd_dtor(void *pgd)
14521 - unsigned long flags; /* can be called from interrupt context */
14523 - if (!SHARED_KERNEL_PMD) {
14524 - spin_lock_irqsave(&pgd_lock, flags);
14525 - pgd_list_del(pgd);
14526 - spin_unlock_irqrestore(&pgd_lock, flags);
14529 - pgd_test_and_unpin(pgd);
14532 -#ifdef CONFIG_X86_PAE
14534 - * Mop up any pmd pages which may still be attached to the pgd.
14535 - * Normally they will be freed by munmap/exit_mmap, but any pmd we
14536 - * preallocate which never got a corresponding vma will need to be
14537 - * freed manually.
14539 -static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
14543 - for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
14544 - pgd_t pgd = pgdp[i];
14546 - if (__pgd_val(pgd) != 0) {
14547 - pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
14549 - pgdp[i] = xen_make_pgd(0);
14551 - paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
14552 - pmd_free(mm, pmd);
14558 - * In PAE mode, we need to do a cr3 reload (=tlb flush) when
14559 - * updating the top-level pagetable entries to guarantee the
14560 - * processor notices the update. Since this is expensive, and
14561 - * all 4 top-level entries are used almost immediately in a
14562 - * new process's life, we just pre-populate them here.
14564 - * Also, if we're in a paravirt environment where the kernel pmd is
14565 - * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
14566 - * and initialize the kernel pmds here.
14568 -static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
14571 - pmd_t *pmds[UNSHARED_PTRS_PER_PGD];
14572 - unsigned long addr, flags;
14576 - * We can race save/restore (if we sleep during a GFP_KERNEL memory
14577 - * allocation). We therefore store virtual addresses of pmds as they
14578 - * do not change across save/restore, and poke the machine addresses
14579 - * into the pgdir under the pgd_lock.
14581 - for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; i++, addr += PUD_SIZE) {
14582 - pmds[i] = pmd_alloc_one(mm, addr);
14587 - spin_lock_irqsave(&pgd_lock, flags);
14589 - /* Protect against save/restore: move below 4GB under pgd_lock. */
14590 - if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)
14591 - && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) {
14592 - spin_unlock_irqrestore(&pgd_lock, flags);
14595 - pmd_free(mm, pmds[i]);
14599 - /* Copy kernel pmd contents and write-protect the new pmds. */
14600 - pud = pud_offset(pgd, 0);
14601 - for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
14602 - i++, pud++, addr += PUD_SIZE) {
14603 - if (i >= USER_PTRS_PER_PGD) {
14605 - (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
14606 - sizeof(pmd_t) * PTRS_PER_PMD);
14607 - make_lowmem_page_readonly(
14608 - pmds[i], XENFEAT_writable_page_tables);
14611 - /* It is safe to poke machine addresses of pmds under the pgd_lock. */
14612 - pud_populate(mm, pud, pmds[i]);
14615 - /* List required to sync kernel mapping updates and
14616 - * to pin/unpin on save/restore. */
14617 - pgd_list_add(pgd);
14619 - spin_unlock_irqrestore(&pgd_lock, flags);
14623 -#else /* !CONFIG_X86_PAE */
14624 -/* No need to prepopulate any pagetable entries in non-PAE modes. */
14625 -static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
14630 -static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
14633 -#endif /* CONFIG_X86_PAE */
14635 -pgd_t *pgd_alloc(struct mm_struct *mm)
14637 - pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
14639 - /* so that alloc_pd can use it */
14644 - if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
14645 - free_page((unsigned long)pgd);
14652 -void pgd_free(struct mm_struct *mm, pgd_t *pgd)
14655 - * After this the pgd should not be pinned for the duration of this
14656 - * function's execution. We should never sleep and thus never race:
14657 - * 1. User pmds will not become write-protected under our feet due
14658 - * to a concurrent mm_pin_all().
14659 - * 2. The machine addresses in PGD entries will not become invalid
14660 - * due to a concurrent save/restore.
14664 - if (PTRS_PER_PMD > 1 && !xen_feature(XENFEAT_pae_pgdir_above_4gb))
14665 - xen_destroy_contiguous_region((unsigned long)pgd, 0);
14667 - pgd_mop_up_pmds(mm, pgd);
14668 - free_page((unsigned long)pgd);
14671 -void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
14673 - pgtable_page_dtor(pte);
14674 - paravirt_release_pt(page_to_pfn(pte));
14675 - tlb_remove_page(tlb, pte);
14678 -#ifdef CONFIG_X86_PAE
14680 -void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
14682 - paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
14683 - tlb_remove_page(tlb, virt_to_page(pmd));
14688 void make_lowmem_page_readonly(void *va, unsigned int feature)
14691 --- sle11-2009-05-14.orig/arch/x86/pci/i386.c 2009-05-14 10:56:29.000000000 +0200
14692 +++ sle11-2009-05-14/arch/x86/pci/i386.c 2009-05-14 11:20:29.000000000 +0200
14693 @@ -331,10 +331,14 @@ int pci_mmap_page_range(struct pci_dev *
14697 +#ifndef CONFIG_XEN
14698 if (((vma->vm_pgoff < max_low_pfn_mapped) ||
14699 (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) &&
14700 vma->vm_pgoff < max_pfn_mapped)) &&
14701 ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
14703 + if (ioremap_check_change_attr(vma->vm_pgoff, len, flags)) {
14705 free_memtype(addr, addr + len);
14708 --- sle11-2009-05-14.orig/arch/x86/pci/irq-xen.c 2009-03-16 16:33:40.000000000 +0100
14709 +++ sle11-2009-05-14/arch/x86/pci/irq-xen.c 2009-03-16 16:38:05.000000000 +0100
14710 @@ -140,9 +140,11 @@ static void __init pirq_peer_trick(void)
14711 busmap[e->bus] = 1;
14713 for(i = 1; i < 256; i++) {
14715 if (!busmap[i] || pci_find_bus(0, i))
14717 - if (pci_scan_bus_with_sysdata(i))
14718 + node = get_mp_bus_to_node(i);
14719 + if (pci_scan_bus_on_node(i, &pci_root_ops, node))
14720 printk(KERN_INFO "PCI: Discovered primary peer "
14721 "bus %02x [IRQ]\n", i);
14723 @@ -204,7 +206,7 @@ static int pirq_ali_get(struct pci_dev *
14725 static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
14727 - WARN_ON_ONCE(pirq >= 16);
14728 + WARN_ON_ONCE(pirq > 16);
14729 return irqmap[read_config_nybble(router, 0x48, pirq-1)];
14732 @@ -213,7 +215,7 @@ static int pirq_ali_set(struct pci_dev *
14733 static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
14734 unsigned int val = irqmap[irq];
14736 - WARN_ON_ONCE(pirq >= 16);
14737 + WARN_ON_ONCE(pirq > 16);
14739 write_config_nybble(router, 0x48, pirq-1, val);
14741 @@ -264,7 +266,7 @@ static int pirq_via586_get(struct pci_de
14743 static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
14745 - WARN_ON_ONCE(pirq >= 5);
14746 + WARN_ON_ONCE(pirq > 5);
14747 return read_config_nybble(router, 0x55, pirqmap[pirq-1]);
14750 @@ -272,7 +274,7 @@ static int pirq_via586_set(struct pci_de
14752 static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
14754 - WARN_ON_ONCE(pirq >= 5);
14755 + WARN_ON_ONCE(pirq > 5);
14756 write_config_nybble(router, 0x55, pirqmap[pirq-1], irq);
14759 @@ -286,7 +288,7 @@ static int pirq_ite_get(struct pci_dev *
14761 static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
14763 - WARN_ON_ONCE(pirq >= 4);
14764 + WARN_ON_ONCE(pirq > 4);
14765 return read_config_nybble(router,0x43, pirqmap[pirq-1]);
14768 @@ -294,7 +296,7 @@ static int pirq_ite_set(struct pci_dev *
14770 static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
14772 - WARN_ON_ONCE(pirq >= 4);
14773 + WARN_ON_ONCE(pirq > 4);
14774 write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
14777 @@ -623,6 +625,13 @@ static __init int via_router_probe(struc
14779 device = PCI_DEVICE_ID_VIA_8235;
14781 + case PCI_DEVICE_ID_VIA_8237:
14783 + * Asus a7v600 bios wrongly reports 8237
14784 + * as 586-compatible
14786 + device = PCI_DEVICE_ID_VIA_8237;
14791 --- sle11-2009-05-14.orig/arch/x86/vdso/vdso32-setup-xen.c 2009-03-16 16:33:40.000000000 +0100
14792 +++ sle11-2009-05-14/arch/x86/vdso/vdso32-setup-xen.c 2009-03-16 16:38:05.000000000 +0100
14793 @@ -164,7 +164,7 @@ static __init void relocate_vdso(Elf32_E
14797 - BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
14798 + BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
14799 !elf_check_arch_ia32(ehdr) ||
14800 ehdr->e_type != ET_DYN);
14802 @@ -233,8 +233,12 @@ void syscall32_cpu_init(void)
14806 - if (use_sysenter < 0)
14807 - use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
14808 + if (use_sysenter < 0) {
14809 + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
14810 + use_sysenter = 1;
14811 + if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
14812 + use_sysenter = 1;
14816 #define compat_uses_vma 1
14817 @@ -337,8 +341,6 @@ int __init sysenter_setup(void)
14819 #ifdef CONFIG_X86_32
14822 - printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
14825 #if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT < 0x030200
14826 @@ -383,6 +385,9 @@ int arch_setup_additional_pages(struct l
14830 + if (vdso_enabled == VDSO_DISABLED)
14833 down_write(&mm->mmap_sem);
14835 /* Test compat mode once here, in case someone
14836 --- sle11-2009-05-14.orig/drivers/acpi/processor_core.c 2009-02-16 15:58:14.000000000 +0100
14837 +++ sle11-2009-05-14/drivers/acpi/processor_core.c 2009-03-16 16:38:05.000000000 +0100
14838 @@ -657,7 +657,7 @@ static int acpi_processor_get_info(struc
14841 status = acpi_evaluate_object(pr->handle, "_SUN", NULL, &buffer);
14842 - if (ACPI_SUCCESS(status))
14843 + if (ACPI_SUCCESS(status) && pr->id != -1)
14844 arch_fix_phys_package_id(pr->id, object.integer.value);
14847 --- sle11-2009-05-14.orig/drivers/input/xen-kbdfront.c 2009-05-14 10:56:29.000000000 +0200
14848 +++ sle11-2009-05-14/drivers/input/xen-kbdfront.c 2009-03-16 16:38:05.000000000 +0100
14849 @@ -325,7 +325,6 @@ static struct xenbus_device_id xenkbd_id
14851 static struct xenbus_driver xenkbd = {
14853 - .owner = THIS_MODULE,
14855 .probe = xenkbd_probe,
14856 .remove = xenkbd_remove,
14857 --- sle11-2009-05-14.orig/drivers/oprofile/cpu_buffer.c 2009-03-12 16:15:32.000000000 +0100
14858 +++ sle11-2009-05-14/drivers/oprofile/cpu_buffer.c 2009-03-16 16:38:05.000000000 +0100
14859 @@ -341,7 +341,7 @@ void oprofile_add_mode(int cpu_mode)
14861 int oprofile_add_domain_switch(int32_t domain_id)
14863 - struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
14864 + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
14866 /* should have space for switching into and out of domain
14867 (2 slots each) plus one sample and one cpu mode switch */
14868 --- sle11-2009-05-14.orig/drivers/pci/msi-xen.c 2009-03-16 16:33:40.000000000 +0100
14869 +++ sle11-2009-05-14/drivers/pci/msi-xen.c 2009-03-16 16:38:05.000000000 +0100
14870 @@ -583,7 +583,7 @@ int pci_enable_msi(struct pci_dev* dev)
14871 EXPORT_SYMBOL(pci_enable_msi);
14873 extern void pci_frontend_disable_msi(struct pci_dev* dev);
14874 -void pci_disable_msi(struct pci_dev* dev)
14875 +void pci_msi_shutdown(struct pci_dev* dev)
14879 @@ -612,6 +612,10 @@ void pci_disable_msi(struct pci_dev* dev
14880 pci_intx_for_msi(dev, 1);
14881 dev->msi_enabled = 0;
14883 +void pci_disable_msi(struct pci_dev* dev)
14885 + pci_msi_shutdown(dev);
14887 EXPORT_SYMBOL(pci_disable_msi);
14890 @@ -714,7 +718,7 @@ int pci_enable_msix(struct pci_dev* dev,
14891 EXPORT_SYMBOL(pci_enable_msix);
14893 extern void pci_frontend_disable_msix(struct pci_dev* dev);
14894 -void pci_disable_msix(struct pci_dev* dev)
14895 +void pci_msix_shutdown(struct pci_dev* dev)
14897 if (!pci_msi_enable)
14899 @@ -751,6 +755,10 @@ void pci_disable_msix(struct pci_dev* de
14900 pci_intx_for_msi(dev, 1);
14901 dev->msix_enabled = 0;
14903 +void pci_disable_msix(struct pci_dev* dev)
14905 + pci_msix_shutdown(dev);
14907 EXPORT_SYMBOL(pci_disable_msix);
14910 --- sle11-2009-05-14.orig/drivers/video/Kconfig 2009-02-16 15:58:02.000000000 +0100
14911 +++ sle11-2009-05-14/drivers/video/Kconfig 2009-03-16 16:38:05.000000000 +0100
14912 @@ -2029,7 +2029,7 @@ config FB_VIRTUAL
14914 config XEN_FBDEV_FRONTEND
14915 tristate "Xen virtual frame buffer support"
14916 - depends on FB && XEN
14917 + depends on FB && PARAVIRT_XEN
14918 select FB_SYS_FILLRECT
14919 select FB_SYS_COPYAREA
14920 select FB_SYS_IMAGEBLIT
14921 --- sle11-2009-05-14.orig/drivers/video/xen-fbfront.c 2009-05-14 10:56:29.000000000 +0200
14922 +++ sle11-2009-05-14/drivers/video/xen-fbfront.c 2009-03-16 16:38:05.000000000 +0100
14923 @@ -670,7 +670,6 @@ static struct xenbus_device_id xenfb_ids
14925 static struct xenbus_driver xenfb = {
14927 - .owner = THIS_MODULE,
14929 .probe = xenfb_probe,
14930 .remove = xenfb_remove,
14931 --- sle11-2009-05-14.orig/drivers/xen/Kconfig 2009-03-04 11:28:34.000000000 +0100
14932 +++ sle11-2009-05-14/drivers/xen/Kconfig 2009-03-16 16:38:05.000000000 +0100
14934 # This Kconfig describe xen options
14937 -mainmenu "Xen Configuration"
14942 --- sle11-2009-05-14.orig/drivers/xen/Makefile 2009-02-16 16:17:21.000000000 +0100
14943 +++ sle11-2009-05-14/drivers/xen/Makefile 2009-03-16 16:38:05.000000000 +0100
14945 -obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o
14946 +obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o
14947 +xen-xencomm-$(CONFIG_PARAVIRT_XEN) := xencomm.o
14948 +xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o
14950 +xen-balloon-$(CONFIG_XEN) := balloon/
14951 obj-$(CONFIG_XEN) += core/
14952 obj-$(CONFIG_XEN) += console/
14953 obj-$(CONFIG_XEN) += evtchn/
14954 @@ -7,7 +10,8 @@ obj-y += xenbus/
14955 obj-$(CONFIG_XEN) += char/
14957 obj-$(CONFIG_XEN) += util.o
14958 -obj-$(CONFIG_XEN_BALLOON) += balloon/
14959 +obj-$(CONFIG_XEN_XENCOMM) += $(xen-xencomm-y)
14960 +obj-$(CONFIG_XEN_BALLOON) += $(xen-balloon-y)
14961 obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
14962 obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
14963 obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
14964 --- sle11-2009-05-14.orig/drivers/xen/blkfront/blkfront.c 2009-03-24 10:12:53.000000000 +0100
14965 +++ sle11-2009-05-14/drivers/xen/blkfront/blkfront.c 2009-05-19 10:38:53.000000000 +0200
14966 @@ -285,7 +285,11 @@ static void backend_changed(struct xenbu
14969 case XenbusStateClosing:
14970 - bd = bdget(info->dev);
14972 + xenbus_frontend_closed(dev);
14975 + bd = bdget_disk(info->gd, 0);
14977 xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
14979 --- sle11-2009-05-14.orig/drivers/xen/blkfront/block.h 2009-03-24 10:11:58.000000000 +0100
14980 +++ sle11-2009-05-14/drivers/xen/blkfront/block.h 2009-03-16 16:38:05.000000000 +0100
14981 @@ -96,7 +96,6 @@ struct blk_shadow {
14982 struct blkfront_info
14984 struct xenbus_device *xbdev;
14986 struct gendisk *gd;
14988 blkif_vdev_t handle;
14989 --- sle11-2009-05-14.orig/drivers/xen/blkfront/vbd.c 2009-02-16 16:17:21.000000000 +0100
14990 +++ sle11-2009-05-14/drivers/xen/blkfront/vbd.c 2009-03-16 16:38:05.000000000 +0100
14991 @@ -246,17 +246,32 @@ xlvbd_init_blk_queue(struct gendisk *gd,
14996 -xlvbd_alloc_gendisk(int major, int minor, blkif_sector_t capacity, int vdevice,
14997 - u16 vdisk_info, u16 sector_size,
14998 - struct blkfront_info *info)
15000 +xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
15001 + u16 sector_size, struct blkfront_info *info)
15003 + int major, minor;
15004 struct gendisk *gd;
15005 struct xlbd_major_info *mi;
15008 unsigned int offset;
15010 + if ((vdevice>>EXT_SHIFT) > 1) {
15011 + /* this is above the extended range; something is wrong */
15012 + printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", vdevice);
15016 + if (!VDEV_IS_EXTENDED(vdevice)) {
15017 + major = BLKIF_MAJOR(vdevice);
15018 + minor = BLKIF_MINOR(vdevice);
15022 + minor = BLKIF_MINOR_EXT(vdevice);
15025 BUG_ON(info->gd != NULL);
15026 BUG_ON(info->mi != NULL);
15027 BUG_ON(info->rq != NULL);
15028 @@ -337,41 +352,6 @@ xlvbd_alloc_gendisk(int major, int minor
15033 -xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
15034 - u16 sector_size, struct blkfront_info *info)
15036 - struct block_device *bd;
15038 - int major, minor;
15040 - if ((vdevice>>EXT_SHIFT) > 1) {
15041 - /* this is above the extended range; something is wrong */
15042 - printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", vdevice);
15046 - if (!VDEV_IS_EXTENDED(vdevice)) {
15047 - major = BLKIF_MAJOR(vdevice);
15048 - minor = BLKIF_MINOR(vdevice);
15052 - minor = BLKIF_MINOR_EXT(vdevice);
15055 - info->dev = MKDEV(major, minor);
15056 - bd = bdget(info->dev);
15060 - err = xlvbd_alloc_gendisk(major, minor, capacity, vdevice, vdisk_info,
15061 - sector_size, info);
15068 xlvbd_del(struct blkfront_info *info)
15070 --- sle11-2009-05-14.orig/drivers/xen/blktap/blktap.c 2009-04-20 11:38:54.000000000 +0200
15071 +++ sle11-2009-05-14/drivers/xen/blktap/blktap.c 2009-04-20 11:40:14.000000000 +0200
15072 @@ -111,6 +111,7 @@ typedef struct tap_blkif {
15073 unsigned long mode; /*current switching mode */
15074 int minor; /*Minor number for tapdisk device */
15075 pid_t pid; /*tapdisk process id */
15076 + struct pid_namespace *pid_ns; /*... and its corresponding namespace */
15077 enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace
15079 unsigned long *idx_map; /*Record the user ring id to kern
15080 @@ -299,16 +300,14 @@ struct tap_vma_priv {
15081 struct page *map[];
15084 -static struct page *blktap_nopage(struct vm_area_struct *vma,
15085 - unsigned long address,
15087 +static int blktap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
15090 * if the page has not been mapped in by the driver then return
15091 - * NOPAGE_SIGBUS to the domain.
15092 + * VM_FAULT_SIGBUS to the domain.
15095 - return NOPAGE_SIGBUS;
15096 + return VM_FAULT_SIGBUS;
15099 static pte_t blktap_clear_pte(struct vm_area_struct *vma,
15100 @@ -404,7 +403,7 @@ static void blktap_vma_close(struct vm_a
15103 struct vm_operations_struct blktap_vm_ops = {
15104 - nopage: blktap_nopage,
15105 + fault: blktap_fault,
15106 zap_pte: blktap_clear_pte,
15107 close: blktap_vma_close,
15109 @@ -498,9 +497,8 @@ found:
15110 tapfds[minor] = info;
15112 if ((class = get_xen_class()) != NULL)
15113 - class_device_create(class, NULL,
15114 - MKDEV(blktap_major, minor), NULL,
15115 - "blktap%d", minor);
15116 + device_create(class, NULL, MKDEV(blktap_major, minor),
15117 + "blktap%d", minor);
15121 @@ -542,7 +540,7 @@ void signal_tapdisk(int idx)
15124 if (info->pid > 0) {
15125 - ptask = find_task_by_pid(info->pid);
15126 + ptask = find_task_by_pid_ns(info->pid, info->pid_ns);
15128 info->status = CLEANSHUTDOWN;
15130 @@ -770,8 +768,9 @@ static int blktap_ioctl(struct inode *in
15133 info->pid = (pid_t)arg;
15134 - DPRINTK("blktap: pid received %d\n",
15136 + info->pid_ns = current->nsproxy->pid_ns;
15137 + DPRINTK("blktap: pid received %p:%d\n",
15138 + info->pid_ns, info->pid);
15142 @@ -1684,9 +1683,7 @@ static int __init blkif_init(void)
15143 * We only create the device when a request of a new device is
15146 - class_device_create(class, NULL,
15147 - MKDEV(blktap_major, 0), NULL,
15149 + device_create(class, NULL, MKDEV(blktap_major, 0), "blktap0");
15151 /* this is bad, but not fatal */
15152 WPRINTK("blktap: sysfs xen_class not created\n");
15153 --- sle11-2009-05-14.orig/drivers/xen/char/mem.c 2008-12-15 11:27:22.000000000 +0100
15154 +++ sle11-2009-05-14/drivers/xen/char/mem.c 2009-03-16 16:38:05.000000000 +0100
15155 @@ -33,6 +33,27 @@ static inline int uncached_access(struct
15159 +static inline int range_is_allowed(unsigned long pfn, unsigned long size)
15161 +#ifdef CONFIG_NONPROMISC_DEVMEM
15162 + u64 from = ((u64)pfn) << PAGE_SHIFT;
15163 + u64 to = from + size;
15164 + u64 cursor = from;
15166 + while (cursor < to) {
15167 + if (!devmem_is_allowed(pfn)) {
15169 + "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
15170 + current->comm, from, to);
15173 + cursor += PAGE_SIZE;
15181 * This funcion reads the *physical* memory. The f_pos points directly to the
15183 @@ -55,6 +76,9 @@ static ssize_t read_mem(struct file * fi
15185 sz = min_t(unsigned long, sz, count);
15187 + if (!range_is_allowed(p >> PAGE_SHIFT, count))
15190 v = ioremap(p, sz);
15191 if (IS_ERR(v) || v == NULL) {
15193 @@ -103,6 +127,9 @@ static ssize_t write_mem(struct file * f
15195 sz = min_t(unsigned long, sz, count);
15197 + if (!range_is_allowed(p >> PAGE_SHIFT, sz))
15200 v = ioremap(p, sz);
15203 @@ -131,6 +158,23 @@ static ssize_t write_mem(struct file * f
15206 #ifndef ARCH_HAS_DEV_MEM_MMAP_MEM
15207 +static void mmap_mem_open(struct vm_area_struct *vma)
15209 + map_devmem(vma->vm_pgoff, vma->vm_end - vma->vm_start,
15210 + vma->vm_page_prot);
15213 +static void mmap_mem_close(struct vm_area_struct *vma)
15215 + unmap_devmem(vma->vm_pgoff, vma->vm_end - vma->vm_start,
15216 + vma->vm_page_prot);
15219 +static struct vm_operations_struct mmap_mem_ops = {
15220 + .open = mmap_mem_open,
15221 + .close = mmap_mem_close
15224 static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma)
15226 size_t size = vma->vm_end - vma->vm_start;
15227 @@ -138,6 +182,15 @@ static int xen_mmap_mem(struct file * fi
15228 if (uncached_access(file))
15229 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
15231 + if (!range_is_allowed(vma->vm_pgoff, size))
15234 + if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
15235 + &vma->vm_page_prot))
15238 + vma->vm_ops = &mmap_mem_ops;
15240 /* We want to return the real error code, not EAGAIN. */
15241 return direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
15242 size, vma->vm_page_prot, DOMID_IO);
15243 --- sle11-2009-05-14.orig/drivers/xen/console/console.c 2008-12-15 11:26:44.000000000 +0100
15244 +++ sle11-2009-05-14/drivers/xen/console/console.c 2009-03-16 16:38:05.000000000 +0100
15245 @@ -552,16 +552,18 @@ static int xencons_write(
15249 -static void xencons_put_char(struct tty_struct *tty, u_char ch)
15250 +static int xencons_put_char(struct tty_struct *tty, u_char ch)
15252 unsigned long flags;
15255 if (DUMMY_TTY(tty))
15259 spin_lock_irqsave(&xencons_lock, flags);
15260 - (void)__xencons_put_char(ch);
15261 + ret = __xencons_put_char(ch);
15262 spin_unlock_irqrestore(&xencons_lock, flags);
15266 static void xencons_flush_chars(struct tty_struct *tty)
15267 @@ -583,7 +585,7 @@ static void xencons_wait_until_sent(stru
15268 if (DUMMY_TTY(tty))
15271 - while (DRV(tty->driver)->chars_in_buffer(tty)) {
15272 + while (tty_chars_in_buffer(tty)) {
15273 set_current_state(TASK_INTERRUPTIBLE);
15274 schedule_timeout(1);
15275 if (signal_pending(current))
15276 @@ -632,8 +634,7 @@ static void xencons_close(struct tty_str
15279 tty_wait_until_sent(tty, 0);
15280 - if (DRV(tty->driver)->flush_buffer != NULL)
15281 - DRV(tty->driver)->flush_buffer(tty);
15282 + tty_driver_flush_buffer(tty);
15283 if (tty->ldisc.flush_buffer != NULL)
15284 tty->ldisc.flush_buffer(tty);
15286 --- sle11-2009-05-14.orig/drivers/xen/core/machine_kexec.c 2009-02-17 11:46:41.000000000 +0100
15287 +++ sle11-2009-05-14/drivers/xen/core/machine_kexec.c 2009-03-16 16:38:05.000000000 +0100
15290 #include <linux/kexec.h>
15291 #include <xen/interface/kexec.h>
15292 +#include <linux/reboot.h>
15293 #include <linux/mm.h>
15294 #include <linux/bootmem.h>
15296 @@ -90,6 +91,9 @@ void __init xen_machine_kexec_setup_reso
15297 xen_hypervisor_res.start = range.start;
15298 xen_hypervisor_res.end = range.start + range.size - 1;
15299 xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM;
15300 +#ifdef CONFIG_X86_64
15301 + insert_resource(&iomem_resource, &xen_hypervisor_res);
15304 /* fill in crashk_res if range is reserved by hypervisor */
15306 @@ -102,6 +106,9 @@ void __init xen_machine_kexec_setup_reso
15308 crashk_res.start = range.start;
15309 crashk_res.end = range.start + range.size - 1;
15310 +#ifdef CONFIG_X86_64
15311 + insert_resource(&iomem_resource, &crashk_res);
15315 /* get physical address of vmcoreinfo */
15316 @@ -153,11 +160,13 @@ void __init xen_machine_kexec_setup_reso
15320 +#ifndef CONFIG_X86_64
15321 void __init xen_machine_kexec_register_resources(struct resource *res)
15323 request_resource(res, &xen_hypervisor_res);
15324 machine_kexec_register_resources(res);
15328 static void setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
15330 @@ -228,6 +237,11 @@ void machine_shutdown(void)
15334 +void machine_crash_shutdown(struct pt_regs *regs)
15336 + /* The kernel is broken so disable interrupts */
15337 + local_irq_disable();
15342 --- sle11-2009-05-14.orig/drivers/xen/core/smpboot.c 2009-03-16 16:33:40.000000000 +0100
15343 +++ sle11-2009-05-14/drivers/xen/core/smpboot.c 2009-03-16 16:38:05.000000000 +0100
15344 @@ -53,17 +53,16 @@ static DEFINE_PER_CPU(int, callfunc_irq)
15345 static char resched_name[NR_CPUS][15];
15346 static char callfunc_name[NR_CPUS][15];
15348 -u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
15349 +#ifdef CONFIG_X86_LOCAL_APIC
15350 +#define set_cpu_to_apicid(cpu, apicid) (per_cpu(x86_cpu_to_apicid, cpu) = (apicid))
15352 +#define set_cpu_to_apicid(cpu, apicid)
15355 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
15356 DEFINE_PER_CPU(cpumask_t, cpu_core_map);
15357 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
15359 -#if defined(__i386__)
15360 -DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
15361 -EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
15364 void __init prefill_possible_map(void)
15367 @@ -154,7 +153,7 @@ static int __cpuinit xen_smp_intr_init(u
15370 #ifdef CONFIG_HOTPLUG_CPU
15371 -static void xen_smp_intr_exit(unsigned int cpu)
15372 +static void __cpuexit xen_smp_intr_exit(unsigned int cpu)
15375 local_teardown_timer(cpu);
15376 @@ -263,8 +262,7 @@ void __init smp_prepare_cpus(unsigned in
15377 boot_cpu_data.apicid = apicid;
15378 cpu_data(0) = boot_cpu_data;
15380 - cpu_2_logical_apicid[0] = apicid;
15381 - per_cpu(x86_cpu_to_apicid, 0) = apicid;
15382 + set_cpu_to_apicid(0, apicid);
15384 current_thread_info()->cpu = 0;
15386 @@ -319,8 +317,7 @@ void __init smp_prepare_cpus(unsigned in
15387 cpu_data(cpu).cpu_index = cpu;
15388 cpu_data(cpu).apicid = apicid;
15390 - cpu_2_logical_apicid[cpu] = apicid;
15391 - per_cpu(x86_cpu_to_apicid, cpu) = apicid;
15392 + set_cpu_to_apicid(cpu, apicid);
15395 cpu_pda(cpu)->pcurrent = idle;
15396 @@ -375,7 +372,7 @@ static int __init initialize_cpu_present
15398 core_initcall(initialize_cpu_present_map);
15400 -int __cpu_disable(void)
15401 +int __cpuexit __cpu_disable(void)
15403 cpumask_t map = cpu_online_map;
15404 unsigned int cpu = smp_processor_id();
15405 @@ -392,7 +389,7 @@ int __cpu_disable(void)
15409 -void __cpu_die(unsigned int cpu)
15410 +void __cpuexit __cpu_die(unsigned int cpu)
15412 while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
15413 current->state = TASK_UNINTERRUPTIBLE;
15414 --- sle11-2009-05-14.orig/drivers/xen/core/xen_proc.c 2009-05-14 10:56:29.000000000 +0200
15415 +++ sle11-2009-05-14/drivers/xen/core/xen_proc.c 2009-03-16 16:38:05.000000000 +0100
15416 @@ -8,7 +8,7 @@ static struct proc_dir_entry *xen_base;
15417 struct proc_dir_entry *create_xen_proc_entry(const char *name, mode_t mode)
15419 if ( xen_base == NULL )
15420 - if ( (xen_base = proc_mkdir("xen", &proc_root)) == NULL )
15421 + if ( (xen_base = proc_mkdir("xen", NULL)) == NULL )
15422 panic("Couldn't create /proc/xen");
15423 return create_proc_entry(name, mode, xen_base);
15425 --- sle11-2009-05-14.orig/drivers/xen/fbfront/xenfb.c 2009-03-04 11:25:55.000000000 +0100
15426 +++ sle11-2009-05-14/drivers/xen/fbfront/xenfb.c 2009-03-16 16:38:05.000000000 +0100
15427 @@ -93,7 +93,7 @@ struct xenfb_info
15428 * only mappings. The former creates unfaulted pages. Preserves
15429 * invariant. The latter removes pages. Preserves invariant.
15431 - * 3. Holding both locks: xenfb_vm_nopage(). Extends the dirty
15432 + * 3. Holding both locks: xenfb_vm_fault(). Extends the dirty
15433 * rectangle and updates mappings consistently. Preserves
15436 @@ -112,13 +112,13 @@ struct xenfb_info
15438 * But FIXME: the invariant is too weak. It misses that the fault
15439 * record in mappings must be consistent with the mapping of pages in
15440 - * the associated address space! do_no_page() updates the PTE after
15441 - * xenfb_vm_nopage() returns, i.e. outside the critical region. This
15442 + * the associated address space! __do_fault() updates the PTE after
15443 + * xenfb_vm_fault() returns, i.e. outside the critical region. This
15444 * allows the following race:
15446 * X writes to some address in the Xen frame buffer
15447 - * Fault - call do_no_page()
15448 - * call xenfb_vm_nopage()
15449 + * Fault - call __do_fault()
15450 + * call xenfb_vm_fault()
15454 @@ -387,18 +387,17 @@ static void xenfb_vm_close(struct vm_are
15455 mutex_unlock(&info->mm_lock);
15458 -static struct page *xenfb_vm_nopage(struct vm_area_struct *vma,
15459 - unsigned long vaddr, int *type)
15460 +static int xenfb_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
15462 struct xenfb_mapping *map = vma->vm_private_data;
15463 struct xenfb_info *info = map->info;
15464 - int pgnr = (vaddr - vma->vm_start) >> PAGE_SHIFT;
15465 + int pgnr = ((long)vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT;
15466 unsigned long flags;
15470 if (pgnr >= info->nr_pages)
15471 - return NOPAGE_SIGBUS;
15472 + return VM_FAULT_SIGBUS;
15474 mutex_lock(&info->mm_lock);
15475 spin_lock_irqsave(&info->dirty_lock, flags);
15476 @@ -414,16 +413,15 @@ static struct page *xenfb_vm_nopage(stru
15477 spin_unlock_irqrestore(&info->dirty_lock, flags);
15478 mutex_unlock(&info->mm_lock);
15481 - *type = VM_FAULT_MINOR;
15482 + vmf->page = page;
15485 + return VM_FAULT_MINOR;
15488 static struct vm_operations_struct xenfb_vm_ops = {
15489 .open = xenfb_vm_open,
15490 .close = xenfb_vm_close,
15491 - .nopage = xenfb_vm_nopage,
15492 + .fault = xenfb_vm_fault,
15495 static int xenfb_mmap(struct fb_info *fb_info, struct vm_area_struct *vma)
15496 --- sle11-2009-05-14.orig/drivers/xen/gntdev/gntdev.c 2009-03-16 16:33:40.000000000 +0100
15497 +++ sle11-2009-05-14/drivers/xen/gntdev/gntdev.c 2009-03-16 16:38:05.000000000 +0100
15498 @@ -392,7 +392,7 @@ nomem_out:
15499 static int __init gntdev_init(void)
15501 struct class *class;
15502 - struct class_device *device;
15503 + struct device *device;
15505 if (!is_running_on_xen()) {
15506 printk(KERN_ERR "You must be running Xen to use gntdev\n");
15507 @@ -417,8 +417,8 @@ static int __init gntdev_init(void)
15511 - device = class_device_create(class, NULL, MKDEV(gntdev_major, 0),
15512 - NULL, GNTDEV_NAME);
15513 + device = device_create(class, NULL, MKDEV(gntdev_major, 0),
15515 if (IS_ERR(device)) {
15516 printk(KERN_ERR "Error creating gntdev device in xen_class\n");
15517 printk(KERN_ERR "gntdev created with major number = %d\n",
15518 @@ -435,7 +435,7 @@ static void __exit gntdev_exit(void)
15520 struct class *class;
15521 if ((class = get_xen_class()) != NULL)
15522 - class_device_destroy(class, MKDEV(gntdev_major, 0));
15523 + device_destroy(class, MKDEV(gntdev_major, 0));
15524 unregister_chrdev(gntdev_major, GNTDEV_NAME);
15527 --- sle11-2009-05-14.orig/drivers/xen/netfront/netfront.c 2009-03-30 16:39:44.000000000 +0200
15528 +++ sle11-2009-05-14/drivers/xen/netfront/netfront.c 2009-03-30 16:40:17.000000000 +0200
15529 @@ -1464,8 +1464,7 @@ err:
15533 - while ((skb = __skb_dequeue(&errq)))
15535 + __skb_queue_purge(&errq);
15537 while ((skb = __skb_dequeue(&rxq)) != NULL) {
15538 struct page *page = NETFRONT_SKB_CB(skb)->page;
15539 @@ -1630,8 +1629,7 @@ static void netif_release_rx_bufs_flip(s
15543 - while ((skb = __skb_dequeue(&free_list)) != NULL)
15544 - dev_kfree_skb(skb);
15545 + __skb_queue_purge(&free_list);
15547 spin_unlock_bh(&np->rx_lock);
15549 --- sle11-2009-05-14.orig/drivers/xen/privcmd/privcmd.c 2009-03-04 11:28:34.000000000 +0100
15550 +++ sle11-2009-05-14/drivers/xen/privcmd/privcmd.c 2009-03-16 16:38:05.000000000 +0100
15551 @@ -261,15 +261,13 @@ static long privcmd_ioctl(struct file *f
15554 #ifndef HAVE_ARCH_PRIVCMD_MMAP
15555 -static struct page *privcmd_nopage(struct vm_area_struct *vma,
15556 - unsigned long address,
15558 +static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
15560 - return NOPAGE_SIGBUS;
15561 + return VM_FAULT_SIGBUS;
15564 static struct vm_operations_struct privcmd_vm_ops = {
15565 - .nopage = privcmd_nopage
15566 + .fault = privcmd_fault
15569 static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
15570 --- sle11-2009-05-14.orig/drivers/xen/xenbus/xenbus_client.c 2009-03-24 10:12:22.000000000 +0100
15571 +++ sle11-2009-05-14/drivers/xen/xenbus/xenbus_client.c 2009-03-24 10:13:17.000000000 +0100
15572 @@ -442,7 +442,7 @@ int xenbus_map_ring_valloc(struct xenbus
15576 - area = alloc_vm_area(PAGE_SIZE);
15577 + area = xen_alloc_vm_area(PAGE_SIZE);
15581 @@ -452,7 +452,7 @@ int xenbus_map_ring_valloc(struct xenbus
15584 if (op.status != GNTST_okay) {
15585 - free_vm_area(area);
15586 + xen_free_vm_area(area);
15587 xenbus_dev_fatal(dev, op.status,
15588 "mapping in shared page %d from domain %d",
15589 gnt_ref, dev->otherend_id);
15590 @@ -551,7 +551,7 @@ int xenbus_unmap_ring_vfree(struct xenbu
15593 if (op.status == GNTST_okay)
15594 - free_vm_area(area);
15595 + xen_free_vm_area(area);
15597 xenbus_dev_error(dev, op.status,
15598 "unmapping page at handle %d error %d",
15599 --- sle11-2009-05-14.orig/drivers/xen/xenbus/xenbus_probe.c 2009-02-16 16:18:36.000000000 +0100
15600 +++ sle11-2009-05-14/drivers/xen/xenbus/xenbus_probe.c 2009-03-16 16:38:05.000000000 +0100
15601 @@ -173,7 +173,7 @@ static int read_backend_details(struct x
15602 return read_otherend_details(xendev, "backend-id", "backend");
15605 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) && (defined(CONFIG_XEN) || defined(MODULE))
15606 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
15607 static int xenbus_uevent_frontend(struct device *dev, struct kobj_uevent_env *env)
15609 struct xenbus_device *xdev;
15610 @@ -185,8 +185,10 @@ static int xenbus_uevent_frontend(struct
15613 /* stuff we want to pass to /sbin/hotplug */
15614 +#if defined(CONFIG_XEN) || defined(MODULE)
15615 add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype);
15616 add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename);
15618 add_uevent_var(env, "MODALIAS=xen:%s", xdev->devicetype);
15621 @@ -207,10 +209,8 @@ static struct xen_bus_type xenbus_fronte
15622 .probe = xenbus_dev_probe,
15623 .remove = xenbus_dev_remove,
15624 .shutdown = xenbus_dev_shutdown,
15625 -#if defined(CONFIG_XEN) || defined(MODULE)
15626 .uevent = xenbus_uevent_frontend,
15630 #if defined(CONFIG_XEN) || defined(MODULE)
15632 @@ -519,6 +519,15 @@ static ssize_t xendev_show_devtype(struc
15634 DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
15636 +static ssize_t xendev_show_modalias(struct device *dev,
15637 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
15638 + struct device_attribute *attr,
15642 + return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
15644 +DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
15646 int xenbus_probe_node(struct xen_bus_type *bus,
15648 @@ -579,10 +588,16 @@ int xenbus_probe_node(struct xen_bus_typ
15650 err = device_create_file(&xendev->dev, &dev_attr_devtype);
15652 - goto fail_remove_file;
15653 + goto fail_remove_nodename;
15655 + err = device_create_file(&xendev->dev, &dev_attr_modalias);
15657 + goto fail_remove_devtype;
15661 +fail_remove_devtype:
15662 + device_remove_file(&xendev->dev, &dev_attr_devtype);
15663 +fail_remove_nodename:
15664 device_remove_file(&xendev->dev, &dev_attr_nodename);
15666 device_unregister(&xendev->dev);
15667 --- sle11-2009-05-14.orig/fs/aio.c 2009-03-24 10:11:37.000000000 +0100
15668 +++ sle11-2009-05-14/fs/aio.c 2009-03-24 10:13:25.000000000 +0100
15669 @@ -1271,6 +1271,7 @@ static void io_destroy(struct kioctx *io
15670 #ifdef CONFIG_EPOLL
15671 /* forget the poll file, but it's up to the user to close it */
15673 + fput(ioctx->file);
15674 ioctx->file->private_data = 0;
15677 @@ -1295,6 +1296,7 @@ static int aio_queue_fd_close(struct ino
15678 spin_lock_irq(&ioctx->ctx_lock);
15680 spin_unlock_irq(&ioctx->ctx_lock);
15685 @@ -1330,16 +1332,17 @@ static const struct file_operations aioq
15687 static int make_aio_fd(struct kioctx *ioctx)
15690 - struct inode *inode;
15694 - error = anon_inode_getfd(&fd, &inode, &file, "[aioq]",
15695 - &aioq_fops, ioctx);
15698 + fd = anon_inode_getfd("[aioq]", &aioq_fops, ioctx);
15702 /* associate the file with the IO context */
15706 file->private_data = ioctx;
15707 ioctx->file = file;
15708 init_waitqueue_head(&ioctx->poll_wait);
15709 --- sle11-2009-05-14.orig/include/asm-x86/dma-mapping.h 2009-05-14 10:56:29.000000000 +0200
15710 +++ sle11-2009-05-14/include/asm-x86/dma-mapping.h 2009-03-16 16:38:05.000000000 +0100
15711 @@ -223,8 +223,13 @@ static inline dma_addr_t dma_map_page(st
15712 struct dma_mapping_ops *ops = get_dma_ops(dev);
15714 BUG_ON(!valid_dma_direction(direction));
15715 +#ifndef CONFIG_XEN
15716 return ops->map_single(dev, page_to_phys(page) + offset,
15719 + return ops->map_single(dev, page_to_pseudophys(page) + offset,
15720 + size, direction);
15724 static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
15725 --- sle11-2009-05-14.orig/include/asm-x86/genapic_64.h 2009-05-14 10:56:29.000000000 +0200
15726 +++ sle11-2009-05-14/include/asm-x86/genapic_64.h 2009-03-16 16:38:05.000000000 +0100
15727 @@ -46,6 +46,7 @@ extern struct genapic apic_x2apic_phys;
15728 extern int acpi_madt_oem_check(char *, char *);
15730 extern void apic_send_IPI_self(int vector);
15731 +#ifndef CONFIG_XEN
15732 enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
15733 extern enum uv_system_type get_uv_system_type(void);
15734 extern int is_uv_system(void);
15735 @@ -55,6 +56,10 @@ DECLARE_PER_CPU(int, x2apic_extra_bits);
15736 extern void uv_cpu_init(void);
15737 extern void uv_system_init(void);
15738 extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
15740 +#define is_uv_system() 0
15741 +#define uv_cpu_init() ((void)0)
15744 extern void setup_apic_routing(void);
15746 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/desc.h 2009-03-16 16:33:40.000000000 +0100
15747 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/desc.h 2009-03-16 16:38:05.000000000 +0100
15748 @@ -64,8 +64,8 @@ static inline struct desc_struct *get_cp
15751 static inline void pack_gate(gate_desc *gate, unsigned char type,
15752 - unsigned long base, unsigned dpl, unsigned flags, unsigned short seg)
15754 + unsigned long base, unsigned dpl, unsigned flags,
15755 + unsigned short seg)
15757 gate->a = (seg << 16) | (base & 0xffff);
15758 gate->b = (base & 0xffff0000) |
15759 @@ -84,22 +84,23 @@ static inline int desc_empty(const void
15760 #define load_TR_desc() native_load_tr_desc()
15761 #define load_gdt(dtr) native_load_gdt(dtr)
15762 #define load_idt(dtr) native_load_idt(dtr)
15763 -#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
15764 -#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
15765 +#define load_tr(tr) asm volatile("ltr %0"::"m" (tr))
15766 +#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt))
15768 #define store_gdt(dtr) native_store_gdt(dtr)
15769 #define store_idt(dtr) native_store_idt(dtr)
15770 #define store_tr(tr) (tr = native_store_tr())
15771 -#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
15772 +#define store_ldt(ldt) asm("sldt %0":"=m" (ldt))
15774 #define load_TLS(t, cpu) native_load_tls(t, cpu)
15775 #define set_ldt native_set_ldt
15777 -#define write_ldt_entry(dt, entry, desc) \
15778 - native_write_ldt_entry(dt, entry, desc)
15779 -#define write_gdt_entry(dt, entry, desc, type) \
15780 - native_write_gdt_entry(dt, entry, desc, type)
15781 -#define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g)
15782 +#define write_ldt_entry(dt, entry, desc) \
15783 + native_write_ldt_entry(dt, entry, desc)
15784 +#define write_gdt_entry(dt, entry, desc, type) \
15785 + native_write_gdt_entry(dt, entry, desc, type)
15786 +#define write_idt_entry(dt, entry, g) \
15787 + native_write_idt_entry(dt, entry, g)
15789 static inline void native_write_idt_entry(gate_desc *idt, int entry,
15790 const gate_desc *gate)
15791 @@ -138,8 +139,8 @@ static inline void pack_descriptor(struc
15793 desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
15794 desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
15795 - (limit & 0x000f0000) | ((type & 0xff) << 8) |
15796 - ((flags & 0xf) << 20);
15797 + (limit & 0x000f0000) | ((type & 0xff) << 8) |
15798 + ((flags & 0xf) << 20);
15802 @@ -160,7 +161,6 @@ static inline void set_tssldt_descriptor
15803 desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF;
15804 desc->base3 = PTR_HIGH(addr);
15807 pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
15810 @@ -178,7 +178,8 @@ static inline void __set_tss_desc(unsign
15813 set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
15814 - IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
15815 + IO_BITMAP_OFFSET + IO_BITMAP_BYTES +
15816 + sizeof(unsigned long) - 1);
15817 write_gdt_entry(d, entry, &tss, DESC_TSS);
15820 @@ -187,16 +188,16 @@ static inline void __set_tss_desc(unsign
15821 static inline void native_set_ldt(const void *addr, unsigned int entries)
15823 if (likely(entries == 0))
15824 - __asm__ __volatile__("lldt %w0"::"q" (0));
15825 + asm volatile("lldt %w0"::"q" (0));
15827 unsigned cpu = smp_processor_id();
15830 - set_tssldt_descriptor(&ldt, (unsigned long)addr,
15831 - DESC_LDT, entries * sizeof(ldt) - 1);
15832 + set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
15833 + entries * LDT_ENTRY_SIZE - 1);
15834 write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
15836 - __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
15837 + asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
15841 @@ -261,15 +262,15 @@ static inline void xen_load_tls(struct t
15845 -#define _LDT_empty(info) (\
15846 - (info)->base_addr == 0 && \
15847 - (info)->limit == 0 && \
15848 - (info)->contents == 0 && \
15849 - (info)->read_exec_only == 1 && \
15850 - (info)->seg_32bit == 0 && \
15851 - (info)->limit_in_pages == 0 && \
15852 - (info)->seg_not_present == 1 && \
15853 - (info)->useable == 0)
15854 +#define _LDT_empty(info) \
15855 + ((info)->base_addr == 0 && \
15856 + (info)->limit == 0 && \
15857 + (info)->contents == 0 && \
15858 + (info)->read_exec_only == 1 && \
15859 + (info)->seg_32bit == 0 && \
15860 + (info)->limit_in_pages == 0 && \
15861 + (info)->seg_not_present == 1 && \
15862 + (info)->useable == 0)
15864 #ifdef CONFIG_X86_64
15865 #define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
15866 @@ -309,7 +310,7 @@ static inline unsigned long get_desc_lim
15868 #ifndef CONFIG_X86_NO_IDT
15869 static inline void _set_gate(int gate, unsigned type, void *addr,
15870 - unsigned dpl, unsigned ist, unsigned seg)
15871 + unsigned dpl, unsigned ist, unsigned seg)
15874 pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
15875 @@ -393,10 +394,10 @@ static inline void set_system_gate_ist(i
15876 * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
15878 #define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
15879 - movb idx*8+4(gdt), lo_b; \
15880 - movb idx*8+7(gdt), hi_b; \
15881 - shll $16, base; \
15882 - movw idx*8+2(gdt), lo_w;
15883 + movb idx * 8 + 4(gdt), lo_b; \
15884 + movb idx * 8 + 7(gdt), hi_b; \
15885 + shll $16, base; \
15886 + movw idx * 8 + 2(gdt), lo_w;
15889 #endif /* __ASSEMBLY__ */
15890 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/dma-mapping.h 2009-02-16 16:18:36.000000000 +0100
15891 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/dma-mapping.h 2009-03-16 16:38:05.000000000 +0100
15893 -#ifdef CONFIG_X86_32
15894 -# include "dma-mapping_32.h"
15896 -# include "dma-mapping_64.h"
15898 +#ifndef _ASM_DMA_MAPPING_H_
15900 +#include "../../dma-mapping.h"
15903 +address_needs_mapping(struct device *hwdev, dma_addr_t addr)
15905 + dma_addr_t mask = 0xffffffff;
15906 + /* If the device has a mask, use it, otherwise default to 32 bits */
15907 + if (hwdev && hwdev->dma_mask)
15908 + mask = *hwdev->dma_mask;
15909 + return (addr & ~mask) != 0;
15912 +extern int range_straddles_page_boundary(paddr_t p, size_t size);
15914 +#endif /* _ASM_DMA_MAPPING_H_ */
15915 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/dma-mapping_32.h 2009-03-16 16:33:40.000000000 +0100
15916 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
15918 -#ifndef _ASM_I386_DMA_MAPPING_H
15919 -#define _ASM_I386_DMA_MAPPING_H
15922 - * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
15926 -#include <linux/mm.h>
15927 -#include <linux/scatterlist.h>
15928 -#include <asm/cache.h>
15929 -#include <asm/io.h>
15930 -#include <asm/swiotlb.h>
15933 -address_needs_mapping(struct device *hwdev, dma_addr_t addr)
15935 - dma_addr_t mask = 0xffffffff;
15936 - /* If the device has a mask, use it, otherwise default to 32 bits */
15937 - if (hwdev && hwdev->dma_mask)
15938 - mask = *hwdev->dma_mask;
15939 - return (addr & ~mask) != 0;
15942 -extern int range_straddles_page_boundary(paddr_t p, size_t size);
15944 -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
15945 -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
15947 -void *dma_alloc_coherent(struct device *dev, size_t size,
15948 - dma_addr_t *dma_handle, gfp_t flag);
15950 -void dma_free_coherent(struct device *dev, size_t size,
15951 - void *vaddr, dma_addr_t dma_handle);
15954 -dma_map_single(struct device *dev, void *ptr, size_t size,
15955 - enum dma_data_direction direction);
15958 -dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
15959 - enum dma_data_direction direction);
15961 -extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
15962 - int nents, enum dma_data_direction direction);
15963 -extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg,
15964 - int nents, enum dma_data_direction direction);
15966 -#ifdef CONFIG_HIGHMEM
15968 -dma_map_page(struct device *dev, struct page *page, unsigned long offset,
15969 - size_t size, enum dma_data_direction direction);
15972 -dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
15973 - enum dma_data_direction direction);
15975 -#define dma_map_page(dev, page, offset, size, dir) \
15976 - dma_map_single(dev, page_address(page) + (offset), (size), (dir))
15977 -#define dma_unmap_page dma_unmap_single
15981 -dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
15982 - enum dma_data_direction direction);
15985 -dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
15986 - enum dma_data_direction direction);
15988 -static inline void
15989 -dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
15990 - unsigned long offset, size_t size,
15991 - enum dma_data_direction direction)
15993 - dma_sync_single_for_cpu(dev, dma_handle+offset, size, direction);
15996 -static inline void
15997 -dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
15998 - unsigned long offset, size_t size,
15999 - enum dma_data_direction direction)
16001 - dma_sync_single_for_device(dev, dma_handle+offset, size, direction);
16005 -dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
16006 - enum dma_data_direction direction);
16009 -dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
16010 - enum dma_data_direction direction);
16013 -dma_mapping_error(dma_addr_t dma_addr);
16016 -dma_supported(struct device *dev, u64 mask);
16019 -dma_set_mask(struct device *dev, u64 mask)
16021 - if(!dev->dma_mask || !dma_supported(dev, mask))
16024 - *dev->dma_mask = mask;
16030 -dma_get_cache_alignment(void)
16032 - /* no easy way to get cache size on all x86, so return the
16033 - * maximum possible, to be safe */
16034 - return (1 << INTERNODE_CACHE_SHIFT);
16037 -#define dma_is_consistent(d, h) (1)
16039 -static inline void
16040 -dma_cache_sync(struct device *dev, void *vaddr, size_t size,
16041 - enum dma_data_direction direction)
16043 - flush_write_buffers();
16046 -#define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
16048 -dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
16049 - dma_addr_t device_addr, size_t size, int flags);
16052 -dma_release_declared_memory(struct device *dev);
16055 -dma_mark_declared_memory_occupied(struct device *dev,
16056 - dma_addr_t device_addr, size_t size);
16059 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2009-02-16 16:18:36.000000000 +0100
16060 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
16062 -#ifndef _X8664_DMA_MAPPING_H
16063 -#define _X8664_DMA_MAPPING_H 1
16066 - * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
16070 -#include <linux/scatterlist.h>
16071 -#include <asm/io.h>
16073 -struct dma_mapping_ops {
16074 - int (*mapping_error)(dma_addr_t dma_addr);
16075 - void* (*alloc_coherent)(struct device *dev, size_t size,
16076 - dma_addr_t *dma_handle, gfp_t gfp);
16077 - void (*free_coherent)(struct device *dev, size_t size,
16078 - void *vaddr, dma_addr_t dma_handle);
16079 - dma_addr_t (*map_single)(struct device *hwdev, void *ptr,
16080 - size_t size, int direction);
16081 - /* like map_single, but doesn't check the device mask */
16082 - dma_addr_t (*map_simple)(struct device *hwdev, char *ptr,
16083 - size_t size, int direction);
16084 - void (*unmap_single)(struct device *dev, dma_addr_t addr,
16085 - size_t size, int direction);
16086 - void (*sync_single_for_cpu)(struct device *hwdev,
16087 - dma_addr_t dma_handle, size_t size,
16089 - void (*sync_single_for_device)(struct device *hwdev,
16090 - dma_addr_t dma_handle, size_t size,
16092 - void (*sync_single_range_for_cpu)(struct device *hwdev,
16093 - dma_addr_t dma_handle, unsigned long offset,
16094 - size_t size, int direction);
16095 - void (*sync_single_range_for_device)(struct device *hwdev,
16096 - dma_addr_t dma_handle, unsigned long offset,
16097 - size_t size, int direction);
16098 - void (*sync_sg_for_cpu)(struct device *hwdev,
16099 - struct scatterlist *sg, int nelems,
16101 - void (*sync_sg_for_device)(struct device *hwdev,
16102 - struct scatterlist *sg, int nelems,
16104 - int (*map_sg)(struct device *hwdev, struct scatterlist *sg,
16105 - int nents, int direction);
16106 - void (*unmap_sg)(struct device *hwdev,
16107 - struct scatterlist *sg, int nents,
16109 - int (*dma_supported)(struct device *hwdev, u64 mask);
16113 -extern dma_addr_t bad_dma_address;
16114 -extern const struct dma_mapping_ops* dma_ops;
16115 -extern int iommu_merge;
16118 -static inline int dma_mapping_error(dma_addr_t dma_addr)
16120 - if (dma_ops->mapping_error)
16121 - return dma_ops->mapping_error(dma_addr);
16123 - return (dma_addr == bad_dma_address);
16126 -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
16127 -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
16129 -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
16130 -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
16132 -extern void *dma_alloc_coherent(struct device *dev, size_t size,
16133 - dma_addr_t *dma_handle, gfp_t gfp);
16134 -extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
16135 - dma_addr_t dma_handle);
16137 -static inline dma_addr_t
16138 -dma_map_single(struct device *hwdev, void *ptr, size_t size,
16141 - BUG_ON(!valid_dma_direction(direction));
16142 - return dma_ops->map_single(hwdev, ptr, size, direction);
16145 -static inline void
16146 -dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
16149 - BUG_ON(!valid_dma_direction(direction));
16150 - dma_ops->unmap_single(dev, addr, size, direction);
16153 -#define dma_map_page(dev,page,offset,size,dir) \
16154 - dma_map_single((dev), page_address(page)+(offset), (size), (dir))
16156 -#define dma_unmap_page dma_unmap_single
16158 -static inline void
16159 -dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
16160 - size_t size, int direction)
16162 - BUG_ON(!valid_dma_direction(direction));
16163 - if (dma_ops->sync_single_for_cpu)
16164 - dma_ops->sync_single_for_cpu(hwdev, dma_handle, size,
16166 - flush_write_buffers();
16169 -static inline void
16170 -dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
16171 - size_t size, int direction)
16173 - BUG_ON(!valid_dma_direction(direction));
16174 - if (dma_ops->sync_single_for_device)
16175 - dma_ops->sync_single_for_device(hwdev, dma_handle, size,
16177 - flush_write_buffers();
16180 -static inline void
16181 -dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
16182 - unsigned long offset, size_t size, int direction)
16184 - BUG_ON(!valid_dma_direction(direction));
16185 - if (dma_ops->sync_single_range_for_cpu) {
16186 - dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, size, direction);
16189 - flush_write_buffers();
16192 -static inline void
16193 -dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
16194 - unsigned long offset, size_t size, int direction)
16196 - BUG_ON(!valid_dma_direction(direction));
16197 - if (dma_ops->sync_single_range_for_device)
16198 - dma_ops->sync_single_range_for_device(hwdev, dma_handle,
16199 - offset, size, direction);
16201 - flush_write_buffers();
16204 -static inline void
16205 -dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
16206 - int nelems, int direction)
16208 - BUG_ON(!valid_dma_direction(direction));
16209 - if (dma_ops->sync_sg_for_cpu)
16210 - dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction);
16211 - flush_write_buffers();
16214 -static inline void
16215 -dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
16216 - int nelems, int direction)
16218 - BUG_ON(!valid_dma_direction(direction));
16219 - if (dma_ops->sync_sg_for_device) {
16220 - dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction);
16223 - flush_write_buffers();
16227 -dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction)
16229 - BUG_ON(!valid_dma_direction(direction));
16230 - return dma_ops->map_sg(hwdev, sg, nents, direction);
16233 -static inline void
16234 -dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
16237 - BUG_ON(!valid_dma_direction(direction));
16238 - dma_ops->unmap_sg(hwdev, sg, nents, direction);
16241 -extern int dma_supported(struct device *hwdev, u64 mask);
16243 -/* same for gart, swiotlb, and nommu */
16244 -static inline int dma_get_cache_alignment(void)
16246 - return boot_cpu_data.x86_clflush_size;
16249 -#define dma_is_consistent(d, h) 1
16251 -extern int dma_set_mask(struct device *dev, u64 mask);
16253 -static inline void
16254 -dma_cache_sync(struct device *dev, void *vaddr, size_t size,
16255 - enum dma_data_direction dir)
16257 - flush_write_buffers();
16260 -extern struct device fallback_dev;
16261 -extern int panic_on_overflow;
16264 -#endif /* _X8664_DMA_MAPPING_H */
16266 -#include "dma-mapping_32.h"
16267 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/fixmap.h 2009-02-16 16:18:36.000000000 +0100
16268 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/fixmap.h 2009-03-16 16:38:05.000000000 +0100
16270 +#ifndef _ASM_FIXMAP_H
16271 +#define _ASM_FIXMAP_H
16273 #ifdef CONFIG_X86_32
16274 # include "fixmap_32.h"
16276 # include "fixmap_64.h"
16279 +#define clear_fixmap(idx) \
16280 + __set_fixmap(idx, 0, __pgprot(0))
16283 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-03-16 16:33:40.000000000 +0100
16284 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-03-16 16:38:05.000000000 +0100
16286 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
16289 -#ifndef _ASM_FIXMAP_H
16290 -#define _ASM_FIXMAP_H
16291 +#ifndef _ASM_FIXMAP_32_H
16292 +#define _ASM_FIXMAP_32_H
16294 /* used by vmalloc.c, vsyscall.lds.S.
16296 @@ -102,8 +102,7 @@ enum fixed_addresses {
16298 #define NR_FIX_BTMAPS 64
16299 #define FIX_BTMAPS_NESTING 4
16301 - __end_of_permanent_fixed_addresses + 512 -
16302 + FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 -
16303 (__end_of_permanent_fixed_addresses & 511),
16304 FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
16306 @@ -114,19 +113,16 @@ enum fixed_addresses {
16309 extern void __set_fixmap(enum fixed_addresses idx,
16310 - maddr_t phys, pgprot_t flags);
16311 + maddr_t phys, pgprot_t flags);
16312 extern void reserve_top_address(unsigned long reserve);
16314 -#define set_fixmap(idx, phys) \
16315 - __set_fixmap(idx, phys, PAGE_KERNEL)
16316 +#define set_fixmap(idx, phys) \
16317 + __set_fixmap(idx, phys, PAGE_KERNEL)
16319 * Some hardware wants to get fixmapped without caching.
16321 -#define set_fixmap_nocache(idx, phys) \
16322 - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
16324 -#define clear_fixmap(idx) \
16325 - __set_fixmap(idx, 0, __pgprot(0))
16326 +#define set_fixmap_nocache(idx, phys) \
16327 + __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
16329 #define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
16331 @@ -159,7 +155,7 @@ static __always_inline unsigned long fix
16332 if (idx >= __end_of_fixed_addresses)
16333 __this_fixmap_does_not_exist();
16335 - return __fix_to_virt(idx);
16336 + return __fix_to_virt(idx);
16339 static inline unsigned long virt_to_fix(const unsigned long vaddr)
16340 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-03-16 16:33:40.000000000 +0100
16341 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-03-16 16:38:05.000000000 +0100
16343 * Copyright (C) 1998 Ingo Molnar
16346 -#ifndef _ASM_FIXMAP_H
16347 -#define _ASM_FIXMAP_H
16348 +#ifndef _ASM_FIXMAP_64_H
16349 +#define _ASM_FIXMAP_64_H
16351 #include <linux/kernel.h>
16352 #include <asm/apicdef.h>
16355 enum fixed_addresses {
16356 VSYSCALL_LAST_PAGE,
16357 - VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
16358 + VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
16359 + + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
16362 FIX_EARLYCON_MEM_BASE,
16363 @@ -45,11 +46,12 @@ enum fixed_addresses {
16366 FIX_IO_APIC_BASE_0,
16367 - FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
16368 + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
16371 FIX_EFI_IO_MAP_LAST_PAGE,
16372 - FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE+MAX_EFI_IO_PAGES-1,
16373 + FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE
16374 + + MAX_EFI_IO_PAGES - 1,
16378 @@ -79,19 +81,16 @@ enum fixed_addresses {
16379 __end_of_fixed_addresses
16382 -extern void __set_fixmap (enum fixed_addresses idx,
16383 - unsigned long phys, pgprot_t flags);
16384 +extern void __set_fixmap(enum fixed_addresses idx,
16385 + unsigned long phys, pgprot_t flags);
16387 -#define set_fixmap(idx, phys) \
16388 - __set_fixmap(idx, phys, PAGE_KERNEL)
16389 +#define set_fixmap(idx, phys) \
16390 + __set_fixmap(idx, phys, PAGE_KERNEL)
16392 * Some hardware wants to get fixmapped without caching.
16394 -#define set_fixmap_nocache(idx, phys) \
16395 - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
16397 -#define clear_fixmap(idx) \
16398 - __set_fixmap(idx, 0, __pgprot(0))
16399 +#define set_fixmap_nocache(idx, phys) \
16400 + __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
16402 #define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
16403 #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
16404 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/highmem.h 2009-03-16 16:33:40.000000000 +0100
16405 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/highmem.h 2009-03-16 16:38:05.000000000 +0100
16407 * Gerhard.Wichert@pdb.siemens.de
16410 - * Redesigned the x86 32-bit VM architecture to deal with
16411 + * Redesigned the x86 32-bit VM architecture to deal with
16412 * up to 16 Terabyte physical memory. With current x86 CPUs
16413 * we now support up to 64 Gigabytes physical RAM.
16415 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/io.h 2009-02-16 16:18:36.000000000 +0100
16416 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/io.h 2009-03-16 16:38:05.000000000 +0100
16418 +#ifndef _ASM_X86_IO_H
16419 +#define _ASM_X86_IO_H
16421 +#define ARCH_HAS_IOREMAP_WC
16423 #ifdef CONFIG_X86_32
16424 # include "io_32.h"
16426 # include "io_64.h"
16429 +extern void *xlate_dev_mem_ptr(unsigned long phys);
16430 +extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
16432 +extern void map_devmem(unsigned long pfn, unsigned long len, pgprot_t);
16433 +extern void unmap_devmem(unsigned long pfn, unsigned long len, pgprot_t);
16435 +extern int ioremap_check_change_attr(unsigned long mfn, unsigned long size,
16436 + unsigned long prot_val);
16437 +extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
16439 +#endif /* _ASM_X86_IO_H */
16440 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/irqflags.h 2009-03-16 16:33:40.000000000 +0100
16441 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/irqflags.h 2009-03-16 16:38:05.000000000 +0100
16442 @@ -137,11 +137,11 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
16443 #endif /* __ASSEMBLY__ */
16445 #ifndef __ASSEMBLY__
16446 -#define raw_local_save_flags(flags) \
16447 - do { (flags) = __raw_local_save_flags(); } while (0)
16448 +#define raw_local_save_flags(flags) \
16449 + do { (flags) = __raw_local_save_flags(); } while (0)
16451 -#define raw_local_irq_save(flags) \
16452 - do { (flags) = __raw_local_irq_save(); } while (0)
16453 +#define raw_local_irq_save(flags) \
16454 + do { (flags) = __raw_local_irq_save(); } while (0)
16456 static inline int raw_irqs_disabled_flags(unsigned long flags)
16458 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-03-16 16:33:40.000000000 +0100
16459 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-03-16 16:38:05.000000000 +0100
16460 @@ -94,7 +94,7 @@ static inline void switch_mm(struct mm_s
16461 BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next);
16463 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
16464 - /* We were in lazy tlb mode and leave_mm disabled
16465 + /* We were in lazy tlb mode and leave_mm disabled
16466 * tlb flush IPI delivery. We must reload %cr3.
16468 load_cr3(next->pgd);
16469 @@ -107,10 +107,10 @@ static inline void switch_mm(struct mm_s
16470 #define deactivate_mm(tsk, mm) \
16471 asm("movl %0,%%gs": :"r" (0));
16473 -#define activate_mm(prev, next) \
16475 - xen_activate_mm(prev, next); \
16476 - switch_mm((prev),(next),NULL); \
16478 +#define activate_mm(prev, next) \
16480 + xen_activate_mm(prev, next); \
16481 + switch_mm((prev), (next), NULL); \
16485 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-03-16 16:33:40.000000000 +0100
16486 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-03-16 16:38:05.000000000 +0100
16487 @@ -21,7 +21,7 @@ void destroy_context(struct mm_struct *m
16488 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
16490 #if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
16491 - if (read_pda(mmu_state) == TLBSTATE_OK)
16492 + if (read_pda(mmu_state) == TLBSTATE_OK)
16493 write_pda(mmu_state, TLBSTATE_LAZY);
16496 @@ -62,7 +62,7 @@ extern void mm_pin(struct mm_struct *mm)
16497 extern void mm_unpin(struct mm_struct *mm);
16498 void mm_pin_all(void);
16500 -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
16501 +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
16502 struct task_struct *tsk)
16504 unsigned cpu = smp_processor_id();
16505 @@ -106,7 +106,7 @@ static inline void switch_mm(struct mm_s
16506 if (read_pda(active_mm) != next)
16508 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
16509 - /* We were in lazy tlb mode and leave_mm disabled
16510 + /* We were in lazy tlb mode and leave_mm disabled
16511 * tlb flush IPI delivery. We must reload CR3
16512 * to make sure to use no freed page tables.
16514 @@ -118,10 +118,11 @@ static inline void switch_mm(struct mm_s
16518 -#define deactivate_mm(tsk,mm) do { \
16519 - load_gs_index(0); \
16520 - asm volatile("movl %0,%%fs"::"r"(0)); \
16522 +#define deactivate_mm(tsk, mm) \
16524 + load_gs_index(0); \
16525 + asm volatile("movl %0,%%fs"::"r"(0)); \
16528 static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
16530 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/page.h 2009-03-16 16:33:40.000000000 +0100
16531 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/page.h 2009-03-16 16:38:05.000000000 +0100
16533 #define _PAGE_BIT_IO 9
16534 #define _PAGE_IO (_AC(1, L)<<_PAGE_BIT_IO)
16536 -#define PHYSICAL_PAGE_MASK (~(_AT(phys_addr_t, PAGE_SIZE) - 1) & __PHYSICAL_MASK)
16537 -#define PTE_MASK _AT(pteval_t, PHYSICAL_PAGE_MASK)
16538 +#define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1)
16539 +#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
16541 +/* Cast PAGE_MASK to a signed type so that it is sign-extended if
16542 + virtual addresses are 32-bits but physical addresses are larger
16543 + (ie, 32-bit PAE). */
16544 +#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK)
16546 +/* PTE_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
16547 +#define PTE_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
16549 #define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
16550 #define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
16551 @@ -34,19 +42,14 @@
16552 /* to align the pointer to the (next) page boundary */
16553 #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
16555 -#define __PHYSICAL_MASK _AT(phys_addr_t, (_AC(1,ULL) << __PHYSICAL_MASK_SHIFT) - 1)
16556 -#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
16558 #ifndef __ASSEMBLY__
16559 #include <linux/types.h>
16562 #ifdef CONFIG_X86_64
16563 #include <asm/page_64.h>
16564 -#define max_pfn_mapped end_pfn_map
16566 #include <asm/page_32.h>
16567 -#define max_pfn_mapped max_low_pfn
16568 #endif /* CONFIG_X86_64 */
16570 #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
16572 #ifndef __ASSEMBLY__
16574 extern int page_is_ram(unsigned long pagenr);
16575 +extern int devmem_is_allowed(unsigned long pagenr);
16577 +extern unsigned long max_pfn_mapped;
16581 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/page_64.h 2009-03-16 16:33:40.000000000 +0100
16582 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/page_64.h 2009-03-16 16:38:05.000000000 +0100
16585 #define THREAD_ORDER 1
16586 #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
16587 -#define CURRENT_MASK (~(THREAD_SIZE-1))
16588 +#define CURRENT_MASK (~(THREAD_SIZE - 1))
16590 #define EXCEPTION_STACK_ORDER 0
16591 #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
16592 @@ -53,10 +53,10 @@
16593 #define __VIRTUAL_MASK_SHIFT 48
16596 - * Kernel image size is limited to 128 MB (see level2_kernel_pgt in
16597 + * Kernel image size is limited to 512 MB (see level2_kernel_pgt in
16598 * arch/x86/kernel/head_64.S), and it is mapped here:
16600 -#define KERNEL_IMAGE_SIZE (128*1024*1024)
16601 +#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
16602 #define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL)
16604 #ifndef __ASSEMBLY__
16605 @@ -64,7 +64,6 @@ void clear_page(void *page);
16606 void copy_page(void *to, void *from);
16608 extern unsigned long end_pfn;
16609 -extern unsigned long end_pfn_map;
16611 static inline unsigned long __phys_addr(unsigned long x)
16613 @@ -89,6 +88,9 @@ typedef union { pteval_t pte; unsigned i
16615 #define vmemmap ((struct page *)VMEMMAP_START)
16617 +extern unsigned long init_memory_mapping(unsigned long start,
16618 + unsigned long end);
16620 #endif /* !__ASSEMBLY__ */
16622 #ifdef CONFIG_FLATMEM
16623 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pci.h 2009-03-16 16:33:40.000000000 +0100
16624 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pci.h 2009-03-16 16:38:05.000000000 +0100
16626 #include <asm/scatterlist.h>
16627 #include <asm/io.h>
16632 struct pci_sysdata {
16633 int domain; /* PCI domain */
16634 int node; /* NUMA node */
16635 #ifdef CONFIG_X86_64
16636 - void* iommu; /* IOMMU private data */
16637 + void *iommu; /* IOMMU private data */
16639 #ifdef CONFIG_XEN_PCIDEV_FRONTEND
16640 struct pcifront_device *pdev;
16641 @@ -23,6 +22,8 @@ struct pci_sysdata {
16644 /* scan a bus after allocating a pci_sysdata for it */
16645 +extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
16647 extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
16649 static inline int pci_domain_nr(struct pci_bus *bus)
16650 @@ -36,6 +37,7 @@ static inline int pci_proc_domain(struct
16651 return pci_domain_nr(bus);
16654 +extern void pci_iommu_alloc(void);
16656 /* Can be used to override the logic in pci_scan_bus for skipping
16657 already-configured bus numbers - to be used for buggy BIOSes
16658 @@ -57,7 +59,7 @@ extern unsigned long pci_mem_start;
16659 #define PCIBIOS_MIN_CARDBUS_IO 0x4000
16661 void pcibios_config_init(void);
16662 -struct pci_bus * pcibios_scan_root(int bus);
16663 +struct pci_bus *pcibios_scan_root(int bus);
16665 void pcibios_set_master(struct pci_dev *dev);
16666 void pcibios_penalize_isa_irq(int irq, int active);
16667 @@ -67,7 +69,8 @@ int pcibios_set_irq_routing(struct pci_d
16669 #define HAVE_PCI_MMAP
16670 extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
16671 - enum pci_mmap_state mmap_state, int write_combine);
16672 + enum pci_mmap_state mmap_state,
16673 + int write_combine);
16677 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgalloc.h 2009-02-16 16:18:36.000000000 +0100
16678 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgalloc.h 2009-03-16 16:38:05.000000000 +0100
16680 -#ifdef CONFIG_X86_32
16681 -# include "pgalloc_32.h"
16683 -# include "pgalloc_64.h"
16684 +#ifndef _ASM_X86_PGALLOC_H
16685 +#define _ASM_X86_PGALLOC_H
16687 +#include <linux/threads.h>
16688 +#include <linux/mm.h> /* for struct page */
16689 +#include <linux/pagemap.h>
16691 +#include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
16693 +static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {}
16694 +static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {}
16695 +static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
16696 + unsigned long start, unsigned long count) {}
16697 +static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) {}
16698 +static inline void paravirt_release_pte(unsigned long pfn) {}
16699 +static inline void paravirt_release_pmd(unsigned long pfn) {}
16700 +static inline void paravirt_release_pud(unsigned long pfn) {}
16702 +#ifdef CONFIG_X86_64
16703 +void early_make_page_readonly(void *va, unsigned int feature);
16704 +pmd_t *early_get_pmd(unsigned long va);
16705 +#define make_lowmem_page_readonly make_page_readonly
16706 +#define make_lowmem_page_writable make_page_writable
16710 + * Allocate and free page tables.
16712 +extern pgd_t *pgd_alloc(struct mm_struct *);
16713 +extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
16715 +extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
16716 +extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
16718 +/* Should really implement gc for free page table pages. This could be
16719 + done with a reference count in struct page. */
16721 +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
16723 + BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
16724 + make_lowmem_page_writable(pte, XENFEAT_writable_page_tables);
16725 + free_page((unsigned long)pte);
16728 +extern void __pte_free(pgtable_t);
16729 +static inline void pte_free(struct mm_struct *mm, struct page *pte)
16734 +extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
16736 +static inline void pmd_populate_kernel(struct mm_struct *mm,
16737 + pmd_t *pmd, pte_t *pte)
16739 + paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT);
16740 + set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
16743 +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
16744 + struct page *pte)
16746 + unsigned long pfn = page_to_pfn(pte);
16748 + paravirt_alloc_pte(mm, pfn);
16749 + if (PagePinned(virt_to_page(mm->pgd))) {
16750 + if (!PageHighMem(pte))
16751 + BUG_ON(HYPERVISOR_update_va_mapping(
16752 + (unsigned long)__va(pfn << PAGE_SHIFT),
16753 + pfn_pte(pfn, PAGE_KERNEL_RO), 0));
16754 +#ifndef CONFIG_X86_64
16755 + else if (!TestSetPagePinned(pte))
16756 + kmap_flush_unused();
16758 + set_pmd(pmd, __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
16760 + *pmd = __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE);
16763 +#define pmd_pgtable(pmd) pmd_page(pmd)
16765 +#if PAGETABLE_LEVELS > 2
16766 +extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr);
16767 +extern void __pmd_free(pgtable_t);
16769 +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
16771 + BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
16772 + __pmd_free(virt_to_page(pmd));
16775 +extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
16777 +#ifdef CONFIG_X86_PAE
16778 +extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd);
16779 +#else /* !CONFIG_X86_PAE */
16780 +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
16782 + paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
16783 + if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
16784 + BUG_ON(HYPERVISOR_update_va_mapping(
16785 + (unsigned long)pmd,
16786 + pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT,
16787 + PAGE_KERNEL_RO), 0));
16788 + set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
16790 + *pud = __pud(_PAGE_TABLE | __pa(pmd));
16792 +#endif /* CONFIG_X86_PAE */
16794 +#if PAGETABLE_LEVELS > 3
16795 +#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
16798 + * We need to use the batch mode here, but pgd_populate() won't
16799 + * be called frequently.
16801 +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
16803 + paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
16804 + if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
16805 + BUG_ON(HYPERVISOR_update_va_mapping(
16806 + (unsigned long)pud,
16807 + pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT,
16808 + PAGE_KERNEL_RO), 0));
16809 + set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
16810 + set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
16812 + *(pgd) = __pgd(_PAGE_TABLE | __pa(pud));
16813 + *__user_pgd(pgd) = *(pgd);
16817 +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
16819 + return (pud_t *)pmd_alloc_one(mm, addr);
16822 +static inline void pud_free(struct mm_struct *mm, pud_t *pud)
16824 + BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
16825 + __pmd_free(virt_to_page(pud));
16828 +extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
16829 +#endif /* PAGETABLE_LEVELS > 3 */
16830 +#endif /* PAGETABLE_LEVELS > 2 */
16832 +#endif /* _ASM_X86_PGALLOC_H */
16833 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgalloc_32.h 2009-03-16 16:33:40.000000000 +0100
16834 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
16836 -#ifndef _I386_PGALLOC_H
16837 -#define _I386_PGALLOC_H
16839 -#include <linux/threads.h>
16840 -#include <linux/mm.h> /* for struct page */
16841 -#include <linux/pagemap.h>
16842 -#include <asm/tlb.h>
16843 -#include <asm-generic/tlb.h>
16844 -#include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
16846 -#define paravirt_alloc_pt(mm, pfn) do { } while (0)
16847 -#define paravirt_alloc_pd(mm, pfn) do { } while (0)
16848 -#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
16849 -#define paravirt_release_pt(pfn) do { } while (0)
16850 -#define paravirt_release_pd(pfn) do { } while (0)
16852 -static inline void pmd_populate_kernel(struct mm_struct *mm,
16853 - pmd_t *pmd, pte_t *pte)
16855 - paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT);
16856 - set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
16859 -static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
16861 - unsigned long pfn = page_to_pfn(pte);
16863 - paravirt_alloc_pt(mm, pfn);
16864 - if (PagePinned(virt_to_page(mm->pgd))) {
16865 - if (!PageHighMem(pte))
16866 - BUG_ON(HYPERVISOR_update_va_mapping(
16867 - (unsigned long)__va(pfn << PAGE_SHIFT),
16868 - pfn_pte(pfn, PAGE_KERNEL_RO), 0));
16869 - else if (!test_and_set_bit(PG_pinned, &pte->flags))
16870 - kmap_flush_unused();
16871 - set_pmd(pmd, __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
16873 - *pmd = __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE);
16875 -#define pmd_pgtable(pmd) pmd_page(pmd)
16878 - * Allocate and free page tables.
16880 -extern void pgd_test_and_unpin(pgd_t *);
16881 -extern pgd_t *pgd_alloc(struct mm_struct *);
16882 -extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
16884 -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
16885 -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
16887 -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
16889 - make_lowmem_page_writable(pte, XENFEAT_writable_page_tables);
16890 - free_page((unsigned long)pte);
16893 -extern void __pte_free(pgtable_t);
16894 -static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
16900 -extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
16902 -#ifdef CONFIG_X86_PAE
16904 - * In the PAE case we free the pmds as part of the pgd.
16906 -extern pmd_t *pmd_alloc_one(struct mm_struct *, unsigned long);
16908 -extern void __pmd_free(pgtable_t);
16909 -static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
16911 - BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
16912 - __pmd_free(virt_to_page(pmd));
16915 -extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
16917 -static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
16919 - struct page *page = virt_to_page(pmd);
16920 - unsigned long pfn = page_to_pfn(page);
16922 - paravirt_alloc_pd(mm, pfn);
16924 - /* Note: almost everything apart from _PAGE_PRESENT is
16925 - reserved at the pmd (PDPT) level. */
16926 - if (PagePinned(virt_to_page(mm->pgd))) {
16927 - BUG_ON(PageHighMem(page));
16928 - BUG_ON(HYPERVISOR_update_va_mapping(
16929 - (unsigned long)__va(pfn << PAGE_SHIFT),
16930 - pfn_pte(pfn, PAGE_KERNEL_RO), 0));
16931 - set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
16933 - *pudp = __pud(__pa(pmd) | _PAGE_PRESENT);
16936 - * According to Intel App note "TLBs, Paging-Structure Caches,
16937 - * and Their Invalidation", April 2007, document 317080-001,
16938 - * section 8.1: in PAE mode we explicitly have to flush the
16939 - * TLB via cr3 if the top-level pgd is changed...
16941 - if (mm == current->active_mm)
16944 -#endif /* CONFIG_X86_PAE */
16946 -#endif /* _I386_PGALLOC_H */
16947 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgalloc_64.h 2009-03-16 16:33:40.000000000 +0100
16948 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
16950 -#ifndef _X86_64_PGALLOC_H
16951 -#define _X86_64_PGALLOC_H
16953 -#include <asm/pda.h>
16954 -#include <linux/threads.h>
16955 -#include <linux/mm.h>
16956 -#include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
16958 -pmd_t *early_get_pmd(unsigned long va);
16959 -void early_make_page_readonly(void *va, unsigned int feature);
16961 -#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
16963 -#define pmd_populate_kernel(mm, pmd, pte) \
16964 - set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
16966 -static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
16968 - if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
16969 - BUG_ON(HYPERVISOR_update_va_mapping(
16970 - (unsigned long)pmd,
16971 - pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT,
16972 - PAGE_KERNEL_RO), 0));
16973 - set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
16975 - *(pud) = __pud(_PAGE_TABLE | __pa(pmd));
16980 - * We need to use the batch mode here, but pgd_pupulate() won't be
16981 - * be called frequently.
16983 -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
16985 - if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
16986 - BUG_ON(HYPERVISOR_update_va_mapping(
16987 - (unsigned long)pud,
16988 - pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT,
16989 - PAGE_KERNEL_RO), 0));
16990 - set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
16991 - set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
16993 - *(pgd) = __pgd(_PAGE_TABLE | __pa(pud));
16994 - *(__user_pgd(pgd)) = *(pgd);
16998 -#define pmd_pgtable(pmd) pmd_page(pmd)
17000 -static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
17002 - if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
17003 - BUG_ON(HYPERVISOR_update_va_mapping(
17004 - (unsigned long)__va(page_to_pfn(pte) << PAGE_SHIFT),
17005 - pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0));
17006 - set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
17008 - *(pmd) = __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT));
17012 -extern void __pmd_free(pgtable_t);
17013 -static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
17015 - BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
17016 - __pmd_free(virt_to_page(pmd));
17019 -extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr);
17021 -static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
17023 - return (pud_t *)pmd_alloc_one(mm, addr);
17026 -static inline void pud_free(struct mm_struct *mm, pud_t *pud)
17028 - BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
17029 - __pmd_free(virt_to_page(pud));
17032 -static inline void pgd_list_add(pgd_t *pgd)
17034 - struct page *page = virt_to_page(pgd);
17035 - unsigned long flags;
17037 - spin_lock_irqsave(&pgd_lock, flags);
17038 - list_add(&page->lru, &pgd_list);
17039 - spin_unlock_irqrestore(&pgd_lock, flags);
17042 -static inline void pgd_list_del(pgd_t *pgd)
17044 - struct page *page = virt_to_page(pgd);
17045 - unsigned long flags;
17047 - spin_lock_irqsave(&pgd_lock, flags);
17048 - list_del(&page->lru);
17049 - spin_unlock_irqrestore(&pgd_lock, flags);
17052 -extern void pgd_test_and_unpin(pgd_t *);
17054 -static inline pgd_t *pgd_alloc(struct mm_struct *mm)
17057 - * We allocate two contiguous pages for kernel and user.
17059 - unsigned boundary;
17060 - pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1);
17063 - pgd_list_add(pgd);
17064 - pgd_test_and_unpin(pgd);
17066 - * Copy kernel pointers in from init.
17067 - * Could keep a freelist or slab cache of those because the kernel
17068 - * part never changes.
17070 - boundary = pgd_index(__PAGE_OFFSET);
17071 - memset(pgd, 0, boundary * sizeof(pgd_t));
17072 - memcpy(pgd + boundary,
17073 - init_level4_pgt + boundary,
17074 - (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
17076 - memset(__user_pgd(pgd), 0, PAGE_SIZE); /* clean up user pgd */
17078 - * Set level3_user_pgt for vsyscall area
17080 - __user_pgd(pgd)[pgd_index(VSYSCALL_START)] =
17081 - __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
17085 -static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
17087 - pgd_test_and_unpin(pgd);
17088 - pgd_list_del(pgd);
17089 - free_pages((unsigned long)pgd, 1);
17092 -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
17094 - pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
17096 - make_page_readonly(pte, XENFEAT_writable_page_tables);
17101 -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
17103 -/* Should really implement gc for free page table pages. This could be
17104 - done with a reference count in struct page. */
17106 -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
17108 - BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
17109 - make_page_writable(pte, XENFEAT_writable_page_tables);
17110 - free_page((unsigned long)pte);
17113 -extern void __pte_free(pgtable_t);
17114 -static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
17119 -#define __pte_free_tlb(tlb,pte) \
17121 - pgtable_page_dtor((pte)); \
17122 - tlb_remove_page((tlb), (pte)); \
17125 -#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
17126 -#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
17128 -#endif /* _X86_64_PGALLOC_H */
17129 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable.h 2009-03-16 16:33:40.000000000 +0100
17130 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable.h 2009-03-16 16:38:05.000000000 +0100
17132 #ifndef _ASM_X86_PGTABLE_H
17133 #define _ASM_X86_PGTABLE_H
17135 -#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1)
17136 #define FIRST_USER_ADDRESS 0
17138 -#define _PAGE_BIT_PRESENT 0
17139 -#define _PAGE_BIT_RW 1
17140 -#define _PAGE_BIT_USER 2
17141 -#define _PAGE_BIT_PWT 3
17142 -#define _PAGE_BIT_PCD 4
17143 -#define _PAGE_BIT_ACCESSED 5
17144 -#define _PAGE_BIT_DIRTY 6
17145 -#define _PAGE_BIT_FILE 6
17146 +#define _PAGE_BIT_PRESENT 0 /* is present */
17147 +#define _PAGE_BIT_RW 1 /* writeable */
17148 +#define _PAGE_BIT_USER 2 /* userspace addressable */
17149 +#define _PAGE_BIT_PWT 3 /* page write through */
17150 +#define _PAGE_BIT_PCD 4 /* page cache disabled */
17151 +#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
17152 +#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
17153 #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
17154 #define _PAGE_BIT_PAT 7 /* on 4KB pages */
17155 #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
17157 #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
17158 #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
17160 +/* If _PAGE_BIT_PRESENT is clear, we use these: */
17162 +/* set: nonlinear file mapping, saved PTE; unset:swap */
17163 +#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
17165 +/* if the user mapped it with PROT_NONE; pte_present gives true */
17166 +#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
17169 * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a
17170 * sign-extended value on 32-bit with all 1's in the upper word,
17175 -/* If _PAGE_PRESENT is clear, we use these: */
17176 -#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, saved PTE; unset:swap */
17177 -#define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE;
17178 - pte_present gives true */
17179 +#define _PAGE_FILE (_AC(1, L)<<_PAGE_BIT_FILE)
17180 +#define _PAGE_PROTNONE (_AC(1, L)<<_PAGE_BIT_PROTNONE)
17182 #ifndef __ASSEMBLY__
17183 #if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT <= 0x030002
17184 @@ -61,20 +65,42 @@ extern unsigned int __kernel_page_user;
17188 -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
17189 -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user)
17190 +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
17191 + _PAGE_ACCESSED | _PAGE_DIRTY)
17192 +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
17193 + _PAGE_DIRTY | __kernel_page_user)
17195 +/* Set of bits not changed in pte_modify */
17196 +#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_CACHE_MASK | _PAGE_IO | \
17197 + _PAGE_ACCESSED | _PAGE_DIRTY)
17199 -#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_IO)
17201 + * PAT settings are part of the hypervisor interface, which sets the
17202 + * MSR to 0x050100070406 (i.e. WB, WT, UC-, UC, WC, WP [, UC, UC]).
17204 +#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT | _PAGE_PAT)
17205 +#define _PAGE_CACHE_WB (0)
17206 +#define _PAGE_CACHE_WT (_PAGE_PWT)
17207 +#define _PAGE_CACHE_WC (_PAGE_PAT)
17208 +#define _PAGE_CACHE_WP (_PAGE_PAT | _PAGE_PWT)
17209 +#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD)
17210 +#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT)
17212 #define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
17213 -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
17214 +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
17215 + _PAGE_ACCESSED | _PAGE_NX)
17217 -#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
17218 -#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
17219 -#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
17220 +#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \
17221 + _PAGE_USER | _PAGE_ACCESSED)
17222 +#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
17223 + _PAGE_ACCESSED | _PAGE_NX)
17224 +#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
17226 #define PAGE_COPY PAGE_COPY_NOEXEC
17227 -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
17228 -#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
17229 +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \
17230 + _PAGE_ACCESSED | _PAGE_NX)
17231 +#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
17234 #ifdef CONFIG_X86_32
17235 #define _PAGE_KERNEL_EXEC \
17236 @@ -93,6 +119,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
17237 #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
17238 #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
17239 #define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT)
17240 +#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC)
17241 #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
17242 #define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD)
17243 #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
17244 @@ -109,6 +136,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
17245 #define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
17246 #define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
17247 #define PAGE_KERNEL_RX MAKE_GLOBAL(__PAGE_KERNEL_RX)
17248 +#define PAGE_KERNEL_WC MAKE_GLOBAL(__PAGE_KERNEL_WC)
17249 #define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
17250 #define PAGE_KERNEL_UC_MINUS MAKE_GLOBAL(__PAGE_KERNEL_UC_MINUS)
17251 #define PAGE_KERNEL_EXEC_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE)
17252 @@ -142,7 +170,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
17253 * ZERO_PAGE is a global shared page that is always zero: used
17254 * for zero-mapped memory areas etc..
17256 -extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
17257 +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
17258 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
17260 extern spinlock_t pgd_lock;
17261 @@ -152,30 +180,111 @@ extern struct list_head pgd_list;
17262 * The following only work if pte_present() is true.
17263 * Undefined behaviour if not..
17265 -static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; }
17266 -static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
17267 -static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
17268 -static inline int pte_file(pte_t pte) { return __pte_val(pte) & _PAGE_FILE; }
17269 -static inline int pte_huge(pte_t pte) { return __pte_val(pte) & _PAGE_PSE; }
17270 -static inline int pte_global(pte_t pte) { return 0; }
17271 -static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); }
17273 -static inline int pmd_large(pmd_t pte) {
17274 - return (__pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
17275 - (_PAGE_PSE|_PAGE_PRESENT);
17278 -static inline pte_t pte_mkclean(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); }
17279 -static inline pte_t pte_mkold(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); }
17280 -static inline pte_t pte_wrprotect(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_RW); }
17281 -static inline pte_t pte_mkexec(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_NX); }
17282 -static inline pte_t pte_mkdirty(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_DIRTY); }
17283 -static inline pte_t pte_mkyoung(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_ACCESSED); }
17284 -static inline pte_t pte_mkwrite(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_RW); }
17285 -static inline pte_t pte_mkhuge(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_PSE); }
17286 -static inline pte_t pte_clrhuge(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE); }
17287 -static inline pte_t pte_mkglobal(pte_t pte) { return pte; }
17288 -static inline pte_t pte_clrglobal(pte_t pte) { return pte; }
17289 +static inline int pte_dirty(pte_t pte)
17291 + return __pte_val(pte) & _PAGE_DIRTY;
17294 +static inline int pte_young(pte_t pte)
17296 + return __pte_val(pte) & _PAGE_ACCESSED;
17299 +static inline int pte_write(pte_t pte)
17301 + return __pte_val(pte) & _PAGE_RW;
17304 +static inline int pte_file(pte_t pte)
17306 + return __pte_val(pte) & _PAGE_FILE;
17309 +static inline int pte_huge(pte_t pte)
17311 + return __pte_val(pte) & _PAGE_PSE;
17314 +static inline int pte_global(pte_t pte)
17319 +static inline int pte_exec(pte_t pte)
17321 + return !(__pte_val(pte) & _PAGE_NX);
17324 +static inline int pte_special(pte_t pte)
17329 +static inline int pmd_large(pmd_t pte)
17331 + return (__pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
17332 + (_PAGE_PSE | _PAGE_PRESENT);
17335 +static inline pte_t pte_mkclean(pte_t pte)
17337 + return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_DIRTY);
17340 +static inline pte_t pte_mkold(pte_t pte)
17342 + return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED);
17345 +static inline pte_t pte_wrprotect(pte_t pte)
17347 + return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_RW);
17350 +static inline pte_t pte_mkexec(pte_t pte)
17352 + return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_NX);
17355 +static inline pte_t pte_mkdirty(pte_t pte)
17357 + return __pte_ma(__pte_val(pte) | _PAGE_DIRTY);
17360 +static inline pte_t pte_mkyoung(pte_t pte)
17362 + return __pte_ma(__pte_val(pte) | _PAGE_ACCESSED);
17365 +static inline pte_t pte_mkwrite(pte_t pte)
17367 + return __pte_ma(__pte_val(pte) | _PAGE_RW);
17370 +static inline pte_t pte_mkhuge(pte_t pte)
17372 + return __pte_ma(__pte_val(pte) | _PAGE_PSE);
17375 +static inline pte_t pte_clrhuge(pte_t pte)
17377 + return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE);
17380 +static inline pte_t pte_mkglobal(pte_t pte)
17385 +static inline pte_t pte_clrglobal(pte_t pte)
17390 +static inline pte_t pte_mkspecial(pte_t pte)
17395 extern pteval_t __supported_pte_mask;
17397 @@ -202,15 +311,33 @@ static inline pte_t pte_modify(pte_t pte
17398 pteval_t val = pte_val(pte);
17400 val &= _PAGE_CHG_MASK;
17401 - val |= pgprot_val(newprot) & __supported_pte_mask;
17402 + val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask;
17407 -#define pte_pgprot(x) __pgprot(pte_val(x) & (0xfff | _PAGE_NX))
17408 +/* mprotect needs to preserve PAT bits when updating vm_page_prot */
17409 +#define pgprot_modify pgprot_modify
17410 +static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
17412 + pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK;
17413 + pgprotval_t addbits = pgprot_val(newprot);
17414 + return __pgprot(preservebits | addbits);
17417 +#define pte_pgprot(x) __pgprot(__pte_val(x) & ~PTE_MASK)
17419 #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
17421 +#ifndef __ASSEMBLY__
17422 +#define __HAVE_PHYS_MEM_ACCESS_PROT
17424 +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
17425 + unsigned long size, pgprot_t vma_prot);
17426 +int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
17427 + unsigned long size, pgprot_t *vma_prot);
17430 #define set_pte(ptep, pte) xen_set_pte(ptep, pte)
17431 #define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte)
17433 @@ -246,6 +373,9 @@ static inline pte_t pte_modify(pte_t pte
17434 # include "pgtable_64.h"
17437 +#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET)
17438 +#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
17440 #ifndef __ASSEMBLY__
17443 @@ -312,46 +442,17 @@ static inline void xen_pte_clear(struct
17444 * bit at the same time.
17446 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
17447 -#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
17449 - int __changed = !pte_same(*(ptep), entry); \
17450 - if (__changed && (dirty)) { \
17451 - if ( likely((vma)->vm_mm == current->mm) ) { \
17452 - BUG_ON(HYPERVISOR_update_va_mapping(address, \
17454 - (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
17455 - UVMF_INVLPG|UVMF_MULTI)); \
17457 - xen_l1_entry_update(ptep, entry); \
17458 - flush_tlb_page(vma, address); \
17463 +extern int ptep_set_access_flags(struct vm_area_struct *vma,
17464 + unsigned long address, pte_t *ptep,
17465 + pte_t entry, int dirty);
17467 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
17468 -#define ptep_test_and_clear_young(vma, addr, ptep) ({ \
17470 - if (pte_young(*(ptep))) \
17471 - __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \
17474 - pte_update((vma)->vm_mm, addr, ptep); \
17477 +extern int ptep_test_and_clear_young(struct vm_area_struct *vma,
17478 + unsigned long addr, pte_t *ptep);
17480 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
17481 -#define ptep_clear_flush_young(vma, address, ptep) \
17483 - pte_t __pte = *(ptep); \
17484 - int __young = pte_young(__pte); \
17485 - __pte = pte_mkold(__pte); \
17486 - if (PagePinned(virt_to_page((vma)->vm_mm->pgd))) \
17487 - (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \
17488 - else if (__young) \
17489 - (ptep)->pte_low = __pte.pte_low; \
17492 +extern int ptep_clear_flush_young(struct vm_area_struct *vma,
17493 + unsigned long address, pte_t *ptep);
17495 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
17496 #define ptep_clear_flush(vma, addr, ptep) \
17497 @@ -370,7 +471,8 @@ static inline void xen_pte_clear(struct
17500 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
17501 -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
17502 +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
17507 @@ -398,13 +500,29 @@ static inline pte_t ptep_get_and_clear(s
17508 pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *, unsigned long, pte_t *, int);
17510 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
17511 -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
17512 +static inline void ptep_set_wrprotect(struct mm_struct *mm,
17513 + unsigned long addr, pte_t *ptep)
17516 if (pte_write(pte))
17517 set_pte_at(mm, addr, ptep, pte_wrprotect(pte));
17521 + * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
17523 + * dst - pointer to pgd range anywhere on a pgd page
17525 + * count - the number of pgds to copy.
17527 + * dst and src can be on the same page, but the range must not overlap,
17528 + * and must not cross a page boundary.
17530 +static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
17532 + memcpy(dst, src, count * sizeof(pgd_t));
17535 #define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
17536 xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
17538 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-03-16 16:33:40.000000000 +0100
17539 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-03-16 16:38:05.000000000 +0100
17541 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
17544 -#define pte_ERROR(e) \
17545 - printk("%s:%d: bad pte %p(%016Lx pfn %08lx).\n", __FILE__, __LINE__, \
17546 - &(e), __pte_val(e), pte_pfn(e))
17547 -#define pmd_ERROR(e) \
17548 - printk("%s:%d: bad pmd %p(%016Lx pfn %08Lx).\n", __FILE__, __LINE__, \
17549 - &(e), __pmd_val(e), (pmd_val(e) & PTE_MASK) >> PAGE_SHIFT)
17550 -#define pgd_ERROR(e) \
17551 - printk("%s:%d: bad pgd %p(%016Lx pfn %08Lx).\n", __FILE__, __LINE__, \
17552 - &(e), __pgd_val(e), (pgd_val(e) & PTE_MASK) >> PAGE_SHIFT)
17554 +#define pte_ERROR(e) \
17555 + printk("%s:%d: bad pte %p(%016Lx pfn %08lx).\n", \
17556 + __FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e))
17557 +#define pmd_ERROR(e) \
17558 + printk("%s:%d: bad pmd %p(%016Lx pfn %08Lx).\n", \
17559 + __FILE__, __LINE__, &(e), __pmd_val(e), \
17560 + (pmd_val(e) & PTE_MASK) >> PAGE_SHIFT)
17561 +#define pgd_ERROR(e) \
17562 + printk("%s:%d: bad pgd %p(%016Lx pfn %08Lx).\n", \
17563 + __FILE__, __LINE__, &(e), __pgd_val(e), \
17564 + (pgd_val(e) & PTE_MASK) >> PAGE_SHIFT)
17566 static inline int pud_none(pud_t pud)
17568 return __pud_val(pud) == 0;
17571 static inline int pud_bad(pud_t pud)
17573 return (__pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0;
17576 static inline int pud_present(pud_t pud)
17578 return __pud_val(pud) & _PAGE_PRESENT;
17579 @@ -48,12 +51,14 @@ static inline void xen_set_pte(pte_t *pt
17581 static inline void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
17583 - set_64bit((unsigned long long *)(ptep),__pte_val(pte));
17584 + set_64bit((unsigned long long *)(ptep), __pte_val(pte));
17587 static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd)
17589 xen_l2_entry_update(pmdp, pmd);
17592 static inline void xen_set_pud(pud_t *pudp, pud_t pud)
17594 xen_l3_entry_update(pudp, pud);
17595 @@ -92,20 +97,19 @@ static inline void pud_clear(pud_t *pudp
17596 * current pgd to avoid unnecessary TLB flushes.
17599 - if (__pa(pudp) >= pgd && __pa(pudp) < (pgd + sizeof(pgd_t)*PTRS_PER_PGD))
17600 + if (__pa(pudp) >= pgd && __pa(pudp) <
17601 + (pgd + sizeof(pgd_t)*PTRS_PER_PGD))
17605 -#define pud_page(pud) \
17606 -((struct page *) __va(pud_val(pud) & PAGE_MASK))
17607 +#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_MASK))
17609 -#define pud_page_vaddr(pud) \
17610 -((unsigned long) __va(pud_val(pud) & PAGE_MASK))
17611 +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_MASK))
17614 /* Find an entry in the second-level page table.. */
17615 -#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
17616 - pmd_index(address))
17617 +#define pmd_offset(pud, address) ((pmd_t *)pud_page(*(pud)) + \
17618 + pmd_index(address))
17621 static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res)
17622 @@ -150,7 +154,8 @@ static inline int pte_none(pte_t pte)
17623 * put the 32 bits of offset into the high part.
17625 #define pte_to_pgoff(pte) ((pte).pte_high)
17626 -#define pgoff_to_pte(off) ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
17627 +#define pgoff_to_pte(off) \
17628 + ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
17629 #define PTE_FILE_MAX_BITS 32
17631 /* Encode and de-code a swap entry */
17632 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-16 16:33:40.000000000 +0100
17633 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-16 16:38:05.000000000 +0100
17634 @@ -38,16 +38,13 @@ void paging_init(void);
17635 #ifdef CONFIG_X86_PAE
17636 # include <asm/pgtable-3level-defs.h>
17637 # define PMD_SIZE (1UL << PMD_SHIFT)
17638 -# define PMD_MASK (~(PMD_SIZE-1))
17639 +# define PMD_MASK (~(PMD_SIZE - 1))
17641 # include <asm/pgtable-2level-defs.h>
17644 #define PGDIR_SIZE (1UL << PGDIR_SHIFT)
17645 -#define PGDIR_MASK (~(PGDIR_SIZE-1))
17647 -#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
17648 -#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
17649 +#define PGDIR_MASK (~(PGDIR_SIZE - 1))
17651 /* Just any arbitrary offset to the start of the vmalloc VM area: the
17652 * current 8MB value just means that there will be a 8MB "hole" after the
17653 @@ -56,21 +53,22 @@ void paging_init(void);
17654 * The vmalloc() routines leaves a hole of 4kB between each vmalloced
17655 * area for the same reason. ;)
17657 -#define VMALLOC_OFFSET (8*1024*1024)
17658 -#define VMALLOC_START (((unsigned long) high_memory + \
17659 - 2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
17660 +#define VMALLOC_OFFSET (8 * 1024 * 1024)
17661 +#define VMALLOC_START (((unsigned long)high_memory + 2 * VMALLOC_OFFSET - 1) \
17662 + & ~(VMALLOC_OFFSET - 1))
17663 #ifdef CONFIG_X86_PAE
17664 #define LAST_PKMAP 512
17666 #define LAST_PKMAP 1024
17669 -#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK)
17670 +#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1)) \
17673 #ifdef CONFIG_HIGHMEM
17674 -# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE)
17675 +# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
17677 -# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE)
17678 +# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE)
17682 @@ -91,10 +89,10 @@ extern unsigned long pg0[];
17683 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
17684 can temporarily clear it. */
17685 #define pmd_present(x) (__pmd_val(x))
17686 -#define pmd_bad(x) ((__pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
17687 +#define pmd_bad(x) ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
17689 #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT)
17690 -#define pmd_bad(x) ((__pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
17691 +#define pmd_bad(x) ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
17695 @@ -107,32 +105,18 @@ extern unsigned long pg0[];
17699 - * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
17701 - * dst - pointer to pgd range anwhere on a pgd page
17703 - * count - the number of pgds to copy.
17705 - * dst and src can be on the same page, but the range must not overlap,
17706 - * and must not cross a page boundary.
17707 + * Macro to mark a page protection value as "uncacheable".
17708 + * On processors which do not support it, this is a no-op.
17710 -static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
17712 - memcpy(dst, src, count * sizeof(pgd_t));
17716 - * Macro to mark a page protection value as "uncacheable". On processors which do not support
17717 - * it, this is a no-op.
17719 -#define pgprot_noncached(prot) ((boot_cpu_data.x86 > 3) \
17720 - ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) : (prot))
17721 +#define pgprot_noncached(prot) \
17722 + ((boot_cpu_data.x86 > 3) \
17723 + ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) \
17727 * Conversion functions: convert a page and protection to a page entry,
17728 * and a page entry and page directory to the page they refer to.
17731 #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
17734 @@ -141,20 +125,20 @@ static inline void clone_pgd_range(pgd_t
17735 * this macro returns the index of the entry in the pgd page which would
17736 * control the given virtual address
17738 -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
17739 -#define pgd_index_k(addr) pgd_index(addr)
17740 +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
17741 +#define pgd_index_k(addr) pgd_index((addr))
17744 * pgd_offset() returns a (pgd_t *)
17745 * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
17747 -#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
17748 +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
17751 * a shortcut which implies the use of the kernel's pgd, instead
17754 -#define pgd_offset_k(address) pgd_offset(&init_mm, address)
17755 +#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
17757 static inline int pud_large(pud_t pud) { return 0; }
17759 @@ -164,8 +148,8 @@ static inline int pud_large(pud_t pud) {
17760 * this macro returns the index of the entry in the pmd page which would
17761 * control the given virtual address
17763 -#define pmd_index(address) \
17764 - (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
17765 +#define pmd_index(address) \
17766 + (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
17769 * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
17770 @@ -173,33 +157,36 @@ static inline int pud_large(pud_t pud) {
17771 * this macro returns the index of the entry in the pte page which would
17772 * control the given virtual address
17774 -#define pte_index(address) \
17775 - (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
17776 -#define pte_offset_kernel(dir, address) \
17777 - ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
17778 +#define pte_index(address) \
17779 + (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
17780 +#define pte_offset_kernel(dir, address) \
17781 + ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index((address)))
17783 -#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
17784 +#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
17786 -#define pmd_page_vaddr(pmd) \
17787 - ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
17788 +#define pmd_page_vaddr(pmd) \
17789 + ((unsigned long)__va(pmd_val((pmd)) & PTE_MASK))
17791 #if defined(CONFIG_HIGHPTE)
17792 -#define pte_offset_map(dir, address) \
17793 - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
17794 -#define pte_offset_map_nested(dir, address) \
17795 - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
17796 -#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
17797 -#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
17799 -#define pte_offset_map(dir, address) \
17800 - ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
17801 -#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
17802 +#define pte_offset_map(dir, address) \
17803 + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \
17804 + pte_index((address)))
17805 +#define pte_offset_map_nested(dir, address) \
17806 + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \
17807 + pte_index((address)))
17808 +#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0)
17809 +#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
17811 +#define pte_offset_map(dir, address) \
17812 + ((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address)))
17813 +#define pte_offset_map_nested(dir, address) pte_offset_map((dir), (address))
17814 #define pte_unmap(pte) do { } while (0)
17815 #define pte_unmap_nested(pte) do { } while (0)
17818 /* Clear a kernel PTE and flush it from the TLB */
17819 -#define kpte_clear_flush(ptep, vaddr) do { \
17820 +#define kpte_clear_flush(ptep, vaddr) \
17822 if (HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG)) \
17825 @@ -208,7 +195,7 @@ static inline int pud_large(pud_t pud) {
17826 * The i386 doesn't have any external MMU info: the kernel page
17827 * tables contain all the necessary information.
17829 -#define update_mmu_cache(vma,address,pte) do { } while (0)
17830 +#define update_mmu_cache(vma, address, pte) do { } while (0)
17832 void make_lowmem_page_readonly(void *va, unsigned int feature);
17833 void make_lowmem_page_writable(void *va, unsigned int feature);
17834 @@ -225,7 +212,7 @@ void make_lowmem_page_writable(void *va,
17835 #define kern_addr_valid(kaddr) (0)
17838 -#define io_remap_pfn_range(vma,from,pfn,size,prot) \
17839 -direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
17840 +#define io_remap_pfn_range(vma, from, pfn, size, prot) \
17841 + direct_remap_pfn_range(vma, from, pfn, size, prot, DOMID_IO)
17843 #endif /* _I386_PGTABLE_H */
17844 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-16 16:33:40.000000000 +0100
17845 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-16 16:38:05.000000000 +0100
17846 @@ -31,7 +31,7 @@ extern void paging_init(void);
17848 #endif /* !__ASSEMBLY__ */
17850 -#define SHARED_KERNEL_PMD 1
17851 +#define SHARED_KERNEL_PMD 0
17854 * PGDIR_SHIFT determines what a top-level page table entry can map
17855 @@ -59,18 +59,20 @@ extern void paging_init(void);
17857 #ifndef __ASSEMBLY__
17859 -#define pte_ERROR(e) \
17860 - printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
17861 - &(e), __pte_val(e), pte_pfn(e))
17862 -#define pmd_ERROR(e) \
17863 - printk("%s:%d: bad pmd %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
17864 - &(e), __pmd_val(e), pmd_pfn(e))
17865 -#define pud_ERROR(e) \
17866 - printk("%s:%d: bad pud %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
17867 - &(e), __pud_val(e), (pud_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
17868 -#define pgd_ERROR(e) \
17869 - printk("%s:%d: bad pgd %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
17870 - &(e), __pgd_val(e), (pgd_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
17871 +#define pte_ERROR(e) \
17872 + printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", \
17873 + __FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e))
17874 +#define pmd_ERROR(e) \
17875 + printk("%s:%d: bad pmd %p(%016lx pfn %010lx).\n", \
17876 + __FILE__, __LINE__, &(e), __pmd_val(e), pmd_pfn(e))
17877 +#define pud_ERROR(e) \
17878 + printk("%s:%d: bad pud %p(%016lx pfn %010lx).\n", \
17879 + __FILE__, __LINE__, &(e), __pud_val(e), \
17880 + (pud_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
17881 +#define pgd_ERROR(e) \
17882 + printk("%s:%d: bad pgd %p(%016lx pfn %010lx).\n", \
17883 + __FILE__, __LINE__, &(e), __pgd_val(e), \
17884 + (pgd_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
17886 #define pgd_none(x) (!__pgd_val(x))
17887 #define pud_none(x) (!__pud_val(x))
17888 @@ -125,7 +127,7 @@ static inline void xen_set_pgd(pgd_t *pg
17889 xen_l4_entry_update(pgdp, pgd);
17892 -static inline void xen_pgd_clear(pgd_t * pgd)
17893 +static inline void xen_pgd_clear(pgd_t *pgd)
17895 xen_set_pgd(pgd, xen_make_pgd(0));
17896 xen_set_pgd(__user_pgd(pgd), xen_make_pgd(0));
17897 @@ -135,43 +137,43 @@ static inline void xen_pgd_clear(pgd_t *
17899 #endif /* !__ASSEMBLY__ */
17901 -#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
17902 -#define PMD_MASK (~(PMD_SIZE-1))
17903 -#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT)
17904 -#define PUD_MASK (~(PUD_SIZE-1))
17905 -#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
17906 -#define PGDIR_MASK (~(PGDIR_SIZE-1))
17907 +#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
17908 +#define PMD_MASK (~(PMD_SIZE - 1))
17909 +#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
17910 +#define PUD_MASK (~(PUD_SIZE - 1))
17911 +#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
17912 +#define PGDIR_MASK (~(PGDIR_SIZE - 1))
17915 -#define MAXMEM _AC(0x3fffffffffff, UL)
17916 +#define MAXMEM _AC(0x00003fffffffffff, UL)
17917 #define VMALLOC_START _AC(0xffffc20000000000, UL)
17918 #define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
17919 #define VMEMMAP_START _AC(0xffffe20000000000, UL)
17920 -#define MODULES_VADDR _AC(0xffffffff88000000, UL)
17921 +#define MODULES_VADDR _AC(0xffffffffa0000000, UL)
17922 #define MODULES_END _AC(0xfffffffffff00000, UL)
17923 #define MODULES_LEN (MODULES_END - MODULES_VADDR)
17925 #ifndef __ASSEMBLY__
17927 -static inline unsigned long pgd_bad(pgd_t pgd)
17928 +static inline int pgd_bad(pgd_t pgd)
17930 - return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
17931 + return (__pgd_val(pgd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
17934 -static inline unsigned long pud_bad(pud_t pud)
17935 +static inline int pud_bad(pud_t pud)
17937 - return __pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
17938 + return (__pud_val(pud) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
17941 -static inline unsigned long pmd_bad(pmd_t pmd)
17942 +static inline int pmd_bad(pmd_t pmd)
17944 - return __pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
17945 + return (__pmd_val(pmd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
17948 #define pte_none(x) (!(x).pte)
17949 #define pte_present(x) ((x).pte & (_PAGE_PRESENT | _PAGE_PROTNONE))
17951 -#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) /* FIXME: is this right? */
17952 +#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */
17954 #define __pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT)
17955 #define pte_mfn(_pte) ((_pte).pte & _PAGE_PRESENT ? \
17956 @@ -181,13 +183,13 @@ static inline unsigned long pmd_bad(pmd_
17957 mfn_to_local_pfn(__pte_mfn(_pte)) : \
17960 -#define pte_page(x) pfn_to_page(pte_pfn(x))
17961 +#define pte_page(x) pfn_to_page(pte_pfn((x)))
17964 * Macro to mark a page protection value as "uncacheable".
17966 -#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT))
17968 +#define pgprot_noncached(prot) \
17969 + (__pgprot(pgprot_val((prot)) | _PAGE_PCD | _PAGE_PWT))
17972 * Conversion functions: convert a page and protection to a page entry,
17973 @@ -197,36 +199,39 @@ static inline unsigned long pmd_bad(pmd_
17977 -#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
17978 -#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
17979 -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
17980 -#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
17981 -#define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
17982 +#define pgd_page_vaddr(pgd) \
17983 + ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_MASK))
17984 +#define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT))
17985 +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
17986 +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
17987 +#define pgd_offset_k(address) (init_level4_pgt + pgd_index((address)))
17988 #define pgd_present(pgd) (__pgd_val(pgd) & _PAGE_PRESENT)
17989 static inline int pgd_large(pgd_t pgd) { return 0; }
17990 #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
17992 /* PUD - Level3 access */
17993 /* to find an entry in a page-table-directory. */
17994 -#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
17995 -#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
17996 -#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
17997 -#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
17998 +#define pud_page_vaddr(pud) \
17999 + ((unsigned long)__va(pud_val((pud)) & PHYSICAL_PAGE_MASK))
18000 +#define pud_page(pud) (pfn_to_page(pud_val((pud)) >> PAGE_SHIFT))
18001 +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
18002 +#define pud_offset(pgd, address) \
18003 + ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address)))
18004 #define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT)
18006 static inline int pud_large(pud_t pte)
18008 - return (__pud_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
18009 - (_PAGE_PSE|_PAGE_PRESENT);
18010 + return (__pud_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
18011 + (_PAGE_PSE | _PAGE_PRESENT);
18014 /* PMD - Level 2 access */
18015 -#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
18016 -#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
18017 +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_MASK))
18018 +#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
18020 -#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
18021 -#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
18022 - pmd_index(address))
18023 +#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
18024 +#define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \
18025 + pmd_index(address))
18026 #define pmd_none(x) (!__pmd_val(x))
18027 #if CONFIG_XEN_COMPAT <= 0x030002
18028 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
18029 @@ -235,43 +240,56 @@ static inline int pud_large(pud_t pte)
18031 #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT)
18033 -#define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
18034 -#define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
18035 +#define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot))))
18036 +#define pmd_pfn(x) ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT)
18038 #define pte_to_pgoff(pte) ((__pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
18039 -#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | _PAGE_FILE })
18040 +#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \
18042 #define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT
18044 /* PTE - Level 1 access. */
18046 /* page, protection -> pte */
18047 -#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
18049 -#define pte_index(address) \
18050 - (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
18051 +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn((page)), (pgprot))
18053 +#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
18054 #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
18055 - pte_index(address))
18056 + pte_index((address)))
18058 /* x86-64 always has all page tables mapped. */
18059 -#define pte_offset_map(dir,address) pte_offset_kernel(dir,address)
18060 -#define pte_offset_map_nested(dir,address) pte_offset_kernel(dir,address)
18061 +#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
18062 +#define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address))
18063 #define pte_unmap(pte) /* NOP */
18064 -#define pte_unmap_nested(pte) /* NOP */
18065 +#define pte_unmap_nested(pte) /* NOP */
18067 +#define update_mmu_cache(vma, address, pte) do { } while (0)
18069 -#define update_mmu_cache(vma,address,pte) do { } while (0)
18070 +#define direct_gbpages 0
18072 /* Encode and de-code a swap entry */
18073 -#define __swp_type(x) (((x).val >> 1) & 0x3f)
18074 -#define __swp_offset(x) ((x).val >> 8)
18075 -#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
18076 +#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
18077 +#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
18078 +#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
18080 +#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
18081 +#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
18084 +#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \
18085 + & ((1U << SWP_TYPE_BITS) - 1))
18086 +#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT)
18087 +#define __swp_entry(type, offset) ((swp_entry_t) { \
18088 + ((type) << (_PAGE_BIT_PRESENT + 1)) \
18089 + | ((offset) << SWP_OFFSET_SHIFT) })
18090 #define __pte_to_swp_entry(pte) ((swp_entry_t) { __pte_val(pte) })
18091 #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
18093 -extern int kern_addr_valid(unsigned long addr);
18094 +extern int kern_addr_valid(unsigned long addr);
18095 extern void cleanup_highmap(void);
18097 -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
18098 - direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
18099 +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
18100 + direct_remap_pfn_range(vma, vaddr, pfn, size, prot, DOMID_IO)
18102 #define HAVE_ARCH_UNMAPPED_AREA
18103 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
18104 @@ -284,8 +302,10 @@ extern void cleanup_highmap(void);
18106 /* fs/proc/kcore.c */
18107 #define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK)
18108 -#define kc_offset_to_vaddr(o) \
18109 - (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
18110 +#define kc_offset_to_vaddr(o) \
18111 + (((o) & (1UL << (__VIRTUAL_MASK_SHIFT - 1))) \
18112 + ? ((o) | ~__VIRTUAL_MASK) \
18115 #define __HAVE_ARCH_PTE_SAME
18116 #endif /* !__ASSEMBLY__ */
18117 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/processor.h 2009-03-16 16:33:40.000000000 +0100
18118 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/processor.h 2009-03-16 16:38:05.000000000 +0100
18121 #include <asm/processor-flags.h>
18123 -/* migration helpers, for KVM - will be removed in 2.6.25: */
18124 -#include <asm/vm86.h>
18125 -#define Xgt_desc_struct desc_ptr
18127 /* Forward declaration, a strange C thing */
18128 struct task_struct;
18130 @@ -24,6 +20,7 @@ struct mm_struct;
18131 #include <asm/msr.h>
18132 #include <asm/desc_defs.h>
18133 #include <asm/nops.h>
18135 #include <linux/personality.h>
18136 #include <linux/cpumask.h>
18137 #include <linux/cache.h>
18138 @@ -38,16 +35,18 @@ struct mm_struct;
18139 static inline void *current_text_addr(void)
18142 - asm volatile("mov $1f,%0\n1:":"=r" (pc));
18144 + asm volatile("mov $1f, %0; 1:":"=r" (pc));
18149 #ifdef CONFIG_X86_VSMP
18150 -#define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT)
18151 -#define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT)
18152 +# define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT)
18153 +# define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT)
18155 -#define ARCH_MIN_TASKALIGN 16
18156 -#define ARCH_MIN_MMSTRUCT_ALIGN 0
18157 +# define ARCH_MIN_TASKALIGN 16
18158 +# define ARCH_MIN_MMSTRUCT_ALIGN 0
18162 @@ -57,68 +56,80 @@ static inline void *current_text_addr(vo
18165 struct cpuinfo_x86 {
18166 - __u8 x86; /* CPU family */
18167 - __u8 x86_vendor; /* CPU vendor */
18170 + __u8 x86; /* CPU family */
18171 + __u8 x86_vendor; /* CPU vendor */
18174 #ifdef CONFIG_X86_32
18175 - char wp_works_ok; /* It doesn't on 386's */
18176 - char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */
18183 + char wp_works_ok; /* It doesn't on 386's */
18185 + /* Problems on some 486Dx4's and old 386's: */
18186 + char hlt_works_ok;
18194 - /* number of 4K pages in DTLB/ITLB combined(in pages)*/
18196 - __u8 x86_virt_bits, x86_phys_bits;
18197 - /* cpuid returned core id bits */
18198 - __u8 x86_coreid_bits;
18199 - /* Max extended CPUID function supported */
18200 - __u32 extended_cpuid_level;
18202 - int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
18203 - __u32 x86_capability[NCAPINTS];
18204 - char x86_vendor_id[16];
18205 - char x86_model_id[64];
18206 - int x86_cache_size; /* in KB - valid for CPUS which support this
18208 - int x86_cache_alignment; /* In bytes */
18210 - unsigned long loops_per_jiffy;
18211 + /* Number of 4K pages in DTLB/ITLB combined(in pages): */
18213 + __u8 x86_virt_bits;
18214 + __u8 x86_phys_bits;
18215 + /* CPUID returned core id bits: */
18216 + __u8 x86_coreid_bits;
18217 + /* Max extended CPUID function supported: */
18218 + __u32 extended_cpuid_level;
18220 + /* Maximum supported CPUID level, -1=no CPUID: */
18222 + __u32 x86_capability[NCAPINTS];
18223 + char x86_vendor_id[16];
18224 + char x86_model_id[64];
18225 + /* in KB - valid for CPUS which support this call: */
18226 + int x86_cache_size;
18227 + int x86_cache_alignment; /* In bytes */
18229 + unsigned long loops_per_jiffy;
18231 - cpumask_t llc_shared_map; /* cpus sharing the last level cache */
18232 + /* cpus sharing the last level cache: */
18233 + cpumask_t llc_shared_map;
18235 - u16 x86_max_cores; /* cpuid returned max cores value */
18237 - u16 x86_clflush_size;
18238 + /* cpuid returned max cores value: */
18239 + u16 x86_max_cores;
18241 + u16 initial_apicid;
18242 + u16 x86_clflush_size;
18244 - u16 booted_cores; /* number of cores as seen by OS */
18245 - u16 phys_proc_id; /* Physical processor id. */
18246 - u16 cpu_core_id; /* Core id */
18247 - u16 cpu_index; /* index into per_cpu list */
18248 + /* number of cores as seen by the OS: */
18249 + u16 booted_cores;
18250 + /* Physical processor id: */
18251 + u16 phys_proc_id;
18254 + /* Index into per_cpu list: */
18257 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
18259 -#define X86_VENDOR_INTEL 0
18260 -#define X86_VENDOR_CYRIX 1
18261 -#define X86_VENDOR_AMD 2
18262 -#define X86_VENDOR_UMC 3
18263 -#define X86_VENDOR_NEXGEN 4
18264 -#define X86_VENDOR_CENTAUR 5
18265 -#define X86_VENDOR_TRANSMETA 7
18266 -#define X86_VENDOR_NSC 8
18267 -#define X86_VENDOR_NUM 9
18268 -#define X86_VENDOR_UNKNOWN 0xff
18269 +#define X86_VENDOR_INTEL 0
18270 +#define X86_VENDOR_CYRIX 1
18271 +#define X86_VENDOR_AMD 2
18272 +#define X86_VENDOR_UMC 3
18273 +#define X86_VENDOR_CENTAUR 5
18274 +#define X86_VENDOR_TRANSMETA 7
18275 +#define X86_VENDOR_NSC 8
18276 +#define X86_VENDOR_NUM 9
18278 +#define X86_VENDOR_UNKNOWN 0xff
18281 * capabilities of CPUs
18283 -extern struct cpuinfo_x86 boot_cpu_data;
18284 -extern struct cpuinfo_x86 new_cpu_data;
18285 -extern __u32 cleared_cpu_caps[NCAPINTS];
18286 +extern struct cpuinfo_x86 boot_cpu_data;
18287 +extern struct cpuinfo_x86 new_cpu_data;
18289 +extern __u32 cleared_cpu_caps[NCAPINTS];
18292 DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
18293 @@ -129,7 +140,18 @@ DECLARE_PER_CPU(struct cpuinfo_x86, cpu_
18294 #define current_cpu_data boot_cpu_data
18297 -void cpu_detect(struct cpuinfo_x86 *c);
18298 +static inline int hlt_works(int cpu)
18300 +#ifdef CONFIG_X86_32
18301 + return cpu_data(cpu).hlt_works_ok;
18307 +#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
18309 +extern void cpu_detect(struct cpuinfo_x86 *c);
18311 extern void identify_cpu(struct cpuinfo_x86 *);
18312 extern void identify_boot_cpu(void);
18313 @@ -149,12 +171,12 @@ static inline void xen_cpuid(unsigned in
18314 unsigned int *ecx, unsigned int *edx)
18316 /* ecx is often an input as well as an output. */
18317 - __asm__(XEN_CPUID
18322 - : "0" (*eax), "2" (*ecx));
18328 + : "0" (*eax), "2" (*ecx));
18331 static inline void load_cr3(pgd_t *pgdir)
18332 @@ -166,57 +188,70 @@ static inline void load_cr3(pgd_t *pgdir
18333 #ifdef CONFIG_X86_32
18334 /* This is the TSS defined by the hardware. */
18335 struct x86_hw_tss {
18336 - unsigned short back_link, __blh;
18337 - unsigned long sp0;
18338 - unsigned short ss0, __ss0h;
18339 - unsigned long sp1;
18340 - unsigned short ss1, __ss1h; /* ss1 caches MSR_IA32_SYSENTER_CS */
18341 - unsigned long sp2;
18342 - unsigned short ss2, __ss2h;
18343 - unsigned long __cr3;
18344 - unsigned long ip;
18345 - unsigned long flags;
18346 - unsigned long ax, cx, dx, bx;
18347 - unsigned long sp, bp, si, di;
18348 - unsigned short es, __esh;
18349 - unsigned short cs, __csh;
18350 - unsigned short ss, __ssh;
18351 - unsigned short ds, __dsh;
18352 - unsigned short fs, __fsh;
18353 - unsigned short gs, __gsh;
18354 - unsigned short ldt, __ldth;
18355 - unsigned short trace, io_bitmap_base;
18356 + unsigned short back_link, __blh;
18357 + unsigned long sp0;
18358 + unsigned short ss0, __ss0h;
18359 + unsigned long sp1;
18360 + /* ss1 caches MSR_IA32_SYSENTER_CS: */
18361 + unsigned short ss1, __ss1h;
18362 + unsigned long sp2;
18363 + unsigned short ss2, __ss2h;
18364 + unsigned long __cr3;
18365 + unsigned long ip;
18366 + unsigned long flags;
18367 + unsigned long ax;
18368 + unsigned long cx;
18369 + unsigned long dx;
18370 + unsigned long bx;
18371 + unsigned long sp;
18372 + unsigned long bp;
18373 + unsigned long si;
18374 + unsigned long di;
18375 + unsigned short es, __esh;
18376 + unsigned short cs, __csh;
18377 + unsigned short ss, __ssh;
18378 + unsigned short ds, __dsh;
18379 + unsigned short fs, __fsh;
18380 + unsigned short gs, __gsh;
18381 + unsigned short ldt, __ldth;
18382 + unsigned short trace;
18383 + unsigned short io_bitmap_base;
18385 } __attribute__((packed));
18386 extern struct tss_struct doublefault_tss;
18388 struct x86_hw_tss {
18398 - u16 io_bitmap_base;
18408 + u16 io_bitmap_base;
18410 } __attribute__((packed)) ____cacheline_aligned;
18412 #endif /* CONFIG_X86_NO_TSS */
18415 - * Size of io_bitmap.
18416 + * IO-bitmap sizes:
18418 -#define IO_BITMAP_BITS 65536
18419 -#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
18420 -#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
18421 -#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
18422 -#define INVALID_IO_BITMAP_OFFSET 0x8000
18423 -#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
18424 +#define IO_BITMAP_BITS 65536
18425 +#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
18426 +#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
18427 +#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
18428 +#define INVALID_IO_BITMAP_OFFSET 0x8000
18429 +#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
18431 #ifndef CONFIG_X86_NO_TSS
18432 struct tss_struct {
18433 - struct x86_hw_tss x86_tss;
18435 + * The hardware state:
18437 + struct x86_hw_tss x86_tss;
18440 * The extra 1 is there because the CPU will access an
18441 @@ -224,136 +259,162 @@ struct tss_struct {
18442 * bitmap. The extra byte must be all 1 bits, and must
18443 * be within the limit.
18445 - unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
18446 + unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
18448 * Cache the current maximum and the last task that used the bitmap:
18450 - unsigned long io_bitmap_max;
18451 - struct thread_struct *io_bitmap_owner;
18452 + unsigned long io_bitmap_max;
18453 + struct thread_struct *io_bitmap_owner;
18456 - * pads the TSS to be cacheline-aligned (size is 0x100)
18457 + * Pad the TSS to be cacheline-aligned (size is 0x100):
18459 - unsigned long __cacheline_filler[35];
18460 + unsigned long __cacheline_filler[35];
18462 - * .. and then another 0x100 bytes for emergency kernel stack
18463 + * .. and then another 0x100 bytes for the emergency kernel stack:
18465 - unsigned long stack[64];
18466 + unsigned long stack[64];
18468 } __attribute__((packed));
18470 DECLARE_PER_CPU(struct tss_struct, init_tss);
18472 -/* Save the original ist values for checking stack pointers during debugging */
18474 + * Save the original ist values for checking stack pointers during debugging
18477 - unsigned long ist[7];
18478 + unsigned long ist[7];
18480 #endif /* CONFIG_X86_NO_TSS */
18482 #define MXCSR_DEFAULT 0x1f80
18484 struct i387_fsave_struct {
18492 - u32 st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
18493 - u32 status; /* software status information */
18494 + u32 cwd; /* FPU Control Word */
18495 + u32 swd; /* FPU Status Word */
18496 + u32 twd; /* FPU Tag Word */
18497 + u32 fip; /* FPU IP Offset */
18498 + u32 fcs; /* FPU IP Selector */
18499 + u32 foo; /* FPU Operand Pointer Offset */
18500 + u32 fos; /* FPU Operand Pointer Selector */
18502 + /* 8*10 bytes for each FP-reg = 80 bytes: */
18503 + u32 st_space[20];
18505 + /* Software status information [not touched by FSAVE ]: */
18509 struct i387_fxsave_struct {
18514 + u16 cwd; /* Control Word */
18515 + u16 swd; /* Status Word */
18516 + u16 twd; /* Tag Word */
18517 + u16 fop; /* Last Instruction Opcode */
18522 + u64 rip; /* Instruction Pointer */
18523 + u64 rdp; /* Data Pointer */
18530 + u32 fip; /* FPU IP Offset */
18531 + u32 fcs; /* FPU IP Selector */
18532 + u32 foo; /* FPU Operand Offset */
18533 + u32 fos; /* FPU Operand Selector */
18538 - u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
18539 - u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
18541 + u32 mxcsr; /* MXCSR Register State */
18542 + u32 mxcsr_mask; /* MXCSR Mask */
18544 + /* 8*16 bytes for each FP-reg = 128 bytes: */
18545 + u32 st_space[32];
18547 + /* 16*16 bytes for each XMM-reg = 256 bytes: */
18548 + u32 xmm_space[64];
18552 } __attribute__((aligned(16)));
18554 struct i387_soft_struct {
18562 - u32 st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
18563 - u8 ftop, changed, lookahead, no_update, rm, alimit;
18564 - struct info *info;
18573 + /* 8*10 bytes for each FP-reg = 80 bytes: */
18574 + u32 st_space[20];
18581 + struct info *info;
18585 -union i387_union {
18586 +union thread_xstate {
18587 struct i387_fsave_struct fsave;
18588 struct i387_fxsave_struct fxsave;
18589 - struct i387_soft_struct soft;
18590 + struct i387_soft_struct soft;
18593 -#ifdef CONFIG_X86_32
18594 -DECLARE_PER_CPU(u8, cpu_llc_id);
18595 -#elif !defined(CONFIG_X86_NO_TSS)
18596 +#if defined(CONFIG_X86_64) && !defined(CONFIG_X86_NO_TSS)
18597 DECLARE_PER_CPU(struct orig_ist, orig_ist);
18600 extern void print_cpu_info(struct cpuinfo_x86 *);
18601 +extern unsigned int xstate_size;
18602 +extern void free_thread_xstate(struct task_struct *);
18603 +extern struct kmem_cache *task_xstate_cachep;
18604 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
18605 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
18606 extern unsigned short num_cache_leaves;
18608 struct thread_struct {
18609 -/* cached TLS descriptors. */
18610 - struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
18611 - unsigned long sp0;
18612 - unsigned long sp;
18613 + /* Cached TLS descriptors: */
18614 + struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
18615 + unsigned long sp0;
18616 + unsigned long sp;
18617 #ifdef CONFIG_X86_32
18618 - unsigned long sysenter_cs;
18619 + unsigned long sysenter_cs;
18621 - unsigned long usersp; /* Copy from PDA */
18622 - unsigned short es, ds, fsindex, gsindex;
18624 - unsigned long ip;
18625 - unsigned long fs;
18626 - unsigned long gs;
18627 -/* Hardware debugging registers */
18628 - unsigned long debugreg0;
18629 - unsigned long debugreg1;
18630 - unsigned long debugreg2;
18631 - unsigned long debugreg3;
18632 - unsigned long debugreg6;
18633 - unsigned long debugreg7;
18635 - unsigned long cr2, trap_no, error_code;
18636 -/* floating point info */
18637 - union i387_union i387 __attribute__((aligned(16)));;
18638 + unsigned long usersp; /* Copy from PDA */
18639 + unsigned short es;
18640 + unsigned short ds;
18641 + unsigned short fsindex;
18642 + unsigned short gsindex;
18644 + unsigned long ip;
18645 + unsigned long fs;
18646 + unsigned long gs;
18647 + /* Hardware debugging registers: */
18648 + unsigned long debugreg0;
18649 + unsigned long debugreg1;
18650 + unsigned long debugreg2;
18651 + unsigned long debugreg3;
18652 + unsigned long debugreg6;
18653 + unsigned long debugreg7;
18654 + /* Fault info: */
18655 + unsigned long cr2;
18656 + unsigned long trap_no;
18657 + unsigned long error_code;
18658 + /* floating point and extended processor state */
18659 + union thread_xstate *xstate;
18660 #ifdef CONFIG_X86_32
18661 -/* virtual 86 mode info */
18662 + /* Virtual 86 mode info */
18663 struct vm86_struct __user *vm86_info;
18664 unsigned long screen_bitmap;
18665 unsigned long v86flags, v86mask, saved_sp0;
18666 unsigned int saved_fs, saved_gs;
18668 -/* IO permissions */
18669 - unsigned long *io_bitmap_ptr;
18670 - unsigned long iopl;
18671 -/* max allowed port in the bitmap, in bytes: */
18672 - unsigned io_bitmap_max;
18673 + /* IO permissions: */
18674 + unsigned long *io_bitmap_ptr;
18675 + unsigned long iopl;
18676 + /* Max allowed port in the bitmap, in bytes: */
18677 + unsigned io_bitmap_max;
18678 /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
18679 unsigned long debugctlmsr;
18680 /* Debug Store - if not 0 points to a DS Save Area configuration;
18681 @@ -384,12 +445,12 @@ static inline void xen_set_iopl_mask(uns
18684 #ifndef CONFIG_X86_NO_TSS
18685 -static inline void native_load_sp0(struct tss_struct *tss,
18686 - struct thread_struct *thread)
18687 +static inline void
18688 +native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
18690 tss->x86_tss.sp0 = thread->sp0;
18691 #ifdef CONFIG_X86_32
18692 - /* Only happens when SEP is enabled, no need to test "SEP"arately */
18693 + /* Only happens when SEP is enabled, no need to test "SEP"arately: */
18694 if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
18695 tss->x86_tss.ss1 = thread->sysenter_cs;
18696 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
18697 @@ -403,8 +464,8 @@ static inline void native_load_sp0(struc
18701 -#define __cpuid xen_cpuid
18702 -#define paravirt_enabled() 0
18703 +#define __cpuid xen_cpuid
18704 +#define paravirt_enabled() 0
18707 * These special macros can be used to get or set a debugging register
18708 @@ -424,11 +485,12 @@ static inline void native_load_sp0(struc
18709 * enable), so that any CPU's that boot up
18710 * after us can get the correct flags.
18712 -extern unsigned long mmu_cr4_features;
18713 +extern unsigned long mmu_cr4_features;
18715 static inline void set_in_cr4(unsigned long mask)
18719 mmu_cr4_features |= mask;
18722 @@ -438,6 +500,7 @@ static inline void set_in_cr4(unsigned l
18723 static inline void clear_in_cr4(unsigned long mask)
18727 mmu_cr4_features &= ~mask;
18730 @@ -445,42 +508,42 @@ static inline void clear_in_cr4(unsigned
18733 struct microcode_header {
18734 - unsigned int hdrver;
18735 - unsigned int rev;
18736 - unsigned int date;
18737 - unsigned int sig;
18738 - unsigned int cksum;
18739 - unsigned int ldrver;
18741 - unsigned int datasize;
18742 - unsigned int totalsize;
18743 - unsigned int reserved[3];
18744 + unsigned int hdrver;
18745 + unsigned int rev;
18746 + unsigned int date;
18747 + unsigned int sig;
18748 + unsigned int cksum;
18749 + unsigned int ldrver;
18751 + unsigned int datasize;
18752 + unsigned int totalsize;
18753 + unsigned int reserved[3];
18757 - struct microcode_header hdr;
18758 - unsigned int bits[0];
18759 + struct microcode_header hdr;
18760 + unsigned int bits[0];
18763 -typedef struct microcode microcode_t;
18764 -typedef struct microcode_header microcode_header_t;
18765 +typedef struct microcode microcode_t;
18766 +typedef struct microcode_header microcode_header_t;
18768 /* microcode format is extended from prescott processors */
18769 struct extended_signature {
18770 - unsigned int sig;
18772 - unsigned int cksum;
18773 + unsigned int sig;
18775 + unsigned int cksum;
18778 struct extended_sigtable {
18779 - unsigned int count;
18780 - unsigned int cksum;
18781 - unsigned int reserved[3];
18782 + unsigned int count;
18783 + unsigned int cksum;
18784 + unsigned int reserved[3];
18785 struct extended_signature sigs[0];
18789 - unsigned long seg;
18790 + unsigned long seg;
18794 @@ -492,7 +555,7 @@ extern int kernel_thread(int (*fn)(void
18795 /* Free all resources held by a thread. */
18796 extern void release_thread(struct task_struct *);
18798 -/* Prepare to copy thread state - unlazy all lazy status */
18799 +/* Prepare to copy thread state - unlazy all lazy state */
18800 extern void prepare_to_copy(struct task_struct *tsk);
18802 unsigned long get_wchan(struct task_struct *p);
18803 @@ -529,118 +592,138 @@ static inline unsigned int cpuid_eax(uns
18804 unsigned int eax, ebx, ecx, edx;
18806 cpuid(op, &eax, &ebx, &ecx, &edx);
18811 static inline unsigned int cpuid_ebx(unsigned int op)
18813 unsigned int eax, ebx, ecx, edx;
18815 cpuid(op, &eax, &ebx, &ecx, &edx);
18820 static inline unsigned int cpuid_ecx(unsigned int op)
18822 unsigned int eax, ebx, ecx, edx;
18824 cpuid(op, &eax, &ebx, &ecx, &edx);
18829 static inline unsigned int cpuid_edx(unsigned int op)
18831 unsigned int eax, ebx, ecx, edx;
18833 cpuid(op, &eax, &ebx, &ecx, &edx);
18838 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
18839 static inline void rep_nop(void)
18841 - __asm__ __volatile__("rep;nop": : :"memory");
18842 + asm volatile("rep; nop" ::: "memory");
18845 -/* Stop speculative execution */
18846 +static inline void cpu_relax(void)
18851 +/* Stop speculative execution: */
18852 static inline void sync_core(void)
18856 asm volatile("cpuid" : "=a" (tmp) : "0" (1)
18857 - : "ebx", "ecx", "edx", "memory");
18858 + : "ebx", "ecx", "edx", "memory");
18861 -#define cpu_relax() rep_nop()
18863 static inline void __monitor(const void *eax, unsigned long ecx,
18864 - unsigned long edx)
18865 + unsigned long edx)
18867 - /* "monitor %eax,%ecx,%edx;" */
18869 - ".byte 0x0f,0x01,0xc8;"
18870 - : :"a" (eax), "c" (ecx), "d"(edx));
18871 + /* "monitor %eax, %ecx, %edx;" */
18872 + asm volatile(".byte 0x0f, 0x01, 0xc8;"
18873 + :: "a" (eax), "c" (ecx), "d"(edx));
18876 static inline void __mwait(unsigned long eax, unsigned long ecx)
18878 - /* "mwait %eax,%ecx;" */
18880 - ".byte 0x0f,0x01,0xc9;"
18881 - : :"a" (eax), "c" (ecx));
18882 + /* "mwait %eax, %ecx;" */
18883 + asm volatile(".byte 0x0f, 0x01, 0xc9;"
18884 + :: "a" (eax), "c" (ecx));
18887 static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
18889 - /* "mwait %eax,%ecx;" */
18891 - "sti; .byte 0x0f,0x01,0xc9;"
18892 - : :"a" (eax), "c" (ecx));
18893 + trace_hardirqs_on();
18894 + /* "mwait %eax, %ecx;" */
18895 + asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
18896 + :: "a" (eax), "c" (ecx));
18899 extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
18901 -extern int force_mwait;
18902 +extern int force_mwait;
18904 extern void select_idle_routine(const struct cpuinfo_x86 *c);
18906 -extern unsigned long boot_option_idle_override;
18907 +extern unsigned long boot_option_idle_override;
18909 extern void enable_sep_cpu(void);
18910 extern int sysenter_setup(void);
18912 /* Defined in head.S */
18913 -extern struct desc_ptr early_gdt_descr;
18914 +extern struct desc_ptr early_gdt_descr;
18916 extern void cpu_set_gdt(int);
18917 extern void switch_to_new_gdt(void);
18918 extern void cpu_init(void);
18919 extern void init_gdt(int cpu);
18921 -/* from system description table in BIOS. Mostly for MCA use, but
18922 - * others may find it useful. */
18923 -extern unsigned int machine_id;
18924 -extern unsigned int machine_submodel_id;
18925 -extern unsigned int BIOS_revision;
18926 +static inline void update_debugctlmsr(unsigned long debugctlmsr)
18928 +#ifndef CONFIG_X86_DEBUGCTLMSR
18929 + if (boot_cpu_data.x86 < 6)
18932 + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
18935 -/* Boot loader type from the setup header */
18936 -extern int bootloader_type;
18938 + * from system description table in BIOS. Mostly for MCA use, but
18939 + * others may find it useful:
18941 +extern unsigned int machine_id;
18942 +extern unsigned int machine_submodel_id;
18943 +extern unsigned int BIOS_revision;
18945 +/* Boot loader type from the setup header: */
18946 +extern int bootloader_type;
18948 -extern char ignore_fpu_irq;
18949 -#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
18950 +extern char ignore_fpu_irq;
18952 #define HAVE_ARCH_PICK_MMAP_LAYOUT 1
18953 #define ARCH_HAS_PREFETCHW
18954 #define ARCH_HAS_SPINLOCK_PREFETCH
18956 #ifdef CONFIG_X86_32
18957 -#define BASE_PREFETCH ASM_NOP4
18958 -#define ARCH_HAS_PREFETCH
18959 +# define BASE_PREFETCH ASM_NOP4
18960 +# define ARCH_HAS_PREFETCH
18962 -#define BASE_PREFETCH "prefetcht0 (%1)"
18963 +# define BASE_PREFETCH "prefetcht0 (%1)"
18966 -/* Prefetch instructions for Pentium III and AMD Athlon */
18967 -/* It's not worth to care about 3dnow! prefetches for the K6
18968 - because they are microcoded there and very slow.
18969 - However we don't do prefetches for pre XP Athlons currently
18970 - That should be fixed. */
18972 + * Prefetch instructions for Pentium III (+) and AMD Athlon (+)
18974 + * It's not worth to care about 3dnow prefetches for the K6
18975 + * because they are microcoded there and very slow.
18977 static inline void prefetch(const void *x)
18979 alternative_input(BASE_PREFETCH,
18980 @@ -649,8 +732,11 @@ static inline void prefetch(const void *
18984 -/* 3dnow! prefetch to get an exclusive cache line. Useful for
18985 - spinlocks to avoid one state transition in the cache coherency protocol. */
18987 + * 3dnow prefetch to get an exclusive cache line.
18988 + * Useful for spinlocks to avoid one state transition in the
18989 + * cache coherency protocol:
18991 static inline void prefetchw(const void *x)
18993 alternative_input(BASE_PREFETCH,
18994 @@ -659,21 +745,25 @@ static inline void prefetchw(const void
18998 -#define spin_lock_prefetch(x) prefetchw(x)
18999 +static inline void spin_lock_prefetch(const void *x)
19004 #ifdef CONFIG_X86_32
19006 * User space process size: 3GB (default).
19008 -#define TASK_SIZE (PAGE_OFFSET)
19009 -#define STACK_TOP TASK_SIZE
19010 -#define STACK_TOP_MAX STACK_TOP
19012 -#define INIT_THREAD { \
19013 - .sp0 = sizeof(init_stack) + (long)&init_stack, \
19014 - .vm86_info = NULL, \
19015 - .sysenter_cs = __KERNEL_CS, \
19016 - .io_bitmap_ptr = NULL, \
19017 - .fs = __KERNEL_PERCPU, \
19018 +#define TASK_SIZE PAGE_OFFSET
19019 +#define STACK_TOP TASK_SIZE
19020 +#define STACK_TOP_MAX STACK_TOP
19022 +#define INIT_THREAD { \
19023 + .sp0 = sizeof(init_stack) + (long)&init_stack, \
19024 + .vm86_info = NULL, \
19025 + .sysenter_cs = __KERNEL_CS, \
19026 + .io_bitmap_ptr = NULL, \
19027 + .fs = __KERNEL_PERCPU, \
19031 @@ -682,28 +772,15 @@ static inline void prefetchw(const void
19032 * permission bitmap. The extra byte must be all 1 bits, and must
19033 * be within the limit.
19035 -#define INIT_TSS { \
19037 +#define INIT_TSS { \
19039 .sp0 = sizeof(init_stack) + (long)&init_stack, \
19040 - .ss0 = __KERNEL_DS, \
19041 - .ss1 = __KERNEL_CS, \
19042 - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
19044 - .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \
19047 -#define start_thread(regs, new_eip, new_esp) do { \
19048 - __asm__("movl %0,%%gs": :"r" (0)); \
19050 - set_fs(USER_DS); \
19051 - regs->ds = __USER_DS; \
19052 - regs->es = __USER_DS; \
19053 - regs->ss = __USER_DS; \
19054 - regs->cs = __USER_CS; \
19055 - regs->ip = new_eip; \
19056 - regs->sp = new_esp; \
19059 + .ss0 = __KERNEL_DS, \
19060 + .ss1 = __KERNEL_CS, \
19061 + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
19063 + .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \
19066 extern unsigned long thread_saved_pc(struct task_struct *tsk);
19068 @@ -731,24 +808,24 @@ extern unsigned long thread_saved_pc(str
19072 -#define KSTK_ESP(task) (task_pt_regs(task)->sp)
19073 +#define KSTK_ESP(task) (task_pt_regs(task)->sp)
19077 * User space process size. 47bits minus one guard page.
19079 -#define TASK_SIZE64 (0x800000000000UL - 4096)
19080 +#define TASK_SIZE64 ((1UL << 47) - PAGE_SIZE)
19082 /* This decides where the kernel will search for a free chunk of vm
19083 * space during mmap's.
19085 -#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
19086 - 0xc0000000 : 0xFFFFe000)
19087 +#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
19088 + 0xc0000000 : 0xFFFFe000)
19090 -#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \
19091 - IA32_PAGE_OFFSET : TASK_SIZE64)
19092 -#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \
19093 - IA32_PAGE_OFFSET : TASK_SIZE64)
19094 +#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \
19095 + IA32_PAGE_OFFSET : TASK_SIZE64)
19096 +#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \
19097 + IA32_PAGE_OFFSET : TASK_SIZE64)
19099 #define STACK_TOP TASK_SIZE
19100 #define STACK_TOP_MAX TASK_SIZE64
19101 @@ -761,33 +838,32 @@ extern unsigned long thread_saved_pc(str
19102 .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
19105 -#define start_thread(regs, new_rip, new_rsp) do { \
19106 - asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0)); \
19107 - load_gs_index(0); \
19108 - (regs)->ip = (new_rip); \
19109 - (regs)->sp = (new_rsp); \
19110 - write_pda(oldrsp, (new_rsp)); \
19111 - (regs)->cs = __USER_CS; \
19112 - (regs)->ss = __USER_DS; \
19113 - (regs)->flags = 0x200; \
19114 - set_fs(USER_DS); \
19118 * Return saved PC of a blocked thread.
19119 * What is this good for? it will be always the scheduler or ret_from_fork.
19121 -#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
19122 +#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
19124 -#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
19125 -#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
19126 +#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
19127 +#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
19128 #endif /* CONFIG_X86_64 */
19130 -/* This decides where the kernel will search for a free chunk of vm
19131 +extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
19132 + unsigned long new_sp);
19135 + * This decides where the kernel will search for a free chunk of vm
19136 * space during mmap's.
19138 #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
19140 -#define KSTK_EIP(task) (task_pt_regs(task)->ip)
19141 +#define KSTK_EIP(task) (task_pt_regs(task)->ip)
19143 +/* Get/set a process' ability to use the timestamp counter instruction */
19144 +#define GET_TSC_CTL(adr) get_tsc_mode((adr))
19145 +#define SET_TSC_CTL(val) set_tsc_mode((val))
19147 +extern int get_tsc_mode(unsigned long adr);
19148 +extern int set_tsc_mode(unsigned int val);
19151 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/segment.h 2009-03-16 16:33:40.000000000 +0100
19152 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/segment.h 2009-03-16 16:38:05.000000000 +0100
19153 @@ -191,13 +191,14 @@
19154 #define SEGMENT_TI_MASK 0x4
19156 #define IDT_ENTRIES 256
19157 +#define NUM_EXCEPTION_VECTORS 32
19158 #define GDT_SIZE (GDT_ENTRIES * 8)
19159 #define GDT_ENTRY_TLS_ENTRIES 3
19160 #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
19163 #ifndef __ASSEMBLY__
19164 -extern const char early_idt_handlers[IDT_ENTRIES][10];
19165 +extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10];
19169 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/smp.h 2009-02-16 16:18:36.000000000 +0100
19170 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/smp.h 2009-03-16 16:38:05.000000000 +0100
19172 -#ifdef CONFIG_X86_32
19173 -# include "smp_32.h"
19174 +#ifndef _ASM_X86_SMP_H_
19175 +#define _ASM_X86_SMP_H_
19176 +#ifndef __ASSEMBLY__
19177 +#include <linux/cpumask.h>
19178 +#include <linux/init.h>
19179 +#include <asm/percpu.h>
19182 + * We need the APIC definitions automatically as part of 'smp.h'
19184 +#ifdef CONFIG_X86_LOCAL_APIC
19185 +# include <asm/mpspec.h>
19186 +# include <asm/apic.h>
19187 +# ifdef CONFIG_X86_IO_APIC
19188 +# include <asm/io_apic.h>
19191 +#include <asm/pda.h>
19192 +#include <asm/thread_info.h>
19194 +#define cpu_callout_map cpu_possible_map
19195 +extern cpumask_t cpu_initialized;
19196 +#define cpu_callin_map cpu_possible_map
19198 +extern void (*mtrr_hook)(void);
19199 +extern void zap_low_mappings(void);
19201 +extern int smp_num_siblings;
19202 +extern unsigned int num_processors;
19203 +extern cpumask_t cpu_initialized;
19205 +#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
19206 +extern u16 x86_cpu_to_apicid_init[];
19207 +extern u16 x86_bios_cpu_apicid_init[];
19208 +extern void *x86_cpu_to_apicid_early_ptr;
19209 +extern void *x86_bios_cpu_apicid_early_ptr;
19211 -# include "smp_64.h"
19212 +#define x86_cpu_to_apicid_early_ptr NULL
19213 +#define x86_bios_cpu_apicid_early_ptr NULL
19216 +DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
19217 +DECLARE_PER_CPU(cpumask_t, cpu_core_map);
19218 +DECLARE_PER_CPU(u16, cpu_llc_id);
19219 +DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
19220 +DECLARE_PER_CPU(u16, x86_bios_cpu_apicid);
19224 +#ifndef CONFIG_XEN
19226 +/* Static state in head.S used to set up a CPU */
19229 + unsigned short ss;
19233 + void (*smp_prepare_boot_cpu)(void);
19234 + void (*smp_prepare_cpus)(unsigned max_cpus);
19235 + int (*cpu_up)(unsigned cpu);
19236 + void (*smp_cpus_done)(unsigned max_cpus);
19238 + void (*smp_send_stop)(void);
19239 + void (*smp_send_reschedule)(int cpu);
19240 + int (*smp_call_function_mask)(cpumask_t mask,
19241 + void (*func)(void *info), void *info,
19245 +/* Globals due to paravirt */
19246 +extern void set_cpu_sibling_map(int cpu);
19248 +#ifndef CONFIG_PARAVIRT
19249 +#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0)
19251 +extern struct smp_ops smp_ops;
19253 +static inline void smp_send_stop(void)
19255 + smp_ops.smp_send_stop();
19258 +static inline void smp_prepare_boot_cpu(void)
19260 + smp_ops.smp_prepare_boot_cpu();
19263 +static inline void smp_prepare_cpus(unsigned int max_cpus)
19265 + smp_ops.smp_prepare_cpus(max_cpus);
19268 +static inline void smp_cpus_done(unsigned int max_cpus)
19270 + smp_ops.smp_cpus_done(max_cpus);
19273 +static inline int __cpu_up(unsigned int cpu)
19275 + return smp_ops.cpu_up(cpu);
19278 +static inline void smp_send_reschedule(int cpu)
19280 + smp_ops.smp_send_reschedule(cpu);
19283 +static inline int smp_call_function_mask(cpumask_t mask,
19284 + void (*func) (void *info), void *info,
19287 + return smp_ops.smp_call_function_mask(mask, func, info, wait);
19290 +void native_smp_prepare_boot_cpu(void);
19291 +void native_smp_prepare_cpus(unsigned int max_cpus);
19292 +void native_smp_cpus_done(unsigned int max_cpus);
19293 +int native_cpu_up(unsigned int cpunum);
19295 +#else /* CONFIG_XEN */
19297 +void xen_smp_send_stop(void);
19298 +void xen_smp_send_reschedule(int cpu);
19299 +int xen_smp_call_function_mask(cpumask_t mask,
19300 + void (*func) (void *info), void *info,
19303 +#define smp_send_stop xen_smp_send_stop
19304 +#define smp_send_reschedule xen_smp_send_reschedule
19305 +#define smp_call_function_mask xen_smp_call_function_mask
19307 +extern void prefill_possible_map(void);
19309 +#endif /* CONFIG_XEN */
19311 +extern int __cpu_disable(void);
19312 +extern void __cpu_die(unsigned int cpu);
19314 +extern void prefill_possible_map(void);
19316 +void smp_store_cpu_info(int id);
19317 +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
19319 +/* We don't mark CPUs online until __cpu_up(), so we need another measure */
19320 +static inline int num_booting_cpus(void)
19322 + return cpus_weight(cpu_callout_map);
19324 +#endif /* CONFIG_SMP */
19326 +extern unsigned disabled_cpus __cpuinitdata;
19328 +#ifdef CONFIG_X86_32_SMP
19330 + * This function is needed by all SMP systems. It must _always_ be valid
19331 + * from the initial startup. We map APIC_BASE very early in page_setup(),
19332 + * so this is correct in the x86 case.
19334 +DECLARE_PER_CPU(int, cpu_number);
19335 +#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
19336 +#define safe_smp_processor_id() smp_processor_id()
19338 +#elif defined(CONFIG_X86_64_SMP)
19339 +#define raw_smp_processor_id() read_pda(cpunumber)
19341 +#define stack_smp_processor_id() \
19343 + struct thread_info *ti; \
19344 + __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
19347 +#define safe_smp_processor_id() smp_processor_id()
19349 +#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */
19350 +#define cpu_physical_id(cpu) boot_cpu_physical_apicid
19351 +#define safe_smp_processor_id() 0
19352 +#define stack_smp_processor_id() 0
19355 +#ifdef CONFIG_X86_LOCAL_APIC
19357 +static inline int logical_smp_processor_id(void)
19359 + /* we don't want to mark this access volatile - bad code generation */
19360 + return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
19363 +#ifndef CONFIG_X86_64
19364 +static inline unsigned int read_apic_id(void)
19366 + return *(u32 *)(APIC_BASE + APIC_ID);
19369 +extern unsigned int read_apic_id(void);
19373 +# ifdef APIC_DEFINITION
19374 +extern int hard_smp_processor_id(void);
19376 +# include <mach_apicdef.h>
19377 +static inline int hard_smp_processor_id(void)
19379 + /* we don't want to mark this access volatile - bad code generation */
19380 + return GET_APIC_ID(read_apic_id());
19382 +# endif /* APIC_DEFINITION */
19384 +#else /* CONFIG_X86_LOCAL_APIC */
19386 +# ifndef CONFIG_SMP
19387 +# define hard_smp_processor_id() 0
19390 +#endif /* CONFIG_X86_LOCAL_APIC */
19392 +#ifdef CONFIG_HOTPLUG_CPU
19393 +extern void cpu_exit_clear(void);
19394 +extern void cpu_uninit(void);
19397 +extern void smp_alloc_memory(void);
19398 +extern void lock_ipi_call_lock(void);
19399 +extern void unlock_ipi_call_lock(void);
19400 +#endif /* __ASSEMBLY__ */
19402 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/smp_32.h 2009-03-16 16:33:40.000000000 +0100
19403 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
19405 -#ifndef __ASM_SMP_H
19406 -#define __ASM_SMP_H
19408 -#ifndef __ASSEMBLY__
19409 -#include <linux/cpumask.h>
19410 -#include <linux/init.h>
19413 - * We need the APIC definitions automatically as part of 'smp.h'
19415 -#ifdef CONFIG_X86_LOCAL_APIC
19416 -# include <asm/mpspec.h>
19417 -# include <asm/apic.h>
19418 -# ifdef CONFIG_X86_IO_APIC
19419 -# include <asm/io_apic.h>
19423 -#define cpu_callout_map cpu_possible_map
19424 -#define cpu_callin_map cpu_possible_map
19426 -extern int smp_num_siblings;
19427 -extern unsigned int num_processors;
19429 -extern void smp_alloc_memory(void);
19430 -extern void lock_ipi_call_lock(void);
19431 -extern void unlock_ipi_call_lock(void);
19433 -extern void (*mtrr_hook) (void);
19434 -extern void zap_low_mappings (void);
19436 -DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
19437 -DECLARE_PER_CPU(cpumask_t, cpu_core_map);
19438 -DECLARE_PER_CPU(u8, cpu_llc_id);
19439 -DECLARE_PER_CPU(u8, x86_cpu_to_apicid);
19441 -#ifdef CONFIG_HOTPLUG_CPU
19442 -extern void cpu_exit_clear(void);
19443 -extern void cpu_uninit(void);
19448 -#ifndef CONFIG_XEN
19450 -/* Globals due to paravirt */
19451 -extern void set_cpu_sibling_map(int cpu);
19455 - void (*smp_prepare_boot_cpu)(void);
19456 - void (*smp_prepare_cpus)(unsigned max_cpus);
19457 - int (*cpu_up)(unsigned cpu);
19458 - void (*smp_cpus_done)(unsigned max_cpus);
19460 - void (*smp_send_stop)(void);
19461 - void (*smp_send_reschedule)(int cpu);
19462 - int (*smp_call_function_mask)(cpumask_t mask,
19463 - void (*func)(void *info), void *info,
19467 -extern struct smp_ops smp_ops;
19469 -static inline void smp_prepare_boot_cpu(void)
19471 - smp_ops.smp_prepare_boot_cpu();
19473 -static inline void smp_prepare_cpus(unsigned int max_cpus)
19475 - smp_ops.smp_prepare_cpus(max_cpus);
19477 -static inline int __cpu_up(unsigned int cpu)
19479 - return smp_ops.cpu_up(cpu);
19481 -static inline void smp_cpus_done(unsigned int max_cpus)
19483 - smp_ops.smp_cpus_done(max_cpus);
19486 -static inline void smp_send_stop(void)
19488 - smp_ops.smp_send_stop();
19490 -static inline void smp_send_reschedule(int cpu)
19492 - smp_ops.smp_send_reschedule(cpu);
19494 -static inline int smp_call_function_mask(cpumask_t mask,
19495 - void (*func) (void *info), void *info,
19498 - return smp_ops.smp_call_function_mask(mask, func, info, wait);
19501 -void native_smp_prepare_boot_cpu(void);
19502 -void native_smp_prepare_cpus(unsigned int max_cpus);
19503 -int native_cpu_up(unsigned int cpunum);
19504 -void native_smp_cpus_done(unsigned int max_cpus);
19506 -#ifndef CONFIG_PARAVIRT
19507 -#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0)
19510 -#else /* CONFIG_XEN */
19512 -void xen_smp_send_stop(void);
19513 -void xen_smp_send_reschedule(int cpu);
19514 -int xen_smp_call_function_mask(cpumask_t mask,
19515 - void (*func) (void *info), void *info,
19518 -#define smp_send_stop xen_smp_send_stop
19519 -#define smp_send_reschedule xen_smp_send_reschedule
19520 -#define smp_call_function_mask xen_smp_call_function_mask
19522 -extern void prefill_possible_map(void);
19524 -#endif /* CONFIG_XEN */
19526 -extern int __cpu_disable(void);
19527 -extern void __cpu_die(unsigned int cpu);
19530 - * This function is needed by all SMP systems. It must _always_ be valid
19531 - * from the initial startup. We map APIC_BASE very early in page_setup(),
19532 - * so this is correct in the x86 case.
19534 -DECLARE_PER_CPU(int, cpu_number);
19535 -#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
19537 -#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
19539 -#define safe_smp_processor_id() smp_processor_id()
19541 -/* We don't mark CPUs online until __cpu_up(), so we need another measure */
19542 -static inline int num_booting_cpus(void)
19544 - return cpus_weight(cpu_callout_map);
19547 -#else /* CONFIG_SMP */
19549 -#define safe_smp_processor_id() 0
19550 -#define cpu_physical_id(cpu) boot_cpu_physical_apicid
19552 -#endif /* !CONFIG_SMP */
19554 -#ifdef CONFIG_X86_LOCAL_APIC
19556 -static __inline int logical_smp_processor_id(void)
19558 - /* we don't want to mark this access volatile - bad code generation */
19559 - return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
19562 -# ifdef APIC_DEFINITION
19563 -extern int hard_smp_processor_id(void);
19565 -# include <mach_apicdef.h>
19566 -static inline int hard_smp_processor_id(void)
19568 - /* we don't want to mark this access volatile - bad code generation */
19569 - return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID));
19571 -# endif /* APIC_DEFINITION */
19573 -#else /* CONFIG_X86_LOCAL_APIC */
19575 -# ifndef CONFIG_SMP
19576 -# define hard_smp_processor_id() 0
19579 -#endif /* CONFIG_X86_LOCAL_APIC */
19581 -#endif /* !ASSEMBLY */
19583 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/smp_64.h 2009-03-16 16:33:40.000000000 +0100
19584 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
19586 -#ifndef __ASM_SMP_H
19587 -#define __ASM_SMP_H
19589 -#include <linux/cpumask.h>
19590 -#include <linux/init.h>
19592 -#ifdef CONFIG_X86_LOCAL_APIC
19594 - * We need the APIC definitions automatically as part of 'smp.h'
19596 -#include <asm/apic.h>
19597 -#ifdef CONFIG_X86_IO_APIC
19598 -#include <asm/io_apic.h>
19600 -#include <asm/mpspec.h>
19602 -#include <asm/pda.h>
19603 -#include <asm/thread_info.h>
19605 -extern cpumask_t cpu_initialized;
19607 -extern int smp_num_siblings;
19608 -extern unsigned int num_processors;
19610 -extern void smp_alloc_memory(void);
19611 -extern void lock_ipi_call_lock(void);
19612 -extern void unlock_ipi_call_lock(void);
19614 -extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
19615 - void *info, int wait);
19617 -DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
19618 -DECLARE_PER_CPU(cpumask_t, cpu_core_map);
19619 -DECLARE_PER_CPU(u16, cpu_llc_id);
19620 -DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
19621 -DECLARE_PER_CPU(u16, x86_bios_cpu_apicid);
19623 -#ifdef CONFIG_X86_LOCAL_APIC
19624 -static inline int cpu_present_to_apicid(int mps_cpu)
19626 - if (cpu_present(mps_cpu))
19627 - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
19629 - return BAD_APICID;
19635 -#define SMP_TRAMPOLINE_BASE 0x6000
19637 -extern int __cpu_disable(void);
19638 -extern void __cpu_die(unsigned int cpu);
19639 -extern void prefill_possible_map(void);
19640 -extern unsigned __cpuinitdata disabled_cpus;
19642 -#define raw_smp_processor_id() read_pda(cpunumber)
19643 -#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
19645 -#define stack_smp_processor_id() \
19647 - struct thread_info *ti; \
19648 - __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
19653 - * On x86 all CPUs are mapped 1:1 to the APIC space. This simplifies
19654 - * scheduling and IPI sending and compresses data structures.
19656 -static inline int num_booting_cpus(void)
19658 - return cpus_weight(cpu_possible_map);
19661 -extern void smp_send_reschedule(int cpu);
19663 -#else /* CONFIG_SMP */
19665 -extern unsigned int boot_cpu_id;
19666 -#define cpu_physical_id(cpu) boot_cpu_id
19667 -#define stack_smp_processor_id() 0
19669 -#endif /* !CONFIG_SMP */
19671 -#define safe_smp_processor_id() smp_processor_id()
19673 -#ifdef CONFIG_X86_LOCAL_APIC
19674 -static __inline int logical_smp_processor_id(void)
19676 - /* we don't want to mark this access volatile - bad code generation */
19677 - return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
19680 -static inline int hard_smp_processor_id(void)
19682 - /* we don't want to mark this access volatile - bad code generation */
19683 - return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID));
19689 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/spinlock.h 2009-03-16 16:33:40.000000000 +0100
19690 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/spinlock.h 2009-03-16 16:38:05.000000000 +0100
19691 @@ -88,7 +88,7 @@ extern void xen_spin_kick(raw_spinlock_t
19695 -static inline int __raw_spin_trylock(raw_spinlock_t *lock)
19696 +static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
19700 @@ -107,7 +107,7 @@ static inline int __raw_spin_trylock(raw
19704 -static inline void __raw_spin_unlock(raw_spinlock_t *lock)
19705 +static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
19707 unsigned int token;
19708 unsigned char kick;
19709 @@ -155,7 +155,7 @@ static inline void __raw_spin_unlock(raw
19710 : "memory", "cc"); \
19713 -static inline int __raw_spin_trylock(raw_spinlock_t *lock)
19714 +static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
19718 @@ -177,7 +177,7 @@ static inline int __raw_spin_trylock(raw
19722 -static inline void __raw_spin_unlock(raw_spinlock_t *lock)
19723 +static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
19725 unsigned int token, tmp;
19727 @@ -197,19 +197,19 @@ static inline void __raw_spin_unlock(raw
19729 static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
19731 - int tmp = *(volatile signed int *)(&(lock)->slock);
19732 + int tmp = ACCESS_ONCE(lock->slock);
19734 return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
19737 static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
19739 - int tmp = *(volatile signed int *)(&(lock)->slock);
19740 + int tmp = ACCESS_ONCE(lock->slock);
19742 return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
19745 -static inline void __raw_spin_lock(raw_spinlock_t *lock)
19746 +static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
19748 unsigned int token, count;
19750 @@ -223,8 +223,8 @@ static inline void __raw_spin_lock(raw_s
19751 } while (unlikely(!count) && !xen_spin_wait(lock, token));
19754 -static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
19755 - unsigned long flags)
19756 +static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
19757 + unsigned long flags)
19759 unsigned int token, count;
19761 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/swiotlb.h 2009-02-16 16:18:36.000000000 +0100
19762 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/swiotlb.h 2009-03-16 16:38:05.000000000 +0100
19764 -#ifdef CONFIG_X86_32
19765 -# include "swiotlb_32.h"
19767 -# include "../../swiotlb.h"
19769 +#ifndef _ASM_SWIOTLB_H
19771 +#include "../../swiotlb.h"
19773 +dma_addr_t swiotlb_map_single_phys(struct device *, phys_addr_t, size_t size,
19776 +#endif /* _ASM_SWIOTLB_H */
19777 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/swiotlb_32.h 2009-05-14 10:56:29.000000000 +0200
19778 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
19780 -#ifndef _ASM_SWIOTLB_H
19781 -#define _ASM_SWIOTLB_H 1
19783 -/* SWIOTLB interface */
19785 -extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, size_t size,
19787 -extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
19788 - size_t size, int dir);
19789 -extern void swiotlb_sync_single_for_cpu(struct device *hwdev,
19790 - dma_addr_t dev_addr,
19791 - size_t size, int dir);
19792 -extern void swiotlb_sync_single_for_device(struct device *hwdev,
19793 - dma_addr_t dev_addr,
19794 - size_t size, int dir);
19795 -extern void swiotlb_sync_sg_for_cpu(struct device *hwdev,
19796 - struct scatterlist *sg, int nelems,
19798 -extern void swiotlb_sync_sg_for_device(struct device *hwdev,
19799 - struct scatterlist *sg, int nelems,
19801 -extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg,
19802 - int nents, int direction);
19803 -extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
19804 - int nents, int direction);
19805 -extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr);
19806 -#ifdef CONFIG_HIGHMEM
19807 -extern dma_addr_t swiotlb_map_page(struct device *hwdev, struct page *page,
19808 - unsigned long offset, size_t size,
19809 - enum dma_data_direction direction);
19810 -extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
19811 - size_t size, enum dma_data_direction direction);
19813 -extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
19814 -extern void swiotlb_init(void);
19816 -#ifdef CONFIG_SWIOTLB
19817 -extern int swiotlb;
19823 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/system.h 2009-03-16 16:33:40.000000000 +0100
19824 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/system.h 2009-03-16 16:38:05.000000000 +0100
19825 @@ -28,22 +28,44 @@ struct task_struct *__switch_to(struct t
19826 * Saving eflags is important. It switches not only IOPL between tasks,
19827 * it also protects other tasks from NT leaking through sysenter etc.
19829 -#define switch_to(prev, next, last) do { \
19830 - unsigned long esi, edi; \
19831 - asm volatile("pushfl\n\t" /* Save flags */ \
19832 - "pushl %%ebp\n\t" \
19833 - "movl %%esp,%0\n\t" /* save ESP */ \
19834 - "movl %5,%%esp\n\t" /* restore ESP */ \
19835 - "movl $1f,%1\n\t" /* save EIP */ \
19836 - "pushl %6\n\t" /* restore EIP */ \
19837 - "jmp __switch_to\n" \
19838 +#define switch_to(prev, next, last) \
19841 + * Context-switching clobbers all registers, so we clobber \
19842 + * them explicitly, via unused output variables. \
19843 + * (EAX and EBP is not listed because EBP is saved/restored \
19844 + * explicitly for wchan access and EAX is the return value of \
19845 + * __switch_to()) \
19847 + unsigned long ebx, ecx, edx, esi, edi; \
19849 + asm volatile("pushfl\n\t" /* save flags */ \
19850 + "pushl %%ebp\n\t" /* save EBP */ \
19851 + "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \
19852 + "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
19853 + "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
19854 + "pushl %[next_ip]\n\t" /* restore EIP */ \
19855 + "jmp __switch_to\n" /* regparm call */ \
19857 - "popl %%ebp\n\t" \
19859 - :"=m" (prev->thread.sp), "=m" (prev->thread.ip), \
19860 - "=a" (last), "=S" (esi), "=D" (edi) \
19861 - :"m" (next->thread.sp), "m" (next->thread.ip), \
19862 - "2" (prev), "d" (next)); \
19863 + "popl %%ebp\n\t" /* restore EBP */ \
19864 + "popfl\n" /* restore flags */ \
19866 + /* output parameters */ \
19867 + : [prev_sp] "=m" (prev->thread.sp), \
19868 + [prev_ip] "=m" (prev->thread.ip), \
19871 + /* clobbered output registers: */ \
19872 + "=b" (ebx), "=c" (ecx), "=d" (edx), \
19873 + "=S" (esi), "=D" (edi) \
19875 + /* input parameters: */ \
19876 + : [next_sp] "m" (next->thread.sp), \
19877 + [next_ip] "m" (next->thread.ip), \
19879 + /* regparm parameters for __switch_to(): */ \
19880 + [prev] "a" (prev), \
19881 + [next] "d" (next)); \
19885 @@ -123,30 +145,29 @@ extern void load_gs_index(unsigned);
19887 #define loadsegment(seg, value) \
19888 asm volatile("\n" \
19890 - "movl %k0,%%" #seg "\n" \
19892 - ".section .fixup,\"ax\"\n" \
19894 - "movl %k1, %%" #seg "\n\t" \
19897 - _ASM_EXTABLE(1b,3b) \
19898 - : :"r" (value), "r" (0))
19900 + "movl %k0,%%" #seg "\n" \
19902 + ".section .fixup,\"ax\"\n" \
19904 + "movl %k1, %%" #seg "\n\t" \
19907 + _ASM_EXTABLE(1b,3b) \
19908 + : :"r" (value), "r" (0))
19912 * Save a segment register away
19914 -#define savesegment(seg, value) \
19915 +#define savesegment(seg, value) \
19916 asm volatile("mov %%" #seg ",%0":"=rm" (value))
19918 static inline unsigned long get_limit(unsigned long segment)
19920 unsigned long __limit;
19921 - __asm__("lsll %1,%0"
19922 - :"=r" (__limit):"r" (segment));
19923 - return __limit+1;
19924 + asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
19925 + return __limit + 1;
19928 static inline void xen_clts(void)
19929 @@ -171,13 +192,13 @@ static unsigned long __force_order;
19930 static inline unsigned long xen_read_cr0(void)
19933 - asm volatile("mov %%cr0,%0\n\t" :"=r" (val), "=m" (__force_order));
19934 + asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
19938 static inline void xen_write_cr0(unsigned long val)
19940 - asm volatile("mov %0,%%cr0": :"r" (val), "m" (__force_order));
19941 + asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
19944 #define xen_read_cr2() (current_vcpu_info()->arch.cr2)
19945 @@ -186,7 +207,7 @@ static inline void xen_write_cr0(unsigne
19946 static inline unsigned long xen_read_cr3(void)
19949 - asm volatile("mov %%cr3,%0\n\t" :"=r" (val), "=m" (__force_order));
19950 + asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
19951 #ifdef CONFIG_X86_32
19952 return mfn_to_pfn(xen_cr3_to_pfn(val)) << PAGE_SHIFT;
19954 @@ -201,13 +222,13 @@ static inline void xen_write_cr3(unsigne
19956 val = phys_to_machine(val);
19958 - asm volatile("mov %0,%%cr3": :"r" (val), "m" (__force_order));
19959 + asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
19962 static inline unsigned long xen_read_cr4(void)
19965 - asm volatile("mov %%cr4,%0\n\t" :"=r" (val), "=m" (__force_order));
19966 + asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
19970 @@ -215,7 +236,7 @@ static inline unsigned long xen_read_cr4
19972 static inline void xen_write_cr4(unsigned long val)
19974 - asm volatile("mov %0,%%cr4": :"r" (val), "m" (__force_order));
19975 + asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
19978 #ifdef CONFIG_X86_64
19979 @@ -234,6 +255,7 @@ static inline void xen_wbinvd(void)
19981 asm volatile("wbinvd": : :"memory");
19984 #define read_cr0() (xen_read_cr0())
19985 #define write_cr0(x) (xen_write_cr0(x))
19986 #define read_cr2() (xen_read_cr2())
19987 @@ -260,7 +282,7 @@ static inline void clflush(volatile void
19988 asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
19991 -#define nop() __asm__ __volatile__ ("nop")
19992 +#define nop() asm volatile ("nop")
19994 void disable_hlt(void);
19995 void enable_hlt(void);
19996 @@ -280,16 +302,7 @@ void default_idle(void);
19998 #ifdef CONFIG_X86_32
20000 - * For now, "wmb()" doesn't actually do anything, as all
20001 - * Intel CPU's follow what Intel calls a *Processor Order*,
20002 - * in which all writes are seen in the program order even
20003 - * outside the CPU.
20005 - * I expect future Intel CPU's to have a weaker ordering,
20006 - * but I'd also expect them to finally get their act together
20007 - * and add some real memory barriers if so.
20009 - * Some non intel clones support out of order store. wmb() ceases to be a
20010 + * Some non-Intel clones support out of order store. wmb() ceases to be a
20013 #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
20014 @@ -368,7 +381,7 @@ void default_idle(void);
20015 # define smp_wmb() barrier()
20017 #define smp_read_barrier_depends() read_barrier_depends()
20018 -#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
20019 +#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
20021 #define smp_mb() barrier()
20022 #define smp_rmb() barrier()
20023 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/tlbflush.h 2009-03-16 16:33:40.000000000 +0100
20024 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/tlbflush.h 2009-03-16 16:38:05.000000000 +0100
20025 @@ -86,8 +86,7 @@ static inline void flush_tlb_range(struc
20026 #define TLBSTATE_LAZY 2
20028 #ifdef CONFIG_X86_32
20031 +struct tlb_state {
20032 struct mm_struct *active_mm;
20034 char __cacheline_padding[L1_CACHE_BYTES-8];
20035 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/vga.h 2009-05-14 10:56:29.000000000 +0200
20036 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/vga.h 2009-03-16 16:38:05.000000000 +0100
20038 * access the videoram directly without any black magic.
20041 -#define VGA_MAP_MEM(x,s) (unsigned long)isa_bus_to_virt(x)
20042 +#define VGA_MAP_MEM(x, s) (unsigned long)isa_bus_to_virt(x)
20044 #define vga_readb(x) (*(x))
20045 -#define vga_writeb(x,y) (*(y) = (x))
20046 +#define vga_writeb(x, y) (*(y) = (x))
20049 --- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/xor_64.h 2009-05-14 10:56:29.000000000 +0200
20050 +++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/xor_64.h 2009-03-16 16:38:05.000000000 +0100
20053 - * x86-64 changes / gcc fixes from Andi Kleen.
20054 + * x86-64 changes / gcc fixes from Andi Kleen.
20055 * Copyright 2002 Andi Kleen, SuSE Labs.
20057 * This hasn't been optimized for the hammer yet, but there are likely
20058 * no advantages to be gotten from x86-64 here anyways.
20061 -typedef struct { unsigned long a,b; } __attribute__((aligned(16))) xmm_store_t;
20063 + unsigned long a, b;
20064 +} __attribute__((aligned(16))) xmm_store_t;
20066 -/* Doesn't use gcc to save the XMM registers, because there is no easy way to
20067 +/* Doesn't use gcc to save the XMM registers, because there is no easy way to
20068 tell it to do a clts before the register saving. */
20069 -#define XMMS_SAVE do { \
20070 +#define XMMS_SAVE \
20072 preempt_disable(); \
20073 if (!(current_thread_info()->status & TS_USEDFPU)) \
20075 - __asm__ __volatile__ ( \
20077 "movups %%xmm0,(%1) ;\n\t" \
20078 "movups %%xmm1,0x10(%1) ;\n\t" \
20079 "movups %%xmm2,0x20(%1) ;\n\t" \
20080 @@ -22,10 +25,11 @@ typedef struct { unsigned long a,b; } __
20087 -#define XMMS_RESTORE do { \
20089 +#define XMMS_RESTORE \
20093 "movups (%1),%%xmm0 ;\n\t" \
20094 "movups 0x10(%1),%%xmm1 ;\n\t" \
20095 @@ -37,72 +41,72 @@ typedef struct { unsigned long a,b; } __
20096 if (!(current_thread_info()->status & TS_USEDFPU)) \
20098 preempt_enable(); \
20102 #define OFFS(x) "16*("#x")"
20103 #define PF_OFFS(x) "256+16*("#x")"
20104 #define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
20105 -#define LD(x,y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
20106 -#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
20107 +#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
20108 +#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
20109 #define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
20110 #define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
20111 #define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
20112 #define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
20113 #define PF5(x) " prefetchnta "PF_OFFS(x)"(%[p6]) ;\n"
20114 -#define XO1(x,y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
20115 -#define XO2(x,y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
20116 -#define XO3(x,y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
20117 -#define XO4(x,y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
20118 -#define XO5(x,y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n"
20119 +#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
20120 +#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
20121 +#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
20122 +#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
20123 +#define XO5(x, y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n"
20127 xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
20129 - unsigned int lines = bytes >> 8;
20130 + unsigned int lines = bytes >> 8;
20132 xmm_store_t xmm_save[4];
20185 - " addq %[inc], %[p1] ;\n"
20186 - " addq %[inc], %[p2] ;\n"
20187 + " addq %[inc], %[p1] ;\n"
20188 + " addq %[inc], %[p2] ;\n"
20189 " decl %[cnt] ; jnz 1b"
20190 : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines)
20191 - : [inc] "r" (256UL)
20193 + : [inc] "r" (256UL)
20198 @@ -117,52 +121,52 @@ xor_sse_3(unsigned long bytes, unsigned
20202 - __asm__ __volatile__ (
20262 - " addq %[inc], %[p1] ;\n"
20263 - " addq %[inc], %[p2] ;\n"
20264 - " addq %[inc], %[p3] ;\n"
20265 + " addq %[inc], %[p1] ;\n"
20266 + " addq %[inc], %[p2] ;\n"
20267 + " addq %[inc], %[p3] ;\n"
20268 " decl %[cnt] ; jnz 1b"
20269 : [cnt] "+r" (lines),
20270 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
20271 : [inc] "r" (256UL)
20277 @@ -171,64 +175,64 @@ xor_sse_4(unsigned long bytes, unsigned
20278 unsigned long *p3, unsigned long *p4)
20280 unsigned int lines = bytes >> 8;
20281 - xmm_store_t xmm_save[4];
20282 + xmm_store_t xmm_save[4];
20287 - __asm__ __volatile__ (
20358 - " addq %[inc], %[p1] ;\n"
20359 - " addq %[inc], %[p2] ;\n"
20360 - " addq %[inc], %[p3] ;\n"
20361 - " addq %[inc], %[p4] ;\n"
20362 + " addq %[inc], %[p1] ;\n"
20363 + " addq %[inc], %[p2] ;\n"
20364 + " addq %[inc], %[p3] ;\n"
20365 + " addq %[inc], %[p4] ;\n"
20366 " decl %[cnt] ; jnz 1b"
20367 : [cnt] "+c" (lines),
20368 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
20369 : [inc] "r" (256UL)
20375 @@ -237,70 +241,70 @@ static void
20376 xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
20377 unsigned long *p3, unsigned long *p4, unsigned long *p5)
20379 - unsigned int lines = bytes >> 8;
20380 + unsigned int lines = bytes >> 8;
20381 xmm_store_t xmm_save[4];
20386 - __asm__ __volatile__ (
20468 - " addq %[inc], %[p1] ;\n"
20469 - " addq %[inc], %[p2] ;\n"
20470 - " addq %[inc], %[p3] ;\n"
20471 - " addq %[inc], %[p4] ;\n"
20472 - " addq %[inc], %[p5] ;\n"
20473 + " addq %[inc], %[p1] ;\n"
20474 + " addq %[inc], %[p2] ;\n"
20475 + " addq %[inc], %[p3] ;\n"
20476 + " addq %[inc], %[p4] ;\n"
20477 + " addq %[inc], %[p5] ;\n"
20478 " decl %[cnt] ; jnz 1b"
20479 : [cnt] "+c" (lines),
20480 - [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4),
20481 + [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4),
20483 : [inc] "r" (256UL)
20485 @@ -309,18 +313,18 @@ xor_sse_5(unsigned long bytes, unsigned
20488 static struct xor_block_template xor_block_sse = {
20489 - .name = "generic_sse",
20490 - .do_2 = xor_sse_2,
20491 - .do_3 = xor_sse_3,
20492 - .do_4 = xor_sse_4,
20493 - .do_5 = xor_sse_5,
20494 + .name = "generic_sse",
20495 + .do_2 = xor_sse_2,
20496 + .do_3 = xor_sse_3,
20497 + .do_4 = xor_sse_4,
20498 + .do_5 = xor_sse_5,
20501 #undef XOR_TRY_TEMPLATES
20502 -#define XOR_TRY_TEMPLATES \
20504 - xor_speed(&xor_block_sse); \
20506 +#define XOR_TRY_TEMPLATES \
20508 + xor_speed(&xor_block_sse); \
20511 /* We force the use of the SSE xor block because it can write around L2.
20512 We may also be able to load into the L1 only depending on how the cpu
20513 --- sle11-2009-05-14.orig/include/asm-x86/scatterlist.h 2009-05-14 10:56:29.000000000 +0200
20514 +++ sle11-2009-05-14/include/asm-x86/scatterlist.h 2009-03-16 16:38:05.000000000 +0100
20515 @@ -24,7 +24,7 @@ struct scatterlist {
20518 #define sg_dma_address(sg) ((sg)->dma_address)
20519 -#ifdef CONFIG_X86_32
20520 +#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
20521 # define sg_dma_len(sg) ((sg)->length)
20523 # define sg_dma_len(sg) ((sg)->dma_length)
20524 --- sle11-2009-05-14.orig/include/linux/page-flags.h 2009-03-16 16:33:40.000000000 +0100
20525 +++ sle11-2009-05-14/include/linux/page-flags.h 2009-03-16 16:38:05.000000000 +0100
20526 @@ -278,18 +278,25 @@ static inline void SetPageUptodate(struc
20528 CLEARPAGEFLAG(Uptodate, uptodate)
20530 -#define PageForeign(page) test_bit(PG_foreign, &(page)->flags)
20531 -#define SetPageForeign(_page, dtor) do { \
20532 - set_bit(PG_foreign, &(_page)->flags); \
20533 - BUG_ON((dtor) == (void (*)(struct page *, unsigned int))0); \
20534 - (_page)->index = (long)(dtor); \
20536 -#define ClearPageForeign(page) do { \
20537 - clear_bit(PG_foreign, &(page)->flags); \
20538 - (page)->index = 0; \
20540 -#define PageForeignDestructor(_page, order) \
20541 - ((void (*)(struct page *, unsigned int))(_page)->index)(_page, order)
20543 +TESTPAGEFLAG(Foreign, foreign)
20544 +static inline void SetPageForeign(struct page *page,
20545 + void (*dtor)(struct page *, unsigned int))
20548 + set_bit(PG_foreign, &page->flags);
20549 + page->index = (long)dtor;
20551 +static inline void ClearPageForeign(struct page *page)
20553 + clear_bit(PG_foreign, &page->flags);
20556 +static inline void PageForeignDestructor(struct page *page, unsigned int order)
20558 + ((void (*)(struct page *, unsigned int))page->index)(page, order);
20562 extern void cancel_dirty_page(struct page *page, unsigned int account_size);
20564 --- sle11-2009-05-14.orig/include/xen/balloon.h 2008-11-25 12:35:56.000000000 +0100
20565 +++ sle11-2009-05-14/include/xen/balloon.h 2009-03-16 16:38:05.000000000 +0100
20570 -#ifndef __ASM_BALLOON_H__
20571 -#define __ASM_BALLOON_H__
20572 +#ifndef __XEN_BALLOON_H__
20573 +#define __XEN_BALLOON_H__
20575 +#include <linux/spinlock.h>
20577 +#if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
20579 * Inform the balloon driver that it should allow some slop for device-driver
20580 * memory activities.
20581 @@ -53,5 +56,6 @@ void balloon_release_driver_page(struct
20582 extern spinlock_t balloon_lock;
20583 #define balloon_lock(__flags) spin_lock_irqsave(&balloon_lock, __flags)
20584 #define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags)
20587 -#endif /* __ASM_BALLOON_H__ */
20588 +#endif /* __XEN_BALLOON_H__ */
20589 --- sle11-2009-05-14.orig/include/xen/interface/grant_table.h 2008-11-25 12:22:34.000000000 +0100
20590 +++ sle11-2009-05-14/include/xen/interface/grant_table.h 2009-03-16 16:38:05.000000000 +0100
20591 @@ -193,6 +193,7 @@ struct gnttab_map_grant_ref {
20592 grant_handle_t handle;
20593 uint64_t dev_bus_addr;
20595 +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref);
20596 typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t;
20597 DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t);
20599 @@ -216,6 +217,7 @@ struct gnttab_unmap_grant_ref {
20600 /* OUT parameters. */
20601 int16_t status; /* GNTST_* */
20603 +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref);
20604 typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t;
20605 DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t);
20607 @@ -237,6 +239,7 @@ struct gnttab_setup_table {
20608 int16_t status; /* GNTST_* */
20609 XEN_GUEST_HANDLE(ulong) frame_list;
20611 +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_setup_table);
20612 typedef struct gnttab_setup_table gnttab_setup_table_t;
20613 DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t);
20615 @@ -251,6 +254,7 @@ struct gnttab_dump_table {
20616 /* OUT parameters. */
20617 int16_t status; /* GNTST_* */
20619 +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_dump_table);
20620 typedef struct gnttab_dump_table gnttab_dump_table_t;
20621 DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
20623 @@ -271,6 +275,7 @@ struct gnttab_transfer {
20624 /* OUT parameters. */
20627 +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_transfer);
20628 typedef struct gnttab_transfer gnttab_transfer_t;
20629 DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
20631 @@ -314,6 +319,7 @@ typedef struct gnttab_copy {
20632 /* OUT parameters. */
20635 +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_copy);
20636 DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t);
20639 @@ -332,6 +338,7 @@ struct gnttab_query_size {
20640 uint32_t max_nr_frames;
20641 int16_t status; /* GNTST_* */
20643 +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_query_size);
20644 typedef struct gnttab_query_size gnttab_query_size_t;
20645 DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
20647 --- sle11-2009-05-14.orig/include/xen/interface/io/fbif.h 2008-11-25 12:35:56.000000000 +0100
20648 +++ sle11-2009-05-14/include/xen/interface/io/fbif.h 2009-03-16 16:38:05.000000000 +0100
20649 @@ -150,7 +150,12 @@ struct xenfb_page
20650 * framebuffer with a max resolution of 12,800x10,240. Should
20651 * be enough for a while with room leftover for expansion.
20653 +#ifndef CONFIG_PARAVIRT_XEN
20654 unsigned long pd[256];
20656 + /* Two directory pages should be enough for a while. */
20657 + unsigned long pd[2];
20662 --- sle11-2009-05-14.orig/include/xen/interface/memory.h 2009-02-16 16:17:21.000000000 +0100
20663 +++ sle11-2009-05-14/include/xen/interface/memory.h 2009-03-16 16:38:05.000000000 +0100
20664 @@ -62,7 +62,7 @@ struct xen_memory_reservation {
20665 * OUT: GMFN bases of extents that were allocated
20666 * (NB. This command also updates the mach_to_phys translation table)
20668 - XEN_GUEST_HANDLE(ulong) extent_start;
20669 + XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
20671 /* Number of extents, and size/alignment of each (2^extent_order pages). */
20672 xen_ulong_t nr_extents;
20673 @@ -82,7 +82,6 @@ struct xen_memory_reservation {
20677 -DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_memory_reservation);
20678 typedef struct xen_memory_reservation xen_memory_reservation_t;
20679 DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
20681 @@ -168,7 +167,11 @@ struct xen_machphys_mfn_list {
20682 * any large discontiguities in the machine address space, 2MB gaps in
20683 * the machphys table will be represented by an MFN base of zero.
20685 +#ifndef CONFIG_PARAVIRT_XEN
20686 XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
20688 + ulong extent_start;
20692 * Number of extents written to the above array. This will be smaller
20693 @@ -176,7 +179,6 @@ struct xen_machphys_mfn_list {
20695 unsigned int nr_extents;
20697 -DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
20698 typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
20699 DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
20701 @@ -216,7 +218,6 @@ struct xen_add_to_physmap {
20702 /* GPFN where the source mapping page should appear. */
20705 -DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
20706 typedef struct xen_add_to_physmap xen_add_to_physmap_t;
20707 DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
20709 @@ -249,13 +250,21 @@ struct xen_translate_gpfn_list {
20710 xen_ulong_t nr_gpfns;
20712 /* List of GPFNs to translate. */
20713 +#ifndef CONFIG_PARAVIRT_XEN
20714 XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list;
20720 * Output list to contain MFN translations. May be the same as the input
20721 * list (in which case each input GPFN is overwritten with the output MFN).
20723 +#ifndef CONFIG_PARAVIRT_XEN
20724 XEN_GUEST_HANDLE(xen_pfn_t) mfn_list;
20729 DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
20730 typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
20731 --- sle11-2009-05-14.orig/include/xen/interface/vcpu.h 2008-11-25 12:35:56.000000000 +0100
20732 +++ sle11-2009-05-14/include/xen/interface/vcpu.h 2009-03-16 16:38:05.000000000 +0100
20733 @@ -85,6 +85,7 @@ struct vcpu_runstate_info {
20737 +DEFINE_XEN_GUEST_HANDLE_STRUCT(vcpu_runstate_info);
20738 typedef struct vcpu_runstate_info vcpu_runstate_info_t;
20739 DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t);
20741 @@ -140,6 +141,7 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_register_ru
20742 struct vcpu_set_periodic_timer {
20743 uint64_t period_ns;
20745 +DEFINE_XEN_GUEST_HANDLE_STRUCT(vcpu_set_periodic_timer);
20746 typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t;
20747 DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t);
20749 @@ -153,6 +155,7 @@ struct vcpu_set_singleshot_timer {
20750 uint64_t timeout_abs_ns; /* Absolute system time value in nanoseconds. */
20751 uint32_t flags; /* VCPU_SSHOTTMR_??? */
20753 +DEFINE_XEN_GUEST_HANDLE_STRUCT(vcpu_set_singleshot_timer);
20754 typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t;
20755 DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t);
20757 @@ -176,6 +179,7 @@ struct vcpu_register_vcpu_info {
20758 uint32_t offset; /* offset within page */
20759 uint32_t rsvd; /* unused */
20761 +DEFINE_XEN_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info);
20762 typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t;
20763 DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t);
20765 --- sle11-2009-05-14.orig/lib/swiotlb-xen.c 2009-03-16 16:33:40.000000000 +0100
20766 +++ sle11-2009-05-14/lib/swiotlb-xen.c 2009-03-16 16:38:05.000000000 +0100
20768 #include <linux/ctype.h>
20769 #include <linux/init.h>
20770 #include <linux/bootmem.h>
20771 +#include <linux/iommu-helper.h>
20772 #include <linux/highmem.h>
20773 #include <asm/io.h>
20774 #include <asm/pci.h>
20775 @@ -288,15 +289,6 @@ __sync_single(struct phys_addr buffer, c
20779 -static inline unsigned int is_span_boundary(unsigned int index,
20780 - unsigned int nslots,
20781 - unsigned long offset_slots,
20782 - unsigned long max_slots)
20784 - unsigned long offset = (offset_slots + index) & (max_slots - 1);
20785 - return offset + nslots > max_slots;
20789 * Allocates bounce buffer and returns its kernel virtual address.
20791 @@ -335,61 +327,53 @@ map_single(struct device *hwdev, struct
20792 * request and allocate a buffer from that IO TLB pool.
20794 spin_lock_irqsave(&io_tlb_lock, flags);
20796 - index = ALIGN(io_tlb_index, stride);
20797 - if (index >= iotlb_nslabs)
20800 + index = ALIGN(io_tlb_index, stride);
20801 + if (index >= iotlb_nslabs)
20806 - while (is_span_boundary(index, nslots, offset_slots,
20809 - if (index >= iotlb_nslabs)
20811 - if (index == wrap)
20815 + while (iommu_is_span_boundary(index, nslots, offset_slots,
20818 + if (index >= iotlb_nslabs)
20820 + if (index == wrap)
20825 + * If we find a slot that indicates we have 'nslots' number of
20826 + * contiguous buffers, we allocate the buffers from that slot
20827 + * and mark the entries as '0' indicating unavailable.
20829 + if (io_tlb_list[index] >= nslots) {
20832 + for (i = index; i < (int) (index + nslots); i++)
20833 + io_tlb_list[i] = 0;
20834 + for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
20835 + io_tlb_list[i] = ++count;
20836 + dma_addr = iotlb_virt_start + (index << IO_TLB_SHIFT);
20839 - * If we find a slot that indicates we have 'nslots'
20840 - * number of contiguous buffers, we allocate the
20841 - * buffers from that slot and mark the entries as '0'
20842 - * indicating unavailable.
20843 + * Update the indices to avoid searching in the next
20846 - if (io_tlb_list[index] >= nslots) {
20849 - for (i = index; i < (int)(index + nslots); i++)
20850 - io_tlb_list[i] = 0;
20851 - for (i = index - 1;
20852 - (OFFSET(i, IO_TLB_SEGSIZE) !=
20853 - IO_TLB_SEGSIZE -1) && io_tlb_list[i];
20855 - io_tlb_list[i] = ++count;
20856 - dma_addr = iotlb_virt_start +
20857 - (index << IO_TLB_SHIFT);
20860 - * Update the indices to avoid searching in
20861 - * the next round.
20864 - ((index + nslots) < iotlb_nslabs
20865 - ? (index + nslots) : 0);
20866 + io_tlb_index = ((index + nslots) < iotlb_nslabs
20867 + ? (index + nslots) : 0);
20872 - if (index >= iotlb_nslabs)
20874 - } while (index != wrap);
20878 + if (index >= iotlb_nslabs)
20880 + } while (index != wrap);
20883 - spin_unlock_irqrestore(&io_tlb_lock, flags);
20888 + spin_unlock_irqrestore(&io_tlb_lock, flags);
20891 spin_unlock_irqrestore(&io_tlb_lock, flags);
20894 @@ -502,11 +486,13 @@ swiotlb_full(struct device *dev, size_t
20895 * Once the device is given the dma address, the device owns this memory until
20896 * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
20899 -swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
20901 - dma_addr_t dev_addr = gnttab_dma_map_page(virt_to_page(ptr)) +
20902 - offset_in_page(ptr);
20904 +_swiotlb_map_single(struct device *hwdev, phys_addr_t paddr, size_t size,
20905 + int dir, struct dma_attrs *attrs)
20907 + struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
20908 + dma_addr_t dev_addr = gnttab_dma_map_page(page) +
20909 + offset_in_page(paddr);
20911 struct phys_addr buffer;
20913 @@ -517,7 +503,7 @@ swiotlb_map_single(struct device *hwdev,
20914 * we can safely return the device addr and not worry about bounce
20917 - if (!range_straddles_page_boundary(__pa(ptr), size) &&
20918 + if (!range_straddles_page_boundary(paddr, size) &&
20919 !address_needs_mapping(hwdev, dev_addr))
20922 @@ -525,8 +511,8 @@ swiotlb_map_single(struct device *hwdev,
20923 * Oh well, have to allocate and map a bounce buffer.
20925 gnttab_dma_unmap_page(dev_addr);
20926 - buffer.page = virt_to_page(ptr);
20927 - buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
20928 + buffer.page = page;
20929 + buffer.offset = offset_in_page(paddr);
20930 map = map_single(hwdev, buffer, size, dir);
20932 swiotlb_full(hwdev, size, dir, 1);
20933 @@ -537,6 +523,26 @@ swiotlb_map_single(struct device *hwdev,
20938 +swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
20939 + int dir, struct dma_attrs *attrs)
20941 + return _swiotlb_map_single(hwdev, virt_to_phys(ptr), size, dir, attrs);
20943 +EXPORT_SYMBOL(swiotlb_map_single_attrs);
20946 +swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
20948 + return _swiotlb_map_single(hwdev, virt_to_phys(ptr), size, dir, NULL);
20952 +swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
20954 + return _swiotlb_map_single(hwdev, paddr, size, dir, NULL);
20958 * Unmap a single streaming mode DMA translation. The dma_addr and size must
20959 * match what was provided for in a previous swiotlb_map_single call. All
20960 @@ -546,8 +552,8 @@ swiotlb_map_single(struct device *hwdev,
20961 * whatever the device wrote there.
20964 -swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
20966 +swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
20967 + size_t size, int dir, struct dma_attrs *attrs)
20969 BUG_ON(dir == DMA_NONE);
20970 if (in_swiotlb_aperture(dev_addr))
20971 @@ -555,7 +561,14 @@ swiotlb_unmap_single(struct device *hwde
20973 gnttab_dma_unmap_page(dev_addr);
20975 +EXPORT_SYMBOL(swiotlb_unmap_single_attrs);
20978 +swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
20981 + return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL);
20984 * Make physical memory consistent for a single streaming mode DMA translation
20985 * after a transfer.
20986 @@ -584,6 +597,26 @@ swiotlb_sync_single_for_device(struct de
20987 sync_single(hwdev, bus_to_virt(dev_addr), size, dir);
20991 +swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
20992 + unsigned long offset, size_t size, int dir)
20994 + BUG_ON(dir == DMA_NONE);
20995 + if (in_swiotlb_aperture(dev_addr))
20996 + sync_single(hwdev, bus_to_virt(dev_addr + offset), size, dir);
21000 +swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
21001 + unsigned long offset, size_t size, int dir)
21003 + BUG_ON(dir == DMA_NONE);
21004 + if (in_swiotlb_aperture(dev_addr))
21005 + sync_single(hwdev, bus_to_virt(dev_addr + offset), size, dir);
21008 +void swiotlb_unmap_sg_attrs(struct device *, struct scatterlist *, int, int,
21009 + struct dma_attrs *);
21011 * Map a set of buffers described by scatterlist in streaming mode for DMA.
21012 * This is the scatter-gather version of the above swiotlb_map_single
21013 @@ -601,8 +634,8 @@ swiotlb_sync_single_for_device(struct de
21017 -swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
21019 +swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
21020 + int dir, struct dma_attrs *attrs)
21022 struct scatterlist *sg;
21023 struct phys_addr buffer;
21024 @@ -626,7 +659,8 @@ swiotlb_map_sg(struct device *hwdev, str
21025 /* Don't panic here, we expect map_sg users
21026 to do proper error handling. */
21027 swiotlb_full(hwdev, sg->length, dir, 0);
21028 - swiotlb_unmap_sg(hwdev, sgl, i, dir);
21029 + swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
21031 sgl[0].dma_length = 0;
21034 @@ -637,14 +671,22 @@ swiotlb_map_sg(struct device *hwdev, str
21038 +EXPORT_SYMBOL(swiotlb_map_sg_attrs);
21041 +swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
21044 + return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
21048 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
21049 * concerning calls here are the same as for swiotlb_unmap_single() above.
21052 -swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
21054 +swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
21055 + int nelems, int dir, struct dma_attrs *attrs)
21057 struct scatterlist *sg;
21059 @@ -659,6 +701,14 @@ swiotlb_unmap_sg(struct device *hwdev, s
21060 gnttab_dma_unmap_page(sg->dma_address);
21063 +EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
21066 +swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
21069 + return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
21073 * Make physical memory consistent for a set of streaming mode DMA translations
21074 @@ -699,46 +749,6 @@ swiotlb_sync_sg_for_device(struct device
21078 -#ifdef CONFIG_HIGHMEM
21081 -swiotlb_map_page(struct device *hwdev, struct page *page,
21082 - unsigned long offset, size_t size,
21083 - enum dma_data_direction direction)
21085 - struct phys_addr buffer;
21086 - dma_addr_t dev_addr;
21089 - dev_addr = gnttab_dma_map_page(page) + offset;
21090 - if (address_needs_mapping(hwdev, dev_addr)) {
21091 - gnttab_dma_unmap_page(dev_addr);
21092 - buffer.page = page;
21093 - buffer.offset = offset;
21094 - map = map_single(hwdev, buffer, size, direction);
21096 - swiotlb_full(hwdev, size, direction, 1);
21097 - map = io_tlb_overflow_buffer;
21099 - dev_addr = (dma_addr_t)virt_to_bus(map);
21106 -swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
21107 - size_t size, enum dma_data_direction direction)
21109 - BUG_ON(direction == DMA_NONE);
21110 - if (in_swiotlb_aperture(dma_address))
21111 - unmap_single(hwdev, bus_to_virt(dma_address), size, direction);
21113 - gnttab_dma_unmap_page(dma_address);
21119 swiotlb_dma_mapping_error(dma_addr_t dma_addr)