5 Acked-by: Jeff Mahoney <jeffm@suse.com>
6 Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches.py
8 --- sle11-2009-10-16.orig/arch/x86/Kconfig 2009-03-16 16:33:40.000000000 +0100
9 +++ sle11-2009-10-16/arch/x86/Kconfig 2009-03-16 16:38:05.000000000 +0100
10 @@ -28,7 +28,7 @@ config X86
11 select HAVE_DYNAMIC_FTRACE
13 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) && !XEN
14 - select HAVE_ARCH_KGDB if !X86_VOYAGER
15 + select HAVE_ARCH_KGDB if !X86_VOYAGER && !XEN
16 select HAVE_ARCH_TRACEHOOK
17 select HAVE_GENERIC_DMA_COHERENT if X86_32
18 select HAVE_EFFICIENT_UNALIGNED_ACCESS
19 @@ -486,6 +486,7 @@ config PARAVIRT_DEBUG
25 This option adds a kernel parameter 'memtest', which allows memtest
27 @@ -1007,7 +1008,7 @@ config X86_PAE
29 bool "Enable 1GB pages for kernel pagetables" if EMBEDDED
32 + depends on X86_64 && !XEN
34 Allow the kernel linear mapping to use 1GB pages on CPUs that
35 support it. This can improve the kernel's performance a tiny bit by
36 @@ -1349,8 +1350,7 @@ source kernel/Kconfig.hz
39 bool "kexec system call"
40 - depends on X86_BIOS_REBOOT
41 - depends on !XEN_UNPRIVILEGED_GUEST
42 + depends on X86_BIOS_REBOOT || (XEN && !XEN_UNPRIVILEGED_GUEST)
44 kexec is a system call that implements the ability to shutdown your
45 current kernel, and to start another kernel. It is like a reboot
46 @@ -1948,6 +1948,4 @@ source "crypto/Kconfig"
48 source "arch/x86/kvm/Kconfig"
50 -source "drivers/xen/Kconfig"
53 --- sle11-2009-10-16.orig/arch/x86/ia32/ia32entry-xen.S 2009-03-16 16:33:40.000000000 +0100
54 +++ sle11-2009-10-16/arch/x86/ia32/ia32entry-xen.S 2009-03-16 16:38:05.000000000 +0100
55 @@ -129,12 +129,14 @@ sysenter_tracesys:
59 - movq $-ENOSYS,RAX(%rsp) /* really needed? */
60 + movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
61 movq %rsp,%rdi /* &pt_regs -> arg1 */
62 call syscall_trace_enter
63 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
66 + cmpl $(IA32_NR_syscalls-1),%eax
67 + ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
70 ENDPROC(ia32_sysenter_target)
71 @@ -200,13 +202,15 @@ cstar_tracesys:
75 - movq $-ENOSYS,RAX(%rsp) /* really needed? */
76 + movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
77 movq %rsp,%rdi /* &pt_regs -> arg1 */
78 call syscall_trace_enter
79 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
82 movl RSP-ARGOFFSET(%rsp), %r8d
83 + cmpl $(IA32_NR_syscalls-1),%eax
84 + ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
86 END(ia32_cstar_target)
88 @@ -264,7 +268,7 @@ ENTRY(ia32_syscall)
91 cmpl $(IA32_NR_syscalls-1),%eax
93 + ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
95 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
97 @@ -274,7 +278,7 @@ ia32_sysret:
101 - movq $-ENOSYS,RAX(%rsp) /* really needed? */
102 + movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
103 movq %rsp,%rdi /* &pt_regs -> arg1 */
104 call syscall_trace_enter
105 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
106 @@ -365,7 +369,7 @@ ia32_sys_call_table:
109 .quad compat_sys_stime /* stime */ /* 25 */
110 - .quad sys32_ptrace /* ptrace */
111 + .quad compat_sys_ptrace /* ptrace */
113 .quad sys_fstat /* (old)fstat */
115 --- sle11-2009-10-16.orig/arch/x86/kernel/Makefile 2009-03-16 16:33:40.000000000 +0100
116 +++ sle11-2009-10-16/arch/x86/kernel/Makefile 2009-03-16 16:38:05.000000000 +0100
117 @@ -122,8 +122,7 @@ ifeq ($(CONFIG_X86_64),y)
119 obj-$(CONFIG_XEN) += nmi_64.o
120 time_64-$(CONFIG_XEN) += time_32.o
121 - pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o
124 -disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \
125 - smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o
126 +disabled-obj-$(CONFIG_XEN) := crash.o early-quirks.o hpet.o i8253.o i8259_$(BITS).o \
127 + pci-swiotlb_64.o reboot.o smpboot.o tlb_$(BITS).o tsc_$(BITS).o tsc_sync.o vsmp_64.o
128 --- sle11-2009-10-16.orig/arch/x86/kernel/acpi/Makefile 2008-12-01 11:11:08.000000000 +0100
129 +++ sle11-2009-10-16/arch/x86/kernel/acpi/Makefile 2009-03-16 16:38:05.000000000 +0100
130 @@ -15,4 +15,4 @@ $(obj)/wakeup_rm.o: $(obj)/realmode/w
131 $(obj)/realmode/wakeup.bin: FORCE
132 $(Q)$(MAKE) $(build)=$(obj)/realmode
134 -disabled-obj-$(CONFIG_XEN) := cstate.o wakeup_$(BITS).o
135 +disabled-obj-$(CONFIG_XEN) := cstate.o wakeup_%.o
136 --- sle11-2009-10-16.orig/arch/x86/kernel/acpi/boot.c 2009-03-16 16:33:40.000000000 +0100
137 +++ sle11-2009-10-16/arch/x86/kernel/acpi/boot.c 2009-08-26 11:55:26.000000000 +0200
138 @@ -251,6 +251,7 @@ static int __init acpi_parse_madt(struct
140 static void __cpuinit acpi_register_lapic(int id, u8 enabled)
143 unsigned int ver = 0;
146 @@ -264,6 +265,7 @@ static void __cpuinit acpi_register_lapi
149 generic_processor_info(id, ver);
154 @@ -774,6 +776,7 @@ static int __init acpi_parse_fadt(struct
155 * returns 0 on success, < 0 on error
159 static void __init acpi_register_lapic_address(unsigned long address)
161 mp_lapic_addr = address;
162 @@ -787,6 +790,9 @@ static void __init acpi_register_lapic_a
167 +#define acpi_register_lapic_address(address)
170 static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
172 --- sle11-2009-10-16.orig/arch/x86/kernel/acpi/sleep-xen.c 2009-03-16 16:33:40.000000000 +0100
173 +++ sle11-2009-10-16/arch/x86/kernel/acpi/sleep-xen.c 2009-03-16 16:38:05.000000000 +0100
175 #include <linux/dmi.h>
176 #include <linux/cpumask.h>
178 -#include <asm/smp.h>
179 +#include "realmode/wakeup.h"
182 #ifndef CONFIG_ACPI_PV_SLEEP
183 -/* address in low memory of the wakeup routine. */
184 -unsigned long acpi_wakeup_address = 0;
185 +unsigned long acpi_wakeup_address;
186 unsigned long acpi_realmode_flags;
187 -extern char wakeup_start, wakeup_end;
189 -extern unsigned long acpi_copy_wakeup_routine(unsigned long);
190 +/* address in low memory of the wakeup routine. */
191 +static unsigned long acpi_realmode;
194 +static char temp_stack[10240];
199 @@ -26,17 +30,69 @@ extern unsigned long acpi_copy_wakeup_ro
201 * Create an identity mapped page table and copy the wakeup routine to
204 + * Note that this is too late to change acpi_wakeup_address.
206 int acpi_save_state_mem(void)
208 #ifndef CONFIG_ACPI_PV_SLEEP
209 - if (!acpi_wakeup_address) {
210 - printk(KERN_ERR "Could not allocate memory during boot, S3 disabled\n");
211 + struct wakeup_header *header;
213 + if (!acpi_realmode) {
214 + printk(KERN_ERR "Could not allocate memory during boot, "
218 - memcpy((void *)acpi_wakeup_address, &wakeup_start,
219 - &wakeup_end - &wakeup_start);
220 - acpi_copy_wakeup_routine(acpi_wakeup_address);
221 + memcpy((void *)acpi_realmode, &wakeup_code_start, WAKEUP_SIZE);
223 + header = (struct wakeup_header *)(acpi_realmode + HEADER_OFFSET);
224 + if (header->signature != 0x51ee1111) {
225 + printk(KERN_ERR "wakeup header does not match\n");
229 + header->video_mode = saved_video_mode;
231 + header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
232 + /* GDT[0]: GDT self-pointer */
233 + header->wakeup_gdt[0] =
234 + (u64)(sizeof(header->wakeup_gdt) - 1) +
235 + ((u64)(acpi_wakeup_address +
236 + ((char *)&header->wakeup_gdt - (char *)acpi_realmode))
238 + /* GDT[1]: real-mode-like code segment */
239 + header->wakeup_gdt[1] = (0x009bULL << 40) +
240 + ((u64)acpi_wakeup_address << 16) + 0xffff;
241 + /* GDT[2]: real-mode-like data segment */
242 + header->wakeup_gdt[2] = (0x0093ULL << 40) +
243 + ((u64)acpi_wakeup_address << 16) + 0xffff;
245 +#ifndef CONFIG_64BIT
246 + store_gdt((struct desc_ptr *)&header->pmode_gdt);
248 + header->pmode_efer_low = nx_enabled;
249 + if (header->pmode_efer_low & 1) {
250 + /* This is strange, why not save efer, always? */
251 + rdmsr(MSR_EFER, header->pmode_efer_low,
252 + header->pmode_efer_high);
254 +#endif /* !CONFIG_64BIT */
256 + header->pmode_cr0 = read_cr0();
257 + header->pmode_cr4 = read_cr4();
258 + header->realmode_flags = acpi_realmode_flags;
259 + header->real_magic = 0x12345678;
261 +#ifndef CONFIG_64BIT
262 + header->pmode_entry = (u32)&wakeup_pmode_return;
263 + header->pmode_cr3 = (u32)(swsusp_pg_dir - __PAGE_OFFSET);
264 + saved_magic = 0x12345678;
265 +#else /* CONFIG_64BIT */
266 + header->trampoline_segment = setup_trampoline() >> 4;
267 + init_rsp = (unsigned long)temp_stack + 4096;
268 + initial_code = (unsigned long)wakeup_long64;
269 + saved_magic = 0x123456789abcdef0;
270 +#endif /* CONFIG_64BIT */
274 @@ -61,15 +117,20 @@ void acpi_restore_state_mem(void)
275 void __init acpi_reserve_bootmem(void)
277 #ifndef CONFIG_ACPI_PV_SLEEP
278 - if ((&wakeup_end - &wakeup_start) > PAGE_SIZE*2) {
279 + if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
281 "ACPI: Wakeup code way too big, S3 disabled.\n");
285 - acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
286 - if (!acpi_wakeup_address)
287 + acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE);
289 + if (!acpi_realmode) {
290 printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
294 + acpi_wakeup_address = virt_to_phys((void *)acpi_realmode);
298 --- sle11-2009-10-16.orig/arch/x86/kernel/cpu/common-xen.c 2009-03-16 16:33:40.000000000 +0100
299 +++ sle11-2009-10-16/arch/x86/kernel/cpu/common-xen.c 2009-03-16 16:38:05.000000000 +0100
301 #include <linux/module.h>
302 #include <linux/percpu.h>
303 #include <linux/bootmem.h>
304 -#include <asm/semaphore.h>
305 #include <asm/processor.h>
306 #include <asm/i387.h>
309 #include <asm/mmu_context.h>
310 #include <asm/mtrr.h>
312 +#include <asm/pat.h>
313 #ifdef CONFIG_X86_LOCAL_APIC
314 #include <asm/mpspec.h>
315 #include <asm/apic.h>
316 @@ -69,9 +69,9 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuin
317 static int cachesize_override __cpuinitdata = -1;
318 static int disable_x86_serial_nr __cpuinitdata = 1;
320 -struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
321 +struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
323 -static void __cpuinit default_init(struct cpuinfo_x86 * c)
324 +static void __cpuinit default_init(struct cpuinfo_x86 *c)
326 /* Not much we can do here... */
327 /* Check if at least it has cpuid */
328 @@ -88,11 +88,11 @@ static struct cpu_dev __cpuinitdata defa
329 .c_init = default_init,
330 .c_vendor = "Unknown",
332 -static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu;
333 +static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
335 static int __init cachesize_setup(char *str)
337 - get_option (&str, &cachesize_override);
338 + get_option(&str, &cachesize_override);
341 __setup("cachesize=", cachesize_setup);
342 @@ -114,12 +114,12 @@ int __cpuinit get_model_name(struct cpui
343 /* Intel chips right-justify this string for some dumb reason;
344 undo that brain damage */
345 p = q = &c->x86_model_id[0];
346 - while ( *p == ' ' )
354 - while ( q <= &c->x86_model_id[48] )
355 + while (q <= &c->x86_model_id[48])
356 *q++ = '\0'; /* Zero-pad the rest */
359 @@ -137,7 +137,7 @@ void __cpuinit display_cacheinfo(struct
360 cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
361 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
362 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
363 - c->x86_cache_size=(ecx>>24)+(edx>>24);
364 + c->x86_cache_size = (ecx>>24)+(edx>>24);
367 if (n < 0x80000006) /* Some chips just has a large L1. */
368 @@ -145,16 +145,16 @@ void __cpuinit display_cacheinfo(struct
370 ecx = cpuid_ecx(0x80000006);
374 /* do processor-specific cache resizing */
375 if (this_cpu->c_size_cache)
376 - l2size = this_cpu->c_size_cache(c,l2size);
377 + l2size = this_cpu->c_size_cache(c, l2size);
379 /* Allow user to override all this if necessary. */
380 if (cachesize_override != -1)
381 l2size = cachesize_override;
385 return; /* Again, no L2 cache is possible */
387 c->x86_cache_size = l2size;
388 @@ -163,16 +163,19 @@ void __cpuinit display_cacheinfo(struct
392 -/* Naming convention should be: <Name> [(<Codename>)] */
393 -/* This table only is used unless init_<vendor>() below doesn't set it; */
394 -/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
396 + * Naming convention should be: <Name> [(<Codename>)]
397 + * This table only is used unless init_<vendor>() below doesn't set it;
398 + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
402 /* Look up CPU names by table lookup. */
403 static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
405 struct cpu_model_info *info;
407 - if ( c->x86_model >= 16 )
408 + if (c->x86_model >= 16)
409 return NULL; /* Range check */
412 @@ -197,9 +200,9 @@ static void __cpuinit get_cpu_vendor(str
414 for (i = 0; i < X86_VENDOR_NUM; i++) {
416 - if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
417 - (cpu_devs[i]->c_ident[1] &&
418 - !strcmp(v,cpu_devs[i]->c_ident[1]))) {
419 + if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
420 + (cpu_devs[i]->c_ident[1] &&
421 + !strcmp(v, cpu_devs[i]->c_ident[1]))) {
424 this_cpu = cpu_devs[i];
425 @@ -217,7 +220,7 @@ static void __cpuinit get_cpu_vendor(str
429 -static int __init x86_fxsr_setup(char * s)
430 +static int __init x86_fxsr_setup(char *s)
432 setup_clear_cpu_cap(X86_FEATURE_FXSR);
433 setup_clear_cpu_cap(X86_FEATURE_XMM);
434 @@ -226,7 +229,7 @@ static int __init x86_fxsr_setup(char *
435 __setup("nofxsr", x86_fxsr_setup);
438 -static int __init x86_sep_setup(char * s)
439 +static int __init x86_sep_setup(char *s)
441 setup_clear_cpu_cap(X86_FEATURE_SEP);
443 @@ -315,12 +318,15 @@ static void __cpuinit early_get_cap(stru
447 -/* Do minimum CPU detection early.
448 - Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
449 - The others are not touched to avoid unwanted side effects.
451 - WARNING: this function is only called on the BP. Don't add code here
452 - that is supposed to run on all CPUs. */
454 + * Do minimum CPU detection early.
455 + * Fields really needed: vendor, cpuid_level, family, model, mask,
457 + * The others are not touched to avoid unwanted side effects.
459 + * WARNING: this function is only called on the BP. Don't add code here
460 + * that is supposed to run on all CPUs.
462 static void __init early_cpu_detect(void)
464 struct cpuinfo_x86 *c = &boot_cpu_data;
465 @@ -335,19 +341,14 @@ static void __init early_cpu_detect(void
467 get_cpu_vendor(c, 1);
469 - switch (c->x86_vendor) {
470 - case X86_VENDOR_AMD:
473 - case X86_VENDOR_INTEL:
474 - early_init_intel(c);
477 + if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
478 + cpu_devs[c->x86_vendor]->c_early_init)
479 + cpu_devs[c->x86_vendor]->c_early_init(c);
484 -static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
485 +static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
489 @@ -358,13 +359,12 @@ static void __cpuinit generic_identify(s
490 (unsigned int *)&c->x86_vendor_id[0],
491 (unsigned int *)&c->x86_vendor_id[8],
492 (unsigned int *)&c->x86_vendor_id[4]);
495 get_cpu_vendor(c, 0);
496 /* Initialize the standard set of capabilities */
497 /* Note that the vendor-specific code below might override */
499 /* Intel-defined flags: level 0x00000001 */
500 - if ( c->cpuid_level >= 0x00000001 ) {
501 + if (c->cpuid_level >= 0x00000001) {
502 u32 capability, excap;
503 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
504 c->x86_capability[0] = capability;
505 @@ -376,12 +376,14 @@ static void __cpuinit generic_identify(s
507 c->x86_model += ((tfms >> 16) & 0xF) << 4;
508 c->x86_mask = tfms & 15;
509 + c->initial_apicid = (ebx >> 24) & 0xFF;
511 - c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
512 + c->apicid = phys_pkg_id(c->initial_apicid, 0);
513 + c->phys_proc_id = c->initial_apicid;
515 - c->apicid = (ebx >> 24) & 0xFF;
516 + c->apicid = c->initial_apicid;
518 - if (c->x86_capability[0] & (1<<19))
519 + if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
520 c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
522 /* Have CPUID level 0 only - unheard of */
523 @@ -390,33 +392,30 @@ static void __cpuinit generic_identify(s
525 /* AMD-defined flags: level 0x80000001 */
526 xlvl = cpuid_eax(0x80000000);
527 - if ( (xlvl & 0xffff0000) == 0x80000000 ) {
528 - if ( xlvl >= 0x80000001 ) {
529 + if ((xlvl & 0xffff0000) == 0x80000000) {
530 + if (xlvl >= 0x80000001) {
531 c->x86_capability[1] = cpuid_edx(0x80000001);
532 c->x86_capability[6] = cpuid_ecx(0x80000001);
534 - if ( xlvl >= 0x80000004 )
535 + if (xlvl >= 0x80000004)
536 get_model_name(c); /* Default name */
539 init_scattered_cpuid_features(c);
542 -#ifdef CONFIG_X86_HT
543 - c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
547 static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
549 - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
550 + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
551 /* Disable processor serial number */
552 - unsigned long lo,hi;
553 - rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
554 + unsigned long lo, hi;
555 + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
557 - wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
558 + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
559 printk(KERN_NOTICE "CPU serial number disabled.\n");
560 - clear_bit(X86_FEATURE_PN, c->x86_capability);
561 + clear_cpu_cap(c, X86_FEATURE_PN);
563 /* Disabling the serial number may affect the cpuid level */
564 c->cpuid_level = cpuid_eax(0);
565 @@ -451,9 +450,11 @@ void __cpuinit identify_cpu(struct cpuin
566 memset(&c->x86_capability, 0, sizeof c->x86_capability);
568 if (!have_cpuid_p()) {
569 - /* First of all, decide if this is a 486 or higher */
570 - /* It's a 486 if we can modify the AC flag */
571 - if ( flag_is_changeable_p(X86_EFLAGS_AC) )
573 + * First of all, decide if this is a 486 or higher
574 + * It's a 486 if we can modify the AC flag
576 + if (flag_is_changeable_p(X86_EFLAGS_AC))
580 @@ -486,10 +487,10 @@ void __cpuinit identify_cpu(struct cpuin
583 /* If the model name is still unset, do table lookup. */
584 - if ( !c->x86_model_id[0] ) {
585 + if (!c->x86_model_id[0]) {
587 p = table_lookup_model(c);
590 strcpy(c->x86_model_id, p);
593 @@ -503,9 +504,9 @@ void __cpuinit identify_cpu(struct cpuin
594 * common between the CPUs. The first time this routine gets
595 * executed, c == &boot_cpu_data.
597 - if ( c != &boot_cpu_data ) {
598 + if (c != &boot_cpu_data) {
599 /* AND the already accumulated flags with these */
600 - for ( i = 0 ; i < NCAPINTS ; i++ )
601 + for (i = 0 ; i < NCAPINTS ; i++)
602 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
605 @@ -549,7 +550,7 @@ void __cpuinit detect_ht(struct cpuinfo_
607 if (smp_num_siblings == 1) {
608 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
609 - } else if (smp_num_siblings > 1 ) {
610 + } else if (smp_num_siblings > 1) {
612 if (smp_num_siblings > NR_CPUS) {
613 printk(KERN_WARNING "CPU: Unsupported number of the "
614 @@ -559,7 +560,7 @@ void __cpuinit detect_ht(struct cpuinfo_
617 index_msb = get_count_order(smp_num_siblings);
618 - c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
619 + c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
621 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
623 @@ -570,7 +571,7 @@ void __cpuinit detect_ht(struct cpuinfo_
625 core_bits = get_count_order(c->x86_max_cores);
627 - c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
628 + c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
629 ((1 << core_bits) - 1);
631 if (c->x86_max_cores > 1)
632 @@ -604,7 +605,7 @@ void __cpuinit print_cpu_info(struct cpu
634 printk("%s", c->x86_model_id);
636 - if (c->x86_mask || c->cpuid_level >= 0)
637 + if (c->x86_mask || c->cpuid_level >= 0)
638 printk(" stepping %02x\n", c->x86_mask);
641 @@ -623,24 +624,17 @@ __setup("clearcpuid=", setup_disablecpui
643 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
645 -/* This is hacky. :)
646 - * We're emulating future behavior.
647 - * In the future, the cpu-specific init functions will be called implicitly
648 - * via the magic of initcalls.
649 - * They will insert themselves into the cpu_devs structure.
650 - * Then, when cpu_init() is called, we can just iterate over that array.
652 void __init early_cpu_init(void)
658 - centaur_init_cpu();
659 - transmeta_init_cpu();
662 + struct cpu_vendor_dev *cvdev;
664 + for (cvdev = __x86cpuvendor_start ;
665 + cvdev < __x86cpuvendor_end ;
667 + cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
670 + validate_pat_support(&boot_cpu_data);
673 /* Make sure %fs is initialized properly in idle threads */
674 @@ -685,7 +679,7 @@ void __cpuinit cpu_init(void)
675 int cpu = smp_processor_id();
676 struct task_struct *curr = current;
677 #ifndef CONFIG_X86_NO_TSS
678 - struct tss_struct * t = &per_cpu(init_tss, cpu);
679 + struct tss_struct *t = &per_cpu(init_tss, cpu);
681 struct thread_struct *thread = &curr->thread;
683 @@ -738,7 +732,7 @@ void __cpuinit cpu_init(void)
684 mxcsr_feature_mask_init();
687 -#ifdef CONFIG_HOTPLUG_CPU
688 +#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN)
689 void __cpuinit cpu_uninit(void)
691 int cpu = raw_smp_processor_id();
692 --- sle11-2009-10-16.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2009-03-16 16:33:40.000000000 +0100
693 +++ sle11-2009-10-16/arch/x86/kernel/cpu/mtrr/main-xen.c 2009-03-16 16:38:05.000000000 +0100
694 @@ -35,6 +35,8 @@ struct mtrr_ops *mtrr_if = &generic_mtrr
695 unsigned int num_var_ranges;
696 unsigned int mtrr_usage_table[MAX_VAR_RANGES];
700 static void __init set_num_var_ranges(void)
702 struct xen_platform_op op;
703 @@ -162,8 +164,144 @@ mtrr_del(int reg, unsigned long base, un
704 EXPORT_SYMBOL(mtrr_add);
705 EXPORT_SYMBOL(mtrr_del);
708 + * Returns the effective MTRR type for the region
710 + * - 0xFE - when the range is "not entirely covered" by _any_ var range MTRR
711 + * - 0xFF - when MTRR is not enabled
713 +u8 mtrr_type_lookup(u64 start, u64 end)
716 + u64 start_mfn, end_mfn, base_mfn, top_mfn;
717 + u8 prev_match, curr_match;
718 + struct xen_platform_op op;
720 + if (!is_initial_xendomain())
721 + return MTRR_TYPE_WRBACK;
723 + if (!num_var_ranges)
726 + start_mfn = start >> PAGE_SHIFT;
727 + /* Make end inclusive end, instead of exclusive */
728 + end_mfn = --end >> PAGE_SHIFT;
730 + /* Look in fixed ranges. Just return the type as per start */
731 + if (start_mfn < 0x100) {
733 + op.cmd = XENPF_read_memtype;
734 + op.u.read_memtype.reg = ???;
735 + error = HYPERVISOR_platform_op(&op);
737 + return op.u.read_memtype.type;
739 + return MTRR_TYPE_UNCACHABLE;
743 + * Look in variable ranges
744 + * Look of multiple ranges matching this address and pick type
745 + * as per MTRR precedence
748 + for (i = 0; i < num_var_ranges; ++i) {
749 + op.cmd = XENPF_read_memtype;
750 + op.u.read_memtype.reg = i;
751 + error = HYPERVISOR_platform_op(&op);
753 + if (error || !op.u.read_memtype.nr_mfns)
756 + base_mfn = op.u.read_memtype.mfn;
757 + top_mfn = base_mfn + op.u.read_memtype.nr_mfns - 1;
759 + if (base_mfn > end_mfn || start_mfn > top_mfn) {
763 + if (base_mfn > start_mfn || end_mfn > top_mfn) {
767 + curr_match = op.u.read_memtype.type;
768 + if (prev_match == 0xFF) {
769 + prev_match = curr_match;
773 + if (prev_match == MTRR_TYPE_UNCACHABLE ||
774 + curr_match == MTRR_TYPE_UNCACHABLE) {
775 + return MTRR_TYPE_UNCACHABLE;
778 + if ((prev_match == MTRR_TYPE_WRBACK &&
779 + curr_match == MTRR_TYPE_WRTHROUGH) ||
780 + (prev_match == MTRR_TYPE_WRTHROUGH &&
781 + curr_match == MTRR_TYPE_WRBACK)) {
782 + prev_match = MTRR_TYPE_WRTHROUGH;
783 + curr_match = MTRR_TYPE_WRTHROUGH;
786 + if (prev_match != curr_match) {
787 + return MTRR_TYPE_UNCACHABLE;
792 + if (start >= (1ULL<<32) && (end < tom2))
793 + return MTRR_TYPE_WRBACK;
796 + if (prev_match != 0xFF)
800 + op.cmd = XENPF_read_def_memtype;
801 + error = HYPERVISOR_platform_op(&op);
803 + return op.u.read_def_memtype.type;
805 + return MTRR_TYPE_UNCACHABLE;
809 + * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
810 + * for memory >4GB. Check for that here.
811 + * Note this won't check if the MTRRs < 4GB where the magic bit doesn't
812 + * apply to are wrong, but so far we don't know of any such case in the wild.
814 +#define Tom2Enabled (1U << 21)
815 +#define Tom2ForceMemTypeWB (1U << 22)
817 +int __init amd_special_default_mtrr(void)
821 + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
823 + if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
825 + /* In case some hypervisor doesn't pass SYSCFG through */
826 + if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
829 + * Memory between 4GB and top of mem is forced WB by this magic bit.
830 + * Reserved before K8RevF, but should be zero there.
832 + if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
833 + (Tom2Enabled | Tom2ForceMemTypeWB))
838 void __init mtrr_bp_init(void)
840 + if (amd_special_default_mtrr()) {
842 + rdmsrl(MSR_K8_TOP_MEM2, tom2);
843 + tom2 &= 0xffffff8000000ULL;
847 void mtrr_ap_init(void)
848 --- sle11-2009-10-16.orig/arch/x86/kernel/e820_32-xen.c 2009-03-16 16:33:40.000000000 +0100
849 +++ sle11-2009-10-16/arch/x86/kernel/e820_32-xen.c 2009-03-16 16:38:05.000000000 +0100
850 @@ -469,7 +469,7 @@ int __init sanitize_e820_map(struct e820
851 * thinkpad 560x, for example, does not cooperate with the memory
854 -int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
855 +int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
858 /* Only one memory region (or negative)? Ignore it */
859 @@ -480,33 +480,17 @@ int __init copy_e820_map(struct e820entr
863 - unsigned long long start = biosmap->addr;
864 - unsigned long long size = biosmap->size;
865 - unsigned long long end = start + size;
866 - unsigned long type = biosmap->type;
867 + u64 start = biosmap->addr;
868 + u64 size = biosmap->size;
869 + u64 end = start + size;
870 + u32 type = biosmap->type;
872 /* Overflow in 64 bits? Ignore the memory map. */
878 - * Some BIOSes claim RAM in the 640k - 1M region.
879 - * Not right. Fix it up.
881 - if (type == E820_RAM) {
882 - if (start < 0x100000ULL && end > 0xA0000ULL) {
883 - if (start < 0xA0000ULL)
884 - add_memory_region(start, 0xA0000ULL-start, type);
885 - if (end <= 0x100000ULL)
887 - start = 0x100000ULL;
888 - size = end - start;
892 add_memory_region(start, size, type);
893 - } while (biosmap++,--nr_map);
894 + } while (biosmap++, --nr_map);
897 if (is_initial_xendomain()) {
898 @@ -528,7 +512,7 @@ int __init copy_e820_map(struct e820entr
900 * Find the highest page frame number we have available
902 -void __init find_max_pfn(void)
903 +void __init propagate_e820_map(void)
907 @@ -814,7 +798,7 @@ static int __init parse_memmap(char *arg
908 * size before original memory map is
912 + propagate_e820_map();
913 saved_max_pfn = max_pfn;
916 --- sle11-2009-10-16.orig/arch/x86/kernel/e820_64-xen.c 2009-03-16 16:33:40.000000000 +0100
917 +++ sle11-2009-10-16/arch/x86/kernel/e820_64-xen.c 2009-03-16 16:38:05.000000000 +0100
918 @@ -40,11 +40,11 @@ struct e820map machine_e820;
919 unsigned long end_pfn;
922 - * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
923 - * The direct mapping extends to end_pfn_map, so that we can directly access
924 + * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
925 + * The direct mapping extends to max_pfn_mapped, so that we can directly access
926 * apertures, ACPI and other tables without having to play with fixmaps.
928 -unsigned long end_pfn_map;
929 +unsigned long max_pfn_mapped;
932 * Last pfn which the user wants to use.
933 @@ -63,8 +63,8 @@ struct early_res {
934 static struct early_res early_res[MAX_EARLY_RES] __initdata = {
936 { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
938 - { SMP_TRAMPOLINE_BASE, SMP_TRAMPOLINE_BASE + 2*PAGE_SIZE, "SMP_TRAMPOLINE" },
939 +#ifdef CONFIG_X86_TRAMPOLINE
940 + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
944 @@ -89,19 +89,47 @@ void __init reserve_early(unsigned long
945 strncpy(r->name, name, sizeof(r->name) - 1);
948 -void __init early_res_to_bootmem(void)
949 +void __init free_early(unsigned long start, unsigned long end)
951 + struct early_res *r;
954 + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
956 + if (start == r->start && end == r->end)
959 + if (i >= MAX_EARLY_RES || !early_res[i].end)
960 + panic("free_early on not reserved area: %lx-%lx!", start, end);
962 + for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
965 + memmove(&early_res[i], &early_res[i + 1],
966 + (j - 1 - i) * sizeof(struct early_res));
968 + early_res[j - 1].end = 0;
971 +void __init early_res_to_bootmem(unsigned long start, unsigned long end)
974 + unsigned long final_start, final_end;
975 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
976 struct early_res *r = &early_res[i];
977 - printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i,
978 - r->start, r->end - 1, r->name);
979 - reserve_bootmem_generic(r->start, r->end - r->start);
980 + final_start = max(start, r->start);
981 + final_end = min(end, r->end);
982 + if (final_start >= final_end)
984 + printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
985 + final_start, final_end - 1, r->name);
986 + reserve_bootmem_generic(final_start, final_end - final_start);
990 /* Check for already reserved areas */
991 -static inline int bad_addr(unsigned long *addrp, unsigned long size)
992 +static inline int __init
993 +bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
996 unsigned long addr = *addrp, last;
997 @@ -111,7 +139,7 @@ again:
998 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
999 struct early_res *r = &early_res[i];
1000 if (last >= r->start && addr < r->end) {
1001 - *addrp = addr = r->end;
1002 + *addrp = addr = round_up(r->end, align);
1006 @@ -119,6 +147,40 @@ again:
1010 +/* Check for already reserved areas */
1011 +static inline int __init
1012 +bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
1015 + unsigned long addr = *addrp, last;
1016 + unsigned long size = *sizep;
1019 + last = addr + size;
1020 + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
1021 + struct early_res *r = &early_res[i];
1022 + if (last > r->start && addr < r->start) {
1023 + size = r->start - addr;
1027 + if (last > r->end && addr < r->end) {
1028 + addr = round_up(r->end, align);
1029 + size = last - addr;
1033 + if (last <= r->end && addr >= r->start) {
1045 * This function checks if any part of the range <start,end> is mapped
1047 @@ -194,26 +256,27 @@ int __init e820_all_mapped(unsigned long
1048 * Find a free area with specified alignment in a specific range.
1050 unsigned long __init find_e820_area(unsigned long start, unsigned long end,
1051 - unsigned size, unsigned long align)
1052 + unsigned long size, unsigned long align)
1055 - unsigned long mask = ~(align - 1);
1057 for (i = 0; i < e820.nr_map; i++) {
1058 struct e820entry *ei = &e820.map[i];
1059 - unsigned long addr = ei->addr, last;
1060 + unsigned long addr, last;
1061 + unsigned long ei_last;
1063 if (ei->type != E820_RAM)
1065 + addr = round_up(ei->addr, align);
1066 + ei_last = ei->addr + ei->size;
1069 - if (addr > ei->addr + ei->size)
1070 + addr = round_up(start, align);
1071 + if (addr >= ei_last)
1073 - while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
1074 + while (bad_addr(&addr, size, align) && addr+size <= ei_last)
1076 - addr = (addr + align - 1) & mask;
1078 - if (last > ei->addr + ei->size)
1079 + if (last > ei_last)
1083 @@ -223,6 +286,40 @@ unsigned long __init find_e820_area(unsi
1087 + * Find next free range after *start
1089 +unsigned long __init find_e820_area_size(unsigned long start,
1090 + unsigned long *sizep,
1091 + unsigned long align)
1095 + for (i = 0; i < e820.nr_map; i++) {
1096 + struct e820entry *ei = &e820.map[i];
1097 + unsigned long addr, last;
1098 + unsigned long ei_last;
1100 + if (ei->type != E820_RAM)
1102 + addr = round_up(ei->addr, align);
1103 + ei_last = ei->addr + ei->size;
1105 + addr = round_up(start, align);
1106 + if (addr >= ei_last)
1108 + *sizep = ei_last - addr;
1109 + while (bad_addr_size(&addr, sizep, align) &&
1110 + addr + *sizep <= ei_last)
1112 + last = addr + *sizep;
1113 + if (last > ei_last)
1121 * Find the highest page frame number we have available
1123 unsigned long __init e820_end_of_ram(void)
1124 @@ -231,31 +328,29 @@ unsigned long __init e820_end_of_ram(voi
1126 end_pfn = find_max_pfn_with_active_regions();
1128 - if (end_pfn > end_pfn_map)
1129 - end_pfn_map = end_pfn;
1130 - if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
1131 - end_pfn_map = MAXMEM>>PAGE_SHIFT;
1132 + if (end_pfn > max_pfn_mapped)
1133 + max_pfn_mapped = end_pfn;
1134 + if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
1135 + max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
1136 if (end_pfn > end_user_pfn)
1137 end_pfn = end_user_pfn;
1138 - if (end_pfn > end_pfn_map)
1139 - end_pfn = end_pfn_map;
1140 + if (end_pfn > max_pfn_mapped)
1141 + end_pfn = max_pfn_mapped;
1143 - printk(KERN_INFO "end_pfn_map = %lu\n", end_pfn_map);
1144 + printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
1149 * Mark e820 reserved areas as busy for the resource manager.
1151 -void __init e820_reserve_resources(struct e820entry *e820, int nr_map,
1152 - struct resource *code_resource,
1153 - struct resource *data_resource,
1154 - struct resource *bss_resource)
1155 +void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
1158 + struct resource *res;
1160 + res = alloc_bootmem_low(sizeof(struct resource) * nr_map);
1161 for (i = 0; i < nr_map; i++) {
1162 - struct resource *res;
1163 - res = alloc_bootmem_low(sizeof(struct resource));
1164 switch (e820[i].type) {
1165 case E820_RAM: res->name = "System RAM"; break;
1166 case E820_ACPI: res->name = "ACPI Tables"; break;
1167 @@ -265,26 +360,8 @@ void __init e820_reserve_resources(struc
1168 res->start = e820[i].addr;
1169 res->end = res->start + e820[i].size - 1;
1170 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1171 - request_resource(&iomem_resource, res);
1172 - if (e820[i].type == E820_RAM) {
1174 - * We don't know which RAM region contains kernel data,
1175 - * so we try it repeatedly and let the resource manager
1179 - request_resource(res, code_resource);
1180 - request_resource(res, data_resource);
1181 - request_resource(res, bss_resource);
1183 -#ifdef CONFIG_KEXEC
1184 - if (crashk_res.start != crashk_res.end)
1185 - request_resource(res, &crashk_res);
1187 - xen_machine_kexec_register_resources(res);
1191 + insert_resource(&iomem_resource, res);
1196 @@ -338,9 +415,9 @@ static int __init e820_find_active_regio
1197 if (*ei_startpfn >= *ei_endpfn)
1200 - /* Check if end_pfn_map should be updated */
1201 - if (ei->type != E820_RAM && *ei_endpfn > end_pfn_map)
1202 - end_pfn_map = *ei_endpfn;
1203 + /* Check if max_pfn_mapped should be updated */
1204 + if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
1205 + max_pfn_mapped = *ei_endpfn;
1207 /* Skip if map is outside the node */
1208 if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
1209 @@ -667,10 +744,10 @@ static int __init copy_e820_map(struct e
1213 - unsigned long start = biosmap->addr;
1214 - unsigned long size = biosmap->size;
1215 - unsigned long end = start + size;
1216 - unsigned long type = biosmap->type;
1217 + u64 start = biosmap->addr;
1218 + u64 size = biosmap->size;
1219 + u64 end = start + size;
1220 + u32 type = biosmap->type;
1222 /* Overflow in 64 bits? Ignore the memory map. */
1224 @@ -801,7 +878,7 @@ static int __init parse_memmap_opt(char
1225 saved_max_pfn = e820_end_of_ram();
1226 remove_all_active_ranges();
1229 + max_pfn_mapped = 0;
1233 --- sle11-2009-10-16.orig/arch/x86/kernel/early_printk-xen.c 2009-03-16 16:33:40.000000000 +0100
1234 +++ sle11-2009-10-16/arch/x86/kernel/early_printk-xen.c 2009-09-24 10:29:16.000000000 +0200
1236 #define VGABASE (__ISA_IO_base + 0xb8000)
1238 static int max_ypos = 25, max_xpos = 80;
1239 -static int current_ypos = 25, current_xpos = 0;
1240 +static int current_ypos = 25, current_xpos;
1242 static void early_vga_write(struct console *con, const char *str, unsigned n)
1244 @@ -108,12 +108,12 @@ static __init void early_serial_init(cha
1248 - if (!strncmp(s,"0x",2)) {
1249 + if (!strncmp(s, "0x", 2)) {
1250 early_serial_base = simple_strtoul(s, &e, 16);
1252 static int bases[] = { 0x3f8, 0x2f8 };
1254 - if (!strncmp(s,"ttyS",4))
1255 + if (!strncmp(s, "ttyS", 4))
1257 port = simple_strtoul(s, &e, 10);
1258 if (port > 1 || s == e)
1259 @@ -223,7 +223,7 @@ static struct console simnow_console = {
1261 /* Direct interface for emergencies */
1262 static struct console *early_console = &early_vga_console;
1263 -static int early_console_initialized = 0;
1264 +static int early_console_initialized;
1266 void early_printk(const char *fmt, ...)
1268 @@ -231,9 +231,9 @@ void early_printk(const char *fmt, ...)
1273 - n = vscnprintf(buf,512,fmt,ap);
1274 - early_console->write(early_console,buf,n);
1275 + va_start(ap, fmt);
1276 + n = vscnprintf(buf, 512, fmt, ap);
1277 + early_console->write(early_console, buf, n);
1281 @@ -259,16 +259,16 @@ static int __init setup_early_printk(cha
1282 early_console = &early_serial_console;
1283 } else if (!strncmp(buf, "vga", 3)) {
1285 - && boot_params.screen_info.orig_video_isVGA == 1) {
1286 + && boot_params.screen_info.orig_video_isVGA == 1) {
1287 max_xpos = boot_params.screen_info.orig_video_cols;
1288 max_ypos = boot_params.screen_info.orig_video_lines;
1289 current_ypos = boot_params.screen_info.orig_y;
1291 early_console = &early_vga_console;
1292 - } else if (!strncmp(buf, "simnow", 6)) {
1293 - simnow_init(buf + 6);
1294 - early_console = &simnow_console;
1296 + } else if (!strncmp(buf, "simnow", 6)) {
1297 + simnow_init(buf + 6);
1298 + early_console = &simnow_console;
1301 } else if (!strncmp(buf, "xen", 3)) {
1302 early_console = &xenboot_console;
1303 --- sle11-2009-10-16.orig/arch/x86/kernel/entry_32-xen.S 2009-05-14 11:18:32.000000000 +0200
1304 +++ sle11-2009-10-16/arch/x86/kernel/entry_32-xen.S 2009-03-16 16:38:05.000000000 +0100
1307 - * linux/arch/i386/entry.S
1309 * Copyright (C) 1991, 1992 Linus Torvalds
1312 #include <asm/desc.h>
1313 #include <asm/percpu.h>
1314 #include <asm/dwarf2.h>
1315 +#include <asm/processor-flags.h>
1316 #include "irq_vectors.h"
1317 #include <xen/interface/xen.h>
1321 #define nr_syscalls ((syscall_table_size)/4)
1323 -CF_MASK = 0x00000001
1324 -TF_MASK = 0x00000100
1325 -IF_MASK = 0x00000200
1326 -DF_MASK = 0x00000400
1327 -NT_MASK = 0x00004000
1328 -VM_MASK = 0x00020000
1329 /* Pseudo-eflags. */
1330 NMI_MASK = 0x80000000
1332 @@ -87,7 +81,7 @@ NMI_MASK = 0x80000000
1334 .macro TRACE_IRQS_IRET
1335 #ifdef CONFIG_TRACE_IRQFLAGS
1336 - testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off?
1337 + testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off?
1341 @@ -249,7 +243,7 @@ ret_from_intr:
1343 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
1344 movb PT_CS(%esp), %al
1345 - andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
1346 + andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
1347 cmpl $USER_RPL, %eax
1348 jb resume_kernel # not returning to v8086 or userspace
1350 @@ -258,6 +252,7 @@ ENTRY(resume_userspace)
1351 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
1352 # setting need_resched or sigpending
1353 # between sampling and the iret
1355 movl TI_flags(%ebp), %ecx
1356 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
1357 # int/exception return?
1358 @@ -274,7 +269,7 @@ need_resched:
1359 movl TI_flags(%ebp), %ecx # need_resched set ?
1360 testb $_TIF_NEED_RESCHED, %cl
1362 - testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ?
1363 + testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
1365 call preempt_schedule_irq
1367 @@ -299,10 +294,10 @@ ENTRY(ia32_sysenter_target)
1368 movl SYSENTER_stack_sp0(%esp),%esp
1371 - * No need to follow this irqs on/off section: the syscall
1372 - * disabled irqs and here we enable it straight after entry:
1373 + * Interrupts are disabled here, but we can't trace it until
1374 + * enough kernel state to call TRACE_IRQS_OFF can be called - but
1375 + * we immediately enable interrupts at that point anyway.
1377 - ENABLE_INTERRUPTS(CLBR_NONE)
1379 CFI_ADJUST_CFA_OFFSET 4
1380 /*CFI_REL_OFFSET ss, 0*/
1381 @@ -310,6 +305,7 @@ sysenter_past_esp:
1382 CFI_ADJUST_CFA_OFFSET 4
1383 CFI_REL_OFFSET esp, 0
1385 + orl $X86_EFLAGS_IF, (%esp)
1386 CFI_ADJUST_CFA_OFFSET 4
1388 CFI_ADJUST_CFA_OFFSET 4
1389 @@ -323,6 +319,11 @@ sysenter_past_esp:
1390 CFI_ADJUST_CFA_OFFSET 4
1391 CFI_REL_OFFSET eip, 0
1394 + CFI_ADJUST_CFA_OFFSET 4
1396 + ENABLE_INTERRUPTS(CLBR_NONE)
1399 * Load the potential sixth argument from user stack.
1400 * Careful about security.
1401 @@ -330,14 +331,12 @@ sysenter_past_esp:
1402 cmpl $__PAGE_OFFSET-3,%ebp
1405 + movl %ebp,PT_EBP(%esp)
1406 .section __ex_table,"a"
1408 .long 1b,syscall_fault
1412 - CFI_ADJUST_CFA_OFFSET 4
1414 GET_THREAD_INFO(%ebp)
1416 jnz syscall_trace_entry
1417 @@ -414,7 +413,7 @@ syscall_exit:
1418 # setting need_resched or sigpending
1419 # between sampling and the iret
1421 - testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
1422 + testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
1424 orl $_TIF_SINGLESTEP,TI_flags(%ebp)
1426 @@ -430,7 +429,7 @@ restore_all:
1427 # See comments in process.c:copy_thread() for details.
1428 movb PT_OLDSS(%esp), %ah
1429 movb PT_CS(%esp), %al
1430 - andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
1431 + andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
1432 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
1434 je ldt_ss # returning to user-space with LDT SS
1435 @@ -438,7 +437,7 @@ restore_nocheck:
1438 movl PT_EFLAGS(%esp), %eax
1439 - testl $(VM_MASK|NMI_MASK), %eax
1440 + testl $(X86_EFLAGS_VM|NMI_MASK), %eax
1443 shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
1444 @@ -456,7 +455,7 @@ restore_nocheck_notrace:
1447 .section .fixup,"ax"
1450 pushl $0 # no error code
1451 pushl $do_iret_error
1453 @@ -560,7 +559,7 @@ work_resched:
1454 work_notifysig: # deal with pending signals and
1455 # notify-resume requests
1457 - testl $VM_MASK, PT_EFLAGS(%esp)
1458 + testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
1460 jne work_notifysig_v86 # returning to kernel-space or
1462 @@ -617,9 +616,6 @@ END(syscall_exit_work)
1464 RING0_INT_FRAME # can't unwind into user space anyway
1466 - pushl %eax # save orig_eax
1467 - CFI_ADJUST_CFA_OFFSET 4
1469 GET_THREAD_INFO(%ebp)
1470 movl $-EFAULT,PT_EAX(%esp)
1471 jmp resume_userspace
1472 --- sle11-2009-10-16.orig/arch/x86/kernel/entry_64-xen.S 2009-03-16 16:33:40.000000000 +0100
1473 +++ sle11-2009-10-16/arch/x86/kernel/entry_64-xen.S 2009-03-16 16:38:05.000000000 +0100
1474 @@ -338,19 +338,17 @@ badsys:
1475 /* Do syscall tracing */
1478 - movq $-ENOSYS,RAX(%rsp)
1479 + movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
1480 FIXUP_TOP_OF_STACK %rdi
1482 call syscall_trace_enter
1483 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
1485 cmpq $__NR_syscall_max,%rax
1486 - movq $-ENOSYS,%rcx
1489 + ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
1490 movq %r10,%rcx /* fixup for C */
1491 call *sys_call_table(,%rax,8)
1492 -1: movq %rax,RAX-ARGOFFSET(%rsp)
1493 + movq %rax,RAX-ARGOFFSET(%rsp)
1494 /* Use IRET because user could have changed frame */
1497 --- sle11-2009-10-16.orig/arch/x86/kernel/genapic_64-xen.c 2009-03-16 16:33:40.000000000 +0100
1498 +++ sle11-2009-10-16/arch/x86/kernel/genapic_64-xen.c 2009-03-16 16:38:05.000000000 +0100
1500 #include <linux/kernel.h>
1501 #include <linux/ctype.h>
1502 #include <linux/init.h>
1503 +#include <linux/hardirq.h>
1505 #include <asm/smp.h>
1506 #include <asm/ipi.h>
1508 #include <acpi/acpi_bus.h>
1511 -/* which logical CPU number maps to which CPU (physical APIC ID) */
1513 -u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata
1514 - = { [0 ... NR_CPUS-1] = BAD_APICID };
1515 -void *x86_cpu_to_apicid_early_ptr;
1517 -DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
1518 -EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
1519 +DEFINE_PER_CPU(int, x2apic_extra_bits);
1522 struct genapic __read_mostly *genapic = &apic_flat;
1524 +static enum uv_system_type uv_system_type;
1526 extern struct genapic apic_xen;
1527 struct genapic __read_mostly *genapic = &apic_xen;
1528 @@ -47,6 +43,9 @@ struct genapic __read_mostly *genapic =
1529 void __init setup_apic_routing(void)
1532 + if (uv_system_type == UV_NON_UNIQUE_APIC)
1533 + genapic = &apic_x2apic_uv_x;
1537 * Quirk: some x86_64 machines can only use physical APIC mode
1538 @@ -59,7 +58,7 @@ void __init setup_apic_routing(void)
1542 - if (cpus_weight(cpu_possible_map) <= 8)
1543 + if (num_possible_cpus() <= 8)
1544 genapic = &apic_flat;
1546 genapic = &apic_physflat;
1547 @@ -85,3 +84,41 @@ void send_IPI_self(int vector)
1548 xen_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
1552 +int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
1555 + if (!strcmp(oem_id, "SGI")) {
1556 + if (!strcmp(oem_table_id, "UVL"))
1557 + uv_system_type = UV_LEGACY_APIC;
1558 + else if (!strcmp(oem_table_id, "UVX"))
1559 + uv_system_type = UV_X2APIC;
1560 + else if (!strcmp(oem_table_id, "UVH"))
1561 + uv_system_type = UV_NON_UNIQUE_APIC;
1568 +unsigned int read_apic_id(void)
1572 + WARN_ON(preemptible() && num_online_cpus() > 1);
1573 + id = apic_read(APIC_ID);
1574 + if (uv_system_type >= UV_X2APIC)
1575 + id |= __get_cpu_var(x2apic_extra_bits);
1579 +enum uv_system_type get_uv_system_type(void)
1581 + return uv_system_type;
1584 +int is_uv_system(void)
1586 + return uv_system_type != UV_NONE;
1589 --- sle11-2009-10-16.orig/arch/x86/kernel/genapic_xen_64.c 2008-12-15 11:27:22.000000000 +0100
1590 +++ sle11-2009-10-16/arch/x86/kernel/genapic_xen_64.c 2009-03-16 16:38:05.000000000 +0100
1591 @@ -72,9 +72,7 @@ static cpumask_t xen_target_cpus(void)
1593 static cpumask_t xen_vector_allocation_domain(int cpu)
1595 - cpumask_t domain = CPU_MASK_NONE;
1596 - cpu_set(cpu, domain);
1598 + return cpumask_of_cpu(cpu);
1602 --- sle11-2009-10-16.orig/arch/x86/kernel/head64-xen.c 2009-03-16 16:33:40.000000000 +0100
1603 +++ sle11-2009-10-16/arch/x86/kernel/head64-xen.c 2009-03-16 16:38:05.000000000 +0100
1605 #include <linux/string.h>
1606 #include <linux/percpu.h>
1607 #include <linux/start_kernel.h>
1608 +#include <linux/io.h>
1609 #include <linux/module.h>
1611 #include <asm/processor.h>
1613 #include <asm/sections.h>
1614 #include <asm/kdebug.h>
1615 #include <asm/e820.h>
1616 +#include <asm/bios_ebda.h>
1618 unsigned long start_pfn;
1620 @@ -75,34 +77,75 @@ EXPORT_SYMBOL(machine_to_phys_mapping);
1621 unsigned int machine_to_phys_order;
1622 EXPORT_SYMBOL(machine_to_phys_order);
1624 -#define EBDA_ADDR_POINTER 0x40E
1625 +#define BIOS_LOWMEM_KILOBYTES 0x413
1627 -static __init void reserve_ebda(void)
1629 + * The BIOS places the EBDA/XBDA at the top of conventional
1630 + * memory, and usually decreases the reported amount of
1631 + * conventional memory (int 0x12) too. This also contains a
1632 + * workaround for Dell systems that neglect to reserve EBDA.
1633 + * The same workaround also avoids a problem with the AMD768MPX
1634 + * chipset: reserve a page before VGA to prevent PCI prefetch
1635 + * into it (errata #56). Usually the page is reserved anyways,
1636 + * unless you have no PS/2 mouse plugged in.
1638 +static void __init reserve_ebda_region(void)
1641 - unsigned ebda_addr, ebda_size;
1642 + unsigned int lowmem, ebda_addr;
1645 - * there is a real-mode segmented pointer pointing to the
1646 - * 4K EBDA area at 0x40E
1648 - ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
1652 + /* To determine the position of the EBDA and the */
1653 + /* end of conventional memory, we need to look at */
1654 + /* the BIOS data area. In a paravirtual environment */
1655 + /* that area is absent. We'll just have to assume */
1656 + /* that the paravirt case can handle memory setup */
1657 + /* correctly, without our help. */
1658 + if (paravirt_enabled())
1661 - ebda_size = *(unsigned short *)__va(ebda_addr);
1662 + /* end of low (conventional) memory */
1663 + lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
1666 + /* start of EBDA area */
1667 + ebda_addr = get_bios_ebda();
1669 + /* Fixup: bios puts an EBDA in the top 64K segment */
1670 + /* of conventional memory, but does not adjust lowmem. */
1671 + if ((lowmem - ebda_addr) <= 0x10000)
1672 + lowmem = ebda_addr;
1674 + /* Fixup: bios does not report an EBDA at all. */
1675 + /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
1676 + if ((ebda_addr == 0) && (lowmem >= 0x9f000))
1679 + /* Paranoia: should never happen, but... */
1680 + if ((lowmem == 0) || (lowmem >= 0x100000))
1683 - /* Round EBDA up to pages */
1684 - if (ebda_size == 0)
1687 - ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
1688 - if (ebda_size > 64*1024)
1689 - ebda_size = 64*1024;
1690 + /* reserve all memory between lowmem and the 1MB mark */
1691 + reserve_early(lowmem, 0x100000, "BIOS reserved");
1695 - reserve_early(ebda_addr, ebda_addr + ebda_size, "EBDA");
1696 +static void __init reserve_setup_data(void)
1699 + struct setup_data *data;
1700 + unsigned long pa_data;
1703 + if (boot_params.hdr.version < 0x0209)
1705 + pa_data = boot_params.hdr.setup_data;
1707 + data = early_ioremap(pa_data, sizeof(*data));
1708 + sprintf(buf, "setup data %x", data->type);
1709 + reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
1710 + pa_data = data->next;
1711 + early_iounmap(data, sizeof(*data));
1716 @@ -112,6 +155,19 @@ void __init x86_64_start_kernel(char * r
1717 unsigned long machine_to_phys_nr_ents;
1721 + * Build-time sanity checks on the kernel image and module
1722 + * area mappings. (these are purely build-time and produce no code)
1724 + BUILD_BUG_ON(MODULES_VADDR < KERNEL_IMAGE_START);
1725 + BUILD_BUG_ON(MODULES_VADDR-KERNEL_IMAGE_START < KERNEL_IMAGE_SIZE);
1726 + BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
1727 + BUILD_BUG_ON((KERNEL_IMAGE_START & ~PMD_MASK) != 0);
1728 + BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
1729 + BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
1730 + BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
1731 + (__START_KERNEL & PGDIR_MASK)));
1733 xen_setup_features();
1735 xen_start_info = (struct start_info *)real_mode_data;
1736 @@ -140,7 +196,7 @@ void __init x86_64_start_kernel(char * r
1737 /* Cleanup the over mapped high alias */
1740 - for (i = 0; i < IDT_ENTRIES; i++) {
1741 + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
1742 #ifdef CONFIG_EARLY_PRINTK
1743 set_intr_gate(i, &early_idt_handlers[i]);
1745 @@ -163,7 +219,8 @@ void __init x86_64_start_kernel(char * r
1746 reserve_early(round_up(__pa_symbol(&_end), PAGE_SIZE),
1747 start_pfn << PAGE_SHIFT, "Xen provided");
1750 + reserve_ebda_region();
1751 + reserve_setup_data();
1754 * At this point everything still needed from the boot loader
1755 --- sle11-2009-10-16.orig/arch/x86/kernel/head_32-xen.S 2009-03-16 16:33:40.000000000 +0100
1756 +++ sle11-2009-10-16/arch/x86/kernel/head_32-xen.S 2009-03-16 16:38:05.000000000 +0100
1757 @@ -69,7 +69,7 @@ ENTRY(startup_32)
1758 cld # gcc2 wants the direction flag cleared at all times
1760 pushl $0 # fake return address for unwinder
1762 + jmp i386_start_kernel
1764 #define HYPERCALL_PAGE_OFFSET 0x1000
1765 .org HYPERCALL_PAGE_OFFSET
1766 --- sle11-2009-10-16.orig/arch/x86/kernel/init_task-xen.c 2009-03-16 16:33:40.000000000 +0100
1767 +++ sle11-2009-10-16/arch/x86/kernel/init_task-xen.c 2009-03-16 16:38:05.000000000 +0100
1769 #include <asm/desc.h>
1771 static struct fs_struct init_fs = INIT_FS;
1772 -static struct files_struct init_files = INIT_FILES;
1773 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
1774 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
1775 #ifdef CONFIG_X86_XEN
1776 --- sle11-2009-10-16.orig/arch/x86/kernel/io_apic_32-xen.c 2009-03-16 16:33:40.000000000 +0100
1777 +++ sle11-2009-10-16/arch/x86/kernel/io_apic_32-xen.c 2009-03-16 16:38:05.000000000 +0100
1778 @@ -88,6 +88,16 @@ int sis_apic_bug = -1;
1780 int nr_ioapic_registers[MAX_IO_APICS];
1782 +/* I/O APIC entries */
1783 +struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
1786 +/* MP IRQ source entries */
1787 +struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
1789 +/* # of MP IRQ source entries */
1790 +int mp_irq_entries;
1792 static int disable_timer_pin_1 __initdata;
1795 @@ -863,10 +873,7 @@ static int __init find_isa_irq_pin(int i
1796 for (i = 0; i < mp_irq_entries; i++) {
1797 int lbus = mp_irqs[i].mpc_srcbus;
1799 - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
1800 - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
1801 - mp_bus_id_to_type[lbus] == MP_BUS_MCA
1803 + if (test_bit(lbus, mp_bus_not_pci) &&
1804 (mp_irqs[i].mpc_irqtype == type) &&
1805 (mp_irqs[i].mpc_srcbusirq == irq))
1807 @@ -882,10 +889,7 @@ static int __init find_isa_irq_apic(int
1808 for (i = 0; i < mp_irq_entries; i++) {
1809 int lbus = mp_irqs[i].mpc_srcbus;
1811 - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
1812 - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
1813 - mp_bus_id_to_type[lbus] == MP_BUS_MCA
1815 + if (test_bit(lbus, mp_bus_not_pci) &&
1816 (mp_irqs[i].mpc_irqtype == type) &&
1817 (mp_irqs[i].mpc_srcbusirq == irq))
1819 @@ -926,7 +930,7 @@ int IO_APIC_get_PCI_irq_vector(int bus,
1820 mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
1823 - if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
1824 + if (!test_bit(lbus, mp_bus_not_pci) &&
1825 !mp_irqs[i].mpc_irqtype &&
1827 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
1828 @@ -977,6 +981,7 @@ void __init setup_ioapic_dest(void)
1829 #endif /* !CONFIG_XEN */
1832 +#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
1834 * EISA Edge/Level control register, ELCR
1836 @@ -990,6 +995,13 @@ static int EISA_ELCR(unsigned int irq)
1837 "Broken MPtable reports ISA irq %d\n", irq);
1842 +/* ISA interrupts are always polarity zero edge triggered,
1843 + * when listed as conforming in the MP table. */
1845 +#define default_ISA_trigger(idx) (0)
1846 +#define default_ISA_polarity(idx) (0)
1848 /* EISA interrupts are always polarity zero and can be edge or level
1849 * trigger depending on the ELCR value. If an interrupt is listed as
1850 @@ -997,13 +1009,7 @@ static int EISA_ELCR(unsigned int irq)
1851 * be read in from the ELCR */
1853 #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
1854 -#define default_EISA_polarity(idx) (0)
1856 -/* ISA interrupts are always polarity zero edge triggered,
1857 - * when listed as conforming in the MP table. */
1859 -#define default_ISA_trigger(idx) (0)
1860 -#define default_ISA_polarity(idx) (0)
1861 +#define default_EISA_polarity(idx) default_ISA_polarity(idx)
1863 /* PCI interrupts are always polarity one level triggered,
1864 * when listed as conforming in the MP table. */
1865 @@ -1015,7 +1021,7 @@ static int EISA_ELCR(unsigned int irq)
1866 * when listed as conforming in the MP table. */
1868 #define default_MCA_trigger(idx) (1)
1869 -#define default_MCA_polarity(idx) (0)
1870 +#define default_MCA_polarity(idx) default_ISA_polarity(idx)
1872 static int MPBIOS_polarity(int idx)
1874 @@ -1029,35 +1035,9 @@ static int MPBIOS_polarity(int idx)
1876 case 0: /* conforms, ie. bus-type dependent polarity */
1878 - switch (mp_bus_id_to_type[bus])
1880 - case MP_BUS_ISA: /* ISA pin */
1882 - polarity = default_ISA_polarity(idx);
1885 - case MP_BUS_EISA: /* EISA pin */
1887 - polarity = default_EISA_polarity(idx);
1890 - case MP_BUS_PCI: /* PCI pin */
1892 - polarity = default_PCI_polarity(idx);
1895 - case MP_BUS_MCA: /* MCA pin */
1897 - polarity = default_MCA_polarity(idx);
1902 - printk(KERN_WARNING "broken BIOS!!\n");
1907 + polarity = test_bit(bus, mp_bus_not_pci)?
1908 + default_ISA_polarity(idx):
1909 + default_PCI_polarity(idx);
1912 case 1: /* high active */
1913 @@ -1098,11 +1078,15 @@ static int MPBIOS_trigger(int idx)
1915 case 0: /* conforms, ie. bus-type dependent */
1917 + trigger = test_bit(bus, mp_bus_not_pci)?
1918 + default_ISA_trigger(idx):
1919 + default_PCI_trigger(idx);
1920 +#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
1921 switch (mp_bus_id_to_type[bus])
1923 case MP_BUS_ISA: /* ISA pin */
1925 - trigger = default_ISA_trigger(idx);
1926 + /* set before the switch */
1929 case MP_BUS_EISA: /* EISA pin */
1930 @@ -1112,7 +1096,7 @@ static int MPBIOS_trigger(int idx)
1932 case MP_BUS_PCI: /* PCI pin */
1934 - trigger = default_PCI_trigger(idx);
1935 + /* set before the switch */
1938 case MP_BUS_MCA: /* MCA pin */
1939 @@ -1127,6 +1111,7 @@ static int MPBIOS_trigger(int idx)
1947 @@ -1176,39 +1161,22 @@ static int pin_2_irq(int idx, int apic,
1948 if (mp_irqs[idx].mpc_dstirq != pin)
1949 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
1951 - switch (mp_bus_id_to_type[bus])
1953 - case MP_BUS_ISA: /* ISA pin */
1957 - irq = mp_irqs[idx].mpc_srcbusirq;
1960 - case MP_BUS_PCI: /* PCI pin */
1963 - * PCI IRQs are mapped in order
1967 - irq += nr_ioapic_registers[i++];
1971 - * For MPS mode, so far only needed by ES7000 platform
1973 - if (ioapic_renumber_irq)
1974 - irq = ioapic_renumber_irq(apic, irq);
1975 + if (test_bit(bus, mp_bus_not_pci))
1976 + irq = mp_irqs[idx].mpc_srcbusirq;
1979 + * PCI IRQs are mapped in order
1983 + irq += nr_ioapic_registers[i++];
1990 - printk(KERN_ERR "unknown bus type %d.\n",bus);
1995 + * For MPS mode, so far only needed by ES7000 platform
1997 + if (ioapic_renumber_irq)
1998 + irq = ioapic_renumber_irq(apic, irq);
2002 @@ -1314,7 +1282,6 @@ static void __init setup_IO_APIC_irqs(vo
2004 struct IO_APIC_route_entry entry;
2005 int apic, pin, idx, irq, first_notcon = 1, vector;
2006 - unsigned long flags;
2008 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
2010 @@ -1380,9 +1347,7 @@ static void __init setup_IO_APIC_irqs(vo
2011 if (!apic && (irq < 16))
2012 disable_8259A_irq(irq);
2014 - spin_lock_irqsave(&ioapic_lock, flags);
2015 - __ioapic_write_entry(apic, pin, entry);
2016 - spin_unlock_irqrestore(&ioapic_lock, flags);
2017 + ioapic_write_entry(apic, pin, entry);
2021 @@ -1577,8 +1542,8 @@ void /*__init*/ print_local_APIC(void *
2023 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
2024 smp_processor_id(), hard_smp_processor_id());
2025 - v = apic_read(APIC_ID);
2026 - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
2027 + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
2028 + GET_APIC_ID(read_apic_id()));
2029 v = apic_read(APIC_LVR);
2030 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
2031 ver = GET_APIC_VERSION(v);
2032 @@ -1791,7 +1756,7 @@ void disable_IO_APIC(void)
2033 entry.delivery_mode = dest_ExtINT; /* ExtInt */
2035 entry.dest.physical.physical_dest =
2036 - GET_APIC_ID(apic_read(APIC_ID));
2037 + GET_APIC_ID(read_apic_id());
2040 * Add it to the IO-APIC irq-routing table:
2041 @@ -2090,8 +2055,7 @@ static inline void init_IO_APIC_traps(vo
2042 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2044 for (irq = 0; irq < NR_IRQS ; irq++) {
2046 - if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
2047 + if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
2049 * Hmm.. We don't have an entry for this,
2050 * so default to an old-fashioned 8259
2051 @@ -2166,7 +2130,7 @@ static void __init setup_nmi(void)
2052 * cycles as some i82489DX-based boards have glue logic that keeps the
2053 * 8259A interrupt line asserted until INTA. --macro
2055 -static inline void unlock_ExtINT_logic(void)
2056 +static inline void __init unlock_ExtINT_logic(void)
2059 struct IO_APIC_route_entry entry0, entry1;
2060 @@ -2218,8 +2182,6 @@ static inline void unlock_ExtINT_logic(v
2061 ioapic_write_entry(apic, pin, entry0);
2064 -int timer_uses_ioapic_pin_0;
2067 * This code may look a bit paranoid, but it's supposed to cooperate with
2068 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
2069 @@ -2259,9 +2221,6 @@ static inline void __init check_timer(vo
2070 pin2 = ioapic_i8259.pin;
2071 apic2 = ioapic_i8259.apic;
2074 - timer_uses_ioapic_pin_0 = 1;
2076 printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
2077 vector, apic1, pin1, apic2, pin2);
2079 @@ -2555,6 +2514,7 @@ void destroy_irq(unsigned int irq)
2080 dynamic_irq_cleanup(irq);
2082 spin_lock_irqsave(&vector_lock, flags);
2083 + clear_bit(irq_vector[irq], used_vectors);
2084 irq_vector[irq] = 0;
2085 spin_unlock_irqrestore(&vector_lock, flags);
2087 @@ -2871,7 +2831,6 @@ int __init io_apic_get_redir_entries (in
2088 int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
2090 struct IO_APIC_route_entry entry;
2091 - unsigned long flags;
2093 if (!IO_APIC_IRQ(irq)) {
2094 printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
2095 @@ -2912,9 +2871,7 @@ int io_apic_set_pci_routing (int ioapic,
2096 if (!ioapic && (irq < 16))
2097 disable_8259A_irq(irq);
2099 - spin_lock_irqsave(&ioapic_lock, flags);
2100 - __ioapic_write_entry(ioapic, pin, entry);
2101 - spin_unlock_irqrestore(&ioapic_lock, flags);
2102 + ioapic_write_entry(ioapic, pin, entry);
2106 --- sle11-2009-10-16.orig/arch/x86/kernel/io_apic_64-xen.c 2009-03-16 16:33:40.000000000 +0100
2107 +++ sle11-2009-10-16/arch/x86/kernel/io_apic_64-xen.c 2009-03-16 16:38:05.000000000 +0100
2109 #include <asm/smp.h>
2110 #include <asm/desc.h>
2111 #include <asm/proto.h>
2112 -#include <asm/mach_apic.h>
2113 #include <asm/acpi.h>
2114 #include <asm/dma.h>
2115 #include <asm/nmi.h>
2116 #include <asm/msidef.h>
2117 #include <asm/hypertransport.h>
2119 +#include <mach_ipi.h>
2120 +#include <mach_apic.h>
2125 @@ -101,6 +103,16 @@ DEFINE_SPINLOCK(vector_lock);
2127 int nr_ioapic_registers[MAX_IO_APICS];
2129 +/* I/O APIC entries */
2130 +struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
2133 +/* MP IRQ source entries */
2134 +struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
2136 +/* # of MP IRQ source entries */
2137 +int mp_irq_entries;
2140 * Rough estimation of how many shared IRQs there are, can
2141 * be changed anytime.
2142 @@ -181,11 +193,10 @@ static inline void io_apic_modify(unsign
2143 writel(value, &io_apic->data);
2146 -static int io_apic_level_ack_pending(unsigned int irq)
2147 +static bool io_apic_level_ack_pending(unsigned int irq)
2149 struct irq_pin_list *entry;
2150 unsigned long flags;
2153 spin_lock_irqsave(&ioapic_lock, flags);
2154 entry = irq_2_pin + irq;
2155 @@ -198,13 +209,17 @@ static int io_apic_level_ack_pending(uns
2157 reg = io_apic_read(entry->apic, 0x10 + pin*2);
2158 /* Is the remote IRR bit set? */
2159 - pending |= (reg >> 14) & 1;
2160 + if ((reg >> 14) & 1) {
2161 + spin_unlock_irqrestore(&ioapic_lock, flags);
2166 entry = irq_2_pin + entry->next;
2168 spin_unlock_irqrestore(&ioapic_lock, flags);
2175 @@ -762,7 +777,7 @@ static void __clear_irq_vector(int irq)
2176 per_cpu(vector_irq, cpu)[vector] = -1;
2179 - cfg->domain = CPU_MASK_NONE;
2180 + cpus_clear(cfg->domain);
2183 void __setup_vector_irq(int cpu)
2184 @@ -899,9 +914,8 @@ static void __init setup_IO_APIC_irqs(vo
2185 static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
2187 struct IO_APIC_route_entry entry;
2188 - unsigned long flags;
2190 - memset(&entry,0,sizeof(entry));
2191 + memset(&entry, 0, sizeof(entry));
2193 disable_8259A_irq(0);
2195 @@ -929,10 +943,7 @@ static void __init setup_ExtINT_IRQ0_pin
2197 * Add it to the IO-APIC irq-routing table:
2199 - spin_lock_irqsave(&ioapic_lock, flags);
2200 - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
2201 - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
2202 - spin_unlock_irqrestore(&ioapic_lock, flags);
2203 + ioapic_write_entry(apic, pin, entry);
2205 enable_8259A_irq(0);
2207 @@ -1061,8 +1072,7 @@ void __apicdebuginit print_local_APIC(vo
2209 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
2210 smp_processor_id(), hard_smp_processor_id());
2211 - v = apic_read(APIC_ID);
2212 - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
2213 + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
2214 v = apic_read(APIC_LVR);
2215 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
2216 ver = GET_APIC_VERSION(v);
2217 @@ -1260,7 +1270,7 @@ void disable_IO_APIC(void)
2218 entry.dest_mode = 0; /* Physical */
2219 entry.delivery_mode = dest_ExtINT; /* ExtInt */
2221 - entry.dest = GET_APIC_ID(apic_read(APIC_ID));
2222 + entry.dest = GET_APIC_ID(read_apic_id());
2225 * Add it to the IO-APIC irq-routing table:
2226 @@ -1353,9 +1363,7 @@ static int ioapic_retrigger_irq(unsigned
2227 unsigned long flags;
2229 spin_lock_irqsave(&vector_lock, flags);
2231 - cpu_set(first_cpu(cfg->domain), mask);
2233 + mask = cpumask_of_cpu(first_cpu(cfg->domain));
2234 send_IPI_mask(mask, cfg->vector);
2235 spin_unlock_irqrestore(&vector_lock, flags);
2237 @@ -1519,8 +1527,7 @@ static inline void init_IO_APIC_traps(vo
2238 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2240 for (irq = 0; irq < NR_IRQS ; irq++) {
2242 - if (IO_APIC_IRQ(tmp) && !irq_cfg[tmp].vector) {
2243 + if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
2245 * Hmm.. We don't have an entry for this,
2246 * so default to an old-fashioned 8259
2247 @@ -1597,22 +1604,19 @@ static void __init setup_nmi(void)
2248 * cycles as some i82489DX-based boards have glue logic that keeps the
2249 * 8259A interrupt line asserted until INTA. --macro
2251 -static inline void unlock_ExtINT_logic(void)
2252 +static inline void __init unlock_ExtINT_logic(void)
2255 struct IO_APIC_route_entry entry0, entry1;
2256 unsigned char save_control, save_freq_select;
2257 - unsigned long flags;
2259 pin = find_isa_irq_pin(8, mp_INT);
2260 apic = find_isa_irq_apic(8, mp_INT);
2264 - spin_lock_irqsave(&ioapic_lock, flags);
2265 - *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
2266 - *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
2267 - spin_unlock_irqrestore(&ioapic_lock, flags);
2268 + entry0 = ioapic_read_entry(apic, pin);
2270 clear_IO_APIC_pin(apic, pin);
2272 memset(&entry1, 0, sizeof(entry1));
2273 @@ -1625,10 +1629,7 @@ static inline void unlock_ExtINT_logic(v
2277 - spin_lock_irqsave(&ioapic_lock, flags);
2278 - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
2279 - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
2280 - spin_unlock_irqrestore(&ioapic_lock, flags);
2281 + ioapic_write_entry(apic, pin, entry1);
2283 save_control = CMOS_READ(RTC_CONTROL);
2284 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2285 @@ -1647,10 +1648,7 @@ static inline void unlock_ExtINT_logic(v
2286 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2287 clear_IO_APIC_pin(apic, pin);
2289 - spin_lock_irqsave(&ioapic_lock, flags);
2290 - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
2291 - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
2292 - spin_unlock_irqrestore(&ioapic_lock, flags);
2293 + ioapic_write_entry(apic, pin, entry0);
2297 @@ -2327,7 +2325,6 @@ static struct resource * __init ioapic_s
2301 - memset(mem, 0, n);
2302 mem += sizeof(struct resource) * nr_ioapics;
2304 for (i = 0; i < nr_ioapics; i++) {
2305 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
2306 +++ sle11-2009-10-16/arch/x86/kernel/ipi-xen.c 2009-03-16 16:38:05.000000000 +0100
2308 +#include <linux/cpumask.h>
2309 +#include <linux/interrupt.h>
2310 +#include <linux/init.h>
2312 +#include <linux/mm.h>
2313 +#include <linux/delay.h>
2314 +#include <linux/spinlock.h>
2315 +#include <linux/kernel_stat.h>
2316 +#include <linux/mc146818rtc.h>
2317 +#include <linux/cache.h>
2318 +#include <linux/interrupt.h>
2319 +#include <linux/cpu.h>
2320 +#include <linux/module.h>
2322 +#include <asm/smp.h>
2323 +#include <asm/mtrr.h>
2324 +#include <asm/tlbflush.h>
2325 +#include <asm/mmu_context.h>
2326 +#include <asm/apic.h>
2327 +#include <asm/proto.h>
2329 +#ifdef CONFIG_X86_32
2331 +#include <mach_apic.h>
2333 + * the following functions deal with sending IPIs between CPUs.
2335 + * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
2338 +static inline int __prepare_ICR(unsigned int shortcut, int vector)
2340 + unsigned int icr = shortcut | APIC_DEST_LOGICAL;
2344 + icr |= APIC_DM_FIXED | vector;
2347 + icr |= APIC_DM_NMI;
2353 +static inline int __prepare_ICR2(unsigned int mask)
2355 + return SET_APIC_DEST_FIELD(mask);
2358 +#include <xen/evtchn.h>
2360 +DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
2362 +static inline void __send_IPI_one(unsigned int cpu, int vector)
2364 + int irq = per_cpu(ipi_to_irq, cpu)[vector];
2366 + notify_remote_via_irq(irq);
2370 +void __send_IPI_shortcut(unsigned int shortcut, int vector)
2374 + * Subtle. In the case of the 'never do double writes' workaround
2375 + * we have to lock out interrupts to be safe. As we don't care
2376 + * of the value read we use an atomic rmw access to avoid costly
2377 + * cli/sti. Otherwise we use an even cheaper single atomic write
2385 + apic_wait_icr_idle();
2388 + * No need to touch the target chip field
2390 + cfg = __prepare_ICR(shortcut, vector);
2393 + * Send the IPI. The write to APIC_ICR fires this off.
2395 + apic_write_around(APIC_ICR, cfg);
2399 + switch (shortcut) {
2400 + case APIC_DEST_SELF:
2401 + __send_IPI_one(smp_processor_id(), vector);
2403 + case APIC_DEST_ALLBUT:
2404 + for_each_online_cpu(cpu)
2405 + if (cpu != smp_processor_id())
2406 + __send_IPI_one(cpu, vector);
2409 + printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
2416 +void send_IPI_self(int vector)
2418 + __send_IPI_shortcut(APIC_DEST_SELF, vector);
2423 + * This is used to send an IPI with no shorthand notation (the destination is
2424 + * specified in bits 56 to 63 of the ICR).
2426 +static inline void __send_IPI_dest_field(unsigned long mask, int vector)
2428 + unsigned long cfg;
2433 + if (unlikely(vector == NMI_VECTOR))
2434 + safe_apic_wait_icr_idle();
2436 + apic_wait_icr_idle();
2439 + * prepare target chip field
2441 + cfg = __prepare_ICR2(mask);
2442 + apic_write_around(APIC_ICR2, cfg);
2447 + cfg = __prepare_ICR(0, vector);
2450 + * Send the IPI. The write to APIC_ICR fires this off.
2452 + apic_write_around(APIC_ICR, cfg);
2457 + * This is only used on smaller machines.
2459 +void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
2462 + unsigned long mask = cpus_addr(cpumask)[0];
2467 + unsigned long flags;
2469 + local_irq_save(flags);
2471 + WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
2472 + __send_IPI_dest_field(mask, vector);
2474 + cpus_andnot(mask, cpumask, cpu_online_map);
2475 + WARN_ON(!cpus_empty(mask));
2476 + for_each_online_cpu(cpu)
2477 + if (cpu_isset(cpu, cpumask))
2478 + __send_IPI_one(cpu, vector);
2480 + local_irq_restore(flags);
2483 +void send_IPI_mask_sequence(cpumask_t mask, int vector)
2486 + unsigned long flags;
2487 + unsigned int query_cpu;
2490 + * Hack. The clustered APIC addressing mode doesn't allow us to send
2491 + * to an arbitrary mask, so I do a unicasts to each CPU instead. This
2492 + * should be modified to do 1 message per cluster ID - mbligh
2495 + local_irq_save(flags);
2496 + for_each_possible_cpu(query_cpu) {
2497 + if (cpu_isset(query_cpu, mask)) {
2498 + __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
2502 + local_irq_restore(flags);
2504 + send_IPI_mask_bitmask(mask, vector);
2508 +/* must come after the send_IPI functions above for inlining */
2509 +#include <mach_ipi.h>
2512 +static int convert_apicid_to_cpu(int apic_id)
2516 + for_each_possible_cpu(i) {
2517 + if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
2523 +int safe_smp_processor_id(void)
2525 + int apicid, cpuid;
2527 + if (!boot_cpu_has(X86_FEATURE_APIC))
2530 + apicid = hard_smp_processor_id();
2531 + if (apicid == BAD_APICID)
2534 + cpuid = convert_apicid_to_cpu(apicid);
2536 + return cpuid >= 0 ? cpuid : 0;
2540 --- sle11-2009-10-16.orig/arch/x86/kernel/irq_32-xen.c 2009-03-16 16:33:40.000000000 +0100
2541 +++ sle11-2009-10-16/arch/x86/kernel/irq_32-xen.c 2009-03-16 16:38:05.000000000 +0100
2542 @@ -79,7 +79,7 @@ unsigned int do_IRQ(struct pt_regs *regs
2544 if (unlikely((unsigned)irq >= NR_IRQS)) {
2545 printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
2546 - __FUNCTION__, irq);
2551 @@ -134,7 +134,7 @@ unsigned int do_IRQ(struct pt_regs *regs
2552 : "=a" (arg1), "=d" (arg2), "=b" (bx)
2553 : "0" (irq), "1" (desc), "2" (isp),
2554 "D" (desc->handle_irq)
2556 + : "memory", "cc", "ecx"
2560 @@ -190,8 +190,6 @@ void irq_ctx_exit(int cpu)
2561 hardirq_ctx[cpu] = NULL;
2564 -extern asmlinkage void __do_softirq(void);
2566 asmlinkage void do_softirq(void)
2568 unsigned long flags;
2569 --- sle11-2009-10-16.orig/arch/x86/kernel/machine_kexec_64.c 2009-03-16 16:33:40.000000000 +0100
2570 +++ sle11-2009-10-16/arch/x86/kernel/machine_kexec_64.c 2009-03-16 16:38:05.000000000 +0100
2571 @@ -120,8 +120,6 @@ int __init machine_kexec_setup_resources
2575 -void machine_kexec_register_resources(struct resource *res) { ; }
2577 #else /* CONFIG_XEN */
2579 #define x__pmd(x) __pmd(x)
2580 --- sle11-2009-10-16.orig/arch/x86/kernel/microcode-xen.c 2009-03-16 16:33:40.000000000 +0100
2581 +++ sle11-2009-10-16/arch/x86/kernel/microcode-xen.c 2009-03-16 16:38:05.000000000 +0100
2582 @@ -162,7 +162,7 @@ static int request_microcode(void)
2583 c->x86, c->x86_model, c->x86_mask);
2584 error = request_firmware(&firmware, name, µcode_pdev->dev);
2586 - pr_debug("ucode data file %s load failed\n", name);
2587 + pr_debug("microcode: ucode data file %s load failed\n", name);
2591 --- sle11-2009-10-16.orig/arch/x86/kernel/mmconf-fam10h_64.c 2009-10-28 14:55:03.000000000 +0100
2592 +++ sle11-2009-10-16/arch/x86/kernel/mmconf-fam10h_64.c 2009-03-16 16:38:05.000000000 +0100
2593 @@ -219,6 +219,16 @@ void __cpuinit fam10h_check_enable_mmcfg
2594 val |= fam10h_pci_mmconf_base | (8 << FAM10H_MMIO_CONF_BUSRANGE_SHIFT) |
2595 FAM10H_MMIO_CONF_ENABLE;
2596 wrmsrl(address, val);
2602 + rdmsrl(address, val2);
2604 + pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF;
2609 static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d)
2610 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
2611 +++ sle11-2009-10-16/arch/x86/kernel/mpparse-xen.c 2009-03-16 16:38:05.000000000 +0100
2614 + * Intel Multiprocessor Specification 1.1 and 1.4
2615 + * compliant MP-table parsing routines.
2617 + * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
2618 + * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
2619 + * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de>
2622 +#include <linux/mm.h>
2623 +#include <linux/init.h>
2624 +#include <linux/delay.h>
2625 +#include <linux/bootmem.h>
2626 +#include <linux/kernel_stat.h>
2627 +#include <linux/mc146818rtc.h>
2628 +#include <linux/bitops.h>
2629 +#include <linux/acpi.h>
2630 +#include <linux/module.h>
2632 +#include <asm/smp.h>
2633 +#include <asm/mtrr.h>
2634 +#include <asm/mpspec.h>
2635 +#include <asm/pgalloc.h>
2636 +#include <asm/io_apic.h>
2637 +#include <asm/proto.h>
2638 +#include <asm/acpi.h>
2639 +#include <asm/bios_ebda.h>
2641 +#include <mach_apic.h>
2642 +#ifdef CONFIG_X86_32
2643 +#include <mach_apicdef.h>
2644 +#include <mach_mpparse.h>
2647 +/* Have we found an MP table */
2648 +int smp_found_config;
2651 + * Various Linux-internal data structures created from the
2654 +#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
2655 +int mp_bus_id_to_type[MAX_MP_BUSSES];
2658 +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
2659 +int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 };
2661 +static int mp_current_pci_id;
2666 + * Intel MP BIOS table parsing routines:
2670 + * Checksum an MP configuration block.
2673 +static int __init mpf_checksum(unsigned char *mp, int len)
2680 + return sum & 0xFF;
2683 +#ifdef CONFIG_X86_NUMAQ
2685 + * Have to match translation table entries to main table entries by counter
2686 + * hence the mpc_record variable .... can't see a less disgusting way of
2690 +static int mpc_record;
2691 +static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
2695 +static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
2699 + char *bootup_cpu = "";
2701 + if (!(m->mpc_cpuflag & CPU_ENABLED)) {
2705 +#ifdef CONFIG_X86_NUMAQ
2706 + apicid = mpc_apic_id(m, translation_table[mpc_record]);
2708 + apicid = m->mpc_apicid;
2710 + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
2711 + bootup_cpu = " (Bootup-CPU)";
2712 + boot_cpu_physical_apicid = m->mpc_apicid;
2715 + printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
2716 + generic_processor_info(apicid, m->mpc_apicver);
2717 +#else /* CONFIG_XEN */
2722 +static void __init MP_bus_info(struct mpc_config_bus *m)
2726 + memcpy(str, m->mpc_bustype, 6);
2729 +#ifdef CONFIG_X86_NUMAQ
2730 + mpc_oem_bus_info(m, str, translation_table[mpc_record]);
2732 + Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
2735 +#if MAX_MP_BUSSES < 256
2736 + if (m->mpc_busid >= MAX_MP_BUSSES) {
2737 + printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
2738 + " is too large, max. supported is %d\n",
2739 + m->mpc_busid, str, MAX_MP_BUSSES - 1);
2744 + if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
2745 + set_bit(m->mpc_busid, mp_bus_not_pci);
2746 +#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
2747 + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
2749 + } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
2750 +#ifdef CONFIG_X86_NUMAQ
2751 + mpc_oem_pci_bus(m, translation_table[mpc_record]);
2753 + clear_bit(m->mpc_busid, mp_bus_not_pci);
2754 + mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
2755 + mp_current_pci_id++;
2756 +#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
2757 + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
2758 + } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
2759 + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
2760 + } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) {
2761 + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
2764 + printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
2767 +#ifdef CONFIG_X86_IO_APIC
2769 +static int bad_ioapic(unsigned long address)
2771 + if (nr_ioapics >= MAX_IO_APICS) {
2772 + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
2773 + "(found %d)\n", MAX_IO_APICS, nr_ioapics);
2774 + panic("Recompile kernel with bigger MAX_IO_APICS!\n");
2777 + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
2778 + " found in table, skipping!\n");
2784 +static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
2786 + if (!(m->mpc_flags & MPC_APIC_USABLE))
2789 + printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
2790 + m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
2792 + if (bad_ioapic(m->mpc_apicaddr))
2795 + mp_ioapics[nr_ioapics] = *m;
2799 +static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
2801 + mp_irqs[mp_irq_entries] = *m;
2802 + Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
2803 + " IRQ %02x, APIC ID %x, APIC INT %02x\n",
2804 + m->mpc_irqtype, m->mpc_irqflag & 3,
2805 + (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
2806 + m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
2807 + if (++mp_irq_entries == MAX_IRQ_SOURCES)
2808 + panic("Max # of irq sources exceeded!!\n");
2813 +static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
2815 + Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
2816 + " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
2817 + m->mpc_irqtype, m->mpc_irqflag & 3,
2818 + (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
2819 + m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
2822 +#ifdef CONFIG_X86_NUMAQ
2823 +static void __init MP_translation_info(struct mpc_config_translation *m)
2826 + "Translation: record %d, type %d, quad %d, global %d, local %d\n",
2827 + mpc_record, m->trans_type, m->trans_quad, m->trans_global,
2830 + if (mpc_record >= MAX_MPC_ENTRY)
2831 + printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
2833 + translation_table[mpc_record] = m; /* stash this for later */
2834 + if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
2835 + node_set_online(m->trans_quad);
2839 + * Read/parse the MPC oem tables
2842 +static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
2843 + unsigned short oemsize)
2845 + int count = sizeof(*oemtable); /* the header size */
2846 + unsigned char *oemptr = ((unsigned char *)oemtable) + count;
2849 + printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
2851 + if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
2852 + printk(KERN_WARNING
2853 + "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
2854 + oemtable->oem_signature[0], oemtable->oem_signature[1],
2855 + oemtable->oem_signature[2], oemtable->oem_signature[3]);
2858 + if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
2859 + printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
2862 + while (count < oemtable->oem_length) {
2863 + switch (*oemptr) {
2864 + case MP_TRANSLATION:
2866 + struct mpc_config_translation *m =
2867 + (struct mpc_config_translation *)oemptr;
2868 + MP_translation_info(m);
2869 + oemptr += sizeof(*m);
2870 + count += sizeof(*m);
2876 + printk(KERN_WARNING
2877 + "Unrecognised OEM table entry type! - %d\n",
2885 +static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
2888 + if (strncmp(oem, "IBM NUMA", 8))
2889 + printk("Warning! May not be a NUMA-Q system!\n");
2890 + if (mpc->mpc_oemptr)
2891 + smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
2892 + mpc->mpc_oemsize);
2894 +#endif /* CONFIG_X86_NUMAQ */
2897 + * Read/parse the MPC
2900 +static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
2904 + int count = sizeof(*mpc);
2905 + unsigned char *mpt = ((unsigned char *)mpc) + count;
2907 + if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
2908 + printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
2909 + mpc->mpc_signature[0], mpc->mpc_signature[1],
2910 + mpc->mpc_signature[2], mpc->mpc_signature[3]);
2913 + if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) {
2914 + printk(KERN_ERR "MPTABLE: checksum error!\n");
2917 + if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) {
2918 + printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
2922 + if (!mpc->mpc_lapic) {
2923 + printk(KERN_ERR "MPTABLE: null local APIC address!\n");
2926 + memcpy(oem, mpc->mpc_oem, 8);
2928 + printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem);
2930 + memcpy(str, mpc->mpc_productid, 12);
2932 + printk("Product ID: %s ", str);
2934 +#ifdef CONFIG_X86_32
2935 + mps_oem_check(mpc, oem, str);
2937 + printk(KERN_INFO "MPTABLE: Product ID: %s ", str);
2939 + printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic);
2941 + /* save the local APIC address, it might be non-default */
2943 + mp_lapic_addr = mpc->mpc_lapic;
2949 + * Now process the configuration blocks.
2951 +#ifdef CONFIG_X86_NUMAQ
2954 + while (count < mpc->mpc_length) {
2956 + case MP_PROCESSOR:
2958 + struct mpc_config_processor *m =
2959 + (struct mpc_config_processor *)mpt;
2960 + /* ACPI may have already provided this data */
2962 + MP_processor_info(m);
2963 + mpt += sizeof(*m);
2964 + count += sizeof(*m);
2969 + struct mpc_config_bus *m =
2970 + (struct mpc_config_bus *)mpt;
2972 + mpt += sizeof(*m);
2973 + count += sizeof(*m);
2978 +#ifdef CONFIG_X86_IO_APIC
2979 + struct mpc_config_ioapic *m =
2980 + (struct mpc_config_ioapic *)mpt;
2981 + MP_ioapic_info(m);
2983 + mpt += sizeof(struct mpc_config_ioapic);
2984 + count += sizeof(struct mpc_config_ioapic);
2989 +#ifdef CONFIG_X86_IO_APIC
2990 + struct mpc_config_intsrc *m =
2991 + (struct mpc_config_intsrc *)mpt;
2993 + MP_intsrc_info(m);
2995 + mpt += sizeof(struct mpc_config_intsrc);
2996 + count += sizeof(struct mpc_config_intsrc);
3001 + struct mpc_config_lintsrc *m =
3002 + (struct mpc_config_lintsrc *)mpt;
3003 + MP_lintsrc_info(m);
3004 + mpt += sizeof(*m);
3005 + count += sizeof(*m);
3009 + /* wrong mptable */
3010 + printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
3011 + printk(KERN_ERR "type %x\n", *mpt);
3012 + print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
3013 + 1, mpc, mpc->mpc_length, 1);
3014 + count = mpc->mpc_length;
3017 +#ifdef CONFIG_X86_NUMAQ
3021 + setup_apic_routing();
3022 + if (!num_processors)
3023 + printk(KERN_ERR "MPTABLE: no processors registered!\n");
3024 + return num_processors;
3027 +#ifdef CONFIG_X86_IO_APIC
3029 +static int __init ELCR_trigger(unsigned int irq)
3031 + unsigned int port;
3033 + port = 0x4d0 + (irq >> 3);
3034 + return (inb(port) >> (irq & 7)) & 1;
3037 +static void __init construct_default_ioirq_mptable(int mpc_default_type)
3039 + struct mpc_config_intsrc intsrc;
3041 + int ELCR_fallback = 0;
3043 + intsrc.mpc_type = MP_INTSRC;
3044 + intsrc.mpc_irqflag = 0; /* conforming */
3045 + intsrc.mpc_srcbus = 0;
3046 + intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
3048 + intsrc.mpc_irqtype = mp_INT;
3051 + * If true, we have an ISA/PCI system with no IRQ entries
3052 + * in the MP table. To prevent the PCI interrupts from being set up
3053 + * incorrectly, we try to use the ELCR. The sanity check to see if
3054 + * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
3055 + * never be level sensitive, so we simply see if the ELCR agrees.
3056 + * If it does, we assume it's valid.
3058 + if (mpc_default_type == 5) {
3059 + printk(KERN_INFO "ISA/PCI bus type with no IRQ information... "
3060 + "falling back to ELCR\n");
3062 + if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) ||
3064 + printk(KERN_ERR "ELCR contains invalid data... "
3065 + "not using ELCR\n");
3068 + "Using ELCR to identify PCI interrupts\n");
3069 + ELCR_fallback = 1;
3073 + for (i = 0; i < 16; i++) {
3074 + switch (mpc_default_type) {
3076 + if (i == 0 || i == 13)
3077 + continue; /* IRQ0 & IRQ13 not connected */
3078 + /* fall through */
3081 + continue; /* IRQ2 is never connected */
3084 + if (ELCR_fallback) {
3086 + * If the ELCR indicates a level-sensitive interrupt, we
3087 + * copy that information over to the MP table in the
3088 + * irqflag field (level sensitive, active high polarity).
3090 + if (ELCR_trigger(i))
3091 + intsrc.mpc_irqflag = 13;
3093 + intsrc.mpc_irqflag = 0;
3096 + intsrc.mpc_srcbusirq = i;
3097 + intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
3098 + MP_intsrc_info(&intsrc);
3101 + intsrc.mpc_irqtype = mp_ExtINT;
3102 + intsrc.mpc_srcbusirq = 0;
3103 + intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
3104 + MP_intsrc_info(&intsrc);
3109 +static inline void __init construct_default_ISA_mptable(int mpc_default_type)
3111 + struct mpc_config_processor processor;
3112 + struct mpc_config_bus bus;
3113 +#ifdef CONFIG_X86_IO_APIC
3114 + struct mpc_config_ioapic ioapic;
3116 + struct mpc_config_lintsrc lintsrc;
3117 + int linttypes[2] = { mp_ExtINT, mp_NMI };
3121 + * local APIC has default address
3123 + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
3126 + * 2 CPUs, numbered 0 & 1.
3128 + processor.mpc_type = MP_PROCESSOR;
3129 + /* Either an integrated APIC or a discrete 82489DX. */
3130 + processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
3131 + processor.mpc_cpuflag = CPU_ENABLED;
3132 + processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
3133 + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
3134 + processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
3135 + processor.mpc_reserved[0] = 0;
3136 + processor.mpc_reserved[1] = 0;
3137 + for (i = 0; i < 2; i++) {
3138 + processor.mpc_apicid = i;
3139 + MP_processor_info(&processor);
3142 + bus.mpc_type = MP_BUS;
3143 + bus.mpc_busid = 0;
3144 + switch (mpc_default_type) {
3146 + printk(KERN_ERR "???\nUnknown standard configuration %d\n",
3147 + mpc_default_type);
3148 + /* fall through */
3151 + memcpy(bus.mpc_bustype, "ISA ", 6);
3156 + memcpy(bus.mpc_bustype, "EISA ", 6);
3160 + memcpy(bus.mpc_bustype, "MCA ", 6);
3162 + MP_bus_info(&bus);
3163 + if (mpc_default_type > 4) {
3164 + bus.mpc_busid = 1;
3165 + memcpy(bus.mpc_bustype, "PCI ", 6);
3166 + MP_bus_info(&bus);
3169 +#ifdef CONFIG_X86_IO_APIC
3170 + ioapic.mpc_type = MP_IOAPIC;
3171 + ioapic.mpc_apicid = 2;
3172 + ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
3173 + ioapic.mpc_flags = MPC_APIC_USABLE;
3174 + ioapic.mpc_apicaddr = 0xFEC00000;
3175 + MP_ioapic_info(&ioapic);
3178 + * We set up most of the low 16 IO-APIC pins according to MPS rules.
3180 + construct_default_ioirq_mptable(mpc_default_type);
3182 + lintsrc.mpc_type = MP_LINTSRC;
3183 + lintsrc.mpc_irqflag = 0; /* conforming */
3184 + lintsrc.mpc_srcbusid = 0;
3185 + lintsrc.mpc_srcbusirq = 0;
3186 + lintsrc.mpc_destapic = MP_APIC_ALL;
3187 + for (i = 0; i < 2; i++) {
3188 + lintsrc.mpc_irqtype = linttypes[i];
3189 + lintsrc.mpc_destapiclint = i;
3190 + MP_lintsrc_info(&lintsrc);
3194 +static struct intel_mp_floating *mpf_found;
3197 + * Scan the memory blocks for an SMP configuration block.
3199 +static void __init __get_smp_config(unsigned early)
3201 + struct intel_mp_floating *mpf = mpf_found;
3203 + if (acpi_lapic && early)
3206 + * ACPI supports both logical (e.g. Hyper-Threading) and physical
3207 + * processors, where MPS only supports physical.
3209 + if (acpi_lapic && acpi_ioapic) {
3210 + printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
3213 + } else if (acpi_lapic)
3214 + printk(KERN_INFO "Using ACPI for processor (LAPIC) "
3215 + "configuration information\n");
3217 + printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
3218 + mpf->mpf_specification);
3219 +#ifdef CONFIG_X86_32
3220 + if (mpf->mpf_feature2 & (1 << 7)) {
3221 + printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
3224 + printk(KERN_INFO " Virtual Wire compatibility mode.\n");
3229 + * Now see if we need to read further.
3231 + if (mpf->mpf_feature1 != 0) {
3234 + * local APIC has default address
3236 + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
3240 + printk(KERN_INFO "Default MP configuration #%d\n",
3241 + mpf->mpf_feature1);
3242 + construct_default_ISA_mptable(mpf->mpf_feature1);
3244 + } else if (mpf->mpf_physptr) {
3247 + * Read the physical hardware table. Anything here will
3248 + * override the defaults.
3250 + if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr), early)) {
3251 + smp_found_config = 0;
3253 + "BIOS bug, MP table errors detected!...\n");
3254 + printk(KERN_ERR "... disabling SMP support. "
3255 + "(tell your hw vendor)\n");
3261 +#ifdef CONFIG_X86_IO_APIC
3263 + * If there are no explicit MP IRQ entries, then we are
3264 + * broken. We set up most of the low 16 IO-APIC pins to
3265 + * ISA defaults and hope it will work.
3267 + if (!mp_irq_entries) {
3268 + struct mpc_config_bus bus;
3270 + printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
3271 + "using default mptable. "
3272 + "(tell your hw vendor)\n");
3274 + bus.mpc_type = MP_BUS;
3275 + bus.mpc_busid = 0;
3276 + memcpy(bus.mpc_bustype, "ISA ", 6);
3277 + MP_bus_info(&bus);
3279 + construct_default_ioirq_mptable(0);
3286 + printk(KERN_INFO "Processors: %d\n", num_processors);
3288 + * Only use the first configuration found.
3292 +void __init early_get_smp_config(void)
3294 + __get_smp_config(1);
3297 +void __init get_smp_config(void)
3299 + __get_smp_config(0);
3302 +static int __init smp_scan_config(unsigned long base, unsigned long length,
3305 + unsigned int *bp = isa_bus_to_virt(base);
3306 + struct intel_mp_floating *mpf;
3308 + Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length);
3309 + BUILD_BUG_ON(sizeof(*mpf) != 16);
3311 + while (length > 0) {
3312 + mpf = (struct intel_mp_floating *)bp;
3313 + if ((*bp == SMP_MAGIC_IDENT) &&
3314 + (mpf->mpf_length == 1) &&
3315 + !mpf_checksum((unsigned char *)bp, 16) &&
3316 + ((mpf->mpf_specification == 1)
3317 + || (mpf->mpf_specification == 4))) {
3319 + smp_found_config = 1;
3321 +#ifdef CONFIG_X86_32
3323 + printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
3324 + mpf, virt_to_phys(mpf));
3325 + reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
3327 + if (mpf->mpf_physptr) {
3329 + * We cannot access to MPC table to compute
3330 + * table size yet, as only few megabytes from
3331 + * the bottom is mapped now.
3332 + * PC-9800's MPC table places on the very last
3333 + * of physical memory; so that simply reserving
3334 + * PAGE_SIZE from mpg->mpf_physptr yields BUG()
3335 + * in reserve_bootmem.
3337 + unsigned long size = PAGE_SIZE;
3338 + unsigned long end = max_low_pfn * PAGE_SIZE;
3339 + if (mpf->mpf_physptr + size > end)
3340 + size = end - mpf->mpf_physptr;
3341 + reserve_bootmem(mpf->mpf_physptr, size,
3345 + printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
3346 + mpf, ((void *)bp - isa_bus_to_virt(base)) + base);
3348 +#elif !defined(CONFIG_XEN)
3352 + reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
3353 + if (mpf->mpf_physptr)
3354 + reserve_bootmem_generic(mpf->mpf_physptr,
3365 +static void __init __find_smp_config(unsigned reserve)
3368 + unsigned int address;
3372 + * FIXME: Linux assumes you have 640K of base ram..
3373 + * this continues the error...
3375 + * 1) Scan the bottom 1K for a signature
3376 + * 2) Scan the top 1K of base RAM
3377 + * 3) Scan the 64K of bios
3379 + if (smp_scan_config(0x0, 0x400, reserve) ||
3380 + smp_scan_config(639 * 0x400, 0x400, reserve) ||
3381 + smp_scan_config(0xF0000, 0x10000, reserve))
3384 + * If it is an SMP machine we should know now, unless the
3385 + * configuration is in an EISA/MCA bus machine with an
3386 + * extended bios data area.
3388 + * there is a real-mode segmented pointer pointing to the
3389 + * 4K EBDA area at 0x40E, calculate and scan it here.
3391 + * NOTE! There are Linux loaders that will corrupt the EBDA
3392 + * area, and as such this kind of SMP config may be less
3393 + * trustworthy, simply because the SMP table may have been
3394 + * stomped on during early boot. These loaders are buggy and
3395 + * should be fixed.
3397 + * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
3401 + address = get_bios_ebda();
3403 + smp_scan_config(address, 0x400, reserve);
3407 +void __init early_find_smp_config(void)
3409 + __find_smp_config(0);
3412 +void __init find_smp_config(void)
3414 + __find_smp_config(1);
3417 +/* --------------------------------------------------------------------------
3418 + ACPI-based MP Configuration
3419 + -------------------------------------------------------------------------- */
3422 + * Keep this outside and initialized to 0, for !CONFIG_ACPI builds:
3428 +#ifdef CONFIG_X86_IO_APIC
3430 +#define MP_ISA_BUS 0
3432 +extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
3434 +static int mp_find_ioapic(int gsi)
3438 + /* Find the IOAPIC that manages this GSI. */
3439 + for (i = 0; i < nr_ioapics; i++) {
3440 + if ((gsi >= mp_ioapic_routing[i].gsi_base)
3441 + && (gsi <= mp_ioapic_routing[i].gsi_end))
3445 + printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
3449 +static u8 __init uniq_ioapic_id(u8 id)
3451 +#ifdef CONFIG_X86_32
3452 + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
3453 + !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
3454 + return io_apic_get_unique_id(nr_ioapics, id);
3459 + DECLARE_BITMAP(used, 256);
3460 + bitmap_zero(used, 256);
3461 + for (i = 0; i < nr_ioapics; i++) {
3462 + struct mpc_config_ioapic *ia = &mp_ioapics[i];
3463 + __set_bit(ia->mpc_apicid, used);
3465 + if (!test_bit(id, used))
3467 + return find_first_zero_bit(used, 256);
3471 +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
3475 + if (bad_ioapic(address))
3480 + mp_ioapics[idx].mpc_type = MP_IOAPIC;
3481 + mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
3482 + mp_ioapics[idx].mpc_apicaddr = address;
3485 + set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
3487 + mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
3488 +#ifdef CONFIG_X86_32
3489 + mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
3491 + mp_ioapics[idx].mpc_apicver = 0;
3494 + * Build basic GSI lookup table to facilitate gsi->io_apic lookups
3495 + * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
3497 + mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
3498 + mp_ioapic_routing[idx].gsi_base = gsi_base;
3499 + mp_ioapic_routing[idx].gsi_end = gsi_base +
3500 + io_apic_get_redir_entries(idx);
3502 + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
3503 + "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
3504 + mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
3505 + mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
3510 +void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
3512 + struct mpc_config_intsrc intsrc;
3517 + * Convert 'gsi' to 'ioapic.pin'.
3519 + ioapic = mp_find_ioapic(gsi);
3522 + pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
3525 + * TBD: This check is for faulty timer entries, where the override
3526 + * erroneously sets the trigger to level, resulting in a HUGE
3527 + * increase of timer interrupts!
3529 + if ((bus_irq == 0) && (trigger == 3))
3532 + intsrc.mpc_type = MP_INTSRC;
3533 + intsrc.mpc_irqtype = mp_INT;
3534 + intsrc.mpc_irqflag = (trigger << 2) | polarity;
3535 + intsrc.mpc_srcbus = MP_ISA_BUS;
3536 + intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
3537 + intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
3538 + intsrc.mpc_dstirq = pin; /* INTIN# */
3540 + MP_intsrc_info(&intsrc);
3543 +void __init mp_config_acpi_legacy_irqs(void)
3545 + struct mpc_config_intsrc intsrc;
3549 +#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
3551 + * Fabricate the legacy ISA bus (bus #31).
3553 + mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
3555 + set_bit(MP_ISA_BUS, mp_bus_not_pci);
3556 + Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
3559 + * Older generations of ES7000 have no legacy identity mappings
3561 + if (es7000_plat == 1)
3565 + * Locate the IOAPIC that manages the ISA IRQs (0-15).
3567 + ioapic = mp_find_ioapic(0);
3571 + intsrc.mpc_type = MP_INTSRC;
3572 + intsrc.mpc_irqflag = 0; /* Conforming */
3573 + intsrc.mpc_srcbus = MP_ISA_BUS;
3574 +#ifdef CONFIG_X86_IO_APIC
3575 + intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
3578 + * Use the default configuration for the IRQs 0-15. Unless
3579 + * overridden by (MADT) interrupt source override entries.
3581 + for (i = 0; i < 16; i++) {
3584 + for (idx = 0; idx < mp_irq_entries; idx++) {
3585 + struct mpc_config_intsrc *irq = mp_irqs + idx;
3587 + /* Do we already have a mapping for this ISA IRQ? */
3588 + if (irq->mpc_srcbus == MP_ISA_BUS
3589 + && irq->mpc_srcbusirq == i)
3592 + /* Do we already have a mapping for this IOAPIC pin */
3593 + if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
3594 + (irq->mpc_dstirq == i))
3598 + if (idx != mp_irq_entries) {
3599 + printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
3600 + continue; /* IRQ already used */
3603 + intsrc.mpc_irqtype = mp_INT;
3604 + intsrc.mpc_srcbusirq = i; /* Identity mapped */
3605 + intsrc.mpc_dstirq = i;
3607 + MP_intsrc_info(&intsrc);
3611 +int mp_register_gsi(u32 gsi, int triggering, int polarity)
3615 +#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
3616 +#define MAX_GSI_NUM 4096
3617 +#define IRQ_COMPRESSION_START 64
3619 + static int pci_irq = IRQ_COMPRESSION_START;
3621 + * Mapping between Global System Interrupts, which
3622 + * represent all possible interrupts, and IRQs
3623 + * assigned to actual devices.
3625 + static int gsi_to_irq[MAX_GSI_NUM];
3628 + if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
3632 + /* Don't set up the ACPI SCI because it's already set up */
3633 + if (acpi_gbl_FADT.sci_interrupt == gsi)
3636 + ioapic = mp_find_ioapic(gsi);
3638 + printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
3642 + ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
3644 +#ifndef CONFIG_X86_32
3645 + if (ioapic_renumber_irq)
3646 + gsi = ioapic_renumber_irq(ioapic, gsi);
3650 + * Avoid pin reprogramming. PRTs typically include entries
3651 + * with redundant pin->gsi mappings (but unique PCI devices);
3652 + * we only program the IOAPIC on the first.
3654 + if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
3655 + printk(KERN_ERR "Invalid reference to IOAPIC pin "
3656 + "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
3660 + if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
3661 + Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
3662 + mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
3663 +#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
3664 + return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
3670 + set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
3671 +#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
3673 + * For GSI >= 64, use IRQ compression
3675 + if ((gsi >= IRQ_COMPRESSION_START)
3676 + && (triggering == ACPI_LEVEL_SENSITIVE)) {
3678 + * For PCI devices assign IRQs in order, avoiding gaps
3679 + * due to unused I/O APIC pins.
3682 + if (gsi < MAX_GSI_NUM) {
3684 + * Retain the VIA chipset work-around (gsi > 15), but
3685 + * avoid a problem where the 8254 timer (IRQ0) is setup
3686 + * via an override (so it's not on pin 0 of the ioapic),
3687 + * and at the same time, the pin 0 interrupt is a PCI
3688 + * type. The gsi > 15 test could cause these two pins
3689 + * to be shared as IRQ0, and they are not shareable.
3690 + * So test for this condition, and if necessary, avoid
3691 + * the pin collision.
3695 + * Don't assign IRQ used by ACPI SCI
3697 + if (gsi == acpi_gbl_FADT.sci_interrupt)
3699 + gsi_to_irq[irq] = gsi;
3701 + printk(KERN_ERR "GSI %u is too high\n", gsi);
3706 + io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
3707 + triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
3708 + polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
3712 +#endif /* CONFIG_X86_IO_APIC */
3713 +#endif /* CONFIG_ACPI */
3714 --- sle11-2009-10-16.orig/arch/x86/kernel/mpparse_32-xen.c 2009-03-16 16:33:40.000000000 +0100
3715 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
3718 - * Intel Multiprocessor Specification 1.1 and 1.4
3719 - * compliant MP-table parsing routines.
3721 - * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
3722 - * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
3725 - * Erich Boleyn : MP v1.4 and additional changes.
3726 - * Alan Cox : Added EBDA scanning
3727 - * Ingo Molnar : various cleanups and rewrites
3728 - * Maciej W. Rozycki: Bits for default MP configurations
3729 - * Paul Diefenbaugh: Added full ACPI support
3732 -#include <linux/mm.h>
3733 -#include <linux/init.h>
3734 -#include <linux/acpi.h>
3735 -#include <linux/delay.h>
3736 -#include <linux/bootmem.h>
3737 -#include <linux/kernel_stat.h>
3738 -#include <linux/mc146818rtc.h>
3739 -#include <linux/bitops.h>
3741 -#include <asm/smp.h>
3742 -#include <asm/acpi.h>
3743 -#include <asm/mtrr.h>
3744 -#include <asm/mpspec.h>
3745 -#include <asm/io_apic.h>
3747 -#include <mach_apic.h>
3748 -#include <mach_apicdef.h>
3749 -#include <mach_mpparse.h>
3750 -#include <bios_ebda.h>
3752 -/* Have we found an MP table */
3753 -int smp_found_config;
3754 -unsigned int __cpuinitdata maxcpus = NR_CPUS;
3757 - * Various Linux-internal data structures created from the
3760 -int apic_version [MAX_APICS];
3761 -int mp_bus_id_to_type [MAX_MP_BUSSES];
3762 -int mp_bus_id_to_node [MAX_MP_BUSSES];
3763 -int mp_bus_id_to_local [MAX_MP_BUSSES];
3764 -int quad_local_to_mp_bus_id [NR_CPUS/4][4];
3765 -int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
3766 -static int mp_current_pci_id;
3768 -/* I/O APIC entries */
3769 -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
3771 -/* # of MP IRQ source entries */
3772 -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
3774 -/* MP IRQ source entries */
3775 -int mp_irq_entries;
3780 -unsigned long mp_lapic_addr;
3782 -unsigned int def_to_bigsmp = 0;
3784 -/* Processor that is doing the boot up */
3785 -unsigned int boot_cpu_physical_apicid = -1U;
3786 -/* Internal processor count */
3787 -unsigned int num_processors;
3789 -/* Bitmask of physically existing CPUs */
3790 -physid_mask_t phys_cpu_present_map;
3792 -u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
3795 - * Intel MP BIOS table parsing routines:
3800 - * Checksum an MP configuration block.
3803 -static int __init mpf_checksum(unsigned char *mp, int len)
3810 - return sum & 0xFF;
3814 - * Have to match translation table entries to main table entries by counter
3815 - * hence the mpc_record variable .... can't see a less disgusting way of
3819 -static int mpc_record;
3820 -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata;
3823 -static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
3826 - physid_mask_t phys_cpu;
3828 - if (!(m->mpc_cpuflag & CPU_ENABLED))
3831 - apicid = mpc_apic_id(m, translation_table[mpc_record]);
3833 - if (m->mpc_featureflag&(1<<0))
3834 - Dprintk(" Floating point unit present.\n");
3835 - if (m->mpc_featureflag&(1<<7))
3836 - Dprintk(" Machine Exception supported.\n");
3837 - if (m->mpc_featureflag&(1<<8))
3838 - Dprintk(" 64 bit compare & exchange supported.\n");
3839 - if (m->mpc_featureflag&(1<<9))
3840 - Dprintk(" Internal APIC present.\n");
3841 - if (m->mpc_featureflag&(1<<11))
3842 - Dprintk(" SEP present.\n");
3843 - if (m->mpc_featureflag&(1<<12))
3844 - Dprintk(" MTRR present.\n");
3845 - if (m->mpc_featureflag&(1<<13))
3846 - Dprintk(" PGE present.\n");
3847 - if (m->mpc_featureflag&(1<<14))
3848 - Dprintk(" MCA present.\n");
3849 - if (m->mpc_featureflag&(1<<15))
3850 - Dprintk(" CMOV present.\n");
3851 - if (m->mpc_featureflag&(1<<16))
3852 - Dprintk(" PAT present.\n");
3853 - if (m->mpc_featureflag&(1<<17))
3854 - Dprintk(" PSE present.\n");
3855 - if (m->mpc_featureflag&(1<<18))
3856 - Dprintk(" PSN present.\n");
3857 - if (m->mpc_featureflag&(1<<19))
3858 - Dprintk(" Cache Line Flush Instruction present.\n");
3860 - if (m->mpc_featureflag&(1<<21))
3861 - Dprintk(" Debug Trace and EMON Store present.\n");
3862 - if (m->mpc_featureflag&(1<<22))
3863 - Dprintk(" ACPI Thermal Throttle Registers present.\n");
3864 - if (m->mpc_featureflag&(1<<23))
3865 - Dprintk(" MMX present.\n");
3866 - if (m->mpc_featureflag&(1<<24))
3867 - Dprintk(" FXSR present.\n");
3868 - if (m->mpc_featureflag&(1<<25))
3869 - Dprintk(" XMM present.\n");
3870 - if (m->mpc_featureflag&(1<<26))
3871 - Dprintk(" Willamette New Instructions present.\n");
3872 - if (m->mpc_featureflag&(1<<27))
3873 - Dprintk(" Self Snoop present.\n");
3874 - if (m->mpc_featureflag&(1<<28))
3875 - Dprintk(" HT present.\n");
3876 - if (m->mpc_featureflag&(1<<29))
3877 - Dprintk(" Thermal Monitor present.\n");
3878 - /* 30, 31 Reserved */
3881 - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
3882 - Dprintk(" Bootup CPU\n");
3883 - boot_cpu_physical_apicid = m->mpc_apicid;
3886 - ver = m->mpc_apicver;
3889 - * Validate version
3892 - printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
3893 - "fixing up to 0x10. (tell your hw vendor)\n",
3897 - apic_version[m->mpc_apicid] = ver;
3899 - phys_cpu = apicid_to_cpu_present(apicid);
3900 - physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
3902 - if (num_processors >= NR_CPUS) {
3903 - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
3904 - " Processor ignored.\n", NR_CPUS);
3908 - if (num_processors >= maxcpus) {
3909 - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
3910 - " Processor ignored.\n", maxcpus);
3914 - cpu_set(num_processors, cpu_possible_map);
3918 - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
3919 - * but we need to work other dependencies like SMP_SUSPEND etc
3920 - * before this can be done without some confusion.
3921 - * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
3922 - * - Ashok Raj <ashok.raj@intel.com>
3924 - if (num_processors > 8) {
3925 - switch (boot_cpu_data.x86_vendor) {
3926 - case X86_VENDOR_INTEL:
3927 - if (!APIC_XAPIC(ver)) {
3928 - def_to_bigsmp = 0;
3931 - /* If P4 and above fall through */
3932 - case X86_VENDOR_AMD:
3933 - def_to_bigsmp = 1;
3936 - bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
3939 -static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
3943 -#endif /* CONFIG_XEN */
3945 -static void __init MP_bus_info (struct mpc_config_bus *m)
3949 - memcpy(str, m->mpc_bustype, 6);
3952 - mpc_oem_bus_info(m, str, translation_table[mpc_record]);
3954 -#if MAX_MP_BUSSES < 256
3955 - if (m->mpc_busid >= MAX_MP_BUSSES) {
3956 - printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
3957 - " is too large, max. supported is %d\n",
3958 - m->mpc_busid, str, MAX_MP_BUSSES - 1);
3963 - if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
3964 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
3965 - } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
3966 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
3967 - } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
3968 - mpc_oem_pci_bus(m, translation_table[mpc_record]);
3969 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
3970 - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
3971 - mp_current_pci_id++;
3972 - } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
3973 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
3975 - printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
3979 -static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
3981 - if (!(m->mpc_flags & MPC_APIC_USABLE))
3984 - printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
3985 - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
3986 - if (nr_ioapics >= MAX_IO_APICS) {
3987 - printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
3988 - MAX_IO_APICS, nr_ioapics);
3989 - panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
3991 - if (!m->mpc_apicaddr) {
3992 - printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
3993 - " found in MP table, skipping!\n");
3996 - mp_ioapics[nr_ioapics] = *m;
4000 -static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
4002 - mp_irqs [mp_irq_entries] = *m;
4003 - Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
4004 - " IRQ %02x, APIC ID %x, APIC INT %02x\n",
4005 - m->mpc_irqtype, m->mpc_irqflag & 3,
4006 - (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
4007 - m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
4008 - if (++mp_irq_entries == MAX_IRQ_SOURCES)
4009 - panic("Max # of irq sources exceeded!!\n");
4012 -static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
4014 - Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
4015 - " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
4016 - m->mpc_irqtype, m->mpc_irqflag & 3,
4017 - (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
4018 - m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
4021 -#ifdef CONFIG_X86_NUMAQ
4022 -static void __init MP_translation_info (struct mpc_config_translation *m)
4024 - printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
4026 - if (mpc_record >= MAX_MPC_ENTRY)
4027 - printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
4029 - translation_table[mpc_record] = m; /* stash this for later */
4030 - if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
4031 - node_set_online(m->trans_quad);
4035 - * Read/parse the MPC oem tables
4038 -static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
4039 - unsigned short oemsize)
4041 - int count = sizeof (*oemtable); /* the header size */
4042 - unsigned char *oemptr = ((unsigned char *)oemtable)+count;
4045 - printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
4046 - if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
4048 - printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
4049 - oemtable->oem_signature[0],
4050 - oemtable->oem_signature[1],
4051 - oemtable->oem_signature[2],
4052 - oemtable->oem_signature[3]);
4055 - if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
4057 - printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
4060 - while (count < oemtable->oem_length) {
4061 - switch (*oemptr) {
4062 - case MP_TRANSLATION:
4064 - struct mpc_config_translation *m=
4065 - (struct mpc_config_translation *)oemptr;
4066 - MP_translation_info(m);
4067 - oemptr += sizeof(*m);
4068 - count += sizeof(*m);
4074 - printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
4081 -static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
4084 - if (strncmp(oem, "IBM NUMA", 8))
4085 - printk("Warning! May not be a NUMA-Q system!\n");
4086 - if (mpc->mpc_oemptr)
4087 - smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
4088 - mpc->mpc_oemsize);
4090 -#endif /* CONFIG_X86_NUMAQ */
4093 - * Read/parse the MPC
4096 -static int __init smp_read_mpc(struct mp_config_table *mpc)
4100 - int count=sizeof(*mpc);
4101 - unsigned char *mpt=((unsigned char *)mpc)+count;
4103 - if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
4104 - printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
4105 - *(u32 *)mpc->mpc_signature);
4108 - if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
4109 - printk(KERN_ERR "SMP mptable: checksum error!\n");
4112 - if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
4113 - printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
4117 - if (!mpc->mpc_lapic) {
4118 - printk(KERN_ERR "SMP mptable: null local APIC address!\n");
4121 - memcpy(oem,mpc->mpc_oem,8);
4123 - printk(KERN_INFO "OEM ID: %s ",oem);
4125 - memcpy(str,mpc->mpc_productid,12);
4127 - printk("Product ID: %s ",str);
4129 - mps_oem_check(mpc, oem, str);
4131 - printk("APIC at: 0x%X\n", mpc->mpc_lapic);
4134 - * Save the local APIC address (it might be non-default) -- but only
4135 - * if we're not using ACPI.
4138 - mp_lapic_addr = mpc->mpc_lapic;
4141 - * Now process the configuration blocks.
4144 - while (count < mpc->mpc_length) {
4146 - case MP_PROCESSOR:
4148 - struct mpc_config_processor *m=
4149 - (struct mpc_config_processor *)mpt;
4150 - /* ACPI may have already provided this data */
4152 - MP_processor_info(m);
4153 - mpt += sizeof(*m);
4154 - count += sizeof(*m);
4159 - struct mpc_config_bus *m=
4160 - (struct mpc_config_bus *)mpt;
4162 - mpt += sizeof(*m);
4163 - count += sizeof(*m);
4168 - struct mpc_config_ioapic *m=
4169 - (struct mpc_config_ioapic *)mpt;
4170 - MP_ioapic_info(m);
4172 - count+=sizeof(*m);
4177 - struct mpc_config_intsrc *m=
4178 - (struct mpc_config_intsrc *)mpt;
4180 - MP_intsrc_info(m);
4182 - count+=sizeof(*m);
4187 - struct mpc_config_lintsrc *m=
4188 - (struct mpc_config_lintsrc *)mpt;
4189 - MP_lintsrc_info(m);
4191 - count+=sizeof(*m);
4196 - count = mpc->mpc_length;
4202 - setup_apic_routing();
4203 - if (!num_processors)
4204 - printk(KERN_ERR "SMP mptable: no processors registered!\n");
4205 - return num_processors;
4208 -static int __init ELCR_trigger(unsigned int irq)
4210 - unsigned int port;
4212 - port = 0x4d0 + (irq >> 3);
4213 - return (inb(port) >> (irq & 7)) & 1;
4216 -static void __init construct_default_ioirq_mptable(int mpc_default_type)
4218 - struct mpc_config_intsrc intsrc;
4220 - int ELCR_fallback = 0;
4222 - intsrc.mpc_type = MP_INTSRC;
4223 - intsrc.mpc_irqflag = 0; /* conforming */
4224 - intsrc.mpc_srcbus = 0;
4225 - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
4227 - intsrc.mpc_irqtype = mp_INT;
4230 - * If true, we have an ISA/PCI system with no IRQ entries
4231 - * in the MP table. To prevent the PCI interrupts from being set up
4232 - * incorrectly, we try to use the ELCR. The sanity check to see if
4233 - * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
4234 - * never be level sensitive, so we simply see if the ELCR agrees.
4235 - * If it does, we assume it's valid.
4237 - if (mpc_default_type == 5) {
4238 - printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
4240 - if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
4241 - printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
4243 - printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
4244 - ELCR_fallback = 1;
4248 - for (i = 0; i < 16; i++) {
4249 - switch (mpc_default_type) {
4251 - if (i == 0 || i == 13)
4252 - continue; /* IRQ0 & IRQ13 not connected */
4253 - /* fall through */
4256 - continue; /* IRQ2 is never connected */
4259 - if (ELCR_fallback) {
4261 - * If the ELCR indicates a level-sensitive interrupt, we
4262 - * copy that information over to the MP table in the
4263 - * irqflag field (level sensitive, active high polarity).
4265 - if (ELCR_trigger(i))
4266 - intsrc.mpc_irqflag = 13;
4268 - intsrc.mpc_irqflag = 0;
4271 - intsrc.mpc_srcbusirq = i;
4272 - intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
4273 - MP_intsrc_info(&intsrc);
4276 - intsrc.mpc_irqtype = mp_ExtINT;
4277 - intsrc.mpc_srcbusirq = 0;
4278 - intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
4279 - MP_intsrc_info(&intsrc);
4282 -static inline void __init construct_default_ISA_mptable(int mpc_default_type)
4284 - struct mpc_config_processor processor;
4285 - struct mpc_config_bus bus;
4286 - struct mpc_config_ioapic ioapic;
4287 - struct mpc_config_lintsrc lintsrc;
4288 - int linttypes[2] = { mp_ExtINT, mp_NMI };
4292 - * local APIC has default address
4294 - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
4297 - * 2 CPUs, numbered 0 & 1.
4299 - processor.mpc_type = MP_PROCESSOR;
4300 - /* Either an integrated APIC or a discrete 82489DX. */
4301 - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
4302 - processor.mpc_cpuflag = CPU_ENABLED;
4303 - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
4304 - (boot_cpu_data.x86_model << 4) |
4305 - boot_cpu_data.x86_mask;
4306 - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
4307 - processor.mpc_reserved[0] = 0;
4308 - processor.mpc_reserved[1] = 0;
4309 - for (i = 0; i < 2; i++) {
4310 - processor.mpc_apicid = i;
4311 - MP_processor_info(&processor);
4314 - bus.mpc_type = MP_BUS;
4315 - bus.mpc_busid = 0;
4316 - switch (mpc_default_type) {
4319 - printk(KERN_ERR "Unknown standard configuration %d\n",
4320 - mpc_default_type);
4321 - /* fall through */
4324 - memcpy(bus.mpc_bustype, "ISA ", 6);
4329 - memcpy(bus.mpc_bustype, "EISA ", 6);
4333 - memcpy(bus.mpc_bustype, "MCA ", 6);
4335 - MP_bus_info(&bus);
4336 - if (mpc_default_type > 4) {
4337 - bus.mpc_busid = 1;
4338 - memcpy(bus.mpc_bustype, "PCI ", 6);
4339 - MP_bus_info(&bus);
4342 - ioapic.mpc_type = MP_IOAPIC;
4343 - ioapic.mpc_apicid = 2;
4344 - ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
4345 - ioapic.mpc_flags = MPC_APIC_USABLE;
4346 - ioapic.mpc_apicaddr = 0xFEC00000;
4347 - MP_ioapic_info(&ioapic);
4350 - * We set up most of the low 16 IO-APIC pins according to MPS rules.
4352 - construct_default_ioirq_mptable(mpc_default_type);
4354 - lintsrc.mpc_type = MP_LINTSRC;
4355 - lintsrc.mpc_irqflag = 0; /* conforming */
4356 - lintsrc.mpc_srcbusid = 0;
4357 - lintsrc.mpc_srcbusirq = 0;
4358 - lintsrc.mpc_destapic = MP_APIC_ALL;
4359 - for (i = 0; i < 2; i++) {
4360 - lintsrc.mpc_irqtype = linttypes[i];
4361 - lintsrc.mpc_destapiclint = i;
4362 - MP_lintsrc_info(&lintsrc);
4366 -static struct intel_mp_floating *mpf_found;
4369 - * Scan the memory blocks for an SMP configuration block.
4371 -void __init get_smp_config (void)
4373 - struct intel_mp_floating *mpf = mpf_found;
4376 - * ACPI supports both logical (e.g. Hyper-Threading) and physical
4377 - * processors, where MPS only supports physical.
4379 - if (acpi_lapic && acpi_ioapic) {
4380 - printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
4383 - else if (acpi_lapic)
4384 - printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
4386 - printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
4387 - if (mpf->mpf_feature2 & (1<<7)) {
4388 - printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
4391 - printk(KERN_INFO " Virtual Wire compatibility mode.\n");
4396 - * Now see if we need to read further.
4398 - if (mpf->mpf_feature1 != 0) {
4400 - printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
4401 - construct_default_ISA_mptable(mpf->mpf_feature1);
4403 - } else if (mpf->mpf_physptr) {
4406 - * Read the physical hardware table. Anything here will
4407 - * override the defaults.
4409 - if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
4410 - smp_found_config = 0;
4411 - printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
4412 - printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
4416 - * If there are no explicit MP IRQ entries, then we are
4417 - * broken. We set up most of the low 16 IO-APIC pins to
4418 - * ISA defaults and hope it will work.
4420 - if (!mp_irq_entries) {
4421 - struct mpc_config_bus bus;
4423 - printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
4425 - bus.mpc_type = MP_BUS;
4426 - bus.mpc_busid = 0;
4427 - memcpy(bus.mpc_bustype, "ISA ", 6);
4428 - MP_bus_info(&bus);
4430 - construct_default_ioirq_mptable(0);
4436 - printk(KERN_INFO "Processors: %d\n", num_processors);
4438 - * Only use the first configuration found.
4442 -static int __init smp_scan_config (unsigned long base, unsigned long length)
4444 - unsigned long *bp = isa_bus_to_virt(base);
4445 - struct intel_mp_floating *mpf;
4447 - printk(KERN_INFO "Scan SMP from %p for %ld bytes.\n", bp,length);
4448 - if (sizeof(*mpf) != 16)
4449 - printk("Error: MPF size\n");
4451 - while (length > 0) {
4452 - mpf = (struct intel_mp_floating *)bp;
4453 - if ((*bp == SMP_MAGIC_IDENT) &&
4454 - (mpf->mpf_length == 1) &&
4455 - !mpf_checksum((unsigned char *)bp, 16) &&
4456 - ((mpf->mpf_specification == 1)
4457 - || (mpf->mpf_specification == 4)) ) {
4459 - smp_found_config = 1;
4461 - printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
4462 - mpf, virt_to_phys(mpf));
4463 - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
4465 - if (mpf->mpf_physptr) {
4467 - * We cannot access to MPC table to compute
4468 - * table size yet, as only few megabytes from
4469 - * the bottom is mapped now.
4470 - * PC-9800's MPC table places on the very last
4471 - * of physical memory; so that simply reserving
4472 - * PAGE_SIZE from mpg->mpf_physptr yields BUG()
4473 - * in reserve_bootmem.
4475 - unsigned long size = PAGE_SIZE;
4476 - unsigned long end = max_low_pfn * PAGE_SIZE;
4477 - if (mpf->mpf_physptr + size > end)
4478 - size = end - mpf->mpf_physptr;
4479 - reserve_bootmem(mpf->mpf_physptr, size,
4483 - printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
4484 - mpf, ((void *)bp - isa_bus_to_virt(base)) + base);
4496 -void __init find_smp_config (void)
4499 - unsigned int address;
4503 - * FIXME: Linux assumes you have 640K of base ram..
4504 - * this continues the error...
4506 - * 1) Scan the bottom 1K for a signature
4507 - * 2) Scan the top 1K of base RAM
4508 - * 3) Scan the 64K of bios
4510 - if (smp_scan_config(0x0,0x400) ||
4511 - smp_scan_config(639*0x400,0x400) ||
4512 - smp_scan_config(0xF0000,0x10000))
4515 - * If it is an SMP machine we should know now, unless the
4516 - * configuration is in an EISA/MCA bus machine with an
4517 - * extended bios data area.
4519 - * there is a real-mode segmented pointer pointing to the
4520 - * 4K EBDA area at 0x40E, calculate and scan it here.
4522 - * NOTE! There are Linux loaders that will corrupt the EBDA
4523 - * area, and as such this kind of SMP config may be less
4524 - * trustworthy, simply because the SMP table may have been
4525 - * stomped on during early boot. These loaders are buggy and
4526 - * should be fixed.
4528 - * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
4532 - address = get_bios_ebda();
4534 - smp_scan_config(address, 0x400);
4540 -/* --------------------------------------------------------------------------
4541 - ACPI-based MP Configuration
4542 - -------------------------------------------------------------------------- */
4546 -void __init mp_register_lapic_address(u64 address)
4549 - mp_lapic_addr = (unsigned long) address;
4551 - set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
4553 - if (boot_cpu_physical_apicid == -1U)
4554 - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
4556 - Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
4560 -void __cpuinit mp_register_lapic (u8 id, u8 enabled)
4562 - struct mpc_config_processor processor;
4565 - if (MAX_APICS - id <= 0) {
4566 - printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
4571 - if (id == boot_cpu_physical_apicid)
4575 - processor.mpc_type = MP_PROCESSOR;
4576 - processor.mpc_apicid = id;
4577 - processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
4578 - processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
4579 - processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
4580 - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
4581 - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
4582 - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
4583 - processor.mpc_reserved[0] = 0;
4584 - processor.mpc_reserved[1] = 0;
4587 - MP_processor_info(&processor);
4590 -#ifdef CONFIG_X86_IO_APIC
4592 -#define MP_ISA_BUS 0
4593 -#define MP_MAX_IOAPIC_PIN 127
4595 -static struct mp_ioapic_routing {
4599 - u32 pin_programmed[4];
4600 -} mp_ioapic_routing[MAX_IO_APICS];
4602 -static int mp_find_ioapic (int gsi)
4606 - /* Find the IOAPIC that manages this GSI. */
4607 - for (i = 0; i < nr_ioapics; i++) {
4608 - if ((gsi >= mp_ioapic_routing[i].gsi_base)
4609 - && (gsi <= mp_ioapic_routing[i].gsi_end))
4613 - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
4618 -void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
4623 - if (nr_ioapics >= MAX_IO_APICS) {
4624 - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
4625 - "(found %d)\n", MAX_IO_APICS, nr_ioapics);
4626 - panic("Recompile kernel with bigger MAX_IO_APICS!\n");
4629 - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
4630 - " found in MADT table, skipping!\n");
4634 - idx = nr_ioapics++;
4636 - mp_ioapics[idx].mpc_type = MP_IOAPIC;
4637 - mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
4638 - mp_ioapics[idx].mpc_apicaddr = address;
4641 - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
4643 - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
4644 - && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
4645 - tmpid = io_apic_get_unique_id(idx, id);
4648 - if (tmpid == -1) {
4652 - mp_ioapics[idx].mpc_apicid = tmpid;
4653 - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
4656 - * Build basic GSI lookup table to facilitate gsi->io_apic lookups
4657 - * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
4659 - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
4660 - mp_ioapic_routing[idx].gsi_base = gsi_base;
4661 - mp_ioapic_routing[idx].gsi_end = gsi_base +
4662 - io_apic_get_redir_entries(idx);
4664 - printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
4665 - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
4666 - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
4667 - mp_ioapic_routing[idx].gsi_base,
4668 - mp_ioapic_routing[idx].gsi_end);
4672 -mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
4674 - struct mpc_config_intsrc intsrc;
4679 - * Convert 'gsi' to 'ioapic.pin'.
4681 - ioapic = mp_find_ioapic(gsi);
4684 - pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
4687 - * TBD: This check is for faulty timer entries, where the override
4688 - * erroneously sets the trigger to level, resulting in a HUGE
4689 - * increase of timer interrupts!
4691 - if ((bus_irq == 0) && (trigger == 3))
4694 - intsrc.mpc_type = MP_INTSRC;
4695 - intsrc.mpc_irqtype = mp_INT;
4696 - intsrc.mpc_irqflag = (trigger << 2) | polarity;
4697 - intsrc.mpc_srcbus = MP_ISA_BUS;
4698 - intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
4699 - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
4700 - intsrc.mpc_dstirq = pin; /* INTIN# */
4702 - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
4703 - intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
4704 - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
4705 - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
4707 - mp_irqs[mp_irq_entries] = intsrc;
4708 - if (++mp_irq_entries == MAX_IRQ_SOURCES)
4709 - panic("Max # of irq sources exceeded!\n");
4712 -void __init mp_config_acpi_legacy_irqs (void)
4714 - struct mpc_config_intsrc intsrc;
4719 - * Fabricate the legacy ISA bus (bus #31).
4721 - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
4722 - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
4725 - * Older generations of ES7000 have no legacy identity mappings
4727 - if (es7000_plat == 1)
4731 - * Locate the IOAPIC that manages the ISA IRQs (0-15).
4733 - ioapic = mp_find_ioapic(0);
4737 - intsrc.mpc_type = MP_INTSRC;
4738 - intsrc.mpc_irqflag = 0; /* Conforming */
4739 - intsrc.mpc_srcbus = MP_ISA_BUS;
4740 - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
4743 - * Use the default configuration for the IRQs 0-15. Unless
4744 - * overridden by (MADT) interrupt source override entries.
4746 - for (i = 0; i < 16; i++) {
4749 - for (idx = 0; idx < mp_irq_entries; idx++) {
4750 - struct mpc_config_intsrc *irq = mp_irqs + idx;
4752 - /* Do we already have a mapping for this ISA IRQ? */
4753 - if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
4756 - /* Do we already have a mapping for this IOAPIC pin */
4757 - if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
4758 - (irq->mpc_dstirq == i))
4762 - if (idx != mp_irq_entries) {
4763 - printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
4764 - continue; /* IRQ already used */
4767 - intsrc.mpc_irqtype = mp_INT;
4768 - intsrc.mpc_srcbusirq = i; /* Identity mapped */
4769 - intsrc.mpc_dstirq = i;
4771 - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
4772 - "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
4773 - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
4774 - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
4775 - intsrc.mpc_dstirq);
4777 - mp_irqs[mp_irq_entries] = intsrc;
4778 - if (++mp_irq_entries == MAX_IRQ_SOURCES)
4779 - panic("Max # of irq sources exceeded!\n");
4783 -#define MAX_GSI_NUM 4096
4784 -#define IRQ_COMPRESSION_START 64
4786 -int mp_register_gsi(u32 gsi, int triggering, int polarity)
4789 - int ioapic_pin = 0;
4791 - static int pci_irq = IRQ_COMPRESSION_START;
4793 - * Mapping between Global System Interrupts, which
4794 - * represent all possible interrupts, and IRQs
4795 - * assigned to actual devices.
4797 - static int gsi_to_irq[MAX_GSI_NUM];
4799 - /* Don't set up the ACPI SCI because it's already set up */
4800 - if (acpi_gbl_FADT.sci_interrupt == gsi)
4803 - ioapic = mp_find_ioapic(gsi);
4805 - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
4809 - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
4811 - if (ioapic_renumber_irq)
4812 - gsi = ioapic_renumber_irq(ioapic, gsi);
4815 - * Avoid pin reprogramming. PRTs typically include entries
4816 - * with redundant pin->gsi mappings (but unique PCI devices);
4817 - * we only program the IOAPIC on the first.
4819 - bit = ioapic_pin % 32;
4820 - idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
4822 - printk(KERN_ERR "Invalid reference to IOAPIC pin "
4823 - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
4827 - if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
4828 - Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
4829 - mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
4830 - return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
4833 - mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
4836 - * For GSI >= 64, use IRQ compression
4838 - if ((gsi >= IRQ_COMPRESSION_START)
4839 - && (triggering == ACPI_LEVEL_SENSITIVE)) {
4841 - * For PCI devices assign IRQs in order, avoiding gaps
4842 - * due to unused I/O APIC pins.
4845 - if (gsi < MAX_GSI_NUM) {
4847 - * Retain the VIA chipset work-around (gsi > 15), but
4848 - * avoid a problem where the 8254 timer (IRQ0) is setup
4849 - * via an override (so it's not on pin 0 of the ioapic),
4850 - * and at the same time, the pin 0 interrupt is a PCI
4851 - * type. The gsi > 15 test could cause these two pins
4852 - * to be shared as IRQ0, and they are not shareable.
4853 - * So test for this condition, and if necessary, avoid
4854 - * the pin collision.
4856 - if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
4859 - * Don't assign IRQ used by ACPI SCI
4861 - if (gsi == acpi_gbl_FADT.sci_interrupt)
4863 - gsi_to_irq[irq] = gsi;
4865 - printk(KERN_ERR "GSI %u is too high\n", gsi);
4870 - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
4871 - triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
4872 - polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
4876 -#endif /* CONFIG_X86_IO_APIC */
4877 -#endif /* CONFIG_ACPI */
4878 --- sle11-2009-10-16.orig/arch/x86/kernel/mpparse_64-xen.c 2009-03-16 16:33:40.000000000 +0100
4879 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
4882 - * Intel Multiprocessor Specification 1.1 and 1.4
4883 - * compliant MP-table parsing routines.
4885 - * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
4886 - * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
4889 - * Erich Boleyn : MP v1.4 and additional changes.
4890 - * Alan Cox : Added EBDA scanning
4891 - * Ingo Molnar : various cleanups and rewrites
4892 - * Maciej W. Rozycki: Bits for default MP configurations
4893 - * Paul Diefenbaugh: Added full ACPI support
4896 -#include <linux/mm.h>
4897 -#include <linux/init.h>
4898 -#include <linux/delay.h>
4899 -#include <linux/bootmem.h>
4900 -#include <linux/kernel_stat.h>
4901 -#include <linux/mc146818rtc.h>
4902 -#include <linux/acpi.h>
4903 -#include <linux/module.h>
4905 -#include <asm/smp.h>
4906 -#include <asm/mtrr.h>
4907 -#include <asm/mpspec.h>
4908 -#include <asm/pgalloc.h>
4909 -#include <asm/io_apic.h>
4910 -#include <asm/proto.h>
4911 -#include <asm/acpi.h>
4913 -/* Have we found an MP table */
4914 -int smp_found_config;
4917 - * Various Linux-internal data structures created from the
4920 -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
4921 -int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
4923 -static int mp_current_pci_id = 0;
4924 -/* I/O APIC entries */
4925 -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
4927 -/* # of MP IRQ source entries */
4928 -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
4930 -/* MP IRQ source entries */
4931 -int mp_irq_entries;
4934 -unsigned long mp_lapic_addr = 0;
4938 -/* Processor that is doing the boot up */
4939 -unsigned int boot_cpu_id = -1U;
4940 -EXPORT_SYMBOL(boot_cpu_id);
4942 -/* Internal processor count */
4943 -unsigned int num_processors;
4945 -unsigned disabled_cpus __cpuinitdata;
4947 -/* Bitmask of physically existing CPUs */
4948 -physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
4951 -u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
4952 - = { [0 ... NR_CPUS-1] = BAD_APICID };
4953 -void *x86_bios_cpu_apicid_early_ptr;
4955 -DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
4956 -EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
4960 - * Intel MP BIOS table parsing routines:
4964 - * Checksum an MP configuration block.
4967 -static int __init mpf_checksum(unsigned char *mp, int len)
4974 - return sum & 0xFF;
4978 -static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
4981 - cpumask_t tmp_map;
4982 - char *bootup_cpu = "";
4984 - if (!(m->mpc_cpuflag & CPU_ENABLED)) {
4988 - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
4989 - bootup_cpu = " (Bootup-CPU)";
4990 - boot_cpu_id = m->mpc_apicid;
4993 - printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
4995 - if (num_processors >= NR_CPUS) {
4996 - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
4997 - " Processor ignored.\n", NR_CPUS);
5002 - cpus_complement(tmp_map, cpu_present_map);
5003 - cpu = first_cpu(tmp_map);
5005 - physid_set(m->mpc_apicid, phys_cpu_present_map);
5006 - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
5008 - * x86_bios_cpu_apicid is required to have processors listed
5009 - * in same order as logical cpu numbers. Hence the first
5010 - * entry is BSP, and so on.
5014 - /* are we being called early in kernel startup? */
5015 - if (x86_cpu_to_apicid_early_ptr) {
5016 - u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
5017 - u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
5019 - cpu_to_apicid[cpu] = m->mpc_apicid;
5020 - bios_cpu_apicid[cpu] = m->mpc_apicid;
5022 - per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
5023 - per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid;
5026 - cpu_set(cpu, cpu_possible_map);
5027 - cpu_set(cpu, cpu_present_map);
5030 -static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
5034 -#endif /* CONFIG_XEN */
5036 -static void __init MP_bus_info (struct mpc_config_bus *m)
5040 - memcpy(str, m->mpc_bustype, 6);
5042 - Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
5044 - if (strncmp(str, "ISA", 3) == 0) {
5045 - set_bit(m->mpc_busid, mp_bus_not_pci);
5046 - } else if (strncmp(str, "PCI", 3) == 0) {
5047 - clear_bit(m->mpc_busid, mp_bus_not_pci);
5048 - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
5049 - mp_current_pci_id++;
5051 - printk(KERN_ERR "Unknown bustype %s\n", str);
5055 -static int bad_ioapic(unsigned long address)
5057 - if (nr_ioapics >= MAX_IO_APICS) {
5058 - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
5059 - "(found %d)\n", MAX_IO_APICS, nr_ioapics);
5060 - panic("Recompile kernel with bigger MAX_IO_APICS!\n");
5063 - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
5064 - " found in table, skipping!\n");
5070 -static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
5072 - if (!(m->mpc_flags & MPC_APIC_USABLE))
5075 - printk("I/O APIC #%d at 0x%X.\n",
5076 - m->mpc_apicid, m->mpc_apicaddr);
5078 - if (bad_ioapic(m->mpc_apicaddr))
5081 - mp_ioapics[nr_ioapics] = *m;
5085 -static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
5087 - mp_irqs [mp_irq_entries] = *m;
5088 - Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
5089 - " IRQ %02x, APIC ID %x, APIC INT %02x\n",
5090 - m->mpc_irqtype, m->mpc_irqflag & 3,
5091 - (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
5092 - m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
5093 - if (++mp_irq_entries >= MAX_IRQ_SOURCES)
5094 - panic("Max # of irq sources exceeded!!\n");
5097 -static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
5099 - Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
5100 - " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
5101 - m->mpc_irqtype, m->mpc_irqflag & 3,
5102 - (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
5103 - m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
5107 - * Read/parse the MPC
5110 -static int __init smp_read_mpc(struct mp_config_table *mpc)
5113 - int count=sizeof(*mpc);
5114 - unsigned char *mpt=((unsigned char *)mpc)+count;
5116 - if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
5117 - printk("MPTABLE: bad signature [%c%c%c%c]!\n",
5118 - mpc->mpc_signature[0],
5119 - mpc->mpc_signature[1],
5120 - mpc->mpc_signature[2],
5121 - mpc->mpc_signature[3]);
5124 - if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
5125 - printk("MPTABLE: checksum error!\n");
5128 - if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
5129 - printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
5133 - if (!mpc->mpc_lapic) {
5134 - printk(KERN_ERR "MPTABLE: null local APIC address!\n");
5137 - memcpy(str,mpc->mpc_oem,8);
5139 - printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
5141 - memcpy(str,mpc->mpc_productid,12);
5143 - printk("MPTABLE: Product ID: %s ",str);
5145 - printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
5147 - /* save the local APIC address, it might be non-default */
5149 - mp_lapic_addr = mpc->mpc_lapic;
5152 - * Now process the configuration blocks.
5154 - while (count < mpc->mpc_length) {
5156 - case MP_PROCESSOR:
5158 - struct mpc_config_processor *m=
5159 - (struct mpc_config_processor *)mpt;
5161 - MP_processor_info(m);
5162 - mpt += sizeof(*m);
5163 - count += sizeof(*m);
5168 - struct mpc_config_bus *m=
5169 - (struct mpc_config_bus *)mpt;
5171 - mpt += sizeof(*m);
5172 - count += sizeof(*m);
5177 - struct mpc_config_ioapic *m=
5178 - (struct mpc_config_ioapic *)mpt;
5179 - MP_ioapic_info(m);
5180 - mpt += sizeof(*m);
5181 - count += sizeof(*m);
5186 - struct mpc_config_intsrc *m=
5187 - (struct mpc_config_intsrc *)mpt;
5189 - MP_intsrc_info(m);
5190 - mpt += sizeof(*m);
5191 - count += sizeof(*m);
5196 - struct mpc_config_lintsrc *m=
5197 - (struct mpc_config_lintsrc *)mpt;
5198 - MP_lintsrc_info(m);
5199 - mpt += sizeof(*m);
5200 - count += sizeof(*m);
5205 - setup_apic_routing();
5206 - if (!num_processors)
5207 - printk(KERN_ERR "MPTABLE: no processors registered!\n");
5208 - return num_processors;
5211 -static int __init ELCR_trigger(unsigned int irq)
5213 - unsigned int port;
5215 - port = 0x4d0 + (irq >> 3);
5216 - return (inb(port) >> (irq & 7)) & 1;
5219 -static void __init construct_default_ioirq_mptable(int mpc_default_type)
5221 - struct mpc_config_intsrc intsrc;
5223 - int ELCR_fallback = 0;
5225 - intsrc.mpc_type = MP_INTSRC;
5226 - intsrc.mpc_irqflag = 0; /* conforming */
5227 - intsrc.mpc_srcbus = 0;
5228 - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
5230 - intsrc.mpc_irqtype = mp_INT;
5233 - * If true, we have an ISA/PCI system with no IRQ entries
5234 - * in the MP table. To prevent the PCI interrupts from being set up
5235 - * incorrectly, we try to use the ELCR. The sanity check to see if
5236 - * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
5237 - * never be level sensitive, so we simply see if the ELCR agrees.
5238 - * If it does, we assume it's valid.
5240 - if (mpc_default_type == 5) {
5241 - printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
5243 - if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
5244 - printk(KERN_ERR "ELCR contains invalid data... not using ELCR\n");
5246 - printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
5247 - ELCR_fallback = 1;
5251 - for (i = 0; i < 16; i++) {
5252 - switch (mpc_default_type) {
5254 - if (i == 0 || i == 13)
5255 - continue; /* IRQ0 & IRQ13 not connected */
5256 - /* fall through */
5259 - continue; /* IRQ2 is never connected */
5262 - if (ELCR_fallback) {
5264 - * If the ELCR indicates a level-sensitive interrupt, we
5265 - * copy that information over to the MP table in the
5266 - * irqflag field (level sensitive, active high polarity).
5268 - if (ELCR_trigger(i))
5269 - intsrc.mpc_irqflag = 13;
5271 - intsrc.mpc_irqflag = 0;
5274 - intsrc.mpc_srcbusirq = i;
5275 - intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
5276 - MP_intsrc_info(&intsrc);
5279 - intsrc.mpc_irqtype = mp_ExtINT;
5280 - intsrc.mpc_srcbusirq = 0;
5281 - intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
5282 - MP_intsrc_info(&intsrc);
5285 -static inline void __init construct_default_ISA_mptable(int mpc_default_type)
5287 - struct mpc_config_processor processor;
5288 - struct mpc_config_bus bus;
5289 - struct mpc_config_ioapic ioapic;
5290 - struct mpc_config_lintsrc lintsrc;
5291 - int linttypes[2] = { mp_ExtINT, mp_NMI };
5295 - * local APIC has default address
5297 - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
5300 - * 2 CPUs, numbered 0 & 1.
5302 - processor.mpc_type = MP_PROCESSOR;
5303 - processor.mpc_apicver = 0;
5304 - processor.mpc_cpuflag = CPU_ENABLED;
5305 - processor.mpc_cpufeature = 0;
5306 - processor.mpc_featureflag = 0;
5307 - processor.mpc_reserved[0] = 0;
5308 - processor.mpc_reserved[1] = 0;
5309 - for (i = 0; i < 2; i++) {
5310 - processor.mpc_apicid = i;
5311 - MP_processor_info(&processor);
5314 - bus.mpc_type = MP_BUS;
5315 - bus.mpc_busid = 0;
5316 - switch (mpc_default_type) {
5318 - printk(KERN_ERR "???\nUnknown standard configuration %d\n",
5319 - mpc_default_type);
5320 - /* fall through */
5323 - memcpy(bus.mpc_bustype, "ISA ", 6);
5326 - MP_bus_info(&bus);
5327 - if (mpc_default_type > 4) {
5328 - bus.mpc_busid = 1;
5329 - memcpy(bus.mpc_bustype, "PCI ", 6);
5330 - MP_bus_info(&bus);
5333 - ioapic.mpc_type = MP_IOAPIC;
5334 - ioapic.mpc_apicid = 2;
5335 - ioapic.mpc_apicver = 0;
5336 - ioapic.mpc_flags = MPC_APIC_USABLE;
5337 - ioapic.mpc_apicaddr = 0xFEC00000;
5338 - MP_ioapic_info(&ioapic);
5341 - * We set up most of the low 16 IO-APIC pins according to MPS rules.
5343 - construct_default_ioirq_mptable(mpc_default_type);
5345 - lintsrc.mpc_type = MP_LINTSRC;
5346 - lintsrc.mpc_irqflag = 0; /* conforming */
5347 - lintsrc.mpc_srcbusid = 0;
5348 - lintsrc.mpc_srcbusirq = 0;
5349 - lintsrc.mpc_destapic = MP_APIC_ALL;
5350 - for (i = 0; i < 2; i++) {
5351 - lintsrc.mpc_irqtype = linttypes[i];
5352 - lintsrc.mpc_destapiclint = i;
5353 - MP_lintsrc_info(&lintsrc);
5357 -static struct intel_mp_floating *mpf_found;
5360 - * Scan the memory blocks for an SMP configuration block.
5362 -void __init get_smp_config (void)
5364 - struct intel_mp_floating *mpf = mpf_found;
5367 - * ACPI supports both logical (e.g. Hyper-Threading) and physical
5368 - * processors, where MPS only supports physical.
5370 - if (acpi_lapic && acpi_ioapic) {
5371 - printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
5374 - else if (acpi_lapic)
5375 - printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
5377 - printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
5380 - * Now see if we need to read further.
5382 - if (mpf->mpf_feature1 != 0) {
5384 - printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
5385 - construct_default_ISA_mptable(mpf->mpf_feature1);
5387 - } else if (mpf->mpf_physptr) {
5390 - * Read the physical hardware table. Anything here will
5391 - * override the defaults.
5393 - if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
5394 - smp_found_config = 0;
5395 - printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
5396 - printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
5400 - * If there are no explicit MP IRQ entries, then we are
5401 - * broken. We set up most of the low 16 IO-APIC pins to
5402 - * ISA defaults and hope it will work.
5404 - if (!mp_irq_entries) {
5405 - struct mpc_config_bus bus;
5407 - printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
5409 - bus.mpc_type = MP_BUS;
5410 - bus.mpc_busid = 0;
5411 - memcpy(bus.mpc_bustype, "ISA ", 6);
5412 - MP_bus_info(&bus);
5414 - construct_default_ioirq_mptable(0);
5420 - printk(KERN_INFO "Processors: %d\n", num_processors);
5422 - * Only use the first configuration found.
5426 -static int __init smp_scan_config (unsigned long base, unsigned long length)
5428 - extern void __bad_mpf_size(void);
5429 - unsigned int *bp = isa_bus_to_virt(base);
5430 - struct intel_mp_floating *mpf;
5432 - Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
5433 - if (sizeof(*mpf) != 16)
5436 - while (length > 0) {
5437 - mpf = (struct intel_mp_floating *)bp;
5438 - if ((*bp == SMP_MAGIC_IDENT) &&
5439 - (mpf->mpf_length == 1) &&
5440 - !mpf_checksum((unsigned char *)bp, 16) &&
5441 - ((mpf->mpf_specification == 1)
5442 - || (mpf->mpf_specification == 4)) ) {
5444 - smp_found_config = 1;
5454 -void __init find_smp_config(void)
5456 - unsigned int address;
5459 - * FIXME: Linux assumes you have 640K of base ram..
5460 - * this continues the error...
5462 - * 1) Scan the bottom 1K for a signature
5463 - * 2) Scan the top 1K of base RAM
5464 - * 3) Scan the 64K of bios
5466 - if (smp_scan_config(0x0,0x400) ||
5467 - smp_scan_config(639*0x400,0x400) ||
5468 - smp_scan_config(0xF0000,0x10000))
5471 - * If it is an SMP machine we should know now.
5473 - * there is a real-mode segmented pointer pointing to the
5474 - * 4K EBDA area at 0x40E, calculate and scan it here.
5476 - * NOTE! There are Linux loaders that will corrupt the EBDA
5477 - * area, and as such this kind of SMP config may be less
5478 - * trustworthy, simply because the SMP table may have been
5479 - * stomped on during early boot. These loaders are buggy and
5480 - * should be fixed.
5483 - address = *(unsigned short *)phys_to_virt(0x40E);
5485 - if (smp_scan_config(address, 0x1000))
5488 - /* If we have come this far, we did not find an MP table */
5489 - printk(KERN_INFO "No mptable found.\n");
5492 -/* --------------------------------------------------------------------------
5493 - ACPI-based MP Configuration
5494 - -------------------------------------------------------------------------- */
5498 -void __init mp_register_lapic_address(u64 address)
5501 - mp_lapic_addr = (unsigned long) address;
5502 - set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
5503 - if (boot_cpu_id == -1U)
5504 - boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
5508 -void __cpuinit mp_register_lapic (u8 id, u8 enabled)
5510 - struct mpc_config_processor processor;
5513 - if (id == boot_cpu_id)
5517 - processor.mpc_type = MP_PROCESSOR;
5518 - processor.mpc_apicid = id;
5519 - processor.mpc_apicver = 0;
5520 - processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
5521 - processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
5522 - processor.mpc_cpufeature = 0;
5523 - processor.mpc_featureflag = 0;
5524 - processor.mpc_reserved[0] = 0;
5525 - processor.mpc_reserved[1] = 0;
5528 - MP_processor_info(&processor);
5531 -#define MP_ISA_BUS 0
5532 -#define MP_MAX_IOAPIC_PIN 127
5534 -static struct mp_ioapic_routing {
5538 - u32 pin_programmed[4];
5539 -} mp_ioapic_routing[MAX_IO_APICS];
5541 -static int mp_find_ioapic(int gsi)
5545 - /* Find the IOAPIC that manages this GSI. */
5546 - for (i = 0; i < nr_ioapics; i++) {
5547 - if ((gsi >= mp_ioapic_routing[i].gsi_start)
5548 - && (gsi <= mp_ioapic_routing[i].gsi_end))
5552 - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
5556 -static u8 uniq_ioapic_id(u8 id)
5559 - DECLARE_BITMAP(used, 256);
5560 - bitmap_zero(used, 256);
5561 - for (i = 0; i < nr_ioapics; i++) {
5562 - struct mpc_config_ioapic *ia = &mp_ioapics[i];
5563 - __set_bit(ia->mpc_apicid, used);
5565 - if (!test_bit(id, used))
5567 - return find_first_zero_bit(used, 256);
5570 -void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
5574 - if (bad_ioapic(address))
5579 - mp_ioapics[idx].mpc_type = MP_IOAPIC;
5580 - mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
5581 - mp_ioapics[idx].mpc_apicaddr = address;
5584 - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
5586 - mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
5587 - mp_ioapics[idx].mpc_apicver = 0;
5590 - * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
5591 - * and to prevent reprogramming of IOAPIC pins (PCI IRQs).
5593 - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
5594 - mp_ioapic_routing[idx].gsi_start = gsi_base;
5595 - mp_ioapic_routing[idx].gsi_end = gsi_base +
5596 - io_apic_get_redir_entries(idx);
5598 - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
5599 - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
5600 - mp_ioapics[idx].mpc_apicaddr,
5601 - mp_ioapic_routing[idx].gsi_start,
5602 - mp_ioapic_routing[idx].gsi_end);
5608 -mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
5610 - struct mpc_config_intsrc intsrc;
5615 - * Convert 'gsi' to 'ioapic.pin'.
5617 - ioapic = mp_find_ioapic(gsi);
5620 - pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
5623 - * TBD: This check is for faulty timer entries, where the override
5624 - * erroneously sets the trigger to level, resulting in a HUGE
5625 - * increase of timer interrupts!
5627 - if ((bus_irq == 0) && (trigger == 3))
5630 - intsrc.mpc_type = MP_INTSRC;
5631 - intsrc.mpc_irqtype = mp_INT;
5632 - intsrc.mpc_irqflag = (trigger << 2) | polarity;
5633 - intsrc.mpc_srcbus = MP_ISA_BUS;
5634 - intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
5635 - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
5636 - intsrc.mpc_dstirq = pin; /* INTIN# */
5638 - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
5639 - intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
5640 - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
5641 - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
5643 - mp_irqs[mp_irq_entries] = intsrc;
5644 - if (++mp_irq_entries == MAX_IRQ_SOURCES)
5645 - panic("Max # of irq sources exceeded!\n");
5648 -void __init mp_config_acpi_legacy_irqs(void)
5650 - struct mpc_config_intsrc intsrc;
5655 - * Fabricate the legacy ISA bus (bus #31).
5657 - set_bit(MP_ISA_BUS, mp_bus_not_pci);
5660 - * Locate the IOAPIC that manages the ISA IRQs (0-15).
5662 - ioapic = mp_find_ioapic(0);
5666 - intsrc.mpc_type = MP_INTSRC;
5667 - intsrc.mpc_irqflag = 0; /* Conforming */
5668 - intsrc.mpc_srcbus = MP_ISA_BUS;
5669 - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
5672 - * Use the default configuration for the IRQs 0-15. Unless
5673 - * overridden by (MADT) interrupt source override entries.
5675 - for (i = 0; i < 16; i++) {
5678 - for (idx = 0; idx < mp_irq_entries; idx++) {
5679 - struct mpc_config_intsrc *irq = mp_irqs + idx;
5681 - /* Do we already have a mapping for this ISA IRQ? */
5682 - if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
5685 - /* Do we already have a mapping for this IOAPIC pin */
5686 - if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
5687 - (irq->mpc_dstirq == i))
5691 - if (idx != mp_irq_entries) {
5692 - printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
5693 - continue; /* IRQ already used */
5696 - intsrc.mpc_irqtype = mp_INT;
5697 - intsrc.mpc_srcbusirq = i; /* Identity mapped */
5698 - intsrc.mpc_dstirq = i;
5700 - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
5701 - "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
5702 - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
5703 - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
5704 - intsrc.mpc_dstirq);
5706 - mp_irqs[mp_irq_entries] = intsrc;
5707 - if (++mp_irq_entries == MAX_IRQ_SOURCES)
5708 - panic("Max # of irq sources exceeded!\n");
5712 -int mp_register_gsi(u32 gsi, int triggering, int polarity)
5715 - int ioapic_pin = 0;
5718 - if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
5721 - /* Don't set up the ACPI SCI because it's already set up */
5722 - if (acpi_gbl_FADT.sci_interrupt == gsi)
5725 - ioapic = mp_find_ioapic(gsi);
5727 - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
5731 - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
5734 - * Avoid pin reprogramming. PRTs typically include entries
5735 - * with redundant pin->gsi mappings (but unique PCI devices);
5736 - * we only program the IOAPIC on the first.
5738 - bit = ioapic_pin % 32;
5739 - idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
5741 - printk(KERN_ERR "Invalid reference to IOAPIC pin "
5742 - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
5746 - if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
5747 - Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
5748 - mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
5752 - mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
5754 - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
5755 - triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
5756 - polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
5759 -#endif /*CONFIG_ACPI*/
5760 --- sle11-2009-10-16.orig/arch/x86/kernel/pci-dma-xen.c 2009-03-16 16:33:40.000000000 +0100
5761 +++ sle11-2009-10-16/arch/x86/kernel/pci-dma-xen.c 2009-10-22 11:31:59.000000000 +0200
5764 - * Dynamic DMA mapping support.
5766 - * On i386 there is no hardware dynamic DMA address translation,
5767 - * so consistent alloc/free are merely page allocation/freeing.
5768 - * The rest of the dynamic DMA mapping interface is implemented
5772 -#include <linux/types.h>
5773 -#include <linux/mm.h>
5774 -#include <linux/string.h>
5775 +#include <linux/dma-mapping.h>
5776 +#include <linux/dmar.h>
5777 +#include <linux/bootmem.h>
5778 #include <linux/pci.h>
5779 -#include <linux/module.h>
5780 -#include <linux/version.h>
5781 -#include <asm/io.h>
5782 -#include <xen/balloon.h>
5783 -#include <xen/gnttab.h>
5784 -#include <asm/swiotlb.h>
5785 -#include <asm/tlbflush.h>
5786 -#include <asm/swiotlb_32.h>
5787 -#include <asm/gnttab_dma.h>
5788 -#include <asm/bug.h>
5791 -#include <asm/iommu.h>
5792 +#include <asm/proto.h>
5793 +#include <asm/dma.h>
5794 +#include <asm/gart.h>
5795 +#include <asm/calgary.h>
5797 +int forbid_dac __read_mostly;
5798 +EXPORT_SYMBOL(forbid_dac);
5800 +const struct dma_mapping_ops *dma_ops;
5801 +EXPORT_SYMBOL(dma_ops);
5803 +static int iommu_sac_force __read_mostly;
5805 +#ifdef CONFIG_IOMMU_DEBUG
5806 +int panic_on_overflow __read_mostly = 1;
5807 +int force_iommu __read_mostly = 1;
5809 +int panic_on_overflow __read_mostly = 0;
5810 +int force_iommu __read_mostly = 0;
5813 int iommu_merge __read_mostly = 0;
5814 -EXPORT_SYMBOL(iommu_merge);
5816 -dma_addr_t bad_dma_address __read_mostly;
5817 -EXPORT_SYMBOL(bad_dma_address);
5818 +int no_iommu __read_mostly;
5819 +/* Set this to 1 if there is a HW IOMMU in the system */
5820 +int iommu_detected __read_mostly = 0;
5822 /* This tells the BIO block layer to assume merging. Default to off
5823 because we cannot guarantee merging later. */
5824 int iommu_bio_merge __read_mostly = 0;
5825 EXPORT_SYMBOL(iommu_bio_merge);
5827 -int force_iommu __read_mostly= 0;
5828 +dma_addr_t bad_dma_address __read_mostly = 0;
5829 +EXPORT_SYMBOL(bad_dma_address);
5831 -__init int iommu_setup(char *p)
5835 +/* Dummy device used for NULL arguments (normally ISA). Better would
5836 + be probably a smaller DMA mask, but this is bug-to-bug compatible
5838 +struct device fallback_dev = {
5839 + .bus_id = "fallback device",
5840 + .coherent_dma_mask = DMA_32BIT_MASK,
5841 + .dma_mask = &fallback_dev.coherent_dma_mask,
5844 -void __init pci_iommu_alloc(void)
5845 +int dma_set_mask(struct device *dev, u64 mask)
5847 -#ifdef CONFIG_SWIOTLB
5848 - pci_swiotlb_init();
5851 + if (!dev->dma_mask || !dma_supported(dev, mask))
5854 + *dev->dma_mask = mask;
5856 -static int __init pci_iommu_init(void)
5861 +EXPORT_SYMBOL(dma_set_mask);
5863 -/* Must execute after PCI subsystem */
5864 -fs_initcall(pci_iommu_init);
5867 -struct dma_coherent_mem {
5872 - unsigned long *bitmap;
5875 -#define IOMMU_BUG_ON(test) \
5877 - if (unlikely(test)) { \
5878 - printk(KERN_ALERT "Fatal DMA error! " \
5879 - "Please use 'swiotlb=force'\n"); \
5883 +#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
5884 +static __initdata void *dma32_bootmem_ptr;
5885 +static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
5887 -static int check_pages_physically_contiguous(unsigned long pfn,
5888 - unsigned int offset,
5890 +static int __init parse_dma32_size_opt(char *p)
5892 - unsigned long next_mfn;
5896 - next_mfn = pfn_to_mfn(pfn);
5897 - nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
5899 - for (i = 1; i < nr_pages; i++) {
5900 - if (pfn_to_mfn(++pfn) != ++next_mfn)
5906 + dma32_bootmem_size = memparse(p, &p);
5909 +early_param("dma32_size", parse_dma32_size_opt);
5911 -int range_straddles_page_boundary(paddr_t p, size_t size)
5912 +void __init dma32_reserve_bootmem(void)
5914 - unsigned long pfn = p >> PAGE_SHIFT;
5915 - unsigned int offset = p & ~PAGE_MASK;
5916 + unsigned long size, align;
5917 + if (end_pfn <= MAX_DMA32_PFN)
5920 - return ((offset + size > PAGE_SIZE) &&
5921 - !check_pages_physically_contiguous(pfn, offset, size));
5922 + align = 64ULL<<20;
5923 + size = round_up(dma32_bootmem_size, align);
5924 + dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
5925 + __pa(MAX_DMA_ADDRESS));
5926 + if (dma32_bootmem_ptr)
5927 + dma32_bootmem_size = size;
5929 + dma32_bootmem_size = 0;
5933 -dma_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
5934 - enum dma_data_direction direction)
5935 +static void __init dma32_free_bootmem(void)
5940 + if (end_pfn <= MAX_DMA32_PFN)
5943 - BUG_ON(!valid_dma_direction(direction));
5944 - WARN_ON(nents == 0 || sgl->length == 0);
5945 + if (!dma32_bootmem_ptr)
5949 - rc = swiotlb_map_sg(hwdev, sgl, nents, direction);
5951 - struct scatterlist *sg;
5953 - for_each_sg(sgl, sg, nents, i) {
5954 - BUG_ON(!sg_page(sg));
5956 - gnttab_dma_map_page(sg_page(sg)) + sg->offset;
5957 - sg->dma_length = sg->length;
5958 - IOMMU_BUG_ON(address_needs_mapping(
5959 - hwdev, sg->dma_address));
5960 - IOMMU_BUG_ON(range_straddles_page_boundary(
5961 - page_to_pseudophys(sg_page(sg)) + sg->offset,
5966 + for_each_online_node(node)
5967 + free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr),
5968 + dma32_bootmem_size);
5970 - flush_write_buffers();
5972 + dma32_bootmem_ptr = NULL;
5973 + dma32_bootmem_size = 0;
5975 -EXPORT_SYMBOL(dma_map_sg);
5977 +#define dma32_free_bootmem() ((void)0)
5981 -dma_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
5982 - enum dma_data_direction direction)
5985 +static const struct dma_mapping_ops swiotlb_dma_ops = {
5986 + .mapping_error = swiotlb_dma_mapping_error,
5987 + .map_single = swiotlb_map_single_phys,
5988 + .unmap_single = swiotlb_unmap_single,
5989 + .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
5990 + .sync_single_for_device = swiotlb_sync_single_for_device,
5991 + .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
5992 + .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
5993 + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
5994 + .sync_sg_for_device = swiotlb_sync_sg_for_device,
5995 + .map_sg = swiotlb_map_sg,
5996 + .unmap_sg = swiotlb_unmap_sg,
5997 + .dma_supported = swiotlb_dma_supported
6000 - BUG_ON(!valid_dma_direction(direction));
6002 - swiotlb_unmap_sg(hwdev, sgl, nents, direction);
6004 - struct scatterlist *sg;
6005 +void __init pci_iommu_alloc(void)
6007 + /* free the range so iommu could get some range less than 4G */
6008 + dma32_free_bootmem();
6010 + * The order of these functions is important for
6011 + * fall-back/fail-over reasons
6013 +#ifdef CONFIG_GART_IOMMU
6014 + gart_iommu_hole_init();
6017 - for_each_sg(sgl, sg, nents, i)
6018 - gnttab_dma_unmap_page(sg->dma_address);
6021 -EXPORT_SYMBOL(dma_unmap_sg);
6022 +#ifdef CONFIG_CALGARY_IOMMU
6026 -#ifdef CONFIG_HIGHMEM
6028 -dma_map_page(struct device *dev, struct page *page, unsigned long offset,
6029 - size_t size, enum dma_data_direction direction)
6031 - dma_addr_t dma_addr;
6032 + detect_intel_iommu();
6034 - BUG_ON(!valid_dma_direction(direction));
6035 +#ifdef CONFIG_SWIOTLB
6038 - dma_addr = swiotlb_map_page(
6039 - dev, page, offset, size, direction);
6041 - dma_addr = gnttab_dma_map_page(page) + offset;
6042 - IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr));
6043 + printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
6044 + dma_ops = &swiotlb_dma_ops;
6050 -EXPORT_SYMBOL(dma_map_page);
6053 -dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
6054 - enum dma_data_direction direction)
6056 + * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
6059 +static __init int iommu_setup(char *p)
6061 - BUG_ON(!valid_dma_direction(direction));
6063 - swiotlb_unmap_page(dev, dma_address, size, direction);
6065 - gnttab_dma_unmap_page(dma_address);
6067 -EXPORT_SYMBOL(dma_unmap_page);
6068 -#endif /* CONFIG_HIGHMEM */
6072 -dma_mapping_error(dma_addr_t dma_addr)
6075 - return swiotlb_dma_mapping_error(dma_addr);
6078 -EXPORT_SYMBOL(dma_mapping_error);
6083 -dma_supported(struct device *dev, u64 mask)
6086 - return swiotlb_dma_supported(dev, mask);
6088 - * By default we'll BUG when an infeasible DMA is requested, and
6089 - * request swiotlb=force (see IOMMU_BUG_ON).
6093 -EXPORT_SYMBOL(dma_supported);
6095 + if (!strncmp(p, "off", 3))
6097 + /* gart_parse_options has more force support */
6098 + if (!strncmp(p, "force", 5))
6100 + if (!strncmp(p, "noforce", 7)) {
6105 -void *dma_alloc_coherent(struct device *dev, size_t size,
6106 - dma_addr_t *dma_handle, gfp_t gfp)
6109 - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
6110 - unsigned int order = get_order(size);
6111 - unsigned long vstart;
6113 + if (!strncmp(p, "biomerge", 8)) {
6114 + iommu_bio_merge = 4096;
6118 + if (!strncmp(p, "panic", 5))
6119 + panic_on_overflow = 1;
6120 + if (!strncmp(p, "nopanic", 7))
6121 + panic_on_overflow = 0;
6122 + if (!strncmp(p, "merge", 5)) {
6126 + if (!strncmp(p, "nomerge", 7))
6128 + if (!strncmp(p, "forcesac", 8))
6129 + iommu_sac_force = 1;
6130 + if (!strncmp(p, "allowdac", 8))
6132 + if (!strncmp(p, "nodac", 5))
6134 + if (!strncmp(p, "usedac", 6)) {
6138 +#ifdef CONFIG_SWIOTLB
6139 + if (!strncmp(p, "soft", 4))
6143 - /* ignore region specifiers */
6144 - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
6145 +#ifdef CONFIG_GART_IOMMU
6146 + gart_parse_options(p);
6150 - int page = bitmap_find_free_region(mem->bitmap, mem->size,
6153 - *dma_handle = mem->device_base + (page << PAGE_SHIFT);
6154 - ret = mem->virt_base + (page << PAGE_SHIFT);
6155 - memset(ret, 0, size);
6158 - if (mem->flags & DMA_MEMORY_EXCLUSIVE)
6160 +#ifdef CONFIG_CALGARY_IOMMU
6161 + if (!strncmp(p, "calgary", 7))
6163 +#endif /* CONFIG_CALGARY_IOMMU */
6165 + p += strcspn(p, ",");
6171 +early_param("iommu", iommu_setup);
6173 - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
6176 - vstart = __get_free_pages(gfp, order);
6177 - ret = (void *)vstart;
6178 +static int check_pages_physically_contiguous(unsigned long pfn,
6179 + unsigned int offset,
6182 + unsigned long next_mfn;
6186 - if (dev != NULL && dev->coherent_dma_mask)
6187 - mask = dev->coherent_dma_mask;
6189 - mask = 0xffffffff;
6190 + next_mfn = pfn_to_mfn(pfn);
6191 + nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
6193 - if (ret != NULL) {
6194 - if (xen_create_contiguous_region(vstart, order,
6195 - fls64(mask)) != 0) {
6196 - free_pages(vstart, order);
6199 - memset(ret, 0, size);
6200 - *dma_handle = virt_to_bus(ret);
6201 + for (i = 1; i < nr_pages; i++) {
6202 + if (pfn_to_mfn(++pfn) != ++next_mfn)
6208 -EXPORT_SYMBOL(dma_alloc_coherent);
6210 -void dma_free_coherent(struct device *dev, size_t size,
6211 - void *vaddr, dma_addr_t dma_handle)
6212 +int range_straddles_page_boundary(paddr_t p, size_t size)
6214 - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
6215 - int order = get_order(size);
6217 - WARN_ON(irqs_disabled()); /* for portability */
6218 - if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
6219 - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
6220 + unsigned long pfn = p >> PAGE_SHIFT;
6221 + unsigned int offset = p & ~PAGE_MASK;
6223 - bitmap_release_region(mem->bitmap, page, order);
6225 - xen_destroy_contiguous_region((unsigned long)vaddr, order);
6226 - free_pages((unsigned long)vaddr, order);
6228 + return ((offset + size > PAGE_SIZE) &&
6229 + !check_pages_physically_contiguous(pfn, offset, size));
6231 -EXPORT_SYMBOL(dma_free_coherent);
6233 -#ifdef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
6234 +#ifdef CONFIG_X86_32
6235 int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
6236 dma_addr_t device_addr, size_t size, int flags)
6238 @@ -327,8 +295,8 @@ EXPORT_SYMBOL(dma_declare_coherent_memor
6239 void dma_release_declared_memory(struct device *dev)
6241 struct dma_coherent_mem *mem = dev->dma_mem;
6247 dev->dma_mem = NULL;
6248 iounmap(mem->virt_base);
6249 @@ -341,8 +309,10 @@ void *dma_mark_declared_memory_occupied(
6250 dma_addr_t device_addr, size_t size)
6252 struct dma_coherent_mem *mem = dev->dma_mem;
6253 - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
6255 + int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
6257 + pages >>= PAGE_SHIFT;
6260 return ERR_PTR(-EINVAL);
6261 @@ -354,103 +324,270 @@ void *dma_mark_declared_memory_occupied(
6262 return mem->virt_base + (pos << PAGE_SHIFT);
6264 EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
6265 -#endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */
6267 -#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
6268 -/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
6271 -EXPORT_SYMBOL(forbid_dac);
6273 -static __devinit void via_no_dac(struct pci_dev *dev)
6274 +static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
6275 + dma_addr_t *dma_handle, void **ret)
6277 - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
6278 - printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
6280 + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
6281 + int order = get_order(size);
6284 + int page = bitmap_find_free_region(mem->bitmap, mem->size,
6287 + *dma_handle = mem->device_base + (page << PAGE_SHIFT);
6288 + *ret = mem->virt_base + (page << PAGE_SHIFT);
6289 + memset(*ret, 0, size);
6291 + if (mem->flags & DMA_MEMORY_EXCLUSIVE)
6294 + return (mem != NULL);
6296 -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
6298 -static int check_iommu(char *s)
6299 +static int dma_release_coherent(struct device *dev, int order, void *vaddr)
6301 - if (!strcmp(s, "usedac")) {
6303 + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
6305 + if (mem && vaddr >= mem->virt_base && vaddr <
6306 + (mem->virt_base + (mem->size << PAGE_SHIFT))) {
6307 + int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
6309 + bitmap_release_region(mem->bitmap, page, order);
6314 -__setup("iommu=", check_iommu);
6316 +#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0)
6317 +#define dma_release_coherent(dev, order, vaddr) (0)
6318 +#endif /* CONFIG_X86_32 */
6320 +int dma_supported(struct device *dev, u64 mask)
6323 + if (mask > 0xffffffff && forbid_dac > 0) {
6324 + printk(KERN_INFO "PCI: Disallowing DAC for device %s\n",
6331 -dma_map_single(struct device *dev, void *ptr, size_t size,
6332 - enum dma_data_direction direction)
6333 + if (dma_ops->dma_supported)
6334 + return dma_ops->dma_supported(dev, mask);
6336 + /* Copied from i386. Doesn't make much sense, because it will
6337 + only work for pci_alloc_coherent.
6338 + The caller just has to use GFP_DMA in this case. */
6339 + if (mask < DMA_24BIT_MASK)
6342 + /* Tell the device to use SAC when IOMMU force is on. This
6343 + allows the driver to use cheaper accesses in some cases.
6345 + Problem with this is that if we overflow the IOMMU area and
6346 + return DAC as fallback address the device may not handle it
6349 + As a special case some controllers have a 39bit address
6350 + mode that is as efficient as 32bit (aic79xx). Don't force
6351 + SAC for these. Assume all masks <= 40 bits are of this
6352 + type. Normally this doesn't make any difference, but gives
6353 + more gentle handling of IOMMU overflow. */
6354 + if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
6355 + printk(KERN_INFO "%s: Force SAC with mask %Lx\n",
6356 + dev->bus_id, mask);
6362 +EXPORT_SYMBOL(dma_supported);
6364 +/* Allocate DMA memory on node near device */
6365 +static struct page *
6366 +dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
6371 - BUG_ON(!valid_dma_direction(direction));
6372 - WARN_ON(size == 0);
6373 + node = dev_to_node(dev);
6376 - dma = swiotlb_map_single(dev, ptr, size, direction);
6378 - dma = gnttab_dma_map_page(virt_to_page(ptr)) +
6379 - offset_in_page(ptr);
6380 - IOMMU_BUG_ON(range_straddles_page_boundary(__pa(ptr), size));
6381 - IOMMU_BUG_ON(address_needs_mapping(dev, dma));
6384 - flush_write_buffers();
6387 -EXPORT_SYMBOL(dma_map_single);
6390 -dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
6391 - enum dma_data_direction direction)
6393 - BUG_ON(!valid_dma_direction(direction));
6395 - swiotlb_unmap_single(dev, dma_addr, size, direction);
6397 - gnttab_dma_unmap_page(dma_addr);
6398 + return alloc_pages_node(node, gfp, order);
6402 + * Allocate memory for a coherent mapping.
6405 +dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
6408 + void *memory = NULL;
6409 + struct page *page;
6410 + unsigned long dma_mask = 0;
6412 + unsigned int order = get_order(size);
6414 + /* ignore region specifiers */
6415 + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
6417 + if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
6421 + dev = &fallback_dev;
6424 + dma_mask = dev->coherent_dma_mask;
6425 + if (dma_mask == 0)
6426 + dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK;
6428 + /* Device not DMA able */
6429 + if (dev->dma_mask == NULL)
6433 + gfp &= ~(__GFP_DMA | __GFP_DMA32);
6435 + /* Don't invoke OOM killer or retry in lower 16MB DMA zone */
6436 + if (gfp & __GFP_DMA)
6439 +#ifdef CONFIG_X86_64
6440 + /* Why <=? Even when the mask is smaller than 4GB it is often
6441 + larger than 16MB and in this case we have a chance of
6442 + finding fitting memory in the next higher zone first. If
6443 + not retry with true GFP_DMA. -AK */
6444 + if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
6450 + page = dma_alloc_pages(dev,
6451 + noretry ? gfp | __GFP_NORETRY : gfp, order);
6458 + dma_addr_t bus = page_to_phys(page);
6459 + memory = page_address(page);
6460 + high = (bus + size) >= dma_mask;
6462 + if (force_iommu && !(gfp & GFP_DMA))
6465 + free_pages((unsigned long)memory, order);
6467 + /* Don't use the 16MB ZONE_DMA unless absolutely
6468 + needed. It's better to use remapping first. */
6469 + if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
6470 + gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
6474 + /* Let low level make its own zone decisions */
6475 + gfp &= ~(GFP_DMA32|GFP_DMA);
6477 + if (dma_ops->alloc_coherent)
6478 + return dma_ops->alloc_coherent(dev, size,
6483 + memset(memory, 0, size);
6485 + *dma_handle = bus;
6490 + if (dma_ops->alloc_coherent) {
6491 + free_pages((unsigned long)memory, order);
6492 + gfp &= ~(GFP_DMA|GFP_DMA32);
6493 + return dma_ops->alloc_coherent(dev, size, dma_handle, gfp);
6496 + if (dma_ops->map_simple) {
6497 + *dma_handle = dma_ops->map_simple(dev, virt_to_bus(memory),
6499 + PCI_DMA_BIDIRECTIONAL);
6500 + if (*dma_handle != bad_dma_address)
6504 + memory = page_address(page);
6505 + if (xen_create_contiguous_region((unsigned long)memory, order,
6506 + fls64(dma_mask)) == 0) {
6507 + memset(memory, 0, size);
6508 + *dma_handle = virt_to_bus(memory);
6513 + if (panic_on_overflow)
6514 + panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",
6515 + (unsigned long)size);
6516 + free_pages((unsigned long)memory, order);
6519 -EXPORT_SYMBOL(dma_unmap_single);
6520 +EXPORT_SYMBOL(dma_alloc_coherent);
6523 -dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
6524 - enum dma_data_direction direction)
6526 + * Unmap coherent memory.
6527 + * The caller must ensure that the device has finished accessing the mapping.
6529 +void dma_free_coherent(struct device *dev, size_t size,
6530 + void *vaddr, dma_addr_t bus)
6533 - swiotlb_sync_single_for_cpu(dev, dma_handle, size, direction);
6534 + int order = get_order(size);
6535 + WARN_ON(irqs_disabled()); /* for portability */
6536 + if (dma_release_coherent(dev, order, vaddr))
6539 + if (dma_ops->unmap_single)
6540 + dma_ops->unmap_single(dev, bus, size, 0);
6542 + xen_destroy_contiguous_region((unsigned long)vaddr, order);
6543 + free_pages((unsigned long)vaddr, order);
6545 -EXPORT_SYMBOL(dma_sync_single_for_cpu);
6546 +EXPORT_SYMBOL(dma_free_coherent);
6549 -dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
6550 - enum dma_data_direction direction)
6551 +static int __init pci_iommu_init(void)
6554 - swiotlb_sync_single_for_device(dev, dma_handle, size, direction);
6555 +#ifdef CONFIG_CALGARY_IOMMU
6556 + calgary_iommu_init();
6559 + intel_iommu_init();
6561 +#ifdef CONFIG_GART_IOMMU
6562 + gart_iommu_init();
6568 -EXPORT_SYMBOL(dma_sync_single_for_device);
6571 -dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
6572 - enum dma_data_direction direction)
6573 +void pci_iommu_shutdown(void)
6576 - swiotlb_sync_sg_for_cpu(dev,sg,nelems,direction);
6577 - flush_write_buffers();
6578 + gart_iommu_shutdown();
6580 -EXPORT_SYMBOL(dma_sync_sg_for_cpu);
6581 +/* Must execute after PCI subsystem */
6582 +fs_initcall(pci_iommu_init);
6585 +/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
6588 -dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
6589 - enum dma_data_direction direction)
6590 +static __devinit void via_no_dac(struct pci_dev *dev)
6593 - swiotlb_sync_sg_for_device(dev,sg,nelems,direction);
6594 - flush_write_buffers();
6595 + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
6596 + printk(KERN_INFO "PCI: VIA PCI bridge detected. "
6597 + "Disabling DAC.\n");
6601 -EXPORT_SYMBOL(dma_sync_sg_for_device);
6602 +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
6604 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
6605 +++ sle11-2009-10-16/arch/x86/kernel/pci-nommu-xen.c 2009-03-16 16:38:05.000000000 +0100
6607 +#include <linux/dma-mapping.h>
6608 +#include <linux/dmar.h>
6609 +#include <linux/bootmem.h>
6610 +#include <linux/pci.h>
6612 +#include <xen/gnttab.h>
6614 +#include <asm/proto.h>
6615 +#include <asm/dma.h>
6616 +#include <asm/swiotlb.h>
6617 +#include <asm/tlbflush.h>
6618 +#include <asm/gnttab_dma.h>
6619 +#include <asm/bug.h>
6621 +#define IOMMU_BUG_ON(test) \
6623 + if (unlikely(test)) { \
6624 + printk(KERN_ALERT "Fatal DMA error! " \
6625 + "Please use 'swiotlb=force'\n"); \
6631 +gnttab_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
6635 + struct scatterlist *sg;
6637 + WARN_ON(nents == 0 || sgl->length == 0);
6639 + for_each_sg(sgl, sg, nents, i) {
6640 + BUG_ON(!sg_page(sg));
6642 + gnttab_dma_map_page(sg_page(sg)) + sg->offset;
6643 + sg->dma_length = sg->length;
6644 + IOMMU_BUG_ON(address_needs_mapping(
6645 + hwdev, sg->dma_address));
6646 + IOMMU_BUG_ON(range_straddles_page_boundary(
6647 + page_to_pseudophys(sg_page(sg)) + sg->offset,
6655 +gnttab_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
6659 + struct scatterlist *sg;
6661 + for_each_sg(sgl, sg, nents, i)
6662 + gnttab_dma_unmap_page(sg->dma_address);
6666 +gnttab_map_single(struct device *dev, phys_addr_t paddr, size_t size,
6671 + WARN_ON(size == 0);
6673 + dma = gnttab_dma_map_page(pfn_to_page(paddr >> PAGE_SHIFT)) +
6674 + offset_in_page(paddr);
6675 + IOMMU_BUG_ON(range_straddles_page_boundary(paddr, size));
6676 + IOMMU_BUG_ON(address_needs_mapping(dev, dma));
6682 +gnttab_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
6685 + gnttab_dma_unmap_page(dma_addr);
6688 +static int nommu_mapping_error(dma_addr_t dma_addr)
6690 + return (dma_addr == bad_dma_address);
6693 +static const struct dma_mapping_ops nommu_dma_ops = {
6694 + .map_single = gnttab_map_single,
6695 + .unmap_single = gnttab_unmap_single,
6696 + .map_sg = gnttab_map_sg,
6697 + .unmap_sg = gnttab_unmap_sg,
6698 + .dma_supported = swiotlb_dma_supported,
6699 + .mapping_error = nommu_mapping_error
6702 +void __init no_iommu_init(void)
6707 + force_iommu = 0; /* no HW IOMMU */
6708 + dma_ops = &nommu_dma_ops;
6710 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
6711 +++ sle11-2009-10-16/arch/x86/kernel/process-xen.c 2009-03-16 16:38:05.000000000 +0100
6713 +#include <linux/errno.h>
6714 +#include <linux/kernel.h>
6715 +#include <linux/mm.h>
6716 +#include <linux/smp.h>
6717 +#include <linux/slab.h>
6718 +#include <linux/sched.h>
6719 +#include <linux/module.h>
6720 +#include <linux/pm.h>
6722 +struct kmem_cache *task_xstate_cachep;
6724 +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
6727 + if (src->thread.xstate) {
6728 + dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
6730 + if (!dst->thread.xstate)
6732 + WARN_ON((unsigned long)dst->thread.xstate & 15);
6733 + memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
6738 +void free_thread_xstate(struct task_struct *tsk)
6740 + if (tsk->thread.xstate) {
6741 + kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
6742 + tsk->thread.xstate = NULL;
6746 +void free_thread_info(struct thread_info *ti)
6748 + free_thread_xstate(ti->task);
6749 + free_pages((unsigned long)ti, get_order(THREAD_SIZE));
6752 +void arch_task_cache_init(void)
6754 + task_xstate_cachep =
6755 + kmem_cache_create("task_xstate", xstate_size,
6756 + __alignof__(union thread_xstate),
6757 + SLAB_PANIC, NULL);
6760 +static void do_nothing(void *unused)
6765 + * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
6766 + * pm_idle and update to new pm_idle value. Required while changing pm_idle
6767 + * handler on SMP systems.
6769 + * Caller must have changed pm_idle to the new value before the call. Old
6770 + * pm_idle value will not be used by any CPU after the return of this function.
6772 +void cpu_idle_wait(void)
6775 + /* kick all the CPUs so that they exit out of pm_idle */
6776 + smp_call_function(do_nothing, NULL, 0, 1);
6778 +EXPORT_SYMBOL_GPL(cpu_idle_wait);
6782 + * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
6783 + * which can obviate IPI to trigger checking of need_resched.
6784 + * We execute MONITOR against need_resched and enter optimized wait state
6785 + * through MWAIT. Whenever someone changes need_resched, we would be woken
6786 + * up from MWAIT (without an IPI).
6788 + * New with Core Duo processors, MWAIT can take some hints based on CPU
6791 +void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
6793 + if (!need_resched()) {
6794 + __monitor((void *)&current_thread_info()->flags, 0, 0);
6796 + if (!need_resched())
6801 +/* Default MONITOR/MWAIT with no hints, used for default C1 state */
6802 +static void mwait_idle(void)
6804 + if (!need_resched()) {
6805 + __monitor((void *)&current_thread_info()->flags, 0, 0);
6807 + if (!need_resched())
6808 + __sti_mwait(0, 0);
6810 + local_irq_enable();
6812 + local_irq_enable();
6817 + * On SMP it's slightly faster (but much more power-consuming!)
6818 + * to poll the ->work.need_resched flag instead of waiting for the
6819 + * cross-CPU IPI to arrive. Use this option with caution.
6821 +static void poll_idle(void)
6823 + local_irq_enable();
6829 + * mwait selection logic:
6831 + * It depends on the CPU. For AMD CPUs that support MWAIT this is
6832 + * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
6833 + * then depend on a clock divisor and current Pstate of the core. If
6834 + * all cores of a processor are in halt state (C1) the processor can
6835 + * enter the C1E (C1 enhanced) state. If mwait is used this will never
6838 + * idle=mwait overrides this decision and forces the usage of mwait.
6840 +static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
6845 + if (c->x86_vendor == X86_VENDOR_AMD) {
6856 +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
6859 + static int selected;
6863 +#ifdef CONFIG_X86_SMP
6864 + if (pm_idle == poll_idle && smp_num_siblings > 1) {
6865 + printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
6866 + " performance may degrade.\n");
6869 + if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
6871 + * Skip, if setup has overridden idle.
6872 + * One CPU supports mwait => All CPUs supports mwait
6875 + printk(KERN_INFO "using mwait in idle threads.\n");
6876 + pm_idle = mwait_idle;
6883 +static int __init idle_setup(char *str)
6885 + if (!strcmp(str, "poll")) {
6886 + printk("using polling idle threads.\n");
6887 + pm_idle = poll_idle;
6890 + else if (!strcmp(str, "mwait"))
6896 + boot_option_idle_override = 1;
6899 +early_param("idle", idle_setup);
6901 --- sle11-2009-10-16.orig/arch/x86/kernel/process_32-xen.c 2009-03-16 16:33:40.000000000 +0100
6902 +++ sle11-2009-10-16/arch/x86/kernel/process_32-xen.c 2009-03-16 16:38:05.000000000 +0100
6904 #include <linux/personality.h>
6905 #include <linux/tick.h>
6906 #include <linux/percpu.h>
6907 +#include <linux/prctl.h>
6909 #include <asm/uaccess.h>
6910 #include <asm/pgtable.h>
6912 #include <asm/processor.h>
6913 #include <asm/i387.h>
6914 #include <asm/desc.h>
6915 -#include <asm/vm86.h>
6916 #ifdef CONFIG_MATH_EMULATION
6917 #include <asm/math_emu.h>
6919 @@ -102,16 +102,6 @@ void enable_hlt(void)
6921 EXPORT_SYMBOL(enable_hlt);
6924 - * On SMP it's slightly faster (but much more power-consuming!)
6925 - * to poll the ->work.need_resched flag instead of waiting for the
6926 - * cross-CPU IPI to arrive. Use this option with caution.
6928 -static void poll_idle(void)
6933 static void xen_idle(void)
6935 current_thread_info()->status &= ~TS_POLLING;
6936 @@ -121,20 +111,10 @@ static void xen_idle(void)
6940 - local_irq_disable();
6941 - if (!need_resched()) {
6946 - t0n = ktime_to_ns(t0);
6947 + if (!need_resched())
6948 safe_halt(); /* enables interrupts racelessly */
6949 - local_irq_disable();
6951 - t1n = ktime_to_ns(t1);
6952 - sched_clock_idle_wakeup_event(t1n - t0n);
6954 - local_irq_enable();
6956 + local_irq_enable();
6957 current_thread_info()->status |= TS_POLLING;
6959 #ifdef CONFIG_APM_MODULE
6960 @@ -142,7 +122,6 @@ EXPORT_SYMBOL(default_idle);
6963 #ifdef CONFIG_HOTPLUG_CPU
6964 -extern cpumask_t cpu_initialized;
6965 static inline void play_dead(void)
6968 @@ -187,6 +166,7 @@ void cpu_idle(void)
6969 if (cpu_is_offline(cpu))
6972 + local_irq_disable();
6973 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
6976 @@ -197,44 +177,6 @@ void cpu_idle(void)
6980 -static void do_nothing(void *unused)
6985 - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
6986 - * pm_idle and update to new pm_idle value. Required while changing pm_idle
6987 - * handler on SMP systems.
6989 - * Caller must have changed pm_idle to the new value before the call. Old
6990 - * pm_idle value will not be used by any CPU after the return of this function.
6992 -void cpu_idle_wait(void)
6995 - /* kick all the CPUs so that they exit out of pm_idle */
6996 - smp_call_function(do_nothing, NULL, 0, 1);
6998 -EXPORT_SYMBOL_GPL(cpu_idle_wait);
7000 -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
7004 -static int __init idle_setup(char *str)
7006 - if (!strcmp(str, "poll")) {
7007 - printk("using polling idle threads.\n");
7008 - pm_idle = poll_idle;
7013 - boot_option_idle_override = 1;
7016 -early_param("idle", idle_setup);
7018 void __show_registers(struct pt_regs *regs, int all)
7020 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
7021 @@ -260,7 +202,7 @@ void __show_registers(struct pt_regs *re
7022 init_utsname()->version);
7024 printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
7025 - 0xffff & regs->cs, regs->ip, regs->flags,
7026 + (u16)regs->cs, regs->ip, regs->flags,
7027 smp_processor_id());
7028 print_symbol("EIP is at %s\n", regs->ip);
7030 @@ -269,8 +211,7 @@ void __show_registers(struct pt_regs *re
7031 printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
7032 regs->si, regs->di, regs->bp, sp);
7033 printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
7034 - regs->ds & 0xffff, regs->es & 0xffff,
7035 - regs->fs & 0xffff, gs, ss);
7036 + (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
7040 @@ -367,6 +308,7 @@ void flush_thread(void)
7042 * Forget coprocessor state..
7044 + tsk->fpu_counter = 0;
7048 @@ -437,11 +379,30 @@ int copy_thread(int nr, unsigned long cl
7052 -#ifdef CONFIG_SECCOMP
7054 +start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
7056 + __asm__("movl %0, %%gs" :: "r"(0));
7059 + regs->ds = __USER_DS;
7060 + regs->es = __USER_DS;
7061 + regs->ss = __USER_DS;
7062 + regs->cs = __USER_CS;
7063 + regs->ip = new_ip;
7064 + regs->sp = new_sp;
7066 + * Free the old FP and other extended state
7068 + free_thread_xstate(current);
7070 +EXPORT_SYMBOL_GPL(start_thread);
7072 static void hard_disable_TSC(void)
7074 write_cr4(read_cr4() | X86_CR4_TSD);
7077 void disable_TSC(void)
7080 @@ -453,11 +414,47 @@ void disable_TSC(void)
7085 static void hard_enable_TSC(void)
7087 write_cr4(read_cr4() & ~X86_CR4_TSD);
7089 -#endif /* CONFIG_SECCOMP */
7091 +static void enable_TSC(void)
7093 + preempt_disable();
7094 + if (test_and_clear_thread_flag(TIF_NOTSC))
7096 + * Must flip the CPU state synchronously with
7097 + * TIF_NOTSC in the current running context.
7099 + hard_enable_TSC();
7103 +int get_tsc_mode(unsigned long adr)
7107 + if (test_thread_flag(TIF_NOTSC))
7108 + val = PR_TSC_SIGSEGV;
7110 + val = PR_TSC_ENABLE;
7112 + return put_user(val, (unsigned int __user *)adr);
7115 +int set_tsc_mode(unsigned int val)
7117 + if (val == PR_TSC_SIGSEGV)
7119 + else if (val == PR_TSC_ENABLE)
7127 static noinline void
7128 __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
7129 @@ -473,12 +470,12 @@ __switch_to_xtra(struct task_struct *pre
7130 /* we clear debugctl to make sure DS
7131 * is not in use when we change it */
7133 - wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
7134 + update_debugctlmsr(0);
7135 wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
7138 if (next->debugctlmsr != debugctl)
7139 - wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0);
7140 + update_debugctlmsr(next->debugctlmsr);
7142 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
7143 set_debugreg(next->debugreg0, 0);
7144 @@ -490,7 +487,6 @@ __switch_to_xtra(struct task_struct *pre
7145 set_debugreg(next->debugreg7, 7);
7148 -#ifdef CONFIG_SECCOMP
7149 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
7150 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
7151 /* prev and next are different */
7152 @@ -499,7 +495,6 @@ __switch_to_xtra(struct task_struct *pre
7159 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
7160 @@ -637,7 +632,7 @@ struct task_struct * __switch_to(struct
7162 /* we're going to use this soon, after a few expensive things */
7163 if (next_p->fpu_counter > 5)
7164 - prefetch(&next->i387.fxsave);
7165 + prefetch(next->xstate);
7168 * Now maybe handle debug registers
7169 @@ -658,8 +653,11 @@ struct task_struct * __switch_to(struct
7170 /* If the task has used fpu the last 5 timeslices, just do a full
7171 * restore of the math state immediately to avoid the trap; the
7172 * chances of needing FPU soon are obviously high now
7174 + * tsk_used_math() checks prevent calling math_state_restore(),
7175 + * which can sleep in the case of !tsk_used_math()
7177 - if (next_p->fpu_counter > 5)
7178 + if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
7179 math_state_restore();
7182 --- sle11-2009-10-16.orig/arch/x86/kernel/process_64-xen.c 2009-03-16 16:33:40.000000000 +0100
7183 +++ sle11-2009-10-16/arch/x86/kernel/process_64-xen.c 2009-03-16 16:38:05.000000000 +0100
7185 #include <linux/kprobes.h>
7186 #include <linux/kdebug.h>
7187 #include <linux/tick.h>
7188 +#include <linux/prctl.h>
7190 #include <asm/uaccess.h>
7191 #include <asm/pgtable.h>
7192 @@ -102,17 +103,6 @@ void exit_idle(void)
7197 - * On SMP it's slightly faster (but much more power-consuming!)
7198 - * to poll the ->need_resched flag instead of waiting for the
7199 - * cross-CPU IPI to arrive. Use this option with caution.
7201 -static void poll_idle(void)
7203 - local_irq_enable();
7207 static void xen_idle(void)
7209 current_thread_info()->status &= ~TS_POLLING;
7210 @@ -121,20 +111,10 @@ static void xen_idle(void)
7211 * test NEED_RESCHED:
7214 - local_irq_disable();
7215 - if (!need_resched()) {
7220 - t0n = ktime_to_ns(t0);
7221 + if (!need_resched())
7222 safe_halt(); /* enables interrupts racelessly */
7223 - local_irq_disable();
7225 - t1n = ktime_to_ns(t1);
7226 - sched_clock_idle_wakeup_event(t1n - t0n);
7228 - local_irq_enable();
7230 + local_irq_enable();
7231 current_thread_info()->status |= TS_POLLING;
7234 @@ -195,45 +175,6 @@ void cpu_idle(void)
7238 -static void do_nothing(void *unused)
7243 - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
7244 - * pm_idle and update to new pm_idle value. Required while changing pm_idle
7245 - * handler on SMP systems.
7247 - * Caller must have changed pm_idle to the new value before the call. Old
7248 - * pm_idle value will not be used by any CPU after the return of this function.
7250 -void cpu_idle_wait(void)
7253 - /* kick all the CPUs so that they exit out of pm_idle */
7254 - smp_call_function(do_nothing, NULL, 0, 1);
7256 -EXPORT_SYMBOL_GPL(cpu_idle_wait);
7258 -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
7262 -static int __init idle_setup(char *str)
7264 - if (!strcmp(str, "poll")) {
7265 - printk("using polling idle threads.\n");
7266 - pm_idle = poll_idle;
7267 - } else if (!strcmp(str, "mwait"))
7272 - boot_option_idle_override = 1;
7275 -early_param("idle", idle_setup);
7277 /* Prints also some state that isn't saved in the pt_regs */
7278 void __show_regs(struct pt_regs * regs)
7280 @@ -360,6 +301,7 @@ void flush_thread(void)
7282 * Forget coprocessor state..
7284 + tsk->fpu_counter = 0;
7288 @@ -472,6 +414,83 @@ out:
7293 +start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
7295 + asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
7297 + regs->ip = new_ip;
7298 + regs->sp = new_sp;
7299 + write_pda(oldrsp, new_sp);
7300 + regs->cs = __USER_CS;
7301 + regs->ss = __USER_DS;
7302 + regs->flags = 0x200;
7305 + * Free the old FP and other extended state
7307 + free_thread_xstate(current);
7309 +EXPORT_SYMBOL_GPL(start_thread);
7311 +static void hard_disable_TSC(void)
7313 + write_cr4(read_cr4() | X86_CR4_TSD);
7316 +void disable_TSC(void)
7318 + preempt_disable();
7319 + if (!test_and_set_thread_flag(TIF_NOTSC))
7321 + * Must flip the CPU state synchronously with
7322 + * TIF_NOTSC in the current running context.
7324 + hard_disable_TSC();
7328 +static void hard_enable_TSC(void)
7330 + write_cr4(read_cr4() & ~X86_CR4_TSD);
7333 +static void enable_TSC(void)
7335 + preempt_disable();
7336 + if (test_and_clear_thread_flag(TIF_NOTSC))
7338 + * Must flip the CPU state synchronously with
7339 + * TIF_NOTSC in the current running context.
7341 + hard_enable_TSC();
7345 +int get_tsc_mode(unsigned long adr)
7349 + if (test_thread_flag(TIF_NOTSC))
7350 + val = PR_TSC_SIGSEGV;
7352 + val = PR_TSC_ENABLE;
7354 + return put_user(val, (unsigned int __user *)adr);
7357 +int set_tsc_mode(unsigned int val)
7359 + if (val == PR_TSC_SIGSEGV)
7361 + else if (val == PR_TSC_ENABLE)
7370 * This special macro can be used to load a debugging register
7372 @@ -491,12 +510,12 @@ static inline void __switch_to_xtra(stru
7373 /* we clear debugctl to make sure DS
7374 * is not in use when we change it */
7376 - wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
7377 + update_debugctlmsr(0);
7378 wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
7381 if (next->debugctlmsr != debugctl)
7382 - wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
7383 + update_debugctlmsr(next->debugctlmsr);
7385 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
7387 @@ -508,6 +527,15 @@ static inline void __switch_to_xtra(stru
7391 + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
7392 + test_tsk_thread_flag(next_p, TIF_NOTSC)) {
7393 + /* prev and next are different */
7394 + if (test_tsk_thread_flag(next_p, TIF_NOTSC))
7395 + hard_disable_TSC();
7397 + hard_enable_TSC();
7401 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
7402 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
7403 @@ -547,7 +575,7 @@ __switch_to(struct task_struct *prev_p,
7405 /* we're going to use this soon, after a few expensive things */
7406 if (next_p->fpu_counter>5)
7407 - prefetch(&next->i387.fxsave);
7408 + prefetch(next->xstate);
7411 * This is basically '__unlazy_fpu', except that we queue a
7412 @@ -680,8 +708,11 @@ __switch_to(struct task_struct *prev_p,
7413 /* If the task has used fpu the last 5 timeslices, just do a full
7414 * restore of the math state immediately to avoid the trap; the
7415 * chances of needing FPU soon are obviously high now
7417 + * tsk_used_math() checks prevent calling math_state_restore(),
7418 + * which can sleep in the case of !tsk_used_math()
7420 - if (next_p->fpu_counter>5)
7421 + if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
7422 math_state_restore();
7425 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
7426 +++ sle11-2009-10-16/arch/x86/kernel/setup-xen.c 2009-03-16 16:38:05.000000000 +0100
7428 +#include <linux/kernel.h>
7429 +#include <linux/module.h>
7430 +#include <linux/init.h>
7431 +#include <linux/bootmem.h>
7432 +#include <linux/percpu.h>
7433 +#include <asm/smp.h>
7434 +#include <asm/percpu.h>
7435 +#include <asm/sections.h>
7436 +#include <asm/processor.h>
7437 +#include <asm/setup.h>
7438 +#include <asm/topology.h>
7439 +#include <asm/mpspec.h>
7440 +#include <asm/apicdef.h>
7442 +#ifdef CONFIG_X86_LOCAL_APIC
7443 +unsigned int num_processors;
7444 +unsigned disabled_cpus __cpuinitdata;
7445 +/* Processor that is doing the boot up */
7446 +unsigned int boot_cpu_physical_apicid = -1U;
7447 +EXPORT_SYMBOL(boot_cpu_physical_apicid);
7449 +DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
7450 +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
7452 +/* Bitmask of physically existing CPUs */
7453 +physid_mask_t phys_cpu_present_map;
7456 +#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
7458 + * Copy data used in early init routines from the initial arrays to the
7459 + * per cpu data areas. These arrays then become expendable and the
7460 + * *_early_ptr's are zeroed indicating that the static arrays are gone.
7462 +static void __init setup_per_cpu_maps(void)
7467 + for_each_possible_cpu(cpu) {
7468 + per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
7469 + per_cpu(x86_bios_cpu_apicid, cpu) =
7470 + x86_bios_cpu_apicid_init[cpu];
7472 + per_cpu(x86_cpu_to_node_map, cpu) =
7473 + x86_cpu_to_node_map_init[cpu];
7477 + /* indicate the early static arrays will soon be gone */
7478 + x86_cpu_to_apicid_early_ptr = NULL;
7479 + x86_bios_cpu_apicid_early_ptr = NULL;
7481 + x86_cpu_to_node_map_early_ptr = NULL;
7486 +#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
7487 +cpumask_t *cpumask_of_cpu_map __read_mostly;
7488 +EXPORT_SYMBOL(cpumask_of_cpu_map);
7490 +/* requires nr_cpu_ids to be initialized */
7491 +static void __init setup_cpumask_of_cpu(void)
7495 + /* alloc_bootmem zeroes memory */
7496 + cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
7497 + for (i = 0; i < nr_cpu_ids; i++)
7498 + cpu_set(i, cpumask_of_cpu_map[i]);
7501 +static inline void setup_cpumask_of_cpu(void) { }
7504 +#ifdef CONFIG_X86_32
7506 + * Great future not-so-futuristic plan: make i386 and x86_64 do it
7509 +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
7510 +EXPORT_SYMBOL(__per_cpu_offset);
7514 + * Great future plan:
7515 + * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
7516 + * Always point %gs to its beginning
7518 +void __init setup_per_cpu_areas(void)
7520 + int i, highest_cpu = 0;
7521 + unsigned long size;
7523 +#ifdef CONFIG_HOTPLUG_CPU
7524 + prefill_possible_map();
7527 + /* Copy section for each CPU (we discard the original) */
7528 + size = PERCPU_ENOUGH_ROOM;
7529 + printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
7532 + for_each_possible_cpu(i) {
7534 +#ifndef CONFIG_NEED_MULTIPLE_NODES
7535 + ptr = alloc_bootmem_pages(size);
7537 + int node = early_cpu_to_node(i);
7538 + if (!node_online(node) || !NODE_DATA(node)) {
7539 + ptr = alloc_bootmem_pages(size);
7541 + "cpu %d has no node or node-local memory\n", i);
7544 + ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
7547 + panic("Cannot allocate cpu data for CPU %d\n", i);
7548 +#ifdef CONFIG_X86_64
7549 + cpu_pda(i)->data_offset = ptr - __per_cpu_start;
7551 + __per_cpu_offset[i] = ptr - __per_cpu_start;
7553 + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
7558 + nr_cpu_ids = highest_cpu + 1;
7559 + printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
7561 + /* Setup percpu data maps */
7562 + setup_per_cpu_maps();
7564 + /* Setup cpumask_of_cpu map */
7565 + setup_cpumask_of_cpu();
7569 --- sle11-2009-10-16.orig/arch/x86/kernel/setup64-xen.c 2009-03-16 16:33:40.000000000 +0100
7570 +++ sle11-2009-10-16/arch/x86/kernel/setup64-xen.c 2009-03-16 16:38:05.000000000 +0100
7572 #include <linux/bootmem.h>
7573 #include <linux/bitops.h>
7574 #include <linux/module.h>
7575 +#include <linux/kgdb.h>
7576 #include <asm/pda.h>
7577 #include <asm/pgtable.h>
7578 #include <asm/processor.h>
7580 #include <asm/proto.h>
7581 #include <asm/sections.h>
7582 #include <asm/setup.h>
7583 +#include <asm/genapic.h>
7585 #include <asm/hypervisor.h>
7587 @@ -81,8 +83,8 @@ int force_personality32 = 0;
7588 Control non executable heap for 32bit processes.
7589 To control the stack too use noexec=off
7591 -on PROT_READ does not imply PROT_EXEC for 32bit processes
7592 -off PROT_READ implies PROT_EXEC (default)
7593 +on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
7594 +off PROT_READ implies PROT_EXEC
7596 static int __init nonx32_setup(char *str)
7598 @@ -94,85 +96,6 @@ static int __init nonx32_setup(char *str
7600 __setup("noexec32=", nonx32_setup);
7603 - * Copy data used in early init routines from the initial arrays to the
7604 - * per cpu data areas. These arrays then become expendable and the
7605 - * *_early_ptr's are zeroed indicating that the static arrays are gone.
7607 -static void __init setup_per_cpu_maps(void)
7612 - for_each_possible_cpu(cpu) {
7614 - if (per_cpu_offset(cpu)) {
7616 - per_cpu(x86_cpu_to_apicid, cpu) =
7617 - x86_cpu_to_apicid_init[cpu];
7618 - per_cpu(x86_bios_cpu_apicid, cpu) =
7619 - x86_bios_cpu_apicid_init[cpu];
7621 - per_cpu(x86_cpu_to_node_map, cpu) =
7622 - x86_cpu_to_node_map_init[cpu];
7627 - printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
7632 - /* indicate the early static arrays will soon be gone */
7633 - x86_cpu_to_apicid_early_ptr = NULL;
7634 - x86_bios_cpu_apicid_early_ptr = NULL;
7636 - x86_cpu_to_node_map_early_ptr = NULL;
7642 - * Great future plan:
7643 - * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
7644 - * Always point %gs to its beginning
7646 -void __init setup_per_cpu_areas(void)
7649 - unsigned long size;
7651 -#ifdef CONFIG_HOTPLUG_CPU
7652 - prefill_possible_map();
7655 - /* Copy section for each CPU (we discard the original) */
7656 - size = PERCPU_ENOUGH_ROOM;
7658 - printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
7659 - for_each_cpu_mask (i, cpu_possible_map) {
7661 -#ifndef CONFIG_NEED_MULTIPLE_NODES
7662 - ptr = alloc_bootmem_pages(size);
7664 - int node = early_cpu_to_node(i);
7666 - if (!node_online(node) || !NODE_DATA(node))
7667 - ptr = alloc_bootmem_pages(size);
7669 - ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
7672 - panic("Cannot allocate cpu data for CPU %d\n", i);
7673 - cpu_pda(i)->data_offset = ptr - __per_cpu_start;
7674 - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
7677 - /* setup percpu data maps early */
7678 - setup_per_cpu_maps();
7682 static void __init_refok switch_pt(int cpu)
7684 @@ -410,6 +333,17 @@ void __cpuinit cpu_init (void)
7686 load_LDT(&init_mm.context);
7690 + * If the kgdb is connected no debug regs should be altered. This
7691 + * is only applicable when KGDB and a KGDB I/O module are built
7692 + * into the kernel and you are using early debugging with
7693 + * kgdbwait. KGDB will control the kernel HW breakpoint registers.
7695 + if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
7696 + arch_kgdb_ops.correct_hw_break();
7700 * Clear all 6 debug registers:
7702 @@ -420,10 +354,17 @@ void __cpuinit cpu_init (void)
7703 set_debugreg(0UL, 3);
7704 set_debugreg(0UL, 6);
7705 set_debugreg(0UL, 7);
7707 + /* If the kgdb is connected no debug regs should be altered. */
7713 asm ("pushfq; popq %0" : "=rm" (kernel_eflags));
7714 if (raw_irqs_disabled())
7715 kernel_eflags &= ~X86_EFLAGS_IF;
7717 + if (is_uv_system())
7720 --- sle11-2009-10-16.orig/arch/x86/kernel/setup_32-xen.c 2009-03-16 16:33:40.000000000 +0100
7721 +++ sle11-2009-10-16/arch/x86/kernel/setup_32-xen.c 2009-03-16 16:38:05.000000000 +0100
7723 #include <linux/efi.h>
7724 #include <linux/init.h>
7725 #include <linux/edd.h>
7726 +#include <linux/iscsi_ibft.h>
7727 #include <linux/nodemask.h>
7728 #include <linux/kernel.h>
7729 #include <linux/percpu.h>
7731 #include <linux/pfn.h>
7732 #include <linux/pci.h>
7733 #include <linux/init_ohci1394_dma.h>
7734 +#include <linux/kvm_para.h>
7736 #include <video/edid.h>
7739 #include <xen/firmware.h>
7740 #include <xen/xencons.h>
7741 #include <setup_arch.h>
7742 -#include <bios_ebda.h>
7743 +#include <asm/bios_ebda.h>
7744 #include <asm/cacheflush.h>
7745 +#include <asm/processor.h>
7748 #include <xen/interface/kexec.h>
7749 @@ -136,7 +139,12 @@ static struct resource standard_io_resou
7755 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
7757 + .name = "keyboard",
7760 .flags = IORESOURCE_BUSY | IORESOURCE_IO
7762 .name = "dma page reg",
7763 @@ -166,6 +174,8 @@ struct cpuinfo_x86 new_cpu_data __cpuini
7764 struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
7765 EXPORT_SYMBOL(boot_cpu_data);
7767 +unsigned int def_to_bigsmp;
7769 #ifndef CONFIG_X86_PAE
7770 unsigned long mmu_cr4_features;
7772 @@ -204,7 +214,7 @@ EXPORT_SYMBOL(ist_info);
7773 extern void early_cpu_init(void);
7774 extern int root_mountflags;
7776 -unsigned long saved_videomode;
7777 +unsigned long saved_video_mode;
7779 #define RAMDISK_IMAGE_START_MASK 0x07FF
7780 #define RAMDISK_PROMPT_FLAG 0x8000
7781 @@ -259,7 +269,7 @@ static inline void copy_edd(void)
7785 -int __initdata user_defined_memmap = 0;
7786 +int __initdata user_defined_memmap;
7789 * "mem=nopentium" disables the 4MB page tables.
7790 @@ -420,20 +430,59 @@ unsigned long __init find_max_low_pfn(vo
7794 +#define BIOS_LOWMEM_KILOBYTES 0x413
7797 - * workaround for Dell systems that neglect to reserve EBDA
7798 + * The BIOS places the EBDA/XBDA at the top of conventional
7799 + * memory, and usually decreases the reported amount of
7800 + * conventional memory (int 0x12) too. This also contains a
7801 + * workaround for Dell systems that neglect to reserve EBDA.
7802 + * The same workaround also avoids a problem with the AMD768MPX
7803 + * chipset: reserve a page before VGA to prevent PCI prefetch
7804 + * into it (errata #56). Usually the page is reserved anyways,
7805 + * unless you have no PS/2 mouse plugged in.
7807 static void __init reserve_ebda_region(void)
7809 - unsigned int addr;
7810 - addr = get_bios_ebda();
7812 - reserve_bootmem(addr, PAGE_SIZE, BOOTMEM_DEFAULT);
7813 + unsigned int lowmem, ebda_addr;
7815 + /* To determine the position of the EBDA and the */
7816 + /* end of conventional memory, we need to look at */
7817 + /* the BIOS data area. In a paravirtual environment */
7818 + /* that area is absent. We'll just have to assume */
7819 + /* that the paravirt case can handle memory setup */
7820 + /* correctly, without our help. */
7821 + if (paravirt_enabled())
7824 + /* end of low (conventional) memory */
7825 + lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
7828 + /* start of EBDA area */
7829 + ebda_addr = get_bios_ebda();
7831 + /* Fixup: bios puts an EBDA in the top 64K segment */
7832 + /* of conventional memory, but does not adjust lowmem. */
7833 + if ((lowmem - ebda_addr) <= 0x10000)
7834 + lowmem = ebda_addr;
7836 + /* Fixup: bios does not report an EBDA at all. */
7837 + /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
7838 + if ((ebda_addr == 0) && (lowmem >= 0x9f000))
7841 + /* Paranoia: should never happen, but... */
7842 + if ((lowmem == 0) || (lowmem >= 0x100000))
7845 + /* reserve all memory between lowmem and the 1MB mark */
7846 + reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT);
7850 #ifndef CONFIG_NEED_MULTIPLE_NODES
7851 -void __init setup_bootmem_allocator(void);
7852 +static void __init setup_bootmem_allocator(void);
7853 static unsigned long __init setup_memory(void)
7856 @@ -469,7 +518,7 @@ static unsigned long __init setup_memory
7860 -void __init zone_sizes_init(void)
7861 +static void __init zone_sizes_init(void)
7863 unsigned long max_zone_pfns[MAX_NR_ZONES];
7864 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
7865 @@ -521,10 +570,16 @@ static void __init reserve_crashkernel(v
7866 (unsigned long)(crash_size >> 20),
7867 (unsigned long)(crash_base >> 20),
7868 (unsigned long)(total_mem >> 20));
7870 + if (reserve_bootmem(crash_base, crash_size,
7871 + BOOTMEM_EXCLUSIVE) < 0) {
7872 + printk(KERN_INFO "crashkernel reservation "
7873 + "failed - memory is in use\n");
7877 crashk_res.start = crash_base;
7878 crashk_res.end = crash_base + crash_size - 1;
7879 - reserve_bootmem(crash_base, crash_size,
7882 printk(KERN_INFO "crashkernel reservation failed - "
7883 "you have to specify a base address\n");
7884 @@ -658,16 +713,9 @@ void __init setup_bootmem_allocator(void
7886 reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
7888 - /* reserve EBDA region, it's a 4K region */
7889 + /* reserve EBDA region */
7890 reserve_ebda_region();
7892 - /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
7893 - PCI prefetch into it (errata #56). Usually the page is reserved anyways,
7894 - unless you have no PS/2 mouse plugged in. */
7895 - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
7896 - boot_cpu_data.x86 == 6)
7897 - reserve_bootmem(0xa0000 - 4096, 4096, BOOTMEM_DEFAULT);
7901 * But first pinch a few for the stack/trampoline stuff
7902 @@ -689,6 +737,8 @@ void __init setup_bootmem_allocator(void
7905 reserve_crashkernel();
7907 + reserve_ibft_region();
7911 @@ -724,6 +774,18 @@ char * __init __attribute__((weak)) memo
7912 return machine_specific_memory_setup();
7917 + * In the golden day, when everything among i386 and x86_64 will be
7918 + * integrated, this will not live here
7920 +void *x86_cpu_to_node_map_early_ptr;
7921 +int x86_cpu_to_node_map_init[NR_CPUS] = {
7922 + [0 ... NR_CPUS-1] = NUMA_NO_NODE
7924 +DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
7928 * Determine if we were loaded by an EFI loader. If so, then we have also been
7929 * passed the efi memmap, systab, etc., so we should use these data structures
7930 @@ -773,7 +835,7 @@ void __init setup_arch(char **cmdline_p)
7932 apm_info.bios = boot_params.apm_bios_info;
7933 ist_info = boot_params.ist_info;
7934 - saved_videomode = boot_params.hdr.vid_mode;
7935 + saved_video_mode = boot_params.hdr.vid_mode;
7936 if( boot_params.sys_desc_table.length != 0 ) {
7937 set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
7938 machine_id = boot_params.sys_desc_table.table[0];
7939 @@ -840,15 +902,19 @@ void __init setup_arch(char **cmdline_p)
7942 /* update e820 for memory not covered by WB MTRRs */
7944 + propagate_e820_map();
7947 if (mtrr_trim_uncached_memory(max_pfn))
7949 + propagate_e820_map();
7952 max_low_pfn = setup_memory();
7954 +#ifdef CONFIG_KVM_CLOCK
7960 * Must be after max_low_pfn is determined, and before kernel
7961 @@ -856,6 +922,7 @@ void __init setup_arch(char **cmdline_p)
7968 * NOTE: before this point _nobody_ is allowed to allocate
7969 @@ -977,6 +1044,18 @@ void __init setup_arch(char **cmdline_p)
7973 +#if defined(CONFIG_X86_SMP) && !defined(CONFIG_XEN)
7975 + * setup to use the early static init tables during kernel startup
7976 + * X86_SMP will exclude sub-arches that don't deal well with it.
7978 + x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
7979 + x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
7981 + x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
7985 #ifdef CONFIG_X86_GENERICARCH
7986 generic_apic_probe();
7988 --- sle11-2009-10-16.orig/arch/x86/kernel/setup_64-xen.c 2009-03-16 16:33:40.000000000 +0100
7989 +++ sle11-2009-10-16/arch/x86/kernel/setup_64-xen.c 2009-03-16 16:38:05.000000000 +0100
7991 #include <linux/crash_dump.h>
7992 #include <linux/root_dev.h>
7993 #include <linux/pci.h>
7994 +#include <asm/pci-direct.h>
7995 #include <linux/efi.h>
7996 #include <linux/acpi.h>
7997 #include <linux/kallsyms.h>
7998 #include <linux/edd.h>
7999 +#include <linux/iscsi_ibft.h>
8000 #include <linux/mmzone.h>
8001 #include <linux/kexec.h>
8002 #include <linux/cpufreq.h>
8003 #include <linux/dmi.h>
8004 #include <linux/dma-mapping.h>
8005 #include <linux/ctype.h>
8006 +#include <linux/sort.h>
8007 #include <linux/uaccess.h>
8008 #include <linux/init_ohci1394_dma.h>
8009 +#include <linux/kvm_para.h>
8011 #include <asm/mtrr.h>
8012 #include <asm/uaccess.h>
8014 #include <asm/mmu_context.h>
8015 #include <asm/proto.h>
8016 #include <asm/setup.h>
8017 -#include <asm/mach_apic.h>
8018 #include <asm/numa.h>
8019 #include <asm/sections.h>
8020 #include <asm/dmi.h>
8022 #include <asm/mce.h>
8024 #include <asm/topology.h>
8025 +#include <asm/pat.h>
8027 +#include <mach_apic.h>
8029 #include <linux/percpu.h>
8030 #include <xen/interface/physdev.h>
8031 @@ -149,7 +155,7 @@ extern int root_mountflags;
8033 char __initdata command_line[COMMAND_LINE_SIZE];
8035 -struct resource standard_io_resources[] = {
8036 +static struct resource standard_io_resources[] = {
8037 { .name = "dma1", .start = 0x00, .end = 0x1f,
8038 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
8039 { .name = "pic1", .start = 0x20, .end = 0x21,
8040 @@ -158,7 +164,9 @@ struct resource standard_io_resources[]
8041 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
8042 { .name = "timer1", .start = 0x50, .end = 0x53,
8043 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
8044 - { .name = "keyboard", .start = 0x60, .end = 0x6f,
8045 + { .name = "keyboard", .start = 0x60, .end = 0x60,
8046 + .flags = IORESOURCE_BUSY | IORESOURCE_IO },
8047 + { .name = "keyboard", .start = 0x64, .end = 0x64,
8048 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
8049 { .name = "dma page reg", .start = 0x80, .end = 0x8f,
8050 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
8051 @@ -224,8 +232,10 @@ contig_initmem_init(unsigned long start_
8052 e820_register_active_regions(0, start_pfn, end_pfn);
8054 free_bootmem_with_active_regions(0, xen_start_info->nr_pages);
8055 + early_res_to_bootmem(0, xen_start_info->nr_pages<<PAGE_SHIFT);
8057 free_bootmem_with_active_regions(0, end_pfn);
8058 + early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
8060 reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
8062 @@ -290,6 +300,7 @@ static void __init reserve_crashkernel(v
8063 (unsigned long)(total_mem >> 20));
8064 crashk_res.start = crash_base;
8065 crashk_res.end = crash_base + crash_size - 1;
8066 + insert_resource(&iomem_resource, &crashk_res);
8070 @@ -306,6 +317,40 @@ void __attribute__((weak)) __init memory
8071 machine_specific_memory_setup();
8074 +static void __init parse_setup_data(void)
8076 + struct setup_data *data;
8077 + unsigned long pa_data;
8079 + if (boot_params.hdr.version < 0x0209)
8081 + pa_data = boot_params.hdr.setup_data;
8083 + data = early_ioremap(pa_data, PAGE_SIZE);
8084 + switch (data->type) {
8088 +#ifndef CONFIG_DEBUG_BOOT_PARAMS
8089 + free_early(pa_data, pa_data+sizeof(*data)+data->len);
8091 + pa_data = data->next;
8092 + early_iounmap(data, PAGE_SIZE);
8096 +#ifdef CONFIG_PCI_MMCONFIG
8097 +extern void __cpuinit fam10h_check_enable_mmcfg(void);
8098 +extern void __init check_enable_amd_mmconf_dmi(void);
8100 +void __cpuinit fam10h_check_enable_mmcfg(void)
8103 +void __init check_enable_amd_mmconf_dmi(void)
8109 * setup_arch - architecture-specific boot-time initializations
8111 @@ -389,6 +434,8 @@ void __init setup_arch(char **cmdline_p)
8112 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
8113 *cmdline_p = command_line;
8115 + parse_setup_data();
8117 parse_early_param();
8119 #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
8120 @@ -398,6 +445,13 @@ void __init setup_arch(char **cmdline_p)
8122 finish_e820_parsing();
8125 + /* after parse_early_param, so could debug it */
8126 + insert_resource(&iomem_resource, &code_resource);
8127 + insert_resource(&iomem_resource, &data_resource);
8128 + insert_resource(&iomem_resource, &bss_resource);
8131 early_gart_iommu_check();
8133 e820_register_active_regions(0, 0, -1UL);
8134 @@ -420,15 +474,23 @@ void __init setup_arch(char **cmdline_p)
8138 - init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
8139 + max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT));
8147 if (is_initial_xendomain())
8152 +#ifdef CONFIG_KVM_CLOCK
8156 #if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
8157 /* setup to use the early static init tables during kernel startup */
8158 x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
8159 @@ -459,9 +521,9 @@ void __init setup_arch(char **cmdline_p)
8160 contig_initmem_init(0, end_pfn);
8163 - early_res_to_bootmem();
8166 + dma32_reserve_bootmem();
8168 #ifdef CONFIG_ACPI_SLEEP
8170 * Reserve low memory region for sleep support.
8171 @@ -487,16 +549,17 @@ void __init setup_arch(char **cmdline_p)
8172 unsigned long end_of_mem = end_pfn << PAGE_SHIFT;
8174 if (ramdisk_end <= end_of_mem) {
8176 - reserve_bootmem_generic(ramdisk_image, ramdisk_size);
8179 + * don't need to reserve again, already reserved early
8180 + * in x86_64_start_kernel, and early_res_to_bootmem
8181 + * convert that to reserved in bootmem
8183 initrd_start = ramdisk_image + PAGE_OFFSET;
8184 initrd_end = initrd_start+ramdisk_size;
8186 initrd_below_start_ok = 1;
8189 - /* Assumes everything on node 0 */
8190 free_bootmem(ramdisk_image, ramdisk_size);
8191 printk(KERN_ERR "initrd extends beyond end of memory "
8192 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
8193 @@ -506,6 +569,9 @@ void __init setup_arch(char **cmdline_p)
8196 reserve_crashkernel();
8198 + reserve_ibft_region();
8202 #ifdef CONFIG_X86_LOCAL_APIC
8203 @@ -633,16 +699,16 @@ void __init setup_arch(char **cmdline_p)
8204 prefill_possible_map();
8210 * We trust e820 completely. No explicit ROM probing in memory.
8213 if (is_initial_xendomain())
8214 - e820_reserve_resources(machine_e820.map, machine_e820.nr_map,
8215 - &code_resource, &data_resource, &bss_resource);
8216 + e820_reserve_resources(machine_e820.map, machine_e820.nr_map);
8218 - e820_reserve_resources(e820.map, e820.nr_map,
8219 - &code_resource, &data_resource, &bss_resource);
8220 + e820_reserve_resources(e820.map, e820.nr_map);
8221 e820_mark_nosave_regions();
8224 @@ -690,6 +756,9 @@ void __init setup_arch(char **cmdline_p)
8227 #endif /* !CONFIG_XEN */
8229 + /* do this before identify_cpu for boot cpu */
8230 + check_enable_amd_mmconf_dmi();
8234 @@ -786,9 +855,9 @@ static void __cpuinit amd_detect_cmp(str
8235 bits = c->x86_coreid_bits;
8237 /* Low order bits define the core id (index of core in socket) */
8238 - c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
8239 - /* Convert the APIC ID into the socket ID */
8240 - c->phys_proc_id = phys_pkg_id(bits);
8241 + c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
8242 + /* Convert the initial APIC ID into the socket ID */
8243 + c->phys_proc_id = c->initial_apicid >> bits;
8246 node = c->phys_proc_id;
8247 @@ -805,7 +874,7 @@ static void __cpuinit amd_detect_cmp(str
8248 If that doesn't result in a usable node fall back to the
8249 path for the previous case. */
8251 - int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits);
8252 + int ht_nodeid = c->initial_apicid;
8254 if (ht_nodeid >= 0 &&
8255 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
8256 @@ -913,7 +982,7 @@ static void __cpuinit init_amd(struct cp
8258 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
8259 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
8260 - clear_bit(0*32+31, (unsigned long *)&c->x86_capability);
8261 + clear_cpu_cap(c, 0*32+31);
8263 /* On C+ stepping K8 rep microcode works well for copy/memset */
8264 level = cpuid_eax(1);
8265 @@ -955,9 +1024,25 @@ static void __cpuinit init_amd(struct cp
8266 /* MFENCE stops RDTSC speculation */
8267 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
8269 + if (c->x86 == 0x10)
8270 + fam10h_check_enable_mmcfg();
8273 if (amd_apic_timer_broken())
8274 disable_apic_timer = 1;
8276 + if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
8277 + unsigned long long tseg;
8280 + * Split up direct mapping around the TSEG SMM area.
8281 + * Don't do it for gbpages because there seems very little
8282 + * benefit in doing so.
8284 + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
8285 + (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
8286 + set_memory_4k((unsigned long)__va(tseg), 1);
8291 @@ -1051,7 +1136,7 @@ static void __cpuinit early_init_intel(s
8293 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
8294 (c->x86 == 0x6 && c->x86_model >= 0x0e))
8295 - set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
8296 + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
8299 static void __cpuinit init_intel(struct cpuinfo_x86 *c)
8300 @@ -1094,9 +1179,6 @@ static void __cpuinit init_intel(struct
8303 c->x86_cache_alignment = c->x86_clflush_size * 2;
8304 - if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
8305 - (c->x86 == 0x6 && c->x86_model >= 0x0e))
8306 - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
8308 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
8309 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
8310 @@ -1105,6 +1187,32 @@ static void __cpuinit init_intel(struct
8314 +static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
8316 + if (c->x86 == 0x6 && c->x86_model >= 0xf)
8317 + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
8320 +static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
8325 + n = c->extended_cpuid_level;
8326 + if (n >= 0x80000008) {
8327 + unsigned eax = cpuid_eax(0x80000008);
8328 + c->x86_virt_bits = (eax >> 8) & 0xff;
8329 + c->x86_phys_bits = eax & 0xff;
8332 + if (c->x86 == 0x6 && c->x86_model >= 0xf) {
8333 + c->x86_cache_alignment = c->x86_clflush_size * 2;
8334 + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
8335 + set_cpu_cap(c, X86_FEATURE_REP_GOOD);
8337 + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
8340 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
8342 char *v = c->x86_vendor_id;
8343 @@ -1113,6 +1221,8 @@ static void __cpuinit get_cpu_vendor(str
8344 c->x86_vendor = X86_VENDOR_AMD;
8345 else if (!strcmp(v, "GenuineIntel"))
8346 c->x86_vendor = X86_VENDOR_INTEL;
8347 + else if (!strcmp(v, "CentaurHauls"))
8348 + c->x86_vendor = X86_VENDOR_CENTAUR;
8350 c->x86_vendor = X86_VENDOR_UNKNOWN;
8352 @@ -1160,15 +1270,16 @@ static void __cpuinit early_identify_cpu
8353 c->x86 += (tfms >> 20) & 0xff;
8355 c->x86_model += ((tfms >> 16) & 0xF) << 4;
8356 - if (c->x86_capability[0] & (1<<19))
8357 + if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
8358 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
8360 /* Have CPUID level 0 only - unheard of */
8364 + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
8366 - c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
8367 + c->phys_proc_id = c->initial_apicid;
8369 /* AMD-defined flags: level 0x80000001 */
8370 xlvl = cpuid_eax(0x80000000);
8371 @@ -1201,8 +1312,12 @@ static void __cpuinit early_identify_cpu
8372 case X86_VENDOR_INTEL:
8373 early_init_intel(c);
8375 + case X86_VENDOR_CENTAUR:
8376 + early_init_centaur(c);
8380 + validate_pat_support(c);
8384 @@ -1237,6 +1352,10 @@ void __cpuinit identify_cpu(struct cpuin
8388 + case X86_VENDOR_CENTAUR:
8392 case X86_VENDOR_UNKNOWN:
8394 display_cacheinfo(c);
8395 @@ -1266,14 +1385,24 @@ void __cpuinit identify_cpu(struct cpuin
8397 select_idle_routine(c);
8399 - if (c != &boot_cpu_data)
8402 numa_add_cpu(smp_processor_id());
8407 +void __cpuinit identify_boot_cpu(void)
8409 + identify_cpu(&boot_cpu_data);
8412 +void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
8414 + BUG_ON(c == &boot_cpu_data);
8419 static __init int setup_noclflush(char *arg)
8421 setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
8422 @@ -1302,123 +1431,3 @@ static __init int setup_disablecpuid(cha
8425 __setup("clearcpuid=", setup_disablecpuid);
8428 - * Get CPU information for use by the procfs.
8431 -static int show_cpuinfo(struct seq_file *m, void *v)
8433 - struct cpuinfo_x86 *c = v;
8437 - cpu = c->cpu_index;
8440 - seq_printf(m, "processor\t: %u\n"
8441 - "vendor_id\t: %s\n"
8442 - "cpu family\t: %d\n"
8444 - "model name\t: %s\n",
8446 - c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
8448 - (int)c->x86_model,
8449 - c->x86_model_id[0] ? c->x86_model_id : "unknown");
8451 - if (c->x86_mask || c->cpuid_level >= 0)
8452 - seq_printf(m, "stepping\t: %d\n", c->x86_mask);
8454 - seq_printf(m, "stepping\t: unknown\n");
8456 - if (cpu_has(c, X86_FEATURE_TSC)) {
8457 - unsigned int freq = cpufreq_quick_get((unsigned)cpu);
8461 - seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
8462 - freq / 1000, (freq % 1000));
8466 - if (c->x86_cache_size >= 0)
8467 - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
8470 - if (smp_num_siblings * c->x86_max_cores > 1) {
8471 - seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
8472 - seq_printf(m, "siblings\t: %d\n",
8473 - cpus_weight(per_cpu(cpu_core_map, cpu)));
8474 - seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
8475 - seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
8481 - "fpu_exception\t: yes\n"
8482 - "cpuid level\t: %d\n"
8487 - for (i = 0; i < 32*NCAPINTS; i++)
8488 - if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
8489 - seq_printf(m, " %s", x86_cap_flags[i]);
8491 - seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
8492 - c->loops_per_jiffy/(500000/HZ),
8493 - (c->loops_per_jiffy/(5000/HZ)) % 100);
8495 - if (c->x86_tlbsize > 0)
8496 - seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
8497 - seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
8498 - seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
8500 - seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
8501 - c->x86_phys_bits, c->x86_virt_bits);
8503 - seq_printf(m, "power management:");
8504 - for (i = 0; i < 32; i++) {
8505 - if (c->x86_power & (1 << i)) {
8506 - if (i < ARRAY_SIZE(x86_power_flags) &&
8507 - x86_power_flags[i])
8508 - seq_printf(m, "%s%s",
8509 - x86_power_flags[i][0]?" ":"",
8510 - x86_power_flags[i]);
8512 - seq_printf(m, " [%d]", i);
8516 - seq_printf(m, "\n\n");
8521 -static void *c_start(struct seq_file *m, loff_t *pos)
8523 - if (*pos == 0) /* just in case, cpu 0 is not the first */
8524 - *pos = first_cpu(cpu_online_map);
8525 - if ((*pos) < NR_CPUS && cpu_online(*pos))
8526 - return &cpu_data(*pos);
8530 -static void *c_next(struct seq_file *m, void *v, loff_t *pos)
8532 - *pos = next_cpu(*pos, cpu_online_map);
8533 - return c_start(m, pos);
8536 -static void c_stop(struct seq_file *m, void *v)
8540 -const struct seq_operations cpuinfo_op = {
8544 - .show = show_cpuinfo,
8546 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
8547 +++ sle11-2009-10-16/arch/x86/kernel/smp-xen.c 2009-03-16 16:38:05.000000000 +0100
8550 + * Intel SMP support routines.
8552 + * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
8553 + * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
8554 + * (c) 2002,2003 Andi Kleen, SuSE Labs.
8556 + * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
8558 + * This code is released under the GNU General Public License version 2 or
8562 +#include <linux/init.h>
8564 +#include <linux/mm.h>
8565 +#include <linux/delay.h>
8566 +#include <linux/spinlock.h>
8567 +#include <linux/kernel_stat.h>
8568 +#include <linux/mc146818rtc.h>
8569 +#include <linux/cache.h>
8570 +#include <linux/interrupt.h>
8571 +#include <linux/cpu.h>
8573 +#include <asm/mtrr.h>
8574 +#include <asm/tlbflush.h>
8575 +#include <asm/mmu_context.h>
8576 +#include <asm/proto.h>
8577 +#include <mach_ipi.h>
8578 +#include <xen/evtchn.h>
8580 + * Some notes on x86 processor bugs affecting SMP operation:
8582 + * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
8583 + * The Linux implications for SMP are handled as follows:
8585 + * Pentium III / [Xeon]
8586 + * None of the E1AP-E3AP errata are visible to the user.
8588 + * E1AP. see PII A1AP
8589 + * E2AP. see PII A2AP
8590 + * E3AP. see PII A3AP
8592 + * Pentium II / [Xeon]
8593 + * None of the A1AP-A3AP errata are visible to the user.
8595 + * A1AP. see PPro 1AP
8596 + * A2AP. see PPro 2AP
8597 + * A3AP. see PPro 7AP
8600 + * None of 1AP-9AP errata are visible to the normal user,
8601 + * except occasional delivery of 'spurious interrupt' as trap #15.
8602 + * This is very rare and a non-problem.
8604 + * 1AP. Linux maps APIC as non-cacheable
8605 + * 2AP. worked around in hardware
8606 + * 3AP. fixed in C0 and above steppings microcode update.
8607 + * Linux does not use excessive STARTUP_IPIs.
8608 + * 4AP. worked around in hardware
8609 + * 5AP. symmetric IO mode (normal Linux operation) not affected.
8610 + * 'noapic' mode has vector 0xf filled out properly.
8611 + * 6AP. 'noapic' mode might be affected - fixed in later steppings
8612 + * 7AP. We do not assume writes to the LVT deassering IRQs
8613 + * 8AP. We do not enable low power mode (deep sleep) during MP bootup
8614 + * 9AP. We do not use mixed mode
8617 + * There is a marginal case where REP MOVS on 100MHz SMP
8618 + * machines with B stepping processors can fail. XXX should provide
8619 + * an L1cache=Writethrough or L1cache=off option.
8621 + * B stepping CPUs may hang. There are hardware work arounds
8622 + * for this. We warn about it in case your board doesn't have the work
8623 + * arounds. Basically that's so I can tell anyone with a B stepping
8624 + * CPU and SMP problems "tough".
8626 + * Specific items [From Pentium Processor Specification Update]
8628 + * 1AP. Linux doesn't use remote read
8629 + * 2AP. Linux doesn't trust APIC errors
8630 + * 3AP. We work around this
8631 + * 4AP. Linux never generated 3 interrupts of the same priority
8632 + * to cause a lost local interrupt.
8633 + * 5AP. Remote read is never used
8634 + * 6AP. not affected - worked around in hardware
8635 + * 7AP. not affected - worked around in hardware
8636 + * 8AP. worked around in hardware - we get explicit CS errors if not
8637 + * 9AP. only 'noapic' mode affected. Might generate spurious
8638 + * interrupts, we log only the first one and count the
8640 + * 10AP. not affected - worked around in hardware
8641 + * 11AP. Linux reads the APIC between writes to avoid this, as per
8642 + * the documentation. Make sure you preserve this as it affects
8643 + * the C stepping chips too.
8644 + * 12AP. not affected - worked around in hardware
8645 + * 13AP. not affected - worked around in hardware
8646 + * 14AP. we always deassert INIT during bootup
8647 + * 15AP. not affected - worked around in hardware
8648 + * 16AP. not affected - worked around in hardware
8649 + * 17AP. not affected - worked around in hardware
8650 + * 18AP. not affected - worked around in hardware
8651 + * 19AP. not affected - worked around in BIOS
8653 + * If this sounds worrying believe me these bugs are either ___RARE___,
8654 + * or are signal timing bugs worked around in hardware and there's
8655 + * about nothing of note with C stepping upwards.
8659 + * this function sends a 'reschedule' IPI to another CPU.
8660 + * it goes straight through and wastes no time serializing
8661 + * anything. Worst case is that we lose a reschedule ...
8663 +void xen_smp_send_reschedule(int cpu)
8665 + if (unlikely(cpu_is_offline(cpu))) {
8669 + send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
8673 + * Structure and data for smp_call_function(). This is designed to minimise
8674 + * static memory requirements. It also looks cleaner.
8676 +static DEFINE_SPINLOCK(call_lock);
8678 +struct call_data_struct {
8679 + void (*func) (void *info);
8682 + atomic_t finished;
8686 +void lock_ipi_call_lock(void)
8688 + spin_lock_irq(&call_lock);
8691 +void unlock_ipi_call_lock(void)
8693 + spin_unlock_irq(&call_lock);
8696 +static struct call_data_struct *call_data;
8698 +static void __smp_call_function(void (*func) (void *info), void *info,
8699 + int nonatomic, int wait)
8701 + struct call_data_struct data;
8702 + int cpus = num_online_cpus() - 1;
8709 + atomic_set(&data.started, 0);
8712 + atomic_set(&data.finished, 0);
8714 + call_data = &data;
8717 + /* Send a message to all other CPUs and wait for them to respond */
8718 + send_IPI_allbutself(CALL_FUNCTION_VECTOR);
8720 + /* Wait for response */
8721 + while (atomic_read(&data.started) != cpus)
8725 + while (atomic_read(&data.finished) != cpus)
8731 + * smp_call_function_mask(): Run a function on a set of other CPUs.
8732 + * @mask: The set of cpus to run on. Must not include the current cpu.
8733 + * @func: The function to run. This must be fast and non-blocking.
8734 + * @info: An arbitrary pointer to pass to the function.
8735 + * @wait: If true, wait (atomically) until function has completed on other CPUs.
8737 + * Returns 0 on success, else a negative status code.
8739 + * If @wait is true, then returns once @func has returned; otherwise
8740 + * it returns just before the target cpu calls @func.
8742 + * You must not call this function with disabled interrupts or from a
8743 + * hardware interrupt handler or from a bottom half handler.
8746 +xen_smp_call_function_mask(cpumask_t mask,
8747 + void (*func)(void *), void *info,
8750 + struct call_data_struct data;
8751 + cpumask_t allbutself;
8754 + /* Can deadlock when called with interrupts disabled */
8755 + WARN_ON(irqs_disabled());
8757 + /* Holding any lock stops cpus from going down. */
8758 + spin_lock(&call_lock);
8760 + allbutself = cpu_online_map;
8761 + cpu_clear(smp_processor_id(), allbutself);
8763 + cpus_and(mask, mask, allbutself);
8764 + cpus = cpus_weight(mask);
8767 + spin_unlock(&call_lock);
8773 + atomic_set(&data.started, 0);
8776 + atomic_set(&data.finished, 0);
8778 + call_data = &data;
8781 + /* Send a message to other CPUs */
8782 + if (cpus_equal(mask, allbutself) &&
8783 + cpus_equal(cpu_online_map, cpu_callout_map))
8784 + send_IPI_allbutself(CALL_FUNCTION_VECTOR);
8786 + send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
8788 + /* Wait for response */
8789 + while (atomic_read(&data.started) != cpus)
8793 + while (atomic_read(&data.finished) != cpus)
8795 + spin_unlock(&call_lock);
8800 +static void stop_this_cpu(void *dummy)
8802 + local_irq_disable();
8804 + * Remove this CPU:
8806 + cpu_clear(smp_processor_id(), cpu_online_map);
8807 + disable_all_local_evtchn();
8808 + if (hlt_works(smp_processor_id()))
8814 + * this function calls the 'stop' function on all other CPUs in the system.
8817 +void xen_smp_send_stop(void)
8820 + unsigned long flags;
8822 + /* Don't deadlock on the call lock in panic */
8823 + nolock = !spin_trylock(&call_lock);
8824 + local_irq_save(flags);
8825 + __smp_call_function(stop_this_cpu, NULL, 0, 0);
8827 + spin_unlock(&call_lock);
8828 + disable_all_local_evtchn();
8829 + local_irq_restore(flags);
8833 + * Reschedule call back. Nothing to do,
8834 + * all the work is done automatically when
8835 + * we return from the interrupt.
8837 +irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
8839 +#ifdef CONFIG_X86_32
8840 + __get_cpu_var(irq_stat).irq_resched_count++;
8842 + add_pda(irq_resched_count, 1);
8844 + return IRQ_HANDLED;
8847 +irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
8849 + void (*func) (void *info) = call_data->func;
8850 + void *info = call_data->info;
8851 + int wait = call_data->wait;
8854 + * Notify initiating CPU that I've grabbed the data and am
8855 + * about to execute the function
8858 + atomic_inc(&call_data->started);
8860 + * At this point the info structure may be out of scope unless wait==1
8864 +#ifdef CONFIG_X86_32
8865 + __get_cpu_var(irq_stat).irq_call_count++;
8867 + add_pda(irq_call_count, 1);
8873 + atomic_inc(&call_data->finished);
8876 + return IRQ_HANDLED;
8878 --- sle11-2009-10-16.orig/arch/x86/kernel/smp_32-xen.c 2009-03-16 16:33:40.000000000 +0100
8879 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
8882 - * Intel SMP support routines.
8884 - * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
8885 - * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
8887 - * This code is released under the GNU General Public License version 2 or
8891 -#include <linux/init.h>
8893 -#include <linux/mm.h>
8894 -#include <linux/delay.h>
8895 -#include <linux/spinlock.h>
8896 -#include <linux/kernel_stat.h>
8897 -#include <linux/mc146818rtc.h>
8898 -#include <linux/cache.h>
8899 -#include <linux/interrupt.h>
8900 -#include <linux/cpu.h>
8901 -#include <linux/module.h>
8903 -#include <asm/mtrr.h>
8904 -#include <asm/tlbflush.h>
8905 -#include <asm/mmu_context.h>
8907 -#include <mach_apic.h>
8909 -#include <xen/evtchn.h>
8912 - * Some notes on x86 processor bugs affecting SMP operation:
8914 - * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
8915 - * The Linux implications for SMP are handled as follows:
8917 - * Pentium III / [Xeon]
8918 - * None of the E1AP-E3AP errata are visible to the user.
8920 - * E1AP. see PII A1AP
8921 - * E2AP. see PII A2AP
8922 - * E3AP. see PII A3AP
8924 - * Pentium II / [Xeon]
8925 - * None of the A1AP-A3AP errata are visible to the user.
8927 - * A1AP. see PPro 1AP
8928 - * A2AP. see PPro 2AP
8929 - * A3AP. see PPro 7AP
8932 - * None of 1AP-9AP errata are visible to the normal user,
8933 - * except occasional delivery of 'spurious interrupt' as trap #15.
8934 - * This is very rare and a non-problem.
8936 - * 1AP. Linux maps APIC as non-cacheable
8937 - * 2AP. worked around in hardware
8938 - * 3AP. fixed in C0 and above steppings microcode update.
8939 - * Linux does not use excessive STARTUP_IPIs.
8940 - * 4AP. worked around in hardware
8941 - * 5AP. symmetric IO mode (normal Linux operation) not affected.
8942 - * 'noapic' mode has vector 0xf filled out properly.
8943 - * 6AP. 'noapic' mode might be affected - fixed in later steppings
8944 - * 7AP. We do not assume writes to the LVT deassering IRQs
8945 - * 8AP. We do not enable low power mode (deep sleep) during MP bootup
8946 - * 9AP. We do not use mixed mode
8949 - * There is a marginal case where REP MOVS on 100MHz SMP
8950 - * machines with B stepping processors can fail. XXX should provide
8951 - * an L1cache=Writethrough or L1cache=off option.
8953 - * B stepping CPUs may hang. There are hardware work arounds
8954 - * for this. We warn about it in case your board doesn't have the work
8955 - * arounds. Basically that's so I can tell anyone with a B stepping
8956 - * CPU and SMP problems "tough".
8958 - * Specific items [From Pentium Processor Specification Update]
8960 - * 1AP. Linux doesn't use remote read
8961 - * 2AP. Linux doesn't trust APIC errors
8962 - * 3AP. We work around this
8963 - * 4AP. Linux never generated 3 interrupts of the same priority
8964 - * to cause a lost local interrupt.
8965 - * 5AP. Remote read is never used
8966 - * 6AP. not affected - worked around in hardware
8967 - * 7AP. not affected - worked around in hardware
8968 - * 8AP. worked around in hardware - we get explicit CS errors if not
8969 - * 9AP. only 'noapic' mode affected. Might generate spurious
8970 - * interrupts, we log only the first one and count the
8972 - * 10AP. not affected - worked around in hardware
8973 - * 11AP. Linux reads the APIC between writes to avoid this, as per
8974 - * the documentation. Make sure you preserve this as it affects
8975 - * the C stepping chips too.
8976 - * 12AP. not affected - worked around in hardware
8977 - * 13AP. not affected - worked around in hardware
8978 - * 14AP. we always deassert INIT during bootup
8979 - * 15AP. not affected - worked around in hardware
8980 - * 16AP. not affected - worked around in hardware
8981 - * 17AP. not affected - worked around in hardware
8982 - * 18AP. not affected - worked around in hardware
8983 - * 19AP. not affected - worked around in BIOS
8985 - * If this sounds worrying believe me these bugs are either ___RARE___,
8986 - * or are signal timing bugs worked around in hardware and there's
8987 - * about nothing of note with C stepping upwards.
8990 -DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
8993 - * the following functions deal with sending IPIs between CPUs.
8995 - * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
8998 -static inline int __prepare_ICR (unsigned int shortcut, int vector)
9000 - unsigned int icr = shortcut | APIC_DEST_LOGICAL;
9004 - icr |= APIC_DM_FIXED | vector;
9007 - icr |= APIC_DM_NMI;
9013 -static inline int __prepare_ICR2 (unsigned int mask)
9015 - return SET_APIC_DEST_FIELD(mask);
9018 -DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
9020 -static inline void __send_IPI_one(unsigned int cpu, int vector)
9022 - int irq = per_cpu(ipi_to_irq, cpu)[vector];
9024 - notify_remote_via_irq(irq);
9027 -void __send_IPI_shortcut(unsigned int shortcut, int vector)
9031 - switch (shortcut) {
9032 - case APIC_DEST_SELF:
9033 - __send_IPI_one(smp_processor_id(), vector);
9035 - case APIC_DEST_ALLBUT:
9036 - for (cpu = 0; cpu < NR_CPUS; ++cpu) {
9037 - if (cpu == smp_processor_id())
9039 - if (cpu_isset(cpu, cpu_online_map)) {
9040 - __send_IPI_one(cpu, vector);
9045 - printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
9051 -void send_IPI_self(int vector)
9053 - __send_IPI_shortcut(APIC_DEST_SELF, vector);
9057 - * This is only used on smaller machines.
9059 -void send_IPI_mask_bitmask(cpumask_t mask, int vector)
9061 - unsigned long flags;
9064 - local_irq_save(flags);
9065 - WARN_ON(cpus_addr(mask)[0] & ~cpus_addr(cpu_online_map)[0]);
9067 - for (cpu = 0; cpu < NR_CPUS; ++cpu) {
9068 - if (cpu_isset(cpu, mask)) {
9069 - __send_IPI_one(cpu, vector);
9073 - local_irq_restore(flags);
9076 -void send_IPI_mask_sequence(cpumask_t mask, int vector)
9079 - send_IPI_mask_bitmask(mask, vector);
9082 -#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
9086 - * Smarter SMP flushing macros.
9087 - * c/o Linus Torvalds.
9089 - * These mean you can really definitely utterly forget about
9090 - * writing to user space from interrupts. (Its not allowed anyway).
9092 - * Optimizations Manfred Spraul <manfred@colorfullife.com>
9095 -static cpumask_t flush_cpumask;
9096 -static struct mm_struct * flush_mm;
9097 -static unsigned long flush_va;
9098 -static DEFINE_SPINLOCK(tlbstate_lock);
9101 - * We cannot call mmdrop() because we are in interrupt context,
9102 - * instead update mm->cpu_vm_mask.
9104 - * We need to reload %cr3 since the page tables may be going
9105 - * away from under us..
9107 -void leave_mm(int cpu)
9109 - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
9111 - cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
9112 - load_cr3(swapper_pg_dir);
9114 -EXPORT_SYMBOL_GPL(leave_mm);
9118 - * The flush IPI assumes that a thread switch happens in this order:
9119 - * [cpu0: the cpu that switches]
9120 - * 1) switch_mm() either 1a) or 1b)
9121 - * 1a) thread switch to a different mm
9122 - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
9123 - * Stop ipi delivery for the old mm. This is not synchronized with
9124 - * the other cpus, but smp_invalidate_interrupt ignore flush ipis
9125 - * for the wrong mm, and in the worst case we perform a superfluous
9127 - * 1a2) set cpu_tlbstate to TLBSTATE_OK
9128 - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
9129 - * was in lazy tlb mode.
9130 - * 1a3) update cpu_tlbstate[].active_mm
9131 - * Now cpu0 accepts tlb flushes for the new mm.
9132 - * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
9133 - * Now the other cpus will send tlb flush ipis.
9134 - * 1a4) change cr3.
9135 - * 1b) thread switch without mm change
9136 - * cpu_tlbstate[].active_mm is correct, cpu0 already handles
9138 - * 1b1) set cpu_tlbstate to TLBSTATE_OK
9139 - * 1b2) test_and_set the cpu bit in cpu_vm_mask.
9140 - * Atomically set the bit [other cpus will start sending flush ipis],
9141 - * and test the bit.
9142 - * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
9143 - * 2) switch %%esp, ie current
9145 - * The interrupt must handle 2 special cases:
9146 - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
9147 - * - the cpu performs speculative tlb reads, i.e. even if the cpu only
9148 - * runs in kernel space, the cpu could load tlb entries for user space
9151 - * The good news is that cpu_tlbstate is local to each cpu, no
9152 - * write/read ordering problems.
9158 - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
9159 - * 2) Leave the mm if we are in the lazy tlb mode.
9162 -irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
9164 - unsigned long cpu;
9168 - if (!cpu_isset(cpu, flush_cpumask))
9171 - * This was a BUG() but until someone can quote me the
9172 - * line from the intel manual that guarantees an IPI to
9173 - * multiple CPUs is retried _only_ on the erroring CPUs
9174 - * its staying as a return
9179 - if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
9180 - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
9181 - if (flush_va == TLB_FLUSH_ALL)
9182 - local_flush_tlb();
9184 - __flush_tlb_one(flush_va);
9188 - smp_mb__before_clear_bit();
9189 - cpu_clear(cpu, flush_cpumask);
9190 - smp_mb__after_clear_bit();
9192 - put_cpu_no_resched();
9193 - __get_cpu_var(irq_stat).irq_tlb_count++;
9195 - return IRQ_HANDLED;
9198 -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
9201 - cpumask_t cpumask = *cpumaskp;
9204 - * A couple of (to be removed) sanity checks:
9206 - * - current CPU must not be in mask
9207 - * - mask must exist :)
9209 - BUG_ON(cpus_empty(cpumask));
9210 - BUG_ON(cpu_isset(smp_processor_id(), cpumask));
9213 -#ifdef CONFIG_HOTPLUG_CPU
9214 - /* If a CPU which we ran on has gone down, OK. */
9215 - cpus_and(cpumask, cpumask, cpu_online_map);
9216 - if (unlikely(cpus_empty(cpumask)))
9221 - * i'm not happy about this global shared spinlock in the
9222 - * MM hot path, but we'll see how contended it is.
9223 - * AK: x86-64 has a faster method that could be ported.
9225 - spin_lock(&tlbstate_lock);
9229 - cpus_or(flush_cpumask, cpumask, flush_cpumask);
9231 - * We have to send the IPI only to
9234 - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
9236 - while (!cpus_empty(flush_cpumask))
9237 - /* nothing. lockup detection does not belong here */
9242 - spin_unlock(&tlbstate_lock);
9245 -void flush_tlb_current_task(void)
9247 - struct mm_struct *mm = current->mm;
9248 - cpumask_t cpu_mask;
9250 - preempt_disable();
9251 - cpu_mask = mm->cpu_vm_mask;
9252 - cpu_clear(smp_processor_id(), cpu_mask);
9254 - local_flush_tlb();
9255 - if (!cpus_empty(cpu_mask))
9256 - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
9260 -void flush_tlb_mm (struct mm_struct * mm)
9262 - cpumask_t cpu_mask;
9264 - preempt_disable();
9265 - cpu_mask = mm->cpu_vm_mask;
9266 - cpu_clear(smp_processor_id(), cpu_mask);
9268 - if (current->active_mm == mm) {
9270 - local_flush_tlb();
9272 - leave_mm(smp_processor_id());
9274 - if (!cpus_empty(cpu_mask))
9275 - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
9280 -void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
9282 - struct mm_struct *mm = vma->vm_mm;
9283 - cpumask_t cpu_mask;
9285 - preempt_disable();
9286 - cpu_mask = mm->cpu_vm_mask;
9287 - cpu_clear(smp_processor_id(), cpu_mask);
9289 - if (current->active_mm == mm) {
9291 - __flush_tlb_one(va);
9293 - leave_mm(smp_processor_id());
9296 - if (!cpus_empty(cpu_mask))
9297 - flush_tlb_others(cpu_mask, mm, va);
9301 -EXPORT_SYMBOL(flush_tlb_page);
9303 -static void do_flush_tlb_all(void* info)
9305 - unsigned long cpu = smp_processor_id();
9307 - __flush_tlb_all();
9308 - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
9312 -void flush_tlb_all(void)
9314 - on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
9320 - * this function sends a 'reschedule' IPI to another CPU.
9321 - * it goes straight through and wastes no time serializing
9322 - * anything. Worst case is that we lose a reschedule ...
9324 -void xen_smp_send_reschedule(int cpu)
9326 - WARN_ON(cpu_is_offline(cpu));
9327 - send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
9331 - * Structure and data for smp_call_function(). This is designed to minimise
9332 - * static memory requirements. It also looks cleaner.
9334 -static DEFINE_SPINLOCK(call_lock);
9336 -struct call_data_struct {
9337 - void (*func) (void *info);
9340 - atomic_t finished;
9344 -void lock_ipi_call_lock(void)
9346 - spin_lock_irq(&call_lock);
9349 -void unlock_ipi_call_lock(void)
9351 - spin_unlock_irq(&call_lock);
9354 -static struct call_data_struct *call_data;
9356 -static void __smp_call_function(void (*func) (void *info), void *info,
9357 - int nonatomic, int wait)
9359 - struct call_data_struct data;
9360 - int cpus = num_online_cpus() - 1;
9367 - atomic_set(&data.started, 0);
9370 - atomic_set(&data.finished, 0);
9372 - call_data = &data;
9375 - /* Send a message to all other CPUs and wait for them to respond */
9376 - send_IPI_allbutself(CALL_FUNCTION_VECTOR);
9378 - /* Wait for response */
9379 - while (atomic_read(&data.started) != cpus)
9383 - while (atomic_read(&data.finished) != cpus)
9389 - * smp_call_function_mask(): Run a function on a set of other CPUs.
9390 - * @mask: The set of cpus to run on. Must not include the current cpu.
9391 - * @func: The function to run. This must be fast and non-blocking.
9392 - * @info: An arbitrary pointer to pass to the function.
9393 - * @wait: If true, wait (atomically) until function has completed on other CPUs.
9395 - * Returns 0 on success, else a negative status code.
9397 - * If @wait is true, then returns once @func has returned; otherwise
9398 - * it returns just before the target cpu calls @func.
9400 - * You must not call this function with disabled interrupts or from a
9401 - * hardware interrupt handler or from a bottom half handler.
9404 -xen_smp_call_function_mask(cpumask_t mask,
9405 - void (*func)(void *), void *info,
9408 - struct call_data_struct data;
9409 - cpumask_t allbutself;
9412 - /* Can deadlock when called with interrupts disabled */
9413 - WARN_ON(irqs_disabled());
9415 - /* Holding any lock stops cpus from going down. */
9416 - spin_lock(&call_lock);
9418 - allbutself = cpu_online_map;
9419 - cpu_clear(smp_processor_id(), allbutself);
9421 - cpus_and(mask, mask, allbutself);
9422 - cpus = cpus_weight(mask);
9425 - spin_unlock(&call_lock);
9431 - atomic_set(&data.started, 0);
9434 - atomic_set(&data.finished, 0);
9436 - call_data = &data;
9439 - /* Send a message to other CPUs */
9440 - if (cpus_equal(mask, allbutself))
9441 - send_IPI_allbutself(CALL_FUNCTION_VECTOR);
9443 - send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
9445 - /* Wait for response */
9446 - while (atomic_read(&data.started) != cpus)
9450 - while (atomic_read(&data.finished) != cpus)
9452 - spin_unlock(&call_lock);
9457 -static void stop_this_cpu (void * dummy)
9459 - local_irq_disable();
9461 - * Remove this CPU:
9463 - cpu_clear(smp_processor_id(), cpu_online_map);
9464 - disable_all_local_evtchn();
9465 - if (cpu_data(smp_processor_id()).hlt_works_ok)
9471 - * this function calls the 'stop' function on all other CPUs in the system.
9474 -void xen_smp_send_stop(void)
9476 - /* Don't deadlock on the call lock in panic */
9477 - int nolock = !spin_trylock(&call_lock);
9478 - unsigned long flags;
9480 - local_irq_save(flags);
9481 - __smp_call_function(stop_this_cpu, NULL, 0, 0);
9483 - spin_unlock(&call_lock);
9484 - disable_all_local_evtchn();
9485 - local_irq_restore(flags);
9489 - * Reschedule call back. Nothing to do,
9490 - * all the work is done automatically when
9491 - * we return from the interrupt.
9493 -irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
9495 - __get_cpu_var(irq_stat).irq_resched_count++;
9497 - return IRQ_HANDLED;
9500 -#include <linux/kallsyms.h>
9501 -irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
9503 - void (*func) (void *info) = call_data->func;
9504 - void *info = call_data->info;
9505 - int wait = call_data->wait;
9508 - * Notify initiating CPU that I've grabbed the data and am
9509 - * about to execute the function
9512 - atomic_inc(&call_data->started);
9514 - * At this point the info structure may be out of scope unless wait==1
9518 - __get_cpu_var(irq_stat).irq_call_count++;
9523 - atomic_inc(&call_data->finished);
9526 - return IRQ_HANDLED;
9528 --- sle11-2009-10-16.orig/arch/x86/kernel/smp_64-xen.c 2009-03-16 16:33:40.000000000 +0100
9529 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
9532 - * Intel SMP support routines.
9534 - * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
9535 - * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
9536 - * (c) 2002,2003 Andi Kleen, SuSE Labs.
9538 - * This code is released under the GNU General Public License version 2 or
9542 -#include <linux/init.h>
9544 -#include <linux/mm.h>
9545 -#include <linux/delay.h>
9546 -#include <linux/spinlock.h>
9547 -#include <linux/smp.h>
9548 -#include <linux/kernel_stat.h>
9549 -#include <linux/mc146818rtc.h>
9550 -#include <linux/interrupt.h>
9552 -#include <asm/mtrr.h>
9553 -#include <asm/pgalloc.h>
9554 -#include <asm/tlbflush.h>
9555 -#include <asm/mach_apic.h>
9556 -#include <asm/mmu_context.h>
9557 -#include <asm/proto.h>
9558 -#include <asm/apicdef.h>
9559 -#include <asm/idle.h>
9561 -#include <xen/evtchn.h>
9566 - * Smarter SMP flushing macros.
9567 - * c/o Linus Torvalds.
9569 - * These mean you can really definitely utterly forget about
9570 - * writing to user space from interrupts. (Its not allowed anyway).
9572 - * Optimizations Manfred Spraul <manfred@colorfullife.com>
9574 - * More scalable flush, from Andi Kleen
9576 - * To avoid global state use 8 different call vectors.
9577 - * Each CPU uses a specific vector to trigger flushes on other
9578 - * CPUs. Depending on the received vector the target CPUs look into
9579 - * the right per cpu variable for the flush data.
9581 - * With more than 8 CPUs they are hashed to the 8 available
9582 - * vectors. The limited global vector space forces us to this right now.
9583 - * In future when interrupts are split into per CPU domains this could be
9584 - * fixed, at the cost of triggering multiple IPIs in some cases.
9587 -union smp_flush_state {
9589 - cpumask_t flush_cpumask;
9590 - struct mm_struct *flush_mm;
9591 - unsigned long flush_va;
9592 - spinlock_t tlbstate_lock;
9594 - char pad[SMP_CACHE_BYTES];
9595 -} ____cacheline_aligned;
9597 -/* State is put into the per CPU data section, but padded
9598 - to a full cache line because other CPUs can access it and we don't
9599 - want false sharing in the per cpu data segment. */
9600 -static DEFINE_PER_CPU(union smp_flush_state, flush_state);
9603 - * We cannot call mmdrop() because we are in interrupt context,
9604 - * instead update mm->cpu_vm_mask.
9606 -void leave_mm(int cpu)
9608 - if (read_pda(mmu_state) == TLBSTATE_OK)
9610 - cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
9611 - load_cr3(swapper_pg_dir);
9613 -EXPORT_SYMBOL_GPL(leave_mm);
9617 - * The flush IPI assumes that a thread switch happens in this order:
9618 - * [cpu0: the cpu that switches]
9619 - * 1) switch_mm() either 1a) or 1b)
9620 - * 1a) thread switch to a different mm
9621 - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
9622 - * Stop ipi delivery for the old mm. This is not synchronized with
9623 - * the other cpus, but smp_invalidate_interrupt ignore flush ipis
9624 - * for the wrong mm, and in the worst case we perform a superfluous
9626 - * 1a2) set cpu mmu_state to TLBSTATE_OK
9627 - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
9628 - * was in lazy tlb mode.
9629 - * 1a3) update cpu active_mm
9630 - * Now cpu0 accepts tlb flushes for the new mm.
9631 - * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
9632 - * Now the other cpus will send tlb flush ipis.
9633 - * 1a4) change cr3.
9634 - * 1b) thread switch without mm change
9635 - * cpu active_mm is correct, cpu0 already handles
9637 - * 1b1) set cpu mmu_state to TLBSTATE_OK
9638 - * 1b2) test_and_set the cpu bit in cpu_vm_mask.
9639 - * Atomically set the bit [other cpus will start sending flush ipis],
9640 - * and test the bit.
9641 - * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
9642 - * 2) switch %%esp, ie current
9644 - * The interrupt must handle 2 special cases:
9645 - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
9646 - * - the cpu performs speculative tlb reads, i.e. even if the cpu only
9647 - * runs in kernel space, the cpu could load tlb entries for user space
9650 - * The good news is that cpu mmu_state is local to each cpu, no
9651 - * write/read ordering problems.
9657 - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
9658 - * 2) Leave the mm if we are in the lazy tlb mode.
9660 - * Interrupts are disabled.
9663 -asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
9667 - union smp_flush_state *f;
9669 - cpu = smp_processor_id();
9671 - * orig_rax contains the negated interrupt vector.
9672 - * Use that to determine where the sender put the data.
9674 - sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
9675 - f = &per_cpu(flush_state, sender);
9677 - if (!cpu_isset(cpu, f->flush_cpumask))
9680 - * This was a BUG() but until someone can quote me the
9681 - * line from the intel manual that guarantees an IPI to
9682 - * multiple CPUs is retried _only_ on the erroring CPUs
9683 - * its staying as a return
9688 - if (f->flush_mm == read_pda(active_mm)) {
9689 - if (read_pda(mmu_state) == TLBSTATE_OK) {
9690 - if (f->flush_va == TLB_FLUSH_ALL)
9691 - local_flush_tlb();
9693 - __flush_tlb_one(f->flush_va);
9699 - cpu_clear(cpu, f->flush_cpumask);
9700 - add_pda(irq_tlb_count, 1);
9703 -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
9707 - union smp_flush_state *f;
9708 - cpumask_t cpumask = *cpumaskp;
9710 - /* Caller has disabled preemption */
9711 - sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
9712 - f = &per_cpu(flush_state, sender);
9715 - * Could avoid this lock when
9716 - * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
9717 - * probably not worth checking this for a cache-hot lock.
9719 - spin_lock(&f->tlbstate_lock);
9723 - cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
9726 - * We have to send the IPI only to
9729 - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
9731 - while (!cpus_empty(f->flush_cpumask))
9734 - f->flush_mm = NULL;
9736 - spin_unlock(&f->tlbstate_lock);
9739 -int __cpuinit init_smp_flush(void)
9743 - for_each_cpu_mask(i, cpu_possible_map) {
9744 - spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
9748 -core_initcall(init_smp_flush);
9750 -void flush_tlb_current_task(void)
9752 - struct mm_struct *mm = current->mm;
9753 - cpumask_t cpu_mask;
9755 - preempt_disable();
9756 - cpu_mask = mm->cpu_vm_mask;
9757 - cpu_clear(smp_processor_id(), cpu_mask);
9759 - local_flush_tlb();
9760 - if (!cpus_empty(cpu_mask))
9761 - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
9765 -void flush_tlb_mm (struct mm_struct * mm)
9767 - cpumask_t cpu_mask;
9769 - preempt_disable();
9770 - cpu_mask = mm->cpu_vm_mask;
9771 - cpu_clear(smp_processor_id(), cpu_mask);
9773 - if (current->active_mm == mm) {
9775 - local_flush_tlb();
9777 - leave_mm(smp_processor_id());
9779 - if (!cpus_empty(cpu_mask))
9780 - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
9785 -void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
9787 - struct mm_struct *mm = vma->vm_mm;
9788 - cpumask_t cpu_mask;
9790 - preempt_disable();
9791 - cpu_mask = mm->cpu_vm_mask;
9792 - cpu_clear(smp_processor_id(), cpu_mask);
9794 - if (current->active_mm == mm) {
9796 - __flush_tlb_one(va);
9798 - leave_mm(smp_processor_id());
9801 - if (!cpus_empty(cpu_mask))
9802 - flush_tlb_others(cpu_mask, mm, va);
9807 -static void do_flush_tlb_all(void* info)
9809 - unsigned long cpu = smp_processor_id();
9811 - __flush_tlb_all();
9812 - if (read_pda(mmu_state) == TLBSTATE_LAZY)
9816 -void flush_tlb_all(void)
9818 - on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
9823 - * this function sends a 'reschedule' IPI to another CPU.
9824 - * it goes straight through and wastes no time serializing
9825 - * anything. Worst case is that we lose a reschedule ...
9828 -void smp_send_reschedule(int cpu)
9830 - send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
9834 - * Structure and data for smp_call_function(). This is designed to minimise
9835 - * static memory requirements. It also looks cleaner.
9837 -static DEFINE_SPINLOCK(call_lock);
9839 -struct call_data_struct {
9840 - void (*func) (void *info);
9843 - atomic_t finished;
9847 -static struct call_data_struct * call_data;
9849 -void lock_ipi_call_lock(void)
9851 - spin_lock_irq(&call_lock);
9854 -void unlock_ipi_call_lock(void)
9856 - spin_unlock_irq(&call_lock);
9860 - * this function sends a 'generic call function' IPI to all other CPU
9861 - * of the system defined in the mask.
9863 -static int __smp_call_function_mask(cpumask_t mask,
9864 - void (*func)(void *), void *info,
9867 - struct call_data_struct data;
9868 - cpumask_t allbutself;
9871 - allbutself = cpu_online_map;
9872 - cpu_clear(smp_processor_id(), allbutself);
9874 - cpus_and(mask, mask, allbutself);
9875 - cpus = cpus_weight(mask);
9882 - atomic_set(&data.started, 0);
9885 - atomic_set(&data.finished, 0);
9887 - call_data = &data;
9890 - /* Send a message to other CPUs */
9891 - if (cpus_equal(mask, allbutself))
9892 - send_IPI_allbutself(CALL_FUNCTION_VECTOR);
9894 - send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
9896 - /* Wait for response */
9897 - while (atomic_read(&data.started) != cpus)
9903 - while (atomic_read(&data.finished) != cpus)
9909 - * smp_call_function_mask(): Run a function on a set of other CPUs.
9910 - * @mask: The set of cpus to run on. Must not include the current cpu.
9911 - * @func: The function to run. This must be fast and non-blocking.
9912 - * @info: An arbitrary pointer to pass to the function.
9913 - * @wait: If true, wait (atomically) until function has completed on other CPUs.
9915 - * Returns 0 on success, else a negative status code.
9917 - * If @wait is true, then returns once @func has returned; otherwise
9918 - * it returns just before the target cpu calls @func.
9920 - * You must not call this function with disabled interrupts or from a
9921 - * hardware interrupt handler or from a bottom half handler.
9923 -int smp_call_function_mask(cpumask_t mask,
9924 - void (*func)(void *), void *info,
9929 - /* Can deadlock when called with interrupts disabled */
9930 - WARN_ON(irqs_disabled());
9932 - spin_lock(&call_lock);
9933 - ret = __smp_call_function_mask(mask, func, info, wait);
9934 - spin_unlock(&call_lock);
9937 -EXPORT_SYMBOL(smp_call_function_mask);
9940 - * smp_call_function_single - Run a function on a specific CPU
9941 - * @func: The function to run. This must be fast and non-blocking.
9942 - * @info: An arbitrary pointer to pass to the function.
9943 - * @nonatomic: Currently unused.
9944 - * @wait: If true, wait until function has completed on other CPUs.
9946 - * Retrurns 0 on success, else a negative status code.
9948 - * Does not return until the remote CPU is nearly ready to execute <func>
9949 - * or is or has executed.
9952 -int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
9953 - int nonatomic, int wait)
9955 - /* prevent preemption and reschedule on another processor */
9956 - int ret, me = get_cpu();
9958 - /* Can deadlock when called with interrupts disabled */
9959 - WARN_ON(irqs_disabled());
9962 - local_irq_disable();
9964 - local_irq_enable();
9969 - ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
9974 -EXPORT_SYMBOL(smp_call_function_single);
9977 - * smp_call_function - run a function on all other CPUs.
9978 - * @func: The function to run. This must be fast and non-blocking.
9979 - * @info: An arbitrary pointer to pass to the function.
9980 - * @nonatomic: currently unused.
9981 - * @wait: If true, wait (atomically) until function has completed on other
9984 - * Returns 0 on success, else a negative status code. Does not return until
9985 - * remote CPUs are nearly ready to execute func or are or have executed.
9987 - * You must not call this function with disabled interrupts or from a
9988 - * hardware interrupt handler or from a bottom half handler.
9989 - * Actually there are a few legal cases, like panic.
9991 -int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
9994 - return smp_call_function_mask(cpu_online_map, func, info, wait);
9996 -EXPORT_SYMBOL(smp_call_function);
9998 -static void stop_this_cpu(void *dummy)
10000 - local_irq_disable();
10002 - * Remove this CPU:
10004 - cpu_clear(smp_processor_id(), cpu_online_map);
10005 - disable_all_local_evtchn();
10010 -void smp_send_stop(void)
10013 - unsigned long flags;
10015 -#ifndef CONFIG_XEN
10016 - if (reboot_force)
10020 - /* Don't deadlock on the call lock in panic */
10021 - nolock = !spin_trylock(&call_lock);
10022 - local_irq_save(flags);
10023 - __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0);
10025 - spin_unlock(&call_lock);
10026 - disable_all_local_evtchn();
10027 - local_irq_restore(flags);
10031 - * Reschedule call back. Nothing to do,
10032 - * all the work is done automatically when
10033 - * we return from the interrupt.
10035 -#ifndef CONFIG_XEN
10036 -asmlinkage void smp_reschedule_interrupt(void)
10038 -asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx)
10041 -#ifndef CONFIG_XEN
10044 - add_pda(irq_resched_count, 1);
10046 - return IRQ_HANDLED;
10050 -#ifndef CONFIG_XEN
10051 -asmlinkage void smp_call_function_interrupt(void)
10053 -asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx)
10056 - void (*func) (void *info) = call_data->func;
10057 - void *info = call_data->info;
10058 - int wait = call_data->wait;
10060 -#ifndef CONFIG_XEN
10064 - * Notify initiating CPU that I've grabbed the data and am
10065 - * about to execute the function
10068 - atomic_inc(&call_data->started);
10070 - * At this point the info structure may be out of scope unless wait==1
10075 - add_pda(irq_call_count, 1);
10079 - atomic_inc(&call_data->finished);
10082 - return IRQ_HANDLED;
10085 --- sle11-2009-10-16.orig/arch/x86/kernel/time_32-xen.c 2009-10-28 14:58:05.000000000 +0100
10086 +++ sle11-2009-10-16/arch/x86/kernel/time_32-xen.c 2009-10-28 14:58:12.000000000 +0100
10087 @@ -700,8 +700,6 @@ int xen_update_persistent_clock(void)
10091 -extern void (*late_time_init)(void);
10093 /* Dynamically-mapped IRQ. */
10094 DEFINE_PER_CPU(int, timer_irq);
10096 --- sle11-2009-10-16.orig/arch/x86/kernel/traps_32-xen.c 2009-03-16 16:33:40.000000000 +0100
10097 +++ sle11-2009-10-16/arch/x86/kernel/traps_32-xen.c 2009-03-16 16:38:05.000000000 +0100
10099 * 'Traps.c' handles hardware traps and faults after we have saved some
10100 * state in 'asm.s'.
10102 -#include <linux/sched.h>
10103 +#include <linux/interrupt.h>
10104 +#include <linux/kallsyms.h>
10105 +#include <linux/spinlock.h>
10106 +#include <linux/highmem.h>
10107 +#include <linux/kprobes.h>
10108 +#include <linux/uaccess.h>
10109 +#include <linux/utsname.h>
10110 +#include <linux/kdebug.h>
10111 #include <linux/kernel.h>
10112 +#include <linux/module.h>
10113 +#include <linux/ptrace.h>
10114 #include <linux/string.h>
10115 +#include <linux/unwind.h>
10116 +#include <linux/delay.h>
10117 #include <linux/errno.h>
10118 +#include <linux/kexec.h>
10119 +#include <linux/sched.h>
10120 #include <linux/timer.h>
10121 -#include <linux/mm.h>
10122 #include <linux/init.h>
10123 -#include <linux/delay.h>
10124 -#include <linux/spinlock.h>
10125 -#include <linux/interrupt.h>
10126 -#include <linux/highmem.h>
10127 -#include <linux/kallsyms.h>
10128 -#include <linux/ptrace.h>
10129 -#include <linux/utsname.h>
10130 -#include <linux/kprobes.h>
10131 -#include <linux/kexec.h>
10132 -#include <linux/unwind.h>
10133 -#include <linux/uaccess.h>
10134 -#include <linux/nmi.h>
10135 #include <linux/bug.h>
10136 +#include <linux/nmi.h>
10137 +#include <linux/mm.h>
10140 #include <linux/ioport.h>
10141 @@ -43,21 +45,18 @@
10142 #include <linux/edac.h>
10145 +#include <asm/arch_hooks.h>
10146 +#include <asm/stacktrace.h>
10147 #include <asm/processor.h>
10148 -#include <asm/system.h>
10149 -#include <asm/io.h>
10150 -#include <asm/atomic.h>
10151 #include <asm/debugreg.h>
10152 +#include <asm/atomic.h>
10153 +#include <asm/system.h>
10154 +#include <asm/unwind.h>
10155 #include <asm/desc.h>
10156 #include <asm/i387.h>
10157 #include <asm/nmi.h>
10158 -#include <asm/unwind.h>
10159 #include <asm/smp.h>
10160 -#include <asm/arch_hooks.h>
10161 -#include <linux/kdebug.h>
10162 -#include <asm/stacktrace.h>
10164 -#include <linux/module.h>
10165 +#include <asm/io.h>
10167 #include "mach_traps.h"
10169 @@ -71,7 +70,7 @@ EXPORT_SYMBOL_GPL(used_vectors);
10170 asmlinkage int system_call(void);
10172 /* Do we ignore FPU interrupts ? */
10173 -char ignore_fpu_irq = 0;
10174 +char ignore_fpu_irq;
10176 #ifndef CONFIG_X86_NO_IDT
10178 @@ -113,12 +112,13 @@ static unsigned int code_bytes = 64;
10179 void printk_address(unsigned long address, int reliable)
10181 #ifdef CONFIG_KALLSYMS
10182 - unsigned long offset = 0, symsize;
10183 + char namebuf[KSYM_NAME_LEN];
10184 + unsigned long offset = 0;
10185 + unsigned long symsize;
10186 const char *symname;
10188 - char *delim = ":";
10189 - char namebuf[128];
10190 char reliab[4] = "";
10191 + char *delim = ":";
10194 symname = kallsyms_lookup(address, &symsize, &offset,
10195 &modname, namebuf);
10196 @@ -146,13 +146,14 @@ static inline int valid_stack_ptr(struct
10198 /* The form of the top of the frame on the stack */
10199 struct stack_frame {
10200 - struct stack_frame *next_frame;
10201 - unsigned long return_address;
10202 + struct stack_frame *next_frame;
10203 + unsigned long return_address;
10206 -static inline unsigned long print_context_stack(struct thread_info *tinfo,
10207 - unsigned long *stack, unsigned long bp,
10208 - const struct stacktrace_ops *ops, void *data)
10209 +static inline unsigned long
10210 +print_context_stack(struct thread_info *tinfo,
10211 + unsigned long *stack, unsigned long bp,
10212 + const struct stacktrace_ops *ops, void *data)
10214 struct stack_frame *frame = (struct stack_frame *)bp;
10216 @@ -174,7 +175,7 @@ static inline unsigned long print_contex
10220 -#define MSG(msg) ops->warning(data, msg)
10221 +#define MSG(msg) ops->warning(data, msg)
10223 void dump_trace(struct task_struct *task, struct pt_regs *regs,
10224 unsigned long *stack, unsigned long bp,
10225 @@ -185,6 +186,7 @@ void dump_trace(struct task_struct *task
10228 unsigned long dummy;
10231 if (task != current)
10232 stack = (unsigned long *)task->thread.sp;
10233 @@ -194,7 +196,7 @@ void dump_trace(struct task_struct *task
10235 if (task == current) {
10236 /* Grab bp right from our regs */
10237 - asm ("movl %%ebp, %0" : "=r" (bp) : );
10238 + asm("movl %%ebp, %0" : "=r" (bp) :);
10240 /* bp is the last reg pushed by switch_to */
10241 bp = *(unsigned long *) task->thread.sp;
10242 @@ -204,15 +206,18 @@ void dump_trace(struct task_struct *task
10245 struct thread_info *context;
10247 context = (struct thread_info *)
10248 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
10249 bp = print_context_stack(context, stack, bp, ops, data);
10250 - /* Should be after the line below, but somewhere
10251 - in early boot context comes out corrupted and we
10252 - can't reference it -AK */
10254 + * Should be after the line below, but somewhere
10255 + * in early boot context comes out corrupted and we
10256 + * can't reference it:
10258 if (ops->stack(data, "IRQ") < 0)
10260 - stack = (unsigned long*)context->previous_esp;
10261 + stack = (unsigned long *)context->previous_esp;
10264 touch_nmi_watchdog();
10265 @@ -251,15 +256,15 @@ static void print_trace_address(void *da
10268 static const struct stacktrace_ops print_trace_ops = {
10269 - .warning = print_trace_warning,
10270 - .warning_symbol = print_trace_warning_symbol,
10271 - .stack = print_trace_stack,
10272 - .address = print_trace_address,
10273 + .warning = print_trace_warning,
10274 + .warning_symbol = print_trace_warning_symbol,
10275 + .stack = print_trace_stack,
10276 + .address = print_trace_address,
10280 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
10281 - unsigned long *stack, unsigned long bp, char *log_lvl)
10282 + unsigned long *stack, unsigned long bp, char *log_lvl)
10284 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
10285 printk("%s =======================\n", log_lvl);
10286 @@ -271,21 +276,22 @@ void show_trace(struct task_struct *task
10287 show_trace_log_lvl(task, regs, stack, bp, "");
10290 -static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
10291 - unsigned long *sp, unsigned long bp, char *log_lvl)
10293 +show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
10294 + unsigned long *sp, unsigned long bp, char *log_lvl)
10296 unsigned long *stack;
10301 - sp = (unsigned long*)task->thread.sp;
10302 + sp = (unsigned long *)task->thread.sp;
10304 sp = (unsigned long *)&sp;
10308 - for(i = 0; i < kstack_depth_to_print; i++) {
10309 + for (i = 0; i < kstack_depth_to_print; i++) {
10310 if (kstack_end(stack))
10312 if (i && ((i % 8) == 0))
10313 @@ -293,6 +299,7 @@ static void show_stack_log_lvl(struct ta
10314 printk("%08lx ", *stack++);
10316 printk("\n%sCall Trace:\n", log_lvl);
10318 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
10321 @@ -307,8 +314,8 @@ void show_stack(struct task_struct *task
10323 void dump_stack(void)
10325 - unsigned long stack;
10326 unsigned long bp = 0;
10327 + unsigned long stack;
10329 #ifdef CONFIG_FRAME_POINTER
10331 @@ -320,6 +327,7 @@ void dump_stack(void)
10332 init_utsname()->release,
10333 (int)strcspn(init_utsname()->version, " "),
10334 init_utsname()->version);
10336 show_trace(current, NULL, &stack, bp);
10339 @@ -331,6 +339,7 @@ void show_registers(struct pt_regs *regs
10342 __show_registers(regs, 0);
10344 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
10345 TASK_COMM_LEN, current->comm, task_pid_nr(current),
10346 current_thread_info(), current, task_thread_info(current));
10347 @@ -339,10 +348,10 @@ void show_registers(struct pt_regs *regs
10348 * time of the fault..
10350 if (!user_mode_vm(regs)) {
10352 unsigned int code_prologue = code_bytes * 43 / 64;
10353 unsigned int code_len = code_bytes;
10357 printk("\n" KERN_EMERG "Stack: ");
10358 show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG);
10359 @@ -369,7 +378,7 @@ void show_registers(struct pt_regs *regs
10366 int is_valid_bugaddr(unsigned long ip)
10368 @@ -385,10 +394,10 @@ int is_valid_bugaddr(unsigned long ip)
10370 static int die_counter;
10372 -int __kprobes __die(const char * str, struct pt_regs * regs, long err)
10373 +int __kprobes __die(const char *str, struct pt_regs *regs, long err)
10375 - unsigned long sp;
10377 + unsigned long sp;
10379 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
10380 #ifdef CONFIG_PREEMPT
10381 @@ -403,8 +412,8 @@ int __kprobes __die(const char * str, st
10384 if (notify_die(DIE_OOPS, str, regs, err,
10385 - current->thread.trap_no, SIGSEGV) !=
10387 + current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) {
10389 show_registers(regs);
10390 /* Executive summary in case the oops scrolled away */
10391 sp = (unsigned long) (®s->sp);
10392 @@ -416,17 +425,18 @@ int __kprobes __die(const char * str, st
10393 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
10394 print_symbol("%s", regs->ip);
10395 printk(" SS:ESP %04x:%08lx\n", ss, sp);
10406 - * This is gone through when something in the kernel has done something bad and
10407 - * is about to be terminated.
10408 + * This is gone through when something in the kernel has done something bad
10409 + * and is about to be terminated:
10411 -void die(const char * str, struct pt_regs * regs, long err)
10412 +void die(const char *str, struct pt_regs *regs, long err)
10415 raw_spinlock_t lock;
10416 @@ -448,8 +458,9 @@ void die(const char * str, struct pt_reg
10417 die.lock_owner = smp_processor_id();
10418 die.lock_owner_depth = 0;
10422 raw_local_irq_save(flags);
10425 if (++die.lock_owner_depth < 3) {
10426 report_bug(regs->ip, regs);
10427 @@ -482,19 +493,20 @@ void die(const char * str, struct pt_reg
10431 -static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
10432 +static inline void
10433 +die_if_kernel(const char *str, struct pt_regs *regs, long err)
10435 if (!user_mode_vm(regs))
10436 die(str, regs, err);
10439 -static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
10440 - struct pt_regs * regs, long error_code,
10442 +static void __kprobes
10443 +do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs *regs,
10444 + long error_code, siginfo_t *info)
10446 struct task_struct *tsk = current;
10448 - if (regs->flags & VM_MASK) {
10449 + if (regs->flags & X86_VM_MASK) {
10453 @@ -503,109 +515,112 @@ static void __kprobes do_trap(int trapnr
10454 if (!user_mode(regs))
10459 - * We want error_code and trap_no set for userspace faults and
10460 - * kernelspace faults which result in die(), but not
10461 - * kernelspace faults which are fixed up. die() gives the
10462 - * process no chance to handle the signal and notice the
10463 - * kernel fault information, so that won't result in polluting
10464 - * the information about previously queued, but not yet
10465 - * delivered, faults. See also do_general_protection below.
10467 - tsk->thread.error_code = error_code;
10468 - tsk->thread.trap_no = trapnr;
10471 + * We want error_code and trap_no set for userspace faults and
10472 + * kernelspace faults which result in die(), but not
10473 + * kernelspace faults which are fixed up. die() gives the
10474 + * process no chance to handle the signal and notice the
10475 + * kernel fault information, so that won't result in polluting
10476 + * the information about previously queued, but not yet
10477 + * delivered, faults. See also do_general_protection below.
10479 + tsk->thread.error_code = error_code;
10480 + tsk->thread.trap_no = trapnr;
10483 - force_sig_info(signr, info, tsk);
10485 - force_sig(signr, tsk);
10489 + force_sig_info(signr, info, tsk);
10491 + force_sig(signr, tsk);
10495 - if (!fixup_exception(regs)) {
10496 - tsk->thread.error_code = error_code;
10497 - tsk->thread.trap_no = trapnr;
10498 - die(str, regs, error_code);
10502 + if (!fixup_exception(regs)) {
10503 + tsk->thread.error_code = error_code;
10504 + tsk->thread.trap_no = trapnr;
10505 + die(str, regs, error_code);
10510 - int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr);
10511 - if (ret) goto trap_signal;
10515 + if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
10516 + error_code, trapnr))
10517 + goto trap_signal;
10521 -#define DO_ERROR(trapnr, signr, str, name) \
10522 -void do_##name(struct pt_regs * regs, long error_code) \
10524 - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10525 - == NOTIFY_STOP) \
10527 - do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
10530 -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
10531 -void do_##name(struct pt_regs * regs, long error_code) \
10533 - siginfo_t info; \
10535 - local_irq_enable(); \
10536 - info.si_signo = signr; \
10537 - info.si_errno = 0; \
10538 - info.si_code = sicode; \
10539 - info.si_addr = (void __user *)siaddr; \
10540 - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10541 - == NOTIFY_STOP) \
10543 - do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
10546 -#define DO_VM86_ERROR(trapnr, signr, str, name) \
10547 -void do_##name(struct pt_regs * regs, long error_code) \
10549 - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10550 - == NOTIFY_STOP) \
10552 - do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
10555 -#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
10556 -void do_##name(struct pt_regs * regs, long error_code) \
10558 - siginfo_t info; \
10559 - info.si_signo = signr; \
10560 - info.si_errno = 0; \
10561 - info.si_code = sicode; \
10562 - info.si_addr = (void __user *)siaddr; \
10563 - trace_hardirqs_fixup(); \
10564 - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10565 - == NOTIFY_STOP) \
10567 - do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
10568 +#define DO_ERROR(trapnr, signr, str, name) \
10569 +void do_##name(struct pt_regs *regs, long error_code) \
10571 + trace_hardirqs_fixup(); \
10572 + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10573 + == NOTIFY_STOP) \
10575 + do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
10578 +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
10579 +void do_##name(struct pt_regs *regs, long error_code) \
10581 + siginfo_t info; \
10583 + local_irq_enable(); \
10584 + info.si_signo = signr; \
10585 + info.si_errno = 0; \
10586 + info.si_code = sicode; \
10587 + info.si_addr = (void __user *)siaddr; \
10588 + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10589 + == NOTIFY_STOP) \
10591 + do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
10594 +#define DO_VM86_ERROR(trapnr, signr, str, name) \
10595 +void do_##name(struct pt_regs *regs, long error_code) \
10597 + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10598 + == NOTIFY_STOP) \
10600 + do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
10603 +#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
10604 +void do_##name(struct pt_regs *regs, long error_code) \
10606 + siginfo_t info; \
10607 + info.si_signo = signr; \
10608 + info.si_errno = 0; \
10609 + info.si_code = sicode; \
10610 + info.si_addr = (void __user *)siaddr; \
10611 + trace_hardirqs_fixup(); \
10612 + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
10613 + == NOTIFY_STOP) \
10615 + do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
10618 -DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
10619 +DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
10620 #ifndef CONFIG_KPROBES
10621 -DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
10622 +DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
10624 -DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
10625 -DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
10626 -DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
10627 -DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
10628 +DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
10629 +DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
10630 +DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
10631 +DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
10632 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
10633 DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
10634 DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
10635 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
10636 -DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
10637 +DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
10639 void __kprobes do_general_protection(struct pt_regs * regs,
10642 - if (regs->flags & VM_MASK)
10643 + struct thread_struct *thread;
10645 + thread = ¤t->thread;
10647 + if (regs->flags & X86_VM_MASK)
10650 if (!user_mode(regs))
10651 @@ -613,6 +628,7 @@ void __kprobes do_general_protection(str
10653 current->thread.error_code = error_code;
10654 current->thread.trap_no = 13;
10656 if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
10657 printk_ratelimit()) {
10659 @@ -642,22 +658,25 @@ gp_in_kernel:
10663 -static __kprobes void
10664 -mem_parity_error(unsigned char reason, struct pt_regs * regs)
10665 +static notrace __kprobes void
10666 +mem_parity_error(unsigned char reason, struct pt_regs *regs)
10668 - printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
10669 - "CPU %d.\n", reason, smp_processor_id());
10670 - printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
10671 + printk(KERN_EMERG
10672 + "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
10673 + reason, smp_processor_id());
10675 + printk(KERN_EMERG
10676 + "You have some hardware problem, likely on the PCI bus.\n");
10678 #if defined(CONFIG_EDAC)
10679 - if(edac_handler_set()) {
10680 + if (edac_handler_set()) {
10681 edac_atomic_assert_error();
10686 if (panic_on_unrecovered_nmi)
10687 - panic("NMI: Not continuing");
10688 + panic("NMI: Not continuing");
10690 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
10692 @@ -665,8 +684,8 @@ mem_parity_error(unsigned char reason, s
10693 clear_mem_error(reason);
10696 -static __kprobes void
10697 -io_check_error(unsigned char reason, struct pt_regs * regs)
10698 +static notrace __kprobes void
10699 +io_check_error(unsigned char reason, struct pt_regs *regs)
10701 printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
10702 show_registers(regs);
10703 @@ -675,38 +694,43 @@ io_check_error(unsigned char reason, str
10704 clear_io_check_error(reason);
10707 -static __kprobes void
10708 -unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
10709 +static notrace __kprobes void
10710 +unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
10712 + if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
10715 - /* Might actually be able to figure out what the guilty party
10719 + * Might actually be able to figure out what the guilty party
10727 - printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
10728 - "CPU %d.\n", reason, smp_processor_id());
10729 + printk(KERN_EMERG
10730 + "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
10731 + reason, smp_processor_id());
10733 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
10734 if (panic_on_unrecovered_nmi)
10735 - panic("NMI: Not continuing");
10736 + panic("NMI: Not continuing");
10738 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
10741 static DEFINE_SPINLOCK(nmi_print_lock);
10743 -void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
10744 +void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg)
10746 - if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
10748 + if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP)
10751 spin_lock(&nmi_print_lock);
10753 * We are in trouble anyway, lets at least try
10754 - * to get a message out.
10755 + * to get a message out:
10758 printk(KERN_EMERG "%s", msg);
10759 @@ -717,9 +741,10 @@ void __kprobes die_nmi(struct pt_regs *r
10760 spin_unlock(&nmi_print_lock);
10763 - /* If we are in kernel we are probably nested up pretty bad
10764 - * and might aswell get out now while we still can.
10767 + * If we are in kernel we are probably nested up pretty bad
10768 + * and might aswell get out now while we still can:
10770 if (!user_mode_vm(regs)) {
10771 current->thread.trap_no = 2;
10773 @@ -728,14 +753,14 @@ void __kprobes die_nmi(struct pt_regs *r
10777 -static __kprobes void default_do_nmi(struct pt_regs * regs)
10778 +static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
10780 unsigned char reason = 0;
10782 - /* Only the BSP gets external NMIs from the system. */
10783 + /* Only the BSP gets external NMIs from the system: */
10784 if (!smp_processor_id())
10785 reason = get_nmi_reason();
10788 if (!(reason & 0xc0)) {
10789 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
10791 @@ -748,8 +773,10 @@ static __kprobes void default_do_nmi(str
10792 if (nmi_watchdog_tick(regs, reason))
10794 if (!do_nmi_callback(regs, smp_processor_id()))
10796 unknown_nmi_error(reason, regs);
10798 + unknown_nmi_error(reason, regs);
10803 @@ -761,14 +788,14 @@ static __kprobes void default_do_nmi(str
10804 io_check_error(reason, regs);
10806 * Reassert NMI in case it became active meanwhile
10807 - * as it's edge-triggered.
10808 + * as it's edge-triggered:
10813 static int ignore_nmis;
10815 -__kprobes void do_nmi(struct pt_regs * regs, long error_code)
10816 +notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
10820 @@ -804,9 +831,12 @@ void __kprobes do_int3(struct pt_regs *r
10821 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
10824 - /* This is an interrupt gate, because kprobes wants interrupts
10825 - disabled. Normal trap handlers don't. */
10827 + * This is an interrupt gate, because kprobes wants interrupts
10828 + * disabled. Normal trap handlers don't.
10830 restore_interrupts(regs);
10832 do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
10835 @@ -821,7 +851,7 @@ void __kprobes do_int3(struct pt_regs *r
10836 * from user space. Such code must not hold kernel locks (since it
10837 * can equally take a page fault), therefore it is safe to call
10838 * force_sig_info even though that claims and releases locks.
10841 * Code in ./signal.c ensures that the debug control register
10842 * is restored before we deliver any signal, and therefore that
10843 * user code runs with the correct debug control register even though
10844 @@ -833,10 +863,10 @@ void __kprobes do_int3(struct pt_regs *r
10845 * find every occurrence of the TF bit that could be saved away even
10848 -void __kprobes do_debug(struct pt_regs * regs, long error_code)
10849 +void __kprobes do_debug(struct pt_regs *regs, long error_code)
10851 - unsigned int condition;
10852 struct task_struct *tsk = current;
10853 + unsigned int condition;
10855 trace_hardirqs_fixup();
10857 @@ -861,7 +891,7 @@ void __kprobes do_debug(struct pt_regs *
10861 - if (regs->flags & VM_MASK)
10862 + if (regs->flags & X86_VM_MASK)
10865 /* Save debug status register where ptrace can see it */
10866 @@ -884,7 +914,8 @@ void __kprobes do_debug(struct pt_regs *
10867 /* Ok, finally something we can handle */
10868 send_sigtrap(tsk, regs, error_code);
10870 - /* Disable additional traps. They'll be re-enabled when
10872 + * Disable additional traps. They'll be re-enabled when
10873 * the signal is delivered.
10876 @@ -897,7 +928,7 @@ debug_vm86:
10879 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
10880 - regs->flags &= ~TF_MASK;
10881 + regs->flags &= ~X86_EFLAGS_TF;
10885 @@ -908,9 +939,10 @@ clear_TF_reenable:
10887 void math_error(void __user *ip)
10889 - struct task_struct * task;
10890 + struct task_struct *task;
10891 + unsigned short cwd;
10892 + unsigned short swd;
10894 - unsigned short cwd, swd;
10897 * Save the info for the exception handler and clear the error.
10898 @@ -936,36 +968,36 @@ void math_error(void __user *ip)
10899 cwd = get_fpu_cwd(task);
10900 swd = get_fpu_swd(task);
10901 switch (swd & ~cwd & 0x3f) {
10902 - case 0x000: /* No unmasked exception */
10904 - default: /* Multiple exceptions */
10906 - case 0x001: /* Invalid Op */
10908 - * swd & 0x240 == 0x040: Stack Underflow
10909 - * swd & 0x240 == 0x240: Stack Overflow
10910 - * User must clear the SF bit (0x40) if set
10912 - info.si_code = FPE_FLTINV;
10914 - case 0x002: /* Denormalize */
10915 - case 0x010: /* Underflow */
10916 - info.si_code = FPE_FLTUND;
10918 - case 0x004: /* Zero Divide */
10919 - info.si_code = FPE_FLTDIV;
10921 - case 0x008: /* Overflow */
10922 - info.si_code = FPE_FLTOVF;
10924 - case 0x020: /* Precision */
10925 - info.si_code = FPE_FLTRES;
10927 + case 0x000: /* No unmasked exception */
10929 + default: /* Multiple exceptions */
10931 + case 0x001: /* Invalid Op */
10933 + * swd & 0x240 == 0x040: Stack Underflow
10934 + * swd & 0x240 == 0x240: Stack Overflow
10935 + * User must clear the SF bit (0x40) if set
10937 + info.si_code = FPE_FLTINV;
10939 + case 0x002: /* Denormalize */
10940 + case 0x010: /* Underflow */
10941 + info.si_code = FPE_FLTUND;
10943 + case 0x004: /* Zero Divide */
10944 + info.si_code = FPE_FLTDIV;
10946 + case 0x008: /* Overflow */
10947 + info.si_code = FPE_FLTOVF;
10949 + case 0x020: /* Precision */
10950 + info.si_code = FPE_FLTRES;
10953 force_sig_info(SIGFPE, &info, task);
10956 -void do_coprocessor_error(struct pt_regs * regs, long error_code)
10957 +void do_coprocessor_error(struct pt_regs *regs, long error_code)
10959 ignore_fpu_irq = 1;
10960 math_error((void __user *)regs->ip);
10961 @@ -973,9 +1005,9 @@ void do_coprocessor_error(struct pt_regs
10963 static void simd_math_error(void __user *ip)
10965 - struct task_struct * task;
10967 + struct task_struct *task;
10968 unsigned short mxcsr;
10972 * Save the info for the exception handler and clear the error.
10973 @@ -996,84 +1028,82 @@ static void simd_math_error(void __user
10975 mxcsr = get_fpu_mxcsr(task);
10976 switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
10980 - case 0x001: /* Invalid Op */
10981 - info.si_code = FPE_FLTINV;
10983 - case 0x002: /* Denormalize */
10984 - case 0x010: /* Underflow */
10985 - info.si_code = FPE_FLTUND;
10987 - case 0x004: /* Zero Divide */
10988 - info.si_code = FPE_FLTDIV;
10990 - case 0x008: /* Overflow */
10991 - info.si_code = FPE_FLTOVF;
10993 - case 0x020: /* Precision */
10994 - info.si_code = FPE_FLTRES;
10999 + case 0x001: /* Invalid Op */
11000 + info.si_code = FPE_FLTINV;
11002 + case 0x002: /* Denormalize */
11003 + case 0x010: /* Underflow */
11004 + info.si_code = FPE_FLTUND;
11006 + case 0x004: /* Zero Divide */
11007 + info.si_code = FPE_FLTDIV;
11009 + case 0x008: /* Overflow */
11010 + info.si_code = FPE_FLTOVF;
11012 + case 0x020: /* Precision */
11013 + info.si_code = FPE_FLTRES;
11016 force_sig_info(SIGFPE, &info, task);
11019 -void do_simd_coprocessor_error(struct pt_regs * regs,
11021 +void do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
11024 /* Handle SIMD FPU exceptions on PIII+ processors. */
11025 ignore_fpu_irq = 1;
11026 simd_math_error((void __user *)regs->ip);
11029 - * Handle strange cache flush from user space exception
11030 - * in all other cases. This is undocumented behaviour.
11032 - if (regs->flags & VM_MASK) {
11033 - handle_vm86_fault((struct kernel_vm86_regs *)regs,
11037 - current->thread.trap_no = 19;
11038 - current->thread.error_code = error_code;
11039 - die_if_kernel("cache flush denied", regs, error_code);
11040 - force_sig(SIGSEGV, current);
11044 + * Handle strange cache flush from user space exception
11045 + * in all other cases. This is undocumented behaviour.
11047 + if (regs->flags & X86_VM_MASK) {
11048 + handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code);
11051 + current->thread.trap_no = 19;
11052 + current->thread.error_code = error_code;
11053 + die_if_kernel("cache flush denied", regs, error_code);
11054 + force_sig(SIGSEGV, current);
11058 -void do_spurious_interrupt_bug(struct pt_regs * regs,
11060 +void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
11063 /* No need to warn about this any longer. */
11064 - printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
11065 + printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
11069 -unsigned long patch_espfix_desc(unsigned long uesp,
11070 - unsigned long kesp)
11071 +unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
11073 struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
11074 unsigned long base = (kesp - uesp) & -THREAD_SIZE;
11075 unsigned long new_kesp = kesp - base;
11076 unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
11077 __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
11079 /* Set up base for espfix segment */
11080 - desc &= 0x00f0ff0000000000ULL;
11081 - desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
11082 + desc &= 0x00f0ff0000000000ULL;
11083 + desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
11084 ((((__u64)base) << 32) & 0xff00000000000000ULL) |
11085 ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
11086 (lim_pages & 0xffff);
11087 *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
11094 - * 'math_state_restore()' saves the current math information in the
11095 + * 'math_state_restore()' saves the current math information in the
11096 * old math state array, and gets the new ones from the current task
11098 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
11099 @@ -1087,9 +1117,22 @@ asmlinkage void math_state_restore(void)
11100 struct thread_info *thread = current_thread_info();
11101 struct task_struct *tsk = thread->task;
11103 + if (!tsk_used_math(tsk)) {
11104 + local_irq_enable();
11106 + * does a slab alloc which can sleep
11108 + if (init_fpu(tsk)) {
11110 + * ran out of memory!
11112 + do_group_exit(SIGKILL);
11115 + local_irq_disable();
11118 /* NB. 'clts' is done for us by Xen during virtual trap. */
11119 - if (!tsk_used_math(tsk))
11122 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
11123 tsk->fpu_counter++;
11124 @@ -1100,15 +1143,15 @@ EXPORT_SYMBOL_GPL(math_state_restore);
11126 asmlinkage void math_emulate(long arg)
11128 - printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n");
11129 - printk(KERN_EMERG "killing %s.\n",current->comm);
11130 - force_sig(SIGFPE,current);
11131 + printk(KERN_EMERG
11132 + "math-emulation not enabled and no coprocessor found.\n");
11133 + printk(KERN_EMERG "killing %s.\n", current->comm);
11134 + force_sig(SIGFPE, current);
11138 #endif /* CONFIG_MATH_EMULATION */
11142 * NB. All these are "trap gates" (i.e. events_mask isn't set) except
11143 * for those that specify <dpl>|4 in the second field.
11144 @@ -1146,25 +1189,21 @@ void __init trap_init(void)
11146 printk("HYPERVISOR_set_trap_table failed: error %d\n", ret);
11149 - * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
11150 - * Generate a build-time error if the alignment is wrong.
11152 - BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
11153 if (cpu_has_fxsr) {
11154 printk(KERN_INFO "Enabling fast FPU save and restore... ");
11155 set_in_cr4(X86_CR4_OSFXSR);
11159 - printk(KERN_INFO "Enabling unmasked SIMD FPU exception "
11162 + "Enabling unmasked SIMD FPU exception support... ");
11163 set_in_cr4(X86_CR4_OSXMMEXCPT);
11167 + init_thread_xstate();
11169 - * Should be a barrier for any external CPU state.
11170 + * Should be a barrier for any external CPU state:
11174 @@ -1183,6 +1222,7 @@ void __cpuinit smp_trap_init(trap_info_t
11175 static int __init kstack_setup(char *s)
11177 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
11181 __setup("kstack=", kstack_setup);
11182 --- sle11-2009-10-16.orig/arch/x86/kernel/traps_64-xen.c 2009-03-16 16:33:40.000000000 +0100
11183 +++ sle11-2009-10-16/arch/x86/kernel/traps_64-xen.c 2009-03-16 16:38:05.000000000 +0100
11185 #include <linux/kdebug.h>
11186 #include <linux/utsname.h>
11188 +#include <mach_traps.h>
11190 #if defined(CONFIG_EDAC)
11191 #include <linux/edac.h>
11193 @@ -601,10 +603,16 @@ void die(const char * str, struct pt_reg
11196 #if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_SYSCTL)
11197 -void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
11198 +notrace __kprobes void
11199 +die_nmi(char *str, struct pt_regs *regs, int do_panic)
11201 - unsigned long flags = oops_begin();
11202 + unsigned long flags;
11204 + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) ==
11208 + flags = oops_begin();
11210 * We are in trouble anyway, lets at least try
11211 * to get a message out.
11212 @@ -769,7 +777,7 @@ asmlinkage void __kprobes do_general_pro
11213 die("general protection fault", regs, error_code);
11216 -static __kprobes void
11217 +static notrace __kprobes void
11218 mem_parity_error(unsigned char reason, struct pt_regs * regs)
11220 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
11221 @@ -792,7 +800,7 @@ mem_parity_error(unsigned char reason, s
11222 clear_mem_error(reason);
11225 -static __kprobes void
11226 +static notrace __kprobes void
11227 io_check_error(unsigned char reason, struct pt_regs * regs)
11229 printk("NMI: IOCK error (debug interrupt?)\n");
11230 @@ -802,9 +810,11 @@ io_check_error(unsigned char reason, str
11231 clear_io_check_error(reason);
11234 -static __kprobes void
11235 +static notrace __kprobes void
11236 unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
11238 + if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
11240 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
11242 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
11243 @@ -817,7 +827,7 @@ unknown_nmi_error(unsigned char reason,
11245 /* Runs on IST stack. This code must keep interrupts off all the time.
11246 Nested NMIs are prevented by the CPU. */
11247 -asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs)
11248 +asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
11250 unsigned char reason = 0;
11252 @@ -1117,11 +1127,25 @@ asmlinkage void __attribute__((weak)) mc
11253 asmlinkage void math_state_restore(void)
11255 struct task_struct *me = current;
11257 + if (!used_math()) {
11258 + local_irq_enable();
11260 + * does a slab alloc which can sleep
11262 + if (init_fpu(me)) {
11264 + * ran out of memory!
11266 + do_group_exit(SIGKILL);
11269 + local_irq_disable();
11272 /* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */
11274 - if (!used_math())
11276 - restore_fpu_checking(&me->thread.i387.fxsave);
11277 + restore_fpu_checking(&me->thread.xstate->fxsave);
11278 task_thread_info(me)->status |= TS_USEDFPU;
11281 @@ -1168,6 +1192,10 @@ void __init trap_init(void)
11282 printk("HYPERVISOR_set_trap_table failed: error %d\n", ret);
11285 + * initialize the per thread extended state:
11287 + init_thread_xstate();
11289 * Should be a barrier for any external CPU state.
11292 --- sle11-2009-10-16.orig/arch/x86/kernel/vsyscall_64-xen.c 2009-03-16 16:33:40.000000000 +0100
11293 +++ sle11-2009-10-16/arch/x86/kernel/vsyscall_64-xen.c 2009-03-16 16:38:05.000000000 +0100
11294 @@ -216,7 +216,7 @@ vgetcpu(unsigned *cpu, unsigned *node, s
11298 -long __vsyscall(3) venosys_1(void)
11299 +static long __vsyscall(3) venosys_1(void)
11303 --- sle11-2009-10-16.orig/arch/x86/mm/fault-xen.c 2009-03-16 16:33:40.000000000 +0100
11304 +++ sle11-2009-10-16/arch/x86/mm/fault-xen.c 2009-03-16 16:38:05.000000000 +0100
11305 @@ -510,6 +510,11 @@ static int vmalloc_fault(unsigned long a
11306 unsigned long pgd_paddr;
11310 + /* Make sure we are in vmalloc area */
11311 + if (!(address >= VMALLOC_START && address < VMALLOC_END))
11315 * Synchronize this task's top level page-table
11316 * with the 'reference' page table.
11317 @@ -670,7 +675,7 @@ void __kprobes do_page_fault(struct pt_r
11318 #ifdef CONFIG_X86_32
11319 /* It's safe to allow irq's after cr2 has been saved and the vmalloc
11320 fault has been handled. */
11321 - if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
11322 + if (regs->flags & (X86_EFLAGS_IF | X86_VM_MASK))
11323 local_irq_enable();
11326 @@ -1017,9 +1022,5 @@ void vmalloc_sync_all(void)
11327 if (address == start)
11328 start = address + PGDIR_SIZE;
11330 - /* Check that there is no need to do the same for the modules area. */
11331 - BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
11332 - BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
11333 - (__START_KERNEL & PGDIR_MASK)));
11336 --- sle11-2009-10-16.orig/arch/x86/mm/highmem_32-xen.c 2009-03-16 16:33:40.000000000 +0100
11337 +++ sle11-2009-10-16/arch/x86/mm/highmem_32-xen.c 2009-03-16 16:38:05.000000000 +0100
11338 @@ -200,6 +200,8 @@ EXPORT_SYMBOL(kmap);
11339 EXPORT_SYMBOL(kunmap);
11340 EXPORT_SYMBOL(kmap_atomic);
11341 EXPORT_SYMBOL(kunmap_atomic);
11342 +#ifdef CONFIG_HIGHPTE
11343 EXPORT_SYMBOL(kmap_atomic_to_page);
11345 EXPORT_SYMBOL(clear_highpage);
11346 EXPORT_SYMBOL(copy_highpage);
11347 --- sle11-2009-10-16.orig/arch/x86/mm/init_32-xen.c 2009-03-16 16:33:40.000000000 +0100
11348 +++ sle11-2009-10-16/arch/x86/mm/init_32-xen.c 2009-03-16 16:38:05.000000000 +0100
11351 - * linux/arch/i386/mm/init.c
11353 * Copyright (C) 1995 Linus Torvalds
11356 #include <linux/init.h>
11357 #include <linux/highmem.h>
11358 #include <linux/pagemap.h>
11359 +#include <linux/pci.h>
11360 #include <linux/pfn.h>
11361 #include <linux/poison.h>
11362 #include <linux/bootmem.h>
11365 unsigned int __VMALLOC_RESERVE = 128 << 20;
11367 +unsigned long max_pfn_mapped;
11369 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
11370 unsigned long highstart_pfn, highend_pfn;
11372 @@ -73,7 +75,7 @@ static pmd_t * __init one_md_table_init(
11373 if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) {
11374 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
11376 - paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
11377 + paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
11378 make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
11379 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
11380 pud = pud_offset(pgd, 0);
11381 @@ -107,7 +109,7 @@ static pte_t * __init one_page_table_ini
11382 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
11385 - paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
11386 + paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
11387 make_lowmem_page_readonly(page_table,
11388 XENFEAT_writable_page_tables);
11389 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
11390 @@ -209,8 +211,13 @@ static void __init kernel_physical_mappi
11392 * Map with big pages if possible, otherwise
11393 * create normal page tables:
11395 + * Don't use a large page for the first 2/4MB of memory
11396 + * because there are often fixed size MTRRs in there
11397 + * and overlapping MTRRs into large pages can cause
11400 - if (cpu_has_pse) {
11401 + if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) {
11402 unsigned int addr2;
11403 pgprot_t prot = PAGE_KERNEL_LARGE;
11405 @@ -224,6 +231,7 @@ static void __init kernel_physical_mappi
11406 set_pmd(pmd, pfn_pmd(pfn, prot));
11408 pfn += PTRS_PER_PTE;
11409 + max_pfn_mapped = pfn;
11412 pte = one_page_table_init(pmd);
11413 @@ -241,6 +249,7 @@ static void __init kernel_physical_mappi
11415 set_pte(pte, pfn_pte(pfn, prot));
11417 + max_pfn_mapped = pfn;
11421 @@ -262,6 +271,25 @@ static inline int page_kills_ppro(unsign
11426 + * devmem_is_allowed() checks to see if /dev/mem access to a certain address
11427 + * is valid. The argument is a physical page number.
11430 + * On x86, access has to be given to the first megabyte of ram because that area
11431 + * contains bios code and data regions used by X and dosemu and similar apps.
11432 + * Access has to be given to non-kernel-ram areas as well, these contain the PCI
11433 + * mmio resources as well as potential bios/acpi data regions.
11435 +int devmem_is_allowed(unsigned long pagenr)
11437 + if (pagenr <= 256)
11439 + if (mfn_to_local_pfn(pagenr) >= max_pfn)
11444 #ifdef CONFIG_HIGHMEM
11446 pgprot_t kmap_prot;
11447 @@ -303,48 +331,18 @@ static void __init permanent_kmaps_init(
11448 pkmap_page_table = pte;
11451 -static void __meminit free_new_highpage(struct page *page, int pfn)
11453 - init_page_count(page);
11454 - if (pfn < xen_start_info->nr_pages)
11455 - __free_page(page);
11456 - totalhigh_pages++;
11459 void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
11461 if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
11462 ClearPageReserved(page);
11463 - free_new_highpage(page, pfn);
11464 + init_page_count(page);
11465 + if (pfn < xen_start_info->nr_pages)
11466 + __free_page(page);
11467 + totalhigh_pages++;
11469 SetPageReserved(page);
11472 -static int __meminit
11473 -add_one_highpage_hotplug(struct page *page, unsigned long pfn)
11475 - free_new_highpage(page, pfn);
11476 - totalram_pages++;
11477 -#ifdef CONFIG_FLATMEM
11478 - max_mapnr = max(pfn, max_mapnr);
11486 - * Not currently handling the NUMA case.
11487 - * Assuming single node and all memory that
11488 - * has been added dynamically that would be
11489 - * onlined here is in HIGHMEM.
11491 -void __meminit online_page(struct page *page)
11493 - ClearPageReserved(page);
11494 - add_one_highpage_hotplug(page, page_to_pfn(page));
11497 #ifndef CONFIG_NUMA
11498 static void __init set_highmem_pages_init(int bad_ppro)
11500 @@ -459,15 +457,13 @@ void zap_low_mappings(void)
11507 * Zap initial low-memory mappings.
11509 * Note that "pgd_clear()" doesn't do it for
11510 * us, because pgd_clear() is a no-op on i386.
11512 - for (i = 0; i < USER_PTRS_PER_PGD; i++) {
11513 + for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
11514 #if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
11515 set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
11517 @@ -572,9 +568,9 @@ void __init paging_init(void)
11520 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
11521 - * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
11522 - * used to involve black magic jumps to work around some nasty CPU bugs,
11523 - * but fortunately the switch to using exceptions got rid of all that.
11524 + * and also on some strange 486's. All 586+'s are OK. This used to involve
11525 + * black magic jumps to work around some nasty CPU bugs, but fortunately the
11526 + * switch to using exceptions got rid of all that.
11528 static void __init test_wp_bit(void)
11530 @@ -605,9 +601,7 @@ void __init mem_init(void)
11534 -#if defined(CONFIG_SWIOTLB)
11537 + pci_iommu_alloc();
11539 #ifdef CONFIG_FLATMEM
11541 @@ -710,16 +704,8 @@ void __init mem_init(void)
11547 - * Subtle. SMP is doing it's boot stuff late (because it has to
11548 - * fork idle threads) - but it also needs low mappings for the
11549 - * protected-mode entry to work. We zap these entries only after
11550 - * the WP-bit has been tested.
11552 -#ifndef CONFIG_SMP
11554 zap_low_mappings();
11557 SetPagePinned(virt_to_page(init_mm.pgd));
11559 @@ -769,25 +755,17 @@ void mark_rodata_ro(void)
11560 unsigned long start = PFN_ALIGN(_text);
11561 unsigned long size = PFN_ALIGN(_etext) - start;
11563 -#ifndef CONFIG_KPROBES
11564 -#ifdef CONFIG_HOTPLUG_CPU
11565 - /* It must still be possible to apply SMP alternatives. */
11566 - if (num_possible_cpus() <= 1)
11569 - set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
11570 - printk(KERN_INFO "Write protecting the kernel text: %luk\n",
11572 + set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
11573 + printk(KERN_INFO "Write protecting the kernel text: %luk\n",
11576 #ifdef CONFIG_CPA_DEBUG
11577 - printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
11578 - start, start+size);
11579 - set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
11580 + printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
11581 + start, start+size);
11582 + set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
11584 - printk(KERN_INFO "Testing CPA: write protecting again\n");
11585 - set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
11588 + printk(KERN_INFO "Testing CPA: write protecting again\n");
11589 + set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
11592 size = (unsigned long)__end_rodata - start;
11593 --- sle11-2009-10-16.orig/arch/x86/mm/init_64-xen.c 2009-03-16 16:33:40.000000000 +0100
11594 +++ sle11-2009-10-16/arch/x86/mm/init_64-xen.c 2009-03-16 16:38:05.000000000 +0100
11597 #include <xen/features.h>
11599 -const struct dma_mapping_ops *dma_ops;
11600 -EXPORT_SYMBOL(dma_ops);
11602 #if CONFIG_XEN_COMPAT <= 0x030002
11603 unsigned int __kernel_page_user;
11604 EXPORT_SYMBOL(__kernel_page_user);
11605 @@ -68,6 +65,28 @@ extern unsigned long start_pfn;
11606 extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD];
11607 extern pte_t level1_fixmap_pgt[PTRS_PER_PTE];
11609 +#ifndef CONFIG_XEN
11610 +int direct_gbpages __meminitdata
11611 +#ifdef CONFIG_DIRECT_GBPAGES
11616 +static int __init parse_direct_gbpages_off(char *arg)
11618 + direct_gbpages = 0;
11621 +early_param("nogbpages", parse_direct_gbpages_off);
11623 +static int __init parse_direct_gbpages_on(char *arg)
11625 + direct_gbpages = 1;
11628 +early_param("gbpages", parse_direct_gbpages_on);
11632 * Use this until direct mapping is established, i.e. before __va() is
11633 * available in init_memory_mapping().
11634 @@ -135,9 +154,6 @@ void show_mem(void)
11636 printk(KERN_INFO "Mem-info:\n");
11638 - printk(KERN_INFO "Free swap: %6ldkB\n",
11639 - nr_swap_pages << (PAGE_SHIFT-10));
11641 for_each_online_pgdat(pgdat) {
11642 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
11644 @@ -328,7 +344,7 @@ void __init cleanup_highmap(void)
11645 pmd_t *last_pmd = pmd + PTRS_PER_PMD;
11647 for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) {
11648 - if (!pmd_present(*pmd))
11649 + if (pmd_none(*pmd))
11651 if (vaddr < (unsigned long) _text || vaddr > end)
11652 set_pmd(pmd, __pmd(0));
11653 @@ -337,8 +353,7 @@ void __init cleanup_highmap(void)
11656 /* NOTE: this is meant to be run only at boot */
11658 -__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
11659 +void __init __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
11661 unsigned long address = __fix_to_virt(idx);
11663 @@ -463,7 +478,7 @@ __meminit void early_iounmap(void *addr,
11667 -static void __meminit
11668 +static unsigned long __meminit
11669 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
11671 int i = pmd_index(address);
11672 @@ -503,21 +518,26 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
11673 set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
11679 -static void __meminit
11680 +static unsigned long __meminit
11681 phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
11683 pmd_t *pmd = pmd_offset(pud, 0);
11684 + unsigned long last_map_addr;
11686 spin_lock(&init_mm.page_table_lock);
11687 - phys_pmd_init(pmd, address, end);
11688 + last_map_addr = phys_pmd_init(pmd, address, end);
11689 spin_unlock(&init_mm.page_table_lock);
11691 + return last_map_addr;
11694 -static void __meminit
11695 +static unsigned long __meminit
11696 phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
11698 + unsigned long last_map_addr = end;
11699 int i = pud_index(addr);
11701 for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
11702 @@ -529,7 +549,15 @@ phys_pud_init(pud_t *pud_page, unsigned
11705 if (__pud_val(*pud)) {
11706 - phys_pmd_update(pud, addr, end);
11707 + if (!pud_large(*pud))
11708 + last_map_addr = phys_pmd_update(pud, addr, end);
11712 + if (direct_gbpages) {
11713 + set_pte((pte_t *)pud,
11714 + pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
11715 + last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
11719 @@ -537,12 +565,14 @@ phys_pud_init(pud_t *pud_page, unsigned
11721 spin_lock(&init_mm.page_table_lock);
11722 *pud = __pud(pmd_phys | _KERNPG_TABLE);
11723 - phys_pmd_init(pmd, addr, end);
11724 + last_map_addr = phys_pmd_init(pmd, addr, end);
11725 spin_unlock(&init_mm.page_table_lock);
11727 early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
11731 + return last_map_addr >> PAGE_SHIFT;
11734 void __init xen_init_pt(void)
11735 @@ -754,16 +784,138 @@ static void __init xen_finish_init_mappi
11736 table_end = start_pfn;
11739 +static void __init init_gbpages(void)
11741 +#ifndef CONFIG_XEN
11742 + if (direct_gbpages && cpu_has_gbpages)
11743 + printk(KERN_INFO "Using GB pages for direct mapping\n");
11745 + direct_gbpages = 0;
11749 +#ifdef CONFIG_MEMTEST_BOOTPARAM
11751 +static void __init memtest(unsigned long start_phys, unsigned long size,
11752 + unsigned pattern)
11755 + unsigned long *start;
11756 + unsigned long start_bad;
11757 + unsigned long last_bad;
11758 + unsigned long val;
11759 + unsigned long start_phys_aligned;
11760 + unsigned long count;
11761 + unsigned long incr;
11763 + switch (pattern) {
11771 + val = 0x5555555555555555UL;
11774 + val = 0xaaaaaaaaaaaaaaaaUL;
11780 + incr = sizeof(unsigned long);
11781 + start_phys_aligned = ALIGN(start_phys, incr);
11782 + count = (size - (start_phys_aligned - start_phys))/incr;
11783 + start = __va(start_phys_aligned);
11787 + for (i = 0; i < count; i++)
11789 + for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
11790 + if (*start != val) {
11791 + if (start_phys_aligned == last_bad + incr) {
11792 + last_bad += incr;
11795 + printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
11796 + val, start_bad, last_bad + incr);
11797 + reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
11799 + start_bad = last_bad = start_phys_aligned;
11804 + printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
11805 + val, start_bad, last_bad + incr);
11806 + reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
11811 +static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE;
11813 +static int __init parse_memtest(char *arg)
11816 + memtest_pattern = simple_strtoul(arg, NULL, 0);
11820 +early_param("memtest", parse_memtest);
11822 +static void __init early_memtest(unsigned long start, unsigned long end)
11824 + u64 t_start, t_size;
11825 + unsigned pattern;
11827 + if (!memtest_pattern)
11830 + printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
11831 + for (pattern = 0; pattern < memtest_pattern; pattern++) {
11834 + while (t_start < end) {
11835 + t_start = find_e820_area_size(t_start, &t_size, 1);
11838 + if (t_start >= end)
11840 + if (t_start + t_size > end)
11841 + t_size = end - t_start;
11843 + printk(KERN_CONT "\n %016llx - %016llx pattern %d",
11844 + (unsigned long long)t_start,
11845 + (unsigned long long)t_start + t_size, pattern);
11847 + memtest(t_start, t_size, pattern);
11849 + t_start += t_size;
11852 + printk(KERN_CONT "\n");
11855 +static void __init early_memtest(unsigned long start, unsigned long end)
11861 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
11862 * This runs before bootmem is initialized and gets pages directly from
11863 * the physical memory. To access them they are temporarily mapped.
11865 -void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
11866 +unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end)
11868 - unsigned long next;
11869 + unsigned long next, last_map_addr = end;
11870 + unsigned long start_phys = start, end_phys = end;
11872 - pr_debug("init_memory_mapping\n");
11873 + printk(KERN_INFO "init_memory_mapping\n");
11876 * Find space for the kernel direct mapping tables.
11877 @@ -772,8 +924,10 @@ void __init_refok init_memory_mapping(un
11878 * memory mapped. Unfortunately this is done currently before the
11879 * nodes are discovered.
11881 - if (!after_bootmem)
11882 + if (!after_bootmem) {
11884 find_early_table_space(end);
11887 start = (unsigned long)__va(start);
11888 end = (unsigned long)__va(end);
11889 @@ -790,7 +944,7 @@ void __init_refok init_memory_mapping(un
11890 next = start + PGDIR_SIZE;
11893 - phys_pud_init(pud, __pa(start), __pa(next));
11894 + last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
11895 if (!after_bootmem) {
11896 early_make_page_readonly(pud, XENFEAT_writable_page_tables);
11897 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
11898 @@ -807,6 +961,11 @@ void __init_refok init_memory_mapping(un
11899 if (!after_bootmem)
11900 reserve_early(table_start << PAGE_SHIFT,
11901 table_end << PAGE_SHIFT, "PGTABLE");
11903 + if (!after_bootmem)
11904 + early_memtest(start_phys, end_phys);
11906 + return last_map_addr;
11909 #ifndef CONFIG_NUMA
11910 @@ -830,15 +989,6 @@ void __init paging_init(void)
11912 * Memory hotplug specific functions
11914 -void online_page(struct page *page)
11916 - ClearPageReserved(page);
11917 - init_page_count(page);
11918 - __free_page(page);
11919 - totalram_pages++;
11923 #ifdef CONFIG_MEMORY_HOTPLUG
11925 * Memory is added always to NORMAL zone. This means you will never get
11926 @@ -848,11 +998,13 @@ int arch_add_memory(int nid, u64 start,
11928 struct pglist_data *pgdat = NODE_DATA(nid);
11929 struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
11930 - unsigned long start_pfn = start >> PAGE_SHIFT;
11931 + unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT;
11932 unsigned long nr_pages = size >> PAGE_SHIFT;
11935 - init_memory_mapping(start, start + size-1);
11936 + last_mapped_pfn = init_memory_mapping(start, start + size-1);
11937 + if (last_mapped_pfn > max_pfn_mapped)
11938 + max_pfn_mapped = last_mapped_pfn;
11940 ret = __add_pages(zone, start_pfn, nr_pages);
11942 @@ -871,6 +1023,26 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to
11944 #endif /* CONFIG_MEMORY_HOTPLUG */
11947 + * devmem_is_allowed() checks to see if /dev/mem access to a certain address
11948 + * is valid. The argument is a physical page number.
11951 + * On x86, access has to be given to the first megabyte of ram because that area
11952 + * contains bios code and data regions used by X and dosemu and similar apps.
11953 + * Access has to be given to non-kernel-ram areas as well, these contain the PCI
11954 + * mmio resources as well as potential bios/acpi data regions.
11956 +int devmem_is_allowed(unsigned long pagenr)
11958 + if (pagenr <= 256)
11960 + if (mfn_to_local_pfn(pagenr) >= max_pfn)
11966 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
11967 kcore_modules, kcore_vsyscall;
11969 @@ -979,24 +1151,7 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
11971 void mark_rodata_ro(void)
11973 - unsigned long start = (unsigned long)_stext, end;
11975 -#ifdef CONFIG_HOTPLUG_CPU
11976 - /* It must still be possible to apply SMP alternatives. */
11977 - if (num_possible_cpus() > 1)
11978 - start = (unsigned long)_etext;
11981 -#ifdef CONFIG_KPROBES
11982 - start = (unsigned long)__start_rodata;
11985 - end = (unsigned long)__end_rodata;
11986 - start = (start + PAGE_SIZE - 1) & PAGE_MASK;
11987 - end &= PAGE_MASK;
11988 - if (end <= start)
11991 + unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
11993 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
11994 (end - start) >> 10);
11995 @@ -1019,6 +1174,7 @@ void mark_rodata_ro(void)
11996 set_memory_ro(start, (end-start) >> PAGE_SHIFT);
12002 #ifdef CONFIG_BLK_DEV_INITRD
12003 @@ -1031,7 +1187,7 @@ void free_initrd_mem(unsigned long start
12004 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
12007 - int nid = phys_to_nid(phys);
12008 + int nid, next_nid;
12010 unsigned long pfn = phys >> PAGE_SHIFT;
12012 @@ -1040,7 +1196,7 @@ void __init reserve_bootmem_generic(unsi
12013 * This can happen with kdump kernels when accessing
12016 - if (pfn < end_pfn_map)
12017 + if (pfn < max_pfn_mapped)
12020 printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
12021 @@ -1050,10 +1206,16 @@ void __init reserve_bootmem_generic(unsi
12023 /* Should check here against the e820 map to avoid double free */
12025 - reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
12026 + nid = phys_to_nid(phys);
12027 + next_nid = phys_to_nid(phys + len - 1);
12028 + if (nid == next_nid)
12029 + reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
12031 + reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
12033 reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
12037 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
12038 dma_reserve += len / PAGE_SIZE;
12039 @@ -1149,6 +1311,10 @@ const char *arch_vma_name(struct vm_area
12041 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
12043 +static long __meminitdata addr_start, addr_end;
12044 +static void __meminitdata *p_start, *p_end;
12045 +static int __meminitdata node_start;
12048 vmemmap_populate(struct page *start_page, unsigned long size, int node)
12050 @@ -1183,12 +1349,32 @@ vmemmap_populate(struct page *start_page
12051 PAGE_KERNEL_LARGE);
12052 set_pmd(pmd, __pmd_ma(__pte_val(entry)));
12054 - printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
12055 - addr, addr + PMD_SIZE - 1, p, node);
12056 + /* check to see if we have contiguous blocks */
12057 + if (p_end != p || node_start != node) {
12059 + printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
12060 + addr_start, addr_end-1, p_start, p_end-1, node_start);
12061 + addr_start = addr;
12062 + node_start = node;
12065 + addr_end = addr + PMD_SIZE;
12066 + p_end = p + PMD_SIZE;
12068 vmemmap_verify((pte_t *)pmd, node, addr, next);
12074 +void __meminit vmemmap_populate_print_last(void)
12077 + printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
12078 + addr_start, addr_end-1, p_start, p_end-1, node_start);
12085 --- sle11-2009-10-16.orig/arch/x86/mm/ioremap-xen.c 2009-03-16 16:33:40.000000000 +0100
12086 +++ sle11-2009-10-16/arch/x86/mm/ioremap-xen.c 2009-03-16 16:38:05.000000000 +0100
12087 @@ -20,14 +20,11 @@
12088 #include <asm/pgtable.h>
12089 #include <asm/tlbflush.h>
12090 #include <asm/pgalloc.h>
12091 +#include <asm/pat.h>
12093 -enum ioremap_mode {
12094 - IOR_MODE_UNCACHED,
12098 -#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
12099 +#ifdef CONFIG_X86_64
12101 +#ifndef CONFIG_XEN
12102 unsigned long __phys_addr(unsigned long x)
12104 if (x >= __START_KERNEL_map)
12105 @@ -35,6 +32,19 @@ unsigned long __phys_addr(unsigned long
12106 return x - PAGE_OFFSET;
12108 EXPORT_SYMBOL(__phys_addr);
12111 +static inline int phys_addr_valid(unsigned long addr)
12113 + return addr < (1UL << boot_cpu_data.x86_phys_bits);
12118 +static inline int phys_addr_valid(unsigned long addr)
12125 @@ -92,7 +102,8 @@ static int __direct_remap_pfn_range(stru
12126 * Fill in the machine address: PTE ptr is done later by
12127 * apply_to_page_range().
12129 - v->val = __pte_val(pfn_pte_ma(mfn, prot)) | _PAGE_IO;
12130 + pgprot_val(prot) |= _PAGE_IO;
12131 + v->val = __pte_val(pte_mkspecial(pfn_pte_ma(mfn, prot)));
12134 address += PAGE_SIZE;
12135 @@ -189,10 +200,9 @@ int touch_pte_range(struct mm_struct *mm
12137 EXPORT_SYMBOL(touch_pte_range);
12139 -#ifdef CONFIG_X86_32
12140 int page_is_ram(unsigned long pagenr)
12142 - unsigned long addr, end;
12143 + resource_size_t addr, end;
12147 @@ -228,31 +238,51 @@ int page_is_ram(unsigned long pagenr)
12154 * Fix up the linear direct mapping of the kernel to avoid cache attribute
12157 static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
12158 - enum ioremap_mode mode)
12159 + unsigned long prot_val)
12161 unsigned long nrpages = size >> PAGE_SHIFT;
12165 - case IOR_MODE_UNCACHED:
12166 + switch (prot_val) {
12167 + case _PAGE_CACHE_UC:
12169 - err = set_memory_uc(vaddr, nrpages);
12170 + err = _set_memory_uc(vaddr, nrpages);
12172 + case _PAGE_CACHE_WC:
12173 + err = _set_memory_wc(vaddr, nrpages);
12175 - case IOR_MODE_CACHED:
12176 - err = set_memory_wb(vaddr, nrpages);
12177 + case _PAGE_CACHE_WB:
12178 + err = _set_memory_wb(vaddr, nrpages);
12185 +int ioremap_check_change_attr(unsigned long mfn, unsigned long size,
12186 + unsigned long prot_val)
12188 + unsigned long sz;
12191 + for (sz = rc = 0; sz < size && !rc; ++mfn, sz += PAGE_SIZE) {
12192 + unsigned long pfn = mfn_to_local_pfn(mfn);
12194 + if (pfn >= max_pfn_mapped)
12196 + rc = ioremap_change_attr((unsigned long)__va(pfn << PAGE_SHIFT),
12197 + PAGE_SIZE, prot_val);
12204 * Remap an arbitrary physical address space into the kernel virtual
12205 * address space. Needed when the kernel wants to access high addresses
12206 @@ -262,12 +292,15 @@ static int ioremap_change_attr(unsigned
12207 * have to convert them into an offset in a page-aligned mapping, but the
12208 * caller shouldn't need to know that small detail.
12210 -static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
12211 - enum ioremap_mode mode)
12212 +static void __iomem *__ioremap_caller(resource_size_t phys_addr,
12213 + unsigned long size, unsigned long prot_val, void *caller)
12215 - unsigned long mfn, offset, last_addr, vaddr;
12216 + unsigned long mfn, offset, vaddr;
12217 + resource_size_t last_addr;
12218 struct vm_struct *area;
12219 + unsigned long new_prot_val;
12222 domid_t domid = DOMID_IO;
12224 /* Don't allow wraparound or zero size */
12225 @@ -275,6 +308,13 @@ static void __iomem *__ioremap(resource_
12226 if (!size || last_addr < phys_addr)
12229 + if (!phys_addr_valid(phys_addr)) {
12230 + printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
12231 + (unsigned long long)phys_addr);
12237 * Don't remap the low PCI/ISA area, it's always mapped..
12239 @@ -287,55 +327,86 @@ static void __iomem *__ioremap(resource_
12240 for (mfn = PFN_DOWN(phys_addr); mfn < PFN_UP(last_addr); mfn++) {
12241 unsigned long pfn = mfn_to_local_pfn(mfn);
12243 - if (pfn >= max_pfn)
12245 + if (pfn_valid(pfn)) {
12246 + if (!PageReserved(pfn_to_page(pfn)))
12248 + domid = DOMID_SELF;
12251 + WARN_ON_ONCE(domid == DOMID_SELF);
12253 - domid = DOMID_SELF;
12255 + * Mappings have to be page-aligned
12257 + offset = phys_addr & ~PAGE_MASK;
12258 + phys_addr &= PAGE_MASK;
12259 + size = PAGE_ALIGN(last_addr+1) - phys_addr;
12261 - if (pfn >= max_pfn_mapped) /* bogus */
12263 + retval = reserve_memtype(phys_addr, phys_addr + size,
12264 + prot_val, &new_prot_val);
12266 + pr_debug("Warning: reserve_memtype returned %d\n", retval);
12270 - if (pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
12271 + if (prot_val != new_prot_val) {
12273 + * Do not fallback to certain memory types with certain
12274 + * requested type:
12275 + * - request is uc-, return cannot be write-back
12276 + * - request is uc-, return cannot be write-combine
12277 + * - request is write-combine, return cannot be write-back
12279 + if ((prot_val == _PAGE_CACHE_UC_MINUS &&
12280 + (new_prot_val == _PAGE_CACHE_WB ||
12281 + new_prot_val == _PAGE_CACHE_WC)) ||
12282 + (prot_val == _PAGE_CACHE_WC &&
12283 + new_prot_val == _PAGE_CACHE_WB)) {
12285 + "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
12286 + (unsigned long long)phys_addr,
12287 + (unsigned long long)(phys_addr + size),
12288 + prot_val, new_prot_val);
12289 + free_memtype(phys_addr, phys_addr + size);
12292 + prot_val = new_prot_val;
12296 - case IOR_MODE_UNCACHED:
12297 + switch (prot_val) {
12298 + case _PAGE_CACHE_UC:
12301 - * FIXME: we will use UC MINUS for now, as video fb drivers
12302 - * depend on it. Upcoming ioremap_wc() will fix this behavior.
12304 + prot = PAGE_KERNEL_NOCACHE;
12306 + case _PAGE_CACHE_UC_MINUS:
12307 prot = PAGE_KERNEL_UC_MINUS;
12309 - case IOR_MODE_CACHED:
12310 + case _PAGE_CACHE_WC:
12311 + prot = PAGE_KERNEL_WC;
12313 + case _PAGE_CACHE_WB:
12314 prot = PAGE_KERNEL;
12319 - * Mappings have to be page-aligned
12321 - offset = phys_addr & ~PAGE_MASK;
12322 - phys_addr &= PAGE_MASK;
12323 - size = PAGE_ALIGN(last_addr+1) - phys_addr;
12328 - area = get_vm_area(size, VM_IOREMAP | (mode << 20));
12329 + area = get_vm_area_caller(size, VM_IOREMAP, caller);
12332 area->phys_addr = phys_addr;
12333 vaddr = (unsigned long) area->addr;
12334 if (__direct_remap_pfn_range(&init_mm, vaddr, PFN_DOWN(phys_addr),
12335 size, prot, domid)) {
12336 + free_memtype(phys_addr, phys_addr + size);
12337 free_vm_area(area);
12341 - if (ioremap_change_attr(vaddr, size, mode) < 0) {
12342 - iounmap((void __iomem *) vaddr);
12343 + if (ioremap_change_attr(vaddr, size, prot_val) < 0) {
12344 + free_memtype(phys_addr, phys_addr + size);
12345 + vunmap(area->addr);
12349 @@ -365,16 +436,72 @@ static void __iomem *__ioremap(resource_
12351 void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
12353 - return __ioremap(phys_addr, size, IOR_MODE_UNCACHED);
12355 + * Ideally, this should be:
12356 + * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
12358 + * Till we fix all X drivers to use ioremap_wc(), we will use
12361 + unsigned long val = _PAGE_CACHE_UC_MINUS;
12363 + return __ioremap_caller(phys_addr, size, val,
12364 + __builtin_return_address(0));
12366 EXPORT_SYMBOL(ioremap_nocache);
12369 + * ioremap_wc - map memory into CPU space write combined
12370 + * @offset: bus address of the memory
12371 + * @size: size of the resource to map
12373 + * This version of ioremap ensures that the memory is marked write combining.
12374 + * Write combining allows faster writes to some hardware devices.
12376 + * Must be freed with iounmap.
12378 +void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
12380 + if (pat_wc_enabled)
12381 + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
12382 + __builtin_return_address(0));
12384 + return ioremap_nocache(phys_addr, size);
12386 +EXPORT_SYMBOL(ioremap_wc);
12388 void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
12390 - return __ioremap(phys_addr, size, IOR_MODE_CACHED);
12391 + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB,
12392 + __builtin_return_address(0));
12394 EXPORT_SYMBOL(ioremap_cache);
12396 +#ifndef CONFIG_XEN
12397 +static void __iomem *ioremap_default(resource_size_t phys_addr,
12398 + unsigned long size)
12400 + unsigned long flags;
12405 + * - WB for WB-able memory and no other conflicting mappings
12406 + * - UC_MINUS for non-WB-able memory with no other conflicting mappings
12407 + * - Inherit from confliting mappings otherwise
12409 + err = reserve_memtype(phys_addr, phys_addr + size, -1, &flags);
12413 + ret = (void *) __ioremap_caller(phys_addr, size, flags,
12414 + __builtin_return_address(0));
12416 + free_memtype(phys_addr, phys_addr + size);
12417 + return (void __iomem *)ret;
12422 * iounmap - Free a IO remapping
12423 * @addr: virtual address from ioremap_*
12424 @@ -417,15 +544,7 @@ void iounmap(volatile void __iomem *addr
12428 - if ((p->flags >> 20) != IOR_MODE_CACHED) {
12429 - unsigned long n = get_vm_area_size(p) >> PAGE_SHIFT;
12430 - unsigned long mfn = p->phys_addr;
12431 - unsigned long va = (unsigned long)addr;
12433 - for (; n > 0; n--, mfn++, va += PAGE_SIZE)
12434 - if (mfn_to_local_pfn(mfn) < max_pfn)
12435 - set_memory_wb(va, 1);
12437 + free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
12439 /* Finally remove it */
12440 o = remove_vm_area((void *)addr);
12441 @@ -434,6 +553,37 @@ void iounmap(volatile void __iomem *addr
12443 EXPORT_SYMBOL(iounmap);
12445 +#ifndef CONFIG_XEN
12447 + * Convert a physical pointer to a virtual kernel pointer for /dev/mem
12450 +void *xlate_dev_mem_ptr(unsigned long phys)
12453 + unsigned long start = phys & PAGE_MASK;
12455 + /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
12456 + if (page_is_ram(start >> PAGE_SHIFT))
12457 + return __va(phys);
12459 + addr = (void *)ioremap_default(start, PAGE_SIZE);
12461 + addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
12466 +void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
12468 + if (page_is_ram(phys >> PAGE_SHIFT))
12471 + iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
12476 int __initdata early_ioremap_debug;
12478 static int __init early_ioremap_debug_setup(char *str)
12479 @@ -445,8 +595,8 @@ static int __init early_ioremap_debug_se
12480 early_param("early_ioremap_debug", early_ioremap_debug_setup);
12482 static __initdata int after_paging_init;
12483 -static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
12484 - __attribute__((aligned(PAGE_SIZE)));
12485 +static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
12486 + __section(.bss.page_aligned);
12488 #ifdef CONFIG_X86_32
12489 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
12490 @@ -461,8 +611,8 @@ static inline pmd_t * __init early_iorem
12493 #define early_ioremap_pmd early_get_pmd
12494 +#undef make_lowmem_page_readonly
12495 #define make_lowmem_page_readonly early_make_page_readonly
12496 -#define make_lowmem_page_writable make_page_writable
12499 static inline pte_t * __init early_ioremap_pte(unsigned long addr)
12500 @@ -512,7 +662,7 @@ void __init early_ioremap_clear(void)
12501 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
12503 make_lowmem_page_writable(bm_pte, XENFEAT_writable_page_tables);
12504 - /* paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT); */
12505 + /* paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT); */
12509 @@ -654,10 +804,11 @@ void __init early_iounmap(void *addr, un
12510 unsigned long offset;
12511 unsigned int nrpages;
12512 enum fixed_addresses idx;
12513 - unsigned int nesting;
12516 nesting = --early_ioremap_nested;
12517 - WARN_ON(nesting < 0);
12518 + if (WARN_ON(nesting < 0))
12521 if (early_ioremap_debug) {
12522 printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
12523 --- sle11-2009-10-16.orig/arch/x86/mm/pageattr-xen.c 2009-03-16 16:37:14.000000000 +0100
12524 +++ sle11-2009-10-16/arch/x86/mm/pageattr-xen.c 2009-03-16 16:38:05.000000000 +0100
12526 #include <linux/slab.h>
12527 #include <linux/mm.h>
12528 #include <linux/interrupt.h>
12529 +#include <linux/seq_file.h>
12530 +#include <linux/debugfs.h>
12532 #include <asm/e820.h>
12533 #include <asm/processor.h>
12534 @@ -17,370 +19,7 @@
12535 #include <asm/uaccess.h>
12536 #include <asm/pgalloc.h>
12537 #include <asm/proto.h>
12538 -#include <asm/mmu_context.h>
12540 -#ifndef CONFIG_X86_64
12541 -#define TASK_SIZE64 TASK_SIZE
12544 -static void _pin_lock(struct mm_struct *mm, int lock) {
12546 - spin_lock(&mm->page_table_lock);
12547 -#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
12548 - /* While mm->page_table_lock protects us against insertions and
12549 - * removals of higher level page table pages, it doesn't protect
12550 - * against updates of pte-s. Such updates, however, require the
12551 - * pte pages to be in consistent state (unpinned+writable or
12552 - * pinned+readonly). The pinning and attribute changes, however
12553 - * cannot be done atomically, which is why such updates must be
12554 - * prevented from happening concurrently.
12555 - * Note that no pte lock can ever elsewhere be acquired nesting
12556 - * with an already acquired one in the same mm, or with the mm's
12557 - * page_table_lock already acquired, as that would break in the
12558 - * non-split case (where all these are actually resolving to the
12559 - * one page_table_lock). Thus acquiring all of them here is not
12560 - * going to result in dead locks, and the order of acquires
12561 - * doesn't matter.
12564 - pgd_t *pgd = mm->pgd;
12567 - for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
12571 - if (pgd_none(*pgd))
12573 - pud = pud_offset(pgd, 0);
12574 - for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
12578 - if (pud_none(*pud))
12580 - pmd = pmd_offset(pud, 0);
12581 - for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
12584 - if (pmd_none(*pmd))
12586 - ptl = pte_lockptr(0, pmd);
12590 - spin_unlock(ptl);
12597 - spin_unlock(&mm->page_table_lock);
12599 -#define pin_lock(mm) _pin_lock(mm, 1)
12600 -#define pin_unlock(mm) _pin_lock(mm, 0)
12602 -#define PIN_BATCH sizeof(void *)
12603 -static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl);
12605 -static inline unsigned int pgd_walk_set_prot(struct page *page, pgprot_t flags,
12606 - unsigned int cpu, unsigned int seq)
12608 - unsigned long pfn = page_to_pfn(page);
12610 - if (PageHighMem(page)) {
12611 - if (pgprot_val(flags) & _PAGE_RW)
12612 - ClearPagePinned(page);
12614 - SetPagePinned(page);
12616 - MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
12617 - (unsigned long)__va(pfn << PAGE_SHIFT),
12618 - pfn_pte(pfn, flags), 0);
12619 - if (unlikely(++seq == PIN_BATCH)) {
12620 - if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
12621 - PIN_BATCH, NULL)))
12630 -static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
12632 - pgd_t *pgd = pgd_base;
12636 - unsigned int cpu, seq;
12637 - multicall_entry_t *mcl;
12639 - if (xen_feature(XENFEAT_auto_translated_physmap))
12645 - * Cannot iterate up to USER_PTRS_PER_PGD on x86-64 as these pagetables
12646 - * may not be the 'current' task's pagetables (e.g., current may be
12647 - * 32-bit, but the pagetables may be for a 64-bit task).
12648 - * Subtracting 1 from TASK_SIZE64 means the loop limit is correct
12649 - * regardless of whether TASK_SIZE64 is a multiple of PGDIR_SIZE.
12651 - for (g = 0, seq = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
12652 - if (pgd_none(*pgd))
12654 - pud = pud_offset(pgd, 0);
12655 - if (PTRS_PER_PUD > 1) /* not folded */
12656 - seq = pgd_walk_set_prot(virt_to_page(pud),flags,cpu,seq);
12657 - for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
12658 - if (pud_none(*pud))
12660 - pmd = pmd_offset(pud, 0);
12661 - if (PTRS_PER_PMD > 1) /* not folded */
12662 - seq = pgd_walk_set_prot(virt_to_page(pmd),flags,cpu,seq);
12663 - for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
12664 - if (pmd_none(*pmd))
12666 - seq = pgd_walk_set_prot(pmd_page(*pmd),flags,cpu,seq);
12671 - mcl = per_cpu(pb_mcl, cpu);
12672 -#ifdef CONFIG_X86_64
12673 - if (unlikely(seq > PIN_BATCH - 2)) {
12674 - if (unlikely(HYPERVISOR_multicall_check(mcl, seq, NULL)))
12678 - MULTI_update_va_mapping(mcl + seq,
12679 - (unsigned long)__user_pgd(pgd_base),
12680 - pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags),
12682 - MULTI_update_va_mapping(mcl + seq + 1,
12683 - (unsigned long)pgd_base,
12684 - pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
12686 - if (unlikely(HYPERVISOR_multicall_check(mcl, seq + 2, NULL)))
12689 - if (likely(seq != 0)) {
12690 - MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
12691 - (unsigned long)pgd_base,
12692 - pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
12694 - if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
12697 - } else if(HYPERVISOR_update_va_mapping((unsigned long)pgd_base,
12698 - pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
12706 -static void __pgd_pin(pgd_t *pgd)
12708 - pgd_walk(pgd, PAGE_KERNEL_RO);
12709 - kmap_flush_unused();
12710 - xen_pgd_pin(__pa(pgd)); /* kernel */
12711 -#ifdef CONFIG_X86_64
12712 - xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */
12714 - SetPagePinned(virt_to_page(pgd));
12717 -static void __pgd_unpin(pgd_t *pgd)
12719 - xen_pgd_unpin(__pa(pgd));
12720 -#ifdef CONFIG_X86_64
12721 - xen_pgd_unpin(__pa(__user_pgd(pgd)));
12723 - pgd_walk(pgd, PAGE_KERNEL);
12724 - ClearPagePinned(virt_to_page(pgd));
12727 -void pgd_test_and_unpin(pgd_t *pgd)
12729 - if (PagePinned(virt_to_page(pgd)))
12730 - __pgd_unpin(pgd);
12733 -void mm_pin(struct mm_struct *mm)
12735 - if (xen_feature(XENFEAT_writable_page_tables))
12739 - __pgd_pin(mm->pgd);
12743 -void mm_unpin(struct mm_struct *mm)
12745 - if (xen_feature(XENFEAT_writable_page_tables))
12749 - __pgd_unpin(mm->pgd);
12753 -void mm_pin_all(void)
12755 - struct page *page;
12756 - unsigned long flags;
12758 - if (xen_feature(XENFEAT_writable_page_tables))
12762 - * Allow uninterrupted access to the pgd_list. Also protects
12763 - * __pgd_pin() by disabling preemption.
12764 - * All other CPUs must be at a safe point (e.g., in stop_machine
12765 - * or offlined entirely).
12767 - spin_lock_irqsave(&pgd_lock, flags);
12768 - list_for_each_entry(page, &pgd_list, lru) {
12769 - if (!PagePinned(page))
12770 - __pgd_pin((pgd_t *)page_address(page));
12772 - spin_unlock_irqrestore(&pgd_lock, flags);
12775 -void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
12777 - if (!PagePinned(virt_to_page(mm->pgd)))
12781 -void arch_exit_mmap(struct mm_struct *mm)
12783 - struct task_struct *tsk = current;
12788 - * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
12789 - * *much* faster this way, as no tlb flushes means bigger wrpt batches.
12791 - if (tsk->active_mm == mm) {
12792 - tsk->active_mm = &init_mm;
12793 - atomic_inc(&init_mm.mm_count);
12795 - switch_mm(mm, &init_mm, tsk);
12797 - atomic_dec(&mm->mm_count);
12798 - BUG_ON(atomic_read(&mm->mm_count) == 0);
12801 - task_unlock(tsk);
12803 - if (PagePinned(virt_to_page(mm->pgd))
12804 - && atomic_read(&mm->mm_count) == 1
12805 - && !mm->context.has_foreign_mappings)
12809 -static void _pte_free(struct page *page, unsigned int order)
12812 - __pte_free(page);
12815 -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
12817 - struct page *pte;
12819 -#ifdef CONFIG_HIGHPTE
12820 - pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
12822 - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
12825 - pgtable_page_ctor(pte);
12826 - SetPageForeign(pte, _pte_free);
12827 - init_page_count(pte);
12832 -void __pte_free(pgtable_t pte)
12834 - if (!PageHighMem(pte)) {
12835 - unsigned long va = (unsigned long)page_address(pte);
12836 - unsigned int level;
12837 - pte_t *ptep = lookup_address(va, &level);
12839 - BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
12840 - if (!pte_write(*ptep)
12841 - && HYPERVISOR_update_va_mapping(va,
12842 - mk_pte(pte, PAGE_KERNEL),
12846 -#ifdef CONFIG_HIGHPTE
12847 - ClearPagePinned(pte);
12852 - ClearPageForeign(pte);
12853 - init_page_count(pte);
12854 - pgtable_page_dtor(pte);
12855 - __free_page(pte);
12858 -#if PAGETABLE_LEVELS >= 3
12859 -static void _pmd_free(struct page *page, unsigned int order)
12862 - __pmd_free(page);
12865 -pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
12867 - struct page *pmd;
12869 - pmd = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
12872 - SetPageForeign(pmd, _pmd_free);
12873 - init_page_count(pmd);
12874 - return page_address(pmd);
12877 -void __pmd_free(pgtable_t pmd)
12879 - unsigned long va = (unsigned long)page_address(pmd);
12880 - unsigned int level;
12881 - pte_t *ptep = lookup_address(va, &level);
12883 - BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
12884 - if (!pte_write(*ptep)
12885 - && HYPERVISOR_update_va_mapping(va, mk_pte(pmd, PAGE_KERNEL), 0))
12888 - ClearPageForeign(pmd);
12889 - init_page_count(pmd);
12890 - __free_page(pmd);
12894 -/* blktap and gntdev need this, as otherwise they would implicitly (and
12895 - * needlessly, as they never use it) reference init_mm. */
12896 -pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *vma,
12897 - unsigned long addr, pte_t *ptep, int full)
12899 - return ptep_get_and_clear_full(vma->vm_mm, addr, ptep, full);
12901 -EXPORT_SYMBOL_GPL(xen_ptep_get_and_clear_full);
12902 +#include <asm/pat.h>
12905 * The current flushing context - we pass it instead of 5 arguments:
12906 @@ -392,6 +31,7 @@ struct cpa_data {
12910 + unsigned force_split : 1;
12913 #ifdef CONFIG_X86_64
12914 @@ -637,6 +277,9 @@ try_preserve_large_page(pte_t *kpte, uns
12915 int i, do_split = 1;
12916 unsigned int level;
12918 + if (cpa->force_split)
12921 spin_lock_irqsave(&pgd_lock, flags);
12923 * Check for races, another CPU might have split this page
12924 @@ -856,9 +499,7 @@ static int split_large_page(pte_t *kpte,
12927 pbase = (pte_t *)page_address(base);
12928 -#ifdef CONFIG_X86_32
12929 - paravirt_alloc_pt(&init_mm, page_to_pfn(base));
12931 + paravirt_alloc_pte(&init_mm, page_to_pfn(base));
12932 ref_prot = pte_pgprot(pte_clrhuge(*kpte));
12934 #ifdef CONFIG_X86_64
12935 @@ -919,7 +560,7 @@ static int __change_page_attr(struct cpa
12937 kpte = lookup_address(address, &level);
12939 - return primary ? -EINVAL : 0;
12943 if (!__pte_val(old_pte)) {
12944 @@ -1078,7 +719,8 @@ static inline int cache_attr(pgprot_t at
12947 static int change_page_attr_set_clr(unsigned long addr, int numpages,
12948 - pgprot_t mask_set, pgprot_t mask_clr)
12949 + pgprot_t mask_set, pgprot_t mask_clr,
12952 struct cpa_data cpa;
12953 int ret, cache, checkalias;
12954 @@ -1089,7 +731,7 @@ static int change_page_attr_set_clr(unsi
12956 mask_set = canon_pgprot(mask_set);
12957 mask_clr = canon_pgprot(mask_clr);
12958 - if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
12959 + if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
12962 /* Ensure we are PAGE_SIZE aligned */
12963 @@ -1106,6 +748,7 @@ static int change_page_attr_set_clr(unsi
12964 cpa.mask_set = mask_set;
12965 cpa.mask_clr = mask_clr;
12967 + cpa.force_split = force_split;
12969 /* No alias checking for _NX bit modifications */
12970 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
12971 @@ -1144,26 +787,67 @@ out:
12972 static inline int change_page_attr_set(unsigned long addr, int numpages,
12975 - return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
12976 + return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0);
12979 static inline int change_page_attr_clear(unsigned long addr, int numpages,
12982 - return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
12983 + return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
12986 -int set_memory_uc(unsigned long addr, int numpages)
12987 +int _set_memory_uc(unsigned long addr, int numpages)
12990 + * for now UC MINUS. see comments in ioremap_nocache()
12992 return change_page_attr_set(addr, numpages,
12993 - __pgprot(_PAGE_PCD));
12994 + __pgprot(_PAGE_CACHE_UC_MINUS));
12997 +int set_memory_uc(unsigned long addr, int numpages)
13000 + * for now UC MINUS. see comments in ioremap_nocache()
13002 + if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
13003 + _PAGE_CACHE_UC_MINUS, NULL))
13006 + return _set_memory_uc(addr, numpages);
13008 EXPORT_SYMBOL(set_memory_uc);
13010 -int set_memory_wb(unsigned long addr, int numpages)
13011 +int _set_memory_wc(unsigned long addr, int numpages)
13013 + return change_page_attr_set(addr, numpages,
13014 + __pgprot(_PAGE_CACHE_WC));
13017 +int set_memory_wc(unsigned long addr, int numpages)
13019 + if (!pat_wc_enabled)
13020 + return set_memory_uc(addr, numpages);
13022 + if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
13023 + _PAGE_CACHE_WC, NULL))
13026 + return _set_memory_wc(addr, numpages);
13028 +EXPORT_SYMBOL(set_memory_wc);
13030 +int _set_memory_wb(unsigned long addr, int numpages)
13032 return change_page_attr_clear(addr, numpages,
13033 - __pgprot(_PAGE_PCD | _PAGE_PWT));
13034 + __pgprot(_PAGE_CACHE_MASK));
13037 +int set_memory_wb(unsigned long addr, int numpages)
13039 + free_memtype(addr, addr + numpages * PAGE_SIZE);
13041 + return _set_memory_wb(addr, numpages);
13043 EXPORT_SYMBOL(set_memory_wb);
13045 @@ -1194,6 +878,12 @@ int set_memory_np(unsigned long addr, in
13046 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
13049 +int set_memory_4k(unsigned long addr, int numpages)
13051 + return change_page_attr_set_clr(addr, numpages, __pgprot(0),
13055 int set_pages_uc(struct page *page, int numpages)
13057 unsigned long addr = (unsigned long)page_address(page);
13058 @@ -1303,6 +993,45 @@ void kernel_map_pages(struct page *page,
13059 cpa_fill_pool(NULL);
13062 +#ifdef CONFIG_DEBUG_FS
13063 +static int dpa_show(struct seq_file *m, void *v)
13065 + seq_puts(m, "DEBUG_PAGEALLOC\n");
13066 + seq_printf(m, "pool_size : %lu\n", pool_size);
13067 + seq_printf(m, "pool_pages : %lu\n", pool_pages);
13068 + seq_printf(m, "pool_low : %lu\n", pool_low);
13069 + seq_printf(m, "pool_used : %lu\n", pool_used);
13070 + seq_printf(m, "pool_failed : %lu\n", pool_failed);
13075 +static int dpa_open(struct inode *inode, struct file *filp)
13077 + return single_open(filp, dpa_show, NULL);
13080 +static const struct file_operations dpa_fops = {
13081 + .open = dpa_open,
13082 + .read = seq_read,
13083 + .llseek = seq_lseek,
13084 + .release = single_release,
13087 +static int __init debug_pagealloc_proc_init(void)
13089 + struct dentry *de;
13091 + de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL,
13098 +__initcall(debug_pagealloc_proc_init);
13101 #ifdef CONFIG_HIBERNATION
13103 bool kernel_page_present(struct page *page)
13104 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
13105 +++ sle11-2009-10-16/arch/x86/mm/pat-xen.c 2009-03-16 16:38:05.000000000 +0100
13108 + * Handle caching attributes in page tables (PAT)
13110 + * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
13111 + * Suresh B Siddha <suresh.b.siddha@intel.com>
13113 + * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
13116 +#include <linux/mm.h>
13117 +#include <linux/kernel.h>
13118 +#include <linux/gfp.h>
13119 +#include <linux/fs.h>
13120 +#include <linux/bootmem.h>
13122 +#include <asm/msr.h>
13123 +#include <asm/tlbflush.h>
13124 +#include <asm/processor.h>
13125 +#include <asm/page.h>
13126 +#include <asm/pgtable.h>
13127 +#include <asm/pat.h>
13128 +#include <asm/e820.h>
13129 +#include <asm/cacheflush.h>
13130 +#include <asm/fcntl.h>
13131 +#include <asm/mtrr.h>
13132 +#include <asm/io.h>
13134 +#ifdef CONFIG_X86_PAT
13135 +int __read_mostly pat_wc_enabled = 1;
13137 +void __cpuinit pat_disable(char *reason)
13139 + pat_wc_enabled = 0;
13140 + printk(KERN_INFO "%s\n", reason);
13143 +static int __init nopat(char *str)
13145 + pat_disable("PAT support disabled.");
13148 +early_param("nopat", nopat);
13151 +static u64 __read_mostly boot_pat_state;
13154 + PAT_UC = 0, /* uncached */
13155 + PAT_WC = 1, /* Write combining */
13156 + PAT_WT = 4, /* Write Through */
13157 + PAT_WP = 5, /* Write Protected */
13158 + PAT_WB = 6, /* Write Back (default) */
13159 + PAT_UC_MINUS = 7, /* UC, but can be overridden by MTRR */
13162 +#define PAT(x,y) ((u64)PAT_ ## y << ((x)*8))
13164 +void pat_init(void)
13168 + if (!pat_wc_enabled)
13171 + /* Paranoia check. */
13172 + if (!cpu_has_pat) {
13173 + printk(KERN_ERR "PAT enabled, but CPU feature cleared\n");
13175 + * Panic if this happens on the secondary CPU, and we
13176 + * switched to PAT on the boot CPU. We have no way to
13179 + BUG_ON(boot_pat_state);
13182 +#ifndef CONFIG_XEN
13183 + /* Set PWT to Write-Combining. All other bits stay the same */
13185 + * PTE encoding used in Linux:
13190 + * 000 WB _PAGE_CACHE_WB
13191 + * 001 WC _PAGE_CACHE_WC
13192 + * 010 UC- _PAGE_CACHE_UC_MINUS
13193 + * 011 UC _PAGE_CACHE_UC
13196 + pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) |
13197 + PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC);
13199 + /* Boot CPU check */
13200 + if (!boot_pat_state)
13201 + rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
13203 + wrmsrl(MSR_IA32_CR_PAT, pat);
13206 + * PAT settings are part of the hypervisor interface, and their
13207 + * assignment cannot be changed.
13209 + rdmsrl(MSR_IA32_CR_PAT, pat);
13210 + if (!boot_pat_state)
13211 + boot_pat_state = pat;
13213 + printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
13214 + smp_processor_id(), boot_pat_state, pat);
13219 +static char *cattr_name(unsigned long flags)
13221 + switch (flags & _PAGE_CACHE_MASK) {
13222 + case _PAGE_CACHE_UC: return "uncached";
13223 + case _PAGE_CACHE_UC_MINUS: return "uncached-minus";
13224 + case _PAGE_CACHE_WB: return "write-back";
13225 + case _PAGE_CACHE_WC: return "write-combining";
13226 + case _PAGE_CACHE_WP: return "write-protected";
13227 + case _PAGE_CACHE_WT: return "write-through";
13228 + default: return "broken";
13233 + * The global memtype list keeps track of memory type for specific
13234 + * physical memory areas. Conflicting memory types in different
13235 + * mappings can cause CPU cache corruption. To avoid this we keep track.
13237 + * The list is sorted based on starting address and can contain multiple
13238 + * entries for each address (this allows reference counting for overlapping
13239 + * areas). All the aliases have the same cache attributes of course.
13240 + * Zero attributes are represented as holes.
13242 + * Currently the data structure is a list because the number of mappings
13243 + * are expected to be relatively small. If this should be a problem
13244 + * it could be changed to a rbtree or similar.
13246 + * memtype_lock protects the whole list.
13252 + unsigned long type;
13253 + struct list_head nd;
13256 +static LIST_HEAD(memtype_list);
13257 +static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
13260 + * Does intersection of PAT memory type and MTRR memory type and returns
13261 + * the resulting memory type as PAT understands it.
13262 + * (Type in pat and mtrr will not have same value)
13263 + * The intersection is based on "Effective Memory Type" tables in IA-32
13266 +static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
13267 + unsigned long *ret_prot)
13269 + unsigned long pat_type;
13272 + pat_type = prot & _PAGE_CACHE_MASK;
13273 + prot &= (~_PAGE_CACHE_MASK);
13276 + * We return the PAT request directly for types where PAT takes
13277 + * precedence with respect to MTRR and for UC_MINUS.
13278 + * Consistency checks with other PAT requests is done later
13279 + * while going through memtype list.
13281 + if (pat_type == _PAGE_CACHE_WC) {
13282 + *ret_prot = prot | _PAGE_CACHE_WC;
13284 + } else if (pat_type == _PAGE_CACHE_UC_MINUS) {
13285 + *ret_prot = prot | _PAGE_CACHE_UC_MINUS;
13287 + } else if (pat_type == _PAGE_CACHE_UC) {
13288 + *ret_prot = prot | _PAGE_CACHE_UC;
13293 + * Look for MTRR hint to get the effective type in case where PAT
13294 + * request is for WB.
13296 + mtrr_type = mtrr_type_lookup(start, end);
13298 + if (mtrr_type == MTRR_TYPE_UNCACHABLE) {
13299 + *ret_prot = prot | _PAGE_CACHE_UC;
13300 + } else if (mtrr_type == MTRR_TYPE_WRCOMB) {
13301 + *ret_prot = prot | _PAGE_CACHE_WC;
13303 + *ret_prot = prot | _PAGE_CACHE_WB;
13310 + * req_type typically has one of the:
13311 + * - _PAGE_CACHE_WB
13312 + * - _PAGE_CACHE_WC
13313 + * - _PAGE_CACHE_UC_MINUS
13314 + * - _PAGE_CACHE_UC
13316 + * req_type will have a special case value '-1', when requester wants to inherit
13317 + * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
13319 + * If ret_type is NULL, function will return an error if it cannot reserve the
13320 + * region with req_type. If ret_type is non-null, function will return
13321 + * available type in ret_type in case of no error. In case of any error
13322 + * it will return a negative return value.
13324 +int reserve_memtype(u64 start, u64 end, unsigned long req_type,
13325 + unsigned long *ret_type)
13327 + struct memtype *new_entry = NULL;
13328 + struct memtype *parse;
13329 + unsigned long actual_type;
13332 + /* Only track when pat_wc_enabled */
13333 + if (!pat_wc_enabled) {
13334 + /* This is identical to page table setting without PAT */
13336 + if (req_type == -1) {
13337 + *ret_type = _PAGE_CACHE_WB;
13339 + *ret_type = req_type;
13345 + /* Low ISA region is always mapped WB in page table. No need to track */
13346 + if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) {
13348 + *ret_type = _PAGE_CACHE_WB;
13353 + if (req_type == -1) {
13355 + * Call mtrr_lookup to get the type hint. This is an
13356 + * optimization for /dev/mem mmap'ers into WB memory (BIOS
13357 + * tools and ACPI tools). Use WB request for WB memory and use
13358 + * UC_MINUS otherwise.
13360 + u8 mtrr_type = mtrr_type_lookup(start, end);
13362 + if (mtrr_type == MTRR_TYPE_WRBACK) {
13363 + req_type = _PAGE_CACHE_WB;
13364 + actual_type = _PAGE_CACHE_WB;
13366 + req_type = _PAGE_CACHE_UC_MINUS;
13367 + actual_type = _PAGE_CACHE_UC_MINUS;
13370 + req_type &= _PAGE_CACHE_MASK;
13371 + err = pat_x_mtrr_type(start, end, req_type, &actual_type);
13376 + *ret_type = actual_type;
13381 + new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
13385 + new_entry->start = start;
13386 + new_entry->end = end;
13387 + new_entry->type = actual_type;
13390 + *ret_type = actual_type;
13392 + spin_lock(&memtype_lock);
13394 + /* Search for existing mapping that overlaps the current range */
13395 + list_for_each_entry(parse, &memtype_list, nd) {
13396 + struct memtype *saved_ptr;
13398 + if (parse->start >= end) {
13399 + pr_debug("New Entry\n");
13400 + list_add(&new_entry->nd, parse->nd.prev);
13401 + new_entry = NULL;
13405 + if (start <= parse->start && end >= parse->start) {
13406 + if (actual_type != parse->type && ret_type) {
13407 + actual_type = parse->type;
13408 + *ret_type = actual_type;
13409 + new_entry->type = actual_type;
13412 + if (actual_type != parse->type) {
13414 + KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
13415 + current->comm, current->pid,
13417 + cattr_name(actual_type),
13418 + cattr_name(parse->type));
13423 + saved_ptr = parse;
13425 + * Check to see whether the request overlaps more
13426 + * than one entry in the list
13428 + list_for_each_entry_continue(parse, &memtype_list, nd) {
13429 + if (end <= parse->start) {
13433 + if (actual_type != parse->type) {
13435 + KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
13436 + current->comm, current->pid,
13438 + cattr_name(actual_type),
13439 + cattr_name(parse->type));
13449 + pr_debug("Overlap at 0x%Lx-0x%Lx\n",
13450 + saved_ptr->start, saved_ptr->end);
13451 + /* No conflict. Go ahead and add this new entry */
13452 + list_add(&new_entry->nd, saved_ptr->nd.prev);
13453 + new_entry = NULL;
13457 + if (start < parse->end) {
13458 + if (actual_type != parse->type && ret_type) {
13459 + actual_type = parse->type;
13460 + *ret_type = actual_type;
13461 + new_entry->type = actual_type;
13464 + if (actual_type != parse->type) {
13466 + KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
13467 + current->comm, current->pid,
13469 + cattr_name(actual_type),
13470 + cattr_name(parse->type));
13475 + saved_ptr = parse;
13477 + * Check to see whether the request overlaps more
13478 + * than one entry in the list
13480 + list_for_each_entry_continue(parse, &memtype_list, nd) {
13481 + if (end <= parse->start) {
13485 + if (actual_type != parse->type) {
13487 + KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
13488 + current->comm, current->pid,
13490 + cattr_name(actual_type),
13491 + cattr_name(parse->type));
13501 + pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
13502 + saved_ptr->start, saved_ptr->end);
13503 + /* No conflict. Go ahead and add this new entry */
13504 + list_add(&new_entry->nd, &saved_ptr->nd);
13505 + new_entry = NULL;
13512 + "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
13513 + start, end, cattr_name(new_entry->type),
13514 + cattr_name(req_type));
13515 + kfree(new_entry);
13516 + spin_unlock(&memtype_lock);
13521 + /* No conflict. Not yet added to the list. Add to the tail */
13522 + list_add_tail(&new_entry->nd, &memtype_list);
13523 + pr_debug("New Entry\n");
13528 + "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
13529 + start, end, cattr_name(actual_type),
13530 + cattr_name(req_type), cattr_name(*ret_type));
13533 + "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
13534 + start, end, cattr_name(actual_type),
13535 + cattr_name(req_type));
13538 + spin_unlock(&memtype_lock);
13542 +int free_memtype(u64 start, u64 end)
13544 + struct memtype *ml;
13545 + int err = -EINVAL;
13547 + /* Only track when pat_wc_enabled */
13548 + if (!pat_wc_enabled) {
13552 + /* Low ISA region is always mapped WB. No need to track */
13553 + if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) {
13557 + spin_lock(&memtype_lock);
13558 + list_for_each_entry(ml, &memtype_list, nd) {
13559 + if (ml->start == start && ml->end == end) {
13560 + list_del(&ml->nd);
13566 + spin_unlock(&memtype_lock);
13569 + printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n",
13570 + current->comm, current->pid, start, end);
13573 + pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end);
13579 + * /dev/mem mmap interface. The memtype used for mapping varies:
13580 + * - Use UC for mappings with O_SYNC flag
13581 + * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
13582 + * inherit the memtype from existing mapping.
13583 + * - Else use UC_MINUS memtype (for backward compatibility with existing
13586 +pgprot_t phys_mem_access_prot(struct file *file, unsigned long mfn,
13587 + unsigned long size, pgprot_t vma_prot)
13592 +#ifdef CONFIG_NONPROMISC_DEVMEM
13593 +/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM */
13594 +static inline int range_is_allowed(unsigned long mfn, unsigned long size)
13599 +static inline int range_is_allowed(unsigned long mfn, unsigned long size)
13601 + u64 from = ((u64)mfn) << PAGE_SHIFT;
13602 + u64 to = from + size;
13603 + u64 cursor = from;
13605 + while (cursor < to) {
13606 + if (!devmem_is_allowed(mfn)) {
13608 + "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
13609 + current->comm, from, to);
13612 + cursor += PAGE_SIZE;
13617 +#endif /* CONFIG_NONPROMISC_DEVMEM */
13619 +int phys_mem_access_prot_allowed(struct file *file, unsigned long mfn,
13620 + unsigned long size, pgprot_t *vma_prot)
13622 + u64 addr = (u64)mfn << PAGE_SHIFT;
13623 + unsigned long flags = _PAGE_CACHE_UC_MINUS;
13626 + if (!range_is_allowed(mfn, size))
13629 + if (file->f_flags & O_SYNC) {
13630 + flags = _PAGE_CACHE_UC;
13633 +#ifndef CONFIG_X86_32
13634 +#ifndef CONFIG_XEN /* Xen sets correct MTRR type on non-RAM for us. */
13636 + * On the PPro and successors, the MTRRs are used to set
13637 + * memory types for physical addresses outside main memory,
13638 + * so blindly setting UC or PWT on those pages is wrong.
13639 + * For Pentiums and earlier, the surround logic should disable
13640 + * caching for the high addresses through the KEN pin, but
13641 + * we maintain the tradition of paranoia in this code.
13643 + if (!pat_wc_enabled &&
13644 + ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
13645 + test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
13646 + test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
13647 + test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) &&
13648 + (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
13649 + flags = _PAGE_CACHE_UC;
13655 + * With O_SYNC, we can only take UC mapping. Fail if we cannot.
13656 + * Without O_SYNC, we want to get
13657 + * - WB for WB-able memory and no other conflicting mappings
13658 + * - UC_MINUS for non-WB-able memory with no other conflicting mappings
13659 + * - Inherit from conflicting mappings otherwise
13661 + if (flags != _PAGE_CACHE_UC_MINUS) {
13662 + retval = reserve_memtype(addr, addr + size, flags, NULL);
13664 + retval = reserve_memtype(addr, addr + size, -1, &flags);
13670 + if (ioremap_check_change_attr(mfn, size, flags) < 0) {
13671 + free_memtype(addr, addr + size);
13673 + "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
13674 + current->comm, current->pid,
13675 + cattr_name(flags),
13676 + addr, addr + size);
13680 + *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
13685 +void map_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot)
13687 + u64 addr = (u64)mfn << PAGE_SHIFT;
13688 + unsigned long flags;
13689 + unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
13691 + reserve_memtype(addr, addr + size, want_flags, &flags);
13692 + if (flags != want_flags) {
13694 + "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n",
13695 + current->comm, current->pid,
13696 + cattr_name(want_flags),
13697 + addr, (unsigned long long)(addr + size),
13698 + cattr_name(flags));
13702 +void unmap_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot)
13704 + u64 addr = (u64)mfn << PAGE_SHIFT;
13706 + free_memtype(addr, addr + size);
13709 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
13710 +++ sle11-2009-10-16/arch/x86/mm/pgtable-xen.c 2009-03-16 16:38:05.000000000 +0100
13712 +#include <linux/mm.h>
13713 +#include <linux/module.h>
13714 +#include <xen/features.h>
13715 +#include <asm/pgalloc.h>
13716 +#include <asm/pgtable.h>
13717 +#include <asm/tlb.h>
13718 +#include <asm/hypervisor.h>
13719 +#include <asm/mmu_context.h>
13721 +pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
13723 + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
13725 + make_lowmem_page_readonly(pte, XENFEAT_writable_page_tables);
13729 +static void _pte_free(struct page *page, unsigned int order)
13732 + __pte_free(page);
13735 +pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
13737 + struct page *pte;
13739 +#ifdef CONFIG_HIGHPTE
13740 + pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
13742 + pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
13745 + pgtable_page_ctor(pte);
13746 + SetPageForeign(pte, _pte_free);
13747 + init_page_count(pte);
13752 +void __pte_free(pgtable_t pte)
13754 + if (!PageHighMem(pte)) {
13755 + unsigned long va = (unsigned long)page_address(pte);
13756 + unsigned int level;
13757 + pte_t *ptep = lookup_address(va, &level);
13759 + BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
13760 + if (!pte_write(*ptep)
13761 + && HYPERVISOR_update_va_mapping(va,
13762 + mk_pte(pte, PAGE_KERNEL),
13766 +#ifdef CONFIG_HIGHPTE
13767 + ClearPagePinned(pte);
13772 + ClearPageForeign(pte);
13773 + init_page_count(pte);
13774 + pgtable_page_dtor(pte);
13775 + __free_page(pte);
13778 +void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
13780 + pgtable_page_dtor(pte);
13781 + paravirt_release_pte(page_to_pfn(pte));
13782 + tlb_remove_page(tlb, pte);
13785 +#if PAGETABLE_LEVELS > 2
13786 +static void _pmd_free(struct page *page, unsigned int order)
13789 + __pmd_free(page);
13792 +pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
13794 + struct page *pmd;
13796 + pmd = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
13799 + SetPageForeign(pmd, _pmd_free);
13800 + init_page_count(pmd);
13801 + return page_address(pmd);
13804 +void __pmd_free(pgtable_t pmd)
13806 + unsigned long va = (unsigned long)page_address(pmd);
13807 + unsigned int level;
13808 + pte_t *ptep = lookup_address(va, &level);
13810 + BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
13811 + if (!pte_write(*ptep)
13812 + && HYPERVISOR_update_va_mapping(va, mk_pte(pmd, PAGE_KERNEL), 0))
13815 + ClearPageForeign(pmd);
13816 + init_page_count(pmd);
13817 + __free_page(pmd);
13820 +void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
13822 + paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
13823 + tlb_remove_page(tlb, virt_to_page(pmd));
13826 +#if PAGETABLE_LEVELS > 3
13827 +void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
13829 + paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
13830 + tlb_remove_page(tlb, virt_to_page(pud));
13832 +#endif /* PAGETABLE_LEVELS > 3 */
13833 +#endif /* PAGETABLE_LEVELS > 2 */
13835 +#ifndef CONFIG_X86_64
13836 +#define TASK_SIZE64 TASK_SIZE
13839 +static void _pin_lock(struct mm_struct *mm, int lock) {
13841 + spin_lock(&mm->page_table_lock);
13842 +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
13843 + /* While mm->page_table_lock protects us against insertions and
13844 + * removals of higher level page table pages, it doesn't protect
13845 + * against updates of pte-s. Such updates, however, require the
13846 + * pte pages to be in consistent state (unpinned+writable or
13847 + * pinned+readonly). The pinning and attribute changes, however
13848 + * cannot be done atomically, which is why such updates must be
13849 + * prevented from happening concurrently.
13850 + * Note that no pte lock can ever elsewhere be acquired nesting
13851 + * with an already acquired one in the same mm, or with the mm's
13852 + * page_table_lock already acquired, as that would break in the
13853 + * non-split case (where all these are actually resolving to the
13854 + * one page_table_lock). Thus acquiring all of them here is not
13855 + * going to result in dead locks, and the order of acquires
13856 + * doesn't matter.
13859 + pgd_t *pgd = mm->pgd;
13862 + for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
13866 + if (pgd_none(*pgd))
13868 + pud = pud_offset(pgd, 0);
13869 + for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
13873 + if (pud_none(*pud))
13875 + pmd = pmd_offset(pud, 0);
13876 + for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
13879 + if (pmd_none(*pmd))
13881 + ptl = pte_lockptr(0, pmd);
13885 + spin_unlock(ptl);
13892 + spin_unlock(&mm->page_table_lock);
13894 +#define pin_lock(mm) _pin_lock(mm, 1)
13895 +#define pin_unlock(mm) _pin_lock(mm, 0)
13897 +#define PIN_BATCH sizeof(void *)
13898 +static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl);
13900 +static inline unsigned int pgd_walk_set_prot(struct page *page, pgprot_t flags,
13901 + unsigned int cpu, unsigned int seq)
13903 + unsigned long pfn = page_to_pfn(page);
13905 + if (PageHighMem(page)) {
13906 + if (pgprot_val(flags) & _PAGE_RW)
13907 + ClearPagePinned(page);
13909 + SetPagePinned(page);
13911 + MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
13912 + (unsigned long)__va(pfn << PAGE_SHIFT),
13913 + pfn_pte(pfn, flags), 0);
13914 + if (unlikely(++seq == PIN_BATCH)) {
13915 + if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
13916 + PIN_BATCH, NULL)))
13925 +static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
13927 + pgd_t *pgd = pgd_base;
13931 + unsigned int cpu, seq;
13932 + multicall_entry_t *mcl;
13934 + if (xen_feature(XENFEAT_auto_translated_physmap))
13940 + * Cannot iterate up to USER_PTRS_PER_PGD on x86-64 as these pagetables
13941 + * may not be the 'current' task's pagetables (e.g., current may be
13942 + * 32-bit, but the pagetables may be for a 64-bit task).
13943 + * Subtracting 1 from TASK_SIZE64 means the loop limit is correct
13944 + * regardless of whether TASK_SIZE64 is a multiple of PGDIR_SIZE.
13946 + for (g = 0, seq = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
13947 + if (pgd_none(*pgd))
13949 + pud = pud_offset(pgd, 0);
13950 + if (PTRS_PER_PUD > 1) /* not folded */
13951 + seq = pgd_walk_set_prot(virt_to_page(pud),flags,cpu,seq);
13952 + for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
13953 + if (pud_none(*pud))
13955 + pmd = pmd_offset(pud, 0);
13956 + if (PTRS_PER_PMD > 1) /* not folded */
13957 + seq = pgd_walk_set_prot(virt_to_page(pmd),flags,cpu,seq);
13958 + for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
13959 + if (pmd_none(*pmd))
13961 + seq = pgd_walk_set_prot(pmd_page(*pmd),flags,cpu,seq);
13966 + mcl = per_cpu(pb_mcl, cpu);
13967 +#ifdef CONFIG_X86_64
13968 + if (unlikely(seq > PIN_BATCH - 2)) {
13969 + if (unlikely(HYPERVISOR_multicall_check(mcl, seq, NULL)))
13973 + MULTI_update_va_mapping(mcl + seq,
13974 + (unsigned long)__user_pgd(pgd_base),
13975 + pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags),
13977 + MULTI_update_va_mapping(mcl + seq + 1,
13978 + (unsigned long)pgd_base,
13979 + pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
13981 + if (unlikely(HYPERVISOR_multicall_check(mcl, seq + 2, NULL)))
13984 + if (likely(seq != 0)) {
13985 + MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
13986 + (unsigned long)pgd_base,
13987 + pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
13989 + if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
13992 + } else if(HYPERVISOR_update_va_mapping((unsigned long)pgd_base,
13993 + pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
14001 +static void __pgd_pin(pgd_t *pgd)
14003 + pgd_walk(pgd, PAGE_KERNEL_RO);
14004 + kmap_flush_unused();
14005 + xen_pgd_pin(__pa(pgd)); /* kernel */
14006 +#ifdef CONFIG_X86_64
14007 + xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */
14009 + SetPagePinned(virt_to_page(pgd));
14012 +static void __pgd_unpin(pgd_t *pgd)
14014 + xen_pgd_unpin(__pa(pgd));
14015 +#ifdef CONFIG_X86_64
14016 + xen_pgd_unpin(__pa(__user_pgd(pgd)));
14018 + pgd_walk(pgd, PAGE_KERNEL);
14019 + ClearPagePinned(virt_to_page(pgd));
14022 +static void pgd_test_and_unpin(pgd_t *pgd)
14024 + if (PagePinned(virt_to_page(pgd)))
14025 + __pgd_unpin(pgd);
14028 +void mm_pin(struct mm_struct *mm)
14030 + if (xen_feature(XENFEAT_writable_page_tables))
14034 + __pgd_pin(mm->pgd);
14038 +void mm_unpin(struct mm_struct *mm)
14040 + if (xen_feature(XENFEAT_writable_page_tables))
14044 + __pgd_unpin(mm->pgd);
14048 +void mm_pin_all(void)
14050 + struct page *page;
14051 + unsigned long flags;
14053 + if (xen_feature(XENFEAT_writable_page_tables))
14057 + * Allow uninterrupted access to the pgd_list. Also protects
14058 + * __pgd_pin() by disabling preemption.
14059 + * All other CPUs must be at a safe point (e.g., in stop_machine
14060 + * or offlined entirely).
14062 + spin_lock_irqsave(&pgd_lock, flags);
14063 + list_for_each_entry(page, &pgd_list, lru) {
14064 + if (!PagePinned(page))
14065 + __pgd_pin((pgd_t *)page_address(page));
14067 + spin_unlock_irqrestore(&pgd_lock, flags);
14070 +void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
14072 + if (!PagePinned(virt_to_page(mm->pgd)))
14076 +void arch_exit_mmap(struct mm_struct *mm)
14078 + struct task_struct *tsk = current;
14083 + * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
14084 + * *much* faster this way, as no tlb flushes means bigger wrpt batches.
14086 + if (tsk->active_mm == mm) {
14087 + tsk->active_mm = &init_mm;
14088 + atomic_inc(&init_mm.mm_count);
14090 + switch_mm(mm, &init_mm, tsk);
14092 + atomic_dec(&mm->mm_count);
14093 + BUG_ON(atomic_read(&mm->mm_count) == 0);
14096 + task_unlock(tsk);
14098 + if (PagePinned(virt_to_page(mm->pgd))
14099 + && atomic_read(&mm->mm_count) == 1
14100 + && !mm->context.has_foreign_mappings)
14104 +static inline void pgd_list_add(pgd_t *pgd)
14106 + struct page *page = virt_to_page(pgd);
14108 + list_add(&page->lru, &pgd_list);
14111 +static inline void pgd_list_del(pgd_t *pgd)
14113 + struct page *page = virt_to_page(pgd);
14115 + list_del(&page->lru);
14118 +#define UNSHARED_PTRS_PER_PGD \
14119 + (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
14121 +static void pgd_ctor(void *p)
14124 + unsigned long flags;
14126 + pgd_test_and_unpin(pgd);
14128 + /* Clear usermode parts of PGD */
14129 + memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
14131 + spin_lock_irqsave(&pgd_lock, flags);
14133 + /* If the pgd points to a shared pagetable level (either the
14134 + ptes in non-PAE, or shared PMD in PAE), then just copy the
14135 + references from swapper_pg_dir. */
14136 + if (PAGETABLE_LEVELS == 2 ||
14137 + (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
14138 + PAGETABLE_LEVELS == 4) {
14139 + clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
14140 + swapper_pg_dir + KERNEL_PGD_BOUNDARY,
14141 + KERNEL_PGD_PTRS);
14142 + paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
14143 + __pa(swapper_pg_dir) >> PAGE_SHIFT,
14144 + KERNEL_PGD_BOUNDARY,
14145 + KERNEL_PGD_PTRS);
14148 +#ifdef CONFIG_X86_64
14149 + /* set level3_user_pgt for vsyscall area */
14150 + __user_pgd(pgd)[pgd_index(VSYSCALL_START)] =
14151 + __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
14154 +#ifndef CONFIG_X86_PAE
14155 + /* list required to sync kernel mapping updates */
14156 + if (!SHARED_KERNEL_PMD)
14157 + pgd_list_add(pgd);
14160 + spin_unlock_irqrestore(&pgd_lock, flags);
14163 +static void pgd_dtor(void *pgd)
14165 + unsigned long flags; /* can be called from interrupt context */
14167 + if (!SHARED_KERNEL_PMD) {
14168 + spin_lock_irqsave(&pgd_lock, flags);
14169 + pgd_list_del(pgd);
14170 + spin_unlock_irqrestore(&pgd_lock, flags);
14173 + pgd_test_and_unpin(pgd);
14177 + * List of all pgd's needed for non-PAE so it can invalidate entries
14178 + * in both cached and uncached pgd's; not needed for PAE since the
14179 + * kernel pmd is shared. If PAE were not to share the pmd a similar
14180 + * tactic would be needed. This is essentially codepath-based locking
14181 + * against pageattr.c; it is the unique case in which a valid change
14182 + * of kernel pagetables can't be lazily synchronized by vmalloc faults.
14183 + * vmalloc faults work because attached pagetables are never freed.
14187 +#ifdef CONFIG_X86_PAE
14189 + * Mop up any pmd pages which may still be attached to the pgd.
14190 + * Normally they will be freed by munmap/exit_mmap, but any pmd we
14191 + * preallocate which never got a corresponding vma will need to be
14192 + * freed manually.
14194 +static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
14198 + for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
14199 + pgd_t pgd = pgdp[i];
14201 + if (__pgd_val(pgd) != 0) {
14202 + pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
14204 + pgdp[i] = xen_make_pgd(0);
14206 + paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
14207 + pmd_free(mm, pmd);
14211 + if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
14212 + xen_destroy_contiguous_region((unsigned long)pgdp, 0);
14216 + * In PAE mode, we need to do a cr3 reload (=tlb flush) when
14217 + * updating the top-level pagetable entries to guarantee the
14218 + * processor notices the update. Since this is expensive, and
14219 + * all 4 top-level entries are used almost immediately in a
14220 + * new process's life, we just pre-populate them here.
14222 + * Also, if we're in a paravirt environment where the kernel pmd is
14223 + * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
14224 + * and initialize the kernel pmds here.
14226 +static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
14229 + pmd_t *pmds[UNSHARED_PTRS_PER_PGD];
14230 + unsigned long addr, flags;
14234 + * We can race save/restore (if we sleep during a GFP_KERNEL memory
14235 + * allocation). We therefore store virtual addresses of pmds as they
14236 + * do not change across save/restore, and poke the machine addresses
14237 + * into the pgdir under the pgd_lock.
14239 + for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; i++, addr += PUD_SIZE) {
14240 + pmds[i] = pmd_alloc_one(mm, addr);
14245 + spin_lock_irqsave(&pgd_lock, flags);
14247 + /* Protect against save/restore: move below 4GB under pgd_lock. */
14248 + if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)
14249 + && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) {
14250 + spin_unlock_irqrestore(&pgd_lock, flags);
14253 + pmd_free(mm, pmds[i]);
14257 + /* Copy kernel pmd contents and write-protect the new pmds. */
14258 + pud = pud_offset(pgd, 0);
14259 + for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
14260 + i++, pud++, addr += PUD_SIZE) {
14261 + if (i >= KERNEL_PGD_BOUNDARY) {
14263 + (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
14264 + sizeof(pmd_t) * PTRS_PER_PMD);
14265 + make_lowmem_page_readonly(
14266 + pmds[i], XENFEAT_writable_page_tables);
14269 + /* It is safe to poke machine addresses of pmds under the pgd_lock. */
14270 + pud_populate(mm, pud, pmds[i]);
14273 + /* List required to sync kernel mapping updates and
14274 + * to pin/unpin on save/restore. */
14275 + pgd_list_add(pgd);
14277 + spin_unlock_irqrestore(&pgd_lock, flags);
14282 +void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
14284 + struct page *page = virt_to_page(pmd);
14285 + unsigned long pfn = page_to_pfn(page);
14287 + paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
14289 + /* Note: almost everything apart from _PAGE_PRESENT is
14290 + reserved at the pmd (PDPT) level. */
14291 + if (PagePinned(virt_to_page(mm->pgd))) {
14292 + BUG_ON(PageHighMem(page));
14293 + BUG_ON(HYPERVISOR_update_va_mapping(
14294 + (unsigned long)__va(pfn << PAGE_SHIFT),
14295 + pfn_pte(pfn, PAGE_KERNEL_RO), 0));
14296 + set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
14298 + *pudp = __pud(__pa(pmd) | _PAGE_PRESENT);
14301 + * According to Intel App note "TLBs, Paging-Structure Caches,
14302 + * and Their Invalidation", April 2007, document 317080-001,
14303 + * section 8.1: in PAE mode we explicitly have to flush the
14304 + * TLB via cr3 if the top-level pgd is changed...
14306 + if (mm == current->active_mm)
14309 +#else /* !CONFIG_X86_PAE */
14310 +/* No need to prepopulate any pagetable entries in non-PAE modes. */
14311 +static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
14316 +static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
14319 +#endif /* CONFIG_X86_PAE */
14321 +#ifdef CONFIG_X86_64
14322 +/* We allocate two contiguous pages for kernel and user. */
14323 +#define PGD_ORDER 1
14325 +#define PGD_ORDER 0
14328 +pgd_t *pgd_alloc(struct mm_struct *mm)
14330 + pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
14332 + /* so that alloc_pd can use it */
14337 + if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
14338 + free_pages((unsigned long)pgd, PGD_ORDER);
14345 +void pgd_free(struct mm_struct *mm, pgd_t *pgd)
14348 + * After this the pgd should not be pinned for the duration of this
14349 + * function's execution. We should never sleep and thus never race:
14350 + * 1. User pmds will not become write-protected under our feet due
14351 + * to a concurrent mm_pin_all().
14352 + * 2. The machine addresses in PGD entries will not become invalid
14353 + * due to a concurrent save/restore.
14357 + pgd_mop_up_pmds(mm, pgd);
14358 + free_pages((unsigned long)pgd, PGD_ORDER);
14361 +/* blktap and gntdev need this, as otherwise they would implicitly (and
14362 + * needlessly, as they never use it) reference init_mm. */
14363 +pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *vma,
14364 + unsigned long addr, pte_t *ptep, int full)
14366 + return ptep_get_and_clear_full(vma->vm_mm, addr, ptep, full);
14368 +EXPORT_SYMBOL_GPL(xen_ptep_get_and_clear_full);
14370 +int ptep_set_access_flags(struct vm_area_struct *vma,
14371 + unsigned long address, pte_t *ptep,
14372 + pte_t entry, int dirty)
14374 + int changed = !pte_same(*ptep, entry);
14376 + if (changed && dirty) {
14377 + if (likely(vma->vm_mm == current->mm)) {
14378 + if (HYPERVISOR_update_va_mapping(address,
14380 + (unsigned long)vma->vm_mm->cpu_vm_mask.bits|
14381 + UVMF_INVLPG|UVMF_MULTI))
14384 + xen_l1_entry_update(ptep, entry);
14385 + flush_tlb_page(vma, address);
14392 +int ptep_test_and_clear_young(struct vm_area_struct *vma,
14393 + unsigned long addr, pte_t *ptep)
14397 + if (pte_young(*ptep))
14398 + ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
14402 + pte_update(vma->vm_mm, addr, ptep);
14407 +int ptep_clear_flush_young(struct vm_area_struct *vma,
14408 + unsigned long address, pte_t *ptep)
14410 + pte_t pte = *ptep;
14411 + int young = pte_young(pte);
14413 + pte = pte_mkold(pte);
14414 + if (PagePinned(virt_to_page(vma->vm_mm->pgd)))
14415 + ptep_set_access_flags(vma, address, ptep, pte, young);
14417 + ptep->pte_low = pte.pte_low;
14421 --- sle11-2009-10-16.orig/arch/x86/mm/pgtable_32-xen.c 2009-03-16 16:33:40.000000000 +0100
14422 +++ sle11-2009-10-16/arch/x86/mm/pgtable_32-xen.c 2009-03-16 16:38:05.000000000 +0100
14425 - * linux/arch/i386/mm/pgtable.c
14428 #include <linux/sched.h>
14429 #include <linux/kernel.h>
14430 #include <linux/errno.h>
14431 @@ -41,7 +37,6 @@ void show_mem(void)
14433 printk(KERN_INFO "Mem-info:\n");
14435 - printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
14436 for_each_online_pgdat(pgdat) {
14437 pgdat_resize_lock(pgdat, &flags);
14438 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
14439 @@ -157,243 +152,6 @@ void __init reserve_top_address(unsigned
14440 __VMALLOC_RESERVE += reserve;
14443 -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
14445 - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
14447 - make_lowmem_page_readonly(pte, XENFEAT_writable_page_tables);
14452 - * List of all pgd's needed for non-PAE so it can invalidate entries
14453 - * in both cached and uncached pgd's; not needed for PAE since the
14454 - * kernel pmd is shared. If PAE were not to share the pmd a similar
14455 - * tactic would be needed. This is essentially codepath-based locking
14456 - * against pageattr.c; it is the unique case in which a valid change
14457 - * of kernel pagetables can't be lazily synchronized by vmalloc faults.
14458 - * vmalloc faults work because attached pagetables are never freed.
14461 -static inline void pgd_list_add(pgd_t *pgd)
14463 - struct page *page = virt_to_page(pgd);
14465 - list_add(&page->lru, &pgd_list);
14468 -static inline void pgd_list_del(pgd_t *pgd)
14470 - struct page *page = virt_to_page(pgd);
14472 - list_del(&page->lru);
14475 -#define UNSHARED_PTRS_PER_PGD \
14476 - (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
14478 -static void pgd_ctor(void *p)
14481 - unsigned long flags;
14483 - pgd_test_and_unpin(pgd);
14485 - /* Clear usermode parts of PGD */
14486 - memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
14488 - spin_lock_irqsave(&pgd_lock, flags);
14490 - /* If the pgd points to a shared pagetable level (either the
14491 - ptes in non-PAE, or shared PMD in PAE), then just copy the
14492 - references from swapper_pg_dir. */
14493 - if (PAGETABLE_LEVELS == 2 ||
14494 - (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
14495 - clone_pgd_range(pgd + USER_PTRS_PER_PGD,
14496 - swapper_pg_dir + USER_PTRS_PER_PGD,
14497 - KERNEL_PGD_PTRS);
14498 - paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
14499 - __pa(swapper_pg_dir) >> PAGE_SHIFT,
14500 - USER_PTRS_PER_PGD,
14501 - KERNEL_PGD_PTRS);
14504 - /* list required to sync kernel mapping updates */
14505 - if (PAGETABLE_LEVELS == 2)
14506 - pgd_list_add(pgd);
14508 - spin_unlock_irqrestore(&pgd_lock, flags);
14511 -static void pgd_dtor(void *pgd)
14513 - unsigned long flags; /* can be called from interrupt context */
14515 - if (!SHARED_KERNEL_PMD) {
14516 - spin_lock_irqsave(&pgd_lock, flags);
14517 - pgd_list_del(pgd);
14518 - spin_unlock_irqrestore(&pgd_lock, flags);
14521 - pgd_test_and_unpin(pgd);
14524 -#ifdef CONFIG_X86_PAE
14526 - * Mop up any pmd pages which may still be attached to the pgd.
14527 - * Normally they will be freed by munmap/exit_mmap, but any pmd we
14528 - * preallocate which never got a corresponding vma will need to be
14529 - * freed manually.
14531 -static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
14535 - for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
14536 - pgd_t pgd = pgdp[i];
14538 - if (__pgd_val(pgd) != 0) {
14539 - pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
14541 - pgdp[i] = xen_make_pgd(0);
14543 - paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
14544 - pmd_free(mm, pmd);
14550 - * In PAE mode, we need to do a cr3 reload (=tlb flush) when
14551 - * updating the top-level pagetable entries to guarantee the
14552 - * processor notices the update. Since this is expensive, and
14553 - * all 4 top-level entries are used almost immediately in a
14554 - * new process's life, we just pre-populate them here.
14556 - * Also, if we're in a paravirt environment where the kernel pmd is
14557 - * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
14558 - * and initialize the kernel pmds here.
14560 -static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
14563 - pmd_t *pmds[UNSHARED_PTRS_PER_PGD];
14564 - unsigned long addr, flags;
14568 - * We can race save/restore (if we sleep during a GFP_KERNEL memory
14569 - * allocation). We therefore store virtual addresses of pmds as they
14570 - * do not change across save/restore, and poke the machine addresses
14571 - * into the pgdir under the pgd_lock.
14573 - for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; i++, addr += PUD_SIZE) {
14574 - pmds[i] = pmd_alloc_one(mm, addr);
14579 - spin_lock_irqsave(&pgd_lock, flags);
14581 - /* Protect against save/restore: move below 4GB under pgd_lock. */
14582 - if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)
14583 - && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) {
14584 - spin_unlock_irqrestore(&pgd_lock, flags);
14587 - pmd_free(mm, pmds[i]);
14591 - /* Copy kernel pmd contents and write-protect the new pmds. */
14592 - pud = pud_offset(pgd, 0);
14593 - for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
14594 - i++, pud++, addr += PUD_SIZE) {
14595 - if (i >= USER_PTRS_PER_PGD) {
14597 - (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
14598 - sizeof(pmd_t) * PTRS_PER_PMD);
14599 - make_lowmem_page_readonly(
14600 - pmds[i], XENFEAT_writable_page_tables);
14603 - /* It is safe to poke machine addresses of pmds under the pgd_lock. */
14604 - pud_populate(mm, pud, pmds[i]);
14607 - /* List required to sync kernel mapping updates and
14608 - * to pin/unpin on save/restore. */
14609 - pgd_list_add(pgd);
14611 - spin_unlock_irqrestore(&pgd_lock, flags);
14615 -#else /* !CONFIG_X86_PAE */
14616 -/* No need to prepopulate any pagetable entries in non-PAE modes. */
14617 -static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
14622 -static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
14625 -#endif /* CONFIG_X86_PAE */
14627 -pgd_t *pgd_alloc(struct mm_struct *mm)
14629 - pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
14631 - /* so that alloc_pd can use it */
14636 - if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
14637 - free_page((unsigned long)pgd);
14644 -void pgd_free(struct mm_struct *mm, pgd_t *pgd)
14647 - * After this the pgd should not be pinned for the duration of this
14648 - * function's execution. We should never sleep and thus never race:
14649 - * 1. User pmds will not become write-protected under our feet due
14650 - * to a concurrent mm_pin_all().
14651 - * 2. The machine addresses in PGD entries will not become invalid
14652 - * due to a concurrent save/restore.
14656 - if (PTRS_PER_PMD > 1 && !xen_feature(XENFEAT_pae_pgdir_above_4gb))
14657 - xen_destroy_contiguous_region((unsigned long)pgd, 0);
14659 - pgd_mop_up_pmds(mm, pgd);
14660 - free_page((unsigned long)pgd);
14663 -void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
14665 - pgtable_page_dtor(pte);
14666 - paravirt_release_pt(page_to_pfn(pte));
14667 - tlb_remove_page(tlb, pte);
14670 -#ifdef CONFIG_X86_PAE
14672 -void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
14674 - paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
14675 - tlb_remove_page(tlb, virt_to_page(pmd));
14680 void make_lowmem_page_readonly(void *va, unsigned int feature)
14683 --- sle11-2009-10-16.orig/arch/x86/pci/i386.c 2009-10-28 14:55:03.000000000 +0100
14684 +++ sle11-2009-10-16/arch/x86/pci/i386.c 2009-10-08 12:08:34.000000000 +0200
14685 @@ -338,10 +338,14 @@ int pci_mmap_page_range(struct pci_dev *
14689 +#ifndef CONFIG_XEN
14690 if (((vma->vm_pgoff < max_low_pfn_mapped) ||
14691 (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) &&
14692 vma->vm_pgoff < max_pfn_mapped)) &&
14693 ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
14695 + if (ioremap_check_change_attr(vma->vm_pgoff, len, flags)) {
14697 free_memtype(addr, addr + len);
14700 --- sle11-2009-10-16.orig/arch/x86/pci/irq-xen.c 2009-03-16 16:33:40.000000000 +0100
14701 +++ sle11-2009-10-16/arch/x86/pci/irq-xen.c 2009-03-16 16:38:05.000000000 +0100
14702 @@ -140,9 +140,11 @@ static void __init pirq_peer_trick(void)
14703 busmap[e->bus] = 1;
14705 for(i = 1; i < 256; i++) {
14707 if (!busmap[i] || pci_find_bus(0, i))
14709 - if (pci_scan_bus_with_sysdata(i))
14710 + node = get_mp_bus_to_node(i);
14711 + if (pci_scan_bus_on_node(i, &pci_root_ops, node))
14712 printk(KERN_INFO "PCI: Discovered primary peer "
14713 "bus %02x [IRQ]\n", i);
14715 @@ -204,7 +206,7 @@ static int pirq_ali_get(struct pci_dev *
14717 static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
14719 - WARN_ON_ONCE(pirq >= 16);
14720 + WARN_ON_ONCE(pirq > 16);
14721 return irqmap[read_config_nybble(router, 0x48, pirq-1)];
14724 @@ -213,7 +215,7 @@ static int pirq_ali_set(struct pci_dev *
14725 static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
14726 unsigned int val = irqmap[irq];
14728 - WARN_ON_ONCE(pirq >= 16);
14729 + WARN_ON_ONCE(pirq > 16);
14731 write_config_nybble(router, 0x48, pirq-1, val);
14733 @@ -264,7 +266,7 @@ static int pirq_via586_get(struct pci_de
14735 static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
14737 - WARN_ON_ONCE(pirq >= 5);
14738 + WARN_ON_ONCE(pirq > 5);
14739 return read_config_nybble(router, 0x55, pirqmap[pirq-1]);
14742 @@ -272,7 +274,7 @@ static int pirq_via586_set(struct pci_de
14744 static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
14746 - WARN_ON_ONCE(pirq >= 5);
14747 + WARN_ON_ONCE(pirq > 5);
14748 write_config_nybble(router, 0x55, pirqmap[pirq-1], irq);
14751 @@ -286,7 +288,7 @@ static int pirq_ite_get(struct pci_dev *
14753 static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
14755 - WARN_ON_ONCE(pirq >= 4);
14756 + WARN_ON_ONCE(pirq > 4);
14757 return read_config_nybble(router,0x43, pirqmap[pirq-1]);
14760 @@ -294,7 +296,7 @@ static int pirq_ite_set(struct pci_dev *
14762 static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
14764 - WARN_ON_ONCE(pirq >= 4);
14765 + WARN_ON_ONCE(pirq > 4);
14766 write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
14769 @@ -623,6 +625,13 @@ static __init int via_router_probe(struc
14771 device = PCI_DEVICE_ID_VIA_8235;
14773 + case PCI_DEVICE_ID_VIA_8237:
14775 + * Asus a7v600 bios wrongly reports 8237
14776 + * as 586-compatible
14778 + device = PCI_DEVICE_ID_VIA_8237;
14783 --- sle11-2009-10-16.orig/arch/x86/vdso/vdso32-setup-xen.c 2009-03-16 16:33:40.000000000 +0100
14784 +++ sle11-2009-10-16/arch/x86/vdso/vdso32-setup-xen.c 2009-03-16 16:38:05.000000000 +0100
14785 @@ -164,7 +164,7 @@ static __init void relocate_vdso(Elf32_E
14789 - BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
14790 + BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
14791 !elf_check_arch_ia32(ehdr) ||
14792 ehdr->e_type != ET_DYN);
14794 @@ -233,8 +233,12 @@ void syscall32_cpu_init(void)
14798 - if (use_sysenter < 0)
14799 - use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
14800 + if (use_sysenter < 0) {
14801 + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
14802 + use_sysenter = 1;
14803 + if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
14804 + use_sysenter = 1;
14808 #define compat_uses_vma 1
14809 @@ -337,8 +341,6 @@ int __init sysenter_setup(void)
14811 #ifdef CONFIG_X86_32
14814 - printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
14817 #if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT < 0x030200
14818 @@ -383,6 +385,9 @@ int arch_setup_additional_pages(struct l
14822 + if (vdso_enabled == VDSO_DISABLED)
14825 down_write(&mm->mmap_sem);
14827 /* Test compat mode once here, in case someone
14828 --- sle11-2009-10-16.orig/drivers/acpi/processor_core.c 2009-08-26 11:52:33.000000000 +0200
14829 +++ sle11-2009-10-16/drivers/acpi/processor_core.c 2009-08-26 11:54:44.000000000 +0200
14830 @@ -666,7 +666,7 @@ static int acpi_processor_get_info(struc
14833 status = acpi_evaluate_object(pr->handle, "_SUN", NULL, &buffer);
14834 - if (ACPI_SUCCESS(status))
14835 + if (ACPI_SUCCESS(status) && pr->id != -1)
14836 arch_fix_phys_package_id(pr->id, object.integer.value);
14839 --- sle11-2009-10-16.orig/drivers/firmware/iscsi_ibft.c 2009-10-28 14:55:03.000000000 +0100
14840 +++ sle11-2009-10-16/drivers/firmware/iscsi_ibft.c 2009-09-24 09:54:51.000000000 +0200
14841 @@ -943,7 +943,7 @@ static int __init ibft_init(void)
14844 printk(KERN_INFO "iBFT detected at 0x%lx.\n",
14845 - virt_to_phys((void *)ibft_addr));
14846 + isa_virt_to_bus(ibft_addr));
14848 rc = ibft_check_device();
14850 --- sle11-2009-10-16.orig/drivers/firmware/iscsi_ibft_find.c 2009-10-28 14:55:03.000000000 +0100
14851 +++ sle11-2009-10-16/drivers/firmware/iscsi_ibft_find.c 2009-09-24 09:52:18.000000000 +0200
14852 @@ -65,10 +65,10 @@ void __init reserve_ibft_region(void)
14853 * so skip that area */
14854 if (pos == VGA_MEM)
14856 - virt = phys_to_virt(pos);
14857 + virt = isa_bus_to_virt(pos);
14858 if (memcmp(virt, IBFT_SIGN, IBFT_SIGN_LEN) == 0) {
14859 unsigned long *addr =
14860 - (unsigned long *)phys_to_virt(pos + 4);
14861 + (unsigned long *)isa_bus_to_virt(pos + 4);
14863 /* if the length of the table extends past 1M,
14864 * the table cannot be valid. */
14865 @@ -78,6 +78,8 @@ void __init reserve_ibft_region(void)
14869 +#ifndef CONFIG_XEN
14871 reserve_bootmem(pos, PAGE_ALIGN(len), BOOTMEM_DEFAULT);
14874 --- sle11-2009-10-16.orig/drivers/input/xen-kbdfront.c 2009-10-28 14:55:03.000000000 +0100
14875 +++ sle11-2009-10-16/drivers/input/xen-kbdfront.c 2009-03-16 16:38:05.000000000 +0100
14876 @@ -325,7 +325,6 @@ static struct xenbus_device_id xenkbd_id
14878 static struct xenbus_driver xenkbd = {
14880 - .owner = THIS_MODULE,
14882 .probe = xenkbd_probe,
14883 .remove = xenkbd_remove,
14884 --- sle11-2009-10-16.orig/drivers/oprofile/cpu_buffer.c 2009-03-12 16:15:32.000000000 +0100
14885 +++ sle11-2009-10-16/drivers/oprofile/cpu_buffer.c 2009-03-16 16:38:05.000000000 +0100
14886 @@ -341,7 +341,7 @@ void oprofile_add_mode(int cpu_mode)
14888 int oprofile_add_domain_switch(int32_t domain_id)
14890 - struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
14891 + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
14893 /* should have space for switching into and out of domain
14894 (2 slots each) plus one sample and one cpu mode switch */
14895 --- sle11-2009-10-16.orig/drivers/pci/msi-xen.c 2009-03-16 16:33:40.000000000 +0100
14896 +++ sle11-2009-10-16/drivers/pci/msi-xen.c 2009-03-16 16:38:05.000000000 +0100
14897 @@ -583,7 +583,7 @@ int pci_enable_msi(struct pci_dev* dev)
14898 EXPORT_SYMBOL(pci_enable_msi);
14900 extern void pci_frontend_disable_msi(struct pci_dev* dev);
14901 -void pci_disable_msi(struct pci_dev* dev)
14902 +void pci_msi_shutdown(struct pci_dev* dev)
14906 @@ -612,6 +612,10 @@ void pci_disable_msi(struct pci_dev* dev
14907 pci_intx_for_msi(dev, 1);
14908 dev->msi_enabled = 0;
14910 +void pci_disable_msi(struct pci_dev* dev)
14912 + pci_msi_shutdown(dev);
14914 EXPORT_SYMBOL(pci_disable_msi);
14917 @@ -714,7 +718,7 @@ int pci_enable_msix(struct pci_dev* dev,
14918 EXPORT_SYMBOL(pci_enable_msix);
14920 extern void pci_frontend_disable_msix(struct pci_dev* dev);
14921 -void pci_disable_msix(struct pci_dev* dev)
14922 +void pci_msix_shutdown(struct pci_dev* dev)
14924 if (!pci_msi_enable)
14926 @@ -751,6 +755,10 @@ void pci_disable_msix(struct pci_dev* de
14927 pci_intx_for_msi(dev, 1);
14928 dev->msix_enabled = 0;
14930 +void pci_disable_msix(struct pci_dev* dev)
14932 + pci_msix_shutdown(dev);
14934 EXPORT_SYMBOL(pci_disable_msix);
14937 --- sle11-2009-10-16.orig/drivers/video/Kconfig 2009-06-04 10:18:21.000000000 +0200
14938 +++ sle11-2009-10-16/drivers/video/Kconfig 2009-03-16 16:38:05.000000000 +0100
14939 @@ -2029,7 +2029,7 @@ config FB_VIRTUAL
14941 config XEN_FBDEV_FRONTEND
14942 tristate "Xen virtual frame buffer support"
14943 - depends on FB && XEN
14944 + depends on FB && PARAVIRT_XEN
14945 select FB_SYS_FILLRECT
14946 select FB_SYS_COPYAREA
14947 select FB_SYS_IMAGEBLIT
14948 --- sle11-2009-10-16.orig/drivers/video/xen-fbfront.c 2009-10-28 14:55:03.000000000 +0100
14949 +++ sle11-2009-10-16/drivers/video/xen-fbfront.c 2009-03-16 16:38:05.000000000 +0100
14950 @@ -670,7 +670,6 @@ static struct xenbus_device_id xenfb_ids
14952 static struct xenbus_driver xenfb = {
14954 - .owner = THIS_MODULE,
14956 .probe = xenfb_probe,
14957 .remove = xenfb_remove,
14958 --- sle11-2009-10-16.orig/drivers/xen/Kconfig 2009-03-04 11:28:34.000000000 +0100
14959 +++ sle11-2009-10-16/drivers/xen/Kconfig 2009-03-16 16:38:05.000000000 +0100
14961 # This Kconfig describe xen options
14964 -mainmenu "Xen Configuration"
14969 --- sle11-2009-10-16.orig/drivers/xen/Makefile 2009-02-16 16:17:21.000000000 +0100
14970 +++ sle11-2009-10-16/drivers/xen/Makefile 2009-03-16 16:38:05.000000000 +0100
14972 -obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o
14973 +obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o
14974 +xen-xencomm-$(CONFIG_PARAVIRT_XEN) := xencomm.o
14975 +xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o
14977 +xen-balloon-$(CONFIG_XEN) := balloon/
14978 obj-$(CONFIG_XEN) += core/
14979 obj-$(CONFIG_XEN) += console/
14980 obj-$(CONFIG_XEN) += evtchn/
14981 @@ -7,7 +10,8 @@ obj-y += xenbus/
14982 obj-$(CONFIG_XEN) += char/
14984 obj-$(CONFIG_XEN) += util.o
14985 -obj-$(CONFIG_XEN_BALLOON) += balloon/
14986 +obj-$(CONFIG_XEN_XENCOMM) += $(xen-xencomm-y)
14987 +obj-$(CONFIG_XEN_BALLOON) += $(xen-balloon-y)
14988 obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
14989 obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
14990 obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
14991 --- sle11-2009-10-16.orig/drivers/xen/blkfront/blkfront.c 2009-03-24 10:12:53.000000000 +0100
14992 +++ sle11-2009-10-16/drivers/xen/blkfront/blkfront.c 2009-05-19 10:38:53.000000000 +0200
14993 @@ -285,7 +285,11 @@ static void backend_changed(struct xenbu
14996 case XenbusStateClosing:
14997 - bd = bdget(info->dev);
14999 + xenbus_frontend_closed(dev);
15002 + bd = bdget_disk(info->gd, 0);
15004 xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
15006 --- sle11-2009-10-16.orig/drivers/xen/blkfront/block.h 2009-03-24 10:11:58.000000000 +0100
15007 +++ sle11-2009-10-16/drivers/xen/blkfront/block.h 2009-03-16 16:38:05.000000000 +0100
15008 @@ -96,7 +96,6 @@ struct blk_shadow {
15009 struct blkfront_info
15011 struct xenbus_device *xbdev;
15013 struct gendisk *gd;
15015 blkif_vdev_t handle;
15016 --- sle11-2009-10-16.orig/drivers/xen/blkfront/vbd.c 2009-02-16 16:17:21.000000000 +0100
15017 +++ sle11-2009-10-16/drivers/xen/blkfront/vbd.c 2009-03-16 16:38:05.000000000 +0100
15018 @@ -246,17 +246,32 @@ xlvbd_init_blk_queue(struct gendisk *gd,
15023 -xlvbd_alloc_gendisk(int major, int minor, blkif_sector_t capacity, int vdevice,
15024 - u16 vdisk_info, u16 sector_size,
15025 - struct blkfront_info *info)
15027 +xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
15028 + u16 sector_size, struct blkfront_info *info)
15030 + int major, minor;
15031 struct gendisk *gd;
15032 struct xlbd_major_info *mi;
15035 unsigned int offset;
15037 + if ((vdevice>>EXT_SHIFT) > 1) {
15038 + /* this is above the extended range; something is wrong */
15039 + printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", vdevice);
15043 + if (!VDEV_IS_EXTENDED(vdevice)) {
15044 + major = BLKIF_MAJOR(vdevice);
15045 + minor = BLKIF_MINOR(vdevice);
15049 + minor = BLKIF_MINOR_EXT(vdevice);
15052 BUG_ON(info->gd != NULL);
15053 BUG_ON(info->mi != NULL);
15054 BUG_ON(info->rq != NULL);
15055 @@ -337,41 +352,6 @@ xlvbd_alloc_gendisk(int major, int minor
15060 -xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
15061 - u16 sector_size, struct blkfront_info *info)
15063 - struct block_device *bd;
15065 - int major, minor;
15067 - if ((vdevice>>EXT_SHIFT) > 1) {
15068 - /* this is above the extended range; something is wrong */
15069 - printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", vdevice);
15073 - if (!VDEV_IS_EXTENDED(vdevice)) {
15074 - major = BLKIF_MAJOR(vdevice);
15075 - minor = BLKIF_MINOR(vdevice);
15079 - minor = BLKIF_MINOR_EXT(vdevice);
15082 - info->dev = MKDEV(major, minor);
15083 - bd = bdget(info->dev);
15087 - err = xlvbd_alloc_gendisk(major, minor, capacity, vdevice, vdisk_info,
15088 - sector_size, info);
15095 xlvbd_del(struct blkfront_info *info)
15097 --- sle11-2009-10-16.orig/drivers/xen/blktap/blktap.c 2009-04-20 11:38:54.000000000 +0200
15098 +++ sle11-2009-10-16/drivers/xen/blktap/blktap.c 2009-04-20 11:40:14.000000000 +0200
15099 @@ -111,6 +111,7 @@ typedef struct tap_blkif {
15100 unsigned long mode; /*current switching mode */
15101 int minor; /*Minor number for tapdisk device */
15102 pid_t pid; /*tapdisk process id */
15103 + struct pid_namespace *pid_ns; /*... and its corresponding namespace */
15104 enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace
15106 unsigned long *idx_map; /*Record the user ring id to kern
15107 @@ -299,16 +300,14 @@ struct tap_vma_priv {
15108 struct page *map[];
15111 -static struct page *blktap_nopage(struct vm_area_struct *vma,
15112 - unsigned long address,
15114 +static int blktap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
15117 * if the page has not been mapped in by the driver then return
15118 - * NOPAGE_SIGBUS to the domain.
15119 + * VM_FAULT_SIGBUS to the domain.
15122 - return NOPAGE_SIGBUS;
15123 + return VM_FAULT_SIGBUS;
15126 static pte_t blktap_clear_pte(struct vm_area_struct *vma,
15127 @@ -404,7 +403,7 @@ static void blktap_vma_close(struct vm_a
15130 struct vm_operations_struct blktap_vm_ops = {
15131 - nopage: blktap_nopage,
15132 + fault: blktap_fault,
15133 zap_pte: blktap_clear_pte,
15134 close: blktap_vma_close,
15136 @@ -498,9 +497,8 @@ found:
15137 tapfds[minor] = info;
15139 if ((class = get_xen_class()) != NULL)
15140 - class_device_create(class, NULL,
15141 - MKDEV(blktap_major, minor), NULL,
15142 - "blktap%d", minor);
15143 + device_create(class, NULL, MKDEV(blktap_major, minor),
15144 + "blktap%d", minor);
15148 @@ -542,7 +540,7 @@ void signal_tapdisk(int idx)
15151 if (info->pid > 0) {
15152 - ptask = find_task_by_pid(info->pid);
15153 + ptask = find_task_by_pid_ns(info->pid, info->pid_ns);
15155 info->status = CLEANSHUTDOWN;
15157 @@ -770,8 +768,9 @@ static int blktap_ioctl(struct inode *in
15160 info->pid = (pid_t)arg;
15161 - DPRINTK("blktap: pid received %d\n",
15163 + info->pid_ns = current->nsproxy->pid_ns;
15164 + DPRINTK("blktap: pid received %p:%d\n",
15165 + info->pid_ns, info->pid);
15169 @@ -1684,9 +1683,7 @@ static int __init blkif_init(void)
15170 * We only create the device when a request of a new device is
15173 - class_device_create(class, NULL,
15174 - MKDEV(blktap_major, 0), NULL,
15176 + device_create(class, NULL, MKDEV(blktap_major, 0), "blktap0");
15178 /* this is bad, but not fatal */
15179 WPRINTK("blktap: sysfs xen_class not created\n");
15180 --- sle11-2009-10-16.orig/drivers/xen/char/mem.c 2008-12-15 11:27:22.000000000 +0100
15181 +++ sle11-2009-10-16/drivers/xen/char/mem.c 2009-03-16 16:38:05.000000000 +0100
15182 @@ -33,6 +33,27 @@ static inline int uncached_access(struct
15186 +static inline int range_is_allowed(unsigned long pfn, unsigned long size)
15188 +#ifdef CONFIG_NONPROMISC_DEVMEM
15189 + u64 from = ((u64)pfn) << PAGE_SHIFT;
15190 + u64 to = from + size;
15191 + u64 cursor = from;
15193 + while (cursor < to) {
15194 + if (!devmem_is_allowed(pfn)) {
15196 + "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
15197 + current->comm, from, to);
15200 + cursor += PAGE_SIZE;
15208 * This funcion reads the *physical* memory. The f_pos points directly to the
15210 @@ -55,6 +76,9 @@ static ssize_t read_mem(struct file * fi
15212 sz = min_t(unsigned long, sz, count);
15214 + if (!range_is_allowed(p >> PAGE_SHIFT, count))
15217 v = ioremap(p, sz);
15218 if (IS_ERR(v) || v == NULL) {
15220 @@ -103,6 +127,9 @@ static ssize_t write_mem(struct file * f
15222 sz = min_t(unsigned long, sz, count);
15224 + if (!range_is_allowed(p >> PAGE_SHIFT, sz))
15227 v = ioremap(p, sz);
15230 @@ -131,6 +158,23 @@ static ssize_t write_mem(struct file * f
15233 #ifndef ARCH_HAS_DEV_MEM_MMAP_MEM
15234 +static void mmap_mem_open(struct vm_area_struct *vma)
15236 + map_devmem(vma->vm_pgoff, vma->vm_end - vma->vm_start,
15237 + vma->vm_page_prot);
15240 +static void mmap_mem_close(struct vm_area_struct *vma)
15242 + unmap_devmem(vma->vm_pgoff, vma->vm_end - vma->vm_start,
15243 + vma->vm_page_prot);
15246 +static struct vm_operations_struct mmap_mem_ops = {
15247 + .open = mmap_mem_open,
15248 + .close = mmap_mem_close
15251 static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma)
15253 size_t size = vma->vm_end - vma->vm_start;
15254 @@ -138,6 +182,15 @@ static int xen_mmap_mem(struct file * fi
15255 if (uncached_access(file))
15256 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
15258 + if (!range_is_allowed(vma->vm_pgoff, size))
15261 + if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
15262 + &vma->vm_page_prot))
15265 + vma->vm_ops = &mmap_mem_ops;
15267 /* We want to return the real error code, not EAGAIN. */
15268 return direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
15269 size, vma->vm_page_prot, DOMID_IO);
15270 --- sle11-2009-10-16.orig/drivers/xen/console/console.c 2008-12-15 11:26:44.000000000 +0100
15271 +++ sle11-2009-10-16/drivers/xen/console/console.c 2009-03-16 16:38:05.000000000 +0100
15272 @@ -552,16 +552,18 @@ static int xencons_write(
15276 -static void xencons_put_char(struct tty_struct *tty, u_char ch)
15277 +static int xencons_put_char(struct tty_struct *tty, u_char ch)
15279 unsigned long flags;
15282 if (DUMMY_TTY(tty))
15286 spin_lock_irqsave(&xencons_lock, flags);
15287 - (void)__xencons_put_char(ch);
15288 + ret = __xencons_put_char(ch);
15289 spin_unlock_irqrestore(&xencons_lock, flags);
15293 static void xencons_flush_chars(struct tty_struct *tty)
15294 @@ -583,7 +585,7 @@ static void xencons_wait_until_sent(stru
15295 if (DUMMY_TTY(tty))
15298 - while (DRV(tty->driver)->chars_in_buffer(tty)) {
15299 + while (tty_chars_in_buffer(tty)) {
15300 set_current_state(TASK_INTERRUPTIBLE);
15301 schedule_timeout(1);
15302 if (signal_pending(current))
15303 @@ -632,8 +634,7 @@ static void xencons_close(struct tty_str
15306 tty_wait_until_sent(tty, 0);
15307 - if (DRV(tty->driver)->flush_buffer != NULL)
15308 - DRV(tty->driver)->flush_buffer(tty);
15309 + tty_driver_flush_buffer(tty);
15310 if (tty->ldisc.flush_buffer != NULL)
15311 tty->ldisc.flush_buffer(tty);
15313 --- sle11-2009-10-16.orig/drivers/xen/core/machine_kexec.c 2009-02-17 11:46:41.000000000 +0100
15314 +++ sle11-2009-10-16/drivers/xen/core/machine_kexec.c 2009-03-16 16:38:05.000000000 +0100
15317 #include <linux/kexec.h>
15318 #include <xen/interface/kexec.h>
15319 +#include <linux/reboot.h>
15320 #include <linux/mm.h>
15321 #include <linux/bootmem.h>
15323 @@ -90,6 +91,9 @@ void __init xen_machine_kexec_setup_reso
15324 xen_hypervisor_res.start = range.start;
15325 xen_hypervisor_res.end = range.start + range.size - 1;
15326 xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM;
15327 +#ifdef CONFIG_X86_64
15328 + insert_resource(&iomem_resource, &xen_hypervisor_res);
15331 /* fill in crashk_res if range is reserved by hypervisor */
15333 @@ -102,6 +106,9 @@ void __init xen_machine_kexec_setup_reso
15335 crashk_res.start = range.start;
15336 crashk_res.end = range.start + range.size - 1;
15337 +#ifdef CONFIG_X86_64
15338 + insert_resource(&iomem_resource, &crashk_res);
15342 /* get physical address of vmcoreinfo */
15343 @@ -153,11 +160,13 @@ void __init xen_machine_kexec_setup_reso
15347 +#ifndef CONFIG_X86_64
15348 void __init xen_machine_kexec_register_resources(struct resource *res)
15350 request_resource(res, &xen_hypervisor_res);
15351 machine_kexec_register_resources(res);
15355 static void setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
15357 @@ -228,6 +237,11 @@ void machine_shutdown(void)
15361 +void machine_crash_shutdown(struct pt_regs *regs)
15363 + /* The kernel is broken so disable interrupts */
15364 + local_irq_disable();
15369 --- sle11-2009-10-16.orig/drivers/xen/core/smpboot.c 2009-03-16 16:33:40.000000000 +0100
15370 +++ sle11-2009-10-16/drivers/xen/core/smpboot.c 2009-03-16 16:38:05.000000000 +0100
15371 @@ -53,17 +53,16 @@ static DEFINE_PER_CPU(int, callfunc_irq)
15372 static char resched_name[NR_CPUS][15];
15373 static char callfunc_name[NR_CPUS][15];
15375 -u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
15376 +#ifdef CONFIG_X86_LOCAL_APIC
15377 +#define set_cpu_to_apicid(cpu, apicid) (per_cpu(x86_cpu_to_apicid, cpu) = (apicid))
15379 +#define set_cpu_to_apicid(cpu, apicid)
15382 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
15383 DEFINE_PER_CPU(cpumask_t, cpu_core_map);
15384 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
15386 -#if defined(__i386__)
15387 -DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
15388 -EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
15391 void __init prefill_possible_map(void)
15394 @@ -154,7 +153,7 @@ static int __cpuinit xen_smp_intr_init(u
15397 #ifdef CONFIG_HOTPLUG_CPU
15398 -static void xen_smp_intr_exit(unsigned int cpu)
15399 +static void __cpuexit xen_smp_intr_exit(unsigned int cpu)
15402 local_teardown_timer(cpu);
15403 @@ -263,8 +262,7 @@ void __init smp_prepare_cpus(unsigned in
15404 boot_cpu_data.apicid = apicid;
15405 cpu_data(0) = boot_cpu_data;
15407 - cpu_2_logical_apicid[0] = apicid;
15408 - per_cpu(x86_cpu_to_apicid, 0) = apicid;
15409 + set_cpu_to_apicid(0, apicid);
15411 current_thread_info()->cpu = 0;
15413 @@ -319,8 +317,7 @@ void __init smp_prepare_cpus(unsigned in
15414 cpu_data(cpu).cpu_index = cpu;
15415 cpu_data(cpu).apicid = apicid;
15417 - cpu_2_logical_apicid[cpu] = apicid;
15418 - per_cpu(x86_cpu_to_apicid, cpu) = apicid;
15419 + set_cpu_to_apicid(cpu, apicid);
15422 cpu_pda(cpu)->pcurrent = idle;
15423 @@ -375,7 +372,7 @@ static int __init initialize_cpu_present
15425 core_initcall(initialize_cpu_present_map);
15427 -int __cpu_disable(void)
15428 +int __cpuexit __cpu_disable(void)
15430 cpumask_t map = cpu_online_map;
15431 unsigned int cpu = smp_processor_id();
15432 @@ -392,7 +389,7 @@ int __cpu_disable(void)
15436 -void __cpu_die(unsigned int cpu)
15437 +void __cpuexit __cpu_die(unsigned int cpu)
15439 while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
15440 current->state = TASK_UNINTERRUPTIBLE;
15441 --- sle11-2009-10-16.orig/drivers/xen/core/xen_proc.c 2009-10-28 14:55:03.000000000 +0100
15442 +++ sle11-2009-10-16/drivers/xen/core/xen_proc.c 2009-03-16 16:38:05.000000000 +0100
15443 @@ -8,7 +8,7 @@ static struct proc_dir_entry *xen_base;
15444 struct proc_dir_entry *create_xen_proc_entry(const char *name, mode_t mode)
15446 if ( xen_base == NULL )
15447 - if ( (xen_base = proc_mkdir("xen", &proc_root)) == NULL )
15448 + if ( (xen_base = proc_mkdir("xen", NULL)) == NULL )
15449 panic("Couldn't create /proc/xen");
15450 return create_proc_entry(name, mode, xen_base);
15452 --- sle11-2009-10-16.orig/drivers/xen/fbfront/xenfb.c 2009-03-04 11:25:55.000000000 +0100
15453 +++ sle11-2009-10-16/drivers/xen/fbfront/xenfb.c 2009-03-16 16:38:05.000000000 +0100
15454 @@ -93,7 +93,7 @@ struct xenfb_info
15455 * only mappings. The former creates unfaulted pages. Preserves
15456 * invariant. The latter removes pages. Preserves invariant.
15458 - * 3. Holding both locks: xenfb_vm_nopage(). Extends the dirty
15459 + * 3. Holding both locks: xenfb_vm_fault(). Extends the dirty
15460 * rectangle and updates mappings consistently. Preserves
15463 @@ -112,13 +112,13 @@ struct xenfb_info
15465 * But FIXME: the invariant is too weak. It misses that the fault
15466 * record in mappings must be consistent with the mapping of pages in
15467 - * the associated address space! do_no_page() updates the PTE after
15468 - * xenfb_vm_nopage() returns, i.e. outside the critical region. This
15469 + * the associated address space! __do_fault() updates the PTE after
15470 + * xenfb_vm_fault() returns, i.e. outside the critical region. This
15471 * allows the following race:
15473 * X writes to some address in the Xen frame buffer
15474 - * Fault - call do_no_page()
15475 - * call xenfb_vm_nopage()
15476 + * Fault - call __do_fault()
15477 + * call xenfb_vm_fault()
15481 @@ -387,18 +387,17 @@ static void xenfb_vm_close(struct vm_are
15482 mutex_unlock(&info->mm_lock);
15485 -static struct page *xenfb_vm_nopage(struct vm_area_struct *vma,
15486 - unsigned long vaddr, int *type)
15487 +static int xenfb_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
15489 struct xenfb_mapping *map = vma->vm_private_data;
15490 struct xenfb_info *info = map->info;
15491 - int pgnr = (vaddr - vma->vm_start) >> PAGE_SHIFT;
15492 + int pgnr = ((long)vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT;
15493 unsigned long flags;
15497 if (pgnr >= info->nr_pages)
15498 - return NOPAGE_SIGBUS;
15499 + return VM_FAULT_SIGBUS;
15501 mutex_lock(&info->mm_lock);
15502 spin_lock_irqsave(&info->dirty_lock, flags);
15503 @@ -414,16 +413,15 @@ static struct page *xenfb_vm_nopage(stru
15504 spin_unlock_irqrestore(&info->dirty_lock, flags);
15505 mutex_unlock(&info->mm_lock);
15508 - *type = VM_FAULT_MINOR;
15509 + vmf->page = page;
15512 + return VM_FAULT_MINOR;
15515 static struct vm_operations_struct xenfb_vm_ops = {
15516 .open = xenfb_vm_open,
15517 .close = xenfb_vm_close,
15518 - .nopage = xenfb_vm_nopage,
15519 + .fault = xenfb_vm_fault,
15522 static int xenfb_mmap(struct fb_info *fb_info, struct vm_area_struct *vma)
15523 --- sle11-2009-10-16.orig/drivers/xen/gntdev/gntdev.c 2009-03-16 16:33:40.000000000 +0100
15524 +++ sle11-2009-10-16/drivers/xen/gntdev/gntdev.c 2009-03-16 16:38:05.000000000 +0100
15525 @@ -392,7 +392,7 @@ nomem_out:
15526 static int __init gntdev_init(void)
15528 struct class *class;
15529 - struct class_device *device;
15530 + struct device *device;
15532 if (!is_running_on_xen()) {
15533 printk(KERN_ERR "You must be running Xen to use gntdev\n");
15534 @@ -417,8 +417,8 @@ static int __init gntdev_init(void)
15538 - device = class_device_create(class, NULL, MKDEV(gntdev_major, 0),
15539 - NULL, GNTDEV_NAME);
15540 + device = device_create(class, NULL, MKDEV(gntdev_major, 0),
15542 if (IS_ERR(device)) {
15543 printk(KERN_ERR "Error creating gntdev device in xen_class\n");
15544 printk(KERN_ERR "gntdev created with major number = %d\n",
15545 @@ -435,7 +435,7 @@ static void __exit gntdev_exit(void)
15547 struct class *class;
15548 if ((class = get_xen_class()) != NULL)
15549 - class_device_destroy(class, MKDEV(gntdev_major, 0));
15550 + device_destroy(class, MKDEV(gntdev_major, 0));
15551 unregister_chrdev(gntdev_major, GNTDEV_NAME);
15554 --- sle11-2009-10-16.orig/drivers/xen/netfront/netfront.c 2009-03-30 16:39:44.000000000 +0200
15555 +++ sle11-2009-10-16/drivers/xen/netfront/netfront.c 2009-03-30 16:40:17.000000000 +0200
15556 @@ -1464,8 +1464,7 @@ err:
15560 - while ((skb = __skb_dequeue(&errq)))
15562 + __skb_queue_purge(&errq);
15564 while ((skb = __skb_dequeue(&rxq)) != NULL) {
15565 struct page *page = NETFRONT_SKB_CB(skb)->page;
15566 @@ -1630,8 +1629,7 @@ static void netif_release_rx_bufs_flip(s
15570 - while ((skb = __skb_dequeue(&free_list)) != NULL)
15571 - dev_kfree_skb(skb);
15572 + __skb_queue_purge(&free_list);
15574 spin_unlock_bh(&np->rx_lock);
15576 --- sle11-2009-10-16.orig/drivers/xen/privcmd/privcmd.c 2009-03-04 11:28:34.000000000 +0100
15577 +++ sle11-2009-10-16/drivers/xen/privcmd/privcmd.c 2009-03-16 16:38:05.000000000 +0100
15578 @@ -261,15 +261,13 @@ static long privcmd_ioctl(struct file *f
15581 #ifndef HAVE_ARCH_PRIVCMD_MMAP
15582 -static struct page *privcmd_nopage(struct vm_area_struct *vma,
15583 - unsigned long address,
15585 +static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
15587 - return NOPAGE_SIGBUS;
15588 + return VM_FAULT_SIGBUS;
15591 static struct vm_operations_struct privcmd_vm_ops = {
15592 - .nopage = privcmd_nopage
15593 + .fault = privcmd_fault
15596 static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
15597 --- sle11-2009-10-16.orig/drivers/xen/xenbus/xenbus_client.c 2009-03-24 10:12:22.000000000 +0100
15598 +++ sle11-2009-10-16/drivers/xen/xenbus/xenbus_client.c 2009-03-24 10:13:17.000000000 +0100
15599 @@ -442,7 +442,7 @@ int xenbus_map_ring_valloc(struct xenbus
15603 - area = alloc_vm_area(PAGE_SIZE);
15604 + area = xen_alloc_vm_area(PAGE_SIZE);
15608 @@ -452,7 +452,7 @@ int xenbus_map_ring_valloc(struct xenbus
15611 if (op.status != GNTST_okay) {
15612 - free_vm_area(area);
15613 + xen_free_vm_area(area);
15614 xenbus_dev_fatal(dev, op.status,
15615 "mapping in shared page %d from domain %d",
15616 gnt_ref, dev->otherend_id);
15617 @@ -551,7 +551,7 @@ int xenbus_unmap_ring_vfree(struct xenbu
15620 if (op.status == GNTST_okay)
15621 - free_vm_area(area);
15622 + xen_free_vm_area(area);
15624 xenbus_dev_error(dev, op.status,
15625 "unmapping page at handle %d error %d",
15626 --- sle11-2009-10-16.orig/drivers/xen/xenbus/xenbus_probe.c 2009-02-16 16:18:36.000000000 +0100
15627 +++ sle11-2009-10-16/drivers/xen/xenbus/xenbus_probe.c 2009-03-16 16:38:05.000000000 +0100
15628 @@ -173,7 +173,7 @@ static int read_backend_details(struct x
15629 return read_otherend_details(xendev, "backend-id", "backend");
15632 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) && (defined(CONFIG_XEN) || defined(MODULE))
15633 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
15634 static int xenbus_uevent_frontend(struct device *dev, struct kobj_uevent_env *env)
15636 struct xenbus_device *xdev;
15637 @@ -185,8 +185,10 @@ static int xenbus_uevent_frontend(struct
15640 /* stuff we want to pass to /sbin/hotplug */
15641 +#if defined(CONFIG_XEN) || defined(MODULE)
15642 add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype);
15643 add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename);
15645 add_uevent_var(env, "MODALIAS=xen:%s", xdev->devicetype);
15648 @@ -207,10 +209,8 @@ static struct xen_bus_type xenbus_fronte
15649 .probe = xenbus_dev_probe,
15650 .remove = xenbus_dev_remove,
15651 .shutdown = xenbus_dev_shutdown,
15652 -#if defined(CONFIG_XEN) || defined(MODULE)
15653 .uevent = xenbus_uevent_frontend,
15657 #if defined(CONFIG_XEN) || defined(MODULE)
15659 @@ -519,6 +519,15 @@ static ssize_t xendev_show_devtype(struc
15661 DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
15663 +static ssize_t xendev_show_modalias(struct device *dev,
15664 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
15665 + struct device_attribute *attr,
15669 + return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
15671 +DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
15673 int xenbus_probe_node(struct xen_bus_type *bus,
15675 @@ -579,10 +588,16 @@ int xenbus_probe_node(struct xen_bus_typ
15677 err = device_create_file(&xendev->dev, &dev_attr_devtype);
15679 - goto fail_remove_file;
15680 + goto fail_remove_nodename;
15682 + err = device_create_file(&xendev->dev, &dev_attr_modalias);
15684 + goto fail_remove_devtype;
15688 +fail_remove_devtype:
15689 + device_remove_file(&xendev->dev, &dev_attr_devtype);
15690 +fail_remove_nodename:
15691 device_remove_file(&xendev->dev, &dev_attr_nodename);
15693 device_unregister(&xendev->dev);
15694 --- sle11-2009-10-16.orig/fs/aio.c 2009-03-24 10:11:37.000000000 +0100
15695 +++ sle11-2009-10-16/fs/aio.c 2009-03-24 10:13:25.000000000 +0100
15696 @@ -1271,6 +1271,7 @@ static void io_destroy(struct kioctx *io
15697 #ifdef CONFIG_EPOLL
15698 /* forget the poll file, but it's up to the user to close it */
15700 + fput(ioctx->file);
15701 ioctx->file->private_data = 0;
15704 @@ -1295,6 +1296,7 @@ static int aio_queue_fd_close(struct ino
15705 spin_lock_irq(&ioctx->ctx_lock);
15707 spin_unlock_irq(&ioctx->ctx_lock);
15712 @@ -1330,16 +1332,17 @@ static const struct file_operations aioq
15714 static int make_aio_fd(struct kioctx *ioctx)
15717 - struct inode *inode;
15721 - error = anon_inode_getfd(&fd, &inode, &file, "[aioq]",
15722 - &aioq_fops, ioctx);
15725 + fd = anon_inode_getfd("[aioq]", &aioq_fops, ioctx);
15729 /* associate the file with the IO context */
15733 file->private_data = ioctx;
15734 ioctx->file = file;
15735 init_waitqueue_head(&ioctx->poll_wait);
15736 --- sle11-2009-10-16.orig/include/asm-x86/dma-mapping.h 2009-10-28 14:55:03.000000000 +0100
15737 +++ sle11-2009-10-16/include/asm-x86/dma-mapping.h 2009-03-16 16:38:05.000000000 +0100
15738 @@ -223,8 +223,13 @@ static inline dma_addr_t dma_map_page(st
15739 struct dma_mapping_ops *ops = get_dma_ops(dev);
15741 BUG_ON(!valid_dma_direction(direction));
15742 +#ifndef CONFIG_XEN
15743 return ops->map_single(dev, page_to_phys(page) + offset,
15746 + return ops->map_single(dev, page_to_pseudophys(page) + offset,
15747 + size, direction);
15751 static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
15752 --- sle11-2009-10-16.orig/include/asm-x86/genapic_64.h 2009-10-28 14:55:03.000000000 +0100
15753 +++ sle11-2009-10-16/include/asm-x86/genapic_64.h 2009-03-16 16:38:05.000000000 +0100
15754 @@ -46,6 +46,7 @@ extern struct genapic apic_x2apic_phys;
15755 extern int acpi_madt_oem_check(char *, char *);
15757 extern void apic_send_IPI_self(int vector);
15758 +#ifndef CONFIG_XEN
15759 enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
15760 extern enum uv_system_type get_uv_system_type(void);
15761 extern int is_uv_system(void);
15762 @@ -55,6 +56,10 @@ DECLARE_PER_CPU(int, x2apic_extra_bits);
15763 extern void uv_cpu_init(void);
15764 extern void uv_system_init(void);
15765 extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
15767 +#define is_uv_system() 0
15768 +#define uv_cpu_init() ((void)0)
15771 extern void setup_apic_routing(void);
15773 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/desc.h 2009-03-16 16:33:40.000000000 +0100
15774 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/desc.h 2009-03-16 16:38:05.000000000 +0100
15775 @@ -64,8 +64,8 @@ static inline struct desc_struct *get_cp
15778 static inline void pack_gate(gate_desc *gate, unsigned char type,
15779 - unsigned long base, unsigned dpl, unsigned flags, unsigned short seg)
15781 + unsigned long base, unsigned dpl, unsigned flags,
15782 + unsigned short seg)
15784 gate->a = (seg << 16) | (base & 0xffff);
15785 gate->b = (base & 0xffff0000) |
15786 @@ -84,22 +84,23 @@ static inline int desc_empty(const void
15787 #define load_TR_desc() native_load_tr_desc()
15788 #define load_gdt(dtr) native_load_gdt(dtr)
15789 #define load_idt(dtr) native_load_idt(dtr)
15790 -#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
15791 -#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
15792 +#define load_tr(tr) asm volatile("ltr %0"::"m" (tr))
15793 +#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt))
15795 #define store_gdt(dtr) native_store_gdt(dtr)
15796 #define store_idt(dtr) native_store_idt(dtr)
15797 #define store_tr(tr) (tr = native_store_tr())
15798 -#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
15799 +#define store_ldt(ldt) asm("sldt %0":"=m" (ldt))
15801 #define load_TLS(t, cpu) native_load_tls(t, cpu)
15802 #define set_ldt native_set_ldt
15804 -#define write_ldt_entry(dt, entry, desc) \
15805 - native_write_ldt_entry(dt, entry, desc)
15806 -#define write_gdt_entry(dt, entry, desc, type) \
15807 - native_write_gdt_entry(dt, entry, desc, type)
15808 -#define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g)
15809 +#define write_ldt_entry(dt, entry, desc) \
15810 + native_write_ldt_entry(dt, entry, desc)
15811 +#define write_gdt_entry(dt, entry, desc, type) \
15812 + native_write_gdt_entry(dt, entry, desc, type)
15813 +#define write_idt_entry(dt, entry, g) \
15814 + native_write_idt_entry(dt, entry, g)
15816 static inline void native_write_idt_entry(gate_desc *idt, int entry,
15817 const gate_desc *gate)
15818 @@ -138,8 +139,8 @@ static inline void pack_descriptor(struc
15820 desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
15821 desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
15822 - (limit & 0x000f0000) | ((type & 0xff) << 8) |
15823 - ((flags & 0xf) << 20);
15824 + (limit & 0x000f0000) | ((type & 0xff) << 8) |
15825 + ((flags & 0xf) << 20);
15829 @@ -160,7 +161,6 @@ static inline void set_tssldt_descriptor
15830 desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF;
15831 desc->base3 = PTR_HIGH(addr);
15834 pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
15837 @@ -178,7 +178,8 @@ static inline void __set_tss_desc(unsign
15840 set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
15841 - IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
15842 + IO_BITMAP_OFFSET + IO_BITMAP_BYTES +
15843 + sizeof(unsigned long) - 1);
15844 write_gdt_entry(d, entry, &tss, DESC_TSS);
15847 @@ -187,16 +188,16 @@ static inline void __set_tss_desc(unsign
15848 static inline void native_set_ldt(const void *addr, unsigned int entries)
15850 if (likely(entries == 0))
15851 - __asm__ __volatile__("lldt %w0"::"q" (0));
15852 + asm volatile("lldt %w0"::"q" (0));
15854 unsigned cpu = smp_processor_id();
15857 - set_tssldt_descriptor(&ldt, (unsigned long)addr,
15858 - DESC_LDT, entries * sizeof(ldt) - 1);
15859 + set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
15860 + entries * LDT_ENTRY_SIZE - 1);
15861 write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
15863 - __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
15864 + asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
15868 @@ -261,15 +262,15 @@ static inline void xen_load_tls(struct t
15872 -#define _LDT_empty(info) (\
15873 - (info)->base_addr == 0 && \
15874 - (info)->limit == 0 && \
15875 - (info)->contents == 0 && \
15876 - (info)->read_exec_only == 1 && \
15877 - (info)->seg_32bit == 0 && \
15878 - (info)->limit_in_pages == 0 && \
15879 - (info)->seg_not_present == 1 && \
15880 - (info)->useable == 0)
15881 +#define _LDT_empty(info) \
15882 + ((info)->base_addr == 0 && \
15883 + (info)->limit == 0 && \
15884 + (info)->contents == 0 && \
15885 + (info)->read_exec_only == 1 && \
15886 + (info)->seg_32bit == 0 && \
15887 + (info)->limit_in_pages == 0 && \
15888 + (info)->seg_not_present == 1 && \
15889 + (info)->useable == 0)
15891 #ifdef CONFIG_X86_64
15892 #define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
15893 @@ -309,7 +310,7 @@ static inline unsigned long get_desc_lim
15895 #ifndef CONFIG_X86_NO_IDT
15896 static inline void _set_gate(int gate, unsigned type, void *addr,
15897 - unsigned dpl, unsigned ist, unsigned seg)
15898 + unsigned dpl, unsigned ist, unsigned seg)
15901 pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
15902 @@ -393,10 +394,10 @@ static inline void set_system_gate_ist(i
15903 * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
15905 #define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
15906 - movb idx*8+4(gdt), lo_b; \
15907 - movb idx*8+7(gdt), hi_b; \
15908 - shll $16, base; \
15909 - movw idx*8+2(gdt), lo_w;
15910 + movb idx * 8 + 4(gdt), lo_b; \
15911 + movb idx * 8 + 7(gdt), hi_b; \
15912 + shll $16, base; \
15913 + movw idx * 8 + 2(gdt), lo_w;
15916 #endif /* __ASSEMBLY__ */
15917 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/dma-mapping.h 2009-02-16 16:18:36.000000000 +0100
15918 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/dma-mapping.h 2009-03-16 16:38:05.000000000 +0100
15920 -#ifdef CONFIG_X86_32
15921 -# include "dma-mapping_32.h"
15923 -# include "dma-mapping_64.h"
15925 +#ifndef _ASM_DMA_MAPPING_H_
15927 +#include "../../dma-mapping.h"
15930 +address_needs_mapping(struct device *hwdev, dma_addr_t addr)
15932 + dma_addr_t mask = 0xffffffff;
15933 + /* If the device has a mask, use it, otherwise default to 32 bits */
15934 + if (hwdev && hwdev->dma_mask)
15935 + mask = *hwdev->dma_mask;
15936 + return (addr & ~mask) != 0;
15939 +extern int range_straddles_page_boundary(paddr_t p, size_t size);
15941 +#endif /* _ASM_DMA_MAPPING_H_ */
15942 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/dma-mapping_32.h 2009-03-16 16:33:40.000000000 +0100
15943 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
15945 -#ifndef _ASM_I386_DMA_MAPPING_H
15946 -#define _ASM_I386_DMA_MAPPING_H
15949 - * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
15953 -#include <linux/mm.h>
15954 -#include <linux/scatterlist.h>
15955 -#include <asm/cache.h>
15956 -#include <asm/io.h>
15957 -#include <asm/swiotlb.h>
15960 -address_needs_mapping(struct device *hwdev, dma_addr_t addr)
15962 - dma_addr_t mask = 0xffffffff;
15963 - /* If the device has a mask, use it, otherwise default to 32 bits */
15964 - if (hwdev && hwdev->dma_mask)
15965 - mask = *hwdev->dma_mask;
15966 - return (addr & ~mask) != 0;
15969 -extern int range_straddles_page_boundary(paddr_t p, size_t size);
15971 -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
15972 -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
15974 -void *dma_alloc_coherent(struct device *dev, size_t size,
15975 - dma_addr_t *dma_handle, gfp_t flag);
15977 -void dma_free_coherent(struct device *dev, size_t size,
15978 - void *vaddr, dma_addr_t dma_handle);
15981 -dma_map_single(struct device *dev, void *ptr, size_t size,
15982 - enum dma_data_direction direction);
15985 -dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
15986 - enum dma_data_direction direction);
15988 -extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
15989 - int nents, enum dma_data_direction direction);
15990 -extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg,
15991 - int nents, enum dma_data_direction direction);
15993 -#ifdef CONFIG_HIGHMEM
15995 -dma_map_page(struct device *dev, struct page *page, unsigned long offset,
15996 - size_t size, enum dma_data_direction direction);
15999 -dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
16000 - enum dma_data_direction direction);
16002 -#define dma_map_page(dev, page, offset, size, dir) \
16003 - dma_map_single(dev, page_address(page) + (offset), (size), (dir))
16004 -#define dma_unmap_page dma_unmap_single
16008 -dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
16009 - enum dma_data_direction direction);
16012 -dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
16013 - enum dma_data_direction direction);
16015 -static inline void
16016 -dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
16017 - unsigned long offset, size_t size,
16018 - enum dma_data_direction direction)
16020 - dma_sync_single_for_cpu(dev, dma_handle+offset, size, direction);
16023 -static inline void
16024 -dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
16025 - unsigned long offset, size_t size,
16026 - enum dma_data_direction direction)
16028 - dma_sync_single_for_device(dev, dma_handle+offset, size, direction);
16032 -dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
16033 - enum dma_data_direction direction);
16036 -dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
16037 - enum dma_data_direction direction);
16040 -dma_mapping_error(dma_addr_t dma_addr);
16043 -dma_supported(struct device *dev, u64 mask);
16046 -dma_set_mask(struct device *dev, u64 mask)
16048 - if(!dev->dma_mask || !dma_supported(dev, mask))
16051 - *dev->dma_mask = mask;
16057 -dma_get_cache_alignment(void)
16059 - /* no easy way to get cache size on all x86, so return the
16060 - * maximum possible, to be safe */
16061 - return (1 << INTERNODE_CACHE_SHIFT);
16064 -#define dma_is_consistent(d, h) (1)
16066 -static inline void
16067 -dma_cache_sync(struct device *dev, void *vaddr, size_t size,
16068 - enum dma_data_direction direction)
16070 - flush_write_buffers();
16073 -#define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
16075 -dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
16076 - dma_addr_t device_addr, size_t size, int flags);
16079 -dma_release_declared_memory(struct device *dev);
16082 -dma_mark_declared_memory_occupied(struct device *dev,
16083 - dma_addr_t device_addr, size_t size);
16086 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2009-02-16 16:18:36.000000000 +0100
16087 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
16089 -#ifndef _X8664_DMA_MAPPING_H
16090 -#define _X8664_DMA_MAPPING_H 1
16093 - * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
16097 -#include <linux/scatterlist.h>
16098 -#include <asm/io.h>
16100 -struct dma_mapping_ops {
16101 - int (*mapping_error)(dma_addr_t dma_addr);
16102 - void* (*alloc_coherent)(struct device *dev, size_t size,
16103 - dma_addr_t *dma_handle, gfp_t gfp);
16104 - void (*free_coherent)(struct device *dev, size_t size,
16105 - void *vaddr, dma_addr_t dma_handle);
16106 - dma_addr_t (*map_single)(struct device *hwdev, void *ptr,
16107 - size_t size, int direction);
16108 - /* like map_single, but doesn't check the device mask */
16109 - dma_addr_t (*map_simple)(struct device *hwdev, char *ptr,
16110 - size_t size, int direction);
16111 - void (*unmap_single)(struct device *dev, dma_addr_t addr,
16112 - size_t size, int direction);
16113 - void (*sync_single_for_cpu)(struct device *hwdev,
16114 - dma_addr_t dma_handle, size_t size,
16116 - void (*sync_single_for_device)(struct device *hwdev,
16117 - dma_addr_t dma_handle, size_t size,
16119 - void (*sync_single_range_for_cpu)(struct device *hwdev,
16120 - dma_addr_t dma_handle, unsigned long offset,
16121 - size_t size, int direction);
16122 - void (*sync_single_range_for_device)(struct device *hwdev,
16123 - dma_addr_t dma_handle, unsigned long offset,
16124 - size_t size, int direction);
16125 - void (*sync_sg_for_cpu)(struct device *hwdev,
16126 - struct scatterlist *sg, int nelems,
16128 - void (*sync_sg_for_device)(struct device *hwdev,
16129 - struct scatterlist *sg, int nelems,
16131 - int (*map_sg)(struct device *hwdev, struct scatterlist *sg,
16132 - int nents, int direction);
16133 - void (*unmap_sg)(struct device *hwdev,
16134 - struct scatterlist *sg, int nents,
16136 - int (*dma_supported)(struct device *hwdev, u64 mask);
16140 -extern dma_addr_t bad_dma_address;
16141 -extern const struct dma_mapping_ops* dma_ops;
16142 -extern int iommu_merge;
16145 -static inline int dma_mapping_error(dma_addr_t dma_addr)
16147 - if (dma_ops->mapping_error)
16148 - return dma_ops->mapping_error(dma_addr);
16150 - return (dma_addr == bad_dma_address);
16153 -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
16154 -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
16156 -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
16157 -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
16159 -extern void *dma_alloc_coherent(struct device *dev, size_t size,
16160 - dma_addr_t *dma_handle, gfp_t gfp);
16161 -extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
16162 - dma_addr_t dma_handle);
16164 -static inline dma_addr_t
16165 -dma_map_single(struct device *hwdev, void *ptr, size_t size,
16168 - BUG_ON(!valid_dma_direction(direction));
16169 - return dma_ops->map_single(hwdev, ptr, size, direction);
16172 -static inline void
16173 -dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
16176 - BUG_ON(!valid_dma_direction(direction));
16177 - dma_ops->unmap_single(dev, addr, size, direction);
16180 -#define dma_map_page(dev,page,offset,size,dir) \
16181 - dma_map_single((dev), page_address(page)+(offset), (size), (dir))
16183 -#define dma_unmap_page dma_unmap_single
16185 -static inline void
16186 -dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
16187 - size_t size, int direction)
16189 - BUG_ON(!valid_dma_direction(direction));
16190 - if (dma_ops->sync_single_for_cpu)
16191 - dma_ops->sync_single_for_cpu(hwdev, dma_handle, size,
16193 - flush_write_buffers();
16196 -static inline void
16197 -dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
16198 - size_t size, int direction)
16200 - BUG_ON(!valid_dma_direction(direction));
16201 - if (dma_ops->sync_single_for_device)
16202 - dma_ops->sync_single_for_device(hwdev, dma_handle, size,
16204 - flush_write_buffers();
16207 -static inline void
16208 -dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
16209 - unsigned long offset, size_t size, int direction)
16211 - BUG_ON(!valid_dma_direction(direction));
16212 - if (dma_ops->sync_single_range_for_cpu) {
16213 - dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, size, direction);
16216 - flush_write_buffers();
16219 -static inline void
16220 -dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
16221 - unsigned long offset, size_t size, int direction)
16223 - BUG_ON(!valid_dma_direction(direction));
16224 - if (dma_ops->sync_single_range_for_device)
16225 - dma_ops->sync_single_range_for_device(hwdev, dma_handle,
16226 - offset, size, direction);
16228 - flush_write_buffers();
16231 -static inline void
16232 -dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
16233 - int nelems, int direction)
16235 - BUG_ON(!valid_dma_direction(direction));
16236 - if (dma_ops->sync_sg_for_cpu)
16237 - dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction);
16238 - flush_write_buffers();
16241 -static inline void
16242 -dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
16243 - int nelems, int direction)
16245 - BUG_ON(!valid_dma_direction(direction));
16246 - if (dma_ops->sync_sg_for_device) {
16247 - dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction);
16250 - flush_write_buffers();
16254 -dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction)
16256 - BUG_ON(!valid_dma_direction(direction));
16257 - return dma_ops->map_sg(hwdev, sg, nents, direction);
16260 -static inline void
16261 -dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
16264 - BUG_ON(!valid_dma_direction(direction));
16265 - dma_ops->unmap_sg(hwdev, sg, nents, direction);
16268 -extern int dma_supported(struct device *hwdev, u64 mask);
16270 -/* same for gart, swiotlb, and nommu */
16271 -static inline int dma_get_cache_alignment(void)
16273 - return boot_cpu_data.x86_clflush_size;
16276 -#define dma_is_consistent(d, h) 1
16278 -extern int dma_set_mask(struct device *dev, u64 mask);
16280 -static inline void
16281 -dma_cache_sync(struct device *dev, void *vaddr, size_t size,
16282 - enum dma_data_direction dir)
16284 - flush_write_buffers();
16287 -extern struct device fallback_dev;
16288 -extern int panic_on_overflow;
16291 -#endif /* _X8664_DMA_MAPPING_H */
16293 -#include "dma-mapping_32.h"
16294 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/fixmap.h 2009-02-16 16:18:36.000000000 +0100
16295 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/fixmap.h 2009-03-16 16:38:05.000000000 +0100
16297 +#ifndef _ASM_FIXMAP_H
16298 +#define _ASM_FIXMAP_H
16300 #ifdef CONFIG_X86_32
16301 # include "fixmap_32.h"
16303 # include "fixmap_64.h"
16306 +#define clear_fixmap(idx) \
16307 + __set_fixmap(idx, 0, __pgprot(0))
16310 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-03-16 16:33:40.000000000 +0100
16311 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-03-16 16:38:05.000000000 +0100
16313 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
16316 -#ifndef _ASM_FIXMAP_H
16317 -#define _ASM_FIXMAP_H
16318 +#ifndef _ASM_FIXMAP_32_H
16319 +#define _ASM_FIXMAP_32_H
16321 /* used by vmalloc.c, vsyscall.lds.S.
16323 @@ -102,8 +102,7 @@ enum fixed_addresses {
16325 #define NR_FIX_BTMAPS 64
16326 #define FIX_BTMAPS_NESTING 4
16328 - __end_of_permanent_fixed_addresses + 512 -
16329 + FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 -
16330 (__end_of_permanent_fixed_addresses & 511),
16331 FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
16333 @@ -114,19 +113,16 @@ enum fixed_addresses {
16336 extern void __set_fixmap(enum fixed_addresses idx,
16337 - maddr_t phys, pgprot_t flags);
16338 + maddr_t phys, pgprot_t flags);
16339 extern void reserve_top_address(unsigned long reserve);
16341 -#define set_fixmap(idx, phys) \
16342 - __set_fixmap(idx, phys, PAGE_KERNEL)
16343 +#define set_fixmap(idx, phys) \
16344 + __set_fixmap(idx, phys, PAGE_KERNEL)
16346 * Some hardware wants to get fixmapped without caching.
16348 -#define set_fixmap_nocache(idx, phys) \
16349 - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
16351 -#define clear_fixmap(idx) \
16352 - __set_fixmap(idx, 0, __pgprot(0))
16353 +#define set_fixmap_nocache(idx, phys) \
16354 + __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
16356 #define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
16358 @@ -159,7 +155,7 @@ static __always_inline unsigned long fix
16359 if (idx >= __end_of_fixed_addresses)
16360 __this_fixmap_does_not_exist();
16362 - return __fix_to_virt(idx);
16363 + return __fix_to_virt(idx);
16366 static inline unsigned long virt_to_fix(const unsigned long vaddr)
16367 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-03-16 16:33:40.000000000 +0100
16368 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-03-16 16:38:05.000000000 +0100
16370 * Copyright (C) 1998 Ingo Molnar
16373 -#ifndef _ASM_FIXMAP_H
16374 -#define _ASM_FIXMAP_H
16375 +#ifndef _ASM_FIXMAP_64_H
16376 +#define _ASM_FIXMAP_64_H
16378 #include <linux/kernel.h>
16379 #include <asm/apicdef.h>
16382 enum fixed_addresses {
16383 VSYSCALL_LAST_PAGE,
16384 - VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
16385 + VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
16386 + + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
16389 FIX_EARLYCON_MEM_BASE,
16390 @@ -45,11 +46,12 @@ enum fixed_addresses {
16393 FIX_IO_APIC_BASE_0,
16394 - FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
16395 + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
16398 FIX_EFI_IO_MAP_LAST_PAGE,
16399 - FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE+MAX_EFI_IO_PAGES-1,
16400 + FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE
16401 + + MAX_EFI_IO_PAGES - 1,
16405 @@ -79,19 +81,16 @@ enum fixed_addresses {
16406 __end_of_fixed_addresses
16409 -extern void __set_fixmap (enum fixed_addresses idx,
16410 - unsigned long phys, pgprot_t flags);
16411 +extern void __set_fixmap(enum fixed_addresses idx,
16412 + unsigned long phys, pgprot_t flags);
16414 -#define set_fixmap(idx, phys) \
16415 - __set_fixmap(idx, phys, PAGE_KERNEL)
16416 +#define set_fixmap(idx, phys) \
16417 + __set_fixmap(idx, phys, PAGE_KERNEL)
16419 * Some hardware wants to get fixmapped without caching.
16421 -#define set_fixmap_nocache(idx, phys) \
16422 - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
16424 -#define clear_fixmap(idx) \
16425 - __set_fixmap(idx, 0, __pgprot(0))
16426 +#define set_fixmap_nocache(idx, phys) \
16427 + __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
16429 #define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
16430 #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
16431 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/highmem.h 2009-03-16 16:33:40.000000000 +0100
16432 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/highmem.h 2009-03-16 16:38:05.000000000 +0100
16434 * Gerhard.Wichert@pdb.siemens.de
16437 - * Redesigned the x86 32-bit VM architecture to deal with
16438 + * Redesigned the x86 32-bit VM architecture to deal with
16439 * up to 16 Terabyte physical memory. With current x86 CPUs
16440 * we now support up to 64 Gigabytes physical RAM.
16442 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/io.h 2009-02-16 16:18:36.000000000 +0100
16443 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/io.h 2009-03-16 16:38:05.000000000 +0100
16445 +#ifndef _ASM_X86_IO_H
16446 +#define _ASM_X86_IO_H
16448 +#define ARCH_HAS_IOREMAP_WC
16450 #ifdef CONFIG_X86_32
16451 # include "io_32.h"
16453 # include "io_64.h"
16456 +extern void *xlate_dev_mem_ptr(unsigned long phys);
16457 +extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
16459 +extern void map_devmem(unsigned long pfn, unsigned long len, pgprot_t);
16460 +extern void unmap_devmem(unsigned long pfn, unsigned long len, pgprot_t);
16462 +extern int ioremap_check_change_attr(unsigned long mfn, unsigned long size,
16463 + unsigned long prot_val);
16464 +extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
16466 +#endif /* _ASM_X86_IO_H */
16467 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/irqflags.h 2009-03-16 16:33:40.000000000 +0100
16468 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/irqflags.h 2009-03-16 16:38:05.000000000 +0100
16469 @@ -137,11 +137,11 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
16470 #endif /* __ASSEMBLY__ */
16472 #ifndef __ASSEMBLY__
16473 -#define raw_local_save_flags(flags) \
16474 - do { (flags) = __raw_local_save_flags(); } while (0)
16475 +#define raw_local_save_flags(flags) \
16476 + do { (flags) = __raw_local_save_flags(); } while (0)
16478 -#define raw_local_irq_save(flags) \
16479 - do { (flags) = __raw_local_irq_save(); } while (0)
16480 +#define raw_local_irq_save(flags) \
16481 + do { (flags) = __raw_local_irq_save(); } while (0)
16483 static inline int raw_irqs_disabled_flags(unsigned long flags)
16485 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-03-16 16:33:40.000000000 +0100
16486 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-03-16 16:38:05.000000000 +0100
16487 @@ -94,7 +94,7 @@ static inline void switch_mm(struct mm_s
16488 BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next);
16490 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
16491 - /* We were in lazy tlb mode and leave_mm disabled
16492 + /* We were in lazy tlb mode and leave_mm disabled
16493 * tlb flush IPI delivery. We must reload %cr3.
16495 load_cr3(next->pgd);
16496 @@ -107,10 +107,10 @@ static inline void switch_mm(struct mm_s
16497 #define deactivate_mm(tsk, mm) \
16498 asm("movl %0,%%gs": :"r" (0));
16500 -#define activate_mm(prev, next) \
16502 - xen_activate_mm(prev, next); \
16503 - switch_mm((prev),(next),NULL); \
16505 +#define activate_mm(prev, next) \
16507 + xen_activate_mm(prev, next); \
16508 + switch_mm((prev), (next), NULL); \
16512 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-03-16 16:33:40.000000000 +0100
16513 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-03-16 16:38:05.000000000 +0100
16514 @@ -21,7 +21,7 @@ void destroy_context(struct mm_struct *m
16515 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
16517 #if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
16518 - if (read_pda(mmu_state) == TLBSTATE_OK)
16519 + if (read_pda(mmu_state) == TLBSTATE_OK)
16520 write_pda(mmu_state, TLBSTATE_LAZY);
16523 @@ -62,7 +62,7 @@ extern void mm_pin(struct mm_struct *mm)
16524 extern void mm_unpin(struct mm_struct *mm);
16525 void mm_pin_all(void);
16527 -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
16528 +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
16529 struct task_struct *tsk)
16531 unsigned cpu = smp_processor_id();
16532 @@ -106,7 +106,7 @@ static inline void switch_mm(struct mm_s
16533 if (read_pda(active_mm) != next)
16535 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
16536 - /* We were in lazy tlb mode and leave_mm disabled
16537 + /* We were in lazy tlb mode and leave_mm disabled
16538 * tlb flush IPI delivery. We must reload CR3
16539 * to make sure to use no freed page tables.
16541 @@ -118,10 +118,11 @@ static inline void switch_mm(struct mm_s
16545 -#define deactivate_mm(tsk,mm) do { \
16546 - load_gs_index(0); \
16547 - asm volatile("movl %0,%%fs"::"r"(0)); \
16549 +#define deactivate_mm(tsk, mm) \
16551 + load_gs_index(0); \
16552 + asm volatile("movl %0,%%fs"::"r"(0)); \
16555 static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
16557 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/page.h 2009-03-16 16:33:40.000000000 +0100
16558 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/page.h 2009-03-16 16:38:05.000000000 +0100
16560 #define _PAGE_BIT_IO 9
16561 #define _PAGE_IO (_AC(1, L)<<_PAGE_BIT_IO)
16563 -#define PHYSICAL_PAGE_MASK (~(_AT(phys_addr_t, PAGE_SIZE) - 1) & __PHYSICAL_MASK)
16564 -#define PTE_MASK _AT(pteval_t, PHYSICAL_PAGE_MASK)
16565 +#define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1)
16566 +#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
16568 +/* Cast PAGE_MASK to a signed type so that it is sign-extended if
16569 + virtual addresses are 32-bits but physical addresses are larger
16570 + (ie, 32-bit PAE). */
16571 +#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK)
16573 +/* PTE_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
16574 +#define PTE_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
16576 #define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
16577 #define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
16578 @@ -34,19 +42,14 @@
16579 /* to align the pointer to the (next) page boundary */
16580 #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
16582 -#define __PHYSICAL_MASK _AT(phys_addr_t, (_AC(1,ULL) << __PHYSICAL_MASK_SHIFT) - 1)
16583 -#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
16585 #ifndef __ASSEMBLY__
16586 #include <linux/types.h>
16589 #ifdef CONFIG_X86_64
16590 #include <asm/page_64.h>
16591 -#define max_pfn_mapped end_pfn_map
16593 #include <asm/page_32.h>
16594 -#define max_pfn_mapped max_low_pfn
16595 #endif /* CONFIG_X86_64 */
16597 #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
16599 #ifndef __ASSEMBLY__
16601 extern int page_is_ram(unsigned long pagenr);
16602 +extern int devmem_is_allowed(unsigned long pagenr);
16604 +extern unsigned long max_pfn_mapped;
16608 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/page_64.h 2009-03-16 16:33:40.000000000 +0100
16609 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/page_64.h 2009-03-16 16:38:05.000000000 +0100
16612 #define THREAD_ORDER 1
16613 #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
16614 -#define CURRENT_MASK (~(THREAD_SIZE-1))
16615 +#define CURRENT_MASK (~(THREAD_SIZE - 1))
16617 #define EXCEPTION_STACK_ORDER 0
16618 #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
16619 @@ -53,10 +53,10 @@
16620 #define __VIRTUAL_MASK_SHIFT 48
16623 - * Kernel image size is limited to 128 MB (see level2_kernel_pgt in
16624 + * Kernel image size is limited to 512 MB (see level2_kernel_pgt in
16625 * arch/x86/kernel/head_64.S), and it is mapped here:
16627 -#define KERNEL_IMAGE_SIZE (128*1024*1024)
16628 +#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
16629 #define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL)
16631 #ifndef __ASSEMBLY__
16632 @@ -64,7 +64,6 @@ void clear_page(void *page);
16633 void copy_page(void *to, void *from);
16635 extern unsigned long end_pfn;
16636 -extern unsigned long end_pfn_map;
16638 static inline unsigned long __phys_addr(unsigned long x)
16640 @@ -89,6 +88,9 @@ typedef union { pteval_t pte; unsigned i
16642 #define vmemmap ((struct page *)VMEMMAP_START)
16644 +extern unsigned long init_memory_mapping(unsigned long start,
16645 + unsigned long end);
16647 #endif /* !__ASSEMBLY__ */
16649 #ifdef CONFIG_FLATMEM
16650 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pci.h 2009-03-16 16:33:40.000000000 +0100
16651 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pci.h 2009-03-16 16:38:05.000000000 +0100
16653 #include <asm/scatterlist.h>
16654 #include <asm/io.h>
16659 struct pci_sysdata {
16660 int domain; /* PCI domain */
16661 int node; /* NUMA node */
16662 #ifdef CONFIG_X86_64
16663 - void* iommu; /* IOMMU private data */
16664 + void *iommu; /* IOMMU private data */
16666 #ifdef CONFIG_XEN_PCIDEV_FRONTEND
16667 struct pcifront_device *pdev;
16668 @@ -23,6 +22,8 @@ struct pci_sysdata {
16671 /* scan a bus after allocating a pci_sysdata for it */
16672 +extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
16674 extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
16676 static inline int pci_domain_nr(struct pci_bus *bus)
16677 @@ -36,6 +37,7 @@ static inline int pci_proc_domain(struct
16678 return pci_domain_nr(bus);
16681 +extern void pci_iommu_alloc(void);
16683 /* Can be used to override the logic in pci_scan_bus for skipping
16684 already-configured bus numbers - to be used for buggy BIOSes
16685 @@ -57,7 +59,7 @@ extern unsigned long pci_mem_start;
16686 #define PCIBIOS_MIN_CARDBUS_IO 0x4000
16688 void pcibios_config_init(void);
16689 -struct pci_bus * pcibios_scan_root(int bus);
16690 +struct pci_bus *pcibios_scan_root(int bus);
16692 void pcibios_set_master(struct pci_dev *dev);
16693 void pcibios_penalize_isa_irq(int irq, int active);
16694 @@ -67,7 +69,8 @@ int pcibios_set_irq_routing(struct pci_d
16696 #define HAVE_PCI_MMAP
16697 extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
16698 - enum pci_mmap_state mmap_state, int write_combine);
16699 + enum pci_mmap_state mmap_state,
16700 + int write_combine);
16704 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgalloc.h 2009-02-16 16:18:36.000000000 +0100
16705 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgalloc.h 2009-03-16 16:38:05.000000000 +0100
16707 -#ifdef CONFIG_X86_32
16708 -# include "pgalloc_32.h"
16710 -# include "pgalloc_64.h"
16711 +#ifndef _ASM_X86_PGALLOC_H
16712 +#define _ASM_X86_PGALLOC_H
16714 +#include <linux/threads.h>
16715 +#include <linux/mm.h> /* for struct page */
16716 +#include <linux/pagemap.h>
16718 +#include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
16720 +static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {}
16721 +static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {}
16722 +static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
16723 + unsigned long start, unsigned long count) {}
16724 +static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) {}
16725 +static inline void paravirt_release_pte(unsigned long pfn) {}
16726 +static inline void paravirt_release_pmd(unsigned long pfn) {}
16727 +static inline void paravirt_release_pud(unsigned long pfn) {}
16729 +#ifdef CONFIG_X86_64
16730 +void early_make_page_readonly(void *va, unsigned int feature);
16731 +pmd_t *early_get_pmd(unsigned long va);
16732 +#define make_lowmem_page_readonly make_page_readonly
16733 +#define make_lowmem_page_writable make_page_writable
16737 + * Allocate and free page tables.
16739 +extern pgd_t *pgd_alloc(struct mm_struct *);
16740 +extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
16742 +extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
16743 +extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
16745 +/* Should really implement gc for free page table pages. This could be
16746 + done with a reference count in struct page. */
16748 +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
16750 + BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
16751 + make_lowmem_page_writable(pte, XENFEAT_writable_page_tables);
16752 + free_page((unsigned long)pte);
16755 +extern void __pte_free(pgtable_t);
16756 +static inline void pte_free(struct mm_struct *mm, struct page *pte)
16761 +extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
16763 +static inline void pmd_populate_kernel(struct mm_struct *mm,
16764 + pmd_t *pmd, pte_t *pte)
16766 + paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT);
16767 + set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
16770 +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
16771 + struct page *pte)
16773 + unsigned long pfn = page_to_pfn(pte);
16775 + paravirt_alloc_pte(mm, pfn);
16776 + if (PagePinned(virt_to_page(mm->pgd))) {
16777 + if (!PageHighMem(pte))
16778 + BUG_ON(HYPERVISOR_update_va_mapping(
16779 + (unsigned long)__va(pfn << PAGE_SHIFT),
16780 + pfn_pte(pfn, PAGE_KERNEL_RO), 0));
16781 +#ifndef CONFIG_X86_64
16782 + else if (!TestSetPagePinned(pte))
16783 + kmap_flush_unused();
16785 + set_pmd(pmd, __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
16787 + *pmd = __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE);
16790 +#define pmd_pgtable(pmd) pmd_page(pmd)
16792 +#if PAGETABLE_LEVELS > 2
16793 +extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr);
16794 +extern void __pmd_free(pgtable_t);
16796 +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
16798 + BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
16799 + __pmd_free(virt_to_page(pmd));
16802 +extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
16804 +#ifdef CONFIG_X86_PAE
16805 +extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd);
16806 +#else /* !CONFIG_X86_PAE */
16807 +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
16809 + paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
16810 + if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
16811 + BUG_ON(HYPERVISOR_update_va_mapping(
16812 + (unsigned long)pmd,
16813 + pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT,
16814 + PAGE_KERNEL_RO), 0));
16815 + set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
16817 + *pud = __pud(_PAGE_TABLE | __pa(pmd));
16819 +#endif /* CONFIG_X86_PAE */
16821 +#if PAGETABLE_LEVELS > 3
16822 +#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
16825 + * We need to use the batch mode here, but pgd_pupulate() won't be
16826 + * be called frequently.
16828 +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
16830 + paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
16831 + if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
16832 + BUG_ON(HYPERVISOR_update_va_mapping(
16833 + (unsigned long)pud,
16834 + pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT,
16835 + PAGE_KERNEL_RO), 0));
16836 + set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
16837 + set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
16839 + *(pgd) = __pgd(_PAGE_TABLE | __pa(pud));
16840 + *__user_pgd(pgd) = *(pgd);
16844 +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
16846 + return (pud_t *)pmd_alloc_one(mm, addr);
16849 +static inline void pud_free(struct mm_struct *mm, pud_t *pud)
16851 + BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
16852 + __pmd_free(virt_to_page(pud));
16855 +extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
16856 +#endif /* PAGETABLE_LEVELS > 3 */
16857 +#endif /* PAGETABLE_LEVELS > 2 */
16859 +#endif /* _ASM_X86_PGALLOC_H */
16860 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgalloc_32.h 2009-03-16 16:33:40.000000000 +0100
16861 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
16863 -#ifndef _I386_PGALLOC_H
16864 -#define _I386_PGALLOC_H
16866 -#include <linux/threads.h>
16867 -#include <linux/mm.h> /* for struct page */
16868 -#include <linux/pagemap.h>
16869 -#include <asm/tlb.h>
16870 -#include <asm-generic/tlb.h>
16871 -#include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
16873 -#define paravirt_alloc_pt(mm, pfn) do { } while (0)
16874 -#define paravirt_alloc_pd(mm, pfn) do { } while (0)
16875 -#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
16876 -#define paravirt_release_pt(pfn) do { } while (0)
16877 -#define paravirt_release_pd(pfn) do { } while (0)
16879 -static inline void pmd_populate_kernel(struct mm_struct *mm,
16880 - pmd_t *pmd, pte_t *pte)
16882 - paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT);
16883 - set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
16886 -static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
16888 - unsigned long pfn = page_to_pfn(pte);
16890 - paravirt_alloc_pt(mm, pfn);
16891 - if (PagePinned(virt_to_page(mm->pgd))) {
16892 - if (!PageHighMem(pte))
16893 - BUG_ON(HYPERVISOR_update_va_mapping(
16894 - (unsigned long)__va(pfn << PAGE_SHIFT),
16895 - pfn_pte(pfn, PAGE_KERNEL_RO), 0));
16896 - else if (!test_and_set_bit(PG_pinned, &pte->flags))
16897 - kmap_flush_unused();
16898 - set_pmd(pmd, __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
16900 - *pmd = __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE);
16902 -#define pmd_pgtable(pmd) pmd_page(pmd)
16905 - * Allocate and free page tables.
16907 -extern void pgd_test_and_unpin(pgd_t *);
16908 -extern pgd_t *pgd_alloc(struct mm_struct *);
16909 -extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
16911 -extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
16912 -extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
16914 -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
16916 - make_lowmem_page_writable(pte, XENFEAT_writable_page_tables);
16917 - free_page((unsigned long)pte);
16920 -extern void __pte_free(pgtable_t);
16921 -static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
16927 -extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
16929 -#ifdef CONFIG_X86_PAE
16931 - * In the PAE case we free the pmds as part of the pgd.
16933 -extern pmd_t *pmd_alloc_one(struct mm_struct *, unsigned long);
16935 -extern void __pmd_free(pgtable_t);
16936 -static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
16938 - BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
16939 - __pmd_free(virt_to_page(pmd));
16942 -extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
16944 -static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
16946 - struct page *page = virt_to_page(pmd);
16947 - unsigned long pfn = page_to_pfn(page);
16949 - paravirt_alloc_pd(mm, pfn);
16951 - /* Note: almost everything apart from _PAGE_PRESENT is
16952 - reserved at the pmd (PDPT) level. */
16953 - if (PagePinned(virt_to_page(mm->pgd))) {
16954 - BUG_ON(PageHighMem(page));
16955 - BUG_ON(HYPERVISOR_update_va_mapping(
16956 - (unsigned long)__va(pfn << PAGE_SHIFT),
16957 - pfn_pte(pfn, PAGE_KERNEL_RO), 0));
16958 - set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
16960 - *pudp = __pud(__pa(pmd) | _PAGE_PRESENT);
16963 - * According to Intel App note "TLBs, Paging-Structure Caches,
16964 - * and Their Invalidation", April 2007, document 317080-001,
16965 - * section 8.1: in PAE mode we explicitly have to flush the
16966 - * TLB via cr3 if the top-level pgd is changed...
16968 - if (mm == current->active_mm)
16971 -#endif /* CONFIG_X86_PAE */
16973 -#endif /* _I386_PGALLOC_H */
16974 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgalloc_64.h 2009-03-16 16:33:40.000000000 +0100
16975 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
16977 -#ifndef _X86_64_PGALLOC_H
16978 -#define _X86_64_PGALLOC_H
16980 -#include <asm/pda.h>
16981 -#include <linux/threads.h>
16982 -#include <linux/mm.h>
16983 -#include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
16985 -pmd_t *early_get_pmd(unsigned long va);
16986 -void early_make_page_readonly(void *va, unsigned int feature);
16988 -#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
16990 -#define pmd_populate_kernel(mm, pmd, pte) \
16991 - set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
16993 -static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
16995 - if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
16996 - BUG_ON(HYPERVISOR_update_va_mapping(
16997 - (unsigned long)pmd,
16998 - pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT,
16999 - PAGE_KERNEL_RO), 0));
17000 - set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
17002 - *(pud) = __pud(_PAGE_TABLE | __pa(pmd));
17007 - * We need to use the batch mode here, but pgd_pupulate() won't be
17008 - * be called frequently.
17010 -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
17012 - if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
17013 - BUG_ON(HYPERVISOR_update_va_mapping(
17014 - (unsigned long)pud,
17015 - pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT,
17016 - PAGE_KERNEL_RO), 0));
17017 - set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
17018 - set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
17020 - *(pgd) = __pgd(_PAGE_TABLE | __pa(pud));
17021 - *(__user_pgd(pgd)) = *(pgd);
17025 -#define pmd_pgtable(pmd) pmd_page(pmd)
17027 -static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
17029 - if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
17030 - BUG_ON(HYPERVISOR_update_va_mapping(
17031 - (unsigned long)__va(page_to_pfn(pte) << PAGE_SHIFT),
17032 - pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0));
17033 - set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
17035 - *(pmd) = __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT));
17039 -extern void __pmd_free(pgtable_t);
17040 -static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
17042 - BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
17043 - __pmd_free(virt_to_page(pmd));
17046 -extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr);
17048 -static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
17050 - return (pud_t *)pmd_alloc_one(mm, addr);
17053 -static inline void pud_free(struct mm_struct *mm, pud_t *pud)
17055 - BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
17056 - __pmd_free(virt_to_page(pud));
17059 -static inline void pgd_list_add(pgd_t *pgd)
17061 - struct page *page = virt_to_page(pgd);
17062 - unsigned long flags;
17064 - spin_lock_irqsave(&pgd_lock, flags);
17065 - list_add(&page->lru, &pgd_list);
17066 - spin_unlock_irqrestore(&pgd_lock, flags);
17069 -static inline void pgd_list_del(pgd_t *pgd)
17071 - struct page *page = virt_to_page(pgd);
17072 - unsigned long flags;
17074 - spin_lock_irqsave(&pgd_lock, flags);
17075 - list_del(&page->lru);
17076 - spin_unlock_irqrestore(&pgd_lock, flags);
17079 -extern void pgd_test_and_unpin(pgd_t *);
17081 -static inline pgd_t *pgd_alloc(struct mm_struct *mm)
17084 - * We allocate two contiguous pages for kernel and user.
17086 - unsigned boundary;
17087 - pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1);
17090 - pgd_list_add(pgd);
17091 - pgd_test_and_unpin(pgd);
17093 - * Copy kernel pointers in from init.
17094 - * Could keep a freelist or slab cache of those because the kernel
17095 - * part never changes.
17097 - boundary = pgd_index(__PAGE_OFFSET);
17098 - memset(pgd, 0, boundary * sizeof(pgd_t));
17099 - memcpy(pgd + boundary,
17100 - init_level4_pgt + boundary,
17101 - (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
17103 - memset(__user_pgd(pgd), 0, PAGE_SIZE); /* clean up user pgd */
17105 - * Set level3_user_pgt for vsyscall area
17107 - __user_pgd(pgd)[pgd_index(VSYSCALL_START)] =
17108 - __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
17112 -static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
17114 - pgd_test_and_unpin(pgd);
17115 - pgd_list_del(pgd);
17116 - free_pages((unsigned long)pgd, 1);
17119 -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
17121 - pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
17123 - make_page_readonly(pte, XENFEAT_writable_page_tables);
17128 -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
17130 -/* Should really implement gc for free page table pages. This could be
17131 - done with a reference count in struct page. */
17133 -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
17135 - BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
17136 - make_page_writable(pte, XENFEAT_writable_page_tables);
17137 - free_page((unsigned long)pte);
17140 -extern void __pte_free(pgtable_t);
17141 -static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
17146 -#define __pte_free_tlb(tlb,pte) \
17148 - pgtable_page_dtor((pte)); \
17149 - tlb_remove_page((tlb), (pte)); \
17152 -#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
17153 -#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
17155 -#endif /* _X86_64_PGALLOC_H */
17156 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgtable.h 2009-03-16 16:33:40.000000000 +0100
17157 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgtable.h 2009-03-16 16:38:05.000000000 +0100
17159 #ifndef _ASM_X86_PGTABLE_H
17160 #define _ASM_X86_PGTABLE_H
17162 -#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1)
17163 #define FIRST_USER_ADDRESS 0
17165 -#define _PAGE_BIT_PRESENT 0
17166 -#define _PAGE_BIT_RW 1
17167 -#define _PAGE_BIT_USER 2
17168 -#define _PAGE_BIT_PWT 3
17169 -#define _PAGE_BIT_PCD 4
17170 -#define _PAGE_BIT_ACCESSED 5
17171 -#define _PAGE_BIT_DIRTY 6
17172 -#define _PAGE_BIT_FILE 6
17173 +#define _PAGE_BIT_PRESENT 0 /* is present */
17174 +#define _PAGE_BIT_RW 1 /* writeable */
17175 +#define _PAGE_BIT_USER 2 /* userspace addressable */
17176 +#define _PAGE_BIT_PWT 3 /* page write through */
17177 +#define _PAGE_BIT_PCD 4 /* page cache disabled */
17178 +#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
17179 +#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
17180 #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
17181 #define _PAGE_BIT_PAT 7 /* on 4KB pages */
17182 #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
17184 #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
17185 #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
17187 +/* If _PAGE_BIT_PRESENT is clear, we use these: */
17189 +/* set: nonlinear file mapping, saved PTE; unset:swap */
17190 +#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
17192 +/* if the user mapped it with PROT_NONE; pte_present gives true */
17193 +#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
17196 * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a
17197 * sign-extended value on 32-bit with all 1's in the upper word,
17202 -/* If _PAGE_PRESENT is clear, we use these: */
17203 -#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, saved PTE; unset:swap */
17204 -#define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE;
17205 - pte_present gives true */
17206 +#define _PAGE_FILE (_AC(1, L)<<_PAGE_BIT_FILE)
17207 +#define _PAGE_PROTNONE (_AC(1, L)<<_PAGE_BIT_PROTNONE)
17209 #ifndef __ASSEMBLY__
17210 #if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT <= 0x030002
17211 @@ -61,20 +65,42 @@ extern unsigned int __kernel_page_user;
17215 -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
17216 -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user)
17217 +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
17218 + _PAGE_ACCESSED | _PAGE_DIRTY)
17219 +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
17220 + _PAGE_DIRTY | __kernel_page_user)
17222 +/* Set of bits not changed in pte_modify */
17223 +#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_CACHE_MASK | _PAGE_IO | \
17224 + _PAGE_ACCESSED | _PAGE_DIRTY)
17226 -#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_IO)
17228 + * PAT settings are part of the hypervisor interface, which sets the
17229 + * MSR to 0x050100070406 (i.e. WB, WT, UC-, UC, WC, WP [, UC, UC]).
17231 +#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT | _PAGE_PAT)
17232 +#define _PAGE_CACHE_WB (0)
17233 +#define _PAGE_CACHE_WT (_PAGE_PWT)
17234 +#define _PAGE_CACHE_WC (_PAGE_PAT)
17235 +#define _PAGE_CACHE_WP (_PAGE_PAT | _PAGE_PWT)
17236 +#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD)
17237 +#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT)
17239 #define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
17240 -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
17241 +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
17242 + _PAGE_ACCESSED | _PAGE_NX)
17244 -#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
17245 -#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
17246 -#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
17247 +#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \
17248 + _PAGE_USER | _PAGE_ACCESSED)
17249 +#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
17250 + _PAGE_ACCESSED | _PAGE_NX)
17251 +#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
17253 #define PAGE_COPY PAGE_COPY_NOEXEC
17254 -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
17255 -#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
17256 +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \
17257 + _PAGE_ACCESSED | _PAGE_NX)
17258 +#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
17261 #ifdef CONFIG_X86_32
17262 #define _PAGE_KERNEL_EXEC \
17263 @@ -93,6 +119,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
17264 #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
17265 #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
17266 #define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT)
17267 +#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC)
17268 #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
17269 #define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD)
17270 #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
17271 @@ -109,6 +136,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
17272 #define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
17273 #define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
17274 #define PAGE_KERNEL_RX MAKE_GLOBAL(__PAGE_KERNEL_RX)
17275 +#define PAGE_KERNEL_WC MAKE_GLOBAL(__PAGE_KERNEL_WC)
17276 #define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
17277 #define PAGE_KERNEL_UC_MINUS MAKE_GLOBAL(__PAGE_KERNEL_UC_MINUS)
17278 #define PAGE_KERNEL_EXEC_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE)
17279 @@ -142,7 +170,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
17280 * ZERO_PAGE is a global shared page that is always zero: used
17281 * for zero-mapped memory areas etc..
17283 -extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
17284 +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
17285 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
17287 extern spinlock_t pgd_lock;
17288 @@ -152,30 +180,111 @@ extern struct list_head pgd_list;
17289 * The following only work if pte_present() is true.
17290 * Undefined behaviour if not..
17292 -static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; }
17293 -static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
17294 -static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
17295 -static inline int pte_file(pte_t pte) { return __pte_val(pte) & _PAGE_FILE; }
17296 -static inline int pte_huge(pte_t pte) { return __pte_val(pte) & _PAGE_PSE; }
17297 -static inline int pte_global(pte_t pte) { return 0; }
17298 -static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); }
17300 -static inline int pmd_large(pmd_t pte) {
17301 - return (__pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
17302 - (_PAGE_PSE|_PAGE_PRESENT);
17305 -static inline pte_t pte_mkclean(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); }
17306 -static inline pte_t pte_mkold(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); }
17307 -static inline pte_t pte_wrprotect(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_RW); }
17308 -static inline pte_t pte_mkexec(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_NX); }
17309 -static inline pte_t pte_mkdirty(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_DIRTY); }
17310 -static inline pte_t pte_mkyoung(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_ACCESSED); }
17311 -static inline pte_t pte_mkwrite(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_RW); }
17312 -static inline pte_t pte_mkhuge(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_PSE); }
17313 -static inline pte_t pte_clrhuge(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE); }
17314 -static inline pte_t pte_mkglobal(pte_t pte) { return pte; }
17315 -static inline pte_t pte_clrglobal(pte_t pte) { return pte; }
17316 +static inline int pte_dirty(pte_t pte)
17318 + return __pte_val(pte) & _PAGE_DIRTY;
17321 +static inline int pte_young(pte_t pte)
17323 + return __pte_val(pte) & _PAGE_ACCESSED;
17326 +static inline int pte_write(pte_t pte)
17328 + return __pte_val(pte) & _PAGE_RW;
17331 +static inline int pte_file(pte_t pte)
17333 + return __pte_val(pte) & _PAGE_FILE;
17336 +static inline int pte_huge(pte_t pte)
17338 + return __pte_val(pte) & _PAGE_PSE;
17341 +static inline int pte_global(pte_t pte)
17346 +static inline int pte_exec(pte_t pte)
17348 + return !(__pte_val(pte) & _PAGE_NX);
17351 +static inline int pte_special(pte_t pte)
17356 +static inline int pmd_large(pmd_t pte)
17358 + return (__pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
17359 + (_PAGE_PSE | _PAGE_PRESENT);
17362 +static inline pte_t pte_mkclean(pte_t pte)
17364 + return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_DIRTY);
17367 +static inline pte_t pte_mkold(pte_t pte)
17369 + return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED);
17372 +static inline pte_t pte_wrprotect(pte_t pte)
17374 + return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_RW);
17377 +static inline pte_t pte_mkexec(pte_t pte)
17379 + return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_NX);
17382 +static inline pte_t pte_mkdirty(pte_t pte)
17384 + return __pte_ma(__pte_val(pte) | _PAGE_DIRTY);
17387 +static inline pte_t pte_mkyoung(pte_t pte)
17389 + return __pte_ma(__pte_val(pte) | _PAGE_ACCESSED);
17392 +static inline pte_t pte_mkwrite(pte_t pte)
17394 + return __pte_ma(__pte_val(pte) | _PAGE_RW);
17397 +static inline pte_t pte_mkhuge(pte_t pte)
17399 + return __pte_ma(__pte_val(pte) | _PAGE_PSE);
17402 +static inline pte_t pte_clrhuge(pte_t pte)
17404 + return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE);
17407 +static inline pte_t pte_mkglobal(pte_t pte)
17412 +static inline pte_t pte_clrglobal(pte_t pte)
17417 +static inline pte_t pte_mkspecial(pte_t pte)
17422 extern pteval_t __supported_pte_mask;
17424 @@ -202,15 +311,33 @@ static inline pte_t pte_modify(pte_t pte
17425 pteval_t val = pte_val(pte);
17427 val &= _PAGE_CHG_MASK;
17428 - val |= pgprot_val(newprot) & __supported_pte_mask;
17429 + val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask;
17434 -#define pte_pgprot(x) __pgprot(pte_val(x) & (0xfff | _PAGE_NX))
17435 +/* mprotect needs to preserve PAT bits when updating vm_page_prot */
17436 +#define pgprot_modify pgprot_modify
17437 +static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
17439 + pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK;
17440 + pgprotval_t addbits = pgprot_val(newprot);
17441 + return __pgprot(preservebits | addbits);
17444 +#define pte_pgprot(x) __pgprot(__pte_val(x) & ~PTE_MASK)
17446 #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
17448 +#ifndef __ASSEMBLY__
17449 +#define __HAVE_PHYS_MEM_ACCESS_PROT
17451 +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
17452 + unsigned long size, pgprot_t vma_prot);
17453 +int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
17454 + unsigned long size, pgprot_t *vma_prot);
17457 #define set_pte(ptep, pte) xen_set_pte(ptep, pte)
17458 #define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte)
17460 @@ -246,6 +373,9 @@ static inline pte_t pte_modify(pte_t pte
17461 # include "pgtable_64.h"
17464 +#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET)
17465 +#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
17467 #ifndef __ASSEMBLY__
17470 @@ -312,46 +442,17 @@ static inline void xen_pte_clear(struct
17471 * bit at the same time.
17473 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
17474 -#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
17476 - int __changed = !pte_same(*(ptep), entry); \
17477 - if (__changed && (dirty)) { \
17478 - if ( likely((vma)->vm_mm == current->mm) ) { \
17479 - BUG_ON(HYPERVISOR_update_va_mapping(address, \
17481 - (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
17482 - UVMF_INVLPG|UVMF_MULTI)); \
17484 - xen_l1_entry_update(ptep, entry); \
17485 - flush_tlb_page(vma, address); \
17490 +extern int ptep_set_access_flags(struct vm_area_struct *vma,
17491 + unsigned long address, pte_t *ptep,
17492 + pte_t entry, int dirty);
17494 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
17495 -#define ptep_test_and_clear_young(vma, addr, ptep) ({ \
17497 - if (pte_young(*(ptep))) \
17498 - __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \
17501 - pte_update((vma)->vm_mm, addr, ptep); \
17504 +extern int ptep_test_and_clear_young(struct vm_area_struct *vma,
17505 + unsigned long addr, pte_t *ptep);
17507 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
17508 -#define ptep_clear_flush_young(vma, address, ptep) \
17510 - pte_t __pte = *(ptep); \
17511 - int __young = pte_young(__pte); \
17512 - __pte = pte_mkold(__pte); \
17513 - if (PagePinned(virt_to_page((vma)->vm_mm->pgd))) \
17514 - (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \
17515 - else if (__young) \
17516 - (ptep)->pte_low = __pte.pte_low; \
17519 +extern int ptep_clear_flush_young(struct vm_area_struct *vma,
17520 + unsigned long address, pte_t *ptep);
17522 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
17523 #define ptep_clear_flush(vma, addr, ptep) \
17524 @@ -370,7 +471,8 @@ static inline void xen_pte_clear(struct
17527 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
17528 -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
17529 +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
17534 @@ -398,13 +500,29 @@ static inline pte_t ptep_get_and_clear(s
17535 pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *, unsigned long, pte_t *, int);
17537 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
17538 -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
17539 +static inline void ptep_set_wrprotect(struct mm_struct *mm,
17540 + unsigned long addr, pte_t *ptep)
17543 if (pte_write(pte))
17544 set_pte_at(mm, addr, ptep, pte_wrprotect(pte));
17548 + * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
17550 + * dst - pointer to pgd range anywhere on a pgd page
17552 + * count - the number of pgds to copy.
17554 + * dst and src can be on the same page, but the range must not overlap,
17555 + * and must not cross a page boundary.
17557 +static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
17559 + memcpy(dst, src, count * sizeof(pgd_t));
17562 #define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
17563 xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
17565 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-03-16 16:33:40.000000000 +0100
17566 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-03-16 16:38:05.000000000 +0100
17568 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
17571 -#define pte_ERROR(e) \
17572 - printk("%s:%d: bad pte %p(%016Lx pfn %08lx).\n", __FILE__, __LINE__, \
17573 - &(e), __pte_val(e), pte_pfn(e))
17574 -#define pmd_ERROR(e) \
17575 - printk("%s:%d: bad pmd %p(%016Lx pfn %08Lx).\n", __FILE__, __LINE__, \
17576 - &(e), __pmd_val(e), (pmd_val(e) & PTE_MASK) >> PAGE_SHIFT)
17577 -#define pgd_ERROR(e) \
17578 - printk("%s:%d: bad pgd %p(%016Lx pfn %08Lx).\n", __FILE__, __LINE__, \
17579 - &(e), __pgd_val(e), (pgd_val(e) & PTE_MASK) >> PAGE_SHIFT)
17581 +#define pte_ERROR(e) \
17582 + printk("%s:%d: bad pte %p(%016Lx pfn %08lx).\n", \
17583 + __FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e))
17584 +#define pmd_ERROR(e) \
17585 + printk("%s:%d: bad pmd %p(%016Lx pfn %08Lx).\n", \
17586 + __FILE__, __LINE__, &(e), __pmd_val(e), \
17587 + (pmd_val(e) & PTE_MASK) >> PAGE_SHIFT)
17588 +#define pgd_ERROR(e) \
17589 + printk("%s:%d: bad pgd %p(%016Lx pfn %08Lx).\n", \
17590 + __FILE__, __LINE__, &(e), __pgd_val(e), \
17591 + (pgd_val(e) & PTE_MASK) >> PAGE_SHIFT)
17593 static inline int pud_none(pud_t pud)
17595 return __pud_val(pud) == 0;
17598 static inline int pud_bad(pud_t pud)
17600 return (__pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0;
17603 static inline int pud_present(pud_t pud)
17605 return __pud_val(pud) & _PAGE_PRESENT;
17606 @@ -48,12 +51,14 @@ static inline void xen_set_pte(pte_t *pt
17608 static inline void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
17610 - set_64bit((unsigned long long *)(ptep),__pte_val(pte));
17611 + set_64bit((unsigned long long *)(ptep), __pte_val(pte));
17614 static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd)
17616 xen_l2_entry_update(pmdp, pmd);
17619 static inline void xen_set_pud(pud_t *pudp, pud_t pud)
17621 xen_l3_entry_update(pudp, pud);
17622 @@ -92,20 +97,19 @@ static inline void pud_clear(pud_t *pudp
17623 * current pgd to avoid unnecessary TLB flushes.
17626 - if (__pa(pudp) >= pgd && __pa(pudp) < (pgd + sizeof(pgd_t)*PTRS_PER_PGD))
17627 + if (__pa(pudp) >= pgd && __pa(pudp) <
17628 + (pgd + sizeof(pgd_t)*PTRS_PER_PGD))
17632 -#define pud_page(pud) \
17633 -((struct page *) __va(pud_val(pud) & PAGE_MASK))
17634 +#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_MASK))
17636 -#define pud_page_vaddr(pud) \
17637 -((unsigned long) __va(pud_val(pud) & PAGE_MASK))
17638 +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_MASK))
17641 /* Find an entry in the second-level page table.. */
17642 -#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
17643 - pmd_index(address))
17644 +#define pmd_offset(pud, address) ((pmd_t *)pud_page(*(pud)) + \
17645 + pmd_index(address))
17648 static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res)
17649 @@ -150,7 +154,8 @@ static inline int pte_none(pte_t pte)
17650 * put the 32 bits of offset into the high part.
17652 #define pte_to_pgoff(pte) ((pte).pte_high)
17653 -#define pgoff_to_pte(off) ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
17654 +#define pgoff_to_pte(off) \
17655 + ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
17656 #define PTE_FILE_MAX_BITS 32
17658 /* Encode and de-code a swap entry */
17659 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-16 16:33:40.000000000 +0100
17660 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-16 16:38:05.000000000 +0100
17661 @@ -38,16 +38,13 @@ void paging_init(void);
17662 #ifdef CONFIG_X86_PAE
17663 # include <asm/pgtable-3level-defs.h>
17664 # define PMD_SIZE (1UL << PMD_SHIFT)
17665 -# define PMD_MASK (~(PMD_SIZE-1))
17666 +# define PMD_MASK (~(PMD_SIZE - 1))
17668 # include <asm/pgtable-2level-defs.h>
17671 #define PGDIR_SIZE (1UL << PGDIR_SHIFT)
17672 -#define PGDIR_MASK (~(PGDIR_SIZE-1))
17674 -#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
17675 -#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
17676 +#define PGDIR_MASK (~(PGDIR_SIZE - 1))
17678 /* Just any arbitrary offset to the start of the vmalloc VM area: the
17679 * current 8MB value just means that there will be a 8MB "hole" after the
17680 @@ -56,21 +53,22 @@ void paging_init(void);
17681 * The vmalloc() routines leaves a hole of 4kB between each vmalloced
17682 * area for the same reason. ;)
17684 -#define VMALLOC_OFFSET (8*1024*1024)
17685 -#define VMALLOC_START (((unsigned long) high_memory + \
17686 - 2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
17687 +#define VMALLOC_OFFSET (8 * 1024 * 1024)
17688 +#define VMALLOC_START (((unsigned long)high_memory + 2 * VMALLOC_OFFSET - 1) \
17689 + & ~(VMALLOC_OFFSET - 1))
17690 #ifdef CONFIG_X86_PAE
17691 #define LAST_PKMAP 512
17693 #define LAST_PKMAP 1024
17696 -#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK)
17697 +#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1)) \
17700 #ifdef CONFIG_HIGHMEM
17701 -# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE)
17702 +# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
17704 -# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE)
17705 +# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE)
17709 @@ -91,10 +89,10 @@ extern unsigned long pg0[];
17710 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
17711 can temporarily clear it. */
17712 #define pmd_present(x) (__pmd_val(x))
17713 -#define pmd_bad(x) ((__pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
17714 +#define pmd_bad(x) ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
17716 #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT)
17717 -#define pmd_bad(x) ((__pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
17718 +#define pmd_bad(x) ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
17722 @@ -107,32 +105,18 @@ extern unsigned long pg0[];
17726 - * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
17728 - * dst - pointer to pgd range anwhere on a pgd page
17730 - * count - the number of pgds to copy.
17732 - * dst and src can be on the same page, but the range must not overlap,
17733 - * and must not cross a page boundary.
17734 + * Macro to mark a page protection value as "uncacheable".
17735 + * On processors which do not support it, this is a no-op.
17737 -static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
17739 - memcpy(dst, src, count * sizeof(pgd_t));
17743 - * Macro to mark a page protection value as "uncacheable". On processors which do not support
17744 - * it, this is a no-op.
17746 -#define pgprot_noncached(prot) ((boot_cpu_data.x86 > 3) \
17747 - ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) : (prot))
17748 +#define pgprot_noncached(prot) \
17749 + ((boot_cpu_data.x86 > 3) \
17750 + ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) \
17754 * Conversion functions: convert a page and protection to a page entry,
17755 * and a page entry and page directory to the page they refer to.
17758 #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
17761 @@ -141,20 +125,20 @@ static inline void clone_pgd_range(pgd_t
17762 * this macro returns the index of the entry in the pgd page which would
17763 * control the given virtual address
17765 -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
17766 -#define pgd_index_k(addr) pgd_index(addr)
17767 +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
17768 +#define pgd_index_k(addr) pgd_index((addr))
17771 * pgd_offset() returns a (pgd_t *)
17772 * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
17774 -#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
17775 +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
17778 * a shortcut which implies the use of the kernel's pgd, instead
17781 -#define pgd_offset_k(address) pgd_offset(&init_mm, address)
17782 +#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
17784 static inline int pud_large(pud_t pud) { return 0; }
17786 @@ -164,8 +148,8 @@ static inline int pud_large(pud_t pud) {
17787 * this macro returns the index of the entry in the pmd page which would
17788 * control the given virtual address
17790 -#define pmd_index(address) \
17791 - (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
17792 +#define pmd_index(address) \
17793 + (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
17796 * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
17797 @@ -173,33 +157,36 @@ static inline int pud_large(pud_t pud) {
17798 * this macro returns the index of the entry in the pte page which would
17799 * control the given virtual address
17801 -#define pte_index(address) \
17802 - (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
17803 -#define pte_offset_kernel(dir, address) \
17804 - ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
17805 +#define pte_index(address) \
17806 + (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
17807 +#define pte_offset_kernel(dir, address) \
17808 + ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index((address)))
17810 -#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
17811 +#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
17813 -#define pmd_page_vaddr(pmd) \
17814 - ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
17815 +#define pmd_page_vaddr(pmd) \
17816 + ((unsigned long)__va(pmd_val((pmd)) & PTE_MASK))
17818 #if defined(CONFIG_HIGHPTE)
17819 -#define pte_offset_map(dir, address) \
17820 - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
17821 -#define pte_offset_map_nested(dir, address) \
17822 - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
17823 -#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
17824 -#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
17826 -#define pte_offset_map(dir, address) \
17827 - ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
17828 -#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
17829 +#define pte_offset_map(dir, address) \
17830 + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \
17831 + pte_index((address)))
17832 +#define pte_offset_map_nested(dir, address) \
17833 + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \
17834 + pte_index((address)))
17835 +#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0)
17836 +#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
17838 +#define pte_offset_map(dir, address) \
17839 + ((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address)))
17840 +#define pte_offset_map_nested(dir, address) pte_offset_map((dir), (address))
17841 #define pte_unmap(pte) do { } while (0)
17842 #define pte_unmap_nested(pte) do { } while (0)
17845 /* Clear a kernel PTE and flush it from the TLB */
17846 -#define kpte_clear_flush(ptep, vaddr) do { \
17847 +#define kpte_clear_flush(ptep, vaddr) \
17849 if (HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG)) \
17852 @@ -208,7 +195,7 @@ static inline int pud_large(pud_t pud) {
17853 * The i386 doesn't have any external MMU info: the kernel page
17854 * tables contain all the necessary information.
17856 -#define update_mmu_cache(vma,address,pte) do { } while (0)
17857 +#define update_mmu_cache(vma, address, pte) do { } while (0)
17859 void make_lowmem_page_readonly(void *va, unsigned int feature);
17860 void make_lowmem_page_writable(void *va, unsigned int feature);
17861 @@ -225,7 +212,7 @@ void make_lowmem_page_writable(void *va,
17862 #define kern_addr_valid(kaddr) (0)
17865 -#define io_remap_pfn_range(vma,from,pfn,size,prot) \
17866 -direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
17867 +#define io_remap_pfn_range(vma, from, pfn, size, prot) \
17868 + direct_remap_pfn_range(vma, from, pfn, size, prot, DOMID_IO)
17870 #endif /* _I386_PGTABLE_H */
17871 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-16 16:33:40.000000000 +0100
17872 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-16 16:38:05.000000000 +0100
17873 @@ -31,7 +31,7 @@ extern void paging_init(void);
17875 #endif /* !__ASSEMBLY__ */
17877 -#define SHARED_KERNEL_PMD 1
17878 +#define SHARED_KERNEL_PMD 0
17881 * PGDIR_SHIFT determines what a top-level page table entry can map
17882 @@ -59,18 +59,20 @@ extern void paging_init(void);
17884 #ifndef __ASSEMBLY__
17886 -#define pte_ERROR(e) \
17887 - printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
17888 - &(e), __pte_val(e), pte_pfn(e))
17889 -#define pmd_ERROR(e) \
17890 - printk("%s:%d: bad pmd %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
17891 - &(e), __pmd_val(e), pmd_pfn(e))
17892 -#define pud_ERROR(e) \
17893 - printk("%s:%d: bad pud %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
17894 - &(e), __pud_val(e), (pud_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
17895 -#define pgd_ERROR(e) \
17896 - printk("%s:%d: bad pgd %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
17897 - &(e), __pgd_val(e), (pgd_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
17898 +#define pte_ERROR(e) \
17899 + printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", \
17900 + __FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e))
17901 +#define pmd_ERROR(e) \
17902 + printk("%s:%d: bad pmd %p(%016lx pfn %010lx).\n", \
17903 + __FILE__, __LINE__, &(e), __pmd_val(e), pmd_pfn(e))
17904 +#define pud_ERROR(e) \
17905 + printk("%s:%d: bad pud %p(%016lx pfn %010lx).\n", \
17906 + __FILE__, __LINE__, &(e), __pud_val(e), \
17907 + (pud_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
17908 +#define pgd_ERROR(e) \
17909 + printk("%s:%d: bad pgd %p(%016lx pfn %010lx).\n", \
17910 + __FILE__, __LINE__, &(e), __pgd_val(e), \
17911 + (pgd_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
17913 #define pgd_none(x) (!__pgd_val(x))
17914 #define pud_none(x) (!__pud_val(x))
17915 @@ -125,7 +127,7 @@ static inline void xen_set_pgd(pgd_t *pg
17916 xen_l4_entry_update(pgdp, pgd);
17919 -static inline void xen_pgd_clear(pgd_t * pgd)
17920 +static inline void xen_pgd_clear(pgd_t *pgd)
17922 xen_set_pgd(pgd, xen_make_pgd(0));
17923 xen_set_pgd(__user_pgd(pgd), xen_make_pgd(0));
17924 @@ -135,43 +137,43 @@ static inline void xen_pgd_clear(pgd_t *
17926 #endif /* !__ASSEMBLY__ */
17928 -#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
17929 -#define PMD_MASK (~(PMD_SIZE-1))
17930 -#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT)
17931 -#define PUD_MASK (~(PUD_SIZE-1))
17932 -#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
17933 -#define PGDIR_MASK (~(PGDIR_SIZE-1))
17934 +#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
17935 +#define PMD_MASK (~(PMD_SIZE - 1))
17936 +#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
17937 +#define PUD_MASK (~(PUD_SIZE - 1))
17938 +#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
17939 +#define PGDIR_MASK (~(PGDIR_SIZE - 1))
17942 -#define MAXMEM _AC(0x3fffffffffff, UL)
17943 +#define MAXMEM _AC(0x00003fffffffffff, UL)
17944 #define VMALLOC_START _AC(0xffffc20000000000, UL)
17945 #define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
17946 #define VMEMMAP_START _AC(0xffffe20000000000, UL)
17947 -#define MODULES_VADDR _AC(0xffffffff88000000, UL)
17948 +#define MODULES_VADDR _AC(0xffffffffa0000000, UL)
17949 #define MODULES_END _AC(0xfffffffffff00000, UL)
17950 #define MODULES_LEN (MODULES_END - MODULES_VADDR)
17952 #ifndef __ASSEMBLY__
17954 -static inline unsigned long pgd_bad(pgd_t pgd)
17955 +static inline int pgd_bad(pgd_t pgd)
17957 - return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
17958 + return (__pgd_val(pgd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
17961 -static inline unsigned long pud_bad(pud_t pud)
17962 +static inline int pud_bad(pud_t pud)
17964 - return __pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
17965 + return (__pud_val(pud) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
17968 -static inline unsigned long pmd_bad(pmd_t pmd)
17969 +static inline int pmd_bad(pmd_t pmd)
17971 - return __pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
17972 + return (__pmd_val(pmd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
17975 #define pte_none(x) (!(x).pte)
17976 #define pte_present(x) ((x).pte & (_PAGE_PRESENT | _PAGE_PROTNONE))
17978 -#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) /* FIXME: is this right? */
17979 +#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */
17981 #define __pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT)
17982 #define pte_mfn(_pte) ((_pte).pte & _PAGE_PRESENT ? \
17983 @@ -181,13 +183,13 @@ static inline unsigned long pmd_bad(pmd_
17984 mfn_to_local_pfn(__pte_mfn(_pte)) : \
17987 -#define pte_page(x) pfn_to_page(pte_pfn(x))
17988 +#define pte_page(x) pfn_to_page(pte_pfn((x)))
17991 * Macro to mark a page protection value as "uncacheable".
17993 -#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT))
17995 +#define pgprot_noncached(prot) \
17996 + (__pgprot(pgprot_val((prot)) | _PAGE_PCD | _PAGE_PWT))
17999 * Conversion functions: convert a page and protection to a page entry,
18000 @@ -197,36 +199,39 @@ static inline unsigned long pmd_bad(pmd_
18004 -#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
18005 -#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
18006 -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
18007 -#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
18008 -#define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
18009 +#define pgd_page_vaddr(pgd) \
18010 + ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_MASK))
18011 +#define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT))
18012 +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
18013 +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
18014 +#define pgd_offset_k(address) (init_level4_pgt + pgd_index((address)))
18015 #define pgd_present(pgd) (__pgd_val(pgd) & _PAGE_PRESENT)
18016 static inline int pgd_large(pgd_t pgd) { return 0; }
18017 #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
18019 /* PUD - Level3 access */
18020 /* to find an entry in a page-table-directory. */
18021 -#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
18022 -#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
18023 -#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
18024 -#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
18025 +#define pud_page_vaddr(pud) \
18026 + ((unsigned long)__va(pud_val((pud)) & PHYSICAL_PAGE_MASK))
18027 +#define pud_page(pud) (pfn_to_page(pud_val((pud)) >> PAGE_SHIFT))
18028 +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
18029 +#define pud_offset(pgd, address) \
18030 + ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address)))
18031 #define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT)
18033 static inline int pud_large(pud_t pte)
18035 - return (__pud_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
18036 - (_PAGE_PSE|_PAGE_PRESENT);
18037 + return (__pud_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
18038 + (_PAGE_PSE | _PAGE_PRESENT);
18041 /* PMD - Level 2 access */
18042 -#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
18043 -#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
18044 +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_MASK))
18045 +#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
18047 -#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
18048 -#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
18049 - pmd_index(address))
18050 +#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
18051 +#define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \
18052 + pmd_index(address))
18053 #define pmd_none(x) (!__pmd_val(x))
18054 #if CONFIG_XEN_COMPAT <= 0x030002
18055 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
18056 @@ -235,43 +240,56 @@ static inline int pud_large(pud_t pte)
18058 #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT)
18060 -#define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
18061 -#define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
18062 +#define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot))))
18063 +#define pmd_pfn(x) ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT)
18065 #define pte_to_pgoff(pte) ((__pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
18066 -#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | _PAGE_FILE })
18067 +#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \
18069 #define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT
18071 /* PTE - Level 1 access. */
18073 /* page, protection -> pte */
18074 -#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
18076 -#define pte_index(address) \
18077 - (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
18078 +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn((page)), (pgprot))
18080 +#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
18081 #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
18082 - pte_index(address))
18083 + pte_index((address)))
18085 /* x86-64 always has all page tables mapped. */
18086 -#define pte_offset_map(dir,address) pte_offset_kernel(dir,address)
18087 -#define pte_offset_map_nested(dir,address) pte_offset_kernel(dir,address)
18088 +#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
18089 +#define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address))
18090 #define pte_unmap(pte) /* NOP */
18091 -#define pte_unmap_nested(pte) /* NOP */
18092 +#define pte_unmap_nested(pte) /* NOP */
18094 +#define update_mmu_cache(vma, address, pte) do { } while (0)
18096 -#define update_mmu_cache(vma,address,pte) do { } while (0)
18097 +#define direct_gbpages 0
18099 /* Encode and de-code a swap entry */
18100 -#define __swp_type(x) (((x).val >> 1) & 0x3f)
18101 -#define __swp_offset(x) ((x).val >> 8)
18102 -#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
18103 +#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
18104 +#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
18105 +#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
18107 +#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
18108 +#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
18111 +#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \
18112 + & ((1U << SWP_TYPE_BITS) - 1))
18113 +#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT)
18114 +#define __swp_entry(type, offset) ((swp_entry_t) { \
18115 + ((type) << (_PAGE_BIT_PRESENT + 1)) \
18116 + | ((offset) << SWP_OFFSET_SHIFT) })
18117 #define __pte_to_swp_entry(pte) ((swp_entry_t) { __pte_val(pte) })
18118 #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
18120 -extern int kern_addr_valid(unsigned long addr);
18121 +extern int kern_addr_valid(unsigned long addr);
18122 extern void cleanup_highmap(void);
18124 -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
18125 - direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
18126 +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
18127 + direct_remap_pfn_range(vma, vaddr, pfn, size, prot, DOMID_IO)
18129 #define HAVE_ARCH_UNMAPPED_AREA
18130 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
18131 @@ -284,8 +302,10 @@ extern void cleanup_highmap(void);
18133 /* fs/proc/kcore.c */
18134 #define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK)
18135 -#define kc_offset_to_vaddr(o) \
18136 - (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
18137 +#define kc_offset_to_vaddr(o) \
18138 + (((o) & (1UL << (__VIRTUAL_MASK_SHIFT - 1))) \
18139 + ? ((o) | ~__VIRTUAL_MASK) \
18142 #define __HAVE_ARCH_PTE_SAME
18143 #endif /* !__ASSEMBLY__ */
18144 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/processor.h 2009-03-16 16:33:40.000000000 +0100
18145 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/processor.h 2009-03-16 16:38:05.000000000 +0100
18148 #include <asm/processor-flags.h>
18150 -/* migration helpers, for KVM - will be removed in 2.6.25: */
18151 -#include <asm/vm86.h>
18152 -#define Xgt_desc_struct desc_ptr
18154 /* Forward declaration, a strange C thing */
18155 struct task_struct;
18157 @@ -24,6 +20,7 @@ struct mm_struct;
18158 #include <asm/msr.h>
18159 #include <asm/desc_defs.h>
18160 #include <asm/nops.h>
18162 #include <linux/personality.h>
18163 #include <linux/cpumask.h>
18164 #include <linux/cache.h>
18165 @@ -38,16 +35,18 @@ struct mm_struct;
18166 static inline void *current_text_addr(void)
18169 - asm volatile("mov $1f,%0\n1:":"=r" (pc));
18171 + asm volatile("mov $1f, %0; 1:":"=r" (pc));
18176 #ifdef CONFIG_X86_VSMP
18177 -#define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT)
18178 -#define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT)
18179 +# define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT)
18180 +# define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT)
18182 -#define ARCH_MIN_TASKALIGN 16
18183 -#define ARCH_MIN_MMSTRUCT_ALIGN 0
18184 +# define ARCH_MIN_TASKALIGN 16
18185 +# define ARCH_MIN_MMSTRUCT_ALIGN 0
18189 @@ -57,68 +56,80 @@ static inline void *current_text_addr(vo
18192 struct cpuinfo_x86 {
18193 - __u8 x86; /* CPU family */
18194 - __u8 x86_vendor; /* CPU vendor */
18197 + __u8 x86; /* CPU family */
18198 + __u8 x86_vendor; /* CPU vendor */
18201 #ifdef CONFIG_X86_32
18202 - char wp_works_ok; /* It doesn't on 386's */
18203 - char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */
18210 + char wp_works_ok; /* It doesn't on 386's */
18212 + /* Problems on some 486Dx4's and old 386's: */
18213 + char hlt_works_ok;
18221 - /* number of 4K pages in DTLB/ITLB combined(in pages)*/
18223 - __u8 x86_virt_bits, x86_phys_bits;
18224 - /* cpuid returned core id bits */
18225 - __u8 x86_coreid_bits;
18226 - /* Max extended CPUID function supported */
18227 - __u32 extended_cpuid_level;
18229 - int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
18230 - __u32 x86_capability[NCAPINTS];
18231 - char x86_vendor_id[16];
18232 - char x86_model_id[64];
18233 - int x86_cache_size; /* in KB - valid for CPUS which support this
18235 - int x86_cache_alignment; /* In bytes */
18237 - unsigned long loops_per_jiffy;
18238 + /* Number of 4K pages in DTLB/ITLB combined(in pages): */
18240 + __u8 x86_virt_bits;
18241 + __u8 x86_phys_bits;
18242 + /* CPUID returned core id bits: */
18243 + __u8 x86_coreid_bits;
18244 + /* Max extended CPUID function supported: */
18245 + __u32 extended_cpuid_level;
18247 + /* Maximum supported CPUID level, -1=no CPUID: */
18249 + __u32 x86_capability[NCAPINTS];
18250 + char x86_vendor_id[16];
18251 + char x86_model_id[64];
18252 + /* in KB - valid for CPUS which support this call: */
18253 + int x86_cache_size;
18254 + int x86_cache_alignment; /* In bytes */
18256 + unsigned long loops_per_jiffy;
18258 - cpumask_t llc_shared_map; /* cpus sharing the last level cache */
18259 + /* cpus sharing the last level cache: */
18260 + cpumask_t llc_shared_map;
18262 - u16 x86_max_cores; /* cpuid returned max cores value */
18264 - u16 x86_clflush_size;
18265 + /* cpuid returned max cores value: */
18266 + u16 x86_max_cores;
18268 + u16 initial_apicid;
18269 + u16 x86_clflush_size;
18271 - u16 booted_cores; /* number of cores as seen by OS */
18272 - u16 phys_proc_id; /* Physical processor id. */
18273 - u16 cpu_core_id; /* Core id */
18274 - u16 cpu_index; /* index into per_cpu list */
18275 + /* number of cores as seen by the OS: */
18276 + u16 booted_cores;
18277 + /* Physical processor id: */
18278 + u16 phys_proc_id;
18281 + /* Index into per_cpu list: */
18284 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
18286 -#define X86_VENDOR_INTEL 0
18287 -#define X86_VENDOR_CYRIX 1
18288 -#define X86_VENDOR_AMD 2
18289 -#define X86_VENDOR_UMC 3
18290 -#define X86_VENDOR_NEXGEN 4
18291 -#define X86_VENDOR_CENTAUR 5
18292 -#define X86_VENDOR_TRANSMETA 7
18293 -#define X86_VENDOR_NSC 8
18294 -#define X86_VENDOR_NUM 9
18295 -#define X86_VENDOR_UNKNOWN 0xff
18296 +#define X86_VENDOR_INTEL 0
18297 +#define X86_VENDOR_CYRIX 1
18298 +#define X86_VENDOR_AMD 2
18299 +#define X86_VENDOR_UMC 3
18300 +#define X86_VENDOR_CENTAUR 5
18301 +#define X86_VENDOR_TRANSMETA 7
18302 +#define X86_VENDOR_NSC 8
18303 +#define X86_VENDOR_NUM 9
18305 +#define X86_VENDOR_UNKNOWN 0xff
18308 * capabilities of CPUs
18310 -extern struct cpuinfo_x86 boot_cpu_data;
18311 -extern struct cpuinfo_x86 new_cpu_data;
18312 -extern __u32 cleared_cpu_caps[NCAPINTS];
18313 +extern struct cpuinfo_x86 boot_cpu_data;
18314 +extern struct cpuinfo_x86 new_cpu_data;
18316 +extern __u32 cleared_cpu_caps[NCAPINTS];
18319 DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
18320 @@ -129,7 +140,18 @@ DECLARE_PER_CPU(struct cpuinfo_x86, cpu_
18321 #define current_cpu_data boot_cpu_data
18324 -void cpu_detect(struct cpuinfo_x86 *c);
18325 +static inline int hlt_works(int cpu)
18327 +#ifdef CONFIG_X86_32
18328 + return cpu_data(cpu).hlt_works_ok;
18334 +#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
18336 +extern void cpu_detect(struct cpuinfo_x86 *c);
18338 extern void identify_cpu(struct cpuinfo_x86 *);
18339 extern void identify_boot_cpu(void);
18340 @@ -149,12 +171,12 @@ static inline void xen_cpuid(unsigned in
18341 unsigned int *ecx, unsigned int *edx)
18343 /* ecx is often an input as well as an output. */
18344 - __asm__(XEN_CPUID
18349 - : "0" (*eax), "2" (*ecx));
18355 + : "0" (*eax), "2" (*ecx));
18358 static inline void load_cr3(pgd_t *pgdir)
18359 @@ -166,57 +188,70 @@ static inline void load_cr3(pgd_t *pgdir
18360 #ifdef CONFIG_X86_32
18361 /* This is the TSS defined by the hardware. */
18362 struct x86_hw_tss {
18363 - unsigned short back_link, __blh;
18364 - unsigned long sp0;
18365 - unsigned short ss0, __ss0h;
18366 - unsigned long sp1;
18367 - unsigned short ss1, __ss1h; /* ss1 caches MSR_IA32_SYSENTER_CS */
18368 - unsigned long sp2;
18369 - unsigned short ss2, __ss2h;
18370 - unsigned long __cr3;
18371 - unsigned long ip;
18372 - unsigned long flags;
18373 - unsigned long ax, cx, dx, bx;
18374 - unsigned long sp, bp, si, di;
18375 - unsigned short es, __esh;
18376 - unsigned short cs, __csh;
18377 - unsigned short ss, __ssh;
18378 - unsigned short ds, __dsh;
18379 - unsigned short fs, __fsh;
18380 - unsigned short gs, __gsh;
18381 - unsigned short ldt, __ldth;
18382 - unsigned short trace, io_bitmap_base;
18383 + unsigned short back_link, __blh;
18384 + unsigned long sp0;
18385 + unsigned short ss0, __ss0h;
18386 + unsigned long sp1;
18387 + /* ss1 caches MSR_IA32_SYSENTER_CS: */
18388 + unsigned short ss1, __ss1h;
18389 + unsigned long sp2;
18390 + unsigned short ss2, __ss2h;
18391 + unsigned long __cr3;
18392 + unsigned long ip;
18393 + unsigned long flags;
18394 + unsigned long ax;
18395 + unsigned long cx;
18396 + unsigned long dx;
18397 + unsigned long bx;
18398 + unsigned long sp;
18399 + unsigned long bp;
18400 + unsigned long si;
18401 + unsigned long di;
18402 + unsigned short es, __esh;
18403 + unsigned short cs, __csh;
18404 + unsigned short ss, __ssh;
18405 + unsigned short ds, __dsh;
18406 + unsigned short fs, __fsh;
18407 + unsigned short gs, __gsh;
18408 + unsigned short ldt, __ldth;
18409 + unsigned short trace;
18410 + unsigned short io_bitmap_base;
18412 } __attribute__((packed));
18413 extern struct tss_struct doublefault_tss;
18415 struct x86_hw_tss {
18425 - u16 io_bitmap_base;
18435 + u16 io_bitmap_base;
18437 } __attribute__((packed)) ____cacheline_aligned;
18439 #endif /* CONFIG_X86_NO_TSS */
18442 - * Size of io_bitmap.
18443 + * IO-bitmap sizes:
18445 -#define IO_BITMAP_BITS 65536
18446 -#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
18447 -#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
18448 -#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
18449 -#define INVALID_IO_BITMAP_OFFSET 0x8000
18450 -#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
18451 +#define IO_BITMAP_BITS 65536
18452 +#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
18453 +#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
18454 +#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
18455 +#define INVALID_IO_BITMAP_OFFSET 0x8000
18456 +#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
18458 #ifndef CONFIG_X86_NO_TSS
18459 struct tss_struct {
18460 - struct x86_hw_tss x86_tss;
18462 + * The hardware state:
18464 + struct x86_hw_tss x86_tss;
18467 * The extra 1 is there because the CPU will access an
18468 @@ -224,136 +259,162 @@ struct tss_struct {
18469 * bitmap. The extra byte must be all 1 bits, and must
18470 * be within the limit.
18472 - unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
18473 + unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
18475 * Cache the current maximum and the last task that used the bitmap:
18477 - unsigned long io_bitmap_max;
18478 - struct thread_struct *io_bitmap_owner;
18479 + unsigned long io_bitmap_max;
18480 + struct thread_struct *io_bitmap_owner;
18483 - * pads the TSS to be cacheline-aligned (size is 0x100)
18484 + * Pad the TSS to be cacheline-aligned (size is 0x100):
18486 - unsigned long __cacheline_filler[35];
18487 + unsigned long __cacheline_filler[35];
18489 - * .. and then another 0x100 bytes for emergency kernel stack
18490 + * .. and then another 0x100 bytes for the emergency kernel stack:
18492 - unsigned long stack[64];
18493 + unsigned long stack[64];
18495 } __attribute__((packed));
18497 DECLARE_PER_CPU(struct tss_struct, init_tss);
18499 -/* Save the original ist values for checking stack pointers during debugging */
18501 + * Save the original ist values for checking stack pointers during debugging
18504 - unsigned long ist[7];
18505 + unsigned long ist[7];
18507 #endif /* CONFIG_X86_NO_TSS */
18509 #define MXCSR_DEFAULT 0x1f80
18511 struct i387_fsave_struct {
18519 - u32 st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
18520 - u32 status; /* software status information */
18521 + u32 cwd; /* FPU Control Word */
18522 + u32 swd; /* FPU Status Word */
18523 + u32 twd; /* FPU Tag Word */
18524 + u32 fip; /* FPU IP Offset */
18525 + u32 fcs; /* FPU IP Selector */
18526 + u32 foo; /* FPU Operand Pointer Offset */
18527 + u32 fos; /* FPU Operand Pointer Selector */
18529 + /* 8*10 bytes for each FP-reg = 80 bytes: */
18530 + u32 st_space[20];
18532 + /* Software status information [not touched by FSAVE ]: */
18536 struct i387_fxsave_struct {
18541 + u16 cwd; /* Control Word */
18542 + u16 swd; /* Status Word */
18543 + u16 twd; /* Tag Word */
18544 + u16 fop; /* Last Instruction Opcode */
18549 + u64 rip; /* Instruction Pointer */
18550 + u64 rdp; /* Data Pointer */
18557 + u32 fip; /* FPU IP Offset */
18558 + u32 fcs; /* FPU IP Selector */
18559 + u32 foo; /* FPU Operand Offset */
18560 + u32 fos; /* FPU Operand Selector */
18565 - u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
18566 - u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
18568 + u32 mxcsr; /* MXCSR Register State */
18569 + u32 mxcsr_mask; /* MXCSR Mask */
18571 + /* 8*16 bytes for each FP-reg = 128 bytes: */
18572 + u32 st_space[32];
18574 + /* 16*16 bytes for each XMM-reg = 256 bytes: */
18575 + u32 xmm_space[64];
18579 } __attribute__((aligned(16)));
18581 struct i387_soft_struct {
18589 - u32 st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
18590 - u8 ftop, changed, lookahead, no_update, rm, alimit;
18591 - struct info *info;
18600 + /* 8*10 bytes for each FP-reg = 80 bytes: */
18601 + u32 st_space[20];
18608 + struct info *info;
18612 -union i387_union {
18613 +union thread_xstate {
18614 struct i387_fsave_struct fsave;
18615 struct i387_fxsave_struct fxsave;
18616 - struct i387_soft_struct soft;
18617 + struct i387_soft_struct soft;
18620 -#ifdef CONFIG_X86_32
18621 -DECLARE_PER_CPU(u8, cpu_llc_id);
18622 -#elif !defined(CONFIG_X86_NO_TSS)
18623 +#if defined(CONFIG_X86_64) && !defined(CONFIG_X86_NO_TSS)
18624 DECLARE_PER_CPU(struct orig_ist, orig_ist);
18627 extern void print_cpu_info(struct cpuinfo_x86 *);
18628 +extern unsigned int xstate_size;
18629 +extern void free_thread_xstate(struct task_struct *);
18630 +extern struct kmem_cache *task_xstate_cachep;
18631 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
18632 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
18633 extern unsigned short num_cache_leaves;
18635 struct thread_struct {
18636 -/* cached TLS descriptors. */
18637 - struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
18638 - unsigned long sp0;
18639 - unsigned long sp;
18640 + /* Cached TLS descriptors: */
18641 + struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
18642 + unsigned long sp0;
18643 + unsigned long sp;
18644 #ifdef CONFIG_X86_32
18645 - unsigned long sysenter_cs;
18646 + unsigned long sysenter_cs;
18648 - unsigned long usersp; /* Copy from PDA */
18649 - unsigned short es, ds, fsindex, gsindex;
18651 - unsigned long ip;
18652 - unsigned long fs;
18653 - unsigned long gs;
18654 -/* Hardware debugging registers */
18655 - unsigned long debugreg0;
18656 - unsigned long debugreg1;
18657 - unsigned long debugreg2;
18658 - unsigned long debugreg3;
18659 - unsigned long debugreg6;
18660 - unsigned long debugreg7;
18662 - unsigned long cr2, trap_no, error_code;
18663 -/* floating point info */
18664 - union i387_union i387 __attribute__((aligned(16)));;
18665 + unsigned long usersp; /* Copy from PDA */
18666 + unsigned short es;
18667 + unsigned short ds;
18668 + unsigned short fsindex;
18669 + unsigned short gsindex;
18671 + unsigned long ip;
18672 + unsigned long fs;
18673 + unsigned long gs;
18674 + /* Hardware debugging registers: */
18675 + unsigned long debugreg0;
18676 + unsigned long debugreg1;
18677 + unsigned long debugreg2;
18678 + unsigned long debugreg3;
18679 + unsigned long debugreg6;
18680 + unsigned long debugreg7;
18681 + /* Fault info: */
18682 + unsigned long cr2;
18683 + unsigned long trap_no;
18684 + unsigned long error_code;
18685 + /* floating point and extended processor state */
18686 + union thread_xstate *xstate;
18687 #ifdef CONFIG_X86_32
18688 -/* virtual 86 mode info */
18689 + /* Virtual 86 mode info */
18690 struct vm86_struct __user *vm86_info;
18691 unsigned long screen_bitmap;
18692 unsigned long v86flags, v86mask, saved_sp0;
18693 unsigned int saved_fs, saved_gs;
18695 -/* IO permissions */
18696 - unsigned long *io_bitmap_ptr;
18697 - unsigned long iopl;
18698 -/* max allowed port in the bitmap, in bytes: */
18699 - unsigned io_bitmap_max;
18700 + /* IO permissions: */
18701 + unsigned long *io_bitmap_ptr;
18702 + unsigned long iopl;
18703 + /* Max allowed port in the bitmap, in bytes: */
18704 + unsigned io_bitmap_max;
18705 /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
18706 unsigned long debugctlmsr;
18707 /* Debug Store - if not 0 points to a DS Save Area configuration;
18708 @@ -384,12 +445,12 @@ static inline void xen_set_iopl_mask(uns
18711 #ifndef CONFIG_X86_NO_TSS
18712 -static inline void native_load_sp0(struct tss_struct *tss,
18713 - struct thread_struct *thread)
18714 +static inline void
18715 +native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
18717 tss->x86_tss.sp0 = thread->sp0;
18718 #ifdef CONFIG_X86_32
18719 - /* Only happens when SEP is enabled, no need to test "SEP"arately */
18720 + /* Only happens when SEP is enabled, no need to test "SEP"arately: */
18721 if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
18722 tss->x86_tss.ss1 = thread->sysenter_cs;
18723 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
18724 @@ -403,8 +464,8 @@ static inline void native_load_sp0(struc
18728 -#define __cpuid xen_cpuid
18729 -#define paravirt_enabled() 0
18730 +#define __cpuid xen_cpuid
18731 +#define paravirt_enabled() 0
18734 * These special macros can be used to get or set a debugging register
18735 @@ -424,11 +485,12 @@ static inline void native_load_sp0(struc
18736 * enable), so that any CPU's that boot up
18737 * after us can get the correct flags.
18739 -extern unsigned long mmu_cr4_features;
18740 +extern unsigned long mmu_cr4_features;
18742 static inline void set_in_cr4(unsigned long mask)
18746 mmu_cr4_features |= mask;
18749 @@ -438,6 +500,7 @@ static inline void set_in_cr4(unsigned l
18750 static inline void clear_in_cr4(unsigned long mask)
18754 mmu_cr4_features &= ~mask;
18757 @@ -445,42 +508,42 @@ static inline void clear_in_cr4(unsigned
18760 struct microcode_header {
18761 - unsigned int hdrver;
18762 - unsigned int rev;
18763 - unsigned int date;
18764 - unsigned int sig;
18765 - unsigned int cksum;
18766 - unsigned int ldrver;
18768 - unsigned int datasize;
18769 - unsigned int totalsize;
18770 - unsigned int reserved[3];
18771 + unsigned int hdrver;
18772 + unsigned int rev;
18773 + unsigned int date;
18774 + unsigned int sig;
18775 + unsigned int cksum;
18776 + unsigned int ldrver;
18778 + unsigned int datasize;
18779 + unsigned int totalsize;
18780 + unsigned int reserved[3];
18784 - struct microcode_header hdr;
18785 - unsigned int bits[0];
18786 + struct microcode_header hdr;
18787 + unsigned int bits[0];
18790 -typedef struct microcode microcode_t;
18791 -typedef struct microcode_header microcode_header_t;
18792 +typedef struct microcode microcode_t;
18793 +typedef struct microcode_header microcode_header_t;
18795 /* microcode format is extended from prescott processors */
18796 struct extended_signature {
18797 - unsigned int sig;
18799 - unsigned int cksum;
18800 + unsigned int sig;
18802 + unsigned int cksum;
18805 struct extended_sigtable {
18806 - unsigned int count;
18807 - unsigned int cksum;
18808 - unsigned int reserved[3];
18809 + unsigned int count;
18810 + unsigned int cksum;
18811 + unsigned int reserved[3];
18812 struct extended_signature sigs[0];
18816 - unsigned long seg;
18817 + unsigned long seg;
18821 @@ -492,7 +555,7 @@ extern int kernel_thread(int (*fn)(void
18822 /* Free all resources held by a thread. */
18823 extern void release_thread(struct task_struct *);
18825 -/* Prepare to copy thread state - unlazy all lazy status */
18826 +/* Prepare to copy thread state - unlazy all lazy state */
18827 extern void prepare_to_copy(struct task_struct *tsk);
18829 unsigned long get_wchan(struct task_struct *p);
18830 @@ -529,118 +592,138 @@ static inline unsigned int cpuid_eax(uns
18831 unsigned int eax, ebx, ecx, edx;
18833 cpuid(op, &eax, &ebx, &ecx, &edx);
18838 static inline unsigned int cpuid_ebx(unsigned int op)
18840 unsigned int eax, ebx, ecx, edx;
18842 cpuid(op, &eax, &ebx, &ecx, &edx);
18847 static inline unsigned int cpuid_ecx(unsigned int op)
18849 unsigned int eax, ebx, ecx, edx;
18851 cpuid(op, &eax, &ebx, &ecx, &edx);
18856 static inline unsigned int cpuid_edx(unsigned int op)
18858 unsigned int eax, ebx, ecx, edx;
18860 cpuid(op, &eax, &ebx, &ecx, &edx);
18865 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
18866 static inline void rep_nop(void)
18868 - __asm__ __volatile__("rep;nop": : :"memory");
18869 + asm volatile("rep; nop" ::: "memory");
18872 -/* Stop speculative execution */
18873 +static inline void cpu_relax(void)
18878 +/* Stop speculative execution: */
18879 static inline void sync_core(void)
18883 asm volatile("cpuid" : "=a" (tmp) : "0" (1)
18884 - : "ebx", "ecx", "edx", "memory");
18885 + : "ebx", "ecx", "edx", "memory");
18888 -#define cpu_relax() rep_nop()
18890 static inline void __monitor(const void *eax, unsigned long ecx,
18891 - unsigned long edx)
18892 + unsigned long edx)
18894 - /* "monitor %eax,%ecx,%edx;" */
18896 - ".byte 0x0f,0x01,0xc8;"
18897 - : :"a" (eax), "c" (ecx), "d"(edx));
18898 + /* "monitor %eax, %ecx, %edx;" */
18899 + asm volatile(".byte 0x0f, 0x01, 0xc8;"
18900 + :: "a" (eax), "c" (ecx), "d"(edx));
18903 static inline void __mwait(unsigned long eax, unsigned long ecx)
18905 - /* "mwait %eax,%ecx;" */
18907 - ".byte 0x0f,0x01,0xc9;"
18908 - : :"a" (eax), "c" (ecx));
18909 + /* "mwait %eax, %ecx;" */
18910 + asm volatile(".byte 0x0f, 0x01, 0xc9;"
18911 + :: "a" (eax), "c" (ecx));
18914 static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
18916 - /* "mwait %eax,%ecx;" */
18918 - "sti; .byte 0x0f,0x01,0xc9;"
18919 - : :"a" (eax), "c" (ecx));
18920 + trace_hardirqs_on();
18921 + /* "mwait %eax, %ecx;" */
18922 + asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
18923 + :: "a" (eax), "c" (ecx));
18926 extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
18928 -extern int force_mwait;
18929 +extern int force_mwait;
18931 extern void select_idle_routine(const struct cpuinfo_x86 *c);
18933 -extern unsigned long boot_option_idle_override;
18934 +extern unsigned long boot_option_idle_override;
18936 extern void enable_sep_cpu(void);
18937 extern int sysenter_setup(void);
18939 /* Defined in head.S */
18940 -extern struct desc_ptr early_gdt_descr;
18941 +extern struct desc_ptr early_gdt_descr;
18943 extern void cpu_set_gdt(int);
18944 extern void switch_to_new_gdt(void);
18945 extern void cpu_init(void);
18946 extern void init_gdt(int cpu);
18948 -/* from system description table in BIOS. Mostly for MCA use, but
18949 - * others may find it useful. */
18950 -extern unsigned int machine_id;
18951 -extern unsigned int machine_submodel_id;
18952 -extern unsigned int BIOS_revision;
18953 +static inline void update_debugctlmsr(unsigned long debugctlmsr)
18955 +#ifndef CONFIG_X86_DEBUGCTLMSR
18956 + if (boot_cpu_data.x86 < 6)
18959 + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
18962 -/* Boot loader type from the setup header */
18963 -extern int bootloader_type;
18965 + * from system description table in BIOS. Mostly for MCA use, but
18966 + * others may find it useful:
18968 +extern unsigned int machine_id;
18969 +extern unsigned int machine_submodel_id;
18970 +extern unsigned int BIOS_revision;
18972 +/* Boot loader type from the setup header: */
18973 +extern int bootloader_type;
18975 -extern char ignore_fpu_irq;
18976 -#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
18977 +extern char ignore_fpu_irq;
18979 #define HAVE_ARCH_PICK_MMAP_LAYOUT 1
18980 #define ARCH_HAS_PREFETCHW
18981 #define ARCH_HAS_SPINLOCK_PREFETCH
18983 #ifdef CONFIG_X86_32
18984 -#define BASE_PREFETCH ASM_NOP4
18985 -#define ARCH_HAS_PREFETCH
18986 +# define BASE_PREFETCH ASM_NOP4
18987 +# define ARCH_HAS_PREFETCH
18989 -#define BASE_PREFETCH "prefetcht0 (%1)"
18990 +# define BASE_PREFETCH "prefetcht0 (%1)"
18993 -/* Prefetch instructions for Pentium III and AMD Athlon */
18994 -/* It's not worth to care about 3dnow! prefetches for the K6
18995 - because they are microcoded there and very slow.
18996 - However we don't do prefetches for pre XP Athlons currently
18997 - That should be fixed. */
18999 + * Prefetch instructions for Pentium III (+) and AMD Athlon (+)
19001 + * It's not worth to care about 3dnow prefetches for the K6
19002 + * because they are microcoded there and very slow.
19004 static inline void prefetch(const void *x)
19006 alternative_input(BASE_PREFETCH,
19007 @@ -649,8 +732,11 @@ static inline void prefetch(const void *
19011 -/* 3dnow! prefetch to get an exclusive cache line. Useful for
19012 - spinlocks to avoid one state transition in the cache coherency protocol. */
19014 + * 3dnow prefetch to get an exclusive cache line.
19015 + * Useful for spinlocks to avoid one state transition in the
19016 + * cache coherency protocol:
19018 static inline void prefetchw(const void *x)
19020 alternative_input(BASE_PREFETCH,
19021 @@ -659,21 +745,25 @@ static inline void prefetchw(const void
19025 -#define spin_lock_prefetch(x) prefetchw(x)
19026 +static inline void spin_lock_prefetch(const void *x)
19031 #ifdef CONFIG_X86_32
19033 * User space process size: 3GB (default).
19035 -#define TASK_SIZE (PAGE_OFFSET)
19036 -#define STACK_TOP TASK_SIZE
19037 -#define STACK_TOP_MAX STACK_TOP
19039 -#define INIT_THREAD { \
19040 - .sp0 = sizeof(init_stack) + (long)&init_stack, \
19041 - .vm86_info = NULL, \
19042 - .sysenter_cs = __KERNEL_CS, \
19043 - .io_bitmap_ptr = NULL, \
19044 - .fs = __KERNEL_PERCPU, \
19045 +#define TASK_SIZE PAGE_OFFSET
19046 +#define STACK_TOP TASK_SIZE
19047 +#define STACK_TOP_MAX STACK_TOP
19049 +#define INIT_THREAD { \
19050 + .sp0 = sizeof(init_stack) + (long)&init_stack, \
19051 + .vm86_info = NULL, \
19052 + .sysenter_cs = __KERNEL_CS, \
19053 + .io_bitmap_ptr = NULL, \
19054 + .fs = __KERNEL_PERCPU, \
19058 @@ -682,28 +772,15 @@ static inline void prefetchw(const void
19059 * permission bitmap. The extra byte must be all 1 bits, and must
19060 * be within the limit.
19062 -#define INIT_TSS { \
19064 +#define INIT_TSS { \
19066 .sp0 = sizeof(init_stack) + (long)&init_stack, \
19067 - .ss0 = __KERNEL_DS, \
19068 - .ss1 = __KERNEL_CS, \
19069 - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
19071 - .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \
19074 -#define start_thread(regs, new_eip, new_esp) do { \
19075 - __asm__("movl %0,%%gs": :"r" (0)); \
19077 - set_fs(USER_DS); \
19078 - regs->ds = __USER_DS; \
19079 - regs->es = __USER_DS; \
19080 - regs->ss = __USER_DS; \
19081 - regs->cs = __USER_CS; \
19082 - regs->ip = new_eip; \
19083 - regs->sp = new_esp; \
19086 + .ss0 = __KERNEL_DS, \
19087 + .ss1 = __KERNEL_CS, \
19088 + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
19090 + .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \
19093 extern unsigned long thread_saved_pc(struct task_struct *tsk);
19095 @@ -731,24 +808,24 @@ extern unsigned long thread_saved_pc(str
19099 -#define KSTK_ESP(task) (task_pt_regs(task)->sp)
19100 +#define KSTK_ESP(task) (task_pt_regs(task)->sp)
19104 * User space process size. 47bits minus one guard page.
19106 -#define TASK_SIZE64 (0x800000000000UL - 4096)
19107 +#define TASK_SIZE64 ((1UL << 47) - PAGE_SIZE)
19109 /* This decides where the kernel will search for a free chunk of vm
19110 * space during mmap's.
19112 -#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
19113 - 0xc0000000 : 0xFFFFe000)
19114 +#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
19115 + 0xc0000000 : 0xFFFFe000)
19117 -#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \
19118 - IA32_PAGE_OFFSET : TASK_SIZE64)
19119 -#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \
19120 - IA32_PAGE_OFFSET : TASK_SIZE64)
19121 +#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \
19122 + IA32_PAGE_OFFSET : TASK_SIZE64)
19123 +#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \
19124 + IA32_PAGE_OFFSET : TASK_SIZE64)
19126 #define STACK_TOP TASK_SIZE
19127 #define STACK_TOP_MAX TASK_SIZE64
19128 @@ -761,33 +838,32 @@ extern unsigned long thread_saved_pc(str
19129 .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
19132 -#define start_thread(regs, new_rip, new_rsp) do { \
19133 - asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0)); \
19134 - load_gs_index(0); \
19135 - (regs)->ip = (new_rip); \
19136 - (regs)->sp = (new_rsp); \
19137 - write_pda(oldrsp, (new_rsp)); \
19138 - (regs)->cs = __USER_CS; \
19139 - (regs)->ss = __USER_DS; \
19140 - (regs)->flags = 0x200; \
19141 - set_fs(USER_DS); \
19145 * Return saved PC of a blocked thread.
19146 * What is this good for? it will be always the scheduler or ret_from_fork.
19148 -#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
19149 +#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
19151 -#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
19152 -#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
19153 +#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
19154 +#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
19155 #endif /* CONFIG_X86_64 */
19157 -/* This decides where the kernel will search for a free chunk of vm
19158 +extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
19159 + unsigned long new_sp);
19162 + * This decides where the kernel will search for a free chunk of vm
19163 * space during mmap's.
19165 #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
19167 -#define KSTK_EIP(task) (task_pt_regs(task)->ip)
19168 +#define KSTK_EIP(task) (task_pt_regs(task)->ip)
19170 +/* Get/set a process' ability to use the timestamp counter instruction */
19171 +#define GET_TSC_CTL(adr) get_tsc_mode((adr))
19172 +#define SET_TSC_CTL(val) set_tsc_mode((val))
19174 +extern int get_tsc_mode(unsigned long adr);
19175 +extern int set_tsc_mode(unsigned int val);
19178 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/segment.h 2009-03-16 16:33:40.000000000 +0100
19179 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/segment.h 2009-03-16 16:38:05.000000000 +0100
19180 @@ -191,13 +191,14 @@
19181 #define SEGMENT_TI_MASK 0x4
19183 #define IDT_ENTRIES 256
19184 +#define NUM_EXCEPTION_VECTORS 32
19185 #define GDT_SIZE (GDT_ENTRIES * 8)
19186 #define GDT_ENTRY_TLS_ENTRIES 3
19187 #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
19190 #ifndef __ASSEMBLY__
19191 -extern const char early_idt_handlers[IDT_ENTRIES][10];
19192 +extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10];
19196 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/smp.h 2009-02-16 16:18:36.000000000 +0100
19197 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/smp.h 2009-03-16 16:38:05.000000000 +0100
19199 -#ifdef CONFIG_X86_32
19200 -# include "smp_32.h"
19201 +#ifndef _ASM_X86_SMP_H_
19202 +#define _ASM_X86_SMP_H_
19203 +#ifndef __ASSEMBLY__
19204 +#include <linux/cpumask.h>
19205 +#include <linux/init.h>
19206 +#include <asm/percpu.h>
19209 + * We need the APIC definitions automatically as part of 'smp.h'
19211 +#ifdef CONFIG_X86_LOCAL_APIC
19212 +# include <asm/mpspec.h>
19213 +# include <asm/apic.h>
19214 +# ifdef CONFIG_X86_IO_APIC
19215 +# include <asm/io_apic.h>
19218 +#include <asm/pda.h>
19219 +#include <asm/thread_info.h>
19221 +#define cpu_callout_map cpu_possible_map
19222 +extern cpumask_t cpu_initialized;
19223 +#define cpu_callin_map cpu_possible_map
19225 +extern void (*mtrr_hook)(void);
19226 +extern void zap_low_mappings(void);
19228 +extern int smp_num_siblings;
19229 +extern unsigned int num_processors;
19230 +extern cpumask_t cpu_initialized;
19232 +#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
19233 +extern u16 x86_cpu_to_apicid_init[];
19234 +extern u16 x86_bios_cpu_apicid_init[];
19235 +extern void *x86_cpu_to_apicid_early_ptr;
19236 +extern void *x86_bios_cpu_apicid_early_ptr;
19238 -# include "smp_64.h"
19239 +#define x86_cpu_to_apicid_early_ptr NULL
19240 +#define x86_bios_cpu_apicid_early_ptr NULL
19243 +DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
19244 +DECLARE_PER_CPU(cpumask_t, cpu_core_map);
19245 +DECLARE_PER_CPU(u16, cpu_llc_id);
19246 +DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
19247 +DECLARE_PER_CPU(u16, x86_bios_cpu_apicid);
19251 +#ifndef CONFIG_XEN
19253 +/* Static state in head.S used to set up a CPU */
19256 + unsigned short ss;
19260 + void (*smp_prepare_boot_cpu)(void);
19261 + void (*smp_prepare_cpus)(unsigned max_cpus);
19262 + int (*cpu_up)(unsigned cpu);
19263 + void (*smp_cpus_done)(unsigned max_cpus);
19265 + void (*smp_send_stop)(void);
19266 + void (*smp_send_reschedule)(int cpu);
19267 + int (*smp_call_function_mask)(cpumask_t mask,
19268 + void (*func)(void *info), void *info,
19272 +/* Globals due to paravirt */
19273 +extern void set_cpu_sibling_map(int cpu);
19275 +#ifndef CONFIG_PARAVIRT
19276 +#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0)
19278 +extern struct smp_ops smp_ops;
19280 +static inline void smp_send_stop(void)
19282 + smp_ops.smp_send_stop();
19285 +static inline void smp_prepare_boot_cpu(void)
19287 + smp_ops.smp_prepare_boot_cpu();
19290 +static inline void smp_prepare_cpus(unsigned int max_cpus)
19292 + smp_ops.smp_prepare_cpus(max_cpus);
19295 +static inline void smp_cpus_done(unsigned int max_cpus)
19297 + smp_ops.smp_cpus_done(max_cpus);
19300 +static inline int __cpu_up(unsigned int cpu)
19302 + return smp_ops.cpu_up(cpu);
19305 +static inline void smp_send_reschedule(int cpu)
19307 + smp_ops.smp_send_reschedule(cpu);
19310 +static inline int smp_call_function_mask(cpumask_t mask,
19311 + void (*func) (void *info), void *info,
19314 + return smp_ops.smp_call_function_mask(mask, func, info, wait);
19317 +void native_smp_prepare_boot_cpu(void);
19318 +void native_smp_prepare_cpus(unsigned int max_cpus);
19319 +void native_smp_cpus_done(unsigned int max_cpus);
19320 +int native_cpu_up(unsigned int cpunum);
19322 +#else /* CONFIG_XEN */
19324 +void xen_smp_send_stop(void);
19325 +void xen_smp_send_reschedule(int cpu);
19326 +int xen_smp_call_function_mask(cpumask_t mask,
19327 + void (*func) (void *info), void *info,
19330 +#define smp_send_stop xen_smp_send_stop
19331 +#define smp_send_reschedule xen_smp_send_reschedule
19332 +#define smp_call_function_mask xen_smp_call_function_mask
19334 +extern void prefill_possible_map(void);
19336 +#endif /* CONFIG_XEN */
19338 +extern int __cpu_disable(void);
19339 +extern void __cpu_die(unsigned int cpu);
19341 +extern void prefill_possible_map(void);
19343 +void smp_store_cpu_info(int id);
19344 +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
19346 +/* We don't mark CPUs online until __cpu_up(), so we need another measure */
19347 +static inline int num_booting_cpus(void)
19349 + return cpus_weight(cpu_callout_map);
19351 +#endif /* CONFIG_SMP */
19353 +extern unsigned disabled_cpus __cpuinitdata;
19355 +#ifdef CONFIG_X86_32_SMP
19357 + * This function is needed by all SMP systems. It must _always_ be valid
19358 + * from the initial startup. We map APIC_BASE very early in page_setup(),
19359 + * so this is correct in the x86 case.
19361 +DECLARE_PER_CPU(int, cpu_number);
19362 +#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
19363 +#define safe_smp_processor_id() smp_processor_id()
19365 +#elif defined(CONFIG_X86_64_SMP)
19366 +#define raw_smp_processor_id() read_pda(cpunumber)
19368 +#define stack_smp_processor_id() \
19370 + struct thread_info *ti; \
19371 + __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
19374 +#define safe_smp_processor_id() smp_processor_id()
19376 +#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */
19377 +#define cpu_physical_id(cpu) boot_cpu_physical_apicid
19378 +#define safe_smp_processor_id() 0
19379 +#define stack_smp_processor_id() 0
19382 +#ifdef CONFIG_X86_LOCAL_APIC
19384 +static inline int logical_smp_processor_id(void)
19386 + /* we don't want to mark this access volatile - bad code generation */
19387 + return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
19390 +#ifndef CONFIG_X86_64
19391 +static inline unsigned int read_apic_id(void)
19393 + return *(u32 *)(APIC_BASE + APIC_ID);
19396 +extern unsigned int read_apic_id(void);
19400 +# ifdef APIC_DEFINITION
19401 +extern int hard_smp_processor_id(void);
19403 +# include <mach_apicdef.h>
19404 +static inline int hard_smp_processor_id(void)
19406 + /* we don't want to mark this access volatile - bad code generation */
19407 + return GET_APIC_ID(read_apic_id());
19409 +# endif /* APIC_DEFINITION */
19411 +#else /* CONFIG_X86_LOCAL_APIC */
19413 +# ifndef CONFIG_SMP
19414 +# define hard_smp_processor_id() 0
19417 +#endif /* CONFIG_X86_LOCAL_APIC */
19419 +#ifdef CONFIG_HOTPLUG_CPU
19420 +extern void cpu_exit_clear(void);
19421 +extern void cpu_uninit(void);
19424 +extern void smp_alloc_memory(void);
19425 +extern void lock_ipi_call_lock(void);
19426 +extern void unlock_ipi_call_lock(void);
19427 +#endif /* __ASSEMBLY__ */
19429 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/smp_32.h 2009-03-16 16:33:40.000000000 +0100
19430 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
19432 -#ifndef __ASM_SMP_H
19433 -#define __ASM_SMP_H
19435 -#ifndef __ASSEMBLY__
19436 -#include <linux/cpumask.h>
19437 -#include <linux/init.h>
19440 - * We need the APIC definitions automatically as part of 'smp.h'
19442 -#ifdef CONFIG_X86_LOCAL_APIC
19443 -# include <asm/mpspec.h>
19444 -# include <asm/apic.h>
19445 -# ifdef CONFIG_X86_IO_APIC
19446 -# include <asm/io_apic.h>
19450 -#define cpu_callout_map cpu_possible_map
19451 -#define cpu_callin_map cpu_possible_map
19453 -extern int smp_num_siblings;
19454 -extern unsigned int num_processors;
19456 -extern void smp_alloc_memory(void);
19457 -extern void lock_ipi_call_lock(void);
19458 -extern void unlock_ipi_call_lock(void);
19460 -extern void (*mtrr_hook) (void);
19461 -extern void zap_low_mappings (void);
19463 -DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
19464 -DECLARE_PER_CPU(cpumask_t, cpu_core_map);
19465 -DECLARE_PER_CPU(u8, cpu_llc_id);
19466 -DECLARE_PER_CPU(u8, x86_cpu_to_apicid);
19468 -#ifdef CONFIG_HOTPLUG_CPU
19469 -extern void cpu_exit_clear(void);
19470 -extern void cpu_uninit(void);
19475 -#ifndef CONFIG_XEN
19477 -/* Globals due to paravirt */
19478 -extern void set_cpu_sibling_map(int cpu);
19482 - void (*smp_prepare_boot_cpu)(void);
19483 - void (*smp_prepare_cpus)(unsigned max_cpus);
19484 - int (*cpu_up)(unsigned cpu);
19485 - void (*smp_cpus_done)(unsigned max_cpus);
19487 - void (*smp_send_stop)(void);
19488 - void (*smp_send_reschedule)(int cpu);
19489 - int (*smp_call_function_mask)(cpumask_t mask,
19490 - void (*func)(void *info), void *info,
19494 -extern struct smp_ops smp_ops;
19496 -static inline void smp_prepare_boot_cpu(void)
19498 - smp_ops.smp_prepare_boot_cpu();
19500 -static inline void smp_prepare_cpus(unsigned int max_cpus)
19502 - smp_ops.smp_prepare_cpus(max_cpus);
19504 -static inline int __cpu_up(unsigned int cpu)
19506 - return smp_ops.cpu_up(cpu);
19508 -static inline void smp_cpus_done(unsigned int max_cpus)
19510 - smp_ops.smp_cpus_done(max_cpus);
19513 -static inline void smp_send_stop(void)
19515 - smp_ops.smp_send_stop();
19517 -static inline void smp_send_reschedule(int cpu)
19519 - smp_ops.smp_send_reschedule(cpu);
19521 -static inline int smp_call_function_mask(cpumask_t mask,
19522 - void (*func) (void *info), void *info,
19525 - return smp_ops.smp_call_function_mask(mask, func, info, wait);
19528 -void native_smp_prepare_boot_cpu(void);
19529 -void native_smp_prepare_cpus(unsigned int max_cpus);
19530 -int native_cpu_up(unsigned int cpunum);
19531 -void native_smp_cpus_done(unsigned int max_cpus);
19533 -#ifndef CONFIG_PARAVIRT
19534 -#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0)
19537 -#else /* CONFIG_XEN */
19539 -void xen_smp_send_stop(void);
19540 -void xen_smp_send_reschedule(int cpu);
19541 -int xen_smp_call_function_mask(cpumask_t mask,
19542 - void (*func) (void *info), void *info,
19545 -#define smp_send_stop xen_smp_send_stop
19546 -#define smp_send_reschedule xen_smp_send_reschedule
19547 -#define smp_call_function_mask xen_smp_call_function_mask
19549 -extern void prefill_possible_map(void);
19551 -#endif /* CONFIG_XEN */
19553 -extern int __cpu_disable(void);
19554 -extern void __cpu_die(unsigned int cpu);
19557 - * This function is needed by all SMP systems. It must _always_ be valid
19558 - * from the initial startup. We map APIC_BASE very early in page_setup(),
19559 - * so this is correct in the x86 case.
19561 -DECLARE_PER_CPU(int, cpu_number);
19562 -#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
19564 -#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
19566 -#define safe_smp_processor_id() smp_processor_id()
19568 -/* We don't mark CPUs online until __cpu_up(), so we need another measure */
19569 -static inline int num_booting_cpus(void)
19571 - return cpus_weight(cpu_callout_map);
19574 -#else /* CONFIG_SMP */
19576 -#define safe_smp_processor_id() 0
19577 -#define cpu_physical_id(cpu) boot_cpu_physical_apicid
19579 -#endif /* !CONFIG_SMP */
19581 -#ifdef CONFIG_X86_LOCAL_APIC
19583 -static __inline int logical_smp_processor_id(void)
19585 - /* we don't want to mark this access volatile - bad code generation */
19586 - return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
19589 -# ifdef APIC_DEFINITION
19590 -extern int hard_smp_processor_id(void);
19592 -# include <mach_apicdef.h>
19593 -static inline int hard_smp_processor_id(void)
19595 - /* we don't want to mark this access volatile - bad code generation */
19596 - return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID));
19598 -# endif /* APIC_DEFINITION */
19600 -#else /* CONFIG_X86_LOCAL_APIC */
19602 -# ifndef CONFIG_SMP
19603 -# define hard_smp_processor_id() 0
19606 -#endif /* CONFIG_X86_LOCAL_APIC */
19608 -#endif /* !ASSEMBLY */
19610 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/smp_64.h 2009-03-16 16:33:40.000000000 +0100
19611 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
19613 -#ifndef __ASM_SMP_H
19614 -#define __ASM_SMP_H
19616 -#include <linux/cpumask.h>
19617 -#include <linux/init.h>
19619 -#ifdef CONFIG_X86_LOCAL_APIC
19621 - * We need the APIC definitions automatically as part of 'smp.h'
19623 -#include <asm/apic.h>
19624 -#ifdef CONFIG_X86_IO_APIC
19625 -#include <asm/io_apic.h>
19627 -#include <asm/mpspec.h>
19629 -#include <asm/pda.h>
19630 -#include <asm/thread_info.h>
19632 -extern cpumask_t cpu_initialized;
19634 -extern int smp_num_siblings;
19635 -extern unsigned int num_processors;
19637 -extern void smp_alloc_memory(void);
19638 -extern void lock_ipi_call_lock(void);
19639 -extern void unlock_ipi_call_lock(void);
19641 -extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
19642 - void *info, int wait);
19644 -DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
19645 -DECLARE_PER_CPU(cpumask_t, cpu_core_map);
19646 -DECLARE_PER_CPU(u16, cpu_llc_id);
19647 -DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
19648 -DECLARE_PER_CPU(u16, x86_bios_cpu_apicid);
19650 -#ifdef CONFIG_X86_LOCAL_APIC
19651 -static inline int cpu_present_to_apicid(int mps_cpu)
19653 - if (cpu_present(mps_cpu))
19654 - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
19656 - return BAD_APICID;
19662 -#define SMP_TRAMPOLINE_BASE 0x6000
19664 -extern int __cpu_disable(void);
19665 -extern void __cpu_die(unsigned int cpu);
19666 -extern void prefill_possible_map(void);
19667 -extern unsigned __cpuinitdata disabled_cpus;
19669 -#define raw_smp_processor_id() read_pda(cpunumber)
19670 -#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
19672 -#define stack_smp_processor_id() \
19674 - struct thread_info *ti; \
19675 - __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
19680 - * On x86 all CPUs are mapped 1:1 to the APIC space. This simplifies
19681 - * scheduling and IPI sending and compresses data structures.
19683 -static inline int num_booting_cpus(void)
19685 - return cpus_weight(cpu_possible_map);
19688 -extern void smp_send_reschedule(int cpu);
19690 -#else /* CONFIG_SMP */
19692 -extern unsigned int boot_cpu_id;
19693 -#define cpu_physical_id(cpu) boot_cpu_id
19694 -#define stack_smp_processor_id() 0
19696 -#endif /* !CONFIG_SMP */
19698 -#define safe_smp_processor_id() smp_processor_id()
19700 -#ifdef CONFIG_X86_LOCAL_APIC
19701 -static __inline int logical_smp_processor_id(void)
19703 - /* we don't want to mark this access volatile - bad code generation */
19704 - return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
19707 -static inline int hard_smp_processor_id(void)
19709 - /* we don't want to mark this access volatile - bad code generation */
19710 - return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID));
19716 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/spinlock.h 2009-03-16 16:33:40.000000000 +0100
19717 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/spinlock.h 2009-03-16 16:38:05.000000000 +0100
19718 @@ -88,7 +88,7 @@ extern void xen_spin_kick(raw_spinlock_t
19722 -static inline int __raw_spin_trylock(raw_spinlock_t *lock)
19723 +static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
19727 @@ -107,7 +107,7 @@ static inline int __raw_spin_trylock(raw
19731 -static inline void __raw_spin_unlock(raw_spinlock_t *lock)
19732 +static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
19734 unsigned int token;
19735 unsigned char kick;
19736 @@ -155,7 +155,7 @@ static inline void __raw_spin_unlock(raw
19737 : "memory", "cc"); \
19740 -static inline int __raw_spin_trylock(raw_spinlock_t *lock)
19741 +static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
19745 @@ -177,7 +177,7 @@ static inline int __raw_spin_trylock(raw
19749 -static inline void __raw_spin_unlock(raw_spinlock_t *lock)
19750 +static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
19752 unsigned int token, tmp;
19754 @@ -197,19 +197,19 @@ static inline void __raw_spin_unlock(raw
19756 static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
19758 - int tmp = *(volatile signed int *)(&(lock)->slock);
19759 + int tmp = ACCESS_ONCE(lock->slock);
19761 return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
19764 static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
19766 - int tmp = *(volatile signed int *)(&(lock)->slock);
19767 + int tmp = ACCESS_ONCE(lock->slock);
19769 return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
19772 -static inline void __raw_spin_lock(raw_spinlock_t *lock)
19773 +static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
19775 unsigned int token, count;
19777 @@ -223,8 +223,8 @@ static inline void __raw_spin_lock(raw_s
19778 } while (unlikely(!count) && !xen_spin_wait(lock, token));
19781 -static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
19782 - unsigned long flags)
19783 +static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
19784 + unsigned long flags)
19786 unsigned int token, count;
19788 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/swiotlb.h 2009-02-16 16:18:36.000000000 +0100
19789 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/swiotlb.h 2009-03-16 16:38:05.000000000 +0100
19791 -#ifdef CONFIG_X86_32
19792 -# include "swiotlb_32.h"
19794 -# include "../../swiotlb.h"
19796 +#ifndef _ASM_SWIOTLB_H
19798 +#include "../../swiotlb.h"
19800 +dma_addr_t swiotlb_map_single_phys(struct device *, phys_addr_t, size_t size,
19803 +#endif /* _ASM_SWIOTLB_H */
19804 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/swiotlb_32.h 2009-10-28 14:55:03.000000000 +0100
19805 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
19807 -#ifndef _ASM_SWIOTLB_H
19808 -#define _ASM_SWIOTLB_H 1
19810 -/* SWIOTLB interface */
19812 -extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, size_t size,
19814 -extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
19815 - size_t size, int dir);
19816 -extern void swiotlb_sync_single_for_cpu(struct device *hwdev,
19817 - dma_addr_t dev_addr,
19818 - size_t size, int dir);
19819 -extern void swiotlb_sync_single_for_device(struct device *hwdev,
19820 - dma_addr_t dev_addr,
19821 - size_t size, int dir);
19822 -extern void swiotlb_sync_sg_for_cpu(struct device *hwdev,
19823 - struct scatterlist *sg, int nelems,
19825 -extern void swiotlb_sync_sg_for_device(struct device *hwdev,
19826 - struct scatterlist *sg, int nelems,
19828 -extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg,
19829 - int nents, int direction);
19830 -extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
19831 - int nents, int direction);
19832 -extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr);
19833 -#ifdef CONFIG_HIGHMEM
19834 -extern dma_addr_t swiotlb_map_page(struct device *hwdev, struct page *page,
19835 - unsigned long offset, size_t size,
19836 - enum dma_data_direction direction);
19837 -extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
19838 - size_t size, enum dma_data_direction direction);
19840 -extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
19841 -extern void swiotlb_init(void);
19843 -#ifdef CONFIG_SWIOTLB
19844 -extern int swiotlb;
19850 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/system.h 2009-03-16 16:33:40.000000000 +0100
19851 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/system.h 2009-03-16 16:38:05.000000000 +0100
19852 @@ -28,22 +28,44 @@ struct task_struct *__switch_to(struct t
19853 * Saving eflags is important. It switches not only IOPL between tasks,
19854 * it also protects other tasks from NT leaking through sysenter etc.
19856 -#define switch_to(prev, next, last) do { \
19857 - unsigned long esi, edi; \
19858 - asm volatile("pushfl\n\t" /* Save flags */ \
19859 - "pushl %%ebp\n\t" \
19860 - "movl %%esp,%0\n\t" /* save ESP */ \
19861 - "movl %5,%%esp\n\t" /* restore ESP */ \
19862 - "movl $1f,%1\n\t" /* save EIP */ \
19863 - "pushl %6\n\t" /* restore EIP */ \
19864 - "jmp __switch_to\n" \
19865 +#define switch_to(prev, next, last) \
19868 + * Context-switching clobbers all registers, so we clobber \
19869 + * them explicitly, via unused output variables. \
19870 + * (EAX and EBP is not listed because EBP is saved/restored \
19871 + * explicitly for wchan access and EAX is the return value of \
19872 + * __switch_to()) \
19874 + unsigned long ebx, ecx, edx, esi, edi; \
19876 + asm volatile("pushfl\n\t" /* save flags */ \
19877 + "pushl %%ebp\n\t" /* save EBP */ \
19878 + "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \
19879 + "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
19880 + "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
19881 + "pushl %[next_ip]\n\t" /* restore EIP */ \
19882 + "jmp __switch_to\n" /* regparm call */ \
19884 - "popl %%ebp\n\t" \
19886 - :"=m" (prev->thread.sp), "=m" (prev->thread.ip), \
19887 - "=a" (last), "=S" (esi), "=D" (edi) \
19888 - :"m" (next->thread.sp), "m" (next->thread.ip), \
19889 - "2" (prev), "d" (next)); \
19890 + "popl %%ebp\n\t" /* restore EBP */ \
19891 + "popfl\n" /* restore flags */ \
19893 + /* output parameters */ \
19894 + : [prev_sp] "=m" (prev->thread.sp), \
19895 + [prev_ip] "=m" (prev->thread.ip), \
19898 + /* clobbered output registers: */ \
19899 + "=b" (ebx), "=c" (ecx), "=d" (edx), \
19900 + "=S" (esi), "=D" (edi) \
19902 + /* input parameters: */ \
19903 + : [next_sp] "m" (next->thread.sp), \
19904 + [next_ip] "m" (next->thread.ip), \
19906 + /* regparm parameters for __switch_to(): */ \
19907 + [prev] "a" (prev), \
19908 + [next] "d" (next)); \
19912 @@ -123,30 +145,29 @@ extern void load_gs_index(unsigned);
19914 #define loadsegment(seg, value) \
19915 asm volatile("\n" \
19917 - "movl %k0,%%" #seg "\n" \
19919 - ".section .fixup,\"ax\"\n" \
19921 - "movl %k1, %%" #seg "\n\t" \
19924 - _ASM_EXTABLE(1b,3b) \
19925 - : :"r" (value), "r" (0))
19927 + "movl %k0,%%" #seg "\n" \
19929 + ".section .fixup,\"ax\"\n" \
19931 + "movl %k1, %%" #seg "\n\t" \
19934 + _ASM_EXTABLE(1b,3b) \
19935 + : :"r" (value), "r" (0))
19939 * Save a segment register away
19941 -#define savesegment(seg, value) \
19942 +#define savesegment(seg, value) \
19943 asm volatile("mov %%" #seg ",%0":"=rm" (value))
19945 static inline unsigned long get_limit(unsigned long segment)
19947 unsigned long __limit;
19948 - __asm__("lsll %1,%0"
19949 - :"=r" (__limit):"r" (segment));
19950 - return __limit+1;
19951 + asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
19952 + return __limit + 1;
19955 static inline void xen_clts(void)
19956 @@ -171,13 +192,13 @@ static unsigned long __force_order;
19957 static inline unsigned long xen_read_cr0(void)
19960 - asm volatile("mov %%cr0,%0\n\t" :"=r" (val), "=m" (__force_order));
19961 + asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
19965 static inline void xen_write_cr0(unsigned long val)
19967 - asm volatile("mov %0,%%cr0": :"r" (val), "m" (__force_order));
19968 + asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
19971 #define xen_read_cr2() (current_vcpu_info()->arch.cr2)
19972 @@ -186,7 +207,7 @@ static inline void xen_write_cr0(unsigne
19973 static inline unsigned long xen_read_cr3(void)
19976 - asm volatile("mov %%cr3,%0\n\t" :"=r" (val), "=m" (__force_order));
19977 + asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
19978 #ifdef CONFIG_X86_32
19979 return mfn_to_pfn(xen_cr3_to_pfn(val)) << PAGE_SHIFT;
19981 @@ -201,13 +222,13 @@ static inline void xen_write_cr3(unsigne
19983 val = phys_to_machine(val);
19985 - asm volatile("mov %0,%%cr3": :"r" (val), "m" (__force_order));
19986 + asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
19989 static inline unsigned long xen_read_cr4(void)
19992 - asm volatile("mov %%cr4,%0\n\t" :"=r" (val), "=m" (__force_order));
19993 + asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
19997 @@ -215,7 +236,7 @@ static inline unsigned long xen_read_cr4
19999 static inline void xen_write_cr4(unsigned long val)
20001 - asm volatile("mov %0,%%cr4": :"r" (val), "m" (__force_order));
20002 + asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
20005 #ifdef CONFIG_X86_64
20006 @@ -234,6 +255,7 @@ static inline void xen_wbinvd(void)
20008 asm volatile("wbinvd": : :"memory");
20011 #define read_cr0() (xen_read_cr0())
20012 #define write_cr0(x) (xen_write_cr0(x))
20013 #define read_cr2() (xen_read_cr2())
20014 @@ -260,7 +282,7 @@ static inline void clflush(volatile void
20015 asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
20018 -#define nop() __asm__ __volatile__ ("nop")
20019 +#define nop() asm volatile ("nop")
20021 void disable_hlt(void);
20022 void enable_hlt(void);
20023 @@ -280,16 +302,7 @@ void default_idle(void);
20025 #ifdef CONFIG_X86_32
20027 - * For now, "wmb()" doesn't actually do anything, as all
20028 - * Intel CPU's follow what Intel calls a *Processor Order*,
20029 - * in which all writes are seen in the program order even
20030 - * outside the CPU.
20032 - * I expect future Intel CPU's to have a weaker ordering,
20033 - * but I'd also expect them to finally get their act together
20034 - * and add some real memory barriers if so.
20036 - * Some non intel clones support out of order store. wmb() ceases to be a
20037 + * Some non-Intel clones support out of order store. wmb() ceases to be a
20040 #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
20041 @@ -368,7 +381,7 @@ void default_idle(void);
20042 # define smp_wmb() barrier()
20044 #define smp_read_barrier_depends() read_barrier_depends()
20045 -#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
20046 +#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
20048 #define smp_mb() barrier()
20049 #define smp_rmb() barrier()
20050 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/tlbflush.h 2009-03-16 16:33:40.000000000 +0100
20051 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/tlbflush.h 2009-03-16 16:38:05.000000000 +0100
20052 @@ -86,8 +86,7 @@ static inline void flush_tlb_range(struc
20053 #define TLBSTATE_LAZY 2
20055 #ifdef CONFIG_X86_32
20058 +struct tlb_state {
20059 struct mm_struct *active_mm;
20061 char __cacheline_padding[L1_CACHE_BYTES-8];
20062 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/vga.h 2009-10-28 14:55:03.000000000 +0100
20063 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/vga.h 2009-03-16 16:38:05.000000000 +0100
20065 * access the videoram directly without any black magic.
20068 -#define VGA_MAP_MEM(x,s) (unsigned long)isa_bus_to_virt(x)
20069 +#define VGA_MAP_MEM(x, s) (unsigned long)isa_bus_to_virt(x)
20071 #define vga_readb(x) (*(x))
20072 -#define vga_writeb(x,y) (*(y) = (x))
20073 +#define vga_writeb(x, y) (*(y) = (x))
20076 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/xor_64.h 2009-10-28 14:55:03.000000000 +0100
20077 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/xor_64.h 2009-03-16 16:38:05.000000000 +0100
20080 - * x86-64 changes / gcc fixes from Andi Kleen.
20081 + * x86-64 changes / gcc fixes from Andi Kleen.
20082 * Copyright 2002 Andi Kleen, SuSE Labs.
20084 * This hasn't been optimized for the hammer yet, but there are likely
20085 * no advantages to be gotten from x86-64 here anyways.
20088 -typedef struct { unsigned long a,b; } __attribute__((aligned(16))) xmm_store_t;
20090 + unsigned long a, b;
20091 +} __attribute__((aligned(16))) xmm_store_t;
20093 -/* Doesn't use gcc to save the XMM registers, because there is no easy way to
20094 +/* Doesn't use gcc to save the XMM registers, because there is no easy way to
20095 tell it to do a clts before the register saving. */
20096 -#define XMMS_SAVE do { \
20097 +#define XMMS_SAVE \
20099 preempt_disable(); \
20100 if (!(current_thread_info()->status & TS_USEDFPU)) \
20102 - __asm__ __volatile__ ( \
20104 "movups %%xmm0,(%1) ;\n\t" \
20105 "movups %%xmm1,0x10(%1) ;\n\t" \
20106 "movups %%xmm2,0x20(%1) ;\n\t" \
20107 @@ -22,10 +25,11 @@ typedef struct { unsigned long a,b; } __
20114 -#define XMMS_RESTORE do { \
20116 +#define XMMS_RESTORE \
20120 "movups (%1),%%xmm0 ;\n\t" \
20121 "movups 0x10(%1),%%xmm1 ;\n\t" \
20122 @@ -37,72 +41,72 @@ typedef struct { unsigned long a,b; } __
20123 if (!(current_thread_info()->status & TS_USEDFPU)) \
20125 preempt_enable(); \
20129 #define OFFS(x) "16*("#x")"
20130 #define PF_OFFS(x) "256+16*("#x")"
20131 #define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
20132 -#define LD(x,y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
20133 -#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
20134 +#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
20135 +#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
20136 #define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
20137 #define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
20138 #define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
20139 #define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
20140 #define PF5(x) " prefetchnta "PF_OFFS(x)"(%[p6]) ;\n"
20141 -#define XO1(x,y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
20142 -#define XO2(x,y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
20143 -#define XO3(x,y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
20144 -#define XO4(x,y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
20145 -#define XO5(x,y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n"
20146 +#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
20147 +#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
20148 +#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
20149 +#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
20150 +#define XO5(x, y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n"
20154 xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
20156 - unsigned int lines = bytes >> 8;
20157 + unsigned int lines = bytes >> 8;
20159 xmm_store_t xmm_save[4];
20212 - " addq %[inc], %[p1] ;\n"
20213 - " addq %[inc], %[p2] ;\n"
20214 + " addq %[inc], %[p1] ;\n"
20215 + " addq %[inc], %[p2] ;\n"
20216 " decl %[cnt] ; jnz 1b"
20217 : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines)
20218 - : [inc] "r" (256UL)
20220 + : [inc] "r" (256UL)
20225 @@ -117,52 +121,52 @@ xor_sse_3(unsigned long bytes, unsigned
20229 - __asm__ __volatile__ (
20289 - " addq %[inc], %[p1] ;\n"
20290 - " addq %[inc], %[p2] ;\n"
20291 - " addq %[inc], %[p3] ;\n"
20292 + " addq %[inc], %[p1] ;\n"
20293 + " addq %[inc], %[p2] ;\n"
20294 + " addq %[inc], %[p3] ;\n"
20295 " decl %[cnt] ; jnz 1b"
20296 : [cnt] "+r" (lines),
20297 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
20298 : [inc] "r" (256UL)
20304 @@ -171,64 +175,64 @@ xor_sse_4(unsigned long bytes, unsigned
20305 unsigned long *p3, unsigned long *p4)
20307 unsigned int lines = bytes >> 8;
20308 - xmm_store_t xmm_save[4];
20309 + xmm_store_t xmm_save[4];
20314 - __asm__ __volatile__ (
20385 - " addq %[inc], %[p1] ;\n"
20386 - " addq %[inc], %[p2] ;\n"
20387 - " addq %[inc], %[p3] ;\n"
20388 - " addq %[inc], %[p4] ;\n"
20389 + " addq %[inc], %[p1] ;\n"
20390 + " addq %[inc], %[p2] ;\n"
20391 + " addq %[inc], %[p3] ;\n"
20392 + " addq %[inc], %[p4] ;\n"
20393 " decl %[cnt] ; jnz 1b"
20394 : [cnt] "+c" (lines),
20395 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
20396 : [inc] "r" (256UL)
20402 @@ -237,70 +241,70 @@ static void
20403 xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
20404 unsigned long *p3, unsigned long *p4, unsigned long *p5)
20406 - unsigned int lines = bytes >> 8;
20407 + unsigned int lines = bytes >> 8;
20408 xmm_store_t xmm_save[4];
20413 - __asm__ __volatile__ (
20495 - " addq %[inc], %[p1] ;\n"
20496 - " addq %[inc], %[p2] ;\n"
20497 - " addq %[inc], %[p3] ;\n"
20498 - " addq %[inc], %[p4] ;\n"
20499 - " addq %[inc], %[p5] ;\n"
20500 + " addq %[inc], %[p1] ;\n"
20501 + " addq %[inc], %[p2] ;\n"
20502 + " addq %[inc], %[p3] ;\n"
20503 + " addq %[inc], %[p4] ;\n"
20504 + " addq %[inc], %[p5] ;\n"
20505 " decl %[cnt] ; jnz 1b"
20506 : [cnt] "+c" (lines),
20507 - [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4),
20508 + [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4),
20510 : [inc] "r" (256UL)
20512 @@ -309,18 +313,18 @@ xor_sse_5(unsigned long bytes, unsigned
20515 static struct xor_block_template xor_block_sse = {
20516 - .name = "generic_sse",
20517 - .do_2 = xor_sse_2,
20518 - .do_3 = xor_sse_3,
20519 - .do_4 = xor_sse_4,
20520 - .do_5 = xor_sse_5,
20521 + .name = "generic_sse",
20522 + .do_2 = xor_sse_2,
20523 + .do_3 = xor_sse_3,
20524 + .do_4 = xor_sse_4,
20525 + .do_5 = xor_sse_5,
20528 #undef XOR_TRY_TEMPLATES
20529 -#define XOR_TRY_TEMPLATES \
20531 - xor_speed(&xor_block_sse); \
20533 +#define XOR_TRY_TEMPLATES \
20535 + xor_speed(&xor_block_sse); \
20538 /* We force the use of the SSE xor block because it can write around L2.
20539 We may also be able to load into the L1 only depending on how the cpu
20540 --- sle11-2009-10-16.orig/include/asm-x86/scatterlist.h 2009-10-28 14:55:03.000000000 +0100
20541 +++ sle11-2009-10-16/include/asm-x86/scatterlist.h 2009-03-16 16:38:05.000000000 +0100
20542 @@ -24,7 +24,7 @@ struct scatterlist {
20545 #define sg_dma_address(sg) ((sg)->dma_address)
20546 -#ifdef CONFIG_X86_32
20547 +#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
20548 # define sg_dma_len(sg) ((sg)->length)
20550 # define sg_dma_len(sg) ((sg)->dma_length)
20551 --- sle11-2009-10-16.orig/include/linux/page-flags.h 2009-03-16 16:33:40.000000000 +0100
20552 +++ sle11-2009-10-16/include/linux/page-flags.h 2009-03-16 16:38:05.000000000 +0100
20553 @@ -278,18 +278,25 @@ static inline void SetPageUptodate(struc
20555 CLEARPAGEFLAG(Uptodate, uptodate)
20557 -#define PageForeign(page) test_bit(PG_foreign, &(page)->flags)
20558 -#define SetPageForeign(_page, dtor) do { \
20559 - set_bit(PG_foreign, &(_page)->flags); \
20560 - BUG_ON((dtor) == (void (*)(struct page *, unsigned int))0); \
20561 - (_page)->index = (long)(dtor); \
20563 -#define ClearPageForeign(page) do { \
20564 - clear_bit(PG_foreign, &(page)->flags); \
20565 - (page)->index = 0; \
20567 -#define PageForeignDestructor(_page, order) \
20568 - ((void (*)(struct page *, unsigned int))(_page)->index)(_page, order)
20570 +TESTPAGEFLAG(Foreign, foreign)
20571 +static inline void SetPageForeign(struct page *page,
20572 + void (*dtor)(struct page *, unsigned int))
20575 + set_bit(PG_foreign, &page->flags);
20576 + page->index = (long)dtor;
20578 +static inline void ClearPageForeign(struct page *page)
20580 + clear_bit(PG_foreign, &page->flags);
20583 +static inline void PageForeignDestructor(struct page *page, unsigned int order)
20585 + ((void (*)(struct page *, unsigned int))page->index)(page, order);
20589 extern void cancel_dirty_page(struct page *page, unsigned int account_size);
20591 --- sle11-2009-10-16.orig/include/xen/balloon.h 2008-11-25 12:35:56.000000000 +0100
20592 +++ sle11-2009-10-16/include/xen/balloon.h 2009-03-16 16:38:05.000000000 +0100
20597 -#ifndef __ASM_BALLOON_H__
20598 -#define __ASM_BALLOON_H__
20599 +#ifndef __XEN_BALLOON_H__
20600 +#define __XEN_BALLOON_H__
20602 +#include <linux/spinlock.h>
20604 +#if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
20606 * Inform the balloon driver that it should allow some slop for device-driver
20607 * memory activities.
20608 @@ -53,5 +56,6 @@ void balloon_release_driver_page(struct
20609 extern spinlock_t balloon_lock;
20610 #define balloon_lock(__flags) spin_lock_irqsave(&balloon_lock, __flags)
20611 #define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags)
20614 -#endif /* __ASM_BALLOON_H__ */
20615 +#endif /* __XEN_BALLOON_H__ */
20616 --- sle11-2009-10-16.orig/include/xen/interface/grant_table.h 2008-11-25 12:22:34.000000000 +0100
20617 +++ sle11-2009-10-16/include/xen/interface/grant_table.h 2009-03-16 16:38:05.000000000 +0100
20618 @@ -193,6 +193,7 @@ struct gnttab_map_grant_ref {
20619 grant_handle_t handle;
20620 uint64_t dev_bus_addr;
20622 +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref);
20623 typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t;
20624 DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t);
20626 @@ -216,6 +217,7 @@ struct gnttab_unmap_grant_ref {
20627 /* OUT parameters. */
20628 int16_t status; /* GNTST_* */
20630 +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref);
20631 typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t;
20632 DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t);
20634 @@ -237,6 +239,7 @@ struct gnttab_setup_table {
20635 int16_t status; /* GNTST_* */
20636 XEN_GUEST_HANDLE(ulong) frame_list;
20638 +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_setup_table);
20639 typedef struct gnttab_setup_table gnttab_setup_table_t;
20640 DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t);
20642 @@ -251,6 +254,7 @@ struct gnttab_dump_table {
20643 /* OUT parameters. */
20644 int16_t status; /* GNTST_* */
20646 +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_dump_table);
20647 typedef struct gnttab_dump_table gnttab_dump_table_t;
20648 DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
20650 @@ -271,6 +275,7 @@ struct gnttab_transfer {
20651 /* OUT parameters. */
20654 +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_transfer);
20655 typedef struct gnttab_transfer gnttab_transfer_t;
20656 DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
20658 @@ -314,6 +319,7 @@ typedef struct gnttab_copy {
20659 /* OUT parameters. */
20662 +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_copy);
20663 DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t);
20666 @@ -332,6 +338,7 @@ struct gnttab_query_size {
20667 uint32_t max_nr_frames;
20668 int16_t status; /* GNTST_* */
20670 +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_query_size);
20671 typedef struct gnttab_query_size gnttab_query_size_t;
20672 DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
20674 --- sle11-2009-10-16.orig/include/xen/interface/io/fbif.h 2008-11-25 12:35:56.000000000 +0100
20675 +++ sle11-2009-10-16/include/xen/interface/io/fbif.h 2009-03-16 16:38:05.000000000 +0100
20676 @@ -150,7 +150,12 @@ struct xenfb_page
20677 * framebuffer with a max resolution of 12,800x10,240. Should
20678 * be enough for a while with room leftover for expansion.
20680 +#ifndef CONFIG_PARAVIRT_XEN
20681 unsigned long pd[256];
20683 + /* Two directory pages should be enough for a while. */
20684 + unsigned long pd[2];
20689 --- sle11-2009-10-16.orig/include/xen/interface/memory.h 2009-02-16 16:17:21.000000000 +0100
20690 +++ sle11-2009-10-16/include/xen/interface/memory.h 2009-03-16 16:38:05.000000000 +0100
20691 @@ -62,7 +62,7 @@ struct xen_memory_reservation {
20692 * OUT: GMFN bases of extents that were allocated
20693 * (NB. This command also updates the mach_to_phys translation table)
20695 - XEN_GUEST_HANDLE(ulong) extent_start;
20696 + XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
20698 /* Number of extents, and size/alignment of each (2^extent_order pages). */
20699 xen_ulong_t nr_extents;
20700 @@ -82,7 +82,6 @@ struct xen_memory_reservation {
20704 -DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_memory_reservation);
20705 typedef struct xen_memory_reservation xen_memory_reservation_t;
20706 DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
20708 @@ -168,7 +167,11 @@ struct xen_machphys_mfn_list {
20709 * any large discontiguities in the machine address space, 2MB gaps in
20710 * the machphys table will be represented by an MFN base of zero.
20712 +#ifndef CONFIG_PARAVIRT_XEN
20713 XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
20715 + ulong extent_start;
20719 * Number of extents written to the above array. This will be smaller
20720 @@ -176,7 +179,6 @@ struct xen_machphys_mfn_list {
20722 unsigned int nr_extents;
20724 -DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
20725 typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
20726 DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
20728 @@ -216,7 +218,6 @@ struct xen_add_to_physmap {
20729 /* GPFN where the source mapping page should appear. */
20732 -DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
20733 typedef struct xen_add_to_physmap xen_add_to_physmap_t;
20734 DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
20736 @@ -249,13 +250,21 @@ struct xen_translate_gpfn_list {
20737 xen_ulong_t nr_gpfns;
20739 /* List of GPFNs to translate. */
20740 +#ifndef CONFIG_PARAVIRT_XEN
20741 XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list;
20747 * Output list to contain MFN translations. May be the same as the input
20748 * list (in which case each input GPFN is overwritten with the output MFN).
20750 +#ifndef CONFIG_PARAVIRT_XEN
20751 XEN_GUEST_HANDLE(xen_pfn_t) mfn_list;
20756 DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
20757 typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
20758 --- sle11-2009-10-16.orig/include/xen/interface/vcpu.h 2008-11-25 12:35:56.000000000 +0100
20759 +++ sle11-2009-10-16/include/xen/interface/vcpu.h 2009-03-16 16:38:05.000000000 +0100
20760 @@ -85,6 +85,7 @@ struct vcpu_runstate_info {
20764 +DEFINE_XEN_GUEST_HANDLE_STRUCT(vcpu_runstate_info);
20765 typedef struct vcpu_runstate_info vcpu_runstate_info_t;
20766 DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t);
20768 @@ -140,6 +141,7 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_register_ru
20769 struct vcpu_set_periodic_timer {
20770 uint64_t period_ns;
20772 +DEFINE_XEN_GUEST_HANDLE_STRUCT(vcpu_set_periodic_timer);
20773 typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t;
20774 DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t);
20776 @@ -153,6 +155,7 @@ struct vcpu_set_singleshot_timer {
20777 uint64_t timeout_abs_ns; /* Absolute system time value in nanoseconds. */
20778 uint32_t flags; /* VCPU_SSHOTTMR_??? */
20780 +DEFINE_XEN_GUEST_HANDLE_STRUCT(vcpu_set_singleshot_timer);
20781 typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t;
20782 DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t);
20784 @@ -176,6 +179,7 @@ struct vcpu_register_vcpu_info {
20785 uint32_t offset; /* offset within page */
20786 uint32_t rsvd; /* unused */
20788 +DEFINE_XEN_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info);
20789 typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t;
20790 DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t);
20792 --- sle11-2009-10-16.orig/lib/swiotlb-xen.c 2009-03-16 16:33:40.000000000 +0100
20793 +++ sle11-2009-10-16/lib/swiotlb-xen.c 2009-03-16 16:38:05.000000000 +0100
20795 #include <linux/ctype.h>
20796 #include <linux/init.h>
20797 #include <linux/bootmem.h>
20798 +#include <linux/iommu-helper.h>
20799 #include <linux/highmem.h>
20800 #include <asm/io.h>
20801 #include <asm/pci.h>
20802 @@ -288,15 +289,6 @@ __sync_single(struct phys_addr buffer, c
20806 -static inline unsigned int is_span_boundary(unsigned int index,
20807 - unsigned int nslots,
20808 - unsigned long offset_slots,
20809 - unsigned long max_slots)
20811 - unsigned long offset = (offset_slots + index) & (max_slots - 1);
20812 - return offset + nslots > max_slots;
20816 * Allocates bounce buffer and returns its kernel virtual address.
20818 @@ -335,61 +327,53 @@ map_single(struct device *hwdev, struct
20819 * request and allocate a buffer from that IO TLB pool.
20821 spin_lock_irqsave(&io_tlb_lock, flags);
20823 - index = ALIGN(io_tlb_index, stride);
20824 - if (index >= iotlb_nslabs)
20827 + index = ALIGN(io_tlb_index, stride);
20828 + if (index >= iotlb_nslabs)
20833 - while (is_span_boundary(index, nslots, offset_slots,
20836 - if (index >= iotlb_nslabs)
20838 - if (index == wrap)
20842 + while (iommu_is_span_boundary(index, nslots, offset_slots,
20845 + if (index >= iotlb_nslabs)
20847 + if (index == wrap)
20852 + * If we find a slot that indicates we have 'nslots' number of
20853 + * contiguous buffers, we allocate the buffers from that slot
20854 + * and mark the entries as '0' indicating unavailable.
20856 + if (io_tlb_list[index] >= nslots) {
20859 + for (i = index; i < (int) (index + nslots); i++)
20860 + io_tlb_list[i] = 0;
20861 + for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
20862 + io_tlb_list[i] = ++count;
20863 + dma_addr = iotlb_virt_start + (index << IO_TLB_SHIFT);
20866 - * If we find a slot that indicates we have 'nslots'
20867 - * number of contiguous buffers, we allocate the
20868 - * buffers from that slot and mark the entries as '0'
20869 - * indicating unavailable.
20870 + * Update the indices to avoid searching in the next
20873 - if (io_tlb_list[index] >= nslots) {
20876 - for (i = index; i < (int)(index + nslots); i++)
20877 - io_tlb_list[i] = 0;
20878 - for (i = index - 1;
20879 - (OFFSET(i, IO_TLB_SEGSIZE) !=
20880 - IO_TLB_SEGSIZE -1) && io_tlb_list[i];
20882 - io_tlb_list[i] = ++count;
20883 - dma_addr = iotlb_virt_start +
20884 - (index << IO_TLB_SHIFT);
20887 - * Update the indices to avoid searching in
20888 - * the next round.
20891 - ((index + nslots) < iotlb_nslabs
20892 - ? (index + nslots) : 0);
20893 + io_tlb_index = ((index + nslots) < iotlb_nslabs
20894 + ? (index + nslots) : 0);
20899 - if (index >= iotlb_nslabs)
20901 - } while (index != wrap);
20905 + if (index >= iotlb_nslabs)
20907 + } while (index != wrap);
20910 - spin_unlock_irqrestore(&io_tlb_lock, flags);
20915 + spin_unlock_irqrestore(&io_tlb_lock, flags);
20918 spin_unlock_irqrestore(&io_tlb_lock, flags);
20921 @@ -502,11 +486,13 @@ swiotlb_full(struct device *dev, size_t
20922 * Once the device is given the dma address, the device owns this memory until
20923 * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
20926 -swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
20928 - dma_addr_t dev_addr = gnttab_dma_map_page(virt_to_page(ptr)) +
20929 - offset_in_page(ptr);
20931 +_swiotlb_map_single(struct device *hwdev, phys_addr_t paddr, size_t size,
20932 + int dir, struct dma_attrs *attrs)
20934 + struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
20935 + dma_addr_t dev_addr = gnttab_dma_map_page(page) +
20936 + offset_in_page(paddr);
20938 struct phys_addr buffer;
20940 @@ -517,7 +503,7 @@ swiotlb_map_single(struct device *hwdev,
20941 * we can safely return the device addr and not worry about bounce
20944 - if (!range_straddles_page_boundary(__pa(ptr), size) &&
20945 + if (!range_straddles_page_boundary(paddr, size) &&
20946 !address_needs_mapping(hwdev, dev_addr))
20949 @@ -525,8 +511,8 @@ swiotlb_map_single(struct device *hwdev,
20950 * Oh well, have to allocate and map a bounce buffer.
20952 gnttab_dma_unmap_page(dev_addr);
20953 - buffer.page = virt_to_page(ptr);
20954 - buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
20955 + buffer.page = page;
20956 + buffer.offset = offset_in_page(paddr);
20957 map = map_single(hwdev, buffer, size, dir);
20959 swiotlb_full(hwdev, size, dir, 1);
20960 @@ -537,6 +523,26 @@ swiotlb_map_single(struct device *hwdev,
20965 +swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
20966 + int dir, struct dma_attrs *attrs)
20968 + return _swiotlb_map_single(hwdev, virt_to_phys(ptr), size, dir, attrs);
20970 +EXPORT_SYMBOL(swiotlb_map_single_attrs);
20973 +swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
20975 + return _swiotlb_map_single(hwdev, virt_to_phys(ptr), size, dir, NULL);
20979 +swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
20981 + return _swiotlb_map_single(hwdev, paddr, size, dir, NULL);
20985 * Unmap a single streaming mode DMA translation. The dma_addr and size must
20986 * match what was provided for in a previous swiotlb_map_single call. All
20987 @@ -546,8 +552,8 @@ swiotlb_map_single(struct device *hwdev,
20988 * whatever the device wrote there.
20991 -swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
20993 +swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
20994 + size_t size, int dir, struct dma_attrs *attrs)
20996 BUG_ON(dir == DMA_NONE);
20997 if (in_swiotlb_aperture(dev_addr))
20998 @@ -555,7 +561,14 @@ swiotlb_unmap_single(struct device *hwde
21000 gnttab_dma_unmap_page(dev_addr);
21002 +EXPORT_SYMBOL(swiotlb_unmap_single_attrs);
21005 +swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
21008 + return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL);
21011 * Make physical memory consistent for a single streaming mode DMA translation
21012 * after a transfer.
21013 @@ -584,6 +597,26 @@ swiotlb_sync_single_for_device(struct de
21014 sync_single(hwdev, bus_to_virt(dev_addr), size, dir);
21018 +swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
21019 + unsigned long offset, size_t size, int dir)
21021 + BUG_ON(dir == DMA_NONE);
21022 + if (in_swiotlb_aperture(dev_addr))
21023 + sync_single(hwdev, bus_to_virt(dev_addr + offset), size, dir);
21027 +swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
21028 + unsigned long offset, size_t size, int dir)
21030 + BUG_ON(dir == DMA_NONE);
21031 + if (in_swiotlb_aperture(dev_addr))
21032 + sync_single(hwdev, bus_to_virt(dev_addr + offset), size, dir);
21035 +void swiotlb_unmap_sg_attrs(struct device *, struct scatterlist *, int, int,
21036 + struct dma_attrs *);
21038 * Map a set of buffers described by scatterlist in streaming mode for DMA.
21039 * This is the scatter-gather version of the above swiotlb_map_single
21040 @@ -601,8 +634,8 @@ swiotlb_sync_single_for_device(struct de
21044 -swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
21046 +swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
21047 + int dir, struct dma_attrs *attrs)
21049 struct scatterlist *sg;
21050 struct phys_addr buffer;
21051 @@ -626,7 +659,8 @@ swiotlb_map_sg(struct device *hwdev, str
21052 /* Don't panic here, we expect map_sg users
21053 to do proper error handling. */
21054 swiotlb_full(hwdev, sg->length, dir, 0);
21055 - swiotlb_unmap_sg(hwdev, sgl, i, dir);
21056 + swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
21058 sgl[0].dma_length = 0;
21061 @@ -637,14 +671,22 @@ swiotlb_map_sg(struct device *hwdev, str
21065 +EXPORT_SYMBOL(swiotlb_map_sg_attrs);
21068 +swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
21071 + return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
21075 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
21076 * concerning calls here are the same as for swiotlb_unmap_single() above.
21079 -swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
21081 +swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
21082 + int nelems, int dir, struct dma_attrs *attrs)
21084 struct scatterlist *sg;
21086 @@ -659,6 +701,14 @@ swiotlb_unmap_sg(struct device *hwdev, s
21087 gnttab_dma_unmap_page(sg->dma_address);
21090 +EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
21093 +swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
21096 + return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
21100 * Make physical memory consistent for a set of streaming mode DMA translations
21101 @@ -699,46 +749,6 @@ swiotlb_sync_sg_for_device(struct device
21105 -#ifdef CONFIG_HIGHMEM
21108 -swiotlb_map_page(struct device *hwdev, struct page *page,
21109 - unsigned long offset, size_t size,
21110 - enum dma_data_direction direction)
21112 - struct phys_addr buffer;
21113 - dma_addr_t dev_addr;
21116 - dev_addr = gnttab_dma_map_page(page) + offset;
21117 - if (address_needs_mapping(hwdev, dev_addr)) {
21118 - gnttab_dma_unmap_page(dev_addr);
21119 - buffer.page = page;
21120 - buffer.offset = offset;
21121 - map = map_single(hwdev, buffer, size, direction);
21123 - swiotlb_full(hwdev, size, direction, 1);
21124 - map = io_tlb_overflow_buffer;
21126 - dev_addr = (dma_addr_t)virt_to_bus(map);
21133 -swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
21134 - size_t size, enum dma_data_direction direction)
21136 - BUG_ON(direction == DMA_NONE);
21137 - if (in_swiotlb_aperture(dma_address))
21138 - unmap_single(hwdev, bus_to_virt(dma_address), size, direction);
21140 - gnttab_dma_unmap_page(dma_address);
21146 swiotlb_dma_mapping_error(dma_addr_t dma_addr)