]>
Commit | Line | Data |
---|---|---|
cc90b958 BS |
1 | From: www.kernel.org |
2 | Subject: Linux 2.6.20 | |
3 | Patch-mainline: 2.6.20 | |
4 | ||
5 | Automatically created from "patches.kernel.org/patch-2.6.20" by xen-port-patches.py | |
6 | ||
7 | Acked-by: jbeulich@novell.com | |
8 | ||
9 | Index: head-2008-12-01/arch/x86/Kconfig | |
10 | =================================================================== | |
11 | --- head-2008-12-01.orig/arch/x86/Kconfig 2008-12-01 11:29:05.000000000 +0100 | |
12 | +++ head-2008-12-01/arch/x86/Kconfig 2008-12-01 11:32:38.000000000 +0100 | |
13 | @@ -1427,7 +1427,7 @@ config PHYSICAL_START | |
14 | ||
15 | config RELOCATABLE | |
16 | bool "Build a relocatable kernel (EXPERIMENTAL)" | |
17 | - depends on EXPERIMENTAL | |
18 | + depends on EXPERIMENTAL && !X86_XEN | |
19 | help | |
20 | This builds a kernel image that retains relocation information | |
21 | so it can be loaded someplace besides the default 1MB. | |
22 | Index: head-2008-12-01/arch/x86/kernel/asm-offsets_32.c | |
23 | =================================================================== | |
24 | --- head-2008-12-01.orig/arch/x86/kernel/asm-offsets_32.c 2008-12-01 11:21:02.000000000 +0100 | |
25 | +++ head-2008-12-01/arch/x86/kernel/asm-offsets_32.c 2008-12-01 11:32:38.000000000 +0100 | |
26 | @@ -54,6 +54,7 @@ void foo(void) | |
27 | OFFSET(TI_exec_domain, thread_info, exec_domain); | |
28 | OFFSET(TI_flags, thread_info, flags); | |
29 | OFFSET(TI_status, thread_info, status); | |
30 | + OFFSET(TI_cpu, thread_info, cpu); | |
31 | OFFSET(TI_preempt_count, thread_info, preempt_count); | |
32 | OFFSET(TI_addr_limit, thread_info, addr_limit); | |
33 | OFFSET(TI_restart_block, thread_info, restart_block); | |
34 | @@ -108,6 +109,11 @@ void foo(void) | |
35 | ||
36 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); | |
37 | ||
38 | +#ifdef CONFIG_XEN | |
39 | + BLANK(); | |
40 | + OFFSET(XEN_START_mfn_list, start_info, mfn_list); | |
41 | +#endif | |
42 | + | |
43 | #ifdef CONFIG_PARAVIRT | |
44 | BLANK(); | |
45 | OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); | |
46 | Index: head-2008-12-01/arch/x86/kernel/cpu/common-xen.c | |
47 | =================================================================== | |
48 | --- head-2008-12-01.orig/arch/x86/kernel/cpu/common-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
49 | +++ head-2008-12-01/arch/x86/kernel/cpu/common-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
50 | @@ -22,6 +22,7 @@ | |
51 | #define phys_pkg_id(a,b) a | |
52 | #endif | |
53 | #endif | |
54 | +#include <asm/pda.h> | |
55 | #include <asm/hypervisor.h> | |
56 | ||
57 | #include "cpu.h" | |
58 | @@ -29,10 +30,8 @@ | |
59 | DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); | |
60 | EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); | |
61 | ||
62 | -#ifndef CONFIG_XEN | |
63 | -DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); | |
64 | -EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); | |
65 | -#endif | |
66 | +struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly; | |
67 | +EXPORT_SYMBOL(_cpu_pda); | |
68 | ||
69 | static int cachesize_override __cpuinitdata = -1; | |
70 | static int disable_x86_fxsr __cpuinitdata; | |
71 | @@ -60,7 +59,7 @@ static struct cpu_dev __cpuinitdata defa | |
72 | .c_init = default_init, | |
73 | .c_vendor = "Unknown", | |
74 | }; | |
75 | -static struct cpu_dev * this_cpu = &default_cpu; | |
76 | +static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu; | |
77 | ||
78 | static int __init cachesize_setup(char *str) | |
79 | { | |
80 | @@ -242,29 +241,14 @@ static int __cpuinit have_cpuid_p(void) | |
81 | return flag_is_changeable_p(X86_EFLAGS_ID); | |
82 | } | |
83 | ||
84 | -/* Do minimum CPU detection early. | |
85 | - Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. | |
86 | - The others are not touched to avoid unwanted side effects. | |
87 | - | |
88 | - WARNING: this function is only called on the BP. Don't add code here | |
89 | - that is supposed to run on all CPUs. */ | |
90 | -static void __init early_cpu_detect(void) | |
91 | +void __init cpu_detect(struct cpuinfo_x86 *c) | |
92 | { | |
93 | - struct cpuinfo_x86 *c = &boot_cpu_data; | |
94 | - | |
95 | - c->x86_cache_alignment = 32; | |
96 | - | |
97 | - if (!have_cpuid_p()) | |
98 | - return; | |
99 | - | |
100 | /* Get vendor name */ | |
101 | cpuid(0x00000000, &c->cpuid_level, | |
102 | (int *)&c->x86_vendor_id[0], | |
103 | (int *)&c->x86_vendor_id[8], | |
104 | (int *)&c->x86_vendor_id[4]); | |
105 | ||
106 | - get_cpu_vendor(c, 1); | |
107 | - | |
108 | c->x86 = 4; | |
109 | if (c->cpuid_level >= 0x00000001) { | |
110 | u32 junk, tfms, cap0, misc; | |
111 | @@ -281,6 +265,26 @@ static void __init early_cpu_detect(void | |
112 | } | |
113 | } | |
114 | ||
115 | +/* Do minimum CPU detection early. | |
116 | + Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. | |
117 | + The others are not touched to avoid unwanted side effects. | |
118 | + | |
119 | + WARNING: this function is only called on the BP. Don't add code here | |
120 | + that is supposed to run on all CPUs. */ | |
121 | +static void __init early_cpu_detect(void) | |
122 | +{ | |
123 | + struct cpuinfo_x86 *c = &boot_cpu_data; | |
124 | + | |
125 | + c->x86_cache_alignment = 32; | |
126 | + | |
127 | + if (!have_cpuid_p()) | |
128 | + return; | |
129 | + | |
130 | + cpu_detect(c); | |
131 | + | |
132 | + get_cpu_vendor(c, 1); | |
133 | +} | |
134 | + | |
135 | static void __cpuinit generic_identify(struct cpuinfo_x86 * c) | |
136 | { | |
137 | u32 tfms, xlvl; | |
138 | @@ -315,6 +319,8 @@ static void __cpuinit generic_identify(s | |
139 | #else | |
140 | c->apicid = (ebx >> 24) & 0xFF; | |
141 | #endif | |
142 | + if (c->x86_capability[0] & (1<<19)) | |
143 | + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; | |
144 | } else { | |
145 | /* Have CPUID level 0 only - unheard of */ | |
146 | c->x86 = 4; | |
147 | @@ -379,6 +385,7 @@ void __cpuinit identify_cpu(struct cpuin | |
148 | c->x86_vendor_id[0] = '\0'; /* Unset */ | |
149 | c->x86_model_id[0] = '\0'; /* Unset */ | |
150 | c->x86_max_cores = 1; | |
151 | + c->x86_clflush_size = 32; | |
152 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | |
153 | ||
154 | if (!have_cpuid_p()) { | |
155 | @@ -599,61 +606,23 @@ void __init early_cpu_init(void) | |
156 | #endif | |
157 | } | |
158 | ||
159 | -static void __cpuinit cpu_gdt_init(const struct Xgt_desc_struct *gdt_descr) | |
160 | +/* Make sure %gs is initialized properly in idle threads */ | |
161 | +struct pt_regs * __devinit idle_regs(struct pt_regs *regs) | |
162 | { | |
163 | - unsigned long frames[16]; | |
164 | - unsigned long va; | |
165 | - int f; | |
166 | - | |
167 | - for (va = gdt_descr->address, f = 0; | |
168 | - va < gdt_descr->address + gdt_descr->size; | |
169 | - va += PAGE_SIZE, f++) { | |
170 | - frames[f] = virt_to_mfn(va); | |
171 | - make_lowmem_page_readonly( | |
172 | - (void *)va, XENFEAT_writable_descriptor_tables); | |
173 | - } | |
174 | - if (HYPERVISOR_set_gdt(frames, (gdt_descr->size + 1) / 8)) | |
175 | - BUG(); | |
176 | + memset(regs, 0, sizeof(struct pt_regs)); | |
177 | + regs->xgs = __KERNEL_PDA; | |
178 | + return regs; | |
179 | } | |
180 | ||
181 | -/* | |
182 | - * cpu_init() initializes state that is per-CPU. Some data is already | |
183 | - * initialized (naturally) in the bootstrap process, such as the GDT | |
184 | - * and IDT. We reload them nevertheless, this function acts as a | |
185 | - * 'CPU state barrier', nothing should get across. | |
186 | - */ | |
187 | -void __cpuinit cpu_init(void) | |
188 | +static __cpuinit int alloc_gdt(int cpu) | |
189 | { | |
190 | - int cpu = smp_processor_id(); | |
191 | -#ifndef CONFIG_X86_NO_TSS | |
192 | - struct tss_struct * t = &per_cpu(init_tss, cpu); | |
193 | -#endif | |
194 | - struct thread_struct *thread = ¤t->thread; | |
195 | - struct desc_struct *gdt; | |
196 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | |
197 | + struct desc_struct *gdt; | |
198 | + struct i386_pda *pda; | |
199 | ||
200 | - if (cpu_test_and_set(cpu, cpu_initialized)) { | |
201 | - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); | |
202 | - for (;;) local_irq_enable(); | |
203 | - } | |
204 | - printk(KERN_INFO "Initializing CPU#%d\n", cpu); | |
205 | - | |
206 | - if (cpu_has_vme || cpu_has_de) | |
207 | - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | |
208 | - if (tsc_disable && cpu_has_tsc) { | |
209 | - printk(KERN_NOTICE "Disabling TSC...\n"); | |
210 | - /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ | |
211 | - clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); | |
212 | - set_in_cr4(X86_CR4_TSD); | |
213 | - } | |
214 | + gdt = (struct desc_struct *)cpu_gdt_descr->address; | |
215 | + pda = cpu_pda(cpu); | |
216 | ||
217 | -#ifndef CONFIG_XEN | |
218 | - /* The CPU hotplug case */ | |
219 | - if (cpu_gdt_descr->address) { | |
220 | - gdt = (struct desc_struct *)cpu_gdt_descr->address; | |
221 | - memset(gdt, 0, PAGE_SIZE); | |
222 | - goto old_gdt; | |
223 | - } | |
224 | /* | |
225 | * This is a horrible hack to allocate the GDT. The problem | |
226 | * is that cpu_init() is called really early for the boot CPU | |
227 | @@ -661,54 +630,141 @@ void __cpuinit cpu_init(void) | |
228 | * CPUs, when bootmem will have gone away | |
229 | */ | |
230 | if (NODE_DATA(0)->bdata->node_bootmem_map) { | |
231 | - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); | |
232 | - /* alloc_bootmem_pages panics on failure, so no check */ | |
233 | + BUG_ON(gdt != NULL || pda != NULL); | |
234 | + | |
235 | + gdt = alloc_bootmem_pages(PAGE_SIZE); | |
236 | + pda = alloc_bootmem(sizeof(*pda)); | |
237 | + /* alloc_bootmem(_pages) panics on failure, so no check */ | |
238 | + | |
239 | memset(gdt, 0, PAGE_SIZE); | |
240 | + memset(pda, 0, sizeof(*pda)); | |
241 | } else { | |
242 | - gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); | |
243 | - if (unlikely(!gdt)) { | |
244 | - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); | |
245 | - for (;;) | |
246 | - local_irq_enable(); | |
247 | + /* GDT and PDA might already have been allocated if | |
248 | + this is a CPU hotplug re-insertion. */ | |
249 | + if (gdt == NULL) | |
250 | + gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); | |
251 | + | |
252 | + if (pda == NULL) | |
253 | + pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu)); | |
254 | + | |
255 | + if (unlikely(!gdt || !pda)) { | |
256 | + free_pages((unsigned long)gdt, 0); | |
257 | + kfree(pda); | |
258 | + return 0; | |
259 | } | |
260 | } | |
261 | -old_gdt: | |
262 | + | |
263 | + cpu_gdt_descr->address = (unsigned long)gdt; | |
264 | + cpu_pda(cpu) = pda; | |
265 | + | |
266 | + return 1; | |
267 | +} | |
268 | + | |
269 | +/* Initial PDA used by boot CPU */ | |
270 | +struct i386_pda boot_pda = { | |
271 | + ._pda = &boot_pda, | |
272 | + .cpu_number = 0, | |
273 | + .pcurrent = &init_task, | |
274 | +}; | |
275 | + | |
276 | +static inline void set_kernel_gs(void) | |
277 | +{ | |
278 | + /* Set %gs for this CPU's PDA. Memory clobber is to create a | |
279 | + barrier with respect to any PDA operations, so the compiler | |
280 | + doesn't move any before here. */ | |
281 | + asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); | |
282 | +} | |
283 | + | |
284 | +/* Initialize the CPU's GDT and PDA. The boot CPU does this for | |
285 | + itself, but secondaries find this done for them. */ | |
286 | +__cpuinit int init_gdt(int cpu, struct task_struct *idle) | |
287 | +{ | |
288 | + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | |
289 | + struct desc_struct *gdt; | |
290 | + struct i386_pda *pda; | |
291 | + | |
292 | + /* For non-boot CPUs, the GDT and PDA should already have been | |
293 | + allocated. */ | |
294 | + if (!alloc_gdt(cpu)) { | |
295 | + printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu); | |
296 | + return 0; | |
297 | + } | |
298 | + | |
299 | + gdt = (struct desc_struct *)cpu_gdt_descr->address; | |
300 | + pda = cpu_pda(cpu); | |
301 | + | |
302 | + BUG_ON(gdt == NULL || pda == NULL); | |
303 | + | |
304 | /* | |
305 | * Initialize the per-CPU GDT with the boot GDT, | |
306 | * and set up the GDT descriptor: | |
307 | */ | |
308 | memcpy(gdt, cpu_gdt_table, GDT_SIZE); | |
309 | + cpu_gdt_descr->size = GDT_SIZE - 1; | |
310 | ||
311 | - /* Set up GDT entry for 16bit stack */ | |
312 | - *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= | |
313 | - ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | | |
314 | - ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | | |
315 | - (CPU_16BIT_STACK_SIZE - 1); | |
316 | + pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a, | |
317 | + (u32 *)&gdt[GDT_ENTRY_PDA].b, | |
318 | + (unsigned long)pda, sizeof(*pda) - 1, | |
319 | + 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */ | |
320 | + | |
321 | + memset(pda, 0, sizeof(*pda)); | |
322 | + pda->_pda = pda; | |
323 | + pda->cpu_number = cpu; | |
324 | + pda->pcurrent = idle; | |
325 | ||
326 | - cpu_gdt_descr->size = GDT_SIZE - 1; | |
327 | - cpu_gdt_descr->address = (unsigned long)gdt; | |
328 | -#else | |
329 | - if (cpu == 0 && cpu_gdt_descr->address == 0) { | |
330 | - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); | |
331 | - /* alloc_bootmem_pages panics on failure, so no check */ | |
332 | - memset(gdt, 0, PAGE_SIZE); | |
333 | + return 1; | |
334 | +} | |
335 | ||
336 | - memcpy(gdt, cpu_gdt_table, GDT_SIZE); | |
337 | - | |
338 | - cpu_gdt_descr->size = GDT_SIZE; | |
339 | - cpu_gdt_descr->address = (unsigned long)gdt; | |
340 | +void __cpuinit cpu_set_gdt(int cpu) | |
341 | +{ | |
342 | + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | |
343 | + unsigned long va, frames[16]; | |
344 | + int f; | |
345 | + | |
346 | + for (va = cpu_gdt_descr->address, f = 0; | |
347 | + va < cpu_gdt_descr->address + cpu_gdt_descr->size; | |
348 | + va += PAGE_SIZE, f++) { | |
349 | + frames[f] = virt_to_mfn(va); | |
350 | + make_lowmem_page_readonly( | |
351 | + (void *)va, XENFEAT_writable_descriptor_tables); | |
352 | } | |
353 | + BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8)); | |
354 | + | |
355 | + set_kernel_gs(); | |
356 | +} | |
357 | + | |
358 | +/* Common CPU init for both boot and secondary CPUs */ | |
359 | +static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) | |
360 | +{ | |
361 | +#ifndef CONFIG_X86_NO_TSS | |
362 | + struct tss_struct * t = &per_cpu(init_tss, cpu); | |
363 | #endif | |
364 | + struct thread_struct *thread = &curr->thread; | |
365 | + | |
366 | + if (cpu_test_and_set(cpu, cpu_initialized)) { | |
367 | + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); | |
368 | + for (;;) local_irq_enable(); | |
369 | + } | |
370 | ||
371 | - cpu_gdt_init(cpu_gdt_descr); | |
372 | + printk(KERN_INFO "Initializing CPU#%d\n", cpu); | |
373 | + | |
374 | + if (cpu_has_vme || cpu_has_de) | |
375 | + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | |
376 | + if (tsc_disable && cpu_has_tsc) { | |
377 | + printk(KERN_NOTICE "Disabling TSC...\n"); | |
378 | + /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ | |
379 | + clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); | |
380 | + set_in_cr4(X86_CR4_TSD); | |
381 | + } | |
382 | ||
383 | /* | |
384 | * Set up and load the per-CPU TSS and LDT | |
385 | */ | |
386 | atomic_inc(&init_mm.mm_count); | |
387 | - current->active_mm = &init_mm; | |
388 | - BUG_ON(current->mm); | |
389 | - enter_lazy_tlb(&init_mm, current); | |
390 | + curr->active_mm = &init_mm; | |
391 | + if (curr->mm) | |
392 | + BUG(); | |
393 | + enter_lazy_tlb(&init_mm, curr); | |
394 | ||
395 | load_esp0(t, thread); | |
396 | ||
397 | @@ -719,8 +775,8 @@ old_gdt: | |
398 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); | |
399 | #endif | |
400 | ||
401 | - /* Clear %fs and %gs. */ | |
402 | - asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0)); | |
403 | + /* Clear %fs. */ | |
404 | + asm volatile ("mov %0, %%fs" : : "r" (0)); | |
405 | ||
406 | /* Clear all 6 debug registers: */ | |
407 | set_debugreg(0, 0); | |
408 | @@ -738,6 +794,38 @@ old_gdt: | |
409 | mxcsr_feature_mask_init(); | |
410 | } | |
411 | ||
412 | +/* Entrypoint to initialize secondary CPU */ | |
413 | +void __cpuinit secondary_cpu_init(void) | |
414 | +{ | |
415 | + int cpu = smp_processor_id(); | |
416 | + struct task_struct *curr = current; | |
417 | + | |
418 | + _cpu_init(cpu, curr); | |
419 | +} | |
420 | + | |
421 | +/* | |
422 | + * cpu_init() initializes state that is per-CPU. Some data is already | |
423 | + * initialized (naturally) in the bootstrap process, such as the GDT | |
424 | + * and IDT. We reload them nevertheless, this function acts as a | |
425 | + * 'CPU state barrier', nothing should get across. | |
426 | + */ | |
427 | +void __cpuinit cpu_init(void) | |
428 | +{ | |
429 | + int cpu = smp_processor_id(); | |
430 | + struct task_struct *curr = current; | |
431 | + | |
432 | + /* Set up the real GDT and PDA, so we can transition from the | |
433 | + boot versions. */ | |
434 | + if (!init_gdt(cpu, curr)) { | |
435 | + /* failed to allocate something; not much we can do... */ | |
436 | + for (;;) | |
437 | + local_irq_enable(); | |
438 | + } | |
439 | + | |
440 | + cpu_set_gdt(cpu); | |
441 | + _cpu_init(cpu, curr); | |
442 | +} | |
443 | + | |
444 | #ifdef CONFIG_HOTPLUG_CPU | |
445 | void __cpuinit cpu_uninit(void) | |
446 | { | |
447 | Index: head-2008-12-01/arch/x86/kernel/cpu/mtrr/main-xen.c | |
448 | =================================================================== | |
449 | --- head-2008-12-01.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-01-28 12:24:18.000000000 +0100 | |
450 | +++ head-2008-12-01/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
451 | @@ -12,7 +12,7 @@ | |
452 | static DEFINE_MUTEX(mtrr_mutex); | |
453 | ||
454 | void generic_get_mtrr(unsigned int reg, unsigned long *base, | |
455 | - unsigned int *size, mtrr_type * type) | |
456 | + unsigned long *size, mtrr_type * type) | |
457 | { | |
458 | struct xen_platform_op op; | |
459 | ||
460 | @@ -115,8 +115,7 @@ int mtrr_del_page(int reg, unsigned long | |
461 | { | |
462 | unsigned i; | |
463 | mtrr_type ltype; | |
464 | - unsigned long lbase; | |
465 | - unsigned int lsize; | |
466 | + unsigned long lbase, lsize; | |
467 | int error = -EINVAL; | |
468 | struct xen_platform_op op; | |
469 | ||
470 | Index: head-2008-12-01/arch/x86/kernel/e820_32-xen.c | |
471 | =================================================================== | |
472 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
473 | +++ head-2008-12-01/arch/x86/kernel/e820_32-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
474 | @@ -0,0 +1,1015 @@ | |
475 | +#include <linux/kernel.h> | |
476 | +#include <linux/types.h> | |
477 | +#include <linux/init.h> | |
478 | +#include <linux/bootmem.h> | |
479 | +#include <linux/ioport.h> | |
480 | +#include <linux/string.h> | |
481 | +#include <linux/kexec.h> | |
482 | +#include <linux/module.h> | |
483 | +#include <linux/mm.h> | |
484 | +#include <linux/efi.h> | |
485 | +#include <linux/pfn.h> | |
486 | +#include <linux/uaccess.h> | |
487 | + | |
488 | +#include <asm/pgtable.h> | |
489 | +#include <asm/page.h> | |
490 | +#include <asm/e820.h> | |
491 | +#include <xen/interface/memory.h> | |
492 | + | |
493 | +#ifdef CONFIG_EFI | |
494 | +int efi_enabled = 0; | |
495 | +EXPORT_SYMBOL(efi_enabled); | |
496 | +#endif | |
497 | + | |
498 | +struct e820map e820; | |
499 | +struct change_member { | |
500 | + struct e820entry *pbios; /* pointer to original bios entry */ | |
501 | + unsigned long long addr; /* address for this change point */ | |
502 | +}; | |
503 | +static struct change_member change_point_list[2*E820MAX] __initdata; | |
504 | +static struct change_member *change_point[2*E820MAX] __initdata; | |
505 | +static struct e820entry *overlap_list[E820MAX] __initdata; | |
506 | +static struct e820entry new_bios[E820MAX] __initdata; | |
507 | +/* For PCI or other memory-mapped resources */ | |
508 | +unsigned long pci_mem_start = 0x10000000; | |
509 | +#ifdef CONFIG_PCI | |
510 | +EXPORT_SYMBOL(pci_mem_start); | |
511 | +#endif | |
512 | +extern int user_defined_memmap; | |
513 | +struct resource data_resource = { | |
514 | + .name = "Kernel data", | |
515 | + .start = 0, | |
516 | + .end = 0, | |
517 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
518 | +}; | |
519 | + | |
520 | +struct resource code_resource = { | |
521 | + .name = "Kernel code", | |
522 | + .start = 0, | |
523 | + .end = 0, | |
524 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
525 | +}; | |
526 | + | |
527 | +static struct resource system_rom_resource = { | |
528 | + .name = "System ROM", | |
529 | + .start = 0xf0000, | |
530 | + .end = 0xfffff, | |
531 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
532 | +}; | |
533 | + | |
534 | +static struct resource extension_rom_resource = { | |
535 | + .name = "Extension ROM", | |
536 | + .start = 0xe0000, | |
537 | + .end = 0xeffff, | |
538 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
539 | +}; | |
540 | + | |
541 | +static struct resource adapter_rom_resources[] = { { | |
542 | + .name = "Adapter ROM", | |
543 | + .start = 0xc8000, | |
544 | + .end = 0, | |
545 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
546 | +}, { | |
547 | + .name = "Adapter ROM", | |
548 | + .start = 0, | |
549 | + .end = 0, | |
550 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
551 | +}, { | |
552 | + .name = "Adapter ROM", | |
553 | + .start = 0, | |
554 | + .end = 0, | |
555 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
556 | +}, { | |
557 | + .name = "Adapter ROM", | |
558 | + .start = 0, | |
559 | + .end = 0, | |
560 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
561 | +}, { | |
562 | + .name = "Adapter ROM", | |
563 | + .start = 0, | |
564 | + .end = 0, | |
565 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
566 | +}, { | |
567 | + .name = "Adapter ROM", | |
568 | + .start = 0, | |
569 | + .end = 0, | |
570 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
571 | +} }; | |
572 | + | |
573 | +static struct resource video_rom_resource = { | |
574 | + .name = "Video ROM", | |
575 | + .start = 0xc0000, | |
576 | + .end = 0xc7fff, | |
577 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
578 | +}; | |
579 | + | |
580 | +static struct resource video_ram_resource = { | |
581 | + .name = "Video RAM area", | |
582 | + .start = 0xa0000, | |
583 | + .end = 0xbffff, | |
584 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
585 | +}; | |
586 | + | |
587 | +static struct resource standard_io_resources[] = { { | |
588 | + .name = "dma1", | |
589 | + .start = 0x0000, | |
590 | + .end = 0x001f, | |
591 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
592 | +}, { | |
593 | + .name = "pic1", | |
594 | + .start = 0x0020, | |
595 | + .end = 0x0021, | |
596 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
597 | +}, { | |
598 | + .name = "timer0", | |
599 | + .start = 0x0040, | |
600 | + .end = 0x0043, | |
601 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
602 | +}, { | |
603 | + .name = "timer1", | |
604 | + .start = 0x0050, | |
605 | + .end = 0x0053, | |
606 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
607 | +}, { | |
608 | + .name = "keyboard", | |
609 | + .start = 0x0060, | |
610 | + .end = 0x006f, | |
611 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
612 | +}, { | |
613 | + .name = "dma page reg", | |
614 | + .start = 0x0080, | |
615 | + .end = 0x008f, | |
616 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
617 | +}, { | |
618 | + .name = "pic2", | |
619 | + .start = 0x00a0, | |
620 | + .end = 0x00a1, | |
621 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
622 | +}, { | |
623 | + .name = "dma2", | |
624 | + .start = 0x00c0, | |
625 | + .end = 0x00df, | |
626 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
627 | +}, { | |
628 | + .name = "fpu", | |
629 | + .start = 0x00f0, | |
630 | + .end = 0x00ff, | |
631 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
632 | +} }; | |
633 | + | |
634 | +static int romsignature(const unsigned char *x) | |
635 | +{ | |
636 | + unsigned short sig; | |
637 | + int ret = 0; | |
638 | + if (probe_kernel_address((const unsigned short *)x, sig) == 0) | |
639 | + ret = (sig == 0xaa55); | |
640 | + return ret; | |
641 | +} | |
642 | + | |
643 | +static int __init romchecksum(unsigned char *rom, unsigned long length) | |
644 | +{ | |
645 | + unsigned char *p, sum = 0; | |
646 | + | |
647 | + for (p = rom; p < rom + length; p++) | |
648 | + sum += *p; | |
649 | + return sum == 0; | |
650 | +} | |
651 | + | |
652 | +static void __init probe_roms(void) | |
653 | +{ | |
654 | + unsigned long start, length, upper; | |
655 | + unsigned char *rom; | |
656 | + int i; | |
657 | + | |
658 | +#ifdef CONFIG_XEN | |
659 | + /* Nothing to do if not running in dom0. */ | |
660 | + if (!is_initial_xendomain()) | |
661 | + return; | |
662 | +#endif | |
663 | + | |
664 | + /* video rom */ | |
665 | + upper = adapter_rom_resources[0].start; | |
666 | + for (start = video_rom_resource.start; start < upper; start += 2048) { | |
667 | + rom = isa_bus_to_virt(start); | |
668 | + if (!romsignature(rom)) | |
669 | + continue; | |
670 | + | |
671 | + video_rom_resource.start = start; | |
672 | + | |
673 | + /* 0 < length <= 0x7f * 512, historically */ | |
674 | + length = rom[2] * 512; | |
675 | + | |
676 | + /* if checksum okay, trust length byte */ | |
677 | + if (length && romchecksum(rom, length)) | |
678 | + video_rom_resource.end = start + length - 1; | |
679 | + | |
680 | + request_resource(&iomem_resource, &video_rom_resource); | |
681 | + break; | |
682 | + } | |
683 | + | |
684 | + start = (video_rom_resource.end + 1 + 2047) & ~2047UL; | |
685 | + if (start < upper) | |
686 | + start = upper; | |
687 | + | |
688 | + /* system rom */ | |
689 | + request_resource(&iomem_resource, &system_rom_resource); | |
690 | + upper = system_rom_resource.start; | |
691 | + | |
692 | + /* check for extension rom (ignore length byte!) */ | |
693 | + rom = isa_bus_to_virt((unsigned long)extension_rom_resource.start); | |
694 | + if (romsignature(rom)) { | |
695 | + length = extension_rom_resource.end - extension_rom_resource.start + 1; | |
696 | + if (romchecksum(rom, length)) { | |
697 | + request_resource(&iomem_resource, &extension_rom_resource); | |
698 | + upper = extension_rom_resource.start; | |
699 | + } | |
700 | + } | |
701 | + | |
702 | + /* check for adapter roms on 2k boundaries */ | |
703 | + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { | |
704 | + rom = isa_bus_to_virt(start); | |
705 | + if (!romsignature(rom)) | |
706 | + continue; | |
707 | + | |
708 | + /* 0 < length <= 0x7f * 512, historically */ | |
709 | + length = rom[2] * 512; | |
710 | + | |
711 | + /* but accept any length that fits if checksum okay */ | |
712 | + if (!length || start + length > upper || !romchecksum(rom, length)) | |
713 | + continue; | |
714 | + | |
715 | + adapter_rom_resources[i].start = start; | |
716 | + adapter_rom_resources[i].end = start + length - 1; | |
717 | + request_resource(&iomem_resource, &adapter_rom_resources[i]); | |
718 | + | |
719 | + start = adapter_rom_resources[i++].end & ~2047UL; | |
720 | + } | |
721 | +} | |
722 | + | |
723 | +#ifdef CONFIG_XEN | |
724 | +static struct e820map machine_e820 __initdata; | |
725 | +#define e820 machine_e820 | |
726 | +#endif | |
727 | + | |
728 | +/* | |
729 | + * Request address space for all standard RAM and ROM resources | |
730 | + * and also for regions reported as reserved by the e820. | |
731 | + */ | |
732 | +static void __init | |
733 | +legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) | |
734 | +{ | |
735 | + int i; | |
736 | + | |
737 | + probe_roms(); | |
738 | + for (i = 0; i < e820.nr_map; i++) { | |
739 | + struct resource *res; | |
740 | +#ifndef CONFIG_RESOURCES_64BIT | |
741 | + if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) | |
742 | + continue; | |
743 | +#endif | |
744 | + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); | |
745 | + switch (e820.map[i].type) { | |
746 | + case E820_RAM: res->name = "System RAM"; break; | |
747 | + case E820_ACPI: res->name = "ACPI Tables"; break; | |
748 | + case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; | |
749 | + default: res->name = "reserved"; | |
750 | + } | |
751 | + res->start = e820.map[i].addr; | |
752 | + res->end = res->start + e820.map[i].size - 1; | |
753 | + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | |
754 | + if (request_resource(&iomem_resource, res)) { | |
755 | + kfree(res); | |
756 | + continue; | |
757 | + } | |
758 | + if (e820.map[i].type == E820_RAM) { | |
759 | + /* | |
760 | + * We don't know which RAM region contains kernel data, | |
761 | + * so we try it repeatedly and let the resource manager | |
762 | + * test it. | |
763 | + */ | |
764 | +#ifndef CONFIG_XEN | |
765 | + request_resource(res, code_resource); | |
766 | + request_resource(res, data_resource); | |
767 | +#endif | |
768 | +#ifdef CONFIG_KEXEC | |
769 | + request_resource(res, &crashk_res); | |
770 | +#ifdef CONFIG_XEN | |
771 | + xen_machine_kexec_register_resources(res); | |
772 | +#endif | |
773 | +#endif | |
774 | + } | |
775 | + } | |
776 | +} | |
777 | + | |
778 | +#undef e820 | |
779 | + | |
780 | +/* | |
781 | + * Request address space for all standard resources | |
782 | + * | |
783 | + * This is called just before pcibios_init(), which is also a | |
784 | + * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). | |
785 | + */ | |
786 | +static int __init request_standard_resources(void) | |
787 | +{ | |
788 | + int i; | |
789 | + | |
790 | + /* Nothing to do if not running in dom0. */ | |
791 | + if (!is_initial_xendomain()) | |
792 | + return 0; | |
793 | + | |
794 | + printk("Setting up standard PCI resources\n"); | |
795 | + if (efi_enabled) | |
796 | + efi_initialize_iomem_resources(&code_resource, &data_resource); | |
797 | + else | |
798 | + legacy_init_iomem_resources(&code_resource, &data_resource); | |
799 | + | |
800 | + /* EFI systems may still have VGA */ | |
801 | + request_resource(&iomem_resource, &video_ram_resource); | |
802 | + | |
803 | + /* request I/O space for devices used on all i[345]86 PCs */ | |
804 | + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | |
805 | + request_resource(&ioport_resource, &standard_io_resources[i]); | |
806 | + return 0; | |
807 | +} | |
808 | + | |
809 | +subsys_initcall(request_standard_resources); | |
810 | + | |
811 | +void __init add_memory_region(unsigned long long start, | |
812 | + unsigned long long size, int type) | |
813 | +{ | |
814 | + int x; | |
815 | + | |
816 | + if (!efi_enabled) { | |
817 | + x = e820.nr_map; | |
818 | + | |
819 | + if (x == E820MAX) { | |
820 | + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | |
821 | + return; | |
822 | + } | |
823 | + | |
824 | + e820.map[x].addr = start; | |
825 | + e820.map[x].size = size; | |
826 | + e820.map[x].type = type; | |
827 | + e820.nr_map++; | |
828 | + } | |
829 | +} /* add_memory_region */ | |
830 | + | |
831 | +/* | |
832 | + * Sanitize the BIOS e820 map. | |
833 | + * | |
834 | + * Some e820 responses include overlapping entries. The following | |
835 | + * replaces the original e820 map with a new one, removing overlaps. | |
836 | + * | |
837 | + */ | |
838 | +int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | |
839 | +{ | |
840 | + struct change_member *change_tmp; | |
841 | + unsigned long current_type, last_type; | |
842 | + unsigned long long last_addr; | |
843 | + int chgidx, still_changing; | |
844 | + int overlap_entries; | |
845 | + int new_bios_entry; | |
846 | + int old_nr, new_nr, chg_nr; | |
847 | + int i; | |
848 | + | |
849 | + /* | |
850 | + Visually we're performing the following (1,2,3,4 = memory types)... | |
851 | + | |
852 | + Sample memory map (w/overlaps): | |
853 | + ____22__________________ | |
854 | + ______________________4_ | |
855 | + ____1111________________ | |
856 | + _44_____________________ | |
857 | + 11111111________________ | |
858 | + ____________________33__ | |
859 | + ___________44___________ | |
860 | + __________33333_________ | |
861 | + ______________22________ | |
862 | + ___________________2222_ | |
863 | + _________111111111______ | |
864 | + _____________________11_ | |
865 | + _________________4______ | |
866 | + | |
867 | + Sanitized equivalent (no overlap): | |
868 | + 1_______________________ | |
869 | + _44_____________________ | |
870 | + ___1____________________ | |
871 | + ____22__________________ | |
872 | + ______11________________ | |
873 | + _________1______________ | |
874 | + __________3_____________ | |
875 | + ___________44___________ | |
876 | + _____________33_________ | |
877 | + _______________2________ | |
878 | + ________________1_______ | |
879 | + _________________4______ | |
880 | + ___________________2____ | |
881 | + ____________________33__ | |
882 | + ______________________4_ | |
883 | + */ | |
884 | + printk("sanitize start\n"); | |
885 | + /* if there's only one memory region, don't bother */ | |
886 | + if (*pnr_map < 2) { | |
887 | + printk("sanitize bail 0\n"); | |
888 | + return -1; | |
889 | + } | |
890 | + | |
891 | + old_nr = *pnr_map; | |
892 | + | |
893 | + /* bail out if we find any unreasonable addresses in bios map */ | |
894 | + for (i=0; i<old_nr; i++) | |
895 | + if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { | |
896 | + printk("sanitize bail 1\n"); | |
897 | + return -1; | |
898 | + } | |
899 | + | |
900 | + /* create pointers for initial change-point information (for sorting) */ | |
901 | + for (i=0; i < 2*old_nr; i++) | |
902 | + change_point[i] = &change_point_list[i]; | |
903 | + | |
904 | + /* record all known change-points (starting and ending addresses), | |
905 | + omitting those that are for empty memory regions */ | |
906 | + chgidx = 0; | |
907 | + for (i=0; i < old_nr; i++) { | |
908 | + if (biosmap[i].size != 0) { | |
909 | + change_point[chgidx]->addr = biosmap[i].addr; | |
910 | + change_point[chgidx++]->pbios = &biosmap[i]; | |
911 | + change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; | |
912 | + change_point[chgidx++]->pbios = &biosmap[i]; | |
913 | + } | |
914 | + } | |
915 | + chg_nr = chgidx; /* true number of change-points */ | |
916 | + | |
917 | + /* sort change-point list by memory addresses (low -> high) */ | |
918 | + still_changing = 1; | |
919 | + while (still_changing) { | |
920 | + still_changing = 0; | |
921 | + for (i=1; i < chg_nr; i++) { | |
922 | + /* if <current_addr> > <last_addr>, swap */ | |
923 | + /* or, if current=<start_addr> & last=<end_addr>, swap */ | |
924 | + if ((change_point[i]->addr < change_point[i-1]->addr) || | |
925 | + ((change_point[i]->addr == change_point[i-1]->addr) && | |
926 | + (change_point[i]->addr == change_point[i]->pbios->addr) && | |
927 | + (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) | |
928 | + ) | |
929 | + { | |
930 | + change_tmp = change_point[i]; | |
931 | + change_point[i] = change_point[i-1]; | |
932 | + change_point[i-1] = change_tmp; | |
933 | + still_changing=1; | |
934 | + } | |
935 | + } | |
936 | + } | |
937 | + | |
938 | + /* create a new bios memory map, removing overlaps */ | |
939 | + overlap_entries=0; /* number of entries in the overlap table */ | |
940 | + new_bios_entry=0; /* index for creating new bios map entries */ | |
941 | + last_type = 0; /* start with undefined memory type */ | |
942 | + last_addr = 0; /* start with 0 as last starting address */ | |
943 | + /* loop through change-points, determining affect on the new bios map */ | |
944 | + for (chgidx=0; chgidx < chg_nr; chgidx++) | |
945 | + { | |
946 | + /* keep track of all overlapping bios entries */ | |
947 | + if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) | |
948 | + { | |
949 | + /* add map entry to overlap list (> 1 entry implies an overlap) */ | |
950 | + overlap_list[overlap_entries++]=change_point[chgidx]->pbios; | |
951 | + } | |
952 | + else | |
953 | + { | |
954 | + /* remove entry from list (order independent, so swap with last) */ | |
955 | + for (i=0; i<overlap_entries; i++) | |
956 | + { | |
957 | + if (overlap_list[i] == change_point[chgidx]->pbios) | |
958 | + overlap_list[i] = overlap_list[overlap_entries-1]; | |
959 | + } | |
960 | + overlap_entries--; | |
961 | + } | |
962 | + /* if there are overlapping entries, decide which "type" to use */ | |
963 | + /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ | |
964 | + current_type = 0; | |
965 | + for (i=0; i<overlap_entries; i++) | |
966 | + if (overlap_list[i]->type > current_type) | |
967 | + current_type = overlap_list[i]->type; | |
968 | + /* continue building up new bios map based on this information */ | |
969 | + if (current_type != last_type) { | |
970 | + if (last_type != 0) { | |
971 | + new_bios[new_bios_entry].size = | |
972 | + change_point[chgidx]->addr - last_addr; | |
973 | + /* move forward only if the new size was non-zero */ | |
974 | + if (new_bios[new_bios_entry].size != 0) | |
975 | + if (++new_bios_entry >= E820MAX) | |
976 | + break; /* no more space left for new bios entries */ | |
977 | + } | |
978 | + if (current_type != 0) { | |
979 | + new_bios[new_bios_entry].addr = change_point[chgidx]->addr; | |
980 | + new_bios[new_bios_entry].type = current_type; | |
981 | + last_addr=change_point[chgidx]->addr; | |
982 | + } | |
983 | + last_type = current_type; | |
984 | + } | |
985 | + } | |
986 | + new_nr = new_bios_entry; /* retain count for new bios entries */ | |
987 | + | |
988 | + /* copy new bios mapping into original location */ | |
989 | + memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); | |
990 | + *pnr_map = new_nr; | |
991 | + | |
992 | + printk("sanitize end\n"); | |
993 | + return 0; | |
994 | +} | |
995 | + | |
996 | +/* | |
997 | + * Copy the BIOS e820 map into a safe place. | |
998 | + * | |
999 | + * Sanity-check it while we're at it.. | |
1000 | + * | |
1001 | + * If we're lucky and live on a modern system, the setup code | |
1002 | + * will have given us a memory map that we can use to properly | |
1003 | + * set up memory. If we aren't, we'll fake a memory map. | |
1004 | + * | |
1005 | + * We check to see that the memory map contains at least 2 elements | |
1006 | + * before we'll use it, because the detection code in setup.S may | |
1007 | + * not be perfect and most every PC known to man has two memory | |
1008 | + * regions: one from 0 to 640k, and one from 1mb up. (The IBM | |
1009 | + * thinkpad 560x, for example, does not cooperate with the memory | |
1010 | + * detection code.) | |
1011 | + */ | |
1012 | +int __init copy_e820_map(struct e820entry * biosmap, int nr_map) | |
1013 | +{ | |
1014 | +#ifndef CONFIG_XEN | |
1015 | + /* Only one memory region (or negative)? Ignore it */ | |
1016 | + if (nr_map < 2) | |
1017 | + return -1; | |
1018 | +#else | |
1019 | + BUG_ON(nr_map < 1); | |
1020 | +#endif | |
1021 | + | |
1022 | + do { | |
1023 | + unsigned long long start = biosmap->addr; | |
1024 | + unsigned long long size = biosmap->size; | |
1025 | + unsigned long long end = start + size; | |
1026 | + unsigned long type = biosmap->type; | |
1027 | + printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type); | |
1028 | + | |
1029 | + /* Overflow in 64 bits? Ignore the memory map. */ | |
1030 | + if (start > end) | |
1031 | + return -1; | |
1032 | + | |
1033 | +#ifndef CONFIG_XEN | |
1034 | + /* | |
1035 | + * Some BIOSes claim RAM in the 640k - 1M region. | |
1036 | + * Not right. Fix it up. | |
1037 | + */ | |
1038 | + if (type == E820_RAM) { | |
1039 | + printk("copy_e820_map() type is E820_RAM\n"); | |
1040 | + if (start < 0x100000ULL && end > 0xA0000ULL) { | |
1041 | + printk("copy_e820_map() lies in range...\n"); | |
1042 | + if (start < 0xA0000ULL) { | |
1043 | + printk("copy_e820_map() start < 0xA0000ULL\n"); | |
1044 | + add_memory_region(start, 0xA0000ULL-start, type); | |
1045 | + } | |
1046 | + if (end <= 0x100000ULL) { | |
1047 | + printk("copy_e820_map() end <= 0x100000ULL\n"); | |
1048 | + continue; | |
1049 | + } | |
1050 | + start = 0x100000ULL; | |
1051 | + size = end - start; | |
1052 | + } | |
1053 | + } | |
1054 | +#endif | |
1055 | + add_memory_region(start, size, type); | |
1056 | + } while (biosmap++,--nr_map); | |
1057 | + | |
1058 | +#ifdef CONFIG_XEN | |
1059 | + if (is_initial_xendomain()) { | |
1060 | + struct xen_memory_map memmap; | |
1061 | + | |
1062 | + memmap.nr_entries = E820MAX; | |
1063 | + set_xen_guest_handle(memmap.buffer, machine_e820.map); | |
1064 | + | |
1065 | + if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)) | |
1066 | + BUG(); | |
1067 | + machine_e820.nr_map = memmap.nr_entries; | |
1068 | + } else | |
1069 | + machine_e820 = e820; | |
1070 | +#endif | |
1071 | + | |
1072 | + return 0; | |
1073 | +} | |
1074 | + | |
1075 | +/* | |
1076 | + * Callback for efi_memory_walk. | |
1077 | + */ | |
1078 | +static int __init | |
1079 | +efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) | |
1080 | +{ | |
1081 | + unsigned long *max_pfn = arg, pfn; | |
1082 | + | |
1083 | + if (start < end) { | |
1084 | + pfn = PFN_UP(end -1); | |
1085 | + if (pfn > *max_pfn) | |
1086 | + *max_pfn = pfn; | |
1087 | + } | |
1088 | + return 0; | |
1089 | +} | |
1090 | + | |
1091 | +static int __init | |
1092 | +efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) | |
1093 | +{ | |
1094 | + memory_present(0, PFN_UP(start), PFN_DOWN(end)); | |
1095 | + return 0; | |
1096 | +} | |
1097 | + | |
1098 | +/* | |
1099 | + * Find the highest page frame number we have available | |
1100 | + */ | |
1101 | +void __init find_max_pfn(void) | |
1102 | +{ | |
1103 | + int i; | |
1104 | + | |
1105 | + max_pfn = 0; | |
1106 | + if (efi_enabled) { | |
1107 | + efi_memmap_walk(efi_find_max_pfn, &max_pfn); | |
1108 | + efi_memmap_walk(efi_memory_present_wrapper, NULL); | |
1109 | + return; | |
1110 | + } | |
1111 | + | |
1112 | + for (i = 0; i < e820.nr_map; i++) { | |
1113 | + unsigned long start, end; | |
1114 | + /* RAM? */ | |
1115 | + if (e820.map[i].type != E820_RAM) | |
1116 | + continue; | |
1117 | + start = PFN_UP(e820.map[i].addr); | |
1118 | + end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | |
1119 | + if (start >= end) | |
1120 | + continue; | |
1121 | + if (end > max_pfn) | |
1122 | + max_pfn = end; | |
1123 | + memory_present(0, start, end); | |
1124 | + } | |
1125 | +} | |
1126 | + | |
1127 | +/* | |
1128 | + * Free all available memory for boot time allocation. Used | |
1129 | + * as a callback function by efi_memory_walk() | |
1130 | + */ | |
1131 | + | |
1132 | +static int __init | |
1133 | +free_available_memory(unsigned long start, unsigned long end, void *arg) | |
1134 | +{ | |
1135 | + /* check max_low_pfn */ | |
1136 | + if (start >= (max_low_pfn << PAGE_SHIFT)) | |
1137 | + return 0; | |
1138 | + if (end >= (max_low_pfn << PAGE_SHIFT)) | |
1139 | + end = max_low_pfn << PAGE_SHIFT; | |
1140 | + if (start < end) | |
1141 | + free_bootmem(start, end - start); | |
1142 | + | |
1143 | + return 0; | |
1144 | +} | |
1145 | +/* | |
1146 | + * Register fully available low RAM pages with the bootmem allocator. | |
1147 | + */ | |
1148 | +void __init register_bootmem_low_pages(unsigned long max_low_pfn) | |
1149 | +{ | |
1150 | + int i; | |
1151 | + | |
1152 | + if (efi_enabled) { | |
1153 | + efi_memmap_walk(free_available_memory, NULL); | |
1154 | + return; | |
1155 | + } | |
1156 | + for (i = 0; i < e820.nr_map; i++) { | |
1157 | + unsigned long curr_pfn, last_pfn, size; | |
1158 | + /* | |
1159 | + * Reserve usable low memory | |
1160 | + */ | |
1161 | + if (e820.map[i].type != E820_RAM) | |
1162 | + continue; | |
1163 | + /* | |
1164 | + * We are rounding up the start address of usable memory: | |
1165 | + */ | |
1166 | + curr_pfn = PFN_UP(e820.map[i].addr); | |
1167 | + if (curr_pfn >= max_low_pfn) | |
1168 | + continue; | |
1169 | + /* | |
1170 | + * ... and at the end of the usable range downwards: | |
1171 | + */ | |
1172 | + last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | |
1173 | + | |
1174 | +#ifdef CONFIG_XEN | |
1175 | + /* | |
1176 | + * Truncate to the number of actual pages currently | |
1177 | + * present. | |
1178 | + */ | |
1179 | + if (last_pfn > xen_start_info->nr_pages) | |
1180 | + last_pfn = xen_start_info->nr_pages; | |
1181 | +#endif | |
1182 | + | |
1183 | + if (last_pfn > max_low_pfn) | |
1184 | + last_pfn = max_low_pfn; | |
1185 | + | |
1186 | + /* | |
1187 | + * .. finally, did all the rounding and playing | |
1188 | + * around just make the area go away? | |
1189 | + */ | |
1190 | + if (last_pfn <= curr_pfn) | |
1191 | + continue; | |
1192 | + | |
1193 | + size = last_pfn - curr_pfn; | |
1194 | + free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); | |
1195 | + } | |
1196 | +} | |
1197 | + | |
1198 | +void __init e820_register_memory(void) | |
1199 | +{ | |
1200 | + unsigned long gapstart, gapsize, round; | |
1201 | + unsigned long long last; | |
1202 | + int i; | |
1203 | + | |
1204 | +#ifdef CONFIG_XEN | |
1205 | + if (is_initial_xendomain()) { | |
1206 | + struct xen_memory_map memmap; | |
1207 | + | |
1208 | + memmap.nr_entries = E820MAX; | |
1209 | + set_xen_guest_handle(memmap.buffer, machine_e820.map); | |
1210 | + | |
1211 | + if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)) | |
1212 | + BUG(); | |
1213 | + machine_e820.nr_map = memmap.nr_entries; | |
1214 | + } | |
1215 | + else | |
1216 | + machine_e820 = e820; | |
1217 | +#define e820 machine_e820 | |
1218 | +#endif | |
1219 | + | |
1220 | + /* | |
1221 | + * Search for the bigest gap in the low 32 bits of the e820 | |
1222 | + * memory space. | |
1223 | + */ | |
1224 | + last = 0x100000000ull; | |
1225 | + gapstart = 0x10000000; | |
1226 | + gapsize = 0x400000; | |
1227 | + i = e820.nr_map; | |
1228 | + while (--i >= 0) { | |
1229 | + unsigned long long start = e820.map[i].addr; | |
1230 | + unsigned long long end = start + e820.map[i].size; | |
1231 | + | |
1232 | + /* | |
1233 | + * Since "last" is at most 4GB, we know we'll | |
1234 | + * fit in 32 bits if this condition is true | |
1235 | + */ | |
1236 | + if (last > end) { | |
1237 | + unsigned long gap = last - end; | |
1238 | + | |
1239 | + if (gap > gapsize) { | |
1240 | + gapsize = gap; | |
1241 | + gapstart = end; | |
1242 | + } | |
1243 | + } | |
1244 | + if (start < last) | |
1245 | + last = start; | |
1246 | + } | |
1247 | +#undef e820 | |
1248 | + | |
1249 | + /* | |
1250 | + * See how much we want to round up: start off with | |
1251 | + * rounding to the next 1MB area. | |
1252 | + */ | |
1253 | + round = 0x100000; | |
1254 | + while ((gapsize >> 4) > round) | |
1255 | + round += round; | |
1256 | + /* Fun with two's complement */ | |
1257 | + pci_mem_start = (gapstart + round) & -round; | |
1258 | + | |
1259 | + printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", | |
1260 | + pci_mem_start, gapstart, gapsize); | |
1261 | +} | |
1262 | + | |
1263 | +void __init print_memory_map(char *who) | |
1264 | +{ | |
1265 | + int i; | |
1266 | + | |
1267 | + for (i = 0; i < e820.nr_map; i++) { | |
1268 | + printk(" %s: %016Lx - %016Lx ", who, | |
1269 | + e820.map[i].addr, | |
1270 | + e820.map[i].addr + e820.map[i].size); | |
1271 | + switch (e820.map[i].type) { | |
1272 | + case E820_RAM: printk("(usable)\n"); | |
1273 | + break; | |
1274 | + case E820_RESERVED: | |
1275 | + printk("(reserved)\n"); | |
1276 | + break; | |
1277 | + case E820_ACPI: | |
1278 | + printk("(ACPI data)\n"); | |
1279 | + break; | |
1280 | + case E820_NVS: | |
1281 | + printk("(ACPI NVS)\n"); | |
1282 | + break; | |
1283 | + default: printk("type %lu\n", e820.map[i].type); | |
1284 | + break; | |
1285 | + } | |
1286 | + } | |
1287 | +} | |
1288 | + | |
1289 | +static __init __always_inline void efi_limit_regions(unsigned long long size) | |
1290 | +{ | |
1291 | + unsigned long long current_addr = 0; | |
1292 | + efi_memory_desc_t *md, *next_md; | |
1293 | + void *p, *p1; | |
1294 | + int i, j; | |
1295 | + | |
1296 | + j = 0; | |
1297 | + p1 = memmap.map; | |
1298 | + for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { | |
1299 | + md = p; | |
1300 | + next_md = p1; | |
1301 | + current_addr = md->phys_addr + | |
1302 | + PFN_PHYS(md->num_pages); | |
1303 | + if (is_available_memory(md)) { | |
1304 | + if (md->phys_addr >= size) continue; | |
1305 | + memcpy(next_md, md, memmap.desc_size); | |
1306 | + if (current_addr >= size) { | |
1307 | + next_md->num_pages -= | |
1308 | + PFN_UP(current_addr-size); | |
1309 | + } | |
1310 | + p1 += memmap.desc_size; | |
1311 | + next_md = p1; | |
1312 | + j++; | |
1313 | + } else if ((md->attribute & EFI_MEMORY_RUNTIME) == | |
1314 | + EFI_MEMORY_RUNTIME) { | |
1315 | + /* In order to make runtime services | |
1316 | + * available we have to include runtime | |
1317 | + * memory regions in memory map */ | |
1318 | + memcpy(next_md, md, memmap.desc_size); | |
1319 | + p1 += memmap.desc_size; | |
1320 | + next_md = p1; | |
1321 | + j++; | |
1322 | + } | |
1323 | + } | |
1324 | + memmap.nr_map = j; | |
1325 | + memmap.map_end = memmap.map + | |
1326 | + (memmap.nr_map * memmap.desc_size); | |
1327 | +} | |
1328 | + | |
1329 | +void __init limit_regions(unsigned long long size) | |
1330 | +{ | |
1331 | + unsigned long long current_addr = 0; | |
1332 | + int i; | |
1333 | + | |
1334 | + print_memory_map("limit_regions start"); | |
1335 | + if (efi_enabled) { | |
1336 | + efi_limit_regions(size); | |
1337 | + return; | |
1338 | + } | |
1339 | + for (i = 0; i < e820.nr_map; i++) { | |
1340 | + current_addr = e820.map[i].addr + e820.map[i].size; | |
1341 | + if (current_addr < size) | |
1342 | + continue; | |
1343 | + | |
1344 | + if (e820.map[i].type != E820_RAM) | |
1345 | + continue; | |
1346 | + | |
1347 | + if (e820.map[i].addr >= size) { | |
1348 | + /* | |
1349 | + * This region starts past the end of the | |
1350 | + * requested size, skip it completely. | |
1351 | + */ | |
1352 | + e820.nr_map = i; | |
1353 | + } else { | |
1354 | + e820.nr_map = i + 1; | |
1355 | + e820.map[i].size -= current_addr - size; | |
1356 | + } | |
1357 | + print_memory_map("limit_regions endfor"); | |
1358 | + return; | |
1359 | + } | |
1360 | +#ifdef CONFIG_XEN | |
1361 | + if (current_addr < size) { | |
1362 | + /* | |
1363 | + * The e820 map finished before our requested size so | |
1364 | + * extend the final entry to the requested address. | |
1365 | + */ | |
1366 | + --i; | |
1367 | + if (e820.map[i].type == E820_RAM) | |
1368 | + e820.map[i].size -= current_addr - size; | |
1369 | + else | |
1370 | + add_memory_region(current_addr, size - current_addr, E820_RAM); | |
1371 | + } | |
1372 | +#endif | |
1373 | + print_memory_map("limit_regions endfunc"); | |
1374 | +} | |
1375 | + | |
1376 | +/* | |
1377 | + * This function checks if any part of the range <start,end> is mapped | |
1378 | + * with type. | |
1379 | + */ | |
1380 | +int | |
1381 | +e820_any_mapped(u64 start, u64 end, unsigned type) | |
1382 | +{ | |
1383 | + int i; | |
1384 | + | |
1385 | +#ifndef CONFIG_XEN | |
1386 | + for (i = 0; i < e820.nr_map; i++) { | |
1387 | + const struct e820entry *ei = &e820.map[i]; | |
1388 | +#else | |
1389 | + if (!is_initial_xendomain()) | |
1390 | + return 0; | |
1391 | + for (i = 0; i < machine_e820.nr_map; ++i) { | |
1392 | + const struct e820entry *ei = &machine_e820.map[i]; | |
1393 | +#endif | |
1394 | + | |
1395 | + if (type && ei->type != type) | |
1396 | + continue; | |
1397 | + if (ei->addr >= end || ei->addr + ei->size <= start) | |
1398 | + continue; | |
1399 | + return 1; | |
1400 | + } | |
1401 | + return 0; | |
1402 | +} | |
1403 | +EXPORT_SYMBOL_GPL(e820_any_mapped); | |
1404 | + | |
1405 | + /* | |
1406 | + * This function checks if the entire range <start,end> is mapped with type. | |
1407 | + * | |
1408 | + * Note: this function only works correct if the e820 table is sorted and | |
1409 | + * not-overlapping, which is the case | |
1410 | + */ | |
1411 | +int __init | |
1412 | +e820_all_mapped(unsigned long s, unsigned long e, unsigned type) | |
1413 | +{ | |
1414 | + u64 start = s; | |
1415 | + u64 end = e; | |
1416 | + int i; | |
1417 | + | |
1418 | +#ifndef CONFIG_XEN | |
1419 | + for (i = 0; i < e820.nr_map; i++) { | |
1420 | + struct e820entry *ei = &e820.map[i]; | |
1421 | +#else | |
1422 | + if (!is_initial_xendomain()) | |
1423 | + return 0; | |
1424 | + for (i = 0; i < machine_e820.nr_map; ++i) { | |
1425 | + const struct e820entry *ei = &machine_e820.map[i]; | |
1426 | +#endif | |
1427 | + | |
1428 | + if (type && ei->type != type) | |
1429 | + continue; | |
1430 | + /* is the region (part) in overlap with the current region ?*/ | |
1431 | + if (ei->addr >= end || ei->addr + ei->size <= start) | |
1432 | + continue; | |
1433 | + /* if the region is at the beginning of <start,end> we move | |
1434 | + * start to the end of the region since it's ok until there | |
1435 | + */ | |
1436 | + if (ei->addr <= start) | |
1437 | + start = ei->addr + ei->size; | |
1438 | + /* if start is now at or beyond end, we're done, full | |
1439 | + * coverage */ | |
1440 | + if (start >= end) | |
1441 | + return 1; /* we're done */ | |
1442 | + } | |
1443 | + return 0; | |
1444 | +} | |
1445 | + | |
1446 | +static int __init parse_memmap(char *arg) | |
1447 | +{ | |
1448 | + if (!arg) | |
1449 | + return -EINVAL; | |
1450 | + | |
1451 | + if (strcmp(arg, "exactmap") == 0) { | |
1452 | +#ifdef CONFIG_CRASH_DUMP | |
1453 | + /* If we are doing a crash dump, we | |
1454 | + * still need to know the real mem | |
1455 | + * size before original memory map is | |
1456 | + * reset. | |
1457 | + */ | |
1458 | + find_max_pfn(); | |
1459 | + saved_max_pfn = max_pfn; | |
1460 | +#endif | |
1461 | + e820.nr_map = 0; | |
1462 | + user_defined_memmap = 1; | |
1463 | + } else { | |
1464 | + /* If the user specifies memory size, we | |
1465 | + * limit the BIOS-provided memory map to | |
1466 | + * that size. exactmap can be used to specify | |
1467 | + * the exact map. mem=number can be used to | |
1468 | + * trim the existing memory map. | |
1469 | + */ | |
1470 | + unsigned long long start_at, mem_size; | |
1471 | + | |
1472 | + mem_size = memparse(arg, &arg); | |
1473 | + if (*arg == '@') { | |
1474 | + start_at = memparse(arg+1, &arg); | |
1475 | + add_memory_region(start_at, mem_size, E820_RAM); | |
1476 | + } else if (*arg == '#') { | |
1477 | + start_at = memparse(arg+1, &arg); | |
1478 | + add_memory_region(start_at, mem_size, E820_ACPI); | |
1479 | + } else if (*arg == '$') { | |
1480 | + start_at = memparse(arg+1, &arg); | |
1481 | + add_memory_region(start_at, mem_size, E820_RESERVED); | |
1482 | + } else { | |
1483 | + limit_regions(mem_size); | |
1484 | + user_defined_memmap = 1; | |
1485 | + } | |
1486 | + } | |
1487 | + return 0; | |
1488 | +} | |
1489 | +early_param("memmap", parse_memmap); | |
1490 | Index: head-2008-12-01/arch/x86/kernel/entry_32-xen.S | |
1491 | =================================================================== | |
1492 | --- head-2008-12-01.orig/arch/x86/kernel/entry_32-xen.S 2008-12-01 11:29:05.000000000 +0100 | |
1493 | +++ head-2008-12-01/arch/x86/kernel/entry_32-xen.S 2008-12-01 11:32:38.000000000 +0100 | |
1494 | @@ -30,12 +30,13 @@ | |
1495 | * 18(%esp) - %eax | |
1496 | * 1C(%esp) - %ds | |
1497 | * 20(%esp) - %es | |
1498 | - * 24(%esp) - orig_eax | |
1499 | - * 28(%esp) - %eip | |
1500 | - * 2C(%esp) - %cs | |
1501 | - * 30(%esp) - %eflags | |
1502 | - * 34(%esp) - %oldesp | |
1503 | - * 38(%esp) - %oldss | |
1504 | + * 24(%esp) - %gs | |
1505 | + * 28(%esp) - orig_eax | |
1506 | + * 2C(%esp) - %eip | |
1507 | + * 30(%esp) - %cs | |
1508 | + * 34(%esp) - %eflags | |
1509 | + * 38(%esp) - %oldesp | |
1510 | + * 3C(%esp) - %oldss | |
1511 | * | |
1512 | * "current" is in register %ebx during any slow entries. | |
1513 | */ | |
1514 | @@ -48,27 +49,25 @@ | |
1515 | #include <asm/smp.h> | |
1516 | #include <asm/page.h> | |
1517 | #include <asm/desc.h> | |
1518 | +#include <asm/percpu.h> | |
1519 | #include <asm/dwarf2.h> | |
1520 | #include "irq_vectors.h" | |
1521 | #include <xen/interface/xen.h> | |
1522 | ||
1523 | -#define nr_syscalls ((syscall_table_size)/4) | |
1524 | +/* | |
1525 | + * We use macros for low-level operations which need to be overridden | |
1526 | + * for paravirtualization. The following will never clobber any registers: | |
1527 | + * INTERRUPT_RETURN (aka. "iret") | |
1528 | + * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") | |
1529 | + * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). | |
1530 | + * | |
1531 | + * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must | |
1532 | + * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). | |
1533 | + * Allowing a register to be clobbered can shrink the paravirt replacement | |
1534 | + * enough to patch inline, increasing performance. | |
1535 | + */ | |
1536 | ||
1537 | -EBX = 0x00 | |
1538 | -ECX = 0x04 | |
1539 | -EDX = 0x08 | |
1540 | -ESI = 0x0C | |
1541 | -EDI = 0x10 | |
1542 | -EBP = 0x14 | |
1543 | -EAX = 0x18 | |
1544 | -DS = 0x1C | |
1545 | -ES = 0x20 | |
1546 | -ORIG_EAX = 0x24 | |
1547 | -EIP = 0x28 | |
1548 | -CS = 0x2C | |
1549 | -EFLAGS = 0x30 | |
1550 | -OLDESP = 0x34 | |
1551 | -OLDSS = 0x38 | |
1552 | +#define nr_syscalls ((syscall_table_size)/4) | |
1553 | ||
1554 | CF_MASK = 0x00000001 | |
1555 | TF_MASK = 0x00000100 | |
1556 | @@ -79,61 +78,16 @@ VM_MASK = 0x00020000 | |
1557 | /* Pseudo-eflags. */ | |
1558 | NMI_MASK = 0x80000000 | |
1559 | ||
1560 | -#ifndef CONFIG_XEN | |
1561 | -/* These are replaces for paravirtualization */ | |
1562 | -#define DISABLE_INTERRUPTS cli | |
1563 | -#define ENABLE_INTERRUPTS sti | |
1564 | -#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit | |
1565 | -#define INTERRUPT_RETURN iret | |
1566 | -#define GET_CR0_INTO_EAX movl %cr0, %eax | |
1567 | -#else | |
1568 | -/* Offsets into shared_info_t. */ | |
1569 | -#define evtchn_upcall_pending /* 0 */ | |
1570 | -#define evtchn_upcall_mask 1 | |
1571 | - | |
1572 | -#define sizeof_vcpu_shift 6 | |
1573 | - | |
1574 | -#ifdef CONFIG_SMP | |
1575 | -#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \ | |
1576 | - shl $sizeof_vcpu_shift,%esi ; \ | |
1577 | - addl HYPERVISOR_shared_info,%esi | |
1578 | -#else | |
1579 | -#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi | |
1580 | -#endif | |
1581 | - | |
1582 | -#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) | |
1583 | -#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) | |
1584 | -#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) | |
1585 | -#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \ | |
1586 | - __DISABLE_INTERRUPTS | |
1587 | -#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \ | |
1588 | - __ENABLE_INTERRUPTS | |
1589 | -#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \ | |
1590 | -sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \ | |
1591 | - __TEST_PENDING ; \ | |
1592 | - jnz 14f # process more events if necessary... ; \ | |
1593 | - movl ESI(%esp), %esi ; \ | |
1594 | - sysexit ; \ | |
1595 | -14: __DISABLE_INTERRUPTS ; \ | |
1596 | - TRACE_IRQS_OFF ; \ | |
1597 | -sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \ | |
1598 | - push %esp ; \ | |
1599 | - call evtchn_do_upcall ; \ | |
1600 | - add $4,%esp ; \ | |
1601 | - jmp ret_from_intr | |
1602 | -#define INTERRUPT_RETURN iret | |
1603 | -#endif | |
1604 | - | |
1605 | #ifdef CONFIG_PREEMPT | |
1606 | -#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF | |
1607 | +#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF | |
1608 | #else | |
1609 | -#define preempt_stop | |
1610 | +#define preempt_stop(clobbers) | |
1611 | #define resume_kernel restore_nocheck | |
1612 | #endif | |
1613 | ||
1614 | .macro TRACE_IRQS_IRET | |
1615 | #ifdef CONFIG_TRACE_IRQFLAGS | |
1616 | - testl $IF_MASK,EFLAGS(%esp) # interrupts off? | |
1617 | + testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off? | |
1618 | jz 1f | |
1619 | TRACE_IRQS_ON | |
1620 | 1: | |
1621 | @@ -148,6 +102,9 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT | |
1622 | ||
1623 | #define SAVE_ALL \ | |
1624 | cld; \ | |
1625 | + pushl %gs; \ | |
1626 | + CFI_ADJUST_CFA_OFFSET 4;\ | |
1627 | + /*CFI_REL_OFFSET gs, 0;*/\ | |
1628 | pushl %es; \ | |
1629 | CFI_ADJUST_CFA_OFFSET 4;\ | |
1630 | /*CFI_REL_OFFSET es, 0;*/\ | |
1631 | @@ -177,7 +134,9 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT | |
1632 | CFI_REL_OFFSET ebx, 0;\ | |
1633 | movl $(__USER_DS), %edx; \ | |
1634 | movl %edx, %ds; \ | |
1635 | - movl %edx, %es; | |
1636 | + movl %edx, %es; \ | |
1637 | + movl $(__KERNEL_PDA), %edx; \ | |
1638 | + movl %edx, %gs | |
1639 | ||
1640 | #define RESTORE_INT_REGS \ | |
1641 | popl %ebx; \ | |
1642 | @@ -210,17 +169,22 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT | |
1643 | 2: popl %es; \ | |
1644 | CFI_ADJUST_CFA_OFFSET -4;\ | |
1645 | /*CFI_RESTORE es;*/\ | |
1646 | -.section .fixup,"ax"; \ | |
1647 | -3: movl $0,(%esp); \ | |
1648 | - jmp 1b; \ | |
1649 | +3: popl %gs; \ | |
1650 | + CFI_ADJUST_CFA_OFFSET -4;\ | |
1651 | + /*CFI_RESTORE gs;*/\ | |
1652 | +.pushsection .fixup,"ax"; \ | |
1653 | 4: movl $0,(%esp); \ | |
1654 | + jmp 1b; \ | |
1655 | +5: movl $0,(%esp); \ | |
1656 | jmp 2b; \ | |
1657 | -.previous; \ | |
1658 | +6: movl $0,(%esp); \ | |
1659 | + jmp 3b; \ | |
1660 | .section __ex_table,"a";\ | |
1661 | .align 4; \ | |
1662 | - .long 1b,3b; \ | |
1663 | - .long 2b,4b; \ | |
1664 | -.previous | |
1665 | + .long 1b,4b; \ | |
1666 | + .long 2b,5b; \ | |
1667 | + .long 3b,6b; \ | |
1668 | +.popsection | |
1669 | ||
1670 | #define RING0_INT_FRAME \ | |
1671 | CFI_STARTPROC simple;\ | |
1672 | @@ -239,18 +203,18 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT | |
1673 | #define RING0_PTREGS_FRAME \ | |
1674 | CFI_STARTPROC simple;\ | |
1675 | CFI_SIGNAL_FRAME;\ | |
1676 | - CFI_DEF_CFA esp, OLDESP-EBX;\ | |
1677 | - /*CFI_OFFSET cs, CS-OLDESP;*/\ | |
1678 | - CFI_OFFSET eip, EIP-OLDESP;\ | |
1679 | - /*CFI_OFFSET es, ES-OLDESP;*/\ | |
1680 | - /*CFI_OFFSET ds, DS-OLDESP;*/\ | |
1681 | - CFI_OFFSET eax, EAX-OLDESP;\ | |
1682 | - CFI_OFFSET ebp, EBP-OLDESP;\ | |
1683 | - CFI_OFFSET edi, EDI-OLDESP;\ | |
1684 | - CFI_OFFSET esi, ESI-OLDESP;\ | |
1685 | - CFI_OFFSET edx, EDX-OLDESP;\ | |
1686 | - CFI_OFFSET ecx, ECX-OLDESP;\ | |
1687 | - CFI_OFFSET ebx, EBX-OLDESP | |
1688 | + CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ | |
1689 | + /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ | |
1690 | + CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ | |
1691 | + /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ | |
1692 | + /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ | |
1693 | + CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ | |
1694 | + CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ | |
1695 | + CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ | |
1696 | + CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ | |
1697 | + CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ | |
1698 | + CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ | |
1699 | + CFI_OFFSET ebx, PT_EBX-PT_OLDESP | |
1700 | ||
1701 | ENTRY(ret_from_fork) | |
1702 | CFI_STARTPROC | |
1703 | @@ -278,17 +242,18 @@ ENTRY(ret_from_fork) | |
1704 | ALIGN | |
1705 | RING0_PTREGS_FRAME | |
1706 | ret_from_exception: | |
1707 | - preempt_stop | |
1708 | + preempt_stop(CLBR_ANY) | |
1709 | ret_from_intr: | |
1710 | GET_THREAD_INFO(%ebp) | |
1711 | check_userspace: | |
1712 | - movl EFLAGS(%esp), %eax # mix EFLAGS and CS | |
1713 | - movb CS(%esp), %al | |
1714 | + movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS | |
1715 | + movb PT_CS(%esp), %al | |
1716 | andl $(VM_MASK | SEGMENT_RPL_MASK), %eax | |
1717 | cmpl $USER_RPL, %eax | |
1718 | jb resume_kernel # not returning to v8086 or userspace | |
1719 | + | |
1720 | ENTRY(resume_userspace) | |
1721 | - DISABLE_INTERRUPTS # make sure we don't miss an interrupt | |
1722 | + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt | |
1723 | # setting need_resched or sigpending | |
1724 | # between sampling and the iret | |
1725 | movl TI_flags(%ebp), %ecx | |
1726 | @@ -299,14 +264,14 @@ ENTRY(resume_userspace) | |
1727 | ||
1728 | #ifdef CONFIG_PREEMPT | |
1729 | ENTRY(resume_kernel) | |
1730 | - DISABLE_INTERRUPTS | |
1731 | + DISABLE_INTERRUPTS(CLBR_ANY) | |
1732 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? | |
1733 | jnz restore_nocheck | |
1734 | need_resched: | |
1735 | movl TI_flags(%ebp), %ecx # need_resched set ? | |
1736 | testb $_TIF_NEED_RESCHED, %cl | |
1737 | jz restore_all | |
1738 | - testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? | |
1739 | + testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ? | |
1740 | jz restore_all | |
1741 | call preempt_schedule_irq | |
1742 | jmp need_resched | |
1743 | @@ -328,7 +293,7 @@ sysenter_past_esp: | |
1744 | * No need to follow this irqs on/off section: the syscall | |
1745 | * disabled irqs and here we enable it straight after entry: | |
1746 | */ | |
1747 | - ENABLE_INTERRUPTS | |
1748 | + ENABLE_INTERRUPTS(CLBR_NONE) | |
1749 | pushl $(__USER_DS) | |
1750 | CFI_ADJUST_CFA_OFFSET 4 | |
1751 | /*CFI_REL_OFFSET ss, 0*/ | |
1752 | @@ -340,12 +305,16 @@ sysenter_past_esp: | |
1753 | pushl $(__USER_CS) | |
1754 | CFI_ADJUST_CFA_OFFSET 4 | |
1755 | /*CFI_REL_OFFSET cs, 0*/ | |
1756 | +#ifndef CONFIG_COMPAT_VDSO | |
1757 | /* | |
1758 | * Push current_thread_info()->sysenter_return to the stack. | |
1759 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words | |
1760 | * pushed above; +8 corresponds to copy_thread's esp0 setting. | |
1761 | */ | |
1762 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) | |
1763 | +#else | |
1764 | + pushl $SYSENTER_RETURN | |
1765 | +#endif | |
1766 | CFI_ADJUST_CFA_OFFSET 4 | |
1767 | CFI_REL_OFFSET eip, 0 | |
1768 | ||
1769 | @@ -372,19 +341,27 @@ sysenter_past_esp: | |
1770 | cmpl $(nr_syscalls), %eax | |
1771 | jae syscall_badsys | |
1772 | call *sys_call_table(,%eax,4) | |
1773 | - movl %eax,EAX(%esp) | |
1774 | - DISABLE_INTERRUPTS | |
1775 | + movl %eax,PT_EAX(%esp) | |
1776 | + DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX) | |
1777 | TRACE_IRQS_OFF | |
1778 | movl TI_flags(%ebp), %ecx | |
1779 | testw $_TIF_ALLWORK_MASK, %cx | |
1780 | jne syscall_exit_work | |
1781 | /* if something modifies registers it must also disable sysexit */ | |
1782 | - movl EIP(%esp), %edx | |
1783 | - movl OLDESP(%esp), %ecx | |
1784 | + movl PT_EIP(%esp), %edx | |
1785 | + movl PT_OLDESP(%esp), %ecx | |
1786 | xorl %ebp,%ebp | |
1787 | TRACE_IRQS_ON | |
1788 | +1: mov PT_GS(%esp), %gs | |
1789 | ENABLE_INTERRUPTS_SYSEXIT | |
1790 | CFI_ENDPROC | |
1791 | +.pushsection .fixup,"ax" | |
1792 | +2: movl $0,PT_GS(%esp) | |
1793 | + jmp 1b | |
1794 | +.section __ex_table,"a" | |
1795 | + .align 4 | |
1796 | + .long 1b,2b | |
1797 | +.popsection | |
1798 | ||
1799 | # pv sysenter call handler stub | |
1800 | ENTRY(sysenter_entry_pv) | |
1801 | @@ -419,7 +396,7 @@ ENTRY(system_call) | |
1802 | CFI_ADJUST_CFA_OFFSET 4 | |
1803 | SAVE_ALL | |
1804 | GET_THREAD_INFO(%ebp) | |
1805 | - testl $TF_MASK,EFLAGS(%esp) | |
1806 | + testl $TF_MASK,PT_EFLAGS(%esp) | |
1807 | jz no_singlestep | |
1808 | orl $_TIF_SINGLESTEP,TI_flags(%ebp) | |
1809 | no_singlestep: | |
1810 | @@ -431,9 +408,9 @@ no_singlestep: | |
1811 | jae syscall_badsys | |
1812 | syscall_call: | |
1813 | call *sys_call_table(,%eax,4) | |
1814 | - movl %eax,EAX(%esp) # store the return value | |
1815 | + movl %eax,PT_EAX(%esp) # store the return value | |
1816 | syscall_exit: | |
1817 | - DISABLE_INTERRUPTS # make sure we don't miss an interrupt | |
1818 | + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt | |
1819 | # setting need_resched or sigpending | |
1820 | # between sampling and the iret | |
1821 | TRACE_IRQS_OFF | |
1822 | @@ -443,12 +420,12 @@ syscall_exit: | |
1823 | ||
1824 | restore_all: | |
1825 | #ifndef CONFIG_XEN | |
1826 | - movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS | |
1827 | - # Warning: OLDSS(%esp) contains the wrong/random values if we | |
1828 | + movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS | |
1829 | + # Warning: PT_OLDSS(%esp) contains the wrong/random values if we | |
1830 | # are returning to the kernel. | |
1831 | # See comments in process.c:copy_thread() for details. | |
1832 | - movb OLDSS(%esp), %ah | |
1833 | - movb CS(%esp), %al | |
1834 | + movb PT_OLDSS(%esp), %ah | |
1835 | + movb PT_CS(%esp), %al | |
1836 | andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax | |
1837 | cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax | |
1838 | CFI_REMEMBER_STATE | |
1839 | @@ -456,7 +433,7 @@ restore_all: | |
1840 | restore_nocheck: | |
1841 | #else | |
1842 | restore_nocheck: | |
1843 | - movl EFLAGS(%esp), %eax | |
1844 | + movl PT_EFLAGS(%esp), %eax | |
1845 | testl $(VM_MASK|NMI_MASK), %eax | |
1846 | CFI_REMEMBER_STATE | |
1847 | jnz hypervisor_iret | |
1848 | @@ -470,13 +447,13 @@ restore_nocheck: | |
1849 | TRACE_IRQS_IRET | |
1850 | restore_nocheck_notrace: | |
1851 | RESTORE_REGS | |
1852 | - addl $4, %esp | |
1853 | + addl $4, %esp # skip orig_eax/error_code | |
1854 | CFI_ADJUST_CFA_OFFSET -4 | |
1855 | 1: INTERRUPT_RETURN | |
1856 | .section .fixup,"ax" | |
1857 | iret_exc: | |
1858 | #ifndef CONFIG_XEN | |
1859 | - ENABLE_INTERRUPTS | |
1860 | + ENABLE_INTERRUPTS(CLBR_NONE) | |
1861 | #endif | |
1862 | pushl $0 # no error code | |
1863 | pushl $do_iret_error | |
1864 | @@ -490,33 +467,42 @@ iret_exc: | |
1865 | CFI_RESTORE_STATE | |
1866 | #ifndef CONFIG_XEN | |
1867 | ldt_ss: | |
1868 | - larl OLDSS(%esp), %eax | |
1869 | + larl PT_OLDSS(%esp), %eax | |
1870 | jnz restore_nocheck | |
1871 | testl $0x00400000, %eax # returning to 32bit stack? | |
1872 | jnz restore_nocheck # allright, normal return | |
1873 | + | |
1874 | +#ifdef CONFIG_PARAVIRT | |
1875 | + /* | |
1876 | + * The kernel can't run on a non-flat stack if paravirt mode | |
1877 | + * is active. Rather than try to fixup the high bits of | |
1878 | + * ESP, bypass this code entirely. This may break DOSemu | |
1879 | + * and/or Wine support in a paravirt VM, although the option | |
1880 | + * is still available to implement the setting of the high | |
1881 | + * 16-bits in the INTERRUPT_RETURN paravirt-op. | |
1882 | + */ | |
1883 | + cmpl $0, paravirt_ops+PARAVIRT_enabled | |
1884 | + jne restore_nocheck | |
1885 | +#endif | |
1886 | + | |
1887 | /* If returning to userspace with 16bit stack, | |
1888 | * try to fix the higher word of ESP, as the CPU | |
1889 | * won't restore it. | |
1890 | * This is an "official" bug of all the x86-compatible | |
1891 | * CPUs, which we can try to work around to make | |
1892 | * dosemu and wine happy. */ | |
1893 | - subl $8, %esp # reserve space for switch16 pointer | |
1894 | - CFI_ADJUST_CFA_OFFSET 8 | |
1895 | - DISABLE_INTERRUPTS | |
1896 | + movl PT_OLDESP(%esp), %eax | |
1897 | + movl %esp, %edx | |
1898 | + call patch_espfix_desc | |
1899 | + pushl $__ESPFIX_SS | |
1900 | + CFI_ADJUST_CFA_OFFSET 4 | |
1901 | + pushl %eax | |
1902 | + CFI_ADJUST_CFA_OFFSET 4 | |
1903 | + DISABLE_INTERRUPTS(CLBR_EAX) | |
1904 | TRACE_IRQS_OFF | |
1905 | - movl %esp, %eax | |
1906 | - /* Set up the 16bit stack frame with switch32 pointer on top, | |
1907 | - * and a switch16 pointer on top of the current frame. */ | |
1908 | - call setup_x86_bogus_stack | |
1909 | - CFI_ADJUST_CFA_OFFSET -8 # frame has moved | |
1910 | - TRACE_IRQS_IRET | |
1911 | - RESTORE_REGS | |
1912 | - lss 20+4(%esp), %esp # switch to 16bit stack | |
1913 | -1: INTERRUPT_RETURN | |
1914 | -.section __ex_table,"a" | |
1915 | - .align 4 | |
1916 | - .long 1b,iret_exc | |
1917 | -.previous | |
1918 | + lss (%esp), %esp | |
1919 | + CFI_ADJUST_CFA_OFFSET -8 | |
1920 | + jmp restore_nocheck | |
1921 | #else | |
1922 | ALIGN | |
1923 | restore_all_enable_events: | |
1924 | @@ -540,7 +526,7 @@ ecrit: /**** END OF CRITICAL REGION *** | |
1925 | ||
1926 | CFI_RESTORE_STATE | |
1927 | hypervisor_iret: | |
1928 | - andl $~NMI_MASK, EFLAGS(%esp) | |
1929 | + andl $~NMI_MASK, PT_EFLAGS(%esp) | |
1930 | RESTORE_REGS | |
1931 | addl $4, %esp | |
1932 | CFI_ADJUST_CFA_OFFSET -4 | |
1933 | @@ -556,7 +542,7 @@ work_pending: | |
1934 | jz work_notifysig | |
1935 | work_resched: | |
1936 | call schedule | |
1937 | - DISABLE_INTERRUPTS # make sure we don't miss an interrupt | |
1938 | + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt | |
1939 | # setting need_resched or sigpending | |
1940 | # between sampling and the iret | |
1941 | TRACE_IRQS_OFF | |
1942 | @@ -569,7 +555,8 @@ work_resched: | |
1943 | ||
1944 | work_notifysig: # deal with pending signals and | |
1945 | # notify-resume requests | |
1946 | - testl $VM_MASK, EFLAGS(%esp) | |
1947 | +#ifdef CONFIG_VM86 | |
1948 | + testl $VM_MASK, PT_EFLAGS(%esp) | |
1949 | movl %esp, %eax | |
1950 | jne work_notifysig_v86 # returning to kernel-space or | |
1951 | # vm86-space | |
1952 | @@ -579,29 +566,30 @@ work_notifysig: # deal with pending s | |
1953 | ||
1954 | ALIGN | |
1955 | work_notifysig_v86: | |
1956 | -#ifdef CONFIG_VM86 | |
1957 | pushl %ecx # save ti_flags for do_notify_resume | |
1958 | CFI_ADJUST_CFA_OFFSET 4 | |
1959 | call save_v86_state # %eax contains pt_regs pointer | |
1960 | popl %ecx | |
1961 | CFI_ADJUST_CFA_OFFSET -4 | |
1962 | movl %eax, %esp | |
1963 | +#else | |
1964 | + movl %esp, %eax | |
1965 | +#endif | |
1966 | xorl %edx, %edx | |
1967 | call do_notify_resume | |
1968 | jmp resume_userspace_sig | |
1969 | -#endif | |
1970 | ||
1971 | # perform syscall exit tracing | |
1972 | ALIGN | |
1973 | syscall_trace_entry: | |
1974 | - movl $-ENOSYS,EAX(%esp) | |
1975 | + movl $-ENOSYS,PT_EAX(%esp) | |
1976 | movl %esp, %eax | |
1977 | xorl %edx,%edx | |
1978 | call do_syscall_trace | |
1979 | cmpl $0, %eax | |
1980 | jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, | |
1981 | # so must skip actual syscall | |
1982 | - movl ORIG_EAX(%esp), %eax | |
1983 | + movl PT_ORIG_EAX(%esp), %eax | |
1984 | cmpl $(nr_syscalls), %eax | |
1985 | jnae syscall_call | |
1986 | jmp syscall_exit | |
1987 | @@ -612,7 +600,7 @@ syscall_exit_work: | |
1988 | testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl | |
1989 | jz work_pending | |
1990 | TRACE_IRQS_ON | |
1991 | - ENABLE_INTERRUPTS # could let do_syscall_trace() call | |
1992 | + ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call | |
1993 | # schedule() instead | |
1994 | movl %esp, %eax | |
1995 | movl $1, %edx | |
1996 | @@ -626,40 +614,39 @@ syscall_fault: | |
1997 | CFI_ADJUST_CFA_OFFSET 4 | |
1998 | SAVE_ALL | |
1999 | GET_THREAD_INFO(%ebp) | |
2000 | - movl $-EFAULT,EAX(%esp) | |
2001 | + movl $-EFAULT,PT_EAX(%esp) | |
2002 | jmp resume_userspace | |
2003 | ||
2004 | syscall_badsys: | |
2005 | - movl $-ENOSYS,EAX(%esp) | |
2006 | + movl $-ENOSYS,PT_EAX(%esp) | |
2007 | jmp resume_userspace | |
2008 | CFI_ENDPROC | |
2009 | ||
2010 | #ifndef CONFIG_XEN | |
2011 | #define FIXUP_ESPFIX_STACK \ | |
2012 | - movl %esp, %eax; \ | |
2013 | - /* switch to 32bit stack using the pointer on top of 16bit stack */ \ | |
2014 | - lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \ | |
2015 | - /* copy data from 16bit stack to 32bit stack */ \ | |
2016 | - call fixup_x86_bogus_stack; \ | |
2017 | - /* put ESP to the proper location */ \ | |
2018 | - movl %eax, %esp; | |
2019 | -#define UNWIND_ESPFIX_STACK \ | |
2020 | + /* since we are on a wrong stack, we cant make it a C code :( */ \ | |
2021 | + movl %gs:PDA_cpu, %ebx; \ | |
2022 | + PER_CPU(cpu_gdt_descr, %ebx); \ | |
2023 | + movl GDS_address(%ebx), %ebx; \ | |
2024 | + GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ | |
2025 | + addl %esp, %eax; \ | |
2026 | + pushl $__KERNEL_DS; \ | |
2027 | + CFI_ADJUST_CFA_OFFSET 4; \ | |
2028 | pushl %eax; \ | |
2029 | CFI_ADJUST_CFA_OFFSET 4; \ | |
2030 | + lss (%esp), %esp; \ | |
2031 | + CFI_ADJUST_CFA_OFFSET -8; | |
2032 | +#define UNWIND_ESPFIX_STACK \ | |
2033 | movl %ss, %eax; \ | |
2034 | - /* see if on 16bit stack */ \ | |
2035 | + /* see if on espfix stack */ \ | |
2036 | cmpw $__ESPFIX_SS, %ax; \ | |
2037 | - je 28f; \ | |
2038 | -27: popl %eax; \ | |
2039 | - CFI_ADJUST_CFA_OFFSET -4; \ | |
2040 | -.section .fixup,"ax"; \ | |
2041 | -28: movl $__KERNEL_DS, %eax; \ | |
2042 | + jne 27f; \ | |
2043 | + movl $__KERNEL_DS, %eax; \ | |
2044 | movl %eax, %ds; \ | |
2045 | movl %eax, %es; \ | |
2046 | - /* switch to 32bit stack */ \ | |
2047 | + /* switch to normal stack */ \ | |
2048 | FIXUP_ESPFIX_STACK; \ | |
2049 | - jmp 27b; \ | |
2050 | -.previous | |
2051 | +27:; | |
2052 | ||
2053 | /* | |
2054 | * Build the entry stubs and pointer table with | |
2055 | @@ -723,13 +710,16 @@ KPROBE_ENTRY(page_fault) | |
2056 | CFI_ADJUST_CFA_OFFSET 4 | |
2057 | ALIGN | |
2058 | error_code: | |
2059 | + /* the function address is in %gs's slot on the stack */ | |
2060 | + pushl %es | |
2061 | + CFI_ADJUST_CFA_OFFSET 4 | |
2062 | + /*CFI_REL_OFFSET es, 0*/ | |
2063 | pushl %ds | |
2064 | CFI_ADJUST_CFA_OFFSET 4 | |
2065 | /*CFI_REL_OFFSET ds, 0*/ | |
2066 | pushl %eax | |
2067 | CFI_ADJUST_CFA_OFFSET 4 | |
2068 | CFI_REL_OFFSET eax, 0 | |
2069 | - xorl %eax, %eax | |
2070 | pushl %ebp | |
2071 | CFI_ADJUST_CFA_OFFSET 4 | |
2072 | CFI_REL_OFFSET ebp, 0 | |
2073 | @@ -742,7 +732,6 @@ error_code: | |
2074 | pushl %edx | |
2075 | CFI_ADJUST_CFA_OFFSET 4 | |
2076 | CFI_REL_OFFSET edx, 0 | |
2077 | - decl %eax # eax = -1 | |
2078 | pushl %ecx | |
2079 | CFI_ADJUST_CFA_OFFSET 4 | |
2080 | CFI_REL_OFFSET ecx, 0 | |
2081 | @@ -750,18 +739,20 @@ error_code: | |
2082 | CFI_ADJUST_CFA_OFFSET 4 | |
2083 | CFI_REL_OFFSET ebx, 0 | |
2084 | cld | |
2085 | - pushl %es | |
2086 | + pushl %gs | |
2087 | CFI_ADJUST_CFA_OFFSET 4 | |
2088 | - /*CFI_REL_OFFSET es, 0*/ | |
2089 | + /*CFI_REL_OFFSET gs, 0*/ | |
2090 | + movl $(__KERNEL_PDA), %ecx | |
2091 | + movl %ecx, %gs | |
2092 | UNWIND_ESPFIX_STACK | |
2093 | popl %ecx | |
2094 | CFI_ADJUST_CFA_OFFSET -4 | |
2095 | /*CFI_REGISTER es, ecx*/ | |
2096 | - movl ES(%esp), %edi # get the function address | |
2097 | - movl ORIG_EAX(%esp), %edx # get the error code | |
2098 | - movl %eax, ORIG_EAX(%esp) | |
2099 | - movl %ecx, ES(%esp) | |
2100 | - /*CFI_REL_OFFSET es, ES*/ | |
2101 | + movl PT_GS(%esp), %edi # get the function address | |
2102 | + movl PT_ORIG_EAX(%esp), %edx # get the error code | |
2103 | + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | |
2104 | + mov %ecx, PT_GS(%esp) | |
2105 | + /*CFI_REL_OFFSET gs, ES*/ | |
2106 | movl $(__USER_DS), %ecx | |
2107 | movl %ecx, %ds | |
2108 | movl %ecx, %es | |
2109 | @@ -793,7 +784,7 @@ ENTRY(hypervisor_callback) | |
2110 | pushl %eax | |
2111 | CFI_ADJUST_CFA_OFFSET 4 | |
2112 | SAVE_ALL | |
2113 | - movl EIP(%esp),%eax | |
2114 | + movl PT_EIP(%esp),%eax | |
2115 | cmpl $scrit,%eax | |
2116 | jb 11f | |
2117 | cmpl $ecrit,%eax | |
2118 | @@ -802,7 +793,7 @@ ENTRY(hypervisor_callback) | |
2119 | jb 11f | |
2120 | cmpl $sysexit_ecrit,%eax | |
2121 | ja 11f | |
2122 | - addl $OLDESP,%esp # Remove eflags...ebx from stack frame. | |
2123 | + addl $PT_OLDESP,%esp # Remove eflags...ebx from stack frame. | |
2124 | 11: push %esp | |
2125 | CFI_ADJUST_CFA_OFFSET 4 | |
2126 | call evtchn_do_upcall | |
2127 | @@ -824,7 +815,7 @@ critical_region_fixup: | |
2128 | jne 15f | |
2129 | xorl %ecx,%ecx | |
2130 | 15: leal (%esp,%ecx),%esi # %esi points at end of src region | |
2131 | - leal OLDESP(%esp),%edi # %edi points at end of dst region | |
2132 | + leal PT_OLDESP(%esp),%edi # %edi points at end of dst region | |
2133 | shrl $2,%ecx # convert words to bytes | |
2134 | je 17f # skip loop if nothing to copy | |
2135 | 16: subl $4,%esi # pre-decrementing copy loop | |
2136 | @@ -848,8 +839,9 @@ critical_fixup_table: | |
2137 | .byte 0x18 # pop %eax | |
2138 | .byte 0x1c # pop %ds | |
2139 | .byte 0x20 # pop %es | |
2140 | - .byte 0x24,0x24,0x24 # add $4,%esp | |
2141 | - .byte 0x28 # iret | |
2142 | + .byte 0x24,0x24 # pop %gs | |
2143 | + .byte 0x28,0x28,0x28 # add $4,%esp | |
2144 | + .byte 0x2c # iret | |
2145 | .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi) | |
2146 | .byte 0x00,0x00 # jmp 11b | |
2147 | .previous | |
2148 | @@ -940,7 +932,7 @@ ENTRY(device_not_available) | |
2149 | jmp ret_from_exception | |
2150 | device_available_emulate: | |
2151 | #endif | |
2152 | - preempt_stop | |
2153 | + preempt_stop(CLBR_ANY) | |
2154 | call math_state_restore | |
2155 | jmp ret_from_exception | |
2156 | CFI_ENDPROC | |
2157 | @@ -1010,7 +1002,7 @@ KPROBE_ENTRY(nmi) | |
2158 | cmpw $__ESPFIX_SS, %ax | |
2159 | popl %eax | |
2160 | CFI_ADJUST_CFA_OFFSET -4 | |
2161 | - je nmi_16bit_stack | |
2162 | + je nmi_espfix_stack | |
2163 | cmpl $sysenter_entry,(%esp) | |
2164 | je nmi_stack_fixup | |
2165 | pushl %eax | |
2166 | @@ -1053,7 +1045,7 @@ nmi_debug_stack_check: | |
2167 | FIX_STACK(24,nmi_stack_correct, 1) | |
2168 | jmp nmi_stack_correct | |
2169 | ||
2170 | -nmi_16bit_stack: | |
2171 | +nmi_espfix_stack: | |
2172 | /* We have a RING0_INT_FRAME here. | |
2173 | * | |
2174 | * create the pointer to lss back | |
2175 | @@ -1062,7 +1054,6 @@ nmi_16bit_stack: | |
2176 | CFI_ADJUST_CFA_OFFSET 4 | |
2177 | pushl %esp | |
2178 | CFI_ADJUST_CFA_OFFSET 4 | |
2179 | - movzwl %sp, %esp | |
2180 | addw $4, (%esp) | |
2181 | /* copy the iret frame of 12 bytes */ | |
2182 | .rept 3 | |
2183 | @@ -1073,11 +1064,11 @@ nmi_16bit_stack: | |
2184 | CFI_ADJUST_CFA_OFFSET 4 | |
2185 | SAVE_ALL | |
2186 | FIXUP_ESPFIX_STACK # %eax == %esp | |
2187 | - CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved | |
2188 | xorl %edx,%edx # zero error code | |
2189 | call do_nmi | |
2190 | RESTORE_REGS | |
2191 | - lss 12+4(%esp), %esp # back to 16bit stack | |
2192 | + lss 12+4(%esp), %esp # back to espfix stack | |
2193 | + CFI_ADJUST_CFA_OFFSET -24 | |
2194 | 1: INTERRUPT_RETURN | |
2195 | CFI_ENDPROC | |
2196 | .section __ex_table,"a" | |
2197 | @@ -1093,12 +1084,25 @@ KPROBE_ENTRY(nmi) | |
2198 | xorl %edx,%edx # zero error code | |
2199 | movl %esp,%eax # pt_regs pointer | |
2200 | call do_nmi | |
2201 | - orl $NMI_MASK, EFLAGS(%esp) | |
2202 | + orl $NMI_MASK, PT_EFLAGS(%esp) | |
2203 | jmp restore_all | |
2204 | CFI_ENDPROC | |
2205 | #endif | |
2206 | KPROBE_END(nmi) | |
2207 | ||
2208 | +#ifdef CONFIG_PARAVIRT | |
2209 | +ENTRY(native_iret) | |
2210 | +1: iret | |
2211 | +.section __ex_table,"a" | |
2212 | + .align 4 | |
2213 | + .long 1b,iret_exc | |
2214 | +.previous | |
2215 | + | |
2216 | +ENTRY(native_irq_enable_sysexit) | |
2217 | + sti | |
2218 | + sysexit | |
2219 | +#endif | |
2220 | + | |
2221 | KPROBE_ENTRY(int3) | |
2222 | RING0_INT_FRAME | |
2223 | pushl $-1 # mark this as an int | |
2224 | @@ -1214,37 +1218,6 @@ ENTRY(spurious_interrupt_bug) | |
2225 | CFI_ENDPROC | |
2226 | #endif /* !CONFIG_XEN */ | |
2227 | ||
2228 | -#ifdef CONFIG_STACK_UNWIND | |
2229 | -ENTRY(arch_unwind_init_running) | |
2230 | - CFI_STARTPROC | |
2231 | - movl 4(%esp), %edx | |
2232 | - movl (%esp), %ecx | |
2233 | - leal 4(%esp), %eax | |
2234 | - movl %ebx, EBX(%edx) | |
2235 | - xorl %ebx, %ebx | |
2236 | - movl %ebx, ECX(%edx) | |
2237 | - movl %ebx, EDX(%edx) | |
2238 | - movl %esi, ESI(%edx) | |
2239 | - movl %edi, EDI(%edx) | |
2240 | - movl %ebp, EBP(%edx) | |
2241 | - movl %ebx, EAX(%edx) | |
2242 | - movl $__USER_DS, DS(%edx) | |
2243 | - movl $__USER_DS, ES(%edx) | |
2244 | - movl %ebx, ORIG_EAX(%edx) | |
2245 | - movl %ecx, EIP(%edx) | |
2246 | - movl 12(%esp), %ecx | |
2247 | - movl $__KERNEL_CS, CS(%edx) | |
2248 | - movl %ebx, EFLAGS(%edx) | |
2249 | - movl %eax, OLDESP(%edx) | |
2250 | - movl 8(%esp), %eax | |
2251 | - movl %ecx, 8(%esp) | |
2252 | - movl EBX(%edx), %ebx | |
2253 | - movl $__KERNEL_DS, OLDSS(%edx) | |
2254 | - jmpl *%eax | |
2255 | - CFI_ENDPROC | |
2256 | -ENDPROC(arch_unwind_init_running) | |
2257 | -#endif | |
2258 | - | |
2259 | ENTRY(fixup_4gb_segment) | |
2260 | RING0_EC_FRAME | |
2261 | pushl $do_fixup_4gb_segment | |
2262 | Index: head-2008-12-01/arch/x86/kernel/head_32-xen.S | |
2263 | =================================================================== | |
2264 | --- head-2008-12-01.orig/arch/x86/kernel/head_32-xen.S 2008-12-01 11:29:05.000000000 +0100 | |
2265 | +++ head-2008-12-01/arch/x86/kernel/head_32-xen.S 2008-12-01 11:32:38.000000000 +0100 | |
2266 | @@ -9,6 +9,7 @@ | |
2267 | #include <asm/cache.h> | |
2268 | #include <asm/thread_info.h> | |
2269 | #include <asm/asm-offsets.h> | |
2270 | +#include <asm/boot.h> | |
2271 | #include <asm/dwarf2.h> | |
2272 | #include <xen/interface/xen.h> | |
2273 | #include <xen/interface/elfnote.h> | |
2274 | @@ -35,6 +36,8 @@ ENTRY(startup_32) | |
2275 | /* Set up the stack pointer */ | |
2276 | movl $(init_thread_union+THREAD_SIZE),%esp | |
2277 | ||
2278 | + call setup_pda | |
2279 | + | |
2280 | /* get vendor info */ | |
2281 | xorl %eax,%eax # call CPUID with 0 -> return vendor ID | |
2282 | XEN_CPUID | |
2283 | @@ -57,14 +60,58 @@ ENTRY(startup_32) | |
2284 | ||
2285 | movb $1,X86_HARD_MATH | |
2286 | ||
2287 | - xorl %eax,%eax # Clear FS/GS and LDT | |
2288 | + xorl %eax,%eax # Clear FS | |
2289 | movl %eax,%fs | |
2290 | - movl %eax,%gs | |
2291 | + | |
2292 | + movl $(__KERNEL_PDA),%eax | |
2293 | + mov %eax,%gs | |
2294 | + | |
2295 | cld # gcc2 wants the direction flag cleared at all times | |
2296 | ||
2297 | pushl $0 # fake return address for unwinder | |
2298 | jmp start_kernel | |
2299 | ||
2300 | +/* | |
2301 | + * Point the GDT at this CPU's PDA. This will be | |
2302 | + * cpu_gdt_table and boot_pda. | |
2303 | + */ | |
2304 | +setup_pda: | |
2305 | + /* get the PDA pointer */ | |
2306 | + movl $boot_pda, %eax | |
2307 | + | |
2308 | + /* slot the PDA address into the GDT */ | |
2309 | + mov $cpu_gdt_table, %ecx | |
2310 | + mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ | |
2311 | + shr $16, %eax | |
2312 | + mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ | |
2313 | + mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */ | |
2314 | + | |
2315 | + # %esi still points to start_info, and no registers | |
2316 | + # need to be preserved. | |
2317 | + | |
2318 | + movl XEN_START_mfn_list(%esi), %ebx | |
2319 | + movl $(cpu_gdt_table - __PAGE_OFFSET), %eax | |
2320 | + shrl $PAGE_SHIFT, %eax | |
2321 | + movl (%ebx,%eax,4), %ecx | |
2322 | + pushl %ecx # frame number for set_gdt below | |
2323 | + | |
2324 | + xorl %esi, %esi | |
2325 | + xorl %edx, %edx | |
2326 | + shldl $PAGE_SHIFT, %ecx, %edx | |
2327 | + shll $PAGE_SHIFT, %ecx | |
2328 | + orl $0x61, %ecx | |
2329 | + movl $cpu_gdt_table, %ebx | |
2330 | + movl $__HYPERVISOR_update_va_mapping, %eax | |
2331 | + int $0x82 | |
2332 | + | |
2333 | + movl $(PAGE_SIZE_asm / 8), %ecx | |
2334 | + movl %esp, %ebx | |
2335 | + movl $__HYPERVISOR_set_gdt, %eax | |
2336 | + int $0x82 | |
2337 | + | |
2338 | + popl %ecx | |
2339 | + ret | |
2340 | + | |
2341 | #define HYPERCALL_PAGE_OFFSET 0x1000 | |
2342 | .org HYPERCALL_PAGE_OFFSET | |
2343 | ENTRY(hypercall_page) | |
2344 | @@ -93,7 +140,8 @@ ENTRY(empty_zero_page) | |
2345 | /* | |
2346 | * The Global Descriptor Table contains 28 quadwords, per-CPU. | |
2347 | */ | |
2348 | - .align L1_CACHE_BYTES | |
2349 | + .section .data.page_aligned, "aw" | |
2350 | + .align PAGE_SIZE_asm | |
2351 | ENTRY(cpu_gdt_table) | |
2352 | .quad 0x0000000000000000 /* NULL descriptor */ | |
2353 | .quad 0x0000000000000000 /* 0x0b reserved */ | |
2354 | @@ -135,12 +183,13 @@ ENTRY(cpu_gdt_table) | |
2355 | .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */ | |
2356 | .quad 0x0000000000000000 /* 0xc8 APM DS data */ | |
2357 | ||
2358 | - .quad 0x0000000000000000 /* 0xd0 - ESPFIX 16-bit SS */ | |
2359 | - .quad 0x0000000000000000 /* 0xd8 - unused */ | |
2360 | + .quad 0x0000000000000000 /* 0xd0 - ESPFIX SS */ | |
2361 | + .quad 0x00cf92000000ffff /* 0xd8 - PDA */ | |
2362 | .quad 0x0000000000000000 /* 0xe0 - unused */ | |
2363 | .quad 0x0000000000000000 /* 0xe8 - unused */ | |
2364 | .quad 0x0000000000000000 /* 0xf0 - unused */ | |
2365 | .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ | |
2366 | + .align PAGE_SIZE_asm | |
2367 | ||
2368 | #if CONFIG_XEN_COMPAT <= 0x030002 | |
2369 | /* | |
2370 | @@ -165,9 +214,9 @@ ENTRY(cpu_gdt_table) | |
2371 | .ascii ",ELF_PADDR_OFFSET=0x" | |
2372 | utoa __PAGE_OFFSET | |
2373 | .ascii ",VIRT_ENTRY=0x" | |
2374 | - utoa (__PAGE_OFFSET + __PHYSICAL_START + VIRT_ENTRY_OFFSET) | |
2375 | + utoa (__PAGE_OFFSET + LOAD_PHYSICAL_ADDR + VIRT_ENTRY_OFFSET) | |
2376 | .ascii ",HYPERCALL_PAGE=0x" | |
2377 | - utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT) | |
2378 | + utoa ((LOAD_PHYSICAL_ADDR+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT) | |
2379 | .ascii ",FEATURES=writable_page_tables" | |
2380 | .ascii "|writable_descriptor_tables" | |
2381 | .ascii "|auto_translated_physmap" | |
2382 | Index: head-2008-12-01/arch/x86/kernel/io_apic_32-xen.c | |
2383 | =================================================================== | |
2384 | --- head-2008-12-01.orig/arch/x86/kernel/io_apic_32-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
2385 | +++ head-2008-12-01/arch/x86/kernel/io_apic_32-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
2386 | @@ -34,6 +34,7 @@ | |
2387 | #include <linux/pci.h> | |
2388 | #include <linux/msi.h> | |
2389 | #include <linux/htirq.h> | |
2390 | +#include <linux/freezer.h> | |
2391 | ||
2392 | #include <asm/io.h> | |
2393 | #include <asm/smp.h> | |
2394 | @@ -199,14 +200,20 @@ static struct IO_APIC_route_entry ioapic | |
2395 | * the interrupt, and we need to make sure the entry is fully populated | |
2396 | * before that happens. | |
2397 | */ | |
2398 | -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | |
2399 | +static void | |
2400 | +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | |
2401 | { | |
2402 | - unsigned long flags; | |
2403 | union entry_union eu; | |
2404 | eu.entry = e; | |
2405 | - spin_lock_irqsave(&ioapic_lock, flags); | |
2406 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); | |
2407 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | |
2408 | +} | |
2409 | + | |
2410 | +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | |
2411 | +{ | |
2412 | + unsigned long flags; | |
2413 | + spin_lock_irqsave(&ioapic_lock, flags); | |
2414 | + __ioapic_write_entry(apic, pin, e); | |
2415 | spin_unlock_irqrestore(&ioapic_lock, flags); | |
2416 | } | |
2417 | ||
2418 | @@ -889,8 +896,7 @@ static int __init find_isa_irq_pin(int i | |
2419 | ||
2420 | if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || | |
2421 | mp_bus_id_to_type[lbus] == MP_BUS_EISA || | |
2422 | - mp_bus_id_to_type[lbus] == MP_BUS_MCA || | |
2423 | - mp_bus_id_to_type[lbus] == MP_BUS_NEC98 | |
2424 | + mp_bus_id_to_type[lbus] == MP_BUS_MCA | |
2425 | ) && | |
2426 | (mp_irqs[i].mpc_irqtype == type) && | |
2427 | (mp_irqs[i].mpc_srcbusirq == irq)) | |
2428 | @@ -909,8 +915,7 @@ static int __init find_isa_irq_apic(int | |
2429 | ||
2430 | if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || | |
2431 | mp_bus_id_to_type[lbus] == MP_BUS_EISA || | |
2432 | - mp_bus_id_to_type[lbus] == MP_BUS_MCA || | |
2433 | - mp_bus_id_to_type[lbus] == MP_BUS_NEC98 | |
2434 | + mp_bus_id_to_type[lbus] == MP_BUS_MCA | |
2435 | ) && | |
2436 | (mp_irqs[i].mpc_irqtype == type) && | |
2437 | (mp_irqs[i].mpc_srcbusirq == irq)) | |
2438 | @@ -1043,12 +1048,6 @@ static int EISA_ELCR(unsigned int irq) | |
2439 | #define default_MCA_trigger(idx) (1) | |
2440 | #define default_MCA_polarity(idx) (0) | |
2441 | ||
2442 | -/* NEC98 interrupts are always polarity zero edge triggered, | |
2443 | - * when listed as conforming in the MP table. */ | |
2444 | - | |
2445 | -#define default_NEC98_trigger(idx) (0) | |
2446 | -#define default_NEC98_polarity(idx) (0) | |
2447 | - | |
2448 | static int __init MPBIOS_polarity(int idx) | |
2449 | { | |
2450 | int bus = mp_irqs[idx].mpc_srcbus; | |
2451 | @@ -1083,11 +1082,6 @@ static int __init MPBIOS_polarity(int id | |
2452 | polarity = default_MCA_polarity(idx); | |
2453 | break; | |
2454 | } | |
2455 | - case MP_BUS_NEC98: /* NEC 98 pin */ | |
2456 | - { | |
2457 | - polarity = default_NEC98_polarity(idx); | |
2458 | - break; | |
2459 | - } | |
2460 | default: | |
2461 | { | |
2462 | printk(KERN_WARNING "broken BIOS!!\n"); | |
2463 | @@ -1157,11 +1151,6 @@ static int MPBIOS_trigger(int idx) | |
2464 | trigger = default_MCA_trigger(idx); | |
2465 | break; | |
2466 | } | |
2467 | - case MP_BUS_NEC98: /* NEC 98 pin */ | |
2468 | - { | |
2469 | - trigger = default_NEC98_trigger(idx); | |
2470 | - break; | |
2471 | - } | |
2472 | default: | |
2473 | { | |
2474 | printk(KERN_WARNING "broken BIOS!!\n"); | |
2475 | @@ -1223,7 +1212,6 @@ static int pin_2_irq(int idx, int apic, | |
2476 | case MP_BUS_ISA: /* ISA pin */ | |
2477 | case MP_BUS_EISA: | |
2478 | case MP_BUS_MCA: | |
2479 | - case MP_BUS_NEC98: | |
2480 | { | |
2481 | irq = mp_irqs[idx].mpc_srcbusirq; | |
2482 | break; | |
2483 | @@ -1291,7 +1279,7 @@ static inline int IO_APIC_irq_trigger(in | |
2484 | } | |
2485 | ||
2486 | /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ | |
2487 | -u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */ | |
2488 | +static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */ | |
2489 | ||
2490 | static int __assign_irq_vector(int irq) | |
2491 | { | |
2492 | @@ -1417,8 +1405,8 @@ static void __init setup_IO_APIC_irqs(vo | |
2493 | if (!apic && (irq < 16)) | |
2494 | disable_8259A_irq(irq); | |
2495 | } | |
2496 | - ioapic_write_entry(apic, pin, entry); | |
2497 | spin_lock_irqsave(&ioapic_lock, flags); | |
2498 | + __ioapic_write_entry(apic, pin, entry); | |
2499 | set_native_irq_info(irq, TARGET_CPUS); | |
2500 | spin_unlock_irqrestore(&ioapic_lock, flags); | |
2501 | } | |
2502 | @@ -1988,6 +1976,15 @@ static void __init setup_ioapic_ids_from | |
2503 | #endif | |
2504 | ||
2505 | #ifndef CONFIG_XEN | |
2506 | +static int no_timer_check __initdata; | |
2507 | + | |
2508 | +static int __init notimercheck(char *s) | |
2509 | +{ | |
2510 | + no_timer_check = 1; | |
2511 | + return 1; | |
2512 | +} | |
2513 | +__setup("no_timer_check", notimercheck); | |
2514 | + | |
2515 | /* | |
2516 | * There is a nasty bug in some older SMP boards, their mptable lies | |
2517 | * about the timer IRQ. We do the following to work around the situation: | |
2518 | @@ -1996,10 +1993,13 @@ static void __init setup_ioapic_ids_from | |
2519 | * - if this function detects that timer IRQs are defunct, then we fall | |
2520 | * back to ISA timer IRQs | |
2521 | */ | |
2522 | -static int __init timer_irq_works(void) | |
2523 | +int __init timer_irq_works(void) | |
2524 | { | |
2525 | unsigned long t1 = jiffies; | |
2526 | ||
2527 | + if (no_timer_check) | |
2528 | + return 1; | |
2529 | + | |
2530 | local_irq_enable(); | |
2531 | /* Let ten ticks pass... */ | |
2532 | mdelay((10 * 1000) / HZ); | |
2533 | @@ -2226,9 +2226,15 @@ static inline void unlock_ExtINT_logic(v | |
2534 | unsigned char save_control, save_freq_select; | |
2535 | ||
2536 | pin = find_isa_irq_pin(8, mp_INT); | |
2537 | + if (pin == -1) { | |
2538 | + WARN_ON_ONCE(1); | |
2539 | + return; | |
2540 | + } | |
2541 | apic = find_isa_irq_apic(8, mp_INT); | |
2542 | - if (pin == -1) | |
2543 | + if (apic == -1) { | |
2544 | + WARN_ON_ONCE(1); | |
2545 | return; | |
2546 | + } | |
2547 | ||
2548 | entry0 = ioapic_read_entry(apic, pin); | |
2549 | clear_IO_APIC_pin(apic, pin); | |
2550 | @@ -2273,7 +2279,7 @@ int timer_uses_ioapic_pin_0; | |
2551 | * is so screwy. Thanks to Brian Perkins for testing/hacking this beast | |
2552 | * fanatically on his truly buggy board. | |
2553 | */ | |
2554 | -static inline void check_timer(void) | |
2555 | +static inline void __init check_timer(void) | |
2556 | { | |
2557 | int apic1, pin1, apic2, pin2; | |
2558 | int vector; | |
2559 | @@ -2558,7 +2564,7 @@ device_initcall(ioapic_init_sysfs); | |
2560 | int create_irq(void) | |
2561 | { | |
2562 | /* Allocate an unused irq */ | |
2563 | - int irq, new, vector; | |
2564 | + int irq, new, vector = 0; | |
2565 | unsigned long flags; | |
2566 | ||
2567 | irq = -ENOSPC; | |
2568 | @@ -2939,8 +2945,8 @@ int io_apic_set_pci_routing (int ioapic, | |
2569 | if (!ioapic && (irq < 16)) | |
2570 | disable_8259A_irq(irq); | |
2571 | ||
2572 | - ioapic_write_entry(ioapic, pin, entry); | |
2573 | spin_lock_irqsave(&ioapic_lock, flags); | |
2574 | + __ioapic_write_entry(ioapic, pin, entry); | |
2575 | set_native_irq_info(irq, TARGET_CPUS); | |
2576 | spin_unlock_irqrestore(&ioapic_lock, flags); | |
2577 | ||
2578 | Index: head-2008-12-01/arch/x86/kernel/ldt_32-xen.c | |
2579 | =================================================================== | |
2580 | --- head-2008-12-01.orig/arch/x86/kernel/ldt_32-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
2581 | +++ head-2008-12-01/arch/x86/kernel/ldt_32-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
2582 | @@ -177,16 +177,14 @@ static int read_default_ldt(void __user | |
2583 | { | |
2584 | int err; | |
2585 | unsigned long size; | |
2586 | - void *address; | |
2587 | ||
2588 | err = 0; | |
2589 | - address = &default_ldt[0]; | |
2590 | size = 5*sizeof(struct desc_struct); | |
2591 | if (size > bytecount) | |
2592 | size = bytecount; | |
2593 | ||
2594 | err = size; | |
2595 | - if (copy_to_user(ptr, address, size)) | |
2596 | + if (clear_user(ptr, size)) | |
2597 | err = -EFAULT; | |
2598 | ||
2599 | return err; | |
2600 | Index: head-2008-12-01/arch/x86/kernel/microcode-xen.c | |
2601 | =================================================================== | |
2602 | --- head-2008-12-01.orig/arch/x86/kernel/microcode-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
2603 | +++ head-2008-12-01/arch/x86/kernel/microcode-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
2604 | @@ -1,7 +1,7 @@ | |
2605 | /* | |
2606 | * Intel CPU Microcode Update Driver for Linux | |
2607 | * | |
2608 | - * Copyright (C) 2000-2004 Tigran Aivazian | |
2609 | + * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk> | |
2610 | * 2006 Shaohua Li <shaohua.li@intel.com> | |
2611 | * | |
2612 | * This driver allows to upgrade microcode on Intel processors | |
2613 | @@ -43,7 +43,7 @@ | |
2614 | #include <asm/processor.h> | |
2615 | ||
2616 | MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); | |
2617 | -MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>"); | |
2618 | +MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); | |
2619 | MODULE_LICENSE("GPL"); | |
2620 | ||
2621 | static int verbose; | |
2622 | @@ -195,7 +195,7 @@ static int __init microcode_init (void) | |
2623 | request_microcode(); | |
2624 | ||
2625 | printk(KERN_INFO | |
2626 | - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n"); | |
2627 | + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n"); | |
2628 | return 0; | |
2629 | } | |
2630 | ||
2631 | Index: head-2008-12-01/arch/x86/kernel/mpparse_32-xen.c | |
2632 | =================================================================== | |
2633 | --- head-2008-12-01.orig/arch/x86/kernel/mpparse_32-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
2634 | +++ head-2008-12-01/arch/x86/kernel/mpparse_32-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
2635 | @@ -36,7 +36,7 @@ | |
2636 | ||
2637 | /* Have we found an MP table */ | |
2638 | int smp_found_config; | |
2639 | -unsigned int __initdata maxcpus = NR_CPUS; | |
2640 | +unsigned int __cpuinitdata maxcpus = NR_CPUS; | |
2641 | ||
2642 | /* | |
2643 | * Various Linux-internal data structures created from the | |
2644 | @@ -102,10 +102,10 @@ static int __init mpf_checksum(unsigned | |
2645 | */ | |
2646 | ||
2647 | static int mpc_record; | |
2648 | -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata; | |
2649 | +static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata; | |
2650 | ||
2651 | #ifndef CONFIG_XEN | |
2652 | -static void __devinit MP_processor_info (struct mpc_config_processor *m) | |
2653 | +static void __cpuinit MP_processor_info (struct mpc_config_processor *m) | |
2654 | { | |
2655 | int ver, apicid; | |
2656 | physid_mask_t phys_cpu; | |
2657 | @@ -221,7 +221,7 @@ static void __devinit MP_processor_info | |
2658 | bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; | |
2659 | } | |
2660 | #else | |
2661 | -void __init MP_processor_info (struct mpc_config_processor *m) | |
2662 | +static void __cpuinit MP_processor_info (struct mpc_config_processor *m) | |
2663 | { | |
2664 | num_processors++; | |
2665 | } | |
2666 | @@ -256,8 +256,6 @@ static void __init MP_bus_info (struct m | |
2667 | mp_current_pci_id++; | |
2668 | } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { | |
2669 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; | |
2670 | - } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) { | |
2671 | - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98; | |
2672 | } else { | |
2673 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); | |
2674 | } | |
2675 | @@ -842,7 +840,7 @@ void __init mp_register_lapic_address(u6 | |
2676 | #endif | |
2677 | } | |
2678 | ||
2679 | -void __devinit mp_register_lapic (u8 id, u8 enabled) | |
2680 | +void __cpuinit mp_register_lapic (u8 id, u8 enabled) | |
2681 | { | |
2682 | struct mpc_config_processor processor; | |
2683 | int boot_cpu = 0; | |
2684 | Index: head-2008-12-01/arch/x86/kernel/pci-dma-xen.c | |
2685 | =================================================================== | |
2686 | --- head-2008-12-01.orig/arch/x86/kernel/pci-dma-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
2687 | +++ head-2008-12-01/arch/x86/kernel/pci-dma-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
2688 | @@ -276,7 +276,7 @@ EXPORT_SYMBOL(dma_free_coherent); | |
2689 | int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, | |
2690 | dma_addr_t device_addr, size_t size, int flags) | |
2691 | { | |
2692 | - void __iomem *mem_base; | |
2693 | + void __iomem *mem_base = NULL; | |
2694 | int pages = size >> PAGE_SHIFT; | |
2695 | int bitmap_size = (pages + 31)/32; | |
2696 | ||
2697 | @@ -293,14 +293,12 @@ int dma_declare_coherent_memory(struct d | |
2698 | if (!mem_base) | |
2699 | goto out; | |
2700 | ||
2701 | - dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); | |
2702 | + dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); | |
2703 | if (!dev->dma_mem) | |
2704 | goto out; | |
2705 | - memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem)); | |
2706 | - dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL); | |
2707 | + dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); | |
2708 | if (!dev->dma_mem->bitmap) | |
2709 | goto free1_out; | |
2710 | - memset(dev->dma_mem->bitmap, 0, bitmap_size); | |
2711 | ||
2712 | dev->dma_mem->virt_base = mem_base; | |
2713 | dev->dma_mem->device_base = device_addr; | |
2714 | @@ -315,6 +313,8 @@ int dma_declare_coherent_memory(struct d | |
2715 | free1_out: | |
2716 | kfree(dev->dma_mem->bitmap); | |
2717 | out: | |
2718 | + if (mem_base) | |
2719 | + iounmap(mem_base); | |
2720 | return 0; | |
2721 | } | |
2722 | EXPORT_SYMBOL(dma_declare_coherent_memory); | |
2723 | Index: head-2008-12-01/arch/x86/kernel/process_32-xen.c | |
2724 | =================================================================== | |
2725 | --- head-2008-12-01.orig/arch/x86/kernel/process_32-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
2726 | +++ head-2008-12-01/arch/x86/kernel/process_32-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
2727 | @@ -60,6 +60,7 @@ | |
2728 | ||
2729 | #include <asm/tlbflush.h> | |
2730 | #include <asm/cpu.h> | |
2731 | +#include <asm/pda.h> | |
2732 | ||
2733 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | |
2734 | ||
2735 | @@ -104,28 +105,24 @@ EXPORT_SYMBOL(enable_hlt); | |
2736 | */ | |
2737 | static void poll_idle (void) | |
2738 | { | |
2739 | - local_irq_enable(); | |
2740 | - | |
2741 | - asm volatile( | |
2742 | - "2:" | |
2743 | - "testl %0, %1;" | |
2744 | - "rep; nop;" | |
2745 | - "je 2b;" | |
2746 | - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); | |
2747 | + cpu_relax(); | |
2748 | } | |
2749 | ||
2750 | static void xen_idle(void) | |
2751 | { | |
2752 | - local_irq_disable(); | |
2753 | + current_thread_info()->status &= ~TS_POLLING; | |
2754 | + /* | |
2755 | + * TS_POLLING-cleared state must be visible before we | |
2756 | + * test NEED_RESCHED: | |
2757 | + */ | |
2758 | + smp_mb(); | |
2759 | ||
2760 | - if (need_resched()) | |
2761 | + local_irq_disable(); | |
2762 | + if (!need_resched()) | |
2763 | + safe_halt(); /* enables interrupts racelessly */ | |
2764 | + else | |
2765 | local_irq_enable(); | |
2766 | - else { | |
2767 | - current_thread_info()->status &= ~TS_POLLING; | |
2768 | - smp_mb__after_clear_bit(); | |
2769 | - safe_halt(); | |
2770 | - current_thread_info()->status |= TS_POLLING; | |
2771 | - } | |
2772 | + current_thread_info()->status |= TS_POLLING; | |
2773 | } | |
2774 | #ifdef CONFIG_APM_MODULE | |
2775 | EXPORT_SYMBOL(default_idle); | |
2776 | @@ -250,8 +247,8 @@ void show_regs(struct pt_regs * regs) | |
2777 | regs->eax,regs->ebx,regs->ecx,regs->edx); | |
2778 | printk("ESI: %08lx EDI: %08lx EBP: %08lx", | |
2779 | regs->esi, regs->edi, regs->ebp); | |
2780 | - printk(" DS: %04x ES: %04x\n", | |
2781 | - 0xffff & regs->xds,0xffff & regs->xes); | |
2782 | + printk(" DS: %04x ES: %04x GS: %04x\n", | |
2783 | + 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); | |
2784 | ||
2785 | cr0 = read_cr0(); | |
2786 | cr2 = read_cr2(); | |
2787 | @@ -282,6 +279,7 @@ int kernel_thread(int (*fn)(void *), voi | |
2788 | ||
2789 | regs.xds = __USER_DS; | |
2790 | regs.xes = __USER_DS; | |
2791 | + regs.xgs = __KERNEL_PDA; | |
2792 | regs.orig_eax = -1; | |
2793 | regs.eip = (unsigned long) kernel_thread_helper; | |
2794 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); | |
2795 | @@ -359,7 +357,6 @@ int copy_thread(int nr, unsigned long cl | |
2796 | p->thread.eip = (unsigned long) ret_from_fork; | |
2797 | ||
2798 | savesegment(fs,p->thread.fs); | |
2799 | - savesegment(gs,p->thread.gs); | |
2800 | ||
2801 | tsk = current; | |
2802 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | |
2803 | @@ -438,7 +435,7 @@ void dump_thread(struct pt_regs * regs, | |
2804 | dump->regs.ds = regs->xds; | |
2805 | dump->regs.es = regs->xes; | |
2806 | savesegment(fs,dump->regs.fs); | |
2807 | - savesegment(gs,dump->regs.gs); | |
2808 | + dump->regs.gs = regs->xgs; | |
2809 | dump->regs.orig_eax = regs->orig_eax; | |
2810 | dump->regs.eip = regs->eip; | |
2811 | dump->regs.cs = regs->xcs; | |
2812 | @@ -635,17 +632,19 @@ struct task_struct fastcall * __switch_t | |
2813 | if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL))) | |
2814 | BUG(); | |
2815 | ||
2816 | + /* we're going to use this soon, after a few expensive things */ | |
2817 | + if (next_p->fpu_counter > 5) | |
2818 | + prefetch(&next->i387.fxsave); | |
2819 | + | |
2820 | /* | |
2821 | - * Restore %fs and %gs if needed. | |
2822 | + * Restore %fs if needed. | |
2823 | * | |
2824 | - * Glibc normally makes %fs be zero, and %gs is one of | |
2825 | - * the TLS segments. | |
2826 | + * Glibc normally makes %fs be zero. | |
2827 | */ | |
2828 | if (unlikely(next->fs)) | |
2829 | loadsegment(fs, next->fs); | |
2830 | ||
2831 | - if (next->gs) | |
2832 | - loadsegment(gs, next->gs); | |
2833 | + write_pda(pcurrent, next_p); | |
2834 | ||
2835 | /* | |
2836 | * Now maybe handle debug registers | |
2837 | @@ -655,6 +654,13 @@ struct task_struct fastcall * __switch_t | |
2838 | ||
2839 | disable_tsc(prev_p, next_p); | |
2840 | ||
2841 | + /* If the task has used fpu the last 5 timeslices, just do a full | |
2842 | + * restore of the math state immediately to avoid the trap; the | |
2843 | + * chances of needing FPU soon are obviously high now | |
2844 | + */ | |
2845 | + if (next_p->fpu_counter > 5) | |
2846 | + math_state_restore(); | |
2847 | + | |
2848 | return prev_p; | |
2849 | } | |
2850 | ||
2851 | Index: head-2008-12-01/arch/x86/kernel/quirks-xen.c | |
2852 | =================================================================== | |
2853 | --- head-2008-12-01.orig/arch/x86/kernel/quirks-xen.c 2008-01-28 12:24:19.000000000 +0100 | |
2854 | +++ head-2008-12-01/arch/x86/kernel/quirks-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
2855 | @@ -3,10 +3,12 @@ | |
2856 | */ | |
2857 | #include <linux/pci.h> | |
2858 | #include <linux/irq.h> | |
2859 | +#include <asm/pci-direct.h> | |
2860 | +#include <asm/genapic.h> | |
2861 | +#include <asm/cpu.h> | |
2862 | ||
2863 | #if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI) | |
2864 | - | |
2865 | -static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) | |
2866 | +static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) | |
2867 | { | |
2868 | u8 config, rev; | |
2869 | u32 word; | |
2870 | @@ -14,14 +16,12 @@ static void __devinit quirk_intel_irqbal | |
2871 | /* BIOS may enable hardware IRQ balancing for | |
2872 | * E7520/E7320/E7525(revision ID 0x9 and below) | |
2873 | * based platforms. | |
2874 | - * Disable SW irqbalance/affinity on those platforms. | |
2875 | + * For those platforms, make sure that the genapic is set to 'flat' | |
2876 | */ | |
2877 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); | |
2878 | if (rev > 0x9) | |
2879 | return; | |
2880 | ||
2881 | - printk(KERN_INFO "Intel E7520/7320/7525 detected."); | |
2882 | - | |
2883 | /* enable access to config space*/ | |
2884 | pci_read_config_byte(dev, 0xf4, &config); | |
2885 | pci_write_config_byte(dev, 0xf4, config|0x2); | |
2886 | @@ -30,6 +30,46 @@ static void __devinit quirk_intel_irqbal | |
2887 | raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); | |
2888 | ||
2889 | if (!(word & (1 << 13))) { | |
2890 | +#ifndef CONFIG_XEN | |
2891 | +#ifdef CONFIG_X86_64 | |
2892 | + if (genapic != &apic_flat) | |
2893 | + panic("APIC mode must be flat on this system\n"); | |
2894 | +#elif defined(CONFIG_X86_GENERICARCH) | |
2895 | + if (genapic != &apic_default) | |
2896 | + panic("APIC mode must be default(flat) on this system. Use apic=default\n"); | |
2897 | +#endif | |
2898 | +#endif | |
2899 | + } | |
2900 | + | |
2901 | + /* put back the original value for config space*/ | |
2902 | + if (!(config & 0x2)) | |
2903 | + pci_write_config_byte(dev, 0xf4, config); | |
2904 | +} | |
2905 | + | |
2906 | +void __init quirk_intel_irqbalance(void) | |
2907 | +{ | |
2908 | + u8 config, rev; | |
2909 | + u32 word; | |
2910 | + | |
2911 | + /* BIOS may enable hardware IRQ balancing for | |
2912 | + * E7520/E7320/E7525(revision ID 0x9 and below) | |
2913 | + * based platforms. | |
2914 | + * Disable SW irqbalance/affinity on those platforms. | |
2915 | + */ | |
2916 | + rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION); | |
2917 | + if (rev > 0x9) | |
2918 | + return; | |
2919 | + | |
2920 | + printk(KERN_INFO "Intel E7520/7320/7525 detected."); | |
2921 | + | |
2922 | + /* enable access to config space */ | |
2923 | + config = read_pci_config_byte(0, 0, 0, 0xf4); | |
2924 | + write_pci_config_byte(0, 0, 0, 0xf4, config|0x2); | |
2925 | + | |
2926 | + /* read xTPR register */ | |
2927 | + word = read_pci_config_16(0, 0, 0x40, 0x4c); | |
2928 | + | |
2929 | + if (!(word & (1 << 13))) { | |
2930 | struct xen_platform_op op; | |
2931 | printk(KERN_INFO "Disabling irq balancing and affinity\n"); | |
2932 | op.cmd = XENPF_platform_quirk; | |
2933 | @@ -37,11 +77,12 @@ static void __devinit quirk_intel_irqbal | |
2934 | WARN_ON(HYPERVISOR_platform_op(&op)); | |
2935 | } | |
2936 | ||
2937 | - /* put back the original value for config space*/ | |
2938 | + /* put back the original value for config space */ | |
2939 | if (!(config & 0x2)) | |
2940 | - pci_write_config_byte(dev, 0xf4, config); | |
2941 | + write_pci_config_byte(0, 0, 0, 0xf4, config); | |
2942 | } | |
2943 | -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); | |
2944 | -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); | |
2945 | -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); | |
2946 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance); | |
2947 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance); | |
2948 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance); | |
2949 | + | |
2950 | #endif | |
2951 | Index: head-2008-12-01/arch/x86/kernel/setup_32-xen.c | |
2952 | =================================================================== | |
2953 | --- head-2008-12-01.orig/arch/x86/kernel/setup_32-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
2954 | +++ head-2008-12-01/arch/x86/kernel/setup_32-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
2955 | @@ -76,9 +76,6 @@ | |
2956 | #include <xen/interface/kexec.h> | |
2957 | #endif | |
2958 | ||
2959 | -/* Forward Declaration. */ | |
2960 | -void __init find_max_pfn(void); | |
2961 | - | |
2962 | static int xen_panic_event(struct notifier_block *, unsigned long, void *); | |
2963 | static struct notifier_block xen_panic_block = { | |
2964 | xen_panic_event, NULL, 0 /* try to go last */ | |
2965 | @@ -89,14 +86,11 @@ int disable_pse __devinitdata = 0; | |
2966 | /* | |
2967 | * Machine setup.. | |
2968 | */ | |
2969 | - | |
2970 | -#ifdef CONFIG_EFI | |
2971 | -int efi_enabled = 0; | |
2972 | -EXPORT_SYMBOL(efi_enabled); | |
2973 | -#endif | |
2974 | +extern struct resource code_resource; | |
2975 | +extern struct resource data_resource; | |
2976 | ||
2977 | /* cpu data as detected by the assembly code in head.S */ | |
2978 | -struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; | |
2979 | +struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; | |
2980 | /* common cpu data for all cpus */ | |
2981 | struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; | |
2982 | EXPORT_SYMBOL(boot_cpu_data); | |
2983 | @@ -112,12 +106,6 @@ unsigned int machine_submodel_id; | |
2984 | unsigned int BIOS_revision; | |
2985 | unsigned int mca_pentium_flag; | |
2986 | ||
2987 | -/* For PCI or other memory-mapped resources */ | |
2988 | -unsigned long pci_mem_start = 0x10000000; | |
2989 | -#ifdef CONFIG_PCI | |
2990 | -EXPORT_SYMBOL(pci_mem_start); | |
2991 | -#endif | |
2992 | - | |
2993 | /* Boot loader ID as an integer, for the benefit of proc_dointvec */ | |
2994 | int bootloader_type; | |
2995 | ||
2996 | @@ -150,10 +138,6 @@ struct ist_info ist_info; | |
2997 | defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) | |
2998 | EXPORT_SYMBOL(ist_info); | |
2999 | #endif | |
3000 | -struct e820map e820; | |
3001 | -#ifdef CONFIG_XEN | |
3002 | -struct e820map machine_e820; | |
3003 | -#endif | |
3004 | ||
3005 | extern void early_cpu_init(void); | |
3006 | extern int root_mountflags; | |
3007 | @@ -168,209 +152,6 @@ static char command_line[COMMAND_LINE_SI | |
3008 | ||
3009 | unsigned char __initdata boot_params[PARAM_SIZE]; | |
3010 | ||
3011 | -static struct resource data_resource = { | |
3012 | - .name = "Kernel data", | |
3013 | - .start = 0, | |
3014 | - .end = 0, | |
3015 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
3016 | -}; | |
3017 | - | |
3018 | -static struct resource code_resource = { | |
3019 | - .name = "Kernel code", | |
3020 | - .start = 0, | |
3021 | - .end = 0, | |
3022 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
3023 | -}; | |
3024 | - | |
3025 | -static struct resource system_rom_resource = { | |
3026 | - .name = "System ROM", | |
3027 | - .start = 0xf0000, | |
3028 | - .end = 0xfffff, | |
3029 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
3030 | -}; | |
3031 | - | |
3032 | -static struct resource extension_rom_resource = { | |
3033 | - .name = "Extension ROM", | |
3034 | - .start = 0xe0000, | |
3035 | - .end = 0xeffff, | |
3036 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
3037 | -}; | |
3038 | - | |
3039 | -static struct resource adapter_rom_resources[] = { { | |
3040 | - .name = "Adapter ROM", | |
3041 | - .start = 0xc8000, | |
3042 | - .end = 0, | |
3043 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
3044 | -}, { | |
3045 | - .name = "Adapter ROM", | |
3046 | - .start = 0, | |
3047 | - .end = 0, | |
3048 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
3049 | -}, { | |
3050 | - .name = "Adapter ROM", | |
3051 | - .start = 0, | |
3052 | - .end = 0, | |
3053 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
3054 | -}, { | |
3055 | - .name = "Adapter ROM", | |
3056 | - .start = 0, | |
3057 | - .end = 0, | |
3058 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
3059 | -}, { | |
3060 | - .name = "Adapter ROM", | |
3061 | - .start = 0, | |
3062 | - .end = 0, | |
3063 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
3064 | -}, { | |
3065 | - .name = "Adapter ROM", | |
3066 | - .start = 0, | |
3067 | - .end = 0, | |
3068 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
3069 | -} }; | |
3070 | - | |
3071 | -static struct resource video_rom_resource = { | |
3072 | - .name = "Video ROM", | |
3073 | - .start = 0xc0000, | |
3074 | - .end = 0xc7fff, | |
3075 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | |
3076 | -}; | |
3077 | - | |
3078 | -static struct resource video_ram_resource = { | |
3079 | - .name = "Video RAM area", | |
3080 | - .start = 0xa0000, | |
3081 | - .end = 0xbffff, | |
3082 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM | |
3083 | -}; | |
3084 | - | |
3085 | -static struct resource standard_io_resources[] = { { | |
3086 | - .name = "dma1", | |
3087 | - .start = 0x0000, | |
3088 | - .end = 0x001f, | |
3089 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
3090 | -}, { | |
3091 | - .name = "pic1", | |
3092 | - .start = 0x0020, | |
3093 | - .end = 0x0021, | |
3094 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
3095 | -}, { | |
3096 | - .name = "timer0", | |
3097 | - .start = 0x0040, | |
3098 | - .end = 0x0043, | |
3099 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
3100 | -}, { | |
3101 | - .name = "timer1", | |
3102 | - .start = 0x0050, | |
3103 | - .end = 0x0053, | |
3104 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
3105 | -}, { | |
3106 | - .name = "keyboard", | |
3107 | - .start = 0x0060, | |
3108 | - .end = 0x006f, | |
3109 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
3110 | -}, { | |
3111 | - .name = "dma page reg", | |
3112 | - .start = 0x0080, | |
3113 | - .end = 0x008f, | |
3114 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
3115 | -}, { | |
3116 | - .name = "pic2", | |
3117 | - .start = 0x00a0, | |
3118 | - .end = 0x00a1, | |
3119 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
3120 | -}, { | |
3121 | - .name = "dma2", | |
3122 | - .start = 0x00c0, | |
3123 | - .end = 0x00df, | |
3124 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
3125 | -}, { | |
3126 | - .name = "fpu", | |
3127 | - .start = 0x00f0, | |
3128 | - .end = 0x00ff, | |
3129 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | |
3130 | -} }; | |
3131 | - | |
3132 | -#define romsignature(x) (*(unsigned short *)(x) == 0xaa55) | |
3133 | - | |
3134 | -static int __init romchecksum(unsigned char *rom, unsigned long length) | |
3135 | -{ | |
3136 | - unsigned char *p, sum = 0; | |
3137 | - | |
3138 | - for (p = rom; p < rom + length; p++) | |
3139 | - sum += *p; | |
3140 | - return sum == 0; | |
3141 | -} | |
3142 | - | |
3143 | -static void __init probe_roms(void) | |
3144 | -{ | |
3145 | - unsigned long start, length, upper; | |
3146 | - unsigned char *rom; | |
3147 | - int i; | |
3148 | - | |
3149 | -#ifdef CONFIG_XEN | |
3150 | - /* Nothing to do if not running in dom0. */ | |
3151 | - if (!is_initial_xendomain()) | |
3152 | - return; | |
3153 | -#endif | |
3154 | - | |
3155 | - /* video rom */ | |
3156 | - upper = adapter_rom_resources[0].start; | |
3157 | - for (start = video_rom_resource.start; start < upper; start += 2048) { | |
3158 | - rom = isa_bus_to_virt(start); | |
3159 | - if (!romsignature(rom)) | |
3160 | - continue; | |
3161 | - | |
3162 | - video_rom_resource.start = start; | |
3163 | - | |
3164 | - /* 0 < length <= 0x7f * 512, historically */ | |
3165 | - length = rom[2] * 512; | |
3166 | - | |
3167 | - /* if checksum okay, trust length byte */ | |
3168 | - if (length && romchecksum(rom, length)) | |
3169 | - video_rom_resource.end = start + length - 1; | |
3170 | - | |
3171 | - request_resource(&iomem_resource, &video_rom_resource); | |
3172 | - break; | |
3173 | - } | |
3174 | - | |
3175 | - start = (video_rom_resource.end + 1 + 2047) & ~2047UL; | |
3176 | - if (start < upper) | |
3177 | - start = upper; | |
3178 | - | |
3179 | - /* system rom */ | |
3180 | - request_resource(&iomem_resource, &system_rom_resource); | |
3181 | - upper = system_rom_resource.start; | |
3182 | - | |
3183 | - /* check for extension rom (ignore length byte!) */ | |
3184 | - rom = isa_bus_to_virt(extension_rom_resource.start); | |
3185 | - if (romsignature(rom)) { | |
3186 | - length = extension_rom_resource.end - extension_rom_resource.start + 1; | |
3187 | - if (romchecksum(rom, length)) { | |
3188 | - request_resource(&iomem_resource, &extension_rom_resource); | |
3189 | - upper = extension_rom_resource.start; | |
3190 | - } | |
3191 | - } | |
3192 | - | |
3193 | - /* check for adapter roms on 2k boundaries */ | |
3194 | - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { | |
3195 | - rom = isa_bus_to_virt(start); | |
3196 | - if (!romsignature(rom)) | |
3197 | - continue; | |
3198 | - | |
3199 | - /* 0 < length <= 0x7f * 512, historically */ | |
3200 | - length = rom[2] * 512; | |
3201 | - | |
3202 | - /* but accept any length that fits if checksum okay */ | |
3203 | - if (!length || start + length > upper || !romchecksum(rom, length)) | |
3204 | - continue; | |
3205 | - | |
3206 | - adapter_rom_resources[i].start = start; | |
3207 | - adapter_rom_resources[i].end = start + length - 1; | |
3208 | - request_resource(&iomem_resource, &adapter_rom_resources[i]); | |
3209 | - | |
3210 | - start = adapter_rom_resources[i++].end & ~2047UL; | |
3211 | - } | |
3212 | -} | |
3213 | - | |
3214 | /* | |
3215 | * Point at the empty zero page to start with. We map the real shared_info | |
3216 | * page as soon as fixmap is up and running. | |
3217 | @@ -386,353 +167,6 @@ EXPORT_SYMBOL(phys_to_machine_mapping); | |
3218 | start_info_t *xen_start_info; | |
3219 | EXPORT_SYMBOL(xen_start_info); | |
3220 | ||
3221 | -void __init add_memory_region(unsigned long long start, | |
3222 | - unsigned long long size, int type) | |
3223 | -{ | |
3224 | - int x; | |
3225 | - | |
3226 | - if (!efi_enabled) { | |
3227 | - x = e820.nr_map; | |
3228 | - | |
3229 | - if (x == E820MAX) { | |
3230 | - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | |
3231 | - return; | |
3232 | - } | |
3233 | - | |
3234 | - e820.map[x].addr = start; | |
3235 | - e820.map[x].size = size; | |
3236 | - e820.map[x].type = type; | |
3237 | - e820.nr_map++; | |
3238 | - } | |
3239 | -} /* add_memory_region */ | |
3240 | - | |
3241 | -static void __init limit_regions(unsigned long long size) | |
3242 | -{ | |
3243 | - unsigned long long current_addr = 0; | |
3244 | - int i; | |
3245 | - | |
3246 | - if (efi_enabled) { | |
3247 | - efi_memory_desc_t *md; | |
3248 | - void *p; | |
3249 | - | |
3250 | - for (p = memmap.map, i = 0; p < memmap.map_end; | |
3251 | - p += memmap.desc_size, i++) { | |
3252 | - md = p; | |
3253 | - current_addr = md->phys_addr + (md->num_pages << 12); | |
3254 | - if (md->type == EFI_CONVENTIONAL_MEMORY) { | |
3255 | - if (current_addr >= size) { | |
3256 | - md->num_pages -= | |
3257 | - (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); | |
3258 | - memmap.nr_map = i + 1; | |
3259 | - return; | |
3260 | - } | |
3261 | - } | |
3262 | - } | |
3263 | - } | |
3264 | - for (i = 0; i < e820.nr_map; i++) { | |
3265 | - current_addr = e820.map[i].addr + e820.map[i].size; | |
3266 | - if (current_addr < size) | |
3267 | - continue; | |
3268 | - | |
3269 | - if (e820.map[i].type != E820_RAM) | |
3270 | - continue; | |
3271 | - | |
3272 | - if (e820.map[i].addr >= size) { | |
3273 | - /* | |
3274 | - * This region starts past the end of the | |
3275 | - * requested size, skip it completely. | |
3276 | - */ | |
3277 | - e820.nr_map = i; | |
3278 | - } else { | |
3279 | - e820.nr_map = i + 1; | |
3280 | - e820.map[i].size -= current_addr - size; | |
3281 | - } | |
3282 | - return; | |
3283 | - } | |
3284 | -#ifdef CONFIG_XEN | |
3285 | - if (i==e820.nr_map && current_addr < size) { | |
3286 | - /* | |
3287 | - * The e820 map finished before our requested size so | |
3288 | - * extend the final entry to the requested address. | |
3289 | - */ | |
3290 | - --i; | |
3291 | - if (e820.map[i].type == E820_RAM) | |
3292 | - e820.map[i].size -= current_addr - size; | |
3293 | - else | |
3294 | - add_memory_region(current_addr, size - current_addr, E820_RAM); | |
3295 | - } | |
3296 | -#endif | |
3297 | -} | |
3298 | - | |
3299 | -#define E820_DEBUG 1 | |
3300 | - | |
3301 | -static void __init print_memory_map(char *who) | |
3302 | -{ | |
3303 | - int i; | |
3304 | - | |
3305 | - for (i = 0; i < e820.nr_map; i++) { | |
3306 | - printk(" %s: %016Lx - %016Lx ", who, | |
3307 | - e820.map[i].addr, | |
3308 | - e820.map[i].addr + e820.map[i].size); | |
3309 | - switch (e820.map[i].type) { | |
3310 | - case E820_RAM: printk("(usable)\n"); | |
3311 | - break; | |
3312 | - case E820_RESERVED: | |
3313 | - printk("(reserved)\n"); | |
3314 | - break; | |
3315 | - case E820_ACPI: | |
3316 | - printk("(ACPI data)\n"); | |
3317 | - break; | |
3318 | - case E820_NVS: | |
3319 | - printk("(ACPI NVS)\n"); | |
3320 | - break; | |
3321 | - default: printk("type %lu\n", e820.map[i].type); | |
3322 | - break; | |
3323 | - } | |
3324 | - } | |
3325 | -} | |
3326 | - | |
3327 | -/* | |
3328 | - * Sanitize the BIOS e820 map. | |
3329 | - * | |
3330 | - * Some e820 responses include overlapping entries. The following | |
3331 | - * replaces the original e820 map with a new one, removing overlaps. | |
3332 | - * | |
3333 | - */ | |
3334 | -struct change_member { | |
3335 | - struct e820entry *pbios; /* pointer to original bios entry */ | |
3336 | - unsigned long long addr; /* address for this change point */ | |
3337 | -}; | |
3338 | -static struct change_member change_point_list[2*E820MAX] __initdata; | |
3339 | -static struct change_member *change_point[2*E820MAX] __initdata; | |
3340 | -static struct e820entry *overlap_list[E820MAX] __initdata; | |
3341 | -static struct e820entry new_bios[E820MAX] __initdata; | |
3342 | - | |
3343 | -int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | |
3344 | -{ | |
3345 | - struct change_member *change_tmp; | |
3346 | - unsigned long current_type, last_type; | |
3347 | - unsigned long long last_addr; | |
3348 | - int chgidx, still_changing; | |
3349 | - int overlap_entries; | |
3350 | - int new_bios_entry; | |
3351 | - int old_nr, new_nr, chg_nr; | |
3352 | - int i; | |
3353 | - | |
3354 | - /* | |
3355 | - Visually we're performing the following (1,2,3,4 = memory types)... | |
3356 | - | |
3357 | - Sample memory map (w/overlaps): | |
3358 | - ____22__________________ | |
3359 | - ______________________4_ | |
3360 | - ____1111________________ | |
3361 | - _44_____________________ | |
3362 | - 11111111________________ | |
3363 | - ____________________33__ | |
3364 | - ___________44___________ | |
3365 | - __________33333_________ | |
3366 | - ______________22________ | |
3367 | - ___________________2222_ | |
3368 | - _________111111111______ | |
3369 | - _____________________11_ | |
3370 | - _________________4______ | |
3371 | - | |
3372 | - Sanitized equivalent (no overlap): | |
3373 | - 1_______________________ | |
3374 | - _44_____________________ | |
3375 | - ___1____________________ | |
3376 | - ____22__________________ | |
3377 | - ______11________________ | |
3378 | - _________1______________ | |
3379 | - __________3_____________ | |
3380 | - ___________44___________ | |
3381 | - _____________33_________ | |
3382 | - _______________2________ | |
3383 | - ________________1_______ | |
3384 | - _________________4______ | |
3385 | - ___________________2____ | |
3386 | - ____________________33__ | |
3387 | - ______________________4_ | |
3388 | - */ | |
3389 | - | |
3390 | - /* if there's only one memory region, don't bother */ | |
3391 | - if (*pnr_map < 2) | |
3392 | - return -1; | |
3393 | - | |
3394 | - old_nr = *pnr_map; | |
3395 | - | |
3396 | - /* bail out if we find any unreasonable addresses in bios map */ | |
3397 | - for (i=0; i<old_nr; i++) | |
3398 | - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) | |
3399 | - return -1; | |
3400 | - | |
3401 | - /* create pointers for initial change-point information (for sorting) */ | |
3402 | - for (i=0; i < 2*old_nr; i++) | |
3403 | - change_point[i] = &change_point_list[i]; | |
3404 | - | |
3405 | - /* record all known change-points (starting and ending addresses), | |
3406 | - omitting those that are for empty memory regions */ | |
3407 | - chgidx = 0; | |
3408 | - for (i=0; i < old_nr; i++) { | |
3409 | - if (biosmap[i].size != 0) { | |
3410 | - change_point[chgidx]->addr = biosmap[i].addr; | |
3411 | - change_point[chgidx++]->pbios = &biosmap[i]; | |
3412 | - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; | |
3413 | - change_point[chgidx++]->pbios = &biosmap[i]; | |
3414 | - } | |
3415 | - } | |
3416 | - chg_nr = chgidx; /* true number of change-points */ | |
3417 | - | |
3418 | - /* sort change-point list by memory addresses (low -> high) */ | |
3419 | - still_changing = 1; | |
3420 | - while (still_changing) { | |
3421 | - still_changing = 0; | |
3422 | - for (i=1; i < chg_nr; i++) { | |
3423 | - /* if <current_addr> > <last_addr>, swap */ | |
3424 | - /* or, if current=<start_addr> & last=<end_addr>, swap */ | |
3425 | - if ((change_point[i]->addr < change_point[i-1]->addr) || | |
3426 | - ((change_point[i]->addr == change_point[i-1]->addr) && | |
3427 | - (change_point[i]->addr == change_point[i]->pbios->addr) && | |
3428 | - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) | |
3429 | - ) | |
3430 | - { | |
3431 | - change_tmp = change_point[i]; | |
3432 | - change_point[i] = change_point[i-1]; | |
3433 | - change_point[i-1] = change_tmp; | |
3434 | - still_changing=1; | |
3435 | - } | |
3436 | - } | |
3437 | - } | |
3438 | - | |
3439 | - /* create a new bios memory map, removing overlaps */ | |
3440 | - overlap_entries=0; /* number of entries in the overlap table */ | |
3441 | - new_bios_entry=0; /* index for creating new bios map entries */ | |
3442 | - last_type = 0; /* start with undefined memory type */ | |
3443 | - last_addr = 0; /* start with 0 as last starting address */ | |
3444 | - /* loop through change-points, determining affect on the new bios map */ | |
3445 | - for (chgidx=0; chgidx < chg_nr; chgidx++) | |
3446 | - { | |
3447 | - /* keep track of all overlapping bios entries */ | |
3448 | - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) | |
3449 | - { | |
3450 | - /* add map entry to overlap list (> 1 entry implies an overlap) */ | |
3451 | - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; | |
3452 | - } | |
3453 | - else | |
3454 | - { | |
3455 | - /* remove entry from list (order independent, so swap with last) */ | |
3456 | - for (i=0; i<overlap_entries; i++) | |
3457 | - { | |
3458 | - if (overlap_list[i] == change_point[chgidx]->pbios) | |
3459 | - overlap_list[i] = overlap_list[overlap_entries-1]; | |
3460 | - } | |
3461 | - overlap_entries--; | |
3462 | - } | |
3463 | - /* if there are overlapping entries, decide which "type" to use */ | |
3464 | - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ | |
3465 | - current_type = 0; | |
3466 | - for (i=0; i<overlap_entries; i++) | |
3467 | - if (overlap_list[i]->type > current_type) | |
3468 | - current_type = overlap_list[i]->type; | |
3469 | - /* continue building up new bios map based on this information */ | |
3470 | - if (current_type != last_type) { | |
3471 | - if (last_type != 0) { | |
3472 | - new_bios[new_bios_entry].size = | |
3473 | - change_point[chgidx]->addr - last_addr; | |
3474 | - /* move forward only if the new size was non-zero */ | |
3475 | - if (new_bios[new_bios_entry].size != 0) | |
3476 | - if (++new_bios_entry >= E820MAX) | |
3477 | - break; /* no more space left for new bios entries */ | |
3478 | - } | |
3479 | - if (current_type != 0) { | |
3480 | - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; | |
3481 | - new_bios[new_bios_entry].type = current_type; | |
3482 | - last_addr=change_point[chgidx]->addr; | |
3483 | - } | |
3484 | - last_type = current_type; | |
3485 | - } | |
3486 | - } | |
3487 | - new_nr = new_bios_entry; /* retain count for new bios entries */ | |
3488 | - | |
3489 | - /* copy new bios mapping into original location */ | |
3490 | - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); | |
3491 | - *pnr_map = new_nr; | |
3492 | - | |
3493 | - return 0; | |
3494 | -} | |
3495 | - | |
3496 | -/* | |
3497 | - * Copy the BIOS e820 map into a safe place. | |
3498 | - * | |
3499 | - * Sanity-check it while we're at it.. | |
3500 | - * | |
3501 | - * If we're lucky and live on a modern system, the setup code | |
3502 | - * will have given us a memory map that we can use to properly | |
3503 | - * set up memory. If we aren't, we'll fake a memory map. | |
3504 | - * | |
3505 | - * We check to see that the memory map contains at least 2 elements | |
3506 | - * before we'll use it, because the detection code in setup.S may | |
3507 | - * not be perfect and most every PC known to man has two memory | |
3508 | - * regions: one from 0 to 640k, and one from 1mb up. (The IBM | |
3509 | - * thinkpad 560x, for example, does not cooperate with the memory | |
3510 | - * detection code.) | |
3511 | - */ | |
3512 | -int __init copy_e820_map(struct e820entry * biosmap, int nr_map) | |
3513 | -{ | |
3514 | -#ifndef CONFIG_XEN | |
3515 | - /* Only one memory region (or negative)? Ignore it */ | |
3516 | - if (nr_map < 2) | |
3517 | - return -1; | |
3518 | -#else | |
3519 | - BUG_ON(nr_map < 1); | |
3520 | -#endif | |
3521 | - | |
3522 | - do { | |
3523 | - unsigned long long start = biosmap->addr; | |
3524 | - unsigned long long size = biosmap->size; | |
3525 | - unsigned long long end = start + size; | |
3526 | - unsigned long type = biosmap->type; | |
3527 | - | |
3528 | - /* Overflow in 64 bits? Ignore the memory map. */ | |
3529 | - if (start > end) | |
3530 | - return -1; | |
3531 | - | |
3532 | -#ifndef CONFIG_XEN | |
3533 | - /* | |
3534 | - * Some BIOSes claim RAM in the 640k - 1M region. | |
3535 | - * Not right. Fix it up. | |
3536 | - */ | |
3537 | - if (type == E820_RAM) { | |
3538 | - if (start < 0x100000ULL && end > 0xA0000ULL) { | |
3539 | - if (start < 0xA0000ULL) | |
3540 | - add_memory_region(start, 0xA0000ULL-start, type); | |
3541 | - if (end <= 0x100000ULL) | |
3542 | - continue; | |
3543 | - start = 0x100000ULL; | |
3544 | - size = end - start; | |
3545 | - } | |
3546 | - } | |
3547 | -#endif | |
3548 | - add_memory_region(start, size, type); | |
3549 | - } while (biosmap++,--nr_map); | |
3550 | - | |
3551 | -#ifdef CONFIG_XEN | |
3552 | - if (is_initial_xendomain()) { | |
3553 | - struct xen_memory_map memmap; | |
3554 | - | |
3555 | - memmap.nr_entries = E820MAX; | |
3556 | - set_xen_guest_handle(memmap.buffer, machine_e820.map); | |
3557 | - | |
3558 | - if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)) | |
3559 | - BUG(); | |
3560 | - machine_e820.nr_map = memmap.nr_entries; | |
3561 | - } else | |
3562 | - machine_e820 = e820; | |
3563 | -#endif | |
3564 | - | |
3565 | - return 0; | |
3566 | -} | |
3567 | - | |
3568 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) | |
3569 | struct edd edd; | |
3570 | #ifdef CONFIG_EDD_MODULE | |
3571 | @@ -758,7 +192,7 @@ static inline void copy_edd(void) | |
3572 | } | |
3573 | #endif | |
3574 | ||
3575 | -static int __initdata user_defined_memmap = 0; | |
3576 | +int __initdata user_defined_memmap = 0; | |
3577 | ||
3578 | /* | |
3579 | * "mem=nopentium" disables the 4MB page tables. | |
3580 | @@ -795,51 +229,6 @@ static int __init parse_mem(char *arg) | |
3581 | } | |
3582 | early_param("mem", parse_mem); | |
3583 | ||
3584 | -static int __init parse_memmap(char *arg) | |
3585 | -{ | |
3586 | - if (!arg) | |
3587 | - return -EINVAL; | |
3588 | - | |
3589 | - if (strcmp(arg, "exactmap") == 0) { | |
3590 | -#ifdef CONFIG_CRASH_DUMP | |
3591 | - /* If we are doing a crash dump, we | |
3592 | - * still need to know the real mem | |
3593 | - * size before original memory map is | |
3594 | - * reset. | |
3595 | - */ | |
3596 | - find_max_pfn(); | |
3597 | - saved_max_pfn = max_pfn; | |
3598 | -#endif | |
3599 | - e820.nr_map = 0; | |
3600 | - user_defined_memmap = 1; | |
3601 | - } else { | |
3602 | - /* If the user specifies memory size, we | |
3603 | - * limit the BIOS-provided memory map to | |
3604 | - * that size. exactmap can be used to specify | |
3605 | - * the exact map. mem=number can be used to | |
3606 | - * trim the existing memory map. | |
3607 | - */ | |
3608 | - unsigned long long start_at, mem_size; | |
3609 | - | |
3610 | - mem_size = memparse(arg, &arg); | |
3611 | - if (*arg == '@') { | |
3612 | - start_at = memparse(arg+1, &arg); | |
3613 | - add_memory_region(start_at, mem_size, E820_RAM); | |
3614 | - } else if (*arg == '#') { | |
3615 | - start_at = memparse(arg+1, &arg); | |
3616 | - add_memory_region(start_at, mem_size, E820_ACPI); | |
3617 | - } else if (*arg == '$') { | |
3618 | - start_at = memparse(arg+1, &arg); | |
3619 | - add_memory_region(start_at, mem_size, E820_RESERVED); | |
3620 | - } else { | |
3621 | - limit_regions(mem_size); | |
3622 | - user_defined_memmap = 1; | |
3623 | - } | |
3624 | - } | |
3625 | - return 0; | |
3626 | -} | |
3627 | -early_param("memmap", parse_memmap); | |
3628 | - | |
3629 | #ifdef CONFIG_PROC_VMCORE | |
3630 | /* elfcorehdr= specifies the location of elf core header | |
3631 | * stored by the crashed kernel. | |
3632 | @@ -906,127 +295,6 @@ early_param("reservetop", parse_reservet | |
3633 | #endif | |
3634 | ||
3635 | /* | |
3636 | - * Callback for efi_memory_walk. | |
3637 | - */ | |
3638 | -static int __init | |
3639 | -efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) | |
3640 | -{ | |
3641 | - unsigned long *max_pfn = arg, pfn; | |
3642 | - | |
3643 | - if (start < end) { | |
3644 | - pfn = PFN_UP(end -1); | |
3645 | - if (pfn > *max_pfn) | |
3646 | - *max_pfn = pfn; | |
3647 | - } | |
3648 | - return 0; | |
3649 | -} | |
3650 | - | |
3651 | -static int __init | |
3652 | -efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) | |
3653 | -{ | |
3654 | - memory_present(0, PFN_UP(start), PFN_DOWN(end)); | |
3655 | - return 0; | |
3656 | -} | |
3657 | - | |
3658 | -/* | |
3659 | - * This function checks if any part of the range <start,end> is mapped | |
3660 | - * with type. | |
3661 | - */ | |
3662 | -int | |
3663 | -e820_any_mapped(u64 start, u64 end, unsigned type) | |
3664 | -{ | |
3665 | - int i; | |
3666 | - | |
3667 | -#ifndef CONFIG_XEN | |
3668 | - for (i = 0; i < e820.nr_map; i++) { | |
3669 | - const struct e820entry *ei = &e820.map[i]; | |
3670 | -#else | |
3671 | - if (!is_initial_xendomain()) | |
3672 | - return 0; | |
3673 | - for (i = 0; i < machine_e820.nr_map; ++i) { | |
3674 | - const struct e820entry *ei = &machine_e820.map[i]; | |
3675 | -#endif | |
3676 | - | |
3677 | - if (type && ei->type != type) | |
3678 | - continue; | |
3679 | - if (ei->addr >= end || ei->addr + ei->size <= start) | |
3680 | - continue; | |
3681 | - return 1; | |
3682 | - } | |
3683 | - return 0; | |
3684 | -} | |
3685 | -EXPORT_SYMBOL_GPL(e820_any_mapped); | |
3686 | - | |
3687 | - /* | |
3688 | - * This function checks if the entire range <start,end> is mapped with type. | |
3689 | - * | |
3690 | - * Note: this function only works correct if the e820 table is sorted and | |
3691 | - * not-overlapping, which is the case | |
3692 | - */ | |
3693 | -int __init | |
3694 | -e820_all_mapped(unsigned long s, unsigned long e, unsigned type) | |
3695 | -{ | |
3696 | - u64 start = s; | |
3697 | - u64 end = e; | |
3698 | - int i; | |
3699 | - | |
3700 | -#ifndef CONFIG_XEN | |
3701 | - for (i = 0; i < e820.nr_map; i++) { | |
3702 | - struct e820entry *ei = &e820.map[i]; | |
3703 | -#else | |
3704 | - if (!is_initial_xendomain()) | |
3705 | - return 0; | |
3706 | - for (i = 0; i < machine_e820.nr_map; ++i) { | |
3707 | - const struct e820entry *ei = &machine_e820.map[i]; | |
3708 | -#endif | |
3709 | - if (type && ei->type != type) | |
3710 | - continue; | |
3711 | - /* is the region (part) in overlap with the current region ?*/ | |
3712 | - if (ei->addr >= end || ei->addr + ei->size <= start) | |
3713 | - continue; | |
3714 | - /* if the region is at the beginning of <start,end> we move | |
3715 | - * start to the end of the region since it's ok until there | |
3716 | - */ | |
3717 | - if (ei->addr <= start) | |
3718 | - start = ei->addr + ei->size; | |
3719 | - /* if start is now at or beyond end, we're done, full | |
3720 | - * coverage */ | |
3721 | - if (start >= end) | |
3722 | - return 1; /* we're done */ | |
3723 | - } | |
3724 | - return 0; | |
3725 | -} | |
3726 | - | |
3727 | -/* | |
3728 | - * Find the highest page frame number we have available | |
3729 | - */ | |
3730 | -void __init find_max_pfn(void) | |
3731 | -{ | |
3732 | - int i; | |
3733 | - | |
3734 | - max_pfn = 0; | |
3735 | - if (efi_enabled) { | |
3736 | - efi_memmap_walk(efi_find_max_pfn, &max_pfn); | |
3737 | - efi_memmap_walk(efi_memory_present_wrapper, NULL); | |
3738 | - return; | |
3739 | - } | |
3740 | - | |
3741 | - for (i = 0; i < e820.nr_map; i++) { | |
3742 | - unsigned long start, end; | |
3743 | - /* RAM? */ | |
3744 | - if (e820.map[i].type != E820_RAM) | |
3745 | - continue; | |
3746 | - start = PFN_UP(e820.map[i].addr); | |
3747 | - end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | |
3748 | - if (start >= end) | |
3749 | - continue; | |
3750 | - if (end > max_pfn) | |
3751 | - max_pfn = end; | |
3752 | - memory_present(0, start, end); | |
3753 | - } | |
3754 | -} | |
3755 | - | |
3756 | -/* | |
3757 | * Determine low and high memory ranges: | |
3758 | */ | |
3759 | unsigned long __init find_max_low_pfn(void) | |
3760 | @@ -1085,77 +353,6 @@ unsigned long __init find_max_low_pfn(vo | |
3761 | return max_low_pfn; | |
3762 | } | |
3763 | ||
3764 | -/* | |
3765 | - * Free all available memory for boot time allocation. Used | |
3766 | - * as a callback function by efi_memory_walk() | |
3767 | - */ | |
3768 | - | |
3769 | -static int __init | |
3770 | -free_available_memory(unsigned long start, unsigned long end, void *arg) | |
3771 | -{ | |
3772 | - /* check max_low_pfn */ | |
3773 | - if (start >= (max_low_pfn << PAGE_SHIFT)) | |
3774 | - return 0; | |
3775 | - if (end >= (max_low_pfn << PAGE_SHIFT)) | |
3776 | - end = max_low_pfn << PAGE_SHIFT; | |
3777 | - if (start < end) | |
3778 | - free_bootmem(start, end - start); | |
3779 | - | |
3780 | - return 0; | |
3781 | -} | |
3782 | -/* | |
3783 | - * Register fully available low RAM pages with the bootmem allocator. | |
3784 | - */ | |
3785 | -static void __init register_bootmem_low_pages(unsigned long max_low_pfn) | |
3786 | -{ | |
3787 | - int i; | |
3788 | - | |
3789 | - if (efi_enabled) { | |
3790 | - efi_memmap_walk(free_available_memory, NULL); | |
3791 | - return; | |
3792 | - } | |
3793 | - for (i = 0; i < e820.nr_map; i++) { | |
3794 | - unsigned long curr_pfn, last_pfn, size; | |
3795 | - /* | |
3796 | - * Reserve usable low memory | |
3797 | - */ | |
3798 | - if (e820.map[i].type != E820_RAM) | |
3799 | - continue; | |
3800 | - /* | |
3801 | - * We are rounding up the start address of usable memory: | |
3802 | - */ | |
3803 | - curr_pfn = PFN_UP(e820.map[i].addr); | |
3804 | - if (curr_pfn >= max_low_pfn) | |
3805 | - continue; | |
3806 | - /* | |
3807 | - * ... and at the end of the usable range downwards: | |
3808 | - */ | |
3809 | - last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | |
3810 | - | |
3811 | -#ifdef CONFIG_XEN | |
3812 | - /* | |
3813 | - * Truncate to the number of actual pages currently | |
3814 | - * present. | |
3815 | - */ | |
3816 | - if (last_pfn > xen_start_info->nr_pages) | |
3817 | - last_pfn = xen_start_info->nr_pages; | |
3818 | -#endif | |
3819 | - | |
3820 | - if (last_pfn > max_low_pfn) | |
3821 | - last_pfn = max_low_pfn; | |
3822 | - | |
3823 | - /* | |
3824 | - * .. finally, did all the rounding and playing | |
3825 | - * around just make the area go away? | |
3826 | - */ | |
3827 | - if (last_pfn <= curr_pfn) | |
3828 | - continue; | |
3829 | - | |
3830 | - size = last_pfn - curr_pfn; | |
3831 | - free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); | |
3832 | - } | |
3833 | -} | |
3834 | - | |
3835 | #ifndef CONFIG_XEN | |
3836 | /* | |
3837 | * workaround for Dell systems that neglect to reserve EBDA | |
3838 | @@ -1245,8 +442,8 @@ void __init setup_bootmem_allocator(void | |
3839 | * the (very unlikely) case of us accidentally initializing the | |
3840 | * bootmem allocator with an invalid RAM area. | |
3841 | */ | |
3842 | - reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) + | |
3843 | - bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START)); | |
3844 | + reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) + | |
3845 | + bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text)); | |
3846 | ||
3847 | #ifndef CONFIG_XEN | |
3848 | /* | |
3849 | @@ -1328,160 +525,6 @@ void __init remapped_pgdat_init(void) | |
3850 | } | |
3851 | } | |
3852 | ||
3853 | -/* | |
3854 | - * Request address space for all standard RAM and ROM resources | |
3855 | - * and also for regions reported as reserved by the e820. | |
3856 | - */ | |
3857 | -static void __init | |
3858 | -legacy_init_iomem_resources(struct e820entry *e820, int nr_map, | |
3859 | - struct resource *code_resource, | |
3860 | - struct resource *data_resource) | |
3861 | -{ | |
3862 | - int i; | |
3863 | - | |
3864 | - probe_roms(); | |
3865 | - | |
3866 | - for (i = 0; i < nr_map; i++) { | |
3867 | - struct resource *res; | |
3868 | -#ifndef CONFIG_RESOURCES_64BIT | |
3869 | - if (e820[i].addr + e820[i].size > 0x100000000ULL) | |
3870 | - continue; | |
3871 | -#endif | |
3872 | - res = kzalloc(sizeof(struct resource), GFP_ATOMIC); | |
3873 | - switch (e820[i].type) { | |
3874 | - case E820_RAM: res->name = "System RAM"; break; | |
3875 | - case E820_ACPI: res->name = "ACPI Tables"; break; | |
3876 | - case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; | |
3877 | - default: res->name = "reserved"; | |
3878 | - } | |
3879 | - res->start = e820[i].addr; | |
3880 | - res->end = res->start + e820[i].size - 1; | |
3881 | - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | |
3882 | - if (request_resource(&iomem_resource, res)) { | |
3883 | - kfree(res); | |
3884 | - continue; | |
3885 | - } | |
3886 | - if (e820[i].type == E820_RAM) { | |
3887 | - /* | |
3888 | - * We don't know which RAM region contains kernel data, | |
3889 | - * so we try it repeatedly and let the resource manager | |
3890 | - * test it. | |
3891 | - */ | |
3892 | -#ifndef CONFIG_XEN | |
3893 | - request_resource(res, code_resource); | |
3894 | - request_resource(res, data_resource); | |
3895 | -#endif | |
3896 | -#ifdef CONFIG_KEXEC | |
3897 | - if (crashk_res.start != crashk_res.end) | |
3898 | - request_resource(res, &crashk_res); | |
3899 | -#ifdef CONFIG_XEN | |
3900 | - xen_machine_kexec_register_resources(res); | |
3901 | -#endif | |
3902 | -#endif | |
3903 | - } | |
3904 | - } | |
3905 | -} | |
3906 | - | |
3907 | -/* | |
3908 | - * Locate a unused range of the physical address space below 4G which | |
3909 | - * can be used for PCI mappings. | |
3910 | - */ | |
3911 | -static void __init | |
3912 | -e820_setup_gap(struct e820entry *e820, int nr_map) | |
3913 | -{ | |
3914 | - unsigned long gapstart, gapsize, round; | |
3915 | - unsigned long long last; | |
3916 | - int i; | |
3917 | - | |
3918 | - /* | |
3919 | - * Search for the bigest gap in the low 32 bits of the e820 | |
3920 | - * memory space. | |
3921 | - */ | |
3922 | - last = 0x100000000ull; | |
3923 | - gapstart = 0x10000000; | |
3924 | - gapsize = 0x400000; | |
3925 | - i = nr_map; | |
3926 | - while (--i >= 0) { | |
3927 | - unsigned long long start = e820[i].addr; | |
3928 | - unsigned long long end = start + e820[i].size; | |
3929 | - | |
3930 | - /* | |
3931 | - * Since "last" is at most 4GB, we know we'll | |
3932 | - * fit in 32 bits if this condition is true | |
3933 | - */ | |
3934 | - if (last > end) { | |
3935 | - unsigned long gap = last - end; | |
3936 | - | |
3937 | - if (gap > gapsize) { | |
3938 | - gapsize = gap; | |
3939 | - gapstart = end; | |
3940 | - } | |
3941 | - } | |
3942 | - if (start < last) | |
3943 | - last = start; | |
3944 | - } | |
3945 | - | |
3946 | - /* | |
3947 | - * See how much we want to round up: start off with | |
3948 | - * rounding to the next 1MB area. | |
3949 | - */ | |
3950 | - round = 0x100000; | |
3951 | - while ((gapsize >> 4) > round) | |
3952 | - round += round; | |
3953 | - /* Fun with two's complement */ | |
3954 | - pci_mem_start = (gapstart + round) & -round; | |
3955 | - | |
3956 | - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", | |
3957 | - pci_mem_start, gapstart, gapsize); | |
3958 | -} | |
3959 | - | |
3960 | -/* | |
3961 | - * Request address space for all standard resources | |
3962 | - * | |
3963 | - * This is called just before pcibios_init(), which is also a | |
3964 | - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). | |
3965 | - */ | |
3966 | -static int __init request_standard_resources(void) | |
3967 | -{ | |
3968 | - int i; | |
3969 | - | |
3970 | - /* Nothing to do if not running in dom0. */ | |
3971 | - if (!is_initial_xendomain()) | |
3972 | - return 0; | |
3973 | - | |
3974 | - printk("Setting up standard PCI resources\n"); | |
3975 | -#ifdef CONFIG_XEN | |
3976 | - legacy_init_iomem_resources(machine_e820.map, machine_e820.nr_map, | |
3977 | - &code_resource, &data_resource); | |
3978 | -#else | |
3979 | - if (efi_enabled) | |
3980 | - efi_initialize_iomem_resources(&code_resource, &data_resource); | |
3981 | - else | |
3982 | - legacy_init_iomem_resources(e820.map, e820.nr_map, | |
3983 | - &code_resource, &data_resource); | |
3984 | -#endif | |
3985 | - | |
3986 | - /* EFI systems may still have VGA */ | |
3987 | - request_resource(&iomem_resource, &video_ram_resource); | |
3988 | - | |
3989 | - /* request I/O space for devices used on all i[345]86 PCs */ | |
3990 | - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | |
3991 | - request_resource(&ioport_resource, &standard_io_resources[i]); | |
3992 | - return 0; | |
3993 | -} | |
3994 | - | |
3995 | -subsys_initcall(request_standard_resources); | |
3996 | - | |
3997 | -static void __init register_memory(void) | |
3998 | -{ | |
3999 | -#ifdef CONFIG_XEN | |
4000 | - if (is_initial_xendomain()) | |
4001 | - e820_setup_gap(machine_e820.map, machine_e820.nr_map); | |
4002 | - else | |
4003 | -#endif | |
4004 | - e820_setup_gap(e820.map, e820.nr_map); | |
4005 | -} | |
4006 | - | |
4007 | #ifdef CONFIG_MCA | |
4008 | static void set_mca_bus(int x) | |
4009 | { | |
4010 | @@ -1491,6 +534,12 @@ static void set_mca_bus(int x) | |
4011 | static void set_mca_bus(int x) { } | |
4012 | #endif | |
4013 | ||
4014 | +/* Overridden in paravirt.c if CONFIG_PARAVIRT */ | |
4015 | +char * __init __attribute__((weak)) memory_setup(void) | |
4016 | +{ | |
4017 | + return machine_specific_memory_setup(); | |
4018 | +} | |
4019 | + | |
4020 | /* | |
4021 | * Determine if we were loaded by an EFI loader. If so, then we have also been | |
4022 | * passed the efi memmap, systab, etc., so we should use these data structures | |
4023 | @@ -1578,7 +627,7 @@ void __init setup_arch(char **cmdline_p) | |
4024 | efi_init(); | |
4025 | else { | |
4026 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | |
4027 | - print_memory_map(machine_specific_memory_setup()); | |
4028 | + print_memory_map(memory_setup()); | |
4029 | } | |
4030 | ||
4031 | copy_edd(); | |
4032 | @@ -1757,7 +806,7 @@ void __init setup_arch(char **cmdline_p) | |
4033 | get_smp_config(); | |
4034 | #endif | |
4035 | ||
4036 | - register_memory(); | |
4037 | + e820_register_memory(); | |
4038 | ||
4039 | if (is_initial_xendomain()) { | |
4040 | #ifdef CONFIG_VT | |
4041 | Index: head-2008-12-01/arch/x86/kernel/smp_32-xen.c | |
4042 | =================================================================== | |
4043 | --- head-2008-12-01.orig/arch/x86/kernel/smp_32-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
4044 | +++ head-2008-12-01/arch/x86/kernel/smp_32-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
4045 | @@ -659,6 +659,10 @@ int smp_call_function_single(int cpu, vo | |
4046 | put_cpu(); | |
4047 | return -EBUSY; | |
4048 | } | |
4049 | + | |
4050 | + /* Can deadlock when called with interrupts disabled */ | |
4051 | + WARN_ON(irqs_disabled()); | |
4052 | + | |
4053 | spin_lock_bh(&call_lock); | |
4054 | __smp_call_function_single(cpu, func, info, nonatomic, wait); | |
4055 | spin_unlock_bh(&call_lock); | |
4056 | Index: head-2008-12-01/arch/x86/kernel/time_32-xen.c | |
4057 | =================================================================== | |
4058 | --- head-2008-12-01.orig/arch/x86/kernel/time_32-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
4059 | +++ head-2008-12-01/arch/x86/kernel/time_32-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
4060 | @@ -61,6 +61,7 @@ | |
4061 | #include <asm/uaccess.h> | |
4062 | #include <asm/processor.h> | |
4063 | #include <asm/timer.h> | |
4064 | +#include <asm/time.h> | |
4065 | #include <asm/sections.h> | |
4066 | ||
4067 | #include "mach_time.h" | |
4068 | @@ -129,11 +130,11 @@ static DEFINE_PER_CPU(struct vcpu_runsta | |
4069 | /* Must be signed, as it's compared with s64 quantities which can be -ve. */ | |
4070 | #define NS_PER_TICK (1000000000LL/HZ) | |
4071 | ||
4072 | -static void __clock_was_set(void *unused) | |
4073 | +static void __clock_was_set(struct work_struct *unused) | |
4074 | { | |
4075 | clock_was_set(); | |
4076 | } | |
4077 | -static DECLARE_WORK(clock_was_set_work, __clock_was_set, NULL); | |
4078 | +static DECLARE_WORK(clock_was_set_work, __clock_was_set); | |
4079 | ||
4080 | /* | |
4081 | * GCC 4.3 can turn loops over an induction variable into division. We do | |
4082 | @@ -544,10 +545,7 @@ static int set_rtc_mmss(unsigned long no | |
4083 | /* gets recalled with irq locally disabled */ | |
4084 | /* XXX - does irqsave resolve this? -johnstul */ | |
4085 | spin_lock_irqsave(&rtc_lock, flags); | |
4086 | - if (efi_enabled) | |
4087 | - retval = efi_set_rtc_mmss(nowtime); | |
4088 | - else | |
4089 | - retval = mach_set_rtc_mmss(nowtime); | |
4090 | + retval = set_wallclock(nowtime); | |
4091 | spin_unlock_irqrestore(&rtc_lock, flags); | |
4092 | ||
4093 | return retval; | |
4094 | @@ -874,10 +872,7 @@ unsigned long get_cmos_time(void) | |
4095 | ||
4096 | spin_lock_irqsave(&rtc_lock, flags); | |
4097 | ||
4098 | - if (efi_enabled) | |
4099 | - retval = efi_get_time(); | |
4100 | - else | |
4101 | - retval = mach_get_cmos_time(); | |
4102 | + retval = get_wallclock(); | |
4103 | ||
4104 | spin_unlock_irqrestore(&rtc_lock, flags); | |
4105 | ||
4106 | @@ -979,7 +974,7 @@ static void __init hpet_time_init(void) | |
4107 | printk("Using HPET for base-timer\n"); | |
4108 | } | |
4109 | ||
4110 | - time_init_hook(); | |
4111 | + do_time_init(); | |
4112 | } | |
4113 | #endif | |
4114 | ||
4115 | Index: head-2008-12-01/arch/x86/kernel/traps_32-xen.c | |
4116 | =================================================================== | |
4117 | --- head-2008-12-01.orig/arch/x86/kernel/traps_32-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
4118 | +++ head-2008-12-01/arch/x86/kernel/traps_32-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
4119 | @@ -29,6 +29,8 @@ | |
4120 | #include <linux/kexec.h> | |
4121 | #include <linux/unwind.h> | |
4122 | #include <linux/uaccess.h> | |
4123 | +#include <linux/nmi.h> | |
4124 | +#include <linux/bug.h> | |
4125 | ||
4126 | #ifdef CONFIG_EISA | |
4127 | #include <linux/ioport.h> | |
4128 | @@ -61,9 +63,6 @@ int panic_on_unrecovered_nmi; | |
4129 | ||
4130 | asmlinkage int system_call(void); | |
4131 | ||
4132 | -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, | |
4133 | - { 0, 0 }, { 0, 0 } }; | |
4134 | - | |
4135 | /* Do we ignore FPU interrupts ? */ | |
4136 | char ignore_fpu_irq = 0; | |
4137 | ||
4138 | @@ -100,12 +99,7 @@ asmlinkage void fixup_4gb_segment(void); | |
4139 | #endif | |
4140 | asmlinkage void machine_check(void); | |
4141 | ||
4142 | -static int kstack_depth_to_print = 24; | |
4143 | -#ifdef CONFIG_STACK_UNWIND | |
4144 | -static int call_trace = 1; | |
4145 | -#else | |
4146 | -#define call_trace (-1) | |
4147 | -#endif | |
4148 | +int kstack_depth_to_print = 24; | |
4149 | ATOMIC_NOTIFIER_HEAD(i386die_chain); | |
4150 | ||
4151 | int register_die_notifier(struct notifier_block *nb) | |
4152 | @@ -159,25 +153,7 @@ static inline unsigned long print_contex | |
4153 | return ebp; | |
4154 | } | |
4155 | ||
4156 | -struct ops_and_data { | |
4157 | - struct stacktrace_ops *ops; | |
4158 | - void *data; | |
4159 | -}; | |
4160 | - | |
4161 | -static asmlinkage int | |
4162 | -dump_trace_unwind(struct unwind_frame_info *info, void *data) | |
4163 | -{ | |
4164 | - struct ops_and_data *oad = (struct ops_and_data *)data; | |
4165 | - int n = 0; | |
4166 | - | |
4167 | - while (unwind(info) == 0 && UNW_PC(info)) { | |
4168 | - n++; | |
4169 | - oad->ops->address(oad->data, UNW_PC(info)); | |
4170 | - if (arch_unw_user_mode(info)) | |
4171 | - break; | |
4172 | - } | |
4173 | - return n; | |
4174 | -} | |
4175 | +#define MSG(msg) ops->warning(data, msg) | |
4176 | ||
4177 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | |
4178 | unsigned long *stack, | |
4179 | @@ -188,39 +164,6 @@ void dump_trace(struct task_struct *task | |
4180 | if (!task) | |
4181 | task = current; | |
4182 | ||
4183 | - if (call_trace >= 0) { | |
4184 | - int unw_ret = 0; | |
4185 | - struct unwind_frame_info info; | |
4186 | - struct ops_and_data oad = { .ops = ops, .data = data }; | |
4187 | - | |
4188 | - if (regs) { | |
4189 | - if (unwind_init_frame_info(&info, task, regs) == 0) | |
4190 | - unw_ret = dump_trace_unwind(&info, &oad); | |
4191 | - } else if (task == current) | |
4192 | - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); | |
4193 | - else { | |
4194 | - if (unwind_init_blocked(&info, task) == 0) | |
4195 | - unw_ret = dump_trace_unwind(&info, &oad); | |
4196 | - } | |
4197 | - if (unw_ret > 0) { | |
4198 | - if (call_trace == 1 && !arch_unw_user_mode(&info)) { | |
4199 | - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", | |
4200 | - UNW_PC(&info)); | |
4201 | - if (UNW_SP(&info) >= PAGE_OFFSET) { | |
4202 | - ops->warning(data, "Leftover inexact backtrace:\n"); | |
4203 | - stack = (void *)UNW_SP(&info); | |
4204 | - if (!stack) | |
4205 | - return; | |
4206 | - ebp = UNW_FP(&info); | |
4207 | - } else | |
4208 | - ops->warning(data, "Full inexact backtrace again:\n"); | |
4209 | - } else if (call_trace >= 1) | |
4210 | - return; | |
4211 | - else | |
4212 | - ops->warning(data, "Full inexact backtrace again:\n"); | |
4213 | - } else | |
4214 | - ops->warning(data, "Inexact backtrace:\n"); | |
4215 | - } | |
4216 | if (!stack) { | |
4217 | unsigned long dummy; | |
4218 | stack = &dummy; | |
4219 | @@ -253,6 +196,7 @@ void dump_trace(struct task_struct *task | |
4220 | stack = (unsigned long*)context->previous_esp; | |
4221 | if (!stack) | |
4222 | break; | |
4223 | + touch_nmi_watchdog(); | |
4224 | } | |
4225 | } | |
4226 | EXPORT_SYMBOL(dump_trace); | |
4227 | @@ -385,7 +329,7 @@ void show_registers(struct pt_regs *regs | |
4228 | * time of the fault.. | |
4229 | */ | |
4230 | if (in_kernel) { | |
4231 | - u8 __user *eip; | |
4232 | + u8 *eip; | |
4233 | int code_bytes = 64; | |
4234 | unsigned char c; | |
4235 | ||
4236 | @@ -394,18 +338,20 @@ void show_registers(struct pt_regs *regs | |
4237 | ||
4238 | printk(KERN_EMERG "Code: "); | |
4239 | ||
4240 | - eip = (u8 __user *)regs->eip - 43; | |
4241 | - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { | |
4242 | + eip = (u8 *)regs->eip - 43; | |
4243 | + if (eip < (u8 *)PAGE_OFFSET || | |
4244 | + probe_kernel_address(eip, c)) { | |
4245 | /* try starting at EIP */ | |
4246 | - eip = (u8 __user *)regs->eip; | |
4247 | + eip = (u8 *)regs->eip; | |
4248 | code_bytes = 32; | |
4249 | } | |
4250 | for (i = 0; i < code_bytes; i++, eip++) { | |
4251 | - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { | |
4252 | + if (eip < (u8 *)PAGE_OFFSET || | |
4253 | + probe_kernel_address(eip, c)) { | |
4254 | printk(" Bad EIP value."); | |
4255 | break; | |
4256 | } | |
4257 | - if (eip == (u8 __user *)regs->eip) | |
4258 | + if (eip == (u8 *)regs->eip) | |
4259 | printk("<%02x> ", c); | |
4260 | else | |
4261 | printk("%02x ", c); | |
4262 | @@ -414,43 +360,22 @@ void show_registers(struct pt_regs *regs | |
4263 | printk("\n"); | |
4264 | } | |
4265 | ||
4266 | -static void handle_BUG(struct pt_regs *regs) | |
4267 | +int is_valid_bugaddr(unsigned long eip) | |
4268 | { | |
4269 | - unsigned long eip = regs->eip; | |
4270 | unsigned short ud2; | |
4271 | ||
4272 | if (eip < PAGE_OFFSET) | |
4273 | - return; | |
4274 | - if (probe_kernel_address((unsigned short __user *)eip, ud2)) | |
4275 | - return; | |
4276 | - if (ud2 != 0x0b0f) | |
4277 | - return; | |
4278 | + return 0; | |
4279 | + if (probe_kernel_address((unsigned short *)eip, ud2)) | |
4280 | + return 0; | |
4281 | ||
4282 | - printk(KERN_EMERG "------------[ cut here ]------------\n"); | |
4283 | - | |
4284 | -#ifdef CONFIG_DEBUG_BUGVERBOSE | |
4285 | - do { | |
4286 | - unsigned short line; | |
4287 | - char *file; | |
4288 | - char c; | |
4289 | - | |
4290 | - if (probe_kernel_address((unsigned short __user *)(eip + 2), | |
4291 | - line)) | |
4292 | - break; | |
4293 | - if (__get_user(file, (char * __user *)(eip + 4)) || | |
4294 | - (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) | |
4295 | - file = "<bad filename>"; | |
4296 | - | |
4297 | - printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); | |
4298 | - return; | |
4299 | - } while (0); | |
4300 | -#endif | |
4301 | - printk(KERN_EMERG "Kernel BUG at [verbose debug info unavailable]\n"); | |
4302 | + return ud2 == 0x0b0f; | |
4303 | } | |
4304 | ||
4305 | -/* This is gone through when something in the kernel | |
4306 | - * has done something bad and is about to be terminated. | |
4307 | -*/ | |
4308 | +/* | |
4309 | + * This is gone through when something in the kernel has done something bad and | |
4310 | + * is about to be terminated. | |
4311 | + */ | |
4312 | void die(const char * str, struct pt_regs * regs, long err) | |
4313 | { | |
4314 | static struct { | |
4315 | @@ -458,7 +383,7 @@ void die(const char * str, struct pt_reg | |
4316 | u32 lock_owner; | |
4317 | int lock_owner_depth; | |
4318 | } die = { | |
4319 | - .lock = SPIN_LOCK_UNLOCKED, | |
4320 | + .lock = __SPIN_LOCK_UNLOCKED(die.lock), | |
4321 | .lock_owner = -1, | |
4322 | .lock_owner_depth = 0 | |
4323 | }; | |
4324 | @@ -482,7 +407,8 @@ void die(const char * str, struct pt_reg | |
4325 | unsigned long esp; | |
4326 | unsigned short ss; | |
4327 | ||
4328 | - handle_BUG(regs); | |
4329 | + report_bug(regs->eip); | |
4330 | + | |
4331 | printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); | |
4332 | #ifdef CONFIG_PREEMPT | |
4333 | printk(KERN_EMERG "PREEMPT "); | |
4334 | @@ -682,8 +608,7 @@ mem_parity_error(unsigned char reason, s | |
4335 | { | |
4336 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " | |
4337 | "CPU %d.\n", reason, smp_processor_id()); | |
4338 | - printk(KERN_EMERG "You probably have a hardware problem with your RAM " | |
4339 | - "chips\n"); | |
4340 | + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); | |
4341 | if (panic_on_unrecovered_nmi) | |
4342 | panic("NMI: Not continuing"); | |
4343 | ||
4344 | @@ -741,7 +666,6 @@ void __kprobes die_nmi(struct pt_regs *r | |
4345 | printk(" on CPU%d, eip %08lx, registers:\n", | |
4346 | smp_processor_id(), regs->eip); | |
4347 | show_registers(regs); | |
4348 | - printk(KERN_EMERG "console shuts up ...\n"); | |
4349 | console_silent(); | |
4350 | spin_unlock(&nmi_print_lock); | |
4351 | bust_spinlocks(0); | |
4352 | @@ -1057,49 +981,24 @@ fastcall void do_spurious_interrupt_bug( | |
4353 | #endif | |
4354 | } | |
4355 | ||
4356 | -fastcall void setup_x86_bogus_stack(unsigned char * stk) | |
4357 | +fastcall unsigned long patch_espfix_desc(unsigned long uesp, | |
4358 | + unsigned long kesp) | |
4359 | { | |
4360 | - unsigned long *switch16_ptr, *switch32_ptr; | |
4361 | - struct pt_regs *regs; | |
4362 | - unsigned long stack_top, stack_bot; | |
4363 | - unsigned short iret_frame16_off; | |
4364 | - int cpu = smp_processor_id(); | |
4365 | - /* reserve the space on 32bit stack for the magic switch16 pointer */ | |
4366 | - memmove(stk, stk + 8, sizeof(struct pt_regs)); | |
4367 | - switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs)); | |
4368 | - regs = (struct pt_regs *)stk; | |
4369 | - /* now the switch32 on 16bit stack */ | |
4370 | - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); | |
4371 | - stack_top = stack_bot + CPU_16BIT_STACK_SIZE; | |
4372 | - switch32_ptr = (unsigned long *)(stack_top - 8); | |
4373 | - iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20; | |
4374 | - /* copy iret frame on 16bit stack */ | |
4375 | - memcpy((void *)(stack_bot + iret_frame16_off), ®s->eip, 20); | |
4376 | - /* fill in the switch pointers */ | |
4377 | - switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off; | |
4378 | - switch16_ptr[1] = __ESPFIX_SS; | |
4379 | - switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) + | |
4380 | - 8 - CPU_16BIT_STACK_SIZE; | |
4381 | - switch32_ptr[1] = __KERNEL_DS; | |
4382 | -} | |
4383 | - | |
4384 | -fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp) | |
4385 | -{ | |
4386 | - unsigned long *switch32_ptr; | |
4387 | - unsigned char *stack16, *stack32; | |
4388 | - unsigned long stack_top, stack_bot; | |
4389 | - int len; | |
4390 | int cpu = smp_processor_id(); | |
4391 | - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); | |
4392 | - stack_top = stack_bot + CPU_16BIT_STACK_SIZE; | |
4393 | - switch32_ptr = (unsigned long *)(stack_top - 8); | |
4394 | - /* copy the data from 16bit stack to 32bit stack */ | |
4395 | - len = CPU_16BIT_STACK_SIZE - 8 - sp; | |
4396 | - stack16 = (unsigned char *)(stack_bot + sp); | |
4397 | - stack32 = (unsigned char *) | |
4398 | - (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len); | |
4399 | - memcpy(stack32, stack16, len); | |
4400 | - return stack32; | |
4401 | + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | |
4402 | + struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address; | |
4403 | + unsigned long base = (kesp - uesp) & -THREAD_SIZE; | |
4404 | + unsigned long new_kesp = kesp - base; | |
4405 | + unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; | |
4406 | + __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; | |
4407 | + /* Set up base for espfix segment */ | |
4408 | + desc &= 0x00f0ff0000000000ULL; | |
4409 | + desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | | |
4410 | + ((((__u64)base) << 32) & 0xff00000000000000ULL) | | |
4411 | + ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | | |
4412 | + (lim_pages & 0xffff); | |
4413 | + *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; | |
4414 | + return new_kesp; | |
4415 | } | |
4416 | #endif | |
4417 | ||
4418 | @@ -1113,7 +1012,7 @@ fastcall unsigned char * fixup_x86_bogus | |
4419 | * Must be called with kernel preemption disabled (in this case, | |
4420 | * local interrupts are disabled at the call-site in entry.S). | |
4421 | */ | |
4422 | -asmlinkage void math_state_restore(struct pt_regs regs) | |
4423 | +asmlinkage void math_state_restore(void) | |
4424 | { | |
4425 | struct thread_info *thread = current_thread_info(); | |
4426 | struct task_struct *tsk = thread->task; | |
4427 | @@ -1123,6 +1022,7 @@ asmlinkage void math_state_restore(struc | |
4428 | init_fpu(tsk); | |
4429 | restore_fpu(tsk); | |
4430 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ | |
4431 | + tsk->fpu_counter++; | |
4432 | } | |
4433 | ||
4434 | #ifndef CONFIG_MATH_EMULATION | |
4435 | @@ -1234,19 +1134,3 @@ static int __init kstack_setup(char *s) | |
4436 | return 1; | |
4437 | } | |
4438 | __setup("kstack=", kstack_setup); | |
4439 | - | |
4440 | -#ifdef CONFIG_STACK_UNWIND | |
4441 | -static int __init call_trace_setup(char *s) | |
4442 | -{ | |
4443 | - if (strcmp(s, "old") == 0) | |
4444 | - call_trace = -1; | |
4445 | - else if (strcmp(s, "both") == 0) | |
4446 | - call_trace = 0; | |
4447 | - else if (strcmp(s, "newfallback") == 0) | |
4448 | - call_trace = 1; | |
4449 | - else if (strcmp(s, "new") == 2) | |
4450 | - call_trace = 2; | |
4451 | - return 1; | |
4452 | -} | |
4453 | -__setup("call_trace=", call_trace_setup); | |
4454 | -#endif | |
4455 | Index: head-2008-12-01/arch/x86/kernel/vmlinux_32.lds.S | |
4456 | =================================================================== | |
4457 | --- head-2008-12-01.orig/arch/x86/kernel/vmlinux_32.lds.S 2008-12-01 10:53:14.000000000 +0100 | |
4458 | +++ head-2008-12-01/arch/x86/kernel/vmlinux_32.lds.S 2008-12-01 11:32:38.000000000 +0100 | |
4459 | @@ -29,6 +29,12 @@ PHDRS { | |
4460 | SECTIONS | |
4461 | { | |
4462 | . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; | |
4463 | + | |
4464 | +#if defined(CONFIG_XEN) && CONFIG_XEN_COMPAT <= 0x030002 | |
4465 | +#undef LOAD_OFFSET | |
4466 | +#define LOAD_OFFSET 0 | |
4467 | +#endif | |
4468 | + | |
4469 | phys_startup_32 = startup_32 - LOAD_OFFSET; | |
4470 | ||
4471 | .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { | |
4472 | Index: head-2008-12-01/arch/x86/kvm/Kconfig | |
4473 | =================================================================== | |
4474 | --- head-2008-12-01.orig/arch/x86/kvm/Kconfig 2008-12-01 10:53:14.000000000 +0100 | |
4475 | +++ head-2008-12-01/arch/x86/kvm/Kconfig 2008-12-01 11:32:38.000000000 +0100 | |
4476 | @@ -7,6 +7,7 @@ config HAVE_KVM | |
4477 | menuconfig VIRTUALIZATION | |
4478 | bool "Virtualization" | |
4479 | depends on HAVE_KVM || X86 | |
4480 | + depends on !XEN | |
4481 | default y | |
4482 | ---help--- | |
4483 | Say Y here to get to see options for using your Linux host to run other | |
4484 | Index: head-2008-12-01/arch/x86/mm/fault_32-xen.c | |
4485 | =================================================================== | |
4486 | --- head-2008-12-01.orig/arch/x86/mm/fault_32-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
4487 | +++ head-2008-12-01/arch/x86/mm/fault_32-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
4488 | @@ -22,9 +22,9 @@ | |
4489 | #include <linux/highmem.h> | |
4490 | #include <linux/module.h> | |
4491 | #include <linux/kprobes.h> | |
4492 | +#include <linux/uaccess.h> | |
4493 | ||
4494 | #include <asm/system.h> | |
4495 | -#include <asm/uaccess.h> | |
4496 | #include <asm/desc.h> | |
4497 | #include <asm/kdebug.h> | |
4498 | #include <asm/segment.h> | |
4499 | @@ -167,7 +167,7 @@ static inline unsigned long get_segment_ | |
4500 | static int __is_prefetch(struct pt_regs *regs, unsigned long addr) | |
4501 | { | |
4502 | unsigned long limit; | |
4503 | - unsigned long instr = get_segment_eip (regs, &limit); | |
4504 | + unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit); | |
4505 | int scan_more = 1; | |
4506 | int prefetch = 0; | |
4507 | int i; | |
4508 | @@ -177,9 +177,9 @@ static int __is_prefetch(struct pt_regs | |
4509 | unsigned char instr_hi; | |
4510 | unsigned char instr_lo; | |
4511 | ||
4512 | - if (instr > limit) | |
4513 | + if (instr > (unsigned char *)limit) | |
4514 | break; | |
4515 | - if (__get_user(opcode, (unsigned char __user *) instr)) | |
4516 | + if (probe_kernel_address(instr, opcode)) | |
4517 | break; | |
4518 | ||
4519 | instr_hi = opcode & 0xf0; | |
4520 | @@ -204,9 +204,9 @@ static int __is_prefetch(struct pt_regs | |
4521 | case 0x00: | |
4522 | /* Prefetch instruction is 0x0F0D or 0x0F18 */ | |
4523 | scan_more = 0; | |
4524 | - if (instr > limit) | |
4525 | + if (instr > (unsigned char *)limit) | |
4526 | break; | |
4527 | - if (__get_user(opcode, (unsigned char __user *) instr)) | |
4528 | + if (probe_kernel_address(instr, opcode)) | |
4529 | break; | |
4530 | prefetch = (instr_lo == 0xF) && | |
4531 | (opcode == 0x0D || opcode == 0x18); | |
4532 | Index: head-2008-12-01/arch/x86/mm/highmem_32-xen.c | |
4533 | =================================================================== | |
4534 | --- head-2008-12-01.orig/arch/x86/mm/highmem_32-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
4535 | +++ head-2008-12-01/arch/x86/mm/highmem_32-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
4536 | @@ -32,7 +32,7 @@ static void *__kmap_atomic(struct page * | |
4537 | unsigned long vaddr; | |
4538 | ||
4539 | /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ | |
4540 | - inc_preempt_count(); | |
4541 | + pagefault_disable(); | |
4542 | if (!PageHighMem(page)) | |
4543 | return page_address(page); | |
4544 | ||
4545 | @@ -63,26 +63,22 @@ void kunmap_atomic(void *kvaddr, enum km | |
4546 | unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; | |
4547 | enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); | |
4548 | ||
4549 | -#ifdef CONFIG_DEBUG_HIGHMEM | |
4550 | - if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) { | |
4551 | - dec_preempt_count(); | |
4552 | - preempt_check_resched(); | |
4553 | - return; | |
4554 | - } | |
4555 | - | |
4556 | - if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) | |
4557 | - BUG(); | |
4558 | -#endif | |
4559 | /* | |
4560 | * Force other mappings to Oops if they'll try to access this pte | |
4561 | * without first remap it. Keeping stale mappings around is a bad idea | |
4562 | * also, in case the page changes cacheability attributes or becomes | |
4563 | * a protected page in a hypervisor. | |
4564 | */ | |
4565 | - kpte_clear_flush(kmap_pte-idx, vaddr); | |
4566 | + if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) | |
4567 | + kpte_clear_flush(kmap_pte-idx, vaddr); | |
4568 | + else { | |
4569 | +#ifdef CONFIG_DEBUG_HIGHMEM | |
4570 | + BUG_ON(vaddr < PAGE_OFFSET); | |
4571 | + BUG_ON(vaddr >= (unsigned long)high_memory); | |
4572 | +#endif | |
4573 | + } | |
4574 | ||
4575 | - dec_preempt_count(); | |
4576 | - preempt_check_resched(); | |
4577 | + pagefault_enable(); | |
4578 | } | |
4579 | ||
4580 | /* This is the same as kmap_atomic() but can map memory that doesn't | |
4581 | @@ -93,7 +89,7 @@ void *kmap_atomic_pfn(unsigned long pfn, | |
4582 | enum fixed_addresses idx; | |
4583 | unsigned long vaddr; | |
4584 | ||
4585 | - inc_preempt_count(); | |
4586 | + pagefault_disable(); | |
4587 | ||
4588 | idx = type + KM_TYPE_NR*smp_processor_id(); | |
4589 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | |
4590 | Index: head-2008-12-01/arch/x86/mm/init_32-xen.c | |
4591 | =================================================================== | |
4592 | --- head-2008-12-01.orig/arch/x86/mm/init_32-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
4593 | +++ head-2008-12-01/arch/x86/mm/init_32-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
4594 | @@ -233,8 +233,6 @@ static inline int page_kills_ppro(unsign | |
4595 | ||
4596 | #endif | |
4597 | ||
4598 | -extern int is_available_memory(efi_memory_desc_t *); | |
4599 | - | |
4600 | int page_is_ram(unsigned long pagenr) | |
4601 | { | |
4602 | int i; | |
4603 | @@ -327,7 +325,7 @@ void __init add_one_highpage_init(struct | |
4604 | SetPageReserved(page); | |
4605 | } | |
4606 | ||
4607 | -static int add_one_highpage_hotplug(struct page *page, unsigned long pfn) | |
4608 | +static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn) | |
4609 | { | |
4610 | free_new_highpage(page, pfn); | |
4611 | totalram_pages++; | |
4612 | @@ -344,7 +342,7 @@ static int add_one_highpage_hotplug(stru | |
4613 | * has been added dynamically that would be | |
4614 | * onlined here is in HIGHMEM | |
4615 | */ | |
4616 | -void online_page(struct page *page) | |
4617 | +void __meminit online_page(struct page *page) | |
4618 | { | |
4619 | ClearPageReserved(page); | |
4620 | add_one_highpage_hotplug(page, page_to_pfn(page)); | |
4621 | @@ -732,16 +730,10 @@ void __init mem_init(void) | |
4622 | set_bit(PG_pinned, &virt_to_page(init_mm.pgd)->flags); | |
4623 | } | |
4624 | ||
4625 | -/* | |
4626 | - * this is for the non-NUMA, single node SMP system case. | |
4627 | - * Specifically, in the case of x86, we will always add | |
4628 | - * memory to the highmem for now. | |
4629 | - */ | |
4630 | #ifdef CONFIG_MEMORY_HOTPLUG | |
4631 | -#ifndef CONFIG_NEED_MULTIPLE_NODES | |
4632 | int arch_add_memory(int nid, u64 start, u64 size) | |
4633 | { | |
4634 | - struct pglist_data *pgdata = &contig_page_data; | |
4635 | + struct pglist_data *pgdata = NODE_DATA(nid); | |
4636 | struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; | |
4637 | unsigned long start_pfn = start >> PAGE_SHIFT; | |
4638 | unsigned long nr_pages = size >> PAGE_SHIFT; | |
4639 | @@ -753,11 +745,11 @@ int remove_memory(u64 start, u64 size) | |
4640 | { | |
4641 | return -EINVAL; | |
4642 | } | |
4643 | -#endif | |
4644 | +EXPORT_SYMBOL_GPL(remove_memory); | |
4645 | #endif | |
4646 | ||
4647 | -kmem_cache_t *pgd_cache; | |
4648 | -kmem_cache_t *pmd_cache; | |
4649 | +struct kmem_cache *pgd_cache; | |
4650 | +struct kmem_cache *pmd_cache; | |
4651 | ||
4652 | void __init pgtable_cache_init(void) | |
4653 | { | |
4654 | Index: head-2008-12-01/arch/x86/mm/pgtable_32-xen.c | |
4655 | =================================================================== | |
4656 | --- head-2008-12-01.orig/arch/x86/mm/pgtable_32-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
4657 | +++ head-2008-12-01/arch/x86/mm/pgtable_32-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
4658 | @@ -203,7 +203,7 @@ void pte_free(struct page *pte) | |
4659 | __free_page(pte); | |
4660 | } | |
4661 | ||
4662 | -void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags) | |
4663 | +void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags) | |
4664 | { | |
4665 | memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); | |
4666 | } | |
4667 | @@ -243,7 +243,7 @@ static inline void pgd_list_del(pgd_t *p | |
4668 | set_page_private(next, (unsigned long)pprev); | |
4669 | } | |
4670 | ||
4671 | -void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) | |
4672 | +void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) | |
4673 | { | |
4674 | unsigned long flags; | |
4675 | ||
4676 | @@ -264,7 +264,7 @@ void pgd_ctor(void *pgd, kmem_cache_t *c | |
4677 | } | |
4678 | ||
4679 | /* never called when PTRS_PER_PMD > 1 */ | |
4680 | -void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) | |
4681 | +void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) | |
4682 | { | |
4683 | unsigned long flags; /* can be called from interrupt context */ | |
4684 | ||
4685 | Index: head-2008-12-01/arch/x86/pci/irq-xen.c | |
4686 | =================================================================== | |
4687 | --- head-2008-12-01.orig/arch/x86/pci/irq-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
4688 | +++ head-2008-12-01/arch/x86/pci/irq-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
4689 | @@ -768,7 +768,7 @@ static void __init pirq_find_router(stru | |
4690 | DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", | |
4691 | rt->rtr_vendor, rt->rtr_device); | |
4692 | ||
4693 | - pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn); | |
4694 | + pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); | |
4695 | if (!pirq_router_dev) { | |
4696 | DBG(KERN_DEBUG "PCI: Interrupt router not found at " | |
4697 | "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); | |
4698 | @@ -788,6 +788,8 @@ static void __init pirq_find_router(stru | |
4699 | pirq_router_dev->vendor, | |
4700 | pirq_router_dev->device, | |
4701 | pci_name(pirq_router_dev)); | |
4702 | + | |
4703 | + /* The device remains referenced for the kernel lifetime */ | |
4704 | } | |
4705 | ||
4706 | static struct irq_info *pirq_get_info(struct pci_dev *dev) | |
4707 | Index: head-2008-12-01/arch/x86/kernel/entry_64-xen.S | |
4708 | =================================================================== | |
4709 | --- head-2008-12-01.orig/arch/x86/kernel/entry_64-xen.S 2008-12-01 11:29:05.000000000 +0100 | |
4710 | +++ head-2008-12-01/arch/x86/kernel/entry_64-xen.S 2008-12-01 11:32:38.000000000 +0100 | |
4711 | @@ -261,7 +261,6 @@ ENTRY(system_call) | |
4712 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | |
4713 | GET_THREAD_INFO(%rcx) | |
4714 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) | |
4715 | - CFI_REMEMBER_STATE | |
4716 | jnz tracesys | |
4717 | cmpq $__NR_syscall_max,%rax | |
4718 | ja badsys | |
4719 | @@ -272,7 +271,6 @@ ENTRY(system_call) | |
4720 | * Syscall return path ending with SYSRET (fast path) | |
4721 | * Has incomplete stack frame and undefined top of stack. | |
4722 | */ | |
4723 | - .globl ret_from_sys_call | |
4724 | ret_from_sys_call: | |
4725 | movl $_TIF_ALLWORK_MASK,%edi | |
4726 | /* edi: flagmask */ | |
4727 | @@ -282,8 +280,8 @@ sysret_check: | |
4728 | TRACE_IRQS_OFF | |
4729 | movl threadinfo_flags(%rcx),%edx | |
4730 | andl %edi,%edx | |
4731 | - CFI_REMEMBER_STATE | |
4732 | jnz sysret_careful | |
4733 | + CFI_REMEMBER_STATE | |
4734 | /* | |
4735 | * sysretq will re-enable interrupts: | |
4736 | */ | |
4737 | @@ -292,10 +290,10 @@ sysret_check: | |
4738 | RESTORE_ARGS 0,8,0 | |
4739 | HYPERVISOR_IRET VGCF_IN_SYSCALL | |
4740 | ||
4741 | + CFI_RESTORE_STATE | |
4742 | /* Handle reschedules */ | |
4743 | /* edx: work, edi: workmask */ | |
4744 | sysret_careful: | |
4745 | - CFI_RESTORE_STATE | |
4746 | bt $TIF_NEED_RESCHED,%edx | |
4747 | jnc sysret_signal | |
4748 | TRACE_IRQS_ON | |
4749 | @@ -334,7 +332,6 @@ badsys: | |
4750 | ||
4751 | /* Do syscall tracing */ | |
4752 | tracesys: | |
4753 | - CFI_RESTORE_STATE | |
4754 | SAVE_REST | |
4755 | movq $-ENOSYS,RAX(%rsp) | |
4756 | FIXUP_TOP_OF_STACK %rdi | |
4757 | @@ -350,32 +347,13 @@ tracesys: | |
4758 | call *sys_call_table(,%rax,8) | |
4759 | 1: movq %rax,RAX-ARGOFFSET(%rsp) | |
4760 | /* Use IRET because user could have changed frame */ | |
4761 | - jmp int_ret_from_sys_call | |
4762 | - CFI_ENDPROC | |
4763 | -END(system_call) | |
4764 | ||
4765 | /* | |
4766 | * Syscall return path ending with IRET. | |
4767 | * Has correct top of stack, but partial stack frame. | |
4768 | - */ | |
4769 | -ENTRY(int_ret_from_sys_call) | |
4770 | - CFI_STARTPROC simple | |
4771 | - CFI_SIGNAL_FRAME | |
4772 | - CFI_DEF_CFA rsp,SS+8-ARGOFFSET | |
4773 | - /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ | |
4774 | - CFI_REL_OFFSET rsp,RSP-ARGOFFSET | |
4775 | - /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ | |
4776 | - /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/ | |
4777 | - CFI_REL_OFFSET rip,RIP-ARGOFFSET | |
4778 | - CFI_REL_OFFSET rdx,RDX-ARGOFFSET | |
4779 | - CFI_REL_OFFSET rcx,RCX-ARGOFFSET | |
4780 | - CFI_REL_OFFSET rax,RAX-ARGOFFSET | |
4781 | - CFI_REL_OFFSET rdi,RDI-ARGOFFSET | |
4782 | - CFI_REL_OFFSET rsi,RSI-ARGOFFSET | |
4783 | - CFI_REL_OFFSET r8,R8-ARGOFFSET | |
4784 | - CFI_REL_OFFSET r9,R9-ARGOFFSET | |
4785 | - CFI_REL_OFFSET r10,R10-ARGOFFSET | |
4786 | - CFI_REL_OFFSET r11,R11-ARGOFFSET | |
4787 | + */ | |
4788 | + .globl int_ret_from_sys_call | |
4789 | +int_ret_from_sys_call: | |
4790 | XEN_BLOCK_EVENTS(%rsi) | |
4791 | TRACE_IRQS_OFF | |
4792 | testb $3,CS-ARGOFFSET(%rsp) | |
4793 | @@ -428,8 +406,6 @@ int_very_careful: | |
4794 | popq %rdi | |
4795 | CFI_ADJUST_CFA_OFFSET -8 | |
4796 | andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi | |
4797 | - XEN_BLOCK_EVENTS(%rsi) | |
4798 | - TRACE_IRQS_OFF | |
4799 | jmp int_restore_rest | |
4800 | ||
4801 | int_signal: | |
4802 | @@ -445,7 +421,7 @@ int_restore_rest: | |
4803 | TRACE_IRQS_OFF | |
4804 | jmp int_with_check | |
4805 | CFI_ENDPROC | |
4806 | -END(int_ret_from_sys_call) | |
4807 | +END(system_call) | |
4808 | ||
4809 | /* | |
4810 | * Certain special system calls that need to save a complete full stack frame. | |
4811 | @@ -1275,36 +1251,3 @@ ENTRY(call_softirq) | |
4812 | ret | |
4813 | CFI_ENDPROC | |
4814 | ENDPROC(call_softirq) | |
4815 | - | |
4816 | -#ifdef CONFIG_STACK_UNWIND | |
4817 | -ENTRY(arch_unwind_init_running) | |
4818 | - CFI_STARTPROC | |
4819 | - movq %r15, R15(%rdi) | |
4820 | - movq %r14, R14(%rdi) | |
4821 | - xchgq %rsi, %rdx | |
4822 | - movq %r13, R13(%rdi) | |
4823 | - movq %r12, R12(%rdi) | |
4824 | - xorl %eax, %eax | |
4825 | - movq %rbp, RBP(%rdi) | |
4826 | - movq %rbx, RBX(%rdi) | |
4827 | - movq (%rsp), %rcx | |
4828 | - movq %rax, R11(%rdi) | |
4829 | - movq %rax, R10(%rdi) | |
4830 | - movq %rax, R9(%rdi) | |
4831 | - movq %rax, R8(%rdi) | |
4832 | - movq %rax, RAX(%rdi) | |
4833 | - movq %rax, RCX(%rdi) | |
4834 | - movq %rax, RDX(%rdi) | |
4835 | - movq %rax, RSI(%rdi) | |
4836 | - movq %rax, RDI(%rdi) | |
4837 | - movq %rax, ORIG_RAX(%rdi) | |
4838 | - movq %rcx, RIP(%rdi) | |
4839 | - leaq 8(%rsp), %rcx | |
4840 | - movq $__KERNEL_CS, CS(%rdi) | |
4841 | - movq %rax, EFLAGS(%rdi) | |
4842 | - movq %rcx, RSP(%rdi) | |
4843 | - movq $__KERNEL_DS, SS(%rdi) | |
4844 | - jmpq *%rdx | |
4845 | - CFI_ENDPROC | |
4846 | -ENDPROC(arch_unwind_init_running) | |
4847 | -#endif | |
4848 | Index: head-2008-12-01/arch/x86/kernel/genapic_64-xen.c | |
4849 | =================================================================== | |
4850 | --- head-2008-12-01.orig/arch/x86/kernel/genapic_64-xen.c 2007-06-12 13:13:01.000000000 +0200 | |
4851 | +++ head-2008-12-01/arch/x86/kernel/genapic_64-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
4852 | @@ -34,6 +34,7 @@ extern struct genapic apic_physflat; | |
4853 | ||
4854 | #ifndef CONFIG_XEN | |
4855 | struct genapic *genapic = &apic_flat; | |
4856 | +struct genapic *genapic_force; | |
4857 | #else | |
4858 | extern struct genapic apic_xen; | |
4859 | struct genapic *genapic = &apic_xen; | |
4860 | @@ -52,6 +53,13 @@ void __init clustered_apic_check(void) | |
4861 | u8 cluster_cnt[NUM_APIC_CLUSTERS]; | |
4862 | int max_apic = 0; | |
4863 | ||
4864 | + /* genapic selection can be forced because of certain quirks. | |
4865 | + */ | |
4866 | + if (genapic_force) { | |
4867 | + genapic = genapic_force; | |
4868 | + goto print; | |
4869 | + } | |
4870 | + | |
4871 | #if defined(CONFIG_ACPI) | |
4872 | /* | |
4873 | * Some x86_64 machines use physical APIC mode regardless of how many | |
4874 | Index: head-2008-12-01/arch/x86/kernel/head64-xen.c | |
4875 | =================================================================== | |
4876 | --- head-2008-12-01.orig/arch/x86/kernel/head64-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
4877 | +++ head-2008-12-01/arch/x86/kernel/head64-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
4878 | @@ -104,7 +104,10 @@ void __init x86_64_start_kernel(char * r | |
4879 | machine_to_phys_order++; | |
4880 | ||
4881 | #if 0 | |
4882 | - for (i = 0; i < 256; i++) | |
4883 | + /* clear bss before set_intr_gate with early_idt_handler */ | |
4884 | + clear_bss(); | |
4885 | + | |
4886 | + for (i = 0; i < IDT_ENTRIES; i++) | |
4887 | set_intr_gate(i, early_idt_handler); | |
4888 | asm volatile("lidt %0" :: "m" (idt_descr)); | |
4889 | #endif | |
4890 | Index: head-2008-12-01/arch/x86/kernel/io_apic_64-xen.c | |
4891 | =================================================================== | |
4892 | --- head-2008-12-01.orig/arch/x86/kernel/io_apic_64-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
4893 | +++ head-2008-12-01/arch/x86/kernel/io_apic_64-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
4894 | @@ -202,14 +202,20 @@ static struct IO_APIC_route_entry ioapic | |
4895 | * the interrupt, and we need to make sure the entry is fully populated | |
4896 | * before that happens. | |
4897 | */ | |
4898 | -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | |
4899 | +static void | |
4900 | +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | |
4901 | { | |
4902 | - unsigned long flags; | |
4903 | union entry_union eu; | |
4904 | eu.entry = e; | |
4905 | - spin_lock_irqsave(&ioapic_lock, flags); | |
4906 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); | |
4907 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | |
4908 | +} | |
4909 | + | |
4910 | +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | |
4911 | +{ | |
4912 | + unsigned long flags; | |
4913 | + spin_lock_irqsave(&ioapic_lock, flags); | |
4914 | + __ioapic_write_entry(apic, pin, e); | |
4915 | spin_unlock_irqrestore(&ioapic_lock, flags); | |
4916 | } | |
4917 | ||
4918 | @@ -720,6 +726,22 @@ static int assign_irq_vector(int irq, cp | |
4919 | } | |
4920 | ||
4921 | #ifndef CONFIG_XEN | |
4922 | +static void __clear_irq_vector(int irq) | |
4923 | +{ | |
4924 | + cpumask_t mask; | |
4925 | + int cpu, vector; | |
4926 | + | |
4927 | + BUG_ON(!irq_vector[irq]); | |
4928 | + | |
4929 | + vector = irq_vector[irq]; | |
4930 | + cpus_and(mask, irq_domain[irq], cpu_online_map); | |
4931 | + for_each_cpu_mask(cpu, mask) | |
4932 | + per_cpu(vector_irq, cpu)[vector] = -1; | |
4933 | + | |
4934 | + irq_vector[irq] = 0; | |
4935 | + irq_domain[irq] = CPU_MASK_NONE; | |
4936 | +} | |
4937 | + | |
4938 | void __setup_vector_irq(int cpu) | |
4939 | { | |
4940 | /* Initialize vector_irq on a new cpu */ | |
4941 | @@ -767,26 +789,65 @@ static void ioapic_register_intr(int irq | |
4942 | #define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq) | |
4943 | #endif /* !CONFIG_XEN */ | |
4944 | ||
4945 | -static void __init setup_IO_APIC_irqs(void) | |
4946 | +static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) | |
4947 | { | |
4948 | struct IO_APIC_route_entry entry; | |
4949 | - int apic, pin, idx, irq, first_notcon = 1, vector; | |
4950 | + int vector; | |
4951 | unsigned long flags; | |
4952 | ||
4953 | - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | |
4954 | ||
4955 | - for (apic = 0; apic < nr_ioapics; apic++) { | |
4956 | - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | |
4957 | + /* | |
4958 | + * add it to the IO-APIC irq-routing table: | |
4959 | + */ | |
4960 | + memset(&entry,0,sizeof(entry)); | |
4961 | ||
4962 | - /* | |
4963 | - * add it to the IO-APIC irq-routing table: | |
4964 | - */ | |
4965 | - memset(&entry,0,sizeof(entry)); | |
4966 | + entry.delivery_mode = INT_DELIVERY_MODE; | |
4967 | + entry.dest_mode = INT_DEST_MODE; | |
4968 | + entry.mask = 0; /* enable IRQ */ | |
4969 | + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); | |
4970 | ||
4971 | - entry.delivery_mode = INT_DELIVERY_MODE; | |
4972 | - entry.dest_mode = INT_DEST_MODE; | |
4973 | - entry.mask = 0; /* enable IRQ */ | |
4974 | + entry.trigger = irq_trigger(idx); | |
4975 | + entry.polarity = irq_polarity(idx); | |
4976 | + | |
4977 | + if (irq_trigger(idx)) { | |
4978 | + entry.trigger = 1; | |
4979 | + entry.mask = 1; | |
4980 | entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); | |
4981 | + } | |
4982 | + | |
4983 | + if (/* !apic && */ !IO_APIC_IRQ(irq)) | |
4984 | + return; | |
4985 | + | |
4986 | + if (IO_APIC_IRQ(irq)) { | |
4987 | + cpumask_t mask; | |
4988 | + vector = assign_irq_vector(irq, TARGET_CPUS, &mask); | |
4989 | + if (vector < 0) | |
4990 | + return; | |
4991 | + | |
4992 | + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); | |
4993 | + entry.vector = vector; | |
4994 | + | |
4995 | + ioapic_register_intr(irq, vector, IOAPIC_AUTO); | |
4996 | + if (!apic && (irq < 16)) | |
4997 | + disable_8259A_irq(irq); | |
4998 | + } | |
4999 | + | |
5000 | + ioapic_write_entry(apic, pin, entry); | |
5001 | + | |
5002 | + spin_lock_irqsave(&ioapic_lock, flags); | |
5003 | + set_native_irq_info(irq, TARGET_CPUS); | |
5004 | + spin_unlock_irqrestore(&ioapic_lock, flags); | |
5005 | + | |
5006 | +} | |
5007 | + | |
5008 | +static void __init setup_IO_APIC_irqs(void) | |
5009 | +{ | |
5010 | + int apic, pin, idx, irq, first_notcon = 1; | |
5011 | + | |
5012 | + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | |
5013 | + | |
5014 | + for (apic = 0; apic < nr_ioapics; apic++) { | |
5015 | + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | |
5016 | ||
5017 | idx = find_irq_entry(apic,pin,mp_INT); | |
5018 | if (idx == -1) { | |
5019 | @@ -798,39 +859,11 @@ static void __init setup_IO_APIC_irqs(vo | |
5020 | continue; | |
5021 | } | |
5022 | ||
5023 | - entry.trigger = irq_trigger(idx); | |
5024 | - entry.polarity = irq_polarity(idx); | |
5025 | - | |
5026 | - if (irq_trigger(idx)) { | |
5027 | - entry.trigger = 1; | |
5028 | - entry.mask = 1; | |
5029 | - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); | |
5030 | - } | |
5031 | - | |
5032 | irq = pin_2_irq(idx, apic, pin); | |
5033 | add_pin_to_irq(irq, apic, pin); | |
5034 | ||
5035 | - if (/* !apic && */ !IO_APIC_IRQ(irq)) | |
5036 | - continue; | |
5037 | - | |
5038 | - if (IO_APIC_IRQ(irq)) { | |
5039 | - cpumask_t mask; | |
5040 | - vector = assign_irq_vector(irq, TARGET_CPUS, &mask); | |
5041 | - if (vector < 0) | |
5042 | - continue; | |
5043 | - | |
5044 | - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); | |
5045 | - entry.vector = vector; | |
5046 | + setup_IO_APIC_irq(apic, pin, idx, irq); | |
5047 | ||
5048 | - ioapic_register_intr(irq, vector, IOAPIC_AUTO); | |
5049 | - if (!apic && (irq < 16)) | |
5050 | - disable_8259A_irq(irq); | |
5051 | - } | |
5052 | - ioapic_write_entry(apic, pin, entry); | |
5053 | - | |
5054 | - spin_lock_irqsave(&ioapic_lock, flags); | |
5055 | - set_native_irq_info(irq, TARGET_CPUS); | |
5056 | - spin_unlock_irqrestore(&ioapic_lock, flags); | |
5057 | } | |
5058 | } | |
5059 | ||
5060 | @@ -1826,7 +1859,7 @@ void destroy_irq(unsigned int irq) | |
5061 | dynamic_irq_cleanup(irq); | |
5062 | ||
5063 | spin_lock_irqsave(&vector_lock, flags); | |
5064 | - irq_vector[irq] = 0; | |
5065 | + __clear_irq_vector(irq); | |
5066 | spin_unlock_irqrestore(&vector_lock, flags); | |
5067 | } | |
5068 | ||
5069 | @@ -2131,7 +2164,15 @@ void __init setup_ioapic_dest(void) | |
5070 | if (irq_entry == -1) | |
5071 | continue; | |
5072 | irq = pin_2_irq(irq_entry, ioapic, pin); | |
5073 | - set_ioapic_affinity_irq(irq, TARGET_CPUS); | |
5074 | + | |
 5075 | + /* setup_IO_APIC_irqs() can fail to get a vector for some devices | |
 5076 | + * when there are too many devices, because at that time only the | |
 5077 | + * boot CPU is online. | |
 5078 | + */ | |
5079 | + if(!irq_vector[irq]) | |
5080 | + setup_IO_APIC_irq(ioapic, pin, irq_entry, irq); | |
5081 | + else | |
5082 | + set_ioapic_affinity_irq(irq, TARGET_CPUS); | |
5083 | } | |
5084 | ||
5085 | } | |
5086 | Index: head-2008-12-01/arch/x86/kernel/irq_64-xen.c | |
5087 | =================================================================== | |
5088 | --- head-2008-12-01.orig/arch/x86/kernel/irq_64-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
5089 | +++ head-2008-12-01/arch/x86/kernel/irq_64-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
5090 | @@ -120,7 +120,7 @@ asmlinkage unsigned int do_IRQ(struct pt | |
5091 | ||
5092 | if (likely(irq < NR_IRQS)) | |
5093 | generic_handle_irq(irq); | |
5094 | - else | |
5095 | + else if (printk_ratelimit()) | |
5096 | printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n", | |
5097 | __func__, smp_processor_id(), irq); | |
5098 | ||
5099 | Index: head-2008-12-01/arch/x86/kernel/mpparse_64-xen.c | |
5100 | =================================================================== | |
5101 | --- head-2008-12-01.orig/arch/x86/kernel/mpparse_64-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
5102 | +++ head-2008-12-01/arch/x86/kernel/mpparse_64-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
5103 | @@ -35,8 +35,6 @@ | |
5104 | int smp_found_config; | |
5105 | unsigned int __initdata maxcpus = NR_CPUS; | |
5106 | ||
5107 | -int acpi_found_madt; | |
5108 | - | |
5109 | /* | |
5110 | * Various Linux-internal data structures created from the | |
5111 | * MP-table. | |
5112 | Index: head-2008-12-01/arch/x86/kernel/process_64-xen.c | |
5113 | =================================================================== | |
5114 | --- head-2008-12-01.orig/arch/x86/kernel/process_64-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
5115 | +++ head-2008-12-01/arch/x86/kernel/process_64-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
5116 | @@ -119,29 +119,23 @@ void exit_idle(void) | |
5117 | static void poll_idle (void) | |
5118 | { | |
5119 | local_irq_enable(); | |
5120 | - | |
5121 | - asm volatile( | |
5122 | - "2:" | |
5123 | - "testl %0,%1;" | |
5124 | - "rep; nop;" | |
5125 | - "je 2b;" | |
5126 | - : : | |
5127 | - "i" (_TIF_NEED_RESCHED), | |
5128 | - "m" (current_thread_info()->flags)); | |
5129 | + cpu_relax(); | |
5130 | } | |
5131 | ||
5132 | static void xen_idle(void) | |
5133 | { | |
5134 | + current_thread_info()->status &= ~TS_POLLING; | |
5135 | + /* | |
5136 | + * TS_POLLING-cleared state must be visible before we | |
5137 | + * test NEED_RESCHED: | |
5138 | + */ | |
5139 | + smp_mb(); | |
5140 | local_irq_disable(); | |
5141 | - | |
5142 | - if (need_resched()) | |
5143 | - local_irq_enable(); | |
5144 | - else { | |
5145 | - current_thread_info()->status &= ~TS_POLLING; | |
5146 | - smp_mb__after_clear_bit(); | |
5147 | + if (!need_resched()) | |
5148 | safe_halt(); | |
5149 | - current_thread_info()->status |= TS_POLLING; | |
5150 | - } | |
5151 | + else | |
5152 | + local_irq_enable(); | |
5153 | + current_thread_info()->status |= TS_POLLING; | |
5154 | } | |
5155 | ||
5156 | #ifdef CONFIG_HOTPLUG_CPU | |
5157 | @@ -181,6 +175,12 @@ void cpu_idle (void) | |
5158 | idle = xen_idle; /* no alternatives */ | |
5159 | if (cpu_is_offline(smp_processor_id())) | |
5160 | play_dead(); | |
5161 | + /* | |
5162 | + * Idle routines should keep interrupts disabled | |
5163 | + * from here on, until they go to idle. | |
5164 | + * Otherwise, idle callbacks can misfire. | |
5165 | + */ | |
5166 | + local_irq_disable(); | |
5167 | enter_idle(); | |
5168 | idle(); | |
5169 | /* In many cases the interrupt that ended idle | |
5170 | Index: head-2008-12-01/arch/x86/kernel/setup_64-xen.c | |
5171 | =================================================================== | |
5172 | --- head-2008-12-01.orig/arch/x86/kernel/setup_64-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
5173 | +++ head-2008-12-01/arch/x86/kernel/setup_64-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
5174 | @@ -573,8 +573,7 @@ void __init setup_arch(char **cmdline_p) | |
5175 | if (LOADER_TYPE && INITRD_START) { | |
5176 | if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { | |
5177 | reserve_bootmem_generic(INITRD_START, INITRD_SIZE); | |
5178 | - initrd_start = | |
5179 | - INITRD_START ? INITRD_START + PAGE_OFFSET : 0; | |
5180 | + initrd_start = INITRD_START + PAGE_OFFSET; | |
5181 | initrd_end = initrd_start+INITRD_SIZE; | |
5182 | } | |
5183 | else { | |
5184 | @@ -990,11 +989,8 @@ static void __cpuinit init_amd(struct cp | |
5185 | /* Fix cpuid4 emulation for more */ | |
5186 | num_cache_leaves = 3; | |
5187 | ||
5188 | - /* When there is only one core no need to synchronize RDTSC */ | |
5189 | - if (num_possible_cpus() == 1) | |
5190 | - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | |
5191 | - else | |
5192 | - clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | |
5193 | + /* RDTSC can be speculated around */ | |
5194 | + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | |
5195 | } | |
5196 | ||
5197 | static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |
5198 | @@ -1093,6 +1089,15 @@ static void __cpuinit init_intel(struct | |
5199 | set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); | |
5200 | } | |
5201 | ||
5202 | + if (cpu_has_ds) { | |
5203 | + unsigned int l1, l2; | |
5204 | + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); | |
5205 | + if (!(l1 & (1<<11))) | |
5206 | + set_bit(X86_FEATURE_BTS, c->x86_capability); | |
5207 | + if (!(l1 & (1<<12))) | |
5208 | + set_bit(X86_FEATURE_PEBS, c->x86_capability); | |
5209 | + } | |
5210 | + | |
5211 | n = c->extended_cpuid_level; | |
5212 | if (n >= 0x80000008) { | |
5213 | unsigned eax = cpuid_eax(0x80000008); | |
5214 | @@ -1112,7 +1117,10 @@ static void __cpuinit init_intel(struct | |
5215 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | |
5216 | if (c->x86 == 6) | |
5217 | set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); | |
5218 | - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | |
5219 | + if (c->x86 == 15) | |
5220 | + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | |
5221 | + else | |
5222 | + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | |
5223 | c->x86_max_cores = intel_num_cpu_cores(c); | |
5224 | ||
5225 | srat_detect_node(); | |
5226 | Index: head-2008-12-01/arch/x86/kernel/smp_64-xen.c | |
5227 | =================================================================== | |
5228 | --- head-2008-12-01.orig/arch/x86/kernel/smp_64-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
5229 | +++ head-2008-12-01/arch/x86/kernel/smp_64-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
5230 | @@ -384,12 +384,17 @@ int smp_call_function_single (int cpu, v | |
5231 | put_cpu(); | |
5232 | return 0; | |
5233 | } | |
5234 | + | |
5235 | + /* Can deadlock when called with interrupts disabled */ | |
5236 | + WARN_ON(irqs_disabled()); | |
5237 | + | |
5238 | spin_lock_bh(&call_lock); | |
5239 | __smp_call_function_single(cpu, func, info, nonatomic, wait); | |
5240 | spin_unlock_bh(&call_lock); | |
5241 | put_cpu(); | |
5242 | return 0; | |
5243 | } | |
5244 | +EXPORT_SYMBOL(smp_call_function_single); | |
5245 | ||
5246 | /* | |
5247 | * this function sends a 'generic call function' IPI to all other CPUs | |
5248 | Index: head-2008-12-01/arch/x86/kernel/traps_64-xen.c | |
5249 | =================================================================== | |
5250 | --- head-2008-12-01.orig/arch/x86/kernel/traps_64-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
5251 | +++ head-2008-12-01/arch/x86/kernel/traps_64-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
5252 | @@ -30,9 +30,10 @@ | |
5253 | #include <linux/kprobes.h> | |
5254 | #include <linux/kexec.h> | |
5255 | #include <linux/unwind.h> | |
5256 | +#include <linux/uaccess.h> | |
5257 | +#include <linux/bug.h> | |
5258 | ||
5259 | #include <asm/system.h> | |
5260 | -#include <asm/uaccess.h> | |
5261 | #include <asm/io.h> | |
5262 | #include <asm/atomic.h> | |
5263 | #include <asm/debugreg.h> | |
5264 | @@ -108,12 +109,7 @@ static inline void preempt_conditional_c | |
5265 | preempt_enable_no_resched(); | |
5266 | } | |
5267 | ||
5268 | -static int kstack_depth_to_print = 12; | |
5269 | -#ifdef CONFIG_STACK_UNWIND | |
5270 | -static int call_trace = 1; | |
5271 | -#else | |
5272 | -#define call_trace (-1) | |
5273 | -#endif | |
5274 | +int kstack_depth_to_print = 12; | |
5275 | ||
5276 | #ifdef CONFIG_KALLSYMS | |
5277 | void printk_address(unsigned long address) | |
5278 | @@ -218,24 +214,7 @@ static unsigned long *in_exception_stack | |
5279 | return NULL; | |
5280 | } | |
5281 | ||
5282 | -struct ops_and_data { | |
5283 | - struct stacktrace_ops *ops; | |
5284 | - void *data; | |
5285 | -}; | |
5286 | - | |
5287 | -static int dump_trace_unwind(struct unwind_frame_info *info, void *context) | |
5288 | -{ | |
5289 | - struct ops_and_data *oad = (struct ops_and_data *)context; | |
5290 | - int n = 0; | |
5291 | - | |
5292 | - while (unwind(info) == 0 && UNW_PC(info)) { | |
5293 | - n++; | |
5294 | - oad->ops->address(oad->data, UNW_PC(info)); | |
5295 | - if (arch_unw_user_mode(info)) | |
5296 | - break; | |
5297 | - } | |
5298 | - return n; | |
5299 | -} | |
5300 | +#define MSG(txt) ops->warning(data, txt) | |
5301 | ||
5302 | /* | |
5303 | * x86-64 can have upto three kernel stacks: | |
5304 | @@ -250,61 +229,24 @@ static inline int valid_stack_ptr(struct | |
5305 | return p > t && p < t + THREAD_SIZE - 3; | |
5306 | } | |
5307 | ||
5308 | -void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack, | |
5309 | +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | |
5310 | + unsigned long *stack, | |
5311 | struct stacktrace_ops *ops, void *data) | |
5312 | { | |
5313 | - const unsigned cpu = smp_processor_id(); | |
5314 | - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; | |
5315 | + const unsigned cpu = get_cpu(); | |
5316 | + unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; | |
5317 | unsigned used = 0; | |
5318 | struct thread_info *tinfo; | |
5319 | ||
5320 | if (!tsk) | |
5321 | tsk = current; | |
5322 | ||
5323 | - if (call_trace >= 0) { | |
5324 | - int unw_ret = 0; | |
5325 | - struct unwind_frame_info info; | |
5326 | - struct ops_and_data oad = { .ops = ops, .data = data }; | |
5327 | - | |
5328 | - if (regs) { | |
5329 | - if (unwind_init_frame_info(&info, tsk, regs) == 0) | |
5330 | - unw_ret = dump_trace_unwind(&info, &oad); | |
5331 | - } else if (tsk == current) | |
5332 | - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); | |
5333 | - else { | |
5334 | - if (unwind_init_blocked(&info, tsk) == 0) | |
5335 | - unw_ret = dump_trace_unwind(&info, &oad); | |
5336 | - } | |
5337 | - if (unw_ret > 0) { | |
5338 | - if (call_trace == 1 && !arch_unw_user_mode(&info)) { | |
5339 | - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", | |
5340 | - UNW_PC(&info)); | |
5341 | - if ((long)UNW_SP(&info) < 0) { | |
5342 | - ops->warning(data, "Leftover inexact backtrace:\n"); | |
5343 | - stack = (unsigned long *)UNW_SP(&info); | |
5344 | - if (!stack) | |
5345 | - return; | |
5346 | - } else | |
5347 | - ops->warning(data, "Full inexact backtrace again:\n"); | |
5348 | - } else if (call_trace >= 1) | |
5349 | - return; | |
5350 | - else | |
5351 | - ops->warning(data, "Full inexact backtrace again:\n"); | |
5352 | - } else | |
5353 | - ops->warning(data, "Inexact backtrace:\n"); | |
5354 | - } | |
5355 | if (!stack) { | |
5356 | unsigned long dummy; | |
5357 | stack = &dummy; | |
5358 | if (tsk && tsk != current) | |
5359 | stack = (unsigned long *)tsk->thread.rsp; | |
5360 | } | |
5361 | - /* | |
5362 | - * Align the stack pointer on word boundary, later loops | |
5363 | - * rely on that (and corruption / debug info bugs can cause | |
5364 | - * unaligned values here): | |
5365 | - */ | |
5366 | - stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1)); | |
5367 | ||
5368 | /* | |
5369 | * Print function call entries within a stack. 'cond' is the | |
5370 | @@ -314,9 +256,9 @@ void dump_trace(struct task_struct *tsk, | |
5371 | #define HANDLE_STACK(cond) \ | |
5372 | do while (cond) { \ | |
5373 | unsigned long addr = *stack++; \ | |
5374 | - if (oops_in_progress ? \ | |
5375 | - __kernel_text_address(addr) : \ | |
5376 | - kernel_text_address(addr)) { \ | |
 5377 | + /* Use unlocked access here because, except for NMIs, \ | |
 5378 | + we should already be protected against module unloads */ \ | |
5379 | + if (__kernel_text_address(addr)) { \ | |
5380 | /* \ | |
5381 | * If the address is either in the text segment of the \ | |
5382 | * kernel, or in the region which contains vmalloc'ed \ | |
5383 | @@ -379,9 +321,10 @@ void dump_trace(struct task_struct *tsk, | |
5384 | /* | |
5385 | * This handles the process stack: | |
5386 | */ | |
5387 | - tinfo = current_thread_info(); | |
5388 | + tinfo = task_thread_info(tsk); | |
5389 | HANDLE_STACK (valid_stack_ptr(tinfo, stack)); | |
5390 | #undef HANDLE_STACK | |
5391 | + put_cpu(); | |
5392 | } | |
5393 | EXPORT_SYMBOL(dump_trace); | |
5394 | ||
5395 | @@ -518,30 +461,15 @@ bad: | |
5396 | printk("\n"); | |
5397 | } | |
5398 | ||
5399 | -void handle_BUG(struct pt_regs *regs) | |
5400 | -{ | |
5401 | - struct bug_frame f; | |
5402 | - long len; | |
5403 | - const char *prefix = ""; | |
5404 | +int is_valid_bugaddr(unsigned long rip) | |
5405 | +{ | |
5406 | + unsigned short ud2; | |
5407 | ||
5408 | - if (user_mode(regs)) | |
5409 | - return; | |
5410 | - if (__copy_from_user(&f, (const void __user *) regs->rip, | |
5411 | - sizeof(struct bug_frame))) | |
5412 | - return; | |
5413 | - if (f.filename >= 0 || | |
5414 | - f.ud2[0] != 0x0f || f.ud2[1] != 0x0b) | |
5415 | - return; | |
5416 | - len = __strnlen_user((char *)(long)f.filename, PATH_MAX) - 1; | |
5417 | - if (len < 0 || len >= PATH_MAX) | |
5418 | - f.filename = (int)(long)"unmapped filename"; | |
5419 | - else if (len > 50) { | |
5420 | - f.filename += len - 50; | |
5421 | - prefix = "..."; | |
5422 | - } | |
5423 | - printk("----------- [cut here ] --------- [please bite here ] ---------\n"); | |
5424 | - printk(KERN_ALERT "Kernel BUG at %s%.50s:%d\n", prefix, (char *)(long)f.filename, f.line); | |
5425 | -} | |
5426 | + if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2))) | |
5427 | + return 0; | |
5428 | + | |
5429 | + return ud2 == 0x0b0f; | |
5430 | +} | |
5431 | ||
5432 | #ifdef CONFIG_BUG | |
5433 | void out_of_line_bug(void) | |
5434 | @@ -621,7 +549,9 @@ void die(const char * str, struct pt_reg | |
5435 | { | |
5436 | unsigned long flags = oops_begin(); | |
5437 | ||
5438 | - handle_BUG(regs); | |
5439 | + if (!user_mode(regs)) | |
5440 | + report_bug(regs->rip); | |
5441 | + | |
5442 | __die(str, regs, err); | |
5443 | oops_end(flags); | |
5444 | do_exit(SIGSEGV); | |
5445 | @@ -790,8 +720,7 @@ mem_parity_error(unsigned char reason, s | |
5446 | { | |
5447 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", | |
5448 | reason); | |
5449 | - printk(KERN_EMERG "You probably have a hardware problem with your " | |
5450 | - "RAM chips\n"); | |
5451 | + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); | |
5452 | ||
5453 | if (panic_on_unrecovered_nmi) | |
5454 | panic("NMI: Not continuing"); | |
5455 | @@ -1227,21 +1156,3 @@ static int __init kstack_setup(char *s) | |
5456 | return 0; | |
5457 | } | |
5458 | early_param("kstack", kstack_setup); | |
5459 | - | |
5460 | -#ifdef CONFIG_STACK_UNWIND | |
5461 | -static int __init call_trace_setup(char *s) | |
5462 | -{ | |
5463 | - if (!s) | |
5464 | - return -EINVAL; | |
5465 | - if (strcmp(s, "old") == 0) | |
5466 | - call_trace = -1; | |
5467 | - else if (strcmp(s, "both") == 0) | |
5468 | - call_trace = 0; | |
5469 | - else if (strcmp(s, "newfallback") == 0) | |
5470 | - call_trace = 1; | |
5471 | - else if (strcmp(s, "new") == 0) | |
5472 | - call_trace = 2; | |
5473 | - return 0; | |
5474 | -} | |
5475 | -early_param("call_trace", call_trace_setup); | |
5476 | -#endif | |
5477 | Index: head-2008-12-01/arch/x86/kernel/vsyscall_64-xen.c | |
5478 | =================================================================== | |
5479 | --- head-2008-12-01.orig/arch/x86/kernel/vsyscall_64-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
5480 | +++ head-2008-12-01/arch/x86/kernel/vsyscall_64-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
5481 | @@ -42,6 +42,7 @@ | |
5482 | #include <asm/topology.h> | |
5483 | ||
5484 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | |
5485 | +#define __syscall_clobber "r11","rcx","memory" | |
5486 | ||
5487 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; | |
5488 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; | |
5489 | @@ -224,8 +225,7 @@ out: | |
5490 | ||
5491 | static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen, | |
5492 | void __user *oldval, size_t __user *oldlenp, | |
5493 | - void __user *newval, size_t newlen, | |
5494 | - void **context) | |
5495 | + void __user *newval, size_t newlen) | |
5496 | { | |
5497 | return -ENOSYS; | |
5498 | } | |
5499 | @@ -277,7 +277,6 @@ static void __cpuinit cpu_vsyscall_init( | |
5500 | vsyscall_set_cpu(raw_smp_processor_id()); | |
5501 | } | |
5502 | ||
5503 | -#ifdef CONFIG_HOTPLUG_CPU | |
5504 | static int __cpuinit | |
5505 | cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) | |
5506 | { | |
5507 | @@ -286,13 +285,13 @@ cpu_vsyscall_notifier(struct notifier_bl | |
5508 | smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); | |
5509 | return NOTIFY_DONE; | |
5510 | } | |
5511 | -#endif | |
5512 | ||
5513 | static void __init map_vsyscall(void) | |
5514 | { | |
5515 | extern char __vsyscall_0; | |
5516 | unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); | |
5517 | ||
5518 | + /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ | |
5519 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); | |
5520 | } | |
5521 | ||
5522 | Index: head-2008-12-01/arch/x86/mm/fault_64-xen.c | |
5523 | =================================================================== | |
5524 | --- head-2008-12-01.orig/arch/x86/mm/fault_64-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
5525 | +++ head-2008-12-01/arch/x86/mm/fault_64-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
5526 | @@ -23,9 +23,9 @@ | |
5527 | #include <linux/compiler.h> | |
5528 | #include <linux/module.h> | |
5529 | #include <linux/kprobes.h> | |
5530 | +#include <linux/uaccess.h> | |
5531 | ||
5532 | #include <asm/system.h> | |
5533 | -#include <asm/uaccess.h> | |
5534 | #include <asm/pgalloc.h> | |
5535 | #include <asm/smp.h> | |
5536 | #include <asm/tlbflush.h> | |
5537 | @@ -96,7 +96,7 @@ void bust_spinlocks(int yes) | |
5538 | static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, | |
5539 | unsigned long error_code) | |
5540 | { | |
5541 | - unsigned char __user *instr; | |
5542 | + unsigned char *instr; | |
5543 | int scan_more = 1; | |
5544 | int prefetch = 0; | |
5545 | unsigned char *max_instr; | |
5546 | @@ -116,7 +116,7 @@ static noinline int is_prefetch(struct p | |
5547 | unsigned char instr_hi; | |
5548 | unsigned char instr_lo; | |
5549 | ||
5550 | - if (__get_user(opcode, (char __user *)instr)) | |
5551 | + if (probe_kernel_address(instr, opcode)) | |
5552 | break; | |
5553 | ||
5554 | instr_hi = opcode & 0xf0; | |
5555 | @@ -154,7 +154,7 @@ static noinline int is_prefetch(struct p | |
5556 | case 0x00: | |
5557 | /* Prefetch instruction is 0x0F0D or 0x0F18 */ | |
5558 | scan_more = 0; | |
5559 | - if (__get_user(opcode, (char __user *)instr)) | |
5560 | + if (probe_kernel_address(instr, opcode)) | |
5561 | break; | |
5562 | prefetch = (instr_lo == 0xF) && | |
5563 | (opcode == 0x0D || opcode == 0x18); | |
5564 | @@ -170,7 +170,7 @@ static noinline int is_prefetch(struct p | |
5565 | static int bad_address(void *p) | |
5566 | { | |
5567 | unsigned long dummy; | |
5568 | - return __get_user(dummy, (unsigned long __user *)p); | |
5569 | + return probe_kernel_address((unsigned long *)p, dummy); | |
5570 | } | |
5571 | ||
5572 | void dump_pagetable(unsigned long address) | |
5573 | Index: head-2008-12-01/arch/x86/mm/init_64-xen.c | |
5574 | =================================================================== | |
5575 | --- head-2008-12-01.orig/arch/x86/mm/init_64-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
5576 | +++ head-2008-12-01/arch/x86/mm/init_64-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
5577 | @@ -1129,14 +1129,15 @@ static __init int x8664_sysctl_init(void | |
5578 | __initcall(x8664_sysctl_init); | |
5579 | #endif | |
5580 | ||
5581 | -/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only | |
5582 | +/* A pseudo VMA to allow ptrace access for the vsyscall page. This only | |
5583 | covers the 64bit vsyscall page now. 32bit has a real VMA now and does | |
5584 | not need special handling anymore. */ | |
5585 | ||
5586 | static struct vm_area_struct gate_vma = { | |
5587 | .vm_start = VSYSCALL_START, | |
5588 | - .vm_end = VSYSCALL_END, | |
5589 | - .vm_page_prot = PAGE_READONLY | |
5590 | + .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT), | |
5591 | + .vm_page_prot = PAGE_READONLY_EXEC, | |
5592 | + .vm_flags = VM_READ | VM_EXEC | |
5593 | }; | |
5594 | ||
5595 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | |
5596 | Index: head-2008-12-01/arch/x86/mm/pageattr_64-xen.c | |
5597 | =================================================================== | |
5598 | --- head-2008-12-01.orig/arch/x86/mm/pageattr_64-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
5599 | +++ head-2008-12-01/arch/x86/mm/pageattr_64-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
5600 | @@ -330,34 +330,40 @@ static struct page *split_large_page(uns | |
5601 | return base; | |
5602 | } | |
5603 | ||
5604 | - | |
5605 | -static void flush_kernel_map(void *address) | |
5606 | +static void cache_flush_page(void *adr) | |
5607 | { | |
5608 | - if (0 && address && cpu_has_clflush) { | |
5609 | - /* is this worth it? */ | |
5610 | - int i; | |
5611 | - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) | |
5612 | - asm volatile("clflush (%0)" :: "r" (address + i)); | |
5613 | - } else | |
5614 | - asm volatile("wbinvd":::"memory"); | |
5615 | - if (address) | |
5616 | - __flush_tlb_one(address); | |
5617 | - else | |
5618 | - __flush_tlb_all(); | |
5619 | + int i; | |
5620 | + for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) | |
5621 | + asm volatile("clflush (%0)" :: "r" (adr + i)); | |
5622 | } | |
5623 | ||
5624 | +static void flush_kernel_map(void *arg) | |
5625 | +{ | |
5626 | + struct list_head *l = (struct list_head *)arg; | |
5627 | + struct page *pg; | |
5628 | ||
5629 | -static inline void flush_map(unsigned long address) | |
5630 | + /* When clflush is available always use it because it is | |
5631 | + much cheaper than WBINVD */ | |
5632 | + if (!cpu_has_clflush) | |
5633 | + asm volatile("wbinvd" ::: "memory"); | |
5634 | + list_for_each_entry(pg, l, lru) { | |
5635 | + void *adr = page_address(pg); | |
5636 | + if (cpu_has_clflush) | |
5637 | + cache_flush_page(adr); | |
5638 | + __flush_tlb_one(adr); | |
5639 | + } | |
5640 | +} | |
5641 | + | |
5642 | +static inline void flush_map(struct list_head *l) | |
5643 | { | |
5644 | - on_each_cpu(flush_kernel_map, (void *)address, 1, 1); | |
5645 | + on_each_cpu(flush_kernel_map, l, 1, 1); | |
5646 | } | |
5647 | ||
5648 | -static struct page *deferred_pages; /* protected by init_mm.mmap_sem */ | |
5649 | +static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */ | |
5650 | ||
5651 | static inline void save_page(struct page *fpage) | |
5652 | { | |
5653 | - fpage->lru.next = (struct list_head *)deferred_pages; | |
5654 | - deferred_pages = fpage; | |
5655 | + list_add(&fpage->lru, &deferred_pages); | |
5656 | } | |
5657 | ||
5658 | /* | |
5659 | @@ -487,18 +493,18 @@ int change_page_attr(struct page *page, | |
5660 | ||
5661 | void global_flush_tlb(void) | |
5662 | { | |
5663 | - struct page *dpage; | |
5664 | + struct page *pg, *next; | |
5665 | + struct list_head l; | |
5666 | ||
5667 | down_read(&init_mm.mmap_sem); | |
5668 | - dpage = xchg(&deferred_pages, NULL); | |
5669 | + list_replace_init(&deferred_pages, &l); | |
5670 | up_read(&init_mm.mmap_sem); | |
5671 | ||
5672 | - flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0); | |
5673 | - while (dpage) { | |
5674 | - struct page *tmp = dpage; | |
5675 | - dpage = (struct page *)dpage->lru.next; | |
5676 | - ClearPagePrivate(tmp); | |
5677 | - __free_page(tmp); | |
5678 | + flush_map(&l); | |
5679 | + | |
5680 | + list_for_each_entry_safe(pg, next, &l, lru) { | |
5681 | + ClearPagePrivate(pg); | |
5682 | + __free_page(pg); | |
5683 | } | |
5684 | } | |
5685 | ||
5686 | Index: head-2008-12-01/drivers/pci/msi-xen.c | |
5687 | =================================================================== | |
5688 | --- head-2008-12-01.orig/drivers/pci/msi-xen.c 2008-12-01 11:29:05.000000000 +0100 | |
5689 | +++ head-2008-12-01/drivers/pci/msi-xen.c 2008-12-01 11:32:38.000000000 +0100 | |
5690 | @@ -263,10 +263,8 @@ void disable_msi_mode(struct pci_dev *de | |
5691 | pci_write_config_word(dev, msi_control_reg(pos), control); | |
5692 | dev->msix_enabled = 0; | |
5693 | } | |
5694 | - if (pci_find_capability(dev, PCI_CAP_ID_EXP)) { | |
5695 | - /* PCI Express Endpoint device detected */ | |
5696 | - pci_intx(dev, 1); /* enable intx */ | |
5697 | - } | |
5698 | + | |
5699 | + pci_intx(dev, 1); /* enable intx */ | |
5700 | } | |
5701 | ||
5702 | static void enable_msi_mode(struct pci_dev *dev, int pos, int type) | |
5703 | @@ -284,10 +282,8 @@ static void enable_msi_mode(struct pci_d | |
5704 | pci_write_config_word(dev, msi_control_reg(pos), control); | |
5705 | dev->msix_enabled = 1; | |
5706 | } | |
5707 | - if (pci_find_capability(dev, PCI_CAP_ID_EXP)) { | |
5708 | - /* PCI Express Endpoint device detected */ | |
5709 | - pci_intx(dev, 0); /* disable intx */ | |
5710 | - } | |
5711 | + | |
5712 | + pci_intx(dev, 0); /* disable intx */ | |
5713 | } | |
5714 | ||
5715 | #ifdef CONFIG_PM | |
5716 | Index: head-2008-12-01/drivers/xen/balloon/balloon.c | |
5717 | =================================================================== | |
5718 | --- head-2008-12-01.orig/drivers/xen/balloon/balloon.c 2008-12-01 11:29:05.000000000 +0100 | |
5719 | +++ head-2008-12-01/drivers/xen/balloon/balloon.c 2008-12-01 11:32:38.000000000 +0100 | |
5720 | @@ -97,8 +97,8 @@ extern unsigned long totalhigh_pages; | |
5721 | static LIST_HEAD(ballooned_pages); | |
5722 | ||
5723 | /* Main work function, always executed in process context. */ | |
5724 | -static void balloon_process(void *unused); | |
5725 | -static DECLARE_WORK(balloon_worker, balloon_process, NULL); | |
5726 | +static void balloon_process(struct work_struct *unused); | |
5727 | +static DECLARE_WORK(balloon_worker, balloon_process); | |
5728 | static struct timer_list balloon_timer; | |
5729 | ||
5730 | /* When ballooning out (allocating memory to return to Xen) we don't really | |
5731 | @@ -389,7 +389,7 @@ static int decrease_reservation(unsigned | |
5732 | * by the balloon lock), or with changes to the Xen hard limit, but we will | |
5733 | * recover from these in time. | |
5734 | */ | |
5735 | -static void balloon_process(void *unused) | |
5736 | +static void balloon_process(struct work_struct *unused) | |
5737 | { | |
5738 | int need_sleep = 0; | |
5739 | long credit; | |
5740 | Index: head-2008-12-01/drivers/xen/blkback/blkback.c | |
5741 | =================================================================== | |
5742 | --- head-2008-12-01.orig/drivers/xen/blkback/blkback.c 2008-12-01 11:29:05.000000000 +0100 | |
5743 | +++ head-2008-12-01/drivers/xen/blkback/blkback.c 2008-12-01 11:32:38.000000000 +0100 | |
5744 | @@ -37,6 +37,7 @@ | |
5745 | ||
5746 | #include <linux/spinlock.h> | |
5747 | #include <linux/kthread.h> | |
5748 | +#include <linux/freezer.h> | |
5749 | #include <linux/list.h> | |
5750 | #include <linux/delay.h> | |
5751 | #include <xen/balloon.h> | |
5752 | Index: head-2008-12-01/drivers/xen/blkback/interface.c | |
5753 | =================================================================== | |
5754 | --- head-2008-12-01.orig/drivers/xen/blkback/interface.c 2007-06-12 13:13:44.000000000 +0200 | |
5755 | +++ head-2008-12-01/drivers/xen/blkback/interface.c 2008-12-01 11:32:38.000000000 +0100 | |
5756 | @@ -34,7 +34,7 @@ | |
5757 | #include <xen/evtchn.h> | |
5758 | #include <linux/kthread.h> | |
5759 | ||
5760 | -static kmem_cache_t *blkif_cachep; | |
5761 | +static struct kmem_cache *blkif_cachep; | |
5762 | ||
5763 | blkif_t *blkif_alloc(domid_t domid) | |
5764 | { | |
5765 | Index: head-2008-12-01/drivers/xen/blkfront/blkfront.c | |
5766 | =================================================================== | |
5767 | --- head-2008-12-01.orig/drivers/xen/blkfront/blkfront.c 2008-12-01 11:29:05.000000000 +0100 | |
5768 | +++ head-2008-12-01/drivers/xen/blkfront/blkfront.c 2008-12-01 11:32:38.000000000 +0100 | |
5769 | @@ -70,7 +70,7 @@ static int setup_blkring(struct xenbus_d | |
5770 | static void kick_pending_request_queues(struct blkfront_info *); | |
5771 | ||
5772 | static irqreturn_t blkif_int(int irq, void *dev_id); | |
5773 | -static void blkif_restart_queue(void *arg); | |
5774 | +static void blkif_restart_queue(struct work_struct *arg); | |
5775 | static void blkif_recover(struct blkfront_info *); | |
5776 | static void blkif_completion(struct blk_shadow *); | |
5777 | static void blkif_free(struct blkfront_info *, int); | |
5778 | @@ -110,7 +110,7 @@ static int blkfront_probe(struct xenbus_ | |
5779 | info->xbdev = dev; | |
5780 | info->vdevice = vdevice; | |
5781 | info->connected = BLKIF_STATE_DISCONNECTED; | |
5782 | - INIT_WORK(&info->work, blkif_restart_queue, (void *)info); | |
5783 | + INIT_WORK(&info->work, blkif_restart_queue); | |
5784 | ||
5785 | for (i = 0; i < BLK_RING_SIZE; i++) | |
5786 | info->shadow[i].req.id = i+1; | |
5787 | @@ -459,9 +459,9 @@ static void kick_pending_request_queues( | |
5788 | } | |
5789 | } | |
5790 | ||
5791 | -static void blkif_restart_queue(void *arg) | |
5792 | +static void blkif_restart_queue(struct work_struct *arg) | |
5793 | { | |
5794 | - struct blkfront_info *info = (struct blkfront_info *)arg; | |
5795 | + struct blkfront_info *info = container_of(arg, struct blkfront_info, work); | |
5796 | spin_lock_irq(&blkif_io_lock); | |
5797 | if (info->connected == BLKIF_STATE_CONNECTED) | |
5798 | kick_pending_request_queues(info); | |
5799 | Index: head-2008-12-01/drivers/xen/blktap/blktap.c | |
5800 | =================================================================== | |
5801 | --- head-2008-12-01.orig/drivers/xen/blktap/blktap.c 2008-12-01 11:29:05.000000000 +0100 | |
5802 | +++ head-2008-12-01/drivers/xen/blktap/blktap.c 2008-12-01 11:32:38.000000000 +0100 | |
5803 | @@ -40,6 +40,7 @@ | |
5804 | ||
5805 | #include <linux/spinlock.h> | |
5806 | #include <linux/kthread.h> | |
5807 | +#include <linux/freezer.h> | |
5808 | #include <linux/list.h> | |
5809 | #include <asm/hypervisor.h> | |
5810 | #include "common.h" | |
5811 | Index: head-2008-12-01/drivers/xen/blktap/interface.c | |
5812 | =================================================================== | |
5813 | --- head-2008-12-01.orig/drivers/xen/blktap/interface.c 2008-09-15 13:40:15.000000000 +0200 | |
5814 | +++ head-2008-12-01/drivers/xen/blktap/interface.c 2008-12-01 11:32:38.000000000 +0100 | |
5815 | @@ -34,7 +34,7 @@ | |
5816 | #include "common.h" | |
5817 | #include <xen/evtchn.h> | |
5818 | ||
5819 | -static kmem_cache_t *blkif_cachep; | |
5820 | +static struct kmem_cache *blkif_cachep; | |
5821 | ||
5822 | blkif_t *tap_alloc_blkif(domid_t domid) | |
5823 | { | |
5824 | Index: head-2008-12-01/drivers/xen/char/mem.c | |
5825 | =================================================================== | |
5826 | --- head-2008-12-01.orig/drivers/xen/char/mem.c 2007-08-06 15:10:49.000000000 +0200 | |
5827 | +++ head-2008-12-01/drivers/xen/char/mem.c 2008-12-01 11:32:38.000000000 +0100 | |
5828 | @@ -157,7 +157,7 @@ static loff_t memory_lseek(struct file * | |
5829 | { | |
5830 | loff_t ret; | |
5831 | ||
5832 | - mutex_lock(&file->f_dentry->d_inode->i_mutex); | |
5833 | + mutex_lock(&file->f_path.dentry->d_inode->i_mutex); | |
5834 | switch (orig) { | |
5835 | case 0: | |
5836 | file->f_pos = offset; | |
5837 | @@ -172,7 +172,7 @@ static loff_t memory_lseek(struct file * | |
5838 | default: | |
5839 | ret = -EINVAL; | |
5840 | } | |
5841 | - mutex_unlock(&file->f_dentry->d_inode->i_mutex); | |
5842 | + mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); | |
5843 | return ret; | |
5844 | } | |
5845 | ||
5846 | Index: head-2008-12-01/drivers/xen/console/console.c | |
5847 | =================================================================== | |
5848 | --- head-2008-12-01.orig/drivers/xen/console/console.c 2008-12-01 11:29:05.000000000 +0100 | |
5849 | +++ head-2008-12-01/drivers/xen/console/console.c 2008-12-01 11:32:38.000000000 +0100 | |
5850 | @@ -80,11 +80,6 @@ static int xc_num = -1; | |
5851 | #define XEN_XVC_MAJOR 204 | |
5852 | #define XEN_XVC_MINOR 191 | |
5853 | ||
5854 | -#ifdef CONFIG_MAGIC_SYSRQ | |
5855 | -static unsigned long sysrq_requested; | |
5856 | -extern int sysrq_enabled; | |
5857 | -#endif | |
5858 | - | |
5859 | static int __init xencons_setup(char *str) | |
5860 | { | |
5861 | char *q; | |
5862 | @@ -339,8 +334,8 @@ void __init dom0_init_screen_info(const | |
5863 | #define DUMMY_TTY(_tty) ((xc_mode == XC_TTY) && \ | |
5864 | ((_tty)->index != (xc_num - 1))) | |
5865 | ||
5866 | -static struct termios *xencons_termios[MAX_NR_CONSOLES]; | |
5867 | -static struct termios *xencons_termios_locked[MAX_NR_CONSOLES]; | |
5868 | +static struct ktermios *xencons_termios[MAX_NR_CONSOLES]; | |
5869 | +static struct ktermios *xencons_termios_locked[MAX_NR_CONSOLES]; | |
5870 | static struct tty_struct *xencons_tty; | |
5871 | static int xencons_priv_irq; | |
5872 | static char x_char; | |
5873 | @@ -356,7 +351,9 @@ void xencons_rx(char *buf, unsigned len) | |
5874 | ||
5875 | for (i = 0; i < len; i++) { | |
5876 | #ifdef CONFIG_MAGIC_SYSRQ | |
5877 | - if (sysrq_enabled) { | |
5878 | + if (sysrq_on()) { | |
5879 | + static unsigned long sysrq_requested; | |
5880 | + | |
5881 | if (buf[i] == '\x0f') { /* ^O */ | |
5882 | if (!sysrq_requested) { | |
5883 | sysrq_requested = jiffies; | |
5884 | Index: head-2008-12-01/drivers/xen/core/reboot.c | |
5885 | =================================================================== | |
5886 | --- head-2008-12-01.orig/drivers/xen/core/reboot.c 2008-12-01 11:29:05.000000000 +0100 | |
5887 | +++ head-2008-12-01/drivers/xen/core/reboot.c 2008-12-01 11:32:38.000000000 +0100 | |
5888 | @@ -34,8 +34,8 @@ static int suspend_cancelled; | |
5889 | /* Can we leave APs online when we suspend? */ | |
5890 | static int fast_suspend; | |
5891 | ||
5892 | -static void __shutdown_handler(void *unused); | |
5893 | -static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL); | |
5894 | +static void __shutdown_handler(struct work_struct *unused); | |
5895 | +static DECLARE_DELAYED_WORK(shutdown_work, __shutdown_handler); | |
5896 | ||
5897 | static int setup_suspend_evtchn(void); | |
5898 | ||
5899 | @@ -105,7 +105,7 @@ static int xen_suspend(void *__unused) | |
5900 | case SHUTDOWN_RESUMING: | |
5901 | break; | |
5902 | default: | |
5903 | - schedule_work(&shutdown_work); | |
5904 | + schedule_delayed_work(&shutdown_work, 0); | |
5905 | break; | |
5906 | } | |
5907 | ||
5908 | @@ -137,12 +137,12 @@ static void switch_shutdown_state(int ne | |
5909 | ||
5910 | /* Either we kick off the work, or we leave it to xen_suspend(). */ | |
5911 | if (old_state == SHUTDOWN_INVALID) | |
5912 | - schedule_work(&shutdown_work); | |
5913 | + schedule_delayed_work(&shutdown_work, 0); | |
5914 | else | |
5915 | BUG_ON(old_state != SHUTDOWN_RESUMING); | |
5916 | } | |
5917 | ||
5918 | -static void __shutdown_handler(void *unused) | |
5919 | +static void __shutdown_handler(struct work_struct *unused) | |
5920 | { | |
5921 | int err; | |
5922 | ||
5923 | Index: head-2008-12-01/drivers/xen/core/smpboot.c | |
5924 | =================================================================== | |
5925 | --- head-2008-12-01.orig/drivers/xen/core/smpboot.c 2008-12-01 11:29:05.000000000 +0100 | |
5926 | +++ head-2008-12-01/drivers/xen/core/smpboot.c 2008-12-01 11:32:38.000000000 +0100 | |
5927 | @@ -165,7 +165,12 @@ static void xen_smp_intr_exit(unsigned i | |
5928 | ||
5929 | void __cpuinit cpu_bringup(void) | |
5930 | { | |
5931 | +#ifdef __i386__ | |
5932 | + cpu_set_gdt(current_thread_info()->cpu); | |
5933 | + secondary_cpu_init(); | |
5934 | +#else | |
5935 | cpu_init(); | |
5936 | +#endif | |
5937 | identify_cpu(cpu_data + smp_processor_id()); | |
5938 | touch_softlockup_watchdog(); | |
5939 | preempt_disable(); | |
5940 | @@ -304,11 +309,12 @@ void __init smp_prepare_cpus(unsigned in | |
5941 | if (cpu == 0) | |
5942 | continue; | |
5943 | ||
5944 | + idle = fork_idle(cpu); | |
5945 | + if (IS_ERR(idle)) | |
5946 | + panic("failed fork for CPU %d", cpu); | |
5947 | + | |
5948 | #ifdef __x86_64__ | |
5949 | gdt_descr = &cpu_gdt_descr[cpu]; | |
5950 | -#else | |
5951 | - gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | |
5952 | -#endif | |
5953 | gdt_descr->address = get_zeroed_page(GFP_KERNEL); | |
5954 | if (unlikely(!gdt_descr->address)) { | |
5955 | printk(KERN_CRIT "CPU%d failed to allocate GDT\n", | |
5956 | @@ -317,6 +323,11 @@ void __init smp_prepare_cpus(unsigned in | |
5957 | } | |
5958 | gdt_descr->size = GDT_SIZE; | |
5959 | memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE); | |
5960 | +#else | |
5961 | + if (unlikely(!init_gdt(cpu, idle))) | |
5962 | + continue; | |
5963 | + gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | |
5964 | +#endif | |
5965 | make_page_readonly( | |
5966 | (void *)gdt_descr->address, | |
5967 | XENFEAT_writable_descriptor_tables); | |
5968 | @@ -336,10 +347,6 @@ void __init smp_prepare_cpus(unsigned in | |
5969 | cpu_2_logical_apicid[cpu] = apicid; | |
5970 | x86_cpu_to_apicid[cpu] = apicid; | |
5971 | ||
5972 | - idle = fork_idle(cpu); | |
5973 | - if (IS_ERR(idle)) | |
5974 | - panic("failed fork for CPU %d", cpu); | |
5975 | - | |
5976 | #ifdef __x86_64__ | |
5977 | cpu_pda(cpu)->pcurrent = idle; | |
5978 | cpu_pda(cpu)->cpunumber = cpu; | |
5979 | Index: head-2008-12-01/drivers/xen/fbfront/xenfb.c | |
5980 | =================================================================== | |
5981 | --- head-2008-12-01.orig/drivers/xen/fbfront/xenfb.c 2008-12-01 11:29:05.000000000 +0100 | |
5982 | +++ head-2008-12-01/drivers/xen/fbfront/xenfb.c 2008-12-01 11:32:38.000000000 +0100 | |
5983 | @@ -25,6 +25,7 @@ | |
5984 | #include <linux/vmalloc.h> | |
5985 | #include <linux/mm.h> | |
5986 | #include <linux/mutex.h> | |
5987 | +#include <linux/freezer.h> | |
5988 | #include <asm/hypervisor.h> | |
5989 | #include <xen/evtchn.h> | |
5990 | #include <xen/interface/io/fbif.h> | |
5991 | Index: head-2008-12-01/drivers/xen/netback/loopback.c | |
5992 | =================================================================== | |
5993 | --- head-2008-12-01.orig/drivers/xen/netback/loopback.c 2008-12-01 11:29:05.000000000 +0100 | |
5994 | +++ head-2008-12-01/drivers/xen/netback/loopback.c 2008-12-01 11:32:38.000000000 +0100 | |
5995 | @@ -54,6 +54,7 @@ | |
5996 | #include <net/dst.h> | |
5997 | #include <net/xfrm.h> /* secpath_reset() */ | |
5998 | #include <asm/hypervisor.h> /* is_initial_xendomain() */ | |
5999 | +#include <../net/core/kmap_skb.h> /* k{,un}map_skb_frag() */ | |
6000 | ||
6001 | static int nloopbacks = -1; | |
6002 | module_param(nloopbacks, int, 0); | |
6003 | Index: head-2008-12-01/drivers/xen/pciback/conf_space_header.c | |
6004 | =================================================================== | |
6005 | --- head-2008-12-01.orig/drivers/xen/pciback/conf_space_header.c 2008-10-29 09:55:56.000000000 +0100 | |
6006 | +++ head-2008-12-01/drivers/xen/pciback/conf_space_header.c 2008-12-01 11:32:38.000000000 +0100 | |
6007 | @@ -22,14 +22,14 @@ static int command_write(struct pci_dev | |
6008 | { | |
6009 | int err; | |
6010 | ||
6011 | - if (!dev->is_enabled && is_enable_cmd(value)) { | |
6012 | + if (!atomic_read(&dev->enable_cnt) && is_enable_cmd(value)) { | |
6013 | if (unlikely(verbose_request)) | |
6014 | printk(KERN_DEBUG "pciback: %s: enable\n", | |
6015 | pci_name(dev)); | |
6016 | err = pci_enable_device(dev); | |
6017 | if (err) | |
6018 | return err; | |
6019 | - } else if (dev->is_enabled && !is_enable_cmd(value)) { | |
6020 | + } else if (atomic_read(&dev->enable_cnt) && !is_enable_cmd(value)) { | |
6021 | if (unlikely(verbose_request)) | |
6022 | printk(KERN_DEBUG "pciback: %s: disable\n", | |
6023 | pci_name(dev)); | |
6024 | Index: head-2008-12-01/drivers/xen/pciback/pciback.h | |
6025 | =================================================================== | |
6026 | --- head-2008-12-01.orig/drivers/xen/pciback/pciback.h 2008-12-01 11:29:05.000000000 +0100 | |
6027 | +++ head-2008-12-01/drivers/xen/pciback/pciback.h 2008-12-01 11:32:38.000000000 +0100 | |
6028 | @@ -88,7 +88,7 @@ void pciback_release_devices(struct pcib | |
6029 | ||
6030 | /* Handles events from front-end */ | |
6031 | irqreturn_t pciback_handle_event(int irq, void *dev_id); | |
6032 | -void pciback_do_op(void *data); | |
6033 | +void pciback_do_op(struct work_struct *work); | |
6034 | ||
6035 | int pciback_xenbus_register(void); | |
6036 | void pciback_xenbus_unregister(void); | |
6037 | Index: head-2008-12-01/drivers/xen/pciback/pciback_ops.c | |
6038 | =================================================================== | |
6039 | --- head-2008-12-01.orig/drivers/xen/pciback/pciback_ops.c 2008-12-01 11:29:05.000000000 +0100 | |
6040 | +++ head-2008-12-01/drivers/xen/pciback/pciback_ops.c 2008-12-01 11:32:38.000000000 +0100 | |
6041 | @@ -25,7 +25,7 @@ void pciback_reset_device(struct pci_dev | |
6042 | ||
6043 | pci_write_config_word(dev, PCI_COMMAND, 0); | |
6044 | ||
6045 | - dev->is_enabled = 0; | |
6046 | + atomic_set(&dev->enable_cnt, 0); | |
6047 | dev->is_busmaster = 0; | |
6048 | } else { | |
6049 | pci_read_config_word(dev, PCI_COMMAND, &cmd); | |
6050 | @@ -51,9 +51,9 @@ static inline void test_and_schedule_op( | |
6051 | * context because some of the pci_* functions can sleep (mostly due to ACPI | |
6052 | * use of semaphores). This function is intended to be called from a work | |
6053 | * queue in process context taking a struct pciback_device as a parameter */ | |
6054 | -void pciback_do_op(void *data) | |
6055 | +void pciback_do_op(struct work_struct *work) | |
6056 | { | |
6057 | - struct pciback_device *pdev = data; | |
6058 | + struct pciback_device *pdev = container_of(work, struct pciback_device, op_work); | |
6059 | struct pci_dev *dev; | |
6060 | struct xen_pci_op *op = &pdev->sh_info->op; | |
6061 | ||
6062 | Index: head-2008-12-01/drivers/xen/pciback/xenbus.c | |
6063 | =================================================================== | |
6064 | --- head-2008-12-01.orig/drivers/xen/pciback/xenbus.c 2008-07-21 11:00:33.000000000 +0200 | |
6065 | +++ head-2008-12-01/drivers/xen/pciback/xenbus.c 2008-12-01 11:32:38.000000000 +0100 | |
6066 | @@ -32,7 +32,7 @@ static struct pciback_device *alloc_pdev | |
6067 | pdev->evtchn_irq = INVALID_EVTCHN_IRQ; | |
6068 | pdev->be_watching = 0; | |
6069 | ||
6070 | - INIT_WORK(&pdev->op_work, pciback_do_op, pdev); | |
6071 | + INIT_WORK(&pdev->op_work, pciback_do_op); | |
6072 | ||
6073 | if (pciback_init_devices(pdev)) { | |
6074 | kfree(pdev); | |
6075 | @@ -54,7 +54,6 @@ static void pciback_disconnect(struct pc | |
6076 | ||
6077 | /* If the driver domain started an op, make sure we complete it or | |
6078 | * delete it before releasing the shared memory */ | |
6079 | - cancel_delayed_work(&pdev->op_work); | |
6080 | flush_scheduled_work(); | |
6081 | ||
6082 | if (pdev->sh_info != NULL) { | |
6083 | Index: head-2008-12-01/drivers/xen/scsiback/interface.c | |
6084 | =================================================================== | |
6085 | --- head-2008-12-01.orig/drivers/xen/scsiback/interface.c 2008-07-21 11:00:33.000000000 +0200 | |
6086 | +++ head-2008-12-01/drivers/xen/scsiback/interface.c 2008-12-01 11:32:38.000000000 +0100 | |
6087 | @@ -39,7 +39,7 @@ | |
6088 | #include <linux/kthread.h> | |
6089 | ||
6090 | ||
6091 | -static kmem_cache_t *scsiback_cachep; | |
6092 | +static struct kmem_cache *scsiback_cachep; | |
6093 | ||
6094 | struct vscsibk_info *vscsibk_info_alloc(domid_t domid) | |
6095 | { | |
6096 | Index: head-2008-12-01/drivers/xen/scsiback/scsiback.c | |
6097 | =================================================================== | |
6098 | --- head-2008-12-01.orig/drivers/xen/scsiback/scsiback.c 2008-12-01 11:29:05.000000000 +0100 | |
6099 | +++ head-2008-12-01/drivers/xen/scsiback/scsiback.c 2008-12-01 11:32:38.000000000 +0100 | |
6100 | @@ -322,13 +322,11 @@ static int scsiback_merge_bio(struct req | |
6101 | ||
6102 | if (!rq->bio) | |
6103 | blk_rq_bio_prep(q, rq, bio); | |
6104 | - else if (!q->back_merge_fn(q, rq, bio)) | |
6105 | + else if (!ll_back_merge_fn(q, rq, bio)) | |
6106 | return -EINVAL; | |
6107 | else { | |
6108 | rq->biotail->bi_next = bio; | |
6109 | rq->biotail = bio; | |
6110 | - rq->hard_nr_sectors += bio_sectors(bio); | |
6111 | - rq->nr_sectors = rq->hard_nr_sectors; | |
6112 | } | |
6113 | ||
6114 | return 0; | |
6115 | Index: head-2008-12-01/drivers/xen/sfc_netfront/accel_vi.c | |
6116 | =================================================================== | |
6117 | --- head-2008-12-01.orig/drivers/xen/sfc_netfront/accel_vi.c 2008-12-01 11:29:05.000000000 +0100 | |
6118 | +++ head-2008-12-01/drivers/xen/sfc_netfront/accel_vi.c 2008-12-01 11:32:38.000000000 +0100 | |
6119 | @@ -463,7 +463,7 @@ netfront_accel_enqueue_skb_multi(netfron | |
6120 | ||
6121 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | |
6122 | /* Set to zero to encourage falcon to work it out for us */ | |
6123 | - *(u16*)(skb->h.raw + skb->csum) = 0; | |
6124 | + *(u16*)(skb->h.raw + skb->csum_offset) = 0; | |
6125 | } | |
6126 | ||
6127 | if (multi_post_start_new_buffer(vnic, &state)) { | |
6128 | @@ -582,7 +582,7 @@ netfront_accel_enqueue_skb_single(netfro | |
6129 | ||
6130 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | |
6131 | /* Set to zero to encourage falcon to work it out for us */ | |
6132 | - *(u16*)(skb->h.raw + skb->csum) = 0; | |
6133 | + *(u16*)(skb->h.raw + skb->csum_offset) = 0; | |
6134 | } | |
6135 | NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT | |
6136 | (skb, idx, frag_data, frag_len, { | |
6137 | Index: head-2008-12-01/drivers/xen/tpmback/interface.c | |
6138 | =================================================================== | |
6139 | --- head-2008-12-01.orig/drivers/xen/tpmback/interface.c 2008-01-21 11:15:26.000000000 +0100 | |
6140 | +++ head-2008-12-01/drivers/xen/tpmback/interface.c 2008-12-01 11:32:38.000000000 +0100 | |
6141 | @@ -15,7 +15,7 @@ | |
6142 | #include <xen/balloon.h> | |
6143 | #include <xen/gnttab.h> | |
6144 | ||
6145 | -static kmem_cache_t *tpmif_cachep; | |
6146 | +static struct kmem_cache *tpmif_cachep; | |
6147 | int num_frontends = 0; | |
6148 | ||
6149 | LIST_HEAD(tpmif_list); | |
6150 | Index: head-2008-12-01/drivers/xen/xenbus/xenbus_comms.c | |
6151 | =================================================================== | |
6152 | --- head-2008-12-01.orig/drivers/xen/xenbus/xenbus_comms.c 2008-12-01 11:29:05.000000000 +0100 | |
6153 | +++ head-2008-12-01/drivers/xen/xenbus/xenbus_comms.c 2008-12-01 11:32:38.000000000 +0100 | |
6154 | @@ -49,9 +49,9 @@ | |
6155 | ||
6156 | static int xenbus_irq; | |
6157 | ||
6158 | -extern void xenbus_probe(void *); | |
6159 | +extern void xenbus_probe(struct work_struct *); | |
6160 | extern int xenstored_ready; | |
6161 | -static DECLARE_WORK(probe_work, xenbus_probe, NULL); | |
6162 | +static DECLARE_WORK(probe_work, xenbus_probe); | |
6163 | ||
6164 | static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); | |
6165 | ||
6166 | Index: head-2008-12-01/drivers/xen/xenbus/xenbus_probe.c | |
6167 | =================================================================== | |
6168 | --- head-2008-12-01.orig/drivers/xen/xenbus/xenbus_probe.c 2008-12-01 11:22:58.000000000 +0100 | |
6169 | +++ head-2008-12-01/drivers/xen/xenbus/xenbus_probe.c 2008-12-01 11:32:38.000000000 +0100 | |
6170 | @@ -843,7 +843,7 @@ void unregister_xenstore_notifier(struct | |
6171 | EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); | |
6172 | ||
6173 | ||
6174 | -void xenbus_probe(void *unused) | |
6175 | +void xenbus_probe(struct work_struct *unused) | |
6176 | { | |
6177 | BUG_ON((xenstored_ready <= 0)); | |
6178 | ||
6179 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/desc_32.h | |
6180 | =================================================================== | |
6181 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/desc_32.h 2008-12-01 11:29:05.000000000 +0100 | |
6182 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/desc_32.h 2008-12-01 11:32:38.000000000 +0100 | |
6183 | @@ -4,8 +4,6 @@ | |
6184 | #include <asm/ldt.h> | |
6185 | #include <asm/segment.h> | |
6186 | ||
6187 | -#define CPU_16BIT_STACK_SIZE 1024 | |
6188 | - | |
6189 | #ifndef __ASSEMBLY__ | |
6190 | ||
6191 | #include <linux/preempt.h> | |
6192 | @@ -15,8 +13,6 @@ | |
6193 | ||
6194 | extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; | |
6195 | ||
6196 | -DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); | |
6197 | - | |
6198 | struct Xgt_desc_struct { | |
6199 | unsigned short size; | |
6200 | unsigned long address __attribute__((packed)); | |
6201 | @@ -32,11 +28,6 @@ static inline struct desc_struct *get_cp | |
6202 | return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; | |
6203 | } | |
6204 | ||
6205 | -/* | |
6206 | - * This is the ldt that every process will get unless we need | |
6207 | - * something other than this. | |
6208 | - */ | |
6209 | -extern struct desc_struct default_ldt[]; | |
6210 | extern struct desc_struct idt_table[]; | |
6211 | extern void set_intr_gate(unsigned int irq, void * addr); | |
6212 | ||
6213 | @@ -63,8 +54,8 @@ static inline void pack_gate(__u32 *a, _ | |
6214 | #define DESCTYPE_DPL3 0x60 /* DPL-3 */ | |
6215 | #define DESCTYPE_S 0x10 /* !system */ | |
6216 | ||
6217 | +#ifndef CONFIG_XEN | |
6218 | #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) | |
6219 | -#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)) | |
6220 | ||
6221 | #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) | |
6222 | #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) | |
6223 | @@ -75,6 +66,7 @@ static inline void pack_gate(__u32 *a, _ | |
6224 | #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr)) | |
6225 | #define store_tr(tr) __asm__ ("str %0":"=m" (tr)) | |
6226 | #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) | |
6227 | +#endif | |
6228 | ||
6229 | #if TLS_SIZE != 24 | |
6230 | # error update this code. | |
6231 | @@ -90,22 +82,43 @@ static inline void load_TLS(struct threa | |
6232 | } | |
6233 | ||
6234 | #ifndef CONFIG_XEN | |
6235 | +#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | |
6236 | +#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | |
6237 | +#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | |
6238 | + | |
6239 | static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) | |
6240 | { | |
6241 | __u32 *lp = (__u32 *)((char *)dt + entry*8); | |
6242 | *lp = entry_a; | |
6243 | *(lp+1) = entry_b; | |
6244 | } | |
6245 | - | |
6246 | -#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | |
6247 | -#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | |
6248 | +#define set_ldt native_set_ldt | |
6249 | #else | |
6250 | extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); | |
6251 | extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b); | |
6252 | +#define set_ldt xen_set_ldt | |
6253 | +#endif | |
6254 | + | |
6255 | +#ifndef CONFIG_XEN | |
6256 | +static inline fastcall void native_set_ldt(const void *addr, | |
6257 | + unsigned int entries) | |
6258 | +{ | |
6259 | + if (likely(entries == 0)) | |
6260 | + __asm__ __volatile__("lldt %w0"::"q" (0)); | |
6261 | + else { | |
6262 | + unsigned cpu = smp_processor_id(); | |
6263 | + __u32 a, b; | |
6264 | + | |
6265 | + pack_descriptor(&a, &b, (unsigned long)addr, | |
6266 | + entries * sizeof(struct desc_struct) - 1, | |
6267 | + DESCTYPE_LDT, 0); | |
6268 | + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); | |
6269 | + __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); | |
6270 | + } | |
6271 | +} | |
6272 | #endif | |
6273 | -#ifndef CONFIG_X86_NO_IDT | |
6274 | -#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | |
6275 | ||
6276 | +#ifndef CONFIG_X86_NO_IDT | |
6277 | static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg) | |
6278 | { | |
6279 | __u32 a, b; | |
6280 | @@ -125,14 +138,6 @@ static inline void __set_tss_desc(unsign | |
6281 | } | |
6282 | #endif | |
6283 | ||
6284 | -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries) | |
6285 | -{ | |
6286 | - __u32 a, b; | |
6287 | - pack_descriptor(&a, &b, (unsigned long)addr, | |
6288 | - entries * sizeof(struct desc_struct) - 1, | |
6289 | - DESCTYPE_LDT, 0); | |
6290 | - write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); | |
6291 | -} | |
6292 | ||
6293 | #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) | |
6294 | ||
6295 | @@ -163,36 +168,22 @@ static inline void set_ldt_desc(unsigned | |
6296 | ||
6297 | static inline void clear_LDT(void) | |
6298 | { | |
6299 | - int cpu = get_cpu(); | |
6300 | - | |
6301 | - /* | |
6302 | - * NB. We load the default_ldt for lcall7/27 handling on demand, as | |
6303 | - * it slows down context switching. Noone uses it anyway. | |
6304 | - */ | |
6305 | - cpu = cpu; /* XXX avoid compiler warning */ | |
6306 | - xen_set_ldt(NULL, 0); | |
6307 | - put_cpu(); | |
6308 | + set_ldt(NULL, 0); | |
6309 | } | |
6310 | ||
6311 | /* | |
6312 | * load one particular LDT into the current CPU | |
6313 | */ | |
6314 | -static inline void load_LDT_nolock(mm_context_t *pc, int cpu) | |
6315 | +static inline void load_LDT_nolock(mm_context_t *pc) | |
6316 | { | |
6317 | - void *segments = pc->ldt; | |
6318 | - int count = pc->size; | |
6319 | - | |
6320 | - if (likely(!count)) | |
6321 | - segments = NULL; | |
6322 | - | |
6323 | - xen_set_ldt(segments, count); | |
6324 | + set_ldt(pc->ldt, pc->size); | |
6325 | } | |
6326 | ||
6327 | static inline void load_LDT(mm_context_t *pc) | |
6328 | { | |
6329 | - int cpu = get_cpu(); | |
6330 | - load_LDT_nolock(pc, cpu); | |
6331 | - put_cpu(); | |
6332 | + preempt_disable(); | |
6333 | + load_LDT_nolock(pc); | |
6334 | + preempt_enable(); | |
6335 | } | |
6336 | ||
6337 | static inline unsigned long get_desc_base(unsigned long *desc) | |
6338 | @@ -204,6 +195,29 @@ static inline unsigned long get_desc_bas | |
6339 | return base; | |
6340 | } | |
6341 | ||
6342 | +#else /* __ASSEMBLY__ */ | |
6343 | + | |
6344 | +/* | |
6345 | + * GET_DESC_BASE reads the descriptor base of the specified segment. | |
6346 | + * | |
6347 | + * Args: | |
6348 | + * idx - descriptor index | |
6349 | + * gdt - GDT pointer | |
6350 | + * base - 32bit register to which the base will be written | |
6351 | + * lo_w - lo word of the "base" register | |
6352 | + * lo_b - lo byte of the "base" register | |
6353 | + * hi_b - hi byte of the low word of the "base" register | |
6354 | + * | |
6355 | + * Example: | |
6356 | + * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) | |
6357 | + * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax. | |
6358 | + */ | |
6359 | +#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \ | |
6360 | + movb idx*8+4(gdt), lo_b; \ | |
6361 | + movb idx*8+7(gdt), hi_b; \ | |
6362 | + shll $16, base; \ | |
6363 | + movw idx*8+2(gdt), lo_w; | |
6364 | + | |
6365 | #endif /* !__ASSEMBLY__ */ | |
6366 | ||
6367 | #endif | |
6368 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping_32.h | |
6369 | =================================================================== | |
6370 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/dma-mapping_32.h 2008-04-02 12:34:02.000000000 +0200 | |
6371 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping_32.h 2008-12-01 11:32:38.000000000 +0100 | |
6372 | @@ -127,10 +127,10 @@ dma_get_cache_alignment(void) | |
6373 | return (1 << INTERNODE_CACHE_SHIFT); | |
6374 | } | |
6375 | ||
6376 | -#define dma_is_consistent(d) (1) | |
6377 | +#define dma_is_consistent(d, h) (1) | |
6378 | ||
6379 | static inline void | |
6380 | -dma_cache_sync(void *vaddr, size_t size, | |
6381 | +dma_cache_sync(struct device *dev, void *vaddr, size_t size, | |
6382 | enum dma_data_direction direction) | |
6383 | { | |
6384 | flush_write_buffers(); | |
6385 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/fixmap_32.h | |
6386 | =================================================================== | |
6387 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-12-01 11:29:05.000000000 +0100 | |
6388 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-12-01 11:32:38.000000000 +0100 | |
6389 | @@ -13,13 +13,16 @@ | |
6390 | #ifndef _ASM_FIXMAP_H | |
6391 | #define _ASM_FIXMAP_H | |
6392 | ||
6393 | - | |
6394 | /* used by vmalloc.c, vsyscall.lds.S. | |
6395 | * | |
6396 | * Leave one empty page between vmalloc'ed areas and | |
6397 | * the start of the fixmap. | |
6398 | */ | |
6399 | extern unsigned long __FIXADDR_TOP; | |
6400 | +#ifdef CONFIG_COMPAT_VDSO | |
6401 | +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) | |
6402 | +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) | |
6403 | +#endif | |
6404 | ||
6405 | #ifndef __ASSEMBLY__ | |
6406 | #include <linux/kernel.h> | |
6407 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/highmem.h | |
6408 | =================================================================== | |
6409 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/highmem.h 2008-10-29 09:55:56.000000000 +0100 | |
6410 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/highmem.h 2008-12-01 11:32:38.000000000 +0100 | |
6411 | @@ -85,7 +85,7 @@ static inline void clear_user_highpage(s | |
6412 | ||
6413 | void copy_highpage(struct page *to, struct page *from); | |
6414 | static inline void copy_user_highpage(struct page *to, struct page *from, | |
6415 | - unsigned long vaddr) | |
6416 | + unsigned long vaddr, struct vm_area_struct *vma) | |
6417 | { | |
6418 | copy_highpage(to, from); | |
6419 | } | |
6420 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/hypervisor.h | |
6421 | =================================================================== | |
6422 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/hypervisor.h 2008-12-01 11:29:05.000000000 +0100 | |
6423 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/hypervisor.h 2008-12-01 11:32:38.000000000 +0100 | |
6424 | @@ -46,15 +46,6 @@ | |
6425 | #include <asm/percpu.h> | |
6426 | #include <asm/ptrace.h> | |
6427 | #include <asm/page.h> | |
6428 | -#if defined(__i386__) | |
6429 | -# ifdef CONFIG_X86_PAE | |
6430 | -# include <asm-generic/pgtable-nopud.h> | |
6431 | -# else | |
6432 | -# include <asm-generic/pgtable-nopmd.h> | |
6433 | -# endif | |
6434 | -#elif defined(__x86_64__) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) | |
6435 | -# include <asm-generic/pgtable-nopud.h> | |
6436 | -#endif | |
6437 | ||
6438 | extern shared_info_t *HYPERVISOR_shared_info; | |
6439 | ||
6440 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/io_32.h | |
6441 | =================================================================== | |
6442 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/io_32.h 2008-12-01 11:29:05.000000000 +0100 | |
6443 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/io_32.h 2008-12-01 11:32:38.000000000 +0100 | |
6444 | @@ -269,11 +269,7 @@ static inline void flush_write_buffers(v | |
6445 | ||
6446 | #endif /* __KERNEL__ */ | |
6447 | ||
6448 | -#ifdef SLOW_IO_BY_JUMPING | |
6449 | -#define __SLOW_DOWN_IO "jmp 1f; 1: jmp 1f; 1:" | |
6450 | -#else | |
6451 | #define __SLOW_DOWN_IO "outb %%al,$0x80;" | |
6452 | -#endif | |
6453 | ||
6454 | static inline void slow_down_io(void) { | |
6455 | __asm__ __volatile__( | |
6456 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/irqflags_32.h | |
6457 | =================================================================== | |
6458 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/irqflags_32.h 2007-06-12 13:14:02.000000000 +0200 | |
6459 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/irqflags_32.h 2008-12-01 11:32:38.000000000 +0100 | |
6460 | @@ -22,9 +22,6 @@ | |
6461 | ||
6462 | #define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask) | |
6463 | ||
6464 | -#define raw_local_save_flags(flags) \ | |
6465 | - do { (flags) = __raw_local_save_flags(); } while (0) | |
6466 | - | |
6467 | #define raw_local_irq_restore(x) \ | |
6468 | do { \ | |
6469 | vcpu_info_t *_vcpu; \ | |
6470 | @@ -66,18 +63,6 @@ void raw_safe_halt(void); | |
6471 | */ | |
6472 | void halt(void); | |
6473 | ||
6474 | -static inline int raw_irqs_disabled_flags(unsigned long flags) | |
6475 | -{ | |
6476 | - return (flags != 0); | |
6477 | -} | |
6478 | - | |
6479 | -#define raw_irqs_disabled() \ | |
6480 | -({ \ | |
6481 | - unsigned long flags = __raw_local_save_flags(); \ | |
6482 | - \ | |
6483 | - raw_irqs_disabled_flags(flags); \ | |
6484 | -}) | |
6485 | - | |
6486 | /* | |
6487 | * For spinlocks, etc: | |
6488 | */ | |
6489 | @@ -90,9 +75,62 @@ static inline int raw_irqs_disabled_flag | |
6490 | flags; \ | |
6491 | }) | |
6492 | ||
6493 | +#else | |
6494 | +/* Offsets into shared_info_t. */ | |
6495 | +#define evtchn_upcall_pending /* 0 */ | |
6496 | +#define evtchn_upcall_mask 1 | |
6497 | + | |
6498 | +#define sizeof_vcpu_shift 6 | |
6499 | + | |
6500 | +#ifdef CONFIG_SMP | |
6501 | +#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \ | |
6502 | + shl $sizeof_vcpu_shift,%esi ; \ | |
6503 | + addl HYPERVISOR_shared_info,%esi | |
6504 | +#else | |
6505 | +#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi | |
6506 | +#endif | |
6507 | + | |
6508 | +#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) | |
6509 | +#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) | |
6510 | +#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) | |
6511 | +#define DISABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \ | |
6512 | + __DISABLE_INTERRUPTS | |
6513 | +#define ENABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \ | |
6514 | + __ENABLE_INTERRUPTS | |
6515 | +#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \ | |
6516 | +sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \ | |
6517 | + __TEST_PENDING ; \ | |
6518 | + jnz 14f /* process more events if necessary... */ ; \ | |
6519 | + movl PT_ESI(%esp), %esi ; \ | |
6520 | + sysexit ; \ | |
6521 | +14: __DISABLE_INTERRUPTS ; \ | |
6522 | + TRACE_IRQS_OFF ; \ | |
6523 | +sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \ | |
6524 | + push %esp ; \ | |
6525 | + call evtchn_do_upcall ; \ | |
6526 | + add $4,%esp ; \ | |
6527 | + jmp ret_from_intr | |
6528 | +#define INTERRUPT_RETURN iret | |
6529 | +#endif /* __ASSEMBLY__ */ | |
6530 | + | |
6531 | +#ifndef __ASSEMBLY__ | |
6532 | +#define raw_local_save_flags(flags) \ | |
6533 | + do { (flags) = __raw_local_save_flags(); } while (0) | |
6534 | + | |
6535 | #define raw_local_irq_save(flags) \ | |
6536 | do { (flags) = __raw_local_irq_save(); } while (0) | |
6537 | ||
6538 | +static inline int raw_irqs_disabled_flags(unsigned long flags) | |
6539 | +{ | |
6540 | + return (flags != 0); | |
6541 | +} | |
6542 | + | |
6543 | +#define raw_irqs_disabled() \ | |
6544 | +({ \ | |
6545 | + unsigned long flags = __raw_local_save_flags(); \ | |
6546 | + \ | |
6547 | + raw_irqs_disabled_flags(flags); \ | |
6548 | +}) | |
6549 | #endif /* __ASSEMBLY__ */ | |
6550 | ||
6551 | /* | |
6552 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/mmu_context_32.h | |
6553 | =================================================================== | |
6554 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/mmu_context_32.h 2007-06-12 13:14:02.000000000 +0200 | |
6555 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/mmu_context_32.h 2008-12-01 11:32:38.000000000 +0100 | |
6556 | @@ -27,14 +27,13 @@ static inline void enter_lazy_tlb(struct | |
6557 | static inline void __prepare_arch_switch(void) | |
6558 | { | |
6559 | /* | |
6560 | - * Save away %fs and %gs. No need to save %es and %ds, as those | |
6561 | - * are always kernel segments while inside the kernel. Must | |
6562 | - * happen before reload of cr3/ldt (i.e., not in __switch_to). | |
6563 | + * Save away %fs. No need to save %gs, as it was saved on the | |
6564 | + * stack on entry. No need to save %es and %ds, as those are | |
6565 | + * always kernel segments while inside the kernel. | |
6566 | */ | |
6567 | - asm volatile ( "mov %%fs,%0 ; mov %%gs,%1" | |
6568 | - : "=m" (current->thread.fs), | |
6569 | - "=m" (current->thread.gs)); | |
6570 | - asm volatile ( "movl %0,%%fs ; movl %0,%%gs" | |
6571 | + asm volatile ( "mov %%fs,%0" | |
6572 | + : "=m" (current->thread.fs)); | |
6573 | + asm volatile ( "movl %0,%%fs" | |
6574 | : : "r" (0) ); | |
6575 | } | |
6576 | ||
6577 | @@ -89,14 +88,14 @@ static inline void switch_mm(struct mm_s | |
6578 | * tlb flush IPI delivery. We must reload %cr3. | |
6579 | */ | |
6580 | load_cr3(next->pgd); | |
6581 | - load_LDT_nolock(&next->context, cpu); | |
6582 | + load_LDT_nolock(&next->context); | |
6583 | } | |
6584 | } | |
6585 | #endif | |
6586 | } | |
6587 | ||
6588 | -#define deactivate_mm(tsk, mm) \ | |
6589 | - asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0)) | |
6590 | +#define deactivate_mm(tsk, mm) \ | |
6591 | + asm("movl %0,%%fs": :"r" (0)); | |
6592 | ||
6593 | static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) | |
6594 | { | |
6595 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable-3level.h | |
6596 | =================================================================== | |
6597 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-12-01 11:29:05.000000000 +0100 | |
6598 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-12-01 11:32:38.000000000 +0100 | |
6599 | @@ -1,8 +1,6 @@ | |
6600 | #ifndef _I386_PGTABLE_3LEVEL_H | |
6601 | #define _I386_PGTABLE_3LEVEL_H | |
6602 | ||
6603 | -#include <asm-generic/pgtable-nopud.h> | |
6604 | - | |
6605 | /* | |
6606 | * Intel Physical Address Extension (PAE) Mode - three-level page | |
6607 | * tables on PPro+ CPUs. | |
6608 | @@ -75,6 +73,23 @@ static inline void set_pte(pte_t *ptep, | |
6609 | xen_l3_entry_update((pudptr), (pudval)) | |
6610 | ||
6611 | /* | |
6612 | + * For PTEs and PDEs, we must clear the P-bit first when clearing a page table | |
6613 | + * entry, so clear the bottom half first and enforce ordering with a compiler | |
6614 | + * barrier. | |
6615 | + */ | |
6616 | +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |
6617 | +{ | |
6618 | + if ((mm != current->mm && mm != &init_mm) | |
6619 | + || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { | |
6620 | + ptep->pte_low = 0; | |
6621 | + smp_wmb(); | |
6622 | + ptep->pte_high = 0; | |
6623 | + } | |
6624 | +} | |
6625 | + | |
6626 | +#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) | |
6627 | + | |
6628 | +/* | |
6629 | * Pentium-II erratum A13: in PAE mode we explicitly have to flush | |
6630 | * the TLB via cr3 if the top-level pgd is changed... | |
6631 | * We do not let the generic code free and clear pgd entries due to | |
6632 | @@ -93,45 +108,16 @@ static inline void pud_clear (pud_t * pu | |
6633 | #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ | |
6634 | pmd_index(address)) | |
6635 | ||
6636 | -static inline int pte_none(pte_t pte) | |
6637 | -{ | |
6638 | - return !(pte.pte_low | pte.pte_high); | |
6639 | -} | |
6640 | - | |
6641 | -/* | |
6642 | - * For PTEs and PDEs, we must clear the P-bit first when clearing a page table | |
6643 | - * entry, so clear the bottom half first and enforce ordering with a compiler | |
6644 | - * barrier. | |
6645 | - */ | |
6646 | -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |
6647 | +static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res) | |
6648 | { | |
6649 | - if ((mm != current->mm && mm != &init_mm) | |
6650 | - || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { | |
6651 | - ptep->pte_low = 0; | |
6652 | - smp_wmb(); | |
6653 | + uint64_t val = __pte_val(res); | |
6654 | + if (__cmpxchg64(ptep, val, 0) != val) { | |
6655 | + /* xchg acts as a barrier before the setting of the high bits */ | |
6656 | + res.pte_low = xchg(&ptep->pte_low, 0); | |
6657 | + res.pte_high = ptep->pte_high; | |
6658 | ptep->pte_high = 0; | |
6659 | } | |
6660 | -} | |
6661 | - | |
6662 | -#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) | |
6663 | - | |
6664 | -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR | |
6665 | -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |
6666 | -{ | |
6667 | - pte_t pte = *ptep; | |
6668 | - if (!pte_none(pte)) { | |
6669 | - if ((mm != &init_mm) || | |
6670 | - HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { | |
6671 | - uint64_t val = __pte_val(pte); | |
6672 | - if (__cmpxchg64(ptep, val, 0) != val) { | |
6673 | - /* xchg acts as a barrier before the setting of the high bits */ | |
6674 | - pte.pte_low = xchg(&ptep->pte_low, 0); | |
6675 | - pte.pte_high = ptep->pte_high; | |
6676 | - ptep->pte_high = 0; | |
6677 | - } | |
6678 | - } | |
6679 | - } | |
6680 | - return pte; | |
6681 | + return res; | |
6682 | } | |
6683 | ||
6684 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH | |
6685 | @@ -160,6 +146,11 @@ static inline int pte_same(pte_t a, pte_ | |
6686 | ||
6687 | #define pte_page(x) pfn_to_page(pte_pfn(x)) | |
6688 | ||
6689 | +static inline int pte_none(pte_t pte) | |
6690 | +{ | |
6691 | + return !(pte.pte_low | pte.pte_high); | |
6692 | +} | |
6693 | + | |
6694 | #define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \ | |
6695 | ((_pte).pte_high << (32-PAGE_SHIFT))) | |
6696 | #define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \ | |
6697 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_32.h | |
6698 | =================================================================== | |
6699 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-01 11:29:05.000000000 +0100 | |
6700 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-01 11:32:38.000000000 +0100 | |
6701 | @@ -38,14 +38,14 @@ struct vm_area_struct; | |
6702 | #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) | |
6703 | extern unsigned long empty_zero_page[1024]; | |
6704 | extern pgd_t *swapper_pg_dir; | |
6705 | -extern kmem_cache_t *pgd_cache; | |
6706 | -extern kmem_cache_t *pmd_cache; | |
6707 | +extern struct kmem_cache *pgd_cache; | |
6708 | +extern struct kmem_cache *pmd_cache; | |
6709 | extern spinlock_t pgd_lock; | |
6710 | extern struct page *pgd_list; | |
6711 | ||
6712 | -void pmd_ctor(void *, kmem_cache_t *, unsigned long); | |
6713 | -void pgd_ctor(void *, kmem_cache_t *, unsigned long); | |
6714 | -void pgd_dtor(void *, kmem_cache_t *, unsigned long); | |
6715 | +void pmd_ctor(void *, struct kmem_cache *, unsigned long); | |
6716 | +void pgd_ctor(void *, struct kmem_cache *, unsigned long); | |
6717 | +void pgd_dtor(void *, struct kmem_cache *, unsigned long); | |
6718 | void pgtable_cache_init(void); | |
6719 | void paging_init(void); | |
6720 | ||
6721 | @@ -276,7 +276,6 @@ static inline pte_t pte_mkhuge(pte_t pte | |
6722 | #define pte_update(mm, addr, ptep) do { } while (0) | |
6723 | #define pte_update_defer(mm, addr, ptep) do { } while (0) | |
6724 | ||
6725 | - | |
6726 | /* | |
6727 | * We only update the dirty/accessed state if we set | |
6728 | * the dirty bit by hand in the kernel, since the hardware | |
6729 | @@ -342,6 +341,19 @@ do { \ | |
6730 | __young; \ | |
6731 | }) | |
6732 | ||
6733 | +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR | |
6734 | +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |
6735 | +{ | |
6736 | + pte_t pte = *ptep; | |
6737 | + if (!pte_none(pte) | |
6738 | + && (mm != &init_mm | |
6739 | + || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) { | |
6740 | + pte = raw_ptep_get_and_clear(ptep, pte); | |
6741 | + pte_update(mm, addr, ptep); | |
6742 | + } | |
6743 | + return pte; | |
6744 | +} | |
6745 | + | |
6746 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL | |
6747 | #define ptep_get_and_clear_full(mm, addr, ptep, full) \ | |
6748 | ((full) ? ({ \ | |
6749 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/processor_32.h | |
6750 | =================================================================== | |
6751 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/processor_32.h 2008-12-01 11:29:05.000000000 +0100 | |
6752 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/processor_32.h 2008-12-01 11:32:38.000000000 +0100 | |
6753 | @@ -20,6 +20,7 @@ | |
6754 | #include <linux/threads.h> | |
6755 | #include <asm/percpu.h> | |
6756 | #include <linux/cpumask.h> | |
6757 | +#include <linux/init.h> | |
6758 | #include <xen/interface/physdev.h> | |
6759 | ||
6760 | /* flag for disabling the tsc */ | |
6761 | @@ -73,6 +74,7 @@ struct cpuinfo_x86 { | |
6762 | #endif | |
6763 | unsigned char x86_max_cores; /* cpuid returned max cores value */ | |
6764 | unsigned char apicid; | |
6765 | + unsigned short x86_clflush_size; | |
6766 | #ifdef CONFIG_SMP | |
6767 | unsigned char booted_cores; /* number of cores as seen by OS */ | |
6768 | __u8 phys_proc_id; /* Physical processor id. */ | |
6769 | @@ -114,6 +116,8 @@ extern struct cpuinfo_x86 cpu_data[]; | |
6770 | extern int cpu_llc_id[NR_CPUS]; | |
6771 | extern char ignore_fpu_irq; | |
6772 | ||
6773 | +void __init cpu_detect(struct cpuinfo_x86 *c); | |
6774 | + | |
6775 | extern void identify_cpu(struct cpuinfo_x86 *); | |
6776 | extern void print_cpu_info(struct cpuinfo_x86 *); | |
6777 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); | |
6778 | @@ -146,8 +150,8 @@ static inline void detect_ht(struct cpui | |
6779 | #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ | |
6780 | #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ | |
6781 | ||
6782 | -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, | |
6783 | - unsigned int *ecx, unsigned int *edx) | |
6784 | +static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx, | |
6785 | + unsigned int *ecx, unsigned int *edx) | |
6786 | { | |
6787 | /* ecx is often an input as well as an output. */ | |
6788 | __asm__(XEN_CPUID | |
6789 | @@ -158,59 +162,6 @@ static inline void __cpuid(unsigned int | |
6790 | : "0" (*eax), "2" (*ecx)); | |
6791 | } | |
6792 | ||
6793 | -/* | |
6794 | - * Generic CPUID function | |
6795 | - * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx | |
6796 | - * resulting in stale register contents being returned. | |
6797 | - */ | |
6798 | -static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) | |
6799 | -{ | |
6800 | - *eax = op; | |
6801 | - *ecx = 0; | |
6802 | - __cpuid(eax, ebx, ecx, edx); | |
6803 | -} | |
6804 | - | |
6805 | -/* Some CPUID calls want 'count' to be placed in ecx */ | |
6806 | -static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, | |
6807 | - int *edx) | |
6808 | -{ | |
6809 | - *eax = op; | |
6810 | - *ecx = count; | |
6811 | - __cpuid(eax, ebx, ecx, edx); | |
6812 | -} | |
6813 | - | |
6814 | -/* | |
6815 | - * CPUID functions returning a single datum | |
6816 | - */ | |
6817 | -static inline unsigned int cpuid_eax(unsigned int op) | |
6818 | -{ | |
6819 | - unsigned int eax, ebx, ecx, edx; | |
6820 | - | |
6821 | - cpuid(op, &eax, &ebx, &ecx, &edx); | |
6822 | - return eax; | |
6823 | -} | |
6824 | -static inline unsigned int cpuid_ebx(unsigned int op) | |
6825 | -{ | |
6826 | - unsigned int eax, ebx, ecx, edx; | |
6827 | - | |
6828 | - cpuid(op, &eax, &ebx, &ecx, &edx); | |
6829 | - return ebx; | |
6830 | -} | |
6831 | -static inline unsigned int cpuid_ecx(unsigned int op) | |
6832 | -{ | |
6833 | - unsigned int eax, ebx, ecx, edx; | |
6834 | - | |
6835 | - cpuid(op, &eax, &ebx, &ecx, &edx); | |
6836 | - return ecx; | |
6837 | -} | |
6838 | -static inline unsigned int cpuid_edx(unsigned int op) | |
6839 | -{ | |
6840 | - unsigned int eax, ebx, ecx, edx; | |
6841 | - | |
6842 | - cpuid(op, &eax, &ebx, &ecx, &edx); | |
6843 | - return edx; | |
6844 | -} | |
6845 | - | |
6846 | #define load_cr3(pgdir) write_cr3(__pa(pgdir)) | |
6847 | ||
6848 | /* | |
6849 | @@ -480,9 +431,9 @@ struct thread_struct { | |
6850 | .vm86_info = NULL, \ | |
6851 | .sysenter_cs = __KERNEL_CS, \ | |
6852 | .io_bitmap_ptr = NULL, \ | |
6853 | + .gs = __KERNEL_PDA, \ | |
6854 | } | |
6855 | ||
6856 | -#ifndef CONFIG_X86_NO_TSS | |
6857 | /* | |
6858 | * Note that the .io_bitmap member must be extra-big. This is because | |
6859 | * the CPU will access an additional byte beyond the end of the IO | |
6860 | @@ -497,26 +448,9 @@ struct thread_struct { | |
6861 | .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \ | |
6862 | } | |
6863 | ||
6864 | -static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread) | |
6865 | -{ | |
6866 | - tss->esp0 = thread->esp0; | |
6867 | - /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | |
6868 | - if (unlikely(tss->ss1 != thread->sysenter_cs)) { | |
6869 | - tss->ss1 = thread->sysenter_cs; | |
6870 | - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | |
6871 | - } | |
6872 | -} | |
6873 | -#define load_esp0(tss, thread) \ | |
6874 | - __load_esp0(tss, thread) | |
6875 | -#else | |
6876 | -#define load_esp0(tss, thread) do { \ | |
6877 | - if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \ | |
6878 | - BUG(); \ | |
6879 | -} while (0) | |
6880 | -#endif | |
6881 | - | |
6882 | #define start_thread(regs, new_eip, new_esp) do { \ | |
6883 | - __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \ | |
6884 | + __asm__("movl %0,%%fs": :"r" (0)); \ | |
6885 | + regs->xgs = 0; \ | |
6886 | set_fs(USER_DS); \ | |
6887 | regs->xds = __USER_DS; \ | |
6888 | regs->xes = __USER_DS; \ | |
6889 | @@ -526,26 +460,6 @@ static inline void __load_esp0(struct ts | |
6890 | regs->esp = new_esp; \ | |
6891 | } while (0) | |
6892 | ||
6893 | -/* | |
6894 | - * These special macros can be used to get or set a debugging register | |
6895 | - */ | |
6896 | -#define get_debugreg(var, register) \ | |
6897 | - (var) = HYPERVISOR_get_debugreg((register)) | |
6898 | -#define set_debugreg(value, register) \ | |
6899 | - WARN_ON(HYPERVISOR_set_debugreg((register), (value))) | |
6900 | - | |
6901 | -/* | |
6902 | - * Set IOPL bits in EFLAGS from given mask | |
6903 | - */ | |
6904 | -static inline void set_iopl_mask(unsigned mask) | |
6905 | -{ | |
6906 | - struct physdev_set_iopl set_iopl; | |
6907 | - | |
6908 | - /* Force the change at ring 0. */ | |
6909 | - set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; | |
6910 | - WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl)); | |
6911 | -} | |
6912 | - | |
6913 | /* Forward declaration, a strange C thing */ | |
6914 | struct task_struct; | |
6915 | struct mm_struct; | |
6916 | @@ -637,6 +551,105 @@ static inline void rep_nop(void) | |
6917 | ||
6918 | #define cpu_relax() rep_nop() | |
6919 | ||
6920 | +#define paravirt_enabled() 0 | |
6921 | +#define __cpuid xen_cpuid | |
6922 | + | |
6923 | +#ifndef CONFIG_X86_NO_TSS | |
6924 | +static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread) | |
6925 | +{ | |
6926 | + tss->esp0 = thread->esp0; | |
6927 | + /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | |
6928 | + if (unlikely(tss->ss1 != thread->sysenter_cs)) { | |
6929 | + tss->ss1 = thread->sysenter_cs; | |
6930 | + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | |
6931 | + } | |
6932 | +} | |
6933 | +#define load_esp0(tss, thread) \ | |
6934 | + __load_esp0(tss, thread) | |
6935 | +#else | |
6936 | +#define load_esp0(tss, thread) do { \ | |
6937 | + if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \ | |
6938 | + BUG(); \ | |
6939 | +} while (0) | |
6940 | +#endif | |
6941 | + | |
6942 | + | |
6943 | +/* | |
6944 | + * These special macros can be used to get or set a debugging register | |
6945 | + */ | |
6946 | +#define get_debugreg(var, register) \ | |
6947 | + (var) = HYPERVISOR_get_debugreg(register) | |
6948 | +#define set_debugreg(value, register) \ | |
6949 | + WARN_ON(HYPERVISOR_set_debugreg(register, value)) | |
6950 | + | |
6951 | +#define set_iopl_mask xen_set_iopl_mask | |
6952 | + | |
6953 | +/* | |
6954 | + * Set IOPL bits in EFLAGS from given mask | |
6955 | + */ | |
6956 | +static inline void xen_set_iopl_mask(unsigned mask) | |
6957 | +{ | |
6958 | + struct physdev_set_iopl set_iopl; | |
6959 | + | |
6960 | + /* Force the change at ring 0. */ | |
6961 | + set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; | |
6962 | + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl)); | |
6963 | +} | |
6964 | + | |
6965 | + | |
6966 | +/* | |
6967 | + * Generic CPUID function | |
6968 | + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx | |
6969 | + * resulting in stale register contents being returned. | |
6970 | + */ | |
6971 | +static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) | |
6972 | +{ | |
6973 | + *eax = op; | |
6974 | + *ecx = 0; | |
6975 | + __cpuid(eax, ebx, ecx, edx); | |
6976 | +} | |
6977 | + | |
6978 | +/* Some CPUID calls want 'count' to be placed in ecx */ | |
6979 | +static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, | |
6980 | + int *edx) | |
6981 | +{ | |
6982 | + *eax = op; | |
6983 | + *ecx = count; | |
6984 | + __cpuid(eax, ebx, ecx, edx); | |
6985 | +} | |
6986 | + | |
6987 | +/* | |
6988 | + * CPUID functions returning a single datum | |
6989 | + */ | |
6990 | +static inline unsigned int cpuid_eax(unsigned int op) | |
6991 | +{ | |
6992 | + unsigned int eax, ebx, ecx, edx; | |
6993 | + | |
6994 | + cpuid(op, &eax, &ebx, &ecx, &edx); | |
6995 | + return eax; | |
6996 | +} | |
6997 | +static inline unsigned int cpuid_ebx(unsigned int op) | |
6998 | +{ | |
6999 | + unsigned int eax, ebx, ecx, edx; | |
7000 | + | |
7001 | + cpuid(op, &eax, &ebx, &ecx, &edx); | |
7002 | + return ebx; | |
7003 | +} | |
7004 | +static inline unsigned int cpuid_ecx(unsigned int op) | |
7005 | +{ | |
7006 | + unsigned int eax, ebx, ecx, edx; | |
7007 | + | |
7008 | + cpuid(op, &eax, &ebx, &ecx, &edx); | |
7009 | + return ecx; | |
7010 | +} | |
7011 | +static inline unsigned int cpuid_edx(unsigned int op) | |
7012 | +{ | |
7013 | + unsigned int eax, ebx, ecx, edx; | |
7014 | + | |
7015 | + cpuid(op, &eax, &ebx, &ecx, &edx); | |
7016 | + return edx; | |
7017 | +} | |
7018 | + | |
7019 | /* generic versions from gas */ | |
7020 | #define GENERIC_NOP1 ".byte 0x90\n" | |
7021 | #define GENERIC_NOP2 ".byte 0x89,0xf6\n" | |
7022 | @@ -736,4 +749,8 @@ extern unsigned long boot_option_idle_ov | |
7023 | extern void enable_sep_cpu(void); | |
7024 | extern int sysenter_setup(void); | |
7025 | ||
7026 | +extern int init_gdt(int cpu, struct task_struct *idle); | |
7027 | +extern void cpu_set_gdt(int); | |
7028 | +extern void secondary_cpu_init(void); | |
7029 | + | |
7030 | #endif /* __ASM_I386_PROCESSOR_H */ | |
7031 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/segment_32.h | |
7032 | =================================================================== | |
7033 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/segment_32.h 2008-12-01 11:29:05.000000000 +0100 | |
7034 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/segment_32.h 2008-12-01 11:32:38.000000000 +0100 | |
7035 | @@ -39,7 +39,7 @@ | |
7036 | * 25 - APM BIOS support | |
7037 | * | |
7038 | * 26 - ESPFIX small SS | |
7039 | - * 27 - unused | |
7040 | + * 27 - PDA [ per-cpu private data area ] | |
7041 | * 28 - unused | |
7042 | * 29 - unused | |
7043 | * 30 - unused | |
7044 | @@ -74,6 +74,9 @@ | |
7045 | #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) | |
7046 | #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) | |
7047 | ||
7048 | +#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15) | |
7049 | +#define __KERNEL_PDA (GDT_ENTRY_PDA * 8) | |
7050 | + | |
7051 | #define GDT_ENTRY_DOUBLEFAULT_TSS 31 | |
7052 | ||
7053 | /* | |
7054 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/smp_32.h | |
7055 | =================================================================== | |
7056 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/smp_32.h 2008-12-01 11:29:05.000000000 +0100 | |
7057 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/smp_32.h 2008-12-01 11:32:38.000000000 +0100 | |
7058 | @@ -8,6 +8,7 @@ | |
7059 | #include <linux/kernel.h> | |
7060 | #include <linux/threads.h> | |
7061 | #include <linux/cpumask.h> | |
7062 | +#include <asm/pda.h> | |
7063 | #endif | |
7064 | ||
7065 | #ifdef CONFIG_X86_LOCAL_APIC | |
7066 | @@ -56,7 +57,7 @@ extern void cpu_uninit(void); | |
7067 | * from the initial startup. We map APIC_BASE very early in page_setup(), | |
7068 | * so this is correct in the x86 case. | |
7069 | */ | |
7070 | -#define raw_smp_processor_id() (current_thread_info()->cpu) | |
7071 | +#define raw_smp_processor_id() (read_pda(cpu_number)) | |
7072 | ||
7073 | extern cpumask_t cpu_possible_map; | |
7074 | #define cpu_callin_map cpu_possible_map | |
7075 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/system_32.h | |
7076 | =================================================================== | |
7077 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/system_32.h 2008-12-01 11:29:05.000000000 +0100 | |
7078 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/system_32.h 2008-12-01 11:32:38.000000000 +0100 | |
7079 | @@ -139,17 +139,17 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" | |
7080 | #define write_cr4(x) \ | |
7081 | __asm__ __volatile__("movl %0,%%cr4": :"r" (x)) | |
7082 | ||
7083 | -/* | |
7084 | - * Clear and set 'TS' bit respectively | |
7085 | - */ | |
7086 | +#define wbinvd() \ | |
7087 | + __asm__ __volatile__ ("wbinvd": : :"memory") | |
7088 | + | |
7089 | +/* Clear the 'TS' bit */ | |
7090 | #define clts() (HYPERVISOR_fpu_taskswitch(0)) | |
7091 | + | |
7092 | +/* Set the 'TS' bit */ | |
7093 | #define stts() (HYPERVISOR_fpu_taskswitch(1)) | |
7094 | ||
7095 | #endif /* __KERNEL__ */ | |
7096 | ||
7097 | -#define wbinvd() \ | |
7098 | - __asm__ __volatile__ ("wbinvd": : :"memory") | |
7099 | - | |
7100 | static inline unsigned long get_limit(unsigned long segment) | |
7101 | { | |
7102 | unsigned long __limit; | |
7103 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/desc_64.h | |
7104 | =================================================================== | |
7105 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/desc_64.h 2008-01-28 12:24:19.000000000 +0100 | |
7106 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/desc_64.h 2008-12-01 11:32:38.000000000 +0100 | |
7107 | @@ -9,62 +9,11 @@ | |
7108 | ||
7109 | #include <linux/string.h> | |
7110 | #include <linux/smp.h> | |
7111 | +#include <asm/desc_defs.h> | |
7112 | ||
7113 | #include <asm/segment.h> | |
7114 | #include <asm/mmu.h> | |
7115 | ||
7116 | -// 8 byte segment descriptor | |
7117 | -struct desc_struct { | |
7118 | - u16 limit0; | |
7119 | - u16 base0; | |
7120 | - unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1; | |
7121 | - unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8; | |
7122 | -} __attribute__((packed)); | |
7123 | - | |
7124 | -struct n_desc_struct { | |
7125 | - unsigned int a,b; | |
7126 | -}; | |
7127 | - | |
7128 | -enum { | |
7129 | - GATE_INTERRUPT = 0xE, | |
7130 | - GATE_TRAP = 0xF, | |
7131 | - GATE_CALL = 0xC, | |
7132 | -}; | |
7133 | - | |
7134 | -// 16byte gate | |
7135 | -struct gate_struct { | |
7136 | - u16 offset_low; | |
7137 | - u16 segment; | |
7138 | - unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; | |
7139 | - u16 offset_middle; | |
7140 | - u32 offset_high; | |
7141 | - u32 zero1; | |
7142 | -} __attribute__((packed)); | |
7143 | - | |
7144 | -#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF) | |
7145 | -#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF) | |
7146 | -#define PTR_HIGH(x) ((unsigned long)(x) >> 32) | |
7147 | - | |
7148 | -enum { | |
7149 | - DESC_TSS = 0x9, | |
7150 | - DESC_LDT = 0x2, | |
7151 | -}; | |
7152 | - | |
7153 | -// LDT or TSS descriptor in the GDT. 16 bytes. | |
7154 | -struct ldttss_desc { | |
7155 | - u16 limit0; | |
7156 | - u16 base0; | |
7157 | - unsigned base1 : 8, type : 5, dpl : 2, p : 1; | |
7158 | - unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; | |
7159 | - u32 base3; | |
7160 | - u32 zero1; | |
7161 | -} __attribute__((packed)); | |
7162 | - | |
7163 | -struct desc_ptr { | |
7164 | - unsigned short size; | |
7165 | - unsigned long address; | |
7166 | -} __attribute__((packed)) ; | |
7167 | - | |
7168 | extern struct desc_ptr idt_descr, cpu_gdt_descr[NR_CPUS]; | |
7169 | ||
7170 | extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; | |
7171 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping_64.h | |
7172 | =================================================================== | |
7173 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2008-12-01 11:29:05.000000000 +0100 | |
7174 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2008-12-01 11:32:38.000000000 +0100 | |
7175 | @@ -64,6 +64,9 @@ static inline int dma_mapping_error(dma_ | |
7176 | return (dma_addr == bad_dma_address); | |
7177 | } | |
7178 | ||
7179 | +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | |
7180 | +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | |
7181 | + | |
7182 | extern void *dma_alloc_coherent(struct device *dev, size_t size, | |
7183 | dma_addr_t *dma_handle, gfp_t gfp); | |
7184 | extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr, | |
7185 | @@ -181,12 +184,13 @@ static inline int dma_get_cache_alignmen | |
7186 | return boot_cpu_data.x86_clflush_size; | |
7187 | } | |
7188 | ||
7189 | -#define dma_is_consistent(h) 1 | |
7190 | +#define dma_is_consistent(d, h) 1 | |
7191 | ||
7192 | extern int dma_set_mask(struct device *dev, u64 mask); | |
7193 | ||
7194 | static inline void | |
7195 | -dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir) | |
7196 | +dma_cache_sync(struct device *dev, void *vaddr, size_t size, | |
7197 | + enum dma_data_direction dir) | |
7198 | { | |
7199 | flush_write_buffers(); | |
7200 | } | |
7201 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_64.h | |
7202 | =================================================================== | |
7203 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-12-01 11:29:05.000000000 +0100 | |
7204 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-12-01 11:32:38.000000000 +0100 | |
7205 | @@ -235,19 +235,18 @@ extern unsigned int __kernel_page_user; | |
7206 | ||
7207 | static inline unsigned long pgd_bad(pgd_t pgd) | |
7208 | { | |
7209 | - unsigned long val = __pgd_val(pgd); | |
7210 | - val &= ~PTE_MASK; | |
7211 | - val &= ~(_PAGE_USER | _PAGE_DIRTY); | |
7212 | - return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED); | |
7213 | + return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); | |
7214 | } | |
7215 | ||
7216 | -static inline unsigned long pud_bad(pud_t pud) | |
7217 | -{ | |
7218 | - unsigned long val = __pud_val(pud); | |
7219 | - val &= ~PTE_MASK; | |
7220 | - val &= ~(_PAGE_USER | _PAGE_DIRTY); | |
7221 | - return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED); | |
7222 | -} | |
7223 | +static inline unsigned long pud_bad(pud_t pud) | |
7224 | +{ | |
7225 | + return __pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); | |
7226 | +} | |
7227 | + | |
7228 | +static inline unsigned long pmd_bad(pmd_t pmd) | |
7229 | +{ | |
7230 | + return __pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); | |
7231 | +} | |
7232 | ||
7233 | #define set_pte_at(_mm,addr,ptep,pteval) do { \ | |
7234 | if (((_mm) != current->mm && (_mm) != &init_mm) || \ | |
7235 | @@ -402,8 +401,6 @@ static inline int pmd_large(pmd_t pte) { | |
7236 | #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT) | |
7237 | #endif | |
7238 | #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) | |
7239 | -#define pmd_bad(x) ((__pmd_val(x) & ~(PTE_MASK | _PAGE_USER | _PAGE_PRESENT)) \ | |
7240 | - != (_KERNPG_TABLE & ~(_PAGE_USER | _PAGE_PRESENT))) | |
7241 | #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) | |
7242 | #define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) | |
7243 | ||
7244 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/processor_64.h | |
7245 | =================================================================== | |
7246 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/processor_64.h 2008-12-01 11:29:05.000000000 +0100 | |
7247 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/processor_64.h 2008-12-01 11:32:38.000000000 +0100 | |
7248 | @@ -484,6 +484,14 @@ static inline void __mwait(unsigned long | |
7249 | : :"a" (eax), "c" (ecx)); | |
7250 | } | |
7251 | ||
7252 | +static inline void __sti_mwait(unsigned long eax, unsigned long ecx) | |
7253 | +{ | |
7254 | + /* "mwait %eax,%ecx;" */ | |
7255 | + asm volatile( | |
7256 | + "sti; .byte 0x0f,0x01,0xc9;" | |
7257 | + : :"a" (eax), "c" (ecx)); | |
7258 | +} | |
7259 | + | |
7260 | extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); | |
7261 | ||
7262 | #define stack_current() \ | |
7263 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/smp_64.h | |
7264 | =================================================================== | |
7265 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/smp_64.h 2008-12-01 11:29:05.000000000 +0100 | |
7266 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/smp_64.h 2008-12-01 11:32:38.000000000 +0100 | |
7267 | @@ -88,11 +88,6 @@ extern u8 x86_cpu_to_log_apicid[NR_CPUS] | |
7268 | extern u8 bios_cpu_apicid[]; | |
7269 | ||
7270 | #ifdef CONFIG_X86_LOCAL_APIC | |
7271 | -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | |
7272 | -{ | |
7273 | - return cpus_addr(cpumask)[0]; | |
7274 | -} | |
7275 | - | |
7276 | static inline int cpu_present_to_apicid(int mps_cpu) | |
7277 | { | |
7278 | if (mps_cpu < NR_CPUS) | |
7279 | @@ -127,13 +122,6 @@ static __inline int logical_smp_processo | |
7280 | #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] | |
7281 | #else | |
7282 | #define cpu_physical_id(cpu) boot_cpu_id | |
7283 | -static inline int smp_call_function_single(int cpuid, void (*func) (void *info), | |
7284 | - void *info, int retry, int wait) | |
7285 | -{ | |
7286 | - /* Disable interrupts here? */ | |
7287 | - func(info); | |
7288 | - return 0; | |
7289 | -} | |
7290 | #endif /* !CONFIG_SMP */ | |
7291 | #endif | |
7292 | ||
7293 | Index: head-2008-12-01/kernel/kexec.c | |
7294 | =================================================================== | |
7295 | --- head-2008-12-01.orig/kernel/kexec.c 2008-12-01 11:23:01.000000000 +0100 | |
7296 | +++ head-2008-12-01/kernel/kexec.c 2008-12-01 11:32:38.000000000 +0100 | |
7297 | @@ -372,7 +372,7 @@ static struct page *kimage_alloc_pages(g | |
7298 | if (limit == ~0UL) | |
7299 | address_bits = BITS_PER_LONG; | |
7300 | else | |
7301 | - address_bits = long_log2(limit); | |
7302 | + address_bits = ilog2(limit); | |
7303 | ||
7304 | if (xen_limit_pages_to_max_mfn(pages, order, address_bits) < 0) { | |
7305 | __free_pages(pages, order); | |
7306 | Index: head-2008-12-01/net/core/dev.c | |
7307 | =================================================================== | |
7308 | --- head-2008-12-01.orig/net/core/dev.c 2008-12-01 11:29:05.000000000 +0100 | |
7309 | +++ head-2008-12-01/net/core/dev.c 2008-12-01 11:32:38.000000000 +0100 | |
7310 | @@ -1751,10 +1751,10 @@ inline int skb_checksum_setup(struct sk_ | |
7311 | goto out; | |
7312 | switch (skb->nh.iph->protocol) { | |
7313 | case IPPROTO_TCP: | |
7314 | - skb->csum = offsetof(struct tcphdr, check); | |
7315 | + skb->csum_offset = offsetof(struct tcphdr, check); | |
7316 | break; | |
7317 | case IPPROTO_UDP: | |
7318 | - skb->csum = offsetof(struct udphdr, check); | |
7319 | + skb->csum_offset = offsetof(struct udphdr, check); | |
7320 | break; | |
7321 | default: | |
7322 | if (net_ratelimit()) | |
7323 | @@ -1763,7 +1763,7 @@ inline int skb_checksum_setup(struct sk_ | |
7324 | " %d packet", skb->nh.iph->protocol); | |
7325 | goto out; | |
7326 | } | |
7327 | - if ((skb->h.raw + skb->csum + 2) > skb->tail) | |
7328 | + if ((skb->h.raw + skb->csum_offset + 2) > skb->tail) | |
7329 | goto out; | |
7330 | skb->ip_summed = CHECKSUM_PARTIAL; | |
7331 | skb->proto_csum_blank = 0; |