]>
Commit | Line | Data |
---|---|---|
2cb7cef9 BS |
1 | From: Nick Piggin <npiggin@suse.de> |
2 | Subject: x86: contiguous kmap fix | |
3 | References: bnc#449812 | |
4 | Patch-mainline: perhaps 2.6.30 (maybe .29) | |
5 | ||
6 | The early fixmap pmd entry inserted at the very top of the KVA is causing the | |
7 | subsequent fixmap mapping code to not provide physically linear pte pages over | |
8 | the kmap atomic portion of the fixmap (which relies on said property to calculate | |
9 | pte address). | |
10 | ||
11 | This has caused weird boot failures in kmap_atomic much later in the boot | |
12 | process (initial userspace faults) on a 32-bit PAE system with a larger number | |
13 | of CPUs (smaller CPU counts tend not to run over into the next page so don't | |
14 | show up the problem). | |
15 | ||
16 | Solve this by attempting to clear out the page table, and copy any of its | |
17 | entries to the new one. Also, add a bug if a nonlinear condition is encountered | |
18 | and can't be resolved, which might save some hours of debugging if this fragile | |
19 | scheme ever breaks again... | |
20 | ||
21 | Signed-off-by: Nick Piggin <npiggin@suse.de> | |
22 | ||
23 | Once we have such logic, we can also use it to eliminate the early ioremap | |
24 | trickery around the page table setup for the fixmap area. This also fixes | |
25 | potential issues with FIX_* entries sharing the leaf page table with the early | |
26 | ioremap ones getting discarded by early_ioremap_clear() and not restored by | |
27 | early_ioremap_reset(). It at once eliminates the temporary (and configuration, | |
28 | namely NR_CPUS, dependent) unavailability of early fixed mappings during the | |
29 | time the fixmap area page tables get constructed. | |
30 | ||
31 | Finally, also replace the hard coded calculation of the initial table space | |
32 | needed for the fixmap area with a proper one, allowing kernels configured for | |
33 | large CPU counts to actually boot. | |
34 | ||
35 | Signed-off-by: Jan Beulich <jbeulich@novell.com> | |
36 | ||
37 | Automatically created from "patches.arch/x86-fix-kmap-contig.patch" by xen-port-patches.py | |
38 | ||
39 | --- sle11-2009-04-20.orig/arch/x86/mm/init_32-xen.c 2009-04-24 13:36:23.000000000 +0200 | |
40 | +++ sle11-2009-04-20/arch/x86/mm/init_32-xen.c 2009-02-17 18:06:20.000000000 +0100 | |
41 | @@ -148,6 +148,52 @@ static pte_t * __init one_page_table_ini | |
42 | return pte_offset_kernel(pmd, 0); | |
43 | } | |
44 | ||
45 | +static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, | |
46 | + unsigned long vaddr, pte_t *lastpte) | |
47 | +{ | |
48 | +#ifdef CONFIG_HIGHMEM | |
49 | + /* | |
50 | + * Something (early fixmap) may already have put a pte | |
51 | + * page here, which causes the page table allocation | |
52 | + * to become nonlinear. Attempt to fix it, and if it | |
53 | + * is still nonlinear then we have to bug. | |
54 | + */ | |
55 | + int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT; | |
56 | + int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT; | |
57 | + | |
58 | + if (pmd_idx_kmap_begin != pmd_idx_kmap_end | |
59 | + && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin | |
60 | + && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end | |
61 | + && ((__pa(pte) >> PAGE_SHIFT) < table_start | |
62 | + || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { | |
63 | + pte_t *newpte; | |
64 | + unsigned long phys; | |
65 | + int i; | |
66 | + | |
67 | + BUG_ON(after_init_bootmem); | |
68 | + newpte = alloc_low_page(&phys); | |
69 | + for (i = 0; i < PTRS_PER_PTE; i++) | |
70 | + set_pte(newpte + i, pte[i]); | |
71 | + | |
72 | + paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); | |
73 | + make_lowmem_page_readonly(newpte, | |
74 | + XENFEAT_writable_page_tables); | |
75 | + set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); | |
76 | + BUG_ON(newpte != pte_offset_kernel(pmd, 0)); | |
77 | + __flush_tlb_all(); | |
78 | + | |
79 | + paravirt_release_pte(__pa(pte) >> PAGE_SHIFT); | |
80 | + make_lowmem_page_writable(pte, | |
81 | + XENFEAT_writable_page_tables); | |
82 | + pte = newpte; | |
83 | + } | |
84 | + BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1) | |
85 | + && vaddr > fix_to_virt(FIX_KMAP_END) | |
86 | + && lastpte && lastpte + PTRS_PER_PTE != pte); | |
87 | +#endif | |
88 | + return pte; | |
89 | +} | |
90 | + | |
91 | /* | |
92 | * This function initializes a certain range of kernel virtual memory | |
93 | * with new bootmem page tables, everywhere page tables are missing in | |
94 | @@ -164,6 +210,7 @@ page_table_range_init(unsigned long star | |
95 | unsigned long vaddr; | |
96 | pgd_t *pgd; | |
97 | pmd_t *pmd; | |
98 | + pte_t *pte = NULL; | |
99 | ||
100 | vaddr = start; | |
101 | pgd_idx = pgd_index(vaddr); | |
102 | @@ -175,8 +222,9 @@ page_table_range_init(unsigned long star | |
103 | pmd = pmd + pmd_index(vaddr); | |
104 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); | |
105 | pmd++, pmd_idx++) { | |
106 | - if (vaddr < hypervisor_virt_start) | |
107 | - one_page_table_init(pmd); | |
108 | + BUG_ON(vaddr >= hypervisor_virt_start); | |
109 | + pte = page_table_kmap_check(one_page_table_init(pmd), | |
110 | + pmd, vaddr, pte); | |
111 | ||
112 | vaddr += PMD_SIZE; | |
113 | } | |
114 | @@ -443,7 +491,6 @@ static void __init early_ioremap_page_ta | |
115 | * Fixed mappings, only the page table structure has to be | |
116 | * created - mappings will be set by set_fixmap(): | |
117 | */ | |
118 | - early_ioremap_clear(); | |
119 | vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; | |
120 | end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; | |
121 | page_table_range_init(vaddr, end, pgd_base); | |
122 | @@ -790,10 +837,7 @@ static void __init find_early_table_spac | |
123 | tables += PAGE_ALIGN(ptes * sizeof(pte_t)); | |
124 | ||
125 | /* for fixmap */ | |
126 | - tables += PAGE_SIZE | |
127 | - * ((((FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK) | |
128 | - - (__fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK)) | |
129 | - >> PMD_SHIFT); | |
130 | + tables += PAGE_ALIGN(__end_of_fixed_addresses * sizeof(pte_t)); | |
131 | ||
132 | table_start = extend_init_mapping(tables); | |
133 | ||
134 | --- sle11-2009-04-20.orig/arch/x86/mm/ioremap-xen.c 2009-04-24 13:36:23.000000000 +0200 | |
135 | +++ sle11-2009-04-20/arch/x86/mm/ioremap-xen.c 2009-01-16 10:45:51.000000000 +0100 | |
136 | @@ -669,35 +669,9 @@ void __init early_ioremap_init(void) | |
137 | } | |
138 | ||
139 | #ifdef CONFIG_X86_32 | |
140 | -void __init early_ioremap_clear(void) | |
141 | -{ | |
142 | - pmd_t *pmd; | |
143 | - | |
144 | - if (early_ioremap_debug) | |
145 | - printk(KERN_INFO "early_ioremap_clear()\n"); | |
146 | - | |
147 | - pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); | |
148 | - pmd_clear(pmd); | |
149 | - make_lowmem_page_writable(bm_pte, XENFEAT_writable_page_tables); | |
150 | - /* paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT); */ | |
151 | - __flush_tlb_all(); | |
152 | -} | |
153 | - | |
154 | void __init early_ioremap_reset(void) | |
155 | { | |
156 | - enum fixed_addresses idx; | |
157 | - unsigned long addr, phys; | |
158 | - pte_t *pte; | |
159 | - | |
160 | after_paging_init = 1; | |
161 | - for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) { | |
162 | - addr = fix_to_virt(idx); | |
163 | - pte = early_ioremap_pte(addr); | |
164 | - if (pte_present(*pte)) { | |
165 | - phys = __pte_val(*pte) & PAGE_MASK; | |
166 | - set_fixmap(idx, phys); | |
167 | - } | |
168 | - } | |
169 | } | |
170 | #endif /* CONFIG_X86_32 */ | |
171 | ||
172 | --- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/io.h 2009-04-24 13:36:23.000000000 +0200 | |
173 | +++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/io.h 2009-04-24 13:36:29.000000000 +0200 | |
174 | @@ -12,7 +12,6 @@ | |
175 | */ | |
176 | #ifndef __ASSEMBLY__ | |
177 | extern void early_ioremap_init(void); | |
178 | -extern void early_ioremap_clear(void); | |
179 | extern void early_ioremap_reset(void); | |
180 | extern void *early_ioremap(unsigned long offset, unsigned long size); | |
181 | extern void early_iounmap(void *addr, unsigned long size); | |
182 | @@ -131,7 +130,6 @@ extern void __iomem *ioremap_wc(unsigned | |
183 | * A boot-time mapping is currently limited to at most 16 pages. | |
184 | */ | |
185 | extern void early_ioremap_init(void); | |
186 | -extern void early_ioremap_clear(void); | |
187 | extern void early_ioremap_reset(void); | |
188 | extern void *early_ioremap(unsigned long offset, unsigned long size); | |
189 | extern void early_iounmap(void *addr, unsigned long size); |