// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Page table handling routines for radix page table.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#define pr_fmt(fmt) "radix-mmu: " fmt

#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include <linux/memblock.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/string_helpers.h>
#include <linux/memory.h>

#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/firmware.h>
#include <asm/powernv.h>
#include <asm/sections.h>
#include <asm/smp.h>
#include <asm/trace.h>
#include <asm/uaccess.h>
#include <asm/ultravisor.h>

#include <trace/events/thp.h>

unsigned int mmu_base_pid;
unsigned long radix_mem_block_size __ro_after_init;

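/*
 * Allocate page table memory from memblock before the slab allocator is
 * up. The allocation is naturally aligned to its size, and boot cannot
 * continue without kernel page tables, hence the panic() on failure.
 */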
static __ref void *early_alloc_pgtable(unsigned long size, int nid,
			unsigned long region_start, unsigned long region_end)
{
	phys_addr_t min_addr = MEMBLOCK_LOW_LIMIT;
	phys_addr_t max_addr = MEMBLOCK_ALLOC_ANYWHERE;
	void *ptr;

	if (region_start)
		min_addr = region_start;
	if (region_end)
		max_addr = region_end;

	ptr = memblock_alloc_try_nid(size, size, min_addr, max_addr, nid);

	if (!ptr)
		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa max_addr=%pa\n",
		      __func__, size, size, nid, &min_addr, &max_addr);

	return ptr;
}

/*
 * When allocating pud or pmd pointers, we allocate a complete page
 * of PAGE_SIZE rather than PUD_TABLE_SIZE or PMD_TABLE_SIZE. This
 * is to ensure that the page obtained from the memblock allocator
 * can be completely used as a page table page and can be freed
 * correctly when the page table entries are removed.
 */
static int early_map_kernel_page(unsigned long ea, unsigned long pa,
				 pgprot_t flags,
				 unsigned int map_page_size,
				 int nid,
				 unsigned long region_start, unsigned long region_end)
{
	unsigned long pfn = pa >> PAGE_SHIFT;
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	pgdp = pgd_offset_k(ea);
	p4dp = p4d_offset(pgdp, ea);
	if (p4d_none(*p4dp)) {
		pudp = early_alloc_pgtable(PAGE_SIZE, nid,
					   region_start, region_end);
		p4d_populate(&init_mm, p4dp, pudp);
	}
	pudp = pud_offset(p4dp, ea);
	if (map_page_size == PUD_SIZE) {
		ptep = (pte_t *)pudp;
		goto set_the_pte;
	}
	if (pud_none(*pudp)) {
		pmdp = early_alloc_pgtable(PAGE_SIZE, nid, region_start,
					   region_end);
		pud_populate(&init_mm, pudp, pmdp);
	}
	pmdp = pmd_offset(pudp, ea);
	if (map_page_size == PMD_SIZE) {
		ptep = pmdp_ptep(pmdp);
		goto set_the_pte;
	}
	if (!pmd_present(*pmdp)) {
		ptep = early_alloc_pgtable(PAGE_SIZE, nid,
					   region_start, region_end);
		pmd_populate_kernel(&init_mm, pmdp, ptep);
	}
	ptep = pte_offset_kernel(pmdp, ea);

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
	asm volatile("ptesync": : :"memory");
	return 0;
}

/*
 * nid, region_start, and region_end are hints to try to place the page
 * table memory in the same node or region.
 */
static int __map_kernel_page(unsigned long ea, unsigned long pa,
			     pgprot_t flags,
			     unsigned int map_page_size,
			     int nid,
			     unsigned long region_start, unsigned long region_end)
{
	unsigned long pfn = pa >> PAGE_SHIFT;
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;
	/*
	 * Make sure task size is correct as per the max addr
	 */
	BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);

#ifdef CONFIG_PPC_64K_PAGES
	BUILD_BUG_ON(RADIX_KERN_MAP_SIZE != (1UL << MAX_EA_BITS_PER_CONTEXT));
#endif

	if (unlikely(!slab_is_available()))
		return early_map_kernel_page(ea, pa, flags, map_page_size,
					     nid, region_start, region_end);

	/*
	 * Should make page table allocation functions be able to take a
	 * node, so we can place kernel page tables on the right nodes after
	 * boot.
	 */
	pgdp = pgd_offset_k(ea);
	p4dp = p4d_offset(pgdp, ea);
	pudp = pud_alloc(&init_mm, p4dp, ea);
	if (!pudp)
		return -ENOMEM;
	if (map_page_size == PUD_SIZE) {
		ptep = (pte_t *)pudp;
		goto set_the_pte;
	}
	pmdp = pmd_alloc(&init_mm, pudp, ea);
	if (!pmdp)
		return -ENOMEM;
	if (map_page_size == PMD_SIZE) {
		ptep = pmdp_ptep(pmdp);
		goto set_the_pte;
	}
	ptep = pte_alloc_kernel(pmdp, ea);
	if (!ptep)
		return -ENOMEM;

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
	asm volatile("ptesync": : :"memory");
	return 0;
}

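/*
 * Map a single kernel page at the given effective address. A minimal
 * usage sketch (the address and protection below are illustrative only):
 *
 *	radix__map_kernel_page((unsigned long)__va(pa), pa,
 *			       PAGE_KERNEL, PAGE_SIZE);
 */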
int radix__map_kernel_page(unsigned long ea, unsigned long pa,
			   pgprot_t flags,
			   unsigned int map_page_size)
{
	return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
}

#ifdef CONFIG_STRICT_KERNEL_RWX
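/*
 * Clear the given PTE bits on each kernel mapping in [start, end).
 * Leaf (huge) entries at the PUD or PMD level are updated in place
 * rather than being split into smaller pages.
 */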
static void radix__change_memory_range(unsigned long start, unsigned long end,
				       unsigned long clear)
{
	unsigned long idx;
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	start = ALIGN_DOWN(start, PAGE_SIZE);
	end = PAGE_ALIGN(end); // aligns up

	pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n",
		 start, end, clear);

	for (idx = start; idx < end; idx += PAGE_SIZE) {
		pgdp = pgd_offset_k(idx);
		p4dp = p4d_offset(pgdp, idx);
		pudp = pud_alloc(&init_mm, p4dp, idx);
		if (!pudp)
			continue;
		if (pud_is_leaf(*pudp)) {
			ptep = (pte_t *)pudp;
			goto update_the_pte;
		}
		pmdp = pmd_alloc(&init_mm, pudp, idx);
		if (!pmdp)
			continue;
		if (pmd_is_leaf(*pmdp)) {
			ptep = pmdp_ptep(pmdp);
			goto update_the_pte;
		}
		ptep = pte_alloc_kernel(pmdp, idx);
		if (!ptep)
			continue;
update_the_pte:
		radix__pte_update(&init_mm, idx, ptep, clear, 0, 0);
	}

	radix__flush_tlb_kernel_range(start, end);
}

void radix__mark_rodata_ro(void)
{
	unsigned long start, end;

	start = (unsigned long)_stext;
	end = (unsigned long)__init_begin;

	radix__change_memory_range(start, end, _PAGE_WRITE);
}

void radix__mark_initmem_nx(void)
{
	unsigned long start = (unsigned long)__init_begin;
	unsigned long end = (unsigned long)__init_end;

	radix__change_memory_range(start, end, _PAGE_EXEC);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */

static inline void __meminit
print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
{
	char buf[10];

	if (end <= start)
		return;

	string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));

	pr_info("Mapped 0x%016lx-0x%016lx with %s pages%s\n", start, end, buf,
		exec ? " (exec)" : "");
}

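/*
 * With STRICT_KERNEL_RWX, force a mapping boundary at __init_begin so
 * that kernel text (mapped executable) never shares a large page with
 * the memory after it.
 */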
static unsigned long next_boundary(unsigned long addr, unsigned long end)
{
#ifdef CONFIG_STRICT_KERNEL_RWX
	if (addr < __pa_symbol(__init_begin))
		return __pa_symbol(__init_begin);
#endif
	return end;
}

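/*
 * Map a physical range into the kernel linear mapping, using the
 * largest page size (1G, then 2M, then the base page size) that the
 * alignment, the remaining gap and the supported MMU page sizes allow.
 */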
static int __meminit create_physical_mapping(unsigned long start,
					     unsigned long end,
					     unsigned long max_mapping_size,
					     int nid, pgprot_t _prot)
{
	unsigned long vaddr, addr, mapping_size = 0;
	bool prev_exec, exec = false;
	pgprot_t prot;
	int psize;

	start = ALIGN(start, PAGE_SIZE);
	end = ALIGN_DOWN(end, PAGE_SIZE);
	for (addr = start; addr < end; addr += mapping_size) {
		unsigned long gap, previous_size;
		int rc;

		gap = next_boundary(addr, end) - addr;
		if (gap > max_mapping_size)
			gap = max_mapping_size;
		previous_size = mapping_size;
		prev_exec = exec;

		if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
		    mmu_psize_defs[MMU_PAGE_1G].shift) {
			mapping_size = PUD_SIZE;
			psize = MMU_PAGE_1G;
		} else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
			   mmu_psize_defs[MMU_PAGE_2M].shift) {
			mapping_size = PMD_SIZE;
			psize = MMU_PAGE_2M;
		} else {
			mapping_size = PAGE_SIZE;
			psize = mmu_virtual_psize;
		}

		vaddr = (unsigned long)__va(addr);

		if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
		    overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) {
			prot = PAGE_KERNEL_X;
			exec = true;
		} else {
			prot = _prot;
			exec = false;
		}

		if (mapping_size != previous_size || exec != prev_exec) {
			print_mapping(start, addr, previous_size, prev_exec);
			start = addr;
		}

		rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
		if (rc)
			return rc;

		update_page_count(psize, 1);
	}

	print_mapping(start, addr, mapping_size, exec);
	return 0;
}

static void __init radix_init_pgtable(void)
{
	unsigned long rts_field;
	phys_addr_t start, end;
	u64 i;

	/* We don't support slb for radix */
	slb_set_size(0);

	/*
	 * Create the linear mapping
	 */
	for_each_mem_range(i, &start, &end) {
		/*
		 * The memblock allocator is up at this point, so the
		 * page tables will be allocated within the range. No
		 * need for a node (which we don't have yet).
		 */

		if (end >= RADIX_VMALLOC_START) {
			pr_warn("Outside the supported range\n");
			continue;
		}

		WARN_ON(create_physical_mapping(start, end,
						radix_mem_block_size,
						-1, PAGE_KERNEL));
	}

	if (!cpu_has_feature(CPU_FTR_HVMODE) &&
	    cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
		/*
		 * Older versions of KVM on these machines prefer if the
		 * guest only uses the low 19 PID bits.
		 */
		mmu_pid_bits = 19;
	}
	mmu_base_pid = 1;

	/*
	 * Allocate Partition table and process table for the
	 * host.
	 */
	BUG_ON(PRTB_SIZE_SHIFT > 36);
	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
	/*
	 * Fill in the process table.
	 */
	rts_field = radix__get_tree_size();
	process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);

	/*
	 * The init_mm context is given the first available (non-zero) PID,
	 * which is the "guard PID" and contains no page table. PIDR should
	 * never be set to zero because that duplicates the kernel address
	 * space at the 0x0... offset (quadrant 0)!
	 *
	 * An arbitrary PID that may later be allocated by the PID allocator
	 * for userspace processes must not be used either, because that
	 * would cause stale user mappings for that PID on CPUs outside of
	 * the TLB invalidation scheme (because it won't be in mm_cpumask).
	 *
	 * So permanently carve out one PID for the purpose of a guard PID.
	 */
	init_mm.context.id = mmu_base_pid;
	mmu_base_pid++;
}

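/*
 * Partition table entry 0 describes the host: dw0 points at the kernel
 * radix tree with PATB_HR selecting radix translation, dw1 points at
 * the process table with its size encoded as PRTB_SIZE_SHIFT - 12.
 */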
static void __init radix_init_partition_table(void)
{
	unsigned long rts_field, dw0, dw1;

	mmu_partition_table_init();
	rts_field = radix__get_tree_size();
	dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
	dw1 = __pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR;
	mmu_partition_table_set_entry(0, dw0, dw1, false);

	pr_info("Initializing Radix MMU\n");
}

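/*
 * Translate a device-tree page size shift into an MMU page size index:
 * 0xc (2^12 = 4K), 0x10 (2^16 = 64K), 0x15 (2^21 = 2M) and
 * 0x1e (2^30 = 1G).
 */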
static int __init get_idx_from_shift(unsigned int shift)
{
	int idx = -1;

	switch (shift) {
	case 0xc:
		idx = MMU_PAGE_4K;
		break;
	case 0x10:
		idx = MMU_PAGE_64K;
		break;
	case 0x15:
		idx = MMU_PAGE_2M;
		break;
	case 0x1e:
		idx = MMU_PAGE_1G;
		break;
	}
	return idx;
}

static int __init radix_dt_scan_page_sizes(unsigned long node,
					   const char *uname, int depth,
					   void *data)
{
	int size = 0;
	int shift, idx;
	unsigned int ap;
	const __be32 *prop;
	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);

	/* We are scanning "cpu" nodes only */
	if (type == NULL || strcmp(type, "cpu") != 0)
		return 0;

	/* Grab page size encodings */
	prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
	if (!prop)
		return 0;

	pr_info("Page sizes from device-tree:\n");
	for (; size >= 4; size -= 4, ++prop) {

		struct mmu_psize_def *def;

		/* top 3 bits are the AP encoding */
		shift = be32_to_cpu(prop[0]) & ~(0xe << 28);
		ap = be32_to_cpu(prop[0]) >> 29;
		pr_info("Page size shift = %d AP=0x%x\n", shift, ap);

		idx = get_idx_from_shift(shift);
		if (idx < 0)
			continue;

		def = &mmu_psize_defs[idx];
		def->shift = shift;
		def->ap = ap;
		def->h_rpt_pgsize = psize_to_rpti_pgsize(idx);
	}

	/* needed ? */
	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
	return 1;
}

#ifdef CONFIG_MEMORY_HOTPLUG
static int __init probe_memory_block_size(unsigned long node, const char *uname,
					  int depth, void *data)
{
	unsigned long *mem_block_size = (unsigned long *)data;
	const __be32 *prop;
	int len;

	if (depth != 1)
		return 0;

	if (strcmp(uname, "ibm,dynamic-reconfiguration-memory"))
		return 0;

	prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);

	if (!prop || len < dt_root_size_cells * sizeof(__be32))
		/*
		 * Nothing in the device tree
		 */
		*mem_block_size = MIN_MEMORY_BLOCK_SIZE;
	else
		*mem_block_size = of_read_number(prop, dt_root_size_cells);
	return 1;
}

static unsigned long __init radix_memory_block_size(void)
{
	unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;

	/*
	 * The OPAL firmware feature is set by now, so it is safe
	 * to test for it.
	 */
	if (firmware_has_feature(FW_FEATURE_OPAL))
		mem_block_size = 1UL * 1024 * 1024 * 1024;
	else
		of_scan_flat_dt(probe_memory_block_size, &mem_block_size);

	return mem_block_size;
}

#else   /* CONFIG_MEMORY_HOTPLUG */

static unsigned long __init radix_memory_block_size(void)
{
	return 1UL * 1024 * 1024 * 1024;
}

#endif /* CONFIG_MEMORY_HOTPLUG */


void __init radix__early_init_devtree(void)
{
	int rc;

	/*
	 * Try to find the available page sizes in the device-tree
	 */
	rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
	if (!rc) {
		/*
		 * No page size details found in device tree.
		 * Let's assume we have 4k and 64k page support.
		 */
		mmu_psize_defs[MMU_PAGE_4K].shift = 12;
		mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
		mmu_psize_defs[MMU_PAGE_4K].h_rpt_pgsize =
			psize_to_rpti_pgsize(MMU_PAGE_4K);

		mmu_psize_defs[MMU_PAGE_64K].shift = 16;
		mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
		mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize =
			psize_to_rpti_pgsize(MMU_PAGE_64K);
	}

	/*
	 * Max mapping size used when mapping pages. We don't use
	 * ppc_md.memory_block_size() here because this gets called
	 * early and the machine probe hasn't run yet. Also, the
	 * pseries implementation only checks for ibm,lmb-size.
	 * All hypervisors supporting radix expose that device
	 * tree node.
	 */
	radix_mem_block_size = radix_memory_block_size();
	return;
}

void __init radix__early_init_mmu(void)
{
	unsigned long lpcr;

#ifdef CONFIG_PPC_64S_HASH_MMU
#ifdef CONFIG_PPC_64K_PAGES
	/* PAGE_SIZE mappings */
	mmu_virtual_psize = MMU_PAGE_64K;
#else
	mmu_virtual_psize = MMU_PAGE_4K;
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
	/* vmemmap mapping */
	if (mmu_psize_defs[MMU_PAGE_2M].shift) {
		/*
		 * map vmemmap using 2M if available
		 */
		mmu_vmemmap_psize = MMU_PAGE_2M;
	} else
		mmu_vmemmap_psize = mmu_virtual_psize;
#endif
#endif
	/*
	 * initialize page table size
	 */
	__pte_index_size = RADIX_PTE_INDEX_SIZE;
	__pmd_index_size = RADIX_PMD_INDEX_SIZE;
	__pud_index_size = RADIX_PUD_INDEX_SIZE;
	__pgd_index_size = RADIX_PGD_INDEX_SIZE;
	__pud_cache_index = RADIX_PUD_INDEX_SIZE;
	__pte_table_size = RADIX_PTE_TABLE_SIZE;
	__pmd_table_size = RADIX_PMD_TABLE_SIZE;
	__pud_table_size = RADIX_PUD_TABLE_SIZE;
	__pgd_table_size = RADIX_PGD_TABLE_SIZE;

	__pmd_val_bits = RADIX_PMD_VAL_BITS;
	__pud_val_bits = RADIX_PUD_VAL_BITS;
	__pgd_val_bits = RADIX_PGD_VAL_BITS;

	__kernel_virt_start = RADIX_KERN_VIRT_START;
	__vmalloc_start = RADIX_VMALLOC_START;
	__vmalloc_end = RADIX_VMALLOC_END;
	__kernel_io_start = RADIX_KERN_IO_START;
	__kernel_io_end = RADIX_KERN_IO_END;
	vmemmap = (struct page *)RADIX_VMEMMAP_START;
	ioremap_bot = IOREMAP_BASE;

#ifdef CONFIG_PCI
	pci_io_base = ISA_IO_BASE;
#endif
	__pte_frag_nr = RADIX_PTE_FRAG_NR;
	__pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT;
	__pmd_frag_nr = RADIX_PMD_FRAG_NR;
	__pmd_frag_size_shift = RADIX_PMD_FRAG_SIZE_SHIFT;

	radix_init_pgtable();

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
		radix_init_partition_table();
	} else {
		radix_init_pseries();
	}

	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);

	/* Switch to the guard PID before turning on MMU */
	radix__switch_mmu_context(NULL, &init_mm);
	tlbiel_all();
}

void radix__early_init_mmu_secondary(void)
{
	unsigned long lpcr;
	/*
	 * update partition table control register and UPRT
	 */
	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);

		set_ptcr_when_no_uv(__pa(partition_tb) |
				    (PATB_SIZE_SHIFT - 12));
	}

	radix__switch_mmu_context(NULL, &init_mm);
	tlbiel_all();

	/* Make sure userspace can't change the AMR */
	mtspr(SPRN_UAMOR, 0);
}

/* Called during kexec sequence with MMU off */
notrace void radix__mmu_cleanup_all(void)
{
	unsigned long lpcr;

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
		set_ptcr_when_no_uv(0);
		powernv_set_nmmu_ptcr(0);
		radix__flush_tlb_all();
	}
}

#ifdef CONFIG_MEMORY_HOTPLUG
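/*
 * The free_*_table() helpers free a page table page only once every
 * entry in it is clear, then clear the entry in the level above that
 * pointed to it.
 */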
static void free_pte_table(pte_t *pte_start, pmd_t *pmd)
{
	pte_t *pte;
	int i;

	for (i = 0; i < PTRS_PER_PTE; i++) {
		pte = pte_start + i;
		if (!pte_none(*pte))
			return;
	}

	pte_free_kernel(&init_mm, pte_start);
	pmd_clear(pmd);
}

static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
	pmd_t *pmd;
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd = pmd_start + i;
		if (!pmd_none(*pmd))
			return;
	}

	pmd_free(&init_mm, pmd_start);
	pud_clear(pud);
}

static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
	pud_t *pud;
	int i;

	for (i = 0; i < PTRS_PER_PUD; i++) {
		pud = pud_start + i;
		if (!pud_none(*pud))
			return;
	}

	pud_free(&init_mm, pud_start);
	p4d_clear(p4d);
}

static void remove_pte_table(pte_t *pte_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pte_t *pte;

	pte = pte_start + pte_index(addr);
	for (; addr < end; addr = next, pte++) {
		next = (addr + PAGE_SIZE) & PAGE_MASK;
		if (next > end)
			next = end;

		if (!pte_present(*pte))
			continue;

		if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
			/*
			 * The vmemmap_free() and remove_section_mapping()
			 * codepaths call us with aligned addresses.
			 */
			WARN_ONCE(1, "%s: unaligned range\n", __func__);
			continue;
		}

		pte_clear(&init_mm, addr, pte);
	}
}

static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
				       unsigned long end)
{
	unsigned long next;
	pte_t *pte_base;
	pmd_t *pmd;

	pmd = pmd_start + pmd_index(addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);

		if (!pmd_present(*pmd))
			continue;

		if (pmd_is_leaf(*pmd)) {
			if (!IS_ALIGNED(addr, PMD_SIZE) ||
			    !IS_ALIGNED(next, PMD_SIZE)) {
				WARN_ONCE(1, "%s: unaligned range\n", __func__);
				continue;
			}
			pte_clear(&init_mm, addr, (pte_t *)pmd);
			continue;
		}

		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
		remove_pte_table(pte_base, addr, next);
		free_pte_table(pte_base, pmd);
	}
}

static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
				       unsigned long end)
{
	unsigned long next;
	pmd_t *pmd_base;
	pud_t *pud;

	pud = pud_start + pud_index(addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);

		if (!pud_present(*pud))
			continue;

		if (pud_is_leaf(*pud)) {
			if (!IS_ALIGNED(addr, PUD_SIZE) ||
			    !IS_ALIGNED(next, PUD_SIZE)) {
				WARN_ONCE(1, "%s: unaligned range\n", __func__);
				continue;
			}
			pte_clear(&init_mm, addr, (pte_t *)pud);
			continue;
		}

		pmd_base = pud_pgtable(*pud);
		remove_pmd_table(pmd_base, addr, next);
		free_pmd_table(pmd_base, pud);
	}
}

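/*
 * Tear down the kernel mappings in [start, end), freeing page table
 * pages that become empty, then flush the TLB for the whole range.
 */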
static void __meminit remove_pagetable(unsigned long start, unsigned long end)
{
	unsigned long addr, next;
	pud_t *pud_base;
	pgd_t *pgd;
	p4d_t *p4d;

	spin_lock(&init_mm.page_table_lock);

	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);

		pgd = pgd_offset_k(addr);
		p4d = p4d_offset(pgd, addr);
		if (!p4d_present(*p4d))
			continue;

		if (p4d_is_leaf(*p4d)) {
			if (!IS_ALIGNED(addr, P4D_SIZE) ||
			    !IS_ALIGNED(next, P4D_SIZE)) {
				WARN_ONCE(1, "%s: unaligned range\n", __func__);
				continue;
			}

			pte_clear(&init_mm, addr, (pte_t *)pgd);
			continue;
		}

		pud_base = p4d_pgtable(*p4d);
		remove_pud_table(pud_base, addr, next);
		free_pud_table(pud_base, p4d);
	}

	spin_unlock(&init_mm.page_table_lock);
	radix__flush_tlb_kernel_range(start, end);
}

int __meminit radix__create_section_mapping(unsigned long start,
					    unsigned long end, int nid,
					    pgprot_t prot)
{
	if (end >= RADIX_VMALLOC_START) {
		pr_warn("Outside the supported range\n");
		return -1;
	}

	return create_physical_mapping(__pa(start), __pa(end),
				       radix_mem_block_size, nid, prot);
}

int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
{
	remove_pagetable(start, end);
	return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
				 pgprot_t flags, unsigned int map_page_size,
				 int nid)
{
	return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
}

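/*
 * Create the vmemmap mapping for a section; the nid derived from the
 * backing physical address below steers the page table allocations
 * toward the node that owns that memory where possible.
 */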
int __meminit radix__vmemmap_create_mapping(unsigned long start,
					    unsigned long page_size,
					    unsigned long phys)
{
	/* Create a PTE encoding */
	unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
	int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
	int ret;

	if ((start + page_size) >= RADIX_VMEMMAP_END) {
		pr_warn("Outside the supported range\n");
		return -1;
	}

	ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
	BUG_ON(ret);

	return 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
{
	remove_pagetable(start, start + page_size);
}
#endif
#endif

#ifdef CONFIG_DEBUG_PAGEALLOC
void radix__kernel_map_pages(struct page *page, int numpages, int enable)
{
	pr_warn_once("DEBUG_PAGEALLOC not supported in radix mode\n");
}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

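/*
 * Transparent hugepage helpers. A huge PMD is updated with the same
 * primitive as an ordinary PTE; the trailing argument to
 * radix__pte_update() marks the entry as huge.
 */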
unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
					 pmd_t *pmdp, unsigned long clr,
					 unsigned long set)
{
	unsigned long old;

#ifdef CONFIG_DEBUG_VM
	WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
	assert_spin_locked(pmd_lockptr(mm, pmdp));
#endif

	old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);
	trace_hugepage_update(addr, old, clr, set);

	return old;
}

pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
				 pmd_t *pmdp)
{
	pmd_t pmd;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
	VM_BUG_ON(pmd_devmap(*pmdp));
	/*
	 * khugepaged calls this for normal pmd
	 */
	pmd = *pmdp;
	pmd_clear(pmdp);

	radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);

	return pmd;
}

/*
 * For us pgtable_t is pte_t *. In order to save the deposited
 * page table, we consider the allocated page table as a list
 * head. On withdraw we need to make sure we zero out the used
 * list_head memory area.
 */
void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				       pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

967 | { | |
47d99948 CL |
968 | pte_t *ptep; |
969 | pgtable_t pgtable; | |
970 | struct list_head *lh; | |
bde3eb62 | 971 | |
47d99948 CL |
972 | assert_spin_locked(pmd_lockptr(mm, pmdp)); |
973 | ||
974 | /* FIFO */ | |
975 | pgtable = pmd_huge_pte(mm, pmdp); | |
976 | lh = (struct list_head *) pgtable; | |
977 | if (list_empty(lh)) | |
978 | pmd_huge_pte(mm, pmdp) = NULL; | |
979 | else { | |
980 | pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; | |
981 | list_del(lh); | |
982 | } | |
983 | ptep = (pte_t *) pgtable; | |
984 | *ptep = __pte(0); | |
985 | ptep++; | |
986 | *ptep = __pte(0); | |
987 | return pgtable; | |
988 | } | |

pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
				     unsigned long addr, pmd_t *pmdp)
{
	pmd_t old_pmd;
	unsigned long old;

	old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
	old_pmd = __pmd(old);
	return old_pmd;
}

#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
				  pte_t entry, unsigned long address, int psize)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
					      _PAGE_RW | _PAGE_EXEC);

	unsigned long change = pte_val(entry) ^ pte_val(*ptep);
	/*
	 * On POWER9, the NMMU is not able to relax PTE access permissions
	 * for a translation with a TLB. The PTE must be invalidated and the
	 * TLB flushed before the new PTE is installed.
	 *
	 * This only needs to be done for radix, because hash translation does
	 * flush when updating the linux pte (and we don't support NMMU
	 * accelerators on HPT on POWER9 anyway XXX: do we?).
	 *
	 * POWER10 (and P9P) NMMU does behave as per ISA.
	 */
	if (!cpu_has_feature(CPU_FTR_ARCH_31) && (change & _PAGE_RW) &&
	    atomic_read(&mm->context.copros) > 0) {
		unsigned long old_pte, new_pte;

		old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID);
		new_pte = old_pte | set;
		radix__flush_tlb_page_psize(mm, address, psize);
		__radix_pte_update(ptep, _PAGE_INVALID, new_pte);
	} else {
		__radix_pte_update(ptep, 0, set);
		/*
		 * Book3S does not require a TLB flush when relaxing access
		 * restrictions on an address space (modulo the POWER9 nest
		 * MMU issue above), because the MMU will reload the PTE after
		 * taking an access fault, as defined by the architecture. See
		 * "Setting a Reference or Change Bit or Upgrading Access
		 * Authority (PTE Subject to Atomic Hardware Updates)" in
		 * Power ISA Version 3.1B.
		 */
	}
	/* See ptesync comment in radix__set_pte_at */
}

void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    pte_t old_pte, pte_t pte)
{
	struct mm_struct *mm = vma->vm_mm;

	/*
	 * POWER9 NMMU must flush the TLB after clearing the PTE before
	 * installing a PTE with more relaxed access permissions, see
	 * radix__ptep_set_access_flags.
	 */
	if (!cpu_has_feature(CPU_FTR_ARCH_31) &&
	    is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
	    (atomic_read(&mm->context.copros) > 0))
		radix__flush_tlb_page(vma, addr);

	set_pte_at(mm, addr, ptep, pte);
}

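/*
 * Huge vmap support: install and tear down huge leaf entries directly
 * at the PUD/PMD level for vmalloc/ioremap. These return 1 on success
 * and 0 when a huge page cannot be used (e.g. not running radix).
 */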
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	pte_t *ptep = (pte_t *)pud;
	pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot);

	if (!radix_enabled())
		return 0;

	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud);

	return 1;
}

int pud_clear_huge(pud_t *pud)
{
	if (pud_is_leaf(*pud)) {
		pud_clear(pud);
		return 1;
	}

	return 0;
}

int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
	pmd_t *pmd;
	int i;

	pmd = pud_pgtable(*pud);
	pud_clear(pud);

	flush_tlb_kernel_range(addr, addr + PUD_SIZE);

	for (i = 0; i < PTRS_PER_PMD; i++) {
		if (!pmd_none(pmd[i])) {
			pte_t *pte;
			pte = (pte_t *)pmd_page_vaddr(pmd[i]);

			pte_free_kernel(&init_mm, pte);
		}
	}

	pmd_free(&init_mm, pmd);

	return 1;
}

int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	pte_t *ptep = (pte_t *)pmd;
	pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot);

	if (!radix_enabled())
		return 0;

	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd);

	return 1;
}

int pmd_clear_huge(pmd_t *pmd)
{
	if (pmd_is_leaf(*pmd)) {
		pmd_clear(pmd);
		return 1;
	}

	return 0;
}

int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
	pte_t *pte;

	pte = (pte_t *)pmd_page_vaddr(*pmd);
	pmd_clear(pmd);

	flush_tlb_kernel_range(addr, addr + PMD_SIZE);

	pte_free_kernel(&init_mm, pte);

	return 1;
}