// SPDX-License-Identifier: GPL-2.0-only
/*
 * arch/arm64/mm/hugetlbpage.c
 *
 * Copyright (C) 2013 Linaro Ltd.
 *
 * Based on arch/x86/mm/hugetlbpage.c.
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>

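/*
 * Report which huge page sizes can be migrated. All the sizes this file
 * registers are migratable: PUD (with 4K base pages), PMD and the
 * contiguous PMD/PTE sizes; anything else is rejected with a warning.
 */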
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
bool arch_hugetlb_migration_supported(struct hstate *h)
{
        size_t pagesize = huge_page_size(h);

        switch (pagesize) {
#ifdef CONFIG_ARM64_4K_PAGES
        case PUD_SIZE:
#endif
        case PMD_SIZE:
        case CONT_PMD_SIZE:
        case CONT_PTE_SIZE:
                return true;
        }
        pr_warn("%s: unrecognized huge page size 0x%lx\n",
                __func__, pagesize);
        return false;
}
#endif

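/*
 * pmd_huge()/pud_huge() return non-zero when the entry is a block
 * mapping, i.e. it is non-empty and its table bit is clear. pud_huge()
 * is always false when the PMD level is folded.
 */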
int pmd_huge(pmd_t pmd)
{
        return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
}

int pud_huge(pud_t pud)
{
#ifndef __PAGETABLE_PMD_FOLDED
        return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
#else
        return 0;
#endif
}

/*
 * Select all bits except the pfn
 */
static inline pgprot_t pte_pgprot(pte_t pte)
{
        unsigned long pfn = pte_pfn(pte);

        return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
}

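/*
 * Walk the page tables to work out whether ptep refers to an entry at
 * the PMD or the PTE level, and return the number of entries in the
 * contiguous set (CONT_PMDS or CONT_PTES) along with the size each
 * entry maps via *pgsize.
 */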
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
                           pte_t *ptep, size_t *pgsize)
{
        pgd_t *pgdp = pgd_offset(mm, addr);
        pud_t *pudp;
        pmd_t *pmdp;

        *pgsize = PAGE_SIZE;
        pudp = pud_offset(pgdp, addr);
        pmdp = pmd_offset(pudp, addr);
        if ((pte_t *)pmdp == ptep) {
                *pgsize = PMD_SIZE;
                return CONT_PMDS;
        }
        return CONT_PTES;
}

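/*
 * Map a huge page size onto the number of page table entries used to
 * represent it and the size covered by each entry. Unrecognised sizes
 * yield zero entries.
 */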
static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
{
        int contig_ptes = 0;

        *pgsize = size;

        switch (size) {
#ifdef CONFIG_ARM64_4K_PAGES
        case PUD_SIZE:
#endif
        case PMD_SIZE:
                contig_ptes = 1;
                break;
        case CONT_PMD_SIZE:
                *pgsize = PMD_SIZE;
                contig_ptes = CONT_PMDS;
                break;
        case CONT_PTE_SIZE:
                *pgsize = PAGE_SIZE;
                contig_ptes = CONT_PTES;
                break;
        }

        return contig_ptes;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step.
 */
static pte_t get_clear_flush(struct mm_struct *mm,
                             unsigned long addr,
                             pte_t *ptep,
                             unsigned long pgsize,
                             unsigned long ncontig)
{
        pte_t orig_pte = huge_ptep_get(ptep);
        bool valid = pte_valid(orig_pte);
        unsigned long i, saddr = addr;

        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
                pte_t pte = ptep_get_and_clear(mm, addr, ptep);

                /*
                 * If HW_AFDBM is enabled, then the HW could turn on
                 * the dirty or accessed bit for any page in the set,
                 * so check them all.
                 */
                if (pte_dirty(pte))
                        orig_pte = pte_mkdirty(orig_pte);

                if (pte_young(pte))
                        orig_pte = pte_mkyoung(orig_pte);
        }

        if (valid) {
                struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
                flush_tlb_range(&vma, saddr, addr);
        }
        return orig_pte;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step for use cases where the
 * original pte is not needed.
 */
static void clear_flush(struct mm_struct *mm,
                        unsigned long addr,
                        pte_t *ptep,
                        unsigned long pgsize,
                        unsigned long ncontig)
{
        struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
        unsigned long i, saddr = addr;

        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
                pte_clear(mm, addr, ptep);

        flush_tlb_range(&vma, saddr, addr);
}

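/*
 * Install a huge pte. A non-contiguous entry is written directly; a
 * contiguous set is first broken with clear_flush() and then rewritten
 * entry by entry, stepping the pfn by one entry's worth of pages while
 * keeping the attributes identical.
 */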
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pte)
{
        size_t pgsize;
        int i;
        int ncontig;
        unsigned long pfn, dpfn;
        pgprot_t hugeprot;

        /*
         * Code needs to be expanded to handle huge swap and migration
         * entries. Needed for HUGETLB and MEMORY_FAILURE.
         */
        WARN_ON(!pte_present(pte));

        if (!pte_cont(pte)) {
                set_pte_at(mm, addr, ptep, pte);
                return;
        }

        ncontig = find_num_contig(mm, addr, ptep, &pgsize);
        pfn = pte_pfn(pte);
        dpfn = pgsize >> PAGE_SHIFT;
        hugeprot = pte_pgprot(pte);

        clear_flush(mm, addr, ptep, pgsize, ncontig);

        for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
                set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}

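/*
 * Install a swap/migration entry for a huge page by replicating the
 * same non-present entry into every slot that covers it.
 */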
void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
                          pte_t *ptep, pte_t pte, unsigned long sz)
{
        int i, ncontig;
        size_t pgsize;

        ncontig = num_contig_ptes(sz, &pgsize);

        for (i = 0; i < ncontig; i++, ptep++)
                set_pte(ptep, pte);
}

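/*
 * Allocate the page table levels needed to map a huge page of size sz
 * at addr and return a pointer to the entry that will hold it: a pud
 * for PUD_SIZE, a pmd for PMD_SIZE/CONT_PMD_SIZE, or a pte for
 * CONT_PTE_SIZE.
 */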
pte_t *huge_pte_alloc(struct mm_struct *mm,
                      unsigned long addr, unsigned long sz)
{
        pgd_t *pgdp;
        pud_t *pudp;
        pmd_t *pmdp;
        pte_t *ptep = NULL;

        pgdp = pgd_offset(mm, addr);
        pudp = pud_alloc(mm, pgdp, addr);
        if (!pudp)
                return NULL;

        if (sz == PUD_SIZE) {
                ptep = (pte_t *)pudp;
        } else if (sz == (CONT_PTE_SIZE)) {
                pmdp = pmd_alloc(mm, pudp, addr);
                if (!pmdp)
                        return NULL;

                WARN_ON(addr & (sz - 1));
                /*
                 * Note that if this code were ever ported to the
                 * 32-bit arm platform then it will cause trouble in
                 * the case where CONFIG_HIGHPTE is set, since there
                 * will be no pte_unmap() to correspond with this
                 * pte_alloc_map().
                 */
                ptep = pte_alloc_map(mm, pmdp, addr);
        } else if (sz == PMD_SIZE) {
                if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
                    pud_none(READ_ONCE(*pudp)))
                        ptep = huge_pmd_share(mm, addr, pudp);
                else
                        ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
        } else if (sz == (CONT_PMD_SIZE)) {
                pmdp = pmd_alloc(mm, pudp, addr);
                WARN_ON(addr & (sz - 1));
                return (pte_t *)pmdp;
        }

        return ptep;
}

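/*
 * Lookup counterpart of huge_pte_alloc(): walk the existing page tables
 * and return the entry mapping addr at the level implied by sz, or NULL
 * if there is none. Non-present (swap/migration) entries are returned
 * too so that callers can inspect them.
 */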
pte_t *huge_pte_offset(struct mm_struct *mm,
                       unsigned long addr, unsigned long sz)
{
        pgd_t *pgdp;
        pud_t *pudp, pud;
        pmd_t *pmdp, pmd;

        pgdp = pgd_offset(mm, addr);
        if (!pgd_present(READ_ONCE(*pgdp)))
                return NULL;

        pudp = pud_offset(pgdp, addr);
        pud = READ_ONCE(*pudp);
        if (sz != PUD_SIZE && pud_none(pud))
                return NULL;
        /* hugepage or swap? */
        if (pud_huge(pud) || !pud_present(pud))
                return (pte_t *)pudp;
        /* table; check the next level */

        if (sz == CONT_PMD_SIZE)
                addr &= CONT_PMD_MASK;

        pmdp = pmd_offset(pudp, addr);
        pmd = READ_ONCE(*pmdp);
        if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
            pmd_none(pmd))
                return NULL;
        if (pmd_huge(pmd) || !pmd_present(pmd))
                return (pte_t *)pmdp;

        if (sz == CONT_PTE_SIZE)
                return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));

        return NULL;
}

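/*
 * Add the contiguous hint to the entry when the huge page size is built
 * from a contiguous run of PTEs or PMDs; plain PMD/PUD block mappings
 * are returned unchanged.
 */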
pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
                         struct page *page, int writable)
{
        size_t pagesize = huge_page_size(hstate_vma(vma));

        if (pagesize == CONT_PTE_SIZE) {
                entry = pte_mkcont(entry);
        } else if (pagesize == CONT_PMD_SIZE) {
                entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
        } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
                pr_warn("%s: unrecognized huge page size 0x%lx\n",
                        __func__, pagesize);
        }
        return entry;
}

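/*
 * Clear every entry backing a huge page of size sz; no TLB maintenance
 * is performed here.
 */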
void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
                    pte_t *ptep, unsigned long sz)
{
        int i, ncontig;
        size_t pgsize;

        ncontig = num_contig_ptes(sz, &pgsize);

        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
                pte_clear(mm, addr, ptep);
}

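/*
 * Clear a huge pte and return its previous value. A contiguous set is
 * torn down with get_clear_flush() so that dirty/young bits set by
 * hardware on any entry in the set are not lost.
 */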
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
                              unsigned long addr, pte_t *ptep)
{
        int ncontig;
        size_t pgsize;
        pte_t orig_pte = huge_ptep_get(ptep);

        if (!pte_cont(orig_pte))
                return ptep_get_and_clear(mm, addr, ptep);

        ncontig = find_num_contig(mm, addr, ptep, &pgsize);

        return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
}

/*
 * huge_ptep_set_access_flags will update access flags (dirty, accessed)
 * and write permission.
 *
 * For a contiguous huge pte range, write permission only needs to be
 * checked against the first pte in the set, while dirty and young must
 * be compared against every contiguous pte to catch any discrepancy.
 */
static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
{
        int i;

        if (pte_write(pte) != pte_write(huge_ptep_get(ptep)))
                return 1;

        for (i = 0; i < ncontig; i++) {
                pte_t orig_pte = huge_ptep_get(ptep + i);

                if (pte_dirty(pte) != pte_dirty(orig_pte))
                        return 1;

                if (pte_young(pte) != pte_young(orig_pte))
                        return 1;
        }

        return 0;
}

int huge_ptep_set_access_flags(struct vm_area_struct *vma,
                               unsigned long addr, pte_t *ptep,
                               pte_t pte, int dirty)
{
        int ncontig, i;
        size_t pgsize = 0;
        unsigned long pfn = pte_pfn(pte), dpfn;
        pgprot_t hugeprot;
        pte_t orig_pte;

        if (!pte_cont(pte))
                return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

        ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
        dpfn = pgsize >> PAGE_SHIFT;

        if (!__cont_access_flags_changed(ptep, pte, ncontig))
                return 0;

        orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);

        /* Make sure we don't lose the dirty or young state */
        if (pte_dirty(orig_pte))
                pte = pte_mkdirty(pte);

        if (pte_young(orig_pte))
                pte = pte_mkyoung(pte);

        hugeprot = pte_pgprot(pte);
        for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
                set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));

        return 1;
}

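/*
 * Write-protect a huge pte. A contiguous set is broken with
 * get_clear_flush() first and then rewritten read-only, preserving any
 * dirty/young state collected from the old entries.
 */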
void huge_ptep_set_wrprotect(struct mm_struct *mm,
                             unsigned long addr, pte_t *ptep)
{
        unsigned long pfn, dpfn;
        pgprot_t hugeprot;
        int ncontig, i;
        size_t pgsize;
        pte_t pte;

        if (!pte_cont(READ_ONCE(*ptep))) {
                ptep_set_wrprotect(mm, addr, ptep);
                return;
        }

        ncontig = find_num_contig(mm, addr, ptep, &pgsize);
        dpfn = pgsize >> PAGE_SHIFT;

        pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
        pte = pte_wrprotect(pte);

        hugeprot = pte_pgprot(pte);
        pfn = pte_pfn(pte);

        for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
                set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}

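/*
 * Clear a huge pte and flush the corresponding TLB entries, for both a
 * single block mapping and a contiguous set.
 */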
void huge_ptep_clear_flush(struct vm_area_struct *vma,
                           unsigned long addr, pte_t *ptep)
{
        size_t pgsize;
        int ncontig;

        if (!pte_cont(READ_ONCE(*ptep))) {
                ptep_clear_flush(vma, addr, ptep);
                return;
        }

        ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
        clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
}

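/*
 * Register a huge page size with the core hugetlb code unless an hstate
 * for it already exists.
 */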
static void __init add_huge_page_size(unsigned long size)
{
        if (size_to_hstate(size))
                return;

        hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT);
}

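/*
 * Register every huge page size the current base page size supports:
 * PUD (4K base pages only), contiguous PMD, PMD and contiguous PTE.
 */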
static int __init hugetlbpage_init(void)
{
#ifdef CONFIG_ARM64_4K_PAGES
        add_huge_page_size(PUD_SIZE);
#endif
        add_huge_page_size(CONT_PMD_SIZE);
        add_huge_page_size(PMD_SIZE);
        add_huge_page_size(CONT_PTE_SIZE);

        return 0;
}
arch_initcall(hugetlbpage_init);

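/*
 * Parse the "hugepagesz=" kernel command line option and register the
 * requested size if this configuration supports it; otherwise report it
 * as invalid.
 */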
static __init int setup_hugepagesz(char *opt)
{
        unsigned long ps = memparse(opt, &opt);

        switch (ps) {
#ifdef CONFIG_ARM64_4K_PAGES
        case PUD_SIZE:
#endif
        case CONT_PMD_SIZE:
        case PMD_SIZE:
        case CONT_PTE_SIZE:
                add_huge_page_size(ps);
                return 1;
        }

        hugetlb_bad_size();
        pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
        return 0;
}
__setup("hugepagesz=", setup_hugepagesz);