]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
ab537dca AK |
2 | #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H |
3 | #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H | |
4 | ||
eea86aa4 ME |
/*
 * Index sizes of the page-table levels. Every entry is 8 bytes, so a level
 * with index size N occupies 8B << N.
 */
#define H_PTE_INDEX_SIZE	8	/* 2KB table, maps 2^8  x 64KB = 16MB */
#define H_PMD_INDEX_SIZE	10	/* 8KB table, maps 2^10 x 16MB = 16GB */
#define H_PUD_INDEX_SIZE	10	/* 8KB table, maps 2^10 x 16GB = 16TB */
#define H_PGD_INDEX_SIZE	8	/* 2KB table, maps 2^8  x 16TB = 4PB */
9 | ||
ab537dca | 10 | |
f384796c AK |
/*
 * A single context spans 512TB. SLB misses for the first (default) context
 * are handled in the hotpath.
 */
#define MAX_EA_BITS_PER_CONTEXT	49
#define REGION_SHIFT		MAX_EA_BITS_PER_CONTEXT

/* One context is used for each MAP area. */
#define H_KERN_MAP_SIZE		(1UL << MAX_EA_BITS_PER_CONTEXT)

/*
 * Address range of the kernel non-linear virtual area:
 * 2PB
 */
#define H_KERN_VIRT_START	ASM_CONST(0xc008000000000000)
28 | ||
f5bd0fdc AK |
/*
 * A 64k aligned address frees up a few of the lower bits of the RPN;
 * they are borrowed here as software bits. For more details look at
 * pte_pfn()/pfn_pte().
 */
#define H_PAGE_COMBO	_RPAGE_RPN0	/* this is a combo 4k page */
#define H_PAGE_4K_PFN	_RPAGE_RPN1	/* PFN is for a single 4k page */
#define H_PAGE_BUSY	_RPAGE_RPN44	/* software: PTE & hash are busy */
#define H_PAGE_HASHPTE	_RPAGE_RPN43	/* PTE has associated HPTE */

/* Memory key bits. */
#define H_PTE_PKEY_BIT0	_RPAGE_RSV1
#define H_PTE_PKEY_BIT1	_RPAGE_RSV2
#define H_PTE_PKEY_BIT2	_RPAGE_RSV3
#define H_PTE_PKEY_BIT3	_RPAGE_RSV4
#define H_PTE_PKEY_BIT4	_RPAGE_RSV5

/*
 * Explicit huge pages and THP huge pages must be told apart, because a THP
 * huge page also needs to track real subpage details.
 */
#define H_PAGE_THP_HUGE	H_PAGE_4K_PFN

/* PTE flags to conserve for HPTE identification */
#define _PAGE_HPTEFLAGS	(H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO)
62607bc6 AK |
/*
 * A PTE page fragment is 2K of PTEs followed by another 2K that stores the
 * real_pte_t hash index: 8 bytes per pte entry and another 8 bytes for
 * storing slot details.
 */
#define H_PTE_FRAG_SIZE_SHIFT	(H_PTE_INDEX_SIZE + 3 + 1)
#define H_PTE_FRAG_NR		(PAGE_SIZE >> H_PTE_FRAG_SIZE_SHIFT)

/* The PMD fragment is twice as large when either huge page option is enabled. */
#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_HUGETLB_PAGE)
#define H_PMD_FRAG_SIZE_SHIFT	(H_PMD_INDEX_SIZE + 3)
#else
#define H_PMD_FRAG_SIZE_SHIFT	(H_PMD_INDEX_SIZE + 3 + 1)
#endif
#define H_PMD_FRAG_NR		(PAGE_SIZE >> H_PMD_FRAG_SIZE_SHIFT)
68 | ||
c605782b | 69 | #ifndef __ASSEMBLY__ |
96270b1f | 70 | #include <asm/errno.h> |
3c726f8d | 71 | |
c605782b BH |
72 | /* |
73 | * With 64K pages on hash table, we have a special PTE format that | |
74 | * uses a second "half" of the page table to encode sub-page information | |
75 | * in order to deal with 64K made of 4K HW pages. Thus we override the | |
76 | * generic accessors and iterators here | |
77 | */ | |
85c1fafd | 78 | #define __real_pte __real_pte |
ff31e105 | 79 | static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset) |
85c1fafd AK |
80 | { |
81 | real_pte_t rpte; | |
506b863c | 82 | unsigned long *hidxp; |
85c1fafd AK |
83 | |
84 | rpte.pte = pte; | |
bf9a95f9 RP |
85 | |
86 | /* | |
87 | * Ensure that we do not read the hidx before we read the PTE. Because | |
88 | * the writer side is expected to finish writing the hidx first followed | |
89 | * by the PTE, by using smp_wmb(). pte_set_hash_slot() ensures that. | |
90 | */ | |
91 | smp_rmb(); | |
92 | ||
ff31e105 | 93 | hidxp = (unsigned long *)(ptep + offset); |
bf9a95f9 | 94 | rpte.hidx = *hidxp; |
85c1fafd AK |
95 | return rpte; |
96 | } | |
97 | ||
7b84947c RP |
98 | /* |
99 | * Shift the hidx representation by one (modulo 16); i.e. hidx 0 is represented | |
100 | * as 1, 1 as 2,... , and 0xf as 0. This convention lets us represent an | |
101 | * invalid hidx 0xf with a 0x0 bit value. PTEs are anyway zeroed when | |
102 | * allocated. We don't have to zero them again; thus we save on the initialization. | |
103 | */ | |
/*
 * Helpers for the 4-bit hidx fields packed into one unsigned long, one
 * field per subpage index.
 *
 * HIDX_SHIFT_BY_ONE()/HIDX_UNSHIFT_BY_ONE() add/subtract one modulo 16.
 * HIDX_BITS() moves a value into the field selected by 'index' and
 * BITS_TO_HIDX() extracts that field again.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (for example "a | b") expand with the intended precedence.
 */
#define HIDX_UNSHIFT_BY_ONE(x)	(((x) + 0xfUL) & 0xfUL) /* shift backward by one */
#define HIDX_SHIFT_BY_ONE(x)	(((x) + 0x1UL) & 0xfUL) /* shift forward by one */
#define HIDX_BITS(x, index)	((x) << ((index) << 2))
#define BITS_TO_HIDX(x, index)	(((x) >> ((index) << 2)) & 0xfUL)
#define INVALID_RPTE_HIDX	0x0UL
59aa31fd | 109 | |
85c1fafd AK |
110 | static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) |
111 | { | |
7b84947c | 112 | return HIDX_UNSHIFT_BY_ONE(BITS_TO_HIDX(rpte.hidx, index)); |
85c1fafd AK |
113 | } |
114 | ||
59aa31fd RP |
115 | /* |
116 | * Commit the hidx and return PTE bits that needs to be modified. The caller is | |
117 | * expected to modify the PTE bits accordingly and commit the PTE to memory. | |
118 | */ | |
119 | static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, | |
ff31e105 AK |
120 | unsigned int subpg_index, |
121 | unsigned long hidx, int offset) | |
59aa31fd | 122 | { |
ff31e105 | 123 | unsigned long *hidxp = (unsigned long *)(ptep + offset); |
59aa31fd RP |
124 | |
125 | rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index); | |
7b84947c | 126 | *hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index); |
59aa31fd RP |
127 | |
128 | /* | |
129 | * Anyone reading PTE must ensure hidx bits are read after reading the | |
130 | * PTE by using the read-side barrier smp_rmb(). __real_pte() can be | |
131 | * used for that. | |
132 | */ | |
133 | smp_wmb(); | |
134 | ||
135 | /* No PTE bits to be modified, return 0x0UL */ | |
136 | return 0x0UL; | |
85c1fafd AK |
137 | } |
138 | ||
3c726f8d | 139 | #define __rpte_to_pte(r) ((r).pte) |
bf680d51 | 140 | extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); |
ab537dca AK |
141 | /* |
142 | * Trick: we set __end to va + 64k, which happens works for | |
3c726f8d BH |
143 | * a 16M page as well as we want only one iteration |
144 | */ | |
5524a27d AK |
145 | #define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift) \ |
146 | do { \ | |
147 | unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT)); \ | |
148 | unsigned __split = (psize == MMU_PAGE_4K || \ | |
149 | psize == MMU_PAGE_64K_AP); \ | |
150 | shift = mmu_psize_defs[psize].shift; \ | |
151 | for (index = 0; vpn < __end; index++, \ | |
152 | vpn += (1L << (shift - VPN_SHIFT))) { \ | |
f405b510 | 153 | if (!__split || __rpte_sub_valid(rpte, index)) |
3c726f8d | 154 | |
f405b510 | 155 | #define pte_iterate_hashed_end() } } while(0) |
3c726f8d | 156 | |
16c2d476 | 157 | #define pte_pagesize_index(mm, addr, pte) \ |
945537df | 158 | (((pte) & H_PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) |
3c726f8d | 159 | |
96270b1f AK |
160 | extern int remap_pfn_range(struct vm_area_struct *, unsigned long addr, |
161 | unsigned long pfn, unsigned long size, pgprot_t); | |
6cc1a0ee AK |
162 | static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr, |
163 | unsigned long pfn, pgprot_t prot) | |
96270b1f AK |
164 | { |
165 | if (pfn > (PTE_RPN_MASK >> PAGE_SHIFT)) { | |
166 | WARN(1, "remap_4k_pfn called with wrong pfn value\n"); | |
167 | return -EINVAL; | |
168 | } | |
169 | return remap_pfn_range(vma, addr, pfn, PAGE_SIZE, | |
945537df | 170 | __pgprot(pgprot_val(prot) | H_PAGE_4K_PFN)); |
96270b1f | 171 | } |
721151d0 | 172 | |
/* Sizes of the individual page-table levels. */
#define H_PTE_TABLE_SIZE	PTE_FRAG_SIZE

/* With huge page support the PMD carries one extra unsigned long per entry. */
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
#define H_PMD_TABLE_SIZE	((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
				 (sizeof(unsigned long) << PMD_INDEX_SIZE))
#else
#define H_PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
#endif

/* With hugetlb support the PUD carries one extra unsigned long per entry. */
#if defined(CONFIG_HUGETLB_PAGE)
#define H_PUD_TABLE_SIZE	((sizeof(pud_t) << PUD_INDEX_SIZE) + \
				 (sizeof(unsigned long) << PUD_INDEX_SIZE))
#else
#define H_PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
#endif

#define H_PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
ab537dca | 187 | |
e34aa03c | 188 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
e34aa03c AK |
189 | static inline char *get_hpte_slot_array(pmd_t *pmdp) |
190 | { | |
191 | /* | |
192 | * The hpte hindex is stored in the pgtable whose address is in the | |
193 | * second half of the PMD | |
194 | * | |
195 | * Order this load with the test for pmd_trans_huge in the caller | |
196 | */ | |
197 | smp_rmb(); | |
198 | return *(char **)(pmdp + PTRS_PER_PMD); | |
199 | ||
200 | ||
201 | } | |
202 | /* | |
203 | * The linux hugepage PMD now includes the pmd entries followed by the address | |
204 | * of the stashed pgtable_t. The stashed pgtable_t contains the hpte bits. | |
849f86a6 | 205 | * [ 000 | 1 bit secondary | 3 bit hidx | 1 bit valid]. We use one byte per |
e34aa03c AK |
206 | * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and |
207 | * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t. | |
208 | * | |
849f86a6 | 209 | * The top three bits are intentionally left as zero. These memory locations |
e34aa03c AK |
210 | * are also used as normal page PTE pointers. So if we have any pointers |
211 | * left around while we collapse a hugepage, we need to make sure the | |
212 | * _PAGE_PRESENT bit of each is zero when we look at them. | |
213 | */ | |
/* Return the valid bit (bit 0) of slot 'index'; the result is 0 or 1. */
static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
{
	unsigned char slot = hpte_slot_array[index];

	return slot & 0x1;
}
218 | ||
/* Return the bits of slot 'index' that sit above the valid bit. */
static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
					   int index)
{
	unsigned char slot = hpte_slot_array[index];

	return slot >> 1;
}
224 | ||
/* Store 'hidx' above the valid bit in slot 'index' and set the valid bit. */
static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
					unsigned int index, unsigned int hidx)
{
	unsigned int encoded = (hidx << 1) | 0x1;

	hpte_slot_array[index] = encoded;
}
230 | ||
231 | /* | |
232 | * | |
233 | * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs | |
234 | * page. The hugetlbfs page table walking and mangling paths are totally | |
235 | * separated from the core VM paths and they're differentiated by | |
236 | * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run. | |
237 | * | |
238 | * pmd_trans_huge() is defined as false at build time if | |
239 | * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build | |
240 | * time in such case. | |
241 | * | |
242 | * For ppc64 we need to differentiate explicit hugepages from THP, because | |
243 | * for THP we also track the subpage details at the pmd level. We don't do | |
244 | * that for explicit huge pages. | |
245 | * | |
246 | */ | |
6cc1a0ee | 247 | static inline int hash__pmd_trans_huge(pmd_t pmd) |
e34aa03c | 248 | { |
95a7901a | 249 | return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE | _PAGE_DEVMAP)) == |
945537df | 250 | (_PAGE_PTE | H_PAGE_THP_HUGE)); |
e34aa03c AK |
251 | } |
252 | ||
6cc1a0ee | 253 | static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b) |
e34aa03c | 254 | { |
ee3caed3 | 255 | return (((pmd_raw(pmd_a) ^ pmd_raw(pmd_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0); |
e34aa03c AK |
256 | } |
257 | ||
3df33f12 AK |
258 | static inline pmd_t hash__pmd_mkhuge(pmd_t pmd) |
259 | { | |
260 | return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE)); | |
261 | } | |
262 | ||
263 | extern unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, | |
264 | unsigned long addr, pmd_t *pmdp, | |
265 | unsigned long clr, unsigned long set); | |
266 | extern pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, | |
267 | unsigned long address, pmd_t *pmdp); | |
268 | extern void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, | |
269 | pgtable_t pgtable); | |
270 | extern pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); | |
3df33f12 AK |
271 | extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, |
272 | unsigned long addr, pmd_t *pmdp); | |
273 | extern int hash__has_transparent_hugepage(void); | |
e34aa03c | 274 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
95a7901a AK |
275 | |
276 | static inline pmd_t hash__pmd_mkdevmap(pmd_t pmd) | |
277 | { | |
278 | return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE | _PAGE_DEVMAP)); | |
279 | } | |
280 | ||
c605782b | 281 | #endif /* __ASSEMBLY__ */ |
ab537dca AK |
282 | |
283 | #endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */ |