// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-20 Intel Corporation. */

#include <linux/lockdep.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/shmem_fs.h>
#include <linux/suspend.h>
#include <linux/sched/mm.h>
#include <asm/sgx.h>
#include "encl.h"
#include "encls.h"
#include "sgx.h"

#define PCMDS_PER_PAGE (PAGE_SIZE / sizeof(struct sgx_pcmd))
/*
 * 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to
 * determine the page index associated with the first PCMD entry
 * within a PCMD page.
 */
#define PCMD_FIRST_MASK GENMASK(4, 0)
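
/*
 * Illustrative note: with 32 PCMD entries per page, clearing the low five
 * bits of a page index (page_index & ~PCMD_FIRST_MASK) rounds it down to a
 * multiple of 32, i.e. to the index of the enclave page whose PCMD occupies
 * the first slot of the shared PCMD page. reclaimer_writing_to_pcmd() below
 * relies on this to walk all 32 pages sharing one PCMD page.
 */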

/**
 * reclaimer_writing_to_pcmd() - Query if any enclave page associated with
 *                               a PCMD page is in the process of being reclaimed.
 * @encl:        Enclave to which PCMD page belongs
 * @start_addr:  Address of enclave page using first entry within the PCMD page
 *
 * When an enclave page is reclaimed some Paging Crypto MetaData (PCMD) is
 * stored. The PCMD data of a reclaimed enclave page contains enough
 * information for the processor to verify the page at the time
 * it is loaded back into the Enclave Page Cache (EPC).
 *
 * The backing storage to which enclave pages are reclaimed is laid out as
 * follows:
 * Encrypted enclave pages:SECS page:PCMD pages
 *
 * Each PCMD page contains the PCMD metadata of
 * PAGE_SIZE/sizeof(struct sgx_pcmd) enclave pages.
 *
 * A PCMD page can only be truncated if it is (a) empty, and (b) not in the
 * process of getting data (and thus soon being non-empty). (b) is tested with
 * a check if an enclave page sharing the PCMD page is in the process of being
 * reclaimed.
 *
 * The reclaimer sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when it
 * intends to reclaim that enclave page - it means that the PCMD page
 * associated with that enclave page is about to get some data and thus
 * even if the PCMD page is empty, it should not be truncated.
 *
 * Context: Enclave mutex (&sgx_encl->lock) must be held.
 * Return: 1 if the reclaimer is about to write to the PCMD page
 *         0 if the reclaimer has no intention to write to the PCMD page
 */
static int reclaimer_writing_to_pcmd(struct sgx_encl *encl,
                                     unsigned long start_addr)
{
        int reclaimed = 0;
        int i;

        /*
         * PCMD_FIRST_MASK is based on number of PCMD entries within
         * PCMD page being 32.
         */
        BUILD_BUG_ON(PCMDS_PER_PAGE != 32);

        for (i = 0; i < PCMDS_PER_PAGE; i++) {
                struct sgx_encl_page *entry;
                unsigned long addr;

                addr = start_addr + i * PAGE_SIZE;

                /*
                 * Stop when reaching the SECS page - it does not
                 * have a page_array entry and its reclaim is
                 * started and completed with enclave mutex held so
                 * it does not use the SGX_ENCL_PAGE_BEING_RECLAIMED
                 * flag.
                 */
                if (addr == encl->base + encl->size)
                        break;

                entry = xa_load(&encl->page_array, PFN_DOWN(addr));
                if (!entry)
                        continue;

                /*
                 * VA page slot ID uses same bit as the flag so it is important
                 * to ensure that the page is not already in backing store.
                 */
                if (entry->epc_page &&
                    (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)) {
                        reclaimed = 1;
                        break;
                }
        }

        return reclaimed;
}

/*
 * Calculate byte offset of a PCMD struct associated with an enclave page. PCMDs
 * follow right after the EPC data in the backing storage. In addition to the
 * visible enclave pages, there's one extra page slot for SECS, before PCMD
 * structs.
 */
static inline pgoff_t sgx_encl_get_backing_page_pcmd_offset(struct sgx_encl *encl,
                                                            unsigned long page_index)
{
        pgoff_t epc_end_off = encl->size + sizeof(struct sgx_secs);

        return epc_end_off + page_index * sizeof(struct sgx_pcmd);
}
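
/*
 * Worked example of the layout above (illustrative only): struct sgx_secs
 * occupies one whole 4K page, so for a two-page enclave the backing file is
 *   [page 0][page 1][SECS][PCMD array],
 * and the PCMD of page 1 starts at byte offset
 *   2 * PAGE_SIZE + PAGE_SIZE + 1 * sizeof(struct sgx_pcmd).
 */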

/*
 * Free a page from the backing storage at the given page index.
 */
static inline void sgx_encl_truncate_backing_page(struct sgx_encl *encl, unsigned long page_index)
{
        struct inode *inode = file_inode(encl->backing);

        shmem_truncate_range(inode, PFN_PHYS(page_index), PFN_PHYS(page_index) + PAGE_SIZE - 1);
}
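
/*
 * Note: PFN_PHYS(page_index) is page_index << PAGE_SHIFT, so the range passed
 * to shmem_truncate_range() above covers exactly one backing page; shmem
 * releases that page if it is resident.
 */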

/*
 * ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC
 * Pages" in the SDM.
 */
static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
                           struct sgx_epc_page *epc_page,
                           struct sgx_epc_page *secs_page)
{
        unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
        struct sgx_encl *encl = encl_page->encl;
        pgoff_t page_index, page_pcmd_off;
        unsigned long pcmd_first_page;
        struct sgx_pageinfo pginfo;
        struct sgx_backing b;
        bool pcmd_page_empty;
        u8 *pcmd_page;
        int ret;

        if (secs_page)
                page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);
        else
                page_index = PFN_DOWN(encl->size);

        /*
         * Address of enclave page using the first entry within the PCMD page.
         */
        pcmd_first_page = PFN_PHYS(page_index & ~PCMD_FIRST_MASK) + encl->base;

        page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);

        ret = sgx_encl_lookup_backing(encl, page_index, &b);
        if (ret)
                return ret;

        pginfo.addr = encl_page->desc & PAGE_MASK;
        pginfo.contents = (unsigned long)kmap_atomic(b.contents);
        pcmd_page = kmap_atomic(b.pcmd);
        pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;

        if (secs_page)
                pginfo.secs = (u64)sgx_get_epc_virt_addr(secs_page);
        else
                pginfo.secs = 0;

        ret = __eldu(&pginfo, sgx_get_epc_virt_addr(epc_page),
                     sgx_get_epc_virt_addr(encl_page->va_page->epc_page) + va_offset);
        if (ret) {
                if (encls_failed(ret))
                        ENCLS_WARN(ret, "ELDU");

                ret = -EFAULT;
        }

        memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd));
        set_page_dirty(b.pcmd);

        /*
         * The area for the PCMD in the page was zeroed above. Check if the
         * whole page is now empty, meaning that all PCMDs have been zeroed:
         */
        pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);

        kunmap_atomic(pcmd_page);
        kunmap_atomic((void *)(unsigned long)pginfo.contents);

        get_page(b.pcmd);
        sgx_encl_put_backing(&b);

        sgx_encl_truncate_backing_page(encl, page_index);

        if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
                sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
                pcmd_page = kmap_atomic(b.pcmd);
                if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
                        pr_warn("PCMD page not empty after truncate.\n");
                kunmap_atomic(pcmd_page);
        }

        put_page(b.pcmd);

        return ret;
}

static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
                                          struct sgx_epc_page *secs_page)
{
        unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
        struct sgx_encl *encl = encl_page->encl;
        struct sgx_epc_page *epc_page;
        int ret;

        epc_page = sgx_alloc_epc_page(encl_page, false);
        if (IS_ERR(epc_page))
                return epc_page;

        ret = __sgx_encl_eldu(encl_page, epc_page, secs_page);
        if (ret) {
                sgx_encl_free_epc_page(epc_page);
                return ERR_PTR(ret);
        }

        sgx_free_va_slot(encl_page->va_page, va_offset);
        list_move(&encl_page->va_page->list, &encl->va_pages);
        encl_page->desc &= ~SGX_ENCL_PAGE_VA_OFFSET_MASK;
        encl_page->epc_page = epc_page;

        return epc_page;
}
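
/*
 * Note on the VA slot handling above: a successful ELDU consumes the version
 * stored in the Version Array (VA) slot, so the slot is released and the
 * descriptor's offset bits are cleared. Moving the VA page to the head of
 * encl->va_pages keeps a page with a known free slot at the front, which
 * (presumably) lets future reclaims find a free slot quickly.
 */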

static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
                                                  struct sgx_encl_page *entry)
{
        struct sgx_epc_page *epc_page;

        /* Entry successfully located. */
        if (entry->epc_page) {
                if (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)
                        return ERR_PTR(-EBUSY);

                return entry;
        }

        if (!(encl->secs.epc_page)) {
                epc_page = sgx_encl_eldu(&encl->secs, NULL);
                if (IS_ERR(epc_page))
                        return ERR_CAST(epc_page);
        }

        epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
        if (IS_ERR(epc_page))
                return ERR_CAST(epc_page);

        encl->secs_child_cnt++;
        sgx_mark_page_reclaimable(entry->epc_page);

        return entry;
}

static struct sgx_encl_page *sgx_encl_load_page_in_vma(struct sgx_encl *encl,
                                                       unsigned long addr,
                                                       unsigned long vm_flags)
{
        unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
        struct sgx_encl_page *entry;

        entry = xa_load(&encl->page_array, PFN_DOWN(addr));
        if (!entry)
                return ERR_PTR(-EFAULT);

        /*
         * Verify that the page has equal or higher build time
         * permissions than the VMA permissions (i.e. the subset of {VM_READ,
         * VM_WRITE, VM_EXEC} in vma->vm_flags).
         */
        if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits)
                return ERR_PTR(-EFAULT);

        return __sgx_encl_load_page(encl, entry);
}

struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
                                         unsigned long addr)
{
        struct sgx_encl_page *entry;

        entry = xa_load(&encl->page_array, PFN_DOWN(addr));
        if (!entry)
                return ERR_PTR(-EFAULT);

        return __sgx_encl_load_page(encl, entry);
}
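
/*
 * The two wrappers above differ only in the permission check:
 * sgx_encl_load_page_in_vma() is for page-fault context where the VMA
 * permissions must be validated, while sgx_encl_load_page() serves callers
 * that do not act on behalf of a specific VMA and therefore skip that check.
 */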

/**
 * sgx_encl_eaug_page() - Dynamically add page to initialized enclave
 * @vma:	VMA obtained from fault info from where page is accessed
 * @encl:	enclave accessing the page
 * @addr:	address that triggered the page fault
 *
 * When an initialized enclave accesses a page with no backing EPC page
 * on an SGX2 system then an EPC page can be added dynamically via the
 * SGX2 ENCLS[EAUG] instruction.
 *
 * Returns: Appropriate vm_fault_t: VM_FAULT_NOPAGE when PTE was installed
 * successfully, VM_FAULT_SIGBUS or VM_FAULT_OOM as error otherwise.
 */
static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
                                     struct sgx_encl *encl, unsigned long addr)
{
        vm_fault_t vmret = VM_FAULT_SIGBUS;
        struct sgx_pageinfo pginfo = {0};
        struct sgx_encl_page *encl_page;
        struct sgx_epc_page *epc_page;
        struct sgx_va_page *va_page;
        unsigned long phys_addr;
        u64 secinfo_flags;
        int ret;

        if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
                return VM_FAULT_SIGBUS;

        /*
         * Ignore internal permission checking for dynamically added pages.
         * They matter only for data added during the pre-initialization
         * phase. The enclave decides the permissions by the means of
         * EACCEPT, EACCEPTCOPY and EMODPE.
         */
        secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
        encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
        if (IS_ERR(encl_page))
                return VM_FAULT_OOM;

        mutex_lock(&encl->lock);

        epc_page = sgx_alloc_epc_page(encl_page, false);
        if (IS_ERR(epc_page)) {
                if (PTR_ERR(epc_page) == -EBUSY)
                        vmret = VM_FAULT_NOPAGE;
                goto err_out_unlock;
        }

        va_page = sgx_encl_grow(encl, false);
        if (IS_ERR(va_page)) {
                if (PTR_ERR(va_page) == -EBUSY)
                        vmret = VM_FAULT_NOPAGE;
                goto err_out_epc;
        }

        if (va_page)
                list_add(&va_page->list, &encl->va_pages);

        ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
                        encl_page, GFP_KERNEL);
        /*
         * If ret == -EBUSY then page was created in another flow while
         * running without encl->lock.
         */
        if (ret)
                goto err_out_shrink;

        pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
        pginfo.addr = encl_page->desc & PAGE_MASK;
        pginfo.metadata = 0;

        ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
        if (ret)
                goto err_out;

        encl_page->encl = encl;
        encl_page->epc_page = epc_page;
        encl_page->type = SGX_PAGE_TYPE_REG;
        encl->secs_child_cnt++;

        sgx_mark_page_reclaimable(encl_page->epc_page);

        phys_addr = sgx_get_epc_phys_addr(epc_page);
        /*
         * Do not undo everything when creating PTE entry fails - next #PF
         * would find page ready for a PTE.
         */
        vmret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
        if (vmret != VM_FAULT_NOPAGE) {
                mutex_unlock(&encl->lock);
                return VM_FAULT_SIGBUS;
        }
        mutex_unlock(&encl->lock);
        return VM_FAULT_NOPAGE;

err_out:
        xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));

err_out_shrink:
        sgx_encl_shrink(encl, va_page);
err_out_epc:
        sgx_encl_free_epc_page(epc_page);
err_out_unlock:
        mutex_unlock(&encl->lock);
        kfree(encl_page);

        return vmret;
}

static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
{
        unsigned long addr = (unsigned long)vmf->address;
        struct vm_area_struct *vma = vmf->vma;
        struct sgx_encl_page *entry;
        unsigned long phys_addr;
        struct sgx_encl *encl;
        vm_fault_t ret;

        encl = vma->vm_private_data;

        /*
         * It's very unlikely but possible that allocating memory for the
         * mm_list entry of a forked process failed in sgx_vma_open(). When
         * this happens, vm_private_data is set to NULL.
         */
        if (unlikely(!encl))
                return VM_FAULT_SIGBUS;

        /*
         * The page_array keeps track of all enclave pages, whether they
         * are swapped out or not. If there is no entry for this page and
         * the system supports SGX2 then it is possible to dynamically add
         * a new enclave page. This is only possible for an initialized
         * enclave, which is checked right away.
         */
        if (cpu_feature_enabled(X86_FEATURE_SGX2) &&
            (!xa_load(&encl->page_array, PFN_DOWN(addr))))
                return sgx_encl_eaug_page(vma, encl, addr);

        mutex_lock(&encl->lock);

        entry = sgx_encl_load_page_in_vma(encl, addr, vma->vm_flags);
        if (IS_ERR(entry)) {
                mutex_unlock(&encl->lock);

                if (PTR_ERR(entry) == -EBUSY)
                        return VM_FAULT_NOPAGE;

                return VM_FAULT_SIGBUS;
        }

        phys_addr = sgx_get_epc_phys_addr(entry->epc_page);

        ret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
        if (ret != VM_FAULT_NOPAGE) {
                mutex_unlock(&encl->lock);

                return VM_FAULT_SIGBUS;
        }

        sgx_encl_test_and_clear_young(vma->vm_mm, entry);
        mutex_unlock(&encl->lock);

        return VM_FAULT_NOPAGE;
}

static void sgx_vma_open(struct vm_area_struct *vma)
{
        struct sgx_encl *encl = vma->vm_private_data;

        /*
         * It's possible but unlikely that vm_private_data is NULL. This can
         * happen in a grandchild of a process, when sgx_encl_mm_add() had
         * failed to allocate memory in this callback.
         */
        if (unlikely(!encl))
                return;

        if (sgx_encl_mm_add(encl, vma->vm_mm))
                vma->vm_private_data = NULL;
}

/**
 * sgx_encl_may_map() - Check if a requested VMA mapping is allowed
 * @encl:	an enclave pointer
 * @start:	lower bound of the address range, inclusive
 * @end:	upper bound of the address range, exclusive
 * @vm_flags:	VMA flags
 *
 * Iterate through the enclave pages contained within [@start, @end) to verify
 * that the permissions requested by @vm_flags (the subset of {VM_READ,
 * VM_WRITE, VM_EXEC}) do not exceed the build time permissions of any enclave
 * page within the given address range.
 *
 * An enclave creator must declare the strongest permissions that will be
 * needed for each enclave page. This ensures that mappings have permissions
 * identical to or weaker than the earlier declared permissions.
 *
 * Return: 0 on success, -EACCES otherwise
 */
int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
                     unsigned long end, unsigned long vm_flags)
{
        unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
        struct sgx_encl_page *page;
        unsigned long count = 0;
        int ret = 0;

        XA_STATE(xas, &encl->page_array, PFN_DOWN(start));

        /* Disallow mapping outside enclave's address range. */
        if (test_bit(SGX_ENCL_INITIALIZED, &encl->flags) &&
            (start < encl->base || end > encl->base + encl->size))
                return -EACCES;

        /*
         * Disallow READ_IMPLIES_EXEC tasks as their VMA permissions might
         * conflict with the enclave page permissions.
         */
        if (current->personality & READ_IMPLIES_EXEC)
                return -EACCES;

        mutex_lock(&encl->lock);
        xas_lock(&xas);
        xas_for_each(&xas, page, PFN_DOWN(end - 1)) {
                if (~page->vm_max_prot_bits & vm_prot_bits) {
                        ret = -EACCES;
                        break;
                }

                /* Reschedule on every XA_CHECK_SCHED iteration. */
                if (!(++count % XA_CHECK_SCHED)) {
                        xas_pause(&xas);
                        xas_unlock(&xas);
                        mutex_unlock(&encl->lock);

                        cond_resched();

                        mutex_lock(&encl->lock);
                        xas_lock(&xas);
                }
        }
        xas_unlock(&xas);
        mutex_unlock(&encl->lock);

        return ret;
}
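
/*
 * Example of the check above (illustrative only): a page added with SECINFO
 * permissions R|X has vm_max_prot_bits == VM_READ | VM_EXEC, so mapping it
 * with PROT_READ passes while PROT_READ | PROT_WRITE fails, because
 * ~(VM_READ | VM_EXEC) & (VM_READ | VM_WRITE) is non-zero.
 */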

static int sgx_vma_mprotect(struct vm_area_struct *vma, unsigned long start,
                            unsigned long end, unsigned long newflags)
{
        return sgx_encl_may_map(vma->vm_private_data, start, end, newflags);
}

static int sgx_encl_debug_read(struct sgx_encl *encl, struct sgx_encl_page *page,
                               unsigned long addr, void *data)
{
        unsigned long offset = addr & ~PAGE_MASK;
        int ret;

        ret = __edbgrd(sgx_get_epc_virt_addr(page->epc_page) + offset, data);
        if (ret)
                return -EIO;

        return 0;
}

static int sgx_encl_debug_write(struct sgx_encl *encl, struct sgx_encl_page *page,
                                unsigned long addr, void *data)
{
        unsigned long offset = addr & ~PAGE_MASK;
        int ret;

        ret = __edbgwr(sgx_get_epc_virt_addr(page->epc_page) + offset, data);
        if (ret)
                return -EIO;

        return 0;
}

/*
 * Load an enclave page to EPC if required, and take encl->lock.
 */
static struct sgx_encl_page *sgx_encl_reserve_page(struct sgx_encl *encl,
                                                   unsigned long addr,
                                                   unsigned long vm_flags)
{
        struct sgx_encl_page *entry;

        for ( ; ; ) {
                mutex_lock(&encl->lock);

                entry = sgx_encl_load_page_in_vma(encl, addr, vm_flags);
                if (PTR_ERR(entry) != -EBUSY)
                        break;

                mutex_unlock(&encl->lock);
        }

        if (IS_ERR(entry))
                mutex_unlock(&encl->lock);

        return entry;
}

static int sgx_vma_access(struct vm_area_struct *vma, unsigned long addr,
                          void *buf, int len, int write)
{
        struct sgx_encl *encl = vma->vm_private_data;
        struct sgx_encl_page *entry = NULL;
        char data[sizeof(unsigned long)];
        unsigned long align;
        int offset;
        int cnt;
        int ret = 0;
        int i;

        /*
         * If process was forked, VMA is still there but vm_private_data is set
         * to NULL.
         */
        if (!encl)
                return -EFAULT;

        if (!test_bit(SGX_ENCL_DEBUG, &encl->flags))
                return -EFAULT;

        for (i = 0; i < len; i += cnt) {
                entry = sgx_encl_reserve_page(encl, (addr + i) & PAGE_MASK,
                                              vma->vm_flags);
                if (IS_ERR(entry)) {
                        ret = PTR_ERR(entry);
                        break;
                }

                align = ALIGN_DOWN(addr + i, sizeof(unsigned long));
                offset = (addr + i) & (sizeof(unsigned long) - 1);
                cnt = sizeof(unsigned long) - offset;
                cnt = min(cnt, len - i);

                ret = sgx_encl_debug_read(encl, entry, align, data);
                if (ret)
                        goto out;

                if (write) {
                        memcpy(data + offset, buf + i, cnt);
                        ret = sgx_encl_debug_write(encl, entry, align, data);
                        if (ret)
                                goto out;
                } else {
                        memcpy(buf + i, data + offset, cnt);
                }

out:
                mutex_unlock(&encl->lock);

                if (ret)
                        break;
        }

        return ret < 0 ? ret : i;
}
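
/*
 * Access in sgx_vma_access() proceeds word by word because ENCLS[EDBGRD] and
 * ENCLS[EDBGWR] operate on naturally aligned machine words; a write smaller
 * than a word is therefore done as a read-modify-write of the containing word.
 */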

const struct vm_operations_struct sgx_vm_ops = {
        .fault = sgx_vma_fault,
        .mprotect = sgx_vma_mprotect,
        .open = sgx_vma_open,
        .access = sgx_vma_access,
};

/**
 * sgx_encl_release - Destroy an enclave instance
 * @ref:	address of a kref inside &sgx_encl
 *
 * Used together with kref_put(). Frees all the resources associated with the
 * enclave and the instance itself.
 */
void sgx_encl_release(struct kref *ref)
{
        struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount);
        struct sgx_va_page *va_page;
        struct sgx_encl_page *entry;
        unsigned long index;

        xa_for_each(&encl->page_array, index, entry) {
                if (entry->epc_page) {
                        /*
                         * The page and its radix tree entry cannot be freed
                         * if the page is being held by the reclaimer.
                         */
                        if (sgx_unmark_page_reclaimable(entry->epc_page))
                                continue;

                        sgx_encl_free_epc_page(entry->epc_page);
                        encl->secs_child_cnt--;
                        entry->epc_page = NULL;
                }

                kfree(entry);
                /* Invoke scheduler to prevent soft lockups. */
                cond_resched();
        }

        xa_destroy(&encl->page_array);

        if (!encl->secs_child_cnt && encl->secs.epc_page) {
                sgx_encl_free_epc_page(encl->secs.epc_page);
                encl->secs.epc_page = NULL;
        }

        while (!list_empty(&encl->va_pages)) {
                va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
                                           list);
                list_del(&va_page->list);
                sgx_encl_free_epc_page(va_page->epc_page);
                kfree(va_page);
        }

        if (encl->backing)
                fput(encl->backing);

        cleanup_srcu_struct(&encl->srcu);

        WARN_ON_ONCE(!list_empty(&encl->mm_list));

        /* Detect EPC page leaks. */
        WARN_ON_ONCE(encl->secs_child_cnt);
        WARN_ON_ONCE(encl->secs.epc_page);

        kfree(encl);
}

/*
 * 'mm' is exiting and no longer needs mmu notifications.
 */
static void sgx_mmu_notifier_release(struct mmu_notifier *mn,
                                     struct mm_struct *mm)
{
        struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier);
        struct sgx_encl_mm *tmp = NULL;

        /*
         * The enclave itself can remove encl_mm. Note, objects can't be moved
         * off an RCU protected list, but deletion is ok.
         */
        spin_lock(&encl_mm->encl->mm_lock);
        list_for_each_entry(tmp, &encl_mm->encl->mm_list, list) {
                if (tmp == encl_mm) {
                        list_del_rcu(&encl_mm->list);
                        break;
                }
        }
        spin_unlock(&encl_mm->encl->mm_lock);

        if (tmp == encl_mm) {
                synchronize_srcu(&encl_mm->encl->srcu);
                mmu_notifier_put(mn);
        }
}

static void sgx_mmu_notifier_free(struct mmu_notifier *mn)
{
        struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier);

        /* 'encl_mm' is going away, put encl_mm->encl reference: */
        kref_put(&encl_mm->encl->refcount, sgx_encl_release);

        kfree(encl_mm);
}

static const struct mmu_notifier_ops sgx_mmu_notifier_ops = {
        .release = sgx_mmu_notifier_release,
        .free_notifier = sgx_mmu_notifier_free,
};

static struct sgx_encl_mm *sgx_encl_find_mm(struct sgx_encl *encl,
                                            struct mm_struct *mm)
{
        struct sgx_encl_mm *encl_mm = NULL;
        struct sgx_encl_mm *tmp;
        int idx;

        idx = srcu_read_lock(&encl->srcu);

        list_for_each_entry_rcu(tmp, &encl->mm_list, list) {
                if (tmp->mm == mm) {
                        encl_mm = tmp;
                        break;
                }
        }

        srcu_read_unlock(&encl->srcu, idx);

        return encl_mm;
}

int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
{
        struct sgx_encl_mm *encl_mm;
        int ret;

        /*
         * Even though a single enclave may be mapped into an mm more than once,
         * each 'mm' only appears once on encl->mm_list. This is guaranteed by
         * holding the mm's mmap lock for write before an mm can be added to or
         * removed from an encl->mm_list.
         */
        mmap_assert_write_locked(mm);

        /*
         * It's possible that an entry already exists in the mm_list, because it
         * is removed only on VFS release or process exit.
         */
        if (sgx_encl_find_mm(encl, mm))
                return 0;

        encl_mm = kzalloc(sizeof(*encl_mm), GFP_KERNEL);
        if (!encl_mm)
                return -ENOMEM;

        /* Grab a refcount for the encl_mm->encl reference: */
        kref_get(&encl->refcount);
        encl_mm->encl = encl;
        encl_mm->mm = mm;
        encl_mm->mmu_notifier.ops = &sgx_mmu_notifier_ops;

        ret = __mmu_notifier_register(&encl_mm->mmu_notifier, mm);
        if (ret) {
                kfree(encl_mm);
                return ret;
        }

        spin_lock(&encl->mm_lock);
        list_add_rcu(&encl_mm->list, &encl->mm_list);
        /* Pairs with smp_rmb() in sgx_zap_enclave_ptes(). */
        smp_wmb();
        encl->mm_list_version++;
        spin_unlock(&encl->mm_lock);

        return 0;
}

/**
 * sgx_encl_cpumask() - Query which CPUs might be accessing the enclave
 * @encl: the enclave
 *
 * Some SGX functions require that no cached linear-to-physical address
 * mappings are present before they can succeed. For example, ENCLS[EWB]
 * copies a page from the enclave page cache to regular main memory but
 * it fails if it cannot ensure that there are no cached
 * linear-to-physical address mappings referring to the page.
 *
 * SGX hardware flushes all cached linear-to-physical mappings on a CPU
 * when an enclave is exited via ENCLU[EEXIT] or an Asynchronous Enclave
 * Exit (AEX). Exiting an enclave will thus ensure cached linear-to-physical
 * address mappings are cleared but coordination with the tracking done within
 * the SGX hardware is needed to support the SGX functions that depend on this
 * cache clearing.
 *
 * When the ENCLS[ETRACK] function is issued on an enclave the hardware
 * tracks threads operating inside the enclave at that time. The SGX
 * hardware tracking requires that all the identified threads have exited
 * the enclave in order to flush the mappings before a function such as
 * ENCLS[EWB] will be permitted.
 *
 * The following flow is used to support SGX functions that require that
 * no cached linear-to-physical address mappings are present:
 * 1) Execute ENCLS[ETRACK] to initiate hardware tracking.
 * 2) Use this function (sgx_encl_cpumask()) to query which CPUs might be
 *    accessing the enclave.
 * 3) Send IPI to identified CPUs, kicking them out of the enclave and
 *    thus flushing all locally cached linear-to-physical address mappings.
 * 4) Execute SGX function.
 *
 * Context: It is required to call this function after ENCLS[ETRACK].
 *          This will ensure that if any new mm appears (racing with
 *          sgx_encl_mm_add()) then the new mm will enter into the
 *          enclave with fresh linear-to-physical address mappings.
 *
 *          It is required that all IPIs are completed before a new
 *          ENCLS[ETRACK] is issued so be sure to protect steps 1 to 3
 *          of the above flow with the enclave's mutex.
 *
 * Return: cpumask of CPUs that might be accessing @encl
 */
const cpumask_t *sgx_encl_cpumask(struct sgx_encl *encl)
{
        cpumask_t *cpumask = &encl->cpumask;
        struct sgx_encl_mm *encl_mm;
        int idx;

        cpumask_clear(cpumask);

        idx = srcu_read_lock(&encl->srcu);

        list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
                if (!mmget_not_zero(encl_mm->mm))
                        continue;

                cpumask_or(cpumask, cpumask, mm_cpumask(encl_mm->mm));

                mmput_async(encl_mm->mm);
        }

        srcu_read_unlock(&encl->srcu, idx);

        return cpumask;
}

static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
                                              pgoff_t index)
{
        struct inode *inode = encl->backing->f_path.dentry->d_inode;
        struct address_space *mapping = inode->i_mapping;
        gfp_t gfpmask = mapping_gfp_mask(mapping);

        return shmem_read_mapping_page_gfp(mapping, index, gfpmask);
}

/**
 * sgx_encl_get_backing() - Pin the backing storage
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * Pin the backing storage pages for storing the encrypted contents and Paging
 * Crypto MetaData (PCMD) of an enclave page.
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
static int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
                                struct sgx_backing *backing)
{
        pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
        struct page *contents;
        struct page *pcmd;

        contents = sgx_encl_get_backing_page(encl, page_index);
        if (IS_ERR(contents))
                return PTR_ERR(contents);

        pcmd = sgx_encl_get_backing_page(encl, PFN_DOWN(page_pcmd_off));
        if (IS_ERR(pcmd)) {
                put_page(contents);
                return PTR_ERR(pcmd);
        }

        backing->contents = contents;
        backing->pcmd = pcmd;
        backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1);

        return 0;
}

/*
 * When called from ksgxd, returns the mem_cgroup of a struct mm stored
 * in the enclave's mm_list. When not called from ksgxd, just returns
 * the mem_cgroup of the current task.
 */
static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl)
{
        struct mem_cgroup *memcg = NULL;
        struct sgx_encl_mm *encl_mm;
        int idx;

        /*
         * If called from normal task context, return the mem_cgroup
         * of the current task's mm. The remainder of the handling is for
         * ksgxd.
         */
        if (!current_is_ksgxd())
                return get_mem_cgroup_from_mm(current->mm);

        /*
         * Search the enclave's mm_list to find an mm associated with
         * this enclave to charge the allocation to.
         */
        idx = srcu_read_lock(&encl->srcu);

        list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
                if (!mmget_not_zero(encl_mm->mm))
                        continue;

                memcg = get_mem_cgroup_from_mm(encl_mm->mm);

                mmput_async(encl_mm->mm);

                break;
        }

        srcu_read_unlock(&encl->srcu, idx);

        /*
         * In the rare case that there isn't an mm associated with
         * the enclave, set memcg to the current active mem_cgroup.
         * This will be the root mem_cgroup if there is no active
         * mem_cgroup.
         */
        if (!memcg)
                return get_mem_cgroup_from_mm(NULL);

        return memcg;
}

/**
 * sgx_encl_alloc_backing() - allocate a new backing storage page
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * When called from ksgxd, sets the active memcg from one of the
 * mms in the enclave's mm_list prior to any backing page allocation,
 * in order to ensure that shmem page allocations are charged to the
 * enclave.
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
                           struct sgx_backing *backing)
{
        struct mem_cgroup *encl_memcg = sgx_encl_get_mem_cgroup(encl);
        struct mem_cgroup *memcg = set_active_memcg(encl_memcg);
        int ret;

        ret = sgx_encl_get_backing(encl, page_index, backing);

        set_active_memcg(memcg);
        mem_cgroup_put(encl_memcg);

        return ret;
}

/**
 * sgx_encl_lookup_backing() - retrieve an existing backing storage page
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * Retrieve a backing page for loading data back into an EPC page with ELDU.
 * It is the caller's responsibility to ensure that it is appropriate to use
 * sgx_encl_lookup_backing() rather than sgx_encl_alloc_backing(). If lookup is
 * not used correctly, this will cause an allocation which is not accounted for.
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
                            struct sgx_backing *backing)
{
        return sgx_encl_get_backing(encl, page_index, backing);
}

/**
 * sgx_encl_put_backing() - Unpin the backing storage
 * @backing:	data for accessing backing storage for the page
 */
void sgx_encl_put_backing(struct sgx_backing *backing)
{
        put_page(backing->pcmd);
        put_page(backing->contents);
}

static int sgx_encl_test_and_clear_young_cb(pte_t *ptep, unsigned long addr,
                                            void *data)
{
        pte_t pte;
        int ret;

        ret = pte_young(*ptep);
        if (ret) {
                pte = pte_mkold(*ptep);
                set_pte_at((struct mm_struct *)data, addr, ptep, pte);
        }

        return ret;
}

/**
 * sgx_encl_test_and_clear_young() - Test and reset the accessed bit
 * @mm:		mm_struct that is checked
 * @page:	enclave page to be tested for recent access
 *
 * Checks the Access (A) bit from the PTE corresponding to the enclave page and
 * clears it.
 *
 * Return: 1 if the page has been recently accessed and 0 if not.
 */
int sgx_encl_test_and_clear_young(struct mm_struct *mm,
                                  struct sgx_encl_page *page)
{
        unsigned long addr = page->desc & PAGE_MASK;
        struct sgx_encl *encl = page->encl;
        struct vm_area_struct *vma;
        int ret;

        ret = sgx_encl_find(mm, addr, &vma);
        if (ret)
                return 0;

        if (encl != vma->vm_private_data)
                return 0;

        ret = apply_to_page_range(vma->vm_mm, addr, PAGE_SIZE,
                                  sgx_encl_test_and_clear_young_cb, vma->vm_mm);
        if (ret < 0)
                return 0;

        return ret;
}

struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
                                          unsigned long offset,
                                          u64 secinfo_flags)
{
        struct sgx_encl_page *encl_page;
        unsigned long prot;

        encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
        if (!encl_page)
                return ERR_PTR(-ENOMEM);

        encl_page->desc = encl->base + offset;
        encl_page->encl = encl;

        prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ)  |
               _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
               _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);

        /*
         * TCS pages must always have RW set for CPU access while the SECINFO
         * permissions are *always* zero - the CPU ignores the user provided
         * values and silently overwrites them with zero permissions.
         */
        if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
                prot |= PROT_READ | PROT_WRITE;

        /* Calculate maximum of the VM flags for the page. */
        encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);

        return encl_page;
}
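
/*
 * Mapping example for the translation above (illustrative only): SECINFO
 * flags R|X translate to prot == PROT_READ | PROT_EXEC, which
 * calc_vm_prot_bits() turns into vm_max_prot_bits == VM_READ | VM_EXEC,
 * the value later checked by sgx_encl_may_map().
 */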

/**
 * sgx_zap_enclave_ptes() - remove PTEs mapping the address from enclave
 * @encl: the enclave
 * @addr: page aligned pointer to single page for which PTEs will be removed
 *
 * Multiple VMAs may have an enclave page mapped. Remove the PTE mapping
 * @addr from each VMA. Ensure that page fault handler is ready to handle
 * new mappings of @addr before calling this function.
 */
void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr)
{
        unsigned long mm_list_version;
        struct sgx_encl_mm *encl_mm;
        struct vm_area_struct *vma;
        int idx, ret;

        do {
                mm_list_version = encl->mm_list_version;

                /* Pairs with smp_wmb() in sgx_encl_mm_add(). */
                smp_rmb();

                idx = srcu_read_lock(&encl->srcu);

                list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
                        if (!mmget_not_zero(encl_mm->mm))
                                continue;

                        mmap_read_lock(encl_mm->mm);

                        ret = sgx_encl_find(encl_mm->mm, addr, &vma);
                        if (!ret && encl == vma->vm_private_data)
                                zap_vma_ptes(vma, addr, PAGE_SIZE);

                        mmap_read_unlock(encl_mm->mm);

                        mmput_async(encl_mm->mm);
                }

                srcu_read_unlock(&encl->srcu, idx);
        } while (unlikely(encl->mm_list_version != mm_list_version));
}
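
/*
 * The retry loop above re-walks the whole mm_list whenever
 * encl->mm_list_version changed during the walk: an mm added concurrently by
 * sgx_encl_mm_add() might otherwise retain a stale PTE for @addr.
 */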

/**
 * sgx_alloc_va_page() - Allocate a Version Array (VA) page
 * @reclaim: Reclaim EPC pages directly if none available. Enclave
 *           mutex should not be held if this is set.
 *
 * Allocate a free EPC page and convert it to a Version Array (VA) page.
 *
 * Return:
 *   a VA page,
 *   -errno otherwise
 */
struct sgx_epc_page *sgx_alloc_va_page(bool reclaim)
{
        struct sgx_epc_page *epc_page;
        int ret;

        epc_page = sgx_alloc_epc_page(NULL, reclaim);
        if (IS_ERR(epc_page))
                return ERR_CAST(epc_page);

        ret = __epa(sgx_get_epc_virt_addr(epc_page));
        if (ret) {
                WARN_ONCE(1, "EPA returned %d (0x%x)", ret, ret);
                sgx_encl_free_epc_page(epc_page);
                return ERR_PTR(-EFAULT);
        }

        return epc_page;
}

/**
 * sgx_alloc_va_slot - allocate a VA slot
 * @va_page:	a &struct sgx_va_page instance
 *
 * Allocates a slot from a &struct sgx_va_page instance.
 *
 * Return: offset of the slot inside the VA page
 */
unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page)
{
        int slot = find_first_zero_bit(va_page->slots, SGX_VA_SLOT_COUNT);

        if (slot < SGX_VA_SLOT_COUNT)
                set_bit(slot, va_page->slots);

        return slot << 3;
}
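
/*
 * The "slot << 3" above converts a slot number to a byte offset: each VA
 * slot holds an 8-byte page version, so SGX_VA_SLOT_COUNT slots (512 with
 * 4K pages) exactly fill one VA page.
 */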

/**
 * sgx_free_va_slot - free a VA slot
 * @va_page:	a &struct sgx_va_page instance
 * @offset:	offset of the slot inside the VA page
 *
 * Frees a slot from a &struct sgx_va_page instance.
 */
void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset)
{
        clear_bit(offset >> 3, va_page->slots);
}

/**
 * sgx_va_page_full - is the VA page full?
 * @va_page:	a &struct sgx_va_page instance
 *
 * Return: true if all slots have been taken
 */
bool sgx_va_page_full(struct sgx_va_page *va_page)
{
        int slot = find_first_zero_bit(va_page->slots, SGX_VA_SLOT_COUNT);

        return slot == SGX_VA_SLOT_COUNT;
}

/**
 * sgx_encl_free_epc_page - free an EPC page assigned to an enclave
 * @page:	EPC page to be freed
 *
 * Free an EPC page assigned to an enclave. It does EREMOVE for the page, and
 * only upon success it puts the page back to the free page list. Otherwise, it
 * emits a WARNING to indicate the page is leaked.
 */
void sgx_encl_free_epc_page(struct sgx_epc_page *page)
{
        int ret;

        WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);

        ret = __eremove(sgx_get_epc_virt_addr(page));
        if (WARN_ONCE(ret, EREMOVE_ERROR_MESSAGE, ret, ret))
                return;

        sgx_free_epc_page(page);
}