]>
Commit | Line | Data |
---|---|---|
dd873966 | 1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ |
c1fcf220 DDAG |
2 | /* |
3 | * include/linux/userfaultfd.h | |
4 | * | |
5 | * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> | |
6 | * Copyright (C) 2015 Red Hat, Inc. | |
7 | * | |
8 | */ | |
9 | ||
10 | #ifndef _LINUX_USERFAULTFD_H | |
11 | #define _LINUX_USERFAULTFD_H | |
12 | ||
13 | #include <linux/types.h> | |
14 | ||
93e0932b PX |
15 | /* ioctls for /dev/userfaultfd */ |
16 | #define USERFAULTFD_IOC 0xAA | |
17 | #define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00) | |
18 | ||
c1fcf220 | 19 | /* |
3a5eb5b4 PB |
20 | * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and |
21 | * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In | |
22 | * userfaultfd.h we assumed the kernel was reading (whereas _IOC_READ | |
23 | * actually means that userland is reading). | |
c1fcf220 | 24 | */ |
3a5eb5b4 | 25 | #define UFFD_API ((__u64)0xAA) |
278f064e EH |
26 | #define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \ |
27 | UFFDIO_REGISTER_MODE_WP | \ | |
28 | UFFDIO_REGISTER_MODE_MINOR) | |
dc6f8d45 CH |
29 | #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ |
30 | UFFD_FEATURE_EVENT_FORK | \ | |
3a5eb5b4 | 31 | UFFD_FEATURE_EVENT_REMAP | \ |
278f064e | 32 | UFFD_FEATURE_EVENT_REMOVE | \ |
74c98e20 | 33 | UFFD_FEATURE_EVENT_UNMAP | \ |
3a5eb5b4 | 34 | UFFD_FEATURE_MISSING_HUGETLBFS | \ |
d4083f50 AP |
35 | UFFD_FEATURE_MISSING_SHMEM | \ |
36 | UFFD_FEATURE_SIGBUS | \ | |
278f064e | 37 | UFFD_FEATURE_THREAD_ID | \ |
327d4b7f | 38 | UFFD_FEATURE_MINOR_HUGETLBFS | \ |
e4082063 | 39 | UFFD_FEATURE_MINOR_SHMEM | \ |
d525f73f | 40 | UFFD_FEATURE_EXACT_ADDRESS | \ |
d0bf492f | 41 | UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \ |
da3c22c7 TH |
42 | UFFD_FEATURE_WP_UNPOPULATED | \ |
43 | UFFD_FEATURE_POISON) | |
c1fcf220 DDAG |
44 | #define UFFD_API_IOCTLS \ |
45 | ((__u64)1 << _UFFDIO_REGISTER | \ | |
46 | (__u64)1 << _UFFDIO_UNREGISTER | \ | |
47 | (__u64)1 << _UFFDIO_API) | |
48 | #define UFFD_API_RANGE_IOCTLS \ | |
49 | ((__u64)1 << _UFFDIO_WAKE | \ | |
50 | (__u64)1 << _UFFDIO_COPY | \ | |
dc6f8d45 | 51 | (__u64)1 << _UFFDIO_ZEROPAGE | \ |
278f064e | 52 | (__u64)1 << _UFFDIO_WRITEPROTECT | \ |
da3c22c7 TH |
53 | (__u64)1 << _UFFDIO_CONTINUE | \ |
54 | (__u64)1 << _UFFDIO_POISON) | |
3a5eb5b4 PB |
55 | #define UFFD_API_RANGE_IOCTLS_BASIC \ |
56 | ((__u64)1 << _UFFDIO_WAKE | \ | |
278f064e | 57 | (__u64)1 << _UFFDIO_COPY | \ |
da3c22c7 | 58 | (__u64)1 << _UFFDIO_WRITEPROTECT | \ |
d525f73f | 59 | (__u64)1 << _UFFDIO_CONTINUE | \ |
da3c22c7 | 60 | (__u64)1 << _UFFDIO_POISON) |
c1fcf220 DDAG |
61 | |
62 | /* | |
63 | * Valid ioctl command number range with this API is from 0x00 to | |
64 | * 0x3F. UFFDIO_API is the fixed number, everything else can be | |
65 | * changed by implementing a different UFFD_API. If sticking to the | |
66 | * same UFFD_API, more ioctls can be added and userland will be aware of | |
67 | * which ioctl the running kernel implements through the ioctl command | |
68 | * bitmask written by the UFFDIO_API. | |
69 | */ | |
70 | #define _UFFDIO_REGISTER (0x00) | |
71 | #define _UFFDIO_UNREGISTER (0x01) | |
72 | #define _UFFDIO_WAKE (0x02) | |
73 | #define _UFFDIO_COPY (0x03) | |
74 | #define _UFFDIO_ZEROPAGE (0x04) | |
dc6f8d45 | 75 | #define _UFFDIO_WRITEPROTECT (0x06) |
278f064e | 76 | #define _UFFDIO_CONTINUE (0x07) |
da3c22c7 | 77 | #define _UFFDIO_POISON (0x08) |
c1fcf220 DDAG |
78 | #define _UFFDIO_API (0x3F) |
79 | ||
80 | /* userfaultfd ioctl ids */ | |
81 | #define UFFDIO 0xAA | |
82 | #define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \ | |
83 | struct uffdio_api) | |
84 | #define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \ | |
85 | struct uffdio_register) | |
86 | #define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \ | |
87 | struct uffdio_range) | |
88 | #define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \ | |
89 | struct uffdio_range) | |
90 | #define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \ | |
91 | struct uffdio_copy) | |
92 | #define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \ | |
93 | struct uffdio_zeropage) | |
dc6f8d45 CH |
94 | #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ |
95 | struct uffdio_writeprotect) | |
327d4b7f BR |
96 | #define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ |
97 | struct uffdio_continue) | |
da3c22c7 TH |
98 | #define UFFDIO_POISON _IOWR(UFFDIO, _UFFDIO_POISON, \ |
99 | struct uffdio_poison) | |
c1fcf220 DDAG |
100 | |
101 | /* read() structure */ | |
102 | struct uffd_msg { | |
103 | __u8 event; | |
104 | ||
105 | __u8 reserved1; | |
106 | __u16 reserved2; | |
107 | __u32 reserved3; | |
108 | ||
109 | union { | |
110 | struct { | |
111 | __u64 flags; | |
112 | __u64 address; | |
d4083f50 AP |
113 | union { |
114 | __u32 ptid; | |
115 | } feat; | |
c1fcf220 DDAG |
116 | } pagefault; |
117 | ||
3a5eb5b4 PB |
118 | struct { |
119 | __u32 ufd; | |
120 | } fork; | |
121 | ||
122 | struct { | |
123 | __u64 from; | |
124 | __u64 to; | |
125 | __u64 len; | |
126 | } remap; | |
127 | ||
128 | struct { | |
129 | __u64 start; | |
130 | __u64 end; | |
74c98e20 | 131 | } remove; |
3a5eb5b4 | 132 | |
c1fcf220 DDAG |
133 | struct { |
134 | /* unused reserved fields */ | |
135 | __u64 reserved1; | |
136 | __u64 reserved2; | |
137 | __u64 reserved3; | |
138 | } reserved; | |
139 | } arg; | |
b89485a5 | 140 | } __attribute__((packed)); |
c1fcf220 DDAG |
141 | |
142 | /* | |
143 | * Start at 0x12 and not at 0 to be more strict against bugs. | |
144 | */ | |
145 | #define UFFD_EVENT_PAGEFAULT 0x12 | |
c1fcf220 | 146 | #define UFFD_EVENT_FORK 0x13 |
3a5eb5b4 | 147 | #define UFFD_EVENT_REMAP 0x14 |
74c98e20 CH |
148 | #define UFFD_EVENT_REMOVE 0x15 |
149 | #define UFFD_EVENT_UNMAP 0x16 | |
c1fcf220 DDAG |
150 | |
151 | /* flags for UFFD_EVENT_PAGEFAULT */ | |
152 | #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ | |
153 | #define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */ | |
278f064e | 154 | #define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */ |
c1fcf220 DDAG |
155 | |
156 | struct uffdio_api { | |
157 | /* userland asks for an API number and the features to enable */ | |
158 | __u64 api; | |
159 | /* | |
160 | * Kernel answers below with all the available features for | |
161 | * the API, this notifies userland of which events and/or | |
162 | * which flags for each event are enabled in the current | |
163 | * kernel. | |
164 | * | |
165 | * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE | |
166 | * are to be considered implicitly always enabled in all kernels as | |
167 | * long as the uffdio_api.api requested matches UFFD_API. | |
3a5eb5b4 PB |
168 | * |
169 | * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER | |
170 | * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on | |
171 | * hugetlbfs virtual memory ranges. Adding or not adding | |
172 | * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has | |
173 | * no real functional effect after UFFDIO_API returns, but | |
174 | * it's only useful for an initial feature set probe at | |
175 | * UFFDIO_API time. There are two ways to use it: | |
176 | * | |
177 | * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the | |
178 | * uffdio_api.features before calling UFFDIO_API, an error | |
179 | * will be returned by UFFDIO_API on a kernel without | |
180 | * hugetlbfs missing support | |
181 | * | |
182 | * 2) alternatively, UFFD_FEATURE_MISSING_HUGETLBFS can be left | |
183 | * out of uffdio_api.features; it will then be set by the | |
184 | * kernel in uffdio_api.features if the kernel supports | |
185 | * it, so userland can later check if the feature flag is | |
186 | * present in uffdio_api.features after UFFDIO_API | |
187 | * succeeded. | |
188 | * | |
189 | * UFFD_FEATURE_MISSING_SHMEM works the same as | |
190 | * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem | |
191 | * (i.e. tmpfs and other shmem based APIs). | |
d4083f50 AP |
192 | * |
193 | * UFFD_FEATURE_SIGBUS feature means no page-fault | |
194 | * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead | |
195 | * a SIGBUS signal will be sent to the faulting process. | |
196 | * | |
197 | * UFFD_FEATURE_THREAD_ID means the pid of the page-faulting task_struct | |
198 | * will be returned; if the feature is not requested, 0 will be returned. | |
278f064e EH |
199 | * |
200 | * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults | |
201 | * can be intercepted (via REGISTER_MODE_MINOR) for | |
202 | * hugetlbfs-backed pages. | |
327d4b7f BR |
203 | * |
204 | * UFFD_FEATURE_MINOR_SHMEM indicates the same support as | |
205 | * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead. | |
e4082063 AW |
206 | * |
207 | * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page | |
208 | * faults would be provided and the offset within the page would not be | |
209 | * masked. | |
d525f73f CQ |
210 | * |
211 | * UFFD_FEATURE_WP_HUGETLBFS_SHMEM indicates that userfaultfd | |
212 | * write-protection mode is supported on both shmem and hugetlbfs. | |
d0bf492f CLG |
213 | * |
214 | * UFFD_FEATURE_WP_UNPOPULATED indicates that userfaultfd | |
215 | * write-protection mode will always apply to unpopulated pages | |
216 | * (i.e. empty ptes). This will be the default behavior for shmem | |
217 | * & hugetlbfs, so this flag only affects anonymous memory behavior | |
218 | * when userfault write-protection mode is registered. | |
c1fcf220 | 219 | */ |
c1fcf220 DDAG |
220 | #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) |
221 | #define UFFD_FEATURE_EVENT_FORK (1<<1) | |
3a5eb5b4 | 222 | #define UFFD_FEATURE_EVENT_REMAP (1<<2) |
74c98e20 | 223 | #define UFFD_FEATURE_EVENT_REMOVE (1<<3) |
3a5eb5b4 PB |
224 | #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) |
225 | #define UFFD_FEATURE_MISSING_SHMEM (1<<5) | |
74c98e20 | 226 | #define UFFD_FEATURE_EVENT_UNMAP (1<<6) |
d4083f50 AP |
227 | #define UFFD_FEATURE_SIGBUS (1<<7) |
228 | #define UFFD_FEATURE_THREAD_ID (1<<8) | |
278f064e | 229 | #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) |
327d4b7f | 230 | #define UFFD_FEATURE_MINOR_SHMEM (1<<10) |
e4082063 | 231 | #define UFFD_FEATURE_EXACT_ADDRESS (1<<11) |
d525f73f | 232 | #define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12) |
d0bf492f | 233 | #define UFFD_FEATURE_WP_UNPOPULATED (1<<13) |
da3c22c7 | 234 | #define UFFD_FEATURE_POISON (1<<14) |
c1fcf220 DDAG |
235 | __u64 features; |
236 | ||
237 | __u64 ioctls; | |
238 | }; | |
239 | ||
240 | struct uffdio_range { | |
241 | __u64 start; | |
242 | __u64 len; | |
243 | }; | |
244 | ||
245 | struct uffdio_register { | |
246 | struct uffdio_range range; | |
247 | #define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0) | |
248 | #define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1) | |
278f064e | 249 | #define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2) |
c1fcf220 DDAG |
250 | __u64 mode; |
251 | ||
252 | /* | |
253 | * kernel answers which ioctl commands are available for the | |
254 | * range, keep at the end as the last 8 bytes aren't read. | |
255 | */ | |
256 | __u64 ioctls; | |
257 | }; | |
258 | ||
259 | struct uffdio_copy { | |
260 | __u64 dst; | |
261 | __u64 src; | |
262 | __u64 len; | |
dc6f8d45 | 263 | #define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0) |
c1fcf220 | 264 | /* |
dc6f8d45 CH |
265 | * UFFDIO_COPY_MODE_WP will map the page write protected on |
266 | * the fly. UFFDIO_COPY_MODE_WP is available only if the | |
267 | * write protected ioctl is implemented for the range | |
268 | * according to the uffdio_register.ioctls. | |
c1fcf220 | 269 | */ |
dc6f8d45 | 270 | #define UFFDIO_COPY_MODE_WP ((__u64)1<<1) |
c1fcf220 DDAG |
271 | __u64 mode; |
272 | ||
273 | /* | |
274 | * "copy" is written by the ioctl and must be at the end: the | |
275 | * copy_from_user will not read the last 8 bytes. | |
276 | */ | |
277 | __s64 copy; | |
278 | }; | |
279 | ||
280 | struct uffdio_zeropage { | |
281 | struct uffdio_range range; | |
282 | #define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0) | |
283 | __u64 mode; | |
284 | ||
285 | /* | |
286 | * "zeropage" is written by the ioctl and must be at the end: | |
287 | * the copy_from_user will not read the last 8 bytes. | |
288 | */ | |
289 | __s64 zeropage; | |
290 | }; | |
291 | ||
dc6f8d45 CH |
292 | struct uffdio_writeprotect { |
293 | struct uffdio_range range; | |
294 | /* | |
295 | * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range, | |
296 | * unset the flag to undo protection of a range which was previously | |
297 | * write protected. | |
298 | * | |
299 | * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up | |
300 | * any waiting thread after the operation succeeds. | |
301 | * | |
302 | * NOTE: Write protecting a region (WP=1) is unrelated to page faults, | |
303 | * therefore DONTWAKE flag is meaningless with WP=1. Removing write | |
304 | * protection (WP=0) in response to a page fault wakes the faulting | |
305 | * task unless DONTWAKE is set. | |
306 | */ | |
307 | #define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0) | |
308 | #define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1) | |
309 | __u64 mode; | |
310 | }; | |
311 | ||
278f064e EH |
312 | struct uffdio_continue { |
313 | struct uffdio_range range; | |
314 | #define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0) | |
d0bf492f CLG |
315 | /* |
316 | * UFFDIO_CONTINUE_MODE_WP will map the page write protected on | |
317 | * the fly. UFFDIO_CONTINUE_MODE_WP is available only if the | |
318 | * write protected ioctl is implemented for the range | |
319 | * according to the uffdio_register.ioctls. | |
320 | */ | |
321 | #define UFFDIO_CONTINUE_MODE_WP ((__u64)1<<1) | |
278f064e EH |
322 | __u64 mode; |
323 | ||
324 | /* | |
325 | * Fields below here are written by the ioctl and must be at the end: | |
326 | * the copy_from_user will not read past here. | |
327 | */ | |
328 | __s64 mapped; | |
329 | }; | |
330 | ||
da3c22c7 TH |
331 | struct uffdio_poison { |
332 | struct uffdio_range range; | |
333 | #define UFFDIO_POISON_MODE_DONTWAKE ((__u64)1<<0) | |
334 | __u64 mode; | |
335 | ||
336 | /* | |
337 | * Fields below here are written by the ioctl and must be at the end: | |
338 | * the copy_from_user will not read past here. | |
339 | */ | |
340 | __s64 updated; | |
341 | }; | |
342 | ||
b3c818a4 EF |
343 | /* |
344 | * Flags for the userfaultfd(2) system call itself. | |
345 | */ | |
346 | ||
347 | /* | |
348 | * Create a userfaultfd that can handle page faults only in user mode. | |
349 | */ | |
350 | #define UFFD_USER_MODE_ONLY 1 | |
351 | ||
c1fcf220 | 352 | #endif /* _LINUX_USERFAULTFD_H */ |