]>
Commit | Line | Data |
---|---|---|
dd873966 | 1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ |
c1fcf220 DDAG |
2 | /* |
3 | * include/linux/userfaultfd.h | |
4 | * | |
5 | * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> | |
6 | * Copyright (C) 2015 Red Hat, Inc. | |
7 | * | |
8 | */ | |
9 | ||
10 | #ifndef _LINUX_USERFAULTFD_H | |
11 | #define _LINUX_USERFAULTFD_H | |
12 | ||
13 | #include <linux/types.h> | |
14 | ||
c1fcf220 | 15 | /* |
3a5eb5b4 PB |
16 | * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and |
17 | * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In | |
18 | * userfaultfd.h we assumed _IOC_READ meant the kernel was reading, | |
19 | * whereas it actually means that userland is reading. | |
c1fcf220 | 20 | */ |
3a5eb5b4 | 21 | #define UFFD_API ((__u64)0xAA) |
278f064e EH |
22 | #define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \ |
23 | UFFDIO_REGISTER_MODE_WP | \ | |
24 | UFFDIO_REGISTER_MODE_MINOR) | |
dc6f8d45 CH |
25 | #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ |
26 | UFFD_FEATURE_EVENT_FORK | \ | |
3a5eb5b4 | 27 | UFFD_FEATURE_EVENT_REMAP | \ |
278f064e | 28 | UFFD_FEATURE_EVENT_REMOVE | \ |
74c98e20 | 29 | UFFD_FEATURE_EVENT_UNMAP | \ |
3a5eb5b4 | 30 | UFFD_FEATURE_MISSING_HUGETLBFS | \ |
d4083f50 AP |
31 | UFFD_FEATURE_MISSING_SHMEM | \ |
32 | UFFD_FEATURE_SIGBUS | \ | |
278f064e | 33 | UFFD_FEATURE_THREAD_ID | \ |
327d4b7f | 34 | UFFD_FEATURE_MINOR_HUGETLBFS | \ |
e4082063 | 35 | UFFD_FEATURE_MINOR_SHMEM | \ |
d525f73f CQ |
36 | UFFD_FEATURE_EXACT_ADDRESS | \ |
37 | UFFD_FEATURE_WP_HUGETLBFS_SHMEM) | |
c1fcf220 DDAG |
38 | #define UFFD_API_IOCTLS \ |
39 | ((__u64)1 << _UFFDIO_REGISTER | \ | |
40 | (__u64)1 << _UFFDIO_UNREGISTER | \ | |
41 | (__u64)1 << _UFFDIO_API) | |
42 | #define UFFD_API_RANGE_IOCTLS \ | |
43 | ((__u64)1 << _UFFDIO_WAKE | \ | |
44 | (__u64)1 << _UFFDIO_COPY | \ | |
dc6f8d45 | 45 | (__u64)1 << _UFFDIO_ZEROPAGE | \ |
278f064e EH |
46 | (__u64)1 << _UFFDIO_WRITEPROTECT | \ |
47 | (__u64)1 << _UFFDIO_CONTINUE) | |
3a5eb5b4 PB |
48 | #define UFFD_API_RANGE_IOCTLS_BASIC \ |
49 | ((__u64)1 << _UFFDIO_WAKE | \ | |
278f064e | 50 | (__u64)1 << _UFFDIO_COPY | \ |
d525f73f CQ |
51 | (__u64)1 << _UFFDIO_CONTINUE | \ |
52 | (__u64)1 << _UFFDIO_WRITEPROTECT) | |
c1fcf220 DDAG |
53 | |
54 | /* | |
55 | * Valid ioctl command number range with this API is from 0x00 to | |
56 | * 0x3F. UFFDIO_API is the fixed number, everything else can be | |
57 | * changed by implementing a different UFFD_API. If sticking to the | |
58 | * same UFFD_API more ioctls can be added and userland will be aware of | |
59 | * which ioctl the running kernel implements through the ioctl command | |
60 | * bitmask written by the UFFDIO_API. | |
61 | */ | |
62 | #define _UFFDIO_REGISTER (0x00) | |
63 | #define _UFFDIO_UNREGISTER (0x01) | |
64 | #define _UFFDIO_WAKE (0x02) | |
65 | #define _UFFDIO_COPY (0x03) | |
66 | #define _UFFDIO_ZEROPAGE (0x04) | |
dc6f8d45 | 67 | #define _UFFDIO_WRITEPROTECT (0x06) |
278f064e | 68 | #define _UFFDIO_CONTINUE (0x07) |
c1fcf220 DDAG |
69 | #define _UFFDIO_API (0x3F) |
70 | ||
71 | /* userfaultfd ioctl ids */ | |
72 | #define UFFDIO 0xAA | |
73 | #define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \ | |
74 | struct uffdio_api) | |
75 | #define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \ | |
76 | struct uffdio_register) | |
77 | #define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \ | |
78 | struct uffdio_range) | |
79 | #define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \ | |
80 | struct uffdio_range) | |
81 | #define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \ | |
82 | struct uffdio_copy) | |
83 | #define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \ | |
84 | struct uffdio_zeropage) | |
dc6f8d45 CH |
85 | #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ |
86 | struct uffdio_writeprotect) | |
327d4b7f BR |
87 | #define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ |
88 | struct uffdio_continue) | |
c1fcf220 DDAG |
89 | |
90 | /* read() structure */ | |
91 | struct uffd_msg { | |
92 | __u8 event; | |
93 | ||
94 | __u8 reserved1; | |
95 | __u16 reserved2; | |
96 | __u32 reserved3; | |
97 | ||
98 | union { | |
99 | struct { | |
100 | __u64 flags; | |
101 | __u64 address; | |
d4083f50 AP |
102 | union { |
103 | __u32 ptid; | |
104 | } feat; | |
c1fcf220 DDAG |
105 | } pagefault; |
106 | ||
3a5eb5b4 PB |
107 | struct { |
108 | __u32 ufd; | |
109 | } fork; | |
110 | ||
111 | struct { | |
112 | __u64 from; | |
113 | __u64 to; | |
114 | __u64 len; | |
115 | } remap; | |
116 | ||
117 | struct { | |
118 | __u64 start; | |
119 | __u64 end; | |
74c98e20 | 120 | } remove; |
3a5eb5b4 | 121 | |
c1fcf220 DDAG |
122 | struct { |
123 | /* unused reserved fields */ | |
124 | __u64 reserved1; | |
125 | __u64 reserved2; | |
126 | __u64 reserved3; | |
127 | } reserved; | |
128 | } arg; | |
b89485a5 | 129 | } __attribute__((packed)); |
c1fcf220 DDAG |
130 | |
131 | /* | |
132 | * Start at 0x12 and not at 0 to be more strict against bugs. | |
133 | */ | |
134 | #define UFFD_EVENT_PAGEFAULT 0x12 | |
c1fcf220 | 135 | #define UFFD_EVENT_FORK 0x13 |
3a5eb5b4 | 136 | #define UFFD_EVENT_REMAP 0x14 |
74c98e20 CH |
137 | #define UFFD_EVENT_REMOVE 0x15 |
138 | #define UFFD_EVENT_UNMAP 0x16 | |
c1fcf220 DDAG |
139 | |
140 | /* flags for UFFD_EVENT_PAGEFAULT */ | |
141 | #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ | |
142 | #define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */ | |
278f064e | 143 | #define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */ |
c1fcf220 DDAG |
144 | |
145 | struct uffdio_api { | |
146 | /* userland asks for an API number and the features to enable */ | |
147 | __u64 api; | |
148 | /* | |
149 | * Kernel answers below with all the available features for | |
150 | * the API, this notifies userland of which events and/or | |
151 | * which flags for each event are enabled in the current | |
152 | * kernel. | |
153 | * | |
154 | * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE | |
155 | * are to be considered implicitly always enabled in all kernels as | |
156 | * long as the uffdio_api.api requested matches UFFD_API. | |
3a5eb5b4 PB |
157 | * |
158 | * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER | |
159 | * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on | |
160 | * hugetlbfs virtual memory ranges. Adding or not adding | |
161 | * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has | |
162 | * no real functional effect after UFFDIO_API returns, but | |
163 | * it's only useful for an initial feature set probe at | |
164 | * UFFDIO_API time. There are two ways to use it: | |
165 | * | |
166 | * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the | |
167 | * uffdio_api.features before calling UFFDIO_API, an error | |
168 | * will be returned by UFFDIO_API on a kernel without | |
169 | * hugetlbfs missing support | |
170 | * | |
171 | * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can be left out of | |
172 | * uffdio_api.features and instead it will be set by the | |
173 | * kernel in the uffdio_api.features if the kernel supports | |
174 | * it, so userland can later check if the feature flag is | |
175 | * present in uffdio_api.features after UFFDIO_API | |
176 | * succeeded. | |
177 | * | |
178 | * UFFD_FEATURE_MISSING_SHMEM works the same as | |
179 | * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem | |
180 | * (i.e. tmpfs and other shmem based APIs). | |
d4083f50 AP |
181 | * |
182 | * UFFD_FEATURE_SIGBUS feature means no page-fault | |
183 | * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead | |
184 | * a SIGBUS signal will be sent to the faulting process. | |
185 | * | |
186 | * UFFD_FEATURE_THREAD_ID means the pid of the page faulted task_struct | |
187 | * will be returned; if the feature is not requested, 0 will be returned. | |
278f064e EH |
188 | * |
189 | * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults | |
190 | * can be intercepted (via REGISTER_MODE_MINOR) for | |
191 | * hugetlbfs-backed pages. | |
327d4b7f BR |
192 | * |
193 | * UFFD_FEATURE_MINOR_SHMEM indicates the same support as | |
194 | * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead. | |
e4082063 AW |
195 | * |
196 | * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page | |
197 | * faults would be provided and the offset within the page would not be | |
198 | * masked. | |
d525f73f CQ |
199 | * |
200 | * UFFD_FEATURE_WP_HUGETLBFS_SHMEM indicates that userfaultfd | |
201 | * write-protection mode is supported on both shmem and hugetlbfs. | |
c1fcf220 | 202 | */ |
c1fcf220 DDAG |
203 | #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) |
204 | #define UFFD_FEATURE_EVENT_FORK (1<<1) | |
3a5eb5b4 | 205 | #define UFFD_FEATURE_EVENT_REMAP (1<<2) |
74c98e20 | 206 | #define UFFD_FEATURE_EVENT_REMOVE (1<<3) |
3a5eb5b4 PB |
207 | #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) |
208 | #define UFFD_FEATURE_MISSING_SHMEM (1<<5) | |
74c98e20 | 209 | #define UFFD_FEATURE_EVENT_UNMAP (1<<6) |
d4083f50 AP |
210 | #define UFFD_FEATURE_SIGBUS (1<<7) |
211 | #define UFFD_FEATURE_THREAD_ID (1<<8) | |
278f064e | 212 | #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) |
327d4b7f | 213 | #define UFFD_FEATURE_MINOR_SHMEM (1<<10) |
e4082063 | 214 | #define UFFD_FEATURE_EXACT_ADDRESS (1<<11) |
d525f73f | 215 | #define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12) |
c1fcf220 DDAG |
216 | __u64 features; |
217 | ||
218 | __u64 ioctls; | |
219 | }; | |
220 | ||
221 | struct uffdio_range { | |
222 | __u64 start; | |
223 | __u64 len; | |
224 | }; | |
225 | ||
226 | struct uffdio_register { | |
227 | struct uffdio_range range; | |
228 | #define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0) | |
229 | #define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1) | |
278f064e | 230 | #define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2) |
c1fcf220 DDAG |
231 | __u64 mode; |
232 | ||
233 | /* | |
234 | * kernel answers which ioctl commands are available for the | |
235 | * range, keep at the end as the last 8 bytes aren't read. | |
236 | */ | |
237 | __u64 ioctls; | |
238 | }; | |
239 | ||
240 | struct uffdio_copy { | |
241 | __u64 dst; | |
242 | __u64 src; | |
243 | __u64 len; | |
dc6f8d45 | 244 | #define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0) |
c1fcf220 | 245 | /* |
dc6f8d45 CH |
246 | * UFFDIO_COPY_MODE_WP will map the page write protected on |
247 | * the fly. UFFDIO_COPY_MODE_WP is available only if the | |
248 | * write protected ioctl is implemented for the range | |
249 | * according to the uffdio_register.ioctls. | |
c1fcf220 | 250 | */ |
dc6f8d45 | 251 | #define UFFDIO_COPY_MODE_WP ((__u64)1<<1) |
c1fcf220 DDAG |
252 | __u64 mode; |
253 | ||
254 | /* | |
255 | * "copy" is written by the ioctl and must be at the end: the | |
256 | * copy_from_user will not read the last 8 bytes. | |
257 | */ | |
258 | __s64 copy; | |
259 | }; | |
260 | ||
261 | struct uffdio_zeropage { | |
262 | struct uffdio_range range; | |
263 | #define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0) | |
264 | __u64 mode; | |
265 | ||
266 | /* | |
267 | * "zeropage" is written by the ioctl and must be at the end: | |
268 | * the copy_from_user will not read the last 8 bytes. | |
269 | */ | |
270 | __s64 zeropage; | |
271 | }; | |
272 | ||
dc6f8d45 CH |
273 | struct uffdio_writeprotect { |
274 | struct uffdio_range range; | |
275 | /* | |
276 | * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range, | |
277 | * unset the flag to undo protection of a range which was previously | |
278 | * write protected. | |
279 | * | |
280 | * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up | |
281 | * any waiting thread after the operation succeeds. | |
282 | * | |
283 | * NOTE: Write protecting a region (WP=1) is unrelated to page faults, | |
284 | * therefore DONTWAKE flag is meaningless with WP=1. Removing write | |
285 | * protection (WP=0) in response to a page fault wakes the faulting | |
286 | * task unless DONTWAKE is set. | |
287 | */ | |
288 | #define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0) | |
289 | #define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1) | |
290 | __u64 mode; | |
291 | }; | |
292 | ||
278f064e EH |
293 | struct uffdio_continue { |
294 | struct uffdio_range range; | |
295 | #define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0) | |
296 | __u64 mode; | |
297 | ||
298 | /* | |
299 | * Fields below here are written by the ioctl and must be at the end: | |
300 | * the copy_from_user will not read past here. | |
301 | */ | |
302 | __s64 mapped; | |
303 | }; | |
304 | ||
b3c818a4 EF |
305 | /* |
306 | * Flags for the userfaultfd(2) system call itself. | |
307 | */ | |
308 | ||
309 | /* | |
310 | * Create a userfaultfd that can handle page faults only in user mode. | |
311 | */ | |
312 | #define UFFD_USER_MODE_ONLY 1 | |
313 | ||
c1fcf220 | 314 | #endif /* _LINUX_USERFAULTFD_H */ |