]>
Commit | Line | Data |
---|---|---|
97b6084b MK |
1 | .\" Copyright (c) 2016, IBM Corporation. |
2 | .\" Written by Mike Rapoport <rppt@linux.vnet.ibm.com> | |
3 | .\" and Copyright (C) 2016 Michael Kerrisk <mtk.manpages@gmail.com> | |
4 | .\" | |
5 | .\" %%%LICENSE_START(VERBATIM) | |
6 | .\" Permission is granted to make and distribute verbatim copies of this | |
7 | .\" manual provided the copyright notice and this permission notice are | |
8 | .\" preserved on all copies. | |
9 | .\" | |
10 | .\" Permission is granted to copy and distribute modified versions of this | |
11 | .\" manual under the conditions for verbatim copying, provided that the | |
12 | .\" entire resulting derived work is distributed under the terms of a | |
13 | .\" permission notice identical to this one. | |
14 | .\" | |
15 | .\" Since the Linux kernel and libraries are constantly changing, this | |
16 | .\" manual page may be incorrect or out-of-date. The author(s) assume no | |
17 | .\" responsibility for errors or omissions, or for damages resulting from | |
18 | .\" the use of the information contained herein. The author(s) may not | |
19 | .\" have taken the same level of care in the production of this manual, | |
20 | .\" which is licensed free of charge, as they might when working | |
21 | .\" professionally. | |
22 | .\" | |
23 | .\" Formatted or processed versions of this manual, if unaccompanied by | |
24 | .\" the source, must acknowledge the copyright and authors of this work. | |
25 | .\" %%%LICENSE_END | |
26 | .\" | |
27 | .\" | |
9ba01802 | 28 | .TH IOCTL_USERFAULTFD 2 2019-03-06 "Linux" "Linux Programmer's Manual" |
97b6084b | 29 | .SH NAME |
7c4dfcac | 30 | ioctl_userfaultfd \- create a file descriptor for handling page faults in user |
97b6084b MK |
31 | space |
32 | .SH SYNOPSIS | |
33 | .nf | |
34 | .B #include <sys/ioctl.h> | |
dbfe9c70 | 35 | .PP |
97b6084b MK |
36 | .BI "int ioctl(int " fd ", int " cmd ", ...);" |
37 | .fi | |
38 | .SH DESCRIPTION | |
39 | Various | |
40 | .BR ioctl (2) | |
41 | operations can be performed on a userfaultfd object (created by a call to | |
42 | .BR userfaultfd (2)) | |
43 | using calls of the form: | |
a458bc45 MK |
44 | .PP |
45 | .in +4n | |
46 | .EX | |
47 | ioctl(fd, cmd, argp); | |
48 | .EE | |
49 | .in | |
97b6084b MK |
50 | In the above, |
51 | .I fd | |
52 | is a file descriptor referring to a userfaultfd object, | |
53 | .I cmd | |
54 | is one of the commands listed below, and | |
55 | .I argp | |
56 | is a pointer to a data structure that is specific to | |
57 | .IR cmd . | |
efeece04 | 58 | .PP |
fe905357 | 59 | The various |
97b6084b | 60 | .BR ioctl (2) |
fe905357 MK |
61 | operations are described below. |
62 | The | |
f7111396 | 63 | .BR UFFDIO_API , |
fe905357 MK |
64 | .BR UFFDIO_REGISTER , |
65 | and | |
66 | .BR UFFDIO_UNREGISTER | |
67 | operations are used to | |
68 | .I configure | |
69 | userfaultfd behavior. | |
70 | These operations allow the caller to choose what features will be enabled and | |
97b6084b | 71 | what kinds of events will be delivered to the application. |
fe905357 MK |
72 | The remaining operations are |
73 | .IR range | |
74 | operations. | |
e8a846ae | 75 | These operations enable the calling application to resolve page-fault |
a18cc179 | 76 | events. |
637512ed MK |
77 | .\" |
78 | .SS UFFDIO_API | |
1b945386 | 79 | (Since Linux 4.3.) |
97b6084b | 80 | Enable operation of the userfaultfd and perform API handshake. |
efeece04 | 81 | .PP |
97b6084b | 82 | The |
74bd6e93 MK |
83 | .I argp |
84 | argument is a pointer to a | |
85 | .IR uffdio_api | |
86 | structure, defined as: | |
b76974c1 | 87 | .PP |
97b6084b | 88 | .in +4n |
b76974c1 | 89 | .EX |
97b6084b | 90 | struct uffdio_api { |
7cb4a3d6 | 91 | __u64 api; /* Requested API version (input) */ |
fa744079 | 92 | __u64 features; /* Requested features (input/output) */ |
7cb4a3d6 | 93 | __u64 ioctls; /* Available ioctl() operations (output) */ |
97b6084b | 94 | }; |
b76974c1 | 95 | .EE |
97b6084b | 96 | .in |
b76974c1 | 97 | .PP |
97b6084b MK |
98 | The |
99 | .I api | |
100 | field denotes the API version requested by the application. | |
efeece04 | 101 | .PP |
9a3c2194 MK |
102 | The kernel verifies that it can support the requested API version, |
103 | and sets the | |
97b6084b MK |
104 | .I features |
105 | and | |
106 | .I ioctls | |
107 | fields to bit masks representing all the available features and the generic | |
405513d3 | 108 | .BR ioctl (2) |
97b6084b | 109 | operations available. |
efeece04 | 110 | .PP |
30dced33 MR |
111 | For Linux kernel versions before 4.11, the |
112 | .I features | |
113 | field must be initialized to zero before the call to | |
1a053be3 MK |
114 | .BR UFFDIO_API , |
115 | and zero (i.e., no feature bits) is placed in the | |
30dced33 MR |
116 | .I features |
117 | field by the kernel upon return from | |
118 | .BR ioctl (2). | |
efeece04 | 119 | .PP |
30dced33 MR |
120 | Starting from Linux 4.11, the |
121 | .I features | |
1a053be3 | 122 | field can be used to ask whether particular features are supported |
30dced33 MR |
123 | and explicitly enable userfaultfd features that are disabled by default. |
124 | The kernel always reports all the available features in the | |
9a3c2194 MK |
125 | .I features |
126 | field. | |
efeece04 | 127 | .PP |
66487517 MR |
128 | To enable userfaultfd features the application should set |
129 | a bit corresponding to each feature it wants to enable in the | |
130 | .I features | |
131 | field. | |
132 | If the kernel supports all the requested features it will enable them. | |
133 | Otherwise it will zero out the returned | |
134 | .I uffdio_api | |
135 | structure and return | |
136 | .BR EINVAL . | |
30dced33 | 137 | .\" FIXME add more details about feature negotiation and enablement |
efeece04 | 138 | .PP |
364270d0 | 139 | The following feature bits may be set: |
30dced33 | 140 | .TP |
364270d0 | 141 | .BR UFFD_FEATURE_EVENT_FORK " (since Linux 4.11)" |
66487517 MR |
142 | When this feature is enabled, |
143 | the userfaultfd objects associated with a parent process are duplicated | |
144 | into the child process during | |
145 | .BR fork (2) | |
42630fa9 MK |
146 | and a |
147 | .B UFFD_EVENT_FORK | |
81bb1233 | 148 | event is delivered to the userfaultfd monitor. |
30dced33 | 149 | .TP |
364270d0 | 150 | .BR UFFD_FEATURE_EVENT_REMAP " (since Linux 4.11)" |
66487517 MR |
151 | If this feature is enabled, |
152 | when the faulting process invokes | |
42630fa9 | 153 | .BR mremap (2), |
66487517 | 154 | the userfaultfd monitor will receive an event of type |
42630fa9 | 155 | .BR UFFD_EVENT_REMAP . |
30dced33 | 156 | .TP |
364270d0 | 157 | .BR UFFD_FEATURE_EVENT_REMOVE " (since Linux 4.11)" |
66487517 MR |
158 | If this feature is enabled, |
159 | when the faulting process calls | |
42630fa9 MK |
160 | .BR madvise (2) |
161 | with the | |
162 | .B MADV_DONTNEED | |
66487517 | 163 | or |
42630fa9 MK |
164 | .B MADV_REMOVE |
165 | advice value to free a virtual memory area, | |
66487517 | 166 | the userfaultfd monitor will receive an event of type |
42630fa9 | 167 | .BR UFFD_EVENT_REMOVE . |
30dced33 | 168 | .TP |
364270d0 | 169 | .BR UFFD_FEATURE_EVENT_UNMAP " (since Linux 4.11)" |
66487517 MR |
170 | If this feature is enabled, |
171 | when the faulting process unmaps virtual memory either explicitly with | |
42630fa9 MK |
172 | .BR munmap (2), |
173 | or implicitly during either | |
66487517 MR |
174 | .BR mmap (2) |
175 | or | |
42630fa9 | 176 | .BR mremap (2), |
66487517 | 177 | the userfaultfd monitor will receive an event of type |
42630fa9 | 178 | .BR UFFD_EVENT_UNMAP . |
30dced33 | 179 | .TP |
364270d0 | 180 | .BR UFFD_FEATURE_MISSING_HUGETLBFS " (since Linux 4.11)" |
66487517 MR |
181 | If this feature bit is set, |
182 | the kernel supports registering userfaultfd ranges on hugetlbfs | |
183 | virtual memory areas. | |
30dced33 | 184 | .TP |
364270d0 | 185 | .BR UFFD_FEATURE_MISSING_SHMEM " (since Linux 4.11)" |
66487517 | 186 | If this feature bit is set, |
41e9e648 MR |
187 | the kernel supports registering userfaultfd ranges on shared memory areas. |
188 | This includes all kernel shared memory APIs: | |
189 | System V shared memory, | |
5191c688 MK |
190 | .BR tmpfs (5), |
191 | shared mappings of | |
192 | .IR /dev/zero , | |
193 | .BR mmap (2) | |
194 | with the | |
f1008f5f | 195 | .B MAP_SHARED |
41e9e648 MR |
196 | flag set, |
197 | .BR memfd_create (2), | |
5191c688 | 198 | and so on. |
2c371032 | 199 | .TP |
364270d0 | 200 | .BR UFFD_FEATURE_SIGBUS " (since Linux 4.14)" |
1df2779d | 201 | .\" commit 2d6d6f5a09a96cc1fec7ed992b825e05f64cb50e |
1df2779d MK |
202 | If this feature bit is set, no page-fault events |
203 | .RB ( UFFD_EVENT_PAGEFAULT ) | |
204 | will be delivered. | |
205 | Instead, a | |
2c371032 | 206 | .B SIGBUS |
1df2779d MK |
207 | signal will be sent to the faulting process. |
208 | Applications using this | |
2c371032 PS |
209 | feature will not require the use of a userfaultfd monitor for processing |
210 | memory accesses to the regions registered with userfaultfd. | |
1df2779d | 211 | .PP |
9a3c2194 MK |
212 | The returned |
213 | .I ioctls | |
214 | field can contain the following bits: | |
215 | .\" FIXME This user-space API seems not fully polished. Why are there | |
53b95540 | 216 | .\" not constants defined for each of the bit-mask values listed below? |
9a3c2194 MK |
217 | .TP |
218 | .B 1 << _UFFDIO_API | |
219 | The | |
220 | .B UFFDIO_API | |
221 | operation is supported. | |
222 | .TP | |
223 | .B 1 << _UFFDIO_REGISTER | |
224 | The | |
225 | .B UFFDIO_REGISTER | |
226 | operation is supported. | |
227 | .TP | |
228 | .B 1 << _UFFDIO_UNREGISTER | |
229 | The | |
230 | .B UFFDIO_UNREGISTER | |
231 | operation is supported. | |
9a3c2194 | 232 | .PP |
97b6084b MK |
233 | This |
234 | .BR ioctl (2) | |
235 | operation returns 0 on success. | |
236 | On error, \-1 is returned and | |
237 | .I errno | |
238 | is set to indicate the cause of the error. | |
239 | Possible errors include: | |
cd8389ff MK |
240 | .TP |
241 | .B EFAULT | |
242 | .I argp | |
243 | refers to an address that is outside the calling process's | |
244 | accessible address space. | |
97b6084b MK |
245 | .TP |
246 | .B EINVAL | |
ffbc7efc MK |
247 | The userfaultfd has already been enabled by a previous |
248 | .BR UFFDIO_API | |
249 | operation. | |
250 | .TP | |
251 | .B EINVAL | |
9a3c2194 MK |
252 | The API version requested in the |
253 | .I api | |
254 | field is not supported by this kernel, or the | |
255 | .I features | |
66487517 MR |
256 | field passed to the kernel includes feature bits that are not supported |
257 | by the current kernel version. | |
ef443f07 | 258 | .\" FIXME In the above error case, the returned 'uffdio_api' structure is |
53b95540 | 259 | .\" zeroed out. Why is this done? This should be explained in the manual page. |
637512ed | 260 | .\" |
1c63cefd MK |
261 | .\" Mike Rapoport: |
262 | .\" In my understanding the uffdio_api | |
263 | .\" structure is zeroed to allow the caller | |
264 | .\" to distinguish the reasons for -EINVAL. | |
265 | .\" | |
637512ed | 266 | .SS UFFDIO_REGISTER |
1b945386 | 267 | (Since Linux 4.3.) |
97b6084b | 268 | Register a memory address range with the userfaultfd object. |
d87f377e | 269 | The pages in the range must be "compatible". |
efeece04 | 270 | .PP |
ba92d8b4 | 271 | Up to Linux kernel 4.11, |
d87f377e MK |
272 | only private anonymous ranges are compatible for registering with |
273 | .BR UFFDIO_REGISTER . | |
efeece04 | 274 | .PP |
ba92d8b4 MR |
275 | Since Linux 4.11, |
276 | hugetlbfs and shared memory ranges are also compatible with | |
277 | .BR UFFDIO_REGISTER . | |
efeece04 | 278 | .PP |
97b6084b | 279 | The |
74bd6e93 MK |
280 | .I argp |
281 | argument is a pointer to a | |
97b6084b | 282 | .I uffdio_register |
74bd6e93 | 283 | structure, defined as: |
b76974c1 | 284 | .PP |
97b6084b | 285 | .in +4n |
b76974c1 | 286 | .EX |
97b6084b | 287 | struct uffdio_range { |
7cb4a3d6 | 288 | __u64 start; /* Start of range */ |
b38f1fa2 | 289 | __u64 len; /* Length of range (bytes) */ |
97b6084b MK |
290 | }; |
291 | ||
292 | struct uffdio_register { | |
293 | struct uffdio_range range; | |
7cb4a3d6 MK |
294 | __u64 mode; /* Desired mode of operation (input) */ |
295 | __u64 ioctls; /* Available ioctl() operations (output) */ | |
97b6084b | 296 | }; |
b76974c1 | 297 | .EE |
97b6084b | 298 | .in |
efeece04 | 299 | .PP |
97b6084b MK |
300 | The |
301 | .I range | |
302 | field defines a memory range starting at | |
303 | .I start | |
304 | and continuing for | |
305 | .I len | |
306 | bytes that should be handled by the userfaultfd. | |
efeece04 | 307 | .PP |
97b6084b MK |
308 | The |
309 | .I mode | |
310 | field defines the mode of operation desired for this memory region. | |
311 | The following values may be bitwise ORed to set the userfaultfd mode for | |
312 | the specified range: | |
97b6084b MK |
313 | .TP |
314 | .B UFFDIO_REGISTER_MODE_MISSING | |
a23c93d8 | 315 | Track page faults on missing pages. |
97b6084b MK |
316 | .TP |
317 | .B UFFDIO_REGISTER_MODE_WP | |
318 | Track page faults on write-protected pages. | |
52b794f1 | 319 | .PP |
97b6084b MK |
320 | Currently, the only supported mode is |
321 | .BR UFFDIO_REGISTER_MODE_MISSING . | |
637512ed | 322 | .PP |
264a0a30 | 323 | If the operation is successful, the kernel modifies the |
97b6084b | 324 | .I ioctls |
264a0a30 MK |
325 | bit-mask field to indicate which |
326 | .BR ioctl (2) | |
327 | operations are available for the specified range. | |
328 | This returned bit mask is as for | |
329 | .BR UFFDIO_API . | |
efeece04 | 330 | .PP |
264a0a30 MK |
331 | This |
332 | .BR ioctl (2) | |
333 | operation returns 0 on success. | |
334 | On error, \-1 is returned and | |
335 | .I errno | |
336 | is set to indicate the cause of the error. | |
337 | Possible errors include: | |
338 | .\" FIXME Is the following error list correct? | |
339 | .\" | |
340 | .TP | |
341 | .B EBUSY | |
342 | A mapping in the specified range is registered with another | |
343 | userfaultfd object. | |
344 | .TP | |
cd8389ff MK |
345 | .B EFAULT |
346 | .I argp | |
347 | refers to an address that is outside the calling process's | |
348 | accessible address space. | |
349 | .TP | |
264a0a30 MK |
350 | .B EINVAL |
351 | An invalid or unsupported bit was specified in the | |
352 | .I mode | |
353 | field; or the | |
354 | .I mode | |
355 | field was zero. | |
356 | .TP | |
357 | .B EINVAL | |
358 | There is no mapping in the specified address range. | |
359 | .TP | |
360 | .B EINVAL | |
242c2b6b MK |
361 | .I range.start |
362 | or | |
363 | .I range.len | |
364 | is not a multiple of the system page size; or, | |
365 | .I range.len | |
366 | is zero; or these fields are otherwise invalid. | |
367 | .TP | |
368 | .B EINVAL | |
264a0a30 | 369 | There is an incompatible mapping in the specified address range. |
a440cc33 MK |
370 | .\" Mike Rapoport: |
371 | .\" ENOMEM if the process is exiting and the | |
99cf1681 | 372 | .\" mm_struct has gone by the time userfault grabs it. |
637512ed | 373 | .SS UFFDIO_UNREGISTER |
1b945386 | 374 | (Since Linux 4.3.) |
97b6084b | 375 | Unregister a memory address range from userfaultfd. |
1c63cefd MK |
376 | The pages in the range must be "compatible" (see the description of |
377 | .BR UFFDIO_REGISTER .) | |
efeece04 | 378 | .PP |
97b6084b MK |
379 | The address range to unregister is specified in the |
380 | .IR uffdio_range | |
381 | structure pointed to by | |
382 | .IR argp . | |
efeece04 | 383 | .PP |
97b6084b MK |
384 | This |
385 | .BR ioctl (2) | |
386 | operation returns 0 on success. | |
387 | On error, \-1 is returned and | |
388 | .I errno | |
389 | is set to indicate the cause of the error. | |
390 | Possible errors include: | |
97b6084b MK |
391 | .TP |
392 | .B EINVAL | |
393 | Either the | |
394 | .I start | |
395 | or the | |
396 | .I len | |
397 | field of the | |
398 | .I uffdio_range | |
242c2b6b MK |
399 | structure was not a multiple of the system page size; or the |
400 | .I len | |
401 | field was zero; or these fields were otherwise invalid. | |
97b6084b MK |
402 | .TP |
403 | .B EINVAL | |
404 | There was an incompatible mapping in the specified address range. | |
405 | .TP | |
406 | .B EINVAL | |
407 | There was no mapping in the specified address range. | |
637512ed MK |
408 | .\" |
409 | .SS UFFDIO_COPY | |
1b945386 | 410 | (Since Linux 4.3.) |
97b6084b MK |
411 | Atomically copy a continuous memory chunk into the userfault registered |
412 | range and optionally wake up the blocked thread. | |
413 | The source and destination addresses and the number of bytes to copy are | |
414 | specified by the | |
415 | .IR src ", " dst ", and " len | |
74bd6e93 MK |
416 | fields of the |
417 | .I uffdio_copy | |
418 | structure pointed to by | |
419 | .IR argp : | |
efeece04 | 420 | .PP |
97b6084b | 421 | .in +4n |
b76974c1 | 422 | .EX |
97b6084b | 423 | struct uffdio_copy { |
03ba66f3 YJW |
424 | __u64 dst; /* Destination of copy */ |
425 | __u64 src; /* Source of copy */ | |
7cb4a3d6 MK |
426 | __u64 len; /* Number of bytes to copy */ |
427 | __u64 mode; /* Flags controlling behavior of copy */ | |
428 | __s64 copy; /* Number of bytes copied, or negated error */ | |
97b6084b | 429 | }; |
b76974c1 | 430 | .EE |
97b6084b | 431 | .in |
637512ed | 432 | .PP |
1dca8e3d | 433 | The following value may be bitwise ORed in |
97b6084b MK |
434 | .IR mode |
435 | to change the behavior of the | |
436 | .B UFFDIO_COPY | |
437 | operation: | |
97b6084b MK |
438 | .TP |
439 | .B UFFDIO_COPY_MODE_DONTWAKE | |
e8a846ae | 440 | Do not wake up the thread that waits for page-fault resolution. |
637512ed | 441 | .PP |
97b6084b MK |
442 | The |
443 | .I copy | |
1dca8e3d MK |
444 | field is used by the kernel to return the number of bytes |
445 | that was actually copied, or an error (a negated | |
446 | .IR errno -style | |
447 | value). | |
53b95540 MK |
448 | .\" FIXME Above: Why is the 'copy' field used to return error values? |
449 | .\" This should be explained in the manual page. | |
1dca8e3d MK |
450 | If the value returned in |
451 | .I copy | |
452 | doesn't match the value that was specified in | |
453 | .IR len , | |
454 | the operation fails with the error | |
455 | .BR EAGAIN . | |
456 | The | |
457 | .I copy | |
458 | field is output-only; | |
97b6084b MK |
459 | it is not read by the |
460 | .B UFFDIO_COPY | |
461 | operation. | |
efeece04 | 462 | .PP |
1dca8e3d MK |
463 | This |
464 | .BR ioctl (2) | |
465 | operation returns 0 on success. | |
466 | In this case, the entire area was copied. | |
467 | On error, \-1 is returned and | |
468 | .I errno | |
469 | is set to indicate the cause of the error. | |
470 | Possible errors include: | |
471 | .TP | |
472 | .B EAGAIN | |
473 | The number of bytes copied (i.e., the value returned in the | |
474 | .I copy | |
475 | field) | |
476 | does not equal the value that was specified in the | |
477 | .I len | |
478 | field. | |
479 | .TP | |
480 | .B EINVAL | |
481 | Either | |
482 | .I dst | |
483 | or | |
484 | .I len | |
485 | was not a multiple of the system page size, or the range specified by | |
486 | .IR src | |
487 | and | |
488 | .IR len | |
489 | or | |
490 | .IR dst | |
491 | and | |
492 | .IR len | |
493 | was invalid. | |
494 | .TP | |
495 | .B EINVAL | |
496 | An invalid bit was specified in the | |
497 | .IR mode | |
498 | field. | |
337405ef | 499 | .TP |
5191c688 | 500 | .BR ENOENT " (since Linux 4.11)" |
337405ef | 501 | The faulting process has changed |
5191c688 | 502 | its virtual memory layout simultaneously with an outstanding |
337405ef MR |
503 | .B UFFDIO_COPY |
504 | operation. | |
505 | .TP | |
9dc44855 | 506 | .BR ENOSPC " (from Linux 4.11 until Linux 4.13)" |
e3ec1293 MR |
507 | The faulting process has exited at the time of a |
508 | .B UFFDIO_COPY | |
509 | operation. | |
510 | .TP | |
511 | .BR ESRCH " (since Linux 4.13)" | |
5191c688 | 512 | The faulting process has exited at the time of a |
337405ef MR |
513 | .B UFFDIO_COPY |
514 | operation. | |
97b6084b | 515 | .\" |
611ec3a3 | 516 | .SS UFFDIO_ZEROPAGE |
1b945386 | 517 | (Since Linux 4.3.) |
611ec3a3 | 518 | Zero out a memory range registered with userfaultfd. |
efeece04 | 519 | .PP |
97b6084b MK |
520 | The requested range is specified by the |
521 | .I range | |
522 | field of the | |
523 | .I uffdio_zeropage | |
74bd6e93 MK |
524 | structure pointed to by |
525 | .IR argp : | |
efeece04 | 526 | .PP |
97b6084b | 527 | .in +4n |
b8302363 | 528 | .EX |
97b6084b MK |
529 | struct uffdio_zeropage { |
530 | struct uffdio_range range; | |
7cb4a3d6 MK |
531 | __u64 mode; /* Flags controlling behavior of zeroing */ |
532 | __s64 zeropage; /* Number of bytes zeroed, or negated error */ | |
97b6084b | 533 | }; |
b8302363 | 534 | .EE |
97b6084b | 535 | .in |
637512ed | 536 | .PP |
611ec3a3 | 537 | The following value may be bitwise ORed in |
97b6084b | 538 | .IR mode |
405513d3 | 539 | to change the behavior of the |
99de80c5 | 540 | .B UFFDIO_ZEROPAGE |
97b6084b | 541 | operation: |
97b6084b MK |
542 | .TP |
543 | .B UFFDIO_ZEROPAGE_MODE_DONTWAKE | |
544 | Do not wake up the thread that waits for page-fault resolution. | |
637512ed | 545 | .PP |
97b6084b MK |
546 | The |
547 | .I zeropage | |
611ec3a3 | 548 | field is used by the kernel to return the number of bytes |
97b6084b MK |
549 | that was actually zeroed, |
550 | or an error in the same manner as | |
611ec3a3 MK |
551 | .BR UFFDIO_COPY . |
552 | .\" FIXME Why is the 'zeropage' field used to return error values? | |
53b95540 | 553 | .\" This should be explained in the manual page. |
611ec3a3 MK |
554 | If the value returned in the |
555 | .I zeropage | |
556 | field doesn't match the value that was specified in | |
557 | .IR range.len , | |
558 | the operation fails with the error | |
559 | .BR EAGAIN . | |
560 | The | |
561 | .I zeropage | |
562 | field is output-only; | |
563 | it is not read by the | |
99de80c5 | 564 | .B UFFDIO_ZEROPAGE |
611ec3a3 | 565 | operation. |
efeece04 | 566 | .PP |
611ec3a3 MK |
567 | This |
568 | .BR ioctl (2) | |
569 | operation returns 0 on success. | |
570 | In this case, the entire area was zeroed. | |
571 | On error, \-1 is returned and | |
572 | .I errno | |
573 | is set to indicate the cause of the error. | |
574 | Possible errors include: | |
575 | .TP | |
576 | .B EAGAIN | |
577 | The number of bytes zeroed (i.e., the value returned in the | |
578 | .I zeropage | |
579 | field) | |
580 | does not equal the value that was specified in the | |
581 | .I range.len | |
582 | field. | |
583 | .TP | |
584 | .B EINVAL | |
585 | Either | |
586 | .I range.start | |
587 | or | |
588 | .I range.len | |
242c2b6b MK |
589 | was not a multiple of the system page size; or |
590 | .I range.len | |
591 | was zero; or the range specified was invalid. | |
611ec3a3 MK |
592 | .TP |
593 | .B EINVAL | |
594 | An invalid bit was specified in the | |
595 | .IR mode | |
596 | field. | |
e3ec1293 MR |
597 | .TP |
598 | .BR ESRCH " (since Linux 4.13)" | |
599 | The faulting process has exited at the time of a | |
600 | .B UFFDIO_ZEROPAGE | |
601 | operation. | |
97b6084b | 602 | .\" |
637512ed | 603 | .SS UFFDIO_WAKE |
1b945386 | 604 | (Since Linux 4.3.) |
66e11c1f MK |
605 | Wake up the thread waiting for page-fault resolution on |
606 | a specified memory address range. | |
efeece04 | 607 | .PP |
be7979d7 MK |
608 | The |
609 | .B UFFDIO_WAKE | |
610 | operation is used in conjunction with | |
611 | .BR UFFDIO_COPY | |
612 | and | |
613 | .BR UFFDIO_ZEROPAGE | |
614 | operations that have the | |
615 | .BR UFFDIO_COPY_MODE_DONTWAKE | |
616 | or | |
617 | .BR UFFDIO_ZEROPAGE_MODE_DONTWAKE | |
618 | bit set in the | |
619 | .I mode | |
620 | field. | |
621 | The userfault monitor can perform several | |
622 | .BR UFFDIO_COPY | |
623 | and | |
624 | .BR UFFDIO_ZEROPAGE | |
625 | operations in a batch and then explicitly wake up the faulting thread using | |
626 | .BR UFFDIO_WAKE . | |
efeece04 | 627 | .PP |
74bd6e93 MK |
628 | The |
629 | .I argp | |
630 | argument is a pointer to a | |
631 | .I uffdio_range | |
66e11c1f | 632 | structure (shown above) that specifies the address range. |
efeece04 | 633 | .PP |
97b6084b MK |
634 | This |
635 | .BR ioctl (2) | |
636 | operation returns 0 on success. | |
637 | On error, \-1 is returned and | |
638 | .I errno | |
639 | is set to indicate the cause of the error. | |
640 | Possible errors include: | |
97b6084b MK |
641 | .TP |
642 | .B EINVAL | |
242c2b6b | 643 | The |
97b6084b MK |
644 | .I start |
645 | or the | |
646 | .I len | |
647 | field of the | |
648 | .I uffdio_range | |
242c2b6b MK |
649 | structure was not a multiple of the system page size; or |
650 | .I len | |
651 | was zero; or the specified range was otherwise invalid. | |
97b6084b MK |
652 | .SH RETURN VALUE |
653 | See descriptions of the individual operations, above. | |
654 | .SH ERRORS | |
655 | See descriptions of the individual operations, above. | |
24fa87ff MK |
656 | In addition, the following general errors can occur for all of the |
657 | operations described above: | |
658 | .TP | |
659 | .B EFAULT | |
660 | .I argp | |
661 | does not point to a valid memory address. | |
662 | .TP | |
663 | .B EINVAL | |
664 | (For all operations except | |
665 | .BR UFFDIO_API .) | |
666 | The userfaultfd object has not yet been enabled (via the | |
667 | .BR UFFDIO_API | |
668 | operation). | |
97b6084b MK |
669 | .SH CONFORMING TO |
670 | These | |
671 | .BR ioctl (2) | |
405513d3 | 672 | operations are Linux-specific. |
53004b3b MR |
673 | .SH BUGS |
674 | In order to detect available userfault features and | |
5191c688 MK |
675 | enable some subset of those features, |
676 | the userfaultfd file descriptor must be closed after the first | |
53004b3b | 677 | .BR UFFDIO_API |
5191c688 | 678 | operation that queries feature availability and reopened before |
53004b3b MR |
679 | the second |
680 | .BR UFFDIO_API | |
5191c688 | 681 | operation that actually enables the desired features. |
e5efbe91 MK |
682 | .SH EXAMPLE |
683 | See | |
684 | .BR userfaultfd (2). | |
97b6084b MK |
685 | .SH SEE ALSO |
686 | .BR ioctl (2), | |
687 | .BR mmap (2), | |
688 | .BR userfaultfd (2) | |
efeece04 | 689 | .PP |
a2463bae | 690 | .IR Documentation/admin-guide/mm/userfaultfd.rst |
97b6084b | 691 | in the Linux kernel source tree |