]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/userfaultfd.2
ioctl_userfaultfd.2: Minor fixes to Prakash Sangappa's patch
[thirdparty/man-pages.git] / man2 / userfaultfd.2
CommitLineData
bf9b5158
MR
1.\" Copyright (c) 2016, IBM Corporation.
2.\" Written by Mike Rapoport <rppt@linux.vnet.ibm.com>
54e031a1 3.\" and Copyright (C) 2017 Michael Kerrisk <mtk.manpages@gmail.com>
bf9b5158
MR
4.\"
5.\" %%%LICENSE_START(VERBATIM)
6.\" Permission is granted to make and distribute verbatim copies of this
7.\" manual provided the copyright notice and this permission notice are
8.\" preserved on all copies.
9.\"
10.\" Permission is granted to copy and distribute modified versions of this
11.\" manual under the conditions for verbatim copying, provided that the
12.\" entire resulting derived work is distributed under the terms of a
13.\" permission notice identical to this one.
14.\"
15.\" Since the Linux kernel and libraries are constantly changing, this
16.\" manual page may be incorrect or out-of-date. The author(s) assume no
17.\" responsibility for errors or omissions, or for damages resulting from
18.\" the use of the information contained herein. The author(s) may not
19.\" have taken the same level of care in the production of this manual,
20.\" which is licensed free of charge, as they might when working
21.\" professionally.
22.\"
23.\" Formatted or processed versions of this manual, if unaccompanied by
24.\" the source, must acknowledge the copyright and authors of this work.
25.\" %%%LICENSE_END
26.\"
4b8c67d9 27.TH USERFAULTFD 2 2017-09-15 "Linux" "Linux Programmer's Manual"
bf9b5158 28.SH NAME
68a9e23c 29userfaultfd \- create a file descriptor for handling page faults in user space
bf9b5158
MR
30.SH SYNOPSIS
31.nf
32.B #include <sys/types.h>
3f894561 33.B #include <linux/userfaultfd.h>
68e4db0a 34.PP
bf9b5158
MR
35.BI "int userfaultfd(int " flags );
36.fi
37.PP
38.IR Note :
39There is no glibc wrapper for this system call; see NOTES.
40.SH DESCRIPTION
4aa7f5cf
MK
41.BR userfaultfd ()
42creates a new userfaultfd object that can be used for delegation of page-fault
43handling to a user-space application,
44and returns a file descriptor that refers to the new object.
45The new userfaultfd object is configured using
bf9b5158 46.BR ioctl (2).
efeece04 47.PP
4aa7f5cf 48Once the userfaultfd object is configured, the application can use
bf9b5158
MR
49.BR read (2)
50to receive userfaultfd notifications.
4aa7f5cf
MK
51The reads from userfaultfd may be blocking or non-blocking,
52depending on the value of
bf9b5158
MR
53.I flags
54used for the creation of the userfaultfd or subsequent calls to
4aa7f5cf 55.BR fcntl (2).
efeece04 56.PP
bf9b5158
MR
57The following values may be bitwise ORed in
58.IR flags
59to change the behavior of
60.BR userfaultfd ():
61.TP
62.BR O_CLOEXEC
4aa7f5cf 63Enable the close-on-exec flag for the new userfaultfd file descriptor.
bf9b5158
MR
64See the description of the
65.B O_CLOEXEC
66flag in
4aa7f5cf 67.BR open (2).
bf9b5158
MR
68.TP
69.BR O_NONBLOCK
4aa7f5cf 70Enables non-blocking operation for the userfaultfd object.
bf9b5158
MR
71See the description of the
72.BR O_NONBLOCK
73flag in
74.BR open (2).
7b8695f2
MK
75.PP
76When the last file descriptor referring to a userfaultfd object is closed,
77all memory ranges that were registered with the object are unregistered
5b1c4a1e 78and unread events are flushed.
bf9b5158 79.\"
54e031a1
MK
80.SS Usage
81The userfaultfd mechanism is designed to allow a thread in a multithreaded
82program to perform user-space paging for the other threads in the process.
83When a page fault occurs for one of the regions registered
84to the userfaultfd object,
85the faulting thread is put to sleep and
86an event is generated that can be read via the userfaultfd file descriptor.
87The fault-handling thread reads events from this file descriptor and services
88them using the operations described in
89.BR ioctl_userfaultfd (2).
90When servicing the page fault events,
91the fault-handling thread can trigger a wake-up for the sleeping thread.
efeece04 92.PP
d28aadf6
MR
93It is possible for the faulting threads and the fault-handling threads
94to run in the context of different processes.
95In this case, these threads may belong to different programs,
96and the program that executes the faulting threads
97will not necessarily cooperate with the program that handles the page faults.
98In such non-cooperative mode,
792570de 99the process that monitors userfaultfd and handles page faults
d28aadf6
MR
100needs to be aware of the changes in the virtual memory layout
101of the faulting process to avoid memory corruption.
efeece04 102.PP
5b1c4a1e 103Starting from Linux 4.11,
c2b3668b 104userfaultfd can also notify the fault-handling threads about changes
5b1c4a1e
MR
105in the virtual memory layout of the faulting process.
106In addition, if the faulting process invokes
c2b3668b 107.BR fork (2),
5b1c4a1e
MR
108the userfaultfd objects associated with the parent may be duplicated
109into the child process and the userfaultfd monitor will be notified
522ab2ff
MK
110(via the
111.B UFFD_EVENT_FORK
112described below)
5b1c4a1e
MR
113about the file descriptor associated with the userfault objects
114created for the child process,
522ab2ff 115which allows the userfaultfd monitor to perform user-space paging
5b1c4a1e 116for the child process.
265225c1 117Unlike page faults which have to be synchronous and require an
20050169
MR
118explicit or implicit wakeup,
119all other events are delivered asynchronously and
120the non-cooperative process resumes execution as
265225c1
MK
121soon as the userfaultfd manager executes
122.BR read (2).
123The userfaultfd manager should carefully synchronize calls to
124.B UFFDIO_COPY
125with the processing of events.
efeece04 126.PP
20050169
MR
127The current asynchronous model of the event delivery is optimal for
128single-threaded non-cooperative userfaultfd manager implementations.
94a8ebda
MK
129.\" Regarding the preceding sentence, Mike Rapoport says:
130.\" The major point here is that current events delivery model could be
131.\" problematic for multi-threaded monitor. I even suspect that it would be
132.\" impossible to ensure synchronization between page faults and non-page
133.\" fault events in multi-threaded monitor.
efeece04 134.PP
d28aadf6 135.\" FIXME elaborate about non-cooperating mode, describe its limitations
792570de 136.\" for kernels before 4.11, features added in 4.11
d28aadf6
MR
137.\" and limitations remaining in 4.11
138.\" Maybe it's worth adding a dedicated sub-section...
54e031a1 139.\"
bf9b5158
MR
140.SS Userfaultfd operation
141After the userfaultfd object is created with
4aa7f5cf
MK
142.BR userfaultfd (),
143the application must enable it using the
144.B UFFDIO_API
145.BR ioctl (2)
146operation.
147This operation allows a handshake between the kernel and user space
148to determine the API version and supported features.
5074d846
MK
149This operation must be performed before any of the other
150.BR ioctl (2)
151operations described below (or those operations fail with the
152.BR EINVAL
153error).
efeece04 154.PP
4aa7f5cf
MK
155After a successful
156.B UFFDIO_API
157operation,
158the application then registers memory address ranges using the
159.B UFFDIO_REGISTER
160.BR ioctl (2)
161operation.
162After successful completion of a
163.B UFFDIO_REGISTER
164operation,
165a page fault occurring in the requested memory range, and satisfying
166the mode defined at the registration time, will be forwarded by the kernel to
167the user-space application.
168The application can then use the
169.B UFFDIO_COPY
bf9b5158 170or
4aa7f5cf
MK
171.B UFFDIO_ZERO
172.BR ioctl (2)
173operations to resolve the page fault.
2c371032
PS
174.PP
175Starting from Linux 4.14, if the application sets the
176.B UFFD_FEATURE_SIGBUS
177feature bit using the
178.B UFFDIO_API
179.BR ioctl (2),
180no page-fault notification will be forwarded to
181user space; instead, a
182.B SIGBUS
183signal is delivered to the faulting process. With this feature,
184userfaultfd can be used for robustness purposes to simply catch
185any access to areas within the registered address range that do not
186have pages allocated, without having to listen to userfaultfd events.
187No userfaultfd monitor will be required for dealing with such memory
188accesses. For example, this feature can be useful for applications that
189want to prevent the kernel from automatically allocating pages and filling
190holes in sparse files when the hole is accessed through a mapped address.
191.PP
192The
193.B UFFD_FEATURE_SIGBUS
194feature is implicitly inherited through \fBfork\fP(2) if used in combination with
195.BR UFFD_FEATURE_EVENT_FORK .
196
efeece04 197.PP
6bc6d124 198Details of the various
4aa7f5cf 199.BR ioctl (2)
6bc6d124
MK
200operations can be found in
201.BR ioctl_userfaultfd (2).
efeece04 202.PP
5b1c4a1e
MR
203Since Linux 4.11, events other than page-fault may be enabled during
204.B UFFDIO_API
205operation.
efeece04 206.PP
c3d5d9df
MR
207Before Linux 4.11,
208userfaultfd can be used only with anonymous private memory mappings.
c3d5d9df
MR
209Since Linux 4.11,
210userfaultfd can also be used with hugetlbfs and shared memory mappings.
efeece04 211.PP
54e031a1
MK
212.\"
213.SS Reading from the userfaultfd structure
54e031a1
MK
214Each
215.BR read (2)
216from the userfaultfd file descriptor returns one or more
217.I uffd_msg
5b1c4a1e
MR
218structures, each of which describes a page-fault event
219or an event required for the non-cooperative userfaultfd usage:
efeece04 220.PP
54e031a1 221.in +4n
b8302363 222.EX
54e031a1 223struct uffd_msg {
c2b3668b 224 __u8 event; /* Type of event */
54e031a1
MK
225 ...
226 union {
00f4853c 227 struct {
c2b3668b
MK
228 __u64 flags; /* Flags describing fault */
229 __u64 address; /* Faulting address */
54e031a1 230 } pagefault;
c2b3668b
MK
231
232 struct { /* Since Linux 4.11 */
233 __u32 ufd; /* Userfault file descriptor
234 of the child process */
235 } fork;
236
237 struct { /* Since Linux 4.11 */
238 __u64 from; /* Old address of remapped area */
239 __u64 to; /* New address of remapped area */
240 __u64 len; /* Original mapping length */
241 } remap;
242
243 struct { /* Since Linux 4.11 */
244 __u64 start; /* Start address of removed area */
245 __u64 end; /* End address of removed area */
246 } remove;
b07243ab 247 ...
54e031a1
MK
248 } arg;
249
250 /* Padding fields omitted */
251} __packed;
b8302363 252.EE
e646a1ba 253.in
efeece04 254.PP
54e031a1
MK
255If multiple events are available and the supplied buffer is large enough,
256.BR read (2)
257returns as many events as will fit in the supplied buffer.
258If the buffer supplied to
259.BR read (2)
260is smaller than the size of the
261.I uffd_msg
262structure, the
263.BR read (2)
264fails with the error
265.BR EINVAL .
efeece04 266.PP
54e031a1
MK
267The fields set in the
268.I uffd_msg
269structure are as follows:
270.TP
271.I event
272The type of event.
5b1c4a1e
MR
273Depending on the event type,
274different fields of the
275.I arg
276union represent details required for the event processing.
277The non-page-fault events are generated only when the appropriate feature
278is enabled during the API handshake with the
279.B UFFDIO_API
280.BR ioctl (2).
efeece04 281.IP
5b1c4a1e
MR
282The following values can appear in the
283.I event
284field:
285.RS
286.TP
6c12d34e 287.BR UFFD_EVENT_PAGEFAULT " (since Linux 4.3)"
5b1c4a1e
MR
288A page-fault event.
289The page-fault details are available in the
290.I pagefault
291field.
54e031a1 292.TP
6c12d34e 293.BR UFFD_EVENT_FORK " (since Linux 4.11)"
5b1c4a1e 294Generated when the faulting process invokes
522ab2ff
MK
295.BR fork (2)
296(or
297.BR clone (2)
298without the
299.BR CLONE_VM
300flag).
5b1c4a1e
MR
301The event details are available in the
302.I fork
303field.
23db3790 304.\" FIXME describe duplication of userfault file descriptor during fork
5b1c4a1e 305.TP
6c12d34e 306.BR UFFD_EVENT_REMAP " (since Linux 4.11)"
5b1c4a1e 307Generated when the faulting process invokes
c2b3668b 308.BR mremap (2).
5b1c4a1e
MR
309The event details are available in the
310.I remap
311field.
312.TP
6c12d34e 313.BR UFFD_EVENT_REMOVE " (since Linux 4.11)"
5b1c4a1e
MR
314Generated when the faulting process invokes
315.BR madvise (2)
c2b3668b 316with
5b1c4a1e
MR
317.BR MADV_DONTNEED
318or
319.BR MADV_REMOVE
320advice.
321The event details are available in the
322.I remove
323field.
324.TP
6c12d34e 325.BR UFFD_EVENT_UNMAP " (since Linux 4.11)"
5b1c4a1e
MR
326Generated when the faulting process unmaps a memory range,
327either explicitly using
328.BR munmap (2)
c2b3668b 329or implicitly during
5b1c4a1e
MR
330.BR mmap (2)
331or
c2b3668b 332.BR mremap (2).
5b1c4a1e
MR
333The event details are available in the
334.I remove
335field.
336.RE
337.TP
338.I pagefault.address
54e031a1
MK
339The address that triggered the page fault.
340.TP
5b1c4a1e 341.I pagefault.flags
54e031a1
MK
342A bit mask of flags that describe the event.
343For
344.BR UFFD_EVENT_PAGEFAULT ,
345the following flag may appear:
346.RS
347.TP
348.B UFFD_PAGEFAULT_FLAG_WRITE
349If the address is in a range that was registered with the
350.B UFFDIO_REGISTER_MODE_MISSING
351flag (see
352.BR ioctl_userfaultfd (2))
353and this flag is set, this is a write fault;
354otherwise it is a read fault.
355.\"
356.\" UFFD_PAGEFAULT_FLAG_WP is not yet supported.
357.RE
5b1c4a1e
MR
358.TP
359.I fork.ufd
360The file descriptor associated with the userfault object
522ab2ff
MK
361created for the child created by
362.BR fork (2).
5b1c4a1e
MR
363.TP
364.I remap.from
365The original address of the memory range that was remapped using
366.BR mremap (2).
367.TP
368.I remap.to
369The new address of the memory range that was remapped using
370.BR mremap (2).
371.TP
372.I remap.len
23db3790 373The original length of the memory range that was remapped using
5b1c4a1e
MR
374.BR mremap (2).
375.TP
376.I remove.start
377The start address of the memory range that was freed using
378.BR madvise (2)
379or unmapped.
380.TP
381.I remove.end
382The end address of the memory range that was freed using
383.BR madvise (2)
384or unmapped.
54e031a1 385.PP
058b32ac
MK
386A
387.BR read (2)
388on a userfaultfd file descriptor can fail with the following errors:
389.TP
390.B EINVAL
391The userfaultfd object has not yet been enabled using the
392.BR UFFDIO_API
393.BR ioctl (2)
394operation.
395.PP
1bf00323
MK
396If the
397.B O_NONBLOCK
398flag is enabled in the associated open file description,
399the userfaultfd file descriptor can be monitored with
54e031a1
MK
400.BR poll (2),
401.BR select (2),
402and
403.BR epoll (7).
404When events are available, the file descriptor is indicated as readable.
1bf00323
MK
405If the
406.B O_NONBLOCK
407flag is not enabled, then
408.BR poll (2)
409(always) indicates the file as having a
410.BR POLLERR
411condition, and
412.BR select (2)
413indicates the file descriptor as both readable and writable.
414.\" FIXME What is the reason for this seemingly odd behavior with respect
415.\" to the O_NONBLOCK flag? (see userfaultfd_poll() in fs/userfaultfd.c).
416.\" Something needs to be said about this.
bf9b5158 417.SH RETURN VALUE
4aa7f5cf
MK
418On success,
419.BR userfaultfd ()
420returns a new file descriptor that refers to the userfaultfd object.
bf9b5158
MR
421On error, \-1 is returned, and
422.I errno
423is set appropriately.
424.SH ERRORS
425.TP
426.B EINVAL
427An unsupported value was specified in
428.IR flags .
429.TP
430.BR EMFILE
431The per-process limit on the number of open file descriptors has been
432reached.
433.TP
434.B ENFILE
435The system-wide limit on the total number of open files has been
436reached.
437.TP
438.B ENOMEM
439Insufficient kernel memory was available.
0da8f5c1
MK
440.SH VERSIONS
441The
442.BR userfaultfd ()
443system call first appeared in Linux 4.3.
efeece04 444.PP
be9021b1
MR
445The support for hugetlbfs and shared memory areas and
446non-page-fault events was added in Linux 4.11.
bf9b5158
MR
447.SH CONFORMING TO
448.BR userfaultfd ()
449is Linux-specific and should not be used in programs intended to be
450portable.
451.SH NOTES
452Glibc does not provide a wrapper for this system call; call it using
453.BR syscall (2).
efeece04 454.PP
de6943fa
MK
455The userfaultfd mechanism can be used as an alternative to
456traditional user-space paging techniques based on the use of the
457.BR SIGSEGV
458signal and
459.BR mmap (2).
460It can also be used to implement lazy restore
461for checkpoint/restore mechanisms,
462as well as post-copy migration to allow (nearly) uninterrupted execution
18448166
MR
463when transferring virtual machines and Linux containers
464from one host to another.
b07243ab
MK
465.SH EXAMPLE
466The program below demonstrates the use of the userfaultfd mechanism.
467The program creates two threads, one of which acts as the
468page-fault handler for the process, for the pages in a demand-page zero
469region created using
470.BR mmap (2).
efeece04 471.PP
b07243ab
MK
472The program takes one command-line argument,
473which is the number of pages that will be created in a mapping
474whose page faults will be handled via userfaultfd.
475After creating a userfaultfd object,
476the program then creates an anonymous private mapping of the specified size
477and registers the address range of that mapping using the
478.B UFFDIO_REGISTER
479.BR ioctl (2)
480operation.
00f4853c 481The program then creates a second thread that will perform the
b07243ab 482task of handling page faults.
efeece04 483.PP
b07243ab
MK
484The main thread then walks through the pages of the mapping fetching
485bytes from successive pages.
486Because the pages have not yet been accessed,
487the first access of a byte in each page will trigger a page-fault event
488on the userfaultfd file descriptor.
efeece04 489.PP
b07243ab
MK
490Each of the page-fault events is handled by the second thread,
491which sits in a loop processing input from the userfaultfd file descriptor.
492In each loop iteration, the second thread first calls
493.BR poll (2)
494to check the state of the file descriptor,
495and then reads an event from the file descriptor.
496All such events should be
497.B UFFD_EVENT_PAGEFAULT
498events,
499which the thread handles by copying a page of data into
500the faulting region using the
00f4853c 501.B UFFDIO_COPY
b07243ab
MK
502.BR ioctl (2)
503operation.
efeece04 504.PP
b07243ab 505The following is an example of what we see when running the program:
efeece04 506.PP
b07243ab 507.in +4n
b8302363 508.EX
b07243ab
MK
509$ \fB./userfaultfd_demo 3\fP
510Address returned by mmap() = 0x7fd30106c000
511
512fault_handler_thread():
513 poll() returns: nready = 1; POLLIN = 1; POLLERR = 0
514 UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fd30106c00f
515 (uffdio_copy.copy returned 4096)
516Read address 0x7fd30106c00f in main(): A
517Read address 0x7fd30106c40f in main(): A
518Read address 0x7fd30106c80f in main(): A
519Read address 0x7fd30106cc0f in main(): A
520
521fault_handler_thread():
522 poll() returns: nready = 1; POLLIN = 1; POLLERR = 0
523 UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fd30106d00f
524 (uffdio_copy.copy returned 4096)
525Read address 0x7fd30106d00f in main(): B
526Read address 0x7fd30106d40f in main(): B
527Read address 0x7fd30106d80f in main(): B
528Read address 0x7fd30106dc0f in main(): B
529
530fault_handler_thread():
531 poll() returns: nready = 1; POLLIN = 1; POLLERR = 0
532 UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fd30106e00f
533 (uffdio_copy.copy returned 4096)
534Read address 0x7fd30106e00f in main(): C
535Read address 0x7fd30106e40f in main(): C
536Read address 0x7fd30106e80f in main(): C
537Read address 0x7fd30106ec0f in main(): C
b8302363 538.EE
e646a1ba 539.in
b07243ab
MK
540.SS Program source
541\&
e7d0bb47 542.EX
b07243ab 543/* userfaultfd_demo.c
00f4853c 544
b07243ab
MK
545 Licensed under the GNU General Public License version 2 or later.
546*/
547#define _GNU_SOURCE
548#include <sys/types.h>
549#include <stdio.h>
550#include <linux/userfaultfd.h>
551#include <pthread.h>
552#include <errno.h>
553#include <unistd.h>
554#include <stdlib.h>
555#include <fcntl.h>
556#include <signal.h>
557#include <poll.h>
558#include <string.h>
559#include <sys/mman.h>
560#include <sys/syscall.h>
561#include <sys/ioctl.h>
562#include <poll.h>
563
564#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
565 } while (0)
566
567static int page_size;
568
569static void *
570fault_handler_thread(void *arg)
571{
572 static struct uffd_msg msg; /* Data read from userfaultfd */
573 static int fault_cnt = 0; /* Number of faults so far handled */
574 long uffd; /* userfaultfd file descriptor */
575 static char *page = NULL;
576 struct uffdio_copy uffdio_copy;
577 ssize_t nread;
578
579 uffd = (long) arg;
580
581 /* Create a page that will be copied into the faulting region */
582
583 if (page == NULL) {
584 page = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
585 MAP_PRIVATE | MAP_ANONYMOUS, \-1, 0);
586 if (page == MAP_FAILED)
587 errExit("mmap");
588 }
589
590 /* Loop, handling incoming events on the userfaultfd
591 file descriptor */
592
593 for (;;) {
594
595 /* See what poll() tells us about the userfaultfd */
596
597 struct pollfd pollfd;
598 int nready;
599 pollfd.fd = uffd;
600 pollfd.events = POLLIN;
601 nready = poll(&pollfd, 1, \-1);
602 if (nready == \-1)
603 errExit("poll");
604
605 printf("\\nfault_handler_thread():\\n");
606 printf(" poll() returns: nready = %d; "
607 "POLLIN = %d; POLLERR = %d\\n", nready,
608 (pollfd.revents & POLLIN) != 0,
609 (pollfd.revents & POLLERR) != 0);
610
611 /* Read an event from the userfaultfd */
612
613 nread = read(uffd, &msg, sizeof(msg));
614 if (nread == 0) {
615 printf("EOF on userfaultfd!\\n");
616 exit(EXIT_FAILURE);
00f4853c 617 }
b07243ab
MK
618
619 if (nread == \-1)
620 errExit("read");
00f4853c 621
b07243ab
MK
622 /* We expect only one kind of event; verify that assumption */
623
624 if (msg.event != UFFD_EVENT_PAGEFAULT) {
625 fprintf(stderr, "Unexpected event on userfaultfd\\n");
626 exit(EXIT_FAILURE);
627 }
628
629 /* Display info about the page\-fault event */
630
631 printf(" UFFD_EVENT_PAGEFAULT event: ");
632 printf("flags = %llx; ", msg.arg.pagefault.flags);
633 printf("address = %llx\\n", msg.arg.pagefault.address);
634
635 /* Copy the page pointed to by \(aqpage\(aq into the faulting
636 region. Vary the contents that are copied in, so that it
637 is more obvious that each fault is handled separately. */
638
639 memset(page, \(aqA\(aq + fault_cnt % 20, page_size);
640 fault_cnt++;
641
642 uffdio_copy.src = (unsigned long) page;
643
644 /* We need to handle page faults in units of pages(!).
645 So, round faulting address down to page boundary */
646
647 uffdio_copy.dst = (unsigned long) msg.arg.pagefault.address &
648 ~(page_size \- 1);
649 uffdio_copy.len = page_size;
650 uffdio_copy.mode = 0;
651 uffdio_copy.copy = 0;
652 if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == \-1)
653 errExit("ioctl\-UFFDIO_COPY");
654
655 printf(" (uffdio_copy.copy returned %lld)\\n",
656 uffdio_copy.copy);
657 }
658}
659
660int
661main(int argc, char *argv[])
662{
663 long uffd; /* userfaultfd file descriptor */
664 char *addr; /* Start of region handled by userfaultfd */
665 unsigned long len; /* Length of region handled by userfaultfd */
666 pthread_t thr; /* ID of thread that handles page faults */
667 struct uffdio_api uffdio_api;
668 struct uffdio_register uffdio_register;
669 int s;
670
671 if (argc != 2) {
672 fprintf(stderr, "Usage: %s num\-pages\\n", argv[0]);
673 exit(EXIT_FAILURE);
674 }
675
676 page_size = sysconf(_SC_PAGE_SIZE);
677 len = strtoul(argv[1], NULL, 0) * page_size;
678
679 /* Create and enable userfaultfd object */
680
681 uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
682 if (uffd == \-1)
683 errExit("userfaultfd");
684
685 uffdio_api.api = UFFD_API;
686 uffdio_api.features = 0;
687 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == \-1)
688 errExit("ioctl\-UFFDIO_API");
689
690 /* Create a private anonymous mapping. The memory will be
691 demand\-zero paged\-\-that is, not yet allocated. When we
692 actually touch the memory, it will be allocated via
693 the userfaultfd. */
694
695 addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
696 MAP_PRIVATE | MAP_ANONYMOUS, \-1, 0);
697 if (addr == MAP_FAILED)
698 errExit("mmap");
699
700 printf("Address returned by mmap() = %p\\n", addr);
701
702 /* Register the memory range of the mapping we just created for
703 handling by the userfaultfd object. In mode, we request to track
704 missing pages (i.e., pages that have not yet been faulted in). */
705
706 uffdio_register.range.start = (unsigned long) addr;
707 uffdio_register.range.len = len;
708 uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
709 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == \-1)
710 errExit("ioctl\-UFFDIO_REGISTER");
711
712 /* Create a thread that will process the userfaultfd events */
713
714 s = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd);
715 if (s != 0) {
716 errno = s;
717 errExit("pthread_create");
718 }
719
720 /* Main thread now touches memory in the mapping, touching
721 locations 1024 bytes apart. This will trigger userfaultfd
722 events for all pages in the region. */
723
724 int l;
725 l = 0xf; /* Ensure that faulting address is not on a page
726 boundary, in order to test that we correctly
727 handle that case in fault_handler_thread() */
728 while (l < len) {
729 char c = addr[l];
730 printf("Read address %p in main(): ", addr + l);
731 printf("%c\\n", c);
732 l += 1024;
733 usleep(100000); /* Slow things down a little */
734 }
735
736 exit(EXIT_SUCCESS);
737}
e7d0bb47 738.EE
bf9b5158
MR
739.SH SEE ALSO
740.BR fcntl (2),
4aa7f5cf 741.BR ioctl (2),
6bc6d124 742.BR ioctl_userfaultfd (2),
6d3e02a5 743.BR madvise (2),
4aa7f5cf 744.BR mmap (2)
efeece04 745.PP
bf9b5158
MR
746.IR Documentation/vm/userfaultfd.txt
747in the Linux kernel source tree
efeece04 748.PP