]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man/man2/memfd_create.2
man/: EXAMPLES: Add missing includes
[thirdparty/man-pages.git] / man / man2 / memfd_create.2
CommitLineData
878cc348
MK
1.\" Copyright (C) 2014 Michael Kerrisk <mtk.manpages@gmail.com>
2.\" and Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
73fc0b53 3.\"
e4a74ca8 4.\" SPDX-License-Identifier: GPL-2.0-or-later
73fc0b53 5.\"
4c1c5274 6.TH memfd_create 2 (date) "Linux man-pages (unreleased)"
73fc0b53
DH
7.SH NAME
8memfd_create \- create an anonymous file
4367326d
AC
9.SH LIBRARY
10Standard C library
8fc3b2cf 11.RI ( libc ", " \-lc )
73fc0b53 12.SH SYNOPSIS
d6d367c7
JS
13.nf
14.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
6971614d 15.B #include <sys/mman.h>
c6d039a3 16.P
73fc0b53 17.BI "int memfd_create(const char *" name ", unsigned int " flags ");"
7a92eea0 18.fi
73fc0b53
DH
19.SH DESCRIPTION
20.BR memfd_create ()
f00ce3a0
MK
21creates an anonymous file and returns a file descriptor that refers to it.
22The file behaves like a regular file, and so can be modified,
afc5ca18 23truncated, memory-mapped, and so on.
f00ce3a0
MK
24However, unlike a regular file,
25it lives in RAM and has a volatile backing storage.
cb5b73cc
MK
26Once all references to the file are dropped, it is automatically released.
27Anonymous memory is used for all backing pages of the file.
f00ce3a0
MK
28Therefore, files created by
29.BR memfd_create ()
46832662
MK
30have the same semantics as other anonymous
31.\" David Herrmann:
32.\" memfd uses VM_NORESERVE so each page is accounted on first access.
33.\" This means, the overcommit-limits (see __vm_enough_memory()) and the
34.\" memory-cgroup limits (mem_cgroup_try_charge()) are applied. Note that
35.\" those are accounted on "current" and "current->mm", that is, the
36.\" process doing the first page access.
f00ce3a0 37memory allocations such as those allocated using
73fc0b53 38.BR mmap (2)
f00ce3a0 39with the
1ae6b2c7 40.B MAP_ANONYMOUS
f00ce3a0 41flag.
c6d039a3 42.P
73fc0b53 43The initial size of the file is set to 0.
f00ce3a0
MK
44Following the call, the file size should be set using
45.BR ftruncate (2).
46832662
MK
46(Alternatively, the file may be populated by calls to
47.BR write (2)
48or similar.)
c6d039a3 49.P
f00ce3a0 50The name supplied in
73fc0b53 51.I name
46832662 52is used as a filename and will be displayed
f00ce3a0 53as the target of the corresponding symbolic link in the directory
73fc0b53 54.IR /proc/self/fd/ .
f00ce3a0 55The displayed name is always prefixed with
1ae6b2c7 56.I memfd:
f00ce3a0 57and serves only for debugging purposes.
46832662 58Names do not affect the behavior of the file descriptor,
cb5b73cc 59and as such multiple files can have the same name without any side effects.
c6d039a3 60.P
73fc0b53 61The following values may be bitwise ORed in
1ae6b2c7 62.I flags
553deb41 63to change the behavior of
73fc0b53
DH
64.BR memfd_create ():
65.TP
1ae6b2c7 66.B MFD_CLOEXEC
73fc0b53
DH
67Set the close-on-exec
68.RB ( FD_CLOEXEC )
69flag on the new file descriptor.
70See the description of the
71.B O_CLOEXEC
72flag in
73.BR open (2)
cb5b73cc 74for reasons why this may be useful.
73fc0b53 75.TP
1ae6b2c7 76.B MFD_ALLOW_SEALING
3a71dcd6 77Allow sealing operations on this file.
e8a0dfae 78See the discussion of the
73fc0b53
DH
79.B F_ADD_SEALS
80and
1ae6b2c7 81.B F_GET_SEALS
e8a0dfae
MK
82operations in
83.BR fcntl (2),
3a71dcd6 84and also NOTES, below.
cb5b73cc
MK
85The initial set of seals is empty.
86If this flag is not set, the initial set of seals will be
f00ce3a0
MK
87.BR F_SEAL_SEAL ,
88meaning that no other seals can be set on the file.
89.\" FIXME Why is the MFD_ALLOW_SEALING behavior not simply the default?
39b15554 90.\" Is it worth adding some text explaining this?
ce7fa2be
MK
91.TP
92.BR MFD_HUGETLB " (since Linux 4.14)"
d2cfa322 93.\" commit 749df87bd7bee5a79cef073f5d032ddb2b211de8
ce7fa2be 94The anonymous file will be created in the hugetlbfs filesystem using
d2cfa322
MK
95huge pages.
96See the Linux kernel source file
b49c2acb 97.I Documentation/admin\-guide/mm/hugetlbpage.rst
d2cfa322 98for more information about hugetlbfs.
659beec7
MAL
99.\" commit 47b9012ecdc747f6936395265e677d41e11a31ff
100Specifying both
ce7fa2be
MK
101.B MFD_HUGETLB
102and
103.B MFD_ALLOW_SEALING
88aa124a
MK
104in
105.I flags
659beec7 106is supported since Linux 4.16.
ce7fa2be 107.TP
6fdb1c03
AC
108.B MFD_HUGE_2MB
109.TQ
110.B MFD_HUGE_1GB
111.TQ
112\&.\|.\|.
ce7fa2be
MK
113Used in conjunction with
114.B MFD_HUGETLB
58988360 115to select alternative hugetlb page sizes (respectively, 2\ MB, 1\ GB, ...)
4332849f
MK
116on systems that support multiple hugetlb page sizes.
117Definitions for known
ce7fa2be 118huge page sizes are included in the header file
6971614d 119.IR <linux/memfd.h> .
4332849f 120.IP
ce7fa2be
MK
121For details on encoding huge page sizes not included in the header file,
122see the discussion of the similarly named constants in
123.BR mmap (2).
c6d039a3 124.P
f00ce3a0
MK
125Unused bits in
126.I flags
127must be 0.
c6d039a3 128.P
73fc0b53
DH
129As its return value,
130.BR memfd_create ()
131returns a new file descriptor that can be used to refer to the file.
f00ce3a0
MK
132This file descriptor is opened for both reading and writing
133.RB ( O_RDWR )
134and
135.B O_LARGEFILE
d9cb0d7d 136is set for the file descriptor.
c6d039a3 137.P
f00ce3a0
MK
138With respect to
139.BR fork (2)
140and
141.BR execve (2),
142the usual semantics apply for the file descriptor created by
143.BR memfd_create ().
144A copy of the file descriptor is inherited by the child produced by
145.BR fork (2)
146and refers to the same file.
147The file descriptor is preserved across
73fc0b53
DH
148.BR execve (2),
149unless the close-on-exec flag has been set.
150.SH RETURN VALUE
151On success,
152.BR memfd_create ()
153returns a new file descriptor.
154On error, \-1 is returned and
155.I errno
156is set to indicate the error.
157.SH ERRORS
158.TP
f00ce3a0
MK
159.B EFAULT
160The address in
1ae6b2c7 161.I name
f00ce3a0
MK
162points to invalid memory.
163.TP
73fc0b53 164.B EINVAL
20acd21a 165.I flags
0fd5731e
MK
166included unknown bits.
167.TP
168.B EINVAL
20acd21a
MK
169.I name
170was too long.
5ade353d
MK
171(The limit is
172.\" NAME_MAX - strlen("memfd:")
173249 bytes, excluding the terminating null byte.)
73fc0b53 174.TP
88aa124a
MK
175.B EINVAL
176Both
177.B MFD_HUGETLB
178and
179.B MFD_ALLOW_SEALING
180were specified in
181.IR flags " (before Linux 4.16)."
182.TP
73fc0b53 183.B EMFILE
26c32fab 184The per-process limit on the number of open file descriptors has been reached.
73fc0b53
DH
185.TP
186.B ENFILE
cb5b73cc 187The system-wide limit on the total number of open files has been reached.
73fc0b53 188.TP
73fc0b53
DH
189.B ENOMEM
190There was insufficient memory to create a new anonymous file.
090fdddb
MK
191.TP
192.B EPERM
193The
194.B MFD_HUGETLB
195flag was specified, but the caller was not privileged (did not have the
196.B CAP_IPC_LOCK
197capability)
198and was not a member of the
199.I sysctl_hugetlb_shm_group
200group; see the description of
201.I /proc/sys/vm/sysctl_hugetlb_shm_group
202in
203.BR proc (5).
3113c7f3 204.SH STANDARDS
4131356c
AC
205Linux.
206.SH HISTORY
207Linux 3.17,
208glibc 2.27.
51fa3cbf
MK
209.SH NOTES
210.\" See also http://lwn.net/Articles/593918/
211.\" and http://lwn.net/Articles/594919/ and http://lwn.net/Articles/591108/
212The
213.BR memfd_create ()
214system call provides a simple alternative to manually mounting a
4e07c70f 215.BR tmpfs (5)
51fa3cbf
MK
216filesystem and creating and opening a file in that filesystem.
217The primary purpose of
218.BR memfd_create ()
219is to create files and associated file descriptors that are
220used with the file-sealing APIs provided by
221.BR fcntl (2).
c6d039a3 222.P
46832662
MK
223The
224.BR memfd_create ()
225system call also has uses without file sealing
226(which is why file-sealing is disabled, unless explicitly requested with the
1ae6b2c7 227.B MFD_ALLOW_SEALING
46832662
MK
228flag).
229In particular, it can be used as an alternative to creating files in
1ae6b2c7 230.I /tmp
46832662
MK
231or as an alternative to using the
232.BR open (2)
233.B O_TMPFILE
234flag in cases where there is no intention to actually link the
235resulting file into the filesystem.
51fa3cbf
MK
236.SS File sealing
237In the absence of file sealing,
238processes that communicate via shared memory must either trust each other,
239or take measures to deal with the possibility that an untrusted peer
db61d4b2 240may manipulate the shared memory region in problematic ways.
51fa3cbf
MK
241For example, an untrusted peer might modify the contents of the
242shared memory at any time, or shrink the shared memory region.
243The former possibility leaves the local process vulnerable to
244time-of-check-to-time-of-use race conditions
245(typically dealt with by copying data from
246the shared memory region before checking and using it).
247The latter possibility leaves the local process vulnerable to
1ae6b2c7 248.B SIGBUS
51fa3cbf
MK
249signals when an attempt is made to access a now-nonexistent
250location in the shared memory region.
251(Dealing with this possibility necessitates the use of a handler for the
1ae6b2c7 252.B SIGBUS
51fa3cbf 253signal.)
c6d039a3 254.P
51fa3cbf
MK
255Dealing with untrusted peers imposes extra complexity on
256code that employs shared memory.
257File sealing enables that extra complexity to be eliminated,
258by allowing a process to operate secure in the knowledge that
259its peer can't modify the shared memory in an undesired fashion.
c6d039a3 260.P
51fa3cbf 261An example of the usage of the sealing mechanism is as follows:
22356d97 262.IP (1) 5
51fa3cbf 263The first process creates a
4e07c70f 264.BR tmpfs (5)
771e13d4 265file using
51fa3cbf
MK
266.BR memfd_create ().
267The call yields a file descriptor used in subsequent steps.
22356d97 268.IP (2)
51fa3cbf
MK
269The first process
270sizes the file created in the previous step using
271.BR ftruncate (2),
272maps it using
273.BR mmap (2),
274and populates the shared memory with the desired data.
22356d97 275.IP (3)
51fa3cbf
MK
276The first process uses the
277.BR fcntl (2)
278.B F_ADD_SEALS
279operation to place one or more seals on the file,
280in order to restrict further modifications on the file.
281(If placing the seal
282.BR F_SEAL_WRITE ,
283then it will be necessary to first unmap the shared writable mapping
fc6a14f5
MK
284created in the previous step.
285Otherwise, behavior similar to
1ae6b2c7 286.B F_SEAL_WRITE
e15b10ba
MK
287can be achieved by using
288.BR F_SEAL_FUTURE_WRITE ,
98eff9f7
JFG
289which will prevent future writes via
290.BR mmap (2)
291and
292.BR write (2)
e15b10ba 293from succeeding while keeping existing shared writable mappings).
22356d97 294.IP (4)
51fa3cbf 295A second process obtains a file descriptor for the
4e07c70f 296.BR tmpfs (5)
51fa3cbf 297file and maps it.
46832662 298Among the possible ways in which this could happen are the following:
51fa3cbf 299.RS
cdede5cd 300.IP \[bu] 3
46832662
MK
301The process that called
302.BR memfd_create ()
303could transfer the resulting file descriptor to the second process
304via a UNIX domain socket (see
305.BR unix (7)
306and
307.BR cmsg (3)).
308The second process then maps the file using
309.BR mmap (2).
cdede5cd 310.IP \[bu]
51fa3cbf
MK
311The second process is created via
312.BR fork (2)
313and thus automatically inherits the file descriptor and mapping.
46832662
MK
314(Note that in this case and the next,
315there is a natural trust relationship between the two processes,
316since they are running under the same user ID.
317Therefore, file sealing would not normally be necessary.)
cdede5cd 318.IP \[bu]
771e13d4 319The second process opens the file
0d782b8d 320.IR /proc/ pid /fd/ fd,
51fa3cbf
MK
321where
322.I <pid>
323is the PID of the first process (the one that called
324.BR memfd_create ()),
325and
326.I <fd>
327is the number of the file descriptor returned by the call to
328.BR memfd_create ()
329in that process.
330The second process then maps the file using
331.BR mmap (2).
332.RE
22356d97 333.IP (5)
51fa3cbf
MK
334The second process uses the
335.BR fcntl (2)
336.B F_GET_SEALS
4f32648e
MK
337operation to retrieve the bit mask of seals
338that has been applied to the file.
339This bit mask can be inspected in order to determine
340what kinds of restrictions have been placed on file modifications.
51fa3cbf
MK
341If desired, the second process can apply further seals
342to impose additional restrictions (so long as the
1ae6b2c7 343.B F_SEAL_SEAL
51fa3cbf 344seal has not yet been applied).
a14af333 345.SH EXAMPLES
878cc348
MK
346Below are shown two example programs that demonstrate the use of
347.BR memfd_create ()
348and the file sealing API.
c6d039a3 349.P
878cc348
MK
350The first program,
351.IR t_memfd_create.c ,
352creates a
4e07c70f 353.BR tmpfs (5)
878cc348
MK
354file using
355.BR memfd_create (),
356sets a size for the file, maps it into memory,
357and optionally places some seals on the file.
358The program accepts up to three command-line arguments,
359of which the first two are required.
360The first argument is the name to associate with the file,
361the second argument is the size to be set for the file,
e57f8d34 362and the optional third argument is a string of characters that specify
845b3463 363seals to be set on the file.
c6d039a3 364.P
878cc348
MK
365The second program,
366.IR t_get_seals.c ,
367can be used to open an existing file that was created via
368.BR memfd_create ()
369and inspect the set of seals that have been applied to that file.
c6d039a3 370.P
878cc348
MK
371The following shell session demonstrates the use of these programs.
372First we create a
4e07c70f 373.BR tmpfs (5)
878cc348 374file and set some seals on it:
c6d039a3 375.P
878cc348 376.in +4n
b8302363 377.EX
878cc348
MK
378$ \fB./t_memfd_create my_memfd_file 4096 sw &\fP
379[1] 11775
380PID: 11775; fd: 3; /proc/11775/fd/3
b8302363 381.EE
878cc348 382.in
c6d039a3 383.P
878cc348
MK
384At this point, the
385.I t_memfd_create
386program continues to run in the background.
387From another program, we can obtain a file descriptor for the
46832662
MK
388file created by
389.BR memfd_create ()
390by opening the
1ae6b2c7 391.IR /proc/ pid /fd
d9cb0d7d 392file that corresponds to the file descriptor opened by
878cc348
MK
393.BR memfd_create ().
394Using that pathname, we inspect the content of the
1ae6b2c7 395.IR /proc/ pid /fd
878cc348
MK
396symbolic link, and use our
397.I t_get_seals
398program to view the seals that have been placed on the file:
c6d039a3 399.P
878cc348 400.in +4n
b8302363 401.EX
878cc348
MK
402$ \fBreadlink /proc/11775/fd/3\fP
403/memfd:my_memfd_file (deleted)
404$ \fB./t_get_seals /proc/11775/fd/3\fP
405Existing seals: WRITE SHRINK
b8302363 406.EE
878cc348
MK
407.in
408.SS Program source: t_memfd_create.c
409\&
33857069 410.\" SRC BEGIN (t_memfd_create.c)
408731d4 411.EX
d6d367c7 412#define _GNU_SOURCE
5a5208c1 413#include <err.h>
878cc348 414#include <fcntl.h>
4ae706b0
AC
415#include <stdint.h>
416#include <stdio.h>
878cc348 417#include <stdlib.h>
878cc348 418#include <string.h>
4ae706b0 419#include <sys/mman.h>
0320049e 420#include <sys/types.h>
4ae706b0 421#include <unistd.h>
fe5dba13 422\&
878cc348
MK
/* Create a memfd file named argv[1], set its size to argv[2] bytes,
   optionally place the seals named in argv[3] on it, and then block in
   pause() so that the file can be examined via /proc/PID/fd/FD.

   Exits with EXIT_FAILURE on a usage error, on a malformed or negative
   size, or when memfd_create(), ftruncate(), or fcntl() fails. */
int
main(int argc, char *argv[])
{
    int           fd;
    char          *name, *seals_arg, *end;
    long long     size;
    unsigned int  seals;

    if (argc < 3) {
        fprintf(stderr, "%s name size [seals]\n", argv[0]);
        fprintf(stderr, "\t'seals' can contain any of the "
                "following characters:\n");
        fprintf(stderr, "\t\tg - F_SEAL_GROW\n");
        fprintf(stderr, "\t\ts - F_SEAL_SHRINK\n");
        fprintf(stderr, "\t\tw - F_SEAL_WRITE\n");
        fprintf(stderr, "\t\tW - F_SEAL_FUTURE_WRITE\n");
        fprintf(stderr, "\t\tS - F_SEAL_SEAL\n");
        exit(EXIT_FAILURE);
    }

    name = argv[1];

    /* Unlike atoi(), strtoll() lets us notice input that is not a
       complete decimal number; negative sizes are rejected here rather
       than being handed to ftruncate(). */

    size = strtoll(argv[2], &end, 10);
    if (end == argv[2] || *end != '\0' || size < 0)
        errx(EXIT_FAILURE, "invalid size: %s", argv[2]);

    /* argv[argc] is always NULL, so seals_arg is NULL when no seals
       argument was supplied. */

    seals_arg = argv[3];

    /* Create an anonymous file in tmpfs; allow seals to be
       placed on the file. */

    fd = memfd_create(name, MFD_ALLOW_SEALING);
    if (fd == -1)
        err(EXIT_FAILURE, "memfd_create");

    /* Size the file as specified on the command line. */

    if (ftruncate(fd, size) == -1)
        err(EXIT_FAILURE, "ftruncate");

    printf("PID: %jd; fd: %d; /proc/%jd/fd/%d\n",
           (intmax_t) getpid(), fd, (intmax_t) getpid(), fd);

    /* Code to map the file and populate the mapping with data
       omitted. */

    /* If a 'seals' command-line argument was supplied, set some
       seals on the file. */

    if (seals_arg != NULL) {
        seals = 0;

        if (strchr(seals_arg, 'g') != NULL)
            seals |= F_SEAL_GROW;
        if (strchr(seals_arg, 's') != NULL)
            seals |= F_SEAL_SHRINK;
        if (strchr(seals_arg, 'w') != NULL)
            seals |= F_SEAL_WRITE;
        if (strchr(seals_arg, 'W') != NULL)
            seals |= F_SEAL_FUTURE_WRITE;
        if (strchr(seals_arg, 'S') != NULL)
            seals |= F_SEAL_SEAL;

        if (fcntl(fd, F_ADD_SEALS, seals) == -1)
            err(EXIT_FAILURE, "fcntl");
    }

    /* Keep running, so that the file created by memfd_create()
       continues to exist. */

    pause();

    exit(EXIT_SUCCESS);
}
408731d4 493.EE
33857069 494.\" SRC END
878cc348
MK
495.SS Program source: t_get_seals.c
496\&
33857069 497.\" SRC BEGIN (t_get_seals.c)
408731d4 498.EX
d6d367c7 499#define _GNU_SOURCE
5a5208c1 500#include <err.h>
878cc348 501#include <fcntl.h>
878cc348 502#include <stdio.h>
47b94bbd 503#include <stdlib.h>
fe5dba13 504\&
878cc348
MK
/* Open the file named by argv[1] (typically a /proc/PID/fd/FD link to a
   file created with memfd_create()) and print the names of the seals
   currently set on it.

   Exits with EXIT_FAILURE on a usage error or when open() or fcntl()
   fails. */
int
main(int argc, char *argv[])
{
    int  fd;
    int  seals;     /* fcntl() returns an int; keep it signed so that the
                       -1 error check does not depend on an implicit
                       conversion to unsigned. */

    if (argc != 2) {
        fprintf(stderr, "%s /proc/PID/fd/FD\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    fd = open(argv[1], O_RDWR);
    if (fd == -1)
        err(EXIT_FAILURE, "open");

    seals = fcntl(fd, F_GET_SEALS);
    if (seals == -1)
        err(EXIT_FAILURE, "fcntl");

    printf("Existing seals:");
    if (seals & F_SEAL_SEAL)
        printf(" SEAL");
    if (seals & F_SEAL_GROW)
        printf(" GROW");
    if (seals & F_SEAL_WRITE)
        printf(" WRITE");
    if (seals & F_SEAL_FUTURE_WRITE)
        printf(" FUTURE_WRITE");
    if (seals & F_SEAL_SHRINK)
        printf(" SHRINK");
    printf("\n");

    /* Code to map the file and access the contents of the
       resulting mapping omitted. */

    exit(EXIT_SUCCESS);
}
408731d4 542.EE
33857069 543.\" SRC END
73fc0b53 544.SH SEE ALSO
73fc0b53 545.BR fcntl (2),
3a71dcd6 546.BR ftruncate (2),
e817f70a 547.BR memfd_secret (2),
3a71dcd6 548.BR mmap (2),
46832662
MK
549.BR shmget (2),
550.BR shm_open (3)