]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/memfd_create.2
All pages: Remove the 5th argument to .TH
[thirdparty/man-pages.git] / man2 / memfd_create.2
CommitLineData
878cc348
MK
1.\" Copyright (C) 2014 Michael Kerrisk <mtk.manpages@gmail.com>
2.\" and Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
73fc0b53 3.\"
e4a74ca8 4.\" SPDX-License-Identifier: GPL-2.0-or-later
73fc0b53 5.\"
45186a5d 6.TH MEMFD_CREATE 2 2021-03-22 "Linux man-pages (unreleased)"
73fc0b53
DH
7.SH NAME
8memfd_create \- create an anonymous file
4367326d
AC
9.SH LIBRARY
10Standard C library
8fc3b2cf 11.RI ( libc ", " \-lc )
73fc0b53 12.SH SYNOPSIS
d6d367c7
JS
13.nf
14.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
6971614d 15.B #include <sys/mman.h>
68e4db0a 16.PP
73fc0b53 17.BI "int memfd_create(const char *" name ", unsigned int " flags ");"
7a92eea0 18.fi
73fc0b53
DH
19.SH DESCRIPTION
20.BR memfd_create ()
f00ce3a0
MK
21creates an anonymous file and returns a file descriptor that refers to it.
22The file behaves like a regular file, and so can be modified,
afc5ca18 23truncated, memory-mapped, and so on.
f00ce3a0
MK
24However, unlike a regular file,
25it lives in RAM and has volatile backing storage.
cb5b73cc
MK
26Once all references to the file are dropped, it is automatically released.
27Anonymous memory is used for all backing pages of the file.
f00ce3a0
MK
28Therefore, files created by
29.BR memfd_create ()
46832662
MK
30have the same semantics as other anonymous
31.\" David Herrmann:
32.\" memfd uses VM_NORESERVE so each page is accounted on first access.
33.\" This means, the overcommit-limits (see __vm_enough_memory()) and the
34.\" memory-cgroup limits (mem_cgroup_try_charge()) are applied. Note that
35.\" those are accounted on "current" and "current->mm", that is, the
36.\" process doing the first page access.
f00ce3a0 37memory allocations such as those allocated using
73fc0b53 38.BR mmap (2)
f00ce3a0 39with the
1ae6b2c7 40.B MAP_ANONYMOUS
f00ce3a0 41flag.
efeece04 42.PP
73fc0b53 43The initial size of the file is set to 0.
f00ce3a0
MK
44Following the call, the file size should be set using
45.BR ftruncate (2).
46832662
MK
46(Alternatively, the file may be populated by calls to
47.BR write (2)
48or similar.)
efeece04 49.PP
f00ce3a0 50The name supplied in
73fc0b53 51.I name
46832662 52is used as a filename and will be displayed
f00ce3a0 53as the target of the corresponding symbolic link in the directory
73fc0b53 54.IR /proc/self/fd/ .
f00ce3a0 55The displayed name is always prefixed with
1ae6b2c7 56.I memfd:
f00ce3a0 57and serves only for debugging purposes.
46832662 58Names do not affect the behavior of the file descriptor,
cb5b73cc 59and as such multiple files can have the same name without any side effects.
efeece04 60.PP
73fc0b53 61The following values may be bitwise ORed in
1ae6b2c7 62.I flags
553deb41 63to change the behavior of
73fc0b53
DH
64.BR memfd_create ():
65.TP
1ae6b2c7 66.B MFD_CLOEXEC
73fc0b53
DH
67Set the close-on-exec
68.RB ( FD_CLOEXEC )
69flag on the new file descriptor.
70See the description of the
71.B O_CLOEXEC
72flag in
73.BR open (2)
cb5b73cc 74for reasons why this may be useful.
73fc0b53 75.TP
1ae6b2c7 76.B MFD_ALLOW_SEALING
3a71dcd6 77Allow sealing operations on this file.
e8a0dfae 78See the discussion of the
73fc0b53
DH
79.B F_ADD_SEALS
80and
1ae6b2c7 81.B F_GET_SEALS
e8a0dfae
MK
82operations in
83.BR fcntl (2),
3a71dcd6 84and also NOTES, below.
cb5b73cc
MK
85When this flag is set, the initial set of seals is empty.
86If this flag is not set, the initial set of seals will be
f00ce3a0
MK
87.BR F_SEAL_SEAL ,
88meaning that no other seals can be set on the file.
89.\" FIXME Why is the MFD_ALLOW_SEALING behavior not simply the default?
39b15554 90.\" Is it worth adding some text explaining this?
ce7fa2be
MK
91.TP
92.BR MFD_HUGETLB " (since Linux 4.14)"
d2cfa322 93.\" commit 749df87bd7bee5a79cef073f5d032ddb2b211de8
ce7fa2be 94The anonymous file will be created in the hugetlbfs filesystem using
d2cfa322
MK
95huge pages.
96See the Linux kernel source file
b49c2acb 97.I Documentation/admin\-guide/mm/hugetlbpage.rst
d2cfa322 98for more information about hugetlbfs.
659beec7
MAL
99.\" commit 47b9012ecdc747f6936395265e677d41e11a31ff
100Specifying both
ce7fa2be
MK
101.B MFD_HUGETLB
102and
103.B MFD_ALLOW_SEALING
88aa124a
MK
104in
105.I flags
659beec7 106is supported since Linux 4.16.
ce7fa2be
MK
107.TP
108.BR MFD_HUGE_2MB ", " MFD_HUGE_1GB ", " "..."
109Used in conjunction with
110.B MFD_HUGETLB
58988360 111to select alternative hugetlb page sizes (respectively, 2\ MB, 1\ GB, ...)
4332849f
MK
112on systems that support multiple hugetlb page sizes.
113Definitions for known
ce7fa2be 114huge page sizes are included in the header file
6971614d 115.I <linux/memfd.h>.
4332849f 116.IP
ce7fa2be
MK
117For details on encoding huge page sizes not included in the header file,
118see the discussion of the similarly named constants in
119.BR mmap (2).
73fc0b53 120.PP
f00ce3a0
MK
121Unused bits in
122.I flags
123must be 0.
efeece04 124.PP
73fc0b53
DH
125As its return value,
126.BR memfd_create ()
127returns a new file descriptor that can be used to refer to the file.
f00ce3a0
MK
128This file descriptor is opened for both reading and writing
129.RB ( O_RDWR )
130and
131.B O_LARGEFILE
d9cb0d7d 132is set for the file descriptor.
efeece04 133.PP
f00ce3a0
MK
134With respect to
135.BR fork (2)
136and
137.BR execve (2),
138the usual semantics apply for the file descriptor created by
139.BR memfd_create ().
140A copy of the file descriptor is inherited by the child produced by
141.BR fork (2)
142and refers to the same file.
143The file descriptor is preserved across
73fc0b53
DH
144.BR execve (2),
145unless the close-on-exec flag has been set.
146.SH RETURN VALUE
147On success,
148.BR memfd_create ()
149returns a new file descriptor.
150On error, \-1 is returned and
151.I errno
152is set to indicate the error.
153.SH ERRORS
154.TP
f00ce3a0
MK
155.B EFAULT
156The address in
1ae6b2c7 157.I name
f00ce3a0
MK
158points to invalid memory.
159.TP
73fc0b53 160.B EINVAL
20acd21a 161.I flags
0fd5731e
MK
162included unknown bits.
163.TP
164.B EINVAL
20acd21a
MK
165.I name
166was too long.
5ade353d
MK
167(The limit is
168.\" NAME_MAX - strlen("memfd:")
169249 bytes, excluding the terminating null byte.)
73fc0b53 170.TP
88aa124a
MK
171.B EINVAL
172Both
173.B MFD_HUGETLB
174and
175.B MFD_ALLOW_SEALING
176were specified in
177.IR flags " (before Linux 4.16)."
178.TP
73fc0b53 179.B EMFILE
26c32fab 180The per-process limit on the number of open file descriptors has been reached.
73fc0b53
DH
181.TP
182.B ENFILE
cb5b73cc 183The system-wide limit on the total number of open files has been reached.
73fc0b53 184.TP
73fc0b53
DH
185.B ENOMEM
186There was insufficient memory to create a new anonymous file.
090fdddb
MK
187.TP
188.B EPERM
189The
190.B MFD_HUGETLB
191flag was specified, but the caller was not privileged (did not have the
192.B CAP_IPC_LOCK
193capability)
194and was not a member of the
195.I sysctl_hugetlb_shm_group
196group; see the description of
197.I /proc/sys/vm/sysctl_hugetlb_shm_group
198in
199.BR proc (5).
73fc0b53
DH
200.SH VERSIONS
f00ce3a0
MK
201The
202.BR memfd_create ()
3411d30b
MK
203system call first appeared in Linux 3.17;
204glibc support was added in version 2.27.
3113c7f3 205.SH STANDARDS
f00ce3a0 206The
73fc0b53 207.BR memfd_create ()
f00ce3a0 208system call is Linux-specific.
51fa3cbf
MK
209.SH NOTES
210.\" See also http://lwn.net/Articles/593918/
211.\" and http://lwn.net/Articles/594919/ and http://lwn.net/Articles/591108/
212The
213.BR memfd_create ()
214system call provides a simple alternative to manually mounting a
4e07c70f 215.BR tmpfs (5)
51fa3cbf
MK
216filesystem and creating and opening a file in that filesystem.
217The primary purpose of
218.BR memfd_create ()
219is to create files and associated file descriptors that are
220used with the file-sealing APIs provided by
221.BR fcntl (2).
efeece04 222.PP
46832662
MK
223The
224.BR memfd_create ()
225system call also has uses without file sealing
226(which is why file sealing is disabled, unless explicitly requested with the
1ae6b2c7 227.B MFD_ALLOW_SEALING
46832662
MK
228flag).
229In particular, it can be used as an alternative to creating files in
1ae6b2c7 230.I /tmp
46832662
MK
231or as an alternative to using the
232.BR open (2)
233.B O_TMPFILE
234flag in cases where there is no intention to actually link the
235resulting file into the filesystem.
51fa3cbf
MK
236.SS File sealing
237In the absence of file sealing,
238processes that communicate via shared memory must either trust each other,
239or take measures to deal with the possibility that an untrusted peer
db61d4b2 240may manipulate the shared memory region in problematic ways.
51fa3cbf
MK
241For example, an untrusted peer might modify the contents of the
242shared memory at any time, or shrink the shared memory region.
243The former possibility leaves the local process vulnerable to
244time-of-check-to-time-of-use race conditions
245(typically dealt with by copying data from
246the shared memory region before checking and using it).
247The latter possibility leaves the local process vulnerable to
1ae6b2c7 248.B SIGBUS
51fa3cbf
MK
249signals when an attempt is made to access a now-nonexistent
250location in the shared memory region.
251(Dealing with this possibility necessitates the use of a handler for the
1ae6b2c7 252.B SIGBUS
51fa3cbf 253signal.)
efeece04 254.PP
51fa3cbf
MK
255Dealing with untrusted peers imposes extra complexity on
256code that employs shared memory.
257Memory sealing enables that extra complexity to be eliminated,
258by allowing a process to operate secure in the knowledge that
259its peer can't modify the shared memory in an undesired fashion.
efeece04 260.PP
51fa3cbf 261An example of the usage of the sealing mechanism is as follows:
51fa3cbf
MK
262.IP 1. 3
263The first process creates a
4e07c70f 264.BR tmpfs (5)
771e13d4 265file using
51fa3cbf
MK
266.BR memfd_create ().
267The call yields a file descriptor used in subsequent steps.
268.IP 2.
269The first process
270sizes the file created in the previous step using
271.BR ftruncate (2),
272maps it using
273.BR mmap (2),
274and populates the shared memory with the desired data.
275.IP 3.
276The first process uses the
277.BR fcntl (2)
278.B F_ADD_SEALS
279operation to place one or more seals on the file,
280in order to restrict further modifications on the file.
281(If placing the seal
282.BR F_SEAL_WRITE ,
283then it will be necessary to first unmap the shared writable mapping
fc6a14f5
MK
284created in the previous step.
285Otherwise, behavior similar to
1ae6b2c7 286.B F_SEAL_WRITE
e15b10ba
MK
287can be achieved by using
288.BR F_SEAL_FUTURE_WRITE ,
98eff9f7
JFG
289which will prevent future writes via
290.BR mmap (2)
291and
292.BR write (2)
e15b10ba 293from succeeding while keeping existing shared writable mappings.)
51fa3cbf
MK
294.IP 4.
295A second process obtains a file descriptor for the
4e07c70f 296.BR tmpfs (5)
51fa3cbf 297file and maps it.
46832662 298Among the possible ways in which this could happen are the following:
51fa3cbf
MK
299.RS
300.IP * 3
46832662
MK
301The process that called
302.BR memfd_create ()
303could transfer the resulting file descriptor to the second process
304via a UNIX domain socket (see
305.BR unix (7)
306and
307.BR cmsg (3)).
308The second process then maps the file using
309.BR mmap (2).
310.IP *
51fa3cbf
MK
311The second process is created via
312.BR fork (2)
313and thus automatically inherits the file descriptor and mapping.
46832662
MK
314(Note that in this case and the next,
315there is a natural trust relationship between the two processes,
316since they are running under the same user ID.
317Therefore, file sealing would not normally be necessary.)
51fa3cbf 318.IP *
771e13d4 319The second process opens the file
9aae8d48 320.IR /proc/<pid>/fd/<fd> ,
51fa3cbf
MK
321where
322.I <pid>
323is the PID of the first process (the one that called
324.BR memfd_create ()),
325and
326.I <fd>
327is the number of the file descriptor returned by the call to
328.BR memfd_create ()
329in that process.
330The second process then maps the file using
331.BR mmap (2).
332.RE
333.IP 5.
334The second process uses the
335.BR fcntl (2)
336.B F_GET_SEALS
4f32648e
MK
337operation to retrieve the bit mask of seals
338that has been applied to the file.
339This bit mask can be inspected in order to determine
340what kinds of restrictions have been placed on file modifications.
51fa3cbf
MK
341If desired, the second process can apply further seals
342to impose additional restrictions (so long as the
1ae6b2c7 343.B F_SEAL_SEAL
51fa3cbf 344seal has not yet been applied).
a14af333 345.SH EXAMPLES
878cc348
MK
346Below are shown two example programs that demonstrate the use of
347.BR memfd_create ()
348and the file sealing API.
efeece04 349.PP
878cc348
MK
350The first program,
351.IR t_memfd_create.c ,
352creates a
4e07c70f 353.BR tmpfs (5)
878cc348
MK
354file using
355.BR memfd_create (),
356sets a size for the file, maps it into memory,
357and optionally places some seals on the file.
358The program accepts up to three command-line arguments,
359of which the first two are required.
360The first argument is the name to associate with the file,
361the second argument is the size to be set for the file,
e57f8d34 362and the optional third argument is a string of characters that specify
878cc348 363seals to be set on the file.
efeece04 364.PP
878cc348
MK
365The second program,
366.IR t_get_seals.c ,
367can be used to open an existing file that was created via
368.BR memfd_create ()
369and inspect the set of seals that have been applied to that file.
efeece04 370.PP
878cc348
MK
371The following shell session demonstrates the use of these programs.
372First we create a
4e07c70f 373.BR tmpfs (5)
878cc348 374file and set some seals on it:
efeece04 375.PP
878cc348 376.in +4n
b8302363 377.EX
878cc348
MK
378$ \fB./t_memfd_create my_memfd_file 4096 sw &\fP
379[1] 11775
380PID: 11775; fd: 3; /proc/11775/fd/3
b8302363 381.EE
878cc348 382.in
efeece04 383.PP
878cc348
MK
384At this point, the
385.I t_memfd_create
386program continues to run in the background.
387From another program, we can obtain a file descriptor for the
46832662
MK
388file created by
389.BR memfd_create ()
390by opening the
1ae6b2c7 391.IR /proc/ pid /fd
d9cb0d7d 392file that corresponds to the file descriptor opened by
878cc348
MK
393.BR memfd_create ().
394Using that pathname, we inspect the content of the
1ae6b2c7 395.IR /proc/ pid /fd
878cc348
MK
396symbolic link, and use our
397.I t_get_seals
398program to view the seals that have been placed on the file:
efeece04 399.PP
878cc348 400.in +4n
b8302363 401.EX
878cc348
MK
402$ \fBreadlink /proc/11775/fd/3\fP
403/memfd:my_memfd_file (deleted)
404$ \fB./t_get_seals /proc/11775/fd/3\fP
405Existing seals: WRITE SHRINK
b8302363 406.EE
878cc348
MK
407.in
408.SS Program source: t_memfd_create.c
409\&
33857069 410.\" SRC BEGIN (t_memfd_create.c)
408731d4 411.EX
d6d367c7 412#define _GNU_SOURCE
878cc348 413#include <fcntl.h>
4ae706b0
AC
414#include <stdint.h>
415#include <stdio.h>
878cc348 416#include <stdlib.h>
878cc348 417#include <string.h>
4ae706b0
AC
418#include <sys/mman.h>
419#include <unistd.h>
878cc348 420
d1a71985 421#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \e
878cc348
MK
422 } while (0)
423
424int
425main(int argc, char *argv[])
426{
427 int fd;
428 unsigned int seals;
878cc348
MK
429 char *name, *seals_arg;
430 ssize_t len;
431
432 if (argc < 3) {
d1a71985
MK
433 fprintf(stderr, "%s name size [seals]\en", argv[0]);
434 fprintf(stderr, "\et\(aqseals\(aq can contain any of the "
435 "following characters:\en");
436 fprintf(stderr, "\et\etg \- F_SEAL_GROW\en");
437 fprintf(stderr, "\et\ets \- F_SEAL_SHRINK\en");
438 fprintf(stderr, "\et\etw \- F_SEAL_WRITE\en");
98eff9f7 439 fprintf(stderr, "\et\etW \- F_SEAL_FUTURE_WRITE\en");
d1a71985 440 fprintf(stderr, "\et\etS \- F_SEAL_SEAL\en");
878cc348
MK
441 exit(EXIT_FAILURE);
442 }
443
444 name = argv[1];
445 len = atoi(argv[2]);
446 seals_arg = argv[3];
447
448 /* Create an anonymous file in tmpfs; allow seals to be
46b20ca1 449 placed on the file. */
878cc348
MK
450
451 fd = memfd_create(name, MFD_ALLOW_SEALING);
452 if (fd == \-1)
453 errExit("memfd_create");
454
46b20ca1 455 /* Size the file as specified on the command line. */
878cc348
MK
456
457 if (ftruncate(fd, len) == \-1)
458 errExit("truncate");
459
8eb90116 460 printf("PID: %jd; fd: %d; /proc/%jd/fd/%d\en",
4687ab0e 461 (intmax_t) getpid(), fd, (intmax_t) getpid(), fd);
878cc348
MK
462
463 /* Code to map the file and populate the mapping with data
46b20ca1 464 omitted. */
878cc348
MK
465
466 /* If a \(aqseals\(aq command\-line argument was supplied, set some
46b20ca1 467 seals on the file. */
878cc348
MK
468
469 if (seals_arg != NULL) {
470 seals = 0;
471
472 if (strchr(seals_arg, \(aqg\(aq) != NULL)
473 seals |= F_SEAL_GROW;
474 if (strchr(seals_arg, \(aqs\(aq) != NULL)
475 seals |= F_SEAL_SHRINK;
476 if (strchr(seals_arg, \(aqw\(aq) != NULL)
477 seals |= F_SEAL_WRITE;
98eff9f7
JFG
478 if (strchr(seals_arg, \(aqW\(aq) != NULL)
479 seals |= F_SEAL_FUTURE_WRITE;
878cc348
MK
480 if (strchr(seals_arg, \(aqS\(aq) != NULL)
481 seals |= F_SEAL_SEAL;
482
483 if (fcntl(fd, F_ADD_SEALS, seals) == \-1)
484 errExit("fcntl");
485 }
486
487 /* Keep running, so that the file created by memfd_create()
46b20ca1 488 continues to exist. */
878cc348
MK
489
490 pause();
491
492 exit(EXIT_SUCCESS);
493}
408731d4 494.EE
33857069 495.\" SRC END
878cc348
MK
496.SS Program source: t_get_seals.c
497\&
33857069 498.\" SRC BEGIN (t_get_seals.c)
408731d4 499.EX
d6d367c7 500#define _GNU_SOURCE
878cc348 501#include <fcntl.h>
878cc348 502#include <stdio.h>
47b94bbd 503#include <stdlib.h>
878cc348 504
d1a71985 505#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \e
878cc348
MK
506 } while (0)
507
508int
509main(int argc, char *argv[])
510{
511 int fd;
512 unsigned int seals;
513
514 if (argc != 2) {
d1a71985 515 fprintf(stderr, "%s /proc/PID/fd/FD\en", argv[0]);
878cc348
MK
516 exit(EXIT_FAILURE);
517 }
518
519 fd = open(argv[1], O_RDWR);
520 if (fd == \-1)
521 errExit("open");
522
523 seals = fcntl(fd, F_GET_SEALS);
524 if (seals == \-1)
525 errExit("fcntl");
526
527 printf("Existing seals:");
528 if (seals & F_SEAL_SEAL)
529 printf(" SEAL");
530 if (seals & F_SEAL_GROW)
531 printf(" GROW");
532 if (seals & F_SEAL_WRITE)
533 printf(" WRITE");
98eff9f7
JFG
534 if (seals & F_SEAL_FUTURE_WRITE)
535 printf(" FUTURE_WRITE");
878cc348
MK
536 if (seals & F_SEAL_SHRINK)
537 printf(" SHRINK");
d1a71985 538 printf("\en");
878cc348
MK
539
540 /* Code to map the file and access the contents of the
46b20ca1 541 resulting mapping omitted. */
878cc348
MK
542
543 exit(EXIT_SUCCESS);
544}
408731d4 545.EE
33857069 546.\" SRC END
73fc0b53 547.SH SEE ALSO
73fc0b53 548.BR fcntl (2),
3a71dcd6 549.BR ftruncate (2),
e817f70a 550.BR memfd_secret (2),
3a71dcd6 551.BR mmap (2),
46832662
MK
552.BR shmget (2),
553.BR shm_open (3)