]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/memfd_create.2
accept.2, dup.2, eventfd.2, execve.2, fcntl.2, memfd_create.2, open.2, perf_event_ope...
[thirdparty/man-pages.git] / man2 / memfd_create.2
CommitLineData
878cc348
MK
1.\" Copyright (C) 2014 Michael Kerrisk <mtk.manpages@gmail.com>
2.\" and Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
73fc0b53 3.\"
46832662 4.\" %%%LICENSE_START(GPLv2+)
771e13d4 5.\"
73fc0b53
DH
6.\" This program is free software; you can redistribute it and/or modify
7.\" it under the terms of the GNU General Public License as published by
8.\" the Free Software Foundation; either version 2 of the License, or
9.\" (at your option) any later version.
10.\"
11.\" This program is distributed in the hope that it will be useful,
12.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
13.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14.\" GNU General Public License for more details.
15.\"
16.\" You should have received a copy of the GNU General Public
17.\" License along with this manual; if not, see
18.\" <http://www.gnu.org/licenses/>.
19.\" %%%LICENSE_END
20.\"
8392a3b3 21.TH MEMFD_CREATE 2 2015-01-22 Linux "Linux Programmer's Manual"
73fc0b53
DH
22.SH NAME
23memfd_create \- create an anonymous file
24.SH SYNOPSIS
25.B #include <sys/memfd.h>
26.sp
27.BI "int memfd_create(const char *" name ", unsigned int " flags ");"
28.SH DESCRIPTION
29.BR memfd_create ()
f00ce3a0
MK
30creates an anonymous file and returns a file descriptor that refers to it.
31The file behaves like a regular file, and so can be modified,
afc5ca18 32truncated, memory-mapped, and so on.
f00ce3a0
MK
33However, unlike a regular file,
34it lives in RAM and has a volatile backing storage.
cb5b73cc
MK
35Once all references to the file are dropped, it is automatically released.
36Anonymous memory is used for all backing pages of the file.
f00ce3a0
MK
37Therefore, files created by
38.BR memfd_create ()
46832662
MK
39have the same semantics as other anonymous
40.\" David Herrmann:
41.\" memfd uses VM_NORESERVE so each page is accounted on first access.
42.\" This means, the overcommit-limits (see __vm_enough_memory()) and the
43.\" memory-cgroup limits (mem_cgroup_try_charge()) are applied. Note that
44.\" those are accounted on "current" and "current->mm", that is, the
45.\" process doing the first page access.
f00ce3a0 46memory allocations such as those allocated using
73fc0b53 47.BR mmap (2)
f00ce3a0
MK
48with the
49.BR MAP_ANONYMOUS
50flag.
73fc0b53
DH
51
52The initial size of the file is set to 0.
f00ce3a0
MK
53Following the call, the file size should be set using
54.BR ftruncate (2).
46832662
MK
55(Alternatively, the file may be populated by calls to
56.BR write (2)
57or similar.)
f00ce3a0
MK
58
59The name supplied in
73fc0b53 60.I name
46832662 61is used as a filename and will be displayed
f00ce3a0 62as the target of the corresponding symbolic link in the directory
73fc0b53 63.IR /proc/self/fd/ .
f00ce3a0
MK
64The displayed name is always prefixed with
65.IR memfd:
66and serves only for debugging purposes.
46832662 67Names do not affect the behavior of the file descriptor,
cb5b73cc 68and as such multiple files can have the same name without any side effects.
73fc0b53
DH
69
70The following values may be bitwise ORed in
71.IR flags
553deb41 72to change the behavior of
73fc0b53
DH
73.BR memfd_create ():
74.TP
75.BR MFD_CLOEXEC
76Set the close-on-exec
77.RB ( FD_CLOEXEC )
78flag on the new file descriptor.
79See the description of the
80.B O_CLOEXEC
81flag in
82.BR open (2)
cb5b73cc 83for reasons why this may be useful.
73fc0b53
DH
84.TP
85.BR MFD_ALLOW_SEALING
3a71dcd6 86Allow sealing operations on this file.
e8a0dfae 87See the discussion of the
73fc0b53
DH
88.B F_ADD_SEALS
89and
e8a0dfae
MK
90.BR F_GET_SEALS
91operations in
92.BR fcntl (2),
3a71dcd6 93and also NOTES, below.
cb5b73cc
MK
94The initial set of seals is empty.
95If this flag is not set, the initial set of seals will be
f00ce3a0
MK
96.BR F_SEAL_SEAL ,
97meaning that no other seals can be set on the file.
98.\" FIXME Why is the MFD_ALLOW_SEALING behavior not simply the default?
99.\" Is it worth adding some text explaining this?
73fc0b53 100.PP
f00ce3a0
MK
101Unused bits in
102.I flags
103must be 0.
73fc0b53
DH
104
105As its return value,
106.BR memfd_create ()
107returns a new file descriptor that can be used to refer to the file.
f00ce3a0
MK
108This file descriptor is opened for both reading and writing
109.RB ( O_RDWR )
110and
111.B O_LARGEFILE
112is set for the descriptor.
113
114With respect to
115.BR fork (2)
116and
117.BR execve (2),
118the usual semantics apply for the file descriptor created by
119.BR memfd_create ().
120A copy of the file descriptor is inherited by the child produced by
121.BR fork (2)
122and refers to the same file.
123The file descriptor is preserved across
73fc0b53
DH
124.BR execve (2),
125unless the close-on-exec flag has been set.
126.SH RETURN VALUE
127On success,
128.BR memfd_create ()
129returns a new file descriptor.
130On error, \-1 is returned and
131.I errno
132is set to indicate the error.
133.SH ERRORS
134.TP
f00ce3a0
MK
135.B EFAULT
136The address in
137.IR name
138points to invalid memory.
139.TP
73fc0b53 140.B EINVAL
20acd21a
MK
141An unsupported value was specified in one of the arguments:
142.I flags
143included unknown bits, or
144.I name
145was too long.
73fc0b53
DH
146.TP
147.B EMFILE
26c32fab 148The per-process limit on the number of open file descriptors has been reached.
73fc0b53
DH
149.TP
150.B ENFILE
cb5b73cc 151The system-wide limit on the total number of open files has been reached.
73fc0b53 152.TP
73fc0b53
DH
153.B ENOMEM
154There was insufficient memory to create a new anonymous file.
155.SH VERSIONS
f00ce3a0
MK
156The
157.BR memfd_create ()
158system call first appeared in Linux 3.17.
8b987bc3
MK
159.\" FIXME . When glibc support appears, update the following sentence:
160Support in the GNU C library is pending.
73fc0b53 161.SH CONFORMING TO
f00ce3a0 162The
73fc0b53 163.BR memfd_create ()
f00ce3a0 164system call is Linux-specific.
51fa3cbf
MK
165.SH NOTES
166.\" See also http://lwn.net/Articles/593918/
167.\" and http://lwn.net/Articles/594919/ and http://lwn.net/Articles/591108/
168The
169.BR memfd_create ()
170system call provides a simple alternative to manually mounting a
171.I tmpfs
172filesystem and creating and opening a file in that filesystem.
173The primary purpose of
174.BR memfd_create ()
175is to create files and associated file descriptors that are
176used with the file-sealing APIs provided by
177.BR fcntl (2).
46832662
MK
178
179The
180.BR memfd_create ()
181system call also has uses without file sealing
182(which is why file sealing is disabled, unless explicitly requested with the
183.BR MFD_ALLOW_SEALING
184flag).
185In particular, it can be used as an alternative to creating files in
186.IR /tmp
187or as an alternative to using the
188.BR open (2)
189.B O_TMPFILE
190flag in cases where there is no intention to actually link the
191resulting file into the filesystem.
51fa3cbf
MK
192.SS File sealing
193In the absence of file sealing,
194processes that communicate via shared memory must either trust each other,
195or take measures to deal with the possibility that an untrusted peer
db61d4b2 196may manipulate the shared memory region in problematic ways.
51fa3cbf
MK
197For example, an untrusted peer might modify the contents of the
198shared memory at any time, or shrink the shared memory region.
199The former possibility leaves the local process vulnerable to
200time-of-check-to-time-of-use race conditions
201(typically dealt with by copying data from
202the shared memory region before checking and using it).
203The latter possibility leaves the local process vulnerable to
204.BR SIGBUS
205signals when an attempt is made to access a now-nonexistent
206location in the shared memory region.
207(Dealing with this possibility necessitates the use of a handler for the
208.BR SIGBUS
209signal.)
210
211Dealing with untrusted peers imposes extra complexity on
212code that employs shared memory.
213Memory sealing enables that extra complexity to be eliminated,
214by allowing a process to operate secure in the knowledge that
215its peer can't modify the shared memory in an undesired fashion.
216
217An example of the usage of the sealing mechanism is as follows:
218
219.IP 1. 3
220The first process creates a
221.I tmpfs
771e13d4 222file using
51fa3cbf
MK
223.BR memfd_create ().
224The call yields a file descriptor used in subsequent steps.
225.IP 2.
226The first process
227sizes the file created in the previous step using
228.BR ftruncate (2),
229maps it using
230.BR mmap (2),
231and populates the shared memory with the desired data.
232.IP 3.
233The first process uses the
234.BR fcntl (2)
235.B F_ADD_SEALS
236operation to place one or more seals on the file,
237in order to restrict further modifications on the file.
238(If placing the seal
239.BR F_SEAL_WRITE ,
240then it will be necessary to first unmap the shared writable mapping
241created in the previous step.)
242.IP 4.
243A second process obtains a file descriptor for the
244.I tmpfs
245file and maps it.
46832662 246Among the possible ways in which this could happen are the following:
51fa3cbf
MK
247.RS
248.IP * 3
46832662
MK
249The process that called
250.BR memfd_create ()
251could transfer the resulting file descriptor to the second process
252via a UNIX domain socket (see
253.BR unix (7)
254and
255.BR cmsg (3)).
256The second process then maps the file using
257.BR mmap (2).
258.IP *
51fa3cbf
MK
259The second process is created via
260.BR fork (2)
261and thus automatically inherits the file descriptor and mapping.
46832662
MK
262(Note that in this case and the next,
263there is a natural trust relationship between the two processes,
264since they are running under the same user ID.
265Therefore, file sealing would not normally be necessary.)
51fa3cbf 266.IP *
771e13d4 267The second process opens the file
51fa3cbf
MK
268.IR /proc/<pid>/fd/<fd> ,
269where
270.I <pid>
271is the PID of the first process (the one that called
272.BR memfd_create ()),
273and
274.I <fd>
275is the number of the file descriptor returned by the call to
276.BR memfd_create ()
277in that process.
278The second process then maps the file using
279.BR mmap (2).
280.RE
281.IP 5.
282The second process uses the
283.BR fcntl (2)
284.B F_GET_SEALS
4f32648e
MK
285operation to retrieve the bit mask of seals
286that has been applied to the file.
287This bit mask can be inspected in order to determine
288what kinds of restrictions have been placed on file modifications.
51fa3cbf
MK
289If desired, the second process can apply further seals
290to impose additional restrictions (so long as the
291.BR F_SEAL_SEAL
292seal has not yet been applied).
878cc348
MK
293.SH EXAMPLE
294Below are shown two example programs that demonstrate the use of
295.BR memfd_create ()
296and the file sealing API.
297
298The first program,
299.IR t_memfd_create.c ,
300creates a
301.I tmpfs
302file using
303.BR memfd_create (),
304sets a size for the file, maps it into memory,
305and optionally places some seals on the file.
306The program accepts up to three command-line arguments,
307of which the first two are required.
308The first argument is the name to associate with the file,
309the second argument is the size to be set for the file,
310and the optional third is a string of characters that specify
311seals to be set on the file.
312
313The second program,
314.IR t_get_seals.c ,
315can be used to open an existing file that was created via
316.BR memfd_create ()
317and inspect the set of seals that have been applied to that file.
318
319The following shell session demonstrates the use of these programs.
320First we create a
321.I tmpfs
322file and set some seals on it:
323
324.in +4n
325.nf
326$ \fB./t_memfd_create my_memfd_file 4096 sw &\fP
327[1] 11775
328PID: 11775; fd: 3; /proc/11775/fd/3
329.fi
330.in
331
332At this point, the
333.I t_memfd_create
334program continues to run in the background.
335From another program, we can obtain a file descriptor for the
46832662
MK
336file created by
337.BR memfd_create ()
338by opening the
878cc348
MK
339.IR /proc/PID/fd
340file that corresponds to the descriptor opened by
341.BR memfd_create ().
342Using that pathname, we inspect the content of the
343.IR /proc/PID/fd
344symbolic link, and use our
345.I t_get_seals
346program to view the seals that have been placed on the file:
347
348.in +4n
349.nf
350$ \fBreadlink /proc/11775/fd/3\fP
351/memfd:my_memfd_file (deleted)
352$ \fB./t_get_seals /proc/11775/fd/3\fP
353Existing seals: WRITE SHRINK
354.fi
355.in
356.SS Program source: t_memfd_create.c
357\&
358.nf
359#include <sys/memfd.h>
360#include <fcntl.h>
361#include <stdlib.h>
362#include <unistd.h>
363#include <string.h>
364#include <stdio.h>
365
366#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
367 } while (0)
368
369int
370main(int argc, char *argv[])
371{
372 int fd;
373 unsigned int seals;
374 char *addr;
375 char *name, *seals_arg;
376 ssize_t len;
377
378 if (argc < 3) {
379 fprintf(stderr, "%s name size [seals]\\n", argv[0]);
380 fprintf(stderr, "\\t\(aqseals\(aq can contain any of the "
381 "following characters:\\n");
382 fprintf(stderr, "\\t\\tg \- F_SEAL_GROW\\n");
383 fprintf(stderr, "\\t\\ts \- F_SEAL_SHRINK\\n");
384 fprintf(stderr, "\\t\\tw \- F_SEAL_WRITE\\n");
385 fprintf(stderr, "\\t\\tS \- F_SEAL_SEAL\\n");
386 exit(EXIT_FAILURE);
387 }
388
389 name = argv[1];
390 len = atoi(argv[2]);
391 seals_arg = argv[3];
392
393 /* Create an anonymous file in tmpfs; allow seals to be
394 placed on the file */
395
396 fd = memfd_create(name, MFD_ALLOW_SEALING);
397 if (fd == \-1)
398 errExit("memfd_create");
399
400 /* Size the file as specified on the command line */
401
402 if (ftruncate(fd, len) == \-1)
403 errExit("truncate");
404
405 printf("PID: %ld; fd: %d; /proc/%ld/fd/%d\\n",
406 (long) getpid(), fd, (long) getpid(), fd);
407
408 /* Code to map the file and populate the mapping with data
409 omitted */
410
411 /* If a \(aqseals\(aq command\-line argument was supplied, set some
412 seals on the file */
413
414 if (seals_arg != NULL) {
415 seals = 0;
416
417 if (strchr(seals_arg, \(aqg\(aq) != NULL)
418 seals |= F_SEAL_GROW;
419 if (strchr(seals_arg, \(aqs\(aq) != NULL)
420 seals |= F_SEAL_SHRINK;
421 if (strchr(seals_arg, \(aqw\(aq) != NULL)
422 seals |= F_SEAL_WRITE;
423 if (strchr(seals_arg, \(aqS\(aq) != NULL)
424 seals |= F_SEAL_SEAL;
425
426 if (fcntl(fd, F_ADD_SEALS, seals) == \-1)
427 errExit("fcntl");
428 }
429
430 /* Keep running, so that the file created by memfd_create()
431 continues to exist */
432
433 pause();
434
435 exit(EXIT_SUCCESS);
436}
437.fi
438.SS Program source: t_get_seals.c
439\&
440.nf
441#include <sys/memfd.h>
442#include <fcntl.h>
443#include <unistd.h>
444#include <stdlib.h>
445#include <string.h>
446#include <stdio.h>
447
448#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
449 } while (0)
450
451int
452main(int argc, char *argv[])
453{
454 int fd;
455 unsigned int seals;
456
457 if (argc != 2) {
458 fprintf(stderr, "%s /proc/PID/fd/FD\\n", argv[0]);
459 exit(EXIT_FAILURE);
460 }
461
462 fd = open(argv[1], O_RDWR);
463 if (fd == \-1)
464 errExit("open");
465
466 seals = fcntl(fd, F_GET_SEALS);
467 if (seals == \-1)
468 errExit("fcntl");
469
470 printf("Existing seals:");
471 if (seals & F_SEAL_SEAL)
472 printf(" SEAL");
473 if (seals & F_SEAL_GROW)
474 printf(" GROW");
475 if (seals & F_SEAL_WRITE)
476 printf(" WRITE");
477 if (seals & F_SEAL_SHRINK)
478 printf(" SHRINK");
479 printf("\\n");
480
481 /* Code to map the file and access the contents of the
482 resulting mapping omitted */
483
484 exit(EXIT_SUCCESS);
485}
486.fi
73fc0b53 487.SH SEE ALSO
73fc0b53 488.BR fcntl (2),
3a71dcd6
MK
489.BR ftruncate (2),
490.BR mmap (2),
46832662
MK
491.BR shmget (2),
492.BR shm_open (3)