]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/memfd_create.2
memfd_create.2: Fix header for memfd_create()
[thirdparty/man-pages.git] / man2 / memfd_create.2
1 .\" Copyright (C) 2014 Michael Kerrisk <mtk.manpages@gmail.com>
2 .\" and Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
3 .\"
4 .\" %%%LICENSE_START(GPLv2+)
5 .\"
6 .\" This program is free software; you can redistribute it and/or modify
7 .\" it under the terms of the GNU General Public License as published by
8 .\" the Free Software Foundation; either version 2 of the License, or
9 .\" (at your option) any later version.
10 .\"
11 .\" This program is distributed in the hope that it will be useful,
12 .\" but WITHOUT ANY WARRANTY; without even the implied warranty of
13 .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 .\" GNU General Public License for more details.
15 .\"
16 .\" You should have received a copy of the GNU General Public
17 .\" License along with this manual; if not, see
18 .\" <http://www.gnu.org/licenses/>.
19 .\" %%%LICENSE_END
20 .\"
21 .TH MEMFD_CREATE 2 2018-02-02 Linux "Linux Programmer's Manual"
22 .SH NAME
23 memfd_create \- create an anonymous file
24 .SH SYNOPSIS
25 .B #include <sys/mman.h>
26 .PP
27 .BI "int memfd_create(const char *" name ", unsigned int " flags ");"
28 .SH DESCRIPTION
29 .BR memfd_create ()
30 creates an anonymous file and returns a file descriptor that refers to it.
31 The file behaves like a regular file, and so can be modified,
32 truncated, memory-mapped, and so on.
33 However, unlike a regular file,
34 it lives in RAM and has volatile backing storage.
35 Once all references to the file are dropped, it is automatically released.
36 Anonymous memory is used for all backing pages of the file.
37 Therefore, files created by
38 .BR memfd_create ()
39 have the same semantics as other anonymous
40 .\" David Herrmann:
41 .\" memfd uses VM_NORESERVE so each page is accounted on first access.
42 .\" This means, the overcommit-limits (see __vm_enough_memory()) and the
43 .\" memory-cgroup limits (mem_cgroup_try_charge()) are applied. Note that
44 .\" those are accounted on "current" and "current->mm", that is, the
45 .\" process doing the first page access.
46 memory allocations such as those allocated using
47 .BR mmap (2)
48 with the
49 .BR MAP_ANONYMOUS
50 flag.
51 .PP
52 The initial size of the file is set to 0.
53 Following the call, the file size should be set using
54 .BR ftruncate (2).
55 (Alternatively, the file may be populated by calls to
56 .BR write (2)
57 or similar.)
58 .PP
59 The name supplied in
60 .I name
61 is used as a filename and will be displayed
62 as the target of the corresponding symbolic link in the directory
63 .IR /proc/self/fd/ .
64 The displayed name is always prefixed with
65 .IR memfd:
66 and serves only for debugging purposes.
67 Names do not affect the behavior of the file descriptor,
68 and as such multiple files can have the same name without any side effects.
69 .PP
70 The following values may be bitwise ORed in
71 .IR flags
72 to change the behavior of
73 .BR memfd_create ():
74 .TP
75 .BR MFD_CLOEXEC
76 Set the close-on-exec
77 .RB ( FD_CLOEXEC )
78 flag on the new file descriptor.
79 See the description of the
80 .B O_CLOEXEC
81 flag in
82 .BR open (2)
83 for reasons why this may be useful.
84 .TP
85 .BR MFD_ALLOW_SEALING
86 Allow sealing operations on this file.
87 See the discussion of the
88 .B F_ADD_SEALS
89 and
90 .BR F_GET_SEALS
91 operations in
92 .BR fcntl (2),
93 and also NOTES, below.
94 The initial set of seals is empty.
95 If this flag is not set, the initial set of seals will be
96 .BR F_SEAL_SEAL ,
97 meaning that no other seals can be set on the file.
98 .\" FIXME Why is the MFD_ALLOW_SEALING behavior not simply the default?
99 .\" Is it worth adding some text explaining this?
100 .TP
101 .BR MFD_HUGETLB " (since Linux 4.14)"
102 .\" commit 749df87bd7bee5a79cef073f5d032ddb2b211de8
103 The anonymous file will be created in the hugetlbfs filesystem using
104 huge pages.
105 See the Linux kernel source file
106 .I Documentation/vm/hugetlbpage.txt
107 for more information about hugetlbfs.
108 The hugetlbfs filesystem does not support file-sealing operations.
109 Therefore, specifying both
110 .B MFD_HUGETLB
111 and
112 .B MFD_ALLOW_SEALING
113 in
114 .I flags
115 is disallowed.
116 .TP
117 .BR MFD_HUGE_2MB ", " MFD_HUGE_1GB ", " "..."
118 Used in conjunction with
119 .B MFD_HUGETLB
120 to select alternative hugetlb page sizes (respectively, 2\ MB, 1\ GB, ...)
121 on systems that support multiple hugetlb page sizes.
122 Definitions for known
123 huge page sizes are included in the header file
124 .IR <linux/memfd.h> .
125 .IP
126 For details on encoding huge page sizes not included in the header file,
127 see the discussion of the similarly named constants in
128 .BR mmap (2).
129 .PP
130 Unused bits in
131 .I flags
132 must be 0.
133 .PP
134 As its return value,
135 .BR memfd_create ()
136 returns a new file descriptor that can be used to refer to the file.
137 This file descriptor is opened for both reading and writing
138 .RB ( O_RDWR )
139 and
140 .B O_LARGEFILE
141 is set for the file descriptor.
142 .PP
143 With respect to
144 .BR fork (2)
145 and
146 .BR execve (2),
147 the usual semantics apply for the file descriptor created by
148 .BR memfd_create ().
149 A copy of the file descriptor is inherited by the child produced by
150 .BR fork (2)
151 and refers to the same file.
152 The file descriptor is preserved across
153 .BR execve (2),
154 unless the close-on-exec flag has been set.
155 .SH RETURN VALUE
156 On success,
157 .BR memfd_create ()
158 returns a new file descriptor.
159 On error, \-1 is returned and
160 .I errno
161 is set to indicate the error.
162 .SH ERRORS
163 .TP
164 .B EFAULT
165 The address in
166 .IR name
167 points to invalid memory.
168 .TP
169 .B EINVAL
170 .I flags
171 included unknown bits.
172 .TP
173 .B EINVAL
174 .I name
175 was too long.
176 (The limit is
177 .\" NAME_MAX - strlen("memfd:")
178 249 bytes, excluding the terminating null byte.)
179 .TP
180 .B EINVAL
181 Both
182 .B MFD_HUGETLB
183 and
184 .B MFD_ALLOW_SEALING
185 were specified in
186 .IR flags .
187 .TP
188 .B EMFILE
189 The per-process limit on the number of open file descriptors has been reached.
190 .TP
191 .B ENFILE
192 The system-wide limit on the total number of open files has been reached.
193 .TP
194 .B ENOMEM
195 There was insufficient memory to create a new anonymous file.
196 .SH VERSIONS
197 The
198 .BR memfd_create ()
199 system call first appeared in Linux 3.17;
200 glibc support was added in version 2.27.
201 .SH CONFORMING TO
202 The
203 .BR memfd_create ()
204 system call is Linux-specific.
205 .SH NOTES
206 .PP
207 .\" See also http://lwn.net/Articles/593918/
208 .\" and http://lwn.net/Articles/594919/ and http://lwn.net/Articles/591108/
209 The
210 .BR memfd_create ()
211 system call provides a simple alternative to manually mounting a
212 .BR tmpfs (5)
213 filesystem and creating and opening a file in that filesystem.
214 The primary purpose of
215 .BR memfd_create ()
216 is to create files and associated file descriptors that are
217 used with the file-sealing APIs provided by
218 .BR fcntl (2).
219 .PP
220 The
221 .BR memfd_create ()
222 system call also has uses without file sealing
223 (which is why file-sealing is disabled, unless explicitly requested with the
224 .BR MFD_ALLOW_SEALING
225 flag).
226 In particular, it can be used as an alternative to creating files in
227 .IR /tmp
228 or as an alternative to using the
229 .BR open (2)
230 .B O_TMPFILE
231 flag in cases where there is no intention to actually link the
232 resulting file into the filesystem.
233 .SS File sealing
234 In the absence of file sealing,
235 processes that communicate via shared memory must either trust each other,
236 or take measures to deal with the possibility that an untrusted peer
237 may manipulate the shared memory region in problematic ways.
238 For example, an untrusted peer might modify the contents of the
239 shared memory at any time, or shrink the shared memory region.
240 The former possibility leaves the local process vulnerable to
241 time-of-check-to-time-of-use race conditions
242 (typically dealt with by copying data from
243 the shared memory region before checking and using it).
244 The latter possibility leaves the local process vulnerable to
245 .BR SIGBUS
246 signals when an attempt is made to access a now-nonexistent
247 location in the shared memory region.
248 (Dealing with this possibility necessitates the use of a handler for the
249 .BR SIGBUS
250 signal.)
251 .PP
252 Dealing with untrusted peers imposes extra complexity on
253 code that employs shared memory.
254 File sealing enables that extra complexity to be eliminated,
255 by allowing a process to operate secure in the knowledge that
256 its peer can't modify the shared memory in an undesired fashion.
257 .PP
258 An example of the usage of the sealing mechanism is as follows:
259 .IP 1. 3
260 The first process creates a
261 .BR tmpfs (5)
262 file using
263 .BR memfd_create ().
264 The call yields a file descriptor used in subsequent steps.
265 .IP 2.
266 The first process
267 sizes the file created in the previous step using
268 .BR ftruncate (2),
269 maps it using
270 .BR mmap (2),
271 and populates the shared memory with the desired data.
272 .IP 3.
273 The first process uses the
274 .BR fcntl (2)
275 .B F_ADD_SEALS
276 operation to place one or more seals on the file,
277 in order to restrict further modifications on the file.
278 (If placing the seal
279 .BR F_SEAL_WRITE ,
280 then it will be necessary to first unmap the shared writable mapping
281 created in the previous step.)
282 .IP 4.
283 A second process obtains a file descriptor for the
284 .BR tmpfs (5)
285 file and maps it.
286 Among the possible ways in which this could happen are the following:
287 .RS
288 .IP * 3
289 The process that called
290 .BR memfd_create ()
291 could transfer the resulting file descriptor to the second process
292 via a UNIX domain socket (see
293 .BR unix (7)
294 and
295 .BR cmsg (3)).
296 The second process then maps the file using
297 .BR mmap (2).
298 .IP *
299 The second process is created via
300 .BR fork (2)
301 and thus automatically inherits the file descriptor and mapping.
302 (Note that in this case and the next,
303 there is a natural trust relationship between the two processes,
304 since they are running under the same user ID.
305 Therefore, file sealing would not normally be necessary.)
306 .IP *
307 The second process opens the file
308 .IR /proc/<pid>/fd/<fd> ,
309 where
310 .I <pid>
311 is the PID of the first process (the one that called
312 .BR memfd_create ()),
313 and
314 .I <fd>
315 is the number of the file descriptor returned by the call to
316 .BR memfd_create ()
317 in that process.
318 The second process then maps the file using
319 .BR mmap (2).
320 .RE
321 .IP 5.
322 The second process uses the
323 .BR fcntl (2)
324 .B F_GET_SEALS
325 operation to retrieve the bit mask of seals
326 that have been applied to the file.
327 This bit mask can be inspected in order to determine
328 what kinds of restrictions have been placed on file modifications.
329 If desired, the second process can apply further seals
330 to impose additional restrictions (so long as the
331 .BR F_SEAL_SEAL
332 seal has not yet been applied).
333 .SH EXAMPLE
334 Below are shown two example programs that demonstrate the use of
335 .BR memfd_create ()
336 and the file sealing API.
337 .PP
338 The first program,
339 .IR t_memfd_create.c ,
340 creates a
341 .BR tmpfs (5)
342 file using
343 .BR memfd_create (),
344 sets a size for the file, maps it into memory,
345 and optionally places some seals on the file.
346 The program accepts up to three command-line arguments,
347 of which the first two are required.
348 The first argument is the name to associate with the file,
349 the second argument is the size to be set for the file,
350 and the optional third argument is a string of characters that specify
351 seals to be set on the file.
352 .PP
353 The second program,
354 .IR t_get_seals.c ,
355 can be used to open an existing file that was created via
356 .BR memfd_create ()
357 and inspect the set of seals that have been applied to that file.
358 .PP
359 The following shell session demonstrates the use of these programs.
360 First we create a
361 .BR tmpfs (5)
362 file and set some seals on it:
363 .PP
364 .in +4n
365 .EX
366 $ \fB./t_memfd_create my_memfd_file 4096 sw &\fP
367 [1] 11775
368 PID: 11775; fd: 3; /proc/11775/fd/3
369 .EE
370 .in
371 .PP
372 At this point, the
373 .I t_memfd_create
374 program continues to run in the background.
375 From another program, we can obtain a file descriptor for the
376 file created by
377 .BR memfd_create ()
378 by opening the
379 .IR /proc/[pid]/fd
380 file that corresponds to the file descriptor opened by
381 .BR memfd_create ().
382 Using that pathname, we inspect the content of the
383 .IR /proc/[pid]/fd
384 symbolic link, and use our
385 .I t_get_seals
386 program to view the seals that have been placed on the file:
387 .PP
388 .in +4n
389 .EX
390 $ \fBreadlink /proc/11775/fd/3\fP
391 /memfd:my_memfd_file (deleted)
392 $ \fB./t_get_seals /proc/11775/fd/3\fP
393 Existing seals: WRITE SHRINK
394 .EE
395 .in
396 .SS Program source: t_memfd_create.c
397 \&
398 .EX
399 #include <sys/mman.h>
400 #include <fcntl.h>
401 #include <stdlib.h>
402 #include <unistd.h>
403 #include <string.h>
404 #include <stdio.h>
405
406 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
407 } while (0)
408
409 int
410 main(int argc, char *argv[])
411 {
412 int fd;
413 unsigned int seals;
414 char *addr;
415 char *name, *seals_arg;
416 ssize_t len;
417
418 if (argc < 3) {
419 fprintf(stderr, "%s name size [seals]\\n", argv[0]);
420 fprintf(stderr, "\\t\(aqseals\(aq can contain any of the "
421 "following characters:\\n");
422 fprintf(stderr, "\\t\\tg \- F_SEAL_GROW\\n");
423 fprintf(stderr, "\\t\\ts \- F_SEAL_SHRINK\\n");
424 fprintf(stderr, "\\t\\tw \- F_SEAL_WRITE\\n");
425 fprintf(stderr, "\\t\\tS \- F_SEAL_SEAL\\n");
426 exit(EXIT_FAILURE);
427 }
428
429 name = argv[1];
430 len = atoi(argv[2]);
431 seals_arg = argv[3];
432
433 /* Create an anonymous file in tmpfs; allow seals to be
434 placed on the file */
435
436 fd = memfd_create(name, MFD_ALLOW_SEALING);
437 if (fd == \-1)
438 errExit("memfd_create");
439
440 /* Size the file as specified on the command line */
441
442 if (ftruncate(fd, len) == \-1)
443 errExit("truncate");
444
445 printf("PID: %ld; fd: %d; /proc/%ld/fd/%d\\n",
446 (long) getpid(), fd, (long) getpid(), fd);
447
448 /* Code to map the file and populate the mapping with data
449 omitted */
450
451 /* If a \(aqseals\(aq command\-line argument was supplied, set some
452 seals on the file */
453
454 if (seals_arg != NULL) {
455 seals = 0;
456
457 if (strchr(seals_arg, \(aqg\(aq) != NULL)
458 seals |= F_SEAL_GROW;
459 if (strchr(seals_arg, \(aqs\(aq) != NULL)
460 seals |= F_SEAL_SHRINK;
461 if (strchr(seals_arg, \(aqw\(aq) != NULL)
462 seals |= F_SEAL_WRITE;
463 if (strchr(seals_arg, \(aqS\(aq) != NULL)
464 seals |= F_SEAL_SEAL;
465
466 if (fcntl(fd, F_ADD_SEALS, seals) == \-1)
467 errExit("fcntl");
468 }
469
470 /* Keep running, so that the file created by memfd_create()
471 continues to exist */
472
473 pause();
474
475 exit(EXIT_SUCCESS);
476 }
477 .EE
478 .SS Program source: t_get_seals.c
479 \&
480 .EX
481 #include <sys/mman.h>
482 #include <fcntl.h>
483 #include <unistd.h>
484 #include <stdlib.h>
485 #include <string.h>
486 #include <stdio.h>
487
488 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
489 } while (0)
490
491 int
492 main(int argc, char *argv[])
493 {
494 int fd;
495 unsigned int seals;
496
497 if (argc != 2) {
498 fprintf(stderr, "%s /proc/PID/fd/FD\\n", argv[0]);
499 exit(EXIT_FAILURE);
500 }
501
502 fd = open(argv[1], O_RDWR);
503 if (fd == \-1)
504 errExit("open");
505
506 seals = fcntl(fd, F_GET_SEALS);
507 if (seals == \-1)
508 errExit("fcntl");
509
510 printf("Existing seals:");
511 if (seals & F_SEAL_SEAL)
512 printf(" SEAL");
513 if (seals & F_SEAL_GROW)
514 printf(" GROW");
515 if (seals & F_SEAL_WRITE)
516 printf(" WRITE");
517 if (seals & F_SEAL_SHRINK)
518 printf(" SHRINK");
519 printf("\\n");
520
521 /* Code to map the file and access the contents of the
522 resulting mapping omitted */
523
524 exit(EXIT_SUCCESS);
525 }
526 .EE
527 .SH SEE ALSO
528 .BR fcntl (2),
529 .BR ftruncate (2),
530 .BR mmap (2),
531 .BR shmget (2),
532 .BR shm_open (3)