]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/memfd_create.2
9e55b96b9f136a53e8fe6dbbc74cf9f1a65c1fcd
[thirdparty/man-pages.git] / man2 / memfd_create.2
1 .\" Copyright (C) 2014 Michael Kerrisk <mtk.manpages@gmail.com>
2 .\" and Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
3 .\"
4 .\" %%%LICENSE_START(GPLv2+)
5 .\"
6 .\" This program is free software; you can redistribute it and/or modify
7 .\" it under the terms of the GNU General Public License as published by
8 .\" the Free Software Foundation; either version 2 of the License, or
9 .\" (at your option) any later version.
10 .\"
11 .\" This program is distributed in the hope that it will be useful,
12 .\" but WITHOUT ANY WARRANTY; without even the implied warranty of
13 .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 .\" GNU General Public License for more details.
15 .\"
16 .\" You should have received a copy of the GNU General Public
17 .\" License along with this manual; if not, see
18 .\" <http://www.gnu.org/licenses/>.
19 .\" %%%LICENSE_END
20 .\"
21 .TH MEMFD_CREATE 2 2016-10-08 Linux "Linux Programmer's Manual"
22 .SH NAME
23 memfd_create \- create an anonymous file
24 .SH SYNOPSIS
25 .B #include <sys/memfd.h>
26 .PP
27 .BI "int memfd_create(const char *" name ", unsigned int " flags ");"
28
29 .IR Note :
30 There is no glibc wrapper for this system call; see NOTES.
31 .SH DESCRIPTION
32 .BR memfd_create ()
33 creates an anonymous file and returns a file descriptor that refers to it.
34 The file behaves like a regular file, and so can be modified,
35 truncated, memory-mapped, and so on.
36 However, unlike a regular file,
37 it lives in RAM and has a volatile backing storage.
38 Once all references to the file are dropped, it is automatically released.
39 Anonymous memory is used for all backing pages of the file.
40 Therefore, files created by
41 .BR memfd_create ()
42 have the same semantics as other anonymous
43 .\" David Herrmann:
44 .\" memfd uses VM_NORESERVE so each page is accounted on first access.
45 .\" This means, the overcommit-limits (see __vm_enough_memory()) and the
46 .\" memory-cgroup limits (mem_cgroup_try_charge()) are applied. Note that
47 .\" those are accounted on "current" and "current->mm", that is, the
48 .\" process doing the first page access.
49 memory allocations such as those allocated using
50 .BR mmap (2)
51 with the
52 .BR MAP_ANONYMOUS
53 flag.
54
55 The initial size of the file is set to 0.
56 Following the call, the file size should be set using
57 .BR ftruncate (2).
58 (Alternatively, the file may be populated by calls to
59 .BR write (2)
60 or similar.)
61
62 The name supplied in
63 .I name
64 is used as a filename and will be displayed
65 as the target of the corresponding symbolic link in the directory
66 .IR /proc/self/fd/ .
67 The displayed name is always prefixed with
68 .IR memfd:
69 and serves only for debugging purposes.
70 Names do not affect the behavior of the file descriptor,
71 and as such multiple files can have the same name without any side effects.
72
73 The following values may be bitwise ORed in
74 .IR flags
75 to change the behavior of
76 .BR memfd_create ():
77 .TP
78 .BR MFD_CLOEXEC
79 Set the close-on-exec
80 .RB ( FD_CLOEXEC )
81 flag on the new file descriptor.
82 See the description of the
83 .B O_CLOEXEC
84 flag in
85 .BR open (2)
86 for reasons why this may be useful.
87 .TP
88 .BR MFD_ALLOW_SEALING
89 Allow sealing operations on this file.
90 See the discussion of the
91 .B F_ADD_SEALS
92 and
93 .BR F_GET_SEALS
94 operations in
95 .BR fcntl (2),
96 and also NOTES, below.
97 The initial set of seals is empty.
98 If this flag is not set, the initial set of seals will be
99 .BR F_SEAL_SEAL ,
100 meaning that no other seals can be set on the file.
101 .\" FIXME Why is the MFD_ALLOW_SEALING behavior not simply the default?
102 .\" Is it worth adding some text explaining this?
103 .PP
104 Unused bits in
105 .I flags
106 must be 0.
107
108 As its return value,
109 .BR memfd_create ()
110 returns a new file descriptor that can be used to refer to the file.
111 This file descriptor is opened for both reading and writing
112 .RB ( O_RDWR )
113 and
114 .B O_LARGEFILE
115 is set for the file descriptor.
116
117 With respect to
118 .BR fork (2)
119 and
120 .BR execve (2),
121 the usual semantics apply for the file descriptor created by
122 .BR memfd_create ().
123 A copy of the file descriptor is inherited by the child produced by
124 .BR fork (2)
125 and refers to the same file.
126 The file descriptor is preserved across
127 .BR execve (2),
128 unless the close-on-exec flag has been set.
129 .SH RETURN VALUE
130 On success,
131 .BR memfd_create ()
132 returns a new file descriptor.
133 On error, \-1 is returned and
134 .I errno
135 is set to indicate the error.
136 .SH ERRORS
137 .TP
138 .B EFAULT
139 The address in
140 .IR name
141 points to invalid memory.
142 .TP
143 .B EINVAL
144 An unsupported value was specified in one of the arguments:
145 .I flags
146 included unknown bits, or
147 .I name
148 was too long.
149 .TP
150 .B EMFILE
151 The per-process limit on the number of open file descriptors has been reached.
152 .TP
153 .B ENFILE
154 The system-wide limit on the total number of open files has been reached.
155 .TP
156 .B ENOMEM
157 There was insufficient memory to create a new anonymous file.
158 .SH VERSIONS
159 The
160 .BR memfd_create ()
161 system call first appeared in Linux 3.17.
162 .SH CONFORMING TO
163 The
164 .BR memfd_create ()
165 system call is Linux-specific.
166 .SH NOTES
167 Glibc does not provide a wrapper for this system call; call it using
168 .BR syscall (2).
169
170 .\" See also http://lwn.net/Articles/593918/
171 .\" and http://lwn.net/Articles/594919/ and http://lwn.net/Articles/591108/
172 The
173 .BR memfd_create ()
174 system call provides a simple alternative to manually mounting a
175 .BR tmpfs (5)
176 filesystem and creating and opening a file in that filesystem.
177 The primary purpose of
178 .BR memfd_create ()
179 is to create files and associated file descriptors that are
180 used with the file-sealing APIs provided by
181 .BR fcntl (2).
182
183 The
184 .BR memfd_create ()
185 system call also has uses without file sealing
186 (which is why file-sealing is disabled, unless explicitly requested with the
187 .BR MFD_ALLOW_SEALING
188 flag).
189 In particular, it can be used as an alternative to creating files in
190 .IR /tmp
191 or as an alternative to using the
192 .BR open (2)
193 .B O_TMPFILE
194 flag in cases where there is no intention to actually link the
195 resulting file into the filesystem.
196 .SS File sealing
197 In the absence of file sealing,
198 processes that communicate via shared memory must either trust each other,
199 or take measures to deal with the possibility that an untrusted peer
200 may manipulate the shared memory region in problematic ways.
201 For example, an untrusted peer might modify the contents of the
202 shared memory at any time, or shrink the shared memory region.
203 The former possibility leaves the local process vulnerable to
204 time-of-check-to-time-of-use race conditions
205 (typically dealt with by copying data from
206 the shared memory region before checking and using it).
207 The latter possibility leaves the local process vulnerable to
208 .BR SIGBUS
209 signals when an attempt is made to access a now-nonexistent
210 location in the shared memory region.
211 (Dealing with this possibility necessitates the use of a handler for the
212 .BR SIGBUS
213 signal.)
214
215 Dealing with untrusted peers imposes extra complexity on
216 code that employs shared memory.
217 File sealing enables that extra complexity to be eliminated,
218 by allowing a process to operate secure in the knowledge that
219 its peer can't modify the shared memory in an undesired fashion.
220
221 An example of the usage of the sealing mechanism is as follows:
222 .IP 1. 3
223 The first process creates a
224 .BR tmpfs (5)
225 file using
226 .BR memfd_create ().
227 The call yields a file descriptor used in subsequent steps.
228 .IP 2.
229 The first process
230 sizes the file created in the previous step using
231 .BR ftruncate (2),
232 maps it using
233 .BR mmap (2),
234 and populates the shared memory with the desired data.
235 .IP 3.
236 The first process uses the
237 .BR fcntl (2)
238 .B F_ADD_SEALS
239 operation to place one or more seals on the file,
240 in order to restrict further modifications on the file.
241 (If placing the seal
242 .BR F_SEAL_WRITE ,
243 then it will be necessary to first unmap the shared writable mapping
244 created in the previous step.)
245 .IP 4.
246 A second process obtains a file descriptor for the
247 .BR tmpfs (5)
248 file and maps it.
249 Among the possible ways in which this could happen are the following:
250 .RS
251 .IP * 3
252 The process that called
253 .BR memfd_create ()
254 could transfer the resulting file descriptor to the second process
255 via a UNIX domain socket (see
256 .BR unix (7)
257 and
258 .BR cmsg (3)).
259 The second process then maps the file using
260 .BR mmap (2).
261 .IP *
262 The second process is created via
263 .BR fork (2)
264 and thus automatically inherits the file descriptor and mapping.
265 (Note that in this case and the next,
266 there is a natural trust relationship between the two processes,
267 since they are running under the same user ID.
268 Therefore, file sealing would not normally be necessary.)
269 .IP *
270 The second process opens the file
271 .IR /proc/<pid>/fd/<fd> ,
272 where
273 .I <pid>
274 is the PID of the first process (the one that called
275 .BR memfd_create ()),
276 and
277 .I <fd>
278 is the number of the file descriptor returned by the call to
279 .BR memfd_create ()
280 in that process.
281 The second process then maps the file using
282 .BR mmap (2).
283 .RE
284 .IP 5.
285 The second process uses the
286 .BR fcntl (2)
287 .B F_GET_SEALS
288 operation to retrieve the bit mask of seals
289 that have been applied to the file.
290 This bit mask can be inspected in order to determine
291 what kinds of restrictions have been placed on file modifications.
292 If desired, the second process can apply further seals
293 to impose additional restrictions (so long as the
294 .BR F_SEAL_SEAL
295 seal has not yet been applied).
296 .SH EXAMPLE
297 Below are shown two example programs that demonstrate the use of
298 .BR memfd_create ()
299 and the file sealing API.
300
301 The first program,
302 .IR t_memfd_create.c ,
303 creates a
304 .BR tmpfs (5)
305 file using
306 .BR memfd_create (),
307 sets a size for the file, maps it into memory,
308 and optionally places some seals on the file.
309 The program accepts up to three command-line arguments,
310 of which the first two are required.
311 The first argument is the name to associate with the file,
312 the second argument is the size to be set for the file,
313 and the optional third argument is a string of characters that specify
314 seals to be set on the file.
315
316 The second program,
317 .IR t_get_seals.c ,
318 can be used to open an existing file that was created via
319 .BR memfd_create ()
320 and inspect the set of seals that have been applied to that file.
321
322 The following shell session demonstrates the use of these programs.
323 First we create a
324 .BR tmpfs (5)
325 file and set some seals on it:
326
327 .in +4n
328 .nf
329 $ \fB./t_memfd_create my_memfd_file 4096 sw &\fP
330 [1] 11775
331 PID: 11775; fd: 3; /proc/11775/fd/3
332 .fi
333 .in
334
335 At this point, the
336 .I t_memfd_create
337 program continues to run in the background.
338 From another program, we can obtain a file descriptor for the
339 file created by
340 .BR memfd_create ()
341 by opening the
342 .IR /proc/[pid]/fd
343 file that corresponds to the file descriptor opened by
344 .BR memfd_create ().
345 Using that pathname, we inspect the content of the
346 .IR /proc/[pid]/fd
347 symbolic link, and use our
348 .I t_get_seals
349 program to view the seals that have been placed on the file:
350
351 .in +4n
352 .nf
353 $ \fBreadlink /proc/11775/fd/3\fP
354 /memfd:my_memfd_file (deleted)
355 $ \fB./t_get_seals /proc/11775/fd/3\fP
356 Existing seals: WRITE SHRINK
357 .fi
358 .in
359 .SS Program source: t_memfd_create.c
360 \&
361 .nf
362 #include <sys/memfd.h>
363 #include <fcntl.h>
364 #include <stdlib.h>
365 #include <unistd.h>
366 #include <string.h>
367 #include <stdio.h>
368
369 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
370 } while (0)
371
372 int
373 main(int argc, char *argv[])
374 {
375 int fd;
376 unsigned int seals;
377 char *addr;
378 char *name, *seals_arg;
379 ssize_t len;
380
381 if (argc < 3) {
382 fprintf(stderr, "%s name size [seals]\\n", argv[0]);
383 fprintf(stderr, "\\t\(aqseals\(aq can contain any of the "
384 "following characters:\\n");
385 fprintf(stderr, "\\t\\tg \- F_SEAL_GROW\\n");
386 fprintf(stderr, "\\t\\ts \- F_SEAL_SHRINK\\n");
387 fprintf(stderr, "\\t\\tw \- F_SEAL_WRITE\\n");
388 fprintf(stderr, "\\t\\tS \- F_SEAL_SEAL\\n");
389 exit(EXIT_FAILURE);
390 }
391
392 name = argv[1];
393 len = atoi(argv[2]);
394 seals_arg = argv[3];
395
396 /* Create an anonymous file in tmpfs; allow seals to be
397 placed on the file */
398
399 fd = memfd_create(name, MFD_ALLOW_SEALING);
400 if (fd == \-1)
401 errExit("memfd_create");
402
403 /* Size the file as specified on the command line */
404
405 if (ftruncate(fd, len) == \-1)
406 errExit("truncate");
407
408 printf("PID: %ld; fd: %d; /proc/%ld/fd/%d\\n",
409 (long) getpid(), fd, (long) getpid(), fd);
410
411 /* Code to map the file and populate the mapping with data
412 omitted */
413
414 /* If a \(aqseals\(aq command\-line argument was supplied, set some
415 seals on the file */
416
417 if (seals_arg != NULL) {
418 seals = 0;
419
420 if (strchr(seals_arg, \(aqg\(aq) != NULL)
421 seals |= F_SEAL_GROW;
422 if (strchr(seals_arg, \(aqs\(aq) != NULL)
423 seals |= F_SEAL_SHRINK;
424 if (strchr(seals_arg, \(aqw\(aq) != NULL)
425 seals |= F_SEAL_WRITE;
426 if (strchr(seals_arg, \(aqS\(aq) != NULL)
427 seals |= F_SEAL_SEAL;
428
429 if (fcntl(fd, F_ADD_SEALS, seals) == \-1)
430 errExit("fcntl");
431 }
432
433 /* Keep running, so that the file created by memfd_create()
434 continues to exist */
435
436 pause();
437
438 exit(EXIT_SUCCESS);
439 }
440 .fi
441 .SS Program source: t_get_seals.c
442 \&
443 .nf
444 #include <sys/memfd.h>
445 #include <fcntl.h>
446 #include <unistd.h>
447 #include <stdlib.h>
448 #include <string.h>
449 #include <stdio.h>
450
451 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
452 } while (0)
453
454 int
455 main(int argc, char *argv[])
456 {
457 int fd;
458 unsigned int seals;
459
460 if (argc != 2) {
461 fprintf(stderr, "%s /proc/PID/fd/FD\\n", argv[0]);
462 exit(EXIT_FAILURE);
463 }
464
465 fd = open(argv[1], O_RDWR);
466 if (fd == \-1)
467 errExit("open");
468
469 seals = fcntl(fd, F_GET_SEALS);
470 if (seals == \-1)
471 errExit("fcntl");
472
473 printf("Existing seals:");
474 if (seals & F_SEAL_SEAL)
475 printf(" SEAL");
476 if (seals & F_SEAL_GROW)
477 printf(" GROW");
478 if (seals & F_SEAL_WRITE)
479 printf(" WRITE");
480 if (seals & F_SEAL_SHRINK)
481 printf(" SHRINK");
482 printf("\\n");
483
484 /* Code to map the file and access the contents of the
485 resulting mapping omitted */
486
487 exit(EXIT_SUCCESS);
488 }
489 .fi
490 .SH SEE ALSO
491 .BR fcntl (2),
492 .BR ftruncate (2),
493 .BR mmap (2),
494 .BR shmget (2),
495 .BR shm_open (3)