]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/memfd_create.2
memfd_create.2: wfix
[thirdparty/man-pages.git] / man2 / memfd_create.2
1 .\" Copyright (C) 2014 Michael Kerrisk <mtk.manpages@gmail.com>
2 .\" and Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
3 .\"
4 .\" %%%LICENSE_START(GPLv2+)
5 .\"
6 .\" This program is free software; you can redistribute it and/or modify
7 .\" it under the terms of the GNU General Public License as published by
8 .\" the Free Software Foundation; either version 2 of the License, or
9 .\" (at your option) any later version.
10 .\"
11 .\" This program is distributed in the hope that it will be useful,
12 .\" but WITHOUT ANY WARRANTY; without even the implied warranty of
13 .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 .\" GNU General Public License for more details.
15 .\"
16 .\" You should have received a copy of the GNU General Public
17 .\" License along with this manual; if not, see
18 .\" <http://www.gnu.org/licenses/>.
19 .\" %%%LICENSE_END
20 .\"
21 .TH MEMFD_CREATE 2 2016-03-15 Linux "Linux Programmer's Manual"
22 .SH NAME
23 memfd_create \- create an anonymous file
24 .SH SYNOPSIS
25 .B #include <sys/memfd.h>
26 .sp
27 .BI "int memfd_create(const char *" name ", unsigned int " flags ");"
28 .SH DESCRIPTION
29 .BR memfd_create ()
30 creates an anonymous file and returns a file descriptor that refers to it.
31 The file behaves like a regular file, and so can be modified,
32 truncated, memory-mapped, and so on.
33 However, unlike a regular file,
34 it lives in RAM and has a volatile backing storage.
35 Once all references to the file are dropped, it is automatically released.
36 Anonymous memory is used for all backing pages of the file.
37 Therefore, files created by
38 .BR memfd_create ()
39 have the same semantics as other anonymous
40 .\" David Herrmann:
41 .\" memfd uses VM_NORESERVE so each page is accounted on first access.
42 .\" This means, the overcommit-limits (see __vm_enough_memory()) and the
43 .\" memory-cgroup limits (mem_cgroup_try_charge()) are applied. Note that
44 .\" those are accounted on "current" and "current->mm", that is, the
45 .\" process doing the first page access.
46 memory allocations such as those allocated using
47 .BR mmap (2)
48 with the
49 .BR MAP_ANONYMOUS
50 flag.
51
52 The initial size of the file is set to 0.
53 Following the call, the file size should be set using
54 .BR ftruncate (2).
55 (Alternatively, the file may be populated by calls to
56 .BR write (2)
57 or similar.)
58
59 The name supplied in
60 .I name
61 is used as a filename and will be displayed
62 as the target of the corresponding symbolic link in the directory
63 .IR /proc/self/fd/ .
64 The displayed name is always prefixed with
65 .IR memfd:
66 and serves only for debugging purposes.
67 Names do not affect the behavior of the file descriptor,
68 and as such multiple files can have the same name without any side effects.
69
70 The following values may be bitwise ORed in
71 .IR flags
72 to change the behavior of
73 .BR memfd_create ():
74 .TP
75 .BR MFD_CLOEXEC
76 Set the close-on-exec
77 .RB ( FD_CLOEXEC )
78 flag on the new file descriptor.
79 See the description of the
80 .B O_CLOEXEC
81 flag in
82 .BR open (2)
83 for reasons why this may be useful.
84 .TP
85 .BR MFD_ALLOW_SEALING
86 Allow sealing operations on this file.
87 See the discussion of the
88 .B F_ADD_SEALS
89 and
90 .BR F_GET_SEALS
91 operations in
92 .BR fcntl (2),
93 and also NOTES, below.
94 The initial set of seals is empty.
95 If this flag is not set, the initial set of seals will be
96 .BR F_SEAL_SEAL ,
97 meaning that no other seals can be set on the file.
98 .\" FIXME Why is the MFD_ALLOW_SEALING behavior not simply the default?
99 .\" Is it worth adding some text explaining this?
100 .PP
101 Unused bits in
102 .I flags
103 must be 0.
104
105 As its return value,
106 .BR memfd_create ()
107 returns a new file descriptor that can be used to refer to the file.
108 This file descriptor is opened for both reading and writing
109 .RB ( O_RDWR )
110 and
111 .B O_LARGEFILE
112 is set for the file descriptor.
113
114 With respect to
115 .BR fork (2)
116 and
117 .BR execve (2),
118 the usual semantics apply for the file descriptor created by
119 .BR memfd_create ().
120 A copy of the file descriptor is inherited by the child produced by
121 .BR fork (2)
122 and refers to the same file.
123 The file descriptor is preserved across
124 .BR execve (2),
125 unless the close-on-exec flag has been set.
126 .SH RETURN VALUE
127 On success,
128 .BR memfd_create ()
129 returns a new file descriptor.
130 On error, \-1 is returned and
131 .I errno
132 is set to indicate the error.
133 .SH ERRORS
134 .TP
135 .B EFAULT
136 The address in
137 .IR name
138 points to invalid memory.
139 .TP
140 .B EINVAL
141 An unsupported value was specified in one of the arguments:
142 .I flags
143 included unknown bits, or
144 .I name
145 was too long.
146 .TP
147 .B EMFILE
148 The per-process limit on the number of open file descriptors has been reached.
149 .TP
150 .B ENFILE
151 The system-wide limit on the total number of open files has been reached.
152 .TP
153 .B ENOMEM
154 There was insufficient memory to create a new anonymous file.
155 .SH VERSIONS
156 The
157 .BR memfd_create ()
158 system call first appeared in Linux 3.17.
159 .\" FIXME . When glibc support appears, update the following sentence:
160 Support in the GNU C library is pending.
161 .SH CONFORMING TO
162 The
163 .BR memfd_create ()
164 system call is Linux-specific.
165 .SH NOTES
166 .\" See also http://lwn.net/Articles/593918/
167 .\" and http://lwn.net/Articles/594919/ and http://lwn.net/Articles/591108/
168 The
169 .BR memfd_create ()
170 system call provides a simple alternative to manually mounting a
171 .I tmpfs
172 filesystem and creating and opening a file in that filesystem.
173 The primary purpose of
174 .BR memfd_create ()
175 is to create files and associated file descriptors that are
176 used with the file-sealing APIs provided by
177 .BR fcntl (2).
178
179 The
180 .BR memfd_create ()
181 system call also has uses without file sealing
182 (which is why file sealing is disabled, unless explicitly requested with the
183 .BR MFD_ALLOW_SEALING
184 flag).
185 In particular, it can be used as an alternative to creating files in
186 .IR /tmp
187 or as an alternative to using the
188 .BR open (2)
189 .B O_TMPFILE
190 flag in cases where there is no intention to actually link the
191 resulting file into the filesystem.
192 .SS File sealing
193 In the absence of file sealing,
194 processes that communicate via shared memory must either trust each other,
195 or take measures to deal with the possibility that an untrusted peer
196 may manipulate the shared memory region in problematic ways.
197 For example, an untrusted peer might modify the contents of the
198 shared memory at any time, or shrink the shared memory region.
199 The former possibility leaves the local process vulnerable to
200 time-of-check-to-time-of-use race conditions
201 (typically dealt with by copying data from
202 the shared memory region before checking and using it).
203 The latter possibility leaves the local process vulnerable to
204 .BR SIGBUS
205 signals when an attempt is made to access a now-nonexistent
206 location in the shared memory region.
207 (Dealing with this possibility necessitates the use of a handler for the
208 .BR SIGBUS
209 signal.)
210
211 Dealing with untrusted peers imposes extra complexity on
212 code that employs shared memory.
213 Memory sealing enables that extra complexity to be eliminated,
214 by allowing a process to operate secure in the knowledge that
215 its peer can't modify the shared memory in an undesired fashion.
216
217 An example of the usage of the sealing mechanism is as follows:
218 .IP 1. 3
219 The first process creates a
220 .I tmpfs
221 file using
222 .BR memfd_create ().
223 The call yields a file descriptor used in subsequent steps.
224 .IP 2.
225 The first process
226 sizes the file created in the previous step using
227 .BR ftruncate (2),
228 maps it using
229 .BR mmap (2),
230 and populates the shared memory with the desired data.
231 .IP 3.
232 The first process uses the
233 .BR fcntl (2)
234 .B F_ADD_SEALS
235 operation to place one or more seals on the file,
236 in order to restrict further modifications on the file.
237 (If placing the seal
238 .BR F_SEAL_WRITE ,
239 then it will be necessary to first unmap the shared writable mapping
240 created in the previous step.)
241 .IP 4.
242 A second process obtains a file descriptor for the
243 .I tmpfs
244 file and maps it.
245 Among the possible ways in which this could happen are the following:
246 .RS
247 .IP * 3
248 The process that called
249 .BR memfd_create ()
250 could transfer the resulting file descriptor to the second process
251 via a UNIX domain socket (see
252 .BR unix (7)
253 and
254 .BR cmsg (3)).
255 The second process then maps the file using
256 .BR mmap (2).
257 .IP *
258 The second process is created via
259 .BR fork (2)
260 and thus automatically inherits the file descriptor and mapping.
261 (Note that in this case and the next,
262 there is a natural trust relationship between the two processes,
263 since they are running under the same user ID.
264 Therefore, file sealing would not normally be necessary.)
265 .IP *
266 The second process opens the file
267 .IR /proc/<pid>/fd/<fd> ,
268 where
269 .I <pid>
270 is the PID of the first process (the one that called
271 .BR memfd_create ()),
272 and
273 .I <fd>
274 is the number of the file descriptor returned by the call to
275 .BR memfd_create ()
276 in that process.
277 The second process then maps the file using
278 .BR mmap (2).
279 .RE
280 .IP 5.
281 The second process uses the
282 .BR fcntl (2)
283 .B F_GET_SEALS
284 operation to retrieve the bit mask of seals
285 that has been applied to the file.
286 This bit mask can be inspected in order to determine
287 what kinds of restrictions have been placed on file modifications.
288 If desired, the second process can apply further seals
289 to impose additional restrictions (so long as the
290 .BR F_SEAL_SEAL
291 seal has not yet been applied).
292 .SH EXAMPLE
293 Below are shown two example programs that demonstrate the use of
294 .BR memfd_create ()
295 and the file sealing API.
296
297 The first program,
298 .IR t_memfd_create.c ,
299 creates a
300 .I tmpfs
301 file using
302 .BR memfd_create (),
303 sets a size for the file, maps it into memory,
304 and optionally places some seals on the file.
305 The program accepts up to three command-line arguments,
306 of which the first two are required.
307 The first argument is the name to associate with the file,
308 the second argument is the size to be set for the file,
309 and the optional third argument is a string of characters that specify
310 seals to be set on the file.
311
312 The second program,
313 .IR t_get_seals.c ,
314 can be used to open an existing file that was created via
315 .BR memfd_create ()
316 and inspect the set of seals that have been applied to that file.
317
318 The following shell session demonstrates the use of these programs.
319 First we create a
320 .I tmpfs
321 file and set some seals on it:
322
323 .in +4n
324 .nf
325 $ \fB./t_memfd_create my_memfd_file 4096 sw &\fP
326 [1] 11775
327 PID: 11775; fd: 3; /proc/11775/fd/3
328 .fi
329 .in
330
331 At this point, the
332 .I t_memfd_create
333 program continues to run in the background.
334 From another program, we can obtain a file descriptor for the
335 file created by
336 .BR memfd_create ()
337 by opening the
338 .IR /proc/PID/fd
339 file that corresponds to the file descriptor opened by
340 .BR memfd_create ().
341 Using that pathname, we inspect the content of the
342 .IR /proc/PID/fd
343 symbolic link, and use our
344 .I t_get_seals
345 program to view the seals that have been placed on the file:
346
347 .in +4n
348 .nf
349 $ \fBreadlink /proc/11775/fd/3\fP
350 /memfd:my_memfd_file (deleted)
351 $ \fB./t_get_seals /proc/11775/fd/3\fP
352 Existing seals: WRITE SHRINK
353 .fi
354 .in
355 .SS Program source: t_memfd_create.c
356 \&
357 .nf
358 #include <sys/memfd.h>
359 #include <fcntl.h>
360 #include <stdlib.h>
361 #include <unistd.h>
362 #include <string.h>
363 #include <stdio.h>
364
365 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
366 } while (0)
367
368 int
369 main(int argc, char *argv[])
370 {
371 int fd;
372 unsigned int seals;
373 char *addr;
374 char *name, *seals_arg;
375 ssize_t len;
376
377 if (argc < 3) {
378 fprintf(stderr, "%s name size [seals]\\n", argv[0]);
379 fprintf(stderr, "\\t\(aqseals\(aq can contain any of the "
380 "following characters:\\n");
381 fprintf(stderr, "\\t\\tg \- F_SEAL_GROW\\n");
382 fprintf(stderr, "\\t\\ts \- F_SEAL_SHRINK\\n");
383 fprintf(stderr, "\\t\\tw \- F_SEAL_WRITE\\n");
384 fprintf(stderr, "\\t\\tS \- F_SEAL_SEAL\\n");
385 exit(EXIT_FAILURE);
386 }
387
388 name = argv[1];
389 len = atoi(argv[2]);
390 seals_arg = argv[3];
391
392 /* Create an anonymous file in tmpfs; allow seals to be
393 placed on the file */
394
395 fd = memfd_create(name, MFD_ALLOW_SEALING);
396 if (fd == \-1)
397 errExit("memfd_create");
398
399 /* Size the file as specified on the command line */
400
401 if (ftruncate(fd, len) == \-1)
402 errExit("truncate");
403
404 printf("PID: %ld; fd: %d; /proc/%ld/fd/%d\\n",
405 (long) getpid(), fd, (long) getpid(), fd);
406
407 /* Code to map the file and populate the mapping with data
408 omitted */
409
410 /* If a \(aqseals\(aq command\-line argument was supplied, set some
411 seals on the file */
412
413 if (seals_arg != NULL) {
414 seals = 0;
415
416 if (strchr(seals_arg, \(aqg\(aq) != NULL)
417 seals |= F_SEAL_GROW;
418 if (strchr(seals_arg, \(aqs\(aq) != NULL)
419 seals |= F_SEAL_SHRINK;
420 if (strchr(seals_arg, \(aqw\(aq) != NULL)
421 seals |= F_SEAL_WRITE;
422 if (strchr(seals_arg, \(aqS\(aq) != NULL)
423 seals |= F_SEAL_SEAL;
424
425 if (fcntl(fd, F_ADD_SEALS, seals) == \-1)
426 errExit("fcntl");
427 }
428
429 /* Keep running, so that the file created by memfd_create()
430 continues to exist */
431
432 pause();
433
434 exit(EXIT_SUCCESS);
435 }
436 .fi
437 .SS Program source: t_get_seals.c
438 \&
439 .nf
440 #include <sys/memfd.h>
441 #include <fcntl.h>
442 #include <unistd.h>
443 #include <stdlib.h>
444 #include <string.h>
445 #include <stdio.h>
446
447 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
448 } while (0)
449
450 int
451 main(int argc, char *argv[])
452 {
453 int fd;
454 unsigned int seals;
455
456 if (argc != 2) {
457 fprintf(stderr, "%s /proc/PID/fd/FD\\n", argv[0]);
458 exit(EXIT_FAILURE);
459 }
460
461 fd = open(argv[1], O_RDWR);
462 if (fd == \-1)
463 errExit("open");
464
465 seals = fcntl(fd, F_GET_SEALS);
466 if (seals == \-1)
467 errExit("fcntl");
468
469 printf("Existing seals:");
470 if (seals & F_SEAL_SEAL)
471 printf(" SEAL");
472 if (seals & F_SEAL_GROW)
473 printf(" GROW");
474 if (seals & F_SEAL_WRITE)
475 printf(" WRITE");
476 if (seals & F_SEAL_SHRINK)
477 printf(" SHRINK");
478 printf("\\n");
479
480 /* Code to map the file and access the contents of the
481 resulting mapping omitted */
482
483 exit(EXIT_SUCCESS);
484 }
485 .fi
486 .SH SEE ALSO
487 .BR fcntl (2),
488 .BR ftruncate (2),
489 .BR mmap (2),
490 .BR shmget (2),
491 .BR shm_open (3)