]>
Commit | Line | Data |
---|---|---|
878cc348 MK |
1 | .\" Copyright (C) 2014 Michael Kerrisk <mtk.manpages@gmail.com> |
2 | .\" and Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com> | |
73fc0b53 | 3 | .\" |
46832662 | 4 | .\" %%%LICENSE_START(GPLv2+) |
771e13d4 | 5 | .\" |
73fc0b53 DH |
6 | .\" This program is free software; you can redistribute it and/or modify |
7 | .\" it under the terms of the GNU General Public License as published by | |
8 | .\" the Free Software Foundation; either version 2 of the License, or | |
9 | .\" (at your option) any later version. | |
10 | .\" | |
11 | .\" This program is distributed in the hope that it will be useful, | |
12 | .\" but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | .\" GNU General Public License for more details. | |
15 | .\" | |
16 | .\" You should have received a copy of the GNU General Public | |
17 | .\" License along with this manual; if not, see | |
18 | .\" <http://www.gnu.org/licenses/>. | |
19 | .\" %%%LICENSE_END | |
20 | .\" | |
8392a3b3 | 21 | .TH MEMFD_CREATE 2 2015-01-22 Linux "Linux Programmer's Manual" |
73fc0b53 DH |
22 | .SH NAME |
23 | memfd_create \- create an anonymous file | |
24 | .SH SYNOPSIS | |
25 | .B #include <sys/memfd.h> | |
26 | .sp | |
27 | .BI "int memfd_create(const char *" name ", unsigned int " flags ");" | |
28 | .SH DESCRIPTION | |
29 | .BR memfd_create () | |
f00ce3a0 MK |
30 | creates an anonymous file and returns a file descriptor that refers to it. |
31 | The file behaves like a regular file, and so can be modified, | |
afc5ca18 | 32 | truncated, memory-mapped, and so on. |
f00ce3a0 MK |
33 | However, unlike a regular file, |
34 | it lives in RAM and has a volatile backing storage. | |
cb5b73cc MK |
35 | Once all references to the file are dropped, it is automatically released. |
36 | Anonymous memory is used for all backing pages of the file. | |
f00ce3a0 MK |
37 | Therefore, files created by |
38 | .BR memfd_create () | |
46832662 MK |
39 | have the same semantics as other anonymous |
40 | .\" David Herrmann: | |
41 | .\" memfd uses VM_NORESERVE so each page is accounted on first access. | |
42 | .\" This means, the overcommit-limits (see __vm_enough_memory()) and the | |
43 | .\" memory-cgroup limits (mem_cgroup_try_charge()) are applied. Note that | |
44 | .\" those are accounted on "current" and "current->mm", that is, the | |
45 | .\" process doing the first page access. | |
f00ce3a0 | 46 | memory allocations such as those allocated using |
73fc0b53 | 47 | .BR mmap (2) |
f00ce3a0 MK |
48 | with the |
49 | .BR MAP_ANONYMOUS | |
50 | flag. | |
73fc0b53 DH |
51 | |
52 | The initial size of the file is set to 0. | |
f00ce3a0 MK |
53 | Following the call, the file size should be set using |
54 | .BR ftruncate (2). | |
46832662 MK |
55 | (Alternatively, the file may be populated by calls to |
56 | .BR write (2) | |
57 | or similar.) | |
f00ce3a0 MK |
58 | |
59 | The name supplied in | |
73fc0b53 | 60 | .I name |
46832662 | 61 | is used as a filename and will be displayed |
f00ce3a0 | 62 | as the target of the corresponding symbolic link in the directory |
73fc0b53 | 63 | .IR /proc/self/fd/ . |
f00ce3a0 MK |
64 | The displayed name is always prefixed with |
65 | .IR memfd: | |
66 | and serves only for debugging purposes. | |
46832662 | 67 | Names do not affect the behavior of the file descriptor, |
cb5b73cc | 68 | and as such multiple files can have the same name without any side effects. |
73fc0b53 DH |
69 | |
70 | The following values may be bitwise ORed in | |
71 | .IR flags | |
553deb41 | 72 | to change the behavior of |
73fc0b53 DH |
73 | .BR memfd_create (): |
74 | .TP | |
75 | .BR MFD_CLOEXEC | |
76 | Set the close-on-exec | |
77 | .RB ( FD_CLOEXEC ) | |
78 | flag on the new file descriptor. | |
79 | See the description of the | |
80 | .B O_CLOEXEC | |
81 | flag in | |
82 | .BR open (2) | |
cb5b73cc | 83 | for reasons why this may be useful. |
73fc0b53 DH |
84 | .TP |
85 | .BR MFD_ALLOW_SEALING | |
3a71dcd6 | 86 | Allow sealing operations on this file. |
e8a0dfae | 87 | See the discussion of the |
73fc0b53 DH |
88 | .B F_ADD_SEALS |
89 | and | |
e8a0dfae MK |
90 | .BR F_GET_SEALS |
91 | operations in | |
92 | .BR fcntl (2), | |
3a71dcd6 | 93 | and also NOTES, below. |
cb5b73cc MK |
94 | The initial set of seals is empty. |
95 | If this flag is not set, the initial set of seals will be | |
f00ce3a0 MK |
96 | .BR F_SEAL_SEAL , |
97 | meaning that no other seals can be set on the file. | |
98 | .\" FIXME Why is the MFD_ALLOW_SEALING behavior not simply the default? | |
99 | .\" Is it worth adding some text explaining this? | |
73fc0b53 | 100 | .PP |
f00ce3a0 MK |
101 | Unused bits in |
102 | .I flags | |
103 | must be 0. | |
73fc0b53 DH |
104 | |
105 | As its return value, | |
106 | .BR memfd_create () | |
107 | returns a new file descriptor that can be used to refer to the file. | |
f00ce3a0 MK |
108 | This file descriptor is opened for both reading and writing |
109 | .RB ( O_RDWR ) | |
110 | and | |
111 | .B O_LARGEFILE | |
112 | is set for the descriptor. | |
113 | ||
114 | With respect to | |
115 | .BR fork (2) | |
116 | and | |
117 | .BR execve (2), | |
118 | the usual semantics apply for the file descriptor created by | |
119 | .BR memfd_create (). | |
120 | A copy of the file descriptor is inherited by the child produced by | |
121 | .BR fork (2) | |
122 | and refers to the same file. | |
123 | The file descriptor is preserved across | |
73fc0b53 DH |
124 | .BR execve (2), |
125 | unless the close-on-exec flag has been set. | |
126 | .SH RETURN VALUE | |
127 | On success, | |
128 | .BR memfd_create () | |
129 | returns a new file descriptor. | |
130 | On error, \-1 is returned and | |
131 | .I errno | |
132 | is set to indicate the error. | |
133 | .SH ERRORS | |
134 | .TP | |
f00ce3a0 MK |
135 | .B EFAULT |
136 | The address in | |
137 | .IR name | |
138 | points to invalid memory. | |
139 | .TP | |
73fc0b53 | 140 | .B EINVAL |
20acd21a MK |
141 | An unsupported value was specified in one of the arguments: |
142 | .I flags | |
143 | included unknown bits, or | |
144 | .I name | |
145 | was too long. | |
73fc0b53 DH |
146 | .TP |
147 | .B EMFILE | |
26c32fab | 148 | The per-process limit on the number of open file descriptors has been reached. |
73fc0b53 DH |
149 | .TP |
150 | .B ENFILE | |
cb5b73cc | 151 | The system-wide limit on the total number of open files has been reached. |
73fc0b53 | 152 | .TP |
73fc0b53 DH |
153 | .B ENOMEM |
154 | There was insufficient memory to create a new anonymous file. | |
155 | .SH VERSIONS | |
f00ce3a0 MK |
156 | The |
157 | .BR memfd_create () | |
158 | system call first appeared in Linux 3.17. | |
8b987bc3 MK |
159 | .\" FIXME . When glibc support appears, update the following sentence: |
160 | Support in the GNU C library is pending. | |
73fc0b53 | 161 | .SH CONFORMING TO |
f00ce3a0 | 162 | The |
73fc0b53 | 163 | .BR memfd_create () |
f00ce3a0 | 164 | system call is Linux-specific. |
51fa3cbf MK |
165 | .SH NOTES |
166 | .\" See also http://lwn.net/Articles/593918/ | |
167 | .\" and http://lwn.net/Articles/594919/ and http://lwn.net/Articles/591108/ | |
168 | The | |
169 | .BR memfd_create () | |
170 | system call provides a simple alternative to manually mounting a | |
171 | .I tmpfs | |
172 | filesystem and creating and opening a file in that filesystem. | |
173 | The primary purpose of | |
174 | .BR memfd_create () | |
175 | is to create files and associated file descriptors that are | |
176 | used with the file-sealing APIs provided by | |
177 | .BR fcntl (2). | |
46832662 MK |
178 | |
179 | The | |
180 | .BR memfd_create () | |
181 | system call also has uses without file sealing | |
182 | (which is why file-sealing is disabled, unless explicitly requested with the | |
183 | .BR MFD_ALLOW_SEALING | |
184 | flag). | |
185 | In particular, it can be used as an alternative to creating files in | |
186 | .IR /tmp | |
187 | or as an alternative to using the | |
188 | .BR open (2) | |
189 | .B O_TMPFILE | |
190 | flag in cases where there is no intention to actually link the | |
191 | resulting file into the filesystem. | |
51fa3cbf MK |
192 | .SS File sealing |
193 | In the absence of file sealing, | |
194 | processes that communicate via shared memory must either trust each other, | |
195 | or take measures to deal with the possibility that an untrusted peer | |
db61d4b2 | 196 | may manipulate the shared memory region in problematic ways. |
51fa3cbf MK |
197 | For example, an untrusted peer might modify the contents of the |
198 | shared memory at any time, or shrink the shared memory region. | |
199 | The former possibility leaves the local process vulnerable to | |
200 | time-of-check-to-time-of-use race conditions | |
201 | (typically dealt with by copying data from | |
202 | the shared memory region before checking and using it). | |
203 | The latter possibility leaves the local process vulnerable to | |
204 | .BR SIGBUS | |
205 | signals when an attempt is made to access a now-nonexistent | |
206 | location in the shared memory region. | |
207 | (Dealing with this possibility necessitates the use of a handler for the | |
208 | .BR SIGBUS | |
209 | signal.) | |
210 | ||
211 | Dealing with untrusted peers imposes extra complexity on | |
212 | code that employs shared memory. | |
213 | File sealing enables that extra complexity to be eliminated, | |
214 | by allowing a process to operate secure in the knowledge that | |
215 | its peer can't modify the shared memory in an undesired fashion. | |
216 | ||
217 | An example of the usage of the sealing mechanism is as follows: | |
218 | ||
219 | .IP 1. 3 | |
220 | The first process creates a | |
221 | .I tmpfs | |
771e13d4 | 222 | file using |
51fa3cbf MK |
223 | .BR memfd_create (). |
224 | The call yields a file descriptor used in subsequent steps. | |
225 | .IP 2. | |
226 | The first process | |
227 | sizes the file created in the previous step using | |
228 | .BR ftruncate (2), | |
229 | maps it using | |
230 | .BR mmap (2), | |
231 | and populates the shared memory with the desired data. | |
232 | .IP 3. | |
233 | The first process uses the | |
234 | .BR fcntl (2) | |
235 | .B F_ADD_SEALS | |
236 | operation to place one or more seals on the file, | |
237 | in order to restrict further modifications on the file. | |
238 | (If placing the seal | |
239 | .BR F_SEAL_WRITE , | |
240 | then it will be necessary to first unmap the shared writable mapping | |
241 | created in the previous step.) | |
242 | .IP 4. | |
243 | A second process obtains a file descriptor for the | |
244 | .I tmpfs | |
245 | file and maps it. | |
46832662 | 246 | Among the possible ways in which this could happen are the following: |
51fa3cbf MK |
247 | .RS |
248 | .IP * 3 | |
46832662 MK |
249 | The process that called |
250 | .BR memfd_create () | |
251 | could transfer the resulting file descriptor to the second process | |
252 | via a UNIX domain socket (see | |
253 | .BR unix (7) | |
254 | and | |
255 | .BR cmsg (3)). | |
256 | The second process then maps the file using | |
257 | .BR mmap (2). | |
258 | .IP * | |
51fa3cbf MK |
259 | The second process is created via |
260 | .BR fork (2) | |
261 | and thus automatically inherits the file descriptor and mapping. | |
46832662 MK |
262 | (Note that in this case and the next, |
263 | there is a natural trust relationship between the two processes, | |
264 | since they are running under the same user ID. | |
265 | Therefore, file sealing would not normally be necessary.) | |
51fa3cbf | 266 | .IP * |
771e13d4 | 267 | The second process opens the file |
51fa3cbf MK |
268 | .IR /proc/<pid>/fd/<fd> , |
269 | where | |
270 | .I <pid> | |
271 | is the PID of the first process (the one that called | |
272 | .BR memfd_create ()), | |
273 | and | |
274 | .I <fd> | |
275 | is the number of the file descriptor returned by the call to | |
276 | .BR memfd_create () | |
277 | in that process. | |
278 | The second process then maps the file using | |
279 | .BR mmap (2). | |
280 | .RE | |
281 | .IP 5. | |
282 | The second process uses the | |
283 | .BR fcntl (2) | |
284 | .B F_GET_SEALS | |
4f32648e MK |
285 | operation to retrieve the bit mask of seals |
286 | that has been applied to the file. | |
287 | This bit mask can be inspected in order to determine | |
288 | what kinds of restrictions have been placed on file modifications. | |
51fa3cbf MK |
289 | If desired, the second process can apply further seals |
290 | to impose additional restrictions (so long as the | |
291 | .BR F_SEAL_SEAL | |
292 | seal has not yet been applied). | |
878cc348 MK |
293 | .SH EXAMPLE |
294 | Below are shown two example programs that demonstrate the use of | |
295 | .BR memfd_create () | |
296 | and the file sealing API. | |
297 | ||
298 | The first program, | |
299 | .IR t_memfd_create.c , | |
300 | creates a | |
301 | .I tmpfs | |
302 | file using | |
303 | .BR memfd_create (), | |
304 | sets a size for the file, maps it into memory, | |
305 | and optionally places some seals on the file. | |
306 | The program accepts up to three command-line arguments, | |
307 | of which the first two are required. | |
308 | The first argument is the name to associate with the file, | |
309 | the second argument is the size to be set for the file, | |
310 | and the optional third is a string of characters that specify | |
311 | seals to be set on the file. | |
312 | ||
313 | The second program, | |
314 | .IR t_get_seals.c , | |
315 | can be used to open an existing file that was created via | |
316 | .BR memfd_create () | |
317 | and inspect the set of seals that have been applied to that file. | |
318 | ||
319 | The following shell session demonstrates the use of these programs. | |
320 | First we create a | |
321 | .I tmpfs | |
322 | file and set some seals on it: | |
323 | ||
324 | .in +4n | |
325 | .nf | |
326 | $ \fB./t_memfd_create my_memfd_file 4096 sw &\fP | |
327 | [1] 11775 | |
328 | PID: 11775; fd: 3; /proc/11775/fd/3 | |
329 | .fi | |
330 | .in | |
331 | ||
332 | At this point, the | |
333 | .I t_memfd_create | |
334 | program continues to run in the background. | |
335 | From another program, we can obtain a file descriptor for the | |
46832662 MK |
336 | file created by |
337 | .BR memfd_create () | |
338 | by opening the | |
878cc348 MK |
339 | .IR /proc/PID/fd |
340 | file that corresponds to the descriptor opened by | |
341 | .BR memfd_create (). | |
342 | Using that pathname, we inspect the content of the | |
343 | .IR /proc/PID/fd | |
344 | symbolic link, and use our | |
345 | .I t_get_seals | |
346 | program to view the seals that have been placed on the file: | |
347 | ||
348 | .in +4n | |
349 | .nf | |
350 | $ \fBreadlink /proc/11775/fd/3\fP | |
351 | /memfd:my_memfd_file (deleted) | |
352 | $ \fB./t_get_seals /proc/11775/fd/3\fP | |
353 | Existing seals: WRITE SHRINK | |
354 | .fi | |
355 | .in | |
356 | .SS Program source: t_memfd_create.c | |
357 | \& | |
358 | .nf | |
359 | #include <sys/memfd.h> | |
360 | #include <fcntl.h> | |
361 | #include <stdlib.h> | |
362 | #include <unistd.h> | |
363 | #include <string.h> | |
364 | #include <stdio.h> | |
365 | ||
366 | #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\ | |
367 | } while (0) | |
368 | ||
369 | int | |
370 | main(int argc, char *argv[]) | |
371 | { | |
372 | int fd; | |
373 | unsigned int seals; | |
374 | char *addr; | |
375 | char *name, *seals_arg; | |
376 | ssize_t len; | |
377 | ||
378 | if (argc < 3) { | |
379 | fprintf(stderr, "%s name size [seals]\\n", argv[0]); | |
380 | fprintf(stderr, "\\t\(aqseals\(aq can contain any of the " | |
381 | "following characters:\\n"); | |
382 | fprintf(stderr, "\\t\\tg \- F_SEAL_GROW\\n"); | |
383 | fprintf(stderr, "\\t\\ts \- F_SEAL_SHRINK\\n"); | |
384 | fprintf(stderr, "\\t\\tw \- F_SEAL_WRITE\\n"); | |
385 | fprintf(stderr, "\\t\\tS \- F_SEAL_SEAL\\n"); | |
386 | exit(EXIT_FAILURE); | |
387 | } | |
388 | ||
389 | name = argv[1]; | |
390 | len = atoi(argv[2]); | |
391 | seals_arg = argv[3]; | |
392 | ||
393 | /* Create an anonymous file in tmpfs; allow seals to be | |
394 | placed on the file */ | |
395 | ||
396 | fd = memfd_create(name, MFD_ALLOW_SEALING); | |
397 | if (fd == \-1) | |
398 | errExit("memfd_create"); | |
399 | ||
400 | /* Size the file as specified on the command line */ | |
401 | ||
402 | if (ftruncate(fd, len) == \-1) | |
403 | errExit("truncate"); | |
404 | ||
405 | printf("PID: %ld; fd: %d; /proc/%ld/fd/%d\\n", | |
406 | (long) getpid(), fd, (long) getpid(), fd); | |
407 | ||
408 | /* Code to map the file and populate the mapping with data | |
409 | omitted */ | |
410 | ||
411 | /* If a \(aqseals\(aq command\-line argument was supplied, set some | |
412 | seals on the file */ | |
413 | ||
414 | if (seals_arg != NULL) { | |
415 | seals = 0; | |
416 | ||
417 | if (strchr(seals_arg, \(aqg\(aq) != NULL) | |
418 | seals |= F_SEAL_GROW; | |
419 | if (strchr(seals_arg, \(aqs\(aq) != NULL) | |
420 | seals |= F_SEAL_SHRINK; | |
421 | if (strchr(seals_arg, \(aqw\(aq) != NULL) | |
422 | seals |= F_SEAL_WRITE; | |
423 | if (strchr(seals_arg, \(aqS\(aq) != NULL) | |
424 | seals |= F_SEAL_SEAL; | |
425 | ||
426 | if (fcntl(fd, F_ADD_SEALS, seals) == \-1) | |
427 | errExit("fcntl"); | |
428 | } | |
429 | ||
430 | /* Keep running, so that the file created by memfd_create() | |
431 | continues to exist */ | |
432 | ||
433 | pause(); | |
434 | ||
435 | exit(EXIT_SUCCESS); | |
436 | } | |
437 | .fi | |
438 | .SS Program source: t_get_seals.c | |
439 | \& | |
440 | .nf | |
441 | #include <sys/memfd.h> | |
442 | #include <fcntl.h> | |
443 | #include <unistd.h> | |
444 | #include <stdlib.h> | |
445 | #include <string.h> | |
446 | #include <stdio.h> | |
447 | ||
448 | #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\ | |
449 | } while (0) | |
450 | ||
451 | int | |
452 | main(int argc, char *argv[]) | |
453 | { | |
454 | int fd; | |
455 | unsigned int seals; | |
456 | ||
457 | if (argc != 2) { | |
458 | fprintf(stderr, "%s /proc/PID/fd/FD\\n", argv[0]); | |
459 | exit(EXIT_FAILURE); | |
460 | } | |
461 | ||
462 | fd = open(argv[1], O_RDWR); | |
463 | if (fd == \-1) | |
464 | errExit("open"); | |
465 | ||
466 | seals = fcntl(fd, F_GET_SEALS); | |
467 | if (seals == \-1) | |
468 | errExit("fcntl"); | |
469 | ||
470 | printf("Existing seals:"); | |
471 | if (seals & F_SEAL_SEAL) | |
472 | printf(" SEAL"); | |
473 | if (seals & F_SEAL_GROW) | |
474 | printf(" GROW"); | |
475 | if (seals & F_SEAL_WRITE) | |
476 | printf(" WRITE"); | |
477 | if (seals & F_SEAL_SHRINK) | |
478 | printf(" SHRINK"); | |
479 | printf("\\n"); | |
480 | ||
481 | /* Code to map the file and access the contents of the | |
482 | resulting mapping omitted */ | |
483 | ||
484 | exit(EXIT_SUCCESS); | |
485 | } | |
486 | .fi | |
73fc0b53 | 487 | .SH SEE ALSO |
73fc0b53 | 488 | .BR fcntl (2), |
3a71dcd6 MK |
489 | .BR ftruncate (2), |
490 | .BR mmap (2), | |
46832662 MK |
491 | .BR shmget (2), |
492 | .BR shm_open (3) |