]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/fd-util.c
fd-util: drop stdio_unset_cloexec(), it's not used anymore
[thirdparty/systemd.git] / src / basic / fd-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <unistd.h>
27
28 #include "dirent-util.h"
29 #include "fd-util.h"
30 #include "fileio.h"
31 #include "fs-util.h"
32 #include "macro.h"
33 #include "memfd-util.h"
34 #include "missing.h"
35 #include "parse-util.h"
36 #include "path-util.h"
37 #include "process-util.h"
38 #include "socket-util.h"
39 #include "stdio-util.h"
40 #include "util.h"
41
42 int close_nointr(int fd) {
43 assert(fd >= 0);
44
45 if (close(fd) >= 0)
46 return 0;
47
48 /*
49 * Just ignore EINTR; a retry loop is the wrong thing to do on
50 * Linux.
51 *
52 * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
53 * https://bugzilla.gnome.org/show_bug.cgi?id=682819
54 * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
55 * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
56 */
57 if (errno == EINTR)
58 return 0;
59
60 return -errno;
61 }
62
63 int safe_close(int fd) {
64
65 /*
66 * Like close_nointr() but cannot fail. Guarantees errno is
67 * unchanged. Is a NOP with negative fds passed, and returns
68 * -1, so that it can be used in this syntax:
69 *
70 * fd = safe_close(fd);
71 */
72
73 if (fd >= 0) {
74 PROTECT_ERRNO;
75
76 /* The kernel might return pretty much any error code
77 * via close(), but the fd will be closed anyway. The
78 * only condition we want to check for here is whether
79 * the fd was invalid at all... */
80
81 assert_se(close_nointr(fd) != -EBADF);
82 }
83
84 return -1;
85 }
86
/* Closes both fds of the two-element array 'p' via safe_close() and stores the result (-1) back into
 * both slots. */
void safe_close_pair(int p[]) {
        assert(p);

        if (p[0] != p[1]) {
                /* The common case: two distinct fds, close each of them. */
                p[0] = safe_close(p[0]);
                p[1] = safe_close(p[1]);
                return;
        }

        /* Both slots carry the same fd (a pair used in both directions): close it only once. */
        p[0] = p[1] = safe_close(p[0]);
}
100
/* Calls safe_close() on each of the first 'n_fd' entries of 'fds'. 'fds' may be NULL only if 'n_fd' is
 * zero. The array itself is not modified. */
void close_many(const int fds[], unsigned n_fd) {
        unsigned k = 0;

        assert(fds || n_fd <= 0);

        while (k < n_fd)
                safe_close(fds[k++]);
}
109
110 int fclose_nointr(FILE *f) {
111 assert(f);
112
113 /* Same as close_nointr(), but for fclose() */
114
115 if (fclose(f) == 0)
116 return 0;
117
118 if (errno == EINTR)
119 return 0;
120
121 return -errno;
122 }
123
124 FILE* safe_fclose(FILE *f) {
125
126 /* Same as safe_close(), but for fclose() */
127
128 if (f) {
129 PROTECT_ERRNO;
130
131 assert_se(fclose_nointr(f) != EBADF);
132 }
133
134 return NULL;
135 }
136
137 DIR* safe_closedir(DIR *d) {
138
139 if (d) {
140 PROTECT_ERRNO;
141
142 assert_se(closedir(d) >= 0 || errno != EBADF);
143 }
144
145 return NULL;
146 }
147
148 int fd_nonblock(int fd, bool nonblock) {
149 int flags, nflags;
150
151 assert(fd >= 0);
152
153 flags = fcntl(fd, F_GETFL, 0);
154 if (flags < 0)
155 return -errno;
156
157 if (nonblock)
158 nflags = flags | O_NONBLOCK;
159 else
160 nflags = flags & ~O_NONBLOCK;
161
162 if (nflags == flags)
163 return 0;
164
165 if (fcntl(fd, F_SETFL, nflags) < 0)
166 return -errno;
167
168 return 0;
169 }
170
171 int fd_cloexec(int fd, bool cloexec) {
172 int flags, nflags;
173
174 assert(fd >= 0);
175
176 flags = fcntl(fd, F_GETFD, 0);
177 if (flags < 0)
178 return -errno;
179
180 if (cloexec)
181 nflags = flags | FD_CLOEXEC;
182 else
183 nflags = flags & ~FD_CLOEXEC;
184
185 if (nflags == flags)
186 return 0;
187
188 if (fcntl(fd, F_SETFD, nflags) < 0)
189 return -errno;
190
191 return 0;
192 }
193
194 _pure_ static bool fd_in_set(int fd, const int fdset[], unsigned n_fdset) {
195 unsigned i;
196
197 assert(n_fdset == 0 || fdset);
198
199 for (i = 0; i < n_fdset; i++)
200 if (fdset[i] == fd)
201 return true;
202
203 return false;
204 }
205
206 int close_all_fds(const int except[], unsigned n_except) {
207 _cleanup_closedir_ DIR *d = NULL;
208 struct dirent *de;
209 int r = 0;
210
211 assert(n_except == 0 || except);
212
213 d = opendir("/proc/self/fd");
214 if (!d) {
215 int fd;
216 struct rlimit rl;
217
218 /* When /proc isn't available (for example in chroots)
219 * the fallback is brute forcing through the fd
220 * table */
221
222 assert_se(getrlimit(RLIMIT_NOFILE, &rl) >= 0);
223 for (fd = 3; fd < (int) rl.rlim_max; fd ++) {
224 int q;
225
226 if (fd_in_set(fd, except, n_except))
227 continue;
228
229 q = close_nointr(fd);
230 if (q < 0 && q != -EBADF && r >= 0)
231 r = q;
232 }
233
234 return r;
235 }
236
237 FOREACH_DIRENT(de, d, return -errno) {
238 int fd = -1, q;
239
240 if (safe_atoi(de->d_name, &fd) < 0)
241 /* Let's better ignore this, just in case */
242 continue;
243
244 if (fd < 3)
245 continue;
246
247 if (fd == dirfd(d))
248 continue;
249
250 if (fd_in_set(fd, except, n_except))
251 continue;
252
253 q = close_nointr(fd);
254 if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
255 r = q;
256 }
257
258 return r;
259 }
260
261 int same_fd(int a, int b) {
262 struct stat sta, stb;
263 pid_t pid;
264 int r, fa, fb;
265
266 assert(a >= 0);
267 assert(b >= 0);
268
269 /* Compares two file descriptors. Note that semantics are
270 * quite different depending on whether we have kcmp() or we
271 * don't. If we have kcmp() this will only return true for
272 * dup()ed file descriptors, but not otherwise. If we don't
273 * have kcmp() this will also return true for two fds of the same
274 * file, created by separate open() calls. Since we use this
275 * call mostly for filtering out duplicates in the fd store
276 * this difference hopefully doesn't matter too much. */
277
278 if (a == b)
279 return true;
280
281 /* Try to use kcmp() if we have it. */
282 pid = getpid_cached();
283 r = kcmp(pid, pid, KCMP_FILE, a, b);
284 if (r == 0)
285 return true;
286 if (r > 0)
287 return false;
288 if (errno != ENOSYS)
289 return -errno;
290
291 /* We don't have kcmp(), use fstat() instead. */
292 if (fstat(a, &sta) < 0)
293 return -errno;
294
295 if (fstat(b, &stb) < 0)
296 return -errno;
297
298 if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT))
299 return false;
300
301 /* We consider all device fds different, since two device fds
302 * might refer to quite different device contexts even though
303 * they share the same inode and backing dev_t. */
304
305 if (S_ISCHR(sta.st_mode) || S_ISBLK(sta.st_mode))
306 return false;
307
308 if (sta.st_dev != stb.st_dev || sta.st_ino != stb.st_ino)
309 return false;
310
311 /* The fds refer to the same inode on disk, let's also check
312 * if they have the same fd flags. This is useful to
313 * distinguish the read and write side of a pipe created with
314 * pipe(). */
315 fa = fcntl(a, F_GETFL);
316 if (fa < 0)
317 return -errno;
318
319 fb = fcntl(b, F_GETFL);
320 if (fb < 0)
321 return -errno;
322
323 return fa == fb;
324 }
325
326 void cmsg_close_all(struct msghdr *mh) {
327 struct cmsghdr *cmsg;
328
329 assert(mh);
330
331 CMSG_FOREACH(cmsg, mh)
332 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
333 close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
334 }
335
/* Checks whether 's' is acceptable as a name in $LISTEN_FDNAMES.
 *
 * Accepted is any string of at most 255 characters consisting only of ASCII characters that are not
 * control characters, with the exception of ':', which serves as field separator in $LISTEN_FDNAMES.
 * The empty string is explicitly valid; NULL is not. */
bool fdname_is_valid(const char *s) {
        const char *end;

        if (!s)
                return false;

        end = s;
        while (*end != '\0') {
                char c = *end;

                /* Reject control characters, DEL and everything non-ASCII, plus the separator */
                if (c < ' ' || c >= 127 || c == ':')
                        return false;

                end++;
        }

        return end - s <= 255;
}
362
363 int fd_get_path(int fd, char **ret) {
364 char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
365 int r;
366
367 xsprintf(procfs_path, "/proc/self/fd/%i", fd);
368
369 r = readlink_malloc(procfs_path, ret);
370
371 if (r == -ENOENT) /* If the file doesn't exist the fd is invalid */
372 return -EBADF;
373
374 return r;
375 }
376
377 int move_fd(int from, int to, int cloexec) {
378 int r;
379
380 /* Move fd 'from' to 'to', make sure FD_CLOEXEC remains equal if requested, and release the old fd. If
381 * 'cloexec' is passed as -1, the original FD_CLOEXEC is inherited for the new fd. If it is 0, it is turned
382 * off, if it is > 0 it is turned on. */
383
384 if (from < 0)
385 return -EBADF;
386 if (to < 0)
387 return -EBADF;
388
389 if (from == to) {
390
391 if (cloexec >= 0) {
392 r = fd_cloexec(to, cloexec);
393 if (r < 0)
394 return r;
395 }
396
397 return to;
398 }
399
400 if (cloexec < 0) {
401 int fl;
402
403 fl = fcntl(from, F_GETFD, 0);
404 if (fl < 0)
405 return -errno;
406
407 cloexec = !!(fl & FD_CLOEXEC);
408 }
409
410 r = dup3(from, to, cloexec ? O_CLOEXEC : 0);
411 if (r < 0)
412 return -errno;
413
414 assert(r == to);
415
416 safe_close(from);
417
418 return to;
419 }
420
421 int acquire_data_fd(const void *data, size_t size, unsigned flags) {
422
423 char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
424 _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
425 char pattern[] = "/dev/shm/data-fd-XXXXXX";
426 _cleanup_close_ int fd = -1;
427 int isz = 0, r;
428 ssize_t n;
429 off_t f;
430
431 assert(data || size == 0);
432
433 /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
434 * complex than I wish it was. But here's why:
435 *
436 * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
437 * read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
438 *
439 * b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
440 * a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
441 * clients can only bump their size to a system-wide limit, which might be quite low.
442 *
443 * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
444 * earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
445 * /proc/self/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
446 *
447 * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
448 *
449 * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
450 * figure. */
451
452 if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
453 /* As a special case, return /dev/null if we have been called for an empty data block */
454 r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
455 if (r < 0)
456 return -errno;
457
458 return r;
459 }
460
461 if ((flags & ACQUIRE_NO_MEMFD) == 0) {
462 fd = memfd_new("data-fd");
463 if (fd < 0)
464 goto try_pipe;
465
466 n = write(fd, data, size);
467 if (n < 0)
468 return -errno;
469 if ((size_t) n != size)
470 return -EIO;
471
472 f = lseek(fd, 0, SEEK_SET);
473 if (f != 0)
474 return -errno;
475
476 r = memfd_set_sealed(fd);
477 if (r < 0)
478 return r;
479
480 r = fd;
481 fd = -1;
482
483 return r;
484 }
485
486 try_pipe:
487 if ((flags & ACQUIRE_NO_PIPE) == 0) {
488 if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
489 return -errno;
490
491 isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
492 if (isz < 0)
493 return -errno;
494
495 if ((size_t) isz < size) {
496 isz = (int) size;
497 if (isz < 0 || (size_t) isz != size)
498 return -E2BIG;
499
500 /* Try to bump the pipe size */
501 (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
502
503 /* See if that worked */
504 isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
505 if (isz < 0)
506 return -errno;
507
508 if ((size_t) isz < size)
509 goto try_dev_shm;
510 }
511
512 n = write(pipefds[1], data, size);
513 if (n < 0)
514 return -errno;
515 if ((size_t) n != size)
516 return -EIO;
517
518 (void) fd_nonblock(pipefds[0], false);
519
520 r = pipefds[0];
521 pipefds[0] = -1;
522
523 return r;
524 }
525
526 try_dev_shm:
527 if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
528 fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
529 if (fd < 0)
530 goto try_dev_shm_without_o_tmpfile;
531
532 n = write(fd, data, size);
533 if (n < 0)
534 return -errno;
535 if ((size_t) n != size)
536 return -EIO;
537
538 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
539 xsprintf(procfs_path, "/proc/self/fd/%i", fd);
540 r = open(procfs_path, O_RDONLY|O_CLOEXEC);
541 if (r < 0)
542 return -errno;
543
544 return r;
545 }
546
547 try_dev_shm_without_o_tmpfile:
548 if ((flags & ACQUIRE_NO_REGULAR) == 0) {
549 fd = mkostemp_safe(pattern);
550 if (fd < 0)
551 return fd;
552
553 n = write(fd, data, size);
554 if (n < 0) {
555 r = -errno;
556 goto unlink_and_return;
557 }
558 if ((size_t) n != size) {
559 r = -EIO;
560 goto unlink_and_return;
561 }
562
563 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
564 r = open(pattern, O_RDONLY|O_CLOEXEC);
565 if (r < 0)
566 r = -errno;
567
568 unlink_and_return:
569 (void) unlink(pattern);
570 return r;
571 }
572
573 return -EOPNOTSUPP;
574 }
575
576 int fd_move_above_stdio(int fd) {
577 int flags, copy;
578 PROTECT_ERRNO;
579
580 /* Moves the specified file descriptor if possible out of the range [0…2], i.e. the range of
581 * stdin/stdout/stderr. If it can't be moved outside of this range the original file descriptor is
582 * returned. This call is supposed to be used for long-lasting file descriptors we allocate in our code that
583 * might get loaded into foreign code, and where we want ensure our fds are unlikely used accidentally as
584 * stdin/stdout/stderr of unrelated code.
585 *
586 * Note that this doesn't fix any real bugs, it just makes it less likely that our code will be affected by
587 * buggy code from others that mindlessly invokes 'fprintf(stderr, …' or similar in places where stderr has
588 * been closed before.
589 *
590 * This function is written in a "best-effort" and "least-impact" style. This means whenever we encounter an
591 * error we simply return the original file descriptor, and we do not touch errno. */
592
593 if (fd < 0 || fd > 2)
594 return fd;
595
596 flags = fcntl(fd, F_GETFD, 0);
597 if (flags < 0)
598 return fd;
599
600 if (flags & FD_CLOEXEC)
601 copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
602 else
603 copy = fcntl(fd, F_DUPFD, 3);
604 if (copy < 0)
605 return fd;
606
607 assert(copy > 2);
608
609 (void) close(fd);
610 return copy;
611 }
612
613 int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd) {
614
615 int fd[3] = { /* Put together an array of fds we work on */
616 original_input_fd,
617 original_output_fd,
618 original_error_fd
619 };
620
621 int r, i,
622 null_fd = -1, /* if we open /dev/null, we store the fd to it here */
623 copy_fd[3] = { -1, -1, -1 }; /* This contains all fds we duplicate here temporarily, and hence need to close at the end */
624 bool null_readable, null_writable;
625
626 /* Sets up stdin, stdout, stderr with the three file descriptors passed in. If any of the descriptors is
627 * specified as -1 it will be connected with /dev/null instead. If any of the file descriptors is passed as
628 * itself (e.g. stdin as STDIN_FILENO) it is left unmodified, but the O_CLOEXEC bit is turned off should it be
629 * on.
630 *
631 * Note that if any of the passed file descriptors are > 2 they will be closed — both on success and on
632 * failure! Thus, callers should assume that when this function returns the input fds are invalidated.
633 *
634 * Note that when this function fails stdin/stdout/stderr might remain half set up!
635 *
636 * O_CLOEXEC is turned off for all three file descriptors (which is how it should be for
637 * stdin/stdout/stderr). */
638
639 null_readable = original_input_fd < 0;
640 null_writable = original_output_fd < 0 || original_error_fd < 0;
641
642 /* First step, open /dev/null once, if we need it */
643 if (null_readable || null_writable) {
644
645 /* Let's open this with O_CLOEXEC first, and convert it to non-O_CLOEXEC when we move the fd to the final position. */
646 null_fd = open("/dev/null", (null_readable && null_writable ? O_RDWR :
647 null_readable ? O_RDONLY : O_WRONLY) | O_CLOEXEC);
648 if (null_fd < 0) {
649 r = -errno;
650 goto finish;
651 }
652
653 /* If this fd is in the 0…2 range, let's move it out of it */
654 if (null_fd < 3) {
655 int copy;
656
657 copy = fcntl(null_fd, F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
658 if (copy < 0) {
659 r = -errno;
660 goto finish;
661 }
662
663 safe_close(null_fd);
664 null_fd = copy;
665 }
666 }
667
668 /* Let's assemble fd[] with the fds to install in place of stdin/stdout/stderr */
669 for (i = 0; i < 3; i++) {
670
671 if (fd[i] < 0)
672 fd[i] = null_fd; /* A negative parameter means: connect this one to /dev/null */
673 else if (fd[i] != i && fd[i] < 3) {
674 /* This fd is in the 0…2 territory, but not at its intended place, move it out of there, so that we can work there. */
675 copy_fd[i] = fcntl(fd[i], F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
676 if (copy_fd[i] < 0) {
677 r = -errno;
678 goto finish;
679 }
680
681 fd[i] = copy_fd[i];
682 }
683 }
684
685 /* At this point we now have the fds to use in fd[], and they are all above the stdio range, so that we
686 * have freedom to move them around. If the fds already were at the right places then the specific fds are
687 * -1. Let's now move them to the right places. This is the point of no return. */
688 for (i = 0; i < 3; i++) {
689
690 if (fd[i] == i) {
691
692 /* fd is already in place, but let's make sure O_CLOEXEC is off */
693 r = fd_cloexec(i, false);
694 if (r < 0)
695 goto finish;
696
697 } else {
698 assert(fd[i] > 2);
699
700 if (dup2(fd[i], i) < 0) { /* Turns off O_CLOEXEC on the new fd. */
701 r = -errno;
702 goto finish;
703 }
704 }
705 }
706
707 r = 0;
708
709 finish:
710 /* Close the original fds, but only if they were outside of the stdio range. Also, properly check for the same
711 * fd passed in multiple times. */
712 safe_close_above_stdio(original_input_fd);
713 if (original_output_fd != original_input_fd)
714 safe_close_above_stdio(original_output_fd);
715 if (original_error_fd != original_input_fd && original_error_fd != original_output_fd)
716 safe_close_above_stdio(original_error_fd);
717
718 /* Close the copies we moved > 2 */
719 for (i = 0; i < 3; i++)
720 safe_close(copy_fd[i]);
721
722 /* Close our null fd, if it's > 2 */
723 safe_close_above_stdio(null_fd);
724
725 return r;
726 }