]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/fd-util.c
tree-wide: remove unused variables (#8612)
[thirdparty/systemd.git] / src / basic / fd-util.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
3ffd4af2
LP
2/***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19***/
20
11c3a366
TA
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <unistd.h>

#include "dirent-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "macro.h"
#include "memfd-util.h"
#include "missing.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#include "socket-util.h"
#include "stdio-util.h"
#include "util.h"
41
42int close_nointr(int fd) {
43 assert(fd >= 0);
44
45 if (close(fd) >= 0)
46 return 0;
47
48 /*
49 * Just ignore EINTR; a retry loop is the wrong thing to do on
50 * Linux.
51 *
52 * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
53 * https://bugzilla.gnome.org/show_bug.cgi?id=682819
54 * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
55 * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
56 */
57 if (errno == EINTR)
58 return 0;
59
60 return -errno;
61}
62
63int safe_close(int fd) {
64
65 /*
66 * Like close_nointr() but cannot fail. Guarantees errno is
67 * unchanged. Is a NOP with negative fds passed, and returns
68 * -1, so that it can be used in this syntax:
69 *
70 * fd = safe_close(fd);
71 */
72
73 if (fd >= 0) {
74 PROTECT_ERRNO;
75
76 /* The kernel might return pretty much any error code
77 * via close(), but the fd will be closed anyway. The
78 * only condition we want to check for here is whether
79 * the fd was invalid at all... */
80
81 assert_se(close_nointr(fd) != -EBADF);
82 }
83
84 return -1;
85}
86
void safe_close_pair(int p[]) {
        assert(p);

        /* Closes both entries of a two-element fd array and resets them to -1. */

        if (p[0] != p[1]) {
                p[0] = safe_close(p[0]);
                p[1] = safe_close(p[1]);
                return;
        }

        /* Both slots carry the same fd (a pair used in both directions):
         * close it only once. */
        p[0] = p[1] = safe_close(p[0]);
}
100
void close_many(const int fds[], unsigned n_fd) {
        unsigned k = 0;

        /* Runs safe_close() on each of the first n_fd entries of fds[].
         * The array itself is not modified. */

        assert(fds || n_fd <= 0);

        while (k < n_fd)
                safe_close(fds[k++]);
}
109
110int fclose_nointr(FILE *f) {
111 assert(f);
112
113 /* Same as close_nointr(), but for fclose() */
114
115 if (fclose(f) == 0)
116 return 0;
117
118 if (errno == EINTR)
119 return 0;
120
121 return -errno;
122}
123
124FILE* safe_fclose(FILE *f) {
125
126 /* Same as safe_close(), but for fclose() */
127
128 if (f) {
129 PROTECT_ERRNO;
130
131 assert_se(fclose_nointr(f) != EBADF);
132 }
133
134 return NULL;
135}
136
137DIR* safe_closedir(DIR *d) {
138
139 if (d) {
140 PROTECT_ERRNO;
141
142 assert_se(closedir(d) >= 0 || errno != EBADF);
143 }
144
145 return NULL;
146}
147
148int fd_nonblock(int fd, bool nonblock) {
149 int flags, nflags;
150
151 assert(fd >= 0);
152
153 flags = fcntl(fd, F_GETFL, 0);
154 if (flags < 0)
155 return -errno;
156
157 if (nonblock)
158 nflags = flags | O_NONBLOCK;
159 else
160 nflags = flags & ~O_NONBLOCK;
161
162 if (nflags == flags)
163 return 0;
164
165 if (fcntl(fd, F_SETFL, nflags) < 0)
166 return -errno;
167
168 return 0;
169}
170
171int fd_cloexec(int fd, bool cloexec) {
172 int flags, nflags;
173
174 assert(fd >= 0);
175
176 flags = fcntl(fd, F_GETFD, 0);
177 if (flags < 0)
178 return -errno;
179
180 if (cloexec)
181 nflags = flags | FD_CLOEXEC;
182 else
183 nflags = flags & ~FD_CLOEXEC;
184
185 if (nflags == flags)
186 return 0;
187
188 if (fcntl(fd, F_SETFD, nflags) < 0)
189 return -errno;
190
191 return 0;
192}
193
194_pure_ static bool fd_in_set(int fd, const int fdset[], unsigned n_fdset) {
195 unsigned i;
196
197 assert(n_fdset == 0 || fdset);
198
199 for (i = 0; i < n_fdset; i++)
200 if (fdset[i] == fd)
201 return true;
202
203 return false;
204}
205
206int close_all_fds(const int except[], unsigned n_except) {
207 _cleanup_closedir_ DIR *d = NULL;
208 struct dirent *de;
209 int r = 0;
210
211 assert(n_except == 0 || except);
212
213 d = opendir("/proc/self/fd");
214 if (!d) {
215 int fd;
216 struct rlimit rl;
217
218 /* When /proc isn't available (for example in chroots)
219 * the fallback is brute forcing through the fd
220 * table */
221
222 assert_se(getrlimit(RLIMIT_NOFILE, &rl) >= 0);
223 for (fd = 3; fd < (int) rl.rlim_max; fd ++) {
e43bc9f5 224 int q;
3ffd4af2
LP
225
226 if (fd_in_set(fd, except, n_except))
227 continue;
228
e43bc9f5
LP
229 q = close_nointr(fd);
230 if (q < 0 && q != -EBADF && r >= 0)
231 r = q;
3ffd4af2
LP
232 }
233
234 return r;
235 }
236
8fb3f009 237 FOREACH_DIRENT(de, d, return -errno) {
e43bc9f5 238 int fd = -1, q;
3ffd4af2 239
3ffd4af2
LP
240 if (safe_atoi(de->d_name, &fd) < 0)
241 /* Let's better ignore this, just in case */
242 continue;
243
244 if (fd < 3)
245 continue;
246
247 if (fd == dirfd(d))
248 continue;
249
250 if (fd_in_set(fd, except, n_except))
251 continue;
252
e43bc9f5
LP
253 q = close_nointr(fd);
254 if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
255 r = q;
3ffd4af2
LP
256 }
257
258 return r;
259}
260
261int same_fd(int a, int b) {
262 struct stat sta, stb;
263 pid_t pid;
264 int r, fa, fb;
265
266 assert(a >= 0);
267 assert(b >= 0);
268
269 /* Compares two file descriptors. Note that semantics are
270 * quite different depending on whether we have kcmp() or we
271 * don't. If we have kcmp() this will only return true for
272 * dup()ed file descriptors, but not otherwise. If we don't
273 * have kcmp() this will also return true for two fds of the same
274 * file, created by separate open() calls. Since we use this
275 * call mostly for filtering out duplicates in the fd store
276 * this difference hopefully doesn't matter too much. */
277
278 if (a == b)
279 return true;
280
281 /* Try to use kcmp() if we have it. */
df0ff127 282 pid = getpid_cached();
3ffd4af2
LP
283 r = kcmp(pid, pid, KCMP_FILE, a, b);
284 if (r == 0)
285 return true;
286 if (r > 0)
287 return false;
288 if (errno != ENOSYS)
289 return -errno;
290
291 /* We don't have kcmp(), use fstat() instead. */
292 if (fstat(a, &sta) < 0)
293 return -errno;
294
295 if (fstat(b, &stb) < 0)
296 return -errno;
297
298 if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT))
299 return false;
300
301 /* We consider all device fds different, since two device fds
302 * might refer to quite different device contexts even though
303 * they share the same inode and backing dev_t. */
304
305 if (S_ISCHR(sta.st_mode) || S_ISBLK(sta.st_mode))
306 return false;
307
308 if (sta.st_dev != stb.st_dev || sta.st_ino != stb.st_ino)
309 return false;
310
311 /* The fds refer to the same inode on disk, let's also check
312 * if they have the same fd flags. This is useful to
313 * distinguish the read and write side of a pipe created with
314 * pipe(). */
315 fa = fcntl(a, F_GETFL);
316 if (fa < 0)
317 return -errno;
318
319 fb = fcntl(b, F_GETFL);
320 if (fb < 0)
321 return -errno;
322
323 return fa == fb;
324}
325
326void cmsg_close_all(struct msghdr *mh) {
327 struct cmsghdr *cmsg;
328
329 assert(mh);
330
331 CMSG_FOREACH(cmsg, mh)
332 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
333 close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
334}
4fee3975
LP
335
bool fdname_is_valid(const char *s) {
        const char *end;

        /* Checks whether s is acceptable as a name in $LISTEN_FDNAMES:
         * only ASCII characters that are not control characters, and no
         * ":" since that is the field separator in $LISTEN_FDNAMES.
         *
         * The empty string is explicitly valid. Names longer than 255
         * characters are refused, as is NULL. */

        if (!s)
                return false;

        for (end = s; *end != '\0'; end++) {
                char c = *end;

                if (c < ' ' || c >= 127 || c == ':')
                        return false;
        }

        return end - s <= 255;
}
4aeb20f5
LP
362
363int fd_get_path(int fd, char **ret) {
3ceae1bc
ZJS
364 _cleanup_close_ int dir = -1;
365 char fdname[DECIMAL_STR_MAX(int)];
a0fe2a2d 366 int r;
4aeb20f5 367
3ceae1bc
ZJS
368 dir = open("/proc/self/fd/", O_CLOEXEC | O_DIRECTORY | O_PATH);
369 if (dir < 0)
370 /* /proc is not available or not set up properly, we're most likely
371 * in some chroot environment. */
372 return errno == ENOENT ? -EOPNOTSUPP : -errno;
4aeb20f5 373
3ceae1bc 374 xsprintf(fdname, "%i", fd);
a0fe2a2d 375
3ceae1bc
ZJS
376 r = readlinkat_malloc(dir, fdname, ret);
377 if (r == -ENOENT)
378 /* If the file doesn't exist the fd is invalid */
a0fe2a2d
LP
379 return -EBADF;
380
381 return r;
4aeb20f5 382}
046a82c1
LP
383
384int move_fd(int from, int to, int cloexec) {
385 int r;
386
387 /* Move fd 'from' to 'to', make sure FD_CLOEXEC remains equal if requested, and release the old fd. If
388 * 'cloexec' is passed as -1, the original FD_CLOEXEC is inherited for the new fd. If it is 0, it is turned
389 * off, if it is > 0 it is turned on. */
390
391 if (from < 0)
392 return -EBADF;
393 if (to < 0)
394 return -EBADF;
395
396 if (from == to) {
397
398 if (cloexec >= 0) {
399 r = fd_cloexec(to, cloexec);
400 if (r < 0)
401 return r;
402 }
403
404 return to;
405 }
406
407 if (cloexec < 0) {
408 int fl;
409
410 fl = fcntl(from, F_GETFD, 0);
411 if (fl < 0)
412 return -errno;
413
414 cloexec = !!(fl & FD_CLOEXEC);
415 }
416
417 r = dup3(from, to, cloexec ? O_CLOEXEC : 0);
418 if (r < 0)
419 return -errno;
420
421 assert(r == to);
422
423 safe_close(from);
424
425 return to;
426}
a548e14d
LP
427
428int acquire_data_fd(const void *data, size_t size, unsigned flags) {
429
fbd0b64f 430 char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
a548e14d
LP
431 _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
432 char pattern[] = "/dev/shm/data-fd-XXXXXX";
433 _cleanup_close_ int fd = -1;
434 int isz = 0, r;
435 ssize_t n;
436 off_t f;
437
438 assert(data || size == 0);
439
440 /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
441 * complex than I wish it was. But here's why:
442 *
443 * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
444 * read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
445 *
446 * b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
447 * a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
448 * clients can only bump their size to a system-wide limit, which might be quite low.
449 *
450 * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
451 * earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
452 * /proc/self/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
453 *
454 * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
455 *
456 * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
457 * figure. */
458
459 if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
460 /* As a special case, return /dev/null if we have been called for an empty data block */
461 r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
462 if (r < 0)
463 return -errno;
464
465 return r;
466 }
467
468 if ((flags & ACQUIRE_NO_MEMFD) == 0) {
469 fd = memfd_new("data-fd");
470 if (fd < 0)
471 goto try_pipe;
472
473 n = write(fd, data, size);
474 if (n < 0)
475 return -errno;
476 if ((size_t) n != size)
477 return -EIO;
478
479 f = lseek(fd, 0, SEEK_SET);
480 if (f != 0)
481 return -errno;
482
483 r = memfd_set_sealed(fd);
484 if (r < 0)
485 return r;
486
c10d6bdb 487 return TAKE_FD(fd);
a548e14d
LP
488 }
489
490try_pipe:
491 if ((flags & ACQUIRE_NO_PIPE) == 0) {
492 if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
493 return -errno;
494
495 isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
496 if (isz < 0)
497 return -errno;
498
499 if ((size_t) isz < size) {
500 isz = (int) size;
501 if (isz < 0 || (size_t) isz != size)
502 return -E2BIG;
503
504 /* Try to bump the pipe size */
505 (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
506
507 /* See if that worked */
508 isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
509 if (isz < 0)
510 return -errno;
511
512 if ((size_t) isz < size)
513 goto try_dev_shm;
514 }
515
516 n = write(pipefds[1], data, size);
517 if (n < 0)
518 return -errno;
519 if ((size_t) n != size)
520 return -EIO;
521
522 (void) fd_nonblock(pipefds[0], false);
523
c10d6bdb 524 return TAKE_FD(pipefds[0]);
a548e14d
LP
525 }
526
527try_dev_shm:
528 if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
529 fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
530 if (fd < 0)
531 goto try_dev_shm_without_o_tmpfile;
532
533 n = write(fd, data, size);
534 if (n < 0)
535 return -errno;
536 if ((size_t) n != size)
537 return -EIO;
538
539 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
540 xsprintf(procfs_path, "/proc/self/fd/%i", fd);
541 r = open(procfs_path, O_RDONLY|O_CLOEXEC);
542 if (r < 0)
543 return -errno;
544
545 return r;
546 }
547
548try_dev_shm_without_o_tmpfile:
549 if ((flags & ACQUIRE_NO_REGULAR) == 0) {
550 fd = mkostemp_safe(pattern);
551 if (fd < 0)
552 return fd;
553
554 n = write(fd, data, size);
555 if (n < 0) {
556 r = -errno;
557 goto unlink_and_return;
558 }
559 if ((size_t) n != size) {
560 r = -EIO;
561 goto unlink_and_return;
562 }
563
564 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
565 r = open(pattern, O_RDONLY|O_CLOEXEC);
566 if (r < 0)
567 r = -errno;
568
569 unlink_and_return:
570 (void) unlink(pattern);
571 return r;
572 }
573
574 return -EOPNOTSUPP;
575}
7fe2903c
LP
576
577int fd_move_above_stdio(int fd) {
578 int flags, copy;
579 PROTECT_ERRNO;
580
581 /* Moves the specified file descriptor if possible out of the range [0…2], i.e. the range of
582 * stdin/stdout/stderr. If it can't be moved outside of this range the original file descriptor is
583 * returned. This call is supposed to be used for long-lasting file descriptors we allocate in our code that
584 * might get loaded into foreign code, and where we want ensure our fds are unlikely used accidentally as
585 * stdin/stdout/stderr of unrelated code.
586 *
587 * Note that this doesn't fix any real bugs, it just makes it less likely that our code will be affected by
588 * buggy code from others that mindlessly invokes 'fprintf(stderr, …' or similar in places where stderr has
589 * been closed before.
590 *
591 * This function is written in a "best-effort" and "least-impact" style. This means whenever we encounter an
592 * error we simply return the original file descriptor, and we do not touch errno. */
593
594 if (fd < 0 || fd > 2)
595 return fd;
596
597 flags = fcntl(fd, F_GETFD, 0);
598 if (flags < 0)
599 return fd;
600
601 if (flags & FD_CLOEXEC)
602 copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
603 else
604 copy = fcntl(fd, F_DUPFD, 3);
605 if (copy < 0)
606 return fd;
607
608 assert(copy > 2);
609
610 (void) close(fd);
611 return copy;
612}
aa11e28b
LP
613
614int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd) {
615
616 int fd[3] = { /* Put together an array of fds we work on */
617 original_input_fd,
618 original_output_fd,
619 original_error_fd
620 };
621
622 int r, i,
623 null_fd = -1, /* if we open /dev/null, we store the fd to it here */
624 copy_fd[3] = { -1, -1, -1 }; /* This contains all fds we duplicate here temporarily, and hence need to close at the end */
625 bool null_readable, null_writable;
626
627 /* Sets up stdin, stdout, stderr with the three file descriptors passed in. If any of the descriptors is
628 * specified as -1 it will be connected with /dev/null instead. If any of the file descriptors is passed as
629 * itself (e.g. stdin as STDIN_FILENO) it is left unmodified, but the O_CLOEXEC bit is turned off should it be
630 * on.
631 *
632 * Note that if any of the passed file descriptors are > 2 they will be closed — both on success and on
633 * failure! Thus, callers should assume that when this function returns the input fds are invalidated.
634 *
635 * Note that when this function fails stdin/stdout/stderr might remain half set up!
636 *
637 * O_CLOEXEC is turned off for all three file descriptors (which is how it should be for
638 * stdin/stdout/stderr). */
639
640 null_readable = original_input_fd < 0;
641 null_writable = original_output_fd < 0 || original_error_fd < 0;
642
643 /* First step, open /dev/null once, if we need it */
644 if (null_readable || null_writable) {
645
646 /* Let's open this with O_CLOEXEC first, and convert it to non-O_CLOEXEC when we move the fd to the final position. */
647 null_fd = open("/dev/null", (null_readable && null_writable ? O_RDWR :
648 null_readable ? O_RDONLY : O_WRONLY) | O_CLOEXEC);
649 if (null_fd < 0) {
650 r = -errno;
651 goto finish;
652 }
653
654 /* If this fd is in the 0…2 range, let's move it out of it */
655 if (null_fd < 3) {
656 int copy;
657
658 copy = fcntl(null_fd, F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
659 if (copy < 0) {
660 r = -errno;
661 goto finish;
662 }
663
664 safe_close(null_fd);
665 null_fd = copy;
666 }
667 }
668
669 /* Let's assemble fd[] with the fds to install in place of stdin/stdout/stderr */
670 for (i = 0; i < 3; i++) {
671
672 if (fd[i] < 0)
673 fd[i] = null_fd; /* A negative parameter means: connect this one to /dev/null */
674 else if (fd[i] != i && fd[i] < 3) {
675 /* This fd is in the 0…2 territory, but not at its intended place, move it out of there, so that we can work there. */
676 copy_fd[i] = fcntl(fd[i], F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
677 if (copy_fd[i] < 0) {
678 r = -errno;
679 goto finish;
680 }
681
682 fd[i] = copy_fd[i];
683 }
684 }
685
686 /* At this point we now have the fds to use in fd[], and they are all above the stdio range, so that we
687 * have freedom to move them around. If the fds already were at the right places then the specific fds are
688 * -1. Let's now move them to the right places. This is the point of no return. */
689 for (i = 0; i < 3; i++) {
690
691 if (fd[i] == i) {
692
693 /* fd is already in place, but let's make sure O_CLOEXEC is off */
694 r = fd_cloexec(i, false);
695 if (r < 0)
696 goto finish;
697
698 } else {
699 assert(fd[i] > 2);
700
701 if (dup2(fd[i], i) < 0) { /* Turns off O_CLOEXEC on the new fd. */
702 r = -errno;
703 goto finish;
704 }
705 }
706 }
707
708 r = 0;
709
710finish:
711 /* Close the original fds, but only if they were outside of the stdio range. Also, properly check for the same
712 * fd passed in multiple times. */
713 safe_close_above_stdio(original_input_fd);
714 if (original_output_fd != original_input_fd)
715 safe_close_above_stdio(original_output_fd);
716 if (original_error_fd != original_input_fd && original_error_fd != original_output_fd)
717 safe_close_above_stdio(original_error_fd);
718
719 /* Close the copies we moved > 2 */
720 for (i = 0; i < 3; i++)
721 safe_close(copy_fd[i]);
722
723 /* Close our null fd, if it's > 2 */
724 safe_close_above_stdio(null_fd);
725
726 return r;
727}