]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/fd-util.c
tree-wide: be more careful with the type of array sizes
[thirdparty/systemd.git] / src / basic / fd-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6 ***/
7
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <unistd.h>
14
15 #include "dirent-util.h"
16 #include "fd-util.h"
17 #include "fileio.h"
18 #include "fs-util.h"
19 #include "macro.h"
20 #include "memfd-util.h"
21 #include "missing.h"
22 #include "parse-util.h"
23 #include "path-util.h"
24 #include "process-util.h"
25 #include "socket-util.h"
26 #include "stdio-util.h"
27 #include "util.h"
28
int close_nointr(int fd) {
        int r;

        /* Closes the specified fd. Returns 0 on success (and also when close() was interrupted by a signal),
         * or a negative errno-style error code otherwise. */

        assert(fd >= 0);

        r = close(fd);
        if (r >= 0)
                return 0;

        /*
         * EINTR is deliberately treated as success: retrying close() in a loop is the wrong thing to do on
         * Linux.
         *
         * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
         * https://bugzilla.gnome.org/show_bug.cgi?id=682819
         * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
         * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
         */
        return errno == EINTR ? 0 : -errno;
}
49
50 int safe_close(int fd) {
51
52 /*
53 * Like close_nointr() but cannot fail. Guarantees errno is
54 * unchanged. Is a NOP with negative fds passed, and returns
55 * -1, so that it can be used in this syntax:
56 *
57 * fd = safe_close(fd);
58 */
59
60 if (fd >= 0) {
61 PROTECT_ERRNO;
62
63 /* The kernel might return pretty much any error code
64 * via close(), but the fd will be closed anyway. The
65 * only condition we want to check for here is whether
66 * the fd was invalid at all... */
67
68 assert_se(close_nointr(fd) != -EBADF);
69 }
70
71 return -1;
72 }
73
void safe_close_pair(int p[]) {

        /* Closes both fds of a pair and resets both slots to -1. */

        assert(p);

        if (p[0] != p[1]) {
                /* The common case: two distinct fds, close each of them */
                p[0] = safe_close(p[0]);
                p[1] = safe_close(p[1]);
                return;
        }

        /* Both directions share a single fd: close it only once */
        p[0] = p[1] = safe_close(p[0]);
}
87
void close_many(const int fds[], size_t n_fd) {
        size_t idx = 0;

        /* Closes the first n_fd entries of fds[] via safe_close(), in ascending order. The array itself
         * is not modified. */

        assert(fds || n_fd <= 0);

        while (idx < n_fd)
                safe_close(fds[idx++]);
}
96
int fclose_nointr(FILE *f) {
        assert(f);

        /* The fclose() counterpart of close_nointr(): EINTR counts as success, every other failure is
         * returned as a negative errno-style error code. */

        if (fclose(f) != 0 && errno != EINTR)
                return -errno;

        return 0;
}
110
111 FILE* safe_fclose(FILE *f) {
112
113 /* Same as safe_close(), but for fclose() */
114
115 if (f) {
116 PROTECT_ERRNO;
117
118 assert_se(fclose_nointr(f) != EBADF);
119 }
120
121 return NULL;
122 }
123
124 DIR* safe_closedir(DIR *d) {
125
126 if (d) {
127 PROTECT_ERRNO;
128
129 assert_se(closedir(d) >= 0 || errno != EBADF);
130 }
131
132 return NULL;
133 }
134
int fd_nonblock(int fd, bool nonblock) {
        int current, wanted;

        /* Turns O_NONBLOCK on or off for the specified fd. Returns 0 on success (including when nothing
         * had to be changed), or a negative errno-style error code. */

        assert(fd >= 0);

        current = fcntl(fd, F_GETFL, 0);
        if (current < 0)
                return -errno;

        wanted = nonblock ? (current | O_NONBLOCK) : (current & ~O_NONBLOCK);

        /* Only issue F_SETFL if the flags actually differ */
        if (wanted != current && fcntl(fd, F_SETFL, wanted) < 0)
                return -errno;

        return 0;
}
157
int fd_cloexec(int fd, bool cloexec) {
        int current, wanted;

        /* Turns FD_CLOEXEC on or off for the specified fd. Returns 0 on success (including when nothing
         * had to be changed), or a negative errno-style error code. */

        assert(fd >= 0);

        current = fcntl(fd, F_GETFD, 0);
        if (current < 0)
                return -errno;

        wanted = cloexec ? (current | FD_CLOEXEC) : (current & ~FD_CLOEXEC);

        /* Only issue F_SETFD if the flags actually differ */
        if (wanted != current && fcntl(fd, F_SETFD, wanted) < 0)
                return -errno;

        return 0;
}
180
181 _pure_ static bool fd_in_set(int fd, const int fdset[], size_t n_fdset) {
182 size_t i;
183
184 assert(n_fdset == 0 || fdset);
185
186 for (i = 0; i < n_fdset; i++)
187 if (fdset[i] == fd)
188 return true;
189
190 return false;
191 }
192
193 int close_all_fds(const int except[], size_t n_except) {
194 _cleanup_closedir_ DIR *d = NULL;
195 struct dirent *de;
196 int r = 0;
197
198 assert(n_except == 0 || except);
199
200 d = opendir("/proc/self/fd");
201 if (!d) {
202 int fd;
203 struct rlimit rl;
204
205 /* When /proc isn't available (for example in chroots)
206 * the fallback is brute forcing through the fd
207 * table */
208
209 assert_se(getrlimit(RLIMIT_NOFILE, &rl) >= 0);
210 for (fd = 3; fd < (int) rl.rlim_max; fd ++) {
211 int q;
212
213 if (fd_in_set(fd, except, n_except))
214 continue;
215
216 q = close_nointr(fd);
217 if (q < 0 && q != -EBADF && r >= 0)
218 r = q;
219 }
220
221 return r;
222 }
223
224 FOREACH_DIRENT(de, d, return -errno) {
225 int fd = -1, q;
226
227 if (safe_atoi(de->d_name, &fd) < 0)
228 /* Let's better ignore this, just in case */
229 continue;
230
231 if (fd < 3)
232 continue;
233
234 if (fd == dirfd(d))
235 continue;
236
237 if (fd_in_set(fd, except, n_except))
238 continue;
239
240 q = close_nointr(fd);
241 if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
242 r = q;
243 }
244
245 return r;
246 }
247
248 int same_fd(int a, int b) {
249 struct stat sta, stb;
250 pid_t pid;
251 int r, fa, fb;
252
253 assert(a >= 0);
254 assert(b >= 0);
255
256 /* Compares two file descriptors. Note that semantics are
257 * quite different depending on whether we have kcmp() or we
258 * don't. If we have kcmp() this will only return true for
259 * dup()ed file descriptors, but not otherwise. If we don't
260 * have kcmp() this will also return true for two fds of the same
261 * file, created by separate open() calls. Since we use this
262 * call mostly for filtering out duplicates in the fd store
263 * this difference hopefully doesn't matter too much. */
264
265 if (a == b)
266 return true;
267
268 /* Try to use kcmp() if we have it. */
269 pid = getpid_cached();
270 r = kcmp(pid, pid, KCMP_FILE, a, b);
271 if (r == 0)
272 return true;
273 if (r > 0)
274 return false;
275 if (errno != ENOSYS)
276 return -errno;
277
278 /* We don't have kcmp(), use fstat() instead. */
279 if (fstat(a, &sta) < 0)
280 return -errno;
281
282 if (fstat(b, &stb) < 0)
283 return -errno;
284
285 if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT))
286 return false;
287
288 /* We consider all device fds different, since two device fds
289 * might refer to quite different device contexts even though
290 * they share the same inode and backing dev_t. */
291
292 if (S_ISCHR(sta.st_mode) || S_ISBLK(sta.st_mode))
293 return false;
294
295 if (sta.st_dev != stb.st_dev || sta.st_ino != stb.st_ino)
296 return false;
297
298 /* The fds refer to the same inode on disk, let's also check
299 * if they have the same fd flags. This is useful to
300 * distinguish the read and write side of a pipe created with
301 * pipe(). */
302 fa = fcntl(a, F_GETFL);
303 if (fa < 0)
304 return -errno;
305
306 fb = fcntl(b, F_GETFL);
307 if (fb < 0)
308 return -errno;
309
310 return fa == fb;
311 }
312
313 void cmsg_close_all(struct msghdr *mh) {
314 struct cmsghdr *cmsg;
315
316 assert(mh);
317
318 CMSG_FOREACH(cmsg, mh)
319 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
320 close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
321 }
322
bool fdname_is_valid(const char *s) {
        const char *cur;

        /* Checks whether a string is acceptable as a name in $LISTEN_FDNAMES. Accepted is any ASCII
         * character that is not a control character, with the exception of ":", which serves as field
         * separator in $LISTEN_FDNAMES.
         *
         * The empty string is explicitly valid. Names longer than 255 characters are rejected, as is
         * NULL. */

        if (!s)
                return false;

        for (cur = s; *cur != '\0'; cur++)
                if (*cur < ' ' || *cur >= 127 || *cur == ':')
                        return false;

        return cur - s <= 255;
}
349
350 int fd_get_path(int fd, char **ret) {
351 _cleanup_close_ int dir = -1;
352 char fdname[DECIMAL_STR_MAX(int)];
353 int r;
354
355 dir = open("/proc/self/fd/", O_CLOEXEC | O_DIRECTORY | O_PATH);
356 if (dir < 0)
357 /* /proc is not available or not set up properly, we're most likely
358 * in some chroot environment. */
359 return errno == ENOENT ? -EOPNOTSUPP : -errno;
360
361 xsprintf(fdname, "%i", fd);
362
363 r = readlinkat_malloc(dir, fdname, ret);
364 if (r == -ENOENT)
365 /* If the file doesn't exist the fd is invalid */
366 return -EBADF;
367
368 return r;
369 }
370
371 int move_fd(int from, int to, int cloexec) {
372 int r;
373
374 /* Move fd 'from' to 'to', make sure FD_CLOEXEC remains equal if requested, and release the old fd. If
375 * 'cloexec' is passed as -1, the original FD_CLOEXEC is inherited for the new fd. If it is 0, it is turned
376 * off, if it is > 0 it is turned on. */
377
378 if (from < 0)
379 return -EBADF;
380 if (to < 0)
381 return -EBADF;
382
383 if (from == to) {
384
385 if (cloexec >= 0) {
386 r = fd_cloexec(to, cloexec);
387 if (r < 0)
388 return r;
389 }
390
391 return to;
392 }
393
394 if (cloexec < 0) {
395 int fl;
396
397 fl = fcntl(from, F_GETFD, 0);
398 if (fl < 0)
399 return -errno;
400
401 cloexec = !!(fl & FD_CLOEXEC);
402 }
403
404 r = dup3(from, to, cloexec ? O_CLOEXEC : 0);
405 if (r < 0)
406 return -errno;
407
408 assert(r == to);
409
410 safe_close(from);
411
412 return to;
413 }
414
415 int acquire_data_fd(const void *data, size_t size, unsigned flags) {
416
417 _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
418 char pattern[] = "/dev/shm/data-fd-XXXXXX";
419 _cleanup_close_ int fd = -1;
420 int isz = 0, r;
421 ssize_t n;
422 off_t f;
423
424 assert(data || size == 0);
425
426 /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
427 * complex than I wish it was. But here's why:
428 *
429 * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
430 * read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
431 *
432 * b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
433 * a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
434 * clients can only bump their size to a system-wide limit, which might be quite low.
435 *
436 * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
437 * earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
438 * /proc/self/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
439 *
440 * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
441 *
442 * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
443 * figure. */
444
445 if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
446 /* As a special case, return /dev/null if we have been called for an empty data block */
447 r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
448 if (r < 0)
449 return -errno;
450
451 return r;
452 }
453
454 if ((flags & ACQUIRE_NO_MEMFD) == 0) {
455 fd = memfd_new("data-fd");
456 if (fd < 0)
457 goto try_pipe;
458
459 n = write(fd, data, size);
460 if (n < 0)
461 return -errno;
462 if ((size_t) n != size)
463 return -EIO;
464
465 f = lseek(fd, 0, SEEK_SET);
466 if (f != 0)
467 return -errno;
468
469 r = memfd_set_sealed(fd);
470 if (r < 0)
471 return r;
472
473 return TAKE_FD(fd);
474 }
475
476 try_pipe:
477 if ((flags & ACQUIRE_NO_PIPE) == 0) {
478 if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
479 return -errno;
480
481 isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
482 if (isz < 0)
483 return -errno;
484
485 if ((size_t) isz < size) {
486 isz = (int) size;
487 if (isz < 0 || (size_t) isz != size)
488 return -E2BIG;
489
490 /* Try to bump the pipe size */
491 (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
492
493 /* See if that worked */
494 isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
495 if (isz < 0)
496 return -errno;
497
498 if ((size_t) isz < size)
499 goto try_dev_shm;
500 }
501
502 n = write(pipefds[1], data, size);
503 if (n < 0)
504 return -errno;
505 if ((size_t) n != size)
506 return -EIO;
507
508 (void) fd_nonblock(pipefds[0], false);
509
510 return TAKE_FD(pipefds[0]);
511 }
512
513 try_dev_shm:
514 if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
515 fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
516 if (fd < 0)
517 goto try_dev_shm_without_o_tmpfile;
518
519 n = write(fd, data, size);
520 if (n < 0)
521 return -errno;
522 if ((size_t) n != size)
523 return -EIO;
524
525 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
526 return fd_reopen(fd, O_RDONLY|O_CLOEXEC);
527 }
528
529 try_dev_shm_without_o_tmpfile:
530 if ((flags & ACQUIRE_NO_REGULAR) == 0) {
531 fd = mkostemp_safe(pattern);
532 if (fd < 0)
533 return fd;
534
535 n = write(fd, data, size);
536 if (n < 0) {
537 r = -errno;
538 goto unlink_and_return;
539 }
540 if ((size_t) n != size) {
541 r = -EIO;
542 goto unlink_and_return;
543 }
544
545 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
546 r = open(pattern, O_RDONLY|O_CLOEXEC);
547 if (r < 0)
548 r = -errno;
549
550 unlink_and_return:
551 (void) unlink(pattern);
552 return r;
553 }
554
555 return -EOPNOTSUPP;
556 }
557
558 int fd_move_above_stdio(int fd) {
559 int flags, copy;
560 PROTECT_ERRNO;
561
562 /* Moves the specified file descriptor if possible out of the range [0…2], i.e. the range of
563 * stdin/stdout/stderr. If it can't be moved outside of this range the original file descriptor is
564 * returned. This call is supposed to be used for long-lasting file descriptors we allocate in our code that
565 * might get loaded into foreign code, and where we want ensure our fds are unlikely used accidentally as
566 * stdin/stdout/stderr of unrelated code.
567 *
568 * Note that this doesn't fix any real bugs, it just makes it less likely that our code will be affected by
569 * buggy code from others that mindlessly invokes 'fprintf(stderr, …' or similar in places where stderr has
570 * been closed before.
571 *
572 * This function is written in a "best-effort" and "least-impact" style. This means whenever we encounter an
573 * error we simply return the original file descriptor, and we do not touch errno. */
574
575 if (fd < 0 || fd > 2)
576 return fd;
577
578 flags = fcntl(fd, F_GETFD, 0);
579 if (flags < 0)
580 return fd;
581
582 if (flags & FD_CLOEXEC)
583 copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
584 else
585 copy = fcntl(fd, F_DUPFD, 3);
586 if (copy < 0)
587 return fd;
588
589 assert(copy > 2);
590
591 (void) close(fd);
592 return copy;
593 }
594
595 int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd) {
596
597 int fd[3] = { /* Put together an array of fds we work on */
598 original_input_fd,
599 original_output_fd,
600 original_error_fd
601 };
602
603 int r, i,
604 null_fd = -1, /* if we open /dev/null, we store the fd to it here */
605 copy_fd[3] = { -1, -1, -1 }; /* This contains all fds we duplicate here temporarily, and hence need to close at the end */
606 bool null_readable, null_writable;
607
608 /* Sets up stdin, stdout, stderr with the three file descriptors passed in. If any of the descriptors is
609 * specified as -1 it will be connected with /dev/null instead. If any of the file descriptors is passed as
610 * itself (e.g. stdin as STDIN_FILENO) it is left unmodified, but the O_CLOEXEC bit is turned off should it be
611 * on.
612 *
613 * Note that if any of the passed file descriptors are > 2 they will be closed — both on success and on
614 * failure! Thus, callers should assume that when this function returns the input fds are invalidated.
615 *
616 * Note that when this function fails stdin/stdout/stderr might remain half set up!
617 *
618 * O_CLOEXEC is turned off for all three file descriptors (which is how it should be for
619 * stdin/stdout/stderr). */
620
621 null_readable = original_input_fd < 0;
622 null_writable = original_output_fd < 0 || original_error_fd < 0;
623
624 /* First step, open /dev/null once, if we need it */
625 if (null_readable || null_writable) {
626
627 /* Let's open this with O_CLOEXEC first, and convert it to non-O_CLOEXEC when we move the fd to the final position. */
628 null_fd = open("/dev/null", (null_readable && null_writable ? O_RDWR :
629 null_readable ? O_RDONLY : O_WRONLY) | O_CLOEXEC);
630 if (null_fd < 0) {
631 r = -errno;
632 goto finish;
633 }
634
635 /* If this fd is in the 0…2 range, let's move it out of it */
636 if (null_fd < 3) {
637 int copy;
638
639 copy = fcntl(null_fd, F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
640 if (copy < 0) {
641 r = -errno;
642 goto finish;
643 }
644
645 safe_close(null_fd);
646 null_fd = copy;
647 }
648 }
649
650 /* Let's assemble fd[] with the fds to install in place of stdin/stdout/stderr */
651 for (i = 0; i < 3; i++) {
652
653 if (fd[i] < 0)
654 fd[i] = null_fd; /* A negative parameter means: connect this one to /dev/null */
655 else if (fd[i] != i && fd[i] < 3) {
656 /* This fd is in the 0…2 territory, but not at its intended place, move it out of there, so that we can work there. */
657 copy_fd[i] = fcntl(fd[i], F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
658 if (copy_fd[i] < 0) {
659 r = -errno;
660 goto finish;
661 }
662
663 fd[i] = copy_fd[i];
664 }
665 }
666
667 /* At this point we now have the fds to use in fd[], and they are all above the stdio range, so that we
668 * have freedom to move them around. If the fds already were at the right places then the specific fds are
669 * -1. Let's now move them to the right places. This is the point of no return. */
670 for (i = 0; i < 3; i++) {
671
672 if (fd[i] == i) {
673
674 /* fd is already in place, but let's make sure O_CLOEXEC is off */
675 r = fd_cloexec(i, false);
676 if (r < 0)
677 goto finish;
678
679 } else {
680 assert(fd[i] > 2);
681
682 if (dup2(fd[i], i) < 0) { /* Turns off O_CLOEXEC on the new fd. */
683 r = -errno;
684 goto finish;
685 }
686 }
687 }
688
689 r = 0;
690
691 finish:
692 /* Close the original fds, but only if they were outside of the stdio range. Also, properly check for the same
693 * fd passed in multiple times. */
694 safe_close_above_stdio(original_input_fd);
695 if (original_output_fd != original_input_fd)
696 safe_close_above_stdio(original_output_fd);
697 if (original_error_fd != original_input_fd && original_error_fd != original_output_fd)
698 safe_close_above_stdio(original_error_fd);
699
700 /* Close the copies we moved > 2 */
701 for (i = 0; i < 3; i++)
702 safe_close(copy_fd[i]);
703
704 /* Close our null fd, if it's > 2 */
705 safe_close_above_stdio(null_fd);
706
707 return r;
708 }
709
710 int fd_reopen(int fd, int flags) {
711 char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
712 int new_fd;
713
714 /* Reopens the specified fd with new flags. This is useful for convert an O_PATH fd into a regular one, or to
715 * turn O_RDWR fds into O_RDONLY fds.
716 *
717 * This doesn't work on sockets (since they cannot be open()ed, ever).
718 *
719 * This implicitly resets the file read index to 0. */
720
721 xsprintf(procfs_path, "/proc/self/fd/%i", fd);
722 new_fd = open(procfs_path, flags);
723 if (new_fd < 0)
724 return -errno;
725
726 return new_fd;
727 }