]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/fs-util.c
Merge pull request #20303 from andir/sysconfig-example
[thirdparty/systemd.git] / src / basic / fs-util.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <stddef.h>
5 #include <stdlib.h>
6 #include <linux/falloc.h>
7 #include <linux/magic.h>
8 #include <unistd.h>
9
10 #include "alloc-util.h"
11 #include "dirent-util.h"
12 #include "fd-util.h"
13 #include "fileio.h"
14 #include "fs-util.h"
15 #include "locale-util.h"
16 #include "log.h"
17 #include "macro.h"
18 #include "missing_fcntl.h"
19 #include "missing_fs.h"
20 #include "missing_syscall.h"
21 #include "mkdir.h"
22 #include "parse-util.h"
23 #include "path-util.h"
24 #include "process-util.h"
25 #include "random-util.h"
26 #include "ratelimit.h"
27 #include "stat-util.h"
28 #include "stdio-util.h"
29 #include "string-util.h"
30 #include "strv.h"
31 #include "time-util.h"
32 #include "tmpfile-util.h"
33 #include "user-util.h"
34 #include "util.h"
35
36 int unlink_noerrno(const char *path) {
37 PROTECT_ERRNO;
38 int r;
39
40 r = unlink(path);
41 if (r < 0)
42 return -errno;
43
44 return 0;
45 }
46
/* Walks upwards from 'path', calling rmdir() on each parent directory in turn (the last component of
 * 'path' itself is never removed). The walk ends successfully once the candidate directory matches
 * path_startswith(stop, candidate), or once no further parent is left.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the negated errno of a failing rmdir().
 * ENOENT from rmdir() is ignored. */
int rmdir_parents(const char *path, const char *stop) {
        size_t len;

        assert(path);
        assert(stop);

        len = strlen(path);

        /* Ignore any slashes the path ends in */
        for (; len > 0 && path[len-1] == '/'; len--)
                ;

        while (len > 0) {
                char *candidate;
                int k;

                /* Strip the final component ... */
                for (; len > 0 && path[len-1] != '/'; len--)
                        ;

                /* ... and the slashes separating it from its parent */
                for (; len > 0 && path[len-1] == '/'; len--)
                        ;

                if (len == 0)
                        break;

                candidate = strndup(path, len);
                if (!candidate)
                        return -ENOMEM;

                if (path_startswith(stop, candidate)) {
                        free(candidate);
                        return 0;
                }

                k = rmdir(candidate);
                free(candidate);

                if (k < 0 && errno != ENOENT)
                        return -errno;
        }

        return 0;
}
93
94 int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath) {
95 int r;
96
97 /* Try the ideal approach first */
98 if (renameat2(olddirfd, oldpath, newdirfd, newpath, RENAME_NOREPLACE) >= 0)
99 return 0;
100
101 /* renameat2() exists since Linux 3.15, btrfs and FAT added support for it later. If it is not implemented,
102 * fall back to a different method. */
103 if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EINVAL)
104 return -errno;
105
106 /* Let's try to use linkat()+unlinkat() as fallback. This doesn't work on directories and on some file systems
107 * that do not support hard links (such as FAT, most prominently), but for files it's pretty close to what we
108 * want — though not atomic (i.e. for a short period both the new and the old filename will exist). */
109 if (linkat(olddirfd, oldpath, newdirfd, newpath, 0) >= 0) {
110
111 if (unlinkat(olddirfd, oldpath, 0) < 0) {
112 r = -errno; /* Backup errno before the following unlinkat() alters it */
113 (void) unlinkat(newdirfd, newpath, 0);
114 return r;
115 }
116
117 return 0;
118 }
119
120 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !IN_SET(errno, EINVAL, EPERM)) /* FAT returns EPERM on link()… */
121 return -errno;
122
123 /* OK, neither RENAME_NOREPLACE nor linkat()+unlinkat() worked. Let's then fall back to the racy TOCTOU
124 * vulnerable accessat(F_OK) check followed by classic, replacing renameat(), we have nothing better. */
125
126 if (faccessat(newdirfd, newpath, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
127 return -EEXIST;
128 if (errno != ENOENT)
129 return -errno;
130
131 if (renameat(olddirfd, oldpath, newdirfd, newpath) < 0)
132 return -errno;
133
134 return 0;
135 }
136
137 int readlinkat_malloc(int fd, const char *p, char **ret) {
138 size_t l = PATH_MAX;
139
140 assert(p);
141 assert(ret);
142
143 for (;;) {
144 _cleanup_free_ char *c = NULL;
145 ssize_t n;
146
147 c = new(char, l+1);
148 if (!c)
149 return -ENOMEM;
150
151 n = readlinkat(fd, p, c, l);
152 if (n < 0)
153 return -errno;
154
155 if ((size_t) n < l) {
156 c[n] = 0;
157 *ret = TAKE_PTR(c);
158 return 0;
159 }
160
161 if (l > (SSIZE_MAX-1)/2) /* readlinkat() returns an ssize_t, and we want an extra byte for a
162 * trailing NUL, hence do an overflow check relative to SSIZE_MAX-1
163 * here */
164 return -EFBIG;
165
166 l *= 2;
167 }
168 }
169
170 int readlink_malloc(const char *p, char **ret) {
171 return readlinkat_malloc(AT_FDCWD, p, ret);
172 }
173
174 int readlink_value(const char *p, char **ret) {
175 _cleanup_free_ char *link = NULL;
176 char *value;
177 int r;
178
179 r = readlink_malloc(p, &link);
180 if (r < 0)
181 return r;
182
183 value = basename(link);
184 if (!value)
185 return -ENOENT;
186
187 value = strdup(value);
188 if (!value)
189 return -ENOMEM;
190
191 *ret = value;
192
193 return 0;
194 }
195
196 int readlink_and_make_absolute(const char *p, char **r) {
197 _cleanup_free_ char *target = NULL;
198 char *k;
199 int j;
200
201 assert(p);
202 assert(r);
203
204 j = readlink_malloc(p, &target);
205 if (j < 0)
206 return j;
207
208 k = file_in_same_dir(p, target);
209 if (!k)
210 return -ENOMEM;
211
212 *r = k;
213 return 0;
214 }
215
216 int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid) {
217 _cleanup_close_ int fd = -1;
218
219 assert(path);
220
221 fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW); /* Let's acquire an O_PATH fd, as precaution to change
222 * mode/owner on the same file */
223 if (fd < 0)
224 return -errno;
225
226 return fchmod_and_chown(fd, mode, uid, gid);
227 }
228
229 int fchmod_and_chown_with_fallback(int fd, const char *path, mode_t mode, uid_t uid, gid_t gid) {
230 bool do_chown, do_chmod;
231 struct stat st;
232 int r;
233
234 /* Change ownership and access mode of the specified fd. Tries to do so safely, ensuring that at no
235 * point in time the access mode is above the old access mode under the old ownership or the new
236 * access mode under the new ownership. Note: this call tries hard to leave the access mode
237 * unaffected if the uid/gid is changed, i.e. it undoes implicit suid/sgid dropping the kernel does
238 * on chown().
239 *
240 * This call is happy with O_PATH fds.
241 *
242 * If path is given, allow a fallback path which does not use /proc/self/fd/. On any normal system
243 * /proc will be mounted, but in certain improperly assembled environments it might not be. This is
244 * less secure (potential TOCTOU), so should only be used after consideration. */
245
246 if (fstat(fd, &st) < 0)
247 return -errno;
248
249 do_chown =
250 (uid != UID_INVALID && st.st_uid != uid) ||
251 (gid != GID_INVALID && st.st_gid != gid);
252
253 do_chmod =
254 !S_ISLNK(st.st_mode) && /* chmod is not defined on symlinks */
255 ((mode != MODE_INVALID && ((st.st_mode ^ mode) & 07777) != 0) ||
256 do_chown); /* If we change ownership, make sure we reset the mode afterwards, since chown()
257 * modifies the access mode too */
258
259 if (mode == MODE_INVALID)
260 mode = st.st_mode; /* If we only shall do a chown(), save original mode, since chown() might break it. */
261 else if ((mode & S_IFMT) != 0 && ((mode ^ st.st_mode) & S_IFMT) != 0)
262 return -EINVAL; /* insist on the right file type if it was specified */
263
264 if (do_chown && do_chmod) {
265 mode_t minimal = st.st_mode & mode; /* the subset of the old and the new mask */
266
267 if (((minimal ^ st.st_mode) & 07777) != 0) {
268 r = fchmod_opath(fd, minimal & 07777);
269 if (r < 0) {
270 if (!path || r != -ENOSYS)
271 return r;
272
273 /* Fallback path which doesn't use /proc/self/fd/. */
274 if (chmod(path, minimal & 07777) < 0)
275 return -errno;
276 }
277 }
278 }
279
280 if (do_chown)
281 if (fchownat(fd, "", uid, gid, AT_EMPTY_PATH) < 0)
282 return -errno;
283
284 if (do_chmod) {
285 r = fchmod_opath(fd, mode & 07777);
286 if (r < 0) {
287 if (!path || r != -ENOSYS)
288 return r;
289
290 /* Fallback path which doesn't use /proc/self/fd/. */
291 if (chmod(path, mode & 07777) < 0)
292 return -errno;
293 }
294 }
295
296 return do_chown || do_chmod;
297 }
298
/* Like fchmod(), but masks 'm' with the process umask first. The umask is read by temporarily
 * setting it to 0777 and is restored afterwards.
 *
 * Returns 0 on success or the negated errno of fchmod(). */
int fchmod_umask(int fd, mode_t m) {
        mode_t saved;
        int r = 0;

        saved = umask(0777);

        if (fchmod(fd, m & (~saved)) < 0)
                r = -errno;

        umask(saved);

        return r;
}
309
/* chmod() variant that also works on fds that might have been opened with O_PATH: it goes through
 * the FORMAT_PROC_FD_PATH() name of the fd, since fchmodat() has no AT_EMPTY_PATH flag the way
 * fchownat() does.
 *
 * Returns 0 on success, -ENOSYS if chmod() failed with ENOENT and proc_mounted() returns 0,
 * -ENOENT if it failed with ENOENT otherwise, or the negated errno for any other failure. */
int fchmod_opath(int fd, mode_t m) {

        if (chmod(FORMAT_PROC_FD_PATH(fd), m) >= 0)
                return 0;

        if (errno != ENOENT)
                return -errno;

        /* If we have no /proc/, the concept is not implementable */
        return proc_mounted() == 0 ? -ENOSYS : -ENOENT;
}
327
328 int futimens_opath(int fd, const struct timespec ts[2]) {
329 /* Similar to fchmod_path() but for futimens() */
330
331 if (utimensat(AT_FDCWD, FORMAT_PROC_FD_PATH(fd), ts, 0) < 0) {
332 if (errno != ENOENT)
333 return -errno;
334
335 if (proc_mounted() == 0)
336 return -ENOSYS; /* if we have no /proc/, the concept is not implementable */
337
338 return -ENOENT;
339 }
340
341 return 0;
342 }
343
/* Logs warnings about questionable access modes of a configuration file, based on the stat data in
 * 'st'; 'path' is only used in the log messages. Anything that is not a regular file is ignored.
 * Always returns 0. */
int stat_warn_permissions(const char *path, const struct stat *st) {
        mode_t m;

        assert(path);
        assert(st);

        m = st->st_mode;

        /* Don't complain if we are reading something that is not a file, for example /dev/null */
        if (!S_ISREG(m))
                return 0;

        if ((m & 0111) != 0)
                log_warning("Configuration file %s is marked executable. Please remove executable permission bits. Proceeding anyway.", path);

        if ((m & 0002) != 0)
                log_warning("Configuration file %s is marked world-writable. Please remove world writability permission bits. Proceeding anyway.", path);

        /* This check is only done when running as PID 1 */
        if (getpid_cached() == 1 && (m & 0044) != 0044)
                log_warning("Configuration file %s is marked world-inaccessible. This has no effect as configuration data is accessible via APIs without restrictions. Proceeding anyway.", path);

        return 0;
}
363
/* fd-based front-end for stat_warn_permissions(): runs fstat() on 'fd' and passes the result on.
 * Returns the negated errno of fstat() on failure, otherwise the result of
 * stat_warn_permissions(). */
int fd_warn_permissions(const char *path, int fd) {
        struct stat st;

        assert(path);
        assert(fd >= 0);

        return fstat(fd, &st) < 0 ? -errno : stat_warn_permissions(path, &st);
}
375
376 int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode) {
377 _cleanup_close_ int fd = -1;
378 int r, ret = 0;
379
380 assert(path);
381
382 /* Note that touch_file() does not follow symlinks: if invoked on an existing symlink, then it is the symlink
383 * itself which is updated, not its target
384 *
385 * Returns the first error we encounter, but tries to apply as much as possible. */
386
387 if (parents)
388 (void) mkdir_parents(path, 0755);
389
390 /* Initially, we try to open the node with O_PATH, so that we get a reference to the node. This is useful in
391 * case the path refers to an existing device or socket node, as we can open it successfully in all cases, and
392 * won't trigger any driver magic or so. */
393 fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW);
394 if (fd < 0) {
395 if (errno != ENOENT)
396 return -errno;
397
398 /* if the node doesn't exist yet, we create it, but with O_EXCL, so that we only create a regular file
399 * here, and nothing else */
400 fd = open(path, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, IN_SET(mode, 0, MODE_INVALID) ? 0644 : mode);
401 if (fd < 0)
402 return -errno;
403 }
404
405 /* Let's make a path from the fd, and operate on that. With this logic, we can adjust the access mode,
406 * ownership and time of the file node in all cases, even if the fd refers to an O_PATH object — which is
407 * something fchown(), fchmod(), futimensat() don't allow. */
408 ret = fchmod_and_chown(fd, mode, uid, gid);
409
410 if (stamp != USEC_INFINITY) {
411 struct timespec ts[2];
412
413 timespec_store(&ts[0], stamp);
414 ts[1] = ts[0];
415 r = utimensat(AT_FDCWD, FORMAT_PROC_FD_PATH(fd), ts, 0);
416 } else
417 r = utimensat(AT_FDCWD, FORMAT_PROC_FD_PATH(fd), NULL, 0);
418 if (r < 0 && ret >= 0)
419 return -errno;
420
421 return ret;
422 }
423
424 int touch(const char *path) {
425 return touch_file(path, false, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
426 }
427
428 int symlink_idempotent(const char *from, const char *to, bool make_relative) {
429 _cleanup_free_ char *relpath = NULL;
430 int r;
431
432 assert(from);
433 assert(to);
434
435 if (make_relative) {
436 _cleanup_free_ char *parent = NULL;
437
438 r = path_extract_directory(to, &parent);
439 if (r < 0)
440 return r;
441
442 r = path_make_relative(parent, from, &relpath);
443 if (r < 0)
444 return r;
445
446 from = relpath;
447 }
448
449 if (symlink(from, to) < 0) {
450 _cleanup_free_ char *p = NULL;
451
452 if (errno != EEXIST)
453 return -errno;
454
455 r = readlink_malloc(to, &p);
456 if (r == -EINVAL) /* Not a symlink? In that case return the original error we encountered: -EEXIST */
457 return -EEXIST;
458 if (r < 0) /* Any other error? In that case propagate it as is */
459 return r;
460
461 if (!streq(p, from)) /* Not the symlink we want it to be? In that case, propagate the original -EEXIST */
462 return -EEXIST;
463 }
464
465 return 0;
466 }
467
468 int symlink_atomic(const char *from, const char *to) {
469 _cleanup_free_ char *t = NULL;
470 int r;
471
472 assert(from);
473 assert(to);
474
475 r = tempfn_random(to, NULL, &t);
476 if (r < 0)
477 return r;
478
479 if (symlink(from, t) < 0)
480 return -errno;
481
482 if (rename(t, to) < 0) {
483 unlink_noerrno(t);
484 return -errno;
485 }
486
487 return 0;
488 }
489
490 int mknod_atomic(const char *path, mode_t mode, dev_t dev) {
491 _cleanup_free_ char *t = NULL;
492 int r;
493
494 assert(path);
495
496 r = tempfn_random(path, NULL, &t);
497 if (r < 0)
498 return r;
499
500 if (mknod(t, mode, dev) < 0)
501 return -errno;
502
503 if (rename(t, path) < 0) {
504 unlink_noerrno(t);
505 return -errno;
506 }
507
508 return 0;
509 }
510
511 int mkfifo_atomic(const char *path, mode_t mode) {
512 _cleanup_free_ char *t = NULL;
513 int r;
514
515 assert(path);
516
517 r = tempfn_random(path, NULL, &t);
518 if (r < 0)
519 return r;
520
521 if (mkfifo(t, mode) < 0)
522 return -errno;
523
524 if (rename(t, path) < 0) {
525 unlink_noerrno(t);
526 return -errno;
527 }
528
529 return 0;
530 }
531
532 int mkfifoat_atomic(int dirfd, const char *path, mode_t mode) {
533 _cleanup_free_ char *t = NULL;
534 int r;
535
536 assert(path);
537
538 if (path_is_absolute(path))
539 return mkfifo_atomic(path, mode);
540
541 /* We're only interested in the (random) filename. */
542 r = tempfn_random_child("", NULL, &t);
543 if (r < 0)
544 return r;
545
546 if (mkfifoat(dirfd, t, mode) < 0)
547 return -errno;
548
549 if (renameat(dirfd, t, dirfd, path) < 0) {
550 unlink_noerrno(t);
551 return -errno;
552 }
553
554 return 0;
555 }
556
557 int get_files_in_directory(const char *path, char ***list) {
558 _cleanup_strv_free_ char **l = NULL;
559 _cleanup_closedir_ DIR *d = NULL;
560 struct dirent *de;
561 size_t n = 0;
562
563 assert(path);
564
565 /* Returns all files in a directory in *list, and the number
566 * of files as return value. If list is NULL returns only the
567 * number. */
568
569 d = opendir(path);
570 if (!d)
571 return -errno;
572
573 FOREACH_DIRENT_ALL(de, d, return -errno) {
574 if (!dirent_is_file(de))
575 continue;
576
577 if (list) {
578 /* one extra slot is needed for the terminating NULL */
579 if (!GREEDY_REALLOC(l, n + 2))
580 return -ENOMEM;
581
582 l[n] = strdup(de->d_name);
583 if (!l[n])
584 return -ENOMEM;
585
586 l[++n] = NULL;
587 } else
588 n++;
589 }
590
591 if (list)
592 *list = TAKE_PTR(l);
593
594 return n;
595 }
596
597 static int getenv_tmp_dir(const char **ret_path) {
598 const char *n;
599 int r, ret = 0;
600
601 assert(ret_path);
602
603 /* We use the same order of environment variables python uses in tempfile.gettempdir():
604 * https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir */
605 FOREACH_STRING(n, "TMPDIR", "TEMP", "TMP") {
606 const char *e;
607
608 e = secure_getenv(n);
609 if (!e)
610 continue;
611 if (!path_is_absolute(e)) {
612 r = -ENOTDIR;
613 goto next;
614 }
615 if (!path_is_normalized(e)) {
616 r = -EPERM;
617 goto next;
618 }
619
620 r = is_dir(e, true);
621 if (r < 0)
622 goto next;
623 if (r == 0) {
624 r = -ENOTDIR;
625 goto next;
626 }
627
628 *ret_path = e;
629 return 1;
630
631 next:
632 /* Remember first error, to make this more debuggable */
633 if (ret >= 0)
634 ret = r;
635 }
636
637 if (ret < 0)
638 return ret;
639
640 *ret_path = NULL;
641 return ret;
642 }
643
/* Picks a temporary directory: the one from the environment if getenv_tmp_dir() found a usable one,
 * otherwise the default 'def' provided that is_dir() confirms it.
 *
 * Returns 0 and stores the result in *ret on success. If neither works, the error from the
 * environment lookup is returned if there was one, otherwise the error for the default
 * (-ENOTDIR if is_dir() returned 0). */
static int tmp_dir_internal(const char *def, const char **ret) {
        const char *from_env;
        int env_r, def_r;

        assert(def);
        assert(ret);

        env_r = getenv_tmp_dir(&from_env);
        if (env_r > 0) {
                *ret = from_env;
                return 0;
        }

        def_r = is_dir(def, true);
        if (def_r > 0) {
                *ret = def;
                return 0;
        }

        if (def_r == 0)
                def_r = -ENOTDIR;

        /* Prefer reporting the environment error, if any */
        return env_r < 0 ? env_r : def_r;
}
666
/* Returns the location for "larger" temporary files, that is backed by physical storage if
 * available, and thus even might survive a boot: /var/tmp. If $TMPDIR (or one of the related
 * environment variables) is set, its value is returned in preference however. Both this function and
 * tmp_dir() are affected by $TMPDIR, making it a variable that overrides all temporary file storage
 * locations.
 *
 * Returns 0 and stores the directory in *ret on success, a negative errno-style value otherwise. */
int var_tmp_dir(const char **ret) {
        static const char fallback[] = "/var/tmp";

        return tmp_dir_internal(fallback, ret);
}
676
/* Returns the location for "smaller" temporary files, which is usually backed by an in-memory file
 * system: /tmp. If $TMPDIR (or one of the related environment variables) is set, its value is
 * returned in preference.
 *
 * Returns 0 and stores the directory in *ret on success, a negative errno-style value otherwise. */
int tmp_dir(const char **ret) {
        static const char fallback[] = "/tmp";

        return tmp_dir_internal(fallback, ret);
}
684
/* Removes 'filename' and logs an error if that fails. A missing file is not considered an error;
 * this includes the case where unlink() fails with EROFS on a read-only file system and the file
 * turns out not to exist.
 *
 * Returns 0 on success (or if the file does not exist), otherwise the return value of
 * log_error_errno(). */
int unlink_or_warn(const char *filename) {
        int saved_errno;

        if (unlink(filename) >= 0)
                return 0;

        /* Save the error right away: the access() call below may modify errno, and we want to
         * report why unlink() failed, not what happened afterwards. */
        saved_errno = errno;

        if (saved_errno == ENOENT)
                return 0;

        /* If the file doesn't exist and the fs simply was read-only (in which case unlink() returns
         * EROFS even if the file doesn't exist), don't complain */
        if (saved_errno == EROFS && access(filename, F_OK) < 0)
                return 0;

        return log_error_errno(saved_errno, "Failed to remove \"%s\": %m", filename);
}
695
/* This is like inotify_add_watch(), except that the file to watch is not referenced by a path, but
 * by the fd 'what'; the watch is added through the FORMAT_PROC_FD_PATH() name of that fd.
 *
 * Returns the watch descriptor on success or the negated errno on failure. */
int inotify_add_watch_fd(int fd, int what, uint32_t mask) {
        int wd = inotify_add_watch(fd, FORMAT_PROC_FD_PATH(what), mask);

        return wd < 0 ? -errno : wd;
}
706
/* Wrapper around inotify_add_watch() that logs an error on failure, with a dedicated message for
 * ENOSPC.
 *
 * Returns the watch descriptor on success, otherwise the return value of log_error_errno(). */
int inotify_add_watch_and_warn(int fd, const char *pathname, uint32_t mask) {
        int wd;

        wd = inotify_add_watch(fd, pathname, mask);
        if (wd >= 0)
                return wd;

        if (errno == ENOSPC)
                return log_error_errno(errno, "Failed to add a watch for %s: inotify watch limit reached", pathname);

        return log_error_errno(errno, "Failed to add a watch for %s: %m", pathname);
}
720
/* Returns true if the transition from a to b is unsafe, i.e. if 'a' is not owned by UID 0 and 'b'
 * is owned by a different UID than 'a'. Transitions starting at a file owned by UID 0 are always
 * considered fine; otherwise we need to stay within the same UID.
 *
 * Why bother? So that unprivileged code can't symlink to privileged files making us believe we read
 * something safe even though it isn't safe in the specific context we open it in. */
bool unsafe_transition(const struct stat *a, const struct stat *b) {
        return a->st_uid != 0 && a->st_uid != b->st_uid;
}
731
732 static int log_unsafe_transition(int a, int b, const char *path, unsigned flags) {
733 _cleanup_free_ char *n1 = NULL, *n2 = NULL, *user_a = NULL, *user_b = NULL;
734 struct stat st;
735
736 if (!FLAGS_SET(flags, CHASE_WARN))
737 return -ENOLINK;
738
739 (void) fd_get_path(a, &n1);
740 (void) fd_get_path(b, &n2);
741
742 if (fstat(a, &st) == 0)
743 user_a = uid_to_name(st.st_uid);
744 if (fstat(b, &st) == 0)
745 user_b = uid_to_name(st.st_uid);
746
747 return log_warning_errno(SYNTHETIC_ERRNO(ENOLINK),
748 "Detected unsafe path transition %s (owned by %s) %s %s (owned by %s) during canonicalization of %s.",
749 strna(n1), strna(user_a), special_glyph(SPECIAL_GLYPH_ARROW), strna(n2), strna(user_b), path);
750 }
751
752 static int log_autofs_mount_point(int fd, const char *path, unsigned flags) {
753 _cleanup_free_ char *n1 = NULL;
754
755 if (!FLAGS_SET(flags, CHASE_WARN))
756 return -EREMOTE;
757
758 (void) fd_get_path(fd, &n1);
759
760 return log_warning_errno(SYNTHETIC_ERRNO(EREMOTE),
761 "Detected autofs mount point %s during canonicalization of %s.",
762 strna(n1), path);
763 }
764
765 int chase_symlinks(const char *path, const char *original_root, unsigned flags, char **ret_path, int *ret_fd) {
766 _cleanup_free_ char *buffer = NULL, *done = NULL, *root = NULL;
767 _cleanup_close_ int fd = -1;
768 unsigned max_follow = CHASE_SYMLINKS_MAX; /* how many symlinks to follow before giving up and returning ELOOP */
769 bool exists = true, append_trail_slash = false;
770 struct stat previous_stat;
771 const char *todo;
772 int r;
773
774 assert(path);
775
776 /* Either the file may be missing, or we return an fd to the final object, but both make no sense */
777 if ((flags & CHASE_NONEXISTENT) && ret_fd)
778 return -EINVAL;
779
780 if ((flags & CHASE_STEP) && ret_fd)
781 return -EINVAL;
782
783 if (isempty(path))
784 return -EINVAL;
785
786 /* This is a lot like canonicalize_file_name(), but takes an additional "root" parameter, that allows following
787 * symlinks relative to a root directory, instead of the root of the host.
788 *
789 * Note that "root" primarily matters if we encounter an absolute symlink. It is also used when following
790 * relative symlinks to ensure they cannot be used to "escape" the root directory. The path parameter passed is
791 * assumed to be already prefixed by it, except if the CHASE_PREFIX_ROOT flag is set, in which case it is first
792 * prefixed accordingly.
793 *
794 * Algorithmically this operates on two path buffers: "done" are the components of the path we already
795 * processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we still need to
796 * process. On each iteration, we move one component from "todo" to "done", processing it's special meaning
797 * each time. The "todo" path always starts with at least one slash, the "done" path always ends in no
798 * slash. We always keep an O_PATH fd to the component we are currently processing, thus keeping lookup races
799 * to a minimum.
800 *
801 * Suggested usage: whenever you want to canonicalize a path, use this function. Pass the absolute path you got
802 * as-is: fully qualified and relative to your host's root. Optionally, specify the root parameter to tell this
803 * function what to do when encountering a symlink with an absolute path as directory: prefix it by the
804 * specified path.
805 *
806 * There are five ways to invoke this function:
807 *
808 * 1. Without CHASE_STEP or ret_fd: in this case the path is resolved and the normalized path is
809 * returned in `ret_path`. The return value is < 0 on error. If CHASE_NONEXISTENT is also set, 0
810 * is returned if the file doesn't exist, > 0 otherwise. If CHASE_NONEXISTENT is not set, >= 0 is
811 * returned if the destination was found, -ENOENT if it wasn't.
812 *
813 * 2. With ret_fd: in this case the destination is opened after chasing it as O_PATH and this file
814 * descriptor is returned as return value. This is useful to open files relative to some root
815 * directory. Note that the returned O_PATH file descriptors must be converted into a regular one (using
816 * fd_reopen() or such) before it can be used for reading/writing. ret_fd may not be combined with
817 * CHASE_NONEXISTENT.
818 *
819 * 3. With CHASE_STEP: in this case only a single step of the normalization is executed, i.e. only the first
820 * symlink or ".." component of the path is resolved, and the resulting path is returned. This is useful if
821 * a caller wants to trace the path through the file system verbosely. Returns < 0 on error, > 0 if the
822 * path is fully normalized, and == 0 for each normalization step. This may be combined with
823 * CHASE_NONEXISTENT, in which case 1 is returned when a component is not found.
824 *
825 * 4. With CHASE_SAFE: in this case the path must not contain unsafe transitions, i.e. transitions from
826 * unprivileged to privileged files or directories. In such cases the return value is -ENOLINK. If
827 * CHASE_WARN is also set, a warning describing the unsafe transition is emitted.
828 *
829 * 5. With CHASE_NO_AUTOFS: in this case if an autofs mount point is encountered, path normalization
830 * is aborted and -EREMOTE is returned. If CHASE_WARN is also set, a warning showing the path of
831 * the mount point is emitted.
832 */
833
834 /* A root directory of "/" or "" is identical to none */
835 if (empty_or_root(original_root))
836 original_root = NULL;
837
838 if (!original_root && !ret_path && !(flags & (CHASE_NONEXISTENT|CHASE_NO_AUTOFS|CHASE_SAFE|CHASE_STEP)) && ret_fd) {
839 /* Shortcut the ret_fd case if the caller isn't interested in the actual path and has no root set
840 * and doesn't care about any of the other special features we provide either. */
841 r = open(path, O_PATH|O_CLOEXEC|((flags & CHASE_NOFOLLOW) ? O_NOFOLLOW : 0));
842 if (r < 0)
843 return -errno;
844
845 *ret_fd = r;
846 return 0;
847 }
848
849 if (original_root) {
850 r = path_make_absolute_cwd(original_root, &root);
851 if (r < 0)
852 return r;
853
854 /* Simplify the root directory, so that it has no duplicate slashes and nothing at the
855 * end. While we won't resolve the root path we still simplify it. Note that dropping the
856 * trailing slash should not change behaviour, since when opening it we specify O_DIRECTORY
857 * anyway. Moreover at the end of this function after processing everything we'll always turn
858 * the empty string back to "/". */
859 delete_trailing_chars(root, "/");
860 path_simplify(root);
861
862 if (flags & CHASE_PREFIX_ROOT) {
863 /* We don't support relative paths in combination with a root directory */
864 if (!path_is_absolute(path))
865 return -EINVAL;
866
867 path = prefix_roota(root, path);
868 }
869 }
870
871 r = path_make_absolute_cwd(path, &buffer);
872 if (r < 0)
873 return r;
874
875 fd = open(root ?: "/", O_CLOEXEC|O_DIRECTORY|O_PATH);
876 if (fd < 0)
877 return -errno;
878
879 if (flags & CHASE_SAFE)
880 if (fstat(fd, &previous_stat) < 0)
881 return -errno;
882
883 if (flags & CHASE_TRAIL_SLASH)
884 append_trail_slash = endswith(buffer, "/") || endswith(buffer, "/.");
885
886 if (root) {
887 /* If we are operating on a root directory, let's take the root directory as it is. */
888
889 todo = path_startswith(buffer, root);
890 if (!todo)
891 return log_full_errno(flags & CHASE_WARN ? LOG_WARNING : LOG_DEBUG,
892 SYNTHETIC_ERRNO(ECHRNG),
893 "Specified path '%s' is outside of specified root directory '%s', refusing to resolve.",
894 path, root);
895
896 done = strdup(root);
897 } else {
898 todo = buffer;
899 done = strdup("/");
900 }
901
902 for (;;) {
903 _cleanup_free_ char *first = NULL;
904 _cleanup_close_ int child = -1;
905 struct stat st;
906 const char *e;
907
908 r = path_find_first_component(&todo, true, &e);
909 if (r < 0)
910 return r;
911 if (r == 0) { /* We reached the end. */
912 if (append_trail_slash)
913 if (!strextend(&done, "/"))
914 return -ENOMEM;
915 break;
916 }
917
918 first = strndup(e, r);
919 if (!first)
920 return -ENOMEM;
921
922 /* Two dots? Then chop off the last bit of what we already found out. */
923 if (path_equal(first, "..")) {
924 _cleanup_free_ char *parent = NULL;
925 _cleanup_close_ int fd_parent = -1;
926
927 /* If we already are at the top, then going up will not change anything. This is in-line with
928 * how the kernel handles this. */
929 if (empty_or_root(done))
930 continue;
931
932 parent = dirname_malloc(done);
933 if (!parent)
934 return -ENOMEM;
935
936 /* Don't allow this to leave the root dir. */
937 if (root &&
938 path_startswith(done, root) &&
939 !path_startswith(parent, root))
940 continue;
941
942 free_and_replace(done, parent);
943
944 if (flags & CHASE_STEP)
945 goto chased_one;
946
947 fd_parent = openat(fd, "..", O_CLOEXEC|O_NOFOLLOW|O_PATH);
948 if (fd_parent < 0)
949 return -errno;
950
951 if (flags & CHASE_SAFE) {
952 if (fstat(fd_parent, &st) < 0)
953 return -errno;
954
955 if (unsafe_transition(&previous_stat, &st))
956 return log_unsafe_transition(fd, fd_parent, path, flags);
957
958 previous_stat = st;
959 }
960
961 safe_close(fd);
962 fd = TAKE_FD(fd_parent);
963
964 continue;
965 }
966
967 /* Otherwise let's see what this is. */
968 child = openat(fd, first, O_CLOEXEC|O_NOFOLLOW|O_PATH);
969 if (child < 0) {
970 if (errno == ENOENT &&
971 (flags & CHASE_NONEXISTENT) &&
972 (isempty(todo) || path_is_safe(todo))) {
973 /* If CHASE_NONEXISTENT is set, and the path does not exist, then
974 * that's OK, return what we got so far. But don't allow this if the
975 * remaining path contains "../" or something else weird. */
976
977 if (!path_extend(&done, first, todo))
978 return -ENOMEM;
979
980 exists = false;
981 break;
982 }
983
984 return -errno;
985 }
986
987 if (fstat(child, &st) < 0)
988 return -errno;
989 if ((flags & CHASE_SAFE) &&
990 unsafe_transition(&previous_stat, &st))
991 return log_unsafe_transition(fd, child, path, flags);
992
993 previous_stat = st;
994
995 if ((flags & CHASE_NO_AUTOFS) &&
996 fd_is_fs_type(child, AUTOFS_SUPER_MAGIC) > 0)
997 return log_autofs_mount_point(child, path, flags);
998
999 if (S_ISLNK(st.st_mode) && !((flags & CHASE_NOFOLLOW) && isempty(todo))) {
1000 _cleanup_free_ char *destination = NULL;
1001
1002 /* This is a symlink, in this case read the destination. But let's make sure we
1003 * don't follow symlinks without bounds. */
1004 if (--max_follow <= 0)
1005 return -ELOOP;
1006
1007 r = readlinkat_malloc(fd, first, &destination);
1008 if (r < 0)
1009 return r;
1010 if (isempty(destination))
1011 return -EINVAL;
1012
1013 if (path_is_absolute(destination)) {
1014
1015 /* An absolute destination. Start the loop from the beginning, but use the root
1016 * directory as base. */
1017
1018 safe_close(fd);
1019 fd = open(root ?: "/", O_CLOEXEC|O_DIRECTORY|O_PATH);
1020 if (fd < 0)
1021 return -errno;
1022
1023 if (flags & CHASE_SAFE) {
1024 if (fstat(fd, &st) < 0)
1025 return -errno;
1026
1027 if (unsafe_transition(&previous_stat, &st))
1028 return log_unsafe_transition(child, fd, path, flags);
1029
1030 previous_stat = st;
1031 }
1032
1033 /* Note that we do not revalidate the root, we take it as is. */
1034 r = free_and_strdup(&done, empty_to_root(root));
1035 if (r < 0)
1036 return r;
1037 }
1038
1039 /* Prefix what's left to do with what we just read, and start the loop again, but
1040 * remain in the current directory. */
1041 if (!path_extend(&destination, todo))
1042 return -ENOMEM;
1043
1044 free_and_replace(buffer, destination);
1045 todo = buffer;
1046
1047 if (flags & CHASE_STEP)
1048 goto chased_one;
1049
1050 continue;
1051 }
1052
1053 /* If this is not a symlink, then let's just add the name we read to what we already verified. */
1054 if (!path_extend(&done, first))
1055 return -ENOMEM;
1056
1057 /* And iterate again, but go one directory further down. */
1058 safe_close(fd);
1059 fd = TAKE_FD(child);
1060 }
1061
1062 if (ret_path)
1063 *ret_path = TAKE_PTR(done);
1064
1065 if (ret_fd) {
1066 /* Return the O_PATH fd we currently are looking to the caller. It can translate it to a
1067 * proper fd by opening /proc/self/fd/xyz. */
1068
1069 assert(fd >= 0);
1070 *ret_fd = TAKE_FD(fd);
1071 }
1072
1073 if (flags & CHASE_STEP)
1074 return 1;
1075
1076 return exists;
1077
1078 chased_one:
1079 if (ret_path) {
1080 const char *e;
1081
1082 /* todo may contain slashes at the beginning. */
1083 r = path_find_first_component(&todo, true, &e);
1084 if (r < 0)
1085 return r;
1086 if (r == 0)
1087 *ret_path = TAKE_PTR(done);
1088 else {
1089 char *c;
1090
1091 c = path_join(done, e);
1092 if (!c)
1093 return -ENOMEM;
1094
1095 *ret_path = c;
1096 }
1097 }
1098
1099 return 0;
1100 }
1101
1102 int chase_symlinks_and_open(
1103 const char *path,
1104 const char *root,
1105 unsigned chase_flags,
1106 int open_flags,
1107 char **ret_path) {
1108
1109 _cleanup_close_ int path_fd = -1;
1110 _cleanup_free_ char *p = NULL;
1111 int r;
1112
1113 if (chase_flags & CHASE_NONEXISTENT)
1114 return -EINVAL;
1115
1116 if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
1117 /* Shortcut this call if none of the special features of this call are requested */
1118 r = open(path, open_flags);
1119 if (r < 0)
1120 return -errno;
1121
1122 return r;
1123 }
1124
1125 r = chase_symlinks(path, root, chase_flags, ret_path ? &p : NULL, &path_fd);
1126 if (r < 0)
1127 return r;
1128 assert(path_fd >= 0);
1129
1130 r = fd_reopen(path_fd, open_flags);
1131 if (r < 0)
1132 return r;
1133
1134 if (ret_path)
1135 *ret_path = TAKE_PTR(p);
1136
1137 return r;
1138 }
1139
1140 int chase_symlinks_and_opendir(
1141 const char *path,
1142 const char *root,
1143 unsigned chase_flags,
1144 char **ret_path,
1145 DIR **ret_dir) {
1146
1147 _cleanup_close_ int path_fd = -1;
1148 _cleanup_free_ char *p = NULL;
1149 DIR *d;
1150 int r;
1151
1152 if (!ret_dir)
1153 return -EINVAL;
1154 if (chase_flags & CHASE_NONEXISTENT)
1155 return -EINVAL;
1156
1157 if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
1158 /* Shortcut this call if none of the special features of this call are requested */
1159 d = opendir(path);
1160 if (!d)
1161 return -errno;
1162
1163 *ret_dir = d;
1164 return 0;
1165 }
1166
1167 r = chase_symlinks(path, root, chase_flags, ret_path ? &p : NULL, &path_fd);
1168 if (r < 0)
1169 return r;
1170 assert(path_fd >= 0);
1171
1172 d = opendir(FORMAT_PROC_FD_PATH(path_fd));
1173 if (!d)
1174 return -errno;
1175
1176 if (ret_path)
1177 *ret_path = TAKE_PTR(p);
1178
1179 *ret_dir = d;
1180 return 0;
1181 }
1182
1183 int chase_symlinks_and_stat(
1184 const char *path,
1185 const char *root,
1186 unsigned chase_flags,
1187 char **ret_path,
1188 struct stat *ret_stat,
1189 int *ret_fd) {
1190
1191 _cleanup_close_ int path_fd = -1;
1192 _cleanup_free_ char *p = NULL;
1193 int r;
1194
1195 assert(path);
1196 assert(ret_stat);
1197
1198 if (chase_flags & CHASE_NONEXISTENT)
1199 return -EINVAL;
1200
1201 if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
1202 /* Shortcut this call if none of the special features of this call are requested */
1203 if (stat(path, ret_stat) < 0)
1204 return -errno;
1205
1206 return 1;
1207 }
1208
1209 r = chase_symlinks(path, root, chase_flags, ret_path ? &p : NULL, &path_fd);
1210 if (r < 0)
1211 return r;
1212 assert(path_fd >= 0);
1213
1214 if (fstat(path_fd, ret_stat) < 0)
1215 return -errno;
1216
1217 if (ret_path)
1218 *ret_path = TAKE_PTR(p);
1219 if (ret_fd)
1220 *ret_fd = TAKE_FD(path_fd);
1221
1222 return 1;
1223 }
1224
int access_fd(int fd, int mode) {
        /* A variant of access() for a file descriptor we already have open: the check is done on the
         * path FORMAT_PROC_FD_PATH() generates for the fd. Returns 0 if access is granted, a negative
         * errno-style error code otherwise. */

        if (access(FORMAT_PROC_FD_PATH(fd), mode) >= 0)
                return 0;

        if (errno != ENOENT)
                return -errno;

        /* ENOENT is ambiguous: either the fd is not valid, or /proc is not mounted at all. Tell the two
         * cases apart so that callers (and people debugging) can see what is going on. */

        if (proc_mounted() == 0)
                /* /proc is not available or not set up properly, we're most likely in some chroot
                 * environment. */
                return -ENOSYS;

        /* The directory exists, hence it's the fd that doesn't. */
        return -EBADF;
}
1244
void unlink_tempfilep(char (*p)[]) {
        /* mkstemp() (almost always) replaces the ".XXXXXX" suffix of its template. If the suffix is
         * still in place we hence assume the file was never created and leave things alone. Otherwise
         * we remove the file. The rare case where the generated name happens to equal the template
         * suffix is ignored, and so are unlink failures. */

        if (endswith(*p, ".XXXXXX"))
                return;

        (void) unlink_noerrno(*p);
}
1253
1254 int unlinkat_deallocate(int fd, const char *name, UnlinkDeallocateFlags flags) {
1255 _cleanup_close_ int truncate_fd = -1;
1256 struct stat st;
1257 off_t l, bs;
1258
1259 assert((flags & ~(UNLINK_REMOVEDIR|UNLINK_ERASE)) == 0);
1260
1261 /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no other
1262 * link to it. This is useful to ensure that other processes that might have the file open for reading won't be
1263 * able to keep the data pinned on disk forever. This call is particular useful whenever we execute clean-up
1264 * jobs ("vacuuming"), where we want to make sure the data is really gone and the disk space released and
1265 * returned to the free pool.
1266 *
1267 * Deallocation is preferably done by FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE (👊) if supported, which means
1268 * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other
1269 * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes
1270 * underneath those programs is the better choice than simply triggering SIGBUS in them which truncation does.)
1271 * However if hole punching is not implemented in the kernel or file system we'll fall back to normal file
1272 * truncation (🔪), as our goal of deallocating the data space trumps our goal of being nice to readers (💐).
1273 *
1274 * Note that we attempt deallocation, but failure to succeed with that is not considered fatal, as long as the
1275 * primary job – to delete the file – is accomplished. */
1276
1277 if (!FLAGS_SET(flags, UNLINK_REMOVEDIR)) {
1278 truncate_fd = openat(fd, name, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
1279 if (truncate_fd < 0) {
1280
1281 /* If this failed because the file doesn't exist propagate the error right-away. Also,
1282 * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR is
1283 * returned when this is a directory but we are not supposed to delete those, hence propagate
1284 * the error right-away too. */
1285 if (IN_SET(errno, ENOENT, EISDIR))
1286 return -errno;
1287
1288 if (errno != ELOOP) /* don't complain if this is a symlink */
1289 log_debug_errno(errno, "Failed to open file '%s' for deallocation, ignoring: %m", name);
1290 }
1291 }
1292
1293 if (unlinkat(fd, name, FLAGS_SET(flags, UNLINK_REMOVEDIR) ? AT_REMOVEDIR : 0) < 0)
1294 return -errno;
1295
1296 if (truncate_fd < 0) /* Don't have a file handle, can't do more ☹️ */
1297 return 0;
1298
1299 if (fstat(truncate_fd, &st) < 0) {
1300 log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name);
1301 return 0;
1302 }
1303
1304 if (!S_ISREG(st.st_mode))
1305 return 0;
1306
1307 if (FLAGS_SET(flags, UNLINK_ERASE) && st.st_size > 0 && st.st_nlink == 0) {
1308 uint64_t left = st.st_size;
1309 char buffer[64 * 1024];
1310
1311 /* If erasing is requested, let's overwrite the file with random data once before deleting
1312 * it. This isn't going to give you shred(1) semantics, but hopefully should be good enough
1313 * for stuff backed by tmpfs at least.
1314 *
1315 * Note that we only erase like this if the link count of the file is zero. If it is higher it
1316 * is still linked by someone else and we'll leave it to them to remove it securely
1317 * eventually! */
1318
1319 random_bytes(buffer, sizeof(buffer));
1320
1321 while (left > 0) {
1322 ssize_t n;
1323
1324 n = write(truncate_fd, buffer, MIN(sizeof(buffer), left));
1325 if (n < 0) {
1326 log_debug_errno(errno, "Failed to erase data in file '%s', ignoring.", name);
1327 break;
1328 }
1329
1330 assert(left >= (size_t) n);
1331 left -= n;
1332 }
1333
1334 /* Let's refresh metadata */
1335 if (fstat(truncate_fd, &st) < 0) {
1336 log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name);
1337 return 0;
1338 }
1339 }
1340
1341 /* Don't dallocate if there's nothing to deallocate or if the file is linked elsewhere */
1342 if (st.st_blocks == 0 || st.st_nlink > 0)
1343 return 0;
1344
1345 /* If this is a regular file, it actually took up space on disk and there are no other links it's time to
1346 * punch-hole/truncate this to release the disk space. */
1347
1348 bs = MAX(st.st_blksize, 512);
1349 l = DIV_ROUND_UP(st.st_size, bs) * bs; /* Round up to next block size */
1350
1351 if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0)
1352 return 0; /* Successfully punched a hole! 😊 */
1353
1354 /* Fall back to truncation */
1355 if (ftruncate(truncate_fd, 0) < 0) {
1356 log_debug_errno(errno, "Failed to truncate file to 0, ignoring: %m");
1357 return 0;
1358 }
1359
1360 return 0;
1361 }
1362
1363 int fsync_directory_of_file(int fd) {
1364 _cleanup_close_ int dfd = -1;
1365 struct stat st;
1366 int r;
1367
1368 assert(fd >= 0);
1369
1370 /* We only reasonably can do this for regular files and directories, or for O_PATH fds, hence check
1371 * for the inode type first */
1372 if (fstat(fd, &st) < 0)
1373 return -errno;
1374
1375 if (S_ISDIR(st.st_mode)) {
1376 dfd = openat(fd, "..", O_RDONLY|O_DIRECTORY|O_CLOEXEC, 0);
1377 if (dfd < 0)
1378 return -errno;
1379
1380 } else if (!S_ISREG(st.st_mode)) { /* Regular files are OK regardless if O_PATH or not, for all other
1381 * types check O_PATH flag */
1382 int flags;
1383
1384 flags = fcntl(fd, F_GETFL);
1385 if (flags < 0)
1386 return -errno;
1387
1388 if (!FLAGS_SET(flags, O_PATH)) /* If O_PATH this refers to the inode in the fs, in which case
1389 * we can sensibly do what is requested. Otherwise this refers
1390 * to a socket, fifo or device node, where the concept of a
1391 * containing directory doesn't make too much sense. */
1392 return -ENOTTY;
1393 }
1394
1395 if (dfd < 0) {
1396 _cleanup_free_ char *path = NULL;
1397
1398 r = fd_get_path(fd, &path);
1399 if (r < 0) {
1400 log_debug_errno(r, "Failed to query /proc/self/fd/%d%s: %m",
1401 fd,
1402 r == -ENOSYS ? ", ignoring" : "");
1403
1404 if (r == -ENOSYS)
1405 /* If /proc is not available, we're most likely running in some
1406 * chroot environment, and syncing the directory is not very
1407 * important in that case. Let's just silently do nothing. */
1408 return 0;
1409
1410 return r;
1411 }
1412
1413 if (!path_is_absolute(path))
1414 return -EINVAL;
1415
1416 dfd = open_parent(path, O_CLOEXEC|O_NOFOLLOW, 0);
1417 if (dfd < 0)
1418 return dfd;
1419 }
1420
1421 if (fsync(dfd) < 0)
1422 return -errno;
1423
1424 return 0;
1425 }
1426
int fsync_full(int fd) {
        int file_result = 0, dir_result;

        /* Synchronizes both the file itself and the directory it is located in. Both steps are always
         * attempted; if both fail the error of the first one is returned. */

        if (fsync(fd) < 0)
                file_result = -errno;

        dir_result = fsync_directory_of_file(fd);

        /* The earlier error takes precedence */
        if (file_result < 0)
                return file_result;

        /* -ENOTTY means 'fd' refers to a block device or so, which doesn't really have a parent
         * directory. That's not an error for our purposes. */
        return dir_result == -ENOTTY ? 0 : dir_result;
}
1442
1443 int fsync_path_at(int at_fd, const char *path) {
1444 _cleanup_close_ int opened_fd = -1;
1445 int fd;
1446
1447 if (isempty(path)) {
1448 if (at_fd == AT_FDCWD) {
1449 opened_fd = open(".", O_RDONLY|O_DIRECTORY|O_CLOEXEC);
1450 if (opened_fd < 0)
1451 return -errno;
1452
1453 fd = opened_fd;
1454 } else
1455 fd = at_fd;
1456 } else {
1457 opened_fd = openat(at_fd, path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1458 if (opened_fd < 0)
1459 return -errno;
1460
1461 fd = opened_fd;
1462 }
1463
1464 if (fsync(fd) < 0)
1465 return -errno;
1466
1467 return 0;
1468 }
1469
1470 int fsync_parent_at(int at_fd, const char *path) {
1471 _cleanup_close_ int opened_fd = -1;
1472
1473 if (isempty(path)) {
1474 if (at_fd != AT_FDCWD)
1475 return fsync_directory_of_file(at_fd);
1476
1477 opened_fd = open("..", O_RDONLY|O_DIRECTORY|O_CLOEXEC);
1478 if (opened_fd < 0)
1479 return -errno;
1480
1481 if (fsync(opened_fd) < 0)
1482 return -errno;
1483
1484 return 0;
1485 }
1486
1487 opened_fd = openat(at_fd, path, O_PATH|O_CLOEXEC|O_NOFOLLOW);
1488 if (opened_fd < 0)
1489 return -errno;
1490
1491 return fsync_directory_of_file(opened_fd);
1492 }
1493
1494 int fsync_path_and_parent_at(int at_fd, const char *path) {
1495 _cleanup_close_ int opened_fd = -1;
1496
1497 if (isempty(path)) {
1498 if (at_fd != AT_FDCWD)
1499 return fsync_full(at_fd);
1500
1501 opened_fd = open(".", O_RDONLY|O_DIRECTORY|O_CLOEXEC);
1502 } else
1503 opened_fd = openat(at_fd, path, O_RDONLY|O_NOFOLLOW|O_NONBLOCK|O_CLOEXEC);
1504 if (opened_fd < 0)
1505 return -errno;
1506
1507 return fsync_full(opened_fd);
1508 }
1509
1510 int syncfs_path(int atfd, const char *path) {
1511 _cleanup_close_ int fd = -1;
1512
1513 if (isempty(path)) {
1514 if (atfd != AT_FDCWD)
1515 return syncfs(atfd) < 0 ? -errno : 0;
1516
1517 fd = open(".", O_RDONLY|O_DIRECTORY|O_CLOEXEC);
1518 } else
1519 fd = openat(atfd, path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1520 if (fd < 0)
1521 return -errno;
1522
1523 if (syncfs(fd) < 0)
1524 return -errno;
1525
1526 return 0;
1527 }
1528
1529 int open_parent(const char *path, int flags, mode_t mode) {
1530 _cleanup_free_ char *parent = NULL;
1531 int fd, r;
1532
1533 r = path_extract_directory(path, &parent);
1534 if (r < 0)
1535 return r;
1536
1537 /* Let's insist on O_DIRECTORY since the parent of a file or directory is a directory. Except if we open an
1538 * O_TMPFILE file, because in that case we are actually create a regular file below the parent directory. */
1539
1540 if (FLAGS_SET(flags, O_PATH))
1541 flags |= O_DIRECTORY;
1542 else if (!FLAGS_SET(flags, O_TMPFILE))
1543 flags |= O_DIRECTORY|O_RDONLY;
1544
1545 fd = open(parent, flags, mode);
1546 if (fd < 0)
1547 return -errno;
1548
1549 return fd;
1550 }
1551
int conservative_renameat(
                int olddirfd, const char *oldpath,
                int newdirfd, const char *newpath) {

        _cleanup_close_ int old_fd = -1, new_fd = -1;
        struct stat old_stat, new_stat;

        /* Renames the old path to the new path, much like renameat() — except if both are regular files and
         * have the exact same contents and basic file attributes already. In that case remove the old
         * (source) file instead and leave the new (destination) file untouched. This call is useful for
         * reducing inotify wakeups on files that are updated but don't actually change. This function is
         * written in a style that we rather rename too often than suppress too much. i.e. whenever we are in
         * doubt we rather rename than fail. After all reducing inotify events is an optimization only, not
         * more.
         *
         * Returns 0 if the files were identical and the source was removed, 1 if the rename was carried
         * out, and a negative errno-style error code if renameat() failed. */

        /* Any failure to open or stat either side simply means we can't compare, hence fall back to a
         * regular rename. */
        old_fd = openat(olddirfd, oldpath, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_NOFOLLOW);
        if (old_fd < 0)
                goto do_rename;

        new_fd = openat(newdirfd, newpath, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_NOFOLLOW);
        if (new_fd < 0)
                goto do_rename;

        if (fstat(old_fd, &old_stat) < 0)
                goto do_rename;

        /* Only regular files are compared */
        if (!S_ISREG(old_stat.st_mode))
                goto do_rename;

        if (fstat(new_fd, &new_stat) < 0)
                goto do_rename;

        /* Both paths refer to the very same inode: no need to compare contents */
        if (new_stat.st_ino == old_stat.st_ino &&
            new_stat.st_dev == old_stat.st_dev)
                goto is_same;

        /* Cheap metadata checks first: if type/access mode, size or ownership differ the files are not
         * considered the same. */
        if (old_stat.st_mode != new_stat.st_mode ||
            old_stat.st_size != new_stat.st_size ||
            old_stat.st_uid != new_stat.st_uid ||
            old_stat.st_gid != new_stat.st_gid)
                goto do_rename;

        /* Now compare the contents, block by block */
        for (;;) {
                uint8_t buf1[16*1024];
                uint8_t buf2[sizeof(buf1)];
                ssize_t l1, l2;

                l1 = read(old_fd, buf1, sizeof(buf1));
                if (l1 < 0)
                        goto do_rename;

                if (l1 == sizeof(buf1))
                        /* Read the full block, hence read a full block in the other file too */

                        l2 = read(new_fd, buf2, l1);
                else {
                        assert((size_t) l1 < sizeof(buf1));

                        /* Short read. This hence was the last block in the first file, and then came
                         * EOF. Read one byte more in the second file, so that we can verify we hit EOF there
                         * too. */

                        assert((size_t) (l1 + 1) <= sizeof(buf2));
                        l2 = read(new_fd, buf2, l1 + 1);
                }
                /* This also covers read errors on the second file (l2 < 0), since l1 >= 0 here */
                if (l2 != l1)
                        goto do_rename;

                if (memcmp(buf1, buf2, l1) != 0)
                        goto do_rename;

                if ((size_t) l1 < sizeof(buf1)) /* We hit EOF on the first file, and the second file too, hence exit
                                                 * now. */
                        break;
        }

is_same:
        /* Everything matches? Then don't rename, instead remove the source file, and leave the existing
         * destination in place */

        if (unlinkat(olddirfd, oldpath, 0) < 0)
                goto do_rename; /* If removal fails, fall back to the rename after all */

        return 0;

do_rename:
        if (renameat(olddirfd, oldpath, newdirfd, newpath) < 0)
                return -errno;

        return 1;
}
1642
1643 int posix_fallocate_loop(int fd, uint64_t offset, uint64_t size) {
1644 RateLimit rl;
1645 int r;
1646
1647 r = posix_fallocate(fd, offset, size); /* returns positive errnos on error */
1648 if (r != EINTR)
1649 return -r; /* Let's return negative errnos, like common in our codebase */
1650
1651 /* On EINTR try a couple of times more, but protect against busy looping
1652 * (not more than 16 times per 10s) */
1653 rl = (RateLimit) { 10 * USEC_PER_SEC, 16 };
1654 while (ratelimit_below(&rl)) {
1655 r = posix_fallocate(fd, offset, size);
1656 if (r != EINTR)
1657 return -r;
1658 }
1659
1660 return -EINTR;
1661 }