]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/fs-util.c
update NEWS with even more features for v258
[thirdparty/systemd.git] / src / basic / fs-util.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
f4f15635 2
1cf40697 3#include <linux/falloc.h>
11c3a366 4#include <stdlib.h>
2646b86d 5#include <sys/file.h>
11c3a366
TA
6#include <unistd.h>
7
b5efdb8a 8#include "alloc-util.h"
bc6a6130 9#include "btrfs.h"
1b05ac94 10#include "chattr-util.h"
f4f15635 11#include "dirent-util.h"
fda65211 12#include "errno-util.h"
f4f15635 13#include "fd-util.h"
f4f15635 14#include "fs-util.h"
68def5a9 15#include "hostname-util.h"
420d2e31 16#include "label.h"
2646b86d 17#include "lock-util.h"
11c3a366 18#include "log.h"
0499585f 19#include "missing_fcntl.h"
f5947a5e 20#include "missing_syscall.h"
93cc7779 21#include "mkdir.h"
93cc7779 22#include "path-util.h"
dccca82b 23#include "process-util.h"
053e0626 24#include "random-util.h"
4c54768c 25#include "ratelimit.h"
34a8f081 26#include "stat-util.h"
f4f15635
LP
27#include "string-util.h"
28#include "strv.h"
93cc7779 29#include "time-util.h"
e4de7287 30#include "tmpfile-util.h"
7c248223 31#include "umask-util.h"
f4f15635 32
f4f15635 33int rmdir_parents(const char *path, const char *stop) {
4e046c5c
YW
34 char *p;
35 int r;
f4f15635
LP
36
37 assert(path);
38 assert(stop);
39
4e046c5c
YW
40 if (!path_is_safe(path))
41 return -EINVAL;
f4f15635 42
4e046c5c
YW
43 if (!path_is_safe(stop))
44 return -EINVAL;
f4f15635 45
2f82562b 46 p = strdupa_safe(path);
f4f15635 47
4e046c5c
YW
48 for (;;) {
49 char *slash = NULL;
f4f15635 50
4e046c5c
YW
51 /* skip the last component. */
52 r = path_find_last_component(p, /* accept_dot_dot= */ false, (const char **) &slash, NULL);
53 if (r <= 0)
54 return r;
55 if (slash == p)
56 return 0;
f4f15635 57
4e046c5c
YW
58 assert(*slash == '/');
59 *slash = '\0';
f4f15635 60
ceed11e4 61 if (path_startswith_full(stop, p, PATH_STARTSWITH_REFUSE_DOT_DOT))
f4f15635 62 return 0;
f4f15635 63
4e046c5c
YW
64 if (rmdir(p) < 0 && errno != ENOENT)
65 return -errno;
f4f15635 66 }
f4f15635
LP
67}
68
f4f15635 69int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath) {
2f15b625 70 int r;
f4f15635 71
c63c6413
LP
72 assert(olddirfd >= 0 || olddirfd == AT_FDCWD);
73 assert(oldpath);
74 assert(newdirfd >= 0 || newdirfd == AT_FDCWD);
75 assert(newpath);
76
2f15b625
LP
77 /* Try the ideal approach first */
78 if (renameat2(olddirfd, oldpath, newdirfd, newpath, RENAME_NOREPLACE) >= 0)
f4f15635
LP
79 return 0;
80
2f15b625
LP
81 /* renameat2() exists since Linux 3.15, btrfs and FAT added support for it later. If it is not implemented,
82 * fall back to a different method. */
62e10065 83 if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EINVAL)
f4f15635
LP
84 return -errno;
85
2f15b625
LP
86 /* Let's try to use linkat()+unlinkat() as fallback. This doesn't work on directories and on some file systems
87 * that do not support hard links (such as FAT, most prominently), but for files it's pretty close to what we
88 * want — though not atomic (i.e. for a short period both the new and the old filename will exist). */
89 if (linkat(olddirfd, oldpath, newdirfd, newpath, 0) >= 0) {
90
7c248223
LP
91 r = RET_NERRNO(unlinkat(olddirfd, oldpath, 0));
92 if (r < 0) {
2f15b625
LP
93 (void) unlinkat(newdirfd, newpath, 0);
94 return r;
95 }
96
97 return 0;
f4f15635
LP
98 }
99
62e10065 100 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !IN_SET(errno, EINVAL, EPERM)) /* FAT returns EPERM on link()… */
f4f15635
LP
101 return -errno;
102
2aed63f4 103 /* OK, neither RENAME_NOREPLACE nor linkat()+unlinkat() worked. Let's then fall back to the racy TOCTOU
2f15b625
LP
104 * vulnerable accessat(F_OK) check followed by classic, replacing renameat(), we have nothing better. */
105
106 if (faccessat(newdirfd, newpath, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
107 return -EEXIST;
108 if (errno != ENOENT)
109 return -errno;
110
7c248223 111 return RET_NERRNO(renameat(olddirfd, oldpath, newdirfd, newpath));
f4f15635
LP
112}
113
114int readlinkat_malloc(int fd, const char *p, char **ret) {
db220032 115 size_t l = PATH_MAX;
f4f15635 116
e4c094c0
YW
117 assert(fd >= 0 || fd == AT_FDCWD);
118
119 if (fd < 0 && isempty(p))
120 return -EISDIR; /* In this case, the fd points to the current working directory, and is
121 * definitely not a symlink. Let's return earlier. */
f4f15635
LP
122
123 for (;;) {
db220032 124 _cleanup_free_ char *c = NULL;
f4f15635
LP
125 ssize_t n;
126
db220032 127 c = new(char, l+1);
f4f15635
LP
128 if (!c)
129 return -ENOMEM;
130
e4c094c0 131 n = readlinkat(fd, strempty(p), c, l);
db220032
LP
132 if (n < 0)
133 return -errno;
f4f15635 134
db220032 135 if ((size_t) n < l) {
f4f15635 136 c[n] = 0;
aed3c5ec
LP
137
138 if (ret)
139 *ret = TAKE_PTR(c);
140
f4f15635
LP
141 return 0;
142 }
143
db220032
LP
144 if (l > (SSIZE_MAX-1)/2) /* readlinkat() returns an ssize_t, and we want an extra byte for a
145 * trailing NUL, hence do an overflow check relative to SSIZE_MAX-1
146 * here */
147 return -EFBIG;
148
f4f15635
LP
149 l *= 2;
150 }
151}
152
f4f15635 153int readlink_value(const char *p, char **ret) {
bb60956b 154 _cleanup_free_ char *link = NULL, *name = NULL;
f4f15635
LP
155 int r;
156
ce8394f9
YW
157 assert(p);
158 assert(ret);
159
f4f15635
LP
160 r = readlink_malloc(p, &link);
161 if (r < 0)
162 return r;
163
bb60956b
YW
164 r = path_extract_filename(link, &name);
165 if (r < 0)
166 return r;
167 if (r == O_DIRECTORY)
168 return -EINVAL;
169
170 *ret = TAKE_PTR(name);
171 return 0;
f4f15635
LP
172}
173
162f6477 174int readlink_and_make_absolute(const char *p, char **ret) {
f4f15635 175 _cleanup_free_ char *target = NULL;
162f6477 176 int r;
f4f15635
LP
177
178 assert(p);
162f6477 179 assert(ret);
f4f15635 180
162f6477
LP
181 r = readlink_malloc(p, &target);
182 if (r < 0)
183 return r;
f4f15635 184
162f6477 185 return file_in_same_dir(p, target, ret);
f4f15635
LP
186}
187
55451417 188int chmod_and_chown_at(int dir_fd, const char *path, mode_t mode, uid_t uid, gid_t gid) {
254d1313 189 _cleanup_close_ int fd = -EBADF;
30ff18d8 190
7d000133
DDM
191 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
192
55451417
DDM
193 if (path) {
194 /* Let's acquire an O_PATH fd, as precaution to change mode/owner on the same file */
195 fd = openat(dir_fd, path, O_PATH|O_CLOEXEC|O_NOFOLLOW);
196 if (fd < 0)
197 return -errno;
340bc268
YW
198 dir_fd = fd;
199
200 } else if (dir_fd == AT_FDCWD) {
201 /* Let's acquire an O_PATH fd of the current directory */
202 fd = openat(dir_fd, ".", O_PATH|O_CLOEXEC|O_NOFOLLOW|O_DIRECTORY);
203 if (fd < 0)
204 return -errno;
205 dir_fd = fd;
55451417 206 }
de321f52 207
340bc268 208 return fchmod_and_chown(dir_fd, mode, uid, gid);
b8da477e
YW
209}
210
0520564d 211int fchmod_and_chown_with_fallback(int fd, const char *path, mode_t mode, uid_t uid, gid_t gid) {
2dbb7e94 212 bool do_chown, do_chmod;
30ff18d8 213 struct stat st;
dee00c19 214 int r;
30ff18d8 215
2dbb7e94
LP
216 /* Change ownership and access mode of the specified fd. Tries to do so safely, ensuring that at no
217 * point in time the access mode is above the old access mode under the old ownership or the new
218 * access mode under the new ownership. Note: this call tries hard to leave the access mode
219 * unaffected if the uid/gid is changed, i.e. it undoes implicit suid/sgid dropping the kernel does
220 * on chown().
221 *
0520564d
ZJS
222 * This call is happy with O_PATH fds.
223 *
224 * If path is given, allow a fallback path which does not use /proc/self/fd/. On any normal system
225 * /proc will be mounted, but in certain improperly assembled environments it might not be. This is
226 * less secure (potential TOCTOU), so should only be used after consideration. */
b8da477e 227
71ec74d1 228 if (fstat(fd, &st) < 0)
2dbb7e94 229 return -errno;
de321f52 230
2dbb7e94
LP
231 do_chown =
232 (uid != UID_INVALID && st.st_uid != uid) ||
233 (gid != GID_INVALID && st.st_gid != gid);
de321f52 234
2dbb7e94
LP
235 do_chmod =
236 !S_ISLNK(st.st_mode) && /* chmod is not defined on symlinks */
237 ((mode != MODE_INVALID && ((st.st_mode ^ mode) & 07777) != 0) ||
238 do_chown); /* If we change ownership, make sure we reset the mode afterwards, since chown()
239 * modifies the access mode too */
30ff18d8 240
2dbb7e94
LP
241 if (mode == MODE_INVALID)
242 mode = st.st_mode; /* If we only shall do a chown(), save original mode, since chown() might break it. */
243 else if ((mode & S_IFMT) != 0 && ((mode ^ st.st_mode) & S_IFMT) != 0)
244 return -EINVAL; /* insist on the right file type if it was specified */
de321f52 245
2dbb7e94
LP
246 if (do_chown && do_chmod) {
247 mode_t minimal = st.st_mode & mode; /* the subset of the old and the new mask */
30ff18d8 248
dee00c19
LP
249 if (((minimal ^ st.st_mode) & 07777) != 0) {
250 r = fchmod_opath(fd, minimal & 07777);
0520564d
ZJS
251 if (r < 0) {
252 if (!path || r != -ENOSYS)
253 return r;
254
255 /* Fallback path which doesn't use /proc/self/fd/. */
256 if (chmod(path, minimal & 07777) < 0)
257 return -errno;
258 }
dee00c19 259 }
de321f52 260 }
b8da477e 261
2dbb7e94 262 if (do_chown)
71ec74d1 263 if (fchownat(fd, "", uid, gid, AT_EMPTY_PATH) < 0)
2dbb7e94 264 return -errno;
30ff18d8 265
dee00c19
LP
266 if (do_chmod) {
267 r = fchmod_opath(fd, mode & 07777);
0520564d
ZJS
268 if (r < 0) {
269 if (!path || r != -ENOSYS)
270 return r;
271
272 /* Fallback path which doesn't use /proc/self/fd/. */
273 if (chmod(path, mode & 07777) < 0)
274 return -errno;
275 }
dee00c19 276 }
30ff18d8 277
2dbb7e94 278 return do_chown || do_chmod;
f4f15635
LP
279}
280
f4f15635 281int fchmod_umask(int fd, mode_t m) {
7c248223 282 _cleanup_umask_ mode_t u = umask(0777);
f4f15635 283
7c248223 284 return RET_NERRNO(fchmod(fd, m & (~u)));
f4f15635
LP
285}
286
4dfaa528 287int fchmod_opath(int fd, mode_t m) {
4dfaa528 288 /* This function operates also on fd that might have been opened with
adecfb3b
AM
289 * O_PATH. The tool set we have is non-intuitive:
290 * - fchmod(2) only operates on open files (i. e., fds with an open file description);
291 * - fchmodat(2) does not have a flag arg like fchownat(2) does, so no way to pass AT_EMPTY_PATH;
292 * + it should not be confused with the libc fchmodat(3) interface, which adds 4th flag argument,
293 * but does not support AT_EMPTY_PATH (only supports AT_SYMLINK_NOFOLLOW);
294 * - fchmodat2(2) supports all the AT_* flags, but is still very recent.
295 *
296 * We try to use fchmodat2(), and, if it is not supported, resort
297 * to the /proc/self/fd dance. */
298
299 assert(fd >= 0);
300
301 if (fchmodat2(fd, "", m, AT_EMPTY_PATH) >= 0)
302 return 0;
303 if (!IN_SET(errno, ENOSYS, EPERM)) /* Some container managers block unknown syscalls with EPERM */
304 return -errno;
4dfaa528 305
ddb6eeaf 306 if (chmod(FORMAT_PROC_FD_PATH(fd), m) < 0) {
f8606626
LP
307 if (errno != ENOENT)
308 return -errno;
309
d19b3c5d 310 return proc_fd_enoent_errno();
f8606626 311 }
4dfaa528
FB
312
313 return 0;
314}
315
f25bff5e 316int futimens_opath(int fd, const struct timespec ts[2]) {
150231d2 317 /* Similar to fchmod_opath() but for futimens() */
f25bff5e 318
973464ad
MY
319 assert(fd >= 0);
320
321 if (utimensat(fd, "", ts, AT_EMPTY_PATH) >= 0)
322 return 0;
323 if (errno != EINVAL)
324 return -errno;
325
326 /* Support for AT_EMPTY_PATH is added rather late (kernel 5.8), so fall back to going through /proc/
327 * if unavailable. */
328
329 if (utimensat(AT_FDCWD, FORMAT_PROC_FD_PATH(fd), ts, /* flags = */ 0) < 0) {
f25bff5e
LP
330 if (errno != ENOENT)
331 return -errno;
332
d19b3c5d 333 return proc_fd_enoent_errno();
f25bff5e
LP
334 }
335
336 return 0;
337}
338
22ed4a6d
LP
/* Logs warnings about suspicious permission bits on the configuration file 'path', described by 'st': any
 * executable bit, the world-writable bit, and (only when running as PID 1) a missing group- or world-readable
 * bit. Never fails; always returns 0. */
int stat_warn_permissions(const char *path, const struct stat *st) {
        mode_t m;

        assert(path);
        assert(st);

        m = st->st_mode;

        /* Don't complain if we are reading something that is not a regular file, for example /dev/null */
        if (!S_ISREG(m))
                return 0;

        if ((m & 0111) != 0)
                log_warning("Configuration file %s is marked executable. Please remove executable permission bits. Proceeding anyway.", path);

        if ((m & 0002) != 0)
                log_warning("Configuration file %s is marked world-writable. Please remove world writability permission bits. Proceeding anyway.", path);

        if (getpid_cached() == 1 && (m & 0044) != 0044)
                log_warning("Configuration file %s is marked world-inaccessible. This has no effect as configuration data is accessible via APIs without restrictions. Proceeding anyway.", path);

        return 0;
}
358
22ed4a6d
LP
/* Like stat_warn_permissions(), but obtains the inode data from the open file descriptor 'fd'. 'path' is only
 * passed on for the log messages. Returns a negative errno if fstat() fails. */
int fd_warn_permissions(const char *path, int fd) {
        struct stat sb;
        int r;

        assert(path);
        assert(fd >= 0);

        r = fstat(fd, &sb);
        if (r < 0)
                return -errno;

        return stat_warn_permissions(path, &sb);
}
370
65a76659 371int access_nofollow(const char *path, int mode) {
fda65211
DDM
372 return RET_NERRNO(faccessat(AT_FDCWD, path, mode, AT_SYMLINK_NOFOLLOW));
373}
374
2ee6fa55
LP
375int touch_fd(int fd, usec_t stamp) {
376 assert(fd >= 0);
377
378 if (stamp == USEC_INFINITY)
379 return futimens_opath(fd, /* ts= */ NULL);
380
381 struct timespec ts[2];
382 timespec_store(ts + 0, stamp);
383 ts[1] = ts[0];
384 return futimens_opath(fd, ts);
385}
386
f4f15635 387int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode) {
254d1313 388 _cleanup_close_ int fd = -EBADF;
2ee6fa55 389 int ret;
f4f15635
LP
390
391 assert(path);
392
9e3fa6e8
LP
393 /* Note that touch_file() does not follow symlinks: if invoked on an existing symlink, then it is the symlink
394 * itself which is updated, not its target
395 *
396 * Returns the first error we encounter, but tries to apply as much as possible. */
f4f15635 397
9e3fa6e8
LP
398 if (parents)
399 (void) mkdir_parents(path, 0755);
400
401 /* Initially, we try to open the node with O_PATH, so that we get a reference to the node. This is useful in
402 * case the path refers to an existing device or socket node, as we can open it successfully in all cases, and
403 * won't trigger any driver magic or so. */
404 fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW);
405 if (fd < 0) {
406 if (errno != ENOENT)
f4f15635 407 return -errno;
f4f15635 408
9e3fa6e8
LP
409 /* if the node doesn't exist yet, we create it, but with O_EXCL, so that we only create a regular file
410 * here, and nothing else */
411 fd = open(path, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, IN_SET(mode, 0, MODE_INVALID) ? 0644 : mode);
412 if (fd < 0)
f4f15635
LP
413 return -errno;
414 }
415
9e3fa6e8
LP
416 /* Let's make a path from the fd, and operate on that. With this logic, we can adjust the access mode,
417 * ownership and time of the file node in all cases, even if the fd refers to an O_PATH object — which is
418 * something fchown(), fchmod(), futimensat() don't allow. */
4b3b5bc7 419 ret = fchmod_and_chown(fd, mode, uid, gid);
9e3fa6e8 420
2ee6fa55 421 return RET_GATHER(ret, touch_fd(fd, stamp));
f4f15635
LP
422}
423
fda65211
DDM
424int touch(const char *path) {
425 return touch_file(path, false, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
426}
427
3e187621 428int symlinkat_idempotent(const char *from, int atfd, const char *to, bool make_relative) {
6c9c51e5 429 _cleanup_free_ char *relpath = NULL;
f4f15635
LP
430 int r;
431
432 assert(from);
433 assert(to);
434
6c9c51e5 435 if (make_relative) {
449375d2 436 r = path_make_relative_parent(to, from, &relpath);
6c9c51e5
YW
437 if (r < 0)
438 return r;
439
440 from = relpath;
441 }
442
3e187621 443 if (symlinkat(from, atfd, to) < 0) {
77b79723
LP
444 _cleanup_free_ char *p = NULL;
445
f4f15635
LP
446 if (errno != EEXIST)
447 return -errno;
448
3e187621 449 r = readlinkat_malloc(atfd, to, &p);
77b79723
LP
450 if (r == -EINVAL) /* Not a symlink? In that case return the original error we encountered: -EEXIST */
451 return -EEXIST;
452 if (r < 0) /* Any other error? In that case propagate it as is */
f4f15635
LP
453 return r;
454
77b79723
LP
455 if (!streq(p, from)) /* Not the symlink we want it to be? In that case, propagate the original -EEXIST */
456 return -EEXIST;
f4f15635
LP
457 }
458
459 return 0;
460}
461
9ea5a6e7 462int symlinkat_atomic_full(const char *from, int atfd, const char *to, SymlinkFlags flags) {
f4f15635
LP
463 int r;
464
465 assert(from);
466 assert(to);
467
9ea5a6e7
LP
468 _cleanup_free_ char *relpath = NULL;
469 if (FLAGS_SET(flags, SYMLINK_MAKE_RELATIVE)) {
590d8100
YW
470 r = path_make_relative_parent(to, from, &relpath);
471 if (r < 0)
472 return r;
473
474 from = relpath;
475 }
476
9ea5a6e7 477 _cleanup_free_ char *t = NULL;
f4f15635
LP
478 r = tempfn_random(to, NULL, &t);
479 if (r < 0)
480 return r;
481
9ea5a6e7
LP
482 bool call_label_ops_post = false;
483 if (FLAGS_SET(flags, SYMLINK_LABEL)) {
484 r = label_ops_pre(atfd, to, S_IFLNK);
485 if (r < 0)
486 return r;
487
488 call_label_ops_post = true;
489 }
490
491 r = RET_NERRNO(symlinkat(from, atfd, t));
492 if (call_label_ops_post)
493 RET_GATHER(r, label_ops_post(atfd, t, /* created= */ r >= 0));
494 if (r < 0)
495 return r;
f4f15635 496
da9dd029
LP
497 r = RET_NERRNO(renameat(atfd, t, atfd, to));
498 if (r < 0) {
499 (void) unlinkat(atfd, t, 0);
500 return r;
f4f15635
LP
501 }
502
503 return 0;
504}
505
497ca785 506int mknodat_atomic(int atfd, const char *path, mode_t mode, dev_t dev) {
f4f15635
LP
507 _cleanup_free_ char *t = NULL;
508 int r;
509
510 assert(path);
511
512 r = tempfn_random(path, NULL, &t);
513 if (r < 0)
514 return r;
515
497ca785 516 if (mknodat(atfd, t, mode, dev) < 0)
f4f15635
LP
517 return -errno;
518
497ca785
LP
519 r = RET_NERRNO(renameat(atfd, t, atfd, path));
520 if (r < 0) {
521 (void) unlinkat(atfd, t, 0);
522 return r;
f4f15635
LP
523 }
524
525 return 0;
526}
527
4f477796 528int mkfifoat_atomic(int atfd, const char *path, mode_t mode) {
f4f15635
LP
529 _cleanup_free_ char *t = NULL;
530 int r;
531
532 assert(path);
533
4f477796 534 /* We're only interested in the (random) filename. */
f4f15635
LP
535 r = tempfn_random(path, NULL, &t);
536 if (r < 0)
537 return r;
538
4f477796 539 if (mkfifoat(atfd, t, mode) < 0)
4fe3828c 540 return -errno;
4fe3828c 541
4f477796
LP
542 r = RET_NERRNO(renameat(atfd, t, atfd, path));
543 if (r < 0) {
544 (void) unlinkat(atfd, t, 0);
4fe3828c 545 return r;
f4f15635
LP
546 }
547
548 return 0;
549}
550
eac4a028 551int get_files_in_directory(const char *path, char ***ret_list) {
319a4f4b 552 _cleanup_strv_free_ char **l = NULL;
f4f15635 553 _cleanup_closedir_ DIR *d = NULL;
319a4f4b 554 size_t n = 0;
f4f15635
LP
555
556 assert(path);
557
558 /* Returns all files in a directory in *list, and the number
559 * of files as return value. If list is NULL returns only the
560 * number. */
561
562 d = opendir(path);
563 if (!d)
564 return -errno;
565
8fb3f009 566 FOREACH_DIRENT_ALL(de, d, return -errno) {
f4f15635
LP
567 if (!dirent_is_file(de))
568 continue;
569
eac4a028 570 if (ret_list) {
f4f15635 571 /* one extra slot is needed for the terminating NULL */
319a4f4b 572 if (!GREEDY_REALLOC(l, n + 2))
f4f15635
LP
573 return -ENOMEM;
574
575 l[n] = strdup(de->d_name);
576 if (!l[n])
577 return -ENOMEM;
578
579 l[++n] = NULL;
580 } else
581 n++;
582 }
583
eac4a028
LP
584 if (ret_list)
585 *ret_list = TAKE_PTR(l);
f4f15635
LP
586
587 return n;
588}
430fbf8e 589
992e8f22 590static int getenv_tmp_dir(const char **ret_path) {
992e8f22 591 int r, ret = 0;
34a8f081 592
992e8f22 593 assert(ret_path);
34a8f081 594
992e8f22
LP
595 /* We use the same order of environment variables python uses in tempfile.gettempdir():
596 * https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir */
597 FOREACH_STRING(n, "TMPDIR", "TEMP", "TMP") {
598 const char *e;
599
600 e = secure_getenv(n);
601 if (!e)
602 continue;
603 if (!path_is_absolute(e)) {
604 r = -ENOTDIR;
605 goto next;
606 }
99be45a4 607 if (!path_is_normalized(e)) {
992e8f22
LP
608 r = -EPERM;
609 goto next;
610 }
611
612 r = is_dir(e, true);
613 if (r < 0)
614 goto next;
615 if (r == 0) {
616 r = -ENOTDIR;
617 goto next;
618 }
619
620 *ret_path = e;
621 return 1;
622
623 next:
624 /* Remember first error, to make this more debuggable */
625 if (ret >= 0)
626 ret = r;
34a8f081
OW
627 }
628
992e8f22
LP
629 if (ret < 0)
630 return ret;
34a8f081 631
992e8f22
LP
632 *ret_path = NULL;
633 return ret;
634}
34a8f081 635
992e8f22
LP
/* Picks the temporary directory to use: the one configured via the environment if getenv_tmp_dir() finds one,
 * otherwise the default 'def', provided it is a directory. Returns 0 with *ret set on success. On failure returns
 * the error from the environment lookup if there was one, else the error for 'def'. */
static int tmp_dir_internal(const char *def, const char **ret) {
        const char *from_env;
        int r, q;

        assert(def);
        assert(ret);

        r = getenv_tmp_dir(&from_env);
        if (r > 0) {
                *ret = from_env;
                return 0;
        }

        q = is_dir(def, /* follow = */ true);
        if (q > 0) {
                *ret = def;
                return 0;
        }

        if (q == 0)
                q = -ENOTDIR;

        return RET_GATHER(r, q);
}
658
/* Returns the location for "larger" temporary files, i.e. one that is backed by physical storage if available and
 * thus might even survive a reboot: /var/tmp. If $TMPDIR (or one of the related environment variables) is set,
 * its value is preferred, however. Note that both this function and tmp_dir() are affected by $TMPDIR, making it
 * a variable that overrides all temporary file storage locations. */
int var_tmp_dir(const char **ret) {
        static const char fallback[] = "/var/tmp";

        assert(ret);

        return tmp_dir_internal(fallback, ret);
}
669
/* Similar to var_tmp_dir(), but returns the location for "smaller" temporary files, which is usually backed by an
 * in-memory file system: /tmp. */
int tmp_dir(const char **ret) {
        static const char fallback[] = "/tmp";

        assert(ret);

        return tmp_dir_internal(fallback, ret);
}
678
/* Removes 'filename' and logs an error if that fails. A file that does not exist is not considered an error.
 * Returns 0 on success, or the negative error returned by log_error_errno(). */
int unlink_or_warn(const char *filename) {
        assert(filename);

        if (unlink(filename) >= 0)
                return 0;

        if (errno == ENOENT)
                return 0;

        /* If the file doesn't exist and the fs simply is read-only (in which case unlink() returns EROFS even
         * though the file doesn't exist), don't complain. */
        if (errno == EROFS && access(filename, F_OK) < 0)
                return 0;

        return log_error_errno(errno, "Failed to remove \"%s\": %m", filename);
}
691
fda65211
DDM
692char *rmdir_and_free(char *p) {
693 PROTECT_ERRNO;
694
695 if (!p)
696 return NULL;
697
698 (void) rmdir(p);
699 return mfree(p);
700}
701
0c15577a
DDM
702char* unlink_and_free(char *p) {
703 PROTECT_ERRNO;
704
705 if (!p)
706 return NULL;
707
708 (void) unlink(p);
709 return mfree(p);
710}
711
57a4359e 712int access_fd(int fd, int mode) {
6548f0da
MY
713 assert(fd >= 0);
714
57a4359e
LP
715 /* Like access() but operates on an already open fd */
716
c675851d
MY
717 if (faccessat(fd, "", mode, AT_EMPTY_PATH) >= 0)
718 return 0;
719 if (errno != EINVAL)
720 return -errno;
721
722 /* Support for AT_EMPTY_PATH is added rather late (kernel 5.8), so fall back to going through /proc/
723 * if unavailable. */
724
ddb6eeaf 725 if (access(FORMAT_PROC_FD_PATH(fd), mode) < 0) {
4265a66a
LP
726 if (errno != ENOENT)
727 return -errno;
57a4359e 728
d19b3c5d 729 return proc_fd_enoent_errno();
4265a66a
LP
730 }
731
732 return 0;
57a4359e 733}
43767d9d 734
053e0626 735int unlinkat_deallocate(int fd, const char *name, UnlinkDeallocateFlags flags) {
254d1313 736 _cleanup_close_ int truncate_fd = -EBADF;
43767d9d
LP
737 struct stat st;
738 off_t l, bs;
739
6548f0da
MY
740 assert(fd >= 0 || fd == AT_FDCWD);
741 assert(name);
053e0626
LP
742 assert((flags & ~(UNLINK_REMOVEDIR|UNLINK_ERASE)) == 0);
743
43767d9d
LP
744 /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no other
745 * link to it. This is useful to ensure that other processes that might have the file open for reading won't be
746 * able to keep the data pinned on disk forever. This call is particular useful whenever we execute clean-up
747 * jobs ("vacuuming"), where we want to make sure the data is really gone and the disk space released and
748 * returned to the free pool.
749 *
750 * Deallocation is preferably done by FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE (👊) if supported, which means
751 * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other
752 * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes
753 * underneath those programs is the better choice than simply triggering SIGBUS in them which truncation does.)
754 * However if hole punching is not implemented in the kernel or file system we'll fall back to normal file
755 * truncation (🔪), as our goal of deallocating the data space trumps our goal of being nice to readers (💐).
756 *
757 * Note that we attempt deallocation, but failure to succeed with that is not considered fatal, as long as the
758 * primary job – to delete the file – is accomplished. */
759
053e0626 760 if (!FLAGS_SET(flags, UNLINK_REMOVEDIR)) {
43767d9d
LP
761 truncate_fd = openat(fd, name, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
762 if (truncate_fd < 0) {
763
764 /* If this failed because the file doesn't exist propagate the error right-away. Also,
765 * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR is
766 * returned when this is a directory but we are not supposed to delete those, hence propagate
767 * the error right-away too. */
768 if (IN_SET(errno, ENOENT, EISDIR))
769 return -errno;
770
771 if (errno != ELOOP) /* don't complain if this is a symlink */
772 log_debug_errno(errno, "Failed to open file '%s' for deallocation, ignoring: %m", name);
773 }
774 }
775
053e0626 776 if (unlinkat(fd, name, FLAGS_SET(flags, UNLINK_REMOVEDIR) ? AT_REMOVEDIR : 0) < 0)
43767d9d
LP
777 return -errno;
778
779 if (truncate_fd < 0) /* Don't have a file handle, can't do more ☹️ */
780 return 0;
781
782 if (fstat(truncate_fd, &st) < 0) {
011723a4 783 log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name);
43767d9d
LP
784 return 0;
785 }
786
053e0626
LP
787 if (!S_ISREG(st.st_mode))
788 return 0;
789
790 if (FLAGS_SET(flags, UNLINK_ERASE) && st.st_size > 0 && st.st_nlink == 0) {
791 uint64_t left = st.st_size;
792 char buffer[64 * 1024];
793
794 /* If erasing is requested, let's overwrite the file with random data once before deleting
795 * it. This isn't going to give you shred(1) semantics, but hopefully should be good enough
796 * for stuff backed by tmpfs at least.
797 *
15dd4515 798 * Note that we only erase like this if the link count of the file is zero. If it is higher it
053e0626
LP
799 * is still linked by someone else and we'll leave it to them to remove it securely
800 * eventually! */
801
802 random_bytes(buffer, sizeof(buffer));
803
804 while (left > 0) {
805 ssize_t n;
806
807 n = write(truncate_fd, buffer, MIN(sizeof(buffer), left));
808 if (n < 0) {
809 log_debug_errno(errno, "Failed to erase data in file '%s', ignoring.", name);
810 break;
811 }
812
813 assert(left >= (size_t) n);
814 left -= n;
815 }
816
817 /* Let's refresh metadata */
818 if (fstat(truncate_fd, &st) < 0) {
819 log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name);
820 return 0;
821 }
822 }
823
8bf8e51b 824 /* Don't deallocate if there's nothing to deallocate or if the file is linked elsewhere */
053e0626 825 if (st.st_blocks == 0 || st.st_nlink > 0)
43767d9d
LP
826 return 0;
827
828 /* If this is a regular file, it actually took up space on disk and there are no other links it's time to
829 * punch-hole/truncate this to release the disk space. */
830
831 bs = MAX(st.st_blksize, 512);
4dcaab9c 832 l = ROUND_UP(st.st_size, bs); /* Round up to next block size */
43767d9d
LP
833
834 if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0)
835 return 0; /* Successfully punched a hole! 😊 */
836
837 /* Fall back to truncation */
838 if (ftruncate(truncate_fd, 0) < 0) {
839 log_debug_errno(errno, "Failed to truncate file to 0, ignoring: %m");
840 return 0;
841 }
842
843 return 0;
844}
11b29a96 845
14460a8a 846int open_parent_at(int dir_fd, const char *path, int flags, mode_t mode) {
ef8becfa 847 _cleanup_free_ char *parent = NULL;
7c248223 848 int r;
ef8becfa 849
14460a8a
DDM
850 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
851 assert(path);
852
30cdcd62 853 r = path_extract_directory(path, &parent);
14460a8a
DDM
854 if (r == -EDESTADDRREQ) {
855 parent = strdup(".");
856 if (!parent)
857 return -ENOMEM;
858 } else if (r == -EADDRNOTAVAIL) {
859 parent = strdup(path);
860 if (!parent)
861 return -ENOMEM;
862 } else if (r < 0)
30cdcd62 863 return r;
ef8becfa
LP
864
865 /* Let's insist on O_DIRECTORY since the parent of a file or directory is a directory. Except if we open an
866 * O_TMPFILE file, because in that case we are actually create a regular file below the parent directory. */
867
0c21dafb 868 if (FLAGS_SET(flags, O_PATH))
ef8becfa 869 flags |= O_DIRECTORY;
0c21dafb 870 else if (!FLAGS_SET(flags, O_TMPFILE))
ef8becfa
LP
871 flags |= O_DIRECTORY|O_RDONLY;
872
14460a8a 873 return RET_NERRNO(openat(dir_fd, parent, flags, mode));
ef8becfa 874}
ed9c0851 875
10195179 876int conservative_renameat(
10981424
LP
877 int olddirfd, const char *oldpath,
878 int newdirfd, const char *newpath) {
879
254d1313 880 _cleanup_close_ int old_fd = -EBADF, new_fd = -EBADF;
10981424
LP
881 struct stat old_stat, new_stat;
882
737a9edc 883 /* Renames the old path to the new path, much like renameat() — except if both are regular files and
10981424
LP
884 * have the exact same contents and basic file attributes already. In that case remove the new file
885 * instead. This call is useful for reducing inotify wakeups on files that are updated but don't
886 * actually change. This function is written in a style that we rather rename too often than suppress
2657d5bd 887 * too much. I.e. whenever we are in doubt, we rather rename than fail. After all reducing inotify
10981424
LP
888 * events is an optimization only, not more. */
889
890 old_fd = openat(olddirfd, oldpath, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_NOFOLLOW);
891 if (old_fd < 0)
892 goto do_rename;
893
894 new_fd = openat(newdirfd, newpath, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_NOFOLLOW);
895 if (new_fd < 0)
896 goto do_rename;
897
898 if (fstat(old_fd, &old_stat) < 0)
899 goto do_rename;
900
901 if (!S_ISREG(old_stat.st_mode))
902 goto do_rename;
903
904 if (fstat(new_fd, &new_stat) < 0)
905 goto do_rename;
906
a9dac7a6 907 if (stat_inode_same(&new_stat, &old_stat))
10981424
LP
908 goto is_same;
909
910 if (old_stat.st_mode != new_stat.st_mode ||
911 old_stat.st_size != new_stat.st_size ||
912 old_stat.st_uid != new_stat.st_uid ||
913 old_stat.st_gid != new_stat.st_gid)
914 goto do_rename;
915
916 for (;;) {
eff57d1c
LP
917 uint8_t buf1[16*1024];
918 uint8_t buf2[sizeof(buf1)];
10981424
LP
919 ssize_t l1, l2;
920
921 l1 = read(old_fd, buf1, sizeof(buf1));
922 if (l1 < 0)
923 goto do_rename;
924
eff57d1c
LP
925 if (l1 == sizeof(buf1))
926 /* Read the full block, hence read a full block in the other file too */
10981424 927
eff57d1c
LP
928 l2 = read(new_fd, buf2, l1);
929 else {
930 assert((size_t) l1 < sizeof(buf1));
931
932 /* Short read. This hence was the last block in the first file, and then came
933 * EOF. Read one byte more in the second file, so that we can verify we hit EOF there
934 * too. */
935
936 assert((size_t) (l1 + 1) <= sizeof(buf2));
937 l2 = read(new_fd, buf2, l1 + 1);
938 }
939 if (l2 != l1)
940 goto do_rename;
10981424
LP
941
942 if (memcmp(buf1, buf2, l1) != 0)
943 goto do_rename;
eff57d1c
LP
944
945 if ((size_t) l1 < sizeof(buf1)) /* We hit EOF on the first file, and the second file too, hence exit
946 * now. */
947 break;
10981424
LP
948 }
949
950is_same:
951 /* Everything matches? Then don't rename, instead remove the source file, and leave the existing
952 * destination in place */
953
954 if (unlinkat(olddirfd, oldpath, 0) < 0)
955 goto do_rename;
956
957 return 0;
958
959do_rename:
960 if (renameat(olddirfd, oldpath, newdirfd, newpath) < 0)
961 return -errno;
962
963 return 1;
964}
4c54768c
IZ
965
966int posix_fallocate_loop(int fd, uint64_t offset, uint64_t size) {
967 RateLimit rl;
968 int r;
969
970 r = posix_fallocate(fd, offset, size); /* returns positive errnos on error */
971 if (r != EINTR)
972 return -r; /* Let's return negative errnos, like common in our codebase */
973
974 /* On EINTR try a couple of times more, but protect against busy looping
975 * (not more than 16 times per 10s) */
7d1e61ca 976 rl = (const RateLimit) { 10 * USEC_PER_SEC, 16 };
4c54768c
IZ
977 while (ratelimit_below(&rl)) {
978 r = posix_fallocate(fd, offset, size);
979 if (r != EINTR)
980 return -r;
981 }
982
983 return -EINTR;
984}
68def5a9
LP
985
986int parse_cifs_service(
987 const char *s,
988 char **ret_host,
989 char **ret_service,
990 char **ret_path) {
991
992 _cleanup_free_ char *h = NULL, *ss = NULL, *x = NULL;
993 const char *p, *e, *d;
994 char delimiter;
995
996 /* Parses a CIFS service in form of //host/service/path… and splitting it in three parts. The last
997 * part is optional, in which case NULL is returned there. To maximize compatibility syntax with
998 * backslashes instead of slashes is accepted too. */
999
1000 if (!s)
1001 return -EINVAL;
1002
1003 p = startswith(s, "//");
1004 if (!p) {
1005 p = startswith(s, "\\\\");
1006 if (!p)
1007 return -EINVAL;
1008 }
1009
1010 delimiter = s[0];
1011 e = strchr(p, delimiter);
1012 if (!e)
1013 return -EINVAL;
1014
1015 h = strndup(p, e - p);
1016 if (!h)
1017 return -ENOMEM;
1018
1019 if (!hostname_is_valid(h, 0))
1020 return -EINVAL;
1021
1022 e++;
1023
1024 d = strchrnul(e, delimiter);
1025
1026 ss = strndup(e, d - e);
1027 if (!ss)
1028 return -ENOMEM;
1029
1030 if (!filename_is_valid(ss))
1031 return -EINVAL;
1032
1033 if (!isempty(d)) {
1034 x = strdup(skip_leading_chars(d, CHAR_TO_STR(delimiter)));
1035 if (!x)
1036 return -EINVAL;
1037
1038 /* Make sure to convert Windows-style "\" → Unix-style / */
1039 for (char *i = x; *i; i++)
1040 if (*i == delimiter)
1041 *i = '/';
1042
1043 if (!path_is_valid(x))
1044 return -EINVAL;
1045
1046 path_simplify(x);
1047 if (!path_is_normalized(x))
1048 return -EINVAL;
1049 }
1050
1051 if (ret_host)
1052 *ret_host = TAKE_PTR(h);
1053 if (ret_service)
1054 *ret_service = TAKE_PTR(ss);
1055 if (ret_path)
1056 *ret_path = TAKE_PTR(x);
1057
1058 return 0;
1059}
c73094f3 1060
4be62f82 1061int open_mkdir_at_full(int dirfd, const char *path, int flags, XOpenFlags xopen_flags, mode_t mode) {
254d1313 1062 _cleanup_close_ int fd = -EBADF, parent_fd = -EBADF;
797f6cc5 1063 _cleanup_free_ char *fname = NULL, *parent = NULL;
c73094f3
LP
1064 int r;
1065
1066 /* Creates a directory with mkdirat() and then opens it, in the "most atomic" fashion we can
1067 * do. Guarantees that the returned fd refers to a directory. If O_EXCL is specified will fail if the
1068 * dir already exists. Otherwise will open an existing dir, but only if it is one. */
1069
1070 if (flags & ~(O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_EXCL|O_NOATIME|O_NOFOLLOW|O_PATH))
1071 return -EINVAL;
b1236ce3 1072 if ((flags & O_ACCMODE_STRICT) != O_RDONLY)
c73094f3
LP
1073 return -EINVAL;
1074
1075 /* Note that O_DIRECTORY|O_NOFOLLOW is implied, but we allow specifying it anyway. The following
1076 * flags actually make sense to specify: O_CLOEXEC, O_EXCL, O_NOATIME, O_PATH */
1077
797f6cc5
LP
1078 /* If this is not a valid filename, it's a path. Let's open the parent directory then, so
1079 * that we can pin it, and operate below it. */
1080 r = path_extract_directory(path, &parent);
1081 if (r < 0) {
1082 if (!IN_SET(r, -EDESTADDRREQ, -EADDRNOTAVAIL))
c73094f3 1083 return r;
797f6cc5 1084 } else {
c73094f3
LP
1085 r = path_extract_filename(path, &fname);
1086 if (r < 0)
1087 return r;
1088
1089 parent_fd = openat(dirfd, parent, O_PATH|O_DIRECTORY|O_CLOEXEC);
1090 if (parent_fd < 0)
1091 return -errno;
1092
1093 dirfd = parent_fd;
1094 path = fname;
1095 }
1096
4be62f82 1097 fd = xopenat_full(dirfd, path, flags|O_CREAT|O_DIRECTORY|O_NOFOLLOW, xopen_flags, mode);
7486f9c3
DDM
1098 if (IN_SET(fd, -ELOOP, -ENOTDIR))
1099 return -EEXIST;
1100 if (fd < 0)
c73094f3 1101 return fd;
c73094f3
LP
1102
1103 return TAKE_FD(fd);
1104}
ca8503f1
LP
1105
1106int openat_report_new(int dirfd, const char *pathname, int flags, mode_t mode, bool *ret_newly_created) {
4d5dacbe 1107 int fd;
ca8503f1
LP
1108
1109 /* Just like openat(), but adds one thing: optionally returns whether we created the file anew or if
b3122369 1110 * it already existed before. This is only relevant if O_CREAT is set without O_EXCL, and thus will
4946dd41
LP
1111 * shortcut to openat() otherwise.
1112 *
1113 * Note that this routine is a bit more strict with symlinks than regular openat() is. If O_NOFOLLOW
1114 * is not specified, then we'll follow the symlink when opening an existing file but we will *not*
1115 * follow it when creating a new one (because that's a terrible UNIX misfeature and generally a
1116 * security hole). */
ca8503f1 1117
4d5dacbe
LP
1118 if (!FLAGS_SET(flags, O_CREAT) || FLAGS_SET(flags, O_EXCL)) {
1119 fd = openat(dirfd, pathname, flags, mode);
1120 if (fd < 0)
1121 return -errno;
ca8503f1 1122
4946dd41
LP
1123 if (ret_newly_created)
1124 *ret_newly_created = FLAGS_SET(flags, O_CREAT);
4d5dacbe
LP
1125 return fd;
1126 }
1127
b9633ebb 1128 for (unsigned attempts = 7;;) {
ca8503f1
LP
1129 /* First, attempt to open without O_CREAT/O_EXCL, i.e. open existing file */
1130 fd = openat(dirfd, pathname, flags & ~(O_CREAT | O_EXCL), mode);
1131 if (fd >= 0) {
4946dd41
LP
1132 if (ret_newly_created)
1133 *ret_newly_created = false;
ca8503f1
LP
1134 return fd;
1135 }
1136 if (errno != ENOENT)
1137 return -errno;
1138
4946dd41
LP
1139 /* So the file didn't exist yet, hence create it with O_CREAT/O_EXCL/O_NOFOLLOW. */
1140 fd = openat(dirfd, pathname, flags | O_CREAT | O_EXCL | O_NOFOLLOW, mode);
ca8503f1 1141 if (fd >= 0) {
4946dd41
LP
1142 if (ret_newly_created)
1143 *ret_newly_created = true;
ca8503f1
LP
1144 return fd;
1145 }
1146 if (errno != EEXIST)
1147 return -errno;
1148
4946dd41
LP
1149 /* Hmm, so now we got EEXIST? Then someone might have created the file between the first and
1150 * second call to openat(). Let's try again but with a limit so we don't spin forever. */
ca8503f1
LP
1151
1152 if (--attempts == 0) /* Give up eventually, somebody is playing with us */
1153 return -EEXIST;
1154 }
1155}
7486f9c3 1156
e40b11be 1157int xopenat_full(int dir_fd, const char *path, int open_flags, XOpenFlags xopen_flags, mode_t mode) {
7486f9c3 1158 _cleanup_close_ int fd = -EBADF;
32dfe3b6 1159 bool made_dir = false, made_file = false;
7486f9c3
DDM
1160 int r;
1161
2646b86d 1162 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
2646b86d 1163
dffafa47
LP
1164 /* An inode cannot be both a directory and a regular file at the same time. */
1165 assert(!(FLAGS_SET(open_flags, O_DIRECTORY) && FLAGS_SET(xopen_flags, XO_REGULAR)));
1166
9b85e907
LP
1167 /* This is like openat(), but has a few tricks up its sleeves, extending behaviour:
1168 *
1169 * • O_DIRECTORY|O_CREAT is supported, which causes a directory to be created, and immediately
1170 * opened. When used with the XO_SUBVOLUME flag this will even create a btrfs subvolume.
1171 *
1172 * • If O_CREAT is used with XO_LABEL, any created file will be immediately relabelled.
1173 *
1174 * • If the path is specified NULL or empty, behaves like fd_reopen().
af3baf17
LP
1175 *
1176 * • If XO_NOCOW is specified will turn on the NOCOW btrfs flag on the file, if available.
8beb8c3e 1177 *
dffafa47
LP
1178 * • if XO_REGULAR is specified will return an error if inode is not a regular file.
1179 *
8beb8c3e 1180 * • If mode is specified as MODE_INVALID, we'll use 0755 for dirs, and 0644 for regular files.
9b85e907
LP
1181 */
1182
8beb8c3e
LP
1183 if (mode == MODE_INVALID)
1184 mode = (open_flags & O_DIRECTORY) ? 0755 : 0644;
1185
06ca2db3 1186 if (isempty(path)) {
420d2e31 1187 assert(!FLAGS_SET(open_flags, O_CREAT|O_EXCL));
dffafa47
LP
1188
1189 if (FLAGS_SET(xopen_flags, XO_REGULAR)) {
1190 r = fd_verify_regular(dir_fd);
1191 if (r < 0)
1192 return r;
1193 }
1194
420d2e31 1195 return fd_reopen(dir_fd, open_flags & ~O_NOFOLLOW);
06ca2db3
DDM
1196 }
1197
64053bed
LP
1198 bool call_label_ops_post = false;
1199
420d2e31
DDM
1200 if (FLAGS_SET(open_flags, O_CREAT) && FLAGS_SET(xopen_flags, XO_LABEL)) {
1201 r = label_ops_pre(dir_fd, path, FLAGS_SET(open_flags, O_DIRECTORY) ? S_IFDIR : S_IFREG);
1202 if (r < 0)
1203 return r;
64053bed
LP
1204
1205 call_label_ops_post = true;
420d2e31
DDM
1206 }
1207
1208 if (FLAGS_SET(open_flags, O_DIRECTORY|O_CREAT)) {
bc6a6130
DDM
1209 if (FLAGS_SET(xopen_flags, XO_SUBVOLUME))
1210 r = btrfs_subvol_make_fallback(dir_fd, path, mode);
1211 else
1212 r = RET_NERRNO(mkdirat(dir_fd, path, mode));
7486f9c3 1213 if (r == -EEXIST) {
420d2e31 1214 if (FLAGS_SET(open_flags, O_EXCL))
7486f9c3 1215 return -EEXIST;
7486f9c3
DDM
1216 } else if (r < 0)
1217 return r;
1218 else
32dfe3b6 1219 made_dir = true;
7486f9c3 1220
420d2e31 1221 open_flags &= ~(O_EXCL|O_CREAT);
7486f9c3
DDM
1222 }
1223
dffafa47
LP
1224 if (FLAGS_SET(xopen_flags, XO_REGULAR)) {
1225 /* Guarantee we return a regular fd only, and don't open the file unless we verified it
1226 * first */
1227
1228 if (FLAGS_SET(open_flags, O_PATH)) {
1229 fd = openat(dir_fd, path, open_flags, mode);
1230 if (fd < 0) {
1231 r = -errno;
1232 goto error;
1233 }
1234
1235 r = fd_verify_regular(fd);
1236 if (r < 0)
1237 goto error;
1238
1239 } else if (FLAGS_SET(open_flags, O_CREAT|O_EXCL)) {
1240 /* In O_EXCL mode we can just create the thing, everything is dealt with for us */
1241 fd = openat(dir_fd, path, open_flags, mode);
1242 if (fd < 0) {
1243 r = -errno;
1244 goto error;
1245 }
1246
1247 made_file = true;
1248 } else {
1249 /* Otherwise pin the inode first via O_PATH */
1250 _cleanup_close_ int inode_fd = openat(dir_fd, path, O_PATH|O_CLOEXEC|(open_flags & O_NOFOLLOW));
1251 if (inode_fd < 0) {
1252 if (errno != ENOENT || !FLAGS_SET(open_flags, O_CREAT)) {
1253 r = -errno;
1254 goto error;
1255 }
1256
1257 /* Doesn't exist yet, then try to create it */
1258 fd = openat(dir_fd, path, open_flags|O_CREAT|O_EXCL, mode);
1259 if (fd < 0) {
1260 r = -errno;
1261 goto error;
1262 }
1263
1264 made_file = true;
1265 } else {
1266 /* OK, we pinned it. Now verify it's actually a regular file, and then reopen it */
1267 r = fd_verify_regular(inode_fd);
1268 if (r < 0)
1269 goto error;
1270
1271 fd = fd_reopen(inode_fd, open_flags & ~(O_NOFOLLOW|O_CREAT));
1272 if (fd < 0) {
1273 r = fd;
1274 goto error;
1275 }
1276 }
1277 }
1278 } else {
1279 fd = openat_report_new(dir_fd, path, open_flags, mode, &made_file);
1280 if (fd < 0) {
1281 r = fd;
1282 goto error;
1283 }
7486f9c3
DDM
1284 }
1285
64053bed
LP
1286 if (call_label_ops_post) {
1287 call_label_ops_post = false;
1288
1289 r = label_ops_post(fd, /* path= */ NULL, made_file || made_dir);
420d2e31 1290 if (r < 0)
32dfe3b6 1291 goto error;
420d2e31
DDM
1292 }
1293
1b05ac94 1294 if (FLAGS_SET(xopen_flags, XO_NOCOW)) {
a997f338 1295 r = chattr_fd(fd, FS_NOCOW_FL, FS_NOCOW_FL);
f1ee656d 1296 if (r < 0 && !ERRNO_IS_IOCTL_NOT_SUPPORTED(r))
1b05ac94
DDM
1297 goto error;
1298 }
1299
7486f9c3 1300 return TAKE_FD(fd);
32dfe3b6
DDM
1301
1302error:
64053bed
LP
1303 if (call_label_ops_post)
1304 (void) label_ops_post(fd >= 0 ? fd : dir_fd, fd >= 0 ? NULL : path, made_dir || made_file);
1305
32dfe3b6
DDM
1306 if (made_dir || made_file)
1307 (void) unlinkat(dir_fd, path, made_dir ? AT_REMOVEDIR : 0);
1308
1309 return r;
7486f9c3 1310}
2646b86d 1311
e40b11be 1312int xopenat_lock_full(
420d2e31
DDM
1313 int dir_fd,
1314 const char *path,
1315 int open_flags,
1316 XOpenFlags xopen_flags,
1317 mode_t mode,
1318 LockType locktype,
1319 int operation) {
1320
2646b86d
DDM
1321 _cleanup_close_ int fd = -EBADF;
1322 int r;
1323
1324 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
2646b86d
DDM
1325 assert(IN_SET(operation & ~LOCK_NB, LOCK_EX, LOCK_SH));
1326
1327 /* POSIX/UNPOSIX locks don't work on directories (errno is set to -EBADF so let's return early with
1328 * the same error here). */
0867a465 1329 if (FLAGS_SET(open_flags, O_DIRECTORY) && !IN_SET(locktype, LOCK_BSD, LOCK_NONE))
2646b86d
DDM
1330 return -EBADF;
1331
1332 for (;;) {
1333 struct stat st;
1334
e40b11be 1335 fd = xopenat_full(dir_fd, path, open_flags, xopen_flags, mode);
2646b86d
DDM
1336 if (fd < 0)
1337 return fd;
1338
1339 r = lock_generic(fd, locktype, operation);
1340 if (r < 0)
1341 return r;
1342
1343 /* If we acquired the lock, let's check if the file/directory still exists in the file
1344 * system. If not, then the previous exclusive owner removed it and then closed it. In such a
1345 * case our acquired lock is worthless, hence try again. */
1346
1347 if (fstat(fd, &st) < 0)
1348 return -errno;
1349 if (st.st_nlink > 0)
1350 break;
1351
1352 fd = safe_close(fd);
1353 }
1354
1355 return TAKE_FD(fd);
1356}
0b8e36f0
LP
1357
1358int link_fd(int fd, int newdirfd, const char *newpath) {
7f436569 1359 int r;
0b8e36f0
LP
1360
1361 assert(fd >= 0);
1362 assert(newdirfd >= 0 || newdirfd == AT_FDCWD);
1363 assert(newpath);
1364
7f436569
MY
1365 /* Try to link via AT_EMPTY_PATH first. This fails with ENOENT if we don't have CAP_DAC_READ_SEARCH
1366 * on kernels < 6.10, in which case we'd then resort to /proc/self/fd/ dance.
1367 *
1368 * See also: https://github.com/torvalds/linux/commit/42bd2af5950456d46fdaa91c3a8fb02e680f19f5 */
1369 r = RET_NERRNO(linkat(fd, "", newdirfd, newpath, AT_EMPTY_PATH));
1370 if (r == -ENOENT) {
1371 r = RET_NERRNO(linkat(AT_FDCWD, FORMAT_PROC_FD_PATH(fd), newdirfd, newpath, AT_SYMLINK_FOLLOW));
1372 if (r == -ENOENT && proc_mounted() == 0) /* No proc_fd_enoent_errno() here because we don't
1373 know if it's the target path that's missing. */
1374 return -ENOSYS;
1375 }
0b8e36f0 1376
7f436569 1377 return r;
0b8e36f0 1378}
1f27e7b7
LP
1379
1380int linkat_replace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath) {
1381 _cleanup_close_ int old_fd = -EBADF;
1382 int r;
1383
1384 assert(olddirfd >= 0 || olddirfd == AT_FDCWD);
1385 assert(newdirfd >= 0 || newdirfd == AT_FDCWD);
1386 assert(!isempty(newpath)); /* source path is optional, but the target path is not */
1387
1388 /* Like linkat() but replaces the target if needed. Is a NOP if source and target already share the
1389 * same inode. */
1390
1391 if (olddirfd == AT_FDCWD && isempty(oldpath)) /* Refuse operating on the cwd (which is a dir, and dirs can't be hardlinked) */
1392 return -EISDIR;
1393
1394 if (path_implies_directory(oldpath)) /* Refuse these definite directories early */
1395 return -EISDIR;
1396
1397 if (path_implies_directory(newpath))
1398 return -EISDIR;
1399
1400 /* First, try to link this directly */
1401 if (oldpath)
1402 r = RET_NERRNO(linkat(olddirfd, oldpath, newdirfd, newpath, 0));
1403 else
1404 r = link_fd(olddirfd, newdirfd, newpath);
1405 if (r >= 0)
1406 return 0;
1407 if (r != -EEXIST)
1408 return r;
1409
1410 old_fd = xopenat(olddirfd, oldpath, O_PATH|O_CLOEXEC);
1411 if (old_fd < 0)
1412 return old_fd;
1413
1414 struct stat old_st;
1415 if (fstat(old_fd, &old_st) < 0)
1416 return -errno;
1417
1418 if (S_ISDIR(old_st.st_mode)) /* Don't bother if we are operating on a directory */
1419 return -EISDIR;
1420
1421 struct stat new_st;
1422 if (fstatat(newdirfd, newpath, &new_st, AT_SYMLINK_NOFOLLOW) < 0)
1423 return -errno;
1424
1425 if (S_ISDIR(new_st.st_mode)) /* Refuse replacing directories */
1426 return -EEXIST;
1427
1428 if (stat_inode_same(&old_st, &new_st)) /* Already the same inode? Then shortcut this */
1429 return 0;
1430
1431 _cleanup_free_ char *tmp_path = NULL;
1432 r = tempfn_random(newpath, /* extra= */ NULL, &tmp_path);
1433 if (r < 0)
1434 return r;
1435
1436 r = link_fd(old_fd, newdirfd, tmp_path);
1437 if (r < 0) {
1438 if (!ERRNO_IS_PRIVILEGE(r))
1439 return r;
1440
1441 /* If that didn't work due to permissions then go via the path of the dentry */
1442 r = RET_NERRNO(linkat(olddirfd, oldpath, newdirfd, tmp_path, 0));
1443 if (r < 0)
1444 return r;
1445 }
1446
1447 r = RET_NERRNO(renameat(newdirfd, tmp_path, newdirfd, newpath));
1448 if (r < 0) {
1449 (void) unlinkat(newdirfd, tmp_path, /* flags= */ 0);
1450 return r;
1451 }
1452
1453 return 0;
1454}