]>
git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/fs-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
27 #include <linux/magic.h>
31 #include "alloc-util.h"
32 #include "dirent-util.h"
40 #include "parse-util.h"
41 #include "path-util.h"
42 #include "process-util.h"
43 #include "stat-util.h"
44 #include "stdio-util.h"
45 #include "string-util.h"
47 #include "time-util.h"
48 #include "user-util.h"
51 int unlink_noerrno ( const char * path
) {
62 int rmdir_parents ( const char * path
, const char * stop
) {
71 /* Skip trailing slashes */
72 while ( l
> 0 && path
[ l
- 1 ] == '/' )
78 /* Skip last component */
79 while ( l
> 0 && path
[ l
- 1 ] != '/' )
82 /* Skip trailing slashes */
83 while ( l
> 0 && path
[ l
- 1 ] == '/' )
93 if ( path_startswith ( stop
, t
)) {
109 int rename_noreplace ( int olddirfd
, const char * oldpath
, int newdirfd
, const char * newpath
) {
113 ret
= renameat2 ( olddirfd
, oldpath
, newdirfd
, newpath
, RENAME_NOREPLACE
);
117 /* renameat2() exists since Linux 3.15, btrfs added support for it later.
118 * If it is not implemented, fall back to another method. */
119 if (! IN_SET ( errno
, EINVAL
, ENOSYS
))
122 /* The link()/unlink() fallback does not work on directories. But
123 * renameat() without RENAME_NOREPLACE gives the same semantics on
124 * directories, except when newpath is an *empty* directory. This is
126 ret
= fstatat ( olddirfd
, oldpath
, & buf
, AT_SYMLINK_NOFOLLOW
);
127 if ( ret
>= 0 && S_ISDIR ( buf
. st_mode
)) {
128 ret
= renameat ( olddirfd
, oldpath
, newdirfd
, newpath
);
129 return ret
>= 0 ? 0 : - errno
;
132 /* If it is not a directory, use the link()/unlink() fallback. */
133 ret
= linkat ( olddirfd
, oldpath
, newdirfd
, newpath
, 0 );
137 ret
= unlinkat ( olddirfd
, oldpath
, 0 );
139 /* backup errno before the following unlinkat() alters it */
141 ( void ) unlinkat ( newdirfd
, newpath
, 0 );
149 int readlinkat_malloc ( int fd
, const char * p
, char ** ret
) {
164 n
= readlinkat ( fd
, p
, c
, l
- 1 );
171 if (( size_t ) n
< l
- 1 ) {
182 int readlink_malloc ( const char * p
, char ** ret
) {
183 return readlinkat_malloc ( AT_FDCWD
, p
, ret
);
186 int readlink_value ( const char * p
, char ** ret
) {
187 _cleanup_free_
char * link
= NULL
;
191 r
= readlink_malloc ( p
, & link
);
195 value
= basename ( link
);
199 value
= strdup ( value
);
208 int readlink_and_make_absolute ( const char * p
, char ** r
) {
209 _cleanup_free_
char * target
= NULL
;
216 j
= readlink_malloc ( p
, & target
);
220 k
= file_in_same_dir ( p
, target
);
228 int chmod_and_chown ( const char * path
, mode_t mode
, uid_t uid
, gid_t gid
) {
231 /* Under the assumption that we are running privileged we
232 * first change the access mode and only then hand out
233 * ownership to avoid a window where access is too open. */
235 if ( mode
!= MODE_INVALID
)
236 if ( chmod ( path
, mode
) < 0 )
239 if ( uid
!= UID_INVALID
|| gid
!= GID_INVALID
)
240 if ( chown ( path
, uid
, gid
) < 0 )
246 int fchmod_umask ( int fd
, mode_t m
) {
251 r
= fchmod ( fd
, m
& (~ u
)) < 0 ? - errno
: 0 ;
257 int fd_warn_permissions ( const char * path
, int fd
) {
260 if ( fstat ( fd
, & st
) < 0 )
263 if ( st
. st_mode
& 0111 )
264 log_warning ( "Configuration file %s is marked executable. Please remove executable permission bits. Proceeding anyway." , path
);
266 if ( st
. st_mode
& 0002 )
267 log_warning ( "Configuration file %s is marked world-writable. Please remove world writability permission bits. Proceeding anyway." , path
);
269 if ( getpid_cached () == 1 && ( st
. st_mode
& 0044 ) != 0044 )
270 log_warning ( "Configuration file %s is marked world-inaccessible. This has no effect as configuration data is accessible via APIs without restrictions. Proceeding anyway." , path
);
275 int touch_file ( const char * path
, bool parents
, usec_t stamp
, uid_t uid
, gid_t gid
, mode_t mode
) {
276 char fdpath
[ STRLEN ( "/proc/self/fd/" ) + DECIMAL_STR_MAX ( int )];
277 _cleanup_close_
int fd
= - 1 ;
282 /* Note that touch_file() does not follow symlinks: if invoked on an existing symlink, then it is the symlink
283 * itself which is updated, not its target
285 * Returns the first error we encounter, but tries to apply as much as possible. */
288 ( void ) mkdir_parents ( path
, 0755 );
290 /* Initially, we try to open the node with O_PATH, so that we get a reference to the node. This is useful in
291 * case the path refers to an existing device or socket node, as we can open it successfully in all cases, and
292 * won't trigger any driver magic or so. */
293 fd
= open ( path
, O_PATH
| O_CLOEXEC
| O_NOFOLLOW
);
298 /* if the node doesn't exist yet, we create it, but with O_EXCL, so that we only create a regular file
299 * here, and nothing else */
300 fd
= open ( path
, O_WRONLY
| O_CREAT
| O_EXCL
| O_CLOEXEC
, IN_SET ( mode
, 0 , MODE_INVALID
) ? 0644 : mode
);
305 /* Let's make a path from the fd, and operate on that. With this logic, we can adjust the access mode,
306 * ownership and time of the file node in all cases, even if the fd refers to an O_PATH object — which is
307 * something fchown(), fchmod(), futimensat() don't allow. */
308 xsprintf ( fdpath
, "/proc/self/fd/%i" , fd
);
310 if ( mode
!= MODE_INVALID
)
311 if ( chmod ( fdpath
, mode
) < 0 )
314 if ( uid_is_valid ( uid
) || gid_is_valid ( gid
))
315 if ( chown ( fdpath
, uid
, gid
) < 0 && ret
>= 0 )
318 if ( stamp
!= USEC_INFINITY
) {
319 struct timespec ts
[ 2 ];
321 timespec_store (& ts
[ 0 ], stamp
);
323 r
= utimensat ( AT_FDCWD
, fdpath
, ts
, 0 );
325 r
= utimensat ( AT_FDCWD
, fdpath
, NULL
, 0 );
326 if ( r
< 0 && ret
>= 0 )
332 int touch ( const char * path
) {
333 return touch_file ( path
, false , USEC_INFINITY
, UID_INVALID
, GID_INVALID
, MODE_INVALID
);
336 int symlink_idempotent ( const char * from
, const char * to
) {
342 if ( symlink ( from
, to
) < 0 ) {
343 _cleanup_free_
char * p
= NULL
;
348 r
= readlink_malloc ( to
, & p
);
349 if ( r
== - EINVAL
) /* Not a symlink? In that case return the original error we encountered: -EEXIST */
351 if ( r
< 0 ) /* Any other error? In that case propagate it as is */
354 if (! streq ( p
, from
)) /* Not the symlink we want it to be? In that case, propagate the original -EEXIST */
361 int symlink_atomic ( const char * from
, const char * to
) {
362 _cleanup_free_
char * t
= NULL
;
368 r
= tempfn_random ( to
, NULL
, & t
);
372 if ( symlink ( from
, t
) < 0 )
375 if ( rename ( t
, to
) < 0 ) {
383 int mknod_atomic ( const char * path
, mode_t mode
, dev_t dev
) {
384 _cleanup_free_
char * t
= NULL
;
389 r
= tempfn_random ( path
, NULL
, & t
);
393 if ( mknod ( t
, mode
, dev
) < 0 )
396 if ( rename ( t
, path
) < 0 ) {
404 int mkfifo_atomic ( const char * path
, mode_t mode
) {
405 _cleanup_free_
char * t
= NULL
;
410 r
= tempfn_random ( path
, NULL
, & t
);
414 if ( mkfifo ( t
, mode
) < 0 )
417 if ( rename ( t
, path
) < 0 ) {
425 int get_files_in_directory ( const char * path
, char *** list
) {
426 _cleanup_closedir_
DIR * d
= NULL
;
428 size_t bufsize
= 0 , n
= 0 ;
429 _cleanup_strv_free_
char ** l
= NULL
;
433 /* Returns all files in a directory in *list, and the number
434 * of files as return value. If list is NULL returns only the
441 FOREACH_DIRENT_ALL ( de
, d
, return - errno
) {
442 dirent_ensure_type ( d
, de
);
444 if (! dirent_is_file ( de
))
448 /* one extra slot is needed for the terminating NULL */
449 if (! GREEDY_REALLOC ( l
, bufsize
, n
+ 2 ))
452 l
[ n
] = strdup ( de
-> d_name
);
463 l
= NULL
; /* avoid freeing */
469 static int getenv_tmp_dir ( const char ** ret_path
) {
475 /* We use the same order of environment variables python uses in tempfile.gettempdir():
476 * https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir */
477 FOREACH_STRING ( n
, "TMPDIR" , "TEMP" , "TMP" ) {
480 e
= secure_getenv ( n
);
483 if (! path_is_absolute ( e
)) {
487 if (! path_is_normalized ( e
)) {
504 /* Remember first error, to make this more debuggable */
516 static int tmp_dir_internal ( const char * def
, const char ** ret
) {
523 r
= getenv_tmp_dir (& e
);
529 k
= is_dir ( def
, true );
533 return r
< 0 ? r
: k
;
539 int var_tmp_dir ( const char ** ret
) {
541 /* Returns the location for "larger" temporary files, that is backed by physical storage if available, and thus
542 * even might survive a boot: /var/tmp. If $TMPDIR (or related environment variables) are set, its value is
543 * returned preferably however. Note that both this function and tmp_dir() below are affected by $TMPDIR,
544 * making it a variable that overrides all temporary file storage locations. */
546 return tmp_dir_internal ( "/var/tmp" , ret
);
549 int tmp_dir ( const char ** ret
) {
551 /* Similar to var_tmp_dir() above, but returns the location for "smaller" temporary files, which is usually
552 * backed by an in-memory file system: /tmp. */
554 return tmp_dir_internal ( "/tmp" , ret
);
557 int unlink_or_warn ( const char * filename
) {
558 if ( unlink ( filename
) < 0 && errno
!= ENOENT
)
559 /* If the file doesn't exist and the fs simply was read-only (in which
560 * case unlink() returns EROFS even if the file doesn't exist), don't
562 if ( errno
!= EROFS
|| access ( filename
, F_OK
) >= 0 )
563 return log_error_errno ( errno
, "Failed to remove \" %s \" : %m" , filename
);
568 int inotify_add_watch_fd ( int fd
, int what
, uint32_t mask
) {
569 char path
[ STRLEN ( "/proc/self/fd/" ) + DECIMAL_STR_MAX ( int ) + 1 ];
572 /* This is like inotify_add_watch(), except that the file to watch is not referenced by a path, but by an fd */
573 xsprintf ( path
, "/proc/self/fd/%i" , what
);
575 r
= inotify_add_watch ( fd
, path
, mask
);
582 static bool safe_transition ( const struct stat
* a
, const struct stat
* b
) {
583 /* Returns true if the transition from a to b is safe, i.e. that we never transition from unprivileged to
584 * privileged files or directories. Why bother? So that unprivileged code can't symlink to privileged files
585 * making us believe we read something safe even though it isn't safe in the specific context we open it in. */
587 if ( a
-> st_uid
== 0 ) /* Transitioning from privileged to unprivileged is always fine */
590 return a
-> st_uid
== b
-> st_uid
; /* Otherwise we need to stay within the same UID */
593 int chase_symlinks ( const char * path
, const char * original_root
, unsigned flags
, char ** ret
) {
594 _cleanup_free_
char * buffer
= NULL
, * done
= NULL
, * root
= NULL
;
595 _cleanup_close_
int fd
= - 1 ;
596 unsigned max_follow
= 32 ; /* how many symlinks to follow before giving up and returning ELOOP */
597 struct stat previous_stat
;
604 /* Either the file may be missing, or we return an fd to the final object, but both make no sense */
605 if (( flags
& ( CHASE_NONEXISTENT
| CHASE_OPEN
)) == ( CHASE_NONEXISTENT
| CHASE_OPEN
))
611 /* This is a lot like canonicalize_file_name(), but takes an additional "root" parameter, that allows following
612 * symlinks relative to a root directory, instead of the root of the host.
614 * Note that "root" primarily matters if we encounter an absolute symlink. It is also used when following
615 * relative symlinks to ensure they cannot be used to "escape" the root directory. The path parameter passed is
616 * assumed to be already prefixed by it, except if the CHASE_PREFIX_ROOT flag is set, in which case it is first
617 * prefixed accordingly.
619 * Algorithmically this operates on two path buffers: "done" are the components of the path we already
620 * processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we still need to
621 * process. On each iteration, we move one component from "todo" to "done", processing its special meaning
622 * each time. The "todo" path always starts with at least one slash, the "done" path always ends in no
623 * slash. We always keep an O_PATH fd to the component we are currently processing, thus keeping lookup races
626 * Suggested usage: whenever you want to canonicalize a path, use this function. Pass the absolute path you got
627 * as-is: fully qualified and relative to your host's root. Optionally, specify the root parameter to tell this
628 * function what to do when encountering a symlink with an absolute path as directory: prefix it by the
631 /* A root directory of "/" or "" is identical to none */
632 if ( isempty ( original_root
) || path_equal ( original_root
, "/" ))
633 original_root
= NULL
;
636 r
= path_make_absolute_cwd ( original_root
, & root
);
640 if ( flags
& CHASE_PREFIX_ROOT
) {
642 /* We don't support relative paths in combination with a root directory */
643 if (! path_is_absolute ( path
))
646 path
= prefix_roota ( root
, path
);
650 r
= path_make_absolute_cwd ( path
, & buffer
);
654 fd
= open ( "/" , O_CLOEXEC
| O_NOFOLLOW
| O_PATH
);
658 if ( flags
& CHASE_SAFE
) {
659 if ( fstat ( fd
, & previous_stat
) < 0 )
665 _cleanup_free_
char * first
= NULL
;
666 _cleanup_close_
int child
= - 1 ;
670 /* Determine length of first component in the path */
671 n
= strspn ( todo
, "/" ); /* The slashes */
672 m
= n
+ strcspn ( todo
+ n
, "/" ); /* The entire length of the component */
674 /* Extract the first component. */
675 first
= strndup ( todo
, m
);
681 /* Empty? Then we reached the end. */
685 /* Just a single slash? Then we reached the end. */
686 if ( path_equal ( first
, "/" )) {
687 /* Preserve the trailing slash */
688 if (! strextend (& done
, "/" , NULL
))
694 /* Just a dot? Then let's eat this up. */
695 if ( path_equal ( first
, "/." ))
698 /* Two dots? Then chop off the last bit of what we already found out. */
699 if ( path_equal ( first
, "/.." )) {
700 _cleanup_free_
char * parent
= NULL
;
701 _cleanup_close_
int fd_parent
= - 1 ;
703 /* If we already are at the top, then going up will not change anything. This is in-line with
704 * how the kernel handles this. */
705 if ( isempty ( done
) || path_equal ( done
, "/" ))
708 parent
= dirname_malloc ( done
);
712 /* Don't allow this to leave the root dir. */
714 path_startswith ( done
, root
) &&
715 ! path_startswith ( parent
, root
))
718 free_and_replace ( done
, parent
);
720 fd_parent
= openat ( fd
, ".." , O_CLOEXEC
| O_NOFOLLOW
| O_PATH
);
724 if ( flags
& CHASE_SAFE
) {
725 if ( fstat ( fd_parent
, & st
) < 0 )
728 if (! safe_transition (& previous_stat
, & st
))
741 /* Otherwise let's see what this is. */
742 child
= openat ( fd
, first
+ n
, O_CLOEXEC
| O_NOFOLLOW
| O_PATH
);
745 if ( errno
== ENOENT
&&
746 ( flags
& CHASE_NONEXISTENT
) &&
747 ( isempty ( todo
) || path_is_normalized ( todo
))) {
749 /* If CHASE_NONEXISTENT is set, and the path does not exist, then that's OK, return
750 * what we got so far. But don't allow this if the remaining path contains "../" or "./"
751 * or something else weird. */
753 /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */
754 if ( streq_ptr ( done
, "/" ))
757 if (! strextend (& done
, first
, todo
, NULL
))
767 if ( fstat ( child
, & st
) < 0 )
769 if (( flags
& CHASE_SAFE
) &&
770 ! safe_transition (& previous_stat
, & st
))
775 if (( flags
& CHASE_NO_AUTOFS
) &&
776 fd_is_fs_type ( child
, AUTOFS_SUPER_MAGIC
) > 0 )
779 if ( S_ISLNK ( st
. st_mode
)) {
782 _cleanup_free_
char * destination
= NULL
;
784 /* This is a symlink, in this case read the destination. But let's make sure we don't follow
785 * symlinks without bounds. */
786 if (-- max_follow
<= 0 )
789 r
= readlinkat_malloc ( fd
, first
+ n
, & destination
);
792 if ( isempty ( destination
))
795 if ( path_is_absolute ( destination
)) {
797 /* An absolute destination. Start the loop from the beginning, but use the root
798 * directory as base. */
801 fd
= open ( root
?: "/" , O_CLOEXEC
| O_NOFOLLOW
| O_PATH
);
805 if ( flags
& CHASE_SAFE
) {
806 if ( fstat ( fd
, & st
) < 0 )
809 if (! safe_transition (& previous_stat
, & st
))
817 /* Note that we do not revalidate the root, we take it as is. */
826 /* Prefix what's left to do with what we just read, and start the loop again, but
827 * remain in the current directory. */
828 joined
= strjoin ( destination
, todo
);
830 joined
= strjoin ( "/" , destination
, todo
);
835 todo
= buffer
= joined
;
840 /* If this is not a symlink, then let's just add the name we read to what we already verified. */
845 /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */
846 if ( streq ( done
, "/" ))
849 if (! strextend (& done
, first
, NULL
))
853 /* And iterate again, but go one directory further down. */
860 /* Special case, turn the empty string into "/", to indicate the root directory. */
871 if ( flags
& CHASE_OPEN
) {
874 /* Return the O_PATH fd we are currently looking at to the caller. It can translate it to a proper fd by
875 * opening /proc/self/fd/xyz. */
887 int access_fd ( int fd
, int mode
) {
888 char p
[ STRLEN ( "/proc/self/fd/" ) + DECIMAL_STR_MAX ( fd
) + 1 ];
891 /* Like access() but operates on an already open fd */
893 xsprintf ( p
, "/proc/self/fd/%i" , fd
);
902 int unlinkat_deallocate ( int fd
, const char * name
, int flags
) {
903 _cleanup_close_
int truncate_fd
= - 1 ;
907 /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no other
908 * link to it. This is useful to ensure that other processes that might have the file open for reading won't be
909 * able to keep the data pinned on disk forever. This call is particularly useful whenever we execute clean-up
910 * jobs ("vacuuming"), where we want to make sure the data is really gone and the disk space released and
911 * returned to the free pool.
913 * Deallocation is preferably done by FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE (👊) if supported, which means
914 * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other
915 * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes
916 * underneath those programs is the better choice than simply triggering SIGBUS in them which truncation does.)
917 * However if hole punching is not implemented in the kernel or file system we'll fall back to normal file
918 * truncation (🔪), as our goal of deallocating the data space trumps our goal of being nice to readers (💐).
920 * Note that we attempt deallocation, but failure to succeed with that is not considered fatal, as long as the
921 * primary job — to delete the file — is accomplished. */
923 if (( flags
& AT_REMOVEDIR
) == 0 ) {
924 truncate_fd
= openat ( fd
, name
, O_WRONLY
| O_CLOEXEC
| O_NOCTTY
| O_NOFOLLOW
| O_NONBLOCK
);
925 if ( truncate_fd
< 0 ) {
927 /* If this failed because the file doesn't exist propagate the error right-away. Also,
928 * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR is
929 * returned when this is a directory but we are not supposed to delete those, hence propagate
930 * the error right-away too. */
931 if ( IN_SET ( errno
, ENOENT
, EISDIR
))
934 if ( errno
!= ELOOP
) /* don't complain if this is a symlink */
935 log_debug_errno ( errno
, "Failed to open file '%s' for deallocation, ignoring: %m" , name
);
939 if ( unlinkat ( fd
, name
, flags
) < 0 )
942 if ( truncate_fd
< 0 ) /* Don't have a file handle, can't do more âšī¸ */
945 if ( fstat ( truncate_fd
, & st
) < 0 ) {
946 log_debug_errno ( errno
, "Failed to stat file '%s' for deallocation, ignoring." , name
);
950 if (! S_ISREG ( st
. st_mode
) || st
. st_blocks
== 0 || st
. st_nlink
> 0 )
953 /* If this is a regular file, it actually took up space on disk and there are no other links it's time to
954 * punch-hole/truncate this to release the disk space. */
956 bs
= MAX ( st
. st_blksize
, 512 );
957 l
= DIV_ROUND_UP ( st
. st_size
, bs
) * bs
; /* Round up to next block size */
959 if ( fallocate ( truncate_fd
, FALLOC_FL_PUNCH_HOLE
| FALLOC_FL_KEEP_SIZE
, 0 , l
) >= 0 )
960 return 0 ; /* Successfully punched a hole! đ */
962 /* Fall back to truncation */
963 if ( ftruncate ( truncate_fd
, 0 ) < 0 ) {
964 log_debug_errno ( errno
, "Failed to truncate file to 0, ignoring: %m" );
971 int fsync_directory_of_file ( int fd
) {
972 _cleanup_free_
char * path
= NULL
, * dn
= NULL
;
973 _cleanup_close_
int dfd
= - 1 ;
976 r
= fd_verify_regular ( fd
);
980 r
= fd_get_path ( fd
, & path
);
984 if (! path_is_absolute ( path
))
987 dn
= dirname_malloc ( path
);
991 dfd
= open ( dn
, O_RDONLY
| O_CLOEXEC
| O_DIRECTORY
);