]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
tmpfiles: use statx() when aging files 16877/head
authorLennart Poettering <lennart@poettering.net>
Thu, 27 Aug 2020 17:13:30 +0000 (19:13 +0200)
committerLennart Poettering <lennart@poettering.net>
Fri, 28 Aug 2020 13:45:37 +0000 (15:45 +0200)
This allows us to properly detect mount points, for free. (Also, allows
us to respect btimes that are newer than the cutoff, which should be
useful when people untar file trees in /var/tmp)

Fixes: #16848
src/basic/missing_stat.h
src/tmpfiles/tmpfiles.c

index fc5ba25685dbc3a2900f9e1a599d6b44e60f26ef..a5bb2bfd028d330e8e5b0e16e82f93f8e53fec57 100644 (file)
@@ -50,13 +50,48 @@ struct statx STATX_DEFINITION;
 struct new_statx STATX_DEFINITION;
 
 /* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
-#ifndef STATX_BTIME
-#define STATX_BTIME 0x00000800U
+#ifndef AT_STATX_DONT_SYNC
+#define AT_STATX_DONT_SYNC 0x4000
 #endif
 
 /* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
-#ifndef AT_STATX_DONT_SYNC
-#define AT_STATX_DONT_SYNC 0x4000
+#ifndef STATX_TYPE
+#define STATX_TYPE 0x00000001U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_MODE
+#define STATX_MODE 0x00000002U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_UID
+#define STATX_UID 0x00000008U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_ATIME
+#define STATX_ATIME 0x00000020U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_MTIME
+#define STATX_MTIME 0x00000040U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_CTIME
+#define STATX_CTIME 0x00000080U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_INO
+#define STATX_INO 0x00000100U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_BTIME
+#define STATX_BTIME 0x00000800U
 #endif
 
 /* fa2fcf4f1df1559a0a4ee0f46915b496cc2ebf60 (5.8) */
index 437b8bb4498a3df8fd2eb1d86b1a5b60249ed64e..b15a0d3357005779a20b2cb0388b8e26a35f5084 100644 (file)
@@ -38,6 +38,8 @@
 #include "log.h"
 #include "macro.h"
 #include "main-func.h"
+#include "missing_stat.h"
+#include "missing_syscall.h"
 #include "mkdir.h"
 #include "mount-util.h"
 #include "mountpoint-util.h"
@@ -490,49 +492,137 @@ static DIR* opendir_nomod(const char *path) {
         return xopendirat_nomod(AT_FDCWD, path);
 }
 
+static inline nsec_t load_statx_timestamp_nsec(const struct statx_timestamp *ts) {
+        assert(ts);
+
+        if (ts->tv_sec < 0)
+                return NSEC_INFINITY;
+
+        if ((nsec_t) ts->tv_sec >= (UINT64_MAX - ts->tv_nsec) / NSEC_PER_SEC)
+                return NSEC_INFINITY;
+
+        return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
+}
+
 static int dir_cleanup(
                 Item *i,
                 const char *p,
                 DIR *d,
-                const struct stat *ds,
-                usec_t cutoff,
-                dev_t rootdev,
+                nsec_t self_atime_nsec,
+                nsec_t self_mtime_nsec,
+                nsec_t cutoff_nsec,
+                dev_t rootdev_major,
+                dev_t rootdev_minor,
                 bool mountpoint,
                 int maxdepth,
                 bool keep_this_level) {
 
-        struct dirent *dent;
+        static bool use_statx = true;
         bool deleted = false;
+        struct dirent *dent;
         int r = 0;
 
         FOREACH_DIRENT_ALL(dent, d, break) {
-                struct stat s;
-                usec_t age;
                 _cleanup_free_ char *sub_path = NULL;
+                nsec_t atime_nsec, mtime_nsec, ctime_nsec, btime_nsec;
+                mode_t mode;
+                uid_t uid;
 
                 if (dot_or_dot_dot(dent->d_name))
                         continue;
 
-                if (fstatat(dirfd(d), dent->d_name, &s, AT_SYMLINK_NOFOLLOW) < 0) {
-                        if (errno == ENOENT)
-                                continue;
+                if (use_statx) {
+                        /* If statx() is supported, use it. It's preferable over fstatat() since it tells us
+                         * explicitly where we are looking at a mount point, for free as side
+                         * information. Determing the same information without statx() is hard, see the
+                         * complexity of path_is_mount_point(), and also much slower as it requires a numbre
+                         * of syscalls instead of just one. Hence, when we have modern statx() we use it
+                         * instead of fstat() and do proper mount point checks, while on older kernels's well
+                         * do traditional st_dev based detection of mount points.
+                         *
+                         * Using statx() for detecting mount points also has the benfit that we handle weird
+                         * file systems such as overlayfs better where each file is originating from a
+                         * different st_dev. */
+
+                        struct statx sx
+#if HAS_FEATURE_MEMORY_SANITIZER
+                                = {}
+#  warning "Explicitly initializing struct statx, to work around msan limitation. Please remove as soon as msan has been updated to not require this."
+#endif
+                                ;
 
-                        /* FUSE, NFS mounts, SELinux might return EACCES */
-                        r = log_full_errno(errno == EACCES ? LOG_DEBUG : LOG_ERR, errno,
-                                           "stat(%s/%s) failed: %m", p, dent->d_name);
-                        continue;
+                        if (statx(dirfd(d), dent->d_name,
+                                  AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT,
+                                  STATX_TYPE|STATX_MODE|STATX_UID|STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_BTIME,
+                                  &sx) < 0) {
+
+                                if (errno == ENOENT)
+                                        continue;
+                                if (ERRNO_IS_NOT_SUPPORTED(errno) || errno == EPERM)
+                                        use_statx = false; /* Not supported or blocked by seccomp or so */
+                                else {
+                                        /* FUSE, NFS mounts, SELinux might return EACCES */
+                                        r = log_full_errno(errno == EACCES ? LOG_DEBUG : LOG_ERR, errno,
+                                                           "statx(%s/%s) failed: %m", p, dent->d_name);
+                                        continue;
+                                }
+                        } else {
+                                if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) {
+                                        /* Yay, we have the mount point API, use it */
+                                        if (FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT)) {
+                                                log_debug("Ignoring \"%s/%s\": different mount points.", p, dent->d_name);
+                                                continue;
+                                        }
+                                } else {
+                                        /* So we have statx() but the STATX_ATTR_MOUNT_ROOT flag is not
+                                         * supported, fall back to traditional stx_dev checking. */
+                                        if (sx.stx_dev_major != rootdev_major ||
+                                            sx.stx_dev_minor != rootdev_minor) {
+                                                log_debug("Ignoring \"%s/%s\": different filesystem.", p, dent->d_name);
+                                                continue;
+                                        }
+                                }
+
+                                mode = sx.stx_mode;
+                                uid = sx.stx_uid;
+                                atime_nsec = FLAGS_SET(sx.stx_mask, STATX_ATIME) ? load_statx_timestamp_nsec(&sx.stx_atime) : 0;
+                                mtime_nsec = FLAGS_SET(sx.stx_mask, STATX_MTIME) ? load_statx_timestamp_nsec(&sx.stx_mtime) : 0;
+                                ctime_nsec = FLAGS_SET(sx.stx_mask, STATX_CTIME) ? load_statx_timestamp_nsec(&sx.stx_ctime) : 0;
+                                btime_nsec = FLAGS_SET(sx.stx_mask, STATX_BTIME) ? load_statx_timestamp_nsec(&sx.stx_btime) : 0;
+                        }
                 }
 
-                /* Stay on the same filesystem */
-                if (s.st_dev != rootdev) {
-                        log_debug("Ignoring \"%s/%s\": different filesystem.", p, dent->d_name);
-                        continue;
+                if (!use_statx) {
+                        struct stat s;
+
+                        if (fstatat(dirfd(d), dent->d_name, &s, AT_SYMLINK_NOFOLLOW) < 0) {
+                                if (errno == ENOENT)
+                                        continue;
+
+                                /* FUSE, NFS mounts, SELinux might return EACCES */
+                                r = log_full_errno(errno == EACCES ? LOG_DEBUG : LOG_ERR, errno,
+                                                   "stat(%s/%s) failed: %m", p, dent->d_name);
+                                continue;
+                        }
+
+                        /* Stay on the same filesystem */
+                        if (major(s.st_dev) != rootdev_major || minor(s.st_dev) != rootdev_minor) {
+                                log_debug("Ignoring \"%s/%s\": different filesystem.", p, dent->d_name);
+                                continue;
+                        }
+
+                        mode = s.st_mode;
+                        uid = s.st_uid;
+                        atime_nsec = timespec_load_nsec(&s.st_atim);
+                        mtime_nsec = timespec_load_nsec(&s.st_mtim);
+                        ctime_nsec = timespec_load_nsec(&s.st_ctim);
+                        btime_nsec = 0;
                 }
 
                 /* Try to detect bind mounts of the same filesystem instance; they
                  * do not differ in device major/minors. This type of query is not
                  * supported on all kernels or filesystem types though. */
-                if (S_ISDIR(s.st_mode)) {
+                if (S_ISDIR(mode)) {
                         int q;
 
                         q = fd_is_mount_point(dirfd(d), dent->d_name, 0);
@@ -561,12 +651,12 @@ static int dir_cleanup(
                         continue;
                 }
 
-                if (S_ISDIR(s.st_mode)) {
+                if (S_ISDIR(mode)) {
                         _cleanup_closedir_ DIR *sub_dir = NULL;
 
                         if (mountpoint &&
                             streq(dent->d_name, "lost+found") &&
-                            s.st_uid == 0) {
+                            uid == 0) {
                                 log_debug("Ignoring directory \"%s\".", sub_path);
                                 continue;
                         }
@@ -589,7 +679,11 @@ static int dir_cleanup(
                                         continue;
                                 }
 
-                                q = dir_cleanup(i, sub_path, sub_dir, &s, cutoff, rootdev, false, maxdepth-1, false);
+                                q = dir_cleanup(i,
+                                                sub_path, sub_dir,
+                                                atime_nsec, mtime_nsec, cutoff_nsec,
+                                                rootdev_major, rootdev_minor,
+                                                false, maxdepth-1, false);
                                 if (q < 0)
                                         r = q;
                         }
@@ -605,22 +699,28 @@ static int dir_cleanup(
                         }
 
                         /* Ignore ctime, we change it when deleting */
-                        age = timespec_load(&s.st_mtim);
-                        if (age >= cutoff) {
+                        if (mtime_nsec != NSEC_INFINITY && mtime_nsec >= cutoff_nsec) {
                                 char a[FORMAT_TIMESTAMP_MAX];
                                 /* Follows spelling in stat(1). */
                                 log_debug("Directory \"%s\": modify time %s is too new.",
                                           sub_path,
-                                          format_timestamp_style(a, sizeof(a), age, TIMESTAMP_US));
+                                          format_timestamp_style(a, sizeof(a), mtime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
                                 continue;
                         }
 
-                        age = timespec_load(&s.st_atim);
-                        if (age >= cutoff) {
+                        if (atime_nsec != NSEC_INFINITY && atime_nsec >= cutoff_nsec) {
                                 char a[FORMAT_TIMESTAMP_MAX];
                                 log_debug("Directory \"%s\": access time %s is too new.",
                                           sub_path,
-                                          format_timestamp_style(a, sizeof(a), age, TIMESTAMP_US));
+                                          format_timestamp_style(a, sizeof(a), atime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
+                                continue;
+                        }
+
+                        if (btime_nsec != NSEC_INFINITY && btime_nsec >= cutoff_nsec) {
+                                char a[FORMAT_TIMESTAMP_MAX];
+                                log_debug("Directory \"%s\": birth time %s is too new.",
+                                          sub_path,
+                                          format_timestamp_style(a, sizeof(a), btime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
                                 continue;
                         }
 
@@ -632,14 +732,14 @@ static int dir_cleanup(
                 } else {
                         /* Skip files for which the sticky bit is set. These are semantics we define, and are
                          * unknown elsewhere. See XDG_RUNTIME_DIR specification for details. */
-                        if (s.st_mode & S_ISVTX) {
+                        if (mode & S_ISVTX) {
                                 log_debug("Skipping \"%s\": sticky bit set.", sub_path);
                                 continue;
                         }
 
                         if (mountpoint &&
-                            S_ISREG(s.st_mode) &&
-                            s.st_uid == 0 &&
+                            S_ISREG(mode) &&
+                            uid == 0 &&
                             STR_IN_SET(dent->d_name,
                                        ".journal",
                                        "aquota.user",
@@ -649,13 +749,13 @@ static int dir_cleanup(
                         }
 
                         /* Ignore sockets that are listed in /proc/net/unix */
-                        if (S_ISSOCK(s.st_mode) && unix_socket_alive(sub_path)) {
+                        if (S_ISSOCK(mode) && unix_socket_alive(sub_path)) {
                                 log_debug("Skipping \"%s\": live socket.", sub_path);
                                 continue;
                         }
 
                         /* Ignore device nodes */
-                        if (S_ISCHR(s.st_mode) || S_ISBLK(s.st_mode)) {
+                        if (S_ISCHR(mode) || S_ISBLK(mode)) {
                                 log_debug("Skipping \"%s\": a device.", sub_path);
                                 continue;
                         }
@@ -666,31 +766,36 @@ static int dir_cleanup(
                                 continue;
                         }
 
-                        age = timespec_load(&s.st_mtim);
-                        if (age >= cutoff) {
+                        if (mtime_nsec != NSEC_INFINITY && mtime_nsec >= cutoff_nsec) {
                                 char a[FORMAT_TIMESTAMP_MAX];
                                 /* Follows spelling in stat(1). */
                                 log_debug("File \"%s\": modify time %s is too new.",
                                           sub_path,
-                                          format_timestamp_style(a, sizeof(a), age, TIMESTAMP_US));
+                                          format_timestamp_style(a, sizeof(a), mtime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
                                 continue;
                         }
 
-                        age = timespec_load(&s.st_atim);
-                        if (age >= cutoff) {
+                        if (atime_nsec != NSEC_INFINITY && atime_nsec >= cutoff_nsec) {
                                 char a[FORMAT_TIMESTAMP_MAX];
                                 log_debug("File \"%s\": access time %s is too new.",
                                           sub_path,
-                                          format_timestamp_style(a, sizeof(a), age, TIMESTAMP_US));
+                                          format_timestamp_style(a, sizeof(a), atime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
                                 continue;
                         }
 
-                        age = timespec_load(&s.st_ctim);
-                        if (age >= cutoff) {
+                        if (ctime_nsec != NSEC_INFINITY && ctime_nsec >= cutoff_nsec) {
                                 char a[FORMAT_TIMESTAMP_MAX];
                                 log_debug("File \"%s\": change time %s is too new.",
                                           sub_path,
-                                          format_timestamp_style(a, sizeof(a), age, TIMESTAMP_US));
+                                          format_timestamp_style(a, sizeof(a), ctime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
+                                continue;
+                        }
+
+                        if (btime_nsec != NSEC_INFINITY && btime_nsec >= cutoff_nsec) {
+                                char a[FORMAT_TIMESTAMP_MAX];
+                                log_debug("File \"%s\": birth time %s is too new.",
+                                          sub_path,
+                                          format_timestamp_style(a, sizeof(a), btime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
                                 continue;
                         }
 
@@ -705,21 +810,19 @@ static int dir_cleanup(
 
 finish:
         if (deleted) {
-                char a[FORMAT_TIMESTAMP_MAX], b[FORMAT_TIMESTAMP_MAX];
-                usec_t age1, age2;
-
-                age1 = timespec_load(&ds->st_atim);
-                age2 = timespec_load(&ds->st_mtim);
+                char a[FORMAT_TIMESTAMP_MAX], m[FORMAT_TIMESTAMP_MAX];
+                struct timespec ts[2];
 
                 log_debug("Restoring access and modification time on \"%s\": %s, %s",
                           p,
-                          format_timestamp_style(a, sizeof(a), age1, TIMESTAMP_US),
-                          format_timestamp_style(b, sizeof(b), age2, TIMESTAMP_US));
+                          format_timestamp_style(a, sizeof(a), self_atime_nsec / NSEC_PER_USEC, TIMESTAMP_US),
+                          format_timestamp_style(m, sizeof(m), self_mtime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
+
+                timespec_store_nsec(ts + 0, self_atime_nsec);
+                timespec_store_nsec(ts + 1, self_mtime_nsec);
 
                 /* Restore original directory timestamps */
-                if (futimens(dirfd(d), (struct timespec[]) {
-                                ds->st_atim,
-                                ds->st_mtim }) < 0)
+                if (futimens(dirfd(d), ts) < 0)
                         log_warning_errno(errno, "Failed to revert timestamps of '%s', ignoring: %m", p);
         }
 
@@ -2186,11 +2289,20 @@ static int remove_item(Item *i) {
 }
 
 static int clean_item_instance(Item *i, const char* instance) {
+        char timestamp[FORMAT_TIMESTAMP_MAX];
         _cleanup_closedir_ DIR *d = NULL;
-        struct stat s, ps;
-        bool mountpoint;
+        uint32_t dev_major, dev_minor;
+        nsec_t atime_nsec, mtime_nsec;
+        int mountpoint = -1;
         usec_t cutoff, n;
-        char timestamp[FORMAT_TIMESTAMP_MAX];
+        uint64_t ino;
+
+        struct statx sx
+#if HAS_FEATURE_MEMORY_SANITIZER
+                = {}
+#  warning "Explicitly initializing struct statx, to work around msan limitation. Please remove as soon as msan has been updated to not require this."
+#endif
+                ;
 
         assert(i);
 
@@ -2213,24 +2325,53 @@ static int clean_item_instance(Item *i, const char* instance) {
                 return log_error_errno(errno, "Failed to open directory %s: %m", instance);
         }
 
-        if (fstat(dirfd(d), &s) < 0)
-                return log_error_errno(errno, "stat(%s) failed: %m", i->path);
+        if (statx(dirfd(d), "", AT_EMPTY_PATH, STATX_MODE|STATX_INO|STATX_ATIME|STATX_MTIME, &sx) < 0) {
+                struct stat s;
+
+                if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
+                        return log_error_errno(errno, "statx(%s) failed: %m", i->path);
+
+                if (fstat(dirfd(d), &s) < 0)
+                        return log_error_errno(errno, "stat(%s) failed: %m", i->path);
+
+                dev_major = major(s.st_dev);
+                dev_minor = minor(s.st_dev);
+                ino = s.st_ino;
+                atime_nsec = timespec_load_nsec(&s.st_atim);
+                mtime_nsec = timespec_load_nsec(&s.st_mtim);
+        } else {
+
+                if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT))
+                        mountpoint = FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);
+
+                dev_major = sx.stx_dev_major;
+                dev_minor = sx.stx_dev_minor;
+                ino = sx.stx_ino;
+                atime_nsec = load_statx_timestamp_nsec(&sx.stx_atime);
+                mtime_nsec = load_statx_timestamp_nsec(&sx.stx_mtime);
+        }
+
+        if (mountpoint < 0) {
+                struct stat ps;
 
-        if (!S_ISDIR(s.st_mode))
-                return log_error_errno(SYNTHETIC_ERRNO(ENOTDIR),
-                                       "%s is not a directory.", i->path);
+                if (fstatat(dirfd(d), "..", &ps, AT_SYMLINK_NOFOLLOW) != 0)
+                        return log_error_errno(errno, "stat(%s/..) failed: %m", i->path);
 
-        if (fstatat(dirfd(d), "..", &ps, AT_SYMLINK_NOFOLLOW) != 0)
-                return log_error_errno(errno, "stat(%s/..) failed: %m", i->path);
+                mountpoint =
+                        dev_major != major(ps.st_dev) ||
+                        dev_minor != minor(ps.st_dev) ||
+                        ino != ps.st_ino;
+        }
 
-        mountpoint = s.st_dev != ps.st_dev || s.st_ino == ps.st_ino;
 
         log_debug("Cleanup threshold for %s \"%s\" is %s",
                   mountpoint ? "mount point" : "directory",
                   instance,
                   format_timestamp_style(timestamp, sizeof(timestamp), cutoff, TIMESTAMP_US));
 
-        return dir_cleanup(i, instance, d, &s, cutoff, s.st_dev, mountpoint,
+        return dir_cleanup(i, instance, d,
+                           atime_nsec, mtime_nsec, cutoff * NSEC_PER_USEC,
+                           dev_major, dev_minor, mountpoint,
                            MAX_DEPTH, i->keep_first_level);
 }