git.ipfire.org Git - thirdparty/systemd.git/commitdiff
user-runtime-dir: enforce /tmp/ and /dev/shm/ quota
author: Lennart Poettering <lennart@poettering.net>
Fri, 10 Jan 2025 10:34:18 +0000 (11:34 +0100)
committer: Lennart Poettering <lennart@poettering.net>
Thu, 23 Jan 2025 21:36:39 +0000 (22:36 +0100)
Enforce the quota on these two tmpfs instances at the same place where we
mount the per-user $XDG_RUNTIME_DIR. Conceptually the two are very similar,
and doing it here ensures the limits are enforced at the same place and with
the same lifecycle.

README
man/user@.service.xml
src/login/user-runtime-dir.c

diff --git a/README b/README
index 1e55da23f66861d5908a1df28cdf1d7cefdab334..975f5e5a5e5f0d437441a770443eaecae77535e5 100644 (file)
--- a/README
+++ b/README
@@ -61,9 +61,11 @@ REQUIREMENTS:
                      ≥ 5.9 for close_range()
                      ≥ 5.12 for idmapped mount
                      ≥ 5.14 for cgroup.kill
+                     ≥ 5.14 for quotactl_fd()
                      ≥ 6.3 for MFD_EXEC/MFD_NOEXEC_SEAL and tmpfs noswap option
                      ≥ 6.5 for name_to_handle_at() AT_HANDLE_FID, SO_PEERPIDFD/SO_PASSPIDFD,
                                and MOVE_MOUNT_BENEATH
+                     ≥ 6.6 for quota support on tmpfs
                      ≥ 6.9 for pidfs
 
         ✅ systemd utilizes several new kernel APIs, but will fall back gracefully
index cc078d2d3c16d0f4fd56211424e9ed94b8adcc9e..a046a759d5b27305b0b2816dd593dfdee81ac331 100644 (file)
     <citerefentry><refentrytitle>systemd.special</refentrytitle><manvolnum>7</manvolnum></citerefentry> for a
     list of units that form the basis of the unit hierarchies of system and user units.</para>
 
-    <para><filename>user@<replaceable>UID</replaceable>.service</filename> is accompanied by the
-    system unit <filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename>, which
-    creates the user's runtime directory
-    <filename>/run/user/<replaceable>UID</replaceable></filename>, and then removes it when this
-    unit is stopped. <filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename>
-    executes the <filename>systemd-user-runtime-dir</filename> binary to do the actual work.</para>
+    <para><filename>user@<replaceable>UID</replaceable>.service</filename> is accompanied by the system unit
+    <filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename>, which creates the user's
+    runtime directory <filename>/run/user/<replaceable>UID</replaceable></filename> when started, and removes
+    it when it is stopped. It may also apply runtime quota settings on <filename>/tmp/</filename> and/or
+    <filename>/dev/shm/</filename> for the
+    user. <filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename> executes the
+    <filename>systemd-user-runtime-dir</filename> binary to do the actual work.</para>
 
     <para>User processes may be started by the <filename>user@.service</filename> instance, in which
     case they will be part of that unit in the system hierarchy. They may also be started elsewhere,
index f39c1ad225edddd00f444cf87ed6234b7057a508..6c2fef95dbd416d9b98c862b5411e3a2c8a1f0ca 100644 (file)
@@ -8,15 +8,20 @@
 #include "bus-error.h"
 #include "bus-locator.h"
 #include "dev-setup.h"
+#include "devnum-util.h"
+#include "fd-util.h"
 #include "format-util.h"
 #include "fs-util.h"
 #include "label-util.h"
 #include "limits-util.h"
 #include "main-func.h"
+#include "missing_magic.h"
+#include "missing_syscall.h"
 #include "mkdir-label.h"
 #include "mount-util.h"
 #include "mountpoint-util.h"
 #include "path-util.h"
+#include "quota-util.h"
 #include "rm-rf.h"
 #include "selinux-util.h"
 #include "smack-util.h"
@@ -24,6 +29,7 @@
 #include "string-util.h"
 #include "strv.h"
 #include "user-util.h"
+#include "userdb.h"
 
 static int acquire_runtime_dir_properties(uint64_t *ret_size, uint64_t *ret_inodes) {
         _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
@@ -126,6 +132,26 @@ static int user_mkdir_runtime_path(
         return 0;
 }
 
+/* Prepares the per-user runtime directory /run/user/<UID> for the given user record.
+ *
+ * Refuses (ENOMSG) if the record carries no valid UID or GID. Otherwise queries the configured
+ * size/inode settings via acquire_runtime_dir_properties() and hands everything over to
+ * user_mkdir_runtime_path(), whose result is returned as-is. */
+static int do_mount(UserRecord *ur) {
+        char path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)];
+        uint64_t size, inodes;
+        int r;
+
+        assert(ur);
+
+        /* Both IDs are needed below: the UID names the directory, and both are passed on as owner. */
+        if (!(uid_is_valid(ur->uid) && gid_is_valid(ur->gid)))
+                return log_error_errno(SYNTHETIC_ERRNO(ENOMSG), "User '%s' lacks UID or GID, refusing.", ur->user_name);
+
+        r = acquire_runtime_dir_properties(&size, &inodes);
+        if (r < 0)
+                return r;
+
+        xsprintf(path, "/run/user/" UID_FMT, ur->uid);
+        log_debug("Will mount %s owned by "UID_FMT":"GID_FMT, path, ur->uid, ur->gid);
+
+        return user_mkdir_runtime_path(path, ur->uid, ur->gid, size, inodes);
+}
+
 static int user_remove_runtime_path(const char *runtime_path) {
         int r;
 
@@ -149,31 +175,6 @@ static int user_remove_runtime_path(const char *runtime_path) {
         return 0;
 }
 
-static int do_mount(const char *user) {
-        char runtime_path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)];
-        uint64_t runtime_dir_size, runtime_dir_inodes;
-        uid_t uid;
-        gid_t gid;
-        int r;
-
-        r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
-        if (r < 0)
-                return log_error_errno(r,
-                                       r == -ESRCH ? "No such user \"%s\"" :
-                                       r == -ENOMSG ? "UID \"%s\" is invalid or has an invalid main group"
-                                                    : "Failed to look up user \"%s\": %m",
-                                       user);
-
-        r = acquire_runtime_dir_properties(&runtime_dir_size, &runtime_dir_inodes);
-        if (r < 0)
-                return r;
-
-        xsprintf(runtime_path, "/run/user/" UID_FMT, uid);
-
-        log_debug("Will mount %s owned by "UID_FMT":"GID_FMT, runtime_path, uid, gid);
-        return user_mkdir_runtime_path(runtime_path, uid, gid, runtime_dir_size, runtime_dir_inodes);
-}
-
 static int do_umount(const char *user) {
         char runtime_path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)];
         uid_t uid;
@@ -197,6 +198,126 @@ static int do_umount(const char *user) {
         return user_remove_runtime_path(runtime_path);
 }
 
+/* Applies a per-user block quota for 'uid' to every tmpfs found among 'paths'.
+ *
+ * paths: list of directories to process (iterated with STRV_FOREACH). Entries that cannot be
+ *        opened, are not on tmpfs, or whose file system was already handled earlier in the list
+ *        (same st_dev) are skipped.
+ * uid:   the user to set the quota for; must be valid.
+ * limit: absolute upper bound for the quota, in bytes.
+ * scale: relative bound, as a fraction of the file system size: 0 means 0%, UINT32_MAX means 100%
+ *        (which is treated as "no relative bound").
+ *
+ * The effective quota is the smaller of the two bounds, converted to units of QIF_DQBLKSIZE, and
+ * is installed as both the soft and the hard block limit.
+ *
+ * This is best-effort: a failure on an individual path is logged and that path is skipped, and 0
+ * is still returned. The only error propagated to the caller is the result of log_oom() when the
+ * bookkeeping set cannot be allocated. */
+static int apply_tmpfs_quota(
+                char **paths,
+                uid_t uid,
+                uint64_t limit,
+                uint32_t scale) {
+
+        _cleanup_set_free_ Set *processed = NULL;
+        int r;
+
+        assert(uid_is_valid(uid));
+
+        STRV_FOREACH(p, paths) {
+                _cleanup_close_ int fd = open(*p, O_DIRECTORY|O_CLOEXEC);
+                if (fd < 0) {
+                        log_warning_errno(errno, "Failed to open '%s' in order to set quota, ignoring: %m", *p);
+                        continue;
+                }
+
+                struct stat st;
+                if (fstat(fd, &st) < 0) {
+                        log_warning_errno(errno, "Failed to stat '%s' in order to set quota, ignoring: %m", *p);
+                        continue;
+                }
+
+                /* Cover for bind mounted or symlinked /var/tmp/ + /tmp/ */
+                if (set_contains(processed, DEVNUM_TO_PTR(st.st_dev))) {
+                        log_debug("Not setting quota on '%s', since already processed.", *p);
+                        continue;
+                }
+
+                /* Remember we already dealt with this fs, even if the subsequent operation fails, since
+                 * there's no point in applying quota twice, regardless if it succeeds or not. */
+                if (set_ensure_put(&processed, /* hash_ops= */ NULL, DEVNUM_TO_PTR(st.st_dev)) < 0)
+                        return log_oom();
+
+                struct statfs sfs;
+                if (fstatfs(fd, &sfs) < 0) {
+                        log_warning_errno(errno, "Failed to statfs '%s' in order to set quota, ignoring: %m", *p);
+                        continue;
+                }
+
+                if (!is_fs_type(&sfs, TMPFS_MAGIC)) {
+                        log_debug("Not setting quota on '%s', since not tmpfs.", *p);
+                        continue;
+                }
+
+                /* Read the current setting first, so that we can skip the update if nothing changes.
+                 * ESRCH is handled as "no quota recorded yet" and we start from a zeroed request. */
+                struct dqblk req;
+                r = RET_NERRNO(quotactl_fd(fd, QCMD_FIXED(Q_GETQUOTA, USRQUOTA), uid, &req));
+                if (r == -ESRCH)
+                        zero(req);
+                else if (ERRNO_IS_NEG_NOT_SUPPORTED(r)) {
+                        log_debug_errno(r, "No UID quota support on %s, not setting quota: %m", *p);
+                        continue;
+                } else if (ERRNO_IS_NEG_PRIVILEGE(r)) {
+                        log_debug_errno(r, "Lacking privileges to query UID quota on %s, not setting quota: %m", *p);
+                        continue;
+                } else if (r < 0) {
+                        log_warning_errno(r, "Failed to query disk quota on %s for UID " UID_FMT ", ignoring: %m", *p, uid);
+                        continue;
+                }
+
+                /* 'scale' is a fraction in the range 0..UINT32_MAX, hence the file system size has to
+                 * be divided by UINT32_MAX and multiplied by 'scale' - not the other way round, which
+                 * would yield a value larger than the file system itself. */
+                uint64_t v =
+                        (scale == 0) ? 0 :
+                        (scale == UINT32_MAX) ? UINT64_MAX :
+                        (uint64_t) ((double) (sfs.f_blocks * sfs.f_frsize) / UINT32_MAX * scale);
+
+                /* Combine with the absolute limit, then convert from bytes to quota blocks. */
+                v = MIN(v, limit);
+                v /= QIF_DQBLKSIZE;
+
+                if (FLAGS_SET(req.dqb_valid, QIF_BLIMITS) && v == req.dqb_bhardlimit) {
+                        /* Shortcut things if everything is set up properly already */
+                        log_debug("Configured quota on '%s' already matches the intended setting, not updating quota.", *p);
+                        continue;
+                }
+
+                /* Only touch the block limits, and leave everything else in the record alone. */
+                req.dqb_valid = QIF_BLIMITS;
+                req.dqb_bsoftlimit = req.dqb_bhardlimit = v;
+
+                r = RET_NERRNO(quotactl_fd(fd, QCMD_FIXED(Q_SETQUOTA, USRQUOTA), uid, &req));
+                if (r == -ESRCH) {
+                        log_debug_errno(r, "Not setting UID quota on %s since UID quota is not supported: %m", *p);
+                        continue;
+                } else if (ERRNO_IS_NEG_PRIVILEGE(r)) {
+                        log_debug_errno(r, "Lacking privileges to set UID quota on %s, skipping: %m", *p);
+                        continue;
+                } else if (r < 0) {
+                        log_warning_errno(r, "Failed to set disk quota on %s for UID " UID_FMT ", ignoring: %m", *p, uid);
+                        continue;
+                }
+
+                log_info("Successfully configured disk quota for UID " UID_FMT " on %s to %s", uid, *p, FORMAT_BYTES(v * QIF_DQBLKSIZE));
+        }
+
+        return 0;
+}
+
+/* Applies the tmpfs quota settings of the given user record: first to /tmp and /var/tmp (using the
+ * record's tmp limit and scale), then to /dev/shm (using its dev-shm limit and scale).
+ *
+ * Nothing is done for the root user. Refuses (ENOMSG) if the record carries no valid UID. A
+ * negative result from apply_tmpfs_quota() is passed through, otherwise 0 is returned. */
+static int do_tmpfs_quota(UserRecord *ur) {
+        uid_t uid;
+        int r;
+
+        assert(ur);
+
+        if (user_record_is_root(ur)) {
+                log_debug("Not applying tmpfs quota to root user.");
+                return 0;
+        }
+
+        uid = ur->uid;
+        if (!uid_is_valid(uid))
+                return log_error_errno(SYNTHETIC_ERRNO(ENOMSG), "User '%s' lacks UID, refusing.", ur->user_name);
+
+        /* /tmp and /var/tmp go into a single call, so that they share one "already processed" check. */
+        r = apply_tmpfs_quota(STRV_MAKE("/tmp", "/var/tmp"), uid, ur->tmp_limit.limit, user_record_tmp_limit_scale(ur));
+        if (r < 0)
+                return r;
+
+        r = apply_tmpfs_quota(STRV_MAKE("/dev/shm"), uid, ur->dev_shm_limit.limit, user_record_dev_shm_limit_scale(ur));
+        return r < 0 ? r : 0;
+}
+
 static int run(int argc, char *argv[]) {
         int r;
 
@@ -218,10 +339,26 @@ static int run(int argc, char *argv[]) {
         if (r < 0)
                 return r;
 
-        if (streq(verb, "start"))
-                return do_mount(user);
+        if (streq(verb, "start")) {
+                _cleanup_(user_record_unrefp) UserRecord *ur = NULL;
+                r = userdb_by_name(user, USERDB_PARSE_NUMERIC|USERDB_SUPPRESS_SHADOW, &ur);
+                if (r == -ESRCH)
+                        return log_error_errno(r, "User '%s' does not exist: %m", user);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to resolve user '%s': %m", user);
+
+                /* We do two things here: mount the per-user XDG_RUNTIME_DIR, and set up tmpfs quota on /tmp/
+                 * and /dev/shm/. */
+
+                r = 0;
+                RET_GATHER(r, do_mount(ur));
+                RET_GATHER(r, do_tmpfs_quota(ur));
+                return r;
+        }
+
         if (streq(verb, "stop"))
                 return do_umount(user);
+
         assert_not_reached();
 }