From: Lennart Poettering Date: Fri, 10 Jan 2025 10:34:18 +0000 (+0100) Subject: user-runtime-dir: enforce /tmp/ and /dev/shm/ quota X-Git-Tag: v258-rc1~1499^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b1c95fb2e9d11fc190017dec3d64f468f9d378bc;p=thirdparty%2Fsystemd.git user-runtime-dir: enforce /tmp/ and /dev/shm/ quota Enforce the quota on these two tmpfs at the same place where we mount the per-user $XDG_RUNTIME_DIR. Conceptually the two are very similar, and doing it here ensures the limits are enforced at the same place and with the same lifecycle. --- diff --git a/README b/README index 1e55da23f66..975f5e5a5e5 100644 --- a/README +++ b/README @@ -61,9 +61,11 @@ REQUIREMENTS: ≥ 5.9 for close_range() ≥ 5.12 for idmapped mount ≥ 5.14 for cgroup.kill + ≥ 5.14 for quotactl_fd() ≥ 6.3 for MFD_EXEC/MFD_NOEXEC_SEAL and tmpfs noswap option ≥ 6.5 for name_to_handle_at() AT_HANDLE_FID, SO_PEERPIDFD/SO_PASSPIDFD, and MOVE_MOUNT_BENEATH + ≥ 6.6 for quota support on tmpfs ≥ 6.9 for pidfs ✅ systemd utilizes several new kernel APIs, but will fall back gracefully diff --git a/man/user@.service.xml b/man/user@.service.xml index cc078d2d3c1..a046a759d5b 100644 --- a/man/user@.service.xml +++ b/man/user@.service.xml @@ -42,12 +42,13 @@ systemd.special7 for a list of units that form the basis of the unit hierarchies of system and user units. - user@UID.service is accompanied by the - system unit user-runtime-dir@UID.service, which - creates the user's runtime directory - /run/user/UID, and then removes it when this - unit is stopped. user-runtime-dir@UID.service - executes the systemd-user-runtime-dir binary to do the actual work. + user@UID.service is accompanied by the system unit + user-runtime-dir@UID.service, which creates the user's + runtime directory /run/user/UID when started, and removes + it when it is stopped. It also might apply runtime quota settings on /tmp/ and/or + /dev/shm/ for the + user. 
user-runtime-dir@UID.service executes the + systemd-user-runtime-dir binary to do the actual work. User processes may be started by the user@.service instance, in which case they will be part of that unit in the system hierarchy. They may also be started elsewhere, diff --git a/src/login/user-runtime-dir.c b/src/login/user-runtime-dir.c index f39c1ad225e..6c2fef95dbd 100644 --- a/src/login/user-runtime-dir.c +++ b/src/login/user-runtime-dir.c @@ -8,15 +8,20 @@ #include "bus-error.h" #include "bus-locator.h" #include "dev-setup.h" +#include "devnum-util.h" +#include "fd-util.h" #include "format-util.h" #include "fs-util.h" #include "label-util.h" #include "limits-util.h" #include "main-func.h" +#include "missing_magic.h" +#include "missing_syscall.h" #include "mkdir-label.h" #include "mount-util.h" #include "mountpoint-util.h" #include "path-util.h" +#include "quota-util.h" #include "rm-rf.h" #include "selinux-util.h" #include "smack-util.h" @@ -24,6 +29,7 @@ #include "string-util.h" #include "strv.h" #include "user-util.h" +#include "userdb.h" static int acquire_runtime_dir_properties(uint64_t *ret_size, uint64_t *ret_inodes) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -126,6 +132,26 @@ static int user_mkdir_runtime_path( return 0; } +static int do_mount(UserRecord *ur) { /* Sets up /run/user/<UID> for 'ur': refuses with ENOMSG if the record lacks a valid UID or GID, queries the size and inode limits via acquire_runtime_dir_properties(), then passes path, owner and limits to user_mkdir_runtime_path(). */ + int r; + + assert(ur); + + if (!uid_is_valid(ur->uid) || !gid_is_valid(ur->gid)) + return log_error_errno(SYNTHETIC_ERRNO(ENOMSG), "User '%s' lacks UID or GID, refusing.", ur->user_name); + + uint64_t runtime_dir_size, runtime_dir_inodes; + r = acquire_runtime_dir_properties(&runtime_dir_size, &runtime_dir_inodes); + if (r < 0) + return r; + + char runtime_path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)]; + xsprintf(runtime_path, "/run/user/" UID_FMT, ur->uid); + + log_debug("Will mount %s owned by "UID_FMT":"GID_FMT, runtime_path, ur->uid, ur->gid); + return user_mkdir_runtime_path(runtime_path, ur->uid, ur->gid, runtime_dir_size, runtime_dir_inodes); +} + 
static int user_remove_runtime_path(const char *runtime_path) { int r; @@ -149,31 +175,6 @@ static int user_remove_runtime_path(const char *runtime_path) { return 0; } -static int do_mount(const char *user) { - char runtime_path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)]; - uint64_t runtime_dir_size, runtime_dir_inodes; - uid_t uid; - gid_t gid; - int r; - - r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0); - if (r < 0) - return log_error_errno(r, - r == -ESRCH ? "No such user \"%s\"" : - r == -ENOMSG ? "UID \"%s\" is invalid or has an invalid main group" - : "Failed to look up user \"%s\": %m", - user); - - r = acquire_runtime_dir_properties(&runtime_dir_size, &runtime_dir_inodes); - if (r < 0) - return r; - - xsprintf(runtime_path, "/run/user/" UID_FMT, uid); - - log_debug("Will mount %s owned by "UID_FMT":"GID_FMT, runtime_path, uid, gid); - return user_mkdir_runtime_path(runtime_path, uid, gid, runtime_dir_size, runtime_dir_inodes); -} - static int do_umount(const char *user) { char runtime_path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)]; uid_t uid; @@ -197,6 +198,133 @@ static int do_umount(const char *user) { return user_remove_runtime_path(runtime_path); } +/* Sets a per-UID block quota on every tmpfs among 'paths'. The quota is the share of the file system's total size + * selected by 'scale' (0 yields 0, UINT32_MAX means no relative cap, otherwise scale/UINT32_MAX), capped at 'limit' + * bytes. Failures on individual paths are logged and skipped; only an allocation failure is returned as error. */ +static int apply_tmpfs_quota( + char **paths, + uid_t uid, + uint64_t limit, + uint32_t scale) { + + _cleanup_set_free_ Set *processed = NULL; + int r; + + assert(uid_is_valid(uid)); + + STRV_FOREACH(p, paths) { + _cleanup_close_ int fd = open(*p, O_DIRECTORY|O_CLOEXEC); + if (fd < 0) { + log_warning_errno(errno, "Failed to open '%s' in order to set quota, ignoring: %m", *p); + continue; + } + + struct stat st; + if (fstat(fd, &st) < 0) { + log_warning_errno(errno, "Failed to stat '%s' in order to set quota, ignoring: %m", *p); + continue; + } + + /* Cover for bind mounted or symlinked /var/tmp/ + /tmp/ */ + if (set_contains(processed, DEVNUM_TO_PTR(st.st_dev))) { + log_debug("Not setting quota on '%s', since already processed.", *p); + continue; + } + + /* Remember we already dealt with this 
fs, even if the subsequent operation fails, since + * there's no point in applying quota twice, regardless if it succeeds or not. */ + if (set_ensure_put(&processed, /* hash_ops= */ NULL, DEVNUM_TO_PTR(st.st_dev)) < 0) + return log_oom(); + + struct statfs sfs; + if (fstatfs(fd, &sfs) < 0) { + log_warning_errno(errno, "Failed to statfs '%s' in order to set quota, ignoring: %m", *p); + continue; + } + + if (!is_fs_type(&sfs, TMPFS_MAGIC)) { + log_debug("Not setting quota on '%s', since not tmpfs.", *p); + continue; + } + + struct dqblk req; + r = RET_NERRNO(quotactl_fd(fd, QCMD_FIXED(Q_GETQUOTA, USRQUOTA), uid, &req)); + if (r == -ESRCH) + zero(req); + else if (ERRNO_IS_NEG_NOT_SUPPORTED(r)) { + log_debug_errno(r, "No UID quota support on %s, not setting quota: %m", *p); + continue; + } else if (ERRNO_IS_NEG_PRIVILEGE(r)) { + log_debug_errno(r, "Lacking privileges to query UID quota on %s, not setting quota: %m", *p); + continue; + } else if (r < 0) { + log_warning_errno(r, "Failed to query disk quota on %s for UID " UID_FMT ", ignoring: %m", *p, uid); + continue; + } + + /* 'scale' is a fraction relative to UINT32_MAX: 0 yields 0, UINT32_MAX lifts the relative cap, and + * anything in between grants scale/UINT32_MAX of the file system's total size. */ + uint64_t v = + (scale == 0) ? 0 : + (scale == UINT32_MAX) ? 
UINT64_MAX : + (uint64_t) ((double) (sfs.f_blocks * sfs.f_frsize) * scale / UINT32_MAX); + + v = MIN(v, limit); + v /= QIF_DQBLKSIZE; + + if (FLAGS_SET(req.dqb_valid, QIF_BLIMITS) && v == req.dqb_bhardlimit) { + /* Shortcut things if everything is set up properly already */ + log_debug("Configured quota on '%s' already matches the intended setting, not updating quota.", *p); + continue; + } + + req.dqb_valid = QIF_BLIMITS; + req.dqb_bsoftlimit = req.dqb_bhardlimit = v; + + r = RET_NERRNO(quotactl_fd(fd, QCMD_FIXED(Q_SETQUOTA, USRQUOTA), uid, &req)); + if (r == -ESRCH) { + log_debug_errno(r, "Not setting UID quota on %s since UID quota is not supported: %m", *p); + continue; + } else if (ERRNO_IS_NEG_PRIVILEGE(r)) { + log_debug_errno(r, "Lacking privileges to set UID quota on %s, skipping: %m", *p); + continue; + } else if (r < 0) { + log_warning_errno(r, "Failed to set disk quota on %s for UID " UID_FMT ", ignoring: %m", *p, uid); + continue; + } + + log_info("Successfully configured disk quota for UID " UID_FMT " on %s to %s", uid, *p, FORMAT_BYTES(v * QIF_DQBLKSIZE)); + } + + return 0; +} + +/* Applies the /tmp/ + /var/tmp/ and the /dev/shm/ limits of the user record 'ur' via apply_tmpfs_quota(). Skips the + * root user, and refuses records that lack a valid UID with ENOMSG. */ +static int do_tmpfs_quota(UserRecord *ur) { + int r; + + assert(ur); + + if (user_record_is_root(ur)) { + log_debug("Not applying tmpfs quota to root user."); + return 0; + } + + if (!uid_is_valid(ur->uid)) + return log_error_errno(SYNTHETIC_ERRNO(ENOMSG), "User '%s' lacks UID, refusing.", ur->user_name); + + r = apply_tmpfs_quota(STRV_MAKE("/tmp", "/var/tmp"), ur->uid, ur->tmp_limit.limit, user_record_tmp_limit_scale(ur)); + if (r < 0) + return r; + + r = apply_tmpfs_quota(STRV_MAKE("/dev/shm"), ur->uid, ur->dev_shm_limit.limit, user_record_dev_shm_limit_scale(ur)); + if (r < 0) + return r; + + return 0; +} + static int run(int argc, char *argv[]) { int r; @@ -218,10 +346,26 @@ static int run(int argc, char *argv[]) { if (r < 0) return r; - if (streq(verb, "start")) - return do_mount(user); + if (streq(verb, "start")) { + _cleanup_(user_record_unrefp) UserRecord *ur = NULL; 
+ r = userdb_by_name(user, USERDB_PARSE_NUMERIC|USERDB_SUPPRESS_SHADOW, &ur); + if (r == -ESRCH) + return log_error_errno(r, "User '%s' does not exist: %m", user); + if (r < 0) + return log_error_errno(r, "Failed to resolve user '%s': %m", user); + + /* We do two things here: mount the per-user XDG_RUNTIME_DIR, and set up tmpfs quota on /tmp/ + * and /dev/shm/. */ + + r = 0; + RET_GATHER(r, do_mount(ur)); + RET_GATHER(r, do_tmpfs_quota(ur)); + return r; + } + if (streq(verb, "stop")) return do_umount(user); + assert_not_reached(); }