From: Andres Beltran Date: Tue, 1 Jul 2025 17:37:48 +0000 (+0000) Subject: core: add quota support for State, Cache, and Log exec directories X-Git-Tag: v258-rc1~144^2~3 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=26c6f3271a6c8e904379e69926ee9e5285589429;p=thirdparty%2Fsystemd.git core: add quota support for State, Cache, and Log exec directories --- diff --git a/docs/DISK-QUOTAS-PROJECTIDS.md b/docs/DISK-QUOTAS-PROJECTIDS.md new file mode 100644 index 00000000000..11dc42bda23 --- /dev/null +++ b/docs/DISK-QUOTAS-PROJECTIDS.md @@ -0,0 +1,14 @@ +--- +title: Project IDs for Disk Quotas on Exec Directories +category: Exec directories +layout: default +SPDX-License-Identifier: LGPL-2.1-or-later +--- + +# Project IDs on systemd Systems + +Project IDs are needed to enforce disk quotas for Exec Directories. +Project IDs are unsigned, 32-bit integers. For disk quota enforcement, +the range used is 2147483648 - 4294967294, which is the highest range, +inspired by `UIDS-GIDS.md`. The range is defined through `PROJ_ID_MIN` +and `PROJ_ID_MAX` in `exec-invoke.c`. diff --git a/docs/TRANSIENT-SETTINGS.md b/docs/TRANSIENT-SETTINGS.md index d5c9a33df82..652ac3d95e6 100644 --- a/docs/TRANSIENT-SETTINGS.md +++ b/docs/TRANSIENT-SETTINGS.md @@ -257,10 +257,16 @@ All execution-related settings are available for transient units. 
✓ RuntimeDirectoryMode= ✓ RuntimeDirectory= ✓ StateDirectoryMode= +✓ StateDirectoryAccounting= +✓ StateDirectoryQuota= ✓ StateDirectory= ✓ CacheDirectoryMode= +✓ CacheDirectoryAccounting= +✓ CacheDirectoryQuota= ✓ CacheDirectory= ✓ LogsDirectoryMode= +✓ LogsDirectoryAccounting= +✓ LogsDirectoryQuota= ✓ LogsDirectory= ✓ ConfigurationDirectoryMode= ✓ ConfigurationDirectory= diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index f5066e4834f..27c3929a9cc 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -3288,18 +3288,30 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u StateDirectoryMode = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b StateDirectoryAccounting = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly (tus) StateDirectoryQuota = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly as StateDirectory = ['...', ...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly a(sst) CacheDirectorySymlink = [...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u CacheDirectoryMode = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b CacheDirectoryAccounting = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly (tus) CacheDirectoryQuota = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly as CacheDirectory = ['...', ...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly a(sst) LogsDirectorySymlink = [...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u LogsDirectoryMode = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b LogsDirectoryAccounting = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly (tus) LogsDirectoryQuota = ...; + 
@org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly as LogsDirectory = ['...', ...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u ConfigurationDirectoryMode = ...; @@ -3351,6 +3363,12 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { readonly s MountImagePolicy = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s ExtensionImagePolicy = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly (tt) StateDirectoryQuotaUsage = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly (tt) CacheDirectoryQuotaUsage = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly (tt) LogsDirectoryQuotaUsage = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s KillMode = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") @@ -3885,10 +3903,22 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + + + + + + + + + + + @@ -3935,6 +3965,12 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + + + + + @@ -4579,18 +4615,30 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + + + + + + + + + + + @@ -4643,6 +4691,12 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + + + + + @@ -5443,18 +5497,30 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u StateDirectoryMode = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b StateDirectoryAccounting = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly (tus) StateDirectoryQuota = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly as StateDirectory = ['...', ...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly a(sst) CacheDirectorySymlink = [...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u 
CacheDirectoryMode = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b CacheDirectoryAccounting = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly (tus) CacheDirectoryQuota = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly as CacheDirectory = ['...', ...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly a(sst) LogsDirectorySymlink = [...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u LogsDirectoryMode = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b LogsDirectoryAccounting = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly (tus) LogsDirectoryQuota = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly as LogsDirectory = ['...', ...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u ConfigurationDirectoryMode = ...; @@ -5506,6 +5572,12 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { readonly s MountImagePolicy = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s ExtensionImagePolicy = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly (tt) StateDirectoryQuotaUsage = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly (tt) CacheDirectoryQuotaUsage = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly (tt) LogsDirectoryQuotaUsage = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s KillMode = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") @@ -6060,10 +6132,22 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + + + + + + + + + + + @@ -6110,6 +6194,12 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + + + + + @@ -6734,18 +6824,30 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + + + + + + + + + + + @@ -6798,6 +6900,12 @@ node 
/org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + + + + + @@ -7422,18 +7530,30 @@ node /org/freedesktop/systemd1/unit/home_2emount { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u StateDirectoryMode = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b StateDirectoryAccounting = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly (tus) StateDirectoryQuota = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly as StateDirectory = ['...', ...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly a(sst) CacheDirectorySymlink = [...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u CacheDirectoryMode = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b CacheDirectoryAccounting = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly (tus) CacheDirectoryQuota = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly as CacheDirectory = ['...', ...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly a(sst) LogsDirectorySymlink = [...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u LogsDirectoryMode = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b LogsDirectoryAccounting = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly (tus) LogsDirectoryQuota = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly as LogsDirectory = ['...', ...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u ConfigurationDirectoryMode = ...; @@ -7485,6 +7605,12 @@ node /org/freedesktop/systemd1/unit/home_2emount { readonly s MountImagePolicy = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s ExtensionImagePolicy = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly (tt) StateDirectoryQuotaUsage = 
...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly (tt) CacheDirectoryQuotaUsage = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly (tt) LogsDirectoryQuotaUsage = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s KillMode = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") @@ -7961,10 +8087,22 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + + + + + + + + + + + @@ -8011,6 +8149,12 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + + + + + @@ -8543,18 +8687,30 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + + + + + + + + + + + @@ -8607,6 +8763,12 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + + + + + @@ -9364,18 +9526,30 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u StateDirectoryMode = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b StateDirectoryAccounting = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly (tus) StateDirectoryQuota = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly as StateDirectory = ['...', ...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly a(sst) CacheDirectorySymlink = [...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u CacheDirectoryMode = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b CacheDirectoryAccounting = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly (tus) CacheDirectoryQuota = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly as CacheDirectory = ['...', ...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly a(sst) LogsDirectorySymlink = [...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u LogsDirectoryMode = ...; 
@org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b LogsDirectoryAccounting = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly (tus) LogsDirectoryQuota = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly as LogsDirectory = ['...', ...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u ConfigurationDirectoryMode = ...; @@ -9427,6 +9601,12 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { readonly s MountImagePolicy = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s ExtensionImagePolicy = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly (tt) StateDirectoryQuotaUsage = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly (tt) CacheDirectoryQuotaUsage = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly (tt) LogsDirectoryQuotaUsage = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s KillMode = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") @@ -9885,10 +10065,22 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + + + + + + + + + + + @@ -9935,6 +10127,12 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + + + + + @@ -10449,18 +10647,30 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + + + + + + + + + + + @@ -10513,6 +10723,12 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + + + + + @@ -12076,8 +12292,17 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ PrivateUsersEx, and PrivatePIDs were added in version 257. ProtectHostnameEx, - DelegateNamespaces, and - RemoveSubGroup() were added in version 258. 
+ DelegateNamespaces, + RemoveSubgroup(), + StateDirectoryQuota, + StateDirectoryQuotaUsage, + StateDirectoryAccounting, + CacheDirectoryQuota, + CacheDirectoryQuotaUsage, + CacheDirectoryAccounting, + LogsDirectoryQuota, + LogsDirectoryQuotaUsage, and + LogsDirectoryAccounting were added in version 258. Socket Unit Objects @@ -12126,8 +12351,17 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ AcceptFileDescriptors, DelegateNamespaces, RemoveSubgroup(), - DeferTrigger, and - DeferTriggerMaxUSec were added in version 258. + DeferTrigger, + DeferTriggerMaxUSec, + StateDirectoryQuota, + StateDirectoryQuotaUsage, + StateDirectoryAccounting, + CacheDirectoryQuota, + CacheDirectoryQuotaUsage, + CacheDirectoryAccounting, + LogsDirectoryQuota, + LogsDirectoryQuotaUsage, and + LogsDirectoryAccounting were added in version 258. Mount Unit Objects @@ -12171,8 +12405,17 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ ProtectHostnameEx, DelegateNamespaces, RemoveSubgroup(), - ReloadResult, and - CleanResult were added in version 258. + ReloadResult, + CleanResult, + StateDirectoryQuota, + StateDirectoryQuotaUsage, + StateDirectoryAccounting, + CacheDirectoryQuota, + CacheDirectoryQuotaUsage, + CacheDirectoryAccounting, + LogsDirectoryQuota, + LogsDirectoryQuotaUsage, and + LogsDirectoryAccounting were added in version 258. Swap Unit Objects @@ -12214,8 +12457,17 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ ProtectControlGroupsEx, and PrivatePIDs were added in version 257. ProtectHostnameEx, - DelegateNamespaces, and - RemoveSubgroup() were added in version 258. + DelegateNamespaces, + RemoveSubgroup(), + StateDirectoryQuota, + StateDirectoryQuotaUsage, + StateDirectoryAccounting, + CacheDirectoryQuota, + CacheDirectoryQuotaUsage, + CacheDirectoryAccounting, + LogsDirectoryQuota, + LogsDirectoryQuotaUsage, and + LogsDirectoryAccounting were added in version 258. 
Slice Unit Objects diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index f9e790e4923..813ea023138 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1680,6 +1680,46 @@ StateDirectory=aaa/bbb ccc + + StateDirectoryQuota= + CacheDirectoryQuota= + LogsDirectoryQuota= + + Specifies the storage limits for the directories specified in StateDirectory=, + CacheDirectory=, or LogsDirectory= respectively. + + The storage quota is defined in terms of disk blocks and inodes, as per + quotactl. Takes an absolute size limit + in bytes. If the value is suffixed with K, M, G or T, the specified size is parsed as Kilobytes, Megabytes, Gigabytes, + or Terabytes (with the base 1024), respectively. If an absolute size limit is specified, only the block quota is set + (rounded up to the nearest block). Alternatively, a percentage value may be specified, which applies the same percent + quota to both blocks and inodes. Defaults to off, in which case no storage limits will be set. + + Only hard limits are set, not soft limits. If the underlying filesystem for the specified directories does not + support project quotas, the specified storage limits will not be set. In addition to enabling per-unit quotas with + these settings, it is necessary to enable prjquota on the file system level as well + (i.e. tune2fs -Q prjquota). Quotas must also be turned on with + quotaon. + + + + + + StateDirectoryAccounting= + CacheDirectoryAccounting= + LogsDirectoryAccounting= + + Takes a boolean argument. If true, a project ID is assigned to the directories specified in + StateDirectory=, CacheDirectory=, or LogsDirectory= + respectively, which is used for tracking disk usage when disk quotas are turned on + (see repquota). Defaults to false. + + To set and enforce disk quotas, StateDirectoryQuota=, CacheDirectoryQuota=, + or LogsDirectoryQuota= must be specified. 
+ + + + RuntimeDirectoryPreserve= diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c index deb88e7f6b6..0f428685466 100644 --- a/src/core/exec-invoke.c +++ b/src/core/exec-invoke.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #if HAVE_PAM @@ -30,6 +31,7 @@ #include "cgroup-setup.h" #include "cgroup.h" #include "chase.h" +#include "chattr-util.h" #include "chown-recursive.h" #include "constants.h" #include "copy.h" @@ -60,15 +62,19 @@ #include "open-file.h" #include "osc-context.h" #include "path-util.h" +#include "percent-util.h" #include "pidref.h" #include "proc-cmdline.h" #include "process-util.h" #include "psi-util.h" +#include "quota-util.h" +#include "random-util.h" #include "rlimit-util.h" #include "seccomp-util.h" #include "selinux-util.h" #include "set.h" #include "signal-util.h" +#include "siphash24.h" #include "smack-util.h" #include "socket-util.h" #include "stat-util.h" @@ -84,6 +90,11 @@ #define SNDBUF_SIZE (8*1024*1024) +/* Project id range for disk quotas */ +#define PROJ_ID_MIN UINT32_C(2147483648) +#define PROJ_ID_MAX UINT32_C(4294967294) +#define PROJ_ID_CLAMP_INTO_QUOTA_RANGE(id) ((uint32_t) ((id) % (PROJ_ID_MAX - PROJ_ID_MIN + 1)) + PROJ_ID_MIN) + static int flag_fds( const int fds[], size_t n_socket_fds, @@ -2575,6 +2586,217 @@ static int create_many_symlinks(const char *root, const char *source, char **sym return 0; } +static int set_exec_storage_quota(int fd, uint32_t proj_id, const QuotaLimit *ql) { + int r; + uint64_t block_limit = 0, inode_limit = 0; + + assert(fd >= 0); + assert(ql); + + if (ql->quota_absolute == 0 || ql->quota_scale == 0) + /* Limit of 0 means no usage is allowed. For quotactl, use 1 as the limit, since 0 means that + * hard limits are disabled */ + block_limit = inode_limit = 1; + else if (ql->quota_absolute == UINT64_MAX) { + _cleanup_close_ int fd_parent = -EBADF; + + /* Use target_dir's parent when setting quotas. 
If a FD for target_dir has been previously + * used for quotactl_fd(SET) and is passed again for fstatvfs(), the total number of blocks is not + * reported accurately (instead, the block limit is reported as total blocks). Thus, use the FD + * associated with the parent, so that total blocks is accurate */ + fd_parent = openat(fd, "..", O_PATH|O_CLOEXEC|O_DIRECTORY); + if (fd_parent < 0) + return -errno; + + uint32_t xattr_flags = 0; + r = read_fs_xattr_fd(fd_parent, &xattr_flags, /* ret_projid = */ NULL); + if (r < 0) + return r; + /* Refuse if parent has FS_XFLAG_PROJINHERIT since this will mean the total number of blocks will not + * be reported accurately */ + if (FLAGS_SET(xattr_flags, FS_XFLAG_PROJINHERIT)) + return -ENOMEDIUM; + + struct statvfs disk_st; + if (fstatvfs(fd_parent, &disk_st) < 0) + return -errno; + + block_limit = (uint64_t) DIV_ROUND_UP((uint64_t)((double) (disk_st.f_frsize * disk_st.f_blocks) / UINT32_MAX * ql->quota_scale), QIF_DQBLKSIZE); + inode_limit = (uint64_t) ((double) disk_st.f_files / UINT32_MAX * ql->quota_scale); + } else + block_limit = (uint64_t) DIV_ROUND_UP(ql->quota_absolute, QIF_DQBLKSIZE); + + struct dqblk req = { + .dqb_bhardlimit = block_limit, + .dqb_ihardlimit = inode_limit, + .dqb_valid = QIF_LIMITS, + }; + + r = quotactl_fd_with_fallback(fd, QCMD_FIXED(Q_SETQUOTA, PRJQUOTA), proj_id, &req); + if (r < 0) + return r; + + log_debug("Storage quotas set for project id %" PRIu32 ". 
Block limit = %" PRIu64 ", inode limit = %" PRIu64, proj_id, block_limit, inode_limit); + + return 0; +} + +static int unset_exec_storage_quota(int fd, uint32_t proj_id, bool quota_accounting) { + int r, quota_supported; + struct dqblk req; + + assert(fd >= 0); + + quota_supported = quota_query_proj_id(fd, proj_id, &req); + if (quota_supported < 0) + return log_debug_errno(quota_supported, "Failed to query disk quota for project ID %" PRIu32 ": %m", proj_id); + + /* Do not enforce quotas anymore */ + if (quota_supported && FLAGS_SET(req.dqb_valid, QIF_BLIMITS) && (req.dqb_bhardlimit > 0 || req.dqb_ihardlimit > 0)) { + req.dqb_bhardlimit = 0, req.dqb_ihardlimit = 0; + + r = quotactl_fd_with_fallback(fd, QCMD_FIXED(Q_SETQUOTA, PRJQUOTA), proj_id, &req); + if (r < 0) + return log_debug_errno(r, "Failed to disable project quotas for project ID %" PRIu32 ": %m", proj_id); + + log_debug("Storage quotas for project ID %" PRIu32 " were disabled", proj_id); + } + + /* Release project ID if no accounting needed */ + if (!quota_accounting) { + r = set_proj_id_recursive(fd, 0); + if (r < 0) + log_warning_errno(r, "Failed to release project ID %" PRIu32 ", ignoring: %m", proj_id); + } + + return 0; +} + +static int apply_exec_quotas( + const char *target_dir, + const char *cgroup_path, + ExecDirectoryType type, + const QuotaLimit *ql, + uint32_t *exec_dt_proj_id, /* in/out */ + bool *already_enforced) { /* in/out */ + + _cleanup_close_ int fd = -EBADF; + int r, quota_supported = 0; + + assert(target_dir); + assert(cgroup_path); + assert(ql); + assert(exec_dt_proj_id); + assert(already_enforced); + + /* Do not apply to the Runtime directory since tmpfs does not support project IDs yet */ + if (!IN_SET(type, EXEC_DIRECTORY_STATE, EXEC_DIRECTORY_CACHE, EXEC_DIRECTORY_LOGS)) + return 0; + + fd = open(target_dir, O_PATH|O_CLOEXEC|O_DIRECTORY); + if (fd < 0) + return log_debug_errno(errno, "Failed to open %s: %m", target_dir); + + /* Get the project ID of the current directory */ + 
uint32_t proj_id; + r = read_fs_xattr_fd(fd, /* ret_xflags = */ NULL, &proj_id); + if (ERRNO_IS_NEG_IOCTL_NOT_SUPPORTED(r)) { + log_debug_errno(r, "Not applying storage quotas. FS_IOC_FSGETXATTR not supported for %s: %m", target_dir); + return 0; + } + if (r < 0) + return log_debug_errno(r, "Failed to retrieve project ID for %s: %m", target_dir); + + /* If the first directory of this ExecType already has a project ID, adopt it as the project ID for all dirs of this ExecType */ + bool proj_id_exists = PROJ_ID_MIN <= proj_id && proj_id <= PROJ_ID_MAX; + if (proj_id_exists && *exec_dt_proj_id == 0) + *exec_dt_proj_id = proj_id; + + /* Check if enforcement should be disabled. Do not release project ID if accounting is enabled */ + if (!ql->quota_enforce) { + if (proj_id_exists) { + r = unset_exec_storage_quota(fd, proj_id, ql->quota_accounting); + if (r < 0) + return log_debug_errno(r, "Failed to unset project quotas for %s: %m", target_dir); + } + + if (!ql->quota_accounting) + return 0; + } + + if (*exec_dt_proj_id > 0 && *exec_dt_proj_id != proj_id) { + /* Set the existing project ID only if the current directory's ID does not exist or does not match */ + proj_id = *exec_dt_proj_id; + r = quota_proj_id_set_recursive(fd, proj_id, false); + if (r < 0) + return log_debug_errno(r, "Failed to set project ID for %s: %m", target_dir); + } else if (*exec_dt_proj_id == 0) { + /* Only generate a new project ID if it's the first directory of this ExecType to be processed and does not have an existing ID */ + static const sd_id128_t k = SD_ID128_ARRAY(e1,4a,79,9b,64,40,41,4a,a8,46,c2,f3,f9,19,4f,01); + _cleanup_free_ char *proj_id_plain = NULL; + + /* Generate candidate project id */ + proj_id_plain = strjoin(cgroup_path, "|", exec_directory_type_to_string(type)); + if (!proj_id_plain) + return log_oom_debug(); + + struct siphash state; + siphash24_init(&state, k.bytes); + siphash24_compress_string(proj_id_plain, &state); + proj_id = 
PROJ_ID_CLAMP_INTO_QUOTA_RANGE(siphash24_finalize(&state)); + +#define MAX_PROJ_ID_RETRIES 10 + for (unsigned attempt = 0;; attempt++) { + if (attempt >= MAX_PROJ_ID_RETRIES) + return log_debug_errno(SYNTHETIC_ERRNO(EBUSY), "Failed to generate unique project ID for %s: %m", target_dir); + + /* Check if project quotas are supported */ + struct dqblk req; + quota_supported = quota_query_proj_id(fd, proj_id, &req); + if (quota_supported < 0) + return log_debug_errno(quota_supported, "Failed to query disk quota for project ID %" PRIu32 ": %m", proj_id); + if (!quota_supported) { + log_debug("Not applying storage quotas. Project quotas are not supported for %s", target_dir); + return 0; + } + + if (!quota_dqblk_is_populated(&req)) { + int proj_id_was_set = quota_proj_id_set_recursive(fd, proj_id, true); + if (proj_id_was_set < 0) + return log_debug_errno(proj_id_was_set, "Failed to set project ID for %s: %m", target_dir); + if (proj_id_was_set) { + *exec_dt_proj_id = proj_id; + log_debug("Project ID %u generated for %s", proj_id, target_dir); + break; + } + } + + proj_id = (uint32_t) (random_u64_range(PROJ_ID_MAX - PROJ_ID_MIN + 1) + PROJ_ID_MIN); + } + } + + if (ql->quota_enforce && !*already_enforced) { + if (!quota_supported) { + struct dqblk req; + quota_supported = quota_query_proj_id(fd, proj_id, &req); + if (quota_supported < 0) + return log_debug_errno(quota_supported, "Failed to query disk quota for project ID %" PRIu32 ": %m", proj_id); + if (!quota_supported) { + log_debug("Not applying storage quotas. 
Project quotas are not supported for %s", target_dir); + return 0; + } + } + + r = set_exec_storage_quota(fd, proj_id, ql); + if (r < 0) + return log_debug_errno(r, "Failed to set storage quotas for %s: %m", target_dir); + + *already_enforced = true; + } + + return r; +} + static int setup_exec_directory( const ExecContext *context, const ExecParameters *params, @@ -2608,6 +2830,9 @@ static int setup_exec_directory( gid = 0; } + uint32_t exec_dt_proj_id = 0; + bool quota_already_enforced = false; + FOREACH_ARRAY(i, context->directories[type].items, context->directories[type].n_items) { _cleanup_free_ char *p = NULL, *pp = NULL; @@ -2898,6 +3123,11 @@ static int setup_exec_directory( if (r < 0) goto fail; } + + /* Apply storage quotas and accounting */ + r = apply_exec_quotas(target_dir, params->cgroup_path, type, &context->directories[type].exec_quota, &exec_dt_proj_id, &quota_already_enforced); + if (r < 0) + goto fail; } /* If we are not going to run in a namespace, set up the symlinks - otherwise diff --git a/src/core/execute-serialize.c b/src/core/execute-serialize.c index 9110d3447f8..0a1af05e51d 100644 --- a/src/core/execute-serialize.c +++ b/src/core/execute-serialize.c @@ -1848,6 +1848,28 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) { r = serialize_item(f, key, value); if (r < 0) return r; + + if (c->directories[dt].exec_quota.quota_enforce) { + _cleanup_free_ char *key_quota = NULL; + key_quota = strjoin("exec-context-quota-directories-", exec_directory_type_to_string(dt)); + if (!key_quota) + return log_oom_debug(); + + r = serialize_item_format(f, key_quota, "%" PRIu64 " %" PRIu32, c->directories[dt].exec_quota.quota_absolute, + c->directories[dt].exec_quota.quota_scale); + if (r < 0) + return r; + + } else if (c->directories[dt].exec_quota.quota_accounting) { + _cleanup_free_ char *key_quota = NULL; + key_quota = strjoin("exec-context-quota-accounting-directories-", exec_directory_type_to_string(dt)); + if (!key_quota) + return 
log_oom_debug(); + + r = serialize_bool(f, key_quota, c->directories[dt].exec_quota.quota_accounting); + if (r < 0) + return r; + } } r = serialize_usec(f, "exec-context-timeout-clean-usec", c->timeout_clean_usec); @@ -2735,7 +2757,7 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) { dt = exec_directory_type_from_string(type); if (dt < 0) - return -EINVAL; + return dt; r = parse_mode(mode, &c->directories[dt].mode); if (r < 0) @@ -2793,6 +2815,48 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) { return r; } } + } else if ((val = startswith(l, "exec-context-quota-accounting-directories-"))) { + _cleanup_free_ char *type = NULL, *quota_accounting = NULL; + ExecDirectoryType dt; + + r = split_pair(val, "=", &type, &quota_accounting); + if (r < 0) + return r; + + dt = exec_directory_type_from_string(type); + if (dt < 0) + return dt; + + r = parse_boolean(quota_accounting); + if (r < 0) + return r; + + c->directories[dt].exec_quota.quota_accounting = r; + } else if ((val = startswith(l, "exec-context-quota-directories-"))) { + _cleanup_free_ char *type = NULL, *quota_info = NULL, *quota_absolute = NULL, *quota_scale = NULL; + ExecDirectoryType dt; + + r = split_pair(val, "=", &type, &quota_info); + if (r < 0) + return r; + + r = split_pair(quota_info, " ", &quota_absolute, &quota_scale); + if (r < 0) + return r; + + dt = exec_directory_type_from_string(type); + if (dt < 0) + return dt; + + r = safe_atou64(quota_absolute, &c->directories[dt].exec_quota.quota_absolute); + if (r < 0) + return r; + + r = safe_atou32(quota_scale, &c->directories[dt].exec_quota.quota_scale); + if (r < 0) + return r; + + c->directories[dt].exec_quota.quota_enforce = true; } else if ((val = startswith(l, "exec-context-timeout-clean-usec="))) { r = deserialize_usec(val, &c->timeout_clean_usec); if (r < 0) diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in index 96fe04aaf40..7d4d174d845 100644 --- a/src/core/load-fragment-gperf.gperf.in +++ 
b/src/core/load-fragment-gperf.gperf.in @@ -145,10 +145,16 @@ {{type}}.RuntimeDirectoryMode, config_parse_mode, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_RUNTIME].mode) {{type}}.RuntimeDirectory, config_parse_exec_directories, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_RUNTIME]) {{type}}.StateDirectoryMode, config_parse_mode, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_STATE].mode) +{{type}}.StateDirectoryAccounting, config_parse_bool, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_STATE].exec_quota.quota_accounting) +{{type}}.StateDirectoryQuota, config_parse_exec_quota, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_STATE].exec_quota) {{type}}.StateDirectory, config_parse_exec_directories, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_STATE]) {{type}}.CacheDirectoryMode, config_parse_mode, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_CACHE].mode) +{{type}}.CacheDirectoryAccounting, config_parse_bool, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_CACHE].exec_quota.quota_accounting) +{{type}}.CacheDirectoryQuota, config_parse_exec_quota, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_CACHE].exec_quota) {{type}}.CacheDirectory, config_parse_exec_directories, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_CACHE]) {{type}}.LogsDirectoryMode, config_parse_mode, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_LOGS].mode) +{{type}}.LogsDirectoryAccounting, config_parse_bool, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_LOGS].exec_quota.quota_accounting) +{{type}}.LogsDirectoryQuota, config_parse_exec_quota, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_LOGS].exec_quota) {{type}}.LogsDirectory, config_parse_exec_directories, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_LOGS]) {{type}}.ConfigurationDirectoryMode, config_parse_mode, 0, 
offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_CONFIGURATION].mode) {{type}}.ConfigurationDirectory, config_parse_exec_directories, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_CONFIGURATION]) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 3705e958846..c1e704b1c6a 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -4602,6 +4602,48 @@ int config_parse_exec_directories( } } +int config_parse_exec_quota( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + QuotaLimit *quota_limit = ASSERT_PTR(data); + uint64_t quota_absolute = UINT64_MAX; + uint32_t quota_scale = UINT32_MAX; + int r; + + if (isempty(rvalue) || streq(rvalue, "off")) { + quota_limit->quota_enforce = false; + quota_limit->quota_absolute = UINT64_MAX; + quota_limit->quota_scale = UINT32_MAX; + return 0; + } + + r = parse_permyriad(rvalue); + if (r < 0) { + r = parse_size(rvalue, 1024, &quota_absolute); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse disk quota value, ignoring: %s", rvalue); + return 0; + } + } else + /* Normalize to 2^32-1 == 100% */ + quota_scale = UINT32_SCALE_FROM_PERMYRIAD(r); + + quota_limit->quota_absolute = quota_absolute; + quota_limit->quota_scale = quota_scale; + quota_limit->quota_enforce = true; + + return 0; +} + int config_parse_set_credential( const char *unit, const char *filename, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index e2fe7dce1bb..a31ad750d3d 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -101,6 +101,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_exec_smack_process_label); CONFIG_PARSER_PROTOTYPE(config_parse_address_families); CONFIG_PARSER_PROTOTYPE(config_parse_exec_preserve_mode); CONFIG_PARSER_PROTOTYPE(config_parse_exec_directories); 
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_quota); CONFIG_PARSER_PROTOTYPE(config_parse_set_credential); CONFIG_PARSER_PROTOTYPE(config_parse_load_credential); CONFIG_PARSER_PROTOTYPE(config_parse_import_credential);