1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
9 #include <sys/statfs.h>
10 #include <sys/types.h>
12 #include "constants.h"
/* Names of the hierarchies systemd itself uses to track services:
 * the named v1 hierarchy, the hybrid-mode named hierarchy, and the
 * normalized pseudo-controller name used internally. */
#define SYSTEMD_CGROUP_CONTROLLER_LEGACY "name=systemd"
#define SYSTEMD_CGROUP_CONTROLLER_HYBRID "name=unified"
#define SYSTEMD_CGROUP_CONTROLLER "_systemd"
/* An enum of well known cgroup controllers */
typedef enum CGroupController {
        /* Original cgroup controllers */
        CGROUP_CONTROLLER_CPU,
        CGROUP_CONTROLLER_CPUACCT,    /* v1 only */
        CGROUP_CONTROLLER_CPUSET,     /* v2 only */
        CGROUP_CONTROLLER_IO,         /* v2 only */
        CGROUP_CONTROLLER_BLKIO,      /* v1 only */
        CGROUP_CONTROLLER_MEMORY,
        CGROUP_CONTROLLER_DEVICES,    /* v1 only */
        CGROUP_CONTROLLER_PIDS,

        /* BPF-based pseudo-controllers, v2 only */
        CGROUP_CONTROLLER_BPF_FIREWALL,
        CGROUP_CONTROLLER_BPF_DEVICES,
        CGROUP_CONTROLLER_BPF_FOREIGN,
        CGROUP_CONTROLLER_BPF_SOCKET_BIND,
        CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES,
        /* The BPF hook implementing RestrictFileSystems= is not defined here.
         * It's applied as late as possible in exec_invoke() so we don't block
         * our own unit setup code. */

        _CGROUP_CONTROLLER_MAX,
        _CGROUP_CONTROLLER_INVALID = -EINVAL,
} CGroupController;
46 #define CGROUP_CONTROLLER_TO_MASK(c) (1U << (c))
48 /* A bit mask of well known cgroup controllers */
49 typedef enum CGroupMask
{
50 CGROUP_MASK_CPU
= CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPU
),
51 CGROUP_MASK_CPUACCT
= CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUACCT
),
52 CGROUP_MASK_CPUSET
= CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUSET
),
53 CGROUP_MASK_IO
= CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_IO
),
54 CGROUP_MASK_BLKIO
= CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BLKIO
),
55 CGROUP_MASK_MEMORY
= CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY
),
56 CGROUP_MASK_DEVICES
= CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICES
),
57 CGROUP_MASK_PIDS
= CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_PIDS
),
58 CGROUP_MASK_BPF_FIREWALL
= CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FIREWALL
),
59 CGROUP_MASK_BPF_DEVICES
= CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_DEVICES
),
60 CGROUP_MASK_BPF_FOREIGN
= CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FOREIGN
),
61 CGROUP_MASK_BPF_SOCKET_BIND
= CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_SOCKET_BIND
),
62 CGROUP_MASK_BPF_RESTRICT_NETWORK_INTERFACES
= CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES
),
64 /* All real cgroup v1 controllers */
65 CGROUP_MASK_V1
= CGROUP_MASK_CPU
|CGROUP_MASK_CPUACCT
|CGROUP_MASK_BLKIO
|CGROUP_MASK_MEMORY
|CGROUP_MASK_DEVICES
|CGROUP_MASK_PIDS
,
67 /* All real cgroup v2 controllers */
68 CGROUP_MASK_V2
= CGROUP_MASK_CPU
|CGROUP_MASK_CPUSET
|CGROUP_MASK_IO
|CGROUP_MASK_MEMORY
|CGROUP_MASK_PIDS
,
70 /* All controllers we want to delegate in case of Delegate=yes. Which are pretty much the v2 controllers only, as delegation on v1 is not safe, and bpf stuff isn't a real controller */
71 CGROUP_MASK_DELEGATE
= CGROUP_MASK_V2
,
73 /* All cgroup v2 BPF pseudo-controllers */
74 CGROUP_MASK_BPF
= CGROUP_MASK_BPF_FIREWALL
|CGROUP_MASK_BPF_DEVICES
|CGROUP_MASK_BPF_FOREIGN
|CGROUP_MASK_BPF_SOCKET_BIND
|CGROUP_MASK_BPF_RESTRICT_NETWORK_INTERFACES
,
76 _CGROUP_MASK_ALL
= CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX
) - 1,
79 static inline CGroupMask
CGROUP_MASK_EXTEND_JOINED(CGroupMask mask
) {
80 /* We always mount "cpu" and "cpuacct" in the same hierarchy. Hence, when one bit is set also set the other */
82 if (mask
& (CGROUP_MASK_CPU
|CGROUP_MASK_CPUACCT
))
83 mask
|= (CGROUP_MASK_CPU
|CGROUP_MASK_CPUACCT
);
88 CGroupMask
get_cpu_accounting_mask(void);
89 bool cpu_accounting_is_cheap(void);
/* Special values for all weight knobs on unified hierarchy */
#define CGROUP_WEIGHT_INVALID UINT64_MAX
#define CGROUP_WEIGHT_IDLE UINT64_C(0)
#define CGROUP_WEIGHT_MIN UINT64_C(1)
#define CGROUP_WEIGHT_MAX UINT64_C(10000)
#define CGROUP_WEIGHT_DEFAULT UINT64_C(100)

#define CGROUP_LIMIT_MIN UINT64_C(0)
#define CGROUP_LIMIT_MAX UINT64_MAX

/* Returns true if x is the "invalid/unset" marker or lies within [MIN, MAX].
 * NOTE(review): the "return" keyword and closing brace were missing from the
 * garbled source and have been reconstructed. */
static inline bool CGROUP_WEIGHT_IS_OK(uint64_t x) {
        return
            x == CGROUP_WEIGHT_INVALID ||
            (x >= CGROUP_WEIGHT_MIN && x <= CGROUP_WEIGHT_MAX);
}
/* IO limits on unified hierarchy */
typedef enum CGroupIOLimitType {
        /* NOTE(review): the four member lines were missing from the garbled source;
         * reconstructed from upstream systemd — verify against the original file. */
        CGROUP_IO_RBPS_MAX,
        CGROUP_IO_WBPS_MAX,
        CGROUP_IO_RIOPS_MAX,
        CGROUP_IO_WIOPS_MAX,

        _CGROUP_IO_LIMIT_TYPE_MAX,
        _CGROUP_IO_LIMIT_TYPE_INVALID = -EINVAL,
} CGroupIOLimitType;
118 extern const uint64_t cgroup_io_limit_defaults
[_CGROUP_IO_LIMIT_TYPE_MAX
];
120 const char* cgroup_io_limit_type_to_string(CGroupIOLimitType t
) _const_
;
121 CGroupIOLimitType
cgroup_io_limit_type_from_string(const char *s
) _pure_
;
/* Special values for the cpu.shares attribute */
#define CGROUP_CPU_SHARES_INVALID UINT64_MAX
#define CGROUP_CPU_SHARES_MIN UINT64_C(2)
#define CGROUP_CPU_SHARES_MAX UINT64_C(262144)
#define CGROUP_CPU_SHARES_DEFAULT UINT64_C(1024)

/* Returns true if x is the "invalid/unset" marker or lies within [MIN, MAX]. */
static inline bool CGROUP_CPU_SHARES_IS_OK(uint64_t x) {
        return
            x == CGROUP_CPU_SHARES_INVALID ||
            (x >= CGROUP_CPU_SHARES_MIN && x <= CGROUP_CPU_SHARES_MAX);
}
135 /* Special values for the special {blkio,io}.bfq.weight attribute */
136 #define CGROUP_BFQ_WEIGHT_INVALID UINT64_MAX
137 #define CGROUP_BFQ_WEIGHT_MIN UINT64_C(1)
138 #define CGROUP_BFQ_WEIGHT_MAX UINT64_C(1000)
139 #define CGROUP_BFQ_WEIGHT_DEFAULT UINT64_C(100)
141 /* Convert the normal io.weight value to io.bfq.weight */
142 static inline uint64_t BFQ_WEIGHT(uint64_t io_weight
) {
144 io_weight
<= CGROUP_WEIGHT_DEFAULT
?
145 CGROUP_BFQ_WEIGHT_DEFAULT
- (CGROUP_WEIGHT_DEFAULT
- io_weight
) * (CGROUP_BFQ_WEIGHT_DEFAULT
- CGROUP_BFQ_WEIGHT_MIN
) / (CGROUP_WEIGHT_DEFAULT
- CGROUP_WEIGHT_MIN
) :
146 CGROUP_BFQ_WEIGHT_DEFAULT
+ (io_weight
- CGROUP_WEIGHT_DEFAULT
) * (CGROUP_BFQ_WEIGHT_MAX
- CGROUP_BFQ_WEIGHT_DEFAULT
) / (CGROUP_WEIGHT_MAX
- CGROUP_WEIGHT_DEFAULT
);
/* Special values for the blkio.weight attribute */
#define CGROUP_BLKIO_WEIGHT_INVALID UINT64_MAX
#define CGROUP_BLKIO_WEIGHT_MIN UINT64_C(10)
#define CGROUP_BLKIO_WEIGHT_MAX UINT64_C(1000)
#define CGROUP_BLKIO_WEIGHT_DEFAULT UINT64_C(500)

/* Returns true if x is the "invalid/unset" marker or lies within [MIN, MAX]. */
static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) {
        return
            x == CGROUP_BLKIO_WEIGHT_INVALID ||
            (x >= CGROUP_BLKIO_WEIGHT_MIN && x <= CGROUP_BLKIO_WEIGHT_MAX);
}
/* Which parts of the cgroup tree are mounted as cgroup v2 ("unified"). */
typedef enum CGroupUnified {
        CGROUP_UNIFIED_UNKNOWN = -1,
        CGROUP_UNIFIED_NONE = 0,        /* Both systemd and controllers on legacy */
        CGROUP_UNIFIED_SYSTEMD = 1,     /* Only systemd on unified */
        CGROUP_UNIFIED_ALL = 2,         /* Both systemd and controllers on unified */
} CGroupUnified;
/*
 * We accept named hierarchies in the syntax "foo" and "name=foo".
 *
 * We expect that named hierarchies do not conflict in name with a
 * kernel hierarchy, modulo the "name=" prefix.
 *
 * We always generate "normalized" controller names, i.e. without the
 * "name=" prefix.
 *
 * We require absolute cgroup paths. When returning, we will always
 * generate paths with multiple adjacent / removed.
 */
183 int cg_path_open(const char *controller
, const char *path
);
184 int cg_cgroupid_open(int fsfd
, uint64_t id
);
186 int cg_enumerate_processes(const char *controller
, const char *path
, FILE **ret
);
187 int cg_read_pid(FILE *f
, pid_t
*ret
);
188 int cg_read_pidref(FILE *f
, PidRef
*ret
);
189 int cg_read_event(const char *controller
, const char *path
, const char *event
, char **ret
);
191 int cg_enumerate_subgroups(const char *controller
, const char *path
, DIR **ret
);
192 int cg_read_subgroup(DIR *d
, char **ret
);
/* Flags modifying cgroup enumeration/kill behavior. */
typedef enum CGroupFlags {
        CGROUP_SIGCONT     = 1 << 0,
        CGROUP_IGNORE_SELF = 1 << 1,
        CGROUP_REMOVE      = 1 << 2,
} CGroupFlags;
200 typedef int (*cg_kill_log_func_t
)(const PidRef
*pid
, int sig
, void *userdata
);
202 int cg_kill(const char *path
, int sig
, CGroupFlags flags
, Set
*s
, cg_kill_log_func_t kill_log
, void *userdata
);
203 int cg_kill_kernel_sigkill(const char *path
);
204 int cg_kill_recursive(const char *path
, int sig
, CGroupFlags flags
, Set
*s
, cg_kill_log_func_t kill_log
, void *userdata
);
206 int cg_split_spec(const char *spec
, char **ret_controller
, char **ret_path
);
207 int cg_mangle_path(const char *path
, char **ret
);
209 int cg_get_path(const char *controller
, const char *path
, const char *suffix
, char **ret
);
210 int cg_get_path_and_check(const char *controller
, const char *path
, const char *suffix
, char **ret
);
212 int cg_pid_get_path(const char *controller
, pid_t pid
, char **ret
);
213 int cg_pidref_get_path(const char *controller
, const PidRef
*pidref
, char **ret
);
215 int cg_rmdir(const char *controller
, const char *path
);
217 int cg_is_threaded(const char *path
);
219 int cg_is_delegated(const char *path
);
220 int cg_is_delegated_fd(int fd
);
222 int cg_has_coredump_receive(const char *path
);
/* NOTE(review): the enum opener/closer were missing from the garbled source and
 * have been reconstructed; the type name is confirmed by its use in
 * cg_get_keyed_attribute_full() below. */
typedef enum CGroupKeyMode {
        CG_KEY_MODE_GRACEFUL = 1 << 0,
} CGroupKeyMode;
228 int cg_set_attribute(const char *controller
, const char *path
, const char *attribute
, const char *value
);
229 int cg_get_attribute(const char *controller
, const char *path
, const char *attribute
, char **ret
);
230 int cg_get_keyed_attribute_full(const char *controller
, const char *path
, const char *attribute
, char **keys
, char **values
, CGroupKeyMode mode
);
/* Strict keyed-attribute lookup: mode 0, i.e. all requested keys must be present.
 * NOTE(review): the "path", "keys" and "ret_values" parameter lines were missing
 * from the garbled source; reconstructed from the visible call arguments. */
static inline int cg_get_keyed_attribute(
                const char *controller,
                const char *path,
                const char *attribute,
                char **keys,
                char **ret_values) {
        return cg_get_keyed_attribute_full(controller, path, attribute, keys, ret_values, 0);
}
241 static inline int cg_get_keyed_attribute_graceful(
242 const char *controller
,
244 const char *attribute
,
247 return cg_get_keyed_attribute_full(controller
, path
, attribute
, keys
, ret_values
, CG_KEY_MODE_GRACEFUL
);
/* Typed attribute readers and xattr helpers on cgroup paths. */
int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret);

/* Does a parse_boolean() on the attribute contents and sets ret accordingly */
int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret);

int cg_get_owner(const char *path, uid_t *ret_uid);

int cg_set_xattr(const char *path, const char *name, const void *value, size_t size, int flags);
int cg_get_xattr(const char *path, const char *name, void *value, size_t size);
int cg_get_xattr_malloc(const char *path, const char *name, char **ret);
/* Returns negative on error, and 0 or 1 on success for the bool value */
int cg_get_xattr_bool(const char *path, const char *name);
int cg_remove_xattr(const char *path, const char *name);

int cg_install_release_agent(const char *controller, const char *agent);
int cg_uninstall_release_agent(const char *controller);
267 int cg_is_empty(const char *controller
, const char *path
);
268 int cg_is_empty_recursive(const char *controller
, const char *path
);
270 int cg_get_root_path(char **path
);
272 int cg_path_get_cgroupid(const char *path
, uint64_t *ret
);
273 int cg_fd_get_cgroupid(int fd
, uint64_t *ret
);
274 int cg_path_get_session(const char *path
, char **ret_session
);
275 int cg_path_get_owner_uid(const char *path
, uid_t
*ret_uid
);
276 int cg_path_get_unit(const char *path
, char **ret_unit
);
277 int cg_path_get_unit_path(const char *path
, char **ret_unit
);
278 int cg_path_get_user_unit(const char *path
, char **ret_unit
);
279 int cg_path_get_machine_name(const char *path
, char **ret_machine
);
280 int cg_path_get_slice(const char *path
, char **ret_slice
);
281 int cg_path_get_user_slice(const char *path
, char **ret_slice
);
283 int cg_shift_path(const char *cgroup
, const char *cached_root
, const char **ret_shifted
);
284 int cg_pid_get_path_shifted(pid_t pid
, const char *cached_root
, char **ret_cgroup
);
286 int cg_pid_get_session(pid_t pid
, char **ret_session
);
287 int cg_pid_get_owner_uid(pid_t pid
, uid_t
*ret_uid
);
288 int cg_pid_get_unit(pid_t pid
, char **ret_unit
);
289 int cg_pidref_get_unit(const PidRef
*pidref
, char **ret
);
290 int cg_pid_get_user_unit(pid_t pid
, char **ret_unit
);
291 int cg_pid_get_machine_name(pid_t pid
, char **ret_machine
);
292 int cg_pid_get_slice(pid_t pid
, char **ret_slice
);
293 int cg_pid_get_user_slice(pid_t pid
, char **ret_slice
);
295 int cg_path_decode_unit(const char *cgroup
, char **ret_unit
);
297 bool cg_needs_escape(const char *p
);
298 int cg_escape(const char *p
, char **ret
);
299 char *cg_unescape(const char *p
) _pure_
;
301 bool cg_controller_is_valid(const char *p
);
303 int cg_slice_to_path(const char *unit
, char **ret
);
305 typedef const char* (*cg_migrate_callback_t
)(CGroupMask mask
, void *userdata
);
307 int cg_mask_supported(CGroupMask
*ret
);
308 int cg_mask_supported_subtree(const char *root
, CGroupMask
*ret
);
309 int cg_mask_from_string(const char *s
, CGroupMask
*ret
);
310 int cg_mask_to_string(CGroupMask mask
, char **ret
);
312 int cg_kernel_controllers(Set
**controllers
);
314 bool cg_ns_supported(void);
315 bool cg_freezer_supported(void);
316 bool cg_kill_supported(void);
318 int cg_all_unified(void);
319 int cg_hybrid_unified(void);
320 int cg_unified_controller(const char *controller
);
321 int cg_unified_cached(bool flush
);
/* Convenience wrapper: query unified-mode state with a forced cache refresh.
 * NOTE(review): closing brace was missing from the garbled source; reconstructed. */
static inline int cg_unified(void) {
        return cg_unified_cached(true);
}
326 const char* cgroup_controller_to_string(CGroupController c
) _const_
;
327 CGroupController
cgroup_controller_from_string(const char *s
) _pure_
;
329 bool is_cgroup_fs(const struct statfs
*s
);
330 bool fd_is_cgroup_fs(int fd
);
/* NOTE(review): the non-sentinel members were missing from the garbled source;
 * reconstructed from upstream systemd — verify against the original file. */
typedef enum ManagedOOMMode {
        MANAGED_OOM_AUTO,
        MANAGED_OOM_KILL,
        _MANAGED_OOM_MODE_MAX,
        _MANAGED_OOM_MODE_INVALID = -EINVAL,
} ManagedOOMMode;
339 const char* managed_oom_mode_to_string(ManagedOOMMode m
) _const_
;
340 ManagedOOMMode
managed_oom_mode_from_string(const char *s
) _pure_
;
/* Per-unit preference for systemd-oomd kill selection. */
typedef enum ManagedOOMPreference {
        MANAGED_OOM_PREFERENCE_NONE = 0,
        MANAGED_OOM_PREFERENCE_AVOID = 1,
        MANAGED_OOM_PREFERENCE_OMIT = 2,
        _MANAGED_OOM_PREFERENCE_MAX,
        _MANAGED_OOM_PREFERENCE_INVALID = -EINVAL,
} ManagedOOMPreference;
350 const char* managed_oom_preference_to_string(ManagedOOMPreference a
) _const_
;
351 ManagedOOMPreference
managed_oom_preference_from_string(const char *s
) _pure_
;
353 /* The structure to pass to name_to_handle_at() on cgroupfs2 */
355 struct file_handle file_handle
;
356 uint8_t space
[offsetof(struct file_handle
, f_handle
) + sizeof(uint64_t)];
/* Initializer for a CGFileHandle sized for a 64-bit cgroup ID, and an accessor
 * reading that ID back out of the handle payload.
 * NOTE(review): the compound-literal braces of CG_FILE_HANDLE_INIT were missing
 * from the garbled source; reconstructed — verify against upstream. */
#define CG_FILE_HANDLE_INIT                                     \
        (CGFileHandle) {                                        \
                .file_handle.handle_bytes = sizeof(uint64_t),   \
                .file_handle.handle_type = FILEID_KERNFS,       \
        }

#define CG_FILE_HANDLE_CGROUPID(fh) (*(uint64_t*) (fh).file_handle.f_handle)