1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
9 #include <sys/statfs.h>
10 #include <sys/types.h>
12 #include "constants.h"
/* Names of the systemd-private cgroup hierarchy: the "name=systemd" named v1
 * hierarchy on a legacy setup, the "name=unified" named hierarchy on a hybrid
 * setup, and a generic placeholder name used internally (presumably resolved
 * to one of the above depending on the detected layout — confirm in callers). */
#define SYSTEMD_CGROUP_CONTROLLER_LEGACY "name=systemd"
#define SYSTEMD_CGROUP_CONTROLLER_HYBRID "name=unified"
#define SYSTEMD_CGROUP_CONTROLLER "_systemd"
/* An enum of well known cgroup controllers */
typedef enum CGroupController {
        /* Original cgroup controllers */
        CGROUP_CONTROLLER_CPU,
        CGROUP_CONTROLLER_CPUACCT,    /* v1 only */
        CGROUP_CONTROLLER_CPUSET,     /* v2 only */
        CGROUP_CONTROLLER_IO,         /* v2 only */
        CGROUP_CONTROLLER_BLKIO,      /* v1 only */
        CGROUP_CONTROLLER_MEMORY,
        CGROUP_CONTROLLER_DEVICES,    /* v1 only */
        CGROUP_CONTROLLER_PIDS,

        /* BPF-based pseudo-controllers, v2 only */
        CGROUP_CONTROLLER_BPF_FIREWALL,
        CGROUP_CONTROLLER_BPF_DEVICES,
        CGROUP_CONTROLLER_BPF_FOREIGN,
        CGROUP_CONTROLLER_BPF_SOCKET_BIND,
        CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES,
        /* The BPF hook implementing RestrictFileSystems= is not defined here.
         * It's applied as late as possible in exec_invoke() so we don't block
         * our own unit setup code. */

        _CGROUP_CONTROLLER_MAX,
        _CGROUP_CONTROLLER_INVALID = -EINVAL,
} CGroupController;

/* Maps a CGroupController index to its bit in a CGroupMask. */
#define CGROUP_CONTROLLER_TO_MASK(c) (1U << (c))

/* A bit mask of well known cgroup controllers */
typedef enum CGroupMask {
        CGROUP_MASK_CPU = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPU),
        CGROUP_MASK_CPUACCT = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUACCT),
        CGROUP_MASK_CPUSET = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUSET),
        CGROUP_MASK_IO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_IO),
        CGROUP_MASK_BLKIO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BLKIO),
        CGROUP_MASK_MEMORY = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY),
        CGROUP_MASK_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICES),
        CGROUP_MASK_PIDS = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_PIDS),
        CGROUP_MASK_BPF_FIREWALL = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FIREWALL),
        CGROUP_MASK_BPF_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_DEVICES),
        CGROUP_MASK_BPF_FOREIGN = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FOREIGN),
        CGROUP_MASK_BPF_SOCKET_BIND = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_SOCKET_BIND),
        CGROUP_MASK_BPF_RESTRICT_NETWORK_INTERFACES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES),

        /* All real cgroup v1 controllers */
        CGROUP_MASK_V1 = CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT|CGROUP_MASK_BLKIO|CGROUP_MASK_MEMORY|CGROUP_MASK_DEVICES|CGROUP_MASK_PIDS,

        /* All real cgroup v2 controllers */
        CGROUP_MASK_V2 = CGROUP_MASK_CPU|CGROUP_MASK_CPUSET|CGROUP_MASK_IO|CGROUP_MASK_MEMORY|CGROUP_MASK_PIDS,

        /* All cgroup v2 BPF pseudo-controllers */
        CGROUP_MASK_BPF = CGROUP_MASK_BPF_FIREWALL|CGROUP_MASK_BPF_DEVICES|CGROUP_MASK_BPF_FOREIGN|CGROUP_MASK_BPF_SOCKET_BIND|CGROUP_MASK_BPF_RESTRICT_NETWORK_INTERFACES,

        /* One bit per controller, i.e. all bits below _CGROUP_CONTROLLER_MAX set. */
        _CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1,
} CGroupMask;

/* We always mount "cpu" and "cpuacct" in the same hierarchy. Hence, when one bit is set also set the other */
static inline CGroupMask CGROUP_MASK_EXTEND_JOINED(CGroupMask mask) {
        if (mask & (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT))
                mask |= (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT);

        return mask;
}
/* Mask of controllers needed for CPU accounting (presumably depends on the
 * detected hierarchy/kernel support — confirm in the implementation). */
CGroupMask get_cpu_accounting_mask(void);
/* Whether CPU accounting can be enabled without extra runtime cost. */
bool cpu_accounting_is_cheap(void);
/* Special values for all weight knobs on unified hierarchy */
#define CGROUP_WEIGHT_INVALID UINT64_MAX
#define CGROUP_WEIGHT_IDLE UINT64_C(0)
#define CGROUP_WEIGHT_MIN UINT64_C(1)
#define CGROUP_WEIGHT_MAX UINT64_C(10000)
#define CGROUP_WEIGHT_DEFAULT UINT64_C(100)

#define CGROUP_LIMIT_MIN UINT64_C(0)
#define CGROUP_LIMIT_MAX UINT64_MAX

/* Returns true if x is the "unset" marker or lies within [MIN, MAX].
 * Note: CGROUP_WEIGHT_IDLE (0) is deliberately outside the accepted range. */
static inline bool CGROUP_WEIGHT_IS_OK(uint64_t x) {
        return
            x == CGROUP_WEIGHT_INVALID ||
            (x >= CGROUP_WEIGHT_MIN && x <= CGROUP_WEIGHT_MAX);
}
/* IO limits on unified hierarchy */
typedef enum CGroupIOLimitType {
        /* The four keys of the cgroup v2 "io.max" attribute. */
        CGROUP_IO_RBPS_MAX,
        CGROUP_IO_WBPS_MAX,
        CGROUP_IO_RIOPS_MAX,
        CGROUP_IO_WIOPS_MAX,

        _CGROUP_IO_LIMIT_TYPE_MAX,
        _CGROUP_IO_LIMIT_TYPE_INVALID = -EINVAL,
} CGroupIOLimitType;
/* Default value for each IO limit type, indexed by CGroupIOLimitType. */
extern const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX];

const char* cgroup_io_limit_type_to_string(CGroupIOLimitType t) _const_;
CGroupIOLimitType cgroup_io_limit_type_from_string(const char *s) _pure_;
/* Special values for the cpu.shares attribute */
#define CGROUP_CPU_SHARES_INVALID UINT64_MAX
#define CGROUP_CPU_SHARES_MIN UINT64_C(2)
#define CGROUP_CPU_SHARES_MAX UINT64_C(262144)
#define CGROUP_CPU_SHARES_DEFAULT UINT64_C(1024)

/* Returns true if x is the "unset" marker or lies within [MIN, MAX]. */
static inline bool CGROUP_CPU_SHARES_IS_OK(uint64_t x) {
        return
            x == CGROUP_CPU_SHARES_INVALID ||
            (x >= CGROUP_CPU_SHARES_MIN && x <= CGROUP_CPU_SHARES_MAX);
}
132 /* Special values for the special {blkio,io}.bfq.weight attribute */
133 #define CGROUP_BFQ_WEIGHT_INVALID UINT64_MAX
134 #define CGROUP_BFQ_WEIGHT_MIN UINT64_C(1)
135 #define CGROUP_BFQ_WEIGHT_MAX UINT64_C(1000)
136 #define CGROUP_BFQ_WEIGHT_DEFAULT UINT64_C(100)
138 /* Convert the normal io.weight value to io.bfq.weight */
139 static inline uint64_t BFQ_WEIGHT(uint64_t io_weight
) {
141 io_weight
<= CGROUP_WEIGHT_DEFAULT
?
142 CGROUP_BFQ_WEIGHT_DEFAULT
- (CGROUP_WEIGHT_DEFAULT
- io_weight
) * (CGROUP_BFQ_WEIGHT_DEFAULT
- CGROUP_BFQ_WEIGHT_MIN
) / (CGROUP_WEIGHT_DEFAULT
- CGROUP_WEIGHT_MIN
) :
143 CGROUP_BFQ_WEIGHT_DEFAULT
+ (io_weight
- CGROUP_WEIGHT_DEFAULT
) * (CGROUP_BFQ_WEIGHT_MAX
- CGROUP_BFQ_WEIGHT_DEFAULT
) / (CGROUP_WEIGHT_MAX
- CGROUP_WEIGHT_DEFAULT
);
/* Special values for the blkio.weight attribute */
#define CGROUP_BLKIO_WEIGHT_INVALID UINT64_MAX
#define CGROUP_BLKIO_WEIGHT_MIN UINT64_C(10)
#define CGROUP_BLKIO_WEIGHT_MAX UINT64_C(1000)
#define CGROUP_BLKIO_WEIGHT_DEFAULT UINT64_C(500)

/* Returns true if x is the "unset" marker or lies within [MIN, MAX]. */
static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) {
        return
            x == CGROUP_BLKIO_WEIGHT_INVALID ||
            (x >= CGROUP_BLKIO_WEIGHT_MIN && x <= CGROUP_BLKIO_WEIGHT_MAX);
}
/* Which parts of the cgroup tree are mounted as cgroup v2 ("unified"). */
typedef enum CGroupUnified {
        CGROUP_UNIFIED_UNKNOWN = -1,
        CGROUP_UNIFIED_NONE = 0,        /* Both systemd and controllers on legacy */
        CGROUP_UNIFIED_SYSTEMD = 1,     /* Only systemd on unified */
        CGROUP_UNIFIED_ALL = 2,         /* Both systemd and controllers on unified */
} CGroupUnified;
/* General rules for the APIs below:
 *
 * We accept named hierarchies in the syntax "foo" and "name=foo".
 *
 * We expect that named hierarchies do not conflict in name with a
 * kernel hierarchy, modulo the "name=" prefix.
 *
 * We always generate "normalized" controller names, i.e. without the
 * "name=" prefix.
 *
 * We require absolute cgroup paths. When returning, we will always
 * generate paths with multiple adjacent / removed. */
/* Iteration over the processes and subgroups of a cgroup: the enumerate
 * functions open an iterator handle into *ret, the read functions pull one
 * entry at a time from that handle. */
int cg_enumerate_processes(const char *controller, const char *path, FILE **ret);
int cg_read_pid(FILE *f, pid_t *ret);
int cg_read_pidref(FILE *f, PidRef *ret);
int cg_read_event(const char *controller, const char *path, const char *event, char **ret);

int cg_enumerate_subgroups(const char *controller, const char *path, DIR **ret);
int cg_read_subgroup(DIR *d, char **ret);
/* Flags modifying the behavior of the kill operations below. */
typedef enum CGroupFlags {
        CGROUP_SIGCONT     = 1 << 0,    /* presumably: also send SIGCONT after the signal — confirm in cg_kill() */
        CGROUP_IGNORE_SELF = 1 << 1,    /* presumably: skip our own PID — confirm in cg_kill() */
        CGROUP_REMOVE      = 1 << 2,    /* presumably: remove the cgroup afterwards — confirm in cg_kill() */
} CGroupFlags;
/* Callback invoked per process acted on during a kill operation (e.g. for
 * logging which PID received which signal). */
typedef int (*cg_kill_log_func_t)(const PidRef *pid, int sig, void *userdata);

/* s: a Set used across calls — presumably tracks PIDs already signalled so
 * they are not signalled twice; confirm against the implementation. */
int cg_kill(const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata);
int cg_kill_kernel_sigkill(const char *path);
int cg_kill_recursive(const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata);
/* Splits a "controller:path" style spec into its two components. */
int cg_split_spec(const char *spec, char **ret_controller, char **ret_path);
int cg_mangle_path(const char *path, char **ret);

/* Construct a filesystem path for a controller/cgroup/attribute triplet. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **ret);
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **ret);

/* Determine the cgroup path a process is a member of. */
int cg_pid_get_path(const char *controller, pid_t pid, char **ret);
int cg_pidref_get_path(const char *controller, const PidRef *pidref, char **ret);

int cg_rmdir(const char *controller, const char *path);

int cg_is_threaded(const char *path);

/* Whether the cgroup is delegated (i.e. managed by someone else). */
int cg_is_delegated(const char *path);
int cg_is_delegated_fd(int fd);

int cg_has_coredump_receive(const char *path);
/* Mode flags for cg_get_keyed_attribute_full() below. */
typedef enum CGroupKeyMode {
        /* presumably: tolerate missing keys instead of failing — confirm in implementation */
        CG_KEY_MODE_GRACEFUL = 1 << 0,
} CGroupKeyMode;
/* Read/write a single cgroup attribute file. */
int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value);
int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret);
/* Extract the values for the given keys from a keyed attribute (one
 * "key value" pair per line); values are returned in the same order as keys. */
int cg_get_keyed_attribute_full(const char *controller, const char *path, const char *attribute, char **keys, char **values, CGroupKeyMode mode);
/* Convenience wrapper: keyed-attribute lookup with default (strict) mode. */
static inline int cg_get_keyed_attribute(
                const char *controller,
                const char *path,
                const char *attribute,
                char **keys,
                char **ret_values) {
        return cg_get_keyed_attribute_full(controller, path, attribute, keys, ret_values, 0);
}
235 static inline int cg_get_keyed_attribute_graceful(
236 const char *controller
,
238 const char *attribute
,
241 return cg_get_keyed_attribute_full(controller
, path
, attribute
, keys
, ret_values
, CG_KEY_MODE_GRACEFUL
);
/* Parses the attribute contents as uint64_t and stores it in *ret. */
int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret);

/* Does a parse_boolean() on the attribute contents and sets ret accordingly */
int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret);

int cg_get_owner(const char *path, uid_t *ret_uid);

/* Extended-attribute access on cgroup directories. */
int cg_set_xattr(const char *path, const char *name, const void *value, size_t size, int flags);
int cg_get_xattr(const char *path, const char *name, void *value, size_t size);
int cg_get_xattr_malloc(const char *path, const char *name, char **ret);
/* Returns negative on error, and 0 or 1 on success for the bool value */
int cg_get_xattr_bool(const char *path, const char *name);
int cg_remove_xattr(const char *path, const char *name);

/* cgroup v1 release-agent management. */
int cg_install_release_agent(const char *controller, const char *agent);
int cg_uninstall_release_agent(const char *controller);
int cg_is_empty(const char *controller, const char *path);
int cg_is_empty_recursive(const char *controller, const char *path);

int cg_get_root_path(char **path);

/* Decode information out of a cgroup path string. */
int cg_path_get_cgroupid(const char *path, uint64_t *ret);
int cg_path_get_session(const char *path, char **ret_session);
int cg_path_get_owner_uid(const char *path, uid_t *ret_uid);
int cg_path_get_unit(const char *path, char **ret_unit);
int cg_path_get_unit_path(const char *path, char **ret_unit);
int cg_path_get_user_unit(const char *path, char **ret_unit);
int cg_path_get_machine_name(const char *path, char **ret_machine);
int cg_path_get_slice(const char *path, char **ret_slice);
int cg_path_get_user_slice(const char *path, char **ret_slice);

/* Strip a cached root prefix from a cgroup path. */
int cg_shift_path(const char *cgroup, const char *cached_root, const char **ret_shifted);
int cg_pid_get_path_shifted(pid_t pid, const char *cached_root, char **ret_cgroup);

/* Same decoding helpers, but starting from a PID instead of a path. */
int cg_pid_get_session(pid_t pid, char **ret_session);
int cg_pid_get_owner_uid(pid_t pid, uid_t *ret_uid);
int cg_pid_get_unit(pid_t pid, char **ret_unit);
int cg_pidref_get_unit(const PidRef *pidref, char **ret);
int cg_pid_get_user_unit(pid_t pid, char **ret_unit);
int cg_pid_get_machine_name(pid_t pid, char **ret_machine);
int cg_pid_get_slice(pid_t pid, char **ret_slice);
int cg_pid_get_user_slice(pid_t pid, char **ret_slice);

int cg_path_decode_unit(const char *cgroup, char **ret_unit);

/* Escaping of cgroup names that would clash with reserved entries. */
bool cg_needs_escape(const char *p);
int cg_escape(const char *p, char **ret);
char *cg_unescape(const char *p) _pure_;

bool cg_controller_is_valid(const char *p);

/* Turn a slice unit name into the corresponding cgroup path. */
int cg_slice_to_path(const char *unit, char **ret);

/* Callback type used when migrating processes between hierarchies. */
typedef const char* (*cg_migrate_callback_t)(CGroupMask mask, void *userdata);
/* Determine/convert the set of controllers available on this system. */
int cg_mask_supported(CGroupMask *ret);
int cg_mask_supported_subtree(const char *root, CGroupMask *ret);
int cg_mask_from_string(const char *s, CGroupMask *ret);
int cg_mask_to_string(CGroupMask mask, char **ret);

int cg_kernel_controllers(Set **controllers);

/* Feature probes for optional kernel cgroup functionality. */
bool cg_ns_supported(void);
bool cg_freezer_supported(void);
bool cg_kill_supported(void);

/* Hierarchy-layout probes; see CGroupUnified above for the possible layouts. */
int cg_all_unified(void);
int cg_hybrid_unified(void);
int cg_unified_controller(const char *controller);
int cg_unified_cached(bool flush);
/* Convenience wrapper: hierarchy-layout probe with the cache flushed. */
static inline int cg_unified(void) {
        return cg_unified_cached(true);
}
const char* cgroup_controller_to_string(CGroupController c) _const_;
CGroupController cgroup_controller_from_string(const char *s) _pure_;

/* Whether the given statfs result / fd refers to a cgroup filesystem. */
bool is_cgroup_fs(const struct statfs *s);
bool fd_is_cgroup_fs(int fd);
/* Modes for the ManagedOOM*= unit settings consumed by systemd-oomd. */
typedef enum ManagedOOMMode {
        MANAGED_OOM_AUTO,
        MANAGED_OOM_KILL,
        _MANAGED_OOM_MODE_MAX,
        _MANAGED_OOM_MODE_INVALID = -EINVAL,
} ManagedOOMMode;
const char* managed_oom_mode_to_string(ManagedOOMMode m) _const_;
ManagedOOMMode managed_oom_mode_from_string(const char *s) _pure_;
/* Per-unit preference consumed by systemd-oomd when choosing kill candidates. */
typedef enum ManagedOOMPreference {
        MANAGED_OOM_PREFERENCE_NONE = 0,
        MANAGED_OOM_PREFERENCE_AVOID = 1,
        MANAGED_OOM_PREFERENCE_OMIT = 2,
        _MANAGED_OOM_PREFERENCE_MAX,
        _MANAGED_OOM_PREFERENCE_INVALID = -EINVAL,
} ManagedOOMPreference;

const char* managed_oom_preference_to_string(ManagedOOMPreference a) _const_;
ManagedOOMPreference managed_oom_preference_from_string(const char *s) _pure_;
346 /* The structure to pass to name_to_handle_at() on cgroupfs2 */
348 struct file_handle file_handle
;
349 uint8_t space
[offsetof(struct file_handle
, f_handle
) + sizeof(uint64_t)];
352 #define CG_FILE_HANDLE_INIT { .file_handle.handle_bytes = sizeof(uint64_t) }
353 #define CG_FILE_HANDLE_CGROUPID(fh) (*(uint64_t*) (fh).file_handle.f_handle)