1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
7 #include "cgroup-util.h"
8 #include "cpu-set-util.h"
9 #include "firewall-util.h"
12 #include "time-util.h"
14 typedef struct TasksMax
{
15 /* If scale == 0, just use value; otherwise, value / scale.
16 * See tasks_max_resolve(). */
/* The "unset" TasksMax value: tasks_max_isset() returns false exactly when .value is
 * UINT64_MAX and .scale is 0, i.e. for this compound literal. */
#define TASKS_MAX_UNSET                         \
        ((TasksMax) {                           \
                .value = UINT64_MAX,            \
                .scale = 0,                     \
        })
23 static inline bool tasks_max_isset(const TasksMax
*tasks_max
) {
24 return tasks_max
->value
!= UINT64_MAX
|| tasks_max
->scale
!= 0;
27 uint64_t tasks_max_resolve(const TasksMax
*tasks_max
);
/* Forward declarations: let the declarations below refer to these types by pointer before
 * (or without) their full definitions. */
typedef struct CGroupContext CGroupContext;
typedef struct CGroupDeviceAllow CGroupDeviceAllow;
typedef struct CGroupIODeviceWeight CGroupIODeviceWeight;
typedef struct CGroupIODeviceLimit CGroupIODeviceLimit;
typedef struct CGroupIODeviceLatency CGroupIODeviceLatency;
typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
typedef struct CGroupBPFForeignProgram CGroupBPFForeignProgram;
typedef struct CGroupSocketBindItem CGroupSocketBindItem;
39 typedef enum CGroupDevicePolicy
{
40 /* When devices listed, will allow those, plus built-in ones, if none are listed will allow
42 CGROUP_DEVICE_POLICY_AUTO
,
44 /* Everything forbidden, except built-in ones and listed ones. */
45 CGROUP_DEVICE_POLICY_CLOSED
,
47 /* Everything forbidden, except for the listed devices */
48 CGROUP_DEVICE_POLICY_STRICT
,
50 _CGROUP_DEVICE_POLICY_MAX
,
51 _CGROUP_DEVICE_POLICY_INVALID
= -EINVAL
,
54 typedef enum FreezerAction
{
59 _FREEZER_ACTION_INVALID
= -EINVAL
,
62 struct CGroupDeviceAllow
{
63 LIST_FIELDS(CGroupDeviceAllow
, device_allow
);
70 struct CGroupIODeviceWeight
{
71 LIST_FIELDS(CGroupIODeviceWeight
, device_weights
);
76 struct CGroupIODeviceLimit
{
77 LIST_FIELDS(CGroupIODeviceLimit
, device_limits
);
79 uint64_t limits
[_CGROUP_IO_LIMIT_TYPE_MAX
];
82 struct CGroupIODeviceLatency
{
83 LIST_FIELDS(CGroupIODeviceLatency
, device_latencies
);
88 struct CGroupBlockIODeviceWeight
{
89 LIST_FIELDS(CGroupBlockIODeviceWeight
, device_weights
);
94 struct CGroupBlockIODeviceBandwidth
{
95 LIST_FIELDS(CGroupBlockIODeviceBandwidth
, device_bandwidths
);
101 struct CGroupBPFForeignProgram
{
102 LIST_FIELDS(CGroupBPFForeignProgram
, programs
);
103 uint32_t attach_type
;
107 struct CGroupSocketBindItem
{
108 LIST_FIELDS(CGroupSocketBindItem
, socket_bind_items
);
/* How memory pressure watching is handled for a unit's payload. */
typedef enum CGroupPressureWatch {
        /* Explicitly tell the service payload not to watch for memory pressure. */
        CGROUP_PRESSURE_WATCH_OFF,

        /* Behave like "on" if memory accounting is enabled for the unit anyway, otherwise like "off". */
        CGROUP_PRESSURE_WATCH_AUTO,

        /* Watching is requested unconditionally. */
        CGROUP_PRESSURE_WATCH_ON,

        /* Do not set up a memory pressure watch, but do not explicitly tell the payload to avoid
         * it either. */
        CGROUP_PRESSURE_WATCH_SKIP,

        /* Number of valid values above. */
        _CGROUP_PRESSURE_WATCH_MAX,

        /* Negative-errno sentinel for "no valid value". */
        _CGROUP_PRESSURE_WATCH_INVALID = -EINVAL,
} CGroupPressureWatch;
124 struct CGroupContext
{
127 bool blockio_accounting
;
128 bool memory_accounting
;
129 bool tasks_accounting
;
132 /* Configures the memory.oom.group attribute (on unified) */
133 bool memory_oom_group
;
136 CGroupMask delegate_controllers
;
137 CGroupMask disable_controllers
;
138 char *delegate_subgroup
;
140 /* For unified hierarchy */
142 uint64_t startup_cpu_weight
;
143 usec_t cpu_quota_per_sec_usec
;
144 usec_t cpu_quota_period_usec
;
147 CPUSet startup_cpuset_cpus
;
149 CPUSet startup_cpuset_mems
;
152 uint64_t startup_io_weight
;
153 LIST_HEAD(CGroupIODeviceWeight
, io_device_weights
);
154 LIST_HEAD(CGroupIODeviceLimit
, io_device_limits
);
155 LIST_HEAD(CGroupIODeviceLatency
, io_device_latencies
);
157 uint64_t default_memory_min
;
158 uint64_t default_memory_low
;
159 uint64_t default_startup_memory_low
;
162 uint64_t startup_memory_low
;
163 uint64_t memory_high
;
164 uint64_t startup_memory_high
;
166 uint64_t startup_memory_max
;
167 uint64_t memory_swap_max
;
168 uint64_t startup_memory_swap_max
;
169 uint64_t memory_zswap_max
;
170 uint64_t startup_memory_zswap_max
;
172 bool default_memory_min_set
:1;
173 bool default_memory_low_set
:1;
174 bool default_startup_memory_low_set
:1;
175 bool memory_min_set
:1;
176 bool memory_low_set
:1;
177 bool startup_memory_low_set
:1;
178 bool startup_memory_high_set
:1;
179 bool startup_memory_max_set
:1;
180 bool startup_memory_swap_max_set
:1;
181 bool startup_memory_zswap_max_set
:1;
183 Set
*ip_address_allow
;
184 Set
*ip_address_deny
;
185 /* These two flags indicate that redundant entries have been removed from
186 * ip_address_allow/ip_address_deny, i.e. in_addr_prefixes_reduce() has already been called. */
187 bool ip_address_allow_reduced
;
188 bool ip_address_deny_reduced
;
190 char **ip_filters_ingress
;
191 char **ip_filters_egress
;
192 LIST_HEAD(CGroupBPFForeignProgram
, bpf_foreign_programs
);
194 Set
*restrict_network_interfaces
;
195 bool restrict_network_interfaces_is_allow_list
;
197 /* For legacy hierarchies */
199 uint64_t startup_cpu_shares
;
201 uint64_t blockio_weight
;
202 uint64_t startup_blockio_weight
;
203 LIST_HEAD(CGroupBlockIODeviceWeight
, blockio_device_weights
);
204 LIST_HEAD(CGroupBlockIODeviceBandwidth
, blockio_device_bandwidths
);
206 uint64_t memory_limit
;
208 CGroupDevicePolicy device_policy
;
209 LIST_HEAD(CGroupDeviceAllow
, device_allow
);
211 LIST_HEAD(CGroupSocketBindItem
, socket_bind_allow
);
212 LIST_HEAD(CGroupSocketBindItem
, socket_bind_deny
);
217 /* Settings for systemd-oomd */
218 ManagedOOMMode moom_swap
;
219 ManagedOOMMode moom_mem_pressure
;
220 uint32_t moom_mem_pressure_limit
; /* Normalized to 2^32-1 == 100% */
221 ManagedOOMPreference moom_preference
;
223 /* Memory pressure logic */
224 CGroupPressureWatch memory_pressure_watch
;
225 usec_t memory_pressure_threshold_usec
;
/* NB: For now we don't make the period configurable, nor the type, nor do we allow multiple
 * triggers, nor triggers for non-memory pressure. We might add that later. */
229 NFTSetContext nft_set_context
;
/* Selects the counter to read when IP accounting data is queried. */
typedef enum CGroupIPAccountingMetric {
        CGROUP_IP_INGRESS_BYTES,
        CGROUP_IP_INGRESS_PACKETS,
        CGROUP_IP_EGRESS_BYTES,
        CGROUP_IP_EGRESS_PACKETS,

        /* Number of valid metrics above. */
        _CGROUP_IP_ACCOUNTING_METRIC_MAX,

        /* Negative-errno sentinel for "no valid metric". */
        _CGROUP_IP_ACCOUNTING_METRIC_INVALID = -EINVAL,
} CGroupIPAccountingMetric;
/* Selects the counter to read when IO accounting data is queried. */
typedef enum CGroupIOAccountingMetric {
        CGROUP_IO_READ_BYTES,
        CGROUP_IO_WRITE_BYTES,
        CGROUP_IO_READ_OPERATIONS,
        CGROUP_IO_WRITE_OPERATIONS,

        /* Number of valid metrics above. */
        _CGROUP_IO_ACCOUNTING_METRIC_MAX,

        /* Negative-errno sentinel for "no valid metric". */
        _CGROUP_IO_ACCOUNTING_METRIC_INVALID = -EINVAL,
} CGroupIOAccountingMetric;
/* Opaque here: only pointers to Unit and Manager are used by the prototypes that follow. */
typedef struct Unit Unit;
typedef struct Manager Manager;
255 usec_t
cgroup_cpu_adjust_period(usec_t period
, usec_t quota
, usec_t resolution
, usec_t max_period
);
257 void cgroup_context_init(CGroupContext
*c
);
258 void cgroup_context_done(CGroupContext
*c
);
259 void cgroup_context_dump(Unit
*u
, FILE* f
, const char *prefix
);
260 void cgroup_context_dump_socket_bind_item(const CGroupSocketBindItem
*item
, FILE *f
);
262 void cgroup_context_free_device_allow(CGroupContext
*c
, CGroupDeviceAllow
*a
);
263 void cgroup_context_free_io_device_weight(CGroupContext
*c
, CGroupIODeviceWeight
*w
);
264 void cgroup_context_free_io_device_limit(CGroupContext
*c
, CGroupIODeviceLimit
*l
);
265 void cgroup_context_free_io_device_latency(CGroupContext
*c
, CGroupIODeviceLatency
*l
);
266 void cgroup_context_free_blockio_device_weight(CGroupContext
*c
, CGroupBlockIODeviceWeight
*w
);
267 void cgroup_context_free_blockio_device_bandwidth(CGroupContext
*c
, CGroupBlockIODeviceBandwidth
*b
);
268 void cgroup_context_remove_bpf_foreign_program(CGroupContext
*c
, CGroupBPFForeignProgram
*p
);
269 void cgroup_context_remove_socket_bind(CGroupSocketBindItem
**head
);
271 static inline bool cgroup_context_want_memory_pressure(const CGroupContext
*c
) {
274 return c
->memory_pressure_watch
== CGROUP_PRESSURE_WATCH_ON
||
275 (c
->memory_pressure_watch
== CGROUP_PRESSURE_WATCH_AUTO
&& c
->memory_accounting
);
278 int cgroup_add_device_allow(CGroupContext
*c
, const char *dev
, const char *mode
);
279 int cgroup_add_bpf_foreign_program(CGroupContext
*c
, uint32_t attach_type
, const char *path
);
281 void cgroup_oomd_xattr_apply(Unit
*u
, const char *cgroup_path
);
282 int cgroup_log_xattr_apply(Unit
*u
, const char *cgroup_path
);
284 void cgroup_modify_nft_set(Unit
*u
, bool add
);
286 CGroupMask
unit_get_own_mask(Unit
*u
);
287 CGroupMask
unit_get_delegate_mask(Unit
*u
);
288 CGroupMask
unit_get_members_mask(Unit
*u
);
289 CGroupMask
unit_get_siblings_mask(Unit
*u
);
290 CGroupMask
unit_get_ancestor_disable_mask(Unit
*u
);
292 CGroupMask
unit_get_target_mask(Unit
*u
);
293 CGroupMask
unit_get_enable_mask(Unit
*u
);
295 void unit_invalidate_cgroup_members_masks(Unit
*u
);
297 void unit_add_family_to_cgroup_realize_queue(Unit
*u
);
299 const char *unit_get_realized_cgroup_path(Unit
*u
, CGroupMask mask
);
300 int unit_default_cgroup_path(const Unit
*u
, char **ret
);
301 int unit_set_cgroup_path(Unit
*u
, const char *path
);
302 int unit_pick_cgroup_path(Unit
*u
);
304 int unit_realize_cgroup(Unit
*u
);
305 void unit_prune_cgroup(Unit
*u
);
306 int unit_watch_cgroup(Unit
*u
);
307 int unit_watch_cgroup_memory(Unit
*u
);
308 void unit_add_to_cgroup_realize_queue(Unit
*u
);
310 void unit_release_cgroup(Unit
*u
);
311 /* Releases the cgroup only if it is recursively empty.
312 * Returns true if the cgroup was released, false otherwise. */
313 bool unit_maybe_release_cgroup(Unit
*u
);
315 void unit_add_to_cgroup_empty_queue(Unit
*u
);
316 int unit_check_oomd_kill(Unit
*u
);
317 int unit_check_oom(Unit
*u
);
319 int unit_attach_pids_to_cgroup(Unit
*u
, Set
*pids
, const char *suffix_path
);
321 int manager_setup_cgroup(Manager
*m
);
322 void manager_shutdown_cgroup(Manager
*m
, bool delete);
324 unsigned manager_dispatch_cgroup_realize_queue(Manager
*m
);
326 Unit
*manager_get_unit_by_cgroup(Manager
*m
, const char *cgroup
);
327 Unit
*manager_get_unit_by_pidref_cgroup(Manager
*m
, PidRef
*pid
);
328 Unit
*manager_get_unit_by_pidref_watching(Manager
*m
, PidRef
*pid
);
329 Unit
* manager_get_unit_by_pidref(Manager
*m
, PidRef
*pid
);
330 Unit
* manager_get_unit_by_pid(Manager
*m
, pid_t pid
);
332 uint64_t unit_get_ancestor_memory_min(Unit
*u
);
333 uint64_t unit_get_ancestor_memory_low(Unit
*u
);
334 uint64_t unit_get_ancestor_startup_memory_low(Unit
*u
);
336 int unit_search_main_pid(Unit
*u
, PidRef
*ret
);
337 int unit_watch_all_pids(Unit
*u
);
339 int unit_synthesize_cgroup_empty_event(Unit
*u
);
341 int unit_get_memory_current(Unit
*u
, uint64_t *ret
);
342 int unit_get_memory_available(Unit
*u
, uint64_t *ret
);
343 int unit_get_tasks_current(Unit
*u
, uint64_t *ret
);
344 int unit_get_cpu_usage(Unit
*u
, nsec_t
*ret
);
345 int unit_get_io_accounting(Unit
*u
, CGroupIOAccountingMetric metric
, bool allow_cache
, uint64_t *ret
);
346 int unit_get_ip_accounting(Unit
*u
, CGroupIPAccountingMetric metric
, uint64_t *ret
);
348 int unit_reset_cpu_accounting(Unit
*u
);
349 int unit_reset_ip_accounting(Unit
*u
);
350 int unit_reset_io_accounting(Unit
*u
);
351 int unit_reset_accounting(Unit
*u
);
353 #define UNIT_CGROUP_BOOL(u, name) \
355 CGroupContext *cc = unit_get_cgroup_context(u); \
356 cc ? cc->name : false; \
359 bool manager_owns_host_root_cgroup(Manager
*m
);
360 bool unit_has_host_root_cgroup(Unit
*u
);
362 bool unit_has_startup_cgroup_constraints(Unit
*u
);
364 int manager_notify_cgroup_empty(Manager
*m
, const char *group
);
366 void unit_invalidate_cgroup(Unit
*u
, CGroupMask m
);
367 void unit_invalidate_cgroup_bpf(Unit
*u
);
369 void manager_invalidate_startup_units(Manager
*m
);
371 const char* cgroup_device_policy_to_string(CGroupDevicePolicy i
) _const_
;
372 CGroupDevicePolicy
cgroup_device_policy_from_string(const char *s
) _pure_
;
374 void unit_cgroup_catchup(Unit
*u
);
376 bool unit_cgroup_delegate(Unit
*u
);
378 int compare_job_priority(const void *a
, const void *b
);
380 int unit_get_cpuset(Unit
*u
, CPUSet
*cpus
, const char *name
);
381 int unit_cgroup_freezer_action(Unit
*u
, FreezerAction action
);
383 const char* freezer_action_to_string(FreezerAction a
) _const_
;
384 FreezerAction
freezer_action_from_string(const char *s
) _pure_
;
386 const char* cgroup_pressure_watch_to_string(CGroupPressureWatch a
) _const_
;
387 CGroupPressureWatch
cgroup_pressure_watch_from_string(const char *s
) _pure_
;