]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/cgroup.h
Merge pull request #29343 from DaanDeMeyer/tmp
[thirdparty/systemd.git] / src / core / cgroup.h
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
c2f1db8f 2#pragma once
8e274523 3
c1ff5570
TA
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>

#include "bpf-lsm.h"
#include "cgroup-util.h"
#include "cpu-set-util.h"
#include "firewall-util.h"
#include "list.h"
#include "pidref.h"
#include "time-util.h"
8e274523 13
3a0f06c4
ZJS
/* Value type of the tasks_max field of CGroupContext. */
typedef struct TasksMax {
        /* If scale == 0, just use value; otherwise, value / scale.
         * See tasks_max_resolve(). */
        uint64_t value;
        uint64_t scale;
} TasksMax;

/* The "nothing configured" state: value == UINT64_MAX together with scale == 0. This is the only
 * combination for which tasks_max_isset() returns false. */
#define TASKS_MAX_UNSET ((TasksMax) { .value = UINT64_MAX, .scale = 0 })

/* Returns true unless *tasks_max is equal to TASKS_MAX_UNSET. The pointer is dereferenced
 * unconditionally, hence it must not be NULL. */
static inline bool tasks_max_isset(const TasksMax *tasks_max) {
        return tasks_max->value != UINT64_MAX || tasks_max->scale != 0;
}

/* Collapses a TasksMax into a single number (see the comment on the struct fields). Defined outside of
 * this header. */
uint64_t tasks_max_resolve(const TasksMax *tasks_max);
28
4ad49000
LP
/* Forward declarations, so that the structures can refer to each other by their short names. The
 * structure bodies follow further down in this header. */
typedef struct CGroupContext CGroupContext;
typedef struct CGroupDeviceAllow CGroupDeviceAllow;
typedef struct CGroupIODeviceWeight CGroupIODeviceWeight;
typedef struct CGroupIODeviceLimit CGroupIODeviceLimit;
typedef struct CGroupIODeviceLatency CGroupIODeviceLatency;
typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
typedef struct CGroupBPFForeignProgram CGroupBPFForeignProgram;
typedef struct CGroupSocketBindItem CGroupSocketBindItem;
8e274523 38
/* Type of the device_policy field of CGroupContext. */
typedef enum CGroupDevicePolicy {
        /* When devices are listed, allow those plus the built-in ones; if none are listed, allow
         * everything. */
        CGROUP_DEVICE_POLICY_AUTO,

        /* Everything forbidden, except built-in ones and listed ones. */
        CGROUP_DEVICE_POLICY_CLOSED,

        /* Everything forbidden, except for the listed devices */
        CGROUP_DEVICE_POLICY_STRICT,

        /* Number of valid policies above; not a policy itself. */
        _CGROUP_DEVICE_POLICY_MAX,
        /* Negative errno-style marker for "no valid policy". */
        _CGROUP_DEVICE_POLICY_INVALID = -EINVAL,
} CGroupDevicePolicy;
8e274523 53
d9e45bc3
MS
/* Action argument of unit_cgroup_freezer_action(). */
typedef enum FreezerAction {
        FREEZER_FREEZE,
        FREEZER_THAW,

        /* Number of valid actions above; not an action itself. */
        _FREEZER_ACTION_MAX,
        /* Negative errno-style marker for "no valid action". */
        _FREEZER_ACTION_INVALID = -EINVAL,
} FreezerAction;
61
4ad49000
LP
62struct CGroupDeviceAllow {
63 LIST_FIELDS(CGroupDeviceAllow, device_allow);
64 char *path;
65 bool r:1;
66 bool w:1;
67 bool m:1;
68};
8c6db833 69
13c31542
TH
70struct CGroupIODeviceWeight {
71 LIST_FIELDS(CGroupIODeviceWeight, device_weights);
72 char *path;
73 uint64_t weight;
74};
75
76struct CGroupIODeviceLimit {
77 LIST_FIELDS(CGroupIODeviceLimit, device_limits);
78 char *path;
9be57249 79 uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX];
13c31542
TH
80};
81
6ae4283c
TH
82struct CGroupIODeviceLatency {
83 LIST_FIELDS(CGroupIODeviceLatency, device_latencies);
84 char *path;
85 usec_t target_usec;
86};
87
4ad49000
LP
88struct CGroupBlockIODeviceWeight {
89 LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights);
90 char *path;
d53d9474 91 uint64_t weight;
8e274523
LP
92};
93
4ad49000
LP
94struct CGroupBlockIODeviceBandwidth {
95 LIST_FIELDS(CGroupBlockIODeviceBandwidth, device_bandwidths);
96 char *path;
979d0311
TH
97 uint64_t rbps;
98 uint64_t wbps;
4ad49000 99};
8e274523 100
b894ef1b
JK
101struct CGroupBPFForeignProgram {
102 LIST_FIELDS(CGroupBPFForeignProgram, programs);
103 uint32_t attach_type;
104 char *bpffs_path;
105};
106
b18e9fc1
JK
107struct CGroupSocketBindItem {
108 LIST_FIELDS(CGroupSocketBindItem, socket_bind_items);
5587ce7f
JK
109 int address_family;
110 int ip_protocol;
b18e9fc1
JK
111 uint16_t nr_ports;
112 uint16_t port_min;
113};
114
6bb00842
LP
/* Type of the memory_pressure_watch field of CGroupContext; evaluated by
 * cgroup_context_want_memory_pressure(). */
typedef enum CGroupPressureWatch {
        CGROUP_PRESSURE_WATCH_OFF,      /* → tells the service payload explicitly not to watch for memory pressure */
        CGROUP_PRESSURE_WATCH_AUTO,     /* → on if memory accounting is on anyway for the unit, otherwise off */
        CGROUP_PRESSURE_WATCH_ON,       /* → always on */
        CGROUP_PRESSURE_WATCH_SKIP,     /* → doesn't set up memory pressure watch, but also doesn't explicitly tell payload to avoid it */
        _CGROUP_PRESSURE_WATCH_MAX,     /* number of valid values above */
        _CGROUP_PRESSURE_WATCH_INVALID = -EINVAL,
} CGroupPressureWatch;
123
4ad49000
LP
124struct CGroupContext {
125 bool cpu_accounting;
13c31542 126 bool io_accounting;
4ad49000
LP
127 bool blockio_accounting;
128 bool memory_accounting;
03a7b521 129 bool tasks_accounting;
6a48d82f 130 bool ip_accounting;
8e274523 131
afcfaa69
LP
132 /* Configures the memory.oom.group attribute (on unified) */
133 bool memory_oom_group;
134
0a6991e0
LP
135 bool delegate;
136 CGroupMask delegate_controllers;
137 CGroupMask disable_controllers;
a8b993dc 138 char *delegate_subgroup;
0a6991e0 139
13c31542 140 /* For unified hierarchy */
66ebf6c0
TH
141 uint64_t cpu_weight;
142 uint64_t startup_cpu_weight;
143 usec_t cpu_quota_per_sec_usec;
10f28641 144 usec_t cpu_quota_period_usec;
66ebf6c0 145
047f5d63 146 CPUSet cpuset_cpus;
31d3a520 147 CPUSet startup_cpuset_cpus;
047f5d63 148 CPUSet cpuset_mems;
31d3a520 149 CPUSet startup_cpuset_mems;
047f5d63 150
13c31542
TH
151 uint64_t io_weight;
152 uint64_t startup_io_weight;
153 LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
154 LIST_HEAD(CGroupIODeviceLimit, io_device_limits);
6ae4283c 155 LIST_HEAD(CGroupIODeviceLatency, io_device_latencies);
13c31542 156
7ad5439e 157 uint64_t default_memory_min;
c52db42b 158 uint64_t default_memory_low;
53fda560 159 uint64_t default_startup_memory_low;
48422635 160 uint64_t memory_min;
da4d897e 161 uint64_t memory_low;
53fda560 162 uint64_t startup_memory_low;
da4d897e 163 uint64_t memory_high;
53fda560 164 uint64_t startup_memory_high;
da4d897e 165 uint64_t memory_max;
53fda560 166 uint64_t startup_memory_max;
96e131ea 167 uint64_t memory_swap_max;
53fda560 168 uint64_t startup_memory_swap_max;
d7fe0a67 169 uint64_t memory_zswap_max;
53fda560 170 uint64_t startup_memory_zswap_max;
da4d897e 171
60da07ec
YW
172 bool default_memory_min_set:1;
173 bool default_memory_low_set:1;
53fda560 174 bool default_startup_memory_low_set:1;
60da07ec
YW
175 bool memory_min_set:1;
176 bool memory_low_set:1;
53fda560
LB
177 bool startup_memory_low_set:1;
178 bool startup_memory_high_set:1;
179 bool startup_memory_max_set:1;
180 bool startup_memory_swap_max_set:1;
181 bool startup_memory_zswap_max_set:1;
c52db42b 182
84ebe6f0
YW
183 Set *ip_address_allow;
184 Set *ip_address_deny;
185 /* These two flags indicate that redundant entries have been removed from
186 * ip_address_allow/ip_address_deny, i.e. in_addr_prefixes_reduce() has already been called. */
187 bool ip_address_allow_reduced;
188 bool ip_address_deny_reduced;
6a48d82f 189
fab34748
KL
190 char **ip_filters_ingress;
191 char **ip_filters_egress;
b894ef1b 192 LIST_HEAD(CGroupBPFForeignProgram, bpf_foreign_programs);
fab34748 193
6f50d4f7
MV
194 Set *restrict_network_interfaces;
195 bool restrict_network_interfaces_is_allow_list;
196
13c31542 197 /* For legacy hierarchies */
d53d9474
LP
198 uint64_t cpu_shares;
199 uint64_t startup_cpu_shares;
8e274523 200
d53d9474
LP
201 uint64_t blockio_weight;
202 uint64_t startup_blockio_weight;
4ad49000
LP
203 LIST_HEAD(CGroupBlockIODeviceWeight, blockio_device_weights);
204 LIST_HEAD(CGroupBlockIODeviceBandwidth, blockio_device_bandwidths);
246aa6dd 205
4ad49000 206 uint64_t memory_limit;
64747e2d 207
4ad49000
LP
208 CGroupDevicePolicy device_policy;
209 LIST_HEAD(CGroupDeviceAllow, device_allow);
a931ad47 210
b18e9fc1
JK
211 LIST_HEAD(CGroupSocketBindItem, socket_bind_allow);
212 LIST_HEAD(CGroupSocketBindItem, socket_bind_deny);
213
13c31542 214 /* Common */
3a0f06c4 215 TasksMax tasks_max;
4d824a4e
AZ
216
217 /* Settings for systemd-oomd */
218 ManagedOOMMode moom_swap;
219 ManagedOOMMode moom_mem_pressure;
d9d3f05d 220 uint32_t moom_mem_pressure_limit; /* Normalized to 2^32-1 == 100% */
4e806bfa 221 ManagedOOMPreference moom_preference;
6bb00842
LP
222
223 /* Memory pressure logic */
224 CGroupPressureWatch memory_pressure_watch;
225 usec_t memory_pressure_threshold_usec;
226 /* NB: For now we don't make the period configurable, not the type, nor do we allow multiple
227 * triggers, nor triggers for non-memory pressure. We might add that later. */
dc7d69b3
TM
228
229 NFTSetContext nft_set_context;
4ad49000 230};
64747e2d 231
906c06f6
DM
/* Used when querying IP accounting data, see unit_get_ip_accounting(). */
typedef enum CGroupIPAccountingMetric {
        CGROUP_IP_INGRESS_BYTES,
        CGROUP_IP_INGRESS_PACKETS,
        CGROUP_IP_EGRESS_BYTES,
        CGROUP_IP_EGRESS_PACKETS,
        _CGROUP_IP_ACCOUNTING_METRIC_MAX,       /* number of valid metrics above */
        _CGROUP_IP_ACCOUNTING_METRIC_INVALID = -EINVAL,
} CGroupIPAccountingMetric;
241
fbe14fc9
LP
/* Used when querying IO accounting data, see unit_get_io_accounting(). */
typedef enum CGroupIOAccountingMetric {
        CGROUP_IO_READ_BYTES,
        CGROUP_IO_WRITE_BYTES,
        CGROUP_IO_READ_OPERATIONS,
        CGROUP_IO_WRITE_OPERATIONS,
        _CGROUP_IO_ACCOUNTING_METRIC_MAX,       /* number of valid metrics above */
        _CGROUP_IO_ACCOUNTING_METRIC_INVALID = -EINVAL,
} CGroupIOAccountingMetric;
251
90a8f0b9
FS
/* Only pointers to these are used by the prototypes in this header, hence forward declarations
 * suffice here. */
typedef struct Unit Unit;
typedef struct Manager Manager;
8e274523 254
10f28641
FB
255usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period);
256
4ad49000
LP
257void cgroup_context_init(CGroupContext *c);
258void cgroup_context_done(CGroupContext *c);
bc0623df 259void cgroup_context_dump(Unit *u, FILE* f, const char *prefix);
b18e9fc1 260void cgroup_context_dump_socket_bind_item(const CGroupSocketBindItem *item, FILE *f);
bc432dc7 261
4ad49000 262void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a);
13c31542
TH
263void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w);
264void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l);
6ae4283c 265void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l);
4ad49000
LP
266void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w);
267void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b);
b894ef1b 268void cgroup_context_remove_bpf_foreign_program(CGroupContext *c, CGroupBPFForeignProgram *p);
b18e9fc1 269void cgroup_context_remove_socket_bind(CGroupSocketBindItem **head);
8e274523 270
6bb00842
LP
271static inline bool cgroup_context_want_memory_pressure(const CGroupContext *c) {
272 assert(c);
273
274 return c->memory_pressure_watch == CGROUP_PRESSURE_WATCH_ON ||
275 (c->memory_pressure_watch == CGROUP_PRESSURE_WATCH_AUTO && c->memory_accounting);
276}
277
fd870bac 278int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode);
b894ef1b 279int cgroup_add_bpf_foreign_program(CGroupContext *c, uint32_t attach_type, const char *path);
fd870bac 280
4e806bfa 281void cgroup_oomd_xattr_apply(Unit *u, const char *cgroup_path);
523ea123 282int cgroup_log_xattr_apply(Unit *u, const char *cgroup_path);
4e806bfa 283
dc7d69b3
TM
284void cgroup_modify_nft_set(Unit *u, bool add);
285
efdb0237 286CGroupMask unit_get_own_mask(Unit *u);
02638280 287CGroupMask unit_get_delegate_mask(Unit *u);
efdb0237 288CGroupMask unit_get_members_mask(Unit *u);
02638280 289CGroupMask unit_get_siblings_mask(Unit *u);
c72703e2
CD
290CGroupMask unit_get_ancestor_disable_mask(Unit *u);
291
efdb0237
LP
292CGroupMask unit_get_target_mask(Unit *u);
293CGroupMask unit_get_enable_mask(Unit *u);
bc432dc7 294
5af88058 295void unit_invalidate_cgroup_members_masks(Unit *u);
efdb0237 296
4c591f39 297void unit_add_family_to_cgroup_realize_queue(Unit *u);
27adcc97 298
6592b975 299const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask);
1a56b0c0 300int unit_default_cgroup_path(const Unit *u, char **ret);
efdb0237 301int unit_set_cgroup_path(Unit *u, const char *path);
a4634b21 302int unit_pick_cgroup_path(Unit *u);
efdb0237 303
0a1eb06d 304int unit_realize_cgroup(Unit *u);
efdb0237
LP
305void unit_prune_cgroup(Unit *u);
306int unit_watch_cgroup(Unit *u);
afcfaa69 307int unit_watch_cgroup_memory(Unit *u);
020b2e41 308void unit_add_to_cgroup_realize_queue(Unit *u);
efdb0237 309
e08dabfe
AZ
310void unit_release_cgroup(Unit *u);
311/* Releases the cgroup only if it is recursively empty.
312 * Returns true if the cgroup was released, false otherwise. */
313bool unit_maybe_release_cgroup(Unit *u);
314
09e24654 315void unit_add_to_cgroup_empty_queue(Unit *u);
fe8d22fb 316int unit_check_oomd_kill(Unit *u);
2ba6ae6b 317int unit_check_oom(Unit *u);
09e24654 318
6592b975 319int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path);
8e274523 320
4ad49000
LP
321int manager_setup_cgroup(Manager *m);
322void manager_shutdown_cgroup(Manager *m, bool delete);
6dde1f33 323
91a6073e 324unsigned manager_dispatch_cgroup_realize_queue(Manager *m);
4fbf50b3 325
4ad49000 326Unit *manager_get_unit_by_cgroup(Manager *m, const char *cgroup);
495e75ed
LP
327Unit *manager_get_unit_by_pidref_cgroup(Manager *m, PidRef *pid);
328Unit *manager_get_unit_by_pidref_watching(Manager *m, PidRef *pid);
329Unit* manager_get_unit_by_pidref(Manager *m, PidRef *pid);
4ad49000 330Unit* manager_get_unit_by_pid(Manager *m, pid_t pid);
8e274523 331
7ad5439e 332uint64_t unit_get_ancestor_memory_min(Unit *u);
c52db42b 333uint64_t unit_get_ancestor_memory_low(Unit *u);
53fda560 334uint64_t unit_get_ancestor_startup_memory_low(Unit *u);
c52db42b 335
495e75ed 336int unit_search_main_pid(Unit *u, PidRef *ret);
efdb0237 337int unit_watch_all_pids(Unit *u);
8e274523 338
11aef522
LP
339int unit_synthesize_cgroup_empty_event(Unit *u);
340
5ad096b3 341int unit_get_memory_current(Unit *u, uint64_t *ret);
93ff34e4 342int unit_get_memory_available(Unit *u, uint64_t *ret);
03a7b521 343int unit_get_tasks_current(Unit *u, uint64_t *ret);
5ad096b3 344int unit_get_cpu_usage(Unit *u, nsec_t *ret);
fbe14fc9 345int unit_get_io_accounting(Unit *u, CGroupIOAccountingMetric metric, bool allow_cache, uint64_t *ret);
906c06f6
DM
346int unit_get_ip_accounting(Unit *u, CGroupIPAccountingMetric metric, uint64_t *ret);
347
348int unit_reset_cpu_accounting(Unit *u);
349int unit_reset_ip_accounting(Unit *u);
fbe14fc9 350int unit_reset_io_accounting(Unit *u);
9b2559a1 351int unit_reset_accounting(Unit *u);
5ad096b3 352
2e4025c0
ZJS
/* Evaluates to the member "name" of the CGroupContext that unit_get_cgroup_context(u) returns, or to
 * false if that call returns NULL. "u" is evaluated exactly once. This relies on the GNU statement
 * expression extension, and unit_get_cgroup_context() must be declared at the point of use. */
#define UNIT_CGROUP_BOOL(u, name)                                       \
        ({                                                              \
                CGroupContext *cc = unit_get_cgroup_context(u);         \
                cc ? cc->name : false;                                  \
        })
e9db43d5 358
611c4f8a
LP
359bool manager_owns_host_root_cgroup(Manager *m);
360bool unit_has_host_root_cgroup(Unit *u);
f3725e64 361
9dfb6a3a
PM
362bool unit_has_startup_cgroup_constraints(Unit *u);
363
efdb0237
LP
364int manager_notify_cgroup_empty(Manager *m, const char *group);
365
e7ab4d1a 366void unit_invalidate_cgroup(Unit *u, CGroupMask m);
906c06f6 367void unit_invalidate_cgroup_bpf(Unit *u);
e7ab4d1a
LP
368
369void manager_invalidate_startup_units(Manager *m);
370
4ad49000
LP
371const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_;
372CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_;
1d9cc876 373
869f52f2
DS
374void unit_cgroup_catchup(Unit *u);
375
1d9cc876 376bool unit_cgroup_delegate(Unit *u);
da8e1782
MO
377
378int compare_job_priority(const void *a, const void *b);
047f5d63
PH
379
380int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name);
d9e45bc3
MS
381int unit_cgroup_freezer_action(Unit *u, FreezerAction action);
382
383const char* freezer_action_to_string(FreezerAction a) _const_;
384FreezerAction freezer_action_from_string(const char *s) _pure_;
6bb00842
LP
385
386const char* cgroup_pressure_watch_to_string(CGroupPressureWatch a) _const_;
387CGroupPressureWatch cgroup_pressure_watch_from_string(const char *s) _pure_;