]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
c2f1db8f | 2 | #pragma once |
8e274523 | 3 | |
c1ff5570 TA |
4 | #include <stdbool.h> |
5 | ||
b1994387 | 6 | #include "bpf-lsm.h" |
6a48d82f | 7 | #include "cgroup-util.h" |
047f5d63 | 8 | #include "cpu-set-util.h" |
dc7d69b3 | 9 | #include "firewall-util.h" |
4ad49000 | 10 | #include "list.h" |
495e75ed | 11 | #include "pidref.h" |
c1ff5570 | 12 | #include "time-util.h" |
8e274523 | 13 | |
/* A tasks limit stored as a (value, scale) pair.
 *
 * If scale == 0, just use value; otherwise, value / scale.
 * See tasks_max_resolve(). */
typedef struct TasksMax {
        uint64_t value;
        uint64_t scale;
} TasksMax;

/* The "unset" marker: value is UINT64_MAX and scale is 0. */
#define TASKS_MAX_UNSET ((TasksMax) { .value = UINT64_MAX, .scale = 0 })

/* Returns true if *tasks_max differs from TASKS_MAX_UNSET, i.e. if either the value is not
 * UINT64_MAX or the scale is non-zero. tasks_max must not be NULL. */
static inline bool tasks_max_isset(const TasksMax *tasks_max) {
        assert(tasks_max);

        return tasks_max->value != UINT64_MAX || tasks_max->scale != 0;
}
26 | ||
27 | uint64_t tasks_max_resolve(const TasksMax *tasks_max); | |
28 | ||
4ad49000 LP |
29 | typedef struct CGroupContext CGroupContext; |
30 | typedef struct CGroupDeviceAllow CGroupDeviceAllow; | |
13c31542 TH |
31 | typedef struct CGroupIODeviceWeight CGroupIODeviceWeight; |
32 | typedef struct CGroupIODeviceLimit CGroupIODeviceLimit; | |
6ae4283c | 33 | typedef struct CGroupIODeviceLatency CGroupIODeviceLatency; |
4ad49000 LP |
34 | typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight; |
35 | typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth; | |
b894ef1b | 36 | typedef struct CGroupBPFForeignProgram CGroupBPFForeignProgram; |
b18e9fc1 | 37 | typedef struct CGroupSocketBindItem CGroupSocketBindItem; |
8e274523 | 38 | |
/* Device access policy for a cgroup context (see CGroupContext.device_policy). */
typedef enum CGroupDevicePolicy {
        /* When devices are listed, those are allowed, plus the built-in ones; if none are listed,
         * everything is allowed. */
        CGROUP_DEVICE_POLICY_AUTO,

        /* Everything forbidden, except built-in ones and listed ones. */
        CGROUP_DEVICE_POLICY_CLOSED,

        /* Everything forbidden, except for the listed devices. */
        CGROUP_DEVICE_POLICY_STRICT,

        /* Number of valid policies; not a policy itself. */
        _CGROUP_DEVICE_POLICY_MAX,
        _CGROUP_DEVICE_POLICY_INVALID = -EINVAL,
} CGroupDevicePolicy;
8e274523 | 53 | |
/* Action selector passed to unit_cgroup_freezer_action(). */
typedef enum FreezerAction {
        FREEZER_FREEZE,
        FREEZER_THAW,

        /* Number of valid actions; not an action itself. */
        _FREEZER_ACTION_MAX,
        _FREEZER_ACTION_INVALID = -EINVAL,
} FreezerAction;
61 | ||
4ad49000 LP |
62 | struct CGroupDeviceAllow { |
63 | LIST_FIELDS(CGroupDeviceAllow, device_allow); | |
64 | char *path; | |
65 | bool r:1; | |
66 | bool w:1; | |
67 | bool m:1; | |
68 | }; | |
8c6db833 | 69 | |
13c31542 TH |
70 | struct CGroupIODeviceWeight { |
71 | LIST_FIELDS(CGroupIODeviceWeight, device_weights); | |
72 | char *path; | |
73 | uint64_t weight; | |
74 | }; | |
75 | ||
76 | struct CGroupIODeviceLimit { | |
77 | LIST_FIELDS(CGroupIODeviceLimit, device_limits); | |
78 | char *path; | |
9be57249 | 79 | uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX]; |
13c31542 TH |
80 | }; |
81 | ||
6ae4283c TH |
82 | struct CGroupIODeviceLatency { |
83 | LIST_FIELDS(CGroupIODeviceLatency, device_latencies); | |
84 | char *path; | |
85 | usec_t target_usec; | |
86 | }; | |
87 | ||
4ad49000 LP |
88 | struct CGroupBlockIODeviceWeight { |
89 | LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights); | |
90 | char *path; | |
d53d9474 | 91 | uint64_t weight; |
8e274523 LP |
92 | }; |
93 | ||
4ad49000 LP |
94 | struct CGroupBlockIODeviceBandwidth { |
95 | LIST_FIELDS(CGroupBlockIODeviceBandwidth, device_bandwidths); | |
96 | char *path; | |
979d0311 TH |
97 | uint64_t rbps; |
98 | uint64_t wbps; | |
4ad49000 | 99 | }; |
8e274523 | 100 | |
b894ef1b JK |
101 | struct CGroupBPFForeignProgram { |
102 | LIST_FIELDS(CGroupBPFForeignProgram, programs); | |
103 | uint32_t attach_type; | |
104 | char *bpffs_path; | |
105 | }; | |
106 | ||
b18e9fc1 JK |
107 | struct CGroupSocketBindItem { |
108 | LIST_FIELDS(CGroupSocketBindItem, socket_bind_items); | |
5587ce7f JK |
109 | int address_family; |
110 | int ip_protocol; | |
b18e9fc1 JK |
111 | uint16_t nr_ports; |
112 | uint16_t port_min; | |
113 | }; | |
114 | ||
/* Memory pressure watch mode; evaluated by cgroup_context_want_memory_pressure(). */
typedef enum CGroupPressureWatch {
        CGROUP_PRESSURE_WATCH_OFF,      /* → tells the service payload explicitly not to watch for memory pressure */
        CGROUP_PRESSURE_WATCH_AUTO,     /* → on if memory accounting is on anyway for the unit, otherwise off */
        CGROUP_PRESSURE_WATCH_ON,       /* → wanted regardless of whether memory accounting is on */
        CGROUP_PRESSURE_WATCH_SKIP,     /* → doesn't set up memory pressure watch, but also doesn't explicitly tell payload to avoid it */
        _CGROUP_PRESSURE_WATCH_MAX,
        _CGROUP_PRESSURE_WATCH_INVALID = -EINVAL,
} CGroupPressureWatch;
123 | ||
4ad49000 LP |
124 | struct CGroupContext { |
125 | bool cpu_accounting; | |
13c31542 | 126 | bool io_accounting; |
4ad49000 LP |
127 | bool blockio_accounting; |
128 | bool memory_accounting; | |
03a7b521 | 129 | bool tasks_accounting; |
6a48d82f | 130 | bool ip_accounting; |
8e274523 | 131 | |
afcfaa69 LP |
132 | /* Configures the memory.oom.group attribute (on unified) */ |
133 | bool memory_oom_group; | |
134 | ||
0a6991e0 LP |
135 | bool delegate; |
136 | CGroupMask delegate_controllers; | |
137 | CGroupMask disable_controllers; | |
a8b993dc | 138 | char *delegate_subgroup; |
0a6991e0 | 139 | |
13c31542 | 140 | /* For unified hierarchy */ |
66ebf6c0 TH |
141 | uint64_t cpu_weight; |
142 | uint64_t startup_cpu_weight; | |
143 | usec_t cpu_quota_per_sec_usec; | |
10f28641 | 144 | usec_t cpu_quota_period_usec; |
66ebf6c0 | 145 | |
047f5d63 | 146 | CPUSet cpuset_cpus; |
31d3a520 | 147 | CPUSet startup_cpuset_cpus; |
047f5d63 | 148 | CPUSet cpuset_mems; |
31d3a520 | 149 | CPUSet startup_cpuset_mems; |
047f5d63 | 150 | |
13c31542 TH |
151 | uint64_t io_weight; |
152 | uint64_t startup_io_weight; | |
153 | LIST_HEAD(CGroupIODeviceWeight, io_device_weights); | |
154 | LIST_HEAD(CGroupIODeviceLimit, io_device_limits); | |
6ae4283c | 155 | LIST_HEAD(CGroupIODeviceLatency, io_device_latencies); |
13c31542 | 156 | |
7ad5439e | 157 | uint64_t default_memory_min; |
c52db42b | 158 | uint64_t default_memory_low; |
53fda560 | 159 | uint64_t default_startup_memory_low; |
48422635 | 160 | uint64_t memory_min; |
da4d897e | 161 | uint64_t memory_low; |
53fda560 | 162 | uint64_t startup_memory_low; |
da4d897e | 163 | uint64_t memory_high; |
53fda560 | 164 | uint64_t startup_memory_high; |
da4d897e | 165 | uint64_t memory_max; |
53fda560 | 166 | uint64_t startup_memory_max; |
96e131ea | 167 | uint64_t memory_swap_max; |
53fda560 | 168 | uint64_t startup_memory_swap_max; |
d7fe0a67 | 169 | uint64_t memory_zswap_max; |
53fda560 | 170 | uint64_t startup_memory_zswap_max; |
da4d897e | 171 | |
60da07ec YW |
172 | bool default_memory_min_set:1; |
173 | bool default_memory_low_set:1; | |
53fda560 | 174 | bool default_startup_memory_low_set:1; |
60da07ec YW |
175 | bool memory_min_set:1; |
176 | bool memory_low_set:1; | |
53fda560 LB |
177 | bool startup_memory_low_set:1; |
178 | bool startup_memory_high_set:1; | |
179 | bool startup_memory_max_set:1; | |
180 | bool startup_memory_swap_max_set:1; | |
181 | bool startup_memory_zswap_max_set:1; | |
c52db42b | 182 | |
84ebe6f0 YW |
183 | Set *ip_address_allow; |
184 | Set *ip_address_deny; | |
185 | /* These two flags indicate that redundant entries have been removed from | |
186 | * ip_address_allow/ip_address_deny, i.e. in_addr_prefixes_reduce() has already been called. */ | |
187 | bool ip_address_allow_reduced; | |
188 | bool ip_address_deny_reduced; | |
6a48d82f | 189 | |
fab34748 KL |
190 | char **ip_filters_ingress; |
191 | char **ip_filters_egress; | |
b894ef1b | 192 | LIST_HEAD(CGroupBPFForeignProgram, bpf_foreign_programs); |
fab34748 | 193 | |
6f50d4f7 MV |
194 | Set *restrict_network_interfaces; |
195 | bool restrict_network_interfaces_is_allow_list; | |
196 | ||
13c31542 | 197 | /* For legacy hierarchies */ |
d53d9474 LP |
198 | uint64_t cpu_shares; |
199 | uint64_t startup_cpu_shares; | |
8e274523 | 200 | |
d53d9474 LP |
201 | uint64_t blockio_weight; |
202 | uint64_t startup_blockio_weight; | |
4ad49000 LP |
203 | LIST_HEAD(CGroupBlockIODeviceWeight, blockio_device_weights); |
204 | LIST_HEAD(CGroupBlockIODeviceBandwidth, blockio_device_bandwidths); | |
246aa6dd | 205 | |
4ad49000 | 206 | uint64_t memory_limit; |
64747e2d | 207 | |
4ad49000 LP |
208 | CGroupDevicePolicy device_policy; |
209 | LIST_HEAD(CGroupDeviceAllow, device_allow); | |
a931ad47 | 210 | |
b18e9fc1 JK |
211 | LIST_HEAD(CGroupSocketBindItem, socket_bind_allow); |
212 | LIST_HEAD(CGroupSocketBindItem, socket_bind_deny); | |
213 | ||
13c31542 | 214 | /* Common */ |
3a0f06c4 | 215 | TasksMax tasks_max; |
4d824a4e AZ |
216 | |
217 | /* Settings for systemd-oomd */ | |
218 | ManagedOOMMode moom_swap; | |
219 | ManagedOOMMode moom_mem_pressure; | |
d9d3f05d | 220 | uint32_t moom_mem_pressure_limit; /* Normalized to 2^32-1 == 100% */ |
4e806bfa | 221 | ManagedOOMPreference moom_preference; |
6bb00842 LP |
222 | |
223 | /* Memory pressure logic */ | |
224 | CGroupPressureWatch memory_pressure_watch; | |
225 | usec_t memory_pressure_threshold_usec; | |
226 | /* NB: For now we don't make the period configurable, not the type, nor do we allow multiple | |
227 | * triggers, nor triggers for non-memory pressure. We might add that later. */ | |
dc7d69b3 TM |
228 | |
229 | NFTSetContext nft_set_context; | |
4ad49000 | 230 | }; |
64747e2d | 231 | |
/* Used when querying IP accounting data; selects the counter returned by
 * unit_get_ip_accounting(). */
typedef enum CGroupIPAccountingMetric {
        CGROUP_IP_INGRESS_BYTES,
        CGROUP_IP_INGRESS_PACKETS,
        CGROUP_IP_EGRESS_BYTES,
        CGROUP_IP_EGRESS_PACKETS,
        _CGROUP_IP_ACCOUNTING_METRIC_MAX,
        _CGROUP_IP_ACCOUNTING_METRIC_INVALID = -EINVAL,
} CGroupIPAccountingMetric;
241 | ||
/* Used when querying IO accounting data; selects the counter returned by
 * unit_get_io_accounting(). */
typedef enum CGroupIOAccountingMetric {
        CGROUP_IO_READ_BYTES,
        CGROUP_IO_WRITE_BYTES,
        CGROUP_IO_READ_OPERATIONS,
        CGROUP_IO_WRITE_OPERATIONS,
        _CGROUP_IO_ACCOUNTING_METRIC_MAX,
        _CGROUP_IO_ACCOUNTING_METRIC_INVALID = -EINVAL,
} CGroupIOAccountingMetric;
251 | ||
90a8f0b9 FS |
252 | typedef struct Unit Unit; |
253 | typedef struct Manager Manager; | |
8e274523 | 254 | |
10f28641 FB |
255 | usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period); |
256 | ||
4ad49000 LP |
257 | void cgroup_context_init(CGroupContext *c); |
258 | void cgroup_context_done(CGroupContext *c); | |
bc0623df | 259 | void cgroup_context_dump(Unit *u, FILE* f, const char *prefix); |
b18e9fc1 | 260 | void cgroup_context_dump_socket_bind_item(const CGroupSocketBindItem *item, FILE *f); |
bc432dc7 | 261 | |
4ad49000 | 262 | void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a); |
13c31542 TH |
263 | void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w); |
264 | void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l); | |
6ae4283c | 265 | void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l); |
4ad49000 LP |
266 | void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w); |
267 | void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b); | |
b894ef1b | 268 | void cgroup_context_remove_bpf_foreign_program(CGroupContext *c, CGroupBPFForeignProgram *p); |
b18e9fc1 | 269 | void cgroup_context_remove_socket_bind(CGroupSocketBindItem **head); |
8e274523 | 270 | |
6bb00842 LP |
271 | static inline bool cgroup_context_want_memory_pressure(const CGroupContext *c) { |
272 | assert(c); | |
273 | ||
274 | return c->memory_pressure_watch == CGROUP_PRESSURE_WATCH_ON || | |
275 | (c->memory_pressure_watch == CGROUP_PRESSURE_WATCH_AUTO && c->memory_accounting); | |
276 | } | |
277 | ||
fd870bac | 278 | int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode); |
b894ef1b | 279 | int cgroup_add_bpf_foreign_program(CGroupContext *c, uint32_t attach_type, const char *path); |
fd870bac | 280 | |
4e806bfa | 281 | void cgroup_oomd_xattr_apply(Unit *u, const char *cgroup_path); |
523ea123 | 282 | int cgroup_log_xattr_apply(Unit *u, const char *cgroup_path); |
4e806bfa | 283 | |
dc7d69b3 TM |
284 | void cgroup_modify_nft_set(Unit *u, bool add); |
285 | ||
efdb0237 | 286 | CGroupMask unit_get_own_mask(Unit *u); |
02638280 | 287 | CGroupMask unit_get_delegate_mask(Unit *u); |
efdb0237 | 288 | CGroupMask unit_get_members_mask(Unit *u); |
02638280 | 289 | CGroupMask unit_get_siblings_mask(Unit *u); |
c72703e2 CD |
290 | CGroupMask unit_get_ancestor_disable_mask(Unit *u); |
291 | ||
efdb0237 LP |
292 | CGroupMask unit_get_target_mask(Unit *u); |
293 | CGroupMask unit_get_enable_mask(Unit *u); | |
bc432dc7 | 294 | |
5af88058 | 295 | void unit_invalidate_cgroup_members_masks(Unit *u); |
efdb0237 | 296 | |
4c591f39 | 297 | void unit_add_family_to_cgroup_realize_queue(Unit *u); |
27adcc97 | 298 | |
6592b975 | 299 | const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask); |
1a56b0c0 | 300 | int unit_default_cgroup_path(const Unit *u, char **ret); |
efdb0237 | 301 | int unit_set_cgroup_path(Unit *u, const char *path); |
a4634b21 | 302 | int unit_pick_cgroup_path(Unit *u); |
efdb0237 | 303 | |
0a1eb06d | 304 | int unit_realize_cgroup(Unit *u); |
efdb0237 LP |
305 | void unit_prune_cgroup(Unit *u); |
306 | int unit_watch_cgroup(Unit *u); | |
afcfaa69 | 307 | int unit_watch_cgroup_memory(Unit *u); |
020b2e41 | 308 | void unit_add_to_cgroup_realize_queue(Unit *u); |
efdb0237 | 309 | |
e08dabfe AZ |
310 | void unit_release_cgroup(Unit *u); |
311 | /* Releases the cgroup only if it is recursively empty. | |
312 | * Returns true if the cgroup was released, false otherwise. */ | |
313 | bool unit_maybe_release_cgroup(Unit *u); | |
314 | ||
09e24654 | 315 | void unit_add_to_cgroup_empty_queue(Unit *u); |
fe8d22fb | 316 | int unit_check_oomd_kill(Unit *u); |
2ba6ae6b | 317 | int unit_check_oom(Unit *u); |
09e24654 | 318 | |
6592b975 | 319 | int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path); |
8e274523 | 320 | |
4ad49000 LP |
321 | int manager_setup_cgroup(Manager *m); |
322 | void manager_shutdown_cgroup(Manager *m, bool delete); | |
6dde1f33 | 323 | |
91a6073e | 324 | unsigned manager_dispatch_cgroup_realize_queue(Manager *m); |
4fbf50b3 | 325 | |
4ad49000 | 326 | Unit *manager_get_unit_by_cgroup(Manager *m, const char *cgroup); |
495e75ed LP |
327 | Unit *manager_get_unit_by_pidref_cgroup(Manager *m, PidRef *pid); |
328 | Unit *manager_get_unit_by_pidref_watching(Manager *m, PidRef *pid); | |
329 | Unit* manager_get_unit_by_pidref(Manager *m, PidRef *pid); | |
4ad49000 | 330 | Unit* manager_get_unit_by_pid(Manager *m, pid_t pid); |
8e274523 | 331 | |
7ad5439e | 332 | uint64_t unit_get_ancestor_memory_min(Unit *u); |
c52db42b | 333 | uint64_t unit_get_ancestor_memory_low(Unit *u); |
53fda560 | 334 | uint64_t unit_get_ancestor_startup_memory_low(Unit *u); |
c52db42b | 335 | |
495e75ed | 336 | int unit_search_main_pid(Unit *u, PidRef *ret); |
efdb0237 | 337 | int unit_watch_all_pids(Unit *u); |
8e274523 | 338 | |
11aef522 LP |
339 | int unit_synthesize_cgroup_empty_event(Unit *u); |
340 | ||
5ad096b3 | 341 | int unit_get_memory_current(Unit *u, uint64_t *ret); |
93ff34e4 | 342 | int unit_get_memory_available(Unit *u, uint64_t *ret); |
03a7b521 | 343 | int unit_get_tasks_current(Unit *u, uint64_t *ret); |
5ad096b3 | 344 | int unit_get_cpu_usage(Unit *u, nsec_t *ret); |
fbe14fc9 | 345 | int unit_get_io_accounting(Unit *u, CGroupIOAccountingMetric metric, bool allow_cache, uint64_t *ret); |
906c06f6 DM |
346 | int unit_get_ip_accounting(Unit *u, CGroupIPAccountingMetric metric, uint64_t *ret); |
347 | ||
348 | int unit_reset_cpu_accounting(Unit *u); | |
349 | int unit_reset_ip_accounting(Unit *u); | |
fbe14fc9 | 350 | int unit_reset_io_accounting(Unit *u); |
9b2559a1 | 351 | int unit_reset_accounting(Unit *u); |
5ad096b3 | 352 | |
/* Evaluates to the member 'name' of the unit's CGroupContext, or to false if
 * unit_get_cgroup_context(u) returns NULL. The local uses a macro-specific name so that an
 * argument expression mentioning a caller variable called 'cc' is not captured by it. */
#define UNIT_CGROUP_BOOL(u, name)                                                       \
        ({                                                                              \
                CGroupContext *_unit_cgroup_bool_cc_ = unit_get_cgroup_context(u);      \
                _unit_cgroup_bool_cc_ ? _unit_cgroup_bool_cc_->name : false;            \
        })
e9db43d5 | 358 | |
611c4f8a LP |
359 | bool manager_owns_host_root_cgroup(Manager *m); |
360 | bool unit_has_host_root_cgroup(Unit *u); | |
f3725e64 | 361 | |
9dfb6a3a PM |
362 | bool unit_has_startup_cgroup_constraints(Unit *u); |
363 | ||
efdb0237 LP |
364 | int manager_notify_cgroup_empty(Manager *m, const char *group); |
365 | ||
e7ab4d1a | 366 | void unit_invalidate_cgroup(Unit *u, CGroupMask m); |
906c06f6 | 367 | void unit_invalidate_cgroup_bpf(Unit *u); |
e7ab4d1a LP |
368 | |
369 | void manager_invalidate_startup_units(Manager *m); | |
370 | ||
4ad49000 LP |
371 | const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_; |
372 | CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_; | |
1d9cc876 | 373 | |
869f52f2 DS |
374 | void unit_cgroup_catchup(Unit *u); |
375 | ||
1d9cc876 | 376 | bool unit_cgroup_delegate(Unit *u); |
da8e1782 MO |
377 | |
378 | int compare_job_priority(const void *a, const void *b); | |
047f5d63 PH |
379 | |
380 | int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name); | |
d9e45bc3 MS |
381 | int unit_cgroup_freezer_action(Unit *u, FreezerAction action); |
382 | ||
383 | const char* freezer_action_to_string(FreezerAction a) _const_; | |
384 | FreezerAction freezer_action_from_string(const char *s) _pure_; | |
6bb00842 LP |
385 | |
386 | const char* cgroup_pressure_watch_to_string(CGroupPressureWatch a) _const_; | |
387 | CGroupPressureWatch cgroup_pressure_watch_from_string(const char *s) _pure_; |