1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
11 #include "cgroup-util.h"
17 #include "ratelimit.h"
/* Forward declarations, so these type names can be used before (or without)
 * their full definitions being visible. */
struct libmnt_monitor;
typedef struct Unit Unit;

/* Enforce an upper limit on how many names we allow */
#define MANAGER_MAX_NAMES 131072 /* 128K */

typedef struct Manager Manager;
28 /* An externally visible state. We don't actually maintain this as a state variable, but derive it from various fields. */
30 typedef enum ManagerState
{
38 _MANAGER_STATE_INVALID
= -EINVAL
,
41 typedef enum ManagerObjective
{
51 _MANAGER_OBJECTIVE_MAX
,
52 _MANAGER_OBJECTIVE_INVALID
= -EINVAL
,
55 typedef enum StatusType
{
56 STATUS_TYPE_EPHEMERAL
,
59 STATUS_TYPE_EMERGENCY
,
62 typedef enum OOMPolicy
{
63 OOM_CONTINUE
, /* The kernel or systemd-oomd kills the process it wants to kill, and that's it */
64 OOM_STOP
, /* The kernel or systemd-oomd kills the process it wants to kill, and we stop the unit */
65 OOM_KILL
, /* The kernel or systemd-oomd kills the process it wants to kill, and all others in the unit, and we stop the unit */
67 _OOM_POLICY_INVALID
= -EINVAL
,
/* Notes on the manager timestamps:
 *
 * 1. TIMESTAMP_FIRMWARE, TIMESTAMP_LOADER, TIMESTAMP_KERNEL, TIMESTAMP_INITRD,
 *    TIMESTAMP_SECURITY_START, and TIMESTAMP_SECURITY_FINISH are set only when
 *    the manager is the system manager and is not running in a container environment.
 *
 * 2. The monotonic timestamp of TIMESTAMP_KERNEL is always zero.
 *
 * 3. The realtime timestamp of TIMESTAMP_KERNEL will be unset if the system does not
 *    have an RTC. (NOTE(review): the end of this sentence was missing; it is completed
 *    by analogy with note 4 - confirm.)
 *
 * 4. TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER will be unset if the system does not
 *    have an RTC, or systemd is built without EFI support.
 *
 * 5. The monotonic timestamps of TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER are stored as
 *    the negative of the actual value.
 *
 * 6. TIMESTAMP_USERSPACE is the timestamp of when the manager was started.
 *
 * 7. TIMESTAMP_INITRD_* are set only when the system is booted with an initrd.
 *
 * The enumerators are consecutive starting at 0, so _MANAGER_TIMESTAMP_MAX equals the
 * number of real timestamps and can be used as an array size. _MANAGER_TIMESTAMP_INVALID
 * is the negative errno value -EINVAL.
 */
typedef enum ManagerTimestamp {
        MANAGER_TIMESTAMP_FIRMWARE,
        MANAGER_TIMESTAMP_LOADER,
        MANAGER_TIMESTAMP_KERNEL,
        MANAGER_TIMESTAMP_INITRD,
        MANAGER_TIMESTAMP_USERSPACE,
        MANAGER_TIMESTAMP_FINISH,

        MANAGER_TIMESTAMP_SECURITY_START,
        MANAGER_TIMESTAMP_SECURITY_FINISH,
        MANAGER_TIMESTAMP_GENERATORS_START,
        MANAGER_TIMESTAMP_GENERATORS_FINISH,
        MANAGER_TIMESTAMP_UNITS_LOAD_START,
        MANAGER_TIMESTAMP_UNITS_LOAD_FINISH,
        MANAGER_TIMESTAMP_UNITS_LOAD,

        MANAGER_TIMESTAMP_INITRD_SECURITY_START,
        MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH,
        MANAGER_TIMESTAMP_INITRD_GENERATORS_START,
        MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH,
        MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START,
        MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH,
        _MANAGER_TIMESTAMP_MAX,
        _MANAGER_TIMESTAMP_INVALID = -EINVAL,
} ManagerTimestamp;
117 typedef enum WatchdogType
{
127 #include "path-lookup.h"
128 #include "show-status.h"
129 #include "unit-name.h"
/* Bit flags describing how much of the manager is exercised in a test run.
 * MANAGER_TEST_NORMAL has no bits set; every other flag occupies its own bit,
 * and MANAGER_TEST_FULL is the union of the BASIC, ENV_GENERATORS and
 * GENERATORS bits (it includes neither MINIMAL nor IGNORE_DEPENDENCIES). */
typedef enum ManagerTestRunFlags {
        MANAGER_TEST_NORMAL                  = 0,       /* run normally */
        MANAGER_TEST_RUN_MINIMAL             = 1 << 0,  /* create basic data structures */
        MANAGER_TEST_RUN_BASIC               = 1 << 1,  /* interact with the environment */
        MANAGER_TEST_RUN_ENV_GENERATORS      = 1 << 2,  /* also run env generators */
        MANAGER_TEST_RUN_GENERATORS          = 1 << 3,  /* also run unit generators */
        MANAGER_TEST_RUN_IGNORE_DEPENDENCIES = 1 << 4,  /* run while ignoring dependencies */
        MANAGER_TEST_FULL = MANAGER_TEST_RUN_BASIC | MANAGER_TEST_RUN_ENV_GENERATORS | MANAGER_TEST_RUN_GENERATORS,
} ManagerTestRunFlags;
141 assert_cc((MANAGER_TEST_FULL
& UINT8_MAX
) == MANAGER_TEST_FULL
);
144 /* Note that the set of units we know of is allowed to be
145 * inconsistent. However, the subset of it that is loaded may
146 * not be, and neither may the list of jobs. */
148 /* Active jobs and units */
149 Hashmap
*units
; /* name string => Unit object n:1 */
150 Hashmap
*units_by_invocation_id
;
151 Hashmap
*jobs
; /* job id => Job object 1:1 */
153 /* To make it easy to iterate through the units of a specific
154 * type we maintain a per type linked list */
155 LIST_HEAD(Unit
, units_by_type
[_UNIT_TYPE_MAX
]);
157 /* Units that need to be loaded */
158 LIST_HEAD(Unit
, load_queue
); /* this is actually more a stack than a queue, but uh. */
160 /* Jobs that need to be run */
161 struct Prioq
*run_queue
;
163 /* Units and jobs that have not yet been announced via
164 * D-Bus. When something about a job changes it is added here
165 * if it is not in there yet. This allows easy coalescing of
166 * D-Bus change signals. */
167 LIST_HEAD(Unit
, dbus_unit_queue
);
168 LIST_HEAD(Job
, dbus_job_queue
);
170 /* Units to remove */
171 LIST_HEAD(Unit
, cleanup_queue
);
173 /* Units and jobs to check when doing GC */
174 LIST_HEAD(Unit
, gc_unit_queue
);
175 LIST_HEAD(Job
, gc_job_queue
);
177 /* Units that should be realized */
178 LIST_HEAD(Unit
, cgroup_realize_queue
);
180 /* Units whose cgroup ran empty */
181 LIST_HEAD(Unit
, cgroup_empty_queue
);
183 /* Units whose memory.event fired */
184 LIST_HEAD(Unit
, cgroup_oom_queue
);
186 /* Target units whose default target dependencies haven't been set yet */
187 LIST_HEAD(Unit
, target_deps_queue
);
189 /* Units that might be subject to StopWhenUnneeded= clean-up */
190 LIST_HEAD(Unit
, stop_when_unneeded_queue
);
192 /* Units which are upheld by another unit, and which we might need to act on */
193 LIST_HEAD(Unit
, start_when_upheld_queue
);
195 /* Units that have BindsTo= another unit, and might need to be shutdown because the bound unit is not active. */
196 LIST_HEAD(Unit
, stop_when_bound_queue
);
200 /* This maps PIDs we care about to the units that are interested in them. We allow multiple units to be interested in
201 * the same PID and multiple PIDs to be relevant to the same unit. Since in most cases only a single unit will
202 * be interested in the same PID we use a somewhat special encoding here: the first unit interested in a PID is
203 * stored directly in the hashmap, keyed by the PID unmodified. If there are other units interested too they'll
204 * be stored in a NULL-terminated array, and keyed by the negative PID. This is safe as pid_t is signed and
205 * negative PIDs are not used for regular processes but process groups, which we don't care about in this
206 * context, but this allows us to use the negative range for our own purposes. */
207 Hashmap
*watch_pids
; /* pid => unit as well as -pid => array of units */
209 /* A set containing all units whose cgroup should be refreshed after startup */
212 /* A set which contains all currently failed units */
215 sd_event_source
*run_queue_event_source
;
219 sd_event_source
*notify_event_source
;
221 int cgroups_agent_fd
;
222 sd_event_source
*cgroups_agent_event_source
;
225 sd_event_source
*signal_event_source
;
227 sd_event_source
*sigchld_event_source
;
229 sd_event_source
*time_change_event_source
;
231 sd_event_source
*timezone_change_event_source
;
233 sd_event_source
*jobs_in_progress_event_source
;
235 int user_lookup_fds
[2];
236 sd_event_source
*user_lookup_event_source
;
238 RuntimeScope runtime_scope
;
240 LookupPaths lookup_paths
;
241 Hashmap
*unit_id_map
;
242 Hashmap
*unit_name_map
;
243 Set
*unit_path_cache
;
244 uint64_t unit_cache_timestamp_hash
;
246 char **transient_environment
; /* The environment, as determined from config files, kernel cmdline and environment generators */
247 char **client_environment
; /* Environment variables created by clients through the bus API */
249 usec_t watchdog
[_WATCHDOG_TYPE_MAX
];
250 usec_t watchdog_overridden
[_WATCHDOG_TYPE_MAX
];
251 char *watchdog_pretimeout_governor
;
252 char *watchdog_pretimeout_governor_overridden
;
254 dual_timestamp timestamps
[_MANAGER_TIMESTAMP_MAX
];
256 /* Data specific to the device subsystem */
257 sd_device_monitor
*device_monitor
;
258 Hashmap
*devices_by_sysfs
;
260 /* Data specific to the mount subsystem */
261 struct libmnt_monitor
*mount_monitor
;
262 sd_event_source
*mount_event_source
;
264 /* Data specific to the swap filesystem */
266 sd_event_source
*swap_event_source
;
267 Hashmap
*swaps_by_devnode
;
269 /* Data specific to the D-Bus subsystem */
270 sd_bus
*api_bus
, *system_bus
;
272 int private_listen_fd
;
273 sd_event_source
*private_listen_event_source
;
275 /* Contains all the clients that are subscribed to signals via
276 the API bus. Note that private bus connections are always
277 considered subscribed, since they are only very short-lived,
278 and it is much simpler that way. */
279 sd_bus_track
*subscribed
;
280 char **deserialized_subscribed
;
282 /* This is used during reloading: before the reload we queue
283 * the reply message here, and afterwards we send it */
284 sd_bus_message
*pending_reload_message
;
286 Hashmap
*watch_bus
; /* D-Bus names => Unit object n:1 */
288 bool send_reloading_done
;
290 uint32_t current_job_id
;
291 uint32_t default_unit_job_id
;
293 /* Data specific to the Automount subsystem */
296 /* Data specific to the cgroup subsystem */
297 Hashmap
*cgroup_unit
;
298 CGroupMask cgroup_supported
;
301 /* Notifications from cgroups, when the unified hierarchy is used, are done via inotify. */
302 int cgroup_inotify_fd
;
303 sd_event_source
*cgroup_inotify_event_source
;
305 /* Maps for finding the unit for each inotify watch descriptor for the cgroup.events and
306 * memory.events cgroupv2 attributes. */
307 Hashmap
*cgroup_control_inotify_wd_unit
;
308 Hashmap
*cgroup_memory_inotify_wd_unit
;
310 /* A defer event for handling cgroup empty events and processing them after SIGCHLD in all cases. */
311 sd_event_source
*cgroup_empty_event_source
;
312 sd_event_source
*cgroup_oom_event_source
;
314 /* Make sure the user cannot accidentally unmount our cgroup file system */
320 /* The stat() data the last time we saw /etc/localtime */
321 usec_t etc_localtime_mtime
;
322 bool etc_localtime_accessible
;
324 ManagerObjective objective
;
327 bool dispatching_load_queue
;
331 /* Have we already sent out the READY=1 notification? */
334 /* Was the last status sent "STATUS=Ready."? */
337 /* Have we already printed the taint line if necessary? */
340 /* Have we ever changed the "kernel.pid_max" sysctl? */
341 bool sysctl_pid_max_changed
;
343 ManagerTestRunFlags test_run_flags
;
345 /* If non-zero, exit with the following value when the systemd
346 * process terminates. Useful for containers: systemd-nspawn could get
347 * the return value. */
348 uint8_t return_value
;
350 ShowStatus show_status
;
351 ShowStatus show_status_overridden
;
352 StatusUnitFormat status_unit_format
;
354 bool no_console_output
;
355 bool service_watchdogs
;
357 ExecOutput default_std_output
, default_std_error
;
359 usec_t default_restart_usec
, default_timeout_start_usec
, default_timeout_stop_usec
;
360 usec_t default_device_timeout_usec
;
361 usec_t default_timeout_abort_usec
;
362 bool default_timeout_abort_set
;
364 usec_t default_start_limit_interval
;
365 unsigned default_start_limit_burst
;
367 bool default_cpu_accounting
;
368 bool default_memory_accounting
;
369 bool default_io_accounting
;
370 bool default_blockio_accounting
;
371 bool default_tasks_accounting
;
372 bool default_ip_accounting
;
374 TasksMax default_tasks_max
;
375 usec_t default_timer_accuracy_usec
;
377 OOMPolicy default_oom_policy
;
378 int default_oom_score_adjust
;
379 bool default_oom_score_adjust_set
;
381 CGroupPressureWatch default_memory_pressure_watch
;
382 usec_t default_memory_pressure_threshold_usec
;
384 int original_log_level
;
385 LogTarget original_log_target
;
386 bool log_level_overridden
;
387 bool log_target_overridden
;
389 struct rlimit
*rlimit
[_RLIMIT_MAX
];
391 /* non-zero if we are reloading or reexecuting, */
394 unsigned n_installed_jobs
;
395 unsigned n_failed_jobs
;
397 /* Jobs in progress watching */
398 unsigned n_running_jobs
;
399 unsigned n_on_console
;
400 unsigned jobs_in_progress_iteration
;
402 /* Do we have any outstanding password prompts? */
403 int have_ask_password
;
404 int ask_password_inotify_fd
;
405 sd_event_source
*ask_password_event_source
;
407 /* Type=idle pipes */
409 sd_event_source
*idle_pipe_event_source
;
412 char *switch_root_init
;
414 /* This is true before and after switching root. */
417 /* This maps all possible path prefixes to the units needing
418 * them. It's a hashmap with a path string as key and a Set as
419 * value where Unit objects are contained. */
420 Hashmap
*units_requiring_mounts_for
;
422 /* Used for processing polkit authorization responses */
423 Hashmap
*polkit_registry
;
425 /* Dynamic users/groups, indexed by their name */
426 Hashmap
*dynamic_users
;
428 /* Keep track of all UIDs and GIDs any of our services currently use. This is useful for the RemoveIPC= logic. */
432 /* ExecSharedRuntime, indexed by their owner unit id */
433 Hashmap
*exec_shared_runtime_by_id
;
435 /* When the user hits C-A-D more than 7 times per 2s, do something immediately... */
436 RateLimit ctrl_alt_del_ratelimit
;
437 EmergencyAction cad_burst_action
;
439 const char *unit_log_field
;
440 const char *unit_log_format_string
;
442 const char *invocation_log_field
;
443 const char *invocation_log_format_string
;
445 int first_boot
; /* tri-state */
447 /* Prefixes of e.g. RuntimeDirectory= */
448 char *prefix
[_EXEC_DIRECTORY_TYPE_MAX
];
449 char *received_credentials_directory
;
450 char *received_encrypted_credentials_directory
;
452 /* Used in the SIGCHLD and sd_notify() message invocation logic to avoid that we dispatch the same event
453 * multiple times on the same unit. */
457 VarlinkServer
*varlink_server
;
458 /* When we're a system manager, this object manages the subscription from systemd-oomd to PID1 that's
459 * used to report changes in ManagedOOM settings (systemd server - oomd client). When
460 * we're a user manager, this object manages the client connection from the user manager to
461 * systemd-oomd to report changes in ManagedOOM settings (systemd client - oomd server). */
462 Varlink
*managed_oom_varlink
;
464 /* Reference to RestrictFileSystems= BPF program */
465 struct restrict_fs_bpf
*restrict_fs
;
467 char *default_smack_process_label
;
469 /* Allow users to configure a rate limit for Reload() operations */
470 RateLimit reload_ratelimit
;
472 sd_event_source
*memory_pressure_event_source
;
475 static inline usec_t
manager_default_timeout_abort_usec(Manager
*m
) {
477 return m
->default_timeout_abort_set
? m
->default_timeout_abort_usec
: m
->default_timeout_stop_usec
;
/* Predicates on a Manager pointer. Each macro parenthesizes its argument and
 * evaluates it exactly once. */

/* Which runtime scope this manager instance serves */
#define MANAGER_IS_SYSTEM(m) ((m)->runtime_scope == RUNTIME_SCOPE_SYSTEM)
#define MANAGER_IS_USER(m) ((m)->runtime_scope == RUNTIME_SCOPE_USER)

/* True while the n_reloading counter is positive */
#define MANAGER_IS_RELOADING(m) ((m)->n_reloading > 0)

/* True once the MANAGER_TIMESTAMP_FINISH entry of the timestamps array is set */
#define MANAGER_IS_FINISHED(m) (dual_timestamp_is_set((m)->timestamps + MANAGER_TIMESTAMP_FINISH))

/* The objective is set to OK as soon as we enter the main loop, and set otherwise as soon as we are done with it */
#define MANAGER_IS_RUNNING(m) ((m)->objective == MANAGER_OK)

#define MANAGER_IS_SWITCHING_ROOT(m) ((m)->switching_root)

/* Any non-zero test_run_flags value counts as a test run */
#define MANAGER_IS_TEST_RUN(m) ((m)->test_run_flags != 0)
494 static inline usec_t
manager_default_timeout(RuntimeScope scope
) {
495 return scope
== RUNTIME_SCOPE_SYSTEM
? DEFAULT_TIMEOUT_USEC
: DEFAULT_USER_TIMEOUT_USEC
;
498 int manager_new(RuntimeScope scope
, ManagerTestRunFlags test_run_flags
, Manager
**m
);
499 Manager
* manager_free(Manager
*m
);
500 DEFINE_TRIVIAL_CLEANUP_FUNC(Manager
*, manager_free
);
502 int manager_startup(Manager
*m
, FILE *serialization
, FDSet
*fds
, const char *root
);
504 Job
*manager_get_job(Manager
*m
, uint32_t id
);
505 Unit
*manager_get_unit(Manager
*m
, const char *name
);
507 int manager_get_job_from_dbus_path(Manager
*m
, const char *s
, Job
**_j
);
509 bool manager_unit_cache_should_retry_load(Unit
*u
);
510 int manager_load_unit_prepare(Manager
*m
, const char *name
, const char *path
, sd_bus_error
*e
, Unit
**ret
);
511 int manager_load_unit(Manager
*m
, const char *name
, const char *path
, sd_bus_error
*e
, Unit
**ret
);
512 int manager_load_startable_unit_or_warn(Manager
*m
, const char *name
, const char *path
, Unit
**ret
);
513 int manager_load_unit_from_dbus_path(Manager
*m
, const char *s
, sd_bus_error
*e
, Unit
**_u
);
515 int manager_add_job(Manager
*m
, JobType type
, Unit
*unit
, JobMode mode
, Set
*affected_jobs
, sd_bus_error
*e
, Job
**_ret
);
516 int manager_add_job_by_name(Manager
*m
, JobType type
, const char *name
, JobMode mode
, Set
*affected_jobs
, sd_bus_error
*e
, Job
**_ret
);
517 int manager_add_job_by_name_and_warn(Manager
*m
, JobType type
, const char *name
, JobMode mode
, Set
*affected_jobs
, Job
**ret
);
518 int manager_propagate_reload(Manager
*m
, Unit
*unit
, JobMode mode
, sd_bus_error
*e
);
520 void manager_clear_jobs(Manager
*m
);
522 void manager_unwatch_pid(Manager
*m
, pid_t pid
);
524 unsigned manager_dispatch_load_queue(Manager
*m
);
526 int manager_setup_memory_pressure_event_source(Manager
*m
);
528 int manager_default_environment(Manager
*m
);
529 int manager_transient_environment_add(Manager
*m
, char **plus
);
530 int manager_client_environment_modify(Manager
*m
, char **minus
, char **plus
);
531 int manager_get_effective_environment(Manager
*m
, char ***ret
);
533 int manager_set_default_smack_process_label(Manager
*m
, const char *label
);
535 int manager_set_default_rlimits(Manager
*m
, struct rlimit
**default_rlimit
);
537 void manager_trigger_run_queue(Manager
*m
);
539 int manager_loop(Manager
*m
);
541 int manager_reload(Manager
*m
);
542 Manager
* manager_reloading_start(Manager
*m
);
543 void manager_reloading_stopp(Manager
**m
);
545 void manager_reset_failed(Manager
*m
);
547 void manager_send_unit_audit(Manager
*m
, Unit
*u
, int type
, bool success
);
548 void manager_send_unit_plymouth(Manager
*m
, Unit
*u
);
550 bool manager_unit_inactive_or_pending(Manager
*m
, const char *name
);
552 void manager_check_finished(Manager
*m
);
553 void manager_send_reloading(Manager
*m
);
555 void disable_printk_ratelimit(void);
556 void manager_recheck_dbus(Manager
*m
);
557 void manager_recheck_journal(Manager
*m
);
559 bool manager_get_show_status_on(Manager
*m
);
560 void manager_set_show_status(Manager
*m
, ShowStatus mode
, const char *reason
);
561 void manager_override_show_status(Manager
*m
, ShowStatus mode
, const char *reason
);
563 void manager_set_first_boot(Manager
*m
, bool b
);
564 void manager_set_switching_root(Manager
*m
, bool switching_root
);
566 void manager_status_printf(Manager
*m
, StatusType type
, const char *status
, const char *format
, ...) _printf_(4,5);
568 Set
*manager_get_units_requiring_mounts_for(Manager
*m
, const char *path
);
570 ManagerState
manager_state(Manager
*m
);
572 int manager_update_failed_units(Manager
*m
, Unit
*u
, bool failed
);
574 void manager_unref_uid(Manager
*m
, uid_t uid
, bool destroy_now
);
575 int manager_ref_uid(Manager
*m
, uid_t uid
, bool clean_ipc
);
577 void manager_unref_gid(Manager
*m
, gid_t gid
, bool destroy_now
);
578 int manager_ref_gid(Manager
*m
, gid_t gid
, bool clean_ipc
);
580 char* manager_taint_string(const Manager
*m
);
582 void manager_ref_console(Manager
*m
);
583 void manager_unref_console(Manager
*m
);
585 void manager_override_log_level(Manager
*m
, int level
);
586 void manager_restore_original_log_level(Manager
*m
);
588 void manager_override_log_target(Manager
*m
, LogTarget target
);
589 void manager_restore_original_log_target(Manager
*m
);
591 const char *manager_state_to_string(ManagerState m
) _const_
;
592 ManagerState
manager_state_from_string(const char *s
) _pure_
;
594 const char *manager_get_confirm_spawn(Manager
*m
);
595 bool manager_is_confirm_spawn_disabled(Manager
*m
);
596 void manager_disable_confirm_spawn(void);
598 const char *manager_timestamp_to_string(ManagerTimestamp m
) _const_
;
599 ManagerTimestamp
manager_timestamp_from_string(const char *s
) _pure_
;
600 ManagerTimestamp
manager_timestamp_initrd_mangle(ManagerTimestamp s
);
602 usec_t
manager_get_watchdog(Manager
*m
, WatchdogType t
);
603 void manager_set_watchdog(Manager
*m
, WatchdogType t
, usec_t timeout
);
604 void manager_override_watchdog(Manager
*m
, WatchdogType t
, usec_t timeout
);
605 int manager_set_watchdog_pretimeout_governor(Manager
*m
, const char *governor
);
606 int manager_override_watchdog_pretimeout_governor(Manager
*m
, const char *governor
);
608 const char* oom_policy_to_string(OOMPolicy i
) _const_
;
609 OOMPolicy
oom_policy_from_string(const char *s
) _pure_
;