1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
11 #include "cgroup-util.h"
17 #include "ratelimit.h"
/* Forward declaration of struct libmnt_monitor; within the visible part of this file it is only used through a
 * pointer, so the full definition is not needed here. */
20 struct libmnt_monitor
;
/* Forward declaration so that Unit can be named before its full definition is available. */
21 typedef struct Unit Unit
;
23 /* Enforce an upper limit on how many names we allow */
24 #define MANAGER_MAX_NAMES 131072 /* 128K */
/* Forward declaration so that Manager can be named (e.g. in the prototypes below) before the struct is defined. */
26 typedef struct Manager Manager
;
28 /* An externally visible state. We don't actually maintain this as state variable, but derive it from various fields
30 typedef enum ManagerState
{
38 _MANAGER_STATE_INVALID
= -EINVAL
,
41 typedef enum ManagerObjective
{
51 _MANAGER_OBJECTIVE_MAX
,
52 _MANAGER_OBJECTIVE_INVALID
= -EINVAL
,
55 typedef enum StatusType
{
56 STATUS_TYPE_EPHEMERAL
,
59 STATUS_TYPE_EMERGENCY
,
62 typedef enum OOMPolicy
{
63 OOM_CONTINUE
, /* The kernel or systemd-oomd kills the process it wants to kill, and that's it */
64 OOM_STOP
, /* The kernel or systemd-oomd kills the process it wants to kill, and we stop the unit */
65 OOM_KILL
, /* The kernel or systemd-oomd kills the process it wants to kill, and all others in the unit, and we stop the unit */
67 _OOM_POLICY_INVALID
= -EINVAL
,
71 * 1. TIMESTAMP_FIRMWARE, TIMESTAMP_LOADER, TIMESTAMP_KERNEL, TIMESTAMP_INITRD,
72 * TIMESTAMP_SECURITY_START, and TIMESTAMP_SECURITY_FINISH are set only when
73 * the manager is system and not running under container environment.
75 * 2. The monotonic timestamp of TIMESTAMP_KERNEL is always zero.
77 * 3. The realtime timestamp of TIMESTAMP_KERNEL will be unset if the system does not
80 * 4. TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER will be unset if the system does not
81 * have RTC, or systemd is built without EFI support.
83 * 5. The monotonic timestamps of TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER are stored as
84 * negative of the actual value.
86 * 6. TIMESTAMP_USERSPACE is the timestamp of when the manager was started.
88 * 7. TIMESTAMP_INITRD_* are set only when the system is booted with an initrd.
91 typedef enum ManagerTimestamp
{
92 MANAGER_TIMESTAMP_FIRMWARE
,
93 MANAGER_TIMESTAMP_LOADER
,
94 MANAGER_TIMESTAMP_KERNEL
,
95 MANAGER_TIMESTAMP_INITRD
,
96 MANAGER_TIMESTAMP_USERSPACE
,
97 MANAGER_TIMESTAMP_FINISH
,
99 MANAGER_TIMESTAMP_SECURITY_START
,
100 MANAGER_TIMESTAMP_SECURITY_FINISH
,
101 MANAGER_TIMESTAMP_GENERATORS_START
,
102 MANAGER_TIMESTAMP_GENERATORS_FINISH
,
103 MANAGER_TIMESTAMP_UNITS_LOAD_START
,
104 MANAGER_TIMESTAMP_UNITS_LOAD_FINISH
,
105 MANAGER_TIMESTAMP_UNITS_LOAD
,
107 MANAGER_TIMESTAMP_INITRD_SECURITY_START
,
108 MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH
,
109 MANAGER_TIMESTAMP_INITRD_GENERATORS_START
,
110 MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH
,
111 MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START
,
112 MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH
,
113 _MANAGER_TIMESTAMP_MAX
,
114 _MANAGER_TIMESTAMP_INVALID
= -EINVAL
,
117 typedef enum WatchdogType
{
127 #include "path-lookup.h"
128 #include "show-status.h"
129 #include "unit-name.h"
/* Flags controlling how much of the manager is brought up during a test run. Each MANAGER_TEST_RUN_* value
 * occupies its own bit; MANAGER_TEST_NORMAL (no bit set) means a regular, non-test run. */
typedef enum ManagerTestRunFlags {
        /* Not a test run: run normally */
        MANAGER_TEST_NORMAL                  = 0x00,
        /* Only create the basic data structures */
        MANAGER_TEST_RUN_MINIMAL             = 0x01,
        /* Interact with the environment */
        MANAGER_TEST_RUN_BASIC               = 0x02,
        /* Additionally run the environment generators */
        MANAGER_TEST_RUN_ENV_GENERATORS      = 0x04,
        /* Additionally run the unit generators */
        MANAGER_TEST_RUN_GENERATORS          = 0x08,
        /* Run while ignoring dependencies */
        MANAGER_TEST_RUN_IGNORE_DEPENDENCIES = 0x10,
        /* Convenience mask: basic run plus both kinds of generators */
        MANAGER_TEST_FULL                    = MANAGER_TEST_RUN_GENERATORS |
                                               MANAGER_TEST_RUN_ENV_GENERATORS |
                                               MANAGER_TEST_RUN_BASIC,
} ManagerTestRunFlags;
/* Checks that masking MANAGER_TEST_FULL with UINT8_MAX leaves it unchanged, i.e. that all of its bits lie in
 * the low 8 bits. NOTE(review): assert_cc is presumably a compile-time assertion macro — confirm. */
141 assert_cc((MANAGER_TEST_FULL
& UINT8_MAX
) == MANAGER_TEST_FULL
);
144 /* Note that the set of units we know of is allowed to be
145 * inconsistent. However, the subset of it that is loaded must
146 * not be, and neither may the list of jobs. */
148 /* Active jobs and units */
149 Hashmap
*units
; /* name string => Unit object n:1 */
150 Hashmap
*units_by_invocation_id
;
151 Hashmap
*jobs
; /* job id => Job object 1:1 */
153 /* To make it easy to iterate through the units of a specific
154 * type we maintain a per type linked list */
155 LIST_HEAD(Unit
, units_by_type
[_UNIT_TYPE_MAX
]);
157 /* Units that need to be loaded */
158 LIST_HEAD(Unit
, load_queue
); /* this is actually more a stack than a queue, but uh. */
160 /* Jobs that need to be run */
161 struct Prioq
*run_queue
;
163 /* Units and jobs that have not yet been announced via
164 * D-Bus. When something about a job changes it is added here
165 * if it is not in there yet. This allows easy coalescing of
166 * D-Bus change signals. */
167 LIST_HEAD(Unit
, dbus_unit_queue
);
168 LIST_HEAD(Job
, dbus_job_queue
);
170 /* Units to remove */
171 LIST_HEAD(Unit
, cleanup_queue
);
173 /* Units and jobs to check when doing GC */
174 LIST_HEAD(Unit
, gc_unit_queue
);
175 LIST_HEAD(Job
, gc_job_queue
);
177 /* Units that should be realized */
178 LIST_HEAD(Unit
, cgroup_realize_queue
);
180 /* Units whose cgroup ran empty */
181 LIST_HEAD(Unit
, cgroup_empty_queue
);
183 /* Units whose memory.event fired */
184 LIST_HEAD(Unit
, cgroup_oom_queue
);
186 /* Target units whose default target dependencies haven't been set yet */
187 LIST_HEAD(Unit
, target_deps_queue
);
189 /* Units that might be subject to StopWhenUnneeded= clean-up */
190 LIST_HEAD(Unit
, stop_when_unneeded_queue
);
192 /* Units which are upheld by another unit and which we might need to act on */
193 LIST_HEAD(Unit
, start_when_upheld_queue
);
195 /* Units that have BindsTo= another unit, and might need to be shutdown because the bound unit is not active. */
196 LIST_HEAD(Unit
, stop_when_bound_queue
);
198 /* Units that have resources open, and where it might be good to check if they can be released now */
199 LIST_HEAD(Unit
, release_resources_queue
);
203 /* This maps PIDs we care about to units that are interested in them. We allow multiple units to be interested in
204 * the same PID and multiple PIDs to be relevant to the same unit. Since in most cases only a single unit will
205 * be interested in the same PID we use a somewhat special encoding here: the first unit interested in a PID is
206 * stored directly in the hashmap, keyed by the PID unmodified. If there are other units interested too they'll
207 * be stored in a NULL-terminated array, and keyed by the negative PID. This is safe as pid_t is signed and
208 * negative PIDs are not used for regular processes but process groups, which we don't care about in this
209 * context, but this allows us to use the negative range for our own purposes. */
210 Hashmap
*watch_pids
; /* pid => unit as well as -pid => array of units */
212 /* A set which contains all units whose cgroup should be refreshed after startup */
215 /* A set which contains all currently failed units */
218 sd_event_source
*run_queue_event_source
;
222 sd_event_source
*notify_event_source
;
224 int cgroups_agent_fd
;
225 sd_event_source
*cgroups_agent_event_source
;
228 sd_event_source
*signal_event_source
;
230 sd_event_source
*sigchld_event_source
;
232 sd_event_source
*time_change_event_source
;
234 sd_event_source
*timezone_change_event_source
;
236 sd_event_source
*jobs_in_progress_event_source
;
238 int user_lookup_fds
[2];
239 sd_event_source
*user_lookup_event_source
;
241 RuntimeScope runtime_scope
;
243 LookupPaths lookup_paths
;
244 Hashmap
*unit_id_map
;
245 Hashmap
*unit_name_map
;
246 Set
*unit_path_cache
;
247 uint64_t unit_cache_timestamp_hash
;
249 char **transient_environment
; /* The environment, as determined from config files, kernel cmdline and environment generators */
250 char **client_environment
; /* Environment variables created by clients through the bus API */
252 usec_t watchdog
[_WATCHDOG_TYPE_MAX
];
253 usec_t watchdog_overridden
[_WATCHDOG_TYPE_MAX
];
254 char *watchdog_pretimeout_governor
;
255 char *watchdog_pretimeout_governor_overridden
;
257 dual_timestamp timestamps
[_MANAGER_TIMESTAMP_MAX
];
259 /* Data specific to the device subsystem */
260 sd_device_monitor
*device_monitor
;
261 Hashmap
*devices_by_sysfs
;
263 /* Data specific to the mount subsystem */
264 struct libmnt_monitor
*mount_monitor
;
265 sd_event_source
*mount_event_source
;
267 /* Data specific to the swap subsystem */
269 sd_event_source
*swap_event_source
;
270 Hashmap
*swaps_by_devnode
;
272 /* Data specific to the D-Bus subsystem */
273 sd_bus
*api_bus
, *system_bus
;
275 int private_listen_fd
;
276 sd_event_source
*private_listen_event_source
;
278 /* Contains all the clients that are subscribed to signals via
279 the API bus. Note that private bus connections are always
280 considered subscribers, since they are only short-lived,
281 and it is much simpler that way. */
282 sd_bus_track
*subscribed
;
283 char **deserialized_subscribed
;
285 /* This is used during reloading: before the reload we queue
286 * the reply message here, and afterwards we send it */
287 sd_bus_message
*pending_reload_message
;
289 Hashmap
*watch_bus
; /* D-Bus names => Unit object n:1 */
291 bool send_reloading_done
;
293 uint32_t current_job_id
;
294 uint32_t default_unit_job_id
;
296 /* Data specific to the Automount subsystem */
299 /* Data specific to the cgroup subsystem */
300 Hashmap
*cgroup_unit
;
301 CGroupMask cgroup_supported
;
304 /* Notifications from cgroups, when the unified hierarchy is used, are done via inotify. */
305 int cgroup_inotify_fd
;
306 sd_event_source
*cgroup_inotify_event_source
;
308 /* Maps for finding the unit for each inotify watch descriptor for the cgroup.events and
309 * memory.events cgroupv2 attributes. */
310 Hashmap
*cgroup_control_inotify_wd_unit
;
311 Hashmap
*cgroup_memory_inotify_wd_unit
;
313 /* A defer event for handling cgroup empty events and processing them after SIGCHLD in all cases. */
314 sd_event_source
*cgroup_empty_event_source
;
315 sd_event_source
*cgroup_oom_event_source
;
317 /* Make sure the user cannot accidentally unmount our cgroup
323 /* The stat() data the last time we saw /etc/localtime */
324 usec_t etc_localtime_mtime
;
325 bool etc_localtime_accessible
;
327 ManagerObjective objective
;
330 bool dispatching_load_queue
;
334 /* Have we already sent out the READY=1 notification? */
337 /* Was the last status sent "STATUS=Ready."? */
340 /* Have we already printed the taint line if necessary? */
343 /* Have we ever changed the "kernel.pid_max" sysctl? */
344 bool sysctl_pid_max_changed
;
346 ManagerTestRunFlags test_run_flags
;
348 /* If non-zero, exit with the following value when the systemd
349 * process terminates. Useful for containers: systemd-nspawn could get
350 * the return value. */
351 uint8_t return_value
;
353 ShowStatus show_status
;
354 ShowStatus show_status_overridden
;
355 StatusUnitFormat status_unit_format
;
357 bool no_console_output
;
358 bool service_watchdogs
;
360 ExecOutput default_std_output
, default_std_error
;
362 usec_t default_restart_usec
, default_timeout_start_usec
, default_timeout_stop_usec
;
363 usec_t default_device_timeout_usec
;
364 usec_t default_timeout_abort_usec
;
365 bool default_timeout_abort_set
;
367 usec_t default_start_limit_interval
;
368 unsigned default_start_limit_burst
;
370 bool default_cpu_accounting
;
371 bool default_memory_accounting
;
372 bool default_io_accounting
;
373 bool default_blockio_accounting
;
374 bool default_tasks_accounting
;
375 bool default_ip_accounting
;
377 TasksMax default_tasks_max
;
378 usec_t default_timer_accuracy_usec
;
380 OOMPolicy default_oom_policy
;
381 int default_oom_score_adjust
;
382 bool default_oom_score_adjust_set
;
384 CGroupPressureWatch default_memory_pressure_watch
;
385 usec_t default_memory_pressure_threshold_usec
;
387 int original_log_level
;
388 LogTarget original_log_target
;
389 bool log_level_overridden
;
390 bool log_target_overridden
;
392 struct rlimit
*rlimit
[_RLIMIT_MAX
];
394 /* Non-zero if we are reloading or reexecuting */
397 unsigned n_installed_jobs
;
398 unsigned n_failed_jobs
;
400 /* Jobs in progress watching */
401 unsigned n_running_jobs
;
402 unsigned n_on_console
;
403 unsigned jobs_in_progress_iteration
;
405 /* Do we have any outstanding password prompts? */
406 int have_ask_password
;
407 int ask_password_inotify_fd
;
408 sd_event_source
*ask_password_event_source
;
410 /* Type=idle pipes */
412 sd_event_source
*idle_pipe_event_source
;
415 char *switch_root_init
;
417 /* This is true before and after switching root. */
420 /* This maps all possible path prefixes to the units needing
421 * them. It's a hashmap with a path string as key and a Set as
422 * value where Unit objects are contained. */
423 Hashmap
*units_requiring_mounts_for
;
425 /* Used for processing polkit authorization responses */
426 Hashmap
*polkit_registry
;
428 /* Dynamic users/groups, indexed by their name */
429 Hashmap
*dynamic_users
;
431 /* Keep track of all UIDs and GIDs any of our services currently use. This is useful for the RemoveIPC= logic. */
435 /* ExecSharedRuntime, indexed by their owner unit id */
436 Hashmap
*exec_shared_runtime_by_id
;
438 /* When the user hits C-A-D more than 7 times per 2s, do something immediately... */
439 RateLimit ctrl_alt_del_ratelimit
;
440 EmergencyAction cad_burst_action
;
442 const char *unit_log_field
;
443 const char *unit_log_format_string
;
445 const char *invocation_log_field
;
446 const char *invocation_log_format_string
;
448 int first_boot
; /* tri-state */
450 /* Prefixes of e.g. RuntimeDirectory= */
451 char *prefix
[_EXEC_DIRECTORY_TYPE_MAX
];
452 char *received_credentials_directory
;
453 char *received_encrypted_credentials_directory
;
455 /* Used in the SIGCHLD and sd_notify() message invocation logic to avoid that we dispatch the same event
456 * multiple times on the same unit. */
460 VarlinkServer
*varlink_server
;
461 /* When we're a system manager, this object manages the subscription from systemd-oomd to PID1 that's
462 * used to report changes in ManagedOOM settings (systemd server - oomd client). When
463 * we're a user manager, this object manages the client connection from the user manager to
464 * systemd-oomd to report changes in ManagedOOM settings (systemd client - oomd server). */
465 Varlink
*managed_oom_varlink
;
467 /* Reference to RestrictFileSystems= BPF program */
468 struct restrict_fs_bpf
*restrict_fs
;
470 char *default_smack_process_label
;
472 /* Allow users to configure a rate limit for Reload() operations */
473 RateLimit reload_ratelimit
;
474 /* Dump*() are slow, so always rate limit them to 10 per 10 minutes */
475 RateLimit dump_ratelimit
;
477 sd_event_source
*memory_pressure_event_source
;
/* Returns the effective default abort timeout: m->default_timeout_abort_usec when
 * m->default_timeout_abort_set is true, otherwise m->default_timeout_stop_usec. */
480 static inline usec_t
manager_default_timeout_abort_usec(Manager
*m
) {
482 return m
->default_timeout_abort_set
? m
->default_timeout_abort_usec
: m
->default_timeout_stop_usec
;
/* Scope predicates: compare the manager's runtime_scope field against RUNTIME_SCOPE_SYSTEM and
 * RUNTIME_SCOPE_USER respectively. */
485 #define MANAGER_IS_SYSTEM(m) ((m)->runtime_scope == RUNTIME_SCOPE_SYSTEM)
486 #define MANAGER_IS_USER(m) ((m)->runtime_scope == RUNTIME_SCOPE_USER)
/* True while the manager's n_reloading counter is greater than zero. */
488 #define MANAGER_IS_RELOADING(m) ((m)->n_reloading > 0)
/* True if dual_timestamp_is_set() reports the MANAGER_TIMESTAMP_FINISH entry of the timestamps array as set. */
490 #define MANAGER_IS_FINISHED(m) (dual_timestamp_is_set((m)->timestamps + MANAGER_TIMESTAMP_FINISH))
492 /* The objective is set to OK as soon as we enter the main loop, and set otherwise as soon as we are done with it */
493 #define MANAGER_IS_RUNNING(m) ((m)->objective == MANAGER_OK)
/* Expands to the manager's switching_root field. */
495 #define MANAGER_IS_SWITCHING_ROOT(m) ((m)->switching_root)
/* True if any test-run flag is set, i.e. test_run_flags differs from 0 (MANAGER_TEST_NORMAL). */
497 #define MANAGER_IS_TEST_RUN(m) ((m)->test_run_flags != 0)
/* Returns the default timeout for the given runtime scope: DEFAULT_TIMEOUT_USEC when scope is
 * RUNTIME_SCOPE_SYSTEM, DEFAULT_USER_TIMEOUT_USEC for any other scope. */
499 static inline usec_t
manager_default_timeout(RuntimeScope scope
) {
500 return scope
== RUNTIME_SCOPE_SYSTEM
? DEFAULT_TIMEOUT_USEC
: DEFAULT_USER_TIMEOUT_USEC
;
503 int manager_new(RuntimeScope scope
, ManagerTestRunFlags test_run_flags
, Manager
**m
);
504 Manager
* manager_free(Manager
*m
);
505 DEFINE_TRIVIAL_CLEANUP_FUNC(Manager
*, manager_free
);
507 int manager_startup(Manager
*m
, FILE *serialization
, FDSet
*fds
, const char *root
);
509 Job
*manager_get_job(Manager
*m
, uint32_t id
);
510 Unit
*manager_get_unit(Manager
*m
, const char *name
);
512 int manager_get_job_from_dbus_path(Manager
*m
, const char *s
, Job
**_j
);
514 bool manager_unit_cache_should_retry_load(Unit
*u
);
515 int manager_load_unit_prepare(Manager
*m
, const char *name
, const char *path
, sd_bus_error
*e
, Unit
**ret
);
516 int manager_load_unit(Manager
*m
, const char *name
, const char *path
, sd_bus_error
*e
, Unit
**ret
);
517 int manager_load_startable_unit_or_warn(Manager
*m
, const char *name
, const char *path
, Unit
**ret
);
518 int manager_load_unit_from_dbus_path(Manager
*m
, const char *s
, sd_bus_error
*e
, Unit
**_u
);
520 int manager_add_job(Manager
*m
, JobType type
, Unit
*unit
, JobMode mode
, Set
*affected_jobs
, sd_bus_error
*e
, Job
**_ret
);
521 int manager_add_job_by_name(Manager
*m
, JobType type
, const char *name
, JobMode mode
, Set
*affected_jobs
, sd_bus_error
*e
, Job
**_ret
);
522 int manager_add_job_by_name_and_warn(Manager
*m
, JobType type
, const char *name
, JobMode mode
, Set
*affected_jobs
, Job
**ret
);
523 int manager_propagate_reload(Manager
*m
, Unit
*unit
, JobMode mode
, sd_bus_error
*e
);
525 void manager_clear_jobs(Manager
*m
);
527 void manager_unwatch_pid(Manager
*m
, pid_t pid
);
529 unsigned manager_dispatch_load_queue(Manager
*m
);
531 int manager_setup_memory_pressure_event_source(Manager
*m
);
533 int manager_default_environment(Manager
*m
);
534 int manager_transient_environment_add(Manager
*m
, char **plus
);
535 int manager_client_environment_modify(Manager
*m
, char **minus
, char **plus
);
536 int manager_get_effective_environment(Manager
*m
, char ***ret
);
538 int manager_set_default_smack_process_label(Manager
*m
, const char *label
);
540 int manager_set_default_rlimits(Manager
*m
, struct rlimit
**default_rlimit
);
542 void manager_trigger_run_queue(Manager
*m
);
544 int manager_loop(Manager
*m
);
546 int manager_reload(Manager
*m
);
547 Manager
* manager_reloading_start(Manager
*m
);
548 void manager_reloading_stopp(Manager
**m
);
550 void manager_reset_failed(Manager
*m
);
552 void manager_send_unit_audit(Manager
*m
, Unit
*u
, int type
, bool success
);
553 void manager_send_unit_plymouth(Manager
*m
, Unit
*u
);
555 bool manager_unit_inactive_or_pending(Manager
*m
, const char *name
);
557 void manager_check_finished(Manager
*m
);
558 void manager_send_reloading(Manager
*m
);
560 void disable_printk_ratelimit(void);
561 void manager_recheck_dbus(Manager
*m
);
562 void manager_recheck_journal(Manager
*m
);
564 bool manager_get_show_status_on(Manager
*m
);
565 void manager_set_show_status(Manager
*m
, ShowStatus mode
, const char *reason
);
566 void manager_override_show_status(Manager
*m
, ShowStatus mode
, const char *reason
);
568 void manager_set_first_boot(Manager
*m
, bool b
);
569 void manager_set_switching_root(Manager
*m
, bool switching_root
);
571 void manager_status_printf(Manager
*m
, StatusType type
, const char *status
, const char *format
, ...) _printf_(4,5);
573 Set
*manager_get_units_requiring_mounts_for(Manager
*m
, const char *path
);
575 ManagerState
manager_state(Manager
*m
);
577 int manager_update_failed_units(Manager
*m
, Unit
*u
, bool failed
);
579 void manager_unref_uid(Manager
*m
, uid_t uid
, bool destroy_now
);
580 int manager_ref_uid(Manager
*m
, uid_t uid
, bool clean_ipc
);
582 void manager_unref_gid(Manager
*m
, gid_t gid
, bool destroy_now
);
583 int manager_ref_gid(Manager
*m
, gid_t gid
, bool clean_ipc
);
585 char* manager_taint_string(const Manager
*m
);
587 void manager_ref_console(Manager
*m
);
588 void manager_unref_console(Manager
*m
);
590 void manager_override_log_level(Manager
*m
, int level
);
591 void manager_restore_original_log_level(Manager
*m
);
593 void manager_override_log_target(Manager
*m
, LogTarget target
);
594 void manager_restore_original_log_target(Manager
*m
);
596 const char *manager_state_to_string(ManagerState m
) _const_
;
597 ManagerState
manager_state_from_string(const char *s
) _pure_
;
599 const char *manager_get_confirm_spawn(Manager
*m
);
600 bool manager_is_confirm_spawn_disabled(Manager
*m
);
601 void manager_disable_confirm_spawn(void);
603 const char *manager_timestamp_to_string(ManagerTimestamp m
) _const_
;
604 ManagerTimestamp
manager_timestamp_from_string(const char *s
) _pure_
;
605 ManagerTimestamp
manager_timestamp_initrd_mangle(ManagerTimestamp s
);
607 usec_t
manager_get_watchdog(Manager
*m
, WatchdogType t
);
608 void manager_set_watchdog(Manager
*m
, WatchdogType t
, usec_t timeout
);
609 void manager_override_watchdog(Manager
*m
, WatchdogType t
, usec_t timeout
);
610 int manager_set_watchdog_pretimeout_governor(Manager
*m
, const char *governor
);
611 int manager_override_watchdog_pretimeout_governor(Manager
*m
, const char *governor
);
613 const char* oom_policy_to_string(OOMPolicy i
) _const_
;
614 OOMPolicy
oom_policy_from_string(const char *s
) _pure_
;