1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
11 #include "cgroup-util.h"
17 #include "ratelimit.h"
20 struct libmnt_monitor
;
21 typedef struct Unit Unit
;
23 /* Enforce an upper limit on how many names we allow */
24 #define MANAGER_MAX_NAMES 131072 /* 128K */
26 typedef struct Manager Manager
;
28 /* An externally visible state. We don't actually maintain this as state variable, but derive it from various fields
30 typedef enum ManagerState
{
38 _MANAGER_STATE_INVALID
= -EINVAL
,
41 typedef enum ManagerObjective
{
51 _MANAGER_OBJECTIVE_MAX
,
52 _MANAGER_OBJECTIVE_INVALID
= -EINVAL
,
55 typedef enum StatusType
{
56 STATUS_TYPE_EPHEMERAL
,
59 STATUS_TYPE_EMERGENCY
,
62 typedef enum OOMPolicy
{
63 OOM_CONTINUE
, /* The kernel or systemd-oomd kills the process it wants to kill, and that's it */
64 OOM_STOP
, /* The kernel or systemd-oomd kills the process it wants to kill, and we stop the unit */
65 OOM_KILL
, /* The kernel or systemd-oomd kills the process it wants to kill, and all others in the unit, and we stop the unit */
67 _OOM_POLICY_INVALID
= -EINVAL
,
71 * 1. TIMESTAMP_FIRMWARE, TIMESTAMP_LOADER, TIMESTAMP_KERNEL, TIMESTAMP_INITRD,
72 * TIMESTAMP_SECURITY_START, and TIMESTAMP_SECURITY_FINISH are set only when
73 * the manager is system and not running under container environment.
75 * 2. The monotonic timestamp of TIMESTAMP_KERNEL is always zero.
77 * 3. The realtime timestamp of TIMESTAMP_KERNEL will be unset if the system does not
80 * 4. TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER will be unset if the system does not
81 * have RTC, or systemd is built without EFI support.
83 * 5. The monotonic timestamps of TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER are stored as
84 * negative of the actual value.
86 * 6. TIMESTAMP_USERSPACE is the timestamp of when the manager was started.
88 * 7. TIMESTAMP_INITRD_* are set only when the system is booted with an initrd.
91 typedef enum ManagerTimestamp
{
92 MANAGER_TIMESTAMP_FIRMWARE
,
93 MANAGER_TIMESTAMP_LOADER
,
94 MANAGER_TIMESTAMP_KERNEL
,
95 MANAGER_TIMESTAMP_INITRD
,
96 MANAGER_TIMESTAMP_USERSPACE
,
97 MANAGER_TIMESTAMP_FINISH
,
99 MANAGER_TIMESTAMP_SECURITY_START
,
100 MANAGER_TIMESTAMP_SECURITY_FINISH
,
101 MANAGER_TIMESTAMP_GENERATORS_START
,
102 MANAGER_TIMESTAMP_GENERATORS_FINISH
,
103 MANAGER_TIMESTAMP_UNITS_LOAD_START
,
104 MANAGER_TIMESTAMP_UNITS_LOAD_FINISH
,
105 MANAGER_TIMESTAMP_UNITS_LOAD
,
107 MANAGER_TIMESTAMP_INITRD_SECURITY_START
,
108 MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH
,
109 MANAGER_TIMESTAMP_INITRD_GENERATORS_START
,
110 MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH
,
111 MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START
,
112 MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH
,
113 _MANAGER_TIMESTAMP_MAX
,
114 _MANAGER_TIMESTAMP_INVALID
= -EINVAL
,
117 typedef enum WatchdogType
{
127 #include "path-lookup.h"
128 #include "show-status.h"
129 #include "unit-name.h"
/* Bit flags describing how much of the manager is brought up for a test run.
 * MANAGER_TEST_NORMAL has no bit set; every MANAGER_TEST_RUN_* constant owns
 * exactly one bit, so they may be OR-ed together freely. */
typedef enum ManagerTestRunFlags {
        /* run normally */
        MANAGER_TEST_NORMAL                  = 0,

        /* create basic data structures */
        MANAGER_TEST_RUN_MINIMAL             = 1 << 0,

        /* interact with the environment */
        MANAGER_TEST_RUN_BASIC               = 1 << 1,

        /* also run env generators */
        MANAGER_TEST_RUN_ENV_GENERATORS      = 1 << 2,

        /* also run unit generators */
        MANAGER_TEST_RUN_GENERATORS          = 1 << 3,

        /* run while ignoring dependencies */
        MANAGER_TEST_RUN_IGNORE_DEPENDENCIES = 1 << 4,

        /* Convenience mask: BASIC, ENV_GENERATORS and GENERATORS combined.
         * MINIMAL and IGNORE_DEPENDENCIES are not part of it. */
        MANAGER_TEST_FULL                    = MANAGER_TEST_RUN_BASIC |
                                               MANAGER_TEST_RUN_ENV_GENERATORS |
                                               MANAGER_TEST_RUN_GENERATORS,
} ManagerTestRunFlags;
141 assert_cc((MANAGER_TEST_FULL
& UINT8_MAX
) == MANAGER_TEST_FULL
);
144 /* Note that the set of units we know of is allowed to be
145 * inconsistent. However, the subset of it that is loaded may
146 * not be, and neither may the list of jobs. */
148 /* Active jobs and units */
149 Hashmap
*units
; /* name string => Unit object n:1 */
150 Hashmap
*units_by_invocation_id
;
151 Hashmap
*jobs
; /* job id => Job object 1:1 */
153 /* To make it easy to iterate through the units of a specific
154 * type we maintain a per type linked list */
155 LIST_HEAD(Unit
, units_by_type
[_UNIT_TYPE_MAX
]);
157 /* Units that need to be loaded */
158 LIST_HEAD(Unit
, load_queue
); /* this is actually more a stack than a queue, but uh. */
160 /* Jobs that need to be run */
161 struct Prioq
*run_queue
;
163 /* Units and jobs that have not yet been announced via
164 * D-Bus. When something about a job changes it is added here
165 * if it is not in there yet. This allows easy coalescing of
166 * D-Bus change signals. */
167 LIST_HEAD(Unit
, dbus_unit_queue
);
168 LIST_HEAD(Job
, dbus_job_queue
);
170 /* Units to remove */
171 LIST_HEAD(Unit
, cleanup_queue
);
173 /* Units and jobs to check when doing GC */
174 LIST_HEAD(Unit
, gc_unit_queue
);
175 LIST_HEAD(Job
, gc_job_queue
);
177 /* Units that should be realized */
178 LIST_HEAD(Unit
, cgroup_realize_queue
);
180 /* Units whose cgroup ran empty */
181 LIST_HEAD(Unit
, cgroup_empty_queue
);
183 /* Units whose memory.event fired */
184 LIST_HEAD(Unit
, cgroup_oom_queue
);
186 /* Target units whose default target dependencies haven't been set yet */
187 LIST_HEAD(Unit
, target_deps_queue
);
189 /* Units that might be subject to StopWhenUnneeded= clean-up */
190 LIST_HEAD(Unit
, stop_when_unneeded_queue
);
192 /* Units which are upheld by another unit, which we might need to act on */
193 LIST_HEAD(Unit
, start_when_upheld_queue
);
195 /* Units that have BindsTo= another unit, and might need to be shut down because the bound unit is not active. */
196 LIST_HEAD(Unit
, stop_when_bound_queue
);
200 /* This maps PIDs we care about to units that are interested in them. We allow multiple units to be interested in
201 * the same PID and multiple PIDs to be relevant to the same unit. Since in most cases only a single unit will
202 * be interested in the same PID we use a somewhat special encoding here: the first unit interested in a PID is
203 * stored directly in the hashmap, keyed by the PID unmodified. If there are other units interested too they'll
204 * be stored in a NULL-terminated array, and keyed by the negative PID. This is safe as pid_t is signed and
205 * negative PIDs are not used for regular processes but process groups, which we don't care about in this
206 * context, but this allows us to use the negative range for our own purposes. */
207 Hashmap
*watch_pids
; /* pid => unit as well as -pid => array of units */
209 /* A set containing all units whose cgroup should be refreshed after startup */
212 /* A set which contains all currently failed units */
215 sd_event_source
*run_queue_event_source
;
219 sd_event_source
*notify_event_source
;
221 int cgroups_agent_fd
;
222 sd_event_source
*cgroups_agent_event_source
;
225 sd_event_source
*signal_event_source
;
227 sd_event_source
*sigchld_event_source
;
229 sd_event_source
*time_change_event_source
;
231 sd_event_source
*timezone_change_event_source
;
233 sd_event_source
*jobs_in_progress_event_source
;
235 int user_lookup_fds
[2];
236 sd_event_source
*user_lookup_event_source
;
238 LookupScope unit_file_scope
;
239 LookupPaths lookup_paths
;
240 Hashmap
*unit_id_map
;
241 Hashmap
*unit_name_map
;
242 Set
*unit_path_cache
;
243 uint64_t unit_cache_timestamp_hash
;
245 char **transient_environment
; /* The environment, as determined from config files, kernel cmdline and environment generators */
246 char **client_environment
; /* Environment variables created by clients through the bus API */
248 usec_t watchdog
[_WATCHDOG_TYPE_MAX
];
249 usec_t watchdog_overridden
[_WATCHDOG_TYPE_MAX
];
250 char *watchdog_pretimeout_governor
;
251 char *watchdog_pretimeout_governor_overridden
;
253 dual_timestamp timestamps
[_MANAGER_TIMESTAMP_MAX
];
255 /* Data specific to the device subsystem */
256 sd_device_monitor
*device_monitor
;
257 Hashmap
*devices_by_sysfs
;
259 /* Data specific to the mount subsystem */
260 struct libmnt_monitor
*mount_monitor
;
261 sd_event_source
*mount_event_source
;
263 /* Data specific to the swap filesystem */
265 sd_event_source
*swap_event_source
;
266 Hashmap
*swaps_by_devnode
;
268 /* Data specific to the D-Bus subsystem */
269 sd_bus
*api_bus
, *system_bus
;
271 int private_listen_fd
;
272 sd_event_source
*private_listen_event_source
;
274 /* Contains all the clients that are subscribed to signals via
275 the API bus. Note that private bus connections are always
276 considered subscribed, since they are only short-lived,
277 and it is much simpler that way. */
278 sd_bus_track
*subscribed
;
279 char **deserialized_subscribed
;
281 /* This is used during reloading: before the reload we queue
282 * the reply message here, and afterwards we send it */
283 sd_bus_message
*pending_reload_message
;
285 Hashmap
*watch_bus
; /* D-Bus names => Unit object n:1 */
287 bool send_reloading_done
;
289 uint32_t current_job_id
;
290 uint32_t default_unit_job_id
;
292 /* Data specific to the Automount subsystem */
295 /* Data specific to the cgroup subsystem */
296 Hashmap
*cgroup_unit
;
297 CGroupMask cgroup_supported
;
300 /* Notifications from cgroups, when the unified hierarchy is used, are done via inotify. */
301 int cgroup_inotify_fd
;
302 sd_event_source
*cgroup_inotify_event_source
;
304 /* Maps for finding the unit for each inotify watch descriptor for the cgroup.events and
305 * memory.events cgroupv2 attributes. */
306 Hashmap
*cgroup_control_inotify_wd_unit
;
307 Hashmap
*cgroup_memory_inotify_wd_unit
;
309 /* A defer event for handling cgroup empty events and processing them after SIGCHLD in all cases. */
310 sd_event_source
*cgroup_empty_event_source
;
311 sd_event_source
*cgroup_oom_event_source
;
313 /* Make sure the user cannot accidentally unmount our cgroup
319 /* The stat() data the last time we saw /etc/localtime */
320 usec_t etc_localtime_mtime
;
321 bool etc_localtime_accessible
;
323 ManagerObjective objective
;
326 bool dispatching_load_queue
;
330 /* Have we already sent out the READY=1 notification? */
333 /* Was the last status sent "STATUS=Ready."? */
336 /* Have we already printed the taint line if necessary? */
339 /* Have we ever changed the "kernel.pid_max" sysctl? */
340 bool sysctl_pid_max_changed
;
342 ManagerTestRunFlags test_run_flags
;
344 /* If non-zero, exit with the following value when the systemd
345 * process terminates. Useful for containers: systemd-nspawn could get
346 * the return value. */
347 uint8_t return_value
;
349 ShowStatus show_status
;
350 ShowStatus show_status_overridden
;
351 StatusUnitFormat status_unit_format
;
353 bool no_console_output
;
354 bool service_watchdogs
;
356 ExecOutput default_std_output
, default_std_error
;
358 usec_t default_restart_usec
, default_timeout_start_usec
, default_timeout_stop_usec
;
359 usec_t default_device_timeout_usec
;
360 usec_t default_timeout_abort_usec
;
361 bool default_timeout_abort_set
;
363 usec_t default_start_limit_interval
;
364 unsigned default_start_limit_burst
;
366 bool default_cpu_accounting
;
367 bool default_memory_accounting
;
368 bool default_io_accounting
;
369 bool default_blockio_accounting
;
370 bool default_tasks_accounting
;
371 bool default_ip_accounting
;
373 TasksMax default_tasks_max
;
374 usec_t default_timer_accuracy_usec
;
376 OOMPolicy default_oom_policy
;
377 int default_oom_score_adjust
;
378 bool default_oom_score_adjust_set
;
380 int original_log_level
;
381 LogTarget original_log_target
;
382 bool log_level_overridden
;
383 bool log_target_overridden
;
385 struct rlimit
*rlimit
[_RLIMIT_MAX
];
387 /* non-zero if we are reloading or reexecuting, */
390 unsigned n_installed_jobs
;
391 unsigned n_failed_jobs
;
393 /* Jobs in progress watching */
394 unsigned n_running_jobs
;
395 unsigned n_on_console
;
396 unsigned jobs_in_progress_iteration
;
398 /* Do we have any outstanding password prompts? */
399 int have_ask_password
;
400 int ask_password_inotify_fd
;
401 sd_event_source
*ask_password_event_source
;
403 /* Type=idle pipes */
405 sd_event_source
*idle_pipe_event_source
;
408 char *switch_root_init
;
410 /* This is true before and after switching root. */
413 /* This maps all possible path prefixes to the units needing
414 * them. It's a hashmap with a path string as key and a Set as
415 * value where Unit objects are contained. */
416 Hashmap
*units_requiring_mounts_for
;
418 /* Used for processing polkit authorization responses */
419 Hashmap
*polkit_registry
;
421 /* Dynamic users/groups, indexed by their name */
422 Hashmap
*dynamic_users
;
424 /* Keep track of all UIDs and GIDs any of our services currently use. This is useful for the RemoveIPC= logic. */
428 /* ExecRuntime, indexed by their owner unit id */
429 Hashmap
*exec_runtime_by_id
;
431 /* When the user hits C-A-D more than 7 times per 2s, do something immediately... */
432 RateLimit ctrl_alt_del_ratelimit
;
433 EmergencyAction cad_burst_action
;
435 const char *unit_log_field
;
436 const char *unit_log_format_string
;
438 const char *invocation_log_field
;
439 const char *invocation_log_format_string
;
441 int first_boot
; /* tri-state */
443 /* Prefixes of e.g. RuntimeDirectory= */
444 char *prefix
[_EXEC_DIRECTORY_TYPE_MAX
];
445 char *received_credentials_directory
;
446 char *received_encrypted_credentials_directory
;
448 /* Used in the SIGCHLD and sd_notify() message invocation logic to avoid that we dispatch the same event
449 * multiple times on the same unit. */
453 VarlinkServer
*varlink_server
;
454 /* When we're a system manager, this object manages the subscription from systemd-oomd to PID1 that's
455 * used to report changes in ManagedOOM settings (systemd server - oomd client). When
456 * we're a user manager, this object manages the client connection from the user manager to
457 * systemd-oomd to report changes in ManagedOOM settings (systemd client - oomd server). */
458 Varlink
*managed_oom_varlink
;
460 /* Reference to RestrictFileSystems= BPF program */
461 struct restrict_fs_bpf
*restrict_fs
;
463 char *default_smack_process_label
;
466 static inline usec_t
manager_default_timeout_abort_usec(Manager
*m
) {
468 return m
->default_timeout_abort_set
? m
->default_timeout_abort_usec
: m
->default_timeout_stop_usec
;
/* Scope predicates: a manager whose lookup scope is not the system scope counts as a user manager. */
#define MANAGER_IS_SYSTEM(m) ((m)->unit_file_scope == LOOKUP_SCOPE_SYSTEM)
#define MANAGER_IS_USER(m) (!MANAGER_IS_SYSTEM(m))

/* True while the reload counter is positive. */
#define MANAGER_IS_RELOADING(m) ((m)->n_reloading > 0)

/* True once the FINISH entry of the timestamps array has been set. */
#define MANAGER_IS_FINISHED(m) (dual_timestamp_is_set(&(m)->timestamps[MANAGER_TIMESTAMP_FINISH]))

/* The objective is set to OK as soon as we enter the main loop, and set otherwise as soon as we are done with it */
#define MANAGER_IS_RUNNING(m) ((m)->objective == MANAGER_OK)

/* Evaluates to the manager's switching_root field. */
#define MANAGER_IS_SWITCHING_ROOT(m) ((m)->switching_root)

/* Any flag other than MANAGER_TEST_NORMAL (which is 0) marks a test run. */
#define MANAGER_IS_TEST_RUN(m) ((m)->test_run_flags != MANAGER_TEST_NORMAL)
485 int manager_new(LookupScope scope
, ManagerTestRunFlags test_run_flags
, Manager
**m
);
486 Manager
* manager_free(Manager
*m
);
487 DEFINE_TRIVIAL_CLEANUP_FUNC(Manager
*, manager_free
);
489 int manager_startup(Manager
*m
, FILE *serialization
, FDSet
*fds
, const char *root
);
491 Job
*manager_get_job(Manager
*m
, uint32_t id
);
492 Unit
*manager_get_unit(Manager
*m
, const char *name
);
494 int manager_get_job_from_dbus_path(Manager
*m
, const char *s
, Job
**_j
);
496 bool manager_unit_cache_should_retry_load(Unit
*u
);
497 int manager_load_unit_prepare(Manager
*m
, const char *name
, const char *path
, sd_bus_error
*e
, Unit
**_ret
);
498 int manager_load_unit(Manager
*m
, const char *name
, const char *path
, sd_bus_error
*e
, Unit
**_ret
);
499 int manager_load_startable_unit_or_warn(Manager
*m
, const char *name
, const char *path
, Unit
**ret
);
500 int manager_load_unit_from_dbus_path(Manager
*m
, const char *s
, sd_bus_error
*e
, Unit
**_u
);
502 int manager_add_job(Manager
*m
, JobType type
, Unit
*unit
, JobMode mode
, Set
*affected_jobs
, sd_bus_error
*e
, Job
**_ret
);
503 int manager_add_job_by_name(Manager
*m
, JobType type
, const char *name
, JobMode mode
, Set
*affected_jobs
, sd_bus_error
*e
, Job
**_ret
);
504 int manager_add_job_by_name_and_warn(Manager
*m
, JobType type
, const char *name
, JobMode mode
, Set
*affected_jobs
, Job
**ret
);
505 int manager_propagate_reload(Manager
*m
, Unit
*unit
, JobMode mode
, sd_bus_error
*e
);
507 void manager_clear_jobs(Manager
*m
);
509 void manager_unwatch_pid(Manager
*m
, pid_t pid
);
511 unsigned manager_dispatch_load_queue(Manager
*m
);
513 int manager_default_environment(Manager
*m
);
514 int manager_transient_environment_add(Manager
*m
, char **plus
);
515 int manager_client_environment_modify(Manager
*m
, char **minus
, char **plus
);
516 int manager_get_effective_environment(Manager
*m
, char ***ret
);
518 int manager_set_default_smack_process_label(Manager
*m
, const char *label
);
520 int manager_set_default_rlimits(Manager
*m
, struct rlimit
**default_rlimit
);
522 void manager_trigger_run_queue(Manager
*m
);
524 int manager_loop(Manager
*m
);
526 int manager_reload(Manager
*m
);
527 Manager
* manager_reloading_start(Manager
*m
);
528 void manager_reloading_stopp(Manager
**m
);
530 void manager_reset_failed(Manager
*m
);
532 void manager_send_unit_audit(Manager
*m
, Unit
*u
, int type
, bool success
);
533 void manager_send_unit_plymouth(Manager
*m
, Unit
*u
);
535 bool manager_unit_inactive_or_pending(Manager
*m
, const char *name
);
537 void manager_check_finished(Manager
*m
);
539 void disable_printk_ratelimit(void);
540 void manager_recheck_dbus(Manager
*m
);
541 void manager_recheck_journal(Manager
*m
);
543 bool manager_get_show_status_on(Manager
*m
);
544 void manager_set_show_status(Manager
*m
, ShowStatus mode
, const char *reason
);
545 void manager_override_show_status(Manager
*m
, ShowStatus mode
, const char *reason
);
547 void manager_set_first_boot(Manager
*m
, bool b
);
548 void manager_set_switching_root(Manager
*m
, bool switching_root
);
550 void manager_status_printf(Manager
*m
, StatusType type
, const char *status
, const char *format
, ...) _printf_(4,5);
552 Set
*manager_get_units_requiring_mounts_for(Manager
*m
, const char *path
);
554 ManagerState
manager_state(Manager
*m
);
556 int manager_update_failed_units(Manager
*m
, Unit
*u
, bool failed
);
558 void manager_unref_uid(Manager
*m
, uid_t uid
, bool destroy_now
);
559 int manager_ref_uid(Manager
*m
, uid_t uid
, bool clean_ipc
);
561 void manager_unref_gid(Manager
*m
, gid_t gid
, bool destroy_now
);
562 int manager_ref_gid(Manager
*m
, gid_t gid
, bool clean_ipc
);
564 char* manager_taint_string(const Manager
*m
);
566 void manager_ref_console(Manager
*m
);
567 void manager_unref_console(Manager
*m
);
569 void manager_override_log_level(Manager
*m
, int level
);
570 void manager_restore_original_log_level(Manager
*m
);
572 void manager_override_log_target(Manager
*m
, LogTarget target
);
573 void manager_restore_original_log_target(Manager
*m
);
575 const char *manager_state_to_string(ManagerState m
) _const_
;
576 ManagerState
manager_state_from_string(const char *s
) _pure_
;
578 const char *manager_get_confirm_spawn(Manager
*m
);
579 bool manager_is_confirm_spawn_disabled(Manager
*m
);
580 void manager_disable_confirm_spawn(void);
582 const char *manager_timestamp_to_string(ManagerTimestamp m
) _const_
;
583 ManagerTimestamp
manager_timestamp_from_string(const char *s
) _pure_
;
584 ManagerTimestamp
manager_timestamp_initrd_mangle(ManagerTimestamp s
);
586 usec_t
manager_get_watchdog(Manager
*m
, WatchdogType t
);
587 void manager_set_watchdog(Manager
*m
, WatchdogType t
, usec_t timeout
);
588 void manager_override_watchdog(Manager
*m
, WatchdogType t
, usec_t timeout
);
589 int manager_set_watchdog_pretimeout_governor(Manager
*m
, const char *governor
);
590 int manager_override_watchdog_pretimeout_governor(Manager
*m
, const char *governor
);
592 const char* oom_policy_to_string(OOMPolicy i
) _const_
;
593 OOMPolicy
oom_policy_from_string(const char *s
) _pure_
;