/* Origin: git.ipfire.org Git - thirdparty/systemd.git, blob view of src/core/execute.h,
 * at "Merge pull request #21172 from poettering/fix-systemctl-cgroup-tree". */
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 #pragma once
3
/* Forward declarations. The five Exec* structs are defined further down in this header;
 * struct Manager is only named here and is not defined in this file. */
typedef struct ExecStatus     ExecStatus;
typedef struct ExecCommand    ExecCommand;
typedef struct ExecContext    ExecContext;
typedef struct ExecRuntime    ExecRuntime;
typedef struct ExecParameters ExecParameters;
typedef struct Manager        Manager;
10
11 #include <sched.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <sys/capability.h>
15
16 #include "cgroup-util.h"
17 #include "coredump-util.h"
18 #include "cpu-set-util.h"
19 #include "exec-util.h"
20 #include "fdset.h"
21 #include "list.h"
22 #include "missing_resource.h"
23 #include "namespace.h"
24 #include "nsflags.h"
25 #include "numa-util.h"
26 #include "path-util.h"
27 #include "time-util.h"
28
/* 64 MiB, as an unsigned constant. NOTE(review): presumably the cap on ExecContext.stdin_data_size;
 * the enforcement is not in this header, so confirm against the users of this macro. */
#define EXEC_STDIN_DATA_MAX (64U * 1024U * 1024U)
30
/* Value type of ExecContext.utmp_mode. */
typedef enum ExecUtmpMode {
        EXEC_UTMP_INIT = 0,
        EXEC_UTMP_LOGIN,
        EXEC_UTMP_USER,

        /* Number of real values above. */
        _EXEC_UTMP_MODE_MAX,

        /* Out-of-band marker, equal to -EINVAL. */
        _EXEC_UTMP_MODE_INVALID = -EINVAL,
} ExecUtmpMode;
38
/* Value type of ExecContext.std_input. */
typedef enum ExecInput {
        EXEC_INPUT_NULL = 0,
        EXEC_INPUT_TTY,
        EXEC_INPUT_TTY_FORCE,
        EXEC_INPUT_TTY_FAIL,
        EXEC_INPUT_SOCKET,
        EXEC_INPUT_NAMED_FD,
        EXEC_INPUT_DATA,
        EXEC_INPUT_FILE,

        /* Number of real values above. */
        _EXEC_INPUT_MAX,

        /* Out-of-band marker, equal to -EINVAL. */
        _EXEC_INPUT_INVALID = -EINVAL,
} ExecInput;
51
/* Value type of ExecContext.std_output and ExecContext.std_error. */
typedef enum ExecOutput {
        EXEC_OUTPUT_INHERIT = 0,
        EXEC_OUTPUT_NULL,
        EXEC_OUTPUT_TTY,
        EXEC_OUTPUT_KMSG,
        EXEC_OUTPUT_KMSG_AND_CONSOLE,
        EXEC_OUTPUT_JOURNAL,
        EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
        EXEC_OUTPUT_SOCKET,
        EXEC_OUTPUT_NAMED_FD,
        EXEC_OUTPUT_FILE,
        EXEC_OUTPUT_FILE_APPEND,
        EXEC_OUTPUT_FILE_TRUNCATE,

        /* Number of real values above. */
        _EXEC_OUTPUT_MAX,

        /* Out-of-band marker, equal to -EINVAL. */
        _EXEC_OUTPUT_INVALID = -EINVAL,
} ExecOutput;
68
/* Value type of ExecContext.runtime_directory_preserve_mode. */
typedef enum ExecPreserveMode {
        EXEC_PRESERVE_NO = 0,
        EXEC_PRESERVE_YES,
        EXEC_PRESERVE_RESTART,

        /* Number of real values above. */
        _EXEC_PRESERVE_MODE_MAX,

        /* Out-of-band marker, equal to -EINVAL. */
        _EXEC_PRESERVE_MODE_INVALID = -EINVAL,
} ExecPreserveMode;
76
/* Value type of ExecContext.keyring_mode. */
typedef enum ExecKeyringMode {
        EXEC_KEYRING_INHERIT = 0,
        EXEC_KEYRING_PRIVATE,
        EXEC_KEYRING_SHARED,

        /* Number of real values above. */
        _EXEC_KEYRING_MODE_MAX,

        /* Out-of-band marker, equal to -EINVAL. */
        _EXEC_KEYRING_MODE_INVALID = -EINVAL,
} ExecKeyringMode;
84
85 /* Contains start and exit information about an executed command. */
86 struct ExecStatus {
87 dual_timestamp start_timestamp;
88 dual_timestamp exit_timestamp;
89 pid_t pid;
90 int code; /* as in siginfo_t::si_code */
91 int status; /* as in siginfo_t::si_status */
92 };
93
94 /* Stores information about commands we execute. Covers both configuration settings as well as runtime data. */
95 struct ExecCommand {
96 char *path;
97 char **argv;
98 ExecStatus exec_status;
99 ExecCommandFlags flags;
100 LIST_FIELDS(ExecCommand, command); /* useful for chaining commands */
101 };
102
103 /* Encapsulates certain aspects of the runtime environment that is to be shared between multiple otherwise separate
104 * invocations of commands. Specifically, this allows sharing of /tmp and /var/tmp data as well as network namespaces
105 * between invocations of commands. This is a reference counted object, with one reference taken by each currently
106 * active command invocation that wants to share this runtime. */
107 struct ExecRuntime {
108 unsigned n_ref;
109
110 Manager *manager;
111
112 char *id; /* Unit id of the owner */
113
114 char *tmp_dir;
115 char *var_tmp_dir;
116
117 /* An AF_UNIX socket pair, that contains a datagram containing a file descriptor referring to the network
118 * namespace. */
119 int netns_storage_socket[2];
120
121 /* Like netns_storage_socket, but the file descriptor is referring to the IPC namespace. */
122 int ipcns_storage_socket[2];
123 };
124
/* Kinds of per-unit directories. The values index ExecContext.directories[] (sized
 * _EXEC_DIRECTORY_TYPE_MAX) and serve as bit positions for ExecCleanMask. */
typedef enum ExecDirectoryType {
        EXEC_DIRECTORY_RUNTIME = 0,
        EXEC_DIRECTORY_STATE,
        EXEC_DIRECTORY_CACHE,
        EXEC_DIRECTORY_LOGS,
        EXEC_DIRECTORY_CONFIGURATION,

        /* Number of real values above. */
        _EXEC_DIRECTORY_TYPE_MAX,

        /* Out-of-band marker, equal to -EINVAL. */
        _EXEC_DIRECTORY_TYPE_INVALID = -EINVAL,
} ExecDirectoryType;
134
/* One entry of an ExecDirectory: a path plus a string array of symlinks belonging to it. */
typedef struct ExecDirectoryItem {
        char *path;
        char **symlinks;
} ExecDirectoryItem;
139
140 typedef struct ExecDirectory {
141 mode_t mode;
142 size_t n_items;
143 ExecDirectoryItem *items;
144 } ExecDirectory;
145
146 typedef enum ExecCleanMask {
147 /* In case you wonder why the bitmask below doesn't use "directory" in its name: we want to keep this
148 * generic so that .timer timestamp files can nicely be covered by this too, and similar. */
149 EXEC_CLEAN_RUNTIME = 1U << EXEC_DIRECTORY_RUNTIME,
150 EXEC_CLEAN_STATE = 1U << EXEC_DIRECTORY_STATE,
151 EXEC_CLEAN_CACHE = 1U << EXEC_DIRECTORY_CACHE,
152 EXEC_CLEAN_LOGS = 1U << EXEC_DIRECTORY_LOGS,
153 EXEC_CLEAN_CONFIGURATION = 1U << EXEC_DIRECTORY_CONFIGURATION,
154 EXEC_CLEAN_NONE = 0,
155 EXEC_CLEAN_ALL = (1U << _EXEC_DIRECTORY_TYPE_MAX) - 1,
156 _EXEC_CLEAN_MASK_INVALID = -EINVAL,
157 } ExecCleanMask;
158
/* Credential set up via LoadCredential= */
typedef struct ExecLoadCredential {
        char *id;
        char *path;
        bool encrypted;
} ExecLoadCredential;
164
/* Credential set up via SetCredential=; the payload is the `size` bytes at `data`. */
typedef struct ExecSetCredential {
        char *id;
        bool encrypted;

        void *data;
        size_t size;
} ExecSetCredential;
172
173 /* Encodes configuration parameters applied to invoked commands. Does not carry runtime data, but only configuration
174 * changes sourced from unit files and suchlike. ExecContext objects are usually embedded into Unit objects, and do not
175 * change after being loaded. */
176 struct ExecContext {
177 char **environment;
178 char **environment_files;
179 char **pass_environment;
180 char **unset_environment;
181
182 struct rlimit *rlimit[_RLIMIT_MAX];
183 char *working_directory, *root_directory, *root_image, *root_verity, *root_hash_path, *root_hash_sig_path;
184 void *root_hash, *root_hash_sig;
185 size_t root_hash_size, root_hash_sig_size;
186 LIST_HEAD(MountOptions, root_image_options);
187 bool working_directory_missing_ok:1;
188 bool working_directory_home:1;
189
190 bool oom_score_adjust_set:1;
191 bool coredump_filter_set:1;
192 bool nice_set:1;
193 bool ioprio_set:1;
194 bool cpu_sched_set:1;
195 bool mount_apivfs_set:1;
196
197 /* This is not exposed to the user but available internally. We need it to make sure that whenever we
198 * spawn /usr/bin/mount it is run in the same process group as us so that the autofs logic detects
199 * that it belongs to us and we don't enter a trigger loop. */
200 bool same_pgrp;
201
202 bool cpu_sched_reset_on_fork;
203 bool non_blocking;
204
205 mode_t umask;
206 int oom_score_adjust;
207 int nice;
208 int ioprio;
209 int cpu_sched_policy;
210 int cpu_sched_priority;
211 uint64_t coredump_filter;
212
213 CPUSet cpu_set;
214 NUMAPolicy numa_policy;
215 bool cpu_affinity_from_numa;
216
217 ExecInput std_input;
218 ExecOutput std_output;
219 ExecOutput std_error;
220 bool stdio_as_fds;
221 char *stdio_fdname[3];
222 char *stdio_file[3];
223
224 void *stdin_data;
225 size_t stdin_data_size;
226
227 nsec_t timer_slack_nsec;
228
229 char *tty_path;
230
231 bool tty_reset;
232 bool tty_vhangup;
233 bool tty_vt_disallocate;
234
235 bool ignore_sigpipe;
236
237 ExecKeyringMode keyring_mode;
238
239 /* Since resolving these names might involve socket
240 * connections and we don't want to deadlock ourselves these
241 * names are resolved on execution only and in the child
242 * process. */
243 char *user;
244 char *group;
245 char **supplementary_groups;
246
247 char *pam_name;
248
249 char *utmp_id;
250 ExecUtmpMode utmp_mode;
251
252 bool no_new_privileges;
253
254 bool selinux_context_ignore;
255 bool apparmor_profile_ignore;
256 bool smack_process_label_ignore;
257
258 char *selinux_context;
259 char *apparmor_profile;
260 char *smack_process_label;
261
262 char **read_write_paths, **read_only_paths, **inaccessible_paths, **exec_paths, **no_exec_paths;
263 char **exec_search_path;
264 unsigned long mount_flags;
265 BindMount *bind_mounts;
266 size_t n_bind_mounts;
267 TemporaryFileSystem *temporary_filesystems;
268 size_t n_temporary_filesystems;
269 MountImage *mount_images;
270 size_t n_mount_images;
271 MountImage *extension_images;
272 size_t n_extension_images;
273
274 uint64_t capability_bounding_set;
275 uint64_t capability_ambient_set;
276 int secure_bits;
277
278 int syslog_priority;
279 bool syslog_level_prefix;
280 char *syslog_identifier;
281
282 struct iovec* log_extra_fields;
283 size_t n_log_extra_fields;
284
285 usec_t log_ratelimit_interval_usec;
286 unsigned log_ratelimit_burst;
287
288 int log_level_max;
289
290 char *log_namespace;
291
292 ProtectProc protect_proc; /* hidepid= */
293 ProcSubset proc_subset; /* subset= */
294
295 bool private_tmp;
296 bool private_network;
297 bool private_devices;
298 bool private_users;
299 bool private_mounts;
300 bool private_ipc;
301 bool protect_kernel_tunables;
302 bool protect_kernel_modules;
303 bool protect_kernel_logs;
304 bool protect_clock;
305 bool protect_control_groups;
306 ProtectSystem protect_system;
307 ProtectHome protect_home;
308 bool protect_hostname;
309 bool mount_apivfs;
310
311 bool dynamic_user;
312 bool remove_ipc;
313
314 bool memory_deny_write_execute;
315 bool restrict_realtime;
316 bool restrict_suid_sgid;
317
318 bool lock_personality;
319 unsigned long personality;
320
321 unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */
322
323 Set *restrict_filesystems;
324 bool restrict_filesystems_allow_list:1;
325
326 Hashmap *syscall_filter;
327 Set *syscall_archs;
328 int syscall_errno;
329 bool syscall_allow_list:1;
330
331 Hashmap *syscall_log;
332 bool syscall_log_allow_list:1; /* Log listed system calls */
333
334 bool address_families_allow_list:1;
335 Set *address_families;
336
337 char *network_namespace_path;
338 char *ipc_namespace_path;
339
340 ExecDirectory directories[_EXEC_DIRECTORY_TYPE_MAX];
341 ExecPreserveMode runtime_directory_preserve_mode;
342 usec_t timeout_clean_usec;
343
344 Hashmap *set_credentials; /* output id → ExecSetCredential */
345 Hashmap *load_credentials; /* output id → ExecLoadCredential */
346 };
347
348 static inline bool exec_context_restrict_namespaces_set(const ExecContext *c) {
349 assert(c);
350
351 return (c->restrict_namespaces & NAMESPACE_FLAGS_ALL) != NAMESPACE_FLAGS_ALL;
352 }
353
354 static inline bool exec_context_restrict_filesystems_set(const ExecContext *c) {
355 assert(c);
356
357 return c->restrict_filesystems_allow_list ||
358 !set_isempty(c->restrict_filesystems);
359 }
360
361 static inline bool exec_context_with_rootfs(const ExecContext *c) {
362 assert(c);
363
364 /* Checks if RootDirectory= or RootImage= are used */
365
366 return !empty_or_root(c->root_directory) || c->root_image;
367 }
368
/* Bit flags stored in ExecParameters.flags. */
typedef enum ExecFlags {
        EXEC_APPLY_SANDBOXING  = 1 << 0,
        EXEC_APPLY_CHROOT      = 1 << 1,
        EXEC_APPLY_TTY_STDIN   = 1 << 2,

        /* Whether to pass the unit name to the service's journal stream connection */
        EXEC_PASS_LOG_UNIT     = 1 << 3,

        /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */
        EXEC_CHOWN_DIRECTORIES = 1 << 4,

        /* Set the SYSTEMD_NSS_BYPASS_BUS environment variable, to disable nss-systemd for dbus */
        EXEC_NSS_BYPASS_BUS    = 1 << 5,

        EXEC_CGROUP_DELEGATE   = 1 << 6,
        EXEC_IS_CONTROL        = 1 << 7,

        /* Place the process not in the indicated cgroup but in a subcgroup '/.control', but only if
         * EXEC_CGROUP_DELEGATE and EXEC_IS_CONTROL are set, too */
        EXEC_CONTROL_CGROUP    = 1 << 8,

        /* Set up the credential store logic */
        EXEC_WRITE_CREDENTIALS = 1 << 9,

        /* The following are not used by execute.c, but by consumers internally */
        EXEC_PASS_FDS          = 1 << 10,
        EXEC_SETENV_RESULT     = 1 << 11,
        EXEC_SET_WATCHDOG      = 1 << 12,
} ExecFlags;
386
387 /* Parameters for a specific invocation of a command. This structure is put together right before a command is
388 * executed. */
389 struct ExecParameters {
390 char **environment;
391
392 int *fds;
393 char **fd_names;
394 size_t n_socket_fds;
395 size_t n_storage_fds;
396
397 ExecFlags flags;
398 bool selinux_context_net:1;
399
400 CGroupMask cgroup_supported;
401 const char *cgroup_path;
402
403 char **prefix;
404 const char *received_credentials;
405
406 const char *confirm_spawn;
407
408 usec_t watchdog_usec;
409
410 int *idle_pipe;
411
412 int stdin_fd;
413 int stdout_fd;
414 int stderr_fd;
415
416 /* An fd that is closed by the execve(), and thus will result in EOF when the execve() is done */
417 int exec_fd;
418
419 const char *notify_socket;
420 };
421
422 #include "unit.h"
423 #include "dynamic-user.h"
424
425 int exec_spawn(Unit *unit,
426 ExecCommand *command,
427 const ExecContext *context,
428 const ExecParameters *exec_params,
429 ExecRuntime *runtime,
430 DynamicCreds *dynamic_creds,
431 pid_t *ret);
432
433 void exec_command_done_array(ExecCommand *c, size_t n);
434 ExecCommand* exec_command_free_list(ExecCommand *c);
435 void exec_command_free_array(ExecCommand **c, size_t n);
436 void exec_command_reset_status_array(ExecCommand *c, size_t n);
437 void exec_command_reset_status_list_array(ExecCommand **c, size_t n);
438 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix);
439 void exec_command_append_list(ExecCommand **l, ExecCommand *e);
440 int exec_command_set(ExecCommand *c, const char *path, ...) _sentinel_;
441 int exec_command_append(ExecCommand *c, const char *path, ...) _sentinel_;
442
443 void exec_context_init(ExecContext *c);
444 void exec_context_done(ExecContext *c);
445 void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix);
446
447 int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_root);
448 int exec_context_destroy_credentials(const ExecContext *c, const char *runtime_root, const char *unit);
449
450 const char* exec_context_fdname(const ExecContext *c, int fd_index);
451
452 bool exec_context_may_touch_console(const ExecContext *c);
453 bool exec_context_maintains_privileges(const ExecContext *c);
454
455 int exec_context_get_effective_ioprio(const ExecContext *c);
456 bool exec_context_get_effective_mount_apivfs(const ExecContext *c);
457
458 void exec_context_free_log_extra_fields(ExecContext *c);
459
460 void exec_context_revert_tty(ExecContext *c);
461
462 int exec_context_get_clean_directories(ExecContext *c, char **prefix, ExecCleanMask mask, char ***ret);
463 int exec_context_get_clean_mask(ExecContext *c, ExecCleanMask *ret);
464
465 void exec_status_start(ExecStatus *s, pid_t pid);
466 void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status);
467 void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix);
468 void exec_status_reset(ExecStatus *s);
469
470 int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *name, bool create, ExecRuntime **ret);
471 ExecRuntime *exec_runtime_unref(ExecRuntime *r, bool destroy);
472
473 int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds);
474 int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds);
475 int exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds);
476 void exec_runtime_vacuum(Manager *m);
477
478 void exec_params_clear(ExecParameters *p);
479
480 bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c);
481
482 ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc);
483 DEFINE_TRIVIAL_CLEANUP_FUNC(ExecSetCredential*, exec_set_credential_free);
484
485 ExecLoadCredential *exec_load_credential_free(ExecLoadCredential *lc);
486 DEFINE_TRIVIAL_CLEANUP_FUNC(ExecLoadCredential*, exec_load_credential_free);
487
488 void exec_directory_done(ExecDirectory *d);
489 int exec_directory_add(ExecDirectoryItem **d, size_t *n, const char *path, char **symlinks);
490
491 extern const struct hash_ops exec_set_credential_hash_ops;
492 extern const struct hash_ops exec_load_credential_hash_ops;
493
494 const char* exec_output_to_string(ExecOutput i) _const_;
495 ExecOutput exec_output_from_string(const char *s) _pure_;
496
497 const char* exec_input_to_string(ExecInput i) _const_;
498 ExecInput exec_input_from_string(const char *s) _pure_;
499
500 const char* exec_utmp_mode_to_string(ExecUtmpMode i) _const_;
501 ExecUtmpMode exec_utmp_mode_from_string(const char *s) _pure_;
502
503 const char* exec_preserve_mode_to_string(ExecPreserveMode i) _const_;
504 ExecPreserveMode exec_preserve_mode_from_string(const char *s) _pure_;
505
506 const char* exec_keyring_mode_to_string(ExecKeyringMode i) _const_;
507 ExecKeyringMode exec_keyring_mode_from_string(const char *s) _pure_;
508
509 const char* exec_directory_type_to_string(ExecDirectoryType i) _const_;
510 ExecDirectoryType exec_directory_type_from_string(const char *s) _pure_;
511
512 const char* exec_directory_type_symlink_to_string(ExecDirectoryType i) _const_;
513 ExecDirectoryType exec_directory_type_symlink_from_string(const char *s) _pure_;
514
515 const char* exec_resource_type_to_string(ExecDirectoryType i) _const_;
516 ExecDirectoryType exec_resource_type_from_string(const char *s) _pure_;
517
518 bool exec_needs_mount_namespace(const ExecContext *context, const ExecParameters *params, const ExecRuntime *runtime);