1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
7 #include "sd-messages.h"
9 #include "alloc-util.h"
10 #include "bus-error.h"
11 #include "bus-locator.h"
12 #include "bus-unit-util.h"
15 #include "errno-util.h"
17 #include "extract-word.h"
20 #include "format-util.h"
23 #include "machine-dbus.h"
25 #include "mkdir-label.h"
26 #include "parse-util.h"
27 #include "path-util.h"
28 #include "process-util.h"
29 #include "serialize.h"
30 #include "socket-util.h"
32 #include "stdio-util.h"
33 #include "string-table.h"
34 #include "string-util.h"
35 #include "terminal-util.h"
36 #include "tmpfile-util.h"
37 #include "uid-range.h"
38 #include "unit-name.h"
39 #include "user-util.h"
41 int machine_new(MachineClass
class, const char *name
, Machine
**ret
) {
42 _cleanup_(machine_freep
) Machine
*m
= NULL
;
44 assert(class < _MACHINE_CLASS_MAX
);
47 /* Passing class == _MACHINE_CLASS_INVALID here is fine. It
48 * means as much as "we don't know yet", and that we'll figure
49 * it out later when loading the state file. */
56 .leader
= PIDREF_NULL
,
57 .vsock_cid
= VMADDR_CID_ANY
,
61 m
->name
= strdup(name
);
72 int machine_link(Manager
*manager
, Machine
*machine
) {
83 if (machine
->class != MACHINE_HOST
) {
84 char *temp
= path_join("/run/systemd/machines", machine
->name
);
88 free_and_replace(machine
->state_file
, temp
);
91 r
= hashmap_put(manager
->machines
, machine
->name
, machine
);
95 machine
->manager
= manager
;
100 Machine
* machine_free(Machine
*m
) {
104 while (m
->operations
)
105 operation_free(m
->operations
);
107 if (m
->in_gc_queue
) {
109 LIST_REMOVE(gc_queue
, m
->manager
->machine_gc_queue
, m
);
113 machine_release_unit(m
);
115 (void) hashmap_remove(m
->manager
->machines
, m
->name
);
117 if (m
->manager
->host_machine
== m
)
118 m
->manager
->host_machine
= NULL
;
121 if (pidref_is_set(&m
->leader
)) {
123 (void) hashmap_remove_value(m
->manager
->machine_leaders
, PID_TO_PTR(m
->leader
.pid
), m
);
124 pidref_done(&m
->leader
);
127 sd_bus_message_unref(m
->create_message
);
133 free(m
->root_directory
);
135 free(m
->ssh_address
);
136 free(m
->ssh_private_key_path
);
140 int machine_save(Machine
*m
) {
141 _cleanup_(unlink_and_freep
) char *temp_path
= NULL
;
142 _cleanup_fclose_
FILE *f
= NULL
;
153 r
= mkdir_safe_label("/run/systemd/machines", 0755, 0, 0, MKDIR_WARN_MODE
);
157 r
= fopen_temporary(m
->state_file
, &f
, &temp_path
);
161 (void) fchmod(fileno(f
), 0644);
164 "# This is private data. Do not parse.\n"
169 _cleanup_free_
char *escaped
= NULL
;
171 escaped
= cescape(m
->unit
);
177 fprintf(f
, "SCOPE=%s\n", escaped
); /* We continue to call this "SCOPE=" because it is internal only, and we want to stay compatible with old files */
181 fprintf(f
, "SCOPE_JOB=%s\n", m
->scope_job
);
184 _cleanup_free_
char *escaped
= NULL
;
186 escaped
= cescape(m
->service
);
191 fprintf(f
, "SERVICE=%s\n", escaped
);
194 if (m
->root_directory
) {
195 _cleanup_free_
char *escaped
= NULL
;
197 escaped
= cescape(m
->root_directory
);
202 fprintf(f
, "ROOT=%s\n", escaped
);
205 if (!sd_id128_is_null(m
->id
))
206 fprintf(f
, "ID=" SD_ID128_FORMAT_STR
"\n", SD_ID128_FORMAT_VAL(m
->id
));
208 if (pidref_is_set(&m
->leader
))
209 fprintf(f
, "LEADER="PID_FMT
"\n", m
->leader
.pid
);
211 if (m
->class != _MACHINE_CLASS_INVALID
)
212 fprintf(f
, "CLASS=%s\n", machine_class_to_string(m
->class));
214 if (dual_timestamp_is_set(&m
->timestamp
))
216 "REALTIME="USEC_FMT
"\n"
217 "MONOTONIC="USEC_FMT
"\n",
218 m
->timestamp
.realtime
,
219 m
->timestamp
.monotonic
);
221 if (m
->n_netif
> 0) {
226 for (i
= 0; i
< m
->n_netif
; i
++) {
230 fprintf(f
, "%i", m
->netif
[i
]);
236 r
= fflush_and_check(f
);
240 if (rename(temp_path
, m
->state_file
) < 0) {
245 temp_path
= mfree(temp_path
);
250 /* Create a symlink from the unit name to the machine
251 * name, so that we can quickly find the machine for
252 * each given unit. Ignore error. */
253 sl
= strjoina("/run/systemd/machines/unit:", m
->unit
);
254 (void) symlink(m
->name
, sl
);
260 (void) unlink(m
->state_file
);
262 return log_error_errno(r
, "Failed to save machine data %s: %m", m
->state_file
);
265 static void machine_unlink(Machine
*m
) {
271 sl
= strjoina("/run/systemd/machines/unit:", m
->unit
);
276 (void) unlink(m
->state_file
);
279 int machine_load(Machine
*m
) {
280 _cleanup_free_
char *realtime
= NULL
, *monotonic
= NULL
, *id
= NULL
, *leader
= NULL
, *class = NULL
, *netif
= NULL
;
288 r
= parse_env_file(NULL
, m
->state_file
,
290 "SCOPE_JOB", &m
->scope_job
,
291 "SERVICE", &m
->service
,
292 "ROOT", &m
->root_directory
,
296 "REALTIME", &realtime
,
297 "MONOTONIC", &monotonic
,
302 return log_error_errno(r
, "Failed to read %s: %m", m
->state_file
);
305 (void) sd_id128_from_string(id
, &m
->id
);
308 pidref_done(&m
->leader
);
309 r
= pidref_set_pidstr(&m
->leader
, leader
);
311 log_debug_errno(r
, "Failed to set leader PID to '%s', ignoring: %m", leader
);
317 c
= machine_class_from_string(class);
323 (void) deserialize_usec(realtime
, &m
->timestamp
.realtime
);
325 (void) deserialize_usec(monotonic
, &m
->timestamp
.monotonic
);
328 _cleanup_free_
int *ni
= NULL
;
334 _cleanup_free_
char *word
= NULL
;
336 r
= extract_first_word(&p
, &word
, NULL
, 0);
342 log_warning_errno(r
, "Failed to parse NETIF: %s", netif
);
346 r
= parse_ifindex(word
);
350 if (!GREEDY_REALLOC(ni
, nr
+ 1))
356 free_and_replace(m
->netif
, ni
);
363 static int machine_start_scope(
366 sd_bus_message
*more_properties
,
367 sd_bus_error
*error
) {
369 _cleanup_(sd_bus_message_unrefp
) sd_bus_message
*m
= NULL
, *reply
= NULL
;
370 _cleanup_(sd_bus_error_free
) sd_bus_error e
= SD_BUS_ERROR_NULL
;
371 _cleanup_free_
char *escaped
= NULL
, *unit
= NULL
;
372 const char *description
;
376 assert(pidref_is_set(&machine
->leader
));
377 assert(!machine
->unit
);
379 escaped
= unit_name_escape(machine
->name
);
383 unit
= strjoin("machine-", escaped
, ".scope");
387 r
= bus_message_new_method_call(
388 machine
->manager
->bus
,
391 "StartTransientUnit");
395 r
= sd_bus_message_append(m
, "ss", unit
, "fail");
399 r
= sd_bus_message_open_container(m
, 'a', "(sv)");
403 r
= sd_bus_message_append(m
, "(sv)", "Slice", "s", SPECIAL_MACHINE_SLICE
);
407 description
= strjoina(machine
->class == MACHINE_VM
? "Virtual Machine " : "Container ", machine
->name
);
408 r
= sd_bus_message_append(m
, "(sv)", "Description", "s", description
);
412 r
= bus_append_scope_pidref(m
, &machine
->leader
, allow_pidfd
);
416 r
= sd_bus_message_append(m
, "(sv)(sv)(sv)(sv)",
418 "CollectMode", "s", "inactive-or-failed",
420 "TasksMax", "t", UINT64_C(16384));
424 if (more_properties
) {
425 r
= sd_bus_message_copy(m
, more_properties
, true);
430 r
= sd_bus_message_close_container(m
);
434 r
= sd_bus_message_append(m
, "a(sa(sv))", 0);
438 r
= sd_bus_call(NULL
, m
, 0, &e
, &reply
);
440 /* If this failed with a property we couldn't write, this is quite likely because the server
441 * doesn't support PIDFDs yet, let's try without. */
443 sd_bus_error_has_names(&e
, SD_BUS_ERROR_UNKNOWN_PROPERTY
, SD_BUS_ERROR_PROPERTY_READ_ONLY
))
444 return machine_start_scope(machine
, /* allow_pidfd = */ false, more_properties
, error
);
446 return sd_bus_error_move(error
, &e
);
449 machine
->unit
= TAKE_PTR(unit
);
450 machine
->referenced
= true;
453 r
= sd_bus_message_read(reply
, "o", &job
);
457 return free_and_strdup(&machine
->scope_job
, job
);
460 static int machine_ensure_scope(Machine
*m
, sd_bus_message
*properties
, sd_bus_error
*error
) {
464 assert(m
->class != MACHINE_HOST
);
467 r
= machine_start_scope(m
, /* allow_pidfd = */ true, properties
, error
);
469 return log_error_errno(r
, "Failed to start machine scope: %s", bus_error_message(error
, r
));
473 hashmap_put(m
->manager
->machine_units
, m
->unit
, m
);
478 int machine_start(Machine
*m
, sd_bus_message
*properties
, sd_bus_error
*error
) {
483 if (!IN_SET(m
->class, MACHINE_CONTAINER
, MACHINE_VM
))
489 r
= hashmap_put(m
->manager
->machine_leaders
, PID_TO_PTR(m
->leader
.pid
), m
);
494 r
= machine_ensure_scope(m
, properties
, error
);
499 "MESSAGE_ID=" SD_MESSAGE_MACHINE_START_STR
,
501 "LEADER="PID_FMT
, m
->leader
.pid
,
502 LOG_MESSAGE("New machine %s.", m
->name
));
504 if (!dual_timestamp_is_set(&m
->timestamp
))
505 dual_timestamp_now(&m
->timestamp
);
509 /* Save new machine data */
512 machine_send_signal(m
, true);
513 (void) manager_enqueue_nscd_cache_flush(m
->manager
);
518 int machine_stop(Machine
*m
) {
523 if (!IN_SET(m
->class, MACHINE_CONTAINER
, MACHINE_VM
))
527 _cleanup_(sd_bus_error_free
) sd_bus_error error
= SD_BUS_ERROR_NULL
;
530 r
= manager_stop_unit(m
->manager
, m
->unit
, &error
, &job
);
532 return log_error_errno(r
, "Failed to stop machine scope: %s", bus_error_message(&error
, r
));
534 free_and_replace(m
->scope_job
, job
);
540 (void) manager_enqueue_nscd_cache_flush(m
->manager
);
545 int machine_finalize(Machine
*m
) {
550 "MESSAGE_ID=" SD_MESSAGE_MACHINE_STOP_STR
,
552 "LEADER="PID_FMT
, m
->leader
.pid
,
553 LOG_MESSAGE("Machine %s terminated.", m
->name
));
555 m
->stopping
= true; /* The machine is supposed to be going away. Don't try to kill it. */
559 machine_add_to_gc_queue(m
);
562 machine_send_signal(m
, false);
569 bool machine_may_gc(Machine
*m
, bool drop_not_started
) {
572 if (m
->class == MACHINE_HOST
)
575 if (drop_not_started
&& !m
->started
)
578 if (m
->scope_job
&& manager_job_is_active(m
->manager
, m
->scope_job
))
581 if (m
->unit
&& manager_unit_is_active(m
->manager
, m
->unit
))
587 void machine_add_to_gc_queue(Machine
*m
) {
593 LIST_PREPEND(gc_queue
, m
->manager
->machine_gc_queue
, m
);
594 m
->in_gc_queue
= true;
597 MachineState
machine_get_state(Machine
*s
) {
600 if (s
->class == MACHINE_HOST
)
601 return MACHINE_RUNNING
;
604 return MACHINE_CLOSING
;
607 return MACHINE_OPENING
;
609 return MACHINE_RUNNING
;
612 int machine_kill(Machine
*m
, KillWho who
, int signo
) {
615 if (!IN_SET(m
->class, MACHINE_VM
, MACHINE_CONTAINER
))
621 if (who
== KILL_LEADER
) /* If we shall simply kill the leader, do so directly */
622 return pidref_kill(&m
->leader
, signo
);
624 /* Otherwise, make PID 1 do it for us, for the entire cgroup */
625 return manager_kill_unit(m
->manager
, m
->unit
, signo
, NULL
);
628 int machine_openpt(Machine
*m
, int flags
, char **ret_slave
) {
634 return openpt_allocate(flags
, ret_slave
);
636 case MACHINE_CONTAINER
:
637 if (!pidref_is_set(&m
->leader
))
640 return openpt_allocate_in_namespace(m
->leader
.pid
, flags
, ret_slave
);
647 int machine_open_terminal(Machine
*m
, const char *path
, int mode
) {
653 return open_terminal(path
, mode
);
655 case MACHINE_CONTAINER
:
656 if (!pidref_is_set(&m
->leader
))
659 return open_terminal_in_namespace(m
->leader
.pid
, path
, mode
);
666 void machine_release_unit(Machine
*m
) {
673 _cleanup_(sd_bus_error_free
) sd_bus_error error
= SD_BUS_ERROR_NULL
;
676 r
= manager_unref_unit(m
->manager
, m
->unit
, &error
);
678 log_full_errno(ERRNO_IS_DISCONNECT(r
) ? LOG_DEBUG
: LOG_WARNING
, r
,
679 "Failed to drop reference to machine scope, ignoring: %s",
680 bus_error_message(&error
, r
));
682 m
->referenced
= false;
685 (void) hashmap_remove(m
->manager
->machine_units
, m
->unit
);
686 m
->unit
= mfree(m
->unit
);
689 int machine_get_uid_shift(Machine
*m
, uid_t
*ret
) {
690 char p
[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(pid_t
) + 1];
691 uid_t uid_base
, uid_shift
, uid_range
;
692 gid_t gid_base
, gid_shift
, gid_range
;
693 _cleanup_fclose_
FILE *f
= NULL
;
699 /* Return the base UID/GID of the specified machine. Note that this only works for containers with simple
700 * mappings. In most cases setups should be simple like this, and administrators should only care about the
701 * basic offset a container has relative to the host. This is what this function exposes.
703 * If we encounter any more complex mappings we politely refuse this with ENXIO. */
705 if (m
->class == MACHINE_HOST
) {
710 if (m
->class != MACHINE_CONTAINER
)
713 xsprintf(p
, "/proc/" PID_FMT
"/uid_map", m
->leader
.pid
);
716 if (errno
== ENOENT
) {
717 /* If the file doesn't exist, user namespacing is off in the kernel, return a zero mapping hence. */
725 /* Read the first line. There's at least one. */
726 r
= uid_map_read_one(f
, &uid_base
, &uid_shift
, &uid_range
);
730 /* Not a mapping starting at 0? Then it's a complex mapping we can't expose here. */
733 /* Insist that at least the nobody user is mapped, everything else is weird, and hence complex, and we don't support it */
734 if (uid_range
< UID_NOBODY
)
737 /* If there's more than one line, then we don't support this mapping. */
738 r
= safe_fgetc(f
, NULL
);
741 if (r
!= 0) /* Insist on EOF */
746 xsprintf(p
, "/proc/" PID_FMT
"/gid_map", m
->leader
.pid
);
751 /* Read the first line. There's at least one. */
753 r
= fscanf(f
, GID_FMT
" " GID_FMT
" " GID_FMT
"\n", &gid_base
, &gid_shift
, &gid_range
);
755 return errno_or_else(ENOMSG
);
760 /* If there's more than one line, then we don't support this file. */
761 r
= safe_fgetc(f
, NULL
);
764 if (r
!= 0) /* Insist on EOF */
767 /* If the UID and GID mapping doesn't match, we don't support this mapping. */
768 if (uid_base
!= (uid_t
) gid_base
)
770 if (uid_shift
!= (uid_t
) gid_shift
)
772 if (uid_range
!= (uid_t
) gid_range
)
779 static int machine_owns_uid_internal(
781 const char *map_file
, /* "uid_map" or "gid_map" */
783 uid_t
*ret_internal_uid
) {
785 _cleanup_fclose_
FILE *f
= NULL
;
789 /* This is a generic implementation for both uids and gids, under the assumptions they have the same types and semantics. */
790 assert_cc(sizeof(uid_t
) == sizeof(gid_t
));
794 /* Checks if the specified host UID is owned by the machine, and returns the UID it maps to
795 * internally in the machine */
797 if (machine
->class != MACHINE_CONTAINER
)
800 p
= procfs_file_alloca(machine
->leader
.pid
, map_file
);
803 log_debug_errno(errno
, "Failed to open %s, ignoring.", p
);
808 uid_t uid_base
, uid_shift
, uid_range
, converted
;
810 r
= uid_map_read_one(f
, &uid_base
, &uid_shift
, &uid_range
);
816 /* The private user namespace is disabled, ignoring. */
820 if (uid
< uid_shift
|| uid
>= uid_shift
+ uid_range
)
823 converted
= (uid
- uid_shift
+ uid_base
);
824 if (!uid_is_valid(converted
))
827 if (ret_internal_uid
)
828 *ret_internal_uid
= converted
;
834 if (ret_internal_uid
)
835 *ret_internal_uid
= UID_INVALID
;
840 int machine_owns_uid(Machine
*machine
, uid_t uid
, uid_t
*ret_internal_uid
) {
841 return machine_owns_uid_internal(machine
, "uid_map", uid
, ret_internal_uid
);
844 int machine_owns_gid(Machine
*machine
, gid_t gid
, gid_t
*ret_internal_gid
) {
845 return machine_owns_uid_internal(machine
, "gid_map", (uid_t
) gid
, (uid_t
*) ret_internal_gid
);
848 static int machine_translate_uid_internal(
850 const char *map_file
, /* "uid_map" or "gid_map" */
852 uid_t
*ret_host_uid
) {
854 _cleanup_fclose_
FILE *f
= NULL
;
858 /* This is a generic implementation for both uids and gids, under the assumptions they have the same types and semantics. */
859 assert_cc(sizeof(uid_t
) == sizeof(gid_t
));
862 assert(uid_is_valid(uid
));
864 if (machine
->class != MACHINE_CONTAINER
)
867 /* Translates a machine UID into a host UID */
869 p
= procfs_file_alloca(machine
->leader
.pid
, map_file
);
875 uid_t uid_base
, uid_shift
, uid_range
, converted
;
877 r
= uid_map_read_one(f
, &uid_base
, &uid_shift
, &uid_range
);
883 if (uid
< uid_base
|| uid
>= uid_base
+ uid_range
)
886 converted
= uid
- uid_base
+ uid_shift
;
887 if (!uid_is_valid(converted
))
891 *ret_host_uid
= converted
;
899 int machine_translate_uid(Machine
*machine
, gid_t uid
, gid_t
*ret_host_uid
) {
900 return machine_translate_uid_internal(machine
, "uid_map", uid
, ret_host_uid
);
903 int machine_translate_gid(Machine
*machine
, gid_t gid
, gid_t
*ret_host_gid
) {
904 return machine_translate_uid_internal(machine
, "gid_map", (uid_t
) gid
, (uid_t
*) ret_host_gid
);
907 static const char* const machine_class_table
[_MACHINE_CLASS_MAX
] = {
908 [MACHINE_CONTAINER
] = "container",
910 [MACHINE_HOST
] = "host",
913 DEFINE_STRING_TABLE_LOOKUP(machine_class
, MachineClass
);
915 static const char* const machine_state_table
[_MACHINE_STATE_MAX
] = {
916 [MACHINE_OPENING
] = "opening",
917 [MACHINE_RUNNING
] = "running",
918 [MACHINE_CLOSING
] = "closing"
921 DEFINE_STRING_TABLE_LOOKUP(machine_state
, MachineState
);
923 static const char* const kill_who_table
[_KILL_WHO_MAX
] = {
924 [KILL_LEADER
] = "leader",
928 DEFINE_STRING_TABLE_LOOKUP(kill_who
, KillWho
);