#include "bus-unit-util.h"
#include "bus-util.h"
#include "bus-wait-for-jobs.h"
-#include "escape.h"
+#include "event-util.h"
#include "log.h"
#include "pidref.h"
-#include "random-util.h"
-#include "socket-util.h"
#include "special.h"
#include "string-util.h"
-#include "strv.h"
#include "unit-def.h"
-#include "unit-name.h"
#include "vmspawn-scope.h"
static int append_controller_property(sd_bus *bus, sd_bus_message *m) {
sd_bus *bus,
const char *machine_name,
const PidRef *pid,
+ sd_event_source **auxiliary,
+ size_t n_auxiliary,
+ const char *scope,
const char *slice,
char **properties,
- bool allow_pidfd,
- char **ret_scope) {
+ bool allow_pidfd) {
_cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
_cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL, *m = NULL;
- _cleanup_free_ char *scope = NULL, *description = NULL;
+ _cleanup_free_ char *description = NULL;
const char *object;
int r;
if (r < 0)
return log_error_errno(r, "Could not watch job: %m");
- r = unit_name_mangle_with_suffix(machine_name, "as machine name", /* flags= */ 0, ".scope", &scope);
- if (r < 0)
- return log_error_errno(r, "Failed to mangle scope name: %m");
-
description = strjoin("Virtual Machine ", machine_name);
if (!description)
return log_oom();
if (r < 0)
return bus_log_create_error(r);
+ FOREACH_ARRAY(aux, auxiliary, n_auxiliary) {
+ PidRef pidref;
+
+ r = event_source_get_child_pidref(*aux, &pidref);
+ if (r < 0)
+ return log_error_errno(r, "Could not get pidref for event source: %m");
+
+ r = bus_append_scope_pidref(m, &pidref, allow_pidfd);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
r = sd_bus_message_append(m, "(sv)(sv)(sv)(sv)",
"Description", "s", description,
"CollectMode", "s", "inactive-or-failed",
bus,
machine_name,
pid,
+ auxiliary,
+ n_auxiliary,
+ scope,
slice,
properties,
- /* allow_pidfd= */ false,
- ret_scope);
+ /* allow_pidfd= */ false);
return log_error_errno(r, "Failed to start transient scope unit: %s", bus_error_message(&error, r));
}
if (r < 0)
return bus_log_parse_error(r);
- r = bus_wait_for_jobs_one(
+ return bus_wait_for_jobs_one(
w,
object,
BUS_WAIT_JOBS_LOG_ERROR,
/* extra_args= */ NULL);
- if (r < 0)
- return r;
-
- if (ret_scope)
- *ret_scope = TAKE_PTR(scope);
-
- return 0;
}
-int terminate_scope(
- sd_bus *bus,
- const char *machine_name) {
-
+int terminate_scope(sd_bus *bus, const char *scope) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
- _cleanup_free_ char *scope = NULL;
int r;
- r = unit_name_mangle_with_suffix(machine_name, "to terminate", /* flags= */ 0, ".scope", &scope);
- if (r < 0)
- return log_error_errno(r, "Failed to mangle scope name: %m");
-
r = bus_call_method(bus, bus_systemd_mgr, "AbandonScope", &error, /* ret_reply= */ NULL, "s", scope);
if (r < 0) {
log_debug_errno(r, "Failed to abandon scope '%s', ignoring: %s", scope, bus_error_message(&error, r));
return 0;
}
-
-static int message_add_commands(sd_bus_message *m, const char *exec_type, char ***commands, size_t n_commands) {
- int r;
-
- assert(m);
- assert(exec_type);
- assert(commands || n_commands == 0);
-
- /* A small helper for adding an ExecStart / ExecStopPost / etc.. property to an sd_bus_message */
-
- r = sd_bus_message_open_container(m, 'r', "sv");
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_append(m, "s", exec_type);
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_open_container(m, 'v', "a(sasb)");
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_open_container(m, 'a', "(sasb)");
- if (r < 0)
- return bus_log_create_error(r);
-
- FOREACH_ARRAY(cmd, commands, n_commands) {
- char **cmdline = *cmd;
-
- r = sd_bus_message_open_container(m, 'r', "sasb");
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_append(m, "s", cmdline[0]);
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_append_strv(m, cmdline);
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_append(m, "b", 0);
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_close_container(m);
- if (r < 0)
- return bus_log_create_error(r);
- }
-
- r = sd_bus_message_close_container(m);
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_close_container(m);
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_close_container(m);
- if (r < 0)
- return bus_log_create_error(r);
-
- return 0;
-}
-
-void socket_service_pair_done(SocketServicePair *p) {
- assert(p);
-
- p->exec_start_pre = strv_free(p->exec_start_pre);
- p->exec_start = strv_free(p->exec_start);
- p->exec_stop_post = strv_free(p->exec_stop_post);
- p->unit_name_prefix = mfree(p->unit_name_prefix);
- p->listen_address = mfree(p->listen_address);
- p->socket_type = 0;
-}
-
-int start_socket_service_pair(sd_bus *bus, const char *scope, SocketServicePair *p) {
- _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
- _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
- _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
- _cleanup_free_ char *service_desc = NULL, *service_name = NULL, *socket_name = NULL;
- const char *object, *socket_type_str;
- int r;
-
- /* Starts a socket/service unit pair bound to the given scope. */
-
- assert(bus);
- assert(scope);
- assert(p);
- assert(p->unit_name_prefix);
- assert(p->exec_start);
- assert(p->listen_address);
-
- r = bus_wait_for_jobs_new(bus, &w);
- if (r < 0)
- return log_error_errno(r, "Could not watch job: %m");
-
- socket_name = strjoin(p->unit_name_prefix, ".socket");
- if (!socket_name)
- return log_oom();
-
- service_name = strjoin(p->unit_name_prefix, ".service");
- if (!service_name)
- return log_oom();
-
- service_desc = quote_command_line(p->exec_start, SHELL_ESCAPE_EMPTY);
- if (!service_desc)
- return log_oom();
-
- socket_type_str = socket_address_type_to_string(p->socket_type);
- if (!socket_type_str)
- return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Invalid socket type: %d", p->socket_type);
-
- r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "StartTransientUnit");
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_append(m, "ssa(sv)",
- /* ss - name, mode */
- socket_name, "fail",
- /* a(sv) - Properties */
- 5,
- "Description", "s", p->listen_address,
- "AddRef", "b", true,
- "BindsTo", "as", 1, scope,
- "Listen", "a(ss)", 1, socket_type_str, p->listen_address,
- "CollectMode", "s", "inactive-or-failed",
- "RemoveOnStop", "b", true);
- if (r < 0)
- return bus_log_create_error(r);
-
- /* aux */
- r = sd_bus_message_open_container(m, 'a', "(sa(sv))");
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_open_container(m, 'r', "sa(sv)");
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_append(m, "s", service_name);
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_open_container(m, 'a', "(sv)");
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_append(m, "(sv)(sv)(sv)(sv)",
- "Description", "s", service_desc,
- "AddRef", "b", 1,
- "BindsTo", "as", 1, scope,
- "CollectMode", "s", "inactive-or-failed");
- if (r < 0)
- return bus_log_create_error(r);
-
- if (p->exec_start_pre) {
- r = message_add_commands(m, "ExecStartPre", &p->exec_start_pre, 1);
- if (r < 0)
- return r;
- }
-
- r = message_add_commands(m, "ExecStart", &p->exec_start, 1);
- if (r < 0)
- return r;
-
- if (p->exec_stop_post) {
- r = message_add_commands(m, "ExecStopPost", &p->exec_stop_post, 1);
- if (r < 0)
- return r;
- }
-
- r = sd_bus_message_close_container(m);
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_close_container(m);
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_close_container(m);
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_call(bus, m, 0, &error, &reply);
- if (r < 0)
- return log_error_errno(r, "Failed to start %s as transient unit: %s", p->exec_start[0], bus_error_message(&error, r));
-
- r = sd_bus_message_read(reply, "o", &object);
- if (r < 0)
- return bus_log_parse_error(r);
-
- return bus_wait_for_jobs_one(w, object, /* quiet */ false, NULL);
-}
#include "bus-internal.h"
#include "bus-locator.h"
#include "bus-util.h"
-#include "bus-wait-for-jobs.h"
#include "capability-util.h"
#include "common-signal.h"
#include "copy.h"
#include "event-util.h"
#include "extract-word.h"
#include "fd-util.h"
+#include "fork-notify.h"
#include "format-util.h"
#include "fs-util.h"
#include "gpt.h"
#include "hostname-setup.h"
#include "hostname-util.h"
#include "id128-util.h"
-#include "io-util.h"
-#include "iovec-util.h"
#include "log.h"
#include "machine-credential.h"
#include "main-func.h"
#include "namespace-util.h"
#include "netif-util.h"
#include "nsresource.h"
-#include "nulstr-util.h"
#include "osc-context.h"
#include "pager.h"
#include "parse-argument.h"
}
static int on_child_exit(sd_event_source *s, const siginfo_t *si, void *userdata) {
-        sd_event_exit(sd_event_source_get_event(s), 0);
+        /* Child-exit callback installed through event_add_child_pidref(). Logs how the child described
+         * by 'si' terminated and then requests termination of the event loop that owns 's'. The code
+         * handed to sd_event_exit() is the child's exit status for a regular exit, or whatever
+         * log_error_errno() returns for a synthetic EPROTO on any abnormal termination. 'userdata' is
+         * not used. Always returns 0. */
+
+        assert(si);
+
+        /* Let's first do some logging about the exit status of the child. */
+
+        int ret;
+        if (si->si_code == CLD_EXITED) {
+                if (si->si_status == EXIT_SUCCESS)
+                        log_debug("Child process " PID_FMT " exited successfully.", si->si_pid);
+                else
+                        log_error("Child process " PID_FMT " died with a failure exit status %i.", si->si_pid, si->si_status);
+
+                ret = si->si_status;
+        } else if (si->si_code == CLD_KILLED)
+                ret = log_error_errno(SYNTHETIC_ERRNO(EPROTO),
+                                      "Child process " PID_FMT " was killed by signal %s.",
+                                      si->si_pid, signal_to_string(si->si_status));
+        else if (si->si_code == CLD_DUMPED)
+                ret = log_error_errno(SYNTHETIC_ERRNO(EPROTO),
+                                      "Child process " PID_FMT " dumped core by signal %s.",
+                                      si->si_pid, signal_to_string(si->si_status));
+        else
+                /* Any other si_code (e.g. stop/continue notifications) is not something we asked for. */
+                ret = log_error_errno(SYNTHETIC_ERRNO(EPROTO),
+                                      "Got unexpected exit code %i via SIGCHLD.",
+                                      si->si_code);
+
+        /* Regardless of whether the main qemu process or an auxiliary process died, let's exit either way
+         * as it's very likely that the main qemu process won't be able to operate properly anymore if one
+         * of the auxiliary processes died. */
+
+        sd_event_exit(sd_event_source_get_event(s), ret);
        return 0;
}
}
static int start_tpm(
-                sd_bus *bus,
                const char *scope,
                const char *swtpm,
                const char *runtime_dir,
-                char **ret_listen_address) {
+                const char *sd_socket_activate,
+                char **ret_listen_address,
+                PidRef *ret_pidref) {
        int r;
+        /* Prepares the TPM state directory by running swtpm_setup synchronously, then launches swtpm
+         * wrapped in 'sd_socket_activate' listening on <runtime_dir>/tpm.sock via fork_notify(). On
+         * success the listen address is handed to the caller through 'ret_listen_address' (if non-NULL);
+         * 'ret_pidref' is passed straight to fork_notify(), which presumably stores the child's PidRef
+         * there. Returns 0 on success and a negative value on failure. */
+
-        assert(bus);
        assert(scope);
        assert(swtpm);
        assert(runtime_dir);
+        assert(sd_socket_activate);
        if (r < 0)
                return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
-        _cleanup_(socket_service_pair_done) SocketServicePair ssp = {
-                .socket_type = SOCK_STREAM,
-        };
-
-        ssp.unit_name_prefix = strjoin(scope_prefix, "-tpm");
-        if (!ssp.unit_name_prefix)
-                return log_oom();
-
-        ssp.listen_address = path_join(runtime_dir, "tpm.sock");
-        if (!ssp.listen_address)
+        _cleanup_free_ char *listen_address = path_join(runtime_dir, "tpm.sock");
+        if (!listen_address)
                return log_oom();
        _cleanup_free_ char *transient_state_dir = NULL;
        if (arg_tpm_state_path)
                state_dir = arg_tpm_state_path;
        else {
-                transient_state_dir = path_join(runtime_dir, ssp.unit_name_prefix);
+                _cleanup_free_ char *dirname = strjoin(scope_prefix, "-tpm");
+                if (!dirname)
+                        return log_oom();
+
+                transient_state_dir = path_join(runtime_dir, dirname);
                if (!transient_state_dir)
                        return log_oom();
        if (r < 0)
                return log_error_errno(r, "Failed to find swtpm_setup binary: %m");
-        ssp.exec_start_pre = strv_new(swtpm_setup, "--tpm-state", state_dir, "--tpm2", "--pcr-banks", "sha256", "--not-overwrite");
-        if (!ssp.exec_start_pre)
+        _cleanup_strv_free_ char **argv = strv_new(swtpm_setup, "--tpm-state", state_dir, "--tpm2", "--pcr-banks", "sha256", "--not-overwrite");
+        if (!argv)
                return log_oom();
-        ssp.exec_start = strv_new(swtpm, "socket", "--tpm2", "--tpmstate");
-        if (!ssp.exec_start)
+        /* Run swtpm_setup to completion before starting swtpm itself (this replaces the former
+         * ExecStartPre= step). */
+        r = safe_fork("(swtpm-setup)", FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_WAIT, NULL);
+        if (r < 0)
+                /* Fork failed or swtpm_setup did not finish successfully; FORK_LOG is expected to have
+                 * logged the reason already. Do not start swtpm on an uninitialized state directory. */
+                return r;
+        if (r == 0) {
+                /* Child */
+                execvp(argv[0], argv);
+                log_error_errno(errno, "Failed to execute '%s': %m", argv[0]);
+                _exit(EXIT_FAILURE);
+        }
+
+        /* Reuse 'argv' for the actual swtpm command line. */
+        strv_free(argv);
+        argv = strv_new(sd_socket_activate, "--listen", listen_address, swtpm, "socket", "--tpm2", "--tpmstate");
+        if (!argv)
                return log_oom();
-        r = strv_extendf(&ssp.exec_start, "dir=%s", state_dir);
+        r = strv_extendf(&argv, "dir=%s", state_dir);
        if (r < 0)
                return log_oom();
-        r = strv_extend_many(&ssp.exec_start, "--ctrl", "type=unixio,fd=3");
+        r = strv_extend_many(&argv, "--ctrl", "type=unixio,fd=3");
        if (r < 0)
                return log_oom();
-        r = start_socket_service_pair(bus, scope, &ssp);
+        r = fork_notify(argv, ret_pidref);
        if (r < 0)
                return r;
        if (ret_listen_address)
-                *ret_listen_address = TAKE_PTR(ssp.listen_address);
+                *ret_listen_address = TAKE_PTR(listen_address);
        return 0;
}
static int start_systemd_journal_remote(
-                sd_bus *bus,
                const char *scope,
                unsigned port,
-                const char *sd_journal_remote,
-                char **ret_listen_address) {
+                const char *sd_socket_activate,
+                char **ret_listen_address,
+                PidRef *ret_pidref) {
        int r;
+        /* Locates systemd-journal-remote and launches it wrapped in 'sd_socket_activate', listening on
+         * "vsock:2:<port>" and writing to arg_forward_journal, via fork_notify(). On success the listen
+         * address is handed to the caller through 'ret_listen_address' (if non-NULL); 'ret_pidref' is
+         * passed straight to fork_notify(). Returns 0 on success and a negative value on failure. */
+
-        assert(bus);
        assert(scope);
-        assert(sd_journal_remote);
-        _cleanup_free_ char *scope_prefix = NULL;
-        r = unit_name_to_prefix(scope, &scope_prefix);
-        if (r < 0)
-                return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
-        _cleanup_(socket_service_pair_done) SocketServicePair ssp = {
-                .socket_type = SOCK_STREAM,
-        };
-
-        ssp.unit_name_prefix = strjoin(scope_prefix, "-forward-journal");
-        if (!ssp.unit_name_prefix)
+        assert(sd_socket_activate);
+
+        /* No unit name is derived from 'scope' anymore, hence the scope prefix is not computed here. */
+
+        _cleanup_free_ char *listen_address = NULL;
+        if (asprintf(&listen_address, "vsock:2:%u", port) < 0)
                return log_oom();
-        if (asprintf(&ssp.listen_address, "vsock:2:%u", port) < 0)
-                return log_oom();
+        _cleanup_free_ char *sd_journal_remote = NULL;
+        r = find_executable_full(
+                        "systemd-journal-remote",
+                        /* root = */ NULL,
+                        STRV_MAKE(LIBEXECDIR),
+                        /* use_path_envvar = */ true, /* systemd-journal-remote should be installed in
+                                                       * LIBEXECDIR, but also honour the PATH environment
+                                                       * variable to support fancy setups. */
+                        &sd_journal_remote,
+                        /* ret_fd = */ NULL);
+        if (r < 0)
+                return log_error_errno(r, "Failed to find systemd-journal-remote binary: %m");
-        ssp.exec_start = strv_new(
+        _cleanup_strv_free_ char **argv = strv_new(
+                        sd_socket_activate,
+                        "--listen", listen_address,
                        sd_journal_remote,
                        "--output", arg_forward_journal,
                        "--split-mode", endswith(arg_forward_journal, ".journal") ? "none" : "host");
-        if (!ssp.exec_start)
+        if (!argv)
                return log_oom();
-        r = start_socket_service_pair(bus, scope, &ssp);
+        r = fork_notify(argv, ret_pidref);
        if (r < 0)
                return r;
        if (ret_listen_address)
-                *ret_listen_address = TAKE_PTR(ssp.listen_address);
+                *ret_listen_address = TAKE_PTR(listen_address);
        return 0;
}
}
static int start_virtiofsd(
- sd_bus *bus,
const char *scope,
const char *directory,
bool uidmap,
const char *runtime_dir,
- char **ret_listen_address) {
+ const char *sd_socket_activate,
+ char **ret_listen_address,
+ PidRef *ret_pidref) {
- static unsigned virtiofsd_instance = 0;
int r;
- assert(bus);
assert(scope);
assert(directory);
assert(runtime_dir);
if (r < 0)
return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
- _cleanup_(socket_service_pair_done) SocketServicePair ssp = {
- .socket_type = SOCK_STREAM,
- };
-
- if (asprintf(&ssp.unit_name_prefix, "%s-virtiofsd-%u", scope_prefix, virtiofsd_instance++) < 0)
- return log_oom();
-
- if (asprintf(&ssp.listen_address, "%s/sock-%"PRIx64, runtime_dir, random_u64()) < 0)
+ _cleanup_free_ char *listen_address = NULL;
+ if (asprintf(&listen_address, "%s/sock-%"PRIx64, runtime_dir, random_u64()) < 0)
return log_oom();
/* QEMU doesn't support submounts so don't announce them */
- ssp.exec_start = strv_new(virtiofsd, "--shared-dir", directory, "--xattr", "--fd", "3", "--no-announce-submounts");
- if (!ssp.exec_start)
+ _cleanup_strv_free_ char **argv = strv_new(
+ sd_socket_activate,
+ "--listen", listen_address,
+ virtiofsd,
+ "--shared-dir", directory,
+ "--xattr",
+ "--fd", "3",
+ "--no-announce-submounts");
+ if (!argv)
return log_oom();
if (uidmap && arg_uid_shift != UID_INVALID) {
- r = strv_extend(&ssp.exec_start, "--uid-map");
+ r = strv_extend(&argv, "--uid-map");
if (r < 0)
return log_oom();
- r = strv_extendf(&ssp.exec_start, ":0:" UID_FMT ":" UID_FMT ":", arg_uid_shift, arg_uid_range);
+ r = strv_extendf(&argv, ":0:" UID_FMT ":" UID_FMT ":", arg_uid_shift, arg_uid_range);
if (r < 0)
return log_oom();
- r = strv_extend(&ssp.exec_start, "--gid-map");
+ r = strv_extend(&argv, "--gid-map");
if (r < 0)
return log_oom();
- r = strv_extendf(&ssp.exec_start, ":0:" GID_FMT ":" GID_FMT ":", arg_uid_shift, arg_uid_range);
+ r = strv_extendf(&argv, ":0:" GID_FMT ":" GID_FMT ":", arg_uid_shift, arg_uid_range);
if (r < 0)
return log_oom();
}
- r = start_socket_service_pair(bus, scope, &ssp);
+ r = fork_notify(argv, ret_pidref);
if (r < 0)
return r;
if (ret_listen_address)
- *ret_listen_address = TAKE_PTR(ssp.listen_address);
+ *ret_listen_address = TAKE_PTR(listen_address);
return 0;
}
return 0;
}
-static int datagram_read_cmdline_and_exec(int _fd /* always taking possession, even on error */) {
- _cleanup_close_ int fd = TAKE_FD(_fd);
- int r;
-
- assert(fd >= 0);
-
- /* The first datagram contains the cmdline */
- r = fd_wait_for_event(fd, POLLIN, USEC_INFINITY);
- if (r < 0)
- return log_error_errno(r, "Failed to wait for command line: %m");
-
- ssize_t n = next_datagram_size_fd(fd);
- if (n < 0)
- return log_error_errno(n, "Failed to determine datagram size: %m");
- n += 1; /* extra byte to validate that the size we determined here was correct */
-
- _cleanup_free_ char *p = malloc(n);
- if (!p)
- return log_oom();
-
- ssize_t m = recv(fd, p, n, /* flags= */ 0);
- if (m < 0)
- return log_error_errno(errno, "Failed to read datagram: %m");
- if (m >= n)
- return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Unexpected message size.");
-
- _cleanup_strv_free_ char **a = strv_parse_nulstr(p, m);
- if (!a)
- return log_oom();
- if (strv_isempty(a))
- return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Invalid command line.");
-
- /* The second datagram contains an integer array of the intended fd numbers, and the an SCM_RIGHTS fd
- * list along with it, matching that. */
- r = fd_wait_for_event(fd, POLLIN, USEC_INFINITY);
- if (r < 0)
- return log_error_errno(r, "Failed to wait for command line: %m");
-
- n = next_datagram_size_fd(fd);
- if (n < 0)
- return log_error_errno(n, "Failed to determine datagram size: %m");
- n += 1; /* extra byte to validate that the size we determined here was correct */
-
- _cleanup_free_ int *f = malloc(n);
- if (!p)
- return log_oom();
-
- struct iovec iov = {
- .iov_base = f,
- .iov_len = n,
- };
-
- int *fds = NULL;
- size_t n_fds = 0;
- CLEANUP_ARRAY(fds, n_fds, close_many_and_free);
-
- m = receive_many_fds_iov(
- fd,
- &iov, /* iovlen= */ 1,
- &fds,
- &n_fds,
- /* flags= */ MSG_TRUNC);
- if (m < 0)
- return log_error_errno(m, "Failed to read datagram: %m");
- if (m >= n || (size_t) m != n_fds * sizeof(int))
- return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Unexpected message size.");
-
- fd = safe_close(fd);
-
- /* At this point the fds[] contains the file descriptors we got, and f[] contains the numbers we want
- * for them. Let's rearrange things. */
-
- /* 1. Determine largest number we want */
- int max_fd = 2;
- for (size_t k = 0; k < n_fds; k++)
- max_fd = MAX(max_fd, f[k]);
-
- /* 2. Move all fds we got above that */
- for (size_t k = 0; k < n_fds; k++) {
- if (fds[k] > max_fd)
- continue;
-
- _cleanup_close_ int copy = fcntl(fds[k], F_DUPFD_CLOEXEC, max_fd+1);
- if (copy < 0)
- return log_error_errno(errno, "Failed to duplicate file descriptor: %m");
-
- safe_close(fds[k]);
- fds[k] = TAKE_FD(copy);
-
- assert(fds[k] > max_fd);
- }
-
- log_close();
-
- r = close_all_fds(fds, n_fds);
- if (r < 0)
- return log_error_errno(r, "Failed to close remaining file descriptors: %m");
-
- /* 3. Move into place (this also disables O_CLOEXEC) */
- for (size_t k = 0; k < n_fds; k++) {
- if (dup2(fds[k], f[k]) < 0)
- return log_error_errno(errno, "Failed to move file descriptor: %m");
-
- safe_close(fds[k]);
- fds[k] = f[k];
- }
-
- execv(a[0], a);
- return log_error_errno(errno, "Failed to execve %s: %m", a[0]);
-}
-
-_noreturn_ static void child(int cmdline_fd) {
- assert(cmdline_fd >= 0);
-
- /* set LANG if they are missing */
- if (setenv("LANG", "C.UTF-8", /* override= */ 0) < 0) {
- log_oom();
- goto fail;
- }
-
- /* Now wait for the command line from the parent, and then execute it */
-
- (void) datagram_read_cmdline_and_exec(TAKE_FD(cmdline_fd));
-
-fail:
- _exit(EXIT_FAILURE);
-}
-
static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
_cleanup_(ovmf_config_freep) OvmfConfig *ovmf_config = NULL;
_cleanup_free_ char *qemu_binary = NULL, *mem = NULL, *kernel = NULL;
_cleanup_close_ int notify_sock_fd = -EBADF;
_cleanup_strv_free_ char **cmdline = NULL;
_cleanup_free_ int *pass_fds = NULL;
- size_t n_pass_fds = 0;
+ sd_event_source **children = NULL;
+ size_t n_children = 0, n_pass_fds = 0;
const char *accel;
int r;
+ CLEANUP_ARRAY(children, n_children, fork_notify_terminate_many);
+
polkit_agent_open();
/* Registration always happens on the system bus */
runtime_bus = sd_bus_ref(user_bus);
}
- assert_se(sigprocmask_many(SIG_BLOCK, /* ret_old_mask=*/ NULL, SIGCHLD) >= 0);
-
- _cleanup_close_pair_ int cmdline_socket[2] = EBADF_PAIR;
- if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, cmdline_socket) < 0)
- return log_error_errno(errno, "Failed to allocate command line socket pair: %m");
-
- /* Fork off child early on, as we need to assign it to a scope unit, which we can generate
- * dependencies towards for swtpm, virtiofsd and so on. It's just going to hang until we fully
- * prepared a command line */
- _cleanup_(pidref_done) PidRef child_pidref = PIDREF_NULL;
- r = pidref_safe_fork_full(
- "(qemu)",
- /* stdio_fds= */ NULL,
- cmdline_socket + 0, 1,
- FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_CLOEXEC_OFF|FORK_RLIMIT_NOFILE_SAFE,
- &child_pidref);
- if (r < 0)
- return r;
- if (r == 0) {
- cmdline_socket[1] = -EBADF; /* closed due to FORK_CLOEXEC_ALL_FDS */
-
- child(cmdline_socket[0]);
- assert_not_reached();
- }
-
- cmdline_socket[0] = safe_close(cmdline_socket[0]);
-
- if (!arg_keep_unit) {
- /* When a new scope is created for this container, then we'll be registered as its controller, in which
- * case PID 1 will send us a friendly RequestStop signal, when it is asked to terminate the
- * scope. Let's hook into that, and cleanly shut down the container, and print a friendly message. */
-
- r = sd_bus_match_signal_async(
- runtime_bus,
- /* ret= */ NULL,
- "org.freedesktop.systemd1",
- /* path= */ NULL,
- "org.freedesktop.systemd1.Scope",
- "RequestStop",
- on_request_stop,
- /* install_callback= */ NULL,
- /* userdata= */ NULL);
- if (r < 0)
- return log_error_errno(r, "Failed to request RequestStop match: %m");
- }
-
- _cleanup_free_ char *unit = NULL;
- bool scope_allocated = false;
- if (!arg_keep_unit && (!arg_register || !arg_privileged)) {
- r = allocate_scope(
- runtime_bus,
- arg_machine,
- &child_pidref,
- arg_slice,
- arg_property,
- /* allow_pidfd= */ true,
- &unit);
- if (r < 0)
- return r;
-
- scope_allocated = true;
- } else {
- if (arg_privileged)
- r = cg_pid_get_unit(0, &unit);
- else
- r = cg_pid_get_user_unit(0, &unit);
- if (r < 0)
- return log_error_errno(r, "Failed to get our own unit: %m");
- }
-
bool use_kvm = arg_kvm > 0;
if (arg_kvm < 0) {
r = qemu_check_kvm_support();
return r;
}
+ assert_se(sigprocmask_many(SIG_BLOCK, /* ret_old_mask=*/ NULL, SIGCHLD) >= 0);
+
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ r = sd_event_new(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get default event loop: %m");
+
+ (void) sd_event_set_watchdog(event, true);
+
+ _cleanup_free_ char *unit = NULL;
+ r = unit_name_mangle_with_suffix(arg_machine, "as machine name", /* flags= */ 0, ".scope", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle scope name: %m");
+
+ _cleanup_free_ char *sd_socket_activate = NULL;
+ r = find_executable("systemd-socket-activate", &sd_socket_activate);
+ if (r < 0)
+ return log_error_errno(r, "Failed to find systemd-socket-activate binary: %m");
+
if (arg_directory) {
_cleanup_free_ char *listen_address = NULL;
+ _cleanup_(fork_notify_terminate) PidRef child = PIDREF_NULL;
+
+ if (!GREEDY_REALLOC(children, n_children + 1))
+ return log_oom();
+
r = start_virtiofsd(
- runtime_bus,
unit,
arg_directory,
/* uidmap= */ true,
runtime_dir,
- &listen_address);
+ sd_socket_activate,
+ &listen_address,
+ &child);
if (r < 0)
return r;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL;
+ r = event_add_child_pidref(event, &source, &child, WEXITED, on_child_exit, /* userdata= */ NULL);
+ if (r < 0)
+ return r;
+
+ pidref_done(&child);
+ children[n_children++] = TAKE_PTR(source);
+
_cleanup_free_ char *escaped_listen_address = escape_qemu_value(listen_address);
if (!escaped_listen_address)
return log_oom();
FOREACH_ARRAY(mount, arg_runtime_mounts.mounts, arg_runtime_mounts.n_mounts) {
_cleanup_free_ char *listen_address = NULL;
+ _cleanup_(fork_notify_terminate) PidRef child = PIDREF_NULL;
+
+ if (!GREEDY_REALLOC(children, n_children + 1))
+ return log_oom();
+
r = start_virtiofsd(
- runtime_bus,
unit,
mount->source,
/* uidmap= */ false,
runtime_dir,
- &listen_address);
+ sd_socket_activate,
+ &listen_address,
+ &child);
+ if (r < 0)
+ return r;
+
+ _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL;
+ r = event_add_child_pidref(event, &source, &child, WEXITED, on_child_exit, /* userdata= */ NULL);
if (r < 0)
return r;
+ pidref_done(&child);
+ children[n_children++] = TAKE_PTR(source);
+
_cleanup_free_ char *escaped_listen_address = escape_qemu_value(listen_address);
if (!escaped_listen_address)
return log_oom();
_cleanup_free_ char *tpm_socket_address = NULL;
if (swtpm) {
- r = start_tpm(runtime_bus,
- unit,
- swtpm,
- runtime_dir,
- &tpm_socket_address);
+ _cleanup_(fork_notify_terminate) PidRef child = PIDREF_NULL;
+
+ if (!GREEDY_REALLOC(children, n_children + 1))
+ return log_oom();
+
+ r = start_tpm(unit, swtpm, runtime_dir, sd_socket_activate, &tpm_socket_address, &child);
if (r < 0) {
/* only bail if the user asked for a tpm */
if (arg_tpm > 0)
log_debug_errno(r, "Failed to start tpm, ignoring: %m");
}
+
+ _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL;
+ r = event_add_child_pidref(event, &source, &child, WEXITED, on_child_exit, /* userdata= */ NULL);
+ if (r < 0)
+ return r;
+
+ pidref_done(&child);
+ children[n_children++] = TAKE_PTR(source);
}
if (tpm_socket_address) {
}
if (arg_forward_journal) {
- _cleanup_free_ char *sd_journal_remote = NULL, *listen_address = NULL, *cred = NULL;
+ _cleanup_free_ char *listen_address = NULL, *cred = NULL;
+
+ if (!GREEDY_REALLOC(children, n_children + 1))
+ return log_oom();
- r = find_executable_full(
- "systemd-journal-remote",
- /* root = */ NULL,
- STRV_MAKE(LIBEXECDIR),
- /* use_path_envvar = */ true, /* systemd-journal-remote should be installed in
- * LIBEXECDIR, but for supporting fancy setups. */
- &sd_journal_remote,
- /* ret_fd = */ NULL);
+ _cleanup_(fork_notify_terminate) PidRef child = PIDREF_NULL;
+ r = start_systemd_journal_remote(unit, child_cid, sd_socket_activate, &listen_address, &child);
if (r < 0)
- return log_error_errno(r, "Failed to find systemd-journal-remote binary: %m");
+ return r;
- r = start_systemd_journal_remote(
- runtime_bus,
- unit,
- child_cid,
- sd_journal_remote,
- &listen_address);
+ _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL;
+ r = event_add_child_pidref(event, &source, &child, WEXITED, on_child_exit, /* userdata= */ NULL);
if (r < 0)
return r;
+ pidref_done(&child);
+ children[n_children++] = TAKE_PTR(source);
+
cred = strjoin("journal.forward_to_socket:", listen_address);
if (!cred)
return log_oom();
log_debug("Executing: %s", joined);
}
+ assert_se(sigprocmask_many(SIG_BLOCK, /* ret_old_mask=*/ NULL, SIGCHLD) >= 0);
+
+ _cleanup_(pidref_done) PidRef child_pidref = PIDREF_NULL;
+ r = pidref_safe_fork_full(
+ qemu_binary,
+ /* stdio_fds= */ NULL,
+ pass_fds, n_pass_fds,
+ FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_CLOEXEC_OFF|FORK_RLIMIT_NOFILE_SAFE,
+ &child_pidref);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (setenv("LANG", "C.UTF-8", 0) < 0) {
+ log_oom();
+ goto fail;
+ }
+
+ execv(qemu_binary, cmdline);
+ log_error_errno(errno, "Failed to execve %s: %m", qemu_binary);
+ fail:
+ _exit(EXIT_FAILURE);
+ }
+
+ /* Close relevant fds we passed to qemu in the parent. We don't need them anymore. */
+ child_vsock_fd = safe_close(child_vsock_fd);
+ tap_fd = safe_close(tap_fd);
+
+ if (!arg_keep_unit) {
+ /* When a new scope is created for this container, then we'll be registered as its controller, in which
+ * case PID 1 will send us a friendly RequestStop signal, when it is asked to terminate the
+ * scope. Let's hook into that, and cleanly shut down the container, and print a friendly message. */
+
+ r = sd_bus_match_signal_async(
+ runtime_bus,
+ /* ret= */ NULL,
+ "org.freedesktop.systemd1",
+ /* path= */ NULL,
+ "org.freedesktop.systemd1.Scope",
+ "RequestStop",
+ on_request_stop,
+ /* install_callback= */ NULL,
+ /* userdata= */ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request RequestStop match: %m");
+ }
+
+ bool scope_allocated = false;
+ if (!arg_keep_unit && (!arg_register || !arg_privileged)) {
+ r = allocate_scope(
+ runtime_bus,
+ arg_machine,
+ &child_pidref,
+ children,
+ n_children,
+ unit,
+ arg_slice,
+ arg_property,
+ /* allow_pidfd= */ true);
+ if (r < 0)
+ return r;
+
+ scope_allocated = true;
+ } else {
+ if (arg_privileged)
+ r = cg_pid_get_unit(0, &unit);
+ else
+ r = cg_pid_get_user_unit(0, &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get our own unit: %m");
+ }
+
bool registered = false;
if (arg_register) {
char vm_address[STRLEN("vsock/") + DECIMAL_STR_MAX(unsigned)];
registered = true;
}
- _cleanup_free_ char *nulstr = NULL;
- size_t nulstr_size = 0;
- if (strv_make_nulstr(cmdline, &nulstr, &nulstr_size) < 0)
- return log_oom();
-
- /* First datagram: the command line to execute */
- ssize_t n = send(cmdline_socket[1], nulstr, nulstr_size, /* flags= */ 0);
- if (n < 0)
- return log_error_errno(errno, "Failed to send command line: %m");
-
- /* Second datagram: the file descriptor array and the fds inside it */
- n = send_many_fds_iov(
- cmdline_socket[1],
- pass_fds, n_pass_fds, /* both as payload … */
- &IOVEC_MAKE(pass_fds, n_pass_fds * sizeof(int)), /* … and as auxiliary fds */
- /* iovlen= */ 1,
- /* flags= */ 0);
- if (n < 0)
- return log_error_errno(n, "Failed to send file descriptors to child: %m");
-
- /* We submitted the command line now, qemu is running now */
- cmdline_socket[1] = safe_close(cmdline_socket[1]);
-
- /* Close relevant fds we passed to qemu in the parent. We don't need them anymore. */
- child_vsock_fd = safe_close(child_vsock_fd);
- tap_fd = safe_close(tap_fd);
-
/* Report that the VM is now set up */
(void) sd_notifyf(/* unset_environment= */ false,
"STATUS=VM started.\n"
polkit_agent_close();
_cleanup_(sd_event_source_unrefp) sd_event_source *notify_event_source = NULL;
- _cleanup_(sd_event_unrefp) sd_event *event = NULL;
- r = sd_event_new(&event);
- if (r < 0)
- return log_error_errno(r, "Failed to get default event source: %m");
-
- (void) sd_event_set_watchdog(event, true);
if (system_bus) {
r = sd_bus_attach_event(system_bus, event, 0);