1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
13 #include "dirent-util.h"
15 #include "discover-image.h"
16 #include "sd-daemon.h"
20 #include "alloc-util.h"
21 #include "architecture.h"
23 #include "common-signal.h"
25 #include "creds-util.h"
26 #include "dissect-image.h"
28 #include "event-util.h"
29 #include "extract-word.h"
31 #include "format-util.h"
34 #include "hexdecoct.h"
35 #include "hostname-util.h"
36 #include "kernel-image.h"
38 #include "machine-credential.h"
40 #include "main-func.h"
43 #include "parse-argument.h"
44 #include "parse-util.h"
45 #include "path-lookup.h"
46 #include "path-util.h"
47 #include "pretty-print.h"
48 #include "process-util.h"
50 #include "random-util.h"
52 #include "signal-util.h"
53 #include "socket-util.h"
54 #include "stat-util.h"
55 #include "string-util.h"
57 #include "tmpfile-util.h"
58 #include "unit-name.h"
59 #include "vmspawn-mount.h"
60 #include "vmspawn-scope.h"
61 #include "vmspawn-settings.h"
62 #include "vmspawn-util.h"
/* Program-wide option state. Every variable below is set to its default here and is
 * overwritten by parse_argv() when the matching command-line switch is seen.
 * arg_kvm, arg_vsock, arg_tpm and arg_secure_boot start at -1 and are filled in with
 * parse_tristate(); the string and string-vector options start out as NULL. */
64 static bool arg_quiet
= false;
65 static PagerFlags arg_pager_flags
= 0;
66 static char *arg_directory
= NULL
;
67 static char *arg_image
= NULL
;
68 static char *arg_machine
= NULL
;
69 static char *arg_cpus
= NULL
;
/* Default guest RAM size, later parsed from --ram= with parse_size(). */
70 static uint64_t arg_ram
= UINT64_C(2) * U64_GB
;
71 static int arg_kvm
= -1;
72 static int arg_vsock
= -1;
73 static unsigned arg_vsock_cid
= VMADDR_CID_ANY
;
74 static int arg_tpm
= -1;
75 static char *arg_linux
= NULL
;
76 static char **arg_initrds
= NULL
;
77 static ConsoleMode arg_console_mode
= CONSOLE_INTERACTIVE
;
78 static NetworkStack arg_network_stack
= NETWORK_STACK_NONE
;
79 static int arg_secure_boot
= -1;
80 static MachineCredentialContext arg_credentials
= {};
/* UID/GID mapping handed to virtiofsd (see start_virtiofsd()); no shift unless --private-users= is given. */
81 static uid_t arg_uid_shift
= UID_INVALID
, arg_uid_range
= 0x10000U
;
82 static RuntimeMountContext arg_runtime_mounts
= {};
/* Bitmask of SETTING_* flags recording which settings were given on the command line. */
83 static SettingsMask arg_settings_mask
= 0;
84 static char *arg_firmware
= NULL
;
85 static char *arg_runtime_directory
= NULL
;
86 static char *arg_forward_journal
= NULL
;
87 static bool arg_runtime_directory_created
= false;
88 static bool arg_privileged
= false;
89 static char **arg_kernel_cmdline_extra
= NULL
;
90 static char **arg_extra_drives
= NULL
;
91 static char *arg_background
= NULL
;
/* Pair each dynamically allocated option variable with the function that releases it:
 * freep for plain strings, strv_freep for string vectors and the dedicated *_done helpers
 * for the credential and runtime-mount contexts.
 * NOTE(review): presumably these run when the program's main function returns — the
 * mechanism is defined outside this file, confirm there. */
93 STATIC_DESTRUCTOR_REGISTER(arg_directory
, freep
);
94 STATIC_DESTRUCTOR_REGISTER(arg_image
, freep
);
95 STATIC_DESTRUCTOR_REGISTER(arg_machine
, freep
);
96 STATIC_DESTRUCTOR_REGISTER(arg_cpus
, freep
);
97 STATIC_DESTRUCTOR_REGISTER(arg_runtime_directory
, freep
);
98 STATIC_DESTRUCTOR_REGISTER(arg_credentials
, machine_credential_context_done
);
99 STATIC_DESTRUCTOR_REGISTER(arg_firmware
, freep
);
100 STATIC_DESTRUCTOR_REGISTER(arg_linux
, freep
);
101 STATIC_DESTRUCTOR_REGISTER(arg_initrds
, strv_freep
);
102 STATIC_DESTRUCTOR_REGISTER(arg_runtime_mounts
, runtime_mount_context_done
);
103 STATIC_DESTRUCTOR_REGISTER(arg_forward_journal
, freep
);
104 STATIC_DESTRUCTOR_REGISTER(arg_kernel_cmdline_extra
, strv_freep
);
105 STATIC_DESTRUCTOR_REGISTER(arg_extra_drives
, strv_freep
);
106 STATIC_DESTRUCTOR_REGISTER(arg_background
, freep
);
/* Print the usage text.
 *
 * Opens the pager according to arg_pager_flags, asks terminal_urlify_man() for a reference
 * to the systemd-vmspawn(1) man page and then prints the option summary grouped by topic.
 * The format string uses positional arguments: %1$s is program_invocation_short_name and
 * %2$s is used in the closing "See the ... for details" sentence; %3$s/%4$s wrap each
 * section heading and %5$s/%6$s wrap the tagline.
 * NOTE(review): the arguments for %2$s..%6$s are not visible here — presumably the man page
 * link and ANSI highlight on/off sequences; confirm. */
108 static int help(void) {
109 _cleanup_free_
char *link
= NULL
;
112 pager_open(arg_pager_flags
);
114 r
= terminal_urlify_man("systemd-vmspawn", "1", &link
);
118 printf("%1$s [OPTIONS...] [ARGUMENTS...]\n\n"
119 "%5$sSpawn a command or OS in a virtual machine.%6$s\n\n"
120 " -h --help Show this help\n"
121 " --version Print version string\n"
122 " -q --quiet Do not show status information\n"
123 " --no-pager Do not pipe output into a pager\n"
125 " -D --directory=PATH Root directory for the VM\n"
126 " -i --image=FILE|DEVICE Root file system disk image or device for the VM\n"
127 "\n%3$sHost Configuration:%4$s\n"
128 " --cpus=CPUS Configure number of CPUs in guest\n"
129 " --ram=BYTES Configure guest's RAM size\n"
130 " --kvm=BOOL Enable use of KVM\n"
131 " --vsock=BOOL Override autodetection of VSOCK support\n"
132 " --vsock-cid=CID Specify the CID to use for the guest's VSOCK support\n"
133 " --tpm=BOOL Enable use of a virtual TPM\n"
134 " --linux=PATH Specify the linux kernel for direct kernel boot\n"
135 " --initrd=PATH Specify the initrd for direct kernel boot\n"
136 " -n --network-tap Create a TAP device for networking\n"
137 " --network-user-mode Use user mode networking\n"
138 " --secure-boot=BOOL Enable searching for firmware supporting SecureBoot\n"
139 " --firmware=PATH|list Select firmware definition file (or list available)\n"
140 "\n%3$sSystem Identity:%4$s\n"
141 " -M --machine=NAME Set the machine name for the VM\n"
142 "\n%3$sUser Namespacing:%4$s\n"
143 " --private-users=UIDBASE[:NUIDS]\n"
144 " Configure the UID/GID range to map into the\n"
145 " virtiofsd namespace\n"
146 "\n%3$sMounts:%4$s\n"
147 " --bind=SOURCE[:TARGET]\n"
148 " Mount a file or directory from the host into the VM\n"
149 " --bind-ro=SOURCE[:TARGET]\n"
150 " Mount a file or directory, but read-only\n"
151 " --extra-drive=PATH Adds an additional disk to the virtual machine\n"
152 "\n%3$sIntegration:%4$s\n"
153 " --forward-journal=FILE|DIR\n"
154 " Forward the VM's journal to the host\n"
155 "\n%3$sInput/Output:%4$s\n"
156 " --console=MODE Console mode (interactive, native, gui)\n"
157 " --background=COLOR Set ANSI color for background\n"
158 "\n%3$sCredentials:%4$s\n"
159 " --set-credential=ID:VALUE\n"
160 " Pass a credential with literal value to the VM\n"
161 " --load-credential=ID:PATH\n"
162 " Load credential for the VM from file or AF_UNIX\n"
164 "\nSee the %2$s for details.\n",
165 program_invocation_short_name
,
/* Parse the command line into the arg_* globals.
 *
 * Options are read with getopt_long() using the short option string "+hD:i:M:nq" and the
 * long option table below; several --qemu-* spellings are kept as compatibility aliases of
 * the current names. Path-type values go through parse_path_argument(), boolean values
 * through parse_tristate(), and options that correspond to a settings flag also set the
 * matching SETTING_* bit in arg_settings_mask. Everything left after the options
 * (argv + optind) is copied into arg_kernel_cmdline_extra.
 *
 * Failures are reported with log_error_errno() and its return value is passed back to the
 * caller. Validation failures use SYNTHETIC_ERRNO(EINVAL). */
175 static int parse_argv(int argc
, char *argv
[]) {
188 ARG_NETWORK_USER_MODE
,
202 static const struct option options
[] = {
203 { "help", no_argument
, NULL
, 'h' },
204 { "version", no_argument
, NULL
, ARG_VERSION
},
205 { "quiet", no_argument
, NULL
, 'q' },
206 { "no-pager", no_argument
, NULL
, ARG_NO_PAGER
},
207 { "image", required_argument
, NULL
, 'i' },
208 { "directory", required_argument
, NULL
, 'D' },
209 { "machine", required_argument
, NULL
, 'M' },
210 { "cpus", required_argument
, NULL
, ARG_CPUS
},
211 { "qemu-smp", required_argument
, NULL
, ARG_CPUS
}, /* Compat alias */
212 { "ram", required_argument
, NULL
, ARG_RAM
},
213 { "qemu-mem", required_argument
, NULL
, ARG_RAM
}, /* Compat alias */
214 { "kvm", required_argument
, NULL
, ARG_KVM
},
215 { "qemu-kvm", required_argument
, NULL
, ARG_KVM
}, /* Compat alias */
216 { "vsock", required_argument
, NULL
, ARG_VSOCK
},
217 { "qemu-vsock", required_argument
, NULL
, ARG_VSOCK
}, /* Compat alias */
218 { "vsock-cid", required_argument
, NULL
, ARG_VSOCK_CID
},
219 { "tpm", required_argument
, NULL
, ARG_TPM
},
220 { "linux", required_argument
, NULL
, ARG_LINUX
},
221 { "initrd", required_argument
, NULL
, ARG_INITRD
},
222 { "console", required_argument
, NULL
, ARG_CONSOLE
},
223 { "qemu-gui", no_argument
, NULL
, ARG_QEMU_GUI
}, /* compat option */
224 { "network-tap", no_argument
, NULL
, 'n' },
225 { "network-user-mode", no_argument
, NULL
, ARG_NETWORK_USER_MODE
},
226 { "bind", required_argument
, NULL
, ARG_BIND
},
227 { "bind-ro", required_argument
, NULL
, ARG_BIND_RO
},
228 { "extra-drive", required_argument
, NULL
, ARG_EXTRA_DRIVE
},
229 { "secure-boot", required_argument
, NULL
, ARG_SECURE_BOOT
},
230 { "private-users", required_argument
, NULL
, ARG_PRIVATE_USERS
},
231 { "forward-journal", required_argument
, NULL
, ARG_FORWARD_JOURNAL
},
232 { "set-credential", required_argument
, NULL
, ARG_SET_CREDENTIAL
},
233 { "load-credential", required_argument
, NULL
, ARG_LOAD_CREDENTIAL
},
234 { "firmware", required_argument
, NULL
, ARG_FIRMWARE
},
235 { "background", required_argument
, NULL
, ARG_BACKGROUND
},
245 while ((c
= getopt_long(argc
, argv
, "+hD:i:M:nq", options
, NULL
)) >= 0)
/* --directory= */
258 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_directory
);
262 arg_settings_mask
|= SETTING_DIRECTORY
;
/* --image= (records the same SETTING_DIRECTORY bit as --directory=) */
266 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_image
);
270 arg_settings_mask
|= SETTING_DIRECTORY
;
/* --machine=: the name must pass hostname_is_valid() before it is stored */
275 arg_machine
= mfree(arg_machine
);
277 if (!hostname_is_valid(optarg
, 0))
278 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
279 "Invalid machine name: %s", optarg
);
281 r
= free_and_strdup(&arg_machine
, optarg
);
288 arg_pager_flags
|= PAGER_DISABLE
;
292 r
= free_and_strdup_warn(&arg_cpus
, optarg
);
/* --ram= is parsed with base 1024 */
298 r
= parse_size(optarg
, 1024, &arg_ram
);
300 return log_error_errno(r
, "Failed to parse --ram=%s: %m", optarg
);
304 r
= parse_tristate(optarg
, &arg_kvm
);
306 return log_error_errno(r
, "Failed to parse --kvm=%s: %m", optarg
);
310 r
= parse_tristate(optarg
, &arg_vsock
);
312 return log_error_errno(r
, "Failed to parse --vsock=%s: %m", optarg
);
/* --vsock-cid=: only CIDs accepted by VSOCK_CID_IS_REGULAR() are allowed */
317 arg_vsock_cid
= VMADDR_CID_ANY
;
321 r
= vsock_parse_cid(optarg
, &cid
);
323 return log_error_errno(r
, "Failed to parse --vsock-cid: %s", optarg
);
324 if (!VSOCK_CID_IS_REGULAR(cid
))
325 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Specified CID is not regular, refusing: %u", cid
);
332 r
= parse_tristate(optarg
, &arg_tpm
);
334 return log_error_errno(r
, "Failed to parse --tpm=%s: %m", optarg
);
338 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_linux
);
/* --initrd= may be given several times; each path is appended to arg_initrds */
344 _cleanup_free_
char *initrd_path
= NULL
;
345 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &initrd_path
);
349 r
= strv_consume(&arg_initrds
, TAKE_PTR(initrd_path
));
357 arg_console_mode
= console_mode_from_string(optarg
);
358 if (arg_console_mode
< 0)
359 return log_error_errno(arg_console_mode
, "Failed to parse specified console mode: %s", optarg
);
364 arg_console_mode
= CONSOLE_GUI
;
368 arg_network_stack
= NETWORK_STACK_TAP
;
371 case ARG_NETWORK_USER_MODE
:
372 arg_network_stack
= NETWORK_STACK_USER
;
/* --bind= and --bind-ro= share one handler; the last argument selects read-only */
377 r
= runtime_mount_parse(&arg_runtime_mounts
, optarg
, c
== ARG_BIND_RO
);
379 return log_error_errno(r
, "Failed to parse --bind(-ro)= argument %s: %m", optarg
);
381 arg_settings_mask
|= SETTING_BIND_MOUNTS
;
384 case ARG_EXTRA_DRIVE
: {
385 _cleanup_free_
char *drive_path
= NULL
;
387 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &drive_path
);
391 r
= strv_consume(&arg_extra_drives
, TAKE_PTR(drive_path
));
397 case ARG_SECURE_BOOT
:
398 r
= parse_tristate(optarg
, &arg_secure_boot
);
400 return log_error_errno(r
, "Failed to parse --secure-boot=%s: %m", optarg
);
403 case ARG_PRIVATE_USERS
:
404 r
= parse_userns_uid_range(optarg
, &arg_uid_shift
, &arg_uid_range
);
409 case ARG_FORWARD_JOURNAL
:
410 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_forward_journal
);
415 case ARG_SET_CREDENTIAL
: {
416 r
= machine_credential_set(&arg_credentials
, optarg
);
419 arg_settings_mask
|= SETTING_CREDENTIALS
;
423 case ARG_LOAD_CREDENTIAL
: {
424 r
= machine_credential_load(&arg_credentials
, optarg
);
428 arg_settings_mask
|= SETTING_CREDENTIALS
;
/* --firmware=list prints the firmware definitions found by list_ovmf_config() */
433 if (streq(optarg
, "list")) {
434 _cleanup_strv_free_
char **l
= NULL
;
436 r
= list_ovmf_config(&l
);
438 return log_error_errno(r
, "Failed to list firmwares: %m");
441 fputstrv(stdout
, l
, "\n", &nl
);
/* Nothing in this condition sets errno, so report the bad argument as EINVAL rather than
 * wrapping whatever stale value errno happens to hold. */
448 if (!isempty(optarg
) && !path_is_absolute(optarg
) && !startswith(optarg
, "./"))
449 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Absolute path or path starting with './' required.");
451 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_firmware
);
458 r
= free_and_strdup_warn(&arg_background
, optarg
);
467 assert_not_reached();
/* Remaining arguments become extra kernel command line entries */
471 arg_kernel_cmdline_extra
= strv_copy(argv
+ optind
);
472 if (!arg_kernel_cmdline_extra
)
475 arg_settings_mask
|= SETTING_START_MODE
;
/* Create the host-side VSOCK listening socket.
 *
 * Opens an AF_VSOCK stream socket (close-on-exec), binds it to VMADDR_CID_ANY with
 * VMADDR_PORT_ANY and puts it into listening state with a backlog of SOMAXCONN_DELUXE.
 *
 * Returns the socket fd, with ownership handed to the caller via TAKE_FD(); on failure the
 * errno of the failing call is logged and the result of log_error_errno() is returned. */
481 static int open_vsock(void) {
482 _cleanup_close_
int vsock_fd
= -EBADF
;
484 static const union sockaddr_union bind_addr
= {
485 .vm
.svm_family
= AF_VSOCK
,
486 .vm
.svm_cid
= VMADDR_CID_ANY
,
487 .vm
.svm_port
= VMADDR_PORT_ANY
,
490 vsock_fd
= socket(AF_VSOCK
, SOCK_STREAM
|SOCK_CLOEXEC
, 0);
492 return log_error_errno(errno
, "Failed to open AF_VSOCK socket: %m");
494 r
= bind(vsock_fd
, &bind_addr
.sa
, sizeof(bind_addr
.vm
));
496 return log_error_errno(errno
, "Failed to bind to VSOCK address %u:%u: %m", bind_addr
.vm
.svm_cid
, bind_addr
.vm
.svm_port
);
498 r
= listen(vsock_fd
, SOMAXCONN_DELUXE
);
500 return log_error_errno(errno
, "Failed to listen on VSOCK: %m");
502 return TAKE_FD(vsock_fd
);
/* I/O callback for a single accepted notification connection.
 *
 * Reads one message from fd with recvmsg_safe() (MSG_DONTWAIT) into a buffer of
 * NOTIFY_BUFFER_MAX bytes plus one spare byte, splits it at "\n"/"\r" into tags and acts on:
 *   READY=1       - forwarded to our own supervisor with sd_notify()
 *   STATUS=...    - forwarded as "STATUS=VM running: ..."
 *   EXIT_STATUS=  - parsed with safe_atoi() into the int that userdata points to
 * Transient receive errors, truncated control data and oversized messages are logged or
 * ignored rather than treated as fatal. Afterwards the event source is disabled and
 * unreferenced, as only one message is expected per connection.
 *
 * userdata must be a non-NULL int pointer (checked with ASSERT_PTR). */
505 static int vmspawn_dispatch_notify_fd(sd_event_source
*source
, int fd
, uint32_t revents
, void *userdata
) {
506 char buf
[NOTIFY_BUFFER_MAX
+1];
507 const char *p
= NULL
;
508 struct iovec iovec
= {
/* one byte of buf is held back from the receive length */
510 .iov_len
= sizeof(buf
)-1,
512 struct msghdr msghdr
= {
517 _cleanup_strv_free_
char **tags
= NULL
;
518 int r
, *exit_status
= ASSERT_PTR(userdata
);
520 n
= recvmsg_safe(fd
, &msghdr
, MSG_DONTWAIT
);
521 if (ERRNO_IS_NEG_TRANSIENT(n
))
524 log_warning_errno(n
, "Got message with truncated control data, ignoring: %m");
528 return log_warning_errno(n
, "Couldn't read notification socket: %m");
530 if ((size_t) n
>= sizeof(buf
)) {
531 log_warning("Received notify message exceeded maximum size. Ignoring.");
536 tags
= strv_split(buf
, "\n\r");
540 STRV_FOREACH(s
, tags
)
541 log_debug("Received tag %s from notify socket", *s
);
543 if (strv_contains(tags
, "READY=1")) {
544 r
= sd_notify(false, "READY=1\n");
546 log_warning_errno(r
, "Failed to send readiness notification, ignoring: %m");
549 p
= strv_find_startswith(tags
, "STATUS=");
551 (void) sd_notifyf(false, "STATUS=VM running: %s", p
);
553 p
= strv_find_startswith(tags
, "EXIT_STATUS=");
555 r
= safe_atoi(p
, exit_status
);
557 log_warning_errno(r
, "Failed to parse exit status from %s, ignoring: %m", p
);
560 /* we will only receive one message from each connection so disable this source once one is received */
561 source
= sd_event_source_disable_unref(source
);
/* I/O callback for the listening VSOCK socket.
 *
 * Any event mask other than exactly EPOLLIN is logged as unexpected. Otherwise one pending
 * connection is accepted (close-on-exec, non-blocking) and a floating I/O event source is
 * added for it on the event loop that owns source, dispatching to
 * vmspawn_dispatch_notify_fd() with the same userdata. A failed accept4() is only logged;
 * failing to obtain the event loop or to add the new source is returned as an error. */
566 static int vmspawn_dispatch_vsock_connections(sd_event_source
*source
, int fd
, uint32_t revents
, void *userdata
) {
569 _cleanup_close_
int conn_fd
= -EBADF
;
573 if (revents
!= EPOLLIN
) {
574 log_warning("Got unexpected poll event for VSOCK fd.");
578 conn_fd
= accept4(fd
, NULL
, NULL
, SOCK_CLOEXEC
|SOCK_NONBLOCK
);
580 log_warning_errno(errno
, "Failed to accept connection from VSOCK fd (%m), ignoring...");
584 event
= sd_event_source_get_event(source
);
586 return log_error_errno(SYNTHETIC_ERRNO(ENOENT
), "Failed to retrieve event from event source, exiting task");
588 /* add a new floating task to read from the connection */
589 r
= sd_event_add_io(event
, NULL
, conn_fd
, revents
, vmspawn_dispatch_notify_fd
, userdata
);
591 return log_error_errno(r
, "Failed to allocate notify connection event source: %m");
593 /* conn_fd is now owned by the event loop so don't clean it up */
/* Hook the listening notification socket into the event loop.
 *
 * Registers fd on event for EPOLLIN with vmspawn_dispatch_vsock_connections() as handler and
 * exit_status as its userdata. The new source is stored in *ret_notify_event_source (which
 * must not be NULL) and given the description "vmspawn-notify-sock". A registration failure
 * is logged and returned. */
599 static int setup_notify_parent(sd_event
*event
, int fd
, int *exit_status
, sd_event_source
**ret_notify_event_source
) {
605 assert(ret_notify_event_source
);
607 r
= sd_event_add_io(event
, ret_notify_event_source
, fd
, EPOLLIN
, vmspawn_dispatch_vsock_connections
, exit_status
);
609 return log_error_errno(r
, "Failed to allocate notify socket event source: %m");
611 (void) sd_event_source_set_description(*ret_notify_event_source
, "vmspawn-notify-sock");
/* Signal handler for shutdown requests; userdata is the PidRef of the qemu process.
 *
 * Sends SIGKILL to qemu with pidref_kill(). A failure is logged as a warning; on the other
 * path a hint is logged and the source's userdata is cleared to NULL. The handler also
 * requests event loop exit with code 0.
 * NOTE(review): the branching between these steps is not visible here — presumably the
 * cleared userdata makes a second signal skip the kill and exit the loop directly; confirm.
 * NOTE(review): the log text speaks of halting qemu while the signal sent is SIGKILL; see
 * the TODO below. */
616 static int on_orderly_shutdown(sd_event_source
*s
, const struct signalfd_siginfo
*si
, void *userdata
) {
617 PidRef
*pidref
= userdata
;
620 /* TODO: actually talk to qemu and ask the guest to shutdown here */
623 r
= pidref_kill(pidref
, SIGKILL
);
625 log_warning_errno(r
, "Failed to kill qemu, terminating: %m");
627 log_info("Trying to halt qemu. Send SIGTERM again to trigger vmspawn to immediately terminate.");
628 sd_event_source_set_userdata(s
, NULL
);
633 sd_event_exit(sd_event_source_get_event(s
), 0);
/* Child-exit handler: asks the event loop that owns s to exit with code 0. The siginfo and
 * userdata arguments are not used in the visible code. */
637 static int on_child_exit(sd_event_source
*s
, const siginfo_t
*si
, void *userdata
) {
638 sd_event_exit(sd_event_source_get_event(s
), 0);
/* Append the qemu arguments that tell the guest where to send notifications.
 *
 * Adds "-smbios" followed by a type=11 string that sets the credential
 * vmm.notify_socket to "vsock-stream:<VMADDR_CID_HOST>:<port>". The port is read back
 * from vsock_fd with getsockname(), so vsock_fd must be an already bound AF_VSOCK socket
 * (both asserted below). */
642 static int cmdline_add_vsock(char ***cmdline
, int vsock_fd
) {
645 r
= strv_extend(cmdline
, "-smbios");
649 union sockaddr_union addr
;
650 socklen_t addr_len
= sizeof addr
.vm
;
651 r
= getsockname(vsock_fd
, &addr
.sa
, &addr_len
);
654 assert(addr_len
>= sizeof addr
.vm
);
655 assert(addr
.vm
.svm_family
== AF_VSOCK
);
657 r
= strv_extendf(cmdline
, "type=11,value=io.systemd.credential:vmm.notify_socket=vsock-stream:%u:%u", (unsigned) VMADDR_CID_HOST
, addr
.vm
.svm_port
);
/* Start a software TPM (swtpm) as a socket/service unit pair next to the VM scope.
 *
 * The unit name prefix is the scope name without its suffix plus "-tpm"; the state directory
 * is <arg_runtime_directory>/<prefix> and the service listens on <state dir>/sock. Before
 * the service starts, swtpm_setup (looked up with find_executable()) initialises the state
 * directory for TPM 2.0 with the sha256 PCR bank; the service itself runs
 * "swtpm socket --tpm2 --tpmstate dir=<state dir> --ctrl type=unixio,fd=3".
 * When vmspawn created the runtime directory itself, the unit is also given
 * "systemd/vmspawn/<prefix>" as its runtime directory.
 *
 * On success *ret_state_tempdir receives the state directory path (caller owns it).
 * NOTE(review): only the last parameter is visible here; bus, scope and swtpm are used in
 * the body and are presumably the remaining parameters — confirm against the full file. */
664 static int start_tpm(
668 char **ret_state_tempdir
) {
670 _cleanup_(rm_rf_physical_and_freep
) char *state_dir
= NULL
;
671 _cleanup_free_
char *scope_prefix
= NULL
;
672 _cleanup_(socket_service_pair_done
) SocketServicePair ssp
= {
673 .socket_type
= SOCK_STREAM
,
680 assert(ret_state_tempdir
);
682 r
= unit_name_to_prefix(scope
, &scope_prefix
);
684 return log_error_errno(r
, "Failed to strip .scope suffix from scope: %m");
686 ssp
.unit_name_prefix
= strjoin(scope_prefix
, "-tpm");
687 if (!ssp
.unit_name_prefix
)
690 state_dir
= path_join(arg_runtime_directory
, ssp
.unit_name_prefix
);
694 if (arg_runtime_directory_created
) {
695 ssp
.runtime_directory
= path_join("systemd/vmspawn", ssp
.unit_name_prefix
);
696 if (!ssp
.runtime_directory
)
700 ssp
.listen_address
= path_join(state_dir
, "sock");
701 if (!ssp
.listen_address
)
704 _cleanup_free_
char *swtpm_setup
= NULL
;
705 r
= find_executable("swtpm_setup", &swtpm_setup
);
707 return log_error_errno(r
, "Failed to find swtpm_setup binary: %m");
709 ssp
.exec_start_pre
= strv_new(swtpm_setup
, "--tpm-state", state_dir
, "--tpm2", "--pcr-banks", "sha256");
710 if (!ssp
.exec_start_pre
)
713 ssp
.exec_start
= strv_new(swtpm
, "socket", "--tpm2", "--tpmstate");
717 r
= strv_extendf(&ssp
.exec_start
, "dir=%s", state_dir
);
721 r
= strv_extend_many(&ssp
.exec_start
, "--ctrl", "type=unixio,fd=3");
725 r
= start_socket_service_pair(bus
, scope
, &ssp
);
729 *ret_state_tempdir
= TAKE_PTR(state_dir
);
/* Start systemd-journal-remote as a socket/service unit pair to receive the guest's journal.
 *
 * The unit name prefix is the scope name without its suffix plus "-forward-journal" and the
 * socket listens on "vsock:2:<port>". The service runs the binary given in sd_journal_remote
 * with "--output <arg_forward_journal>" and "--split-mode none" when that path ends in
 * ".journal", otherwise "--split-mode host".
 *
 * On success *listen_address receives the listen address string (caller owns it). */
733 static int start_systemd_journal_remote(sd_bus
*bus
, const char *scope
, unsigned port
, const char *sd_journal_remote
, char **listen_address
) {
734 _cleanup_free_
char *scope_prefix
= NULL
;
735 _cleanup_(socket_service_pair_done
) SocketServicePair ssp
= {
736 .socket_type
= SOCK_STREAM
,
742 assert(sd_journal_remote
);
744 r
= unit_name_to_prefix(scope
, &scope_prefix
);
746 return log_error_errno(r
, "Failed to strip .scope suffix from scope: %m");
748 ssp
.unit_name_prefix
= strjoin(scope_prefix
, "-forward-journal");
749 if (!ssp
.unit_name_prefix
)
752 r
= asprintf(&ssp
.listen_address
, "vsock:2:%u", port
);
756 ssp
.exec_start
= strv_new(sd_journal_remote
,
757 "--output", arg_forward_journal
,
758 "--split-mode", endswith(arg_forward_journal
, ".journal") ? "none" : "host");
762 r
= start_socket_service_pair(bus
, scope
, &ssp
);
767 *listen_address
= TAKE_PTR(ssp
.listen_address
);
/* Work out the kernel command line argument that identifies the root file system of the
 * disk image.
 *
 * The image is dissected with dissect_image_file_and_warn() (no mount options or image policy
 * are passed). If a root partition was found the result is "root=PARTUUID=<uuid>"; otherwise,
 * if a /usr partition was found, "mount.usr=PARTUUID=<uuid>". With neither, an ENOENT error
 * is logged and returned.
 *
 * On success *ret receives the newly allocated string (caller owns it).
 * NOTE(review): the image path and flags passed to the dissector are not visible here. */
772 static int discover_root(char **ret
) {
774 _cleanup_(dissected_image_unrefp
) DissectedImage
*image
= NULL
;
775 _cleanup_free_
char *root
= NULL
;
779 r
= dissect_image_file_and_warn(
782 /* mount_options= */ NULL
,
783 /* image_policy= */ NULL
,
789 if (image
->partitions
[PARTITION_ROOT
].found
)
790 root
= strjoin("root=PARTUUID=", SD_ID128_TO_UUID_STRING(image
->partitions
[PARTITION_ROOT
].uuid
));
791 else if (image
->partitions
[PARTITION_USR
].found
)
792 root
= strjoin("mount.usr=PARTUUID=", SD_ID128_TO_UUID_STRING(image
->partitions
[PARTITION_USR
].uuid
));
794 return log_error_errno(SYNTHETIC_ERRNO(ENOENT
), "Cannot perform a direct kernel boot without a root or usr partition, refusing");
799 *ret
= TAKE_PTR(root
);
/* Locate the virtiofsd binary.
 *
 * First tries find_executable("virtiofsd"); any error other than -ENOENT is fatal. The
 * well-known locations /usr/libexec/virtiofsd and /usr/lib/virtiofsd are then probed with
 * access(X_OK); while probing, errno values other than ENOENT and EACCES are fatal. If
 * nothing is found an ENOENT error is logged and returned.
 * NOTE(review): the fallback probing is presumably only done when the search above found
 * nothing — the guarding condition is not visible here.
 *
 * On success *ret receives the path (caller owns it). */
803 static int find_virtiofsd(char **ret
) {
805 _cleanup_free_
char *virtiofsd
= NULL
;
809 r
= find_executable("virtiofsd", &virtiofsd
);
810 if (r
< 0 && r
!= -ENOENT
)
811 return log_error_errno(r
, "Error while searching for virtiofsd: %m");
814 FOREACH_STRING(file
, "/usr/libexec/virtiofsd", "/usr/lib/virtiofsd") {
815 if (access(file
, X_OK
) >= 0) {
816 virtiofsd
= strdup(file
);
822 if (!IN_SET(errno
, ENOENT
, EACCES
))
823 return log_error_errno(errno
, "Error while searching for virtiofsd: %m");
828 return log_error_errno(SYNTHETIC_ERRNO(ENOENT
), "Failed to find virtiofsd binary.");
830 *ret
= TAKE_PTR(virtiofsd
);
/* Start one virtiofsd instance, as a socket/service unit pair, that exports directory to
 * the guest.
 *
 * Each call gets its own unit name prefix "<scope prefix>-virtiofsd-<N>", where N comes
 * from a function-local static counter. The state directory is
 * <arg_runtime_directory>/<prefix> and the listening socket is a randomly named
 * "sock-<hex>" file inside it. virtiofsd is run with "--shared-dir <directory> --xattr
 * --fd 3 --no-announce-submounts". If uidmap is true and --private-users= was given
 * (arg_uid_shift != UID_INVALID), "--uid-map" and "--gid-map" are added, both built from
 * arg_uid_shift and arg_uid_range.
 *
 * On success *ret_state_tempdir receives the state directory and *ret_sock_name the socket
 * file name (not the full path); the caller owns both. */
834 static int start_virtiofsd(sd_bus
*bus
, const char *scope
, const char *directory
, bool uidmap
, char **ret_state_tempdir
, char **ret_sock_name
) {
835 _cleanup_(rm_rf_physical_and_freep
) char *state_dir
= NULL
;
836 _cleanup_free_
char *virtiofsd
= NULL
, *sock_name
= NULL
, *scope_prefix
= NULL
;
837 _cleanup_(socket_service_pair_done
) SocketServicePair ssp
= {
838 .socket_type
= SOCK_STREAM
,
/* counts up across calls so that every instance gets a distinct unit name */
840 static unsigned virtiofsd_instance
= 0;
846 assert(ret_state_tempdir
);
847 assert(ret_sock_name
);
849 r
= find_virtiofsd(&virtiofsd
);
853 r
= unit_name_to_prefix(scope
, &scope_prefix
);
855 return log_error_errno(r
, "Failed to strip .scope suffix from scope: %m");
857 if (asprintf(&ssp
.unit_name_prefix
, "%s-virtiofsd-%u", scope_prefix
, virtiofsd_instance
++) < 0)
860 state_dir
= path_join(arg_runtime_directory
, ssp
.unit_name_prefix
);
864 if (arg_runtime_directory_created
) {
865 ssp
.runtime_directory
= strjoin("systemd/vmspawn/", ssp
.unit_name_prefix
);
866 if (!ssp
.runtime_directory
)
870 if (asprintf(&sock_name
, "sock-%"PRIx64
, random_u64()) < 0)
873 ssp
.listen_address
= path_join(state_dir
, sock_name
);
874 if (!ssp
.listen_address
)
877 /* QEMU doesn't support submounts so don't announce them */
878 ssp
.exec_start
= strv_new(virtiofsd
, "--shared-dir", directory
, "--xattr", "--fd", "3", "--no-announce-submounts");
882 if (uidmap
&& arg_uid_shift
!= UID_INVALID
) {
883 r
= strv_extend(&ssp
.exec_start
, "--uid-map");
887 r
= strv_extendf(&ssp
.exec_start
, ":0:" UID_FMT
":" UID_FMT
":", arg_uid_shift
, arg_uid_range
);
891 r
= strv_extend(&ssp
.exec_start
, "--gid-map");
/* the GID map reuses the UID shift and range */
895 r
= strv_extendf(&ssp
.exec_start
, ":0:" GID_FMT
":" GID_FMT
":", arg_uid_shift
, arg_uid_range
);
900 r
= start_socket_service_pair(bus
, scope
, &ssp
);
904 *ret_state_tempdir
= TAKE_PTR(state_dir
);
905 *ret_sock_name
= TAKE_PTR(sock_name
);
/* Make sure the extra kernel command line names a root file system.
 *
 * If arg_kernel_cmdline_extra contains no entry starting with "root=" or "mount.usr=", the
 * argument is determined with discover_root() and appended to arg_kernel_cmdline_extra
 * (the vector takes ownership of the string). An existing entry is left alone. */
910 static int kernel_cmdline_maybe_append_root(void) {
912 bool cmdline_contains_root
= strv_find_startswith(arg_kernel_cmdline_extra
, "root=")
913 || strv_find_startswith(arg_kernel_cmdline_extra
, "mount.usr=");
915 if (!cmdline_contains_root
) {
916 _cleanup_free_
char *root
= NULL
;
918 r
= discover_root(&root
);
922 log_debug("Determined root file system %s from dissected image", root
);
924 r
= strv_consume(&arg_kernel_cmdline_extra
, TAKE_PTR(root
));
/* Find a kernel (and initrds) to boot inside a directory tree.
 *
 * Loads the boot loader configuration with <root>/efi as ESP and <root>/boot as XBOOTLDR,
 * selects the special entries without consulting EFI variables and takes the default entry.
 * Entries are only usable if they are unified kernel images (BOOT_ENTRY_UNIFIED) or
 * Boot Loader Specification .conf entries (BOOT_ENTRY_CONF); as the comment below explains,
 * the fallback search prefers the former over the latter. If no entry is found an ENOENT
 * error is logged and returned.
 *
 * For a unified entry only the kernel path is produced; for a .conf entry the kernel path
 * and one path per listed initrd. All paths are joined onto the entry's own root.
 *
 * On success *ret_linux receives the kernel path and *ret_initrds the initrd list (possibly
 * NULL); the caller owns both. */
932 static int discover_boot_entry(const char *root
, char **ret_linux
, char ***ret_initrds
) {
933 _cleanup_(boot_config_free
) BootConfig config
= BOOT_CONFIG_NULL
;
934 _cleanup_free_
char *esp_path
= NULL
, *xbootldr_path
= NULL
;
941 esp_path
= path_join(root
, "efi");
945 xbootldr_path
= path_join(root
, "boot");
949 r
= boot_config_load(&config
, esp_path
, xbootldr_path
);
953 r
= boot_config_select_special_entries(&config
, /* skip_efivars= */ true);
955 return log_error_errno(r
, "Failed to find special boot config entries: %m");
957 const BootEntry
*boot_entry
= boot_config_default_entry(&config
);
/* a default entry of any other type cannot be used for direct kernel boot */
959 if (boot_entry
&& !IN_SET(boot_entry
->type
, BOOT_ENTRY_UNIFIED
, BOOT_ENTRY_CONF
))
962 /* If we cannot determine a default entry search for UKIs (Type #2 EFI Unified Kernel Images)
963 * then .conf files (Type #1 Boot Loader Specification Entries).
964 * https://uapi-group.org/specifications/specs/boot_loader_specification */
966 FOREACH_ARRAY(entry
, config
.entries
, config
.n_entries
)
967 if (entry
->type
== BOOT_ENTRY_UNIFIED
) {
973 FOREACH_ARRAY(entry
, config
.entries
, config
.n_entries
)
974 if (entry
->type
== BOOT_ENTRY_CONF
) {
980 return log_error_errno(SYNTHETIC_ERRNO(ENOENT
), "Failed to discover any boot entries.");
982 log_debug("Discovered boot entry %s (%s)", boot_entry
->id
, boot_entry_type_to_string(boot_entry
->type
));
984 _cleanup_free_
char *linux_kernel
= NULL
;
985 _cleanup_strv_free_
char **initrds
= NULL
;
986 if (boot_entry
->type
== BOOT_ENTRY_UNIFIED
) {
987 linux_kernel
= path_join(boot_entry
->root
, boot_entry
->kernel
);
990 } else if (boot_entry
->type
== BOOT_ENTRY_CONF
) {
991 linux_kernel
= path_join(boot_entry
->root
, boot_entry
->kernel
);
995 STRV_FOREACH(initrd
, boot_entry
->initrd
) {
996 _cleanup_free_
char *initrd_path
= path_join(boot_entry
->root
, *initrd
);
1000 r
= strv_consume(&initrds
, TAKE_PTR(initrd_path
));
1005 assert_not_reached();
1007 *ret_linux
= TAKE_PTR(linux_kernel
);
1008 *ret_initrds
= TAKE_PTR(initrds
);
/* Concatenate all initrds in arg_initrds into one temporary file.
 *
 * A random file name with the prefix "vmspawn-initrd-" is obtained from
 * tempfn_random_child() and the file is created exclusively with mode 0600. For every input
 * the current output offset is first advanced to the next multiple of 4, then the whole
 * input is copied with copy_bytes() (reflinking where possible).
 *
 * On success *ret receives the path of the merged file (caller owns it and must remove it).
 * NOTE(review): on failure the cleanup attribute on merged_initrd presumably removes the
 * partially written file — confirm rm_rf_physical_and_freep() behaves that way for a
 * regular file. */
1013 static int merge_initrds(char **ret
) {
1014 _cleanup_(rm_rf_physical_and_freep
) char *merged_initrd
= NULL
;
1015 _cleanup_close_
int ofd
= -EBADF
;
1020 r
= tempfn_random_child(NULL
, "vmspawn-initrd-", &merged_initrd
);
1022 return log_error_errno(r
, "Failed to create temporary file: %m");
1024 ofd
= open(merged_initrd
, O_WRONLY
|O_CREAT
|O_EXCL
|O_CLOEXEC
, 0600);
1026 return log_error_errno(errno
, "Failed to create regular file %s: %m", merged_initrd
);
1028 STRV_FOREACH(i
, arg_initrds
) {
1029 _cleanup_close_
int ifd
= -EBADF
;
1032 off
= lseek(ofd
, 0, SEEK_CUR
);
1034 return log_error_errno(errno
, "Failed to get file offset of %s: %m", merged_initrd
);
/* number of padding bytes (0..3) needed to reach the next 4-byte boundary */
1036 to_seek
= (4 - (off
% 4)) % 4;
1038 /* seek to assure 4 byte alignment for each initrd */
1039 if (to_seek
!= 0 && lseek(ofd
, to_seek
, SEEK_CUR
) < 0)
1040 return log_error_errno(errno
, "Failed to seek %s: %m", merged_initrd
);
1042 ifd
= open(*i
, O_RDONLY
|O_CLOEXEC
);
1044 return log_error_errno(errno
, "Failed to open %s: %m", *i
);
1046 r
= copy_bytes(ifd
, ofd
, UINT64_MAX
, COPY_REFLINK
);
1048 return log_error_errno(r
, "Failed to copy bytes from %s to %s: %m", *i
, merged_initrd
);
1051 *ret
= TAKE_PTR(merged_initrd
);
/* Set the terminal window title shown while the PTY forwarder f is active.
 *
 * The title is "Virtual Machine <arg_machine> on <hostname>" or, in the other branch,
 * "Virtual Machine <arg_machine>" without the host part. When emoji output is enabled, a
 * green circle glyph followed by a space is put in front of the title and also installed as
 * the title prefix. All failures are deliberately ignored (results are cast to void).
 * NOTE(review): the condition choosing between the two title forms is not visible here —
 * presumably whether gethostname_strict() produced a host name; confirm. */
1055 static void set_window_title(PTYForward
*f
) {
1056 _cleanup_free_
char *hn
= NULL
, *dot
= NULL
;
1060 (void) gethostname_strict(&hn
);
1062 if (emoji_enabled())
1063 dot
= strjoin(special_glyph(SPECIAL_GLYPH_GREEN_CIRCLE
), " ");
1066 (void) pty_forward_set_titlef(f
, "%sVirtual Machine %s on %s", strempty(dot
), arg_machine
, hn
);
1068 (void) pty_forward_set_titlef(f
, "%sVirtual Machine %s", strempty(dot
), arg_machine
);
1071 (void) pty_forward_set_title_prefix(f
, dot
);
1074 static int run_virtual_machine(int kvm_device_fd
, int vhost_device_fd
) {
1075 _cleanup_(ovmf_config_freep
) OvmfConfig
*ovmf_config
= NULL
;
1076 _cleanup_(sd_bus_flush_close_unrefp
) sd_bus
*bus
= NULL
;
1077 _cleanup_free_
char *machine
= NULL
, *qemu_binary
= NULL
, *mem
= NULL
, *trans_scope
= NULL
, *kernel
= NULL
;
1078 _cleanup_close_
int notify_sock_fd
= -EBADF
;
1079 _cleanup_strv_free_
char **cmdline
= NULL
;
1080 _cleanup_free_
int *pass_fds
= NULL
;
1081 size_t n_pass_fds
= 0;
1082 const char *accel
, *shm
;
1086 r
= sd_bus_default_system(&bus
);
1088 r
= sd_bus_default_user(&bus
);
1090 return log_error_errno(r
, "Failed to connect to systemd bus: %m");
1092 r
= start_transient_scope(bus
, arg_machine
, /* allow_pidfd= */ true, &trans_scope
);
1096 bool use_kvm
= arg_kvm
> 0;
1098 r
= qemu_check_kvm_support();
1100 return log_error_errno(r
, "Failed to check for KVM support: %m");
1105 r
= load_ovmf_config(arg_firmware
, &ovmf_config
);
1107 r
= find_ovmf_config(arg_secure_boot
, &ovmf_config
);
1109 return log_error_errno(r
, "Failed to find OVMF config: %m");
1111 /* only warn if the user hasn't disabled secureboot */
1112 if (!ovmf_config
->supports_sb
&& arg_secure_boot
)
1113 log_warning("Couldn't find OVMF firmware blob with Secure Boot support, "
1114 "falling back to OVMF firmware blobs without Secure Boot support.");
1116 shm
= arg_directory
? ",memory-backend=mem" : "";
1117 if (ARCHITECTURE_SUPPORTS_SMM
)
1118 machine
= strjoin("type=" QEMU_MACHINE_TYPE
",smm=", on_off(ovmf_config
->supports_sb
), shm
);
1120 machine
= strjoin("type=" QEMU_MACHINE_TYPE
, shm
);
1125 kernel
= strdup(arg_linux
);
1128 } else if (arg_directory
) {
1129 /* a kernel is required for directory type images so attempt to locate a UKI under /boot and /efi */
1130 r
= discover_boot_entry(arg_directory
, &kernel
, &arg_initrds
);
1132 return log_error_errno(r
, "Failed to locate UKI in directory type image, please specify one with --linux=.");
1134 log_debug("Discovered UKI image at %s", kernel
);
1137 r
= find_qemu_binary(&qemu_binary
);
1138 if (r
== -EOPNOTSUPP
)
1139 return log_error_errno(r
, "Native architecture is not supported by qemu.");
1141 return log_error_errno(r
, "Failed to find QEMU binary: %m");
1143 if (asprintf(&mem
, "%" PRIu64
"M", DIV_ROUND_UP(arg_ram
, U64_MB
)) < 0)
1148 "-machine", machine
,
1149 "-smp", arg_cpus
?: "1",
1151 "-object", "rng-random,filename=/dev/urandom,id=rng0",
1152 "-device", "virtio-rng-pci,rng=rng0,id=rng-device0"
1157 /* if we are going to be starting any units with state then create our runtime dir */
1158 if (arg_tpm
!= 0 || arg_directory
|| arg_runtime_mounts
.n_mounts
!= 0) {
1159 r
= runtime_directory(&arg_runtime_directory
, arg_privileged
? RUNTIME_SCOPE_SYSTEM
: RUNTIME_SCOPE_USER
, "systemd/vmspawn");
1161 return log_error_errno(r
, "Failed to lookup runtime directory: %m");
1163 /* r > 0 means we need to create our own runtime dir */
1164 r
= mkdir_p(arg_runtime_directory
, 0755);
1166 return log_error_errno(r
, "Failed to create runtime directory: %m");
1167 arg_runtime_directory_created
= true;
1171 if (arg_network_stack
== NETWORK_STACK_TAP
)
1172 r
= strv_extend_many(&cmdline
, "-nic", "tap,script=no,model=virtio-net-pci");
1173 else if (arg_network_stack
== NETWORK_STACK_USER
)
1174 r
= strv_extend_many(&cmdline
, "-nic", "user,model=virtio-net-pci");
1176 r
= strv_extend_many(&cmdline
, "-nic", "none");
1180 /* A shared memory backend might increase ram usage so only add one if actually necessary for virtiofsd. */
1181 if (arg_directory
|| arg_runtime_mounts
.n_mounts
!= 0) {
1182 r
= strv_extend(&cmdline
, "-object");
1186 r
= strv_extendf(&cmdline
, "memory-backend-memfd,id=mem,size=%s,share=on", mem
);
1191 bool use_vsock
= arg_vsock
> 0 && ARCHITECTURE_SUPPORTS_SMBIOS
;
1192 if (arg_vsock
< 0) {
1193 r
= qemu_check_vsock_support();
1195 return log_error_errno(r
, "Failed to check for VSOCK support: %m");
1200 if (!use_kvm
&& kvm_device_fd
>= 0) {
1201 log_warning("KVM is disabled but fd for /dev/kvm was passed, closing fd and ignoring");
1202 kvm_device_fd
= safe_close(kvm_device_fd
);
1205 if (use_kvm
&& kvm_device_fd
>= 0) {
1206 /* /dev/fdset/1 is magic string to tell qemu where to find the fd for /dev/kvm
1207 * we use this so that we can take a fd to /dev/kvm and then give qemu that fd */
1208 accel
= "kvm,device=/dev/fdset/1";
1210 r
= strv_extend(&cmdline
, "--add-fd");
1214 r
= strv_extendf(&cmdline
, "fd=%d,set=1,opaque=/dev/kvm", kvm_device_fd
);
1218 if (!GREEDY_REALLOC(pass_fds
, n_pass_fds
+ 1))
1221 pass_fds
[n_pass_fds
++] = kvm_device_fd
;
1227 r
= strv_extend_many(&cmdline
, "-accel", accel
);
1231 _cleanup_close_
int child_vsock_fd
= -EBADF
;
1232 unsigned child_cid
= arg_vsock_cid
;
1234 int device_fd
= vhost_device_fd
;
1236 if (device_fd
< 0) {
1237 child_vsock_fd
= open("/dev/vhost-vsock", O_RDWR
|O_CLOEXEC
);
1238 if (child_vsock_fd
< 0)
1239 return log_error_errno(errno
, "Failed to open /dev/vhost-vsock as read/write: %m");
1241 device_fd
= child_vsock_fd
;
1244 r
= vsock_fix_child_cid(device_fd
, &child_cid
, arg_machine
);
1246 return log_error_errno(r
, "Failed to fix CID for the guest VSOCK socket: %m");
1248 r
= strv_extend(&cmdline
, "-device");
1252 r
= strv_extendf(&cmdline
, "vhost-vsock-pci,guest-cid=%u,vhostfd=%d", child_cid
, device_fd
);
1256 if (!GREEDY_REALLOC(pass_fds
, n_pass_fds
+ 1))
1259 pass_fds
[n_pass_fds
++] = device_fd
;
1262 r
= strv_extend_many(&cmdline
, "-cpu", "max");
1266 _cleanup_close_
int master
= -EBADF
;
1267 PTYForwardFlags ptyfwd_flags
= 0;
1268 switch (arg_console_mode
) {
1270 case CONSOLE_READ_ONLY
:
1271 ptyfwd_flags
|= PTY_FORWARD_READ_ONLY
;
1275 case CONSOLE_INTERACTIVE
: {
1276 _cleanup_free_
char *pty_path
= NULL
;
1278 master
= posix_openpt(O_RDWR
|O_NOCTTY
|O_CLOEXEC
|O_NONBLOCK
);
1280 return log_error_errno(errno
, "Failed to acquire pseudo tty: %m");
1282 r
= ptsname_malloc(master
, &pty_path
);
1284 return log_error_errno(r
, "Failed to determine tty name: %m");
1286 if (unlockpt(master
) < 0)
1287 return log_error_errno(errno
, "Failed to unlock tty: %m");
1289 if (strv_extend_many(
1296 if (strv_extendf(&cmdline
,
1297 "serial,id=console,path=%s", pty_path
) < 0)
1300 r
= strv_extend_many(
1302 "-serial", "chardev:console");
1307 r
= strv_extend_many(
1313 case CONSOLE_NATIVE
:
1314 r
= strv_extend_many(
1318 "-chardev", "stdio,mux=on,id=console,signal=off",
1319 "-serial", "chardev:console",
1324 assert_not_reached();
1329 r
= strv_extend(&cmdline
, "-drive");
1333 _cleanup_free_
char *escaped_ovmf_config_path
= escape_qemu_value(ovmf_config
->path
);
1334 if (!escaped_ovmf_config_path
)
1337 r
= strv_extendf(&cmdline
, "if=pflash,format=%s,readonly=on,file=%s", ovmf_config_format(ovmf_config
), escaped_ovmf_config_path
);
1341 _cleanup_(unlink_and_freep
) char *ovmf_vars_to
= NULL
;
1342 if (ovmf_config
->supports_sb
) {
1343 const char *ovmf_vars_from
= ovmf_config
->vars
;
1344 _cleanup_free_
char *escaped_ovmf_vars_to
= NULL
;
1345 _cleanup_close_
int source_fd
= -EBADF
, target_fd
= -EBADF
;
1347 r
= tempfn_random_child(NULL
, "vmspawn-", &ovmf_vars_to
);
1351 source_fd
= open(ovmf_vars_from
, O_RDONLY
|O_CLOEXEC
);
1353 return log_error_errno(source_fd
, "Failed to open OVMF vars file %s: %m", ovmf_vars_from
);
1355 target_fd
= open(ovmf_vars_to
, O_WRONLY
|O_CREAT
|O_EXCL
|O_CLOEXEC
, 0600);
1357 return log_error_errno(errno
, "Failed to create regular file for OVMF vars at %s: %m", ovmf_vars_to
);
1359 r
= copy_bytes(source_fd
, target_fd
, UINT64_MAX
, COPY_REFLINK
);
1361 return log_error_errno(r
, "Failed to copy bytes from %s to %s: %m", ovmf_vars_from
, ovmf_vars_to
);
1363 /* These aren't always available so don't raise an error if they fail */
1364 (void) copy_xattr(source_fd
, NULL
, target_fd
, NULL
, 0);
1365 (void) copy_access(source_fd
, target_fd
);
1366 (void) copy_times(source_fd
, target_fd
, 0);
1368 r
= strv_extend_many(
1370 "-global", "ICH9-LPC.disable_s3=1",
1371 "-global", "driver=cfi.pflash01,property=secure,value=on",
1376 escaped_ovmf_vars_to
= escape_qemu_value(ovmf_vars_to
);
1377 if (!escaped_ovmf_vars_to
)
1380 r
= strv_extendf(&cmdline
, "file=%s,if=pflash,format=%s", escaped_ovmf_vars_to
, ovmf_config_format(ovmf_config
));
1385 STRV_FOREACH(drive
, arg_extra_drives
) {
1386 _cleanup_free_
char *escaped_drive
= NULL
;
1388 r
= strv_extend(&cmdline
, "-drive");
1392 escaped_drive
= escape_qemu_value(*drive
);
1396 r
= strv_extendf(&cmdline
, "format=raw,cache=unsafe,file=%s", escaped_drive
);
1402 r
= strv_extend_many(&cmdline
, "-kernel", kernel
);
1406 /* We can't rely on gpt-auto-generator when direct kernel booting so synthesize a root=
1407 * kernel argument instead. */
1409 r
= kernel_cmdline_maybe_append_root();
1416 _cleanup_free_
char *escaped_image
= NULL
;
1418 assert(!arg_directory
);
1420 r
= strv_extend(&cmdline
, "-drive");
1424 escaped_image
= escape_qemu_value(arg_image
);
1428 r
= strv_extendf(&cmdline
, "if=none,id=mkosi,file=%s,format=raw", escaped_image
);
1432 r
= strv_extend_many(&cmdline
,
1433 "-device", "virtio-scsi-pci,id=scsi",
1434 "-device", "scsi-hd,drive=mkosi,bootindex=1");
1439 if (arg_directory
) {
1440 _cleanup_free_
char *sock_path
= NULL
, *sock_name
= NULL
, *escaped_sock_path
= NULL
;
1442 r
= start_virtiofsd(bus
, trans_scope
, arg_directory
, /* uidmap= */ true, &sock_path
, &sock_name
);
1446 escaped_sock_path
= escape_qemu_value(sock_path
);
1447 if (!escaped_sock_path
)
1450 r
= strv_extend(&cmdline
, "-chardev");
1454 r
= strv_extendf(&cmdline
, "socket,id=%1$s,path=%2$s/%1$s", sock_name
, escaped_sock_path
);
1458 r
= strv_extend(&cmdline
, "-device");
1462 r
= strv_extendf(&cmdline
, "vhost-user-fs-pci,queue-size=1024,chardev=%s,tag=root", sock_name
);
1466 r
= strv_extend(&arg_kernel_cmdline_extra
, "root=root rootfstype=virtiofs rw");
1471 r
= strv_prepend(&arg_kernel_cmdline_extra
, "console=" DEFAULT_SERIAL_TTY
);
1475 FOREACH_ARRAY(mount
, arg_runtime_mounts
.mounts
, arg_runtime_mounts
.n_mounts
) {
1476 _cleanup_free_
char *sock_path
= NULL
, *sock_name
= NULL
, *clean_target
= NULL
, *escaped_sock_path
= NULL
;
1477 r
= start_virtiofsd(bus
, trans_scope
, mount
->source
, /* uidmap= */ false, &sock_path
, &sock_name
);
1481 escaped_sock_path
= escape_qemu_value(sock_path
);
1482 if (!escaped_sock_path
)
1485 r
= strv_extend(&cmdline
, "-chardev");
1489 r
= strv_extendf(&cmdline
, "socket,id=%1$s,path=%2$s/%1$s", sock_name
, escaped_sock_path
);
1493 r
= strv_extend(&cmdline
, "-device");
1497 r
= strv_extendf(&cmdline
, "vhost-user-fs-pci,queue-size=1024,chardev=%1$s,tag=%1$s", sock_name
);
1501 clean_target
= xescape(mount
->target
, "\":");
1505 r
= strv_extendf(&arg_kernel_cmdline_extra
, "systemd.mount-extra=\"%s:%s:virtiofs:%s\"",
1506 sock_name
, clean_target
, mount
->read_only
? "ro" : "rw");
1511 if (ARCHITECTURE_SUPPORTS_SMBIOS
) {
1512 _cleanup_free_
char *kcl
= strv_join(arg_kernel_cmdline_extra
, " "), *escaped_kcl
= NULL
;
1517 r
= strv_extend_many(&cmdline
, "-append", kcl
);
1521 if (ARCHITECTURE_SUPPORTS_SMBIOS
) {
1522 escaped_kcl
= escape_qemu_value(kcl
);
1526 r
= strv_extend(&cmdline
, "-smbios");
1530 r
= strv_extendf(&cmdline
, "type=11,value=io.systemd.stub.kernel-cmdline-extra=%s", escaped_kcl
);
1534 log_warning("Cannot append extra args to kernel cmdline, native architecture doesn't support SMBIOS, ignoring");
1537 log_warning("Cannot append extra args to kernel cmdline, native architecture doesn't support SMBIOS");
1539 /* disable TPM autodetection if the user's hardware doesn't support it */
1540 if (!ARCHITECTURE_SUPPORTS_TPM
) {
1543 log_debug("TPM not support on %s, disabling tpm autodetection and continuing", architecture_to_string(native_architecture()));
1544 } else if (arg_tpm
> 0)
1545 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP
), "TPM not supported on %s, refusing", architecture_to_string(native_architecture()));
1548 _cleanup_free_
char *swtpm
= NULL
;
1550 r
= find_executable("swtpm", &swtpm
);
1552 /* log if the user asked for swtpm and we cannot find it */
1554 return log_error_errno(r
, "Failed to find swtpm binary: %m");
1555 /* also log if we got an error other than ENOENT from find_executable */
1556 if (r
!= -ENOENT
&& arg_tpm
< 0)
1557 return log_error_errno(r
, "Error detecting swtpm: %m");
1561 _cleanup_free_
char *tpm_state_tempdir
= NULL
;
1563 _cleanup_free_
char *escaped_state_dir
= NULL
;
1565 r
= start_tpm(bus
, trans_scope
, swtpm
, &tpm_state_tempdir
);
1567 /* only bail if the user asked for a tpm */
1569 return log_error_errno(r
, "Failed to start tpm: %m");
1570 log_debug_errno(r
, "Failed to start tpm, ignoring: %m");
1573 escaped_state_dir
= escape_qemu_value(tpm_state_tempdir
);
1574 if (!escaped_state_dir
)
1577 r
= strv_extend(&cmdline
, "-chardev");
1581 r
= strv_extendf(&cmdline
, "socket,id=chrtpm,path=%s/sock", escaped_state_dir
);
1585 r
= strv_extend_many(&cmdline
, "-tpmdev", "emulator,id=tpm0,chardev=chrtpm");
1589 if (native_architecture() == ARCHITECTURE_X86_64
)
1590 r
= strv_extend_many(&cmdline
, "-device", "tpm-tis,tpmdev=tpm0");
1591 else if (IN_SET(native_architecture(), ARCHITECTURE_ARM64
, ARCHITECTURE_ARM64_BE
))
1592 r
= strv_extend_many(&cmdline
, "-device", "tpm-tis-device,tpmdev=tpm0");
1597 char *initrd
= NULL
;
1598 _cleanup_(rm_rf_physical_and_freep
) char *merged_initrd
= NULL
;
1599 size_t n_initrds
= strv_length(arg_initrds
);
1602 initrd
= arg_initrds
[0];
1603 else if (n_initrds
> 1) {
1604 r
= merge_initrds(&merged_initrd
);
1608 initrd
= merged_initrd
;
1612 r
= strv_extend_many(&cmdline
, "-initrd", initrd
);
1617 if (arg_forward_journal
) {
1618 _cleanup_free_
char *sd_journal_remote
= NULL
, *listen_address
= NULL
, *cred
= NULL
;
1619 r
= find_executable("systemd-journal-remote", &sd_journal_remote
);
1621 return log_error_errno(r
, "Failed to find systemd-journal-remote binary: %m");
1623 r
= start_systemd_journal_remote(bus
, trans_scope
, child_cid
, sd_journal_remote
, &listen_address
);
1627 cred
= strjoin("journal.forward_to_socket:", listen_address
);
1631 r
= machine_credential_set(&arg_credentials
, cred
);
1636 if (ARCHITECTURE_SUPPORTS_SMBIOS
)
1637 FOREACH_ARRAY(cred
, arg_credentials
.credentials
, arg_credentials
.n_credentials
) {
1638 _cleanup_free_
char *cred_data_b64
= NULL
;
1641 n
= base64mem(cred
->data
, cred
->size
, &cred_data_b64
);
1645 r
= strv_extend(&cmdline
, "-smbios");
1649 r
= strv_extendf(&cmdline
, "type=11,value=io.systemd.credential.binary:%s=%s", cred
->id
, cred_data_b64
);
1655 notify_sock_fd
= open_vsock();
1656 if (notify_sock_fd
< 0)
1657 return log_error_errno(notify_sock_fd
, "Failed to open VSOCK: %m");
1659 r
= cmdline_add_vsock(&cmdline
, notify_sock_fd
);
1663 return log_error_errno(r
, "Failed to call getsockname on VSOCK: %m");
1666 if (DEBUG_LOGGING
) {
1667 _cleanup_free_
char *joined
= quote_command_line(cmdline
, SHELL_ESCAPE_EMPTY
);
1671 log_debug("Executing: %s", joined
);
1674 assert_se(sigprocmask_many(SIG_BLOCK
, /* old_sigset=*/ NULL
, SIGCHLD
, SIGWINCH
) >= 0);
1676 _cleanup_(sd_event_source_unrefp
) sd_event_source
*notify_event_source
= NULL
;
1677 _cleanup_(sd_event_unrefp
) sd_event
*event
= NULL
;
1678 r
= sd_event_new(&event
);
1680 return log_error_errno(r
, "Failed to get default event source: %m");
1682 (void) sd_event_set_watchdog(event
, true);
1684 _cleanup_(pidref_done
) PidRef child_pidref
= PIDREF_NULL
;
1686 r
= pidref_safe_fork_full(
1688 /* stdio_fds= */ NULL
,
1689 &child_vsock_fd
, 1, /* pass the vsock fd to qemu */
1690 FORK_RESET_SIGNALS
|FORK_CLOSE_ALL_FDS
|FORK_DEATHSIG_SIGTERM
|FORK_LOG
|FORK_CLOEXEC_OFF
|FORK_RLIMIT_NOFILE_SAFE
,
1695 /* set TERM and LANG if they are missing */
1696 if (setenv("TERM", "vt220", 0) < 0)
1699 if (setenv("LANG", "C.UTF-8", 0) < 0)
1702 execv(qemu_binary
, cmdline
);
1703 log_error_errno(errno
, "Failed to execve %s: %m", qemu_binary
);
1704 _exit(EXIT_FAILURE
);
1707 /* Close the vsock fd we passed to qemu in the parent. We don't need it anymore. */
1708 child_vsock_fd
= safe_close(child_vsock_fd
);
1710 int exit_status
= INT_MAX
;
1712 r
= setup_notify_parent(event
, notify_sock_fd
, &exit_status
, ¬ify_event_source
);
1714 return log_error_errno(r
, "Failed to setup event loop to handle VSOCK notify events: %m");
1717 /* shutdown qemu when we are shutdown */
1718 (void) sd_event_add_signal(event
, NULL
, SIGINT
| SD_EVENT_SIGNAL_PROCMASK
, on_orderly_shutdown
, &child_pidref
);
1719 (void) sd_event_add_signal(event
, NULL
, SIGTERM
| SD_EVENT_SIGNAL_PROCMASK
, on_orderly_shutdown
, &child_pidref
);
1721 (void) sd_event_add_signal(event
, NULL
, (SIGRTMIN
+18) | SD_EVENT_SIGNAL_PROCMASK
, sigrtmin18_handler
, NULL
);
1723 /* Exit when the child exits */
1724 (void) event_add_child_pidref(event
, NULL
, &child_pidref
, WEXITED
, on_child_exit
, NULL
);
1726 _cleanup_(pty_forward_freep
) PTYForward
*forward
= NULL
;
1728 r
= pty_forward_new(event
, master
, ptyfwd_flags
, &forward
);
1730 return log_error_errno(r
, "Failed to create PTY forwarder: %m");
1732 if (!arg_background
) {
1733 _cleanup_free_
char *bg
= NULL
;
1735 r
= terminal_tint_color(130 /* green */, &bg
);
1737 log_debug_errno(r
, "Failed to determine terminal background color, not tinting.");
1739 (void) pty_forward_set_background_color(forward
, bg
);
1740 } else if (!isempty(arg_background
))
1741 (void) pty_forward_set_background_color(forward
, arg_background
);
1743 set_window_title(forward
);
1746 r
= sd_event_loop(event
);
1748 return log_error_errno(r
, "Failed to run event loop: %m");
1751 if (exit_status
== INT_MAX
) {
1752 log_debug("Couldn't retrieve inner EXIT_STATUS from VSOCK");
1753 return EXIT_SUCCESS
;
1755 if (exit_status
!= 0)
1756 log_warning("Non-zero exit code received: %d", exit_status
);
1763 static int determine_names(void) {
1766 if (!arg_directory
&& !arg_image
) {
1768 _cleanup_(image_unrefp
) Image
*i
= NULL
;
1770 r
= image_find(IMAGE_MACHINE
, arg_machine
, NULL
, &i
);
1772 return log_error_errno(r
, "No image for machine '%s'.", arg_machine
);
1774 return log_error_errno(r
, "Failed to find image for machine '%s': %m", arg_machine
);
1776 if (IN_SET(i
->type
, IMAGE_RAW
, IMAGE_BLOCK
))
1777 r
= free_and_strdup(&arg_image
, i
->path
);
1778 else if (IN_SET(i
->type
, IMAGE_DIRECTORY
, IMAGE_SUBVOLUME
))
1779 r
= free_and_strdup(&arg_directory
, i
->path
);
1781 assert_not_reached();
1785 r
= safe_getcwd(&arg_directory
);
1787 return log_error_errno(r
, "Failed to determine current directory: %m");
1792 if (arg_directory
&& path_equal(arg_directory
, "/")) {
1793 arg_machine
= gethostname_malloc();
1796 } else if (arg_image
) {
1799 r
= path_extract_filename(arg_image
, &arg_machine
);
1801 return log_error_errno(r
, "Failed to extract file name from '%s': %m", arg_image
);
1803 /* Truncate suffix if there is one */
1804 e
= endswith(arg_machine
, ".raw");
1808 r
= path_extract_filename(arg_directory
, &arg_machine
);
1810 return log_error_errno(r
, "Failed to extract file name from '%s': %m", arg_directory
);
1813 hostname_cleanup(arg_machine
);
1814 if (!hostname_is_valid(arg_machine
, 0))
1815 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Failed to determine machine name automatically, please use -M.");
1821 static int verify_arguments(void) {
1822 if (arg_network_stack
== NETWORK_STACK_TAP
&& !arg_privileged
)
1823 return log_error_errno(SYNTHETIC_ERRNO(EPERM
), "--network-tap requires root privileges, refusing.");
1825 if (!strv_isempty(arg_initrds
) && !arg_linux
)
1826 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Option --initrd= cannot be used without --linux=.");
1831 static int run(int argc
, char *argv
[]) {
1832 int r
, kvm_device_fd
= -EBADF
, vhost_device_fd
= -EBADF
;
1833 _cleanup_strv_free_
char **names
= NULL
;
1837 arg_privileged
= getuid() == 0;
1839 r
= parse_argv(argc
, argv
);
1843 r
= determine_names();
1847 r
= verify_arguments();
1851 if (!arg_quiet
&& arg_console_mode
!= CONSOLE_GUI
) {
1852 _cleanup_free_
char *u
= NULL
;
1853 const char *vm_path
= arg_image
?: arg_directory
;
1854 (void) terminal_urlify_path(vm_path
, vm_path
, &u
);
1856 log_info("%s %sSpawning VM %s on %s.%s",
1857 special_glyph(SPECIAL_GLYPH_LIGHT_SHADE
), ansi_grey(), arg_machine
, u
?: vm_path
, ansi_normal());
1859 if (arg_console_mode
== CONSOLE_INTERACTIVE
)
1860 log_info("%s %sPress %sCtrl-]%s three times within 1s to kill VM.%s",
1861 special_glyph(SPECIAL_GLYPH_LIGHT_SHADE
), ansi_grey(), ansi_highlight(), ansi_grey(), ansi_normal());
1862 else if (arg_console_mode
== CONSOLE_NATIVE
)
1863 log_info("%s %sPress %sCtrl-a x%s to kill VM.%s",
1864 special_glyph(SPECIAL_GLYPH_LIGHT_SHADE
), ansi_grey(), ansi_highlight(), ansi_grey(), ansi_normal());
1867 r
= sd_listen_fds_with_names(true, &names
);
1869 return log_error_errno(r
, "Failed to get passed file descriptors: %m");
1871 for (int i
= 0; i
< r
; i
++) {
1872 int fd
= SD_LISTEN_FDS_START
+ i
;
1873 if (streq(names
[i
], "kvm"))
1875 else if (streq(names
[i
], "vhost-vsock"))
1876 vhost_device_fd
= fd
;
1878 log_notice("Couldn't recognize passed fd %d (%s), closing fd and ignoring...", fd
, names
[i
]);
1883 return run_virtual_machine(kvm_device_fd
, vhost_device_fd
);
/* Generates main() as a wrapper around run().
 * NOTE(review): judging by the macro name, positive return values of run() are presumably turned into
 * failure exit codes — confirm against main-func.h. */
DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);