1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
13 #include "dirent-util.h"
15 #include "discover-image.h"
16 #include "sd-daemon.h"
20 #include "alloc-util.h"
21 #include "architecture.h"
23 #include "common-signal.h"
25 #include "creds-util.h"
26 #include "dissect-image.h"
28 #include "event-util.h"
29 #include "extract-word.h"
31 #include "format-util.h"
34 #include "hexdecoct.h"
35 #include "hostname-util.h"
36 #include "kernel-image.h"
38 #include "machine-credential.h"
40 #include "main-func.h"
43 #include "parse-argument.h"
44 #include "parse-util.h"
45 #include "path-lookup.h"
46 #include "path-util.h"
47 #include "pretty-print.h"
48 #include "process-util.h"
49 #include "random-util.h"
51 #include "signal-util.h"
52 #include "socket-util.h"
53 #include "stat-util.h"
54 #include "string-util.h"
56 #include "tmpfile-util.h"
57 #include "unit-name.h"
58 #include "vmspawn-mount.h"
59 #include "vmspawn-scope.h"
60 #include "vmspawn-settings.h"
61 #include "vmspawn-util.h"
/* Program-wide settings. Most are filled in by parse_argv(); the initializers
 * below are the values in effect when the corresponding option is not given. */
63 static bool arg_quiet
= false;
64 static PagerFlags arg_pager_flags
= 0;
65 static char *arg_directory
= NULL
;
66 static char *arg_image
= NULL
;
67 static char *arg_machine
= NULL
;
68 static char *arg_qemu_smp
= NULL
;
/* Guest RAM in bytes; run_virtual_machine() formats it as a megabyte count, rounded up. */
69 static uint64_t arg_qemu_mem
= UINT64_C(2) * U64_GB
;
/* Tristates set via parse_tristate(). While still negative, run_virtual_machine()
 * calls qemu_check_kvm_support() / qemu_check_vsock_support() to decide. */
70 static int arg_qemu_kvm
= -1;
71 static int arg_qemu_vsock
= -1;
72 static unsigned arg_vsock_cid
= VMADDR_CID_ANY
;
73 static int arg_tpm
= -1;
74 static char *arg_linux
= NULL
;
75 static char **arg_initrds
= NULL
;
76 static bool arg_qemu_gui
= false;
77 static QemuNetworkStack arg_network_stack
= QEMU_NET_NONE
;
78 static int arg_secure_boot
= -1;
79 static MachineCredentialContext arg_credentials
= {};
/* Set by --private-users=. start_virtiofsd() only adds --uid-map/--gid-map
 * when arg_uid_shift is not UID_INVALID. */
80 static uid_t arg_uid_shift
= UID_INVALID
, arg_uid_range
= 0x10000U
;
81 static RuntimeMountContext arg_runtime_mounts
= {};
82 static SettingsMask arg_settings_mask
= 0;
83 static char *arg_firmware
= NULL
;
84 static char *arg_runtime_directory
= NULL
;
85 static char *arg_forward_journal
= NULL
;
/* Set to true in run_virtual_machine() after it has mkdir_p()'d the runtime directory itself. */
86 static bool arg_runtime_directory_created
= false;
87 static bool arg_privileged
= false;
/* Copy of the non-option arguments (argv + optind); run_virtual_machine() later
 * adds console=, root= and systemd.mount-extra= entries to it. */
88 static char **arg_kernel_cmdline_extra
= NULL
;
/* Pair each owning global with its release function: freep for plain strings,
 * strv_freep for string vectors, and the matching *_done for context structs.
 * NOTE(review): STATIC_DESTRUCTOR_REGISTER is defined outside this file;
 * presumably the handlers run when the program exits — confirm. */
90 STATIC_DESTRUCTOR_REGISTER(arg_directory
, freep
);
91 STATIC_DESTRUCTOR_REGISTER(arg_image
, freep
);
92 STATIC_DESTRUCTOR_REGISTER(arg_machine
, freep
);
93 STATIC_DESTRUCTOR_REGISTER(arg_qemu_smp
, freep
);
94 STATIC_DESTRUCTOR_REGISTER(arg_runtime_directory
, freep
);
95 STATIC_DESTRUCTOR_REGISTER(arg_credentials
, machine_credential_context_done
);
96 STATIC_DESTRUCTOR_REGISTER(arg_firmware
, freep
);
97 STATIC_DESTRUCTOR_REGISTER(arg_linux
, freep
);
98 STATIC_DESTRUCTOR_REGISTER(arg_initrds
, strv_freep
);
99 STATIC_DESTRUCTOR_REGISTER(arg_runtime_mounts
, runtime_mount_context_done
);
100 STATIC_DESTRUCTOR_REGISTER(arg_forward_journal
, freep
);
101 STATIC_DESTRUCTOR_REGISTER(arg_kernel_cmdline_extra
, strv_freep
);
103 static int help(void) {
104 _cleanup_free_
char *link
= NULL
;
107 pager_open(arg_pager_flags
);
109 r
= terminal_urlify_man("systemd-vmspawn", "1", &link
);
113 printf("%1$s [OPTIONS...] [ARGUMENTS...]\n\n"
114 "%5$sSpawn a command or OS in a virtual machine.%6$s\n\n"
115 " -h --help Show this help\n"
116 " --version Print version string\n"
117 " -q --quiet Do not show status information\n"
118 " --no-pager Do not pipe output into a pager\n"
/* -D selects the root directory of a virtual machine here, not of a container. */
120 " -D --directory=PATH Root directory for the virtual machine\n"
121 " -i --image=PATH Root file system disk image (or device node) for\n"
122 " the virtual machine\n"
123 "\n%3$sHost Configuration:%4$s\n"
124 " --qemu-smp=SMP Configure guest's SMP settings\n"
125 " --qemu-mem=MEM Configure guest's RAM size\n"
126 " --qemu-kvm=BOOL Configure whether to use KVM or not\n"
127 " --qemu-vsock=BOOL Configure whether to use qemu with a vsock or not\n"
128 " --vsock-cid= Specify the CID to use for the qemu guest's vsock\n"
129 " --tpm=BOOL Configure whether to use a virtual TPM or not\n"
130 " --linux=PATH Specify the linux kernel for direct kernel boot\n"
131 " --initrd=PATH Specify the initrd for direct kernel boot\n"
132 " --qemu-gui Start QEMU in graphical mode\n"
133 " -n --network-tap Create a TAP device for networking with QEMU.\n"
134 " --network-user-mode Use user mode networking with QEMU.\n"
135 " --secure-boot=BOOL Configure whether to search for firmware which\n"
136 " supports Secure Boot\n"
137 " --firmware=PATH|list Select firmware definition file (or list available)\n"
138 "\n%3$sSystem Identity:%4$s\n"
139 " -M --machine=NAME Set the machine name for the virtual machine\n"
140 "\n%3$sUser Namespacing:%4$s\n"
141 " --private-users=UIDBASE[:NUIDS]\n"
142 " Configure the UID/GID range to map into the\n"
143 " virtiofsd namespace\n"
144 "\n%3$sMounts:%4$s\n"
145 " --bind=SOURCE[:TARGET]\n"
146 " Mount a file or directory from the host into\n"
148 " --bind-ro=SOURCE[:TARGET]\n"
149 " Similar, but creates a read-only mount\n"
150 "\n%3$sIntegration:%4$s\n"
151 " --forward-journal=FILE|DIR\n"
152 " Forward the virtual machine's journal entries to\n"
154 "\n%3$sCredentials:%4$s\n"
155 " --set-credential=ID:VALUE\n"
156 " Pass a credential with literal value to the\n"
158 " --load-credential=ID:PATH\n"
159 " Load credential to pass to the virtual machine from\n"
160 " file or AF_UNIX stream socket.\n"
161 "\nSee the %2$s for details.\n",
162 program_invocation_short_name
,
172 static int parse_argv(int argc
, char *argv
[]) {
185 ARG_NETWORK_USER_MODE
,
196 static const struct option options
[] = {
197 { "help", no_argument
, NULL
, 'h' },
198 { "version", no_argument
, NULL
, ARG_VERSION
},
199 { "quiet", no_argument
, NULL
, 'q' },
200 { "no-pager", no_argument
, NULL
, ARG_NO_PAGER
},
201 { "image", required_argument
, NULL
, 'i' },
202 { "directory", required_argument
, NULL
, 'D' },
203 { "machine", required_argument
, NULL
, 'M' },
204 { "qemu-smp", required_argument
, NULL
, ARG_QEMU_SMP
},
205 { "qemu-mem", required_argument
, NULL
, ARG_QEMU_MEM
},
206 { "qemu-kvm", required_argument
, NULL
, ARG_QEMU_KVM
},
207 { "qemu-vsock", required_argument
, NULL
, ARG_QEMU_VSOCK
},
208 { "vsock-cid", required_argument
, NULL
, ARG_VSOCK_CID
},
209 { "tpm", required_argument
, NULL
, ARG_TPM
},
210 { "linux", required_argument
, NULL
, ARG_LINUX
},
211 { "initrd", required_argument
, NULL
, ARG_INITRD
},
212 { "qemu-gui", no_argument
, NULL
, ARG_QEMU_GUI
},
213 { "network-tap", no_argument
, NULL
, 'n' },
214 { "network-user-mode", no_argument
, NULL
, ARG_NETWORK_USER_MODE
},
215 { "bind", required_argument
, NULL
, ARG_BIND
},
216 { "bind-ro", required_argument
, NULL
, ARG_BIND_RO
},
217 { "secure-boot", required_argument
, NULL
, ARG_SECURE_BOOT
},
218 { "private-users", required_argument
, NULL
, ARG_PRIVATE_USERS
},
219 { "forward-journal", required_argument
, NULL
, ARG_FORWARD_JOURNAL
},
220 { "set-credential", required_argument
, NULL
, ARG_SET_CREDENTIAL
},
221 { "load-credential", required_argument
, NULL
, ARG_LOAD_CREDENTIAL
},
222 { "firmware", required_argument
, NULL
, ARG_FIRMWARE
},
232 while ((c
= getopt_long(argc
, argv
, "+hD:i:M:nq", options
, NULL
)) >= 0)
245 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_directory
);
249 arg_settings_mask
|= SETTING_DIRECTORY
;
253 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_image
);
257 arg_settings_mask
|= SETTING_DIRECTORY
;
262 arg_machine
= mfree(arg_machine
);
264 if (!hostname_is_valid(optarg
, 0))
265 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
266 "Invalid machine name: %s", optarg
);
268 r
= free_and_strdup(&arg_machine
, optarg
);
275 arg_pager_flags
|= PAGER_DISABLE
;
279 r
= free_and_strdup_warn(&arg_qemu_smp
, optarg
);
285 r
= parse_size(optarg
, 1024, &arg_qemu_mem
);
287 return log_error_errno(r
, "Failed to parse --qemu-mem=%s: %m", optarg
);
291 r
= parse_tristate(optarg
, &arg_qemu_kvm
);
293 return log_error_errno(r
, "Failed to parse --qemu-kvm=%s: %m", optarg
);
297 r
= parse_tristate(optarg
, &arg_qemu_vsock
);
299 return log_error_errno(r
, "Failed to parse --qemu-vsock=%s: %m", optarg
);
304 arg_vsock_cid
= VMADDR_CID_ANY
;
308 r
= vsock_parse_cid(optarg
, &cid
);
310 return log_error_errno(r
, "Failed to parse --vsock-cid: %s", optarg
);
311 if (!VSOCK_CID_IS_REGULAR(cid
))
312 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Specified CID is not regular, refusing: %u", cid
);
319 r
= parse_tristate(optarg
, &arg_tpm
);
321 return log_error_errno(r
, "Failed to parse --tpm=%s: %m", optarg
);
325 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_linux
);
331 _cleanup_free_
char *initrd_path
= NULL
;
332 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &initrd_path
);
336 r
= strv_consume(&arg_initrds
, TAKE_PTR(initrd_path
));
348 arg_network_stack
= QEMU_NET_TAP
;
351 case ARG_NETWORK_USER_MODE
:
352 arg_network_stack
= QEMU_NET_USER
;
357 r
= runtime_mount_parse(&arg_runtime_mounts
, optarg
, c
== ARG_BIND_RO
);
359 return log_error_errno(r
, "Failed to parse --bind(-ro)= argument %s: %m", optarg
);
361 arg_settings_mask
|= SETTING_BIND_MOUNTS
;
364 case ARG_SECURE_BOOT
:
365 r
= parse_tristate(optarg
, &arg_secure_boot
);
367 return log_error_errno(r
, "Failed to parse --secure-boot=%s: %m", optarg
);
370 case ARG_PRIVATE_USERS
:
371 r
= parse_userns_uid_range(optarg
, &arg_uid_shift
, &arg_uid_range
);
376 case ARG_FORWARD_JOURNAL
:
377 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_forward_journal
);
382 case ARG_SET_CREDENTIAL
: {
383 r
= machine_credential_set(&arg_credentials
, optarg
);
386 arg_settings_mask
|= SETTING_CREDENTIALS
;
390 case ARG_LOAD_CREDENTIAL
: {
391 r
= machine_credential_load(&arg_credentials
, optarg
);
395 arg_settings_mask
|= SETTING_CREDENTIALS
;
400 if (streq(optarg
, "list")) {
401 _cleanup_strv_free_
char **l
= NULL
;
403 r
= list_ovmf_config(&l
);
405 return log_error_errno(r
, "Failed to list firmwares: %m");
408 fputstrv(stdout
, l
, "\n", &nl
);
/* Pure argument validation: no call has failed at this point, so errno only
 * holds whatever an earlier call left behind. Report a fixed EINVAL, like the
 * other invalid-argument checks in this function. */
415 if (!isempty(optarg
) && !path_is_absolute(optarg
) && !startswith(optarg
, "./"))
416 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Absolute path or path starting with './' required.");
418 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_firmware
);
428 assert_not_reached();
432 arg_kernel_cmdline_extra
= strv_copy(argv
+ optind
);
433 if (!arg_kernel_cmdline_extra
)
436 arg_settings_mask
|= SETTING_START_MODE
;
442 static int open_vsock(void) {
443 _cleanup_close_
int vsock_fd
= -EBADF
;
445 static const union sockaddr_union bind_addr
= {
446 .vm
.svm_family
= AF_VSOCK
,
447 .vm
.svm_cid
= VMADDR_CID_ANY
,
448 .vm
.svm_port
= VMADDR_PORT_ANY
,
451 vsock_fd
= socket(AF_VSOCK
, SOCK_STREAM
|SOCK_CLOEXEC
, 0);
453 return log_error_errno(errno
, "Failed to open AF_VSOCK socket: %m");
455 r
= bind(vsock_fd
, &bind_addr
.sa
, sizeof(bind_addr
.vm
));
457 return log_error_errno(errno
, "Failed to bind to vsock to address %u:%u: %m", bind_addr
.vm
.svm_cid
, bind_addr
.vm
.svm_port
);
459 r
= listen(vsock_fd
, SOMAXCONN_DELUXE
);
461 return log_error_errno(errno
, "Failed to listen on vsock: %m");
463 return TAKE_FD(vsock_fd
);
466 static int vmspawn_dispatch_notify_fd(sd_event_source
*source
, int fd
, uint32_t revents
, void *userdata
) {
467 char buf
[NOTIFY_BUFFER_MAX
+1];
468 const char *p
= NULL
;
469 struct iovec iovec
= {
471 .iov_len
= sizeof(buf
)-1,
473 struct msghdr msghdr
= {
478 _cleanup_strv_free_
char **tags
= NULL
;
479 int r
, *exit_status
= ASSERT_PTR(userdata
);
481 n
= recvmsg_safe(fd
, &msghdr
, MSG_DONTWAIT
);
482 if (ERRNO_IS_NEG_TRANSIENT(n
))
485 log_warning_errno(n
, "Got message with truncated control data, ignoring: %m");
489 return log_warning_errno(n
, "Couldn't read notification socket: %m");
491 if ((size_t) n
>= sizeof(buf
)) {
492 log_warning("Received notify message exceeded maximum size. Ignoring.");
497 tags
= strv_split(buf
, "\n\r");
501 STRV_FOREACH(s
, tags
)
502 log_debug("Received tag %s from notify socket", *s
);
504 if (strv_contains(tags
, "READY=1")) {
505 r
= sd_notify(false, "READY=1\n");
507 log_warning_errno(r
, "Failed to send readiness notification, ignoring: %m");
510 p
= strv_find_startswith(tags
, "STATUS=");
512 (void) sd_notifyf(false, "STATUS=VM running: %s", p
);
514 p
= strv_find_startswith(tags
, "EXIT_STATUS=");
516 r
= safe_atoi(p
, exit_status
);
518 log_warning_errno(r
, "Failed to parse exit status from %s, ignoring: %m", p
);
521 /* we will only receive one message from each connection so disable this source once one is received */
522 source
= sd_event_source_disable_unref(source
);
527 static int vmspawn_dispatch_vsock_connections(sd_event_source
*source
, int fd
, uint32_t revents
, void *userdata
) {
530 _cleanup_close_
int conn_fd
= -EBADF
;
534 if (revents
!= EPOLLIN
) {
535 log_warning("Got unexpected poll event for vsock fd.");
539 conn_fd
= accept4(fd
, NULL
, NULL
, SOCK_CLOEXEC
|SOCK_NONBLOCK
);
541 log_warning_errno(errno
, "Failed to accept connection from vsock fd (%m), ignoring...");
545 event
= sd_event_source_get_event(source
);
547 return log_error_errno(SYNTHETIC_ERRNO(ENOENT
), "Failed to retrieve event from event source, exiting task");
549 /* add a new floating task to read from the connection */
550 r
= sd_event_add_io(event
, NULL
, conn_fd
, revents
, vmspawn_dispatch_notify_fd
, userdata
);
552 return log_error_errno(r
, "Failed to allocate notify connection event source: %m");
554 /* conn_fd is now owned by the event loop so don't clean it up */
560 static int setup_notify_parent(sd_event
*event
, int fd
, int *exit_status
, sd_event_source
**ret_notify_event_source
) {
566 assert(ret_notify_event_source
);
568 r
= sd_event_add_io(event
, ret_notify_event_source
, fd
, EPOLLIN
, vmspawn_dispatch_vsock_connections
, exit_status
);
570 return log_error_errno(r
, "Failed to allocate notify socket event source: %m");
572 (void) sd_event_source_set_description(*ret_notify_event_source
, "vmspawn-notify-sock");
577 static int on_orderly_shutdown(sd_event_source
*s
, const struct signalfd_siginfo
*si
, void *userdata
) {
578 PidRef
*pidref
= userdata
;
581 /* TODO: actually talk to qemu and ask the guest to shutdown here */
584 r
= pidref_kill(pidref
, SIGKILL
);
586 log_warning_errno(r
, "Failed to kill qemu, terminating: %m");
588 log_info("Trying to halt qemu. Send SIGTERM again to trigger vmspawn to immediately terminate.");
589 sd_event_source_set_userdata(s
, NULL
);
594 sd_event_exit(sd_event_source_get_event(s
), 0);
598 static int on_child_exit(sd_event_source
*s
, const siginfo_t
*si
, void *userdata
) {
599 sd_event_exit(sd_event_source_get_event(s
), 0);
603 static int cmdline_add_vsock(char ***cmdline
, int vsock_fd
) {
606 r
= strv_extend(cmdline
, "-smbios");
610 union sockaddr_union addr
;
611 socklen_t addr_len
= sizeof addr
.vm
;
612 r
= getsockname(vsock_fd
, &addr
.sa
, &addr_len
);
615 assert(addr_len
>= sizeof addr
.vm
);
616 assert(addr
.vm
.svm_family
== AF_VSOCK
);
618 r
= strv_extendf(cmdline
, "type=11,value=io.systemd.credential:vmm.notify_socket=vsock-stream:%u:%u", (unsigned) VMADDR_CID_HOST
, addr
.vm
.svm_port
);
625 static int start_tpm(sd_bus
*bus
, const char *scope
, const char *tpm
, const char **ret_state_tempdir
) {
626 _cleanup_(rm_rf_physical_and_freep
) char *state_dir
= NULL
;
627 _cleanup_free_
char *scope_prefix
= NULL
;
628 _cleanup_(socket_service_pair_done
) SocketServicePair ssp
= {
629 .socket_type
= SOCK_STREAM
,
636 assert(ret_state_tempdir
);
638 r
= unit_name_to_prefix(scope
, &scope_prefix
);
640 return log_error_errno(r
, "Failed to strip .scope suffix from scope: %m");
642 ssp
.unit_name_prefix
= strjoin(scope_prefix
, "-tpm");
643 if (!ssp
.unit_name_prefix
)
646 state_dir
= path_join(arg_runtime_directory
, ssp
.unit_name_prefix
);
650 if (arg_runtime_directory_created
) {
651 ssp
.runtime_directory
= path_join("systemd/vmspawn", ssp
.unit_name_prefix
);
652 if (!ssp
.runtime_directory
)
656 ssp
.listen_address
= path_join(state_dir
, "sock");
657 if (!ssp
.listen_address
)
660 ssp
.exec_start
= strv_new(tpm
, "socket", "--tpm2", "--tpmstate");
664 r
= strv_extendf(&ssp
.exec_start
, "dir=%s", state_dir
);
668 r
= strv_extend_many(&ssp
.exec_start
, "--ctrl", "type=unixio,fd=3");
672 r
= start_socket_service_pair(bus
, scope
, &ssp
);
676 *ret_state_tempdir
= TAKE_PTR(state_dir
);
681 static int start_systemd_journal_remote(sd_bus
*bus
, const char *scope
, unsigned port
, const char *sd_journal_remote
, char **listen_address
) {
682 _cleanup_free_
char *scope_prefix
= NULL
;
683 _cleanup_(socket_service_pair_done
) SocketServicePair ssp
= {
684 .socket_type
= SOCK_STREAM
,
690 assert(sd_journal_remote
);
692 r
= unit_name_to_prefix(scope
, &scope_prefix
);
694 return log_error_errno(r
, "Failed to strip .scope suffix from scope: %m");
696 ssp
.unit_name_prefix
= strjoin(scope_prefix
, "-forward-journal");
697 if (!ssp
.unit_name_prefix
)
700 r
= asprintf(&ssp
.listen_address
, "vsock:2:%u", port
);
704 ssp
.exec_start
= strv_new(sd_journal_remote
,
705 "--output", arg_forward_journal
,
706 "--split-mode", endswith(arg_forward_journal
, ".journal") ? "none" : "host");
710 r
= start_socket_service_pair(bus
, scope
, &ssp
);
715 *listen_address
= TAKE_PTR(ssp
.listen_address
);
720 static int discover_root(char **ret
) {
722 _cleanup_(dissected_image_unrefp
) DissectedImage
*image
= NULL
;
723 _cleanup_free_
char *root
= NULL
;
727 r
= dissect_image_file_and_warn(
730 /* mount_options= */ NULL
,
731 /* image_policy= */ NULL
,
737 if (image
->partitions
[PARTITION_ROOT
].found
)
738 root
= strjoin("root=PARTUUID=", SD_ID128_TO_UUID_STRING(image
->partitions
[PARTITION_ROOT
].uuid
));
739 else if (image
->partitions
[PARTITION_USR
].found
)
740 root
= strjoin("mount.usr=PARTUUID=", SD_ID128_TO_UUID_STRING(image
->partitions
[PARTITION_USR
].uuid
));
742 return log_error_errno(SYNTHETIC_ERRNO(ENOENT
), "Cannot perform a direct kernel boot without a root or usr partition, refusing");
747 *ret
= TAKE_PTR(root
);
751 static int find_virtiofsd(char **ret
) {
753 _cleanup_free_
char *virtiofsd
= NULL
;
757 r
= find_executable("virtiofsd", &virtiofsd
);
758 if (r
< 0 && r
!= -ENOENT
)
759 return log_error_errno(r
, "Error while searching for virtiofsd: %m");
762 FOREACH_STRING(file
, "/usr/libexec/virtiofsd", "/usr/lib/virtiofsd") {
763 if (access(file
, X_OK
) >= 0) {
764 virtiofsd
= strdup(file
);
770 if (!IN_SET(errno
, ENOENT
, EACCES
))
771 return log_error_errno(errno
, "Error while searching for virtiofsd: %m");
776 return log_error_errno(SYNTHETIC_ERRNO(ENOENT
), "Failed to find virtiofsd binary.");
778 *ret
= TAKE_PTR(virtiofsd
);
782 static int start_virtiofsd(sd_bus
*bus
, const char *scope
, const char *directory
, bool uidmap
, char **ret_state_tempdir
, char **ret_sock_name
) {
783 _cleanup_(rm_rf_physical_and_freep
) char *state_dir
= NULL
;
784 _cleanup_free_
char *virtiofsd
= NULL
, *sock_name
= NULL
, *scope_prefix
= NULL
;
785 _cleanup_(socket_service_pair_done
) SocketServicePair ssp
= {
786 .socket_type
= SOCK_STREAM
,
788 static unsigned virtiofsd_instance
= 0;
794 assert(ret_state_tempdir
);
795 assert(ret_sock_name
);
797 r
= find_virtiofsd(&virtiofsd
);
801 r
= unit_name_to_prefix(scope
, &scope_prefix
);
803 return log_error_errno(r
, "Failed to strip .scope suffix from scope: %m");
805 if (asprintf(&ssp
.unit_name_prefix
, "%s-virtiofsd-%u", scope_prefix
, virtiofsd_instance
++) < 0)
808 state_dir
= path_join(arg_runtime_directory
, ssp
.unit_name_prefix
);
812 if (arg_runtime_directory_created
) {
813 ssp
.runtime_directory
= strjoin("systemd/vmspawn/", ssp
.unit_name_prefix
);
814 if (!ssp
.runtime_directory
)
818 if (asprintf(&sock_name
, "sock-%"PRIx64
, random_u64()) < 0)
821 ssp
.listen_address
= path_join(state_dir
, sock_name
);
822 if (!ssp
.listen_address
)
825 /* QEMU doesn't support submounts so don't announce them */
826 ssp
.exec_start
= strv_new(virtiofsd
, "--shared-dir", directory
, "--xattr", "--fd", "3", "--no-announce-submounts");
830 if (uidmap
&& arg_uid_shift
!= UID_INVALID
) {
831 r
= strv_extend(&ssp
.exec_start
, "--uid-map");
835 r
= strv_extendf(&ssp
.exec_start
, ":0:" UID_FMT
":" UID_FMT
":", arg_uid_shift
, arg_uid_range
);
839 r
= strv_extend(&ssp
.exec_start
, "--gid-map");
843 r
= strv_extendf(&ssp
.exec_start
, ":0:" GID_FMT
":" GID_FMT
":", arg_uid_shift
, arg_uid_range
);
848 r
= start_socket_service_pair(bus
, scope
, &ssp
);
852 *ret_state_tempdir
= TAKE_PTR(state_dir
);
853 *ret_sock_name
= TAKE_PTR(sock_name
);
858 static int kernel_cmdline_maybe_append_root(void) {
860 bool cmdline_contains_root
= strv_find_startswith(arg_kernel_cmdline_extra
, "root=")
861 || strv_find_startswith(arg_kernel_cmdline_extra
, "mount.usr=");
863 if (!cmdline_contains_root
) {
864 _cleanup_free_
char *root
= NULL
;
866 r
= discover_root(&root
);
870 log_debug("Determined root file system %s from dissected image", root
);
872 r
= strv_consume(&arg_kernel_cmdline_extra
, TAKE_PTR(root
));
880 static int discover_boot_entry(const char *root
, char **ret_linux
, char ***ret_initrds
) {
881 _cleanup_(boot_config_free
) BootConfig config
= BOOT_CONFIG_NULL
;
882 _cleanup_free_
char *esp_path
= NULL
, *xbootldr_path
= NULL
;
889 esp_path
= path_join(root
, "efi");
893 xbootldr_path
= path_join(root
, "boot");
897 r
= boot_config_load(&config
, esp_path
, xbootldr_path
);
901 r
= boot_config_select_special_entries(&config
, /* skip_efivars= */ true);
903 return log_error_errno(r
, "Failed to find special boot config entries: %m");
905 const BootEntry
*boot_entry
= boot_config_default_entry(&config
);
907 if (boot_entry
&& !IN_SET(boot_entry
->type
, BOOT_ENTRY_UNIFIED
, BOOT_ENTRY_CONF
))
910 /* If we cannot determine a default entry search for UKIs (Type #2 EFI Unified Kernel Images)
911 * then .conf files (Type #1 Boot Loader Specification Entries).
912 * https://uapi-group.org/specifications/specs/boot_loader_specification */
914 FOREACH_ARRAY(entry
, config
.entries
, config
.n_entries
)
915 if (entry
->type
== BOOT_ENTRY_UNIFIED
) {
921 FOREACH_ARRAY(entry
, config
.entries
, config
.n_entries
)
922 if (entry
->type
== BOOT_ENTRY_CONF
) {
928 return log_error_errno(SYNTHETIC_ERRNO(ENOENT
), "Failed to discover any boot entries.");
930 log_debug("Discovered boot entry %s (%s)", boot_entry
->id
, boot_entry_type_to_string(boot_entry
->type
));
932 _cleanup_free_
char *linux_kernel
= NULL
;
933 _cleanup_strv_free_
char **initrds
= NULL
;
934 if (boot_entry
->type
== BOOT_ENTRY_UNIFIED
) {
935 linux_kernel
= path_join(boot_entry
->root
, boot_entry
->kernel
);
938 } else if (boot_entry
->type
== BOOT_ENTRY_CONF
) {
939 linux_kernel
= path_join(boot_entry
->root
, boot_entry
->kernel
);
943 STRV_FOREACH(initrd
, boot_entry
->initrd
) {
944 _cleanup_free_
char *initrd_path
= path_join(boot_entry
->root
, *initrd
);
948 r
= strv_consume(&initrds
, TAKE_PTR(initrd_path
));
953 assert_not_reached();
955 *ret_linux
= TAKE_PTR(linux_kernel
);
956 *ret_initrds
= TAKE_PTR(initrds
);
961 static int merge_initrds(char **ret
) {
962 _cleanup_(rm_rf_physical_and_freep
) char *merged_initrd
= NULL
;
963 _cleanup_close_
int ofd
= -EBADF
;
968 r
= tempfn_random_child(NULL
, "vmspawn-initrd-", &merged_initrd
);
970 return log_error_errno(r
, "Failed to create temporary file: %m");
972 ofd
= open(merged_initrd
, O_WRONLY
|O_CREAT
|O_EXCL
|O_CLOEXEC
, 0600);
974 return log_error_errno(errno
, "Failed to create regular file %s: %m", merged_initrd
);
976 STRV_FOREACH(i
, arg_initrds
) {
977 _cleanup_close_
int ifd
= -EBADF
;
980 off
= lseek(ofd
, 0, SEEK_CUR
);
982 return log_error_errno(errno
, "Failed to get file offset of %s: %m", merged_initrd
);
984 to_seek
= (4 - (off
% 4)) % 4;
986 /* seek to assure 4 byte alignment for each initrd */
987 if (to_seek
!= 0 && lseek(ofd
, to_seek
, SEEK_CUR
) < 0)
988 return log_error_errno(errno
, "Failed to seek %s: %m", merged_initrd
);
990 ifd
= open(*i
, O_RDONLY
|O_CLOEXEC
);
992 return log_error_errno(errno
, "Failed to open %s: %m", *i
);
994 r
= copy_bytes(ifd
, ofd
, UINT64_MAX
, COPY_REFLINK
);
996 return log_error_errno(r
, "Failed to copy bytes from %s to %s: %m", *i
, merged_initrd
);
999 *ret
= TAKE_PTR(merged_initrd
);
1003 static int run_virtual_machine(int kvm_device_fd
, int vhost_device_fd
) {
1004 _cleanup_(ovmf_config_freep
) OvmfConfig
*ovmf_config
= NULL
;
1005 _cleanup_(sd_bus_flush_close_unrefp
) sd_bus
*bus
= NULL
;
1006 _cleanup_free_
char *machine
= NULL
, *qemu_binary
= NULL
, *mem
= NULL
, *trans_scope
= NULL
, *kernel
= NULL
;
1007 _cleanup_close_
int notify_sock_fd
= -EBADF
;
1008 _cleanup_strv_free_
char **cmdline
= NULL
;
1009 _cleanup_free_
int *pass_fds
= NULL
;
1010 size_t n_pass_fds
= 0;
1011 const char *accel
, *shm
;
1015 r
= sd_bus_default_system(&bus
);
1017 r
= sd_bus_default_user(&bus
);
1019 return log_error_errno(r
, "Failed to connect to systemd bus: %m");
1021 r
= start_transient_scope(bus
, arg_machine
, /* allow_pidfd= */ true, &trans_scope
);
1025 bool use_kvm
= arg_qemu_kvm
> 0;
1026 if (arg_qemu_kvm
< 0) {
1027 r
= qemu_check_kvm_support();
1029 return log_error_errno(r
, "Failed to check for KVM support: %m");
1034 r
= load_ovmf_config(arg_firmware
, &ovmf_config
);
1036 r
= find_ovmf_config(arg_secure_boot
, &ovmf_config
);
1038 return log_error_errno(r
, "Failed to find OVMF config: %m");
1040 /* only warn if the user hasn't disabled secureboot */
1041 if (!ovmf_config
->supports_sb
&& arg_secure_boot
)
1042 log_warning("Couldn't find OVMF firmware blob with Secure Boot support, "
1043 "falling back to OVMF firmware blobs without Secure Boot support.");
1045 shm
= arg_directory
? ",memory-backend=mem" : "";
1046 if (ARCHITECTURE_SUPPORTS_SMM
)
1047 machine
= strjoin("type=" QEMU_MACHINE_TYPE
",smm=", on_off(ovmf_config
->supports_sb
), shm
);
1049 machine
= strjoin("type=" QEMU_MACHINE_TYPE
, shm
);
1054 kernel
= strdup(arg_linux
);
1057 } else if (arg_directory
) {
1058 /* a kernel is required for directory type images so attempt to locate a UKI under /boot and /efi */
1059 r
= discover_boot_entry(arg_directory
, &kernel
, &arg_initrds
);
1061 return log_error_errno(r
, "Failed to locate UKI in directory type image, please specify one with --linux=.");
1063 log_debug("Discovered UKI image at %s", kernel
);
1066 r
= find_qemu_binary(&qemu_binary
);
1067 if (r
== -EOPNOTSUPP
)
1068 return log_error_errno(r
, "Native architecture is not supported by qemu.");
1070 return log_error_errno(r
, "Failed to find QEMU binary: %m");
1072 if (asprintf(&mem
, "%" PRIu64
"M", DIV_ROUND_UP(arg_qemu_mem
, U64_MB
)) < 0)
1077 "-machine", machine
,
1078 "-smp", arg_qemu_smp
?: "1",
1080 "-object", "rng-random,filename=/dev/urandom,id=rng0",
1081 "-device", "virtio-rng-pci,rng=rng0,id=rng-device0"
1086 /* if we are going to be starting any units with state then create our runtime dir */
1087 if (arg_tpm
!= 0 || arg_directory
|| arg_runtime_mounts
.n_mounts
!= 0) {
1088 r
= runtime_directory(&arg_runtime_directory
, arg_privileged
? RUNTIME_SCOPE_SYSTEM
: RUNTIME_SCOPE_USER
, "systemd/vmspawn");
1090 return log_error_errno(r
, "Failed to lookup runtime directory: %m");
1092 /* r > 0 means we need to create our own runtime dir */
1093 r
= mkdir_p(arg_runtime_directory
, 0755);
1095 return log_error_errno(r
, "Failed to create runtime directory: %m");
1096 arg_runtime_directory_created
= true;
1100 if (arg_network_stack
== QEMU_NET_TAP
)
1101 r
= strv_extend_many(&cmdline
, "-nic", "tap,script=no,model=virtio-net-pci");
1102 else if (arg_network_stack
== QEMU_NET_USER
)
1103 r
= strv_extend_many(&cmdline
, "-nic", "user,model=virtio-net-pci");
1105 r
= strv_extend_many(&cmdline
, "-nic", "none");
1109 /* A shared memory backend might increase ram usage so only add one if actually necessary for virtiofsd. */
1110 if (arg_directory
|| arg_runtime_mounts
.n_mounts
!= 0) {
1111 r
= strv_extend(&cmdline
, "-object");
1115 r
= strv_extendf(&cmdline
, "memory-backend-memfd,id=mem,size=%s,share=on", mem
);
1120 bool use_vsock
= arg_qemu_vsock
> 0 && ARCHITECTURE_SUPPORTS_SMBIOS
;
1121 if (arg_qemu_vsock
< 0) {
1122 r
= qemu_check_vsock_support();
1124 return log_error_errno(r
, "Failed to check for VSock support: %m");
1129 if (!use_kvm
&& kvm_device_fd
>= 0) {
1130 log_warning("KVM is disabled but fd for /dev/kvm was passed, closing fd and ignoring");
1131 kvm_device_fd
= safe_close(kvm_device_fd
);
1134 if (use_kvm
&& kvm_device_fd
>= 0) {
1135 /* /dev/fdset/1 is magic string to tell qemu where to find the fd for /dev/kvm
1136 * we use this so that we can take a fd to /dev/kvm and then give qemu that fd */
1137 accel
= "kvm,device=/dev/fdset/1";
1139 r
= strv_extend(&cmdline
, "--add-fd");
1143 r
= strv_extendf(&cmdline
, "fd=%d,set=1,opaque=/dev/kvm", kvm_device_fd
);
1147 if (!GREEDY_REALLOC(pass_fds
, n_pass_fds
+ 1))
1150 pass_fds
[n_pass_fds
++] = kvm_device_fd
;
1156 r
= strv_extend_many(&cmdline
, "-accel", accel
);
1160 _cleanup_close_
int child_vsock_fd
= -EBADF
;
1161 unsigned child_cid
= arg_vsock_cid
;
1163 int device_fd
= vhost_device_fd
;
1165 if (device_fd
< 0) {
1166 child_vsock_fd
= open("/dev/vhost-vsock", O_RDWR
|O_CLOEXEC
);
1167 if (child_vsock_fd
< 0)
1168 return log_error_errno(errno
, "Failed to open /dev/vhost-vsock as read/write: %m");
1170 device_fd
= child_vsock_fd
;
1173 r
= vsock_fix_child_cid(device_fd
, &child_cid
, arg_machine
);
1175 return log_error_errno(r
, "Failed to fix CID for the guest vsock socket: %m");
1177 r
= strv_extend(&cmdline
, "-device");
1181 r
= strv_extendf(&cmdline
, "vhost-vsock-pci,guest-cid=%u,vhostfd=%d", child_cid
, device_fd
);
1185 if (!GREEDY_REALLOC(pass_fds
, n_pass_fds
+ 1))
1188 pass_fds
[n_pass_fds
++] = device_fd
;
1191 r
= strv_extend_many(&cmdline
, "-cpu", "max");
1196 r
= strv_extend_many(
1201 r
= strv_extend_many(
1205 "-chardev", "stdio,mux=on,id=console,signal=off",
1206 "-serial", "chardev:console",
1211 r
= strv_extend(&cmdline
, "-drive");
1215 r
= strv_extendf(&cmdline
, "if=pflash,format=%s,readonly=on,file=%s", ovmf_config_format(ovmf_config
), ovmf_config
->path
);
1219 _cleanup_(unlink_and_freep
) char *ovmf_vars_to
= NULL
;
1220 if (ovmf_config
->supports_sb
) {
1221 const char *ovmf_vars_from
= ovmf_config
->vars
;
1222 _cleanup_close_
int source_fd
= -EBADF
, target_fd
= -EBADF
;
1224 r
= tempfn_random_child(NULL
, "vmspawn-", &ovmf_vars_to
);
/* open() reports the reason for a failure through errno; the returned fd is
 * only a negative sentinel. Pass errno so that %m prints the real cause, the
 * same way the target_fd open below does. */
1228 source_fd
= open(ovmf_vars_from
, O_RDONLY
|O_CLOEXEC
);
1230 return log_error_errno(errno
, "Failed to open OVMF vars file %s: %m", ovmf_vars_from
);
1232 target_fd
= open(ovmf_vars_to
, O_WRONLY
|O_CREAT
|O_EXCL
|O_CLOEXEC
, 0600);
1234 return log_error_errno(errno
, "Failed to create regular file for OVMF vars at %s: %m", ovmf_vars_to
);
1236 r
= copy_bytes(source_fd
, target_fd
, UINT64_MAX
, COPY_REFLINK
);
1238 return log_error_errno(r
, "Failed to copy bytes from %s to %s: %m", ovmf_vars_from
, ovmf_vars_to
);
1240 /* These aren't always available so don't raise an error if they fail */
1241 (void) copy_xattr(source_fd
, NULL
, target_fd
, NULL
, 0);
1242 (void) copy_access(source_fd
, target_fd
);
1243 (void) copy_times(source_fd
, target_fd
, 0);
1245 r
= strv_extend_many(
1247 "-global", "ICH9-LPC.disable_s3=1",
1248 "-global", "driver=cfi.pflash01,property=secure,value=on",
1253 r
= strv_extendf(&cmdline
, "file=%s,if=pflash,format=%s", ovmf_vars_to
, ovmf_config_format(ovmf_config
));
1259 r
= strv_extend_many(&cmdline
, "-kernel", kernel
);
1263 /* We can't rely on gpt-auto-generator when direct kernel booting so synthesize a root=
1264 * kernel argument instead. */
1266 r
= kernel_cmdline_maybe_append_root();
1273 assert(!arg_directory
);
1275 r
= strv_extend(&cmdline
, "-drive");
1279 r
= strv_extendf(&cmdline
, "if=none,id=mkosi,file=%s,format=raw", arg_image
);
1283 r
= strv_extend_many(&cmdline
,
1284 "-device", "virtio-scsi-pci,id=scsi",
1285 "-device", "scsi-hd,drive=mkosi,bootindex=1");
1290 if (arg_directory
) {
1291 _cleanup_free_
char *sock_path
= NULL
, *sock_name
= NULL
;
1292 r
= start_virtiofsd(bus
, trans_scope
, arg_directory
, /* uidmap= */ true, &sock_path
, &sock_name
);
1296 r
= strv_extend(&cmdline
, "-chardev");
1300 r
= strv_extendf(&cmdline
, "socket,id=%1$s,path=%2$s/%1$s", sock_name
, sock_path
);
1304 r
= strv_extend(&cmdline
, "-device");
1308 r
= strv_extendf(&cmdline
, "vhost-user-fs-pci,queue-size=1024,chardev=%s,tag=root", sock_name
);
1312 r
= strv_extend(&arg_kernel_cmdline_extra
, "root=root rootfstype=virtiofs rw");
1317 r
= strv_prepend(&arg_kernel_cmdline_extra
, "console=" DEFAULT_SERIAL_TTY
);
1321 FOREACH_ARRAY(mount
, arg_runtime_mounts
.mounts
, arg_runtime_mounts
.n_mounts
) {
1322 _cleanup_free_
char *sock_path
= NULL
, *sock_name
= NULL
, *clean_target
= NULL
;
1323 r
= start_virtiofsd(bus
, trans_scope
, mount
->source
, /* uidmap= */ false, &sock_path
, &sock_name
);
1327 r
= strv_extend(&cmdline
, "-chardev");
1331 r
= strv_extendf(&cmdline
, "socket,id=%1$s,path=%2$s/%1$s", sock_name
, sock_path
);
1335 r
= strv_extend(&cmdline
, "-device");
1339 r
= strv_extendf(&cmdline
, "vhost-user-fs-pci,queue-size=1024,chardev=%1$s,tag=%1$s", sock_name
);
1343 clean_target
= xescape(mount
->target
, "\":");
1347 r
= strv_extendf(&arg_kernel_cmdline_extra
, "systemd.mount-extra=\"%s:%s:virtiofs:%s\"",
1348 sock_name
, clean_target
, mount
->read_only
? "ro" : "rw");
1353 if (ARCHITECTURE_SUPPORTS_SMBIOS
) {
1354 _cleanup_free_
char *kcl
= strv_join(arg_kernel_cmdline_extra
, " ");
1359 r
= strv_extend_many(&cmdline
, "-append", kcl
);
1363 if (ARCHITECTURE_SUPPORTS_SMBIOS
) {
1364 r
= strv_extend(&cmdline
, "-smbios");
1368 r
= strv_extendf(&cmdline
, "type=11,value=io.systemd.stub.kernel-cmdline-extra=%s", kcl
);
1372 log_warning("Cannot append extra args to kernel cmdline, native architecture doesn't support SMBIOS, ignoring");
1375 log_warning("Cannot append extra args to kernel cmdline, native architecture doesn't support SMBIOS");
1377 /* disable TPM autodetection if the user's hardware doesn't support it */
1378 if (!ARCHITECTURE_SUPPORTS_TPM
) {
1381 log_debug("TPM not support on %s, disabling tpm autodetection and continuing", architecture_to_string(native_architecture()));
1382 } else if (arg_tpm
> 0)
1383 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP
), "TPM not supported on %s, refusing", architecture_to_string(native_architecture()));
1386 _cleanup_free_
char *swtpm
= NULL
;
1388 r
= find_executable("swtpm", &swtpm
);
1390 /* log if the user asked for swtpm and we cannot find it */
1392 return log_error_errno(r
, "Failed to find swtpm binary: %m");
1393 /* also log if we got an error other than ENOENT from find_executable */
1394 if (r
!= -ENOENT
&& arg_tpm
< 0)
1395 return log_error_errno(r
, "Error detecting swtpm: %m");
1399 _cleanup_free_
const char *tpm_state_tempdir
= NULL
;
1401 r
= start_tpm(bus
, trans_scope
, swtpm
, &tpm_state_tempdir
);
1403 /* only bail if the user asked for a tpm */
1405 return log_error_errno(r
, "Failed to start tpm: %m");
1406 log_debug_errno(r
, "Failed to start tpm, ignoring: %m");
1409 r
= strv_extend(&cmdline
, "-chardev");
1413 r
= strv_extendf(&cmdline
, "socket,id=chrtpm,path=%s/sock", tpm_state_tempdir
);
1417 r
= strv_extend_many(&cmdline
, "-tpmdev", "emulator,id=tpm0,chardev=chrtpm");
1421 if (native_architecture() == ARCHITECTURE_X86_64
)
1422 r
= strv_extend_many(&cmdline
, "-device", "tpm-tis,tpmdev=tpm0");
1423 else if (IN_SET(native_architecture(), ARCHITECTURE_ARM64
, ARCHITECTURE_ARM64_BE
))
1424 r
= strv_extend_many(&cmdline
, "-device", "tpm-tis-device,tpmdev=tpm0");
1429 char *initrd
= NULL
;
1430 _cleanup_(rm_rf_physical_and_freep
) char *merged_initrd
= NULL
;
1431 size_t n_initrds
= strv_length(arg_initrds
);
1434 initrd
= arg_initrds
[0];
1435 else if (n_initrds
> 1) {
1436 r
= merge_initrds(&merged_initrd
);
1440 initrd
= merged_initrd
;
1444 r
= strv_extend_many(&cmdline
, "-initrd", initrd
);
1449 if (arg_forward_journal
) {
1450 _cleanup_free_
char *sd_journal_remote
= NULL
, *listen_address
= NULL
, *cred
= NULL
;
1451 r
= find_executable("systemd-journal-remote", &sd_journal_remote
);
1453 return log_error_errno(r
, "Failed to find systemd-journal-remote binary: %m");
1455 r
= start_systemd_journal_remote(bus
, trans_scope
, child_cid
, sd_journal_remote
, &listen_address
);
1459 cred
= strjoin("journal.forward_to_socket:", listen_address
);
1463 r
= machine_credential_set(&arg_credentials
, cred
);
1468 if (ARCHITECTURE_SUPPORTS_SMBIOS
)
1469 FOREACH_ARRAY(cred
, arg_credentials
.credentials
, arg_credentials
.n_credentials
) {
1470 _cleanup_free_
char *cred_data_b64
= NULL
;
1473 n
= base64mem(cred
->data
, cred
->size
, &cred_data_b64
);
1477 r
= strv_extend(&cmdline
, "-smbios");
1481 r
= strv_extendf(&cmdline
, "type=11,value=io.systemd.credential.binary:%s=%s", cred
->id
, cred_data_b64
);
1487 notify_sock_fd
= open_vsock();
1488 if (notify_sock_fd
< 0)
1489 return log_error_errno(notify_sock_fd
, "Failed to open vsock: %m");
1491 r
= cmdline_add_vsock(&cmdline
, notify_sock_fd
);
1495 return log_error_errno(r
, "Failed to call getsockname on vsock: %m");
1498 if (DEBUG_LOGGING
) {
1499 _cleanup_free_
char *joined
= quote_command_line(cmdline
, SHELL_ESCAPE_EMPTY
);
1503 log_debug("Executing: %s", joined
);
1506 assert_se(sigprocmask_many(SIG_BLOCK
, NULL
, SIGCHLD
, -1) >= 0);
1508 _cleanup_(sd_event_source_unrefp
) sd_event_source
*notify_event_source
= NULL
;
1509 _cleanup_(sd_event_unrefp
) sd_event
*event
= NULL
;
1510 r
= sd_event_new(&event
);
1512 return log_error_errno(r
, "Failed to get default event source: %m");
1514 (void) sd_event_set_watchdog(event
, true);
1516 _cleanup_(pidref_done
) PidRef child_pidref
= PIDREF_NULL
;
1518 r
= pidref_safe_fork_full(
1520 /* stdio_fds= */ NULL
,
1521 &child_vsock_fd
, 1, /* pass the vsock fd to qemu */
1522 FORK_RESET_SIGNALS
|FORK_CLOSE_ALL_FDS
|FORK_DEATHSIG_SIGTERM
|FORK_LOG
|FORK_CLOEXEC_OFF
|FORK_RLIMIT_NOFILE_SAFE
,
1527 /* set TERM and LANG if they are missing */
1528 if (setenv("TERM", "vt220", 0) < 0)
1531 if (setenv("LANG", "C.UTF-8", 0) < 0)
1534 execv(qemu_binary
, cmdline
);
1535 log_error_errno(errno
, "Failed to execve %s: %m", qemu_binary
);
1536 _exit(EXIT_FAILURE
);
1539 /* Close the vsock fd we passed to qemu in the parent. We don't need it anymore. */
1540 child_vsock_fd
= safe_close(child_vsock_fd
);
1542 int exit_status
= INT_MAX
;
1544 r
= setup_notify_parent(event
, notify_sock_fd
, &exit_status
, ¬ify_event_source
);
1546 return log_error_errno(r
, "Failed to setup event loop to handle vsock notify events: %m");
1549 /* shutdown qemu when we are shutdown */
1550 (void) sd_event_add_signal(event
, NULL
, SIGINT
| SD_EVENT_SIGNAL_PROCMASK
, on_orderly_shutdown
, &child_pidref
);
1551 (void) sd_event_add_signal(event
, NULL
, SIGTERM
| SD_EVENT_SIGNAL_PROCMASK
, on_orderly_shutdown
, &child_pidref
);
1553 (void) sd_event_add_signal(event
, NULL
, (SIGRTMIN
+18) | SD_EVENT_SIGNAL_PROCMASK
, sigrtmin18_handler
, NULL
);
1555 /* Exit when the child exits */
1556 (void) event_add_child_pidref(event
, NULL
, &child_pidref
, WEXITED
, on_child_exit
, NULL
);
1558 r
= sd_event_loop(event
);
1560 return log_error_errno(r
, "Failed to run event loop: %m");
1563 if (exit_status
== INT_MAX
) {
1564 log_debug("Couldn't retrieve inner EXIT_STATUS from vsock");
1565 return EXIT_SUCCESS
;
1567 if (exit_status
!= 0)
1568 log_warning("Non-zero exit code received: %d", exit_status
);
/* Fills in whichever of arg_directory, arg_image and arg_machine have not been set yet.
 *
 * Source selection (only relevant while both arg_directory and arg_image are unset): either an image is
 * looked up for arg_machine with image_find(IMAGE_MACHINE, ...) and its path is copied into arg_image or
 * arg_directory depending on the image type, or the current working directory is taken via safe_getcwd().
 *
 * Machine name: the local hostname when arg_directory is "/", otherwise the file name component of
 * arg_image (a trailing ".raw" is looked up with endswith()) or of arg_directory. The result is passed
 * through hostname_cleanup() and refused with EINVAL if hostname_is_valid() rejects it.
 *
 * Error paths log and return through log_error_errno().
 *
 * NOTE(review): the conditions that choose between these paths, the guards in front of the error returns
 * and the success return are not visible in this excerpt — confirm against the complete file. */
1575 static int determine_names(void) {
1578 if (!arg_directory
&& !arg_image
) {
1580 _cleanup_(image_unrefp
) Image
*i
= NULL
;
1582 r
= image_find(IMAGE_MACHINE
, arg_machine
, NULL
, &i
);
1584 return log_error_errno(r
, "No image for machine '%s'.", arg_machine
);
1586 return log_error_errno(r
, "Failed to find image for machine '%s': %m", arg_machine
);
/* Raw files and block devices are recorded in arg_image, directories and subvolumes in arg_directory. */
1588 if (IN_SET(i
->type
, IMAGE_RAW
, IMAGE_BLOCK
))
1589 r
= free_and_strdup(&arg_image
, i
->path
);
1590 else if (IN_SET(i
->type
, IMAGE_DIRECTORY
, IMAGE_SUBVOLUME
))
1591 r
= free_and_strdup(&arg_directory
, i
->path
);
/* Presumably the fall-through arm for any other image type — TODO confirm. */
1593 assert_not_reached();
1597 r
= safe_getcwd(&arg_directory
);
1599 return log_error_errno(r
, "Failed to determine current directory: %m");
/* For the root directory the machine name is taken from the local hostname. */
1604 if (arg_directory
&& path_equal(arg_directory
, "/")) {
1605 arg_machine
= gethostname_malloc();
1608 } else if (arg_image
) {
1611 r
= path_extract_filename(arg_image
, &arg_machine
);
1613 return log_error_errno(r
, "Failed to extract file name from '%s': %m", arg_image
);
1615 /* Truncate suffix if there is one */
1616 e
= endswith(arg_machine
, ".raw");
1620 r
= path_extract_filename(arg_directory
, &arg_machine
);
1622 return log_error_errno(r
, "Failed to extract file name from '%s': %m", arg_directory
);
/* Whatever the origin of the name, clean it up and make sure it is an acceptable hostname. */
1625 hostname_cleanup(arg_machine
);
1626 if (!hostname_is_valid(arg_machine
, 0))
1627 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Failed to determine machine name automatically, please use -M.");
/* Cross-checks the parsed arg_* settings for combinations that cannot work:
 *
 *   - arg_network_stack == QEMU_NET_TAP while arg_privileged is false: refused with EPERM
 *   - arg_initrds non-empty while arg_linux is unset: refused with EINVAL
 *
 * Each refusal is logged and returned through log_error_errno(). The success path is not visible in this
 * excerpt. */
1633 static int verify_arguments(void) {
1634 if (arg_network_stack
== QEMU_NET_TAP
&& !arg_privileged
)
1635 return log_error_errno(SYNTHETIC_ERRNO(EPERM
), "--network-tap requires root privileges, refusing.");
/* An initrd is only accepted together with an explicitly specified kernel. */
1637 if (!strv_isempty(arg_initrds
) && !arg_linux
)
1638 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Option --initrd= cannot be used without --linux=.");
/* Top-level driver: parses and validates the invocation, then starts the VM.
 *
 * Visible steps, in order:
 *   1. record in arg_privileged whether the real UID is 0
 *   2. parse_argv(), determine_names(), verify_arguments()
 *   3. log a two-line banner naming the machine, its image or directory, and the key combination that kills the VM
 *   4. collect file descriptors passed in by name via sd_listen_fds_with_names(); the names "kvm" and
 *      "vhost-vsock" are recognized, anything else is reported with log_notice()
 *   5. hand both device fds to run_virtual_machine() and return its result
 *
 * NOTE(review): the error checks after each step and the code that closes unrecognized fds are not
 * visible in this excerpt — confirm against the complete file. */
1643 static int run(int argc
, char *argv
[]) {
1644 int r
, kvm_device_fd
= -EBADF
, vhost_device_fd
= -EBADF
;
1645 _cleanup_strv_free_
char **names
= NULL
;
1649 arg_privileged
= getuid() == 0;
1651 r
= parse_argv(argc
, argv
);
1655 r
= determine_names();
1659 r
= verify_arguments();
1664 _cleanup_free_
char *u
= NULL
;
/* Show the image path if there is one, otherwise the directory. */
1665 const char *vm_path
= arg_image
?: arg_directory
;
/* Best effort: if this fails, u stays NULL and the plain path is printed below. */
1666 (void) terminal_urlify_path(vm_path
, vm_path
, &u
);
1668 log_info("%s %sSpawning VM %s on %s.%s\n"
1669 "%s %sPress %sCtrl-a x%s to kill VM.%s",
1670 special_glyph(SPECIAL_GLYPH_LIGHT_SHADE
), ansi_grey(), arg_machine
, u
?: vm_path
, ansi_normal(),
1671 special_glyph(SPECIAL_GLYPH_LIGHT_SHADE
), ansi_grey(), ansi_highlight(), ansi_grey(), ansi_normal());
/* Per sd_listen_fds(3), passing true unsets the $LISTEN_* environment variables after reading them; the
 * return value is the number of fds received, which bounds the loop below. */
1674 r
= sd_listen_fds_with_names(true, &names
);
1676 return log_error_errno(r
, "Failed to get passed file descriptors: %m");
1678 for (int i
= 0; i
< r
; i
++) {
/* Passed fds are numbered consecutively from SD_LISTEN_FDS_START; names[i] is the name of the i-th one. */
1679 int fd
= SD_LISTEN_FDS_START
+ i
;
1680 if (streq(names
[i
], "kvm"))
1682 else if (streq(names
[i
], "vhost-vsock"))
1683 vhost_device_fd
= fd
;
1685 log_notice("Couldn't recognize passed fd %d (%s), closing fd and ignoring...", fd
, names
[i
]);
/* Both fds are still -EBADF here if no fd with the corresponding name was passed in. */
1690 return run_virtual_machine(kvm_device_fd
, vhost_device_fd
);
/* Generates the program's main() around run(). NOTE(review): the _WITH_POSITIVE_FAILURE variant presumably
 * passes positive return values of run() on as the process exit status — confirm against main-func.h. */
1693 DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run
);