1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
11 #include "sd-daemon.h"
15 #include "alloc-util.h"
16 #include "architecture.h"
19 #include "bus-error.h"
20 #include "bus-internal.h"
21 #include "bus-locator.h"
23 #include "bus-wait-for-jobs.h"
24 #include "capability-util.h"
25 #include "common-signal.h"
27 #include "discover-image.h"
28 #include "dissect-image.h"
30 #include "ether-addr-util.h"
31 #include "event-util.h"
32 #include "extract-word.h"
34 #include "format-util.h"
37 #include "hexdecoct.h"
38 #include "hostname-setup.h"
39 #include "hostname-util.h"
40 #include "id128-util.h"
42 #include "machine-credential.h"
43 #include "main-func.h"
45 #include "namespace-util.h"
46 #include "netif-util.h"
47 #include "nsresource.h"
48 #include "osc-context.h"
50 #include "parse-argument.h"
51 #include "parse-util.h"
52 #include "path-lookup.h"
53 #include "path-util.h"
55 #include "pretty-print.h"
56 #include "process-util.h"
58 #include "random-util.h"
60 #include "signal-util.h"
61 #include "socket-util.h"
62 #include "stdio-util.h"
63 #include "string-util.h"
65 #include "sync-util.h"
66 #include "terminal-util.h"
67 #include "tmpfile-util.h"
68 #include "unit-name.h"
70 #include "vmspawn-mount.h"
71 #include "vmspawn-register.h"
72 #include "vmspawn-scope.h"
73 #include "vmspawn-settings.h"
74 #include "vmspawn-util.h"
/* Fixed 128-bit application-specific constant.
 * NOTE(review): presumably used as the hashing key when deriving TAP interface properties — the
 * consumer is not visible in this part of the file, confirm against the caller. */
#define VM_TAP_HASH_KEY SD_ID128_MAKE(01,d0,c6,4c,2b,df,24,fb,c0,f8,b2,09,7d,59,b2,93)
78 typedef enum TpmStateMode
{
79 TPM_STATE_OFF
, /* keep no state around */
80 TPM_STATE_AUTO
, /* keep state around, derive path from image/directory */
81 TPM_STATE_PATH
, /* explicitly specified location */
83 _TPM_STATE_MODE_INVALID
= -EINVAL
,
86 typedef struct SSHInfo
{
88 char *private_key_path
;
92 typedef struct ShutdownInfo
{
97 static bool arg_quiet
= false;
98 static PagerFlags arg_pager_flags
= 0;
99 static char *arg_directory
= NULL
;
100 static char *arg_image
= NULL
;
101 static char *arg_machine
= NULL
;
102 static char *arg_cpus
= NULL
;
103 static uint64_t arg_ram
= UINT64_C(2) * U64_GB
;
104 static int arg_kvm
= -1;
105 static int arg_vsock
= -1;
106 static unsigned arg_vsock_cid
= VMADDR_CID_ANY
;
107 static int arg_tpm
= -1;
108 static char *arg_linux
= NULL
;
109 static char **arg_initrds
= NULL
;
110 static ConsoleMode arg_console_mode
= CONSOLE_INTERACTIVE
;
111 static NetworkStack arg_network_stack
= NETWORK_STACK_NONE
;
112 static int arg_secure_boot
= -1;
113 static MachineCredentialContext arg_credentials
= {};
114 static uid_t arg_uid_shift
= UID_INVALID
, arg_uid_range
= 0x10000U
;
115 static RuntimeMountContext arg_runtime_mounts
= {};
116 static SettingsMask arg_settings_mask
= 0;
117 static char *arg_firmware
= NULL
;
118 static char *arg_forward_journal
= NULL
;
119 static bool arg_privileged
= false;
120 static bool arg_register
= false;
121 static bool arg_keep_unit
= false;
122 static sd_id128_t arg_uuid
= {};
123 static char **arg_kernel_cmdline_extra
= NULL
;
124 static char **arg_extra_drives
= NULL
;
125 static char *arg_background
= NULL
;
126 static bool arg_pass_ssh_key
= true;
127 static char *arg_ssh_key_type
= NULL
;
128 static bool arg_discard_disk
= true;
129 struct ether_addr arg_network_provided_mac
= {};
130 static char **arg_smbios11
= NULL
;
131 static uint64_t arg_grow_image
= 0;
132 static char *arg_tpm_state_path
= NULL
;
133 static TpmStateMode arg_tpm_state_mode
= TPM_STATE_AUTO
;
/* Pair every option variable that owns memory with the function that releases it: freep for plain
 * strings, strv_freep for string vectors, and the matching *_done() helper for the context structs.
 * NOTE(review): presumably invoked on exit through the main-func.h machinery — confirm. */
STATIC_DESTRUCTOR_REGISTER(arg_directory, freep);
STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
STATIC_DESTRUCTOR_REGISTER(arg_machine, freep);
STATIC_DESTRUCTOR_REGISTER(arg_cpus, freep);
STATIC_DESTRUCTOR_REGISTER(arg_credentials, machine_credential_context_done);
STATIC_DESTRUCTOR_REGISTER(arg_firmware, freep);
STATIC_DESTRUCTOR_REGISTER(arg_linux, freep);
STATIC_DESTRUCTOR_REGISTER(arg_initrds, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_runtime_mounts, runtime_mount_context_done);
STATIC_DESTRUCTOR_REGISTER(arg_forward_journal, freep);
STATIC_DESTRUCTOR_REGISTER(arg_kernel_cmdline_extra, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_extra_drives, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_background, freep);
STATIC_DESTRUCTOR_REGISTER(arg_ssh_key_type, freep);
STATIC_DESTRUCTOR_REGISTER(arg_smbios11, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_tpm_state_path, freep);
152 static int help(void) {
153 _cleanup_free_
char *link
= NULL
;
156 pager_open(arg_pager_flags
);
158 r
= terminal_urlify_man("systemd-vmspawn", "1", &link
);
162 printf("%1$s [OPTIONS...] [ARGUMENTS...]\n\n"
163 "%5$sSpawn a command or OS in a virtual machine.%6$s\n\n"
164 " -h --help Show this help\n"
165 " --version Print version string\n"
166 " -q --quiet Do not show status information\n"
167 " --no-pager Do not pipe output into a pager\n"
169 " -D --directory=PATH Root directory for the VM\n"
170 " -i --image=FILE|DEVICE Root file system disk image or device for the VM\n"
171 "\n%3$sHost Configuration:%4$s\n"
172 " --cpus=CPUS Configure number of CPUs in guest\n"
173 " --ram=BYTES Configure guest's RAM size\n"
174 " --kvm=BOOL Enable use of KVM\n"
175 " --vsock=BOOL Override autodetection of VSOCK support\n"
176 " --vsock-cid=CID Specify the CID to use for the guest's VSOCK support\n"
177 " --tpm=BOOL Enable use of a virtual TPM\n"
178 " --tpm-state=off|auto|PATH\n"
179 " Where to store TPM state\n"
180 " --linux=PATH Specify the linux kernel for direct kernel boot\n"
181 " --initrd=PATH Specify the initrd for direct kernel boot\n"
182 " -n --network-tap Create a TAP device for networking\n"
183 " --network-user-mode Use user mode networking\n"
184 " --secure-boot=BOOL Enable searching for firmware supporting SecureBoot\n"
185 " --firmware=PATH|list Select firmware definition file (or list available)\n"
186 " --discard-disk=BOOL Control processing of discard requests\n"
187 " -G --grow-image=BYTES Grow image file to specified size in bytes\n"
188 " -s --smbios11=STRING Pass an arbitrary SMBIOS Type #11 string to the VM\n"
189 "\n%3$sSystem Identity:%4$s\n"
190 " -M --machine=NAME Set the machine name for the VM\n"
191 " --uuid=UUID Set a specific machine UUID for the VM\n"
192 "\n%3$sProperties:%4$s\n"
193 " --register=BOOLEAN Register VM with systemd-machined\n"
194 " --keep-unit Don't let systemd-machined allocate scope unit for us\n"
195 "\n%3$sUser Namespacing:%4$s\n"
196 " --private-users=UIDBASE[:NUIDS]\n"
197 " Configure the UID/GID range to map into the\n"
198 " virtiofsd namespace\n"
199 "\n%3$sMounts:%4$s\n"
200 " --bind=SOURCE[:TARGET]\n"
201 " Mount a file or directory from the host into the VM\n"
202 " --bind-ro=SOURCE[:TARGET]\n"
203 " Mount a file or directory, but read-only\n"
204 " --extra-drive=PATH Adds an additional disk to the virtual machine\n"
205 "\n%3$sIntegration:%4$s\n"
206 " --forward-journal=FILE|DIR\n"
207 " Forward the VM's journal to the host\n"
208 " --pass-ssh-key=BOOL Create an SSH key to access the VM\n"
209 " --ssh-key-type=TYPE Choose what type of SSH key to pass\n"
210 "\n%3$sInput/Output:%4$s\n"
211 " --console=MODE Console mode (interactive, native, gui)\n"
212 " --background=COLOR Set ANSI color for background\n"
213 "\n%3$sCredentials:%4$s\n"
214 " --set-credential=ID:VALUE\n"
215 " Pass a credential with literal value to the VM\n"
216 " --load-credential=ID:PATH\n"
217 " Load credential for the VM from file or AF_UNIX\n"
219 "\nSee the %2$s for details.\n",
220 program_invocation_short_name
,
230 static int parse_environment(void) {
234 e
= getenv("SYSTEMD_VMSPAWN_NETWORK_MAC");
236 r
= parse_ether_addr(e
, &arg_network_provided_mac
);
238 return log_error_errno(r
, "Failed to parse provided MAC address via environment variable");
244 static int parse_argv(int argc
, char *argv
[]) {
257 ARG_NETWORK_USER_MODE
,
278 static const struct option options
[] = {
279 { "help", no_argument
, NULL
, 'h' },
280 { "version", no_argument
, NULL
, ARG_VERSION
},
281 { "quiet", no_argument
, NULL
, 'q' },
282 { "no-pager", no_argument
, NULL
, ARG_NO_PAGER
},
283 { "image", required_argument
, NULL
, 'i' },
284 { "directory", required_argument
, NULL
, 'D' },
285 { "machine", required_argument
, NULL
, 'M' },
286 { "cpus", required_argument
, NULL
, ARG_CPUS
},
287 { "qemu-smp", required_argument
, NULL
, ARG_CPUS
}, /* Compat alias */
288 { "ram", required_argument
, NULL
, ARG_RAM
},
289 { "qemu-mem", required_argument
, NULL
, ARG_RAM
}, /* Compat alias */
290 { "kvm", required_argument
, NULL
, ARG_KVM
},
291 { "qemu-kvm", required_argument
, NULL
, ARG_KVM
}, /* Compat alias */
292 { "vsock", required_argument
, NULL
, ARG_VSOCK
},
293 { "qemu-vsock", required_argument
, NULL
, ARG_VSOCK
}, /* Compat alias */
294 { "vsock-cid", required_argument
, NULL
, ARG_VSOCK_CID
},
295 { "tpm", required_argument
, NULL
, ARG_TPM
},
296 { "linux", required_argument
, NULL
, ARG_LINUX
},
297 { "initrd", required_argument
, NULL
, ARG_INITRD
},
298 { "console", required_argument
, NULL
, ARG_CONSOLE
},
299 { "qemu-gui", no_argument
, NULL
, ARG_QEMU_GUI
}, /* compat option */
300 { "network-tap", no_argument
, NULL
, 'n' },
301 { "network-user-mode", no_argument
, NULL
, ARG_NETWORK_USER_MODE
},
302 { "uuid", required_argument
, NULL
, ARG_UUID
},
303 { "register", required_argument
, NULL
, ARG_REGISTER
},
304 { "keep-unit", no_argument
, NULL
, ARG_KEEP_UNIT
},
305 { "bind", required_argument
, NULL
, ARG_BIND
},
306 { "bind-ro", required_argument
, NULL
, ARG_BIND_RO
},
307 { "extra-drive", required_argument
, NULL
, ARG_EXTRA_DRIVE
},
308 { "secure-boot", required_argument
, NULL
, ARG_SECURE_BOOT
},
309 { "private-users", required_argument
, NULL
, ARG_PRIVATE_USERS
},
310 { "forward-journal", required_argument
, NULL
, ARG_FORWARD_JOURNAL
},
311 { "pass-ssh-key", required_argument
, NULL
, ARG_PASS_SSH_KEY
},
312 { "ssh-key-type", required_argument
, NULL
, ARG_SSH_KEY_TYPE
},
313 { "set-credential", required_argument
, NULL
, ARG_SET_CREDENTIAL
},
314 { "load-credential", required_argument
, NULL
, ARG_LOAD_CREDENTIAL
},
315 { "firmware", required_argument
, NULL
, ARG_FIRMWARE
},
316 { "discard-disk", required_argument
, NULL
, ARG_DISCARD_DISK
},
317 { "background", required_argument
, NULL
, ARG_BACKGROUND
},
318 { "smbios11", required_argument
, NULL
, 's' },
319 { "grow-image", required_argument
, NULL
, 'G' },
320 { "tpm-state", required_argument
, NULL
, ARG_TPM_STATE
},
330 while ((c
= getopt_long(argc
, argv
, "+hD:i:M:nqs:G:", options
, NULL
)) >= 0)
343 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_directory
);
347 arg_settings_mask
|= SETTING_DIRECTORY
;
351 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_image
);
355 arg_settings_mask
|= SETTING_DIRECTORY
;
360 arg_machine
= mfree(arg_machine
);
362 if (!hostname_is_valid(optarg
, 0))
363 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
364 "Invalid machine name: %s", optarg
);
366 r
= free_and_strdup(&arg_machine
, optarg
);
373 arg_pager_flags
|= PAGER_DISABLE
;
377 r
= free_and_strdup_warn(&arg_cpus
, optarg
);
383 r
= parse_size(optarg
, 1024, &arg_ram
);
385 return log_error_errno(r
, "Failed to parse --ram=%s: %m", optarg
);
389 r
= parse_tristate(optarg
, &arg_kvm
);
391 return log_error_errno(r
, "Failed to parse --kvm=%s: %m", optarg
);
395 r
= parse_tristate(optarg
, &arg_vsock
);
397 return log_error_errno(r
, "Failed to parse --vsock=%s: %m", optarg
);
402 arg_vsock_cid
= VMADDR_CID_ANY
;
406 r
= vsock_parse_cid(optarg
, &cid
);
408 return log_error_errno(r
, "Failed to parse --vsock-cid: %s", optarg
);
409 if (!VSOCK_CID_IS_REGULAR(cid
))
410 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Specified CID is not regular, refusing: %u", cid
);
417 r
= parse_tristate(optarg
, &arg_tpm
);
419 return log_error_errno(r
, "Failed to parse --tpm=%s: %m", optarg
);
423 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_linux
);
429 _cleanup_free_
char *initrd_path
= NULL
;
430 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &initrd_path
);
434 r
= strv_consume(&arg_initrds
, TAKE_PTR(initrd_path
));
442 arg_console_mode
= console_mode_from_string(optarg
);
443 if (arg_console_mode
< 0)
444 return log_error_errno(arg_console_mode
, "Failed to parse specified console mode: %s", optarg
);
449 arg_console_mode
= CONSOLE_GUI
;
453 arg_network_stack
= NETWORK_STACK_TAP
;
456 case ARG_NETWORK_USER_MODE
:
457 arg_network_stack
= NETWORK_STACK_USER
;
461 r
= id128_from_string_nonzero(optarg
, &arg_uuid
);
463 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Machine UUID may not be all zeroes.");
465 return log_error_errno(r
, "Invalid UUID: %s", optarg
);
467 arg_settings_mask
|= SETTING_MACHINE_ID
;
471 r
= parse_boolean_argument("--register=", optarg
, &arg_register
);
478 arg_keep_unit
= true;
483 r
= runtime_mount_parse(&arg_runtime_mounts
, optarg
, c
== ARG_BIND_RO
);
485 return log_error_errno(r
, "Failed to parse --bind(-ro)= argument %s: %m", optarg
);
487 arg_settings_mask
|= SETTING_BIND_MOUNTS
;
490 case ARG_EXTRA_DRIVE
: {
491 _cleanup_free_
char *drive_path
= NULL
;
493 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &drive_path
);
497 r
= strv_consume(&arg_extra_drives
, TAKE_PTR(drive_path
));
503 case ARG_SECURE_BOOT
:
504 r
= parse_tristate(optarg
, &arg_secure_boot
);
506 return log_error_errno(r
, "Failed to parse --secure-boot=%s: %m", optarg
);
509 case ARG_PRIVATE_USERS
:
510 r
= parse_userns_uid_range(optarg
, &arg_uid_shift
, &arg_uid_range
);
515 case ARG_FORWARD_JOURNAL
:
516 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_forward_journal
);
521 case ARG_PASS_SSH_KEY
:
522 r
= parse_boolean_argument("--pass-ssh-key=", optarg
, &arg_pass_ssh_key
);
527 case ARG_SSH_KEY_TYPE
:
528 if (!string_is_safe(optarg
))
529 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Invalid value for --arg-ssh-key-type=: %s", optarg
);
531 r
= free_and_strdup_warn(&arg_ssh_key_type
, optarg
);
536 case ARG_SET_CREDENTIAL
: {
537 r
= machine_credential_set(&arg_credentials
, optarg
);
540 arg_settings_mask
|= SETTING_CREDENTIALS
;
544 case ARG_LOAD_CREDENTIAL
: {
545 r
= machine_credential_load(&arg_credentials
, optarg
);
549 arg_settings_mask
|= SETTING_CREDENTIALS
;
554 if (streq(optarg
, "list")) {
555 _cleanup_strv_free_
char **l
= NULL
;
557 r
= list_ovmf_config(&l
);
559 return log_error_errno(r
, "Failed to list firmwares: %m");
562 fputstrv(stdout
, l
, "\n", &nl
);
569 if (!isempty(optarg
) && !path_is_absolute(optarg
) && !startswith(optarg
, "./"))
570 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Absolute path or path starting with './' required.");
572 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_firmware
);
578 case ARG_DISCARD_DISK
:
579 r
= parse_boolean_argument("--discard-disk=", optarg
, &arg_discard_disk
);
585 r
= free_and_strdup_warn(&arg_background
, optarg
);
591 if (isempty(optarg
)) {
592 arg_smbios11
= strv_free(arg_smbios11
);
596 if (!utf8_is_valid(optarg
))
597 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "SMBIOS Type 11 string is not UTF-8 clean, refusing: %s", optarg
);
599 if (strv_extend(&arg_smbios11
, optarg
) < 0)
605 if (isempty(optarg
)) {
610 r
= parse_size(optarg
, 1024, &arg_grow_image
);
612 return log_error_errno(r
, "Failed to parse --grow-image= parameter: %s", optarg
);
617 if (path_is_absolute(optarg
) && path_is_valid(optarg
)) {
618 r
= parse_path_argument(optarg
, /* suppress_root= */ false, &arg_tpm_state_path
);
622 arg_tpm_state_mode
= TPM_STATE_PATH
;
626 r
= isempty(optarg
) ? false :
627 streq(optarg
, "auto") ? true :
628 parse_boolean(optarg
);
630 return log_error_errno(r
, "Failed to parse --tpm-state= parameter: %s", optarg
);
632 arg_tpm_state_mode
= r
? TPM_STATE_AUTO
: TPM_STATE_OFF
;
633 arg_tpm_state_path
= mfree(arg_tpm_state_path
);
640 assert_not_reached();
644 arg_kernel_cmdline_extra
= strv_copy(argv
+ optind
);
645 if (!arg_kernel_cmdline_extra
)
648 arg_settings_mask
|= SETTING_START_MODE
;
654 static int open_vsock(void) {
655 static const union sockaddr_union bind_addr
= {
656 .vm
.svm_family
= AF_VSOCK
,
657 .vm
.svm_cid
= VMADDR_CID_ANY
,
658 .vm
.svm_port
= VMADDR_PORT_ANY
,
661 _cleanup_close_
int vsock_fd
= socket(AF_VSOCK
, SOCK_STREAM
|SOCK_CLOEXEC
, 0);
663 return log_error_errno(errno
, "Failed to open AF_VSOCK socket: %m");
665 if (bind(vsock_fd
, &bind_addr
.sa
, sizeof(bind_addr
.vm
)) < 0)
666 return log_error_errno(errno
, "Failed to bind to VSOCK address %u:%u: %m", bind_addr
.vm
.svm_cid
, bind_addr
.vm
.svm_port
);
668 if (listen(vsock_fd
, SOMAXCONN_DELUXE
) < 0)
669 return log_error_errno(errno
, "Failed to listen on VSOCK: %m");
671 return TAKE_FD(vsock_fd
);
674 typedef struct NotifyConnectionData
{
675 char buffer
[NOTIFY_BUFFER_MAX
+1];
678 } NotifyConnectionData
;
680 static int read_vsock_notify(NotifyConnectionData
*d
, int fd
) {
687 assert(d
->full
< sizeof(d
->buffer
));
689 ssize_t n
= read(fd
, d
->buffer
+ d
->full
, sizeof(d
->buffer
) - d
->full
);
691 if (ERRNO_IS_TRANSIENT(errno
))
694 return log_error_errno(errno
, "Failed to read notification message: %m");
696 if (n
== 0) /* We hit EOF! Let's parse this */
699 if ((size_t) n
>= sizeof(d
->buffer
) - d
->full
)
700 return log_error_errno(SYNTHETIC_ERRNO(EBADMSG
), "Received notify message exceeded maximum size.");
705 /* We reached EOF, now parse the thing */
706 assert(d
->full
< sizeof(d
->buffer
));
707 d
->buffer
[d
->full
] = 0;
709 _cleanup_strv_free_
char **tags
= strv_split(d
->buffer
, "\n\r");
714 _cleanup_free_
char *j
= strv_join(tags
, " ");
715 log_debug("Received notification message with tags: %s", strnull(j
));
718 if (strv_contains(tags
, "READY=1")) {
719 r
= sd_notify(false, "READY=1");
721 log_warning_errno(r
, "Failed to send readiness notification, ignoring: %m");
724 const char *p
= strv_find_startswith(tags
, "STATUS=");
726 (void) sd_notifyf(false, "STATUS=VM running: %s", p
);
728 p
= strv_find_startswith(tags
, "EXIT_STATUS=");
730 r
= safe_atoi(p
, d
->exit_status
);
732 log_warning_errno(r
, "Failed to parse exit status from %s, ignoring: %m", p
);
738 static int vmspawn_dispatch_notify_fd(sd_event_source
*source
, int fd
, uint32_t revents
, void *userdata
) {
739 NotifyConnectionData
*d
= ASSERT_PTR(userdata
);
745 r
= read_vsock_notify(d
, fd
);
749 /* If we are done or are seeing an error we'll turn off floating mode, which means the event
750 * loop itself won't keep the event source pinned anymore, and since no one else (hopefully!)
751 * keeps a reference anymore the whole thing will be released once we exit from this handler
754 q
= sd_event_source_set_floating(source
, false);
756 log_warning_errno(q
, "Failed to disable floating mode of event source, ignoring: %m");
764 static int vmspawn_dispatch_vsock_connections(sd_event_source
*source
, int fd
, uint32_t revents
, void *userdata
) {
765 _cleanup_close_
int conn_fd
= -EBADF
;
771 if (revents
!= EPOLLIN
) {
772 log_warning("Got unexpected poll event for VSOCK fd.");
776 conn_fd
= accept4(fd
, NULL
, NULL
, SOCK_CLOEXEC
|SOCK_NONBLOCK
);
778 if (ERRNO_IS_TRANSIENT(errno
))
781 log_warning_errno(errno
, "Failed to accept connection from VSOCK connection, ignoring: %m");
785 event
= sd_event_source_get_event(source
);
787 return log_error_errno(SYNTHETIC_ERRNO(ENOENT
), "Failed to retrieve event from event source, exiting task");
789 _cleanup_free_ NotifyConnectionData
*d
= new(NotifyConnectionData
, 1);
793 *d
= (NotifyConnectionData
) {
794 .exit_status
= userdata
,
797 /* add a new floating task to read from the connection */
798 _cleanup_(sd_event_source_unrefp
) sd_event_source
*s
= NULL
;
799 r
= sd_event_add_io(event
, &s
, conn_fd
, EPOLLIN
, vmspawn_dispatch_notify_fd
, d
);
801 return log_error_errno(r
, "Failed to allocate notify connection event source: %m");
803 r
= sd_event_source_set_io_fd_own(s
, true);
805 return log_error_errno(r
, "Failed to pass ownership of notify to event source: %m");
806 TAKE_FD(conn_fd
); /* conn_fd is now owned by the event loop so don't clean it up */
808 r
= sd_event_source_set_destroy_callback(s
, free
);
810 return log_error_errno(r
, "Failed to set destroy callback on event source: %m");
811 TAKE_PTR(d
); /* The data object will now automatically be freed by the event source when it goes away */
813 /* Finally, make sure the event loop pins the event source */
814 r
= sd_event_source_set_floating(s
, true);
816 return log_error_errno(r
, "Failed to set event source to floating mode: %m");
818 (void) sd_event_source_set_description(s
, "vmspawn-notify-socket-connection");
823 static int setup_notify_parent(sd_event
*event
, int fd
, int *exit_status
, sd_event_source
**ret_notify_event_source
) {
829 assert(ret_notify_event_source
);
831 r
= sd_event_add_io(event
, ret_notify_event_source
, fd
, EPOLLIN
, vmspawn_dispatch_vsock_connections
, exit_status
);
833 return log_error_errno(r
, "Failed to allocate notify socket event source: %m");
835 (void) sd_event_source_set_description(*ret_notify_event_source
, "vmspawn-notify-socket-listen");
840 static int bus_open_in_machine(sd_bus
**ret
, unsigned cid
, unsigned port
, const char *private_key_path
) {
841 _cleanup_(sd_bus_flush_close_unrefp
) sd_bus
*bus
= NULL
;
842 _cleanup_free_
char *ssh_escaped
= NULL
, *bus_address
= NULL
;
843 char port_str
[DECIMAL_STR_MAX(unsigned)], cid_str
[DECIMAL_STR_MAX(unsigned)];
847 assert(private_key_path
);
849 r
= sd_bus_new(&bus
);
853 const char *ssh
= secure_getenv("SYSTEMD_SSH") ?: "ssh";
854 ssh_escaped
= bus_address_escape(ssh
);
858 xsprintf(port_str
, "%u", port
);
859 xsprintf(cid_str
, "%u", cid
);
861 bus_address
= strjoin(
862 "unixexec:path=", ssh_escaped
,
863 /* -x: Disable X11 forwarding
864 * -T: Disable PTY allocation */
866 ",argv2=-o,argv3=IdentitiesOnly yes",
867 ",argv4=-o,argv5=IdentityFile=", private_key_path
,
868 ",argv6=-p,argv7=", port_str
,
870 ",argv9=root@vsock/", cid_str
,
871 ",argv10=systemd-stdio-bridge"
876 free_and_replace(bus
->address
, bus_address
);
877 bus
->bus_client
= true;
879 bus
->runtime_scope
= RUNTIME_SCOPE_SYSTEM
;
880 bus
->is_local
= false;
882 r
= sd_bus_start(bus
);
886 *ret
= TAKE_PTR(bus
);
890 static int shutdown_vm_graceful(sd_event_source
*s
, const struct signalfd_siginfo
*si
, void *userdata
) {
891 _cleanup_(sd_bus_flush_close_unrefp
) sd_bus
*bus
= NULL
;
892 _cleanup_(sd_bus_error_free
) sd_bus_error error
= SD_BUS_ERROR_NULL
;
893 ShutdownInfo
*shutdown_info
= ASSERT_PTR(userdata
);
894 SSHInfo
*ssh_info
= ASSERT_PTR(shutdown_info
->ssh_info
);
900 /* If we don't have the vsock address and the SSH key, go to fallback */
901 if (ssh_info
->cid
== VMADDR_CID_ANY
|| !ssh_info
->private_key_path
)
905 * In order we should try:
906 * 1. PowerOff from logind respects inhibitors but might not be available
907 * 2. PowerOff from systemd heavy handed but should always be available
908 * 3. SIGKILL qemu (this waits for qemu to die still)
909 * 4. kill ourselves by shutting down our event loop (this does not wait for qemu)
912 r
= bus_open_in_machine(&bus
, ssh_info
->cid
, ssh_info
->port
, ssh_info
->private_key_path
);
914 log_warning_errno(r
, "Failed to connect to VM to forward signal, ignoring: %m");
918 r
= bus_call_method(bus
, bus_login_mgr
, "PowerOff", &error
, /* ret_reply= */ NULL
, "b", false);
920 log_info("Requested powering off VM through D-Bus.");
924 log_warning_errno(r
, "Failed to shutdown VM via logind, ignoring: %s", bus_error_message(&error
, r
));
925 sd_bus_error_free(&error
);
927 r
= bus_call_method(bus
, bus_systemd_mgr
, "PowerOff", &error
, /* ret_reply= */ NULL
, /* types= */ NULL
);
929 log_info("Requested powering off VM through D-Bus.");
933 log_warning_errno(r
, "Failed to shutdown VM via systemd, ignoring: %s", bus_error_message(&error
, r
));
936 /* at this point SSH clearly isn't working so don't try it again */
937 TAKE_STRUCT(*ssh_info
);
939 /* Backup method to shut down the VM when D-BUS access over SSH is not available */
940 if (shutdown_info
->pidref
) {
941 r
= pidref_kill(shutdown_info
->pidref
, SIGKILL
);
943 log_warning_errno(r
, "Failed to kill qemu, terminating: %m");
945 TAKE_PTR(shutdown_info
->pidref
);
946 log_info("Trying to halt qemu. Send SIGTERM again to trigger vmspawn to immediately terminate.");
951 return sd_event_exit(sd_event_source_get_event(s
), 0);
954 static int on_child_exit(sd_event_source
*s
, const siginfo_t
*si
, void *userdata
) {
955 sd_event_exit(sd_event_source_get_event(s
), 0);
959 static int cmdline_add_vsock(char ***cmdline
, int vsock_fd
) {
962 r
= strv_extend(cmdline
, "-smbios");
966 union sockaddr_union addr
;
967 socklen_t addr_len
= sizeof addr
.vm
;
968 if (getsockname(vsock_fd
, &addr
.sa
, &addr_len
) < 0)
971 assert(addr_len
>= sizeof addr
.vm
);
972 assert(addr
.vm
.svm_family
== AF_VSOCK
);
974 r
= strv_extendf(cmdline
, "type=11,value=io.systemd.credential:vmm.notify_socket=vsock-stream:%u:%u", (unsigned) VMADDR_CID_HOST
, addr
.vm
.svm_port
);
981 static int cmdline_add_kernel_cmdline(char ***cmdline
, const char *kernel
) {
984 if (strv_isempty(arg_kernel_cmdline_extra
))
987 _cleanup_free_
char *kcl
= strv_join(arg_kernel_cmdline_extra
, " ");
992 if (strv_extend_many(cmdline
, "-append", kcl
) < 0)
995 if (!ARCHITECTURE_SUPPORTS_SMBIOS
) {
996 log_warning("Cannot append extra args to kernel cmdline, native architecture doesn't support SMBIOS, ignoring.");
1000 _cleanup_free_
char *escaped_kcl
= NULL
;
1001 escaped_kcl
= escape_qemu_value(kcl
);
1005 if (strv_extend(cmdline
, "-smbios") < 0)
1008 if (strv_extendf(cmdline
, "type=11,value=io.systemd.stub.kernel-cmdline-extra=%s", escaped_kcl
) < 0)
1011 if (strv_extend(cmdline
, "-smbios") < 0)
1014 if (strv_extendf(cmdline
, "type=11,value=io.systemd.boot.kernel-cmdline-extra=%s", escaped_kcl
) < 0)
1021 static int cmdline_add_smbios11(char ***cmdline
) {
1024 if (strv_isempty(arg_smbios11
))
1027 if (!ARCHITECTURE_SUPPORTS_SMBIOS
) {
1028 log_warning("Cannot issue SMBIOS Type #11 strings, native architecture doesn't support SMBIOS, ignoring.");
1032 STRV_FOREACH(i
, arg_smbios11
) {
1033 _cleanup_free_
char *escaped
= NULL
;
1034 escaped
= escape_qemu_value(*i
);
1038 if (strv_extend(cmdline
, "-smbios") < 0)
1041 if (strv_extendf(cmdline
, "type=11,value=%s", escaped
) < 0)
1048 static int start_tpm(
1052 const char *runtime_dir
,
1053 char **ret_listen_address
) {
1060 assert(runtime_dir
);
1062 _cleanup_free_
char *scope_prefix
= NULL
;
1063 r
= unit_name_to_prefix(scope
, &scope_prefix
);
1065 return log_error_errno(r
, "Failed to strip .scope suffix from scope: %m");
1067 _cleanup_(socket_service_pair_done
) SocketServicePair ssp
= {
1068 .socket_type
= SOCK_STREAM
,
1071 ssp
.unit_name_prefix
= strjoin(scope_prefix
, "-tpm");
1072 if (!ssp
.unit_name_prefix
)
1075 ssp
.listen_address
= path_join(runtime_dir
, "tpm.sock");
1076 if (!ssp
.listen_address
)
1079 _cleanup_free_
char *transient_state_dir
= NULL
;
1080 const char *state_dir
;
1081 if (arg_tpm_state_path
)
1082 state_dir
= arg_tpm_state_path
;
1084 transient_state_dir
= path_join(runtime_dir
, ssp
.unit_name_prefix
);
1085 if (!transient_state_dir
)
1088 state_dir
= transient_state_dir
;
1091 r
= mkdir_p(state_dir
, 0700);
1093 return log_error_errno(r
, "Failed to create TPM state directory '%s': %m", state_dir
);
1095 _cleanup_free_
char *swtpm_setup
= NULL
;
1096 r
= find_executable("swtpm_setup", &swtpm_setup
);
1098 return log_error_errno(r
, "Failed to find swtpm_setup binary: %m");
1100 ssp
.exec_start_pre
= strv_new(swtpm_setup
, "--tpm-state", state_dir
, "--tpm2", "--pcr-banks", "sha256", "--not-overwrite");
1101 if (!ssp
.exec_start_pre
)
1104 ssp
.exec_start
= strv_new(swtpm
, "socket", "--tpm2", "--tpmstate");
1105 if (!ssp
.exec_start
)
1108 r
= strv_extendf(&ssp
.exec_start
, "dir=%s", state_dir
);
1112 r
= strv_extend_many(&ssp
.exec_start
, "--ctrl", "type=unixio,fd=3");
1116 r
= start_socket_service_pair(bus
, scope
, &ssp
);
1120 if (ret_listen_address
)
1121 *ret_listen_address
= TAKE_PTR(ssp
.listen_address
);
1126 static int start_systemd_journal_remote(
1130 const char *sd_journal_remote
,
1131 char **ret_listen_address
) {
1137 assert(sd_journal_remote
);
1139 _cleanup_free_
char *scope_prefix
= NULL
;
1140 r
= unit_name_to_prefix(scope
, &scope_prefix
);
1142 return log_error_errno(r
, "Failed to strip .scope suffix from scope: %m");
1144 _cleanup_(socket_service_pair_done
) SocketServicePair ssp
= {
1145 .socket_type
= SOCK_STREAM
,
1148 ssp
.unit_name_prefix
= strjoin(scope_prefix
, "-forward-journal");
1149 if (!ssp
.unit_name_prefix
)
1152 if (asprintf(&ssp
.listen_address
, "vsock:2:%u", port
) < 0)
1155 ssp
.exec_start
= strv_new(
1157 "--output", arg_forward_journal
,
1158 "--split-mode", endswith(arg_forward_journal
, ".journal") ? "none" : "host");
1159 if (!ssp
.exec_start
)
1162 r
= start_socket_service_pair(bus
, scope
, &ssp
);
1166 if (ret_listen_address
)
1167 *ret_listen_address
= TAKE_PTR(ssp
.listen_address
);
1172 static int discover_root(char **ret
) {
1174 _cleanup_(dissected_image_unrefp
) DissectedImage
*image
= NULL
;
1175 _cleanup_free_
char *root
= NULL
;
1179 r
= dissect_image_file_and_warn(
1182 /* mount_options= */ NULL
,
1183 /* image_policy= */ NULL
,
1184 /* image_filter= */ NULL
,
1190 if (image
->partitions
[PARTITION_ROOT
].found
)
1191 root
= strjoin("root=PARTUUID=", SD_ID128_TO_UUID_STRING(image
->partitions
[PARTITION_ROOT
].uuid
));
1192 else if (image
->partitions
[PARTITION_USR
].found
)
1193 root
= strjoin("mount.usr=PARTUUID=", SD_ID128_TO_UUID_STRING(image
->partitions
[PARTITION_USR
].uuid
));
1195 return log_error_errno(SYNTHETIC_ERRNO(ENOENT
), "Cannot perform a direct kernel boot without a root or usr partition, refusing");
1200 *ret
= TAKE_PTR(root
);
1204 static int find_virtiofsd(char **ret
) {
1206 _cleanup_free_
char *virtiofsd
= NULL
;
1210 r
= find_executable("virtiofsd", &virtiofsd
);
1211 if (r
< 0 && r
!= -ENOENT
)
1212 return log_error_errno(r
, "Error while searching for virtiofsd: %m");
1215 FOREACH_STRING(file
, "/usr/libexec/virtiofsd", "/usr/lib/virtiofsd") {
1216 if (access(file
, X_OK
) >= 0) {
1217 virtiofsd
= strdup(file
);
1223 if (!IN_SET(errno
, ENOENT
, EACCES
))
1224 return log_error_errno(errno
, "Error while searching for virtiofsd: %m");
1229 return log_error_errno(SYNTHETIC_ERRNO(ENOENT
), "Failed to find virtiofsd binary.");
1231 *ret
= TAKE_PTR(virtiofsd
);
1235 static int start_virtiofsd(
1238 const char *directory
,
1240 const char *runtime_dir
,
1241 char **ret_listen_address
) {
1243 static unsigned virtiofsd_instance
= 0;
1249 assert(runtime_dir
);
1251 _cleanup_free_
char *virtiofsd
= NULL
;
1252 r
= find_virtiofsd(&virtiofsd
);
1256 _cleanup_free_
char *scope_prefix
= NULL
;
1257 r
= unit_name_to_prefix(scope
, &scope_prefix
);
1259 return log_error_errno(r
, "Failed to strip .scope suffix from scope: %m");
1261 _cleanup_(socket_service_pair_done
) SocketServicePair ssp
= {
1262 .socket_type
= SOCK_STREAM
,
1265 if (asprintf(&ssp
.unit_name_prefix
, "%s-virtiofsd-%u", scope_prefix
, virtiofsd_instance
++) < 0)
1268 if (asprintf(&ssp
.listen_address
, "%s/sock-%"PRIx64
, runtime_dir
, random_u64()) < 0)
1271 /* QEMU doesn't support submounts so don't announce them */
1272 ssp
.exec_start
= strv_new(virtiofsd
, "--shared-dir", directory
, "--xattr", "--fd", "3", "--no-announce-submounts");
1273 if (!ssp
.exec_start
)
1276 if (uidmap
&& arg_uid_shift
!= UID_INVALID
) {
1277 r
= strv_extend(&ssp
.exec_start
, "--uid-map");
1281 r
= strv_extendf(&ssp
.exec_start
, ":0:" UID_FMT
":" UID_FMT
":", arg_uid_shift
, arg_uid_range
);
1285 r
= strv_extend(&ssp
.exec_start
, "--gid-map");
1289 r
= strv_extendf(&ssp
.exec_start
, ":0:" GID_FMT
":" GID_FMT
":", arg_uid_shift
, arg_uid_range
);
1294 r
= start_socket_service_pair(bus
, scope
, &ssp
);
1298 if (ret_listen_address
)
1299 *ret_listen_address
= TAKE_PTR(ssp
.listen_address
);
/* Appends a discovered root file system argument to arg_kernel_cmdline_extra, unless the extra kernel
 * command line already has an entry starting with "root=" or "mount.usr=".
 *
 * The value comes from discover_root(); the resulting string is consumed by (i.e. ownership moves into)
 * arg_kernel_cmdline_extra.
 *
 * NOTE(review): the declaration of `r`, the error checks and the return statements are not visible in
 * this extract. */
1304 static int kernel_cmdline_maybe_append_root(void) {
/* An explicit root= or mount.usr= from the user takes precedence over auto-discovery. */
1306 bool cmdline_contains_root
= strv_find_startswith(arg_kernel_cmdline_extra
, "root=")
1307 || strv_find_startswith(arg_kernel_cmdline_extra
, "mount.usr=");
1309 if (!cmdline_contains_root
) {
1310 _cleanup_free_
char *root
= NULL
;
1312 r
= discover_root(&root
);
1316 log_debug("Determined root file system %s from dissected image", root
);
1318 r
= strv_consume(&arg_kernel_cmdline_extra
, TAKE_PTR(root
));
/* Picks a kernel (and, for .conf entries, its initrds) from the boot loader configuration below `root`.
 *
 * The boot configuration is loaded with "<root>/efi" as ESP path and "<root>/boot" as XBOOTLDR path.
 * The default entry is looked up first; if none is usable, the entries are scanned for a unified
 * kernel image and then for a .conf entry. If nothing is found the function fails with a synthetic
 * ENOENT.
 *
 * On success ownership of the kernel path goes to *ret_linux and of the initrd list to *ret_initrds
 * (the list stays NULL for unified entries, which only set the kernel path).
 *
 * NOTE(review): error checks, the bodies of the two FOREACH_ARRAY scans and the return statements are
 * not visible in this extract; the scans presumably assign `boot_entry` and stop at the first match. */
1326 static int discover_boot_entry(const char *root
, char **ret_linux
, char ***ret_initrds
) {
1327 _cleanup_(boot_config_free
) BootConfig config
= BOOT_CONFIG_NULL
;
1328 _cleanup_free_
char *esp_path
= NULL
, *xbootldr_path
= NULL
;
1333 assert(ret_initrds
);
1335 esp_path
= path_join(root
, "efi");
1339 xbootldr_path
= path_join(root
, "boot");
1343 r
= boot_config_load(&config
, esp_path
, xbootldr_path
);
1347 r
= boot_config_select_special_entries(&config
, /* skip_efivars= */ true);
1349 return log_error_errno(r
, "Failed to find special boot config entries: %m");
1351 const BootEntry
*boot_entry
= boot_config_default_entry(&config
);
/* Only unified and .conf entries are handled further down; other default entry types are filtered
 * here (the statement guarded by this condition is not visible in this extract). */
1353 if (boot_entry
&& !IN_SET(boot_entry
->type
, BOOT_ENTRY_UNIFIED
, BOOT_ENTRY_CONF
))
1356 /* If we cannot determine a default entry search for UKIs (Type #2 EFI Unified Kernel Images)
1357 * then .conf files (Type #1 Boot Loader Specification Entries).
1358 * https://uapi-group.org/specifications/specs/boot_loader_specification */
1360 FOREACH_ARRAY(entry
, config
.entries
, config
.n_entries
)
1361 if (entry
->type
== BOOT_ENTRY_UNIFIED
) {
1367 FOREACH_ARRAY(entry
, config
.entries
, config
.n_entries
)
1368 if (entry
->type
== BOOT_ENTRY_CONF
) {
1374 return log_error_errno(SYNTHETIC_ERRNO(ENOENT
), "Failed to discover any boot entries.");
1376 log_debug("Discovered boot entry %s (%s)", boot_entry
->id
, boot_entry_type_to_string(boot_entry
->type
));
1378 _cleanup_free_
char *linux_kernel
= NULL
;
1379 _cleanup_strv_free_
char **initrds
= NULL
;
/* Both entry types store paths relative to the entry's own root, hence the path_join() calls. */
1380 if (boot_entry
->type
== BOOT_ENTRY_UNIFIED
) {
1381 linux_kernel
= path_join(boot_entry
->root
, boot_entry
->kernel
);
1384 } else if (boot_entry
->type
== BOOT_ENTRY_CONF
) {
1385 linux_kernel
= path_join(boot_entry
->root
, boot_entry
->kernel
);
/* .conf entries list their initrds separately; collect them as full paths. */
1389 STRV_FOREACH(initrd
, boot_entry
->initrd
) {
1390 _cleanup_free_
char *initrd_path
= path_join(boot_entry
->root
, *initrd
);
1394 r
= strv_consume(&initrds
, TAKE_PTR(initrd_path
));
1399 assert_not_reached();
1401 *ret_linux
= TAKE_PTR(linux_kernel
);
1402 *ret_initrds
= TAKE_PTR(initrds
);
/* Concatenates all files listed in arg_initrds into one newly created temporary file.
 *
 * The output file name is generated with the "vmspawn-initrd-" prefix and the file is created
 * exclusively with mode 0600. Before each initrd is copied, the write offset is advanced to the next
 * multiple of 4 bytes. On success ownership of the merged file's path is moved to *ret; on failure the
 * rm_rf_physical_and_freep cleanup attached to `merged_initrd` runs.
 *
 * NOTE(review): the declarations of `r`, `off` and `to_seek`, the error-check conditions and the final
 * return are not visible in this extract. */
1407 static int merge_initrds(char **ret
) {
1408 _cleanup_(rm_rf_physical_and_freep
) char *merged_initrd
= NULL
;
1409 _cleanup_close_
int ofd
= -EBADF
;
1414 r
= tempfn_random_child(NULL
, "vmspawn-initrd-", &merged_initrd
);
1416 return log_error_errno(r
, "Failed to create temporary file: %m");
1418 ofd
= open(merged_initrd
, O_WRONLY
|O_CREAT
|O_EXCL
|O_CLOEXEC
, 0600);
1420 return log_error_errno(errno
, "Failed to create regular file %s: %m", merged_initrd
);
1422 STRV_FOREACH(i
, arg_initrds
) {
1423 _cleanup_close_
int ifd
= -EBADF
;
/* Query the current write position of the output file without moving it. */
1426 off
= lseek(ofd
, 0, SEEK_CUR
);
1428 return log_error_errno(errno
, "Failed to get file offset of %s: %m", merged_initrd
);
/* Number of padding bytes (0..3) needed to reach the next 4-byte boundary. */
1430 to_seek
= (4 - (off
% 4)) % 4;
1432 /* seek to assure 4 byte alignment for each initrd */
1433 if (to_seek
!= 0 && lseek(ofd
, to_seek
, SEEK_CUR
) < 0)
1434 return log_error_errno(errno
, "Failed to seek %s: %m", merged_initrd
);
1436 ifd
= open(*i
, O_RDONLY
|O_CLOEXEC
);
1438 return log_error_errno(errno
, "Failed to open %s: %m", *i
);
/* UINT64_MAX: copy the whole input file; COPY_REFLINK is passed as the copy flag. */
1440 r
= copy_bytes(ifd
, ofd
, UINT64_MAX
, COPY_REFLINK
);
1442 return log_error_errno(r
, "Failed to copy bytes from %s to %s: %m", *i
, merged_initrd
);
1445 *ret
= TAKE_PTR(merged_initrd
);
/* Generates an SSH key pair at `key_path` by running the ssh-keygen binary in a forked child.
 *
 * The command line is "ssh-keygen -f <key_path> -N ''" (empty passphrase), extended with
 * "-t <key_type>". With debug logging enabled the full command line is logged before execution.
 * The fork flags include FORK_WAIT; the child calls execv() and exits with EXIT_FAILURE if that fails.
 *
 * NOTE(review): the declaration of `r`, the error checks, the fork call itself and the return
 * statements are not visible in this extract; whether "-t" is only added for a non-NULL key_type
 * cannot be confirmed from here. */
1449 static int generate_ssh_keypair(const char *key_path
, const char *key_type
) {
1450 _cleanup_free_
char *ssh_keygen
= NULL
;
1451 _cleanup_strv_free_
char **cmdline
= NULL
;
1456 r
= find_executable("ssh-keygen", &ssh_keygen
);
1458 return log_error_errno(r
, "Failed to find ssh-keygen: %m");
1460 cmdline
= strv_new(ssh_keygen
, "-f", key_path
, /* don't encrypt the key */ "-N", "");
1465 r
= strv_extend_many(&cmdline
, "-t", key_type
);
1470 if (DEBUG_LOGGING
) {
1471 _cleanup_free_
char *joined
= quote_command_line(cmdline
, SHELL_ESCAPE_EMPTY
);
1475 log_debug("Executing: %s", joined
);
1480 FORK_WAIT
|FORK_RESET_SIGNALS
|FORK_CLOSE_ALL_FDS
|FORK_DEATHSIG_SIGTERM
|FORK_LOG
|FORK_RLIMIT_NOFILE_SAFE
|FORK_REARRANGE_STDIO
,
/* Child side: replace the process image; the lines below only run if execv() failed. */
1485 execv(ssh_keygen
, cmdline
);
1486 log_error_errno(errno
, "Failed to execve %s: %m", ssh_keygen
);
1487 _exit(EXIT_FAILURE
);
/* Grows the regular file at `path` to at least `size` bytes (rounded up in 4K units).
 *
 * The requested size is converted to a count of 4096-byte units and rejected with a synthetic EINVAL
 * if that count could not be multiplied back by 4096 without overflowing uint64_t. The file is opened
 * read/write with XO_REGULAR and its current size is read with fstat(); if it is already at least as
 * large as requested, nothing is changed and only a debug message is logged. Otherwise the file is
 * extended with ftruncate(), synced, and the old and new sizes are logged.
 *
 * NOTE(review): the declarations of `r` and `st`, several error-check conditions, the statement that
 * converts the unit count back to bytes, the sync call and the return statements are not visible in
 * this extract. */
1493 static int grow_image(const char *path
, uint64_t size
) {
1501 /* Round up to multiple of 4K */
1502 size
= DIV_ROUND_UP(size
, 4096);
/* Overflow guard for scaling the unit count back to a byte size. */
1503 if (size
> UINT64_MAX
/ 4096)
1504 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Specified file size too large, refusing.");
1507 _cleanup_close_
int fd
= xopenat_full(AT_FDCWD
, path
, O_RDWR
|O_CLOEXEC
, XO_REGULAR
, /* mode= */ 0);
1509 return log_error_errno(fd
, "Failed to open image file '%s': %m", path
);
1512 if (fstat(fd
, &st
) < 0)
1513 return log_error_errno(errno
, "Failed to stat '%s': %m", path
);
/* Never shrink: an image that is already large enough is left untouched. */
1514 if ((uint64_t) st
.st_size
>= size
) {
1515 log_debug("Not growing image '%s' to %s, size already at %s.", path
,
1516 FORMAT_BYTES(size
), FORMAT_BYTES(st
.st_size
));
1520 if (ftruncate(fd
, size
) < 0)
1521 return log_error_errno(errno
, "Failed to grow image file '%s' from %s to %s: %m", path
,
1522 FORMAT_BYTES(st
.st_size
), FORMAT_BYTES(size
));
1526 return log_error_errno(r
, "Failed to sync image file '%s' after growing to %s: %m", path
, FORMAT_BYTES(size
));
1529 log_info("Image file '%s' successfully grown from %s to %s.", path
, FORMAT_BYTES(st
.st_size
), FORMAT_BYTES(size
));
1534 static int run_virtual_machine(int kvm_device_fd
, int vhost_device_fd
) {
1535 _cleanup_(ovmf_config_freep
) OvmfConfig
*ovmf_config
= NULL
;
1536 _cleanup_(sd_bus_flush_close_unrefp
) sd_bus
*bus
= NULL
;
1537 _cleanup_free_
char *machine
= NULL
, *qemu_binary
= NULL
, *mem
= NULL
, *trans_scope
= NULL
, *kernel
= NULL
;
1538 _cleanup_(rm_rf_physical_and_freep
) char *ssh_private_key_path
= NULL
, *ssh_public_key_path
= NULL
;
1539 _cleanup_close_
int notify_sock_fd
= -EBADF
;
1540 _cleanup_strv_free_
char **cmdline
= NULL
;
1541 _cleanup_free_
int *pass_fds
= NULL
;
1542 size_t n_pass_fds
= 0;
1543 const char *accel
, *shm
;
1547 r
= sd_bus_default_system(&bus
);
1549 r
= sd_bus_default_user(&bus
);
1551 return log_error_errno(r
, "Failed to connect to systemd bus: %m");
1553 r
= start_transient_scope(bus
, arg_machine
, /* allow_pidfd= */ true, &trans_scope
);
1557 bool use_kvm
= arg_kvm
> 0;
1559 r
= qemu_check_kvm_support();
1561 return log_error_errno(r
, "Failed to check for KVM support: %m");
1566 r
= load_ovmf_config(arg_firmware
, &ovmf_config
);
1568 r
= find_ovmf_config(arg_secure_boot
, &ovmf_config
);
1570 return log_error_errno(r
, "Failed to find OVMF config: %m");
1572 /* only warn if the user hasn't disabled secureboot */
1573 if (!ovmf_config
->supports_sb
&& arg_secure_boot
)
1574 log_warning("Couldn't find OVMF firmware blob with Secure Boot support, "
1575 "falling back to OVMF firmware blobs without Secure Boot support.");
1577 shm
= arg_directory
|| arg_runtime_mounts
.n_mounts
!= 0 ? ",memory-backend=mem" : "";
1578 if (ARCHITECTURE_SUPPORTS_SMM
)
1579 machine
= strjoin("type=" QEMU_MACHINE_TYPE
",smm=", on_off(ovmf_config
->supports_sb
), shm
);
1581 machine
= strjoin("type=" QEMU_MACHINE_TYPE
, shm
);
1586 kernel
= strdup(arg_linux
);
1589 } else if (arg_directory
) {
1590 /* a kernel is required for directory type images so attempt to locate a UKI under /boot and /efi */
1591 r
= discover_boot_entry(arg_directory
, &kernel
, &arg_initrds
);
1593 return log_error_errno(r
, "Failed to locate UKI in directory type image, please specify one with --linux=.");
1595 log_debug("Discovered UKI image at %s", kernel
);
1598 r
= find_qemu_binary(&qemu_binary
);
1599 if (r
== -EOPNOTSUPP
)
1600 return log_error_errno(r
, "Native architecture is not supported by qemu.");
1602 return log_error_errno(r
, "Failed to find QEMU binary: %m");
1604 if (asprintf(&mem
, "%" PRIu64
"M", DIV_ROUND_UP(arg_ram
, U64_MB
)) < 0)
1609 "-machine", machine
,
1610 "-smp", arg_cpus
?: "1",
1612 "-object", "rng-random,filename=/dev/urandom,id=rng0",
1613 "-device", "virtio-rng-pci,rng=rng0,id=rng-device0",
1614 "-device", "virtio-balloon,free-page-reporting=on"
1619 if (!sd_id128_is_null(arg_uuid
))
1620 if (strv_extend_many(&cmdline
, "-uuid", SD_ID128_TO_UUID_STRING(arg_uuid
)) < 0)
1623 if (ARCHITECTURE_SUPPORTS_VMGENID
) {
1624 /* Derive a vmgenid automatically from the invocation ID, in a deterministic way. */
1626 r
= sd_id128_get_invocation_app_specific(SD_ID128_MAKE(bd
,84,6d
,e3
,e4
,7d
,4b
,6c
,a6
,85,4a
,87,0f
,3c
,a3
,a0
), &vmgenid
);
1628 log_debug_errno(r
, "Failed to get invocation ID, making up randomized vmgenid: %m");
1630 r
= sd_id128_randomize(&vmgenid
);
1632 return log_error_errno(r
, "Failed to make up randomized vmgenid: %m");
1635 _cleanup_free_
char *vmgenid_device
= NULL
;
1636 if (asprintf(&vmgenid_device
, "vmgenid,guid=" SD_ID128_UUID_FORMAT_STR
, SD_ID128_FORMAT_VAL(vmgenid
)) < 0)
1639 if (strv_extend_many(&cmdline
, "-device", vmgenid_device
) < 0)
1643 /* if we are going to be starting any units with state then create our runtime dir */
1644 _cleanup_free_
char *runtime_dir
= NULL
;
1645 _cleanup_(rm_rf_physical_and_freep
) char *runtime_dir_destroy
= NULL
;
1646 if (arg_tpm
!= 0 || arg_directory
|| arg_runtime_mounts
.n_mounts
!= 0 || arg_pass_ssh_key
) {
1647 _cleanup_free_
char *subdir
= NULL
;
1649 if (asprintf(&subdir
, "systemd/vmspawn.%" PRIx64
, random_u64()) < 0)
1652 r
= runtime_directory(
1653 arg_privileged
? RUNTIME_SCOPE_SYSTEM
: RUNTIME_SCOPE_USER
,
1657 return log_error_errno(r
, "Failed to lookup runtime directory: %m");
1658 if (r
> 0) { /* We need to create our own runtime dir */
1659 r
= mkdir_p(runtime_dir
, 0755);
1661 return log_error_errno(r
, "Failed to create runtime directory '%s': %m", runtime_dir
);
1663 /* We created this, hence also destroy it */
1664 runtime_dir_destroy
= TAKE_PTR(runtime_dir
);
1666 runtime_dir
= strdup(runtime_dir_destroy
);
1671 log_debug("Using runtime directory: %s", runtime_dir
);
1674 _cleanup_close_
int delegate_userns_fd
= -EBADF
, tap_fd
= -EBADF
;
1675 if (arg_network_stack
== NETWORK_STACK_TAP
) {
1676 if (have_effective_cap(CAP_NET_ADMIN
) <= 0) {
1677 delegate_userns_fd
= userns_acquire_self_root();
1678 if (delegate_userns_fd
< 0)
1679 return log_error_errno(delegate_userns_fd
, "Failed to acquire userns: %m");
1681 _cleanup_free_
char *userns_name
= NULL
;
1682 if (asprintf(&userns_name
, "vmspawn-" PID_FMT
"-%s", getpid_cached(), arg_machine
) < 0)
1685 r
= nsresource_register_userns(userns_name
, delegate_userns_fd
);
1687 return log_error_errno(r
, "Failed to register user namespace with systemd-nsresourced: %m");
1689 tap_fd
= nsresource_add_netif_tap(delegate_userns_fd
, /* ret_host_ifname= */ NULL
);
1691 return log_error_errno(tap_fd
, "Failed to allocate network tap device: %m");
1693 r
= strv_extend(&cmdline
, "-nic");
1697 r
= strv_extendf(&cmdline
, "tap,fd=%i,model=virtio-net-pci", tap_fd
);
1701 if (!GREEDY_REALLOC(pass_fds
, n_pass_fds
+ 1))
1704 pass_fds
[n_pass_fds
++] = tap_fd
;
1706 _cleanup_free_
char *tap_name
= NULL
;
1707 struct ether_addr mac_vm
= {};
1709 tap_name
= strjoin("vt-", arg_machine
);
1713 (void) net_shorten_ifname(tap_name
, /* check_naming_scheme= */ false);
1715 if (ether_addr_is_null(&arg_network_provided_mac
)){
1716 r
= net_generate_mac(arg_machine
, &mac_vm
, VM_TAP_HASH_KEY
, 0);
1718 return log_error_errno(r
, "Failed to generate predictable MAC address for VM side: %m");
1720 mac_vm
= arg_network_provided_mac
;
1722 r
= strv_extend(&cmdline
, "-nic");
1726 r
= strv_extendf(&cmdline
, "tap,ifname=%s,script=no,downscript=no,model=virtio-net-pci,mac=%s", tap_name
, ETHER_ADDR_TO_STR(&mac_vm
));
1730 } else if (arg_network_stack
== NETWORK_STACK_USER
)
1731 r
= strv_extend_many(&cmdline
, "-nic", "user,model=virtio-net-pci");
1733 r
= strv_extend_many(&cmdline
, "-nic", "none");
1737 /* A shared memory backend might increase ram usage so only add one if actually necessary for virtiofsd. */
1738 if (arg_directory
|| arg_runtime_mounts
.n_mounts
!= 0) {
1739 r
= strv_extend(&cmdline
, "-object");
1743 r
= strv_extendf(&cmdline
, "memory-backend-memfd,id=mem,size=%s,share=on", mem
);
1748 bool use_vsock
= arg_vsock
> 0 && ARCHITECTURE_SUPPORTS_SMBIOS
;
1749 if (arg_vsock
< 0) {
1750 r
= qemu_check_vsock_support();
1752 return log_error_errno(r
, "Failed to check for VSOCK support: %m");
1757 if (!use_kvm
&& kvm_device_fd
>= 0) {
1758 log_warning("KVM is disabled but fd for /dev/kvm was passed, closing fd and ignoring");
1759 kvm_device_fd
= safe_close(kvm_device_fd
);
1762 if (use_kvm
&& kvm_device_fd
>= 0) {
1763 /* /dev/fdset/1 is magic string to tell qemu where to find the fd for /dev/kvm
1764 * we use this so that we can take a fd to /dev/kvm and then give qemu that fd */
1765 accel
= "kvm,device=/dev/fdset/1";
1767 r
= strv_extend(&cmdline
, "--add-fd");
1771 r
= strv_extendf(&cmdline
, "fd=%d,set=1,opaque=/dev/kvm", kvm_device_fd
);
1775 if (!GREEDY_REALLOC(pass_fds
, n_pass_fds
+ 1))
1778 pass_fds
[n_pass_fds
++] = kvm_device_fd
;
1784 r
= strv_extend_many(&cmdline
, "-accel", accel
);
1788 _cleanup_close_
int child_vsock_fd
= -EBADF
;
1789 unsigned child_cid
= arg_vsock_cid
;
1791 int device_fd
= vhost_device_fd
;
1793 if (device_fd
< 0) {
1794 child_vsock_fd
= open("/dev/vhost-vsock", O_RDWR
|O_CLOEXEC
);
1795 if (child_vsock_fd
< 0)
1796 return log_error_errno(errno
, "Failed to open /dev/vhost-vsock as read/write: %m");
1798 device_fd
= child_vsock_fd
;
1801 r
= vsock_fix_child_cid(device_fd
, &child_cid
, arg_machine
);
1803 return log_error_errno(r
, "Failed to fix CID for the guest VSOCK socket: %m");
1805 r
= strv_extend(&cmdline
, "-device");
1809 r
= strv_extendf(&cmdline
, "vhost-vsock-pci,guest-cid=%u,vhostfd=%d", child_cid
, device_fd
);
1813 if (!GREEDY_REALLOC(pass_fds
, n_pass_fds
+ 1))
1816 pass_fds
[n_pass_fds
++] = device_fd
;
1819 r
= strv_extend_many(&cmdline
, "-cpu",
1821 "max,hv_relaxed,hv-vapic,hv-time"
1829 _cleanup_close_
int master
= -EBADF
;
1830 PTYForwardFlags ptyfwd_flags
= 0;
1831 switch (arg_console_mode
) {
1833 case CONSOLE_READ_ONLY
:
1834 ptyfwd_flags
|= PTY_FORWARD_READ_ONLY
;
1838 case CONSOLE_INTERACTIVE
: {
1839 _cleanup_free_
char *pty_path
= NULL
;
1841 master
= openpt_allocate(O_RDWR
|O_NONBLOCK
, &pty_path
);
1843 return log_error_errno(master
, "Failed to setup pty: %m");
1845 if (strv_extend_many(
1849 "-device", "virtio-serial-pci,id=vmspawn-virtio-serial-pci",
1853 if (strv_extendf(&cmdline
,
1854 "serial,id=console,path=%s", pty_path
) < 0)
1857 r
= strv_extend_many(
1859 "-device", "virtconsole,chardev=console");
1864 r
= strv_extend_many(
1870 case CONSOLE_NATIVE
:
1871 r
= strv_extend_many(
1875 "-chardev", "stdio,mux=on,id=console,signal=off",
1876 "-device", "virtio-serial-pci,id=vmspawn-virtio-serial-pci",
1877 "-device", "virtconsole,chardev=console",
1882 assert_not_reached();
1887 r
= strv_extend(&cmdline
, "-drive");
1891 _cleanup_free_
char *escaped_ovmf_config_path
= escape_qemu_value(ovmf_config
->path
);
1892 if (!escaped_ovmf_config_path
)
1895 r
= strv_extendf(&cmdline
, "if=pflash,format=%s,readonly=on,file=%s", ovmf_config_format(ovmf_config
), escaped_ovmf_config_path
);
1899 _cleanup_(unlink_and_freep
) char *ovmf_vars_to
= NULL
;
1900 if (ovmf_config
->supports_sb
) {
1901 const char *ovmf_vars_from
= ovmf_config
->vars
;
1902 _cleanup_free_
char *escaped_ovmf_vars_to
= NULL
;
1903 _cleanup_close_
int source_fd
= -EBADF
, target_fd
= -EBADF
;
1905 r
= tempfn_random_child(NULL
, "vmspawn-", &ovmf_vars_to
);
/* Open the pristine OVMF variable store that gets copied into a private temporary file. open()
 * reports the failure reason via errno (its return value is just -1), so errno is what must be
 * passed to the logging call, matching the handling of target_fd right below. */
1909 source_fd
= open(ovmf_vars_from
, O_RDONLY
|O_CLOEXEC
);
1911 return log_error_errno(errno
, "Failed to open OVMF vars file %s: %m", ovmf_vars_from
);
1913 target_fd
= open(ovmf_vars_to
, O_WRONLY
|O_CREAT
|O_EXCL
|O_CLOEXEC
, 0600);
1915 return log_error_errno(errno
, "Failed to create regular file for OVMF vars at %s: %m", ovmf_vars_to
);
1917 r
= copy_bytes(source_fd
, target_fd
, UINT64_MAX
, COPY_REFLINK
);
1919 return log_error_errno(r
, "Failed to copy bytes from %s to %s: %m", ovmf_vars_from
, ovmf_vars_to
);
1921 /* This isn't always available so don't raise an error if it fails */
1922 (void) copy_times(source_fd
, target_fd
, 0);
1924 r
= strv_extend_many(
1926 "-global", "ICH9-LPC.disable_s3=1",
1927 "-global", "driver=cfi.pflash01,property=secure,value=on",
1932 escaped_ovmf_vars_to
= escape_qemu_value(ovmf_vars_to
);
1933 if (!escaped_ovmf_vars_to
)
1936 r
= strv_extendf(&cmdline
, "file=%s,if=pflash,format=%s", escaped_ovmf_vars_to
, ovmf_config_format(ovmf_config
));
1941 if (arg_image
|| strv_length(arg_extra_drives
) > 0) {
1942 r
= strv_extend_many(&cmdline
, "-device", "virtio-scsi-pci,id=scsi");
1948 r
= strv_extend_many(&cmdline
, "-kernel", kernel
);
1952 /* We can't rely on gpt-auto-generator when direct kernel booting so synthesize a root=
1953 * kernel argument instead. */
1955 r
= kernel_cmdline_maybe_append_root();
1962 _cleanup_free_
char *escaped_image
= NULL
;
1964 assert(!arg_directory
);
1966 r
= strv_extend(&cmdline
, "-drive");
1970 escaped_image
= escape_qemu_value(arg_image
);
1974 r
= strv_extendf(&cmdline
, "if=none,id=vmspawn,file=%s,format=raw,discard=%s", escaped_image
, on_off(arg_discard_disk
));
1978 r
= strv_extend_many(&cmdline
, "-device", "scsi-hd,drive=vmspawn,bootindex=1");
1982 r
= grow_image(arg_image
, arg_grow_image
);
1987 if (arg_directory
) {
1988 _cleanup_free_
char *listen_address
= NULL
;
1989 r
= start_virtiofsd(bus
, trans_scope
, arg_directory
, /* uidmap= */ true, runtime_dir
, &listen_address
);
1993 _cleanup_free_
char *escaped_listen_address
= escape_qemu_value(listen_address
);
1994 if (!escaped_listen_address
)
1997 if (strv_extend(&cmdline
, "-chardev") < 0)
2000 if (strv_extendf(&cmdline
, "socket,id=rootdir,path=%s", escaped_listen_address
) < 0)
2003 if (strv_extend_many(
2006 "vhost-user-fs-pci,queue-size=1024,chardev=rootdir,tag=root") < 0)
2009 if (strv_extend(&arg_kernel_cmdline_extra
, "root=root rootfstype=virtiofs rw") < 0)
2014 STRV_FOREACH(drive
, arg_extra_drives
) {
2015 _cleanup_free_
char *escaped_drive
= NULL
;
2016 const char *driver
= NULL
;
2019 r
= strv_extend(&cmdline
, "-blockdev");
2023 escaped_drive
= escape_qemu_value(*drive
);
2027 if (stat(*drive
, &st
) < 0)
2028 return log_error_errno(errno
, "Failed to stat '%s': %m", *drive
);
2030 if (S_ISREG(st
.st_mode
))
2032 else if (S_ISBLK(st
.st_mode
))
2033 driver
= "host_device";
/* Neither a regular file nor a block device. The error code is synthetic, so there is no real errno
 * text worth appending via %m (the other synthetic-errno messages in this file omit it as well). */
2035 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Expected regular file or block device, not '%s'.", *drive
);
2037 r
= strv_extendf(&cmdline
, "driver=raw,cache.direct=off,cache.no-flush=on,file.driver=%s,file.filename=%s,node-name=vmspawn_extra_%zu", driver
, escaped_drive
, i
);
2041 r
= strv_extend(&cmdline
, "-device");
2045 r
= strv_extendf(&cmdline
, "scsi-hd,drive=vmspawn_extra_%zu", i
++);
2050 r
= strv_prepend(&arg_kernel_cmdline_extra
, "console=hvc0");
2054 FOREACH_ARRAY(mount
, arg_runtime_mounts
.mounts
, arg_runtime_mounts
.n_mounts
) {
2055 _cleanup_free_
char *listen_address
= NULL
;
2056 r
= start_virtiofsd(bus
, trans_scope
, mount
->source
, /* uidmap= */ false, runtime_dir
, &listen_address
);
2060 _cleanup_free_
char *escaped_listen_address
= escape_qemu_value(listen_address
);
2061 if (!escaped_listen_address
)
2064 if (strv_extend(&cmdline
, "-chardev") < 0)
2067 _cleanup_free_
char *id
= NULL
;
2068 if (asprintf(&id
, "mnt%zi", mount
- arg_runtime_mounts
.mounts
) < 0)
2071 if (strv_extendf(&cmdline
, "socket,id=%s,path=%s", id
, escaped_listen_address
) < 0)
2074 if (strv_extend(&cmdline
, "-device") < 0)
2077 if (strv_extendf(&cmdline
, "vhost-user-fs-pci,queue-size=1024,chardev=%1$s,tag=%1$s", id
) < 0)
2080 _cleanup_free_
char *clean_target
= xescape(mount
->target
, "\":");
2084 if (strv_extendf(&arg_kernel_cmdline_extra
, "systemd.mount-extra=\"%s:%s:virtiofs:%s\"",
2085 id
, clean_target
, mount
->read_only
? "ro" : "rw") < 0)
2089 r
= cmdline_add_kernel_cmdline(&cmdline
, kernel
);
2093 r
= cmdline_add_smbios11(&cmdline
);
2097 /* disable TPM autodetection if the user's hardware doesn't support it */
2098 if (!ARCHITECTURE_SUPPORTS_TPM
) {
/* Two outcomes on architectures without TPM support: a hard EOPNOTSUPP error, or a debug message
 * and continuing with autodetection disabled (the conditions selecting between them are not visible
 * in this extract). */
2100 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP
), "TPM not supported on %s, refusing", architecture_to_string(native_architecture()));
2103 log_debug("TPM not supported on %s, disabling tpm autodetection and continuing", architecture_to_string(native_architecture()));
2107 _cleanup_free_
char *swtpm
= NULL
;
2109 if (arg_tpm_state_mode
== TPM_STATE_AUTO
) {
2110 assert(!arg_tpm_state_path
);
2112 const char *p
= ASSERT_PTR(arg_image
?: arg_directory
);
2114 _cleanup_free_
char *parent
= NULL
;
2115 r
= path_extract_directory(p
, &parent
);
2117 return log_error_errno(r
, "Failed to extract parent directory from '%s': %m", p
);
2119 _cleanup_free_
char *filename
= NULL
;
2120 r
= path_extract_filename(p
, &filename
);
2122 return log_error_errno(r
, "Failed to extract filename from '%s': %m", p
);
2124 if (!strextend(&filename
, ".tpmstate"))
2127 arg_tpm_state_path
= path_join(parent
, filename
);
2128 if (!arg_tpm_state_path
)
2131 log_debug("Storing TPM state persistently under '%s'.", arg_tpm_state_path
);
2134 r
= find_executable("swtpm", &swtpm
);
2136 /* log if the user asked for swtpm and we cannot find it */
2138 return log_error_errno(r
, "Failed to find swtpm binary: %m");
2139 /* also log if we got an error other than ENOENT from find_executable */
2140 if (r
!= -ENOENT
&& arg_tpm
< 0)
2141 return log_error_errno(r
, "Error detecting swtpm: %m");
2145 _cleanup_free_
char *tpm_socket_address
= NULL
;
2147 r
= start_tpm(bus
, trans_scope
, swtpm
, runtime_dir
, &tpm_socket_address
);
2149 /* only bail if the user asked for a tpm */
2151 return log_error_errno(r
, "Failed to start tpm: %m");
2153 log_debug_errno(r
, "Failed to start tpm, ignoring: %m");
/* Wire the swtpm control socket into QEMU as a chardev. The socket path ends up inside a
 * comma-separated QEMU option string, so the escaped copy of the path must be the one that is
 * formatted into it; the virtiofsd socket paths in this function are handled the same way. */
2157 if (tpm_socket_address
) {
2158 _cleanup_free_
char *escaped_tpm_socket_address
= escape_qemu_value(tpm_socket_address
);
2159 if (!escaped_tpm_socket_address
)
2162 if (strv_extend(&cmdline
, "-chardev") < 0)
2165 if (strv_extendf(&cmdline
, "socket,id=chrtpm,path=%s", escaped_tpm_socket_address
) < 0)
2168 if (strv_extend_many(&cmdline
, "-tpmdev", "emulator,id=tpm0,chardev=chrtpm") < 0)
2171 if (native_architecture() == ARCHITECTURE_X86_64
)
2172 r
= strv_extend_many(&cmdline
, "-device", "tpm-tis,tpmdev=tpm0");
2173 else if (IN_SET(native_architecture(), ARCHITECTURE_ARM64
, ARCHITECTURE_ARM64_BE
))
2174 r
= strv_extend_many(&cmdline
, "-device", "tpm-tis-device,tpmdev=tpm0");
2179 char *initrd
= NULL
;
2180 _cleanup_(rm_rf_physical_and_freep
) char *merged_initrd
= NULL
;
2181 size_t n_initrds
= strv_length(arg_initrds
);
2184 initrd
= arg_initrds
[0];
2185 else if (n_initrds
> 1) {
2186 r
= merge_initrds(&merged_initrd
);
2190 initrd
= merged_initrd
;
2194 r
= strv_extend_many(&cmdline
, "-initrd", initrd
);
2199 if (arg_forward_journal
) {
2200 _cleanup_free_
char *sd_journal_remote
= NULL
, *listen_address
= NULL
, *cred
= NULL
;
2202 r
= find_executable_full(
2203 "systemd-journal-remote",
2205 STRV_MAKE(LIBEXECDIR
),
2206 /* use_path_envvar = */ true, /* systemd-journal-remote should be installed in
2207 * LIBEXECDIR, but for supporting fancy setups. */
2209 /* ret_fd = */ NULL
);
2211 return log_error_errno(r
, "Failed to find systemd-journal-remote binary: %m");
2213 r
= start_systemd_journal_remote(bus
, trans_scope
, child_cid
, sd_journal_remote
, &listen_address
);
2217 cred
= strjoin("journal.forward_to_socket:", listen_address
);
2221 r
= machine_credential_set(&arg_credentials
, cred
);
2226 if (arg_pass_ssh_key
) {
2227 _cleanup_free_
char *scope_prefix
= NULL
, *privkey_path
= NULL
, *pubkey_path
= NULL
;
2228 const char *key_type
= arg_ssh_key_type
?: "ed25519";
2230 r
= unit_name_to_prefix(trans_scope
, &scope_prefix
);
2232 return log_error_errno(r
, "Failed to strip .scope suffix from scope: %m");
2234 privkey_path
= strjoin(runtime_dir
, "/", scope_prefix
, "-", key_type
);
2238 pubkey_path
= strjoin(privkey_path
, ".pub");
2242 r
= generate_ssh_keypair(privkey_path
, key_type
);
2246 ssh_private_key_path
= TAKE_PTR(privkey_path
);
2247 ssh_public_key_path
= TAKE_PTR(pubkey_path
);
2250 if (ssh_public_key_path
&& ssh_private_key_path
) {
2251 _cleanup_free_
char *scope_prefix
= NULL
, *cred_path
= NULL
;
2253 cred_path
= strjoin("ssh.ephemeral-authorized_keys-all:", ssh_public_key_path
);
2257 r
= machine_credential_load(&arg_credentials
, cred_path
);
2259 return log_error_errno(r
, "Failed to load credential %s: %m", cred_path
);
2261 r
= unit_name_to_prefix(trans_scope
, &scope_prefix
);
2263 return log_error_errno(r
, "Failed to strip .scope suffix from scope: %m");
2265 /* on distros that provide their own sshd@.service file we need to provide a dropin which
2266 * picks up our public key credential */
2267 r
= machine_credential_set(
2269 "systemd.unit-dropin.sshd-vsock@.service:"
2272 "ExecStart=-sshd -i -o 'AuthorizedKeysFile=%d/ssh.ephemeral-authorized_keys-all .ssh/authorized_keys'\n"
2273 "ImportCredential=ssh.ephemeral-authorized_keys-all\n");
2275 return log_error_errno(r
, "Failed to set credential systemd.unit-dropin.sshd-vsock@.service: %m");
2278 if (ARCHITECTURE_SUPPORTS_SMBIOS
)
2279 FOREACH_ARRAY(cred
, arg_credentials
.credentials
, arg_credentials
.n_credentials
) {
2280 _cleanup_free_
char *cred_data_b64
= NULL
;
2283 n
= base64mem(cred
->data
, cred
->size
, &cred_data_b64
);
2287 r
= strv_extend(&cmdline
, "-smbios");
2291 r
= strv_extendf(&cmdline
, "type=11,value=io.systemd.credential.binary:%s=%s", cred
->id
, cred_data_b64
);
2297 notify_sock_fd
= open_vsock();
2298 if (notify_sock_fd
< 0)
2299 return log_error_errno(notify_sock_fd
, "Failed to open VSOCK: %m");
2301 r
= cmdline_add_vsock(&cmdline
, notify_sock_fd
);
2305 return log_error_errno(r
, "Failed to call getsockname on VSOCK: %m");
2308 const char *e
= secure_getenv("SYSTEMD_VMSPAWN_QEMU_EXTRA");
2310 r
= strv_split_and_extend_full(&cmdline
, e
,
2311 /* separators = */ NULL
, /* filter_duplicates = */ false,
2312 EXTRACT_CUNESCAPE
|EXTRACT_UNQUOTE
);
2314 return log_error_errno(r
, "Failed to parse $SYSTEMD_VMSPAWN_QEMU_EXTRA: %m");
2317 if (DEBUG_LOGGING
) {
2318 _cleanup_free_
char *joined
= quote_command_line(cmdline
, SHELL_ESCAPE_EMPTY
);
2322 log_debug("Executing: %s", joined
);
2326 char vm_address
[STRLEN("vsock/") + DECIMAL_STR_MAX(unsigned)];
2328 xsprintf(vm_address
, "vsock/%u", child_cid
);
2329 r
= register_machine(
2336 child_cid
!= VMADDR_CID_ANY
? vm_address
: NULL
,
2337 ssh_private_key_path
,
2343 assert_se(sigprocmask_many(SIG_BLOCK
, /* ret_old_mask=*/ NULL
, SIGCHLD
) >= 0);
2345 _cleanup_(sd_event_source_unrefp
) sd_event_source
*notify_event_source
= NULL
;
2346 _cleanup_(sd_event_unrefp
) sd_event
*event
= NULL
;
2347 r
= sd_event_new(&event
);
2349 return log_error_errno(r
, "Failed to get default event source: %m");
2351 (void) sd_event_set_watchdog(event
, true);
2353 _cleanup_(pidref_done
) PidRef child_pidref
= PIDREF_NULL
;
2355 r
= pidref_safe_fork_full(
2357 /* stdio_fds= */ NULL
,
2358 pass_fds
, n_pass_fds
,
2359 FORK_RESET_SIGNALS
|FORK_CLOSE_ALL_FDS
|FORK_DEATHSIG_SIGTERM
|FORK_LOG
|FORK_CLOEXEC_OFF
|FORK_RLIMIT_NOFILE_SAFE
,
2364 /* set TERM and LANG if they are missing */
2365 if (setenv("TERM", "vt220", 0) < 0) {
2370 if (setenv("LANG", "C.UTF-8", 0) < 0) {
2375 execv(qemu_binary
, cmdline
);
2376 log_error_errno(errno
, "Failed to execve %s: %m", qemu_binary
);
2378 _exit(EXIT_FAILURE
);
2381 /* Close relevant fds we passed to qemu in the parent. We don't need them anymore. */
2382 child_vsock_fd
= safe_close(child_vsock_fd
);
2383 tap_fd
= safe_close(tap_fd
);
2385 int exit_status
= INT_MAX
;
/* Register the VSOCK notify socket with the event loop. exit_status is filled in by the notify
 * handler, and the created event source is returned through notify_event_source so that its cleanup
 * attribute releases it when the function returns. */
2387 r
= setup_notify_parent(event
, notify_sock_fd
, &exit_status
, &notify_event_source
);
2389 return log_error_errno(r
, "Failed to setup event loop to handle VSOCK notify events: %m");
2392 /* Used when talking to pid1 via SSH, but must survive until the function ends. */
2393 SSHInfo ssh_info
= {
2395 .private_key_path
= ssh_private_key_path
,
2398 ShutdownInfo shutdown_info
= {
2399 .ssh_info
= &ssh_info
,
2400 .pidref
= &child_pidref
,
2403 (void) sd_event_add_signal(event
, NULL
, SIGINT
| SD_EVENT_SIGNAL_PROCMASK
, shutdown_vm_graceful
, &shutdown_info
);
2404 (void) sd_event_add_signal(event
, NULL
, SIGTERM
| SD_EVENT_SIGNAL_PROCMASK
, shutdown_vm_graceful
, &shutdown_info
);
2405 (void) sd_event_add_signal(event
, NULL
, (SIGRTMIN
+4) | SD_EVENT_SIGNAL_PROCMASK
, shutdown_vm_graceful
, &shutdown_info
);
2407 (void) sd_event_add_signal(event
, NULL
, (SIGRTMIN
+18) | SD_EVENT_SIGNAL_PROCMASK
, sigrtmin18_handler
, NULL
);
/* Best effort: a failure to set up memory pressure handling is only logged at debug level. */
2409 r
= sd_event_add_memory_pressure(event
, NULL
, NULL
, NULL
);
2411 log_debug_errno(r
, "Failed to allocate memory pressure event source, ignoring: %m");
2413 /* Exit when the child exits */
2414 r
= event_add_child_pidref(event
, /* ret= */ NULL
, &child_pidref
, WEXITED
, on_child_exit
, /* userdata= */ NULL
);
/* "%m" (not "&m") is the specifier that expands to the error text for r. */
2416 return log_error_errno(r
, "Failed to watch qemu process: %m");
2418 _cleanup_(osc_context_closep
) sd_id128_t osc_context_id
= SD_ID128_NULL
;
2419 _cleanup_(pty_forward_freep
) PTYForward
*forward
= NULL
;
2421 if (!terminal_is_dumb()) {
2422 r
= osc_context_open_vm(arg_machine
, /* ret_seq= */ NULL
, &osc_context_id
);
2427 r
= pty_forward_new(event
, master
, ptyfwd_flags
, &forward
);
2429 return log_error_errno(r
, "Failed to create PTY forwarder: %m");
2431 if (!arg_background
&& shall_tint_background()) {
2432 _cleanup_free_
char *bg
= NULL
;
2434 r
= terminal_tint_color(130 /* green */, &bg
);
2436 log_debug_errno(r
, "Failed to determine terminal background color, not tinting.");
2438 (void) pty_forward_set_background_color(forward
, bg
);
2439 } else if (!isempty(arg_background
))
2440 (void) pty_forward_set_background_color(forward
, arg_background
);
2442 (void) pty_forward_set_window_title(forward
, GLYPH_GREEN_CIRCLE
, /* hostname = */ NULL
,
2443 STRV_MAKE("Virtual Machine", arg_machine
));
2446 r
= sd_event_loop(event
);
2448 return log_error_errno(r
, "Failed to run event loop: %m");
2451 (void) unregister_machine(bus
, arg_machine
);
2454 if (exit_status
== INT_MAX
) {
2455 log_debug("Couldn't retrieve inner EXIT_STATUS from VSOCK");
2456 return EXIT_SUCCESS
;
2458 if (exit_status
!= 0)
2459 log_warning("Non-zero exit code received: %d", exit_status
);
/* Fills in whichever of arg_directory / arg_image / arg_machine the user did not specify.
 *
 * If neither a directory nor an image is set, the image is looked up by machine name with image_find()
 * (system or user scope depending on arg_privileged): raw and block images set arg_image, directory
 * and subvolume images set arg_directory. The current working directory is used as arg_directory via
 * safe_getcwd() in the other visible branch.
 *
 * A machine name is then derived: the local hostname if the directory is "/", otherwise the file name
 * of the image (with a ".raw" suffix looked up for truncation) or of the directory. The result is
 * passed through hostname_cleanup() and must pass hostname_is_valid(), otherwise a synthetic EINVAL is
 * returned.
 *
 * NOTE(review): the conditions guarding several branches (e.g. whether arg_machine is already set),
 * error checks and return statements are not visible in this extract. */
2466 static int determine_names(void) {
2469 if (!arg_directory
&& !arg_image
) {
2471 _cleanup_(image_unrefp
) Image
*i
= NULL
;
2473 r
= image_find(arg_privileged
? RUNTIME_SCOPE_SYSTEM
: RUNTIME_SCOPE_USER
,
2474 IMAGE_MACHINE
, arg_machine
, NULL
, &i
);
2476 return log_error_errno(r
, "No image for machine '%s'.", arg_machine
);
2478 return log_error_errno(r
, "Failed to find image for machine '%s': %m", arg_machine
);
/* Map the image type to the argument that represents it: disk image vs. directory tree. */
2480 if (IN_SET(i
->type
, IMAGE_RAW
, IMAGE_BLOCK
))
2481 r
= free_and_strdup(&arg_image
, i
->path
);
2482 else if (IN_SET(i
->type
, IMAGE_DIRECTORY
, IMAGE_SUBVOLUME
))
2483 r
= free_and_strdup(&arg_directory
, i
->path
);
2485 assert_not_reached();
2489 r
= safe_getcwd(&arg_directory
);
2491 return log_error_errno(r
, "Failed to determine current directory: %m");
/* Booting the host's own root directory: name the machine after the host. */
2496 if (arg_directory
&& path_equal(arg_directory
, "/")) {
2497 arg_machine
= gethostname_malloc();
2500 } else if (arg_image
) {
2503 r
= path_extract_filename(arg_image
, &arg_machine
);
2505 return log_error_errno(r
, "Failed to extract file name from '%s': %m", arg_image
);
2507 /* Truncate suffix if there is one */
2508 e
= endswith(arg_machine
, ".raw");
2512 r
= path_extract_filename(arg_directory
, &arg_machine
);
2514 return log_error_errno(r
, "Failed to extract file name from '%s': %m", arg_directory
);
/* Normalize the derived name, then make sure what is left is a valid hostname. */
2517 hostname_cleanup(arg_machine
);
2518 if (!hostname_is_valid(arg_machine
, 0))
2519 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Failed to determine machine name automatically, please use -M.");
/* Rejects option combinations that cannot work together. Both checks fail with a synthetic EINVAL:
 *   - initrds were given but no kernel (arg_linux) was;
 *   - arg_keep_unit and arg_register are both set while cg_pid_get_owner_uid() succeeds for the
 *     calling process (PID 0), i.e. the process has an owning user according to that helper.
 *
 * NOTE(review): the success return is not visible in this extract. */
2525 static int verify_arguments(void) {
2526 if (!strv_isempty(arg_initrds
) && !arg_linux
)
2527 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "Option --initrd= cannot be used without --linux=.");
2529 if (arg_keep_unit
&& arg_register
&& cg_pid_get_owner_uid(0, NULL
) >= 0)
2530 /* Save the user from accidentally registering either user-$SESSION.scope or user@.service.
2531 * The latter is not technically a user session, but we don't need to labour the point. */
2532 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
), "--keep-unit --register=yes may not be used when invoked from a user session.");
/* Program logic behind main(): parses configuration, collects passed-in file descriptors and runs
 * the VM.
 *
 * Steps visible here, in order:
 *   1. arg_privileged is set from getuid() == 0, and arg_register defaults to the same value;
 *   2. parse_environment(), parse_argv(), determine_names() and verify_arguments() are called;
 *   3. unless arg_quiet is set or the console mode is CONSOLE_GUI, a "Spawning VM" banner and a hint
 *      on how to kill the VM (for the interactive and native console modes) are logged;
 *   4. file descriptors received via sd_listen_fds_with_names() are matched by name: "kvm" and
 *      "vhost-vsock" are recognized, anything else is reported with a notice;
 *   5. run_virtual_machine() is called with the two device fds (-EBADF when not passed).
 *
 * NOTE(review): the error checks after the calls in step 2, the assignment in the "kvm" branch and the
 * closing of unrecognized fds are not visible in this extract. */
2537 static int run(int argc
, char *argv
[]) {
2538 int r
, kvm_device_fd
= -EBADF
, vhost_device_fd
= -EBADF
;
2539 _cleanup_strv_free_
char **names
= NULL
;
2543 arg_privileged
= getuid() == 0;
2545 /* don't attempt to register as a machine when running as a user */
2546 arg_register
= arg_privileged
;
2548 r
= parse_environment();
2552 r
= parse_argv(argc
, argv
);
2556 r
= determine_names();
2560 r
= verify_arguments();
2564 if (!arg_quiet
&& arg_console_mode
!= CONSOLE_GUI
) {
2565 _cleanup_free_
char *u
= NULL
;
2566 const char *vm_path
= arg_image
?: arg_directory
;
/* Best effort: if no URL-ified form is produced, the plain path is printed below instead. */
2567 (void) terminal_urlify_path(vm_path
, vm_path
, &u
);
2569 log_info("%s %sSpawning VM %s on %s.%s",
2570 glyph(GLYPH_LIGHT_SHADE
), ansi_grey(), arg_machine
, u
?: vm_path
, ansi_normal());
2572 if (arg_console_mode
== CONSOLE_INTERACTIVE
)
2573 log_info("%s %sPress %sCtrl-]%s three times within 1s to kill VM.%s",
2574 glyph(GLYPH_LIGHT_SHADE
), ansi_grey(), ansi_highlight(), ansi_grey(), ansi_normal());
2575 else if (arg_console_mode
== CONSOLE_NATIVE
)
2576 log_info("%s %sPress %sCtrl-a x%s to kill VM.%s",
2577 glyph(GLYPH_LIGHT_SHADE
), ansi_grey(), ansi_highlight(), ansi_grey(), ansi_normal());
2580 int n
= sd_listen_fds_with_names(true, &names
);
2582 return log_error_errno(n
, "Failed to get passed file descriptors: %m");
/* Passed fds are numbered consecutively from SD_LISTEN_FDS_START; names[i] labels the i-th one. */
2584 for (int i
= 0; i
< n
; i
++) {
2585 int fd
= SD_LISTEN_FDS_START
+ i
;
2586 if (streq(names
[i
], "kvm"))
2588 else if (streq(names
[i
], "vhost-vsock"))
2589 vhost_device_fd
= fd
;
2591 log_notice("Couldn't recognize passed fd %d (%s), closing fd and ignoring...", fd
, names
[i
]);
2596 return run_virtual_machine(kvm_device_fd
, vhost_device_fd
);
/* Program entry point, generated by a macro around run().
 * NOTE(review): the macro is defined outside this file (presumably in main-func.h); going by its name,
 * positive return values of run() are treated as failure exit codes. Confirm against the macro. */
2599 DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run
);