]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/vmspawn/vmspawn.c
analyze: fix typo
[thirdparty/systemd.git] / src / vmspawn / vmspawn.c
CommitLineData
9de3cc14
SL
1/* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3#include <getopt.h>
811ad9e6 4#include <stdio.h>
9de3cc14 5#include <stdlib.h>
88af28d1 6#include <string.h>
811ad9e6 7#include <sys/stat.h>
9de3cc14
SL
8#include <unistd.h>
9
e2165e01 10#include "sd-bus.h"
51747b34 11#include "sd-daemon.h"
19301e76
SL
12#include "sd-event.h"
13#include "sd-id128.h"
14
9de3cc14
SL
15#include "alloc-util.h"
16#include "architecture.h"
7c643826 17#include "bootspec.h"
9de3cc14 18#include "build.h"
e2165e01 19#include "bus-error.h"
521e9286
SL
20#include "bus-internal.h"
21#include "bus-locator.h"
76d62b63 22#include "bus-util.h"
521e9286 23#include "bus-wait-for-jobs.h"
e29d5385 24#include "capability-util.h"
f72a0856 25#include "common-signal.h"
9de3cc14 26#include "copy.h"
7c643826 27#include "discover-image.h"
19301e76 28#include "dissect-image.h"
9de3cc14 29#include "escape.h"
c3dd4e20 30#include "ether-addr-util.h"
6cff1854 31#include "event-util.h"
a8f940c4 32#include "extract-word.h"
7c643826 33#include "fd-util.h"
9de3cc14 34#include "format-util.h"
f72a0856 35#include "fs-util.h"
19301e76 36#include "gpt.h"
9de3cc14 37#include "hexdecoct.h"
98b7c5e2 38#include "hostname-setup.h"
f72a0856 39#include "hostname-util.h"
e2165e01 40#include "id128-util.h"
9de3cc14
SL
41#include "log.h"
42#include "machine-credential.h"
43#include "main-func.h"
cf3beb27 44#include "mkdir.h"
0577339d 45#include "namespace-util.h"
c3dd4e20 46#include "netif-util.h"
e29d5385 47#include "nsresource.h"
5eb39a38 48#include "osc-context.h"
9de3cc14
SL
49#include "pager.h"
50#include "parse-argument.h"
51#include "parse-util.h"
cf3beb27 52#include "path-lookup.h"
9de3cc14 53#include "path-util.h"
7c643826 54#include "pidref.h"
9de3cc14
SL
55#include "pretty-print.h"
56#include "process-util.h"
795ec90c 57#include "ptyfwd.h"
5c57a865 58#include "random-util.h"
cf3beb27 59#include "rm-rf.h"
f72a0856
SL
60#include "signal-util.h"
61#include "socket-util.h"
521e9286 62#include "stdio-util.h"
88af28d1 63#include "string-util.h"
9de3cc14 64#include "strv.h"
ef430b84 65#include "sync-util.h"
e2165e01 66#include "terminal-util.h"
9de3cc14 67#include "tmpfile-util.h"
cf3beb27 68#include "unit-name.h"
e4a08721 69#include "utf8.h"
a8f940c4 70#include "vmspawn-mount.h"
b0dc7668 71#include "vmspawn-register.h"
cf3beb27 72#include "vmspawn-scope.h"
9de3cc14
SL
73#include "vmspawn-settings.h"
74#include "vmspawn-util.h"
75
c3dd4e20
SL
76#define VM_TAP_HASH_KEY SD_ID128_MAKE(01,d0,c6,4c,2b,df,24,fb,c0,f8,b2,09,7d,59,b2,93)
77
6fd25375
LP
/* Selects whether, and where, state of the virtual TPM is kept. */
typedef enum TpmStateMode {
        TPM_STATE_OFF,   /* do not keep any state around */
        TPM_STATE_AUTO,  /* keep state, deriving the path from the image/directory */
        TPM_STATE_PATH,  /* keep state at an explicitly specified location */
        _TPM_STATE_MODE_MAX,
        _TPM_STATE_MODE_INVALID = -EINVAL,
} TpmStateMode;
85
521e9286
SL
/* Parameters used to open an SSH connection into the guest over AF_VSOCK. */
typedef struct SSHInfo {
        unsigned cid;            /* VSOCK CID to connect to; VMADDR_CID_ANY is treated as "not known" */
        char *private_key_path;  /* path of the SSH identity file; NULL if there is none */
        unsigned port;           /* port passed to ssh */
} SSHInfo;
91
305e5893
SL
92typedef struct ShutdownInfo {
93 SSHInfo *ssh_info;
94 PidRef *pidref;
95} ShutdownInfo;
96
dbb2718f 97static bool arg_quiet = false;
9de3cc14 98static PagerFlags arg_pager_flags = 0;
5c57a865 99static char *arg_directory = NULL;
9de3cc14 100static char *arg_image = NULL;
f72a0856 101static char *arg_machine = NULL;
2c0061c7
LP
102static char *arg_cpus = NULL;
103static uint64_t arg_ram = UINT64_C(2) * U64_GB;
104static int arg_kvm = -1;
105static int arg_vsock = -1;
06d4fe57 106static unsigned arg_vsock_cid = VMADDR_CID_ANY;
cf3beb27 107static int arg_tpm = -1;
0f25e3e4 108static char *arg_linux = NULL;
811ad9e6 109static char **arg_initrds = NULL;
795ec90c 110static ConsoleMode arg_console_mode = CONSOLE_INTERACTIVE;
2c0061c7 111static NetworkStack arg_network_stack = NETWORK_STACK_NONE;
9de3cc14 112static int arg_secure_boot = -1;
bd546b9b 113static MachineCredentialContext arg_credentials = {};
c05ca33a 114static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
a8f940c4 115static RuntimeMountContext arg_runtime_mounts = {};
9de3cc14 116static SettingsMask arg_settings_mask = 0;
e8ce204d 117static char *arg_firmware = NULL;
258d2694 118static char *arg_forward_journal = NULL;
cf3beb27 119static bool arg_privileged = false;
b0dc7668 120static bool arg_register = false;
ecc4287d 121static bool arg_keep_unit = false;
b0dc7668 122static sd_id128_t arg_uuid = {};
0f25e3e4 123static char **arg_kernel_cmdline_extra = NULL;
1ec3218e 124static char **arg_extra_drives = NULL;
795ec90c 125static char *arg_background = NULL;
2042aebb
SL
126static bool arg_pass_ssh_key = true;
127static char *arg_ssh_key_type = NULL;
edd85c84 128static bool arg_discard_disk = true;
c3dd4e20 129struct ether_addr arg_network_provided_mac = {};
89cecfb1 130static char **arg_smbios11 = NULL;
ef430b84 131static uint64_t arg_grow_image = 0;
6fd25375
LP
132static char *arg_tpm_state_path = NULL;
133static TpmStateMode arg_tpm_state_mode = TPM_STATE_AUTO;
9de3cc14 134
5c57a865 135STATIC_DESTRUCTOR_REGISTER(arg_directory, freep);
9de3cc14 136STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
f72a0856 137STATIC_DESTRUCTOR_REGISTER(arg_machine, freep);
2c0061c7 138STATIC_DESTRUCTOR_REGISTER(arg_cpus, freep);
bd546b9b 139STATIC_DESTRUCTOR_REGISTER(arg_credentials, machine_credential_context_done);
e8ce204d 140STATIC_DESTRUCTOR_REGISTER(arg_firmware, freep);
0f25e3e4 141STATIC_DESTRUCTOR_REGISTER(arg_linux, freep);
811ad9e6 142STATIC_DESTRUCTOR_REGISTER(arg_initrds, strv_freep);
a8f940c4 143STATIC_DESTRUCTOR_REGISTER(arg_runtime_mounts, runtime_mount_context_done);
258d2694 144STATIC_DESTRUCTOR_REGISTER(arg_forward_journal, freep);
0f25e3e4 145STATIC_DESTRUCTOR_REGISTER(arg_kernel_cmdline_extra, strv_freep);
1ec3218e 146STATIC_DESTRUCTOR_REGISTER(arg_extra_drives, strv_freep);
795ec90c 147STATIC_DESTRUCTOR_REGISTER(arg_background, freep);
2042aebb 148STATIC_DESTRUCTOR_REGISTER(arg_ssh_key_type, freep);
89cecfb1 149STATIC_DESTRUCTOR_REGISTER(arg_smbios11, strv_freep);
6fd25375 150STATIC_DESTRUCTOR_REGISTER(arg_tpm_state_path, freep);
9de3cc14
SL
151
152static int help(void) {
153 _cleanup_free_ char *link = NULL;
154 int r;
155
156 pager_open(arg_pager_flags);
157
158 r = terminal_urlify_man("systemd-vmspawn", "1", &link);
159 if (r < 0)
160 return log_oom();
161
162 printf("%1$s [OPTIONS...] [ARGUMENTS...]\n\n"
163 "%5$sSpawn a command or OS in a virtual machine.%6$s\n\n"
7e2c6c74
ZJS
164 " -h --help Show this help\n"
165 " --version Print version string\n"
166 " -q --quiet Do not show status information\n"
167 " --no-pager Do not pipe output into a pager\n"
1d748d7c 168 "\n%3$sImage:%4$s\n"
7e2c6c74
ZJS
169 " -D --directory=PATH Root directory for the VM\n"
170 " -i --image=FILE|DEVICE Root file system disk image or device for the VM\n"
1d748d7c 171 "\n%3$sHost Configuration:%4$s\n"
2c0061c7
LP
172 " --cpus=CPUS Configure number of CPUs in guest\n"
173 " --ram=BYTES Configure guest's RAM size\n"
174 " --kvm=BOOL Enable use of KVM\n"
175 " --vsock=BOOL Override autodetection of VSOCK support\n"
176 " --vsock-cid=CID Specify the CID to use for the guest's VSOCK support\n"
7e2c6c74 177 " --tpm=BOOL Enable use of a virtual TPM\n"
6fd25375
LP
178 " --tpm-state=off|auto|PATH\n"
179 " Where to store TPM state\n"
7e2c6c74
ZJS
180 " --linux=PATH Specify the linux kernel for direct kernel boot\n"
181 " --initrd=PATH Specify the initrd for direct kernel boot\n"
2c0061c7
LP
182 " -n --network-tap Create a TAP device for networking\n"
183 " --network-user-mode Use user mode networking\n"
7e2c6c74
ZJS
184 " --secure-boot=BOOL Enable searching for firmware supporting SecureBoot\n"
185 " --firmware=PATH|list Select firmware definition file (or list available)\n"
edd85c84 186 " --discard-disk=BOOL Control processing of discard requests\n"
ef430b84 187 " -G --grow-image=BYTES Grow image file to specified size in bytes\n"
89cecfb1 188 " -s --smbios11=STRING Pass an arbitrary SMBIOS Type #11 string to the VM\n"
1d748d7c 189 "\n%3$sSystem Identity:%4$s\n"
7e2c6c74 190 " -M --machine=NAME Set the machine name for the VM\n"
b0dc7668
SL
191 " --uuid=UUID Set a specific machine UUID for the VM\n"
192 "\n%3$sProperties:%4$s\n"
193 " --register=BOOLEAN Register VM with systemd-machined\n"
ecc4287d 194 " --keep-unit Don't let systemd-machined allocate scope unit for us\n"
c05ca33a
SL
195 "\n%3$sUser Namespacing:%4$s\n"
196 " --private-users=UIDBASE[:NUIDS]\n"
7e2c6c74
ZJS
197 " Configure the UID/GID range to map into the\n"
198 " virtiofsd namespace\n"
a8f940c4
SL
199 "\n%3$sMounts:%4$s\n"
200 " --bind=SOURCE[:TARGET]\n"
7e2c6c74 201 " Mount a file or directory from the host into the VM\n"
a8f940c4 202 " --bind-ro=SOURCE[:TARGET]\n"
7e2c6c74 203 " Mount a file or directory, but read-only\n"
1ec3218e 204 " --extra-drive=PATH Adds an additional disk to the virtual machine\n"
258d2694
SL
205 "\n%3$sIntegration:%4$s\n"
206 " --forward-journal=FILE|DIR\n"
7e2c6c74 207 " Forward the VM's journal to the host\n"
2042aebb
SL
208 " --pass-ssh-key=BOOL Create an SSH key to access the VM\n"
209 " --ssh-key-type=TYPE Choose what type of SSH key to pass\n"
795ec90c
LP
210 "\n%3$sInput/Output:%4$s\n"
211 " --console=MODE Console mode (interactive, native, gui)\n"
212 " --background=COLOR Set ANSI color for background\n"
1d748d7c 213 "\n%3$sCredentials:%4$s\n"
9de3cc14 214 " --set-credential=ID:VALUE\n"
7e2c6c74 215 " Pass a credential with literal value to the VM\n"
9de3cc14 216 " --load-credential=ID:PATH\n"
7e2c6c74
ZJS
217 " Load credential for the VM from file or AF_UNIX\n"
218 " stream socket.\n"
9de3cc14
SL
219 "\nSee the %2$s for details.\n",
220 program_invocation_short_name,
221 link,
222 ansi_underline(),
223 ansi_normal(),
224 ansi_highlight(),
225 ansi_normal());
226
227 return 0;
228}
229
c3dd4e20
SL
230static int parse_environment(void) {
231 const char *e;
232 int r;
233
234 e = getenv("SYSTEMD_VMSPAWN_NETWORK_MAC");
235 if (e) {
236 r = parse_ether_addr(e, &arg_network_provided_mac);
237 if (r < 0)
238 return log_error_errno(r, "Failed to parse provided MAC address via environment variable");
239 }
240
241 return 0;
242}
243
9de3cc14
SL
244static int parse_argv(int argc, char *argv[]) {
245 enum {
246 ARG_VERSION = 0x100,
247 ARG_NO_PAGER,
2c0061c7
LP
248 ARG_CPUS,
249 ARG_RAM,
250 ARG_KVM,
251 ARG_VSOCK,
f72a0856 252 ARG_VSOCK_CID,
cf3beb27 253 ARG_TPM,
0f25e3e4 254 ARG_LINUX,
88af28d1 255 ARG_INITRD,
9de3cc14 256 ARG_QEMU_GUI,
75331bed 257 ARG_NETWORK_USER_MODE,
b0dc7668
SL
258 ARG_UUID,
259 ARG_REGISTER,
ecc4287d 260 ARG_KEEP_UNIT,
a8f940c4
SL
261 ARG_BIND,
262 ARG_BIND_RO,
1ec3218e 263 ARG_EXTRA_DRIVE,
9de3cc14 264 ARG_SECURE_BOOT,
c05ca33a 265 ARG_PRIVATE_USERS,
258d2694 266 ARG_FORWARD_JOURNAL,
2042aebb
SL
267 ARG_PASS_SSH_KEY,
268 ARG_SSH_KEY_TYPE,
9de3cc14
SL
269 ARG_SET_CREDENTIAL,
270 ARG_LOAD_CREDENTIAL,
e8ce204d 271 ARG_FIRMWARE,
edd85c84 272 ARG_DISCARD_DISK,
795ec90c
LP
273 ARG_CONSOLE,
274 ARG_BACKGROUND,
6fd25375 275 ARG_TPM_STATE,
9de3cc14
SL
276 };
277
278 static const struct option options[] = {
75331bed
SL
279 { "help", no_argument, NULL, 'h' },
280 { "version", no_argument, NULL, ARG_VERSION },
281 { "quiet", no_argument, NULL, 'q' },
282 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
283 { "image", required_argument, NULL, 'i' },
5c57a865 284 { "directory", required_argument, NULL, 'D' },
75331bed 285 { "machine", required_argument, NULL, 'M' },
2c0061c7
LP
286 { "cpus", required_argument, NULL, ARG_CPUS },
287 { "qemu-smp", required_argument, NULL, ARG_CPUS }, /* Compat alias */
288 { "ram", required_argument, NULL, ARG_RAM },
289 { "qemu-mem", required_argument, NULL, ARG_RAM }, /* Compat alias */
290 { "kvm", required_argument, NULL, ARG_KVM },
291 { "qemu-kvm", required_argument, NULL, ARG_KVM }, /* Compat alias */
292 { "vsock", required_argument, NULL, ARG_VSOCK },
293 { "qemu-vsock", required_argument, NULL, ARG_VSOCK }, /* Compat alias */
75331bed
SL
294 { "vsock-cid", required_argument, NULL, ARG_VSOCK_CID },
295 { "tpm", required_argument, NULL, ARG_TPM },
296 { "linux", required_argument, NULL, ARG_LINUX },
297 { "initrd", required_argument, NULL, ARG_INITRD },
795ec90c
LP
298 { "console", required_argument, NULL, ARG_CONSOLE },
299 { "qemu-gui", no_argument, NULL, ARG_QEMU_GUI }, /* compat option */
75331bed
SL
300 { "network-tap", no_argument, NULL, 'n' },
301 { "network-user-mode", no_argument, NULL, ARG_NETWORK_USER_MODE },
b0dc7668
SL
302 { "uuid", required_argument, NULL, ARG_UUID },
303 { "register", required_argument, NULL, ARG_REGISTER },
ecc4287d 304 { "keep-unit", no_argument, NULL, ARG_KEEP_UNIT },
a8f940c4
SL
305 { "bind", required_argument, NULL, ARG_BIND },
306 { "bind-ro", required_argument, NULL, ARG_BIND_RO },
1ec3218e 307 { "extra-drive", required_argument, NULL, ARG_EXTRA_DRIVE },
75331bed 308 { "secure-boot", required_argument, NULL, ARG_SECURE_BOOT },
c05ca33a 309 { "private-users", required_argument, NULL, ARG_PRIVATE_USERS },
258d2694 310 { "forward-journal", required_argument, NULL, ARG_FORWARD_JOURNAL },
2042aebb
SL
311 { "pass-ssh-key", required_argument, NULL, ARG_PASS_SSH_KEY },
312 { "ssh-key-type", required_argument, NULL, ARG_SSH_KEY_TYPE },
75331bed
SL
313 { "set-credential", required_argument, NULL, ARG_SET_CREDENTIAL },
314 { "load-credential", required_argument, NULL, ARG_LOAD_CREDENTIAL },
315 { "firmware", required_argument, NULL, ARG_FIRMWARE },
edd85c84 316 { "discard-disk", required_argument, NULL, ARG_DISCARD_DISK },
795ec90c 317 { "background", required_argument, NULL, ARG_BACKGROUND },
89cecfb1 318 { "smbios11", required_argument, NULL, 's' },
ef430b84 319 { "grow-image", required_argument, NULL, 'G' },
6fd25375 320 { "tpm-state", required_argument, NULL, ARG_TPM_STATE },
9de3cc14
SL
321 {}
322 };
323
324 int c, r;
325
326 assert(argc >= 0);
327 assert(argv);
328
329 optind = 0;
ef430b84 330 while ((c = getopt_long(argc, argv, "+hD:i:M:nqs:G:", options, NULL)) >= 0)
9de3cc14
SL
331 switch (c) {
332 case 'h':
333 return help();
334
335 case ARG_VERSION:
336 return version();
337
dbb2718f
LP
338 case 'q':
339 arg_quiet = true;
340 break;
341
5c57a865 342 case 'D':
a8f940c4 343 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_directory);
5c57a865
SL
344 if (r < 0)
345 return r;
346
347 arg_settings_mask |= SETTING_DIRECTORY;
348 break;
349
9de3cc14
SL
350 case 'i':
351 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_image);
352 if (r < 0)
353 return r;
354
355 arg_settings_mask |= SETTING_DIRECTORY;
356 break;
357
f72a0856
SL
358 case 'M':
359 if (isempty(optarg))
360 arg_machine = mfree(arg_machine);
361 else {
362 if (!hostname_is_valid(optarg, 0))
363 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
364 "Invalid machine name: %s", optarg);
365
366 r = free_and_strdup(&arg_machine, optarg);
367 if (r < 0)
368 return log_oom();
369 }
370 break;
371
9de3cc14
SL
372 case ARG_NO_PAGER:
373 arg_pager_flags |= PAGER_DISABLE;
374 break;
375
2c0061c7
LP
376 case ARG_CPUS:
377 r = free_and_strdup_warn(&arg_cpus, optarg);
d9c4917b
YW
378 if (r < 0)
379 return r;
9de3cc14
SL
380 break;
381
2c0061c7
LP
382 case ARG_RAM:
383 r = parse_size(optarg, 1024, &arg_ram);
9de3cc14 384 if (r < 0)
2c0061c7 385 return log_error_errno(r, "Failed to parse --ram=%s: %m", optarg);
9de3cc14
SL
386 break;
387
2c0061c7
LP
388 case ARG_KVM:
389 r = parse_tristate(optarg, &arg_kvm);
9de3cc14 390 if (r < 0)
2c0061c7 391 return log_error_errno(r, "Failed to parse --kvm=%s: %m", optarg);
9de3cc14
SL
392 break;
393
2c0061c7
LP
394 case ARG_VSOCK:
395 r = parse_tristate(optarg, &arg_vsock);
f72a0856 396 if (r < 0)
2c0061c7 397 return log_error_errno(r, "Failed to parse --vsock=%s: %m", optarg);
f72a0856
SL
398 break;
399
06d4fe57 400 case ARG_VSOCK_CID:
f72a0856 401 if (isempty(optarg))
06d4fe57 402 arg_vsock_cid = VMADDR_CID_ANY;
f72a0856 403 else {
06d4fe57
LP
404 unsigned cid;
405
406 r = vsock_parse_cid(optarg, &cid);
f72a0856 407 if (r < 0)
06d4fe57
LP
408 return log_error_errno(r, "Failed to parse --vsock-cid: %s", optarg);
409 if (!VSOCK_CID_IS_REGULAR(cid))
410 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified CID is not regular, refusing: %u", cid);
411
412 arg_vsock_cid = cid;
f72a0856 413 }
f72a0856 414 break;
f72a0856 415
cf3beb27
SL
416 case ARG_TPM:
417 r = parse_tristate(optarg, &arg_tpm);
418 if (r < 0)
419 return log_error_errno(r, "Failed to parse --tpm=%s: %m", optarg);
420 break;
421
0f25e3e4
SL
422 case ARG_LINUX:
423 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_linux);
424 if (r < 0)
425 return r;
426 break;
427
88af28d1 428 case ARG_INITRD: {
811ad9e6
SL
429 _cleanup_free_ char *initrd_path = NULL;
430 r = parse_path_argument(optarg, /* suppress_root= */ false, &initrd_path);
88af28d1
SL
431 if (r < 0)
432 return r;
811ad9e6
SL
433
434 r = strv_consume(&arg_initrds, TAKE_PTR(initrd_path));
435 if (r < 0)
436 return log_oom();
437
88af28d1
SL
438 break;
439 }
440
795ec90c
LP
441 case ARG_CONSOLE:
442 arg_console_mode = console_mode_from_string(optarg);
443 if (arg_console_mode < 0)
444 return log_error_errno(arg_console_mode, "Failed to parse specified console mode: %s", optarg);
445
446 break;
447
9de3cc14 448 case ARG_QEMU_GUI:
795ec90c 449 arg_console_mode = CONSOLE_GUI;
9de3cc14
SL
450 break;
451
75331bed 452 case 'n':
2c0061c7 453 arg_network_stack = NETWORK_STACK_TAP;
75331bed
SL
454 break;
455
456 case ARG_NETWORK_USER_MODE:
2c0061c7 457 arg_network_stack = NETWORK_STACK_USER;
75331bed
SL
458 break;
459
b0dc7668
SL
460 case ARG_UUID:
461 r = id128_from_string_nonzero(optarg, &arg_uuid);
462 if (r == -ENXIO)
463 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Machine UUID may not be all zeroes.");
464 if (r < 0)
465 return log_error_errno(r, "Invalid UUID: %s", optarg);
466
467 arg_settings_mask |= SETTING_MACHINE_ID;
468 break;
469
470 case ARG_REGISTER:
0f856229
SL
471 r = parse_boolean_argument("--register=", optarg, &arg_register);
472 if (r < 0)
b0dc7668 473 return r;
ecc4287d
LP
474
475 break;
476
477 case ARG_KEEP_UNIT:
478 arg_keep_unit = true;
b0dc7668
SL
479 break;
480
a8f940c4
SL
481 case ARG_BIND:
482 case ARG_BIND_RO:
483 r = runtime_mount_parse(&arg_runtime_mounts, optarg, c == ARG_BIND_RO);
484 if (r < 0)
485 return log_error_errno(r, "Failed to parse --bind(-ro)= argument %s: %m", optarg);
486
487 arg_settings_mask |= SETTING_BIND_MOUNTS;
488 break;
489
1ec3218e
SL
490 case ARG_EXTRA_DRIVE: {
491 _cleanup_free_ char *drive_path = NULL;
492
493 r = parse_path_argument(optarg, /* suppress_root= */ false, &drive_path);
494 if (r < 0)
495 return r;
496
497 r = strv_consume(&arg_extra_drives, TAKE_PTR(drive_path));
498 if (r < 0)
499 return log_oom();
500 break;
501 }
502
9de3cc14
SL
503 case ARG_SECURE_BOOT:
504 r = parse_tristate(optarg, &arg_secure_boot);
505 if (r < 0)
506 return log_error_errno(r, "Failed to parse --secure-boot=%s: %m", optarg);
507 break;
508
c05ca33a
SL
509 case ARG_PRIVATE_USERS:
510 r = parse_userns_uid_range(optarg, &arg_uid_shift, &arg_uid_range);
511 if (r < 0)
512 return r;
513 break;
514
258d2694
SL
515 case ARG_FORWARD_JOURNAL:
516 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_forward_journal);
517 if (r < 0)
518 return r;
519 break;
520
2042aebb 521 case ARG_PASS_SSH_KEY:
0f856229 522 r = parse_boolean_argument("--pass-ssh-key=", optarg, &arg_pass_ssh_key);
2042aebb 523 if (r < 0)
0f856229 524 return r;
2042aebb
SL
525 break;
526
527 case ARG_SSH_KEY_TYPE:
528 if (!string_is_safe(optarg))
529 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid value for --arg-ssh-key-type=: %s", optarg);
530
531 r = free_and_strdup_warn(&arg_ssh_key_type, optarg);
532 if (r < 0)
533 return r;
534 break;
535
9de3cc14 536 case ARG_SET_CREDENTIAL: {
bd546b9b 537 r = machine_credential_set(&arg_credentials, optarg);
9de3cc14 538 if (r < 0)
6045958b 539 return r;
9de3cc14
SL
540 arg_settings_mask |= SETTING_CREDENTIALS;
541 break;
542 }
543
544 case ARG_LOAD_CREDENTIAL: {
bd546b9b 545 r = machine_credential_load(&arg_credentials, optarg);
9de3cc14 546 if (r < 0)
6045958b 547 return r;
9de3cc14
SL
548
549 arg_settings_mask |= SETTING_CREDENTIALS;
550 break;
551 }
552
e8ce204d
LP
553 case ARG_FIRMWARE:
554 if (streq(optarg, "list")) {
555 _cleanup_strv_free_ char **l = NULL;
556
557 r = list_ovmf_config(&l);
558 if (r < 0)
559 return log_error_errno(r, "Failed to list firmwares: %m");
560
561 bool nl = false;
562 fputstrv(stdout, l, "\n", &nl);
563 if (nl)
564 putchar('\n');
565
566 return 0;
567 }
568
569 if (!isempty(optarg) && !path_is_absolute(optarg) && !startswith(optarg, "./"))
8c27f1ba 570 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Absolute path or path starting with './' required.");
e8ce204d
LP
571
572 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_firmware);
573 if (r < 0)
574 return r;
575
576 break;
577
edd85c84
SL
578 case ARG_DISCARD_DISK:
579 r = parse_boolean_argument("--discard-disk=", optarg, &arg_discard_disk);
580 if (r < 0)
581 return r;
582 break;
583
795ec90c
LP
584 case ARG_BACKGROUND:
585 r = free_and_strdup_warn(&arg_background, optarg);
586 if (r < 0)
587 return r;
588 break;
589
89cecfb1
LP
590 case 's':
591 if (isempty(optarg)) {
592 arg_smbios11 = strv_free(arg_smbios11);
593 break;
594 }
595
596 if (!utf8_is_valid(optarg))
597 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "SMBIOS Type 11 string is not UTF-8 clean, refusing: %s", optarg);
598
599 if (strv_extend(&arg_smbios11, optarg) < 0)
600 return log_oom();
601
602 break;
603
ef430b84
LP
604 case 'G':
605 if (isempty(optarg)) {
606 arg_grow_image = 0;
607 break;
608 }
609
610 r = parse_size(optarg, 1024, &arg_grow_image);
611 if (r < 0)
ad338e0f 612 return log_error_errno(r, "Failed to parse --grow-image= parameter: %s", optarg);
ef430b84
LP
613
614 break;
615
6fd25375
LP
616 case ARG_TPM_STATE:
617 if (path_is_absolute(optarg) && path_is_valid(optarg)) {
618 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_tpm_state_path);
619 if (r < 0)
620 return r;
621
622 arg_tpm_state_mode = TPM_STATE_PATH;
623 break;
624 }
625
626 r = isempty(optarg) ? false :
627 streq(optarg, "auto") ? true :
628 parse_boolean(optarg);
629 if (r < 0)
630 return log_error_errno(r, "Failed to parse --tpm-state= parameter: %s", optarg);
631
632 arg_tpm_state_mode = r ? TPM_STATE_AUTO : TPM_STATE_OFF;
633 arg_tpm_state_path = mfree(arg_tpm_state_path);
634 break;
635
9de3cc14
SL
636 case '?':
637 return -EINVAL;
638
639 default:
640 assert_not_reached();
641 }
642
643 if (argc > optind) {
0f25e3e4
SL
644 arg_kernel_cmdline_extra = strv_copy(argv + optind);
645 if (!arg_kernel_cmdline_extra)
9de3cc14
SL
646 return log_oom();
647
648 arg_settings_mask |= SETTING_START_MODE;
649 }
650
651 return 1;
652}
653
f72a0856 654static int open_vsock(void) {
f72a0856
SL
655 static const union sockaddr_union bind_addr = {
656 .vm.svm_family = AF_VSOCK,
657 .vm.svm_cid = VMADDR_CID_ANY,
658 .vm.svm_port = VMADDR_PORT_ANY,
659 };
660
8c6a7fa6 661 _cleanup_close_ int vsock_fd = socket(AF_VSOCK, SOCK_STREAM|SOCK_CLOEXEC, 0);
f72a0856
SL
662 if (vsock_fd < 0)
663 return log_error_errno(errno, "Failed to open AF_VSOCK socket: %m");
664
8c6a7fa6 665 if (bind(vsock_fd, &bind_addr.sa, sizeof(bind_addr.vm)) < 0)
cf9de8ef 666 return log_error_errno(errno, "Failed to bind to VSOCK address %u:%u: %m", bind_addr.vm.svm_cid, bind_addr.vm.svm_port);
f72a0856 667
8c6a7fa6 668 if (listen(vsock_fd, SOMAXCONN_DELUXE) < 0)
cf9de8ef 669 return log_error_errno(errno, "Failed to listen on VSOCK: %m");
f72a0856
SL
670
671 return TAKE_FD(vsock_fd);
672}
673
862c68a9
LP
674typedef struct NotifyConnectionData {
675 char buffer[NOTIFY_BUFFER_MAX+1];
676 size_t full;
677 int *exit_status;
678} NotifyConnectionData;
f72a0856 679
862c68a9
LP
680static int read_vsock_notify(NotifyConnectionData *d, int fd) {
681 int r;
f72a0856 682
862c68a9
LP
683 assert(d);
684 assert(fd >= 0);
685
686 for (;;) {
687 assert(d->full < sizeof(d->buffer));
688
689 ssize_t n = read(fd, d->buffer + d->full, sizeof(d->buffer) - d->full);
690 if (n < 0) {
691 if (ERRNO_IS_TRANSIENT(errno))
692 return 0;
693
694 return log_error_errno(errno, "Failed to read notification message: %m");
695 }
696 if (n == 0) /* We hit EOF! Let's parse this */
697 break;
698
a30ac8d5 699 if ((size_t) n >= sizeof(d->buffer) - d->full)
862c68a9 700 return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Received notify message exceeded maximum size.");
a30ac8d5
YW
701
702 d->full += n;
f72a0856
SL
703 }
704
862c68a9
LP
705 /* We reached EOF, now parse the thing */
706 assert(d->full < sizeof(d->buffer));
707 d->buffer[d->full] = 0;
708
709 _cleanup_strv_free_ char **tags = strv_split(d->buffer, "\n\r");
f72a0856
SL
710 if (!tags)
711 return log_oom();
712
862c68a9
LP
713 if (DEBUG_LOGGING) {
714 _cleanup_free_ char *j = strv_join(tags, " ");
715 log_debug("Received notification message with tags: %s", strnull(j));
716 }
f72a0856
SL
717
718 if (strv_contains(tags, "READY=1")) {
862c68a9 719 r = sd_notify(false, "READY=1");
f72a0856
SL
720 if (r < 0)
721 log_warning_errno(r, "Failed to send readiness notification, ignoring: %m");
722 }
723
862c68a9 724 const char *p = strv_find_startswith(tags, "STATUS=");
f72a0856
SL
725 if (p)
726 (void) sd_notifyf(false, "STATUS=VM running: %s", p);
727
728 p = strv_find_startswith(tags, "EXIT_STATUS=");
729 if (p) {
862c68a9 730 r = safe_atoi(p, d->exit_status);
f72a0856
SL
731 if (r < 0)
732 log_warning_errno(r, "Failed to parse exit status from %s, ignoring: %m", p);
733 }
734
862c68a9
LP
735 return 1; /* done */
736}
737
738static int vmspawn_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
739 NotifyConnectionData *d = ASSERT_PTR(userdata);
740 int r;
741
742 assert(source);
743 assert(fd >= 0);
744
745 r = read_vsock_notify(d, fd);
746 if (r != 0) {
747 int q;
748
749 /* If we are done or are seeing an error we'll turn off floating mode, which means the event
6b8e373c 750 * loop itself won't keep the event source pinned anymore, and since no one else (hopefully!)
862c68a9
LP
751 * keeps a reference anymore the whole thing will be released once we exit from this handler
752 * here. */
753
754 q = sd_event_source_set_floating(source, false);
755 if (q < 0)
756 log_warning_errno(q, "Failed to disable floating mode of event source, ignoring: %m");
757
758 return r;
759 }
f72a0856
SL
760
761 return 0;
762}
763
764static int vmspawn_dispatch_vsock_connections(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
f72a0856 765 _cleanup_close_ int conn_fd = -EBADF;
862c68a9
LP
766 sd_event *event;
767 int r;
f72a0856
SL
768
769 assert(userdata);
770
771 if (revents != EPOLLIN) {
cf9de8ef 772 log_warning("Got unexpected poll event for VSOCK fd.");
f72a0856
SL
773 return 0;
774 }
775
776 conn_fd = accept4(fd, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK);
777 if (conn_fd < 0) {
862c68a9
LP
778 if (ERRNO_IS_TRANSIENT(errno))
779 return 0;
780
781 log_warning_errno(errno, "Failed to accept connection from VSOCK connection, ignoring: %m");
f72a0856
SL
782 return 0;
783 }
784
785 event = sd_event_source_get_event(source);
786 if (!event)
787 return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Failed to retrieve event from event source, exiting task");
788
862c68a9
LP
789 _cleanup_free_ NotifyConnectionData *d = new(NotifyConnectionData, 1);
790 if (!d)
791 return log_oom();
792
793 *d = (NotifyConnectionData) {
794 .exit_status = userdata,
795 };
796
f72a0856 797 /* add a new floating task to read from the connection */
862c68a9
LP
798 _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
799 r = sd_event_add_io(event, &s, conn_fd, EPOLLIN, vmspawn_dispatch_notify_fd, d);
f72a0856
SL
800 if (r < 0)
801 return log_error_errno(r, "Failed to allocate notify connection event source: %m");
802
862c68a9
LP
803 r = sd_event_source_set_io_fd_own(s, true);
804 if (r < 0)
805 return log_error_errno(r, "Failed to pass ownership of notify to event source: %m");
806 TAKE_FD(conn_fd); /* conn_fd is now owned by the event loop so don't clean it up */
807
808 r = sd_event_source_set_destroy_callback(s, free);
809 if (r < 0)
810 return log_error_errno(r, "Failed to set destroy callback on event source: %m");
811 TAKE_PTR(d); /* The data object will now automatically be freed by the event source when it goes away */
812
813 /* Finally, make sure the event loop pins the event source */
814 r = sd_event_source_set_floating(s, true);
815 if (r < 0)
816 return log_error_errno(r, "Failed to set event source to floating mode: %m");
817
818 (void) sd_event_source_set_description(s, "vmspawn-notify-socket-connection");
f72a0856
SL
819
820 return 0;
821}
822
400da3e4 823static int setup_notify_parent(sd_event *event, int fd, int *exit_status, sd_event_source **ret_notify_event_source) {
f72a0856
SL
824 int r;
825
400da3e4
LP
826 assert(event);
827 assert(fd >= 0);
828 assert(exit_status);
829 assert(ret_notify_event_source);
830
831 r = sd_event_add_io(event, ret_notify_event_source, fd, EPOLLIN, vmspawn_dispatch_vsock_connections, exit_status);
f72a0856
SL
832 if (r < 0)
833 return log_error_errno(r, "Failed to allocate notify socket event source: %m");
834
862c68a9 835 (void) sd_event_source_set_description(*ret_notify_event_source, "vmspawn-notify-socket-listen");
f72a0856
SL
836
837 return 0;
838}
839
521e9286
SL
840static int bus_open_in_machine(sd_bus **ret, unsigned cid, unsigned port, const char *private_key_path) {
841 _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
842 _cleanup_free_ char *ssh_escaped = NULL, *bus_address = NULL;
843 char port_str[DECIMAL_STR_MAX(unsigned)], cid_str[DECIMAL_STR_MAX(unsigned)];
844 int r;
845
846 assert(ret);
847 assert(private_key_path);
848
849 r = sd_bus_new(&bus);
850 if (r < 0)
851 return r;
852
853 const char *ssh = secure_getenv("SYSTEMD_SSH") ?: "ssh";
854 ssh_escaped = bus_address_escape(ssh);
855 if (!ssh_escaped)
856 return -ENOMEM;
857
858 xsprintf(port_str, "%u", port);
859 xsprintf(cid_str, "%u", cid);
860
861 bus_address = strjoin(
862 "unixexec:path=", ssh_escaped,
863 /* -x: Disable X11 forwarding
864 * -T: Disable PTY allocation */
865 ",argv1=-xT",
866 ",argv2=-o,argv3=IdentitiesOnly yes",
867 ",argv4=-o,argv5=IdentityFile=", private_key_path,
868 ",argv6=-p,argv7=", port_str,
869 ",argv8=--",
870 ",argv9=root@vsock/", cid_str,
871 ",argv10=systemd-stdio-bridge"
872 );
873 if (!bus_address)
874 return -ENOMEM;
875
876 free_and_replace(bus->address, bus_address);
877 bus->bus_client = true;
878 bus->trusted = true;
879 bus->runtime_scope = RUNTIME_SCOPE_SYSTEM;
880 bus->is_local = false;
881
882 r = sd_bus_start(bus);
883 if (r < 0)
884 return r;
885
886 *ret = TAKE_PTR(bus);
887 return 0;
888}
889
305e5893 890static int shutdown_vm_graceful(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
521e9286
SL
891 _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
892 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
305e5893
SL
893 ShutdownInfo *shutdown_info = ASSERT_PTR(userdata);
894 SSHInfo *ssh_info = ASSERT_PTR(shutdown_info->ssh_info);
521e9286
SL
895 int r;
896
897 assert(s);
898 assert(si);
899
305e5893
SL
900 /* If we don't have the vsock address and the SSH key, go to fallback */
901 if (ssh_info->cid == VMADDR_CID_ANY || !ssh_info->private_key_path)
902 goto fallback;
903
904 /*
905 * In order we should try:
906 * 1. PowerOff from logind respects inhibitors but might not be available
907 * 2. PowerOff from systemd heavy handed but should always be available
908 * 3. SIGKILL qemu (this waits for qemu to die still)
909 * 4. kill ourselves by shutting down our event loop (this does not wait for qemu)
910 */
911
521e9286 912 r = bus_open_in_machine(&bus, ssh_info->cid, ssh_info->port, ssh_info->private_key_path);
305e5893
SL
913 if (r < 0) {
914 log_warning_errno(r, "Failed to connect to VM to forward signal, ignoring: %m");
915 goto fallback;
916 }
521e9286 917
9d5f05ae 918 r = bus_call_method(bus, bus_login_mgr, "PowerOff", &error, /* ret_reply= */ NULL, "b", false);
305e5893
SL
919 if (r >= 0) {
920 log_info("Requested powering off VM through D-Bus.");
921 return 0;
922 }
521e9286 923
305e5893
SL
924 log_warning_errno(r, "Failed to shutdown VM via logind, ignoring: %s", bus_error_message(&error, r));
925 sd_bus_error_free(&error);
521e9286 926
9d5f05ae 927 r = bus_call_method(bus, bus_systemd_mgr, "PowerOff", &error, /* ret_reply= */ NULL, /* types= */ NULL);
305e5893
SL
928 if (r >= 0) {
929 log_info("Requested powering off VM through D-Bus.");
930 return 0;
931 }
521e9286 932
305e5893 933 log_warning_errno(r, "Failed to shutdown VM via systemd, ignoring: %s", bus_error_message(&error, r));
521e9286 934
305e5893
SL
935fallback:
936 /* at this point SSH clearly isn't working so don't try it again */
937 TAKE_STRUCT(*ssh_info);
938
939 /* Backup method to shut down the VM when D-BUS access over SSH is not available */
940 if (shutdown_info->pidref) {
941 r = pidref_kill(shutdown_info->pidref, SIGKILL);
942 if (r < 0)
943 log_warning_errno(r, "Failed to kill qemu, terminating: %m");
944 else {
945 TAKE_PTR(shutdown_info->pidref);
946 log_info("Trying to halt qemu. Send SIGTERM again to trigger vmspawn to immediately terminate.");
947 return 0;
948 }
949 }
950
951 return sd_event_exit(sd_event_source_get_event(s), 0);
521e9286
SL
952}
953
f72a0856
SL
954static int on_child_exit(sd_event_source *s, const siginfo_t *si, void *userdata) {
955 sd_event_exit(sd_event_source_get_event(s), 0);
956 return 0;
957}
958
959static int cmdline_add_vsock(char ***cmdline, int vsock_fd) {
960 int r;
961
962 r = strv_extend(cmdline, "-smbios");
963 if (r < 0)
964 return r;
965
966 union sockaddr_union addr;
967 socklen_t addr_len = sizeof addr.vm;
fab0f6e0 968 if (getsockname(vsock_fd, &addr.sa, &addr_len) < 0)
f72a0856 969 return -errno;
fab0f6e0 970
f72a0856
SL
971 assert(addr_len >= sizeof addr.vm);
972 assert(addr.vm.svm_family == AF_VSOCK);
973
f72a0856
SL
974 r = strv_extendf(cmdline, "type=11,value=io.systemd.credential:vmm.notify_socket=vsock-stream:%u:%u", (unsigned) VMADDR_CID_HOST, addr.vm.svm_port);
975 if (r < 0)
976 return r;
977
978 return 0;
979}
980
dde4c13a
LP
981static int cmdline_add_kernel_cmdline(char ***cmdline, const char *kernel) {
982 assert(cmdline);
983
984 if (strv_isempty(arg_kernel_cmdline_extra))
985 return 0;
986
987 _cleanup_free_ char *kcl = strv_join(arg_kernel_cmdline_extra, " ");
988 if (!kcl)
989 return log_oom();
990
991 if (kernel) {
992 if (strv_extend_many(cmdline, "-append", kcl) < 0)
993 return log_oom();
994 } else {
995 if (!ARCHITECTURE_SUPPORTS_SMBIOS) {
996 log_warning("Cannot append extra args to kernel cmdline, native architecture doesn't support SMBIOS, ignoring.");
997 return 0;
998 }
999
1000 _cleanup_free_ char *escaped_kcl = NULL;
1001 escaped_kcl = escape_qemu_value(kcl);
1002 if (!escaped_kcl)
1003 return log_oom();
1004
1005 if (strv_extend(cmdline, "-smbios") < 0)
1006 return log_oom();
1007
1008 if (strv_extendf(cmdline, "type=11,value=io.systemd.stub.kernel-cmdline-extra=%s", escaped_kcl) < 0)
1009 return log_oom();
1010
1011 if (strv_extend(cmdline, "-smbios") < 0)
1012 return log_oom();
1013
1014 if (strv_extendf(cmdline, "type=11,value=io.systemd.boot.kernel-cmdline-extra=%s", escaped_kcl) < 0)
1015 return log_oom();
1016 }
1017
1018 return 0;
1019}
1020
89cecfb1
LP
1021static int cmdline_add_smbios11(char ***cmdline) {
1022 assert(cmdline);
1023
1024 if (strv_isempty(arg_smbios11))
1025 return 0;
1026
1027 if (!ARCHITECTURE_SUPPORTS_SMBIOS) {
1028 log_warning("Cannot issue SMBIOS Type #11 strings, native architecture doesn't support SMBIOS, ignoring.");
1029 return 0;
1030 }
1031
1032 STRV_FOREACH(i, arg_smbios11) {
1033 _cleanup_free_ char *escaped = NULL;
1034 escaped = escape_qemu_value(*i);
1035 if (!escaped)
1036 return log_oom();
1037
1038 if (strv_extend(cmdline, "-smbios") < 0)
1039 return log_oom();
1040
1041 if (strv_extendf(cmdline, "type=11,value=%s", escaped) < 0)
1042 return log_oom();
1043 }
1044
1045 return 0;
1046}
1047
d90a05b6
LP
1048static int start_tpm(
1049 sd_bus *bus,
1050 const char *scope,
1051 const char *swtpm,
6fd25375
LP
1052 const char *runtime_dir,
1053 char **ret_listen_address) {
d90a05b6 1054
cf3beb27
SL
1055 int r;
1056
1057 assert(bus);
1058 assert(scope);
d90a05b6 1059 assert(swtpm);
6fd25375 1060 assert(runtime_dir);
cf3beb27 1061
6fd25375 1062 _cleanup_free_ char *scope_prefix = NULL;
cf3beb27
SL
1063 r = unit_name_to_prefix(scope, &scope_prefix);
1064 if (r < 0)
1065 return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
1066
6fd25375
LP
1067 _cleanup_(socket_service_pair_done) SocketServicePair ssp = {
1068 .socket_type = SOCK_STREAM,
1069 };
1070
cf3beb27
SL
1071 ssp.unit_name_prefix = strjoin(scope_prefix, "-tpm");
1072 if (!ssp.unit_name_prefix)
1073 return log_oom();
1074
6fd25375
LP
1075 ssp.listen_address = path_join(runtime_dir, "tpm.sock");
1076 if (!ssp.listen_address)
cf3beb27
SL
1077 return log_oom();
1078
6fd25375
LP
1079 _cleanup_free_ char *transient_state_dir = NULL;
1080 const char *state_dir;
1081 if (arg_tpm_state_path)
1082 state_dir = arg_tpm_state_path;
1083 else {
1084 transient_state_dir = path_join(runtime_dir, ssp.unit_name_prefix);
1085 if (!transient_state_dir)
cf3beb27 1086 return log_oom();
6fd25375
LP
1087
1088 state_dir = transient_state_dir;
cf3beb27
SL
1089 }
1090
6fd25375
LP
1091 r = mkdir_p(state_dir, 0700);
1092 if (r < 0)
1093 return log_error_errno(r, "Failed to create TPM state directory '%s': %m", state_dir);
cf3beb27 1094
519bad6c
LP
1095 _cleanup_free_ char *swtpm_setup = NULL;
1096 r = find_executable("swtpm_setup", &swtpm_setup);
1097 if (r < 0)
1098 return log_error_errno(r, "Failed to find swtpm_setup binary: %m");
1099
6fd25375 1100 ssp.exec_start_pre = strv_new(swtpm_setup, "--tpm-state", state_dir, "--tpm2", "--pcr-banks", "sha256", "--not-overwrite");
519bad6c
LP
1101 if (!ssp.exec_start_pre)
1102 return log_oom();
1103
d90a05b6 1104 ssp.exec_start = strv_new(swtpm, "socket", "--tpm2", "--tpmstate");
cf3beb27
SL
1105 if (!ssp.exec_start)
1106 return log_oom();
1107
1108 r = strv_extendf(&ssp.exec_start, "dir=%s", state_dir);
1109 if (r < 0)
1110 return log_oom();
1111
1112 r = strv_extend_many(&ssp.exec_start, "--ctrl", "type=unixio,fd=3");
1113 if (r < 0)
1114 return log_oom();
1115
1116 r = start_socket_service_pair(bus, scope, &ssp);
1117 if (r < 0)
1118 return r;
1119
6fd25375
LP
1120 if (ret_listen_address)
1121 *ret_listen_address = TAKE_PTR(ssp.listen_address);
1122
cf3beb27
SL
1123 return 0;
1124}
1125
6fd25375
LP
1126static int start_systemd_journal_remote(
1127 sd_bus *bus,
1128 const char *scope,
1129 unsigned port,
1130 const char *sd_journal_remote,
1131 char **ret_listen_address) {
1132
258d2694
SL
1133 int r;
1134
1135 assert(bus);
1136 assert(scope);
1137 assert(sd_journal_remote);
1138
6fd25375 1139 _cleanup_free_ char *scope_prefix = NULL;
258d2694
SL
1140 r = unit_name_to_prefix(scope, &scope_prefix);
1141 if (r < 0)
1142 return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
1143
6fd25375
LP
1144 _cleanup_(socket_service_pair_done) SocketServicePair ssp = {
1145 .socket_type = SOCK_STREAM,
1146 };
1147
258d2694
SL
1148 ssp.unit_name_prefix = strjoin(scope_prefix, "-forward-journal");
1149 if (!ssp.unit_name_prefix)
1150 return log_oom();
1151
6fd25375 1152 if (asprintf(&ssp.listen_address, "vsock:2:%u", port) < 0)
258d2694
SL
1153 return log_oom();
1154
f607dd58
YW
1155 ssp.exec_start = strv_new(
1156 sd_journal_remote,
258d2694
SL
1157 "--output", arg_forward_journal,
1158 "--split-mode", endswith(arg_forward_journal, ".journal") ? "none" : "host");
1159 if (!ssp.exec_start)
1160 return log_oom();
1161
1162 r = start_socket_service_pair(bus, scope, &ssp);
1163 if (r < 0)
1164 return r;
1165
f607dd58
YW
1166 if (ret_listen_address)
1167 *ret_listen_address = TAKE_PTR(ssp.listen_address);
258d2694
SL
1168
1169 return 0;
1170}
1171
19301e76
SL
1172static int discover_root(char **ret) {
1173 int r;
1174 _cleanup_(dissected_image_unrefp) DissectedImage *image = NULL;
1175 _cleanup_free_ char *root = NULL;
1176
1177 assert(ret);
1178
1179 r = dissect_image_file_and_warn(
1180 arg_image,
1181 /* verity= */ NULL,
1182 /* mount_options= */ NULL,
1183 /* image_policy= */ NULL,
f1395724 1184 /* image_filter= */ NULL,
19301e76
SL
1185 /* flags= */ 0,
1186 &image);
1187 if (r < 0)
1188 return r;
1189
1190 if (image->partitions[PARTITION_ROOT].found)
1191 root = strjoin("root=PARTUUID=", SD_ID128_TO_UUID_STRING(image->partitions[PARTITION_ROOT].uuid));
1192 else if (image->partitions[PARTITION_USR].found)
1193 root = strjoin("mount.usr=PARTUUID=", SD_ID128_TO_UUID_STRING(image->partitions[PARTITION_USR].uuid));
1194 else
1195 return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Cannot perform a direct kernel boot without a root or usr partition, refusing");
1196
1197 if (!root)
1198 return log_oom();
1199
1200 *ret = TAKE_PTR(root);
5c57a865
SL
1201 return 0;
1202}
1203
1204static int find_virtiofsd(char **ret) {
1205 int r;
1206 _cleanup_free_ char *virtiofsd = NULL;
1207
1208 assert(ret);
1209
1210 r = find_executable("virtiofsd", &virtiofsd);
1211 if (r < 0 && r != -ENOENT)
1212 return log_error_errno(r, "Error while searching for virtiofsd: %m");
1213
1214 if (!virtiofsd) {
1215 FOREACH_STRING(file, "/usr/libexec/virtiofsd", "/usr/lib/virtiofsd") {
1216 if (access(file, X_OK) >= 0) {
1217 virtiofsd = strdup(file);
1218 if (!virtiofsd)
1219 return log_oom();
1220 break;
1221 }
1222
1223 if (!IN_SET(errno, ENOENT, EACCES))
1224 return log_error_errno(errno, "Error while searching for virtiofsd: %m");
1225 }
1226 }
1227
1228 if (!virtiofsd)
1229 return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Failed to find virtiofsd binary.");
1230
1231 *ret = TAKE_PTR(virtiofsd);
1232 return 0;
1233}
1234
6fd25375
LP
1235static int start_virtiofsd(
1236 sd_bus *bus,
1237 const char *scope,
1238 const char *directory,
1239 bool uidmap,
1240 const char *runtime_dir,
1241 char **ret_listen_address) {
1242
5c57a865
SL
1243 static unsigned virtiofsd_instance = 0;
1244 int r;
1245
1246 assert(bus);
1247 assert(scope);
1248 assert(directory);
6fd25375 1249 assert(runtime_dir);
5c57a865 1250
6fd25375 1251 _cleanup_free_ char *virtiofsd = NULL;
5c57a865
SL
1252 r = find_virtiofsd(&virtiofsd);
1253 if (r < 0)
1254 return r;
1255
6fd25375 1256 _cleanup_free_ char *scope_prefix = NULL;
5c57a865
SL
1257 r = unit_name_to_prefix(scope, &scope_prefix);
1258 if (r < 0)
1259 return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
1260
6fd25375
LP
1261 _cleanup_(socket_service_pair_done) SocketServicePair ssp = {
1262 .socket_type = SOCK_STREAM,
1263 };
5c57a865 1264
6fd25375 1265 if (asprintf(&ssp.unit_name_prefix, "%s-virtiofsd-%u", scope_prefix, virtiofsd_instance++) < 0)
5c57a865
SL
1266 return log_oom();
1267
6fd25375 1268 if (asprintf(&ssp.listen_address, "%s/sock-%"PRIx64, runtime_dir, random_u64()) < 0)
5c57a865
SL
1269 return log_oom();
1270
1271 /* QEMU doesn't support submounts so don't announce them */
1272 ssp.exec_start = strv_new(virtiofsd, "--shared-dir", directory, "--xattr", "--fd", "3", "--no-announce-submounts");
1273 if (!ssp.exec_start)
1274 return log_oom();
1275
a8f940c4 1276 if (uidmap && arg_uid_shift != UID_INVALID) {
c05ca33a
SL
1277 r = strv_extend(&ssp.exec_start, "--uid-map");
1278 if (r < 0)
1279 return log_oom();
1280
1281 r = strv_extendf(&ssp.exec_start, ":0:" UID_FMT ":" UID_FMT ":", arg_uid_shift, arg_uid_range);
1282 if (r < 0)
1283 return log_oom();
1284
1285 r = strv_extend(&ssp.exec_start, "--gid-map");
1286 if (r < 0)
1287 return log_oom();
1288
1289 r = strv_extendf(&ssp.exec_start, ":0:" GID_FMT ":" GID_FMT ":", arg_uid_shift, arg_uid_range);
1290 if (r < 0)
1291 return log_oom();
1292 }
1293
5c57a865
SL
1294 r = start_socket_service_pair(bus, scope, &ssp);
1295 if (r < 0)
1296 return r;
1297
6fd25375
LP
1298 if (ret_listen_address)
1299 *ret_listen_address = TAKE_PTR(ssp.listen_address);
19301e76
SL
1300
1301 return 0;
1302}
1303
1304static int kernel_cmdline_maybe_append_root(void) {
1305 int r;
1306 bool cmdline_contains_root = strv_find_startswith(arg_kernel_cmdline_extra, "root=")
1307 || strv_find_startswith(arg_kernel_cmdline_extra, "mount.usr=");
1308
1309 if (!cmdline_contains_root) {
1310 _cleanup_free_ char *root = NULL;
1311
1312 r = discover_root(&root);
1313 if (r < 0)
1314 return r;
1315
1316 log_debug("Determined root file system %s from dissected image", root);
1317
1318 r = strv_consume(&arg_kernel_cmdline_extra, TAKE_PTR(root));
1319 if (r < 0)
1320 return log_oom();
1321 }
1322
1323 return 0;
1324}
1325
6af6d442
SL
1326static int discover_boot_entry(const char *root, char **ret_linux, char ***ret_initrds) {
1327 _cleanup_(boot_config_free) BootConfig config = BOOT_CONFIG_NULL;
1328 _cleanup_free_ char *esp_path = NULL, *xbootldr_path = NULL;
1329 int r;
1330
1331 assert(root);
1332 assert(ret_linux);
1333 assert(ret_initrds);
1334
1335 esp_path = path_join(root, "efi");
1336 if (!esp_path)
1337 return log_oom();
1338
1339 xbootldr_path = path_join(root, "boot");
1340 if (!xbootldr_path)
1341 return log_oom();
1342
1343 r = boot_config_load(&config, esp_path, xbootldr_path);
1344 if (r < 0)
1345 return r;
1346
1347 r = boot_config_select_special_entries(&config, /* skip_efivars= */ true);
1348 if (r < 0)
1349 return log_error_errno(r, "Failed to find special boot config entries: %m");
1350
1351 const BootEntry *boot_entry = boot_config_default_entry(&config);
1352
ec3d2f2e 1353 if (boot_entry && !IN_SET(boot_entry->type, BOOT_ENTRY_UNIFIED, BOOT_ENTRY_CONF))
6af6d442
SL
1354 boot_entry = NULL;
1355
1356 /* If we cannot determine a default entry search for UKIs (Type #2 EFI Unified Kernel Images)
1357 * then .conf files (Type #1 Boot Loader Specification Entries).
1358 * https://uapi-group.org/specifications/specs/boot_loader_specification */
1359 if (!boot_entry)
1360 FOREACH_ARRAY(entry, config.entries, config.n_entries)
1361 if (entry->type == BOOT_ENTRY_UNIFIED) {
1362 boot_entry = entry;
1363 break;
1364 }
1365
1366 if (!boot_entry)
1367 FOREACH_ARRAY(entry, config.entries, config.n_entries)
1368 if (entry->type == BOOT_ENTRY_CONF) {
1369 boot_entry = entry;
1370 break;
1371 }
1372
1373 if (!boot_entry)
1374 return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Failed to discover any boot entries.");
1375
1376 log_debug("Discovered boot entry %s (%s)", boot_entry->id, boot_entry_type_to_string(boot_entry->type));
1377
1378 _cleanup_free_ char *linux_kernel = NULL;
1379 _cleanup_strv_free_ char **initrds = NULL;
1380 if (boot_entry->type == BOOT_ENTRY_UNIFIED) {
1381 linux_kernel = path_join(boot_entry->root, boot_entry->kernel);
1382 if (!linux_kernel)
1383 return log_oom();
1384 } else if (boot_entry->type == BOOT_ENTRY_CONF) {
1385 linux_kernel = path_join(boot_entry->root, boot_entry->kernel);
1386 if (!linux_kernel)
1387 return log_oom();
1388
1389 STRV_FOREACH(initrd, boot_entry->initrd) {
1390 _cleanup_free_ char *initrd_path = path_join(boot_entry->root, *initrd);
1391 if (!initrd_path)
1392 return log_oom();
1393
1394 r = strv_consume(&initrds, TAKE_PTR(initrd_path));
1395 if (r < 0)
1396 return log_oom();
1397 }
1398 } else
1399 assert_not_reached();
1400
1401 *ret_linux = TAKE_PTR(linux_kernel);
1402 *ret_initrds = TAKE_PTR(initrds);
1403
1404 return 0;
1405}
1406
811ad9e6
SL
1407static int merge_initrds(char **ret) {
1408 _cleanup_(rm_rf_physical_and_freep) char *merged_initrd = NULL;
1409 _cleanup_close_ int ofd = -EBADF;
1410 int r;
1411
1412 assert(ret);
1413
1414 r = tempfn_random_child(NULL, "vmspawn-initrd-", &merged_initrd);
1415 if (r < 0)
1416 return log_error_errno(r, "Failed to create temporary file: %m");
1417
1418 ofd = open(merged_initrd, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0600);
1419 if (ofd < 0)
1420 return log_error_errno(errno, "Failed to create regular file %s: %m", merged_initrd);
1421
811ad9e6
SL
1422 STRV_FOREACH(i, arg_initrds) {
1423 _cleanup_close_ int ifd = -EBADF;
8c3f9888
YW
1424 off_t off, to_seek;
1425
1426 off = lseek(ofd, 0, SEEK_CUR);
1427 if (off < 0)
1428 return log_error_errno(errno, "Failed to get file offset of %s: %m", merged_initrd);
1429
1430 to_seek = (4 - (off % 4)) % 4;
811ad9e6
SL
1431
1432 /* seek to assure 4 byte alignment for each initrd */
1433 if (to_seek != 0 && lseek(ofd, to_seek, SEEK_CUR) < 0)
1434 return log_error_errno(errno, "Failed to seek %s: %m", merged_initrd);
1435
1436 ifd = open(*i, O_RDONLY|O_CLOEXEC);
1437 if (ifd < 0)
1438 return log_error_errno(errno, "Failed to open %s: %m", *i);
1439
1440 r = copy_bytes(ifd, ofd, UINT64_MAX, COPY_REFLINK);
1441 if (r < 0)
1442 return log_error_errno(r, "Failed to copy bytes from %s to %s: %m", *i, merged_initrd);
1443 }
1444
1445 *ret = TAKE_PTR(merged_initrd);
1446 return 0;
1447}
1448
2042aebb
SL
1449static int generate_ssh_keypair(const char *key_path, const char *key_type) {
1450 _cleanup_free_ char *ssh_keygen = NULL;
1451 _cleanup_strv_free_ char **cmdline = NULL;
1452 int r;
1453
1454 assert(key_path);
1455
1456 r = find_executable("ssh-keygen", &ssh_keygen);
1457 if (r < 0)
1458 return log_error_errno(r, "Failed to find ssh-keygen: %m");
1459
1460 cmdline = strv_new(ssh_keygen, "-f", key_path, /* don't encrypt the key */ "-N", "");
1461 if (!cmdline)
1462 return log_oom();
1463
1464 if (key_type) {
1465 r = strv_extend_many(&cmdline, "-t", key_type);
1466 if (r < 0)
1467 return log_oom();
1468 }
1469
1470 if (DEBUG_LOGGING) {
1471 _cleanup_free_ char *joined = quote_command_line(cmdline, SHELL_ESCAPE_EMPTY);
1472 if (!joined)
1473 return log_oom();
1474
1475 log_debug("Executing: %s", joined);
1476 }
1477
1478 r = safe_fork(
1479 ssh_keygen,
1480 FORK_WAIT|FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_RLIMIT_NOFILE_SAFE|FORK_REARRANGE_STDIO,
1481 NULL);
1482 if (r < 0)
1483 return r;
1484 if (r == 0) {
1485 execv(ssh_keygen, cmdline);
1486 log_error_errno(errno, "Failed to execve %s: %m", ssh_keygen);
1487 _exit(EXIT_FAILURE);
1488 }
1489
1490 return 0;
1491}
795ec90c 1492
ef430b84
LP
1493static int grow_image(const char *path, uint64_t size) {
1494 int r;
1495
1496 assert(path);
1497
1498 if (size == 0)
1499 return 0;
1500
1501 /* Round up to multiple of 4K */
1502 size = DIV_ROUND_UP(size, 4096);
1503 if (size > UINT64_MAX / 4096)
1504 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified file size too large, refusing.");
1505 size *= 4096;
1506
1507 _cleanup_close_ int fd = xopenat_full(AT_FDCWD, path, O_RDWR|O_CLOEXEC, XO_REGULAR, /* mode= */ 0);
1508 if (fd < 0)
1509 return log_error_errno(fd, "Failed to open image file '%s': %m", path);
1510
1511 struct stat st;
1512 if (fstat(fd, &st) < 0)
1513 return log_error_errno(errno, "Failed to stat '%s': %m", path);
1514 if ((uint64_t) st.st_size >= size) {
1515 log_debug("Not growing image '%s' to %s, size already at %s.", path,
1516 FORMAT_BYTES(size), FORMAT_BYTES(st.st_size));
1517 return 0;
1518 }
1519
1520 if (ftruncate(fd, size) < 0)
1521 return log_error_errno(errno, "Failed grow image file '%s' from %s to %s: %m", path,
1522 FORMAT_BYTES(st.st_size), FORMAT_BYTES(size));
1523
1524 r = fsync_full(fd);
1525 if (r < 0)
1526 return log_error_errno(r, "Failed to sync image file '%s' after growing to %s: %m", path, FORMAT_BYTES(size));
1527
1528 if (!arg_quiet)
1529 log_info("Image file '%s' successfully grown from %s to %s.", path, FORMAT_BYTES(st.st_size), FORMAT_BYTES(size));
1530
1531 return 1;
1532}
1533
51747b34 1534static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
9de3cc14 1535 _cleanup_(ovmf_config_freep) OvmfConfig *ovmf_config = NULL;
cf3beb27 1536 _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
5c57a865 1537 _cleanup_free_ char *machine = NULL, *qemu_binary = NULL, *mem = NULL, *trans_scope = NULL, *kernel = NULL;
2042aebb 1538 _cleanup_(rm_rf_physical_and_freep) char *ssh_private_key_path = NULL, *ssh_public_key_path = NULL;
51747b34 1539 _cleanup_close_ int notify_sock_fd = -EBADF;
9de3cc14 1540 _cleanup_strv_free_ char **cmdline = NULL;
51747b34
SL
1541 _cleanup_free_ int *pass_fds = NULL;
1542 size_t n_pass_fds = 0;
5c57a865 1543 const char *accel, *shm;
9de3cc14 1544 int r;
cf3beb27
SL
1545
1546 if (arg_privileged)
1547 r = sd_bus_default_system(&bus);
1548 else
1549 r = sd_bus_default_user(&bus);
1550 if (r < 0)
1551 return log_error_errno(r, "Failed to connect to systemd bus: %m");
1552
1553 r = start_transient_scope(bus, arg_machine, /* allow_pidfd= */ true, &trans_scope);
1554 if (r < 0)
1555 return r;
9de3cc14 1556
2c0061c7
LP
1557 bool use_kvm = arg_kvm > 0;
1558 if (arg_kvm < 0) {
9de3cc14
SL
1559 r = qemu_check_kvm_support();
1560 if (r < 0)
1561 return log_error_errno(r, "Failed to check for KVM support: %m");
1562 use_kvm = r;
1563 }
1564
e8ce204d
LP
1565 if (arg_firmware)
1566 r = load_ovmf_config(arg_firmware, &ovmf_config);
1567 else
1568 r = find_ovmf_config(arg_secure_boot, &ovmf_config);
9de3cc14
SL
1569 if (r < 0)
1570 return log_error_errno(r, "Failed to find OVMF config: %m");
1571
1572 /* only warn if the user hasn't disabled secureboot */
1573 if (!ovmf_config->supports_sb && arg_secure_boot)
1574 log_warning("Couldn't find OVMF firmware blob with Secure Boot support, "
1575 "falling back to OVMF firmware blobs without Secure Boot support.");
1576
f5fefec7 1577 shm = arg_directory || arg_runtime_mounts.n_mounts != 0 ? ",memory-backend=mem" : "";
5c57a865
SL
1578 if (ARCHITECTURE_SUPPORTS_SMM)
1579 machine = strjoin("type=" QEMU_MACHINE_TYPE ",smm=", on_off(ovmf_config->supports_sb), shm);
1580 else
1581 machine = strjoin("type=" QEMU_MACHINE_TYPE, shm);
1582 if (!machine)
1583 return log_oom();
1584
1585 if (arg_linux) {
1586 kernel = strdup(arg_linux);
1587 if (!kernel)
1588 return log_oom();
6af6d442
SL
1589 } else if (arg_directory) {
1590 /* a kernel is required for directory type images so attempt to locate a UKI under /boot and /efi */
1591 r = discover_boot_entry(arg_directory, &kernel, &arg_initrds);
1592 if (r < 0)
1593 return log_error_errno(r, "Failed to locate UKI in directory type image, please specify one with --linux=.");
1594
1595 log_debug("Discovered UKI image at %s", kernel);
1596 }
5c57a865 1597
9de3cc14
SL
1598 r = find_qemu_binary(&qemu_binary);
1599 if (r == -EOPNOTSUPP)
1600 return log_error_errno(r, "Native architecture is not supported by qemu.");
1601 if (r < 0)
1602 return log_error_errno(r, "Failed to find QEMU binary: %m");
1603
2c0061c7 1604 if (asprintf(&mem, "%" PRIu64 "M", DIV_ROUND_UP(arg_ram, U64_MB)) < 0)
9de3cc14
SL
1605 return log_oom();
1606
1607 cmdline = strv_new(
1608 qemu_binary,
1609 "-machine", machine,
2c0061c7 1610 "-smp", arg_cpus ?: "1",
9de3cc14
SL
1611 "-m", mem,
1612 "-object", "rng-random,filename=/dev/urandom,id=rng0",
657be6bd
SL
1613 "-device", "virtio-rng-pci,rng=rng0,id=rng-device0",
1614 "-device", "virtio-balloon,free-page-reporting=on"
9de3cc14 1615 );
f72a0856
SL
1616 if (!cmdline)
1617 return log_oom();
9c046d93
LP
1618
1619 if (!sd_id128_is_null(arg_uuid))
1620 if (strv_extend_many(&cmdline, "-uuid", SD_ID128_TO_UUID_STRING(arg_uuid)) < 0)
1621 return log_oom();
f72a0856 1622
f17247c7
AP
1623 if (ARCHITECTURE_SUPPORTS_VMGENID) {
1624 /* Derive a vmgenid automatically from the invocation ID, in a deterministic way. */
1625 sd_id128_t vmgenid;
1626 r = sd_id128_get_invocation_app_specific(SD_ID128_MAKE(bd,84,6d,e3,e4,7d,4b,6c,a6,85,4a,87,0f,3c,a3,a0), &vmgenid);
1627 if (r < 0) {
1628 log_debug_errno(r, "Failed to get invocation ID, making up randomized vmgenid: %m");
1629
1630 r = sd_id128_randomize(&vmgenid);
1631 if (r < 0)
1632 return log_error_errno(r, "Failed to make up randomized vmgenid: %m");
1633 }
9573c0ba 1634
f17247c7
AP
1635 _cleanup_free_ char *vmgenid_device = NULL;
1636 if (asprintf(&vmgenid_device, "vmgenid,guid=" SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(vmgenid)) < 0)
1637 return log_oom();
9573c0ba 1638
f17247c7
AP
1639 if (strv_extend_many(&cmdline, "-device", vmgenid_device) < 0)
1640 return log_oom();
1641 }
9573c0ba 1642
cf3beb27 1643 /* if we are going to be starting any units with state then create our runtime dir */
6fd25375
LP
1644 _cleanup_free_ char *runtime_dir = NULL;
1645 _cleanup_(rm_rf_physical_and_freep) char *runtime_dir_destroy = NULL;
1646 if (arg_tpm != 0 || arg_directory || arg_runtime_mounts.n_mounts != 0 || arg_pass_ssh_key) {
1647 _cleanup_free_ char *subdir = NULL;
1648
1649 if (asprintf(&subdir, "systemd/vmspawn.%" PRIx64, random_u64()) < 0)
1650 return log_oom();
1651
1652 r = runtime_directory(
1653 arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER,
1654 subdir,
1655 &runtime_dir);
cf3beb27
SL
1656 if (r < 0)
1657 return log_error_errno(r, "Failed to lookup runtime directory: %m");
8cff087d 1658 if (r > 0) { /* We need to create our own runtime dir */
6fd25375 1659 r = mkdir_p(runtime_dir, 0755);
cf3beb27 1660 if (r < 0)
6fd25375
LP
1661 return log_error_errno(r, "Failed to create runtime directory '%s': %m", runtime_dir);
1662
1663 /* We created this, hence also destroy it */
1664 runtime_dir_destroy = TAKE_PTR(runtime_dir);
1665
1666 runtime_dir = strdup(runtime_dir_destroy);
1667 if (!runtime_dir)
1668 return log_oom();
cf3beb27 1669 }
6fd25375
LP
1670
1671 log_debug("Using runtime directory: %s", runtime_dir);
cf3beb27
SL
1672 }
1673
e29d5385 1674 _cleanup_close_ int delegate_userns_fd = -EBADF, tap_fd = -EBADF;
c3dd4e20 1675 if (arg_network_stack == NETWORK_STACK_TAP) {
e29d5385
LP
1676 if (have_effective_cap(CAP_NET_ADMIN) <= 0) {
1677 delegate_userns_fd = userns_acquire_self_root();
1678 if (delegate_userns_fd < 0)
1679 return log_error_errno(delegate_userns_fd, "Failed to acquire userns: %m");
c3dd4e20 1680
e29d5385
LP
1681 _cleanup_free_ char *userns_name = NULL;
1682 if (asprintf(&userns_name, "vmspawn-" PID_FMT "-%s", getpid_cached(), arg_machine) < 0)
1683 return log_oom();
1684
1685 r = nsresource_register_userns(userns_name, delegate_userns_fd);
1686 if (r < 0)
1687 return log_error_errno(r, "Failed to register user namespace with systemd-nsresourced: %m");
c3dd4e20 1688
e29d5385
LP
1689 tap_fd = nsresource_add_netif_tap(delegate_userns_fd, /* ret_host_ifname= */ NULL);
1690 if (tap_fd < 0)
1691 return log_error_errno(tap_fd, "Failed to allocate network tap device: %m");
c3dd4e20 1692
e29d5385 1693 r = strv_extend(&cmdline, "-nic");
c3dd4e20 1694 if (r < 0)
e29d5385 1695 return log_oom();
c3dd4e20 1696
e29d5385
LP
1697 r = strv_extendf(&cmdline, "tap,fd=%i,model=virtio-net-pci", tap_fd);
1698 if (r < 0)
1699 return log_oom();
c3dd4e20 1700
e29d5385
LP
1701 if (!GREEDY_REALLOC(pass_fds, n_pass_fds + 1))
1702 return log_oom();
1703
1704 pass_fds[n_pass_fds++] = tap_fd;
1705 } else {
1706 _cleanup_free_ char *tap_name = NULL;
1707 struct ether_addr mac_vm = {};
1708
1709 tap_name = strjoin("vt-", arg_machine);
1710 if (!tap_name)
1711 return log_oom();
1712
1713 (void) net_shorten_ifname(tap_name, /* check_naming_scheme= */ false);
1714
1715 if (ether_addr_is_null(&arg_network_provided_mac)){
1716 r = net_generate_mac(arg_machine, &mac_vm, VM_TAP_HASH_KEY, 0);
1717 if (r < 0)
1718 return log_error_errno(r, "Failed to generate predictable MAC address for VM side: %m");
1719 } else
1720 mac_vm = arg_network_provided_mac;
1721
1722 r = strv_extend(&cmdline, "-nic");
1723 if (r < 0)
1724 return log_oom();
1725
1726 r = strv_extendf(&cmdline, "tap,ifname=%s,script=no,downscript=no,model=virtio-net-pci,mac=%s", tap_name, ETHER_ADDR_TO_STR(&mac_vm));
1727 if (r < 0)
1728 return log_oom();
1729 }
c3dd4e20 1730 } else if (arg_network_stack == NETWORK_STACK_USER)
75331bed
SL
1731 r = strv_extend_many(&cmdline, "-nic", "user,model=virtio-net-pci");
1732 else
1733 r = strv_extend_many(&cmdline, "-nic", "none");
1734 if (r < 0)
1735 return log_oom();
1736
5c57a865 1737 /* A shared memory backend might increase ram usage so only add one if actually necessary for virtiofsd. */
a8f940c4 1738 if (arg_directory || arg_runtime_mounts.n_mounts != 0) {
5c57a865
SL
1739 r = strv_extend(&cmdline, "-object");
1740 if (r < 0)
1741 return log_oom();
1742
1743 r = strv_extendf(&cmdline, "memory-backend-memfd,id=mem,size=%s,share=on", mem);
1744 if (r < 0)
1745 return log_oom();
1746 }
1747
2c0061c7
LP
1748 bool use_vsock = arg_vsock > 0 && ARCHITECTURE_SUPPORTS_SMBIOS;
1749 if (arg_vsock < 0) {
5c57a865
SL
1750 r = qemu_check_vsock_support();
1751 if (r < 0)
cf9de8ef 1752 return log_error_errno(r, "Failed to check for VSOCK support: %m");
5c57a865
SL
1753
1754 use_vsock = r;
1755 }
1756
51747b34
SL
1757 if (!use_kvm && kvm_device_fd >= 0) {
1758 log_warning("KVM is disabled but fd for /dev/kvm was passed, closing fd and ignoring");
1759 kvm_device_fd = safe_close(kvm_device_fd);
1760 }
1761
1762 if (use_kvm && kvm_device_fd >= 0) {
1763 /* /dev/fdset/1 is magic string to tell qemu where to find the fd for /dev/kvm
1764 * we use this so that we can take a fd to /dev/kvm and then give qemu that fd */
1765 accel = "kvm,device=/dev/fdset/1";
1766
1767 r = strv_extend(&cmdline, "--add-fd");
1768 if (r < 0)
1769 return log_oom();
1770
1771 r = strv_extendf(&cmdline, "fd=%d,set=1,opaque=/dev/kvm", kvm_device_fd);
1772 if (r < 0)
1773 return log_oom();
1774
1775 if (!GREEDY_REALLOC(pass_fds, n_pass_fds + 1))
1776 return log_oom();
1777
1778 pass_fds[n_pass_fds++] = kvm_device_fd;
1779 } else if (use_kvm)
1780 accel = "kvm";
1781 else
1782 accel = "tcg";
1783
1784 r = strv_extend_many(&cmdline, "-accel", accel);
1785 if (r < 0)
1786 return log_oom();
1787
f72a0856 1788 _cleanup_close_ int child_vsock_fd = -EBADF;
258d2694 1789 unsigned child_cid = arg_vsock_cid;
f72a0856 1790 if (use_vsock) {
51747b34 1791 int device_fd = vhost_device_fd;
51747b34
SL
1792
1793 if (device_fd < 0) {
1794 child_vsock_fd = open("/dev/vhost-vsock", O_RDWR|O_CLOEXEC);
1795 if (child_vsock_fd < 0)
1796 return log_error_errno(errno, "Failed to open /dev/vhost-vsock as read/write: %m");
1797
1798 device_fd = child_vsock_fd;
1799 }
f72a0856 1800
51747b34 1801 r = vsock_fix_child_cid(device_fd, &child_cid, arg_machine);
f72a0856 1802 if (r < 0)
cf9de8ef 1803 return log_error_errno(r, "Failed to fix CID for the guest VSOCK socket: %m");
f72a0856
SL
1804
1805 r = strv_extend(&cmdline, "-device");
1806 if (r < 0)
1807 return log_oom();
1808
51747b34 1809 r = strv_extendf(&cmdline, "vhost-vsock-pci,guest-cid=%u,vhostfd=%d", child_cid, device_fd);
f72a0856
SL
1810 if (r < 0)
1811 return log_oom();
51747b34
SL
1812
1813 if (!GREEDY_REALLOC(pass_fds, n_pass_fds + 1))
1814 return log_oom();
1815
1816 pass_fds[n_pass_fds++] = device_fd;
f72a0856
SL
1817 }
1818
77290bc8
LP
1819 r = strv_extend_many(&cmdline, "-cpu",
1820#ifdef __x86_64__
1821 "max,hv_relaxed,hv-vapic,hv-time"
1822#else
1823 "max"
1824#endif
1825 );
f72a0856
SL
1826 if (r < 0)
1827 return log_oom();
9de3cc14 1828
795ec90c
LP
1829 _cleanup_close_ int master = -EBADF;
1830 PTYForwardFlags ptyfwd_flags = 0;
1831 switch (arg_console_mode) {
1832
1833 case CONSOLE_READ_ONLY:
1834 ptyfwd_flags |= PTY_FORWARD_READ_ONLY;
1835
1836 _fallthrough_;
1837
1838 case CONSOLE_INTERACTIVE: {
1839 _cleanup_free_ char *pty_path = NULL;
1840
6d2a26a1 1841 master = openpt_allocate(O_RDWR|O_NONBLOCK, &pty_path);
795ec90c 1842 if (master < 0)
6d2a26a1 1843 return log_error_errno(master, "Failed to setup pty: %m");
795ec90c
LP
1844
1845 if (strv_extend_many(
1846 &cmdline,
1847 "-nographic",
1848 "-nodefaults",
470c213b 1849 "-device", "virtio-serial-pci,id=vmspawn-virtio-serial-pci",
795ec90c
LP
1850 "-chardev") < 0)
1851 return log_oom();
1852
1853 if (strv_extendf(&cmdline,
1854 "serial,id=console,path=%s", pty_path) < 0)
1855 return log_oom();
1856
1857 r = strv_extend_many(
1858 &cmdline,
470c213b 1859 "-device", "virtconsole,chardev=console");
795ec90c
LP
1860 break;
1861 }
1862
1863 case CONSOLE_GUI:
69f3c619
LP
1864 r = strv_extend_many(
1865 &cmdline,
1866 "-vga",
1867 "virtio");
795ec90c
LP
1868 break;
1869
1870 case CONSOLE_NATIVE:
69f3c619
LP
1871 r = strv_extend_many(
1872 &cmdline,
1873 "-nographic",
1874 "-nodefaults",
1875 "-chardev", "stdio,mux=on,id=console,signal=off",
470c213b
DDM
1876 "-device", "virtio-serial-pci,id=vmspawn-virtio-serial-pci",
1877 "-device", "virtconsole,chardev=console",
69f3c619 1878 "-mon", "console");
795ec90c
LP
1879 break;
1880
1881 default:
1882 assert_not_reached();
1883 }
69f3c619
LP
1884 if (r < 0)
1885 return log_oom();
9de3cc14 1886
9de3cc14
SL
1887 r = strv_extend(&cmdline, "-drive");
1888 if (r < 0)
1889 return log_oom();
1890
018cc9ea
SL
1891 _cleanup_free_ char *escaped_ovmf_config_path = escape_qemu_value(ovmf_config->path);
1892 if (!escaped_ovmf_config_path)
1893 return log_oom();
1894
1895 r = strv_extendf(&cmdline, "if=pflash,format=%s,readonly=on,file=%s", ovmf_config_format(ovmf_config), escaped_ovmf_config_path);
9de3cc14
SL
1896 if (r < 0)
1897 return log_oom();
1898
f72a0856 1899 _cleanup_(unlink_and_freep) char *ovmf_vars_to = NULL;
9de3cc14
SL
1900 if (ovmf_config->supports_sb) {
1901 const char *ovmf_vars_from = ovmf_config->vars;
018cc9ea 1902 _cleanup_free_ char *escaped_ovmf_vars_to = NULL;
9de3cc14
SL
1903 _cleanup_close_ int source_fd = -EBADF, target_fd = -EBADF;
1904
1905 r = tempfn_random_child(NULL, "vmspawn-", &ovmf_vars_to);
1906 if (r < 0)
1907 return r;
1908
1909 source_fd = open(ovmf_vars_from, O_RDONLY|O_CLOEXEC);
1910 if (source_fd < 0)
1911 return log_error_errno(source_fd, "Failed to open OVMF vars file %s: %m", ovmf_vars_from);
1912
1913 target_fd = open(ovmf_vars_to, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0600);
1914 if (target_fd < 0)
1915 return log_error_errno(errno, "Failed to create regular file for OVMF vars at %s: %m", ovmf_vars_to);
1916
1917 r = copy_bytes(source_fd, target_fd, UINT64_MAX, COPY_REFLINK);
1918 if (r < 0)
1919 return log_error_errno(r, "Failed to copy bytes from %s to %s: %m", ovmf_vars_from, ovmf_vars_to);
1920
96e481bf 1921 /* This isn't always available so don't raise an error if it fails */
9de3cc14
SL
1922 (void) copy_times(source_fd, target_fd, 0);
1923
69f3c619
LP
1924 r = strv_extend_many(
1925 &cmdline,
1926 "-global", "ICH9-LPC.disable_s3=1",
1927 "-global", "driver=cfi.pflash01,property=secure,value=on",
1928 "-drive");
9de3cc14
SL
1929 if (r < 0)
1930 return log_oom();
1931
018cc9ea
SL
1932 escaped_ovmf_vars_to = escape_qemu_value(ovmf_vars_to);
1933 if (!escaped_ovmf_vars_to)
1934 return log_oom();
1935
1936 r = strv_extendf(&cmdline, "file=%s,if=pflash,format=%s", escaped_ovmf_vars_to, ovmf_config_format(ovmf_config));
9de3cc14
SL
1937 if (r < 0)
1938 return log_oom();
1939 }
1940
3b31b2e4
NL
1941 if (arg_image || strv_length(arg_extra_drives) > 0) {
1942 r = strv_extend_many(&cmdline, "-device", "virtio-scsi-pci,id=scsi");
1943 if (r < 0)
1944 return log_oom();
1945 }
1946
5c57a865
SL
1947 if (kernel) {
1948 r = strv_extend_many(&cmdline, "-kernel", kernel);
0f25e3e4
SL
1949 if (r < 0)
1950 return log_oom();
19301e76
SL
1951
1952 /* We can't rely on gpt-auto-generator when direct kernel booting so synthesize a root=
1953 * kernel argument instead. */
1954 if (arg_image) {
1955 r = kernel_cmdline_maybe_append_root();
1956 if (r < 0)
1957 return r;
1958 }
0f25e3e4
SL
1959 }
1960
5c57a865 1961 if (arg_image) {
018cc9ea
SL
1962 _cleanup_free_ char *escaped_image = NULL;
1963
5c57a865 1964 assert(!arg_directory);
9de3cc14 1965
5c57a865
SL
1966 r = strv_extend(&cmdline, "-drive");
1967 if (r < 0)
1968 return log_oom();
9de3cc14 1969
018cc9ea
SL
1970 escaped_image = escape_qemu_value(arg_image);
1971 if (!escaped_image)
6fd25375 1972 return log_oom();
018cc9ea 1973
33c17360 1974 r = strv_extendf(&cmdline, "if=none,id=vmspawn,file=%s,format=raw,discard=%s", escaped_image, on_off(arg_discard_disk));
5c57a865
SL
1975 if (r < 0)
1976 return log_oom();
1977
3b31b2e4 1978 r = strv_extend_many(&cmdline, "-device", "scsi-hd,drive=vmspawn,bootindex=1");
5c57a865
SL
1979 if (r < 0)
1980 return log_oom();
10819eb0
AP
1981
1982 r = grow_image(arg_image, arg_grow_image);
1983 if (r < 0)
1984 return r;
5c57a865
SL
1985 }
1986
1987 if (arg_directory) {
6fd25375
LP
1988 _cleanup_free_ char *listen_address = NULL;
1989 r = start_virtiofsd(bus, trans_scope, arg_directory, /* uidmap= */ true, runtime_dir, &listen_address);
5c57a865
SL
1990 if (r < 0)
1991 return r;
1992
6fd25375
LP
1993 _cleanup_free_ char *escaped_listen_address = escape_qemu_value(listen_address);
1994 if (!escaped_listen_address)
5c57a865
SL
1995 return log_oom();
1996
6fd25375 1997 if (strv_extend(&cmdline, "-chardev") < 0)
5c57a865
SL
1998 return log_oom();
1999
6fd25375 2000 if (strv_extendf(&cmdline, "socket,id=rootdir,path=%s", escaped_listen_address) < 0)
5c57a865
SL
2001 return log_oom();
2002
6fd25375
LP
2003 if (strv_extend_many(
2004 &cmdline,
2005 "-device",
2006 "vhost-user-fs-pci,queue-size=1024,chardev=rootdir,tag=root") < 0)
5c57a865
SL
2007 return log_oom();
2008
6fd25375 2009 if (strv_extend(&arg_kernel_cmdline_extra, "root=root rootfstype=virtiofs rw") < 0)
5c57a865
SL
2010 return log_oom();
2011 }
9de3cc14 2012
42514e54
LP
2013 size_t i = 0;
2014 STRV_FOREACH(drive, arg_extra_drives) {
2015 _cleanup_free_ char *escaped_drive = NULL;
878c2b28
NL
2016 const char *driver = NULL;
2017 struct stat st;
42514e54
LP
2018
2019 r = strv_extend(&cmdline, "-blockdev");
2020 if (r < 0)
2021 return log_oom();
2022
2023 escaped_drive = escape_qemu_value(*drive);
2024 if (!escaped_drive)
2025 return log_oom();
2026
878c2b28
NL
2027 if (stat(*drive, &st) < 0)
2028 return log_error_errno(errno, "Failed to stat '%s': %m", *drive);
2029
2030 if (S_ISREG(st.st_mode))
2031 driver = "file";
2032 else if (S_ISBLK(st.st_mode))
2033 driver = "host_device";
2034 else
2035 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expected regular file or block device, not '%s': %m", *drive);
2036
2037 r = strv_extendf(&cmdline, "driver=raw,cache.direct=off,cache.no-flush=on,file.driver=%s,file.filename=%s,node-name=vmspawn_extra_%zu", driver, escaped_drive, i);
42514e54
LP
2038 if (r < 0)
2039 return log_oom();
2040
2041 r = strv_extend(&cmdline, "-device");
2042 if (r < 0)
2043 return log_oom();
2044
2045 r = strv_extendf(&cmdline, "scsi-hd,drive=vmspawn_extra_%zu", i++);
2046 if (r < 0)
2047 return log_oom();
2048 }
2049
470c213b 2050 r = strv_prepend(&arg_kernel_cmdline_extra, "console=hvc0");
773ca1de
SL
2051 if (r < 0)
2052 return log_oom();
4291f446 2053
a8f940c4 2054 FOREACH_ARRAY(mount, arg_runtime_mounts.mounts, arg_runtime_mounts.n_mounts) {
6fd25375
LP
2055 _cleanup_free_ char *listen_address = NULL;
2056 r = start_virtiofsd(bus, trans_scope, mount->source, /* uidmap= */ false, runtime_dir, &listen_address);
a8f940c4
SL
2057 if (r < 0)
2058 return r;
2059
6fd25375
LP
2060 _cleanup_free_ char *escaped_listen_address = escape_qemu_value(listen_address);
2061 if (!escaped_listen_address)
2062 return log_oom();
018cc9ea 2063
6fd25375 2064 if (strv_extend(&cmdline, "-chardev") < 0)
a8f940c4
SL
2065 return log_oom();
2066
6fd25375
LP
2067 _cleanup_free_ char *id = NULL;
2068 if (asprintf(&id, "mnt%zi", mount - arg_runtime_mounts.mounts) < 0)
a8f940c4
SL
2069 return log_oom();
2070
6fd25375 2071 if (strv_extendf(&cmdline, "socket,id=%s,path=%s", id, escaped_listen_address) < 0)
a8f940c4
SL
2072 return log_oom();
2073
6fd25375 2074 if (strv_extend(&cmdline, "-device") < 0)
a8f940c4
SL
2075 return log_oom();
2076
6fd25375
LP
2077 if (strv_extendf(&cmdline, "vhost-user-fs-pci,queue-size=1024,chardev=%1$s,tag=%1$s", id) < 0)
2078 return log_oom();
2079
2080 _cleanup_free_ char *clean_target = xescape(mount->target, "\":");
a8f940c4
SL
2081 if (!clean_target)
2082 return log_oom();
2083
6fd25375
LP
2084 if (strv_extendf(&arg_kernel_cmdline_extra, "systemd.mount-extra=\"%s:%s:virtiofs:%s\"",
2085 id, clean_target, mount->read_only ? "ro" : "rw") < 0)
a8f940c4
SL
2086 return log_oom();
2087 }
2088
dde4c13a
LP
2089 r = cmdline_add_kernel_cmdline(&cmdline, kernel);
2090 if (r < 0)
2091 return r;
f72a0856 2092
89cecfb1
LP
2093 r = cmdline_add_smbios11(&cmdline);
2094 if (r < 0)
2095 return r;
2096
cf3beb27
SL
2097 /* disable TPM autodetection if the user's hardware doesn't support it */
2098 if (!ARCHITECTURE_SUPPORTS_TPM) {
6fd25375
LP
2099 if (arg_tpm > 0)
2100 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "TPM not supported on %s, refusing", architecture_to_string(native_architecture()));
cf3beb27 2101 if (arg_tpm < 0) {
6fd25375 2102 arg_tpm = false;
cf3beb27 2103 log_debug("TPM not support on %s, disabling tpm autodetection and continuing", architecture_to_string(native_architecture()));
6fd25375 2104 }
cf3beb27
SL
2105 }
2106
2107 _cleanup_free_ char *swtpm = NULL;
2108 if (arg_tpm != 0) {
6fd25375
LP
2109 if (arg_tpm_state_mode == TPM_STATE_AUTO) {
2110 assert(!arg_tpm_state_path);
2111
2112 const char *p = ASSERT_PTR(arg_image ?: arg_directory);
2113
2114 _cleanup_free_ char *parent = NULL;
2115 r = path_extract_directory(p, &parent);
2116 if (r < 0)
2117 return log_error_errno(r, "Failed to extract parent directory from '%s': %m", p);
2118
2119 _cleanup_free_ char *filename = NULL;
2120 r = path_extract_filename(p, &filename);
2121 if (r < 0)
2122 return log_error_errno(r, "Failed to extract filename from '%s': %m", p);
2123
2124 if (!strextend(&filename, ".tpmstate"))
2125 return log_oom();
2126
2127 arg_tpm_state_path = path_join(parent, filename);
2128 if (!arg_tpm_state_path)
2129 return log_oom();
2130
2131 log_debug("Storing TPM state persistently under '%s'.", arg_tpm_state_path);
2132 }
2133
cf3beb27
SL
2134 r = find_executable("swtpm", &swtpm);
2135 if (r < 0) {
2136 /* log if the user asked for swtpm and we cannot find it */
2137 if (arg_tpm > 0)
2138 return log_error_errno(r, "Failed to find swtpm binary: %m");
2139 /* also log if we got an error other than ENOENT from find_executable */
2140 if (r != -ENOENT && arg_tpm < 0)
2141 return log_error_errno(r, "Error detecting swtpm: %m");
2142 }
2143 }
2144
6fd25375 2145 _cleanup_free_ char *tpm_socket_address = NULL;
cf3beb27 2146 if (swtpm) {
6fd25375 2147 r = start_tpm(bus, trans_scope, swtpm, runtime_dir, &tpm_socket_address);
cf3beb27
SL
2148 if (r < 0) {
2149 /* only bail if the user asked for a tpm */
2150 if (arg_tpm > 0)
2151 return log_error_errno(r, "Failed to start tpm: %m");
6fd25375 2152
cf3beb27
SL
2153 log_debug_errno(r, "Failed to start tpm, ignoring: %m");
2154 }
eb77d24b
SL
2155 }
2156
6fd25375
LP
2157 if (tpm_socket_address) {
2158 _cleanup_free_ char *escaped_tpm_socket_address = escape_qemu_value(tpm_socket_address);
2159 if (!escaped_tpm_socket_address)
2160 return log_oom();
018cc9ea 2161
6fd25375 2162 if (strv_extend(&cmdline, "-chardev") < 0)
cf3beb27
SL
2163 return log_oom();
2164
6fd25375 2165 if (strv_extendf(&cmdline, "socket,id=chrtpm,path=%s", tpm_socket_address) < 0)
cf3beb27
SL
2166 return log_oom();
2167
6fd25375 2168 if (strv_extend_many(&cmdline, "-tpmdev", "emulator,id=tpm0,chardev=chrtpm") < 0)
cf3beb27
SL
2169 return log_oom();
2170
2171 if (native_architecture() == ARCHITECTURE_X86_64)
2172 r = strv_extend_many(&cmdline, "-device", "tpm-tis,tpmdev=tpm0");
2173 else if (IN_SET(native_architecture(), ARCHITECTURE_ARM64, ARCHITECTURE_ARM64_BE))
2174 r = strv_extend_many(&cmdline, "-device", "tpm-tis-device,tpmdev=tpm0");
2175 if (r < 0)
2176 return log_oom();
2177 }
2178
811ad9e6
SL
2179 char *initrd = NULL;
2180 _cleanup_(rm_rf_physical_and_freep) char *merged_initrd = NULL;
2181 size_t n_initrds = strv_length(arg_initrds);
2182
2183 if (n_initrds == 1)
2184 initrd = arg_initrds[0];
2185 else if (n_initrds > 1) {
2186 r = merge_initrds(&merged_initrd);
2187 if (r < 0)
2188 return r;
2189
2190 initrd = merged_initrd;
2191 }
2192
2193 if (initrd) {
2194 r = strv_extend_many(&cmdline, "-initrd", initrd);
88af28d1
SL
2195 if (r < 0)
2196 return log_oom();
2197 }
2198
258d2694
SL
2199 if (arg_forward_journal) {
2200 _cleanup_free_ char *sd_journal_remote = NULL, *listen_address = NULL, *cred = NULL;
e8357737
YW
2201
2202 r = find_executable_full(
2203 "systemd-journal-remote",
2204 /* root = */ NULL,
2205 STRV_MAKE(LIBEXECDIR),
2206 /* use_path_envvar = */ true, /* systemd-journal-remote should be installed in
2207 * LIBEXECDIR, but for supporting fancy setups. */
2208 &sd_journal_remote,
2209 /* ret_fd = */ NULL);
258d2694
SL
2210 if (r < 0)
2211 return log_error_errno(r, "Failed to find systemd-journal-remote binary: %m");
2212
2213 r = start_systemd_journal_remote(bus, trans_scope, child_cid, sd_journal_remote, &listen_address);
2214 if (r < 0)
2215 return r;
2216
2217 cred = strjoin("journal.forward_to_socket:", listen_address);
2218 if (!cred)
2219 return log_oom();
2220
2221 r = machine_credential_set(&arg_credentials, cred);
2222 if (r < 0)
2223 return r;
2224 }
2225
2042aebb
SL
2226 if (arg_pass_ssh_key) {
2227 _cleanup_free_ char *scope_prefix = NULL, *privkey_path = NULL, *pubkey_path = NULL;
2228 const char *key_type = arg_ssh_key_type ?: "ed25519";
2229
2230 r = unit_name_to_prefix(trans_scope, &scope_prefix);
2231 if (r < 0)
2232 return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
2233
6fd25375 2234 privkey_path = strjoin(runtime_dir, "/", scope_prefix, "-", key_type);
2042aebb
SL
2235 if (!privkey_path)
2236 return log_oom();
2237
2238 pubkey_path = strjoin(privkey_path, ".pub");
2239 if (!pubkey_path)
2240 return log_oom();
2241
2242 r = generate_ssh_keypair(privkey_path, key_type);
2243 if (r < 0)
2244 return r;
2245
2246 ssh_private_key_path = TAKE_PTR(privkey_path);
2247 ssh_public_key_path = TAKE_PTR(pubkey_path);
2248 }
2249
2250 if (ssh_public_key_path && ssh_private_key_path) {
2251 _cleanup_free_ char *scope_prefix = NULL, *cred_path = NULL;
2252
2253 cred_path = strjoin("ssh.ephemeral-authorized_keys-all:", ssh_public_key_path);
2254 if (!cred_path)
2255 return log_oom();
2256
2257 r = machine_credential_load(&arg_credentials, cred_path);
2258 if (r < 0)
2259 return log_error_errno(r, "Failed to load credential %s: %m", cred_path);
2260
2261 r = unit_name_to_prefix(trans_scope, &scope_prefix);
2262 if (r < 0)
2263 return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
506aa371
SL
2264
2265 /* on distros that provide their own sshd@.service file we need to provide a dropin which
2266 * picks up our public key credential */
2267 r = machine_credential_set(
2268 &arg_credentials,
2269 "systemd.unit-dropin.sshd-vsock@.service:"
2270 "[Service]\n"
2271 "ExecStart=\n"
df8bd4c1 2272 "ExecStart=-sshd -i -o 'AuthorizedKeysFile=%d/ssh.ephemeral-authorized_keys-all .ssh/authorized_keys'\n"
506aa371
SL
2273 "ImportCredential=ssh.ephemeral-authorized_keys-all\n");
2274 if (r < 0)
2275 return log_error_errno(r, "Failed to set credential systemd.unit-dropin.sshd-vsock@.service: %m");
2042aebb
SL
2276 }
2277
258d2694
SL
2278 if (ARCHITECTURE_SUPPORTS_SMBIOS)
2279 FOREACH_ARRAY(cred, arg_credentials.credentials, arg_credentials.n_credentials) {
2280 _cleanup_free_ char *cred_data_b64 = NULL;
2281 ssize_t n;
2282
2283 n = base64mem(cred->data, cred->size, &cred_data_b64);
2284 if (n < 0)
2285 return log_oom();
2286
2287 r = strv_extend(&cmdline, "-smbios");
2288 if (r < 0)
2289 return log_oom();
2290
2291 r = strv_extendf(&cmdline, "type=11,value=io.systemd.credential.binary:%s=%s", cred->id, cred_data_b64);
2292 if (r < 0)
2293 return log_oom();
2294 }
2295
f72a0856 2296 if (use_vsock) {
51747b34
SL
2297 notify_sock_fd = open_vsock();
2298 if (notify_sock_fd < 0)
cf9de8ef 2299 return log_error_errno(notify_sock_fd, "Failed to open VSOCK: %m");
f72a0856 2300
51747b34 2301 r = cmdline_add_vsock(&cmdline, notify_sock_fd);
f72a0856 2302 if (r == -ENOMEM)
4291f446 2303 return log_oom();
f72a0856 2304 if (r < 0)
cf9de8ef 2305 return log_error_errno(r, "Failed to call getsockname on VSOCK: %m");
4291f446 2306 }
9de3cc14 2307
6f9a1adf
LP
2308 const char *e = secure_getenv("SYSTEMD_VMSPAWN_QEMU_EXTRA");
2309 if (e) {
a2c8652a 2310 r = strv_split_and_extend_full(&cmdline, e,
14a07ec3 2311 /* separators = */ NULL, /* filter_duplicates = */ false,
a2c8652a 2312 EXTRACT_CUNESCAPE|EXTRACT_UNQUOTE);
6f9a1adf 2313 if (r < 0)
a2c8652a 2314 return log_error_errno(r, "Failed to parse $SYSTEMD_VMSPAWN_QEMU_EXTRA: %m");
6f9a1adf
LP
2315 }
2316
b9e2d83b
LP
2317 if (DEBUG_LOGGING) {
2318 _cleanup_free_ char *joined = quote_command_line(cmdline, SHELL_ESCAPE_EMPTY);
2319 if (!joined)
2320 return log_oom();
2321
2322 log_debug("Executing: %s", joined);
2323 }
2324
1e83cfff
SL
2325 if (arg_register) {
2326 char vm_address[STRLEN("vsock/") + DECIMAL_STR_MAX(unsigned)];
2327
2328 xsprintf(vm_address, "vsock/%u", child_cid);
2329 r = register_machine(
2330 bus,
2331 arg_machine,
2332 arg_uuid,
73fade29 2333 "systemd-vmspawn",
1e83cfff
SL
2334 arg_directory,
2335 child_cid,
2336 child_cid != VMADDR_CID_ANY ? vm_address : NULL,
ecc4287d
LP
2337 ssh_private_key_path,
2338 arg_keep_unit);
1e83cfff
SL
2339 if (r < 0)
2340 return r;
2341 }
2342
d7a6bb98 2343 assert_se(sigprocmask_many(SIG_BLOCK, /* ret_old_mask=*/ NULL, SIGCHLD) >= 0);
a8f940c4 2344
f72a0856
SL
2345 _cleanup_(sd_event_source_unrefp) sd_event_source *notify_event_source = NULL;
2346 _cleanup_(sd_event_unrefp) sd_event *event = NULL;
2347 r = sd_event_new(&event);
2348 if (r < 0)
2349 return log_error_errno(r, "Failed to get default event source: %m");
2350
2351 (void) sd_event_set_watchdog(event, true);
2352
6cff1854
LP
2353 _cleanup_(pidref_done) PidRef child_pidref = PIDREF_NULL;
2354
2355 r = pidref_safe_fork_full(
f72a0856 2356 qemu_binary,
6cff1854 2357 /* stdio_fds= */ NULL,
4364a542 2358 pass_fds, n_pass_fds,
6cff1854
LP
2359 FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_CLOEXEC_OFF|FORK_RLIMIT_NOFILE_SAFE,
2360 &child_pidref);
f72a0856 2361 if (r < 0)
6cff1854 2362 return r;
9de3cc14
SL
2363 if (r == 0) {
2364 /* set TERM and LANG if they are missing */
0d4f3873
LP
2365 if (setenv("TERM", "vt220", 0) < 0) {
2366 log_oom();
2367 goto fail;
2368 }
9de3cc14 2369
0d4f3873
LP
2370 if (setenv("LANG", "C.UTF-8", 0) < 0) {
2371 log_oom();
2372 goto fail;
2373 }
9de3cc14 2374
02aacdcf 2375 execv(qemu_binary, cmdline);
9de3cc14 2376 log_error_errno(errno, "Failed to execve %s: %m", qemu_binary);
0d4f3873 2377 fail:
9de3cc14
SL
2378 _exit(EXIT_FAILURE);
2379 }
2380
e29d5385 2381 /* Close relevant fds we passed to qemu in the parent. We don't need them anymore. */
c7c6b6f2 2382 child_vsock_fd = safe_close(child_vsock_fd);
e29d5385 2383 tap_fd = safe_close(tap_fd);
c7c6b6f2 2384
f72a0856
SL
2385 int exit_status = INT_MAX;
2386 if (use_vsock) {
51747b34 2387 r = setup_notify_parent(event, notify_sock_fd, &exit_status, &notify_event_source);
f72a0856 2388 if (r < 0)
cf9de8ef 2389 return log_error_errno(r, "Failed to setup event loop to handle VSOCK notify events: %m");
f72a0856
SL
2390 }
2391
305e5893
SL
2392 /* Used when talking to pid1 via SSH, but must survive until the function ends. */
2393 SSHInfo ssh_info = {
2394 .cid = child_cid,
2395 .private_key_path = ssh_private_key_path,
2396 .port = 22,
2397 };
2398 ShutdownInfo shutdown_info = {
2399 .ssh_info = &ssh_info,
2400 .pidref = &child_pidref,
2401 };
521e9286 2402
305e5893
SL
2403 (void) sd_event_add_signal(event, NULL, SIGINT | SD_EVENT_SIGNAL_PROCMASK, shutdown_vm_graceful, &shutdown_info);
2404 (void) sd_event_add_signal(event, NULL, SIGTERM | SD_EVENT_SIGNAL_PROCMASK, shutdown_vm_graceful, &shutdown_info);
2405 (void) sd_event_add_signal(event, NULL, (SIGRTMIN+4) | SD_EVENT_SIGNAL_PROCMASK, shutdown_vm_graceful, &shutdown_info);
f72a0856 2406
8b183505 2407 (void) sd_event_add_signal(event, NULL, (SIGRTMIN+18) | SD_EVENT_SIGNAL_PROCMASK, sigrtmin18_handler, NULL);
f72a0856 2408
ec97125a
LP
2409 r = sd_event_add_memory_pressure(event, NULL, NULL, NULL);
2410 if (r < 0)
2411 log_debug_errno(r, "Failed allocate memory pressure event source, ignoring: %m");
2412
f72a0856 2413 /* Exit when the child exits */
00299a80 2414 r = event_add_child_pidref(event, /* ret= */ NULL, &child_pidref, WEXITED, on_child_exit, /* userdata= */ NULL);
179dcf92
LP
2415 if (r < 0)
2416 return log_error_errno(r, "Failed to watch qemu process: &m");
f72a0856 2417
5eb39a38 2418 _cleanup_(osc_context_closep) sd_id128_t osc_context_id = SD_ID128_NULL;
795ec90c
LP
2419 _cleanup_(pty_forward_freep) PTYForward *forward = NULL;
2420 if (master >= 0) {
5eb39a38
LP
2421 if (!terminal_is_dumb()) {
2422 r = osc_context_open_vm(arg_machine, /* ret_seq= */ NULL, &osc_context_id);
2423 if (r < 0)
2424 return r;
2425 }
2426
795ec90c
LP
2427 r = pty_forward_new(event, master, ptyfwd_flags, &forward);
2428 if (r < 0)
2429 return log_error_errno(r, "Failed to create PTY forwarder: %m");
2430
d4ffb37b 2431 if (!arg_background && shall_tint_background()) {
795ec90c
LP
2432 _cleanup_free_ char *bg = NULL;
2433
2434 r = terminal_tint_color(130 /* green */, &bg);
2435 if (r < 0)
2436 log_debug_errno(r, "Failed to determine terminal background color, not tinting.");
2437 else
2438 (void) pty_forward_set_background_color(forward, bg);
2439 } else if (!isempty(arg_background))
2440 (void) pty_forward_set_background_color(forward, arg_background);
2441
23f9ff47
YW
2442 (void) pty_forward_set_window_title(forward, GLYPH_GREEN_CIRCLE, /* hostname = */ NULL,
2443 STRV_MAKE("Virtual Machine", arg_machine));
795ec90c
LP
2444 }
2445
f72a0856
SL
2446 r = sd_event_loop(event);
2447 if (r < 0)
2448 return log_error_errno(r, "Failed to run event loop: %m");
2449
b0dc7668
SL
2450 if (arg_register)
2451 (void) unregister_machine(bus, arg_machine);
2452
f72a0856
SL
2453 if (use_vsock) {
2454 if (exit_status == INT_MAX) {
cf9de8ef 2455 log_debug("Couldn't retrieve inner EXIT_STATUS from VSOCK");
f72a0856
SL
2456 return EXIT_SUCCESS;
2457 }
2458 if (exit_status != 0)
2459 log_warning("Non-zero exit code received: %d", exit_status);
2460 return exit_status;
2461 }
2462
2463 return 0;
2464}
2465
2466static int determine_names(void) {
2467 int r;
2468
b064cc56
SL
2469 if (!arg_directory && !arg_image) {
2470 if (arg_machine) {
2471 _cleanup_(image_unrefp) Image *i = NULL;
2472
1c0ade2e
LP
2473 r = image_find(arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER,
2474 IMAGE_MACHINE, arg_machine, NULL, &i);
b064cc56
SL
2475 if (r == -ENOENT)
2476 return log_error_errno(r, "No image for machine '%s'.", arg_machine);
2477 if (r < 0)
2478 return log_error_errno(r, "Failed to find image for machine '%s': %m", arg_machine);
2479
2480 if (IN_SET(i->type, IMAGE_RAW, IMAGE_BLOCK))
2481 r = free_and_strdup(&arg_image, i->path);
2482 else if (IN_SET(i->type, IMAGE_DIRECTORY, IMAGE_SUBVOLUME))
2483 r = free_and_strdup(&arg_directory, i->path);
2484 else
2485 assert_not_reached();
2486 if (r < 0)
2487 return log_oom();
2488 } else {
2489 r = safe_getcwd(&arg_directory);
2490 if (r < 0)
2491 return log_error_errno(r, "Failed to determine current directory: %m");
2492 }
2493 }
f72a0856
SL
2494
2495 if (!arg_machine) {
5c57a865
SL
2496 if (arg_directory && path_equal(arg_directory, "/")) {
2497 arg_machine = gethostname_malloc();
2498 if (!arg_machine)
2499 return log_oom();
2500 } else if (arg_image) {
2501 char *e;
f72a0856 2502
5c57a865
SL
2503 r = path_extract_filename(arg_image, &arg_machine);
2504 if (r < 0)
2505 return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_image);
f72a0856 2506
5c57a865
SL
2507 /* Truncate suffix if there is one */
2508 e = endswith(arg_machine, ".raw");
2509 if (e)
2510 *e = 0;
2511 } else {
2512 r = path_extract_filename(arg_directory, &arg_machine);
2513 if (r < 0)
2514 return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_directory);
2515 }
f72a0856
SL
2516
2517 hostname_cleanup(arg_machine);
2518 if (!hostname_is_valid(arg_machine, 0))
2519 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine machine name automatically, please use -M.");
2520 }
2521
2522 return 0;
9de3cc14
SL
2523}
2524
75331bed 2525static int verify_arguments(void) {
6af6d442
SL
2526 if (!strv_isempty(arg_initrds) && !arg_linux)
2527 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --initrd= cannot be used without --linux=.");
2528
ecc4287d
LP
2529 if (arg_keep_unit && arg_register && cg_pid_get_owner_uid(0, NULL) >= 0)
2530 /* Save the user from accidentally registering either user-$SESSION.scope or user@.service.
2531 * The latter is not technically a user session, but we don't need to labour the point. */
2532 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--keep-unit --register=yes may not be used when invoked from a user session.");
2533
75331bed
SL
2534 return 0;
2535}
2536
9de3cc14 2537static int run(int argc, char *argv[]) {
51747b34
SL
2538 int r, kvm_device_fd = -EBADF, vhost_device_fd = -EBADF;
2539 _cleanup_strv_free_ char **names = NULL;
9de3cc14
SL
2540
2541 log_setup();
2542
cf3beb27
SL
2543 arg_privileged = getuid() == 0;
2544
b0dc7668
SL
2545 /* don't attempt to register as a machine when running as a user */
2546 arg_register = arg_privileged;
2547
c3dd4e20
SL
2548 r = parse_environment();
2549 if (r < 0)
2550 return r;
2551
9de3cc14
SL
2552 r = parse_argv(argc, argv);
2553 if (r <= 0)
bd546b9b 2554 return r;
9de3cc14 2555
f72a0856
SL
2556 r = determine_names();
2557 if (r < 0)
bd546b9b 2558 return r;
f72a0856 2559
75331bed
SL
2560 r = verify_arguments();
2561 if (r < 0)
2562 return r;
2563
795ec90c 2564 if (!arg_quiet && arg_console_mode != CONSOLE_GUI) {
1807baa9 2565 _cleanup_free_ char *u = NULL;
5c57a865
SL
2566 const char *vm_path = arg_image ?: arg_directory;
2567 (void) terminal_urlify_path(vm_path, vm_path, &u);
1807baa9 2568
795ec90c 2569 log_info("%s %sSpawning VM %s on %s.%s",
1ae9b0cf 2570 glyph(GLYPH_LIGHT_SHADE), ansi_grey(), arg_machine, u ?: vm_path, ansi_normal());
795ec90c
LP
2571
2572 if (arg_console_mode == CONSOLE_INTERACTIVE)
2573 log_info("%s %sPress %sCtrl-]%s three times within 1s to kill VM.%s",
1ae9b0cf 2574 glyph(GLYPH_LIGHT_SHADE), ansi_grey(), ansi_highlight(), ansi_grey(), ansi_normal());
795ec90c
LP
2575 else if (arg_console_mode == CONSOLE_NATIVE)
2576 log_info("%s %sPress %sCtrl-a x%s to kill VM.%s",
1ae9b0cf 2577 glyph(GLYPH_LIGHT_SHADE), ansi_grey(), ansi_highlight(), ansi_grey(), ansi_normal());
1807baa9 2578 }
dbb2718f 2579
d50b9cbe
YW
2580 int n = sd_listen_fds_with_names(true, &names);
2581 if (n < 0)
2582 return log_error_errno(n, "Failed to get passed file descriptors: %m");
51747b34 2583
d50b9cbe 2584 for (int i = 0; i < n; i++) {
51747b34
SL
2585 int fd = SD_LISTEN_FDS_START + i;
2586 if (streq(names[i], "kvm"))
2587 kvm_device_fd = fd;
2588 else if (streq(names[i], "vhost-vsock"))
2589 vhost_device_fd = fd;
2590 else {
2591 log_notice("Couldn't recognize passed fd %d (%s), closing fd and ignoring...", fd, names[i]);
2592 safe_close(fd);
2593 }
2594 }
2595
51747b34 2596 return run_virtual_machine(kvm_device_fd, vhost_device_fd);
9de3cc14
SL
2597}
2598
2599DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);