]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/vmspawn/vmspawn.c
Merge pull request #31000 from flatcar-hub/krnowak/mutable-overlays
[thirdparty/systemd.git] / src / vmspawn / vmspawn.c
CommitLineData
9de3cc14
SL
1/* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3#include <getopt.h>
4#include <stdint.h>
811ad9e6 5#include <stdio.h>
9de3cc14 6#include <stdlib.h>
88af28d1 7#include <string.h>
811ad9e6 8#include <sys/stat.h>
9de3cc14
SL
9#include <unistd.h>
10
6af6d442
SL
11#include "bootspec.h"
12#include "chase.h"
13#include "dirent-util.h"
14#include "fd-util.h"
b064cc56 15#include "discover-image.h"
51747b34 16#include "sd-daemon.h"
19301e76
SL
17#include "sd-event.h"
18#include "sd-id128.h"
19
9de3cc14
SL
20#include "alloc-util.h"
21#include "architecture.h"
22#include "build.h"
f72a0856 23#include "common-signal.h"
9de3cc14
SL
24#include "copy.h"
25#include "creds-util.h"
19301e76 26#include "dissect-image.h"
9de3cc14 27#include "escape.h"
6cff1854 28#include "event-util.h"
a8f940c4 29#include "extract-word.h"
9de3cc14
SL
30#include "fileio.h"
31#include "format-util.h"
f72a0856 32#include "fs-util.h"
19301e76 33#include "gpt.h"
9de3cc14 34#include "hexdecoct.h"
f72a0856 35#include "hostname-util.h"
88af28d1 36#include "kernel-image.h"
9de3cc14
SL
37#include "log.h"
38#include "machine-credential.h"
88af28d1 39#include "macro.h"
9de3cc14 40#include "main-func.h"
cf3beb27 41#include "mkdir.h"
9de3cc14
SL
42#include "pager.h"
43#include "parse-argument.h"
44#include "parse-util.h"
cf3beb27 45#include "path-lookup.h"
9de3cc14
SL
46#include "path-util.h"
47#include "pretty-print.h"
48#include "process-util.h"
795ec90c 49#include "ptyfwd.h"
5c57a865 50#include "random-util.h"
cf3beb27 51#include "rm-rf.h"
f72a0856
SL
52#include "signal-util.h"
53#include "socket-util.h"
6af6d442 54#include "stat-util.h"
88af28d1 55#include "string-util.h"
9de3cc14
SL
56#include "strv.h"
57#include "tmpfile-util.h"
cf3beb27 58#include "unit-name.h"
a8f940c4 59#include "vmspawn-mount.h"
cf3beb27 60#include "vmspawn-scope.h"
9de3cc14
SL
61#include "vmspawn-settings.h"
62#include "vmspawn-util.h"
63
dbb2718f 64static bool arg_quiet = false;
9de3cc14 65static PagerFlags arg_pager_flags = 0;
5c57a865 66static char *arg_directory = NULL;
9de3cc14 67static char *arg_image = NULL;
f72a0856 68static char *arg_machine = NULL;
2c0061c7
LP
69static char *arg_cpus = NULL;
70static uint64_t arg_ram = UINT64_C(2) * U64_GB;
71static int arg_kvm = -1;
72static int arg_vsock = -1;
06d4fe57 73static unsigned arg_vsock_cid = VMADDR_CID_ANY;
cf3beb27 74static int arg_tpm = -1;
0f25e3e4 75static char *arg_linux = NULL;
811ad9e6 76static char **arg_initrds = NULL;
795ec90c 77static ConsoleMode arg_console_mode = CONSOLE_INTERACTIVE;
2c0061c7 78static NetworkStack arg_network_stack = NETWORK_STACK_NONE;
9de3cc14 79static int arg_secure_boot = -1;
bd546b9b 80static MachineCredentialContext arg_credentials = {};
c05ca33a 81static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
a8f940c4 82static RuntimeMountContext arg_runtime_mounts = {};
9de3cc14 83static SettingsMask arg_settings_mask = 0;
e8ce204d 84static char *arg_firmware = NULL;
cf3beb27 85static char *arg_runtime_directory = NULL;
258d2694 86static char *arg_forward_journal = NULL;
cf3beb27
SL
87static bool arg_runtime_directory_created = false;
88static bool arg_privileged = false;
0f25e3e4 89static char **arg_kernel_cmdline_extra = NULL;
1ec3218e 90static char **arg_extra_drives = NULL;
795ec90c 91static char *arg_background = NULL;
9de3cc14 92
5c57a865 93STATIC_DESTRUCTOR_REGISTER(arg_directory, freep);
9de3cc14 94STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
f72a0856 95STATIC_DESTRUCTOR_REGISTER(arg_machine, freep);
2c0061c7 96STATIC_DESTRUCTOR_REGISTER(arg_cpus, freep);
cf3beb27 97STATIC_DESTRUCTOR_REGISTER(arg_runtime_directory, freep);
bd546b9b 98STATIC_DESTRUCTOR_REGISTER(arg_credentials, machine_credential_context_done);
e8ce204d 99STATIC_DESTRUCTOR_REGISTER(arg_firmware, freep);
0f25e3e4 100STATIC_DESTRUCTOR_REGISTER(arg_linux, freep);
811ad9e6 101STATIC_DESTRUCTOR_REGISTER(arg_initrds, strv_freep);
a8f940c4 102STATIC_DESTRUCTOR_REGISTER(arg_runtime_mounts, runtime_mount_context_done);
258d2694 103STATIC_DESTRUCTOR_REGISTER(arg_forward_journal, freep);
0f25e3e4 104STATIC_DESTRUCTOR_REGISTER(arg_kernel_cmdline_extra, strv_freep);
1ec3218e 105STATIC_DESTRUCTOR_REGISTER(arg_extra_drives, strv_freep);
795ec90c 106STATIC_DESTRUCTOR_REGISTER(arg_background, freep);
9de3cc14
SL
107
108static int help(void) {
109 _cleanup_free_ char *link = NULL;
110 int r;
111
112 pager_open(arg_pager_flags);
113
114 r = terminal_urlify_man("systemd-vmspawn", "1", &link);
115 if (r < 0)
116 return log_oom();
117
118 printf("%1$s [OPTIONS...] [ARGUMENTS...]\n\n"
119 "%5$sSpawn a command or OS in a virtual machine.%6$s\n\n"
7e2c6c74
ZJS
120 " -h --help Show this help\n"
121 " --version Print version string\n"
122 " -q --quiet Do not show status information\n"
123 " --no-pager Do not pipe output into a pager\n"
1d748d7c 124 "\n%3$sImage:%4$s\n"
7e2c6c74
ZJS
125 " -D --directory=PATH Root directory for the VM\n"
126 " -i --image=FILE|DEVICE Root file system disk image or device for the VM\n"
1d748d7c 127 "\n%3$sHost Configuration:%4$s\n"
2c0061c7
LP
128 " --cpus=CPUS Configure number of CPUs in guest\n"
129 " --ram=BYTES Configure guest's RAM size\n"
130 " --kvm=BOOL Enable use of KVM\n"
131 " --vsock=BOOL Override autodetection of VSOCK support\n"
132 " --vsock-cid=CID Specify the CID to use for the guest's VSOCK support\n"
7e2c6c74
ZJS
133 " --tpm=BOOL Enable use of a virtual TPM\n"
134 " --linux=PATH Specify the linux kernel for direct kernel boot\n"
135 " --initrd=PATH Specify the initrd for direct kernel boot\n"
2c0061c7
LP
136 " -n --network-tap Create a TAP device for networking\n"
137 " --network-user-mode Use user mode networking\n"
7e2c6c74
ZJS
138 " --secure-boot=BOOL Enable searching for firmware supporting SecureBoot\n"
139 " --firmware=PATH|list Select firmware definition file (or list available)\n"
1d748d7c 140 "\n%3$sSystem Identity:%4$s\n"
7e2c6c74 141 " -M --machine=NAME Set the machine name for the VM\n"
c05ca33a
SL
142 "\n%3$sUser Namespacing:%4$s\n"
143 " --private-users=UIDBASE[:NUIDS]\n"
7e2c6c74
ZJS
144 " Configure the UID/GID range to map into the\n"
145 " virtiofsd namespace\n"
a8f940c4
SL
146 "\n%3$sMounts:%4$s\n"
147 " --bind=SOURCE[:TARGET]\n"
7e2c6c74 148 " Mount a file or directory from the host into the VM\n"
a8f940c4 149 " --bind-ro=SOURCE[:TARGET]\n"
7e2c6c74 150 " Mount a file or directory, but read-only\n"
1ec3218e 151 " --extra-drive=PATH Adds an additional disk to the virtual machine\n"
258d2694
SL
152 "\n%3$sIntegration:%4$s\n"
153 " --forward-journal=FILE|DIR\n"
7e2c6c74 154 " Forward the VM's journal to the host\n"
795ec90c
LP
155 "\n%3$sInput/Output:%4$s\n"
156 " --console=MODE Console mode (interactive, native, gui)\n"
157 " --background=COLOR Set ANSI color for background\n"
1d748d7c 158 "\n%3$sCredentials:%4$s\n"
9de3cc14 159 " --set-credential=ID:VALUE\n"
7e2c6c74 160 " Pass a credential with literal value to the VM\n"
9de3cc14 161 " --load-credential=ID:PATH\n"
7e2c6c74
ZJS
162 " Load credential for the VM from file or AF_UNIX\n"
163 " stream socket.\n"
9de3cc14
SL
164 "\nSee the %2$s for details.\n",
165 program_invocation_short_name,
166 link,
167 ansi_underline(),
168 ansi_normal(),
169 ansi_highlight(),
170 ansi_normal());
171
172 return 0;
173}
174
175static int parse_argv(int argc, char *argv[]) {
176 enum {
177 ARG_VERSION = 0x100,
178 ARG_NO_PAGER,
2c0061c7
LP
179 ARG_CPUS,
180 ARG_RAM,
181 ARG_KVM,
182 ARG_VSOCK,
f72a0856 183 ARG_VSOCK_CID,
cf3beb27 184 ARG_TPM,
0f25e3e4 185 ARG_LINUX,
88af28d1 186 ARG_INITRD,
9de3cc14 187 ARG_QEMU_GUI,
75331bed 188 ARG_NETWORK_USER_MODE,
a8f940c4
SL
189 ARG_BIND,
190 ARG_BIND_RO,
1ec3218e 191 ARG_EXTRA_DRIVE,
9de3cc14 192 ARG_SECURE_BOOT,
c05ca33a 193 ARG_PRIVATE_USERS,
258d2694 194 ARG_FORWARD_JOURNAL,
9de3cc14
SL
195 ARG_SET_CREDENTIAL,
196 ARG_LOAD_CREDENTIAL,
e8ce204d 197 ARG_FIRMWARE,
795ec90c
LP
198 ARG_CONSOLE,
199 ARG_BACKGROUND,
9de3cc14
SL
200 };
201
202 static const struct option options[] = {
75331bed
SL
203 { "help", no_argument, NULL, 'h' },
204 { "version", no_argument, NULL, ARG_VERSION },
205 { "quiet", no_argument, NULL, 'q' },
206 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
207 { "image", required_argument, NULL, 'i' },
5c57a865 208 { "directory", required_argument, NULL, 'D' },
75331bed 209 { "machine", required_argument, NULL, 'M' },
2c0061c7
LP
210 { "cpus", required_argument, NULL, ARG_CPUS },
211 { "qemu-smp", required_argument, NULL, ARG_CPUS }, /* Compat alias */
212 { "ram", required_argument, NULL, ARG_RAM },
213 { "qemu-mem", required_argument, NULL, ARG_RAM }, /* Compat alias */
214 { "kvm", required_argument, NULL, ARG_KVM },
215 { "qemu-kvm", required_argument, NULL, ARG_KVM }, /* Compat alias */
216 { "vsock", required_argument, NULL, ARG_VSOCK },
217 { "qemu-vsock", required_argument, NULL, ARG_VSOCK }, /* Compat alias */
75331bed
SL
218 { "vsock-cid", required_argument, NULL, ARG_VSOCK_CID },
219 { "tpm", required_argument, NULL, ARG_TPM },
220 { "linux", required_argument, NULL, ARG_LINUX },
221 { "initrd", required_argument, NULL, ARG_INITRD },
795ec90c
LP
222 { "console", required_argument, NULL, ARG_CONSOLE },
223 { "qemu-gui", no_argument, NULL, ARG_QEMU_GUI }, /* compat option */
75331bed
SL
224 { "network-tap", no_argument, NULL, 'n' },
225 { "network-user-mode", no_argument, NULL, ARG_NETWORK_USER_MODE },
a8f940c4
SL
226 { "bind", required_argument, NULL, ARG_BIND },
227 { "bind-ro", required_argument, NULL, ARG_BIND_RO },
1ec3218e 228 { "extra-drive", required_argument, NULL, ARG_EXTRA_DRIVE },
75331bed 229 { "secure-boot", required_argument, NULL, ARG_SECURE_BOOT },
c05ca33a 230 { "private-users", required_argument, NULL, ARG_PRIVATE_USERS },
258d2694 231 { "forward-journal", required_argument, NULL, ARG_FORWARD_JOURNAL },
75331bed
SL
232 { "set-credential", required_argument, NULL, ARG_SET_CREDENTIAL },
233 { "load-credential", required_argument, NULL, ARG_LOAD_CREDENTIAL },
234 { "firmware", required_argument, NULL, ARG_FIRMWARE },
795ec90c 235 { "background", required_argument, NULL, ARG_BACKGROUND },
9de3cc14
SL
236 {}
237 };
238
239 int c, r;
240
241 assert(argc >= 0);
242 assert(argv);
243
244 optind = 0;
5c57a865 245 while ((c = getopt_long(argc, argv, "+hD:i:M:nq", options, NULL)) >= 0)
9de3cc14
SL
246 switch (c) {
247 case 'h':
248 return help();
249
250 case ARG_VERSION:
251 return version();
252
dbb2718f
LP
253 case 'q':
254 arg_quiet = true;
255 break;
256
5c57a865 257 case 'D':
a8f940c4 258 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_directory);
5c57a865
SL
259 if (r < 0)
260 return r;
261
262 arg_settings_mask |= SETTING_DIRECTORY;
263 break;
264
9de3cc14
SL
265 case 'i':
266 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_image);
267 if (r < 0)
268 return r;
269
270 arg_settings_mask |= SETTING_DIRECTORY;
271 break;
272
f72a0856
SL
273 case 'M':
274 if (isempty(optarg))
275 arg_machine = mfree(arg_machine);
276 else {
277 if (!hostname_is_valid(optarg, 0))
278 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
279 "Invalid machine name: %s", optarg);
280
281 r = free_and_strdup(&arg_machine, optarg);
282 if (r < 0)
283 return log_oom();
284 }
285 break;
286
9de3cc14
SL
287 case ARG_NO_PAGER:
288 arg_pager_flags |= PAGER_DISABLE;
289 break;
290
2c0061c7
LP
291 case ARG_CPUS:
292 r = free_and_strdup_warn(&arg_cpus, optarg);
d9c4917b
YW
293 if (r < 0)
294 return r;
9de3cc14
SL
295 break;
296
2c0061c7
LP
297 case ARG_RAM:
298 r = parse_size(optarg, 1024, &arg_ram);
9de3cc14 299 if (r < 0)
2c0061c7 300 return log_error_errno(r, "Failed to parse --ram=%s: %m", optarg);
9de3cc14
SL
301 break;
302
2c0061c7
LP
303 case ARG_KVM:
304 r = parse_tristate(optarg, &arg_kvm);
9de3cc14 305 if (r < 0)
2c0061c7 306 return log_error_errno(r, "Failed to parse --kvm=%s: %m", optarg);
9de3cc14
SL
307 break;
308
2c0061c7
LP
309 case ARG_VSOCK:
310 r = parse_tristate(optarg, &arg_vsock);
f72a0856 311 if (r < 0)
2c0061c7 312 return log_error_errno(r, "Failed to parse --vsock=%s: %m", optarg);
f72a0856
SL
313 break;
314
06d4fe57 315 case ARG_VSOCK_CID:
f72a0856 316 if (isempty(optarg))
06d4fe57 317 arg_vsock_cid = VMADDR_CID_ANY;
f72a0856 318 else {
06d4fe57
LP
319 unsigned cid;
320
321 r = vsock_parse_cid(optarg, &cid);
f72a0856 322 if (r < 0)
06d4fe57
LP
323 return log_error_errno(r, "Failed to parse --vsock-cid: %s", optarg);
324 if (!VSOCK_CID_IS_REGULAR(cid))
325 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified CID is not regular, refusing: %u", cid);
326
327 arg_vsock_cid = cid;
f72a0856 328 }
f72a0856 329 break;
f72a0856 330
cf3beb27
SL
331 case ARG_TPM:
332 r = parse_tristate(optarg, &arg_tpm);
333 if (r < 0)
334 return log_error_errno(r, "Failed to parse --tpm=%s: %m", optarg);
335 break;
336
0f25e3e4
SL
337 case ARG_LINUX:
338 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_linux);
339 if (r < 0)
340 return r;
341 break;
342
88af28d1 343 case ARG_INITRD: {
811ad9e6
SL
344 _cleanup_free_ char *initrd_path = NULL;
345 r = parse_path_argument(optarg, /* suppress_root= */ false, &initrd_path);
88af28d1
SL
346 if (r < 0)
347 return r;
811ad9e6
SL
348
349 r = strv_consume(&arg_initrds, TAKE_PTR(initrd_path));
350 if (r < 0)
351 return log_oom();
352
88af28d1
SL
353 break;
354 }
355
795ec90c
LP
356 case ARG_CONSOLE:
357 arg_console_mode = console_mode_from_string(optarg);
358 if (arg_console_mode < 0)
359 return log_error_errno(arg_console_mode, "Failed to parse specified console mode: %s", optarg);
360
361 break;
362
9de3cc14 363 case ARG_QEMU_GUI:
795ec90c 364 arg_console_mode = CONSOLE_GUI;
9de3cc14
SL
365 break;
366
75331bed 367 case 'n':
2c0061c7 368 arg_network_stack = NETWORK_STACK_TAP;
75331bed
SL
369 break;
370
371 case ARG_NETWORK_USER_MODE:
2c0061c7 372 arg_network_stack = NETWORK_STACK_USER;
75331bed
SL
373 break;
374
a8f940c4
SL
375 case ARG_BIND:
376 case ARG_BIND_RO:
377 r = runtime_mount_parse(&arg_runtime_mounts, optarg, c == ARG_BIND_RO);
378 if (r < 0)
379 return log_error_errno(r, "Failed to parse --bind(-ro)= argument %s: %m", optarg);
380
381 arg_settings_mask |= SETTING_BIND_MOUNTS;
382 break;
383
1ec3218e
SL
384 case ARG_EXTRA_DRIVE: {
385 _cleanup_free_ char *drive_path = NULL;
386
387 r = parse_path_argument(optarg, /* suppress_root= */ false, &drive_path);
388 if (r < 0)
389 return r;
390
391 r = strv_consume(&arg_extra_drives, TAKE_PTR(drive_path));
392 if (r < 0)
393 return log_oom();
394 break;
395 }
396
9de3cc14
SL
397 case ARG_SECURE_BOOT:
398 r = parse_tristate(optarg, &arg_secure_boot);
399 if (r < 0)
400 return log_error_errno(r, "Failed to parse --secure-boot=%s: %m", optarg);
401 break;
402
c05ca33a
SL
403 case ARG_PRIVATE_USERS:
404 r = parse_userns_uid_range(optarg, &arg_uid_shift, &arg_uid_range);
405 if (r < 0)
406 return r;
407 break;
408
258d2694
SL
409 case ARG_FORWARD_JOURNAL:
410 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_forward_journal);
411 if (r < 0)
412 return r;
413 break;
414
9de3cc14 415 case ARG_SET_CREDENTIAL: {
bd546b9b 416 r = machine_credential_set(&arg_credentials, optarg);
9de3cc14 417 if (r < 0)
6045958b 418 return r;
9de3cc14
SL
419 arg_settings_mask |= SETTING_CREDENTIALS;
420 break;
421 }
422
423 case ARG_LOAD_CREDENTIAL: {
bd546b9b 424 r = machine_credential_load(&arg_credentials, optarg);
9de3cc14 425 if (r < 0)
6045958b 426 return r;
9de3cc14
SL
427
428 arg_settings_mask |= SETTING_CREDENTIALS;
429 break;
430 }
431
e8ce204d
LP
432 case ARG_FIRMWARE:
433 if (streq(optarg, "list")) {
434 _cleanup_strv_free_ char **l = NULL;
435
436 r = list_ovmf_config(&l);
437 if (r < 0)
438 return log_error_errno(r, "Failed to list firmwares: %m");
439
440 bool nl = false;
441 fputstrv(stdout, l, "\n", &nl);
442 if (nl)
443 putchar('\n');
444
445 return 0;
446 }
447
448 if (!isempty(optarg) && !path_is_absolute(optarg) && !startswith(optarg, "./"))
449 return log_error_errno(SYNTHETIC_ERRNO(errno), "Absolute path or path starting with './' required.");
450
451 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_firmware);
452 if (r < 0)
453 return r;
454
455 break;
456
795ec90c
LP
457 case ARG_BACKGROUND:
458 r = free_and_strdup_warn(&arg_background, optarg);
459 if (r < 0)
460 return r;
461 break;
462
9de3cc14
SL
463 case '?':
464 return -EINVAL;
465
466 default:
467 assert_not_reached();
468 }
469
470 if (argc > optind) {
0f25e3e4
SL
471 arg_kernel_cmdline_extra = strv_copy(argv + optind);
472 if (!arg_kernel_cmdline_extra)
9de3cc14
SL
473 return log_oom();
474
475 arg_settings_mask |= SETTING_START_MODE;
476 }
477
478 return 1;
479}
480
f72a0856
SL
481static int open_vsock(void) {
482 _cleanup_close_ int vsock_fd = -EBADF;
483 int r;
484 static const union sockaddr_union bind_addr = {
485 .vm.svm_family = AF_VSOCK,
486 .vm.svm_cid = VMADDR_CID_ANY,
487 .vm.svm_port = VMADDR_PORT_ANY,
488 };
489
490 vsock_fd = socket(AF_VSOCK, SOCK_STREAM|SOCK_CLOEXEC, 0);
491 if (vsock_fd < 0)
492 return log_error_errno(errno, "Failed to open AF_VSOCK socket: %m");
493
494 r = bind(vsock_fd, &bind_addr.sa, sizeof(bind_addr.vm));
495 if (r < 0)
cf9de8ef 496 return log_error_errno(errno, "Failed to bind to VSOCK address %u:%u: %m", bind_addr.vm.svm_cid, bind_addr.vm.svm_port);
f72a0856
SL
497
498 r = listen(vsock_fd, SOMAXCONN_DELUXE);
499 if (r < 0)
cf9de8ef 500 return log_error_errno(errno, "Failed to listen on VSOCK: %m");
f72a0856
SL
501
502 return TAKE_FD(vsock_fd);
503}
504
505static int vmspawn_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
506 char buf[NOTIFY_BUFFER_MAX+1];
507 const char *p = NULL;
508 struct iovec iovec = {
509 .iov_base = buf,
510 .iov_len = sizeof(buf)-1,
511 };
512 struct msghdr msghdr = {
513 .msg_iov = &iovec,
514 .msg_iovlen = 1,
515 };
516 ssize_t n;
517 _cleanup_strv_free_ char **tags = NULL;
518 int r, *exit_status = ASSERT_PTR(userdata);
519
520 n = recvmsg_safe(fd, &msghdr, MSG_DONTWAIT);
521 if (ERRNO_IS_NEG_TRANSIENT(n))
522 return 0;
523 if (n == -EXFULL) {
524 log_warning_errno(n, "Got message with truncated control data, ignoring: %m");
525 return 0;
526 }
527 if (n < 0)
528 return log_warning_errno(n, "Couldn't read notification socket: %m");
529
530 if ((size_t) n >= sizeof(buf)) {
531 log_warning("Received notify message exceeded maximum size. Ignoring.");
532 return 0;
533 }
534
535 buf[n] = 0;
536 tags = strv_split(buf, "\n\r");
537 if (!tags)
538 return log_oom();
539
540 STRV_FOREACH(s, tags)
541 log_debug("Received tag %s from notify socket", *s);
542
543 if (strv_contains(tags, "READY=1")) {
544 r = sd_notify(false, "READY=1\n");
545 if (r < 0)
546 log_warning_errno(r, "Failed to send readiness notification, ignoring: %m");
547 }
548
549 p = strv_find_startswith(tags, "STATUS=");
550 if (p)
551 (void) sd_notifyf(false, "STATUS=VM running: %s", p);
552
553 p = strv_find_startswith(tags, "EXIT_STATUS=");
554 if (p) {
555 r = safe_atoi(p, exit_status);
556 if (r < 0)
557 log_warning_errno(r, "Failed to parse exit status from %s, ignoring: %m", p);
558 }
559
560 /* we will only receive one message from each connection so disable this source once one is received */
561 source = sd_event_source_disable_unref(source);
562
563 return 0;
564}
565
566static int vmspawn_dispatch_vsock_connections(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
567 int r;
568 sd_event *event;
569 _cleanup_close_ int conn_fd = -EBADF;
570
571 assert(userdata);
572
573 if (revents != EPOLLIN) {
cf9de8ef 574 log_warning("Got unexpected poll event for VSOCK fd.");
f72a0856
SL
575 return 0;
576 }
577
578 conn_fd = accept4(fd, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK);
579 if (conn_fd < 0) {
cf9de8ef 580 log_warning_errno(errno, "Failed to accept connection from VSOCK fd (%m), ignoring...");
f72a0856
SL
581 return 0;
582 }
583
584 event = sd_event_source_get_event(source);
585 if (!event)
586 return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Failed to retrieve event from event source, exiting task");
587
588 /* add a new floating task to read from the connection */
589 r = sd_event_add_io(event, NULL, conn_fd, revents, vmspawn_dispatch_notify_fd, userdata);
590 if (r < 0)
591 return log_error_errno(r, "Failed to allocate notify connection event source: %m");
592
593 /* conn_fd is now owned by the event loop so don't clean it up */
594 TAKE_FD(conn_fd);
595
596 return 0;
597}
598
400da3e4 599static int setup_notify_parent(sd_event *event, int fd, int *exit_status, sd_event_source **ret_notify_event_source) {
f72a0856
SL
600 int r;
601
400da3e4
LP
602 assert(event);
603 assert(fd >= 0);
604 assert(exit_status);
605 assert(ret_notify_event_source);
606
607 r = sd_event_add_io(event, ret_notify_event_source, fd, EPOLLIN, vmspawn_dispatch_vsock_connections, exit_status);
f72a0856
SL
608 if (r < 0)
609 return log_error_errno(r, "Failed to allocate notify socket event source: %m");
610
400da3e4 611 (void) sd_event_source_set_description(*ret_notify_event_source, "vmspawn-notify-sock");
f72a0856
SL
612
613 return 0;
614}
615
616static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
6cff1854
LP
617 PidRef *pidref = userdata;
618 int r;
619
620 /* TODO: actually talk to qemu and ask the guest to shutdown here */
f72a0856 621
6cff1854
LP
622 if (pidref) {
623 r = pidref_kill(pidref, SIGKILL);
624 if (r < 0)
625 log_warning_errno(r, "Failed to kill qemu, terminating: %m");
626 else {
f72a0856
SL
627 log_info("Trying to halt qemu. Send SIGTERM again to trigger vmspawn to immediately terminate.");
628 sd_event_source_set_userdata(s, NULL);
629 return 0;
630 }
631 }
632
633 sd_event_exit(sd_event_source_get_event(s), 0);
634 return 0;
635}
636
637static int on_child_exit(sd_event_source *s, const siginfo_t *si, void *userdata) {
638 sd_event_exit(sd_event_source_get_event(s), 0);
639 return 0;
640}
641
642static int cmdline_add_vsock(char ***cmdline, int vsock_fd) {
643 int r;
644
645 r = strv_extend(cmdline, "-smbios");
646 if (r < 0)
647 return r;
648
649 union sockaddr_union addr;
650 socklen_t addr_len = sizeof addr.vm;
651 r = getsockname(vsock_fd, &addr.sa, &addr_len);
652 if (r < 0)
653 return -errno;
654 assert(addr_len >= sizeof addr.vm);
655 assert(addr.vm.svm_family == AF_VSOCK);
656
f72a0856
SL
657 r = strv_extendf(cmdline, "type=11,value=io.systemd.credential:vmm.notify_socket=vsock-stream:%u:%u", (unsigned) VMADDR_CID_HOST, addr.vm.svm_port);
658 if (r < 0)
659 return r;
660
661 return 0;
662}
663
d90a05b6
LP
664static int start_tpm(
665 sd_bus *bus,
666 const char *scope,
667 const char *swtpm,
668 char **ret_state_tempdir) {
669
cf3beb27
SL
670 _cleanup_(rm_rf_physical_and_freep) char *state_dir = NULL;
671 _cleanup_free_ char *scope_prefix = NULL;
672 _cleanup_(socket_service_pair_done) SocketServicePair ssp = {
673 .socket_type = SOCK_STREAM,
674 };
675 int r;
676
677 assert(bus);
678 assert(scope);
d90a05b6 679 assert(swtpm);
cf3beb27
SL
680 assert(ret_state_tempdir);
681
682 r = unit_name_to_prefix(scope, &scope_prefix);
683 if (r < 0)
684 return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
685
686 ssp.unit_name_prefix = strjoin(scope_prefix, "-tpm");
687 if (!ssp.unit_name_prefix)
688 return log_oom();
689
690 state_dir = path_join(arg_runtime_directory, ssp.unit_name_prefix);
691 if (!state_dir)
692 return log_oom();
693
694 if (arg_runtime_directory_created) {
695 ssp.runtime_directory = path_join("systemd/vmspawn", ssp.unit_name_prefix);
696 if (!ssp.runtime_directory)
697 return log_oom();
698 }
699
700 ssp.listen_address = path_join(state_dir, "sock");
701 if (!ssp.listen_address)
702 return log_oom();
703
519bad6c
LP
704 _cleanup_free_ char *swtpm_setup = NULL;
705 r = find_executable("swtpm_setup", &swtpm_setup);
706 if (r < 0)
707 return log_error_errno(r, "Failed to find swtpm_setup binary: %m");
708
709 ssp.exec_start_pre = strv_new(swtpm_setup, "--tpm-state", state_dir, "--tpm2", "--pcr-banks", "sha256");
710 if (!ssp.exec_start_pre)
711 return log_oom();
712
d90a05b6 713 ssp.exec_start = strv_new(swtpm, "socket", "--tpm2", "--tpmstate");
cf3beb27
SL
714 if (!ssp.exec_start)
715 return log_oom();
716
717 r = strv_extendf(&ssp.exec_start, "dir=%s", state_dir);
718 if (r < 0)
719 return log_oom();
720
721 r = strv_extend_many(&ssp.exec_start, "--ctrl", "type=unixio,fd=3");
722 if (r < 0)
723 return log_oom();
724
725 r = start_socket_service_pair(bus, scope, &ssp);
726 if (r < 0)
727 return r;
728
729 *ret_state_tempdir = TAKE_PTR(state_dir);
cf3beb27
SL
730 return 0;
731}
732
258d2694
SL
733static int start_systemd_journal_remote(sd_bus *bus, const char *scope, unsigned port, const char *sd_journal_remote, char **listen_address) {
734 _cleanup_free_ char *scope_prefix = NULL;
735 _cleanup_(socket_service_pair_done) SocketServicePair ssp = {
736 .socket_type = SOCK_STREAM,
737 };
738 int r;
739
740 assert(bus);
741 assert(scope);
742 assert(sd_journal_remote);
743
744 r = unit_name_to_prefix(scope, &scope_prefix);
745 if (r < 0)
746 return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
747
748 ssp.unit_name_prefix = strjoin(scope_prefix, "-forward-journal");
749 if (!ssp.unit_name_prefix)
750 return log_oom();
751
752 r = asprintf(&ssp.listen_address, "vsock:2:%u", port);
753 if (r < 0)
754 return log_oom();
755
756 ssp.exec_start = strv_new(sd_journal_remote,
757 "--output", arg_forward_journal,
758 "--split-mode", endswith(arg_forward_journal, ".journal") ? "none" : "host");
759 if (!ssp.exec_start)
760 return log_oom();
761
762 r = start_socket_service_pair(bus, scope, &ssp);
763 if (r < 0)
764 return r;
765
766 if (listen_address)
767 *listen_address = TAKE_PTR(ssp.listen_address);
768
769 return 0;
770}
771
19301e76
SL
772static int discover_root(char **ret) {
773 int r;
774 _cleanup_(dissected_image_unrefp) DissectedImage *image = NULL;
775 _cleanup_free_ char *root = NULL;
776
777 assert(ret);
778
779 r = dissect_image_file_and_warn(
780 arg_image,
781 /* verity= */ NULL,
782 /* mount_options= */ NULL,
783 /* image_policy= */ NULL,
784 /* flags= */ 0,
785 &image);
786 if (r < 0)
787 return r;
788
789 if (image->partitions[PARTITION_ROOT].found)
790 root = strjoin("root=PARTUUID=", SD_ID128_TO_UUID_STRING(image->partitions[PARTITION_ROOT].uuid));
791 else if (image->partitions[PARTITION_USR].found)
792 root = strjoin("mount.usr=PARTUUID=", SD_ID128_TO_UUID_STRING(image->partitions[PARTITION_USR].uuid));
793 else
794 return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Cannot perform a direct kernel boot without a root or usr partition, refusing");
795
796 if (!root)
797 return log_oom();
798
799 *ret = TAKE_PTR(root);
5c57a865
SL
800 return 0;
801}
802
803static int find_virtiofsd(char **ret) {
804 int r;
805 _cleanup_free_ char *virtiofsd = NULL;
806
807 assert(ret);
808
809 r = find_executable("virtiofsd", &virtiofsd);
810 if (r < 0 && r != -ENOENT)
811 return log_error_errno(r, "Error while searching for virtiofsd: %m");
812
813 if (!virtiofsd) {
814 FOREACH_STRING(file, "/usr/libexec/virtiofsd", "/usr/lib/virtiofsd") {
815 if (access(file, X_OK) >= 0) {
816 virtiofsd = strdup(file);
817 if (!virtiofsd)
818 return log_oom();
819 break;
820 }
821
822 if (!IN_SET(errno, ENOENT, EACCES))
823 return log_error_errno(errno, "Error while searching for virtiofsd: %m");
824 }
825 }
826
827 if (!virtiofsd)
828 return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Failed to find virtiofsd binary.");
829
830 *ret = TAKE_PTR(virtiofsd);
831 return 0;
832}
833
a8f940c4 834static int start_virtiofsd(sd_bus *bus, const char *scope, const char *directory, bool uidmap, char **ret_state_tempdir, char **ret_sock_name) {
5c57a865
SL
835 _cleanup_(rm_rf_physical_and_freep) char *state_dir = NULL;
836 _cleanup_free_ char *virtiofsd = NULL, *sock_name = NULL, *scope_prefix = NULL;
837 _cleanup_(socket_service_pair_done) SocketServicePair ssp = {
838 .socket_type = SOCK_STREAM,
839 };
840 static unsigned virtiofsd_instance = 0;
841 int r;
842
843 assert(bus);
844 assert(scope);
845 assert(directory);
846 assert(ret_state_tempdir);
847 assert(ret_sock_name);
848
849 r = find_virtiofsd(&virtiofsd);
850 if (r < 0)
851 return r;
852
853 r = unit_name_to_prefix(scope, &scope_prefix);
854 if (r < 0)
855 return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
856
857 if (asprintf(&ssp.unit_name_prefix, "%s-virtiofsd-%u", scope_prefix, virtiofsd_instance++) < 0)
858 return log_oom();
859
860 state_dir = path_join(arg_runtime_directory, ssp.unit_name_prefix);
861 if (!state_dir)
862 return log_oom();
863
864 if (arg_runtime_directory_created) {
865 ssp.runtime_directory = strjoin("systemd/vmspawn/", ssp.unit_name_prefix);
866 if (!ssp.runtime_directory)
867 return log_oom();
868 }
869
870 if (asprintf(&sock_name, "sock-%"PRIx64, random_u64()) < 0)
871 return log_oom();
872
873 ssp.listen_address = path_join(state_dir, sock_name);
874 if (!ssp.listen_address)
875 return log_oom();
876
877 /* QEMU doesn't support submounts so don't announce them */
878 ssp.exec_start = strv_new(virtiofsd, "--shared-dir", directory, "--xattr", "--fd", "3", "--no-announce-submounts");
879 if (!ssp.exec_start)
880 return log_oom();
881
a8f940c4 882 if (uidmap && arg_uid_shift != UID_INVALID) {
c05ca33a
SL
883 r = strv_extend(&ssp.exec_start, "--uid-map");
884 if (r < 0)
885 return log_oom();
886
887 r = strv_extendf(&ssp.exec_start, ":0:" UID_FMT ":" UID_FMT ":", arg_uid_shift, arg_uid_range);
888 if (r < 0)
889 return log_oom();
890
891 r = strv_extend(&ssp.exec_start, "--gid-map");
892 if (r < 0)
893 return log_oom();
894
895 r = strv_extendf(&ssp.exec_start, ":0:" GID_FMT ":" GID_FMT ":", arg_uid_shift, arg_uid_range);
896 if (r < 0)
897 return log_oom();
898 }
899
5c57a865
SL
900 r = start_socket_service_pair(bus, scope, &ssp);
901 if (r < 0)
902 return r;
903
904 *ret_state_tempdir = TAKE_PTR(state_dir);
905 *ret_sock_name = TAKE_PTR(sock_name);
19301e76
SL
906
907 return 0;
908}
909
910static int kernel_cmdline_maybe_append_root(void) {
911 int r;
912 bool cmdline_contains_root = strv_find_startswith(arg_kernel_cmdline_extra, "root=")
913 || strv_find_startswith(arg_kernel_cmdline_extra, "mount.usr=");
914
915 if (!cmdline_contains_root) {
916 _cleanup_free_ char *root = NULL;
917
918 r = discover_root(&root);
919 if (r < 0)
920 return r;
921
922 log_debug("Determined root file system %s from dissected image", root);
923
924 r = strv_consume(&arg_kernel_cmdline_extra, TAKE_PTR(root));
925 if (r < 0)
926 return log_oom();
927 }
928
929 return 0;
930}
931
6af6d442
SL
932static int discover_boot_entry(const char *root, char **ret_linux, char ***ret_initrds) {
933 _cleanup_(boot_config_free) BootConfig config = BOOT_CONFIG_NULL;
934 _cleanup_free_ char *esp_path = NULL, *xbootldr_path = NULL;
935 int r;
936
937 assert(root);
938 assert(ret_linux);
939 assert(ret_initrds);
940
941 esp_path = path_join(root, "efi");
942 if (!esp_path)
943 return log_oom();
944
945 xbootldr_path = path_join(root, "boot");
946 if (!xbootldr_path)
947 return log_oom();
948
949 r = boot_config_load(&config, esp_path, xbootldr_path);
950 if (r < 0)
951 return r;
952
953 r = boot_config_select_special_entries(&config, /* skip_efivars= */ true);
954 if (r < 0)
955 return log_error_errno(r, "Failed to find special boot config entries: %m");
956
957 const BootEntry *boot_entry = boot_config_default_entry(&config);
958
ec3d2f2e 959 if (boot_entry && !IN_SET(boot_entry->type, BOOT_ENTRY_UNIFIED, BOOT_ENTRY_CONF))
6af6d442
SL
960 boot_entry = NULL;
961
962 /* If we cannot determine a default entry search for UKIs (Type #2 EFI Unified Kernel Images)
963 * then .conf files (Type #1 Boot Loader Specification Entries).
964 * https://uapi-group.org/specifications/specs/boot_loader_specification */
965 if (!boot_entry)
966 FOREACH_ARRAY(entry, config.entries, config.n_entries)
967 if (entry->type == BOOT_ENTRY_UNIFIED) {
968 boot_entry = entry;
969 break;
970 }
971
972 if (!boot_entry)
973 FOREACH_ARRAY(entry, config.entries, config.n_entries)
974 if (entry->type == BOOT_ENTRY_CONF) {
975 boot_entry = entry;
976 break;
977 }
978
979 if (!boot_entry)
980 return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Failed to discover any boot entries.");
981
982 log_debug("Discovered boot entry %s (%s)", boot_entry->id, boot_entry_type_to_string(boot_entry->type));
983
984 _cleanup_free_ char *linux_kernel = NULL;
985 _cleanup_strv_free_ char **initrds = NULL;
986 if (boot_entry->type == BOOT_ENTRY_UNIFIED) {
987 linux_kernel = path_join(boot_entry->root, boot_entry->kernel);
988 if (!linux_kernel)
989 return log_oom();
990 } else if (boot_entry->type == BOOT_ENTRY_CONF) {
991 linux_kernel = path_join(boot_entry->root, boot_entry->kernel);
992 if (!linux_kernel)
993 return log_oom();
994
995 STRV_FOREACH(initrd, boot_entry->initrd) {
996 _cleanup_free_ char *initrd_path = path_join(boot_entry->root, *initrd);
997 if (!initrd_path)
998 return log_oom();
999
1000 r = strv_consume(&initrds, TAKE_PTR(initrd_path));
1001 if (r < 0)
1002 return log_oom();
1003 }
1004 } else
1005 assert_not_reached();
1006
1007 *ret_linux = TAKE_PTR(linux_kernel);
1008 *ret_initrds = TAKE_PTR(initrds);
1009
1010 return 0;
1011}
1012
811ad9e6
SL
1013static int merge_initrds(char **ret) {
1014 _cleanup_(rm_rf_physical_and_freep) char *merged_initrd = NULL;
1015 _cleanup_close_ int ofd = -EBADF;
1016 int r;
1017
1018 assert(ret);
1019
1020 r = tempfn_random_child(NULL, "vmspawn-initrd-", &merged_initrd);
1021 if (r < 0)
1022 return log_error_errno(r, "Failed to create temporary file: %m");
1023
1024 ofd = open(merged_initrd, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0600);
1025 if (ofd < 0)
1026 return log_error_errno(errno, "Failed to create regular file %s: %m", merged_initrd);
1027
811ad9e6
SL
1028 STRV_FOREACH(i, arg_initrds) {
1029 _cleanup_close_ int ifd = -EBADF;
8c3f9888
YW
1030 off_t off, to_seek;
1031
1032 off = lseek(ofd, 0, SEEK_CUR);
1033 if (off < 0)
1034 return log_error_errno(errno, "Failed to get file offset of %s: %m", merged_initrd);
1035
1036 to_seek = (4 - (off % 4)) % 4;
811ad9e6
SL
1037
1038 /* seek to assure 4 byte alignment for each initrd */
1039 if (to_seek != 0 && lseek(ofd, to_seek, SEEK_CUR) < 0)
1040 return log_error_errno(errno, "Failed to seek %s: %m", merged_initrd);
1041
1042 ifd = open(*i, O_RDONLY|O_CLOEXEC);
1043 if (ifd < 0)
1044 return log_error_errno(errno, "Failed to open %s: %m", *i);
1045
1046 r = copy_bytes(ifd, ofd, UINT64_MAX, COPY_REFLINK);
1047 if (r < 0)
1048 return log_error_errno(r, "Failed to copy bytes from %s to %s: %m", *i, merged_initrd);
1049 }
1050
1051 *ret = TAKE_PTR(merged_initrd);
1052 return 0;
1053}
1054
795ec90c
LP
1055static void set_window_title(PTYForward *f) {
1056 _cleanup_free_ char *hn = NULL, *dot = NULL;
1057
1058 assert(f);
1059
1060 (void) gethostname_strict(&hn);
1061
1062 if (emoji_enabled())
1063 dot = strjoin(special_glyph(SPECIAL_GLYPH_GREEN_CIRCLE), " ");
1064
1065 if (hn)
1066 (void) pty_forward_set_titlef(f, "%sVirtual Machine %s on %s", strempty(dot), arg_machine, hn);
1067 else
1068 (void) pty_forward_set_titlef(f, "%sVirtual Machine %s", strempty(dot), arg_machine);
1069
1070 if (dot)
1071 (void) pty_forward_set_title_prefix(f, dot);
1072}
1073
51747b34 1074static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
9de3cc14 1075 _cleanup_(ovmf_config_freep) OvmfConfig *ovmf_config = NULL;
cf3beb27 1076 _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
5c57a865 1077 _cleanup_free_ char *machine = NULL, *qemu_binary = NULL, *mem = NULL, *trans_scope = NULL, *kernel = NULL;
51747b34 1078 _cleanup_close_ int notify_sock_fd = -EBADF;
9de3cc14 1079 _cleanup_strv_free_ char **cmdline = NULL;
51747b34
SL
1080 _cleanup_free_ int *pass_fds = NULL;
1081 size_t n_pass_fds = 0;
5c57a865 1082 const char *accel, *shm;
9de3cc14 1083 int r;
cf3beb27
SL
1084
1085 if (arg_privileged)
1086 r = sd_bus_default_system(&bus);
1087 else
1088 r = sd_bus_default_user(&bus);
1089 if (r < 0)
1090 return log_error_errno(r, "Failed to connect to systemd bus: %m");
1091
1092 r = start_transient_scope(bus, arg_machine, /* allow_pidfd= */ true, &trans_scope);
1093 if (r < 0)
1094 return r;
9de3cc14 1095
2c0061c7
LP
1096 bool use_kvm = arg_kvm > 0;
1097 if (arg_kvm < 0) {
9de3cc14
SL
1098 r = qemu_check_kvm_support();
1099 if (r < 0)
1100 return log_error_errno(r, "Failed to check for KVM support: %m");
1101 use_kvm = r;
1102 }
1103
e8ce204d
LP
1104 if (arg_firmware)
1105 r = load_ovmf_config(arg_firmware, &ovmf_config);
1106 else
1107 r = find_ovmf_config(arg_secure_boot, &ovmf_config);
9de3cc14
SL
1108 if (r < 0)
1109 return log_error_errno(r, "Failed to find OVMF config: %m");
1110
1111 /* only warn if the user hasn't disabled secureboot */
1112 if (!ovmf_config->supports_sb && arg_secure_boot)
1113 log_warning("Couldn't find OVMF firmware blob with Secure Boot support, "
1114 "falling back to OVMF firmware blobs without Secure Boot support.");
1115
5c57a865
SL
1116 shm = arg_directory ? ",memory-backend=mem" : "";
1117 if (ARCHITECTURE_SUPPORTS_SMM)
1118 machine = strjoin("type=" QEMU_MACHINE_TYPE ",smm=", on_off(ovmf_config->supports_sb), shm);
1119 else
1120 machine = strjoin("type=" QEMU_MACHINE_TYPE, shm);
1121 if (!machine)
1122 return log_oom();
1123
1124 if (arg_linux) {
1125 kernel = strdup(arg_linux);
1126 if (!kernel)
1127 return log_oom();
6af6d442
SL
1128 } else if (arg_directory) {
1129 /* a kernel is required for directory type images so attempt to locate a UKI under /boot and /efi */
1130 r = discover_boot_entry(arg_directory, &kernel, &arg_initrds);
1131 if (r < 0)
1132 return log_error_errno(r, "Failed to locate UKI in directory type image, please specify one with --linux=.");
1133
1134 log_debug("Discovered UKI image at %s", kernel);
1135 }
5c57a865 1136
9de3cc14
SL
1137 r = find_qemu_binary(&qemu_binary);
1138 if (r == -EOPNOTSUPP)
1139 return log_error_errno(r, "Native architecture is not supported by qemu.");
1140 if (r < 0)
1141 return log_error_errno(r, "Failed to find QEMU binary: %m");
1142
2c0061c7 1143 if (asprintf(&mem, "%" PRIu64 "M", DIV_ROUND_UP(arg_ram, U64_MB)) < 0)
9de3cc14
SL
1144 return log_oom();
1145
1146 cmdline = strv_new(
1147 qemu_binary,
1148 "-machine", machine,
2c0061c7 1149 "-smp", arg_cpus ?: "1",
9de3cc14
SL
1150 "-m", mem,
1151 "-object", "rng-random,filename=/dev/urandom,id=rng0",
75331bed 1152 "-device", "virtio-rng-pci,rng=rng0,id=rng-device0"
9de3cc14 1153 );
f72a0856
SL
1154 if (!cmdline)
1155 return log_oom();
1156
cf3beb27 1157 /* if we are going to be starting any units with state then create our runtime dir */
a8f940c4 1158 if (arg_tpm != 0 || arg_directory || arg_runtime_mounts.n_mounts != 0) {
cf3beb27
SL
1159 r = runtime_directory(&arg_runtime_directory, arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, "systemd/vmspawn");
1160 if (r < 0)
1161 return log_error_errno(r, "Failed to lookup runtime directory: %m");
1162 if (r) {
1163 /* r > 0 means we need to create our own runtime dir */
1164 r = mkdir_p(arg_runtime_directory, 0755);
1165 if (r < 0)
1166 return log_error_errno(r, "Failed to create runtime directory: %m");
1167 arg_runtime_directory_created = true;
1168 }
1169 }
1170
2c0061c7 1171 if (arg_network_stack == NETWORK_STACK_TAP)
75331bed 1172 r = strv_extend_many(&cmdline, "-nic", "tap,script=no,model=virtio-net-pci");
2c0061c7 1173 else if (arg_network_stack == NETWORK_STACK_USER)
75331bed
SL
1174 r = strv_extend_many(&cmdline, "-nic", "user,model=virtio-net-pci");
1175 else
1176 r = strv_extend_many(&cmdline, "-nic", "none");
1177 if (r < 0)
1178 return log_oom();
1179
5c57a865 1180 /* A shared memory backend might increase ram usage so only add one if actually necessary for virtiofsd. */
a8f940c4 1181 if (arg_directory || arg_runtime_mounts.n_mounts != 0) {
5c57a865
SL
1182 r = strv_extend(&cmdline, "-object");
1183 if (r < 0)
1184 return log_oom();
1185
1186 r = strv_extendf(&cmdline, "memory-backend-memfd,id=mem,size=%s,share=on", mem);
1187 if (r < 0)
1188 return log_oom();
1189 }
1190
2c0061c7
LP
1191 bool use_vsock = arg_vsock > 0 && ARCHITECTURE_SUPPORTS_SMBIOS;
1192 if (arg_vsock < 0) {
5c57a865
SL
1193 r = qemu_check_vsock_support();
1194 if (r < 0)
cf9de8ef 1195 return log_error_errno(r, "Failed to check for VSOCK support: %m");
5c57a865
SL
1196
1197 use_vsock = r;
1198 }
1199
51747b34
SL
1200 if (!use_kvm && kvm_device_fd >= 0) {
1201 log_warning("KVM is disabled but fd for /dev/kvm was passed, closing fd and ignoring");
1202 kvm_device_fd = safe_close(kvm_device_fd);
1203 }
1204
1205 if (use_kvm && kvm_device_fd >= 0) {
1206 /* /dev/fdset/1 is magic string to tell qemu where to find the fd for /dev/kvm
1207 * we use this so that we can take a fd to /dev/kvm and then give qemu that fd */
1208 accel = "kvm,device=/dev/fdset/1";
1209
1210 r = strv_extend(&cmdline, "--add-fd");
1211 if (r < 0)
1212 return log_oom();
1213
1214 r = strv_extendf(&cmdline, "fd=%d,set=1,opaque=/dev/kvm", kvm_device_fd);
1215 if (r < 0)
1216 return log_oom();
1217
1218 if (!GREEDY_REALLOC(pass_fds, n_pass_fds + 1))
1219 return log_oom();
1220
1221 pass_fds[n_pass_fds++] = kvm_device_fd;
1222 } else if (use_kvm)
1223 accel = "kvm";
1224 else
1225 accel = "tcg";
1226
1227 r = strv_extend_many(&cmdline, "-accel", accel);
1228 if (r < 0)
1229 return log_oom();
1230
f72a0856 1231 _cleanup_close_ int child_vsock_fd = -EBADF;
258d2694 1232 unsigned child_cid = arg_vsock_cid;
f72a0856 1233 if (use_vsock) {
51747b34 1234 int device_fd = vhost_device_fd;
51747b34
SL
1235
1236 if (device_fd < 0) {
1237 child_vsock_fd = open("/dev/vhost-vsock", O_RDWR|O_CLOEXEC);
1238 if (child_vsock_fd < 0)
1239 return log_error_errno(errno, "Failed to open /dev/vhost-vsock as read/write: %m");
1240
1241 device_fd = child_vsock_fd;
1242 }
f72a0856 1243
51747b34 1244 r = vsock_fix_child_cid(device_fd, &child_cid, arg_machine);
f72a0856 1245 if (r < 0)
cf9de8ef 1246 return log_error_errno(r, "Failed to fix CID for the guest VSOCK socket: %m");
f72a0856
SL
1247
1248 r = strv_extend(&cmdline, "-device");
1249 if (r < 0)
1250 return log_oom();
1251
51747b34 1252 r = strv_extendf(&cmdline, "vhost-vsock-pci,guest-cid=%u,vhostfd=%d", child_cid, device_fd);
f72a0856
SL
1253 if (r < 0)
1254 return log_oom();
51747b34
SL
1255
1256 if (!GREEDY_REALLOC(pass_fds, n_pass_fds + 1))
1257 return log_oom();
1258
1259 pass_fds[n_pass_fds++] = device_fd;
f72a0856
SL
1260 }
1261
69f3c619 1262 r = strv_extend_many(&cmdline, "-cpu", "max");
f72a0856
SL
1263 if (r < 0)
1264 return log_oom();
9de3cc14 1265
795ec90c
LP
1266 _cleanup_close_ int master = -EBADF;
1267 PTYForwardFlags ptyfwd_flags = 0;
1268 switch (arg_console_mode) {
1269
1270 case CONSOLE_READ_ONLY:
1271 ptyfwd_flags |= PTY_FORWARD_READ_ONLY;
1272
1273 _fallthrough_;
1274
1275 case CONSOLE_INTERACTIVE: {
1276 _cleanup_free_ char *pty_path = NULL;
1277
1278 master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
1279 if (master < 0)
1280 return log_error_errno(errno, "Failed to acquire pseudo tty: %m");
1281
1282 r = ptsname_malloc(master, &pty_path);
1283 if (r < 0)
1284 return log_error_errno(r, "Failed to determine tty name: %m");
1285
1286 if (unlockpt(master) < 0)
1287 return log_error_errno(errno, "Failed to unlock tty: %m");
1288
1289 if (strv_extend_many(
1290 &cmdline,
1291 "-nographic",
1292 "-nodefaults",
1293 "-chardev") < 0)
1294 return log_oom();
1295
1296 if (strv_extendf(&cmdline,
1297 "serial,id=console,path=%s", pty_path) < 0)
1298 return log_oom();
1299
1300 r = strv_extend_many(
1301 &cmdline,
1302 "-serial", "chardev:console");
1303 break;
1304 }
1305
1306 case CONSOLE_GUI:
69f3c619
LP
1307 r = strv_extend_many(
1308 &cmdline,
1309 "-vga",
1310 "virtio");
795ec90c
LP
1311 break;
1312
1313 case CONSOLE_NATIVE:
69f3c619
LP
1314 r = strv_extend_many(
1315 &cmdline,
1316 "-nographic",
1317 "-nodefaults",
1318 "-chardev", "stdio,mux=on,id=console,signal=off",
1319 "-serial", "chardev:console",
1320 "-mon", "console");
795ec90c
LP
1321 break;
1322
1323 default:
1324 assert_not_reached();
1325 }
69f3c619
LP
1326 if (r < 0)
1327 return log_oom();
9de3cc14 1328
9de3cc14
SL
1329 r = strv_extend(&cmdline, "-drive");
1330 if (r < 0)
1331 return log_oom();
1332
018cc9ea
SL
1333 _cleanup_free_ char *escaped_ovmf_config_path = escape_qemu_value(ovmf_config->path);
1334 if (!escaped_ovmf_config_path)
1335 return log_oom();
1336
1337 r = strv_extendf(&cmdline, "if=pflash,format=%s,readonly=on,file=%s", ovmf_config_format(ovmf_config), escaped_ovmf_config_path);
9de3cc14
SL
1338 if (r < 0)
1339 return log_oom();
1340
f72a0856 1341 _cleanup_(unlink_and_freep) char *ovmf_vars_to = NULL;
9de3cc14
SL
1342 if (ovmf_config->supports_sb) {
1343 const char *ovmf_vars_from = ovmf_config->vars;
018cc9ea 1344 _cleanup_free_ char *escaped_ovmf_vars_to = NULL;
9de3cc14
SL
1345 _cleanup_close_ int source_fd = -EBADF, target_fd = -EBADF;
1346
1347 r = tempfn_random_child(NULL, "vmspawn-", &ovmf_vars_to);
1348 if (r < 0)
1349 return r;
1350
1351 source_fd = open(ovmf_vars_from, O_RDONLY|O_CLOEXEC);
1352 if (source_fd < 0)
1353 return log_error_errno(source_fd, "Failed to open OVMF vars file %s: %m", ovmf_vars_from);
1354
1355 target_fd = open(ovmf_vars_to, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0600);
1356 if (target_fd < 0)
1357 return log_error_errno(errno, "Failed to create regular file for OVMF vars at %s: %m", ovmf_vars_to);
1358
1359 r = copy_bytes(source_fd, target_fd, UINT64_MAX, COPY_REFLINK);
1360 if (r < 0)
1361 return log_error_errno(r, "Failed to copy bytes from %s to %s: %m", ovmf_vars_from, ovmf_vars_to);
1362
1363 /* These aren't always available so don't raise an error if they fail */
1364 (void) copy_xattr(source_fd, NULL, target_fd, NULL, 0);
1365 (void) copy_access(source_fd, target_fd);
1366 (void) copy_times(source_fd, target_fd, 0);
1367
69f3c619
LP
1368 r = strv_extend_many(
1369 &cmdline,
1370 "-global", "ICH9-LPC.disable_s3=1",
1371 "-global", "driver=cfi.pflash01,property=secure,value=on",
1372 "-drive");
9de3cc14
SL
1373 if (r < 0)
1374 return log_oom();
1375
018cc9ea
SL
1376 escaped_ovmf_vars_to = escape_qemu_value(ovmf_vars_to);
1377 if (!escaped_ovmf_vars_to)
1378 return log_oom();
1379
1380 r = strv_extendf(&cmdline, "file=%s,if=pflash,format=%s", escaped_ovmf_vars_to, ovmf_config_format(ovmf_config));
9de3cc14
SL
1381 if (r < 0)
1382 return log_oom();
1383 }
1384
1ec3218e
SL
1385 STRV_FOREACH(drive, arg_extra_drives) {
1386 _cleanup_free_ char *escaped_drive = NULL;
1387
1388 r = strv_extend(&cmdline, "-drive");
1389 if (r < 0)
1390 return log_oom();
1391
1392 escaped_drive = escape_qemu_value(*drive);
1393 if (!escaped_drive)
1394 return log_oom();
1395
1396 r = strv_extendf(&cmdline, "format=raw,cache=unsafe,file=%s", escaped_drive);
1397 if (r < 0)
1398 return log_oom();
1399 }
1400
5c57a865
SL
1401 if (kernel) {
1402 r = strv_extend_many(&cmdline, "-kernel", kernel);
0f25e3e4
SL
1403 if (r < 0)
1404 return log_oom();
19301e76
SL
1405
1406 /* We can't rely on gpt-auto-generator when direct kernel booting so synthesize a root=
1407 * kernel argument instead. */
1408 if (arg_image) {
1409 r = kernel_cmdline_maybe_append_root();
1410 if (r < 0)
1411 return r;
1412 }
0f25e3e4
SL
1413 }
1414
5c57a865 1415 if (arg_image) {
018cc9ea
SL
1416 _cleanup_free_ char *escaped_image = NULL;
1417
5c57a865 1418 assert(!arg_directory);
9de3cc14 1419
5c57a865
SL
1420 r = strv_extend(&cmdline, "-drive");
1421 if (r < 0)
1422 return log_oom();
9de3cc14 1423
018cc9ea
SL
1424 escaped_image = escape_qemu_value(arg_image);
1425 if (!escaped_image)
1426 log_oom();
1427
1428 r = strv_extendf(&cmdline, "if=none,id=mkosi,file=%s,format=raw", escaped_image);
5c57a865
SL
1429 if (r < 0)
1430 return log_oom();
1431
1432 r = strv_extend_many(&cmdline,
69f3c619
LP
1433 "-device", "virtio-scsi-pci,id=scsi",
1434 "-device", "scsi-hd,drive=mkosi,bootindex=1");
5c57a865
SL
1435 if (r < 0)
1436 return log_oom();
1437 }
1438
1439 if (arg_directory) {
018cc9ea
SL
1440 _cleanup_free_ char *sock_path = NULL, *sock_name = NULL, *escaped_sock_path = NULL;
1441
a8f940c4 1442 r = start_virtiofsd(bus, trans_scope, arg_directory, /* uidmap= */ true, &sock_path, &sock_name);
5c57a865
SL
1443 if (r < 0)
1444 return r;
1445
018cc9ea
SL
1446 escaped_sock_path = escape_qemu_value(sock_path);
1447 if (!escaped_sock_path)
1448 log_oom();
1449
5c57a865
SL
1450 r = strv_extend(&cmdline, "-chardev");
1451 if (r < 0)
1452 return log_oom();
1453
018cc9ea 1454 r = strv_extendf(&cmdline, "socket,id=%1$s,path=%2$s/%1$s", sock_name, escaped_sock_path);
5c57a865
SL
1455 if (r < 0)
1456 return log_oom();
1457
1458 r = strv_extend(&cmdline, "-device");
1459 if (r < 0)
1460 return log_oom();
1461
1462 r = strv_extendf(&cmdline, "vhost-user-fs-pci,queue-size=1024,chardev=%s,tag=root", sock_name);
1463 if (r < 0)
1464 return log_oom();
1465
1466 r = strv_extend(&arg_kernel_cmdline_extra, "root=root rootfstype=virtiofs rw");
1467 if (r < 0)
1468 return log_oom();
1469 }
9de3cc14 1470
0f25e3e4 1471 r = strv_prepend(&arg_kernel_cmdline_extra, "console=" DEFAULT_SERIAL_TTY);
773ca1de
SL
1472 if (r < 0)
1473 return log_oom();
4291f446 1474
a8f940c4 1475 FOREACH_ARRAY(mount, arg_runtime_mounts.mounts, arg_runtime_mounts.n_mounts) {
018cc9ea 1476 _cleanup_free_ char *sock_path = NULL, *sock_name = NULL, *clean_target = NULL, *escaped_sock_path = NULL;
a8f940c4
SL
1477 r = start_virtiofsd(bus, trans_scope, mount->source, /* uidmap= */ false, &sock_path, &sock_name);
1478 if (r < 0)
1479 return r;
1480
018cc9ea
SL
1481 escaped_sock_path = escape_qemu_value(sock_path);
1482 if (!escaped_sock_path)
1483 log_oom();
1484
a8f940c4
SL
1485 r = strv_extend(&cmdline, "-chardev");
1486 if (r < 0)
1487 return log_oom();
1488
018cc9ea 1489 r = strv_extendf(&cmdline, "socket,id=%1$s,path=%2$s/%1$s", sock_name, escaped_sock_path);
a8f940c4
SL
1490 if (r < 0)
1491 return log_oom();
1492
1493 r = strv_extend(&cmdline, "-device");
1494 if (r < 0)
1495 return log_oom();
1496
1497 r = strv_extendf(&cmdline, "vhost-user-fs-pci,queue-size=1024,chardev=%1$s,tag=%1$s", sock_name);
1498 if (r < 0)
1499 return log_oom();
1500
1501 clean_target = xescape(mount->target, "\":");
1502 if (!clean_target)
1503 return log_oom();
1504
1505 r = strv_extendf(&arg_kernel_cmdline_extra, "systemd.mount-extra=\"%s:%s:virtiofs:%s\"",
1506 sock_name, clean_target, mount->read_only ? "ro" : "rw");
1507 if (r < 0)
1508 return log_oom();
1509 }
1510
773ca1de 1511 if (ARCHITECTURE_SUPPORTS_SMBIOS) {
018cc9ea 1512 _cleanup_free_ char *kcl = strv_join(arg_kernel_cmdline_extra, " "), *escaped_kcl = NULL;
773ca1de
SL
1513 if (!kcl)
1514 return log_oom();
4291f446 1515
5c57a865 1516 if (kernel) {
0f25e3e4
SL
1517 r = strv_extend_many(&cmdline, "-append", kcl);
1518 if (r < 0)
1519 return log_oom();
1520 } else {
1521 if (ARCHITECTURE_SUPPORTS_SMBIOS) {
018cc9ea
SL
1522 escaped_kcl = escape_qemu_value(kcl);
1523 if (!escaped_kcl)
1524 log_oom();
1525
0f25e3e4
SL
1526 r = strv_extend(&cmdline, "-smbios");
1527 if (r < 0)
1528 return log_oom();
773ca1de 1529
018cc9ea 1530 r = strv_extendf(&cmdline, "type=11,value=io.systemd.stub.kernel-cmdline-extra=%s", escaped_kcl);
0f25e3e4
SL
1531 if (r < 0)
1532 return log_oom();
1533 } else
1534 log_warning("Cannot append extra args to kernel cmdline, native architecture doesn't support SMBIOS, ignoring");
1535 }
773ca1de
SL
1536 } else
1537 log_warning("Cannot append extra args to kernel cmdline, native architecture doesn't support SMBIOS");
f72a0856 1538
cf3beb27
SL
1539 /* disable TPM autodetection if the user's hardware doesn't support it */
1540 if (!ARCHITECTURE_SUPPORTS_TPM) {
1541 if (arg_tpm < 0) {
1542 arg_tpm = 0;
1543 log_debug("TPM not support on %s, disabling tpm autodetection and continuing", architecture_to_string(native_architecture()));
1544 } else if (arg_tpm > 0)
1545 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "TPM not supported on %s, refusing", architecture_to_string(native_architecture()));
1546 }
1547
1548 _cleanup_free_ char *swtpm = NULL;
1549 if (arg_tpm != 0) {
1550 r = find_executable("swtpm", &swtpm);
1551 if (r < 0) {
1552 /* log if the user asked for swtpm and we cannot find it */
1553 if (arg_tpm > 0)
1554 return log_error_errno(r, "Failed to find swtpm binary: %m");
1555 /* also log if we got an error other than ENOENT from find_executable */
1556 if (r != -ENOENT && arg_tpm < 0)
1557 return log_error_errno(r, "Error detecting swtpm: %m");
1558 }
1559 }
1560
d90a05b6 1561 _cleanup_free_ char *tpm_state_tempdir = NULL;
cf3beb27 1562 if (swtpm) {
018cc9ea
SL
1563 _cleanup_free_ char *escaped_state_dir = NULL;
1564
cf3beb27
SL
1565 r = start_tpm(bus, trans_scope, swtpm, &tpm_state_tempdir);
1566 if (r < 0) {
1567 /* only bail if the user asked for a tpm */
1568 if (arg_tpm > 0)
1569 return log_error_errno(r, "Failed to start tpm: %m");
1570 log_debug_errno(r, "Failed to start tpm, ignoring: %m");
1571 }
1572
018cc9ea
SL
1573 escaped_state_dir = escape_qemu_value(tpm_state_tempdir);
1574 if (!escaped_state_dir)
1575 log_oom();
1576
cf3beb27
SL
1577 r = strv_extend(&cmdline, "-chardev");
1578 if (r < 0)
1579 return log_oom();
1580
018cc9ea 1581 r = strv_extendf(&cmdline, "socket,id=chrtpm,path=%s/sock", escaped_state_dir);
cf3beb27
SL
1582 if (r < 0)
1583 return log_oom();
1584
1585 r = strv_extend_many(&cmdline, "-tpmdev", "emulator,id=tpm0,chardev=chrtpm");
1586 if (r < 0)
1587 return log_oom();
1588
1589 if (native_architecture() == ARCHITECTURE_X86_64)
1590 r = strv_extend_many(&cmdline, "-device", "tpm-tis,tpmdev=tpm0");
1591 else if (IN_SET(native_architecture(), ARCHITECTURE_ARM64, ARCHITECTURE_ARM64_BE))
1592 r = strv_extend_many(&cmdline, "-device", "tpm-tis-device,tpmdev=tpm0");
1593 if (r < 0)
1594 return log_oom();
1595 }
1596
811ad9e6
SL
1597 char *initrd = NULL;
1598 _cleanup_(rm_rf_physical_and_freep) char *merged_initrd = NULL;
1599 size_t n_initrds = strv_length(arg_initrds);
1600
1601 if (n_initrds == 1)
1602 initrd = arg_initrds[0];
1603 else if (n_initrds > 1) {
1604 r = merge_initrds(&merged_initrd);
1605 if (r < 0)
1606 return r;
1607
1608 initrd = merged_initrd;
1609 }
1610
1611 if (initrd) {
1612 r = strv_extend_many(&cmdline, "-initrd", initrd);
88af28d1
SL
1613 if (r < 0)
1614 return log_oom();
1615 }
1616
258d2694
SL
1617 if (arg_forward_journal) {
1618 _cleanup_free_ char *sd_journal_remote = NULL, *listen_address = NULL, *cred = NULL;
1619 r = find_executable("systemd-journal-remote", &sd_journal_remote);
1620 if (r < 0)
1621 return log_error_errno(r, "Failed to find systemd-journal-remote binary: %m");
1622
1623 r = start_systemd_journal_remote(bus, trans_scope, child_cid, sd_journal_remote, &listen_address);
1624 if (r < 0)
1625 return r;
1626
1627 cred = strjoin("journal.forward_to_socket:", listen_address);
1628 if (!cred)
1629 return log_oom();
1630
1631 r = machine_credential_set(&arg_credentials, cred);
1632 if (r < 0)
1633 return r;
1634 }
1635
1636 if (ARCHITECTURE_SUPPORTS_SMBIOS)
1637 FOREACH_ARRAY(cred, arg_credentials.credentials, arg_credentials.n_credentials) {
1638 _cleanup_free_ char *cred_data_b64 = NULL;
1639 ssize_t n;
1640
1641 n = base64mem(cred->data, cred->size, &cred_data_b64);
1642 if (n < 0)
1643 return log_oom();
1644
1645 r = strv_extend(&cmdline, "-smbios");
1646 if (r < 0)
1647 return log_oom();
1648
1649 r = strv_extendf(&cmdline, "type=11,value=io.systemd.credential.binary:%s=%s", cred->id, cred_data_b64);
1650 if (r < 0)
1651 return log_oom();
1652 }
1653
f72a0856 1654 if (use_vsock) {
51747b34
SL
1655 notify_sock_fd = open_vsock();
1656 if (notify_sock_fd < 0)
cf9de8ef 1657 return log_error_errno(notify_sock_fd, "Failed to open VSOCK: %m");
f72a0856 1658
51747b34 1659 r = cmdline_add_vsock(&cmdline, notify_sock_fd);
f72a0856 1660 if (r == -ENOMEM)
4291f446 1661 return log_oom();
f72a0856 1662 if (r < 0)
cf9de8ef 1663 return log_error_errno(r, "Failed to call getsockname on VSOCK: %m");
4291f446 1664 }
9de3cc14 1665
b9e2d83b
LP
1666 if (DEBUG_LOGGING) {
1667 _cleanup_free_ char *joined = quote_command_line(cmdline, SHELL_ESCAPE_EMPTY);
1668 if (!joined)
1669 return log_oom();
1670
1671 log_debug("Executing: %s", joined);
1672 }
1673
795ec90c 1674 assert_se(sigprocmask_many(SIG_BLOCK, /* old_sigset=*/ NULL, SIGCHLD, SIGWINCH) >= 0);
a8f940c4 1675
f72a0856
SL
1676 _cleanup_(sd_event_source_unrefp) sd_event_source *notify_event_source = NULL;
1677 _cleanup_(sd_event_unrefp) sd_event *event = NULL;
1678 r = sd_event_new(&event);
1679 if (r < 0)
1680 return log_error_errno(r, "Failed to get default event source: %m");
1681
1682 (void) sd_event_set_watchdog(event, true);
1683
6cff1854
LP
1684 _cleanup_(pidref_done) PidRef child_pidref = PIDREF_NULL;
1685
1686 r = pidref_safe_fork_full(
f72a0856 1687 qemu_binary,
6cff1854 1688 /* stdio_fds= */ NULL,
f72a0856 1689 &child_vsock_fd, 1, /* pass the vsock fd to qemu */
6cff1854
LP
1690 FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_CLOEXEC_OFF|FORK_RLIMIT_NOFILE_SAFE,
1691 &child_pidref);
f72a0856 1692 if (r < 0)
6cff1854 1693 return r;
9de3cc14
SL
1694 if (r == 0) {
1695 /* set TERM and LANG if they are missing */
1696 if (setenv("TERM", "vt220", 0) < 0)
1697 return log_oom();
1698
1699 if (setenv("LANG", "C.UTF-8", 0) < 0)
1700 return log_oom();
1701
02aacdcf 1702 execv(qemu_binary, cmdline);
9de3cc14
SL
1703 log_error_errno(errno, "Failed to execve %s: %m", qemu_binary);
1704 _exit(EXIT_FAILURE);
1705 }
1706
c7c6b6f2
LP
1707 /* Close the vsock fd we passed to qemu in the parent. We don't need it anymore. */
1708 child_vsock_fd = safe_close(child_vsock_fd);
1709
f72a0856
SL
1710 int exit_status = INT_MAX;
1711 if (use_vsock) {
51747b34 1712 r = setup_notify_parent(event, notify_sock_fd, &exit_status, &notify_event_source);
f72a0856 1713 if (r < 0)
cf9de8ef 1714 return log_error_errno(r, "Failed to setup event loop to handle VSOCK notify events: %m");
f72a0856
SL
1715 }
1716
1717 /* shutdown qemu when we are shutdown */
6cff1854
LP
1718 (void) sd_event_add_signal(event, NULL, SIGINT | SD_EVENT_SIGNAL_PROCMASK, on_orderly_shutdown, &child_pidref);
1719 (void) sd_event_add_signal(event, NULL, SIGTERM | SD_EVENT_SIGNAL_PROCMASK, on_orderly_shutdown, &child_pidref);
f72a0856 1720
8b183505 1721 (void) sd_event_add_signal(event, NULL, (SIGRTMIN+18) | SD_EVENT_SIGNAL_PROCMASK, sigrtmin18_handler, NULL);
f72a0856
SL
1722
1723 /* Exit when the child exits */
6cff1854 1724 (void) event_add_child_pidref(event, NULL, &child_pidref, WEXITED, on_child_exit, NULL);
f72a0856 1725
795ec90c
LP
1726 _cleanup_(pty_forward_freep) PTYForward *forward = NULL;
1727 if (master >= 0) {
1728 r = pty_forward_new(event, master, ptyfwd_flags, &forward);
1729 if (r < 0)
1730 return log_error_errno(r, "Failed to create PTY forwarder: %m");
1731
1732 if (!arg_background) {
1733 _cleanup_free_ char *bg = NULL;
1734
1735 r = terminal_tint_color(130 /* green */, &bg);
1736 if (r < 0)
1737 log_debug_errno(r, "Failed to determine terminal background color, not tinting.");
1738 else
1739 (void) pty_forward_set_background_color(forward, bg);
1740 } else if (!isempty(arg_background))
1741 (void) pty_forward_set_background_color(forward, arg_background);
1742
1743 set_window_title(forward);
1744 }
1745
f72a0856
SL
1746 r = sd_event_loop(event);
1747 if (r < 0)
1748 return log_error_errno(r, "Failed to run event loop: %m");
1749
1750 if (use_vsock) {
1751 if (exit_status == INT_MAX) {
cf9de8ef 1752 log_debug("Couldn't retrieve inner EXIT_STATUS from VSOCK");
f72a0856
SL
1753 return EXIT_SUCCESS;
1754 }
1755 if (exit_status != 0)
1756 log_warning("Non-zero exit code received: %d", exit_status);
1757 return exit_status;
1758 }
1759
1760 return 0;
1761}
1762
1763static int determine_names(void) {
1764 int r;
1765
b064cc56
SL
1766 if (!arg_directory && !arg_image) {
1767 if (arg_machine) {
1768 _cleanup_(image_unrefp) Image *i = NULL;
1769
1770 r = image_find(IMAGE_MACHINE, arg_machine, NULL, &i);
1771 if (r == -ENOENT)
1772 return log_error_errno(r, "No image for machine '%s'.", arg_machine);
1773 if (r < 0)
1774 return log_error_errno(r, "Failed to find image for machine '%s': %m", arg_machine);
1775
1776 if (IN_SET(i->type, IMAGE_RAW, IMAGE_BLOCK))
1777 r = free_and_strdup(&arg_image, i->path);
1778 else if (IN_SET(i->type, IMAGE_DIRECTORY, IMAGE_SUBVOLUME))
1779 r = free_and_strdup(&arg_directory, i->path);
1780 else
1781 assert_not_reached();
1782 if (r < 0)
1783 return log_oom();
1784 } else {
1785 r = safe_getcwd(&arg_directory);
1786 if (r < 0)
1787 return log_error_errno(r, "Failed to determine current directory: %m");
1788 }
1789 }
f72a0856
SL
1790
1791 if (!arg_machine) {
5c57a865
SL
1792 if (arg_directory && path_equal(arg_directory, "/")) {
1793 arg_machine = gethostname_malloc();
1794 if (!arg_machine)
1795 return log_oom();
1796 } else if (arg_image) {
1797 char *e;
f72a0856 1798
5c57a865
SL
1799 r = path_extract_filename(arg_image, &arg_machine);
1800 if (r < 0)
1801 return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_image);
f72a0856 1802
5c57a865
SL
1803 /* Truncate suffix if there is one */
1804 e = endswith(arg_machine, ".raw");
1805 if (e)
1806 *e = 0;
1807 } else {
1808 r = path_extract_filename(arg_directory, &arg_machine);
1809 if (r < 0)
1810 return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_directory);
1811 }
f72a0856
SL
1812
1813 hostname_cleanup(arg_machine);
1814 if (!hostname_is_valid(arg_machine, 0))
1815 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine machine name automatically, please use -M.");
1816 }
1817
1818 return 0;
9de3cc14
SL
1819}
1820
75331bed 1821static int verify_arguments(void) {
2c0061c7 1822 if (arg_network_stack == NETWORK_STACK_TAP && !arg_privileged)
75331bed
SL
1823 return log_error_errno(SYNTHETIC_ERRNO(EPERM), "--network-tap requires root privileges, refusing.");
1824
6af6d442
SL
1825 if (!strv_isempty(arg_initrds) && !arg_linux)
1826 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --initrd= cannot be used without --linux=.");
1827
75331bed
SL
1828 return 0;
1829}
1830
9de3cc14 1831static int run(int argc, char *argv[]) {
51747b34
SL
1832 int r, kvm_device_fd = -EBADF, vhost_device_fd = -EBADF;
1833 _cleanup_strv_free_ char **names = NULL;
9de3cc14
SL
1834
1835 log_setup();
1836
cf3beb27
SL
1837 arg_privileged = getuid() == 0;
1838
9de3cc14
SL
1839 r = parse_argv(argc, argv);
1840 if (r <= 0)
bd546b9b 1841 return r;
9de3cc14 1842
f72a0856
SL
1843 r = determine_names();
1844 if (r < 0)
bd546b9b 1845 return r;
f72a0856 1846
75331bed
SL
1847 r = verify_arguments();
1848 if (r < 0)
1849 return r;
1850
795ec90c 1851 if (!arg_quiet && arg_console_mode != CONSOLE_GUI) {
1807baa9 1852 _cleanup_free_ char *u = NULL;
5c57a865
SL
1853 const char *vm_path = arg_image ?: arg_directory;
1854 (void) terminal_urlify_path(vm_path, vm_path, &u);
1807baa9 1855
795ec90c
LP
1856 log_info("%s %sSpawning VM %s on %s.%s",
1857 special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), arg_machine, u ?: vm_path, ansi_normal());
1858
1859 if (arg_console_mode == CONSOLE_INTERACTIVE)
1860 log_info("%s %sPress %sCtrl-]%s three times within 1s to kill VM.%s",
1861 special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), ansi_highlight(), ansi_grey(), ansi_normal());
1862 else if (arg_console_mode == CONSOLE_NATIVE)
1863 log_info("%s %sPress %sCtrl-a x%s to kill VM.%s",
1864 special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), ansi_highlight(), ansi_grey(), ansi_normal());
1807baa9 1865 }
dbb2718f 1866
51747b34
SL
1867 r = sd_listen_fds_with_names(true, &names);
1868 if (r < 0)
1869 return log_error_errno(r, "Failed to get passed file descriptors: %m");
1870
1871 for (int i = 0; i < r; i++) {
1872 int fd = SD_LISTEN_FDS_START + i;
1873 if (streq(names[i], "kvm"))
1874 kvm_device_fd = fd;
1875 else if (streq(names[i], "vhost-vsock"))
1876 vhost_device_fd = fd;
1877 else {
1878 log_notice("Couldn't recognize passed fd %d (%s), closing fd and ignoring...", fd, names[i]);
1879 safe_close(fd);
1880 }
1881 }
1882
51747b34 1883 return run_virtual_machine(kvm_device_fd, vhost_device_fd);
9de3cc14
SL
1884}
1885
1886DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);