]>
Commit | Line | Data |
---|---|---|
9de3cc14 SL |
1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
2 | ||
3 | #include <getopt.h> | |
4 | #include <stdint.h> | |
5 | #include <stdlib.h> | |
6 | #include <sys/wait.h> | |
7 | #include <unistd.h> | |
8 | ||
9 | #include "alloc-util.h" | |
10 | #include "architecture.h" | |
11 | #include "build.h" | |
f72a0856 | 12 | #include "common-signal.h" |
9de3cc14 SL |
13 | #include "copy.h" |
14 | #include "creds-util.h" | |
15 | #include "escape.h" | |
6cff1854 | 16 | #include "event-util.h" |
9de3cc14 SL |
17 | #include "fileio.h" |
18 | #include "format-util.h" | |
f72a0856 | 19 | #include "fs-util.h" |
9de3cc14 | 20 | #include "hexdecoct.h" |
f72a0856 | 21 | #include "hostname-util.h" |
9de3cc14 SL |
22 | #include "log.h" |
23 | #include "machine-credential.h" | |
24 | #include "main-func.h" | |
25 | #include "pager.h" | |
26 | #include "parse-argument.h" | |
27 | #include "parse-util.h" | |
28 | #include "path-util.h" | |
29 | #include "pretty-print.h" | |
30 | #include "process-util.h" | |
f72a0856 SL |
31 | #include "sd-event.h" |
32 | #include "signal-util.h" | |
33 | #include "socket-util.h" | |
9de3cc14 SL |
34 | #include "strv.h" |
35 | #include "tmpfile-util.h" | |
36 | #include "vmspawn-settings.h" | |
37 | #include "vmspawn-util.h" | |
38 | ||
/* Command-line configuration. These are written by parse_argv() (and, for arg_machine, also by
 * determine_names()) and read by run() and run_virtual_machine(). */
static bool arg_quiet = false;                  /* -q/--quiet: suppress the "Spawning VM" message in run() */
static PagerFlags arg_pager_flags = 0;          /* --no-pager sets PAGER_DISABLE */
static char *arg_image = NULL;                  /* -i/--image=: disk image handed to qemu as a raw drive */
static char *arg_machine = NULL;                /* -M/--machine=: machine name; derived from arg_image if unset */
static char *arg_qemu_smp = NULL;               /* --qemu-smp=: passed verbatim to qemu's -smp ("1" if unset) */
static uint64_t arg_qemu_mem = 2ULL * 1024ULL * 1024ULL * 1024ULL; /* --qemu-mem=: guest RAM in bytes */
static int arg_qemu_kvm = -1;                   /* --qemu-kvm=: tristate; negative means probe KVM support */
static int arg_qemu_vsock = -1;                 /* --qemu-vsock=: tristate; negative means probe vsock support */
static unsigned arg_vsock_cid = VMADDR_CID_ANY; /* --vsock-cid=: guest CID, VMADDR_CID_ANY if not given */
static bool arg_qemu_gui = false;               /* --qemu-gui: use "-vga virtio" instead of the serial console */
static int arg_secure_boot = -1;                /* --secure-boot=: tristate handed to find_ovmf_config() */
static MachineCredentialContext arg_credentials = {}; /* filled by --set-credential=/--load-credential= */
static SettingsMask arg_settings_mask = 0;      /* records which settings were given on the command line */
static char **arg_parameters = NULL;            /* positional arguments; joined into the extra kernel command line */
static char *arg_firmware = NULL;               /* --firmware=: explicit firmware definition file */

/* Release the heap-allocated settings above.
 * NOTE(review): STATIC_DESTRUCTOR_REGISTER is defined outside this file; presumably the
 * destructors run at program exit — confirm against its definition. */
STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
STATIC_DESTRUCTOR_REGISTER(arg_machine, freep);
STATIC_DESTRUCTOR_REGISTER(arg_qemu_smp, freep);
STATIC_DESTRUCTOR_REGISTER(arg_parameters, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_credentials, machine_credential_context_done);
STATIC_DESTRUCTOR_REGISTER(arg_firmware, freep);
9de3cc14 SL |
61 | |
62 | static int help(void) { | |
63 | _cleanup_free_ char *link = NULL; | |
64 | int r; | |
65 | ||
66 | pager_open(arg_pager_flags); | |
67 | ||
68 | r = terminal_urlify_man("systemd-vmspawn", "1", &link); | |
69 | if (r < 0) | |
70 | return log_oom(); | |
71 | ||
72 | printf("%1$s [OPTIONS...] [ARGUMENTS...]\n\n" | |
73 | "%5$sSpawn a command or OS in a virtual machine.%6$s\n\n" | |
74 | " -h --help Show this help\n" | |
75 | " --version Print version string\n" | |
dbb2718f | 76 | " -q --quiet Do not show status information\n" |
1d748d7c LP |
77 | " --no-pager Do not pipe output into a pager\n" |
78 | "\n%3$sImage:%4$s\n" | |
9de3cc14 | 79 | " -i --image=PATH Root file system disk image (or device node) for\n" |
1d748d7c LP |
80 | " the virtual machine\n" |
81 | "\n%3$sHost Configuration:%4$s\n" | |
9de3cc14 SL |
82 | " --qemu-smp=SMP Configure guest's SMP settings\n" |
83 | " --qemu-mem=MEM Configure guest's RAM size\n" | |
f72a0856 SL |
84 | " --qemu-kvm=BOOL Configure whether to use KVM or not\n" |
85 | " --qemu-vsock=BOOL Configure whether to use qemu with a vsock or not\n" | |
86 | " --vsock-cid= Specify the CID to use for the qemu guest's vsock\n" | |
9de3cc14 | 87 | " --qemu-gui Start QEMU in graphical mode\n" |
f72a0856 | 88 | " --secure-boot=BOOL Configure whether to search for firmware which\n" |
1d748d7c | 89 | " supports Secure Boot\n" |
e8ce204d | 90 | " --firmware=PATH|list Select firmware definition file (or list available)\n" |
1d748d7c | 91 | "\n%3$sSystem Identity:%4$s\n" |
f72a0856 | 92 | " -M --machine=NAME Set the machine name for the container\n" |
1d748d7c | 93 | "\n%3$sCredentials:%4$s\n" |
9de3cc14 SL |
94 | " --set-credential=ID:VALUE\n" |
95 | " Pass a credential with literal value to container.\n" | |
96 | " --load-credential=ID:PATH\n" | |
97 | " Load credential to pass to container from file or\n" | |
98 | " AF_UNIX stream socket.\n" | |
99 | "\nSee the %2$s for details.\n", | |
100 | program_invocation_short_name, | |
101 | link, | |
102 | ansi_underline(), | |
103 | ansi_normal(), | |
104 | ansi_highlight(), | |
105 | ansi_normal()); | |
106 | ||
107 | return 0; | |
108 | } | |
109 | ||
110 | static int parse_argv(int argc, char *argv[]) { | |
111 | enum { | |
112 | ARG_VERSION = 0x100, | |
113 | ARG_NO_PAGER, | |
114 | ARG_QEMU_SMP, | |
115 | ARG_QEMU_MEM, | |
116 | ARG_QEMU_KVM, | |
f72a0856 SL |
117 | ARG_QEMU_VSOCK, |
118 | ARG_VSOCK_CID, | |
9de3cc14 SL |
119 | ARG_QEMU_GUI, |
120 | ARG_SECURE_BOOT, | |
121 | ARG_SET_CREDENTIAL, | |
122 | ARG_LOAD_CREDENTIAL, | |
e8ce204d | 123 | ARG_FIRMWARE, |
9de3cc14 SL |
124 | }; |
125 | ||
126 | static const struct option options[] = { | |
127 | { "help", no_argument, NULL, 'h' }, | |
128 | { "version", no_argument, NULL, ARG_VERSION }, | |
dbb2718f | 129 | { "quiet", no_argument, NULL, 'q' }, |
9de3cc14 SL |
130 | { "no-pager", no_argument, NULL, ARG_NO_PAGER }, |
131 | { "image", required_argument, NULL, 'i' }, | |
f72a0856 | 132 | { "machine", required_argument, NULL, 'M' }, |
9de3cc14 SL |
133 | { "qemu-smp", required_argument, NULL, ARG_QEMU_SMP }, |
134 | { "qemu-mem", required_argument, NULL, ARG_QEMU_MEM }, | |
135 | { "qemu-kvm", required_argument, NULL, ARG_QEMU_KVM }, | |
f72a0856 SL |
136 | { "qemu-vsock", required_argument, NULL, ARG_QEMU_VSOCK }, |
137 | { "vsock-cid", required_argument, NULL, ARG_VSOCK_CID }, | |
9de3cc14 SL |
138 | { "qemu-gui", no_argument, NULL, ARG_QEMU_GUI }, |
139 | { "secure-boot", required_argument, NULL, ARG_SECURE_BOOT }, | |
140 | { "set-credential", required_argument, NULL, ARG_SET_CREDENTIAL }, | |
141 | { "load-credential", required_argument, NULL, ARG_LOAD_CREDENTIAL }, | |
e8ce204d | 142 | { "firmware", required_argument, NULL, ARG_FIRMWARE }, |
9de3cc14 SL |
143 | {} |
144 | }; | |
145 | ||
146 | int c, r; | |
147 | ||
148 | assert(argc >= 0); | |
149 | assert(argv); | |
150 | ||
151 | optind = 0; | |
dbb2718f | 152 | while ((c = getopt_long(argc, argv, "+hi:Mq", options, NULL)) >= 0) |
9de3cc14 SL |
153 | switch (c) { |
154 | case 'h': | |
155 | return help(); | |
156 | ||
157 | case ARG_VERSION: | |
158 | return version(); | |
159 | ||
dbb2718f LP |
160 | case 'q': |
161 | arg_quiet = true; | |
162 | break; | |
163 | ||
9de3cc14 SL |
164 | case 'i': |
165 | r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_image); | |
166 | if (r < 0) | |
167 | return r; | |
168 | ||
169 | arg_settings_mask |= SETTING_DIRECTORY; | |
170 | break; | |
171 | ||
f72a0856 SL |
172 | case 'M': |
173 | if (isempty(optarg)) | |
174 | arg_machine = mfree(arg_machine); | |
175 | else { | |
176 | if (!hostname_is_valid(optarg, 0)) | |
177 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
178 | "Invalid machine name: %s", optarg); | |
179 | ||
180 | r = free_and_strdup(&arg_machine, optarg); | |
181 | if (r < 0) | |
182 | return log_oom(); | |
183 | } | |
184 | break; | |
185 | ||
9de3cc14 SL |
186 | case ARG_NO_PAGER: |
187 | arg_pager_flags |= PAGER_DISABLE; | |
188 | break; | |
189 | ||
190 | case ARG_QEMU_SMP: | |
d9c4917b YW |
191 | r = free_and_strdup_warn(&arg_qemu_smp, optarg); |
192 | if (r < 0) | |
193 | return r; | |
9de3cc14 SL |
194 | break; |
195 | ||
196 | case ARG_QEMU_MEM: | |
197 | r = parse_size(optarg, 1024, &arg_qemu_mem); | |
198 | if (r < 0) | |
199 | return log_error_errno(r, "Failed to parse --qemu-mem=%s: %m", optarg); | |
200 | break; | |
201 | ||
202 | case ARG_QEMU_KVM: | |
203 | r = parse_tristate(optarg, &arg_qemu_kvm); | |
204 | if (r < 0) | |
205 | return log_error_errno(r, "Failed to parse --qemu-kvm=%s: %m", optarg); | |
206 | break; | |
207 | ||
f72a0856 SL |
208 | case ARG_QEMU_VSOCK: |
209 | r = parse_tristate(optarg, &arg_qemu_vsock); | |
210 | if (r < 0) | |
211 | return log_error_errno(r, "Failed to parse --qemu-vsock=%s: %m", optarg); | |
212 | break; | |
213 | ||
06d4fe57 | 214 | case ARG_VSOCK_CID: |
f72a0856 | 215 | if (isempty(optarg)) |
06d4fe57 | 216 | arg_vsock_cid = VMADDR_CID_ANY; |
f72a0856 | 217 | else { |
06d4fe57 LP |
218 | unsigned cid; |
219 | ||
220 | r = vsock_parse_cid(optarg, &cid); | |
f72a0856 | 221 | if (r < 0) |
06d4fe57 LP |
222 | return log_error_errno(r, "Failed to parse --vsock-cid: %s", optarg); |
223 | if (!VSOCK_CID_IS_REGULAR(cid)) | |
224 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified CID is not regular, refusing: %u", cid); | |
225 | ||
226 | arg_vsock_cid = cid; | |
f72a0856 | 227 | } |
f72a0856 | 228 | break; |
f72a0856 | 229 | |
9de3cc14 SL |
230 | case ARG_QEMU_GUI: |
231 | arg_qemu_gui = true; | |
232 | break; | |
233 | ||
234 | case ARG_SECURE_BOOT: | |
235 | r = parse_tristate(optarg, &arg_secure_boot); | |
236 | if (r < 0) | |
237 | return log_error_errno(r, "Failed to parse --secure-boot=%s: %m", optarg); | |
238 | break; | |
239 | ||
240 | case ARG_SET_CREDENTIAL: { | |
bd546b9b | 241 | r = machine_credential_set(&arg_credentials, optarg); |
9de3cc14 | 242 | if (r < 0) |
6045958b | 243 | return r; |
9de3cc14 SL |
244 | arg_settings_mask |= SETTING_CREDENTIALS; |
245 | break; | |
246 | } | |
247 | ||
248 | case ARG_LOAD_CREDENTIAL: { | |
bd546b9b | 249 | r = machine_credential_load(&arg_credentials, optarg); |
9de3cc14 | 250 | if (r < 0) |
6045958b | 251 | return r; |
9de3cc14 SL |
252 | |
253 | arg_settings_mask |= SETTING_CREDENTIALS; | |
254 | break; | |
255 | } | |
256 | ||
e8ce204d LP |
257 | case ARG_FIRMWARE: |
258 | if (streq(optarg, "list")) { | |
259 | _cleanup_strv_free_ char **l = NULL; | |
260 | ||
261 | r = list_ovmf_config(&l); | |
262 | if (r < 0) | |
263 | return log_error_errno(r, "Failed to list firmwares: %m"); | |
264 | ||
265 | bool nl = false; | |
266 | fputstrv(stdout, l, "\n", &nl); | |
267 | if (nl) | |
268 | putchar('\n'); | |
269 | ||
270 | return 0; | |
271 | } | |
272 | ||
273 | if (!isempty(optarg) && !path_is_absolute(optarg) && !startswith(optarg, "./")) | |
274 | return log_error_errno(SYNTHETIC_ERRNO(errno), "Absolute path or path starting with './' required."); | |
275 | ||
276 | r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_firmware); | |
277 | if (r < 0) | |
278 | return r; | |
279 | ||
280 | break; | |
281 | ||
9de3cc14 SL |
282 | case '?': |
283 | return -EINVAL; | |
284 | ||
285 | default: | |
286 | assert_not_reached(); | |
287 | } | |
288 | ||
289 | if (argc > optind) { | |
290 | strv_free(arg_parameters); | |
291 | arg_parameters = strv_copy(argv + optind); | |
292 | if (!arg_parameters) | |
293 | return log_oom(); | |
294 | ||
295 | arg_settings_mask |= SETTING_START_MODE; | |
296 | } | |
297 | ||
298 | return 1; | |
299 | } | |
300 | ||
f72a0856 SL |
301 | static int open_vsock(void) { |
302 | _cleanup_close_ int vsock_fd = -EBADF; | |
303 | int r; | |
304 | static const union sockaddr_union bind_addr = { | |
305 | .vm.svm_family = AF_VSOCK, | |
306 | .vm.svm_cid = VMADDR_CID_ANY, | |
307 | .vm.svm_port = VMADDR_PORT_ANY, | |
308 | }; | |
309 | ||
310 | vsock_fd = socket(AF_VSOCK, SOCK_STREAM|SOCK_CLOEXEC, 0); | |
311 | if (vsock_fd < 0) | |
312 | return log_error_errno(errno, "Failed to open AF_VSOCK socket: %m"); | |
313 | ||
314 | r = bind(vsock_fd, &bind_addr.sa, sizeof(bind_addr.vm)); | |
315 | if (r < 0) | |
316 | return log_error_errno(errno, "Failed to bind to vsock to address %u:%u: %m", bind_addr.vm.svm_cid, bind_addr.vm.svm_port); | |
317 | ||
318 | r = listen(vsock_fd, SOMAXCONN_DELUXE); | |
319 | if (r < 0) | |
320 | return log_error_errno(errno, "Failed to listen on vsock: %m"); | |
321 | ||
322 | return TAKE_FD(vsock_fd); | |
323 | } | |
324 | ||
325 | static int vmspawn_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { | |
326 | char buf[NOTIFY_BUFFER_MAX+1]; | |
327 | const char *p = NULL; | |
328 | struct iovec iovec = { | |
329 | .iov_base = buf, | |
330 | .iov_len = sizeof(buf)-1, | |
331 | }; | |
332 | struct msghdr msghdr = { | |
333 | .msg_iov = &iovec, | |
334 | .msg_iovlen = 1, | |
335 | }; | |
336 | ssize_t n; | |
337 | _cleanup_strv_free_ char **tags = NULL; | |
338 | int r, *exit_status = ASSERT_PTR(userdata); | |
339 | ||
340 | n = recvmsg_safe(fd, &msghdr, MSG_DONTWAIT); | |
341 | if (ERRNO_IS_NEG_TRANSIENT(n)) | |
342 | return 0; | |
343 | if (n == -EXFULL) { | |
344 | log_warning_errno(n, "Got message with truncated control data, ignoring: %m"); | |
345 | return 0; | |
346 | } | |
347 | if (n < 0) | |
348 | return log_warning_errno(n, "Couldn't read notification socket: %m"); | |
349 | ||
350 | if ((size_t) n >= sizeof(buf)) { | |
351 | log_warning("Received notify message exceeded maximum size. Ignoring."); | |
352 | return 0; | |
353 | } | |
354 | ||
355 | buf[n] = 0; | |
356 | tags = strv_split(buf, "\n\r"); | |
357 | if (!tags) | |
358 | return log_oom(); | |
359 | ||
360 | STRV_FOREACH(s, tags) | |
361 | log_debug("Received tag %s from notify socket", *s); | |
362 | ||
363 | if (strv_contains(tags, "READY=1")) { | |
364 | r = sd_notify(false, "READY=1\n"); | |
365 | if (r < 0) | |
366 | log_warning_errno(r, "Failed to send readiness notification, ignoring: %m"); | |
367 | } | |
368 | ||
369 | p = strv_find_startswith(tags, "STATUS="); | |
370 | if (p) | |
371 | (void) sd_notifyf(false, "STATUS=VM running: %s", p); | |
372 | ||
373 | p = strv_find_startswith(tags, "EXIT_STATUS="); | |
374 | if (p) { | |
375 | r = safe_atoi(p, exit_status); | |
376 | if (r < 0) | |
377 | log_warning_errno(r, "Failed to parse exit status from %s, ignoring: %m", p); | |
378 | } | |
379 | ||
380 | /* we will only receive one message from each connection so disable this source once one is received */ | |
381 | source = sd_event_source_disable_unref(source); | |
382 | ||
383 | return 0; | |
384 | } | |
385 | ||
386 | static int vmspawn_dispatch_vsock_connections(sd_event_source *source, int fd, uint32_t revents, void *userdata) { | |
387 | int r; | |
388 | sd_event *event; | |
389 | _cleanup_close_ int conn_fd = -EBADF; | |
390 | ||
391 | assert(userdata); | |
392 | ||
393 | if (revents != EPOLLIN) { | |
394 | log_warning("Got unexpected poll event for vsock fd."); | |
395 | return 0; | |
396 | } | |
397 | ||
398 | conn_fd = accept4(fd, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK); | |
399 | if (conn_fd < 0) { | |
400 | log_warning_errno(errno, "Failed to accept connection from vsock fd (%m), ignoring..."); | |
401 | return 0; | |
402 | } | |
403 | ||
404 | event = sd_event_source_get_event(source); | |
405 | if (!event) | |
406 | return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Failed to retrieve event from event source, exiting task"); | |
407 | ||
408 | /* add a new floating task to read from the connection */ | |
409 | r = sd_event_add_io(event, NULL, conn_fd, revents, vmspawn_dispatch_notify_fd, userdata); | |
410 | if (r < 0) | |
411 | return log_error_errno(r, "Failed to allocate notify connection event source: %m"); | |
412 | ||
413 | /* conn_fd is now owned by the event loop so don't clean it up */ | |
414 | TAKE_FD(conn_fd); | |
415 | ||
416 | return 0; | |
417 | } | |
418 | ||
400da3e4 | 419 | static int setup_notify_parent(sd_event *event, int fd, int *exit_status, sd_event_source **ret_notify_event_source) { |
f72a0856 SL |
420 | int r; |
421 | ||
400da3e4 LP |
422 | assert(event); |
423 | assert(fd >= 0); | |
424 | assert(exit_status); | |
425 | assert(ret_notify_event_source); | |
426 | ||
427 | r = sd_event_add_io(event, ret_notify_event_source, fd, EPOLLIN, vmspawn_dispatch_vsock_connections, exit_status); | |
f72a0856 SL |
428 | if (r < 0) |
429 | return log_error_errno(r, "Failed to allocate notify socket event source: %m"); | |
430 | ||
400da3e4 | 431 | (void) sd_event_source_set_description(*ret_notify_event_source, "vmspawn-notify-sock"); |
f72a0856 SL |
432 | |
433 | return 0; | |
434 | } | |
435 | ||
436 | static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) { | |
6cff1854 LP |
437 | PidRef *pidref = userdata; |
438 | int r; | |
439 | ||
440 | /* TODO: actually talk to qemu and ask the guest to shutdown here */ | |
f72a0856 | 441 | |
6cff1854 LP |
442 | if (pidref) { |
443 | r = pidref_kill(pidref, SIGKILL); | |
444 | if (r < 0) | |
445 | log_warning_errno(r, "Failed to kill qemu, terminating: %m"); | |
446 | else { | |
f72a0856 SL |
447 | log_info("Trying to halt qemu. Send SIGTERM again to trigger vmspawn to immediately terminate."); |
448 | sd_event_source_set_userdata(s, NULL); | |
449 | return 0; | |
450 | } | |
451 | } | |
452 | ||
453 | sd_event_exit(sd_event_source_get_event(s), 0); | |
454 | return 0; | |
455 | } | |
456 | ||
457 | static int on_child_exit(sd_event_source *s, const siginfo_t *si, void *userdata) { | |
458 | sd_event_exit(sd_event_source_get_event(s), 0); | |
459 | return 0; | |
460 | } | |
461 | ||
462 | static int cmdline_add_vsock(char ***cmdline, int vsock_fd) { | |
463 | int r; | |
464 | ||
465 | r = strv_extend(cmdline, "-smbios"); | |
466 | if (r < 0) | |
467 | return r; | |
468 | ||
469 | union sockaddr_union addr; | |
470 | socklen_t addr_len = sizeof addr.vm; | |
471 | r = getsockname(vsock_fd, &addr.sa, &addr_len); | |
472 | if (r < 0) | |
473 | return -errno; | |
474 | assert(addr_len >= sizeof addr.vm); | |
475 | assert(addr.vm.svm_family == AF_VSOCK); | |
476 | ||
477 | log_info("Using vsock-stream:%u:%u", (unsigned) VMADDR_CID_HOST, addr.vm.svm_port); | |
478 | r = strv_extendf(cmdline, "type=11,value=io.systemd.credential:vmm.notify_socket=vsock-stream:%u:%u", (unsigned) VMADDR_CID_HOST, addr.vm.svm_port); | |
479 | if (r < 0) | |
480 | return r; | |
481 | ||
482 | return 0; | |
483 | } | |
484 | ||
9de3cc14 SL |
485 | static int run_virtual_machine(void) { |
486 | _cleanup_(ovmf_config_freep) OvmfConfig *ovmf_config = NULL; | |
487 | _cleanup_strv_free_ char **cmdline = NULL; | |
f72a0856 | 488 | _cleanup_free_ char *machine = NULL, *qemu_binary = NULL, *mem = NULL; |
9de3cc14 | 489 | int r; |
f72a0856 | 490 | _cleanup_close_ int vsock_fd = -EBADF; |
9de3cc14 SL |
491 | |
492 | bool use_kvm = arg_qemu_kvm > 0; | |
493 | if (arg_qemu_kvm < 0) { | |
494 | r = qemu_check_kvm_support(); | |
495 | if (r < 0) | |
496 | return log_error_errno(r, "Failed to check for KVM support: %m"); | |
497 | use_kvm = r; | |
498 | } | |
499 | ||
e8ce204d LP |
500 | if (arg_firmware) |
501 | r = load_ovmf_config(arg_firmware, &ovmf_config); | |
502 | else | |
503 | r = find_ovmf_config(arg_secure_boot, &ovmf_config); | |
9de3cc14 SL |
504 | if (r < 0) |
505 | return log_error_errno(r, "Failed to find OVMF config: %m"); | |
506 | ||
507 | /* only warn if the user hasn't disabled secureboot */ | |
508 | if (!ovmf_config->supports_sb && arg_secure_boot) | |
509 | log_warning("Couldn't find OVMF firmware blob with Secure Boot support, " | |
510 | "falling back to OVMF firmware blobs without Secure Boot support."); | |
511 | ||
512 | const char *accel = use_kvm ? "kvm" : "tcg"; | |
f72a0856 SL |
513 | if (IN_SET(native_architecture(), ARCHITECTURE_ARM64, ARCHITECTURE_ARM64_BE)) |
514 | machine = strjoin("type=virt,accel=", accel); | |
515 | else | |
516 | machine = strjoin("type=q35,accel=", accel, ",smm=", on_off(ovmf_config->supports_sb)); | |
9de3cc14 SL |
517 | if (!machine) |
518 | return log_oom(); | |
519 | ||
520 | r = find_qemu_binary(&qemu_binary); | |
521 | if (r == -EOPNOTSUPP) | |
522 | return log_error_errno(r, "Native architecture is not supported by qemu."); | |
523 | if (r < 0) | |
524 | return log_error_errno(r, "Failed to find QEMU binary: %m"); | |
525 | ||
526 | if (asprintf(&mem, "%.4fM", (double)arg_qemu_mem / (1024.0 * 1024.0)) < 0) | |
527 | return log_oom(); | |
528 | ||
529 | cmdline = strv_new( | |
530 | qemu_binary, | |
531 | "-machine", machine, | |
532 | "-smp", arg_qemu_smp ?: "1", | |
533 | "-m", mem, | |
534 | "-object", "rng-random,filename=/dev/urandom,id=rng0", | |
535 | "-device", "virtio-rng-pci,rng=rng0,id=rng-device0", | |
f72a0856 | 536 | "-nic", "user,model=virtio-net-pci" |
9de3cc14 | 537 | ); |
f72a0856 SL |
538 | if (!cmdline) |
539 | return log_oom(); | |
540 | ||
541 | bool use_vsock = arg_qemu_vsock > 0 && ARCHITECTURE_SUPPORTS_SMBIOS; | |
542 | if (arg_qemu_vsock < 0) { | |
543 | r = qemu_check_vsock_support(); | |
544 | if (r < 0) | |
545 | return log_error_errno(r, "Failed to check for VSock support: %m"); | |
546 | ||
547 | use_vsock = r; | |
548 | } | |
549 | ||
550 | unsigned child_cid = VMADDR_CID_ANY; | |
551 | _cleanup_close_ int child_vsock_fd = -EBADF; | |
552 | if (use_vsock) { | |
06d4fe57 | 553 | child_cid = arg_vsock_cid; |
f72a0856 SL |
554 | |
555 | r = vsock_fix_child_cid(&child_cid, arg_machine, &child_vsock_fd); | |
556 | if (r < 0) | |
557 | return log_error_errno(r, "Failed to fix CID for the guest vsock socket: %m"); | |
558 | ||
559 | r = strv_extend(&cmdline, "-device"); | |
560 | if (r < 0) | |
561 | return log_oom(); | |
562 | ||
563 | log_debug("vhost-vsock-pci,guest-cid=%u,vhostfd=%d", child_cid, child_vsock_fd); | |
564 | r = strv_extendf(&cmdline, "vhost-vsock-pci,guest-cid=%u,vhostfd=%d", child_cid, child_vsock_fd); | |
565 | if (r < 0) | |
566 | return log_oom(); | |
567 | } | |
568 | ||
69f3c619 | 569 | r = strv_extend_many(&cmdline, "-cpu", "max"); |
f72a0856 SL |
570 | if (r < 0) |
571 | return log_oom(); | |
9de3cc14 | 572 | |
69f3c619 LP |
573 | if (arg_qemu_gui) |
574 | r = strv_extend_many( | |
575 | &cmdline, | |
576 | "-vga", | |
577 | "virtio"); | |
578 | else | |
579 | r = strv_extend_many( | |
580 | &cmdline, | |
581 | "-nographic", | |
582 | "-nodefaults", | |
583 | "-chardev", "stdio,mux=on,id=console,signal=off", | |
584 | "-serial", "chardev:console", | |
585 | "-mon", "console"); | |
586 | if (r < 0) | |
587 | return log_oom(); | |
9de3cc14 | 588 | |
bd546b9b MY |
589 | if (ARCHITECTURE_SUPPORTS_SMBIOS) |
590 | FOREACH_ARRAY(cred, arg_credentials.credentials, arg_credentials.n_credentials) { | |
f72a0856 | 591 | _cleanup_free_ char *cred_data_b64 = NULL; |
bd546b9b | 592 | ssize_t n; |
9de3cc14 | 593 | |
f72a0856 SL |
594 | n = base64mem(cred->data, cred->size, &cred_data_b64); |
595 | if (n < 0) | |
596 | return log_oom(); | |
9de3cc14 | 597 | |
f72a0856 SL |
598 | r = strv_extend(&cmdline, "-smbios"); |
599 | if (r < 0) | |
600 | return log_oom(); | |
9de3cc14 | 601 | |
f72a0856 SL |
602 | r = strv_extendf(&cmdline, "type=11,value=io.systemd.credential.binary:%s=%s", cred->id, cred_data_b64); |
603 | if (r < 0) | |
604 | return log_oom(); | |
605 | } | |
9de3cc14 SL |
606 | |
607 | r = strv_extend(&cmdline, "-drive"); | |
608 | if (r < 0) | |
609 | return log_oom(); | |
610 | ||
71e42c36 | 611 | r = strv_extendf(&cmdline, "if=pflash,format=%s,readonly=on,file=%s", ovmf_config_format(ovmf_config), ovmf_config->path); |
9de3cc14 SL |
612 | if (r < 0) |
613 | return log_oom(); | |
614 | ||
f72a0856 | 615 | _cleanup_(unlink_and_freep) char *ovmf_vars_to = NULL; |
9de3cc14 SL |
616 | if (ovmf_config->supports_sb) { |
617 | const char *ovmf_vars_from = ovmf_config->vars; | |
9de3cc14 SL |
618 | _cleanup_close_ int source_fd = -EBADF, target_fd = -EBADF; |
619 | ||
620 | r = tempfn_random_child(NULL, "vmspawn-", &ovmf_vars_to); | |
621 | if (r < 0) | |
622 | return r; | |
623 | ||
624 | source_fd = open(ovmf_vars_from, O_RDONLY|O_CLOEXEC); | |
625 | if (source_fd < 0) | |
626 | return log_error_errno(source_fd, "Failed to open OVMF vars file %s: %m", ovmf_vars_from); | |
627 | ||
628 | target_fd = open(ovmf_vars_to, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0600); | |
629 | if (target_fd < 0) | |
630 | return log_error_errno(errno, "Failed to create regular file for OVMF vars at %s: %m", ovmf_vars_to); | |
631 | ||
632 | r = copy_bytes(source_fd, target_fd, UINT64_MAX, COPY_REFLINK); | |
633 | if (r < 0) | |
634 | return log_error_errno(r, "Failed to copy bytes from %s to %s: %m", ovmf_vars_from, ovmf_vars_to); | |
635 | ||
636 | /* These aren't always available so don't raise an error if they fail */ | |
637 | (void) copy_xattr(source_fd, NULL, target_fd, NULL, 0); | |
638 | (void) copy_access(source_fd, target_fd); | |
639 | (void) copy_times(source_fd, target_fd, 0); | |
640 | ||
69f3c619 LP |
641 | r = strv_extend_many( |
642 | &cmdline, | |
643 | "-global", "ICH9-LPC.disable_s3=1", | |
644 | "-global", "driver=cfi.pflash01,property=secure,value=on", | |
645 | "-drive"); | |
9de3cc14 SL |
646 | if (r < 0) |
647 | return log_oom(); | |
648 | ||
71e42c36 | 649 | r = strv_extendf(&cmdline, "file=%s,if=pflash,format=%s", ovmf_vars_to, ovmf_config_format(ovmf_config)); |
9de3cc14 SL |
650 | if (r < 0) |
651 | return log_oom(); | |
652 | } | |
653 | ||
654 | r = strv_extend(&cmdline, "-drive"); | |
655 | if (r < 0) | |
656 | return log_oom(); | |
657 | ||
658 | r = strv_extendf(&cmdline, "if=none,id=mkosi,file=%s,format=raw", arg_image); | |
659 | if (r < 0) | |
660 | return log_oom(); | |
661 | ||
69f3c619 LP |
662 | r = strv_extend_many( |
663 | &cmdline, | |
664 | "-device", "virtio-scsi-pci,id=scsi", | |
665 | "-device", "scsi-hd,drive=mkosi,bootindex=1"); | |
9de3cc14 SL |
666 | if (r < 0) |
667 | return log_oom(); | |
668 | ||
773ca1de SL |
669 | r = strv_prepend(&arg_parameters, "console=" DEFAULT_SERIAL_TTY); |
670 | if (r < 0) | |
671 | return log_oom(); | |
4291f446 | 672 | |
773ca1de SL |
673 | if (ARCHITECTURE_SUPPORTS_SMBIOS) { |
674 | _cleanup_free_ char *kcl = strv_join(arg_parameters, " "); | |
675 | if (!kcl) | |
676 | return log_oom(); | |
4291f446 | 677 | |
773ca1de SL |
678 | r = strv_extend(&cmdline, "-smbios"); |
679 | if (r < 0) | |
680 | return log_oom(); | |
681 | ||
682 | r = strv_extendf(&cmdline, "type=11,value=io.systemd.stub.kernel-cmdline-extra=%s", kcl); | |
683 | if (r < 0) | |
684 | return log_oom(); | |
685 | } else | |
686 | log_warning("Cannot append extra args to kernel cmdline, native architecture doesn't support SMBIOS"); | |
f72a0856 SL |
687 | |
688 | if (use_vsock) { | |
689 | vsock_fd = open_vsock(); | |
690 | if (vsock_fd < 0) | |
691 | return log_error_errno(vsock_fd, "Failed to open vsock: %m"); | |
692 | ||
693 | r = cmdline_add_vsock(&cmdline, vsock_fd); | |
694 | if (r == -ENOMEM) | |
4291f446 | 695 | return log_oom(); |
f72a0856 SL |
696 | if (r < 0) |
697 | return log_error_errno(r, "Failed to call getsockname on vsock: %m"); | |
4291f446 | 698 | } |
9de3cc14 | 699 | |
b9e2d83b LP |
700 | if (DEBUG_LOGGING) { |
701 | _cleanup_free_ char *joined = quote_command_line(cmdline, SHELL_ESCAPE_EMPTY); | |
702 | if (!joined) | |
703 | return log_oom(); | |
704 | ||
705 | log_debug("Executing: %s", joined); | |
706 | } | |
707 | ||
f72a0856 SL |
708 | _cleanup_(sd_event_source_unrefp) sd_event_source *notify_event_source = NULL; |
709 | _cleanup_(sd_event_unrefp) sd_event *event = NULL; | |
710 | r = sd_event_new(&event); | |
711 | if (r < 0) | |
712 | return log_error_errno(r, "Failed to get default event source: %m"); | |
713 | ||
714 | (void) sd_event_set_watchdog(event, true); | |
715 | ||
6cff1854 LP |
716 | _cleanup_(pidref_done) PidRef child_pidref = PIDREF_NULL; |
717 | ||
718 | r = pidref_safe_fork_full( | |
f72a0856 | 719 | qemu_binary, |
6cff1854 | 720 | /* stdio_fds= */ NULL, |
f72a0856 | 721 | &child_vsock_fd, 1, /* pass the vsock fd to qemu */ |
6cff1854 LP |
722 | FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_CLOEXEC_OFF|FORK_RLIMIT_NOFILE_SAFE, |
723 | &child_pidref); | |
f72a0856 | 724 | if (r < 0) |
6cff1854 | 725 | return r; |
9de3cc14 SL |
726 | if (r == 0) { |
727 | /* set TERM and LANG if they are missing */ | |
728 | if (setenv("TERM", "vt220", 0) < 0) | |
729 | return log_oom(); | |
730 | ||
731 | if (setenv("LANG", "C.UTF-8", 0) < 0) | |
732 | return log_oom(); | |
733 | ||
02aacdcf | 734 | execv(qemu_binary, cmdline); |
9de3cc14 SL |
735 | log_error_errno(errno, "Failed to execve %s: %m", qemu_binary); |
736 | _exit(EXIT_FAILURE); | |
737 | } | |
738 | ||
c7c6b6f2 LP |
739 | /* Close the vsock fd we passed to qemu in the parent. We don't need it anymore. */ |
740 | child_vsock_fd = safe_close(child_vsock_fd); | |
741 | ||
f72a0856 SL |
742 | int exit_status = INT_MAX; |
743 | if (use_vsock) { | |
744 | r = setup_notify_parent(event, vsock_fd, &exit_status, ¬ify_event_source); | |
745 | if (r < 0) | |
746 | return log_error_errno(r, "Failed to setup event loop to handle vsock notify events: %m"); | |
747 | } | |
748 | ||
749 | /* shutdown qemu when we are shutdown */ | |
6cff1854 LP |
750 | (void) sd_event_add_signal(event, NULL, SIGINT | SD_EVENT_SIGNAL_PROCMASK, on_orderly_shutdown, &child_pidref); |
751 | (void) sd_event_add_signal(event, NULL, SIGTERM | SD_EVENT_SIGNAL_PROCMASK, on_orderly_shutdown, &child_pidref); | |
f72a0856 | 752 | |
8b183505 | 753 | (void) sd_event_add_signal(event, NULL, (SIGRTMIN+18) | SD_EVENT_SIGNAL_PROCMASK, sigrtmin18_handler, NULL); |
f72a0856 SL |
754 | |
755 | /* Exit when the child exits */ | |
6cff1854 | 756 | (void) event_add_child_pidref(event, NULL, &child_pidref, WEXITED, on_child_exit, NULL); |
f72a0856 SL |
757 | |
758 | r = sd_event_loop(event); | |
759 | if (r < 0) | |
760 | return log_error_errno(r, "Failed to run event loop: %m"); | |
761 | ||
762 | if (use_vsock) { | |
763 | if (exit_status == INT_MAX) { | |
764 | log_debug("Couldn't retrieve inner EXIT_STATUS from vsock"); | |
765 | return EXIT_SUCCESS; | |
766 | } | |
767 | if (exit_status != 0) | |
768 | log_warning("Non-zero exit code received: %d", exit_status); | |
769 | return exit_status; | |
770 | } | |
771 | ||
772 | return 0; | |
773 | } | |
774 | ||
775 | static int determine_names(void) { | |
776 | int r; | |
777 | ||
778 | if (!arg_image) | |
b71855b3 | 779 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing required argument -i/--image=, quitting"); |
f72a0856 SL |
780 | |
781 | if (!arg_machine) { | |
782 | char *e; | |
783 | ||
784 | r = path_extract_filename(arg_image, &arg_machine); | |
785 | if (r < 0) | |
786 | return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_image); | |
787 | ||
788 | /* Truncate suffix if there is one */ | |
789 | e = endswith(arg_machine, ".raw"); | |
790 | if (e) | |
791 | *e = 0; | |
792 | ||
793 | hostname_cleanup(arg_machine); | |
794 | if (!hostname_is_valid(arg_machine, 0)) | |
795 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine machine name automatically, please use -M."); | |
796 | } | |
797 | ||
798 | return 0; | |
9de3cc14 SL |
799 | } |
800 | ||
/* Program logic behind main(): parse the command line, settle on a machine name, announce the
 * VM unless --quiet was given, and run it.
 *
 * Returns whatever parse_argv() returned if that is <= 0 (error, or request already handled),
 * a negative value if determine_names() fails, and otherwise the result of
 * run_virtual_machine(). */
static int run(int argc, char *argv[]) {
        int r;

        log_setup();

        r = parse_argv(argc, argv);
        if (r <= 0)
                return r;

        r = determine_names();
        if (r < 0)
                return r;

        if (!arg_quiet)
                log_info("Spawning VM %s on %s.\n"
                         "Press Ctrl-a x to kill VM.",
                         arg_machine, arg_image);

        /* Block SIGCHLD before the event loop is set up in run_virtual_machine().
         * NOTE(review): presumably required for the child event source registered there — confirm. */
        assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);

        return run_virtual_machine();
}
823 | ||
/* Program entry point, generated around run().
 * NOTE(review): the macro is defined outside this file (presumably in main-func.h); going by its
 * name, positive return values of run() are treated as failure exit codes — confirm. */
DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);