1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
4 #include <linux/vhost.h>
7 #include "architecture.h"
8 #include "conf-files.h"
9 #include "errno-util.h"
15 #include "memory-util.h"
16 #include "path-lookup.h"
17 #include "path-util.h"
18 #include "random-util.h"
19 #include "recurse-dir.h"
20 #include "siphash24.h"
21 #include "socket-util.h"
22 #include "sort-util.h"
23 #include "string-util.h"
25 #include "vmspawn-util.h"
/* Release function for OvmfConfig: takes the object to release and returns an OvmfConfig pointer.
 * find_ovmf_config() uses the derived ovmf_config_freep as its _cleanup_() handler.
 * NOTE(review): the function body is not visible in this excerpt; presumably it frees the path and
 * vars members and returns NULL. Confirm against the full file. */
27 OvmfConfig
* ovmf_config_free(OvmfConfig
*config
) {
/* Probes for KVM by testing whether /dev/kvm exists (access() with F_OK; the node is not opened).
 * A missing node (ENOENT) and privilege errors are logged at debug level together with the note that
 * KVM acceleration will not be used.
 * NOTE(review): the return statements are not visible in this excerpt; presumably positive when KVM
 * is usable, 0 when it is not, and a negative errno on unexpected failures. Confirm. */
36 int qemu_check_kvm_support(void) {
37 if (access("/dev/kvm", F_OK
) >= 0)
39 if (errno
== ENOENT
) {
40 log_debug_errno(errno
, "/dev/kvm not found. Not using KVM acceleration.");
43 if (ERRNO_IS_PRIVILEGE(errno
)) {
44 log_debug_errno(errno
, "Permission denied to access /dev/kvm. Not using KVM acceleration.");
/* Probes for vhost-vsock support by opening /dev/vhost-vsock read/write with O_CLOEXEC. The
 * descriptor is declared _cleanup_close_, so it is only used for the probe itself.
 * Device-absent errors (ERRNO_IS_DEVICE_ABSENT) and privilege errors are logged at debug level
 * together with the note that no vsock device will be added to the virtual machine.
 * NOTE(review): the return statements are not visible in this excerpt; presumably positive when the
 * device is usable, 0 when it is not, and a negative errno on unexpected failures. Confirm. */
51 int qemu_check_vsock_support(void) {
52 _cleanup_close_
int fd
= -EBADF
;
53 /* Just using access() will just check if the device node exists, but not whether a
54 * device driver is behind it (this is a common case since systemd-tmpfiles creates
55 * the device node on boot, typically).
57 * Hence we open() the path to see if there's actually something behind.
59 * If not this should return ENODEV.
62 fd
= open("/dev/vhost-vsock", O_RDWR
|O_CLOEXEC
);
65 if (ERRNO_IS_DEVICE_ABSENT(errno
)) {
66 log_debug_errno(errno
, "/dev/vhost-vsock device doesn't exist. Not adding a vsock device to the virtual machine.");
69 if (ERRNO_IS_PRIVILEGE(errno
)) {
70 log_debug_errno(errno
, "Permission denied to access /dev/vhost-vsock. Not adding a vsock device to the virtual machine.");
77 /* Holds the data retrieved from the QEMU firmware interop JSON data: the features list, plus the
 * filenames of the firmware executable (firmware) and of the NVRAM template (vars). */
78 typedef struct FirmwareData
{
/* Releases the members of a FirmwareData: the features string vector through strv_free(), and the
 * firmware and vars strings through mfree(). Each member is overwritten with the value returned by
 * its free helper.
 * NOTE(review): the NULL check, the release of the struct itself and the return statement are not
 * visible in this excerpt. Confirm against the full file. */
84 static FirmwareData
* firmware_data_free(FirmwareData
*fwd
) {
88 fwd
->features
= strv_free(fwd
->features
);
89 fwd
->firmware
= mfree(fwd
->firmware
);
90 fwd
->vars
= mfree(fwd
->vars
);
/* Defines the cleanup helper for FirmwareData pointers on top of firmware_data_free();
 * find_ovmf_config() refers to it as firmware_data_freep in a _cleanup_() attribute. */
94 DEFINE_TRIVIAL_CLEANUP_FUNC(FirmwareData
*, firmware_data_free
);
/* JSON dispatch callback registered for the executable object of a firmware mapping.
 * The mandatory filename string is stored in the firmware member of the FirmwareData passed as
 * userdata. The mandatory format string is listed with no callback and offset 0, so this function
 * does not store its value. Returns whatever json_dispatch() returns. */
96 static int firmware_executable(const char *name
, JsonVariant
*v
, JsonDispatchFlags flags
, void *userdata
) {
97 static const JsonDispatch table
[] = {
98 { "filename", JSON_VARIANT_STRING
, json_dispatch_string
, offsetof(FirmwareData
, firmware
), JSON_MANDATORY
},
99 { "format", JSON_VARIANT_STRING
, NULL
, 0, JSON_MANDATORY
},
103 return json_dispatch(v
, table
, flags
, userdata
);
/* JSON dispatch callback registered for the nvram-template object of a firmware mapping.
 * The mandatory filename string is stored in the vars member of the FirmwareData passed as
 * userdata. The mandatory format string is listed with no callback and offset 0, so this function
 * does not store its value. Returns whatever json_dispatch() returns. */
106 static int firmware_nvram_template(const char *name
, JsonVariant
*v
, JsonDispatchFlags flags
, void *userdata
) {
107 static const JsonDispatch table
[] = {
108 { "filename", JSON_VARIANT_STRING
, json_dispatch_string
, offsetof(FirmwareData
, vars
), JSON_MANDATORY
},
109 { "format", JSON_VARIANT_STRING
, NULL
, 0, JSON_MANDATORY
},
113 return json_dispatch(v
, table
, flags
, userdata
);
/* JSON dispatch callback registered for the mapping object of a firmware descriptor.
 * All three keys are mandatory: device (a string, listed with no callback), executable (an object
 * handed to firmware_executable()) and nvram-template (an object handed to
 * firmware_nvram_template()). Both nested callbacks are registered with offset 0.
 * Returns whatever json_dispatch() returns. */
116 static int firmware_mapping(const char *name
, JsonVariant
*v
, JsonDispatchFlags flags
, void *userdata
) {
117 static const JsonDispatch table
[] = {
118 { "device", JSON_VARIANT_STRING
, NULL
, 0, JSON_MANDATORY
},
119 { "executable", JSON_VARIANT_OBJECT
, firmware_executable
, 0, JSON_MANDATORY
},
120 { "nvram-template", JSON_VARIANT_OBJECT
, firmware_nvram_template
, 0, JSON_MANDATORY
},
124 return json_dispatch(v
, table
, flags
, userdata
);
/* Looks for a usable firmware description among the QEMU firmware descriptor files.
 *
 * Files ending in .json are collected with conf_files_list_strv() from the per-user directory
 * obtained through xdg_user_config_dir() with the suffix /qemu/firmware, from /etc/qemu/firmware
 * and from /usr/share/qemu/firmware. Each file is read, parsed as JSON and dispatched into a
 * FirmwareData; according to the log messages, files that fail any of these steps are ignored.
 *
 * A descriptor is skipped when its features list contains enrolled-keys, or when search_sb is
 * >= 0 and differs from whether the features list contains secure-boot. A negative search_sb
 * therefore disables the Secure Boot filter.
 *
 * For an accepted descriptor a new OvmfConfig is filled in (path, vars, supports_sb) and handed
 * to the caller through *ret.
 * NOTE(review): the error checks, the loop exits and the return statements are not visible in this
 * excerpt, so the exact return values and whether the first match wins should be confirmed. */
127 int find_ovmf_config(int search_sb
, OvmfConfig
**ret
) {
128 _cleanup_(ovmf_config_freep
) OvmfConfig
*config
= NULL
;
129 _cleanup_free_
char *user_firmware_dir
= NULL
;
130 _cleanup_strv_free_
char **conf_files
= NULL
;
134 * - $XDG_CONFIG_HOME/qemu/firmware
135 * - /etc/qemu/firmware
136 * - /usr/share/qemu/firmware
138 * Prioritising entries in "more specific" directories
141 r
= xdg_user_config_dir(&user_firmware_dir
, "/qemu/firmware");
145 r
= conf_files_list_strv(&conf_files
, ".json", NULL
, CONF_FILES_FILTER_MASKED
|CONF_FILES_REGULAR
,
146 STRV_MAKE_CONST(user_firmware_dir
, "/etc/qemu/firmware", "/usr/share/qemu/firmware"));
148 return log_debug_errno(r
, "Failed to list config files: %m");
/* Examine every descriptor file that was found; all per-file state is released automatically
 * through the _cleanup_ attributes on the variables declared below. */
150 STRV_FOREACH(file
, conf_files
) {
151 _cleanup_(firmware_data_freep
) FirmwareData
*fwd
= NULL
;
152 _cleanup_(json_variant_unrefp
) JsonVariant
*config_json
= NULL
;
153 _cleanup_free_
char *contents
= NULL
;
154 size_t contents_sz
= 0;
156 r
= read_full_file(*file
, &contents
, &contents_sz
);
160 log_debug_errno(r
, "Failed to read contents of %s - ignoring: %m", *file
);
164 r
= json_parse(contents
, 0, &config_json
, NULL
, NULL
);
168 log_debug_errno(r
, "Failed to parse the JSON in %s - ignoring: %m", *file
);
/* Every key below is mandatory. Only mapping and features have a callback; the remaining keys
 * are listed with a NULL callback and offset 0. */
172 static const JsonDispatch table
[] = {
173 { "description", JSON_VARIANT_STRING
, NULL
, 0, JSON_MANDATORY
},
174 { "interface-types", JSON_VARIANT_ARRAY
, NULL
, 0, JSON_MANDATORY
},
175 { "mapping", JSON_VARIANT_OBJECT
, firmware_mapping
, 0, JSON_MANDATORY
},
176 { "targets", JSON_VARIANT_ARRAY
, NULL
, 0, JSON_MANDATORY
},
177 { "features", JSON_VARIANT_ARRAY
, json_dispatch_strv
, offsetof(FirmwareData
, features
), JSON_MANDATORY
},
178 { "tags", JSON_VARIANT_ARRAY
, NULL
, 0, JSON_MANDATORY
},
182 fwd
= new0(FirmwareData
, 1);
186 r
= json_dispatch(config_json
, table
, JSON_ALLOW_EXTENSIONS
, fwd
);
190 log_debug_errno(r
, "Failed to extract the required fields from the JSON in %s - ignoring: %m", *file
);
194 if (strv_contains(fwd
->features
, "enrolled-keys")) {
195 log_debug("Skipping %s, firmware has enrolled keys which has been known to cause issues", *file
);
199 bool sb_present
= strv_contains(fwd
->features
, "secure-boot");
201 /* exclude firmware which doesn't match our Secure Boot requirements */
202 if (search_sb
>= 0 && search_sb
!= sb_present
) {
203 log_debug("Skipping %s, firmware doesn't fit required Secure Boot configuration", *file
);
/* Build the result. The two filename strings are moved out of fwd with TAKE_PTR rather than
 * copied. */
207 config
= new0(OvmfConfig
, 1);
211 config
->path
= TAKE_PTR(fwd
->firmware
);
212 config
->vars
= TAKE_PTR(fwd
->vars
);
213 config
->supports_sb
= sb_present
;
221 *ret
= TAKE_PTR(config
);
/* Locates a QEMU binary and stores its path in *ret_qemu_binary.
 *
 * The generic names qemu and qemu-kvm are looked up first with find_executable(). After that the
 * name qemu-system-ARCH is built, where ARCH comes from architecture_to_qemu_table indexed by
 * native_architecture(), and looked up the same way; that result is returned directly. When the
 * architecture is unsupported a debug message is logged and -EOPNOTSUPP is returned.
 *
 * NOTE(review): the description inside the function calls the output parameter req_qemu_binary;
 * the parameter is actually named ret_qemu_binary.
 * NOTE(review): the checks on the find_executable() results inside the loop and the allocation
 * failure path are not visible in this excerpt. */
226 int find_qemu_binary(char **ret_qemu_binary
) {
230 * On success the path to the qemu binary will be stored in `req_qemu_binary`
232 * If the qemu binary cannot be found -ENOENT will be returned.
233 * If the native architecture is not supported by qemu -EOPNOTSUPP will be returned;
/* Maps our architecture identifiers to the suffix used in qemu-system-ARCH. Architectures without
 * an entry stay NULL.
 * NOTE(review): MIPS64_LE and MIPS_LE both map to mips, and PPC64_LE, PPC64 and PPC all map to ppc,
 * so 64-bit and little-endian hosts resolve to qemu-system-mips and qemu-system-ppc. Verify that
 * these are the intended emulator names. */
236 static const char *architecture_to_qemu_table
[_ARCHITECTURE_MAX
] = {
237 [ARCHITECTURE_ARM64
] = "aarch64", /* differs from our name */
238 [ARCHITECTURE_ARM
] = "arm",
239 [ARCHITECTURE_ALPHA
] = "alpha",
240 [ARCHITECTURE_X86_64
] = "x86_64", /* differs from our name */
241 [ARCHITECTURE_X86
] = "i386", /* differs from our name */
242 [ARCHITECTURE_LOONGARCH64
] = "loongarch64",
243 [ARCHITECTURE_MIPS64_LE
] = "mips", /* differs from our name */
244 [ARCHITECTURE_MIPS_LE
] = "mips", /* differs from our name */
245 [ARCHITECTURE_PARISC
] = "hppa", /* differs from our name */
246 [ARCHITECTURE_PPC64_LE
] = "ppc", /* differs from our name */
247 [ARCHITECTURE_PPC64
] = "ppc", /* differs from our name */
248 [ARCHITECTURE_PPC
] = "ppc",
249 [ARCHITECTURE_RISCV32
] = "riscv32",
250 [ARCHITECTURE_RISCV64
] = "riscv64",
251 [ARCHITECTURE_S390X
] = "s390x",
/* Generic binary names are tried before the architecture-specific one. */
254 FOREACH_STRING(s
, "qemu", "qemu-kvm") {
255 r
= find_executable(s
, ret_qemu_binary
);
263 const char *arch_qemu
= architecture_to_qemu_table
[native_architecture()];
265 return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP
), "Architecture %s not supported by qemu", architecture_to_string(native_architecture()));
267 _cleanup_free_
char *qemu_arch_specific
= NULL
;
268 qemu_arch_specific
= strjoin("qemu-system-", arch_qemu
);
269 if (!qemu_arch_specific
)
272 return find_executable(qemu_arch_specific
, ret_qemu_binary
);
/* Opens /dev/vhost-vsock and assigns a guest CID to it with the VHOST_VSOCK_SET_GUEST_CID ioctl.
 *
 * machine_cid must not be NULL (ASSERT_PTR); its value is the requested CID, or VMADDR_CID_ANY to
 * have one picked. machine is the machine name fed into the hash. ret_child_sock must not be NULL
 * and receives the open descriptor (via TAKE_FD) on the visible success paths.
 *
 * Automatically picked candidates are computed as 3 plus a value below UINT_MAX - 4, i.e. they lie
 * in the range 3 to UINT_MAX - 2; presumably this skips the low reserved CIDs and VMADDR_CID_ANY.
 * When all 64 hashed and 64 random candidates are exhausted, a debug message is logged and
 * -EADDRNOTAVAIL is returned.
 *
 * NOTE(review): the checks on r after each ioctl, the handling of errors other than EADDRINUSE and
 * any write-back to *machine_cid are not visible in this excerpt. Confirm against the full file. */
275 int vsock_fix_child_cid(unsigned *machine_cid
, const char *machine
, int *ret_child_sock
) {
276 /* this is an arbitrary value picked from /dev/urandom */
277 static const uint8_t sip_key
[HASH_KEY_SIZE
] = {
278 0x03, 0xad, 0xf0, 0xa4,
279 0x59, 0x2c, 0x77, 0x11,
280 0xda, 0x39, 0x0c, 0xba,
281 0xf5, 0x4c, 0x80, 0x52
283 struct siphash machine_hash_state
, state
;
284 _cleanup_close_
int vfd
= -EBADF
;
287 /* uint64_t is required here for the ioctl call, but valid CIDs are only 32 bits */
288 uint64_t cid
= *ASSERT_PTR(machine_cid
);
291 assert(ret_child_sock
);
293 /* Fix the CID of the AF_VSOCK socket passed to qemu
295 * If the user has passed us a CID (machine_cid != VMADDR_CID_ANY), then attempt to bind to that CID
296 * and error if we cannot.
298 * Otherwise hash the machine name to get a random CID and attempt to bind to that.
299 * If it is occupied add more information into the hash and try again.
300 * If after 64 attempts this hasn't worked fallback to truly random CIDs.
301 * If after another 64 attempts this hasn't worked then give up and return EADDRNOTAVAIL.
304 /* remove O_CLOEXEC before this fd is passed to QEMU */
305 vfd
= open("/dev/vhost-vsock", O_RDWR
|O_CLOEXEC
);
307 return log_debug_errno(errno
, "Failed to open /dev/vhost-vsock as read/write: %m");
/* A caller-supplied CID is requested as is. */
309 if (cid
!= VMADDR_CID_ANY
) {
310 r
= ioctl(vfd
, VHOST_VSOCK_SET_GUEST_CID
, &cid
);
312 return log_debug_errno(errno
, "Failed to set CID for child vsock with user provided CID %" PRIu64
": %m", cid
);
313 *ret_child_sock
= TAKE_FD(vfd
);
/* The machine name is hashed once with the fixed key. Each attempt then starts from a copy of
 * that state and mixes in the attempt counter, so the same machine name always yields the same
 * sequence of candidates. */
317 siphash24_init(&machine_hash_state
, sip_key
);
318 siphash24_compress_string(machine
, &machine_hash_state
);
319 for (unsigned i
= 0; i
< 64; i
++) {
320 state
= machine_hash_state
;
321 siphash24_compress_safe(&i
, sizeof i
, &state
);
322 uint64_t hash
= siphash24_finalize(&state
);
324 cid
= 3 + (hash
% (UINT_MAX
- 4));
325 r
= ioctl(vfd
, VHOST_VSOCK_SET_GUEST_CID
, &cid
);
328 *ret_child_sock
= TAKE_FD(vfd
);
331 if (errno
!= EADDRINUSE
)
/* Second stage: 64 further attempts with random candidates from the same range. */
335 for (unsigned i
= 0; i
< 64; i
++) {
336 cid
= 3 + random_u64_range(UINT_MAX
- 4);
337 r
= ioctl(vfd
, VHOST_VSOCK_SET_GUEST_CID
, &cid
);
340 *ret_child_sock
= TAKE_FD(vfd
);
344 if (errno
!= EADDRINUSE
)
348 return log_debug_errno(SYNTHETIC_ERRNO(EADDRNOTAVAIL
), "Failed to assign a CID to the guest vsock");