]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/vmspawn/vmspawn-util.c
c8e67597909fcf80b09fba2cdd450b82f2f2d77b
[thirdparty/systemd.git] / src / vmspawn / vmspawn-util.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <unistd.h>
4 #include <linux/vhost.h>
5 #include <sys/ioctl.h>
6
7 #include "architecture.h"
8 #include "conf-files.h"
9 #include "errno-util.h"
10 #include "fd-util.h"
11 #include "fileio.h"
12 #include "json.h"
13 #include "log.h"
14 #include "macro.h"
15 #include "memory-util.h"
16 #include "path-lookup.h"
17 #include "path-util.h"
18 #include "random-util.h"
19 #include "recurse-dir.h"
20 #include "siphash24.h"
21 #include "socket-util.h"
22 #include "sort-util.h"
23 #include "string-util.h"
24 #include "strv.h"
25 #include "vmspawn-util.h"
26
27 OvmfConfig* ovmf_config_free(OvmfConfig *config) {
28 if (!config)
29 return NULL;
30
31 free(config->path);
32 free(config->vars);
33 return mfree(config);
34 }
35
/* Checks whether the /dev/kvm device node is present.
 *
 * Returns true if access() succeeds on the node. Returns false (after a debug log message) if the node
 * does not exist or if the failure is a privilege error. Any other access() failure is returned as a
 * negative errno value. */
int qemu_check_kvm_support(void) {
        if (access("/dev/kvm", F_OK) < 0) {
                if (errno == ENOENT) {
                        log_debug_errno(errno, "/dev/kvm not found. Not using KVM acceleration.");
                        return false;
                }

                if (ERRNO_IS_PRIVILEGE(errno)) {
                        log_debug_errno(errno, "Permission denied to access /dev/kvm. Not using KVM acceleration.");
                        return false;
                }

                /* Neither "absent" nor "not permitted": report it to the caller */
                return -errno;
        }

        return true;
}
50
51 int qemu_check_vsock_support(void) {
52 _cleanup_close_ int fd = -EBADF;
53 /* Just using access() will just check if the device node exists, but not whether a
54 * device driver is behind it (this is a common case since systemd-tmpfiles creates
55 * the device node on boot, typically).
56 *
57 * Hence we open() the path to see if there's actually something behind.
58 *
59 * If not this should return ENODEV.
60 */
61
62 fd = open("/dev/vhost-vsock", O_RDWR|O_CLOEXEC);
63 if (fd >= 0)
64 return true;
65 if (ERRNO_IS_DEVICE_ABSENT(errno)) {
66 log_debug_errno(errno, "/dev/vhost-vsock device doesn't exist. Not adding a vsock device to the virtual machine.");
67 return false;
68 }
69 if (ERRNO_IS_PRIVILEGE(errno)) {
70 log_debug_errno(errno, "Permission denied to access /dev/vhost-vsock. Not adding a vsock device to the virtual machine.");
71 return false;
72 }
73
74 return -errno;
75 }
76
77 /* holds the data retrieved from the QEMU firmware interop JSON data */
78 typedef struct FirmwareData {
79 char **features;
80 char *firmware;
81 char *vars;
82 } FirmwareData;
83
84 static FirmwareData* firmware_data_free(FirmwareData *fwd) {
85 if (!fwd)
86 return NULL;
87
88 fwd->features = strv_free(fwd->features);
89 fwd->firmware = mfree(fwd->firmware);
90 fwd->vars = mfree(fwd->vars);
91
92 return mfree(fwd);
93 }
94 DEFINE_TRIVIAL_CLEANUP_FUNC(FirmwareData*, firmware_data_free);
95
96 static int firmware_executable(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
97 static const JsonDispatch table[] = {
98 { "filename", JSON_VARIANT_STRING, json_dispatch_string, offsetof(FirmwareData, firmware), JSON_MANDATORY },
99 { "format", JSON_VARIANT_STRING, NULL, 0, JSON_MANDATORY },
100 {}
101 };
102
103 return json_dispatch(v, table, flags, userdata);
104 }
105
106 static int firmware_nvram_template(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
107 static const JsonDispatch table[] = {
108 { "filename", JSON_VARIANT_STRING, json_dispatch_string, offsetof(FirmwareData, vars), JSON_MANDATORY },
109 { "format", JSON_VARIANT_STRING, NULL, 0, JSON_MANDATORY },
110 {}
111 };
112
113 return json_dispatch(v, table, flags, userdata);
114 }
115
116 static int firmware_mapping(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
117 static const JsonDispatch table[] = {
118 { "device", JSON_VARIANT_STRING, NULL, 0, JSON_MANDATORY },
119 { "executable", JSON_VARIANT_OBJECT, firmware_executable, 0, JSON_MANDATORY },
120 { "nvram-template", JSON_VARIANT_OBJECT, firmware_nvram_template, 0, JSON_MANDATORY },
121 {}
122 };
123
124 return json_dispatch(v, table, flags, userdata);
125 }
126
127 int find_ovmf_config(int search_sb, OvmfConfig **ret) {
128 _cleanup_(ovmf_config_freep) OvmfConfig *config = NULL;
129 _cleanup_free_ char *user_firmware_dir = NULL;
130 _cleanup_strv_free_ char **conf_files = NULL;
131 int r;
132
133 /* Search in:
134 * - $XDG_CONFIG_HOME/qemu/firmware
135 * - /etc/qemu/firmware
136 * - /usr/share/qemu/firmware
137 *
138 * Prioritising entries in "more specific" directories
139 */
140
141 r = xdg_user_config_dir(&user_firmware_dir, "/qemu/firmware");
142 if (r < 0)
143 return r;
144
145 r = conf_files_list_strv(&conf_files, ".json", NULL, CONF_FILES_FILTER_MASKED|CONF_FILES_REGULAR,
146 STRV_MAKE_CONST(user_firmware_dir, "/etc/qemu/firmware", "/usr/share/qemu/firmware"));
147 if (r < 0)
148 return log_debug_errno(r, "Failed to list config files: %m");
149
150 STRV_FOREACH(file, conf_files) {
151 _cleanup_(firmware_data_freep) FirmwareData *fwd = NULL;
152 _cleanup_(json_variant_unrefp) JsonVariant *config_json = NULL;
153 _cleanup_free_ char *contents = NULL;
154 size_t contents_sz = 0;
155
156 r = read_full_file(*file, &contents, &contents_sz);
157 if (r == -ENOMEM)
158 return r;
159 if (r < 0) {
160 log_debug_errno(r, "Failed to read contents of %s - ignoring: %m", *file);
161 continue;
162 }
163
164 r = json_parse(contents, 0, &config_json, NULL, NULL);
165 if (r == -ENOMEM)
166 return r;
167 if (r < 0) {
168 log_debug_errno(r, "Failed to parse the JSON in %s - ignoring: %m", *file);
169 continue;
170 }
171
172 static const JsonDispatch table[] = {
173 { "description", JSON_VARIANT_STRING, NULL, 0, JSON_MANDATORY },
174 { "interface-types", JSON_VARIANT_ARRAY, NULL, 0, JSON_MANDATORY },
175 { "mapping", JSON_VARIANT_OBJECT, firmware_mapping, 0, JSON_MANDATORY },
176 { "targets", JSON_VARIANT_ARRAY, NULL, 0, JSON_MANDATORY },
177 { "features", JSON_VARIANT_ARRAY, json_dispatch_strv, offsetof(FirmwareData, features), JSON_MANDATORY },
178 { "tags", JSON_VARIANT_ARRAY, NULL, 0, JSON_MANDATORY },
179 {}
180 };
181
182 fwd = new0(FirmwareData, 1);
183 if (!fwd)
184 return -ENOMEM;
185
186 r = json_dispatch(config_json, table, JSON_ALLOW_EXTENSIONS, fwd);
187 if (r == -ENOMEM)
188 return r;
189 if (r < 0) {
190 log_debug_errno(r, "Failed to extract the required fields from the JSON in %s - ignoring: %m", *file);
191 continue;
192 }
193
194 if (strv_contains(fwd->features, "enrolled-keys")) {
195 log_debug("Skipping %s, firmware has enrolled keys which has been known to cause issues", *file);
196 continue;
197 }
198
199 bool sb_present = strv_contains(fwd->features, "secure-boot");
200
201 /* exclude firmware which doesn't match our Secure Boot requirements */
202 if (search_sb >= 0 && search_sb != sb_present) {
203 log_debug("Skipping %s, firmware doesn't fit required Secure Boot configuration", *file);
204 continue;
205 }
206
207 config = new0(OvmfConfig, 1);
208 if (!config)
209 return -ENOMEM;
210
211 config->path = TAKE_PTR(fwd->firmware);
212 config->vars = TAKE_PTR(fwd->vars);
213 config->supports_sb = sb_present;
214 break;
215 }
216
217 if (!config)
218 return -ENOENT;
219
220 if (ret)
221 *ret = TAKE_PTR(config);
222
223 return 0;
224 }
225
226 int find_qemu_binary(char **ret_qemu_binary) {
227 int r;
228
229 /*
230 * On success the path to the qemu binary will be stored in `req_qemu_binary`
231 *
232 * If the qemu binary cannot be found -ENOENT will be returned.
233 * If the native architecture is not supported by qemu -EOPNOTSUPP will be returned;
234 */
235
236 static const char *architecture_to_qemu_table[_ARCHITECTURE_MAX] = {
237 [ARCHITECTURE_ARM64] = "aarch64", /* differs from our name */
238 [ARCHITECTURE_ARM] = "arm",
239 [ARCHITECTURE_ALPHA] = "alpha",
240 [ARCHITECTURE_X86_64] = "x86_64", /* differs from our name */
241 [ARCHITECTURE_X86] = "i386", /* differs from our name */
242 [ARCHITECTURE_LOONGARCH64] = "loongarch64",
243 [ARCHITECTURE_MIPS64_LE] = "mips", /* differs from our name */
244 [ARCHITECTURE_MIPS_LE] = "mips", /* differs from our name */
245 [ARCHITECTURE_PARISC] = "hppa", /* differs from our name */
246 [ARCHITECTURE_PPC64_LE] = "ppc", /* differs from our name */
247 [ARCHITECTURE_PPC64] = "ppc", /* differs from our name */
248 [ARCHITECTURE_PPC] = "ppc",
249 [ARCHITECTURE_RISCV32] = "riscv32",
250 [ARCHITECTURE_RISCV64] = "riscv64",
251 [ARCHITECTURE_S390X] = "s390x",
252 };
253
254 FOREACH_STRING(s, "qemu", "qemu-kvm") {
255 r = find_executable(s, ret_qemu_binary);
256 if (r == 0)
257 return 0;
258
259 if (r != -ENOENT)
260 return r;
261 }
262
263 const char *arch_qemu = architecture_to_qemu_table[native_architecture()];
264 if (!arch_qemu)
265 return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Architecture %s not supported by qemu", architecture_to_string(native_architecture()));
266
267 _cleanup_free_ char *qemu_arch_specific = NULL;
268 qemu_arch_specific = strjoin("qemu-system-", arch_qemu);
269 if (!qemu_arch_specific)
270 return -ENOMEM;
271
272 return find_executable(qemu_arch_specific, ret_qemu_binary);
273 }
274
275 int vsock_fix_child_cid(unsigned *machine_cid, const char *machine, int *ret_child_sock) {
276 /* this is an arbitrary value picked from /dev/urandom */
277 static const uint8_t sip_key[HASH_KEY_SIZE] = {
278 0x03, 0xad, 0xf0, 0xa4,
279 0x59, 0x2c, 0x77, 0x11,
280 0xda, 0x39, 0x0c, 0xba,
281 0xf5, 0x4c, 0x80, 0x52
282 };
283 struct siphash machine_hash_state, state;
284 _cleanup_close_ int vfd = -EBADF;
285 int r;
286
287 /* uint64_t is required here for the ioctl call, but valid CIDs are only 32 bits */
288 uint64_t cid = *ASSERT_PTR(machine_cid);
289
290 assert(machine);
291 assert(ret_child_sock);
292
293 /* Fix the CID of the AF_VSOCK socket passed to qemu
294 *
295 * If the user has passed us a CID (machine_cid != VMADDR_CID_ANY), then attempt to bind to that CID
296 * and error if we cannot.
297 *
298 * Otherwise hash the machine name to get a random CID and attempt to bind to that.
299 * If it is occupied add more information into the hash and try again.
300 * If after 64 attempts this hasn't worked fallback to truly random CIDs.
301 * If after another 64 attempts this hasn't worked then give up and return EADDRNOTAVAIL.
302 */
303
304 /* remove O_CLOEXEC before this fd is passed to QEMU */
305 vfd = open("/dev/vhost-vsock", O_RDWR|O_CLOEXEC);
306 if (vfd < 0)
307 return log_debug_errno(errno, "Failed to open /dev/vhost-vsock as read/write: %m");
308
309 if (cid != VMADDR_CID_ANY) {
310 r = ioctl(vfd, VHOST_VSOCK_SET_GUEST_CID, &cid);
311 if (r < 0)
312 return log_debug_errno(errno, "Failed to set CID for child vsock with user provided CID %" PRIu64 ": %m", cid);
313 *ret_child_sock = TAKE_FD(vfd);
314 return 0;
315 }
316
317 siphash24_init(&machine_hash_state, sip_key);
318 siphash24_compress_string(machine, &machine_hash_state);
319 for (unsigned i = 0; i < 64; i++) {
320 state = machine_hash_state;
321 siphash24_compress_safe(&i, sizeof i, &state);
322 uint64_t hash = siphash24_finalize(&state);
323
324 cid = 3 + (hash % (UINT_MAX - 4));
325 r = ioctl(vfd, VHOST_VSOCK_SET_GUEST_CID, &cid);
326 if (r >= 0) {
327 *machine_cid = cid;
328 *ret_child_sock = TAKE_FD(vfd);
329 return 0;
330 }
331 if (errno != EADDRINUSE)
332 return -errno;
333 }
334
335 for (unsigned i = 0; i < 64; i++) {
336 cid = 3 + random_u64_range(UINT_MAX - 4);
337 r = ioctl(vfd, VHOST_VSOCK_SET_GUEST_CID, &cid);
338 if (r >= 0) {
339 *machine_cid = cid;
340 *ret_child_sock = TAKE_FD(vfd);
341 return 0;
342 }
343
344 if (errno != EADDRINUSE)
345 return -errno;
346 }
347
348 return log_debug_errno(SYNTHETIC_ERRNO(EADDRNOTAVAIL), "Failed to assign a CID to the guest vsock");
349 }