]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/virt.c
Merge pull request #30284 from YHNdnzj/fstab-wantedby-defaultdeps
[thirdparty/systemd.git] / src / basic / virt.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #if defined(__i386__) || defined(__x86_64__)
4 #include <cpuid.h>
5 #endif
6 #include <errno.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #include <unistd.h>
10
11 #include "alloc-util.h"
12 #include "cgroup-util.h"
13 #include "dirent-util.h"
14 #include "env-util.h"
15 #include "errno-util.h"
16 #include "fd-util.h"
17 #include "fileio.h"
18 #include "macro.h"
19 #include "missing_threads.h"
20 #include "process-util.h"
21 #include "stat-util.h"
22 #include "string-table.h"
23 #include "string-util.h"
24 #include "uid-range.h"
25 #include "virt.h"
26
/* Tri-state outcome of probing the SMBIOS BIOS characteristics for the "virtual machine" bit. */
enum {
        SMBIOS_VM_BIT_SET     = 0, /* the bit is set */
        SMBIOS_VM_BIT_UNSET   = 1, /* the bit is cleared */
        SMBIOS_VM_BIT_UNKNOWN = 2, /* the table could not be read or is too short */
};
32
33 static Virtualization detect_vm_cpuid(void) {
34
35 /* CPUID is an x86 specific interface. */
36 #if defined(__i386__) || defined(__x86_64__)
37
38 static const struct {
39 const char sig[13];
40 Virtualization id;
41 } vm_table[] = {
42 { "XenVMMXenVMM", VIRTUALIZATION_XEN },
43 { "KVMKVMKVM", VIRTUALIZATION_KVM }, /* qemu with KVM */
44 { "Linux KVM Hv", VIRTUALIZATION_KVM }, /* qemu with KVM + HyperV Enlightenments */
45 { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU }, /* qemu without KVM */
46 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
47 { "VMwareVMware", VIRTUALIZATION_VMWARE },
48 /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
49 { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
50 /* https://wiki.freebsd.org/bhyve */
51 { "bhyve bhyve ", VIRTUALIZATION_BHYVE },
52 { "QNXQVMBSQG", VIRTUALIZATION_QNX },
53 /* https://projectacrn.org */
54 { "ACRNACRNACRN", VIRTUALIZATION_ACRN },
55 /* https://www.lockheedmartin.com/en-us/products/Hardened-Security-for-Intel-Processors.html */
56 { "SRESRESRESRE", VIRTUALIZATION_SRE },
57 };
58
59 uint32_t eax, ebx, ecx, edx;
60 bool hypervisor;
61
62 /* http://lwn.net/Articles/301888/ */
63
64 /* First detect whether there is a hypervisor */
65 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
66 return VIRTUALIZATION_NONE;
67
68 hypervisor = ecx & 0x80000000U;
69
70 if (hypervisor) {
71 union {
72 uint32_t sig32[3];
73 char text[13];
74 } sig = {};
75
76 /* There is a hypervisor, see what it is */
77 __cpuid(0x40000000U, eax, ebx, ecx, edx);
78
79 sig.sig32[0] = ebx;
80 sig.sig32[1] = ecx;
81 sig.sig32[2] = edx;
82
83 log_debug("Virtualization found, CPUID=%s", sig.text);
84
85 for (size_t i = 0; i < ELEMENTSOF(vm_table); i++)
86 if (memcmp_nn(sig.text, sizeof(sig.text),
87 vm_table[i].sig, sizeof(vm_table[i].sig)) == 0)
88 return vm_table[i].id;
89
90 log_debug("Unknown virtualization with CPUID=%s. Add to vm_table[]?", sig.text);
91 return VIRTUALIZATION_VM_OTHER;
92 }
93 #endif
94 log_debug("No virtualization found in CPUID");
95
96 return VIRTUALIZATION_NONE;
97 }
98
99 static Virtualization detect_vm_device_tree(void) {
100 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
101 _cleanup_free_ char *hvtype = NULL;
102 int r;
103
104 r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
105 if (r == -ENOENT) {
106 _cleanup_closedir_ DIR *dir = NULL;
107 _cleanup_free_ char *compat = NULL;
108
109 if (access("/proc/device-tree/ibm,partition-name", F_OK) == 0 &&
110 access("/proc/device-tree/hmc-managed?", F_OK) == 0 &&
111 access("/proc/device-tree/chosen/qemu,graphic-width", F_OK) != 0)
112 return VIRTUALIZATION_POWERVM;
113
114 dir = opendir("/proc/device-tree");
115 if (!dir) {
116 if (errno == ENOENT) {
117 log_debug_errno(errno, "/proc/device-tree: %m");
118 return VIRTUALIZATION_NONE;
119 }
120 return -errno;
121 }
122
123 FOREACH_DIRENT(de, dir, return -errno)
124 if (strstr(de->d_name, "fw-cfg")) {
125 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", de->d_name);
126 return VIRTUALIZATION_QEMU;
127 }
128
129 r = read_one_line_file("/proc/device-tree/compatible", &compat);
130 if (r < 0 && r != -ENOENT)
131 return r;
132 if (r >= 0 && streq(compat, "qemu,pseries")) {
133 log_debug("Virtualization %s found in /proc/device-tree/compatible", compat);
134 return VIRTUALIZATION_QEMU;
135 }
136
137 log_debug("No virtualization found in /proc/device-tree/*");
138 return VIRTUALIZATION_NONE;
139 } else if (r < 0)
140 return r;
141
142 log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
143 if (streq(hvtype, "linux,kvm"))
144 return VIRTUALIZATION_KVM;
145 else if (strstr(hvtype, "xen"))
146 return VIRTUALIZATION_XEN;
147 else if (strstr(hvtype, "vmware"))
148 return VIRTUALIZATION_VMWARE;
149 else
150 return VIRTUALIZATION_VM_OTHER;
151 #else
152 log_debug("This platform does not support /proc/device-tree");
153 return VIRTUALIZATION_NONE;
154 #endif
155 }
156
157 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch_lp64)
158 static Virtualization detect_vm_dmi_vendor(void) {
159 static const char* const dmi_vendors[] = {
160 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
161 "/sys/class/dmi/id/sys_vendor",
162 "/sys/class/dmi/id/board_vendor",
163 "/sys/class/dmi/id/bios_vendor",
164 "/sys/class/dmi/id/product_version", /* For Hyper-V VMs test */
165 NULL
166 };
167
168 static const struct {
169 const char *vendor;
170 Virtualization id;
171 } dmi_vendor_table[] = {
172 { "KVM", VIRTUALIZATION_KVM },
173 { "OpenStack", VIRTUALIZATION_KVM }, /* Detect OpenStack instance as KVM in non x86 architecture */
174 { "KubeVirt", VIRTUALIZATION_KVM }, /* Detect KubeVirt instance as KVM in non x86 architecture */
175 { "Amazon EC2", VIRTUALIZATION_AMAZON },
176 { "QEMU", VIRTUALIZATION_QEMU },
177 { "VMware", VIRTUALIZATION_VMWARE }, /* https://kb.vmware.com/s/article/1009458 */
178 { "VMW", VIRTUALIZATION_VMWARE },
179 { "innotek GmbH", VIRTUALIZATION_ORACLE },
180 { "VirtualBox", VIRTUALIZATION_ORACLE },
181 { "Xen", VIRTUALIZATION_XEN },
182 { "Bochs", VIRTUALIZATION_BOCHS },
183 { "Parallels", VIRTUALIZATION_PARALLELS },
184 /* https://wiki.freebsd.org/bhyve */
185 { "BHYVE", VIRTUALIZATION_BHYVE },
186 { "Hyper-V", VIRTUALIZATION_MICROSOFT },
187 { "Apple Virtualization", VIRTUALIZATION_APPLE },
188 };
189 int r;
190
191 STRV_FOREACH(vendor, dmi_vendors) {
192 _cleanup_free_ char *s = NULL;
193
194 r = read_one_line_file(*vendor, &s);
195 if (r < 0) {
196 if (r == -ENOENT)
197 continue;
198
199 return r;
200 }
201
202 for (size_t i = 0; i < ELEMENTSOF(dmi_vendor_table); i++)
203 if (startswith(s, dmi_vendor_table[i].vendor)) {
204 log_debug("Virtualization %s found in DMI (%s)", s, *vendor);
205 return dmi_vendor_table[i].id;
206 }
207 }
208 log_debug("No virtualization found in DMI vendor table.");
209 return VIRTUALIZATION_NONE;
210 }
211
212 static int detect_vm_smbios(void) {
213 /* The SMBIOS BIOS Characteristics Extension Byte 2 (Section 2.1.2.2 of
214 * https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_3.4.0.pdf), specifies that
215 * the 4th bit being set indicates a VM. The BIOS Characteristics table is exposed via the kernel in
216 * /sys/firmware/dmi/entries/0-0. Note that in the general case, this bit being unset should not
217 * imply that the system is running on bare-metal. For example, QEMU 3.1.0 (with or without KVM)
218 * with SeaBIOS does not set this bit. */
219 _cleanup_free_ char *s = NULL;
220 size_t readsize;
221 int r;
222
223 r = read_full_virtual_file("/sys/firmware/dmi/entries/0-0/raw", &s, &readsize);
224 if (r < 0) {
225 log_debug_errno(r, "Unable to read /sys/firmware/dmi/entries/0-0/raw, "
226 "using the virtualization information found in DMI vendor table, ignoring: %m");
227 return SMBIOS_VM_BIT_UNKNOWN;
228 }
229 if (readsize < 20 || s[1] < 20) {
230 /* The spec indicates that byte 1 contains the size of the table, 0x12 + the number of
231 * extension bytes. The data we're interested in is in extension byte 2, which would be at
232 * 0x13. If we didn't read that much data, or if the BIOS indicates that we don't have that
233 * much data, we don't infer anything from the SMBIOS. */
234 log_debug("Only read %zu bytes from /sys/firmware/dmi/entries/0-0/raw (expected 20). "
235 "Using the virtualization information found in DMI vendor table.", readsize);
236 return SMBIOS_VM_BIT_UNKNOWN;
237 }
238
239 uint8_t byte = (uint8_t) s[19];
240 if (byte & (1U<<4)) {
241 log_debug("DMI BIOS Extension table indicates virtualization.");
242 return SMBIOS_VM_BIT_SET;
243 }
244 log_debug("DMI BIOS Extension table does not indicate virtualization.");
245 return SMBIOS_VM_BIT_UNSET;
246 }
247 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch_lp64) */
248
249 static Virtualization detect_vm_dmi(void) {
250 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch_lp64)
251
252 int r;
253 r = detect_vm_dmi_vendor();
254
255 /* The DMI vendor tables in /sys/class/dmi/id don't help us distinguish between Amazon EC2
256 * virtual machines and bare-metal instances, so we need to look at SMBIOS. */
257 if (r == VIRTUALIZATION_AMAZON) {
258 switch (detect_vm_smbios()) {
259 case SMBIOS_VM_BIT_SET:
260 return VIRTUALIZATION_AMAZON;
261 case SMBIOS_VM_BIT_UNSET:
262 return VIRTUALIZATION_NONE;
263 case SMBIOS_VM_BIT_UNKNOWN: {
264 /* The DMI information we are after is only accessible to the root user,
265 * so we fallback to using the product name which is less restricted
266 * to distinguish metal systems from virtualized instances */
267 _cleanup_free_ char *s = NULL;
268 const char *e;
269
270 r = read_full_virtual_file("/sys/class/dmi/id/product_name", &s, NULL);
271 /* In EC2, virtualized is much more common than metal, so if for some reason
272 * we fail to read the DMI data, assume we are virtualized. */
273 if (r < 0) {
274 log_debug_errno(r, "Can't read /sys/class/dmi/id/product_name,"
275 " assuming virtualized: %m");
276 return VIRTUALIZATION_AMAZON;
277 }
278 e = strstrafter(truncate_nl(s), ".metal");
279 if (e && IN_SET(*e, 0, '-')) {
280 log_debug("DMI product name has '.metal', assuming no virtualization");
281 return VIRTUALIZATION_NONE;
282 } else
283 return VIRTUALIZATION_AMAZON;
284 }
285 default:
286 assert_not_reached();
287 }
288 }
289
290 /* If we haven't identified a VM, but the firmware indicates that there is one, indicate as much. We
291 * have no further information about what it is. */
292 if (r == VIRTUALIZATION_NONE && detect_vm_smbios() == SMBIOS_VM_BIT_SET)
293 return VIRTUALIZATION_VM_OTHER;
294 return r;
295 #else
296 return VIRTUALIZATION_NONE;
297 #endif
298 }
299
300 #define XENFEAT_dom0 11 /* xen/include/public/features.h */
301 #define PATH_FEATURES "/sys/hypervisor/properties/features"
302 /* Returns -errno, or 0 for domU, or 1 for dom0 */
303 static int detect_vm_xen_dom0(void) {
304 _cleanup_free_ char *domcap = NULL;
305 int r;
306
307 r = read_one_line_file(PATH_FEATURES, &domcap);
308 if (r < 0 && r != -ENOENT)
309 return r;
310 if (r >= 0) {
311 unsigned long features;
312
313 /* Here, we need to use sscanf() instead of safe_atoul()
314 * as the string lacks the leading "0x". */
315 r = sscanf(domcap, "%lx", &features);
316 if (r == 1) {
317 r = !!(features & (1U << XENFEAT_dom0));
318 log_debug("Virtualization XEN, found %s with value %08lx, "
319 "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
320 PATH_FEATURES, features, r ? "" : " not");
321 return r;
322 }
323 log_debug("Virtualization XEN, found %s, unhandled content '%s'",
324 PATH_FEATURES, domcap);
325 }
326
327 r = read_one_line_file("/proc/xen/capabilities", &domcap);
328 if (r == -ENOENT) {
329 log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
330 return 0;
331 }
332 if (r < 0)
333 return r;
334
335 for (const char *i = domcap;;) {
336 _cleanup_free_ char *cap = NULL;
337
338 r = extract_first_word(&i, &cap, ",", 0);
339 if (r < 0)
340 return r;
341 if (r == 0) {
342 log_debug("Virtualization XEN DomU found (/proc/xen/capabilities)");
343 return 0;
344 }
345
346 if (streq(cap, "control_d")) {
347 log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
348 return 1;
349 }
350 }
351 }
352
353 static Virtualization detect_vm_xen(void) {
354 /* The presence of /proc/xen indicates some form of a Xen domain
355 The check for Dom0 is handled outside this function */
356 if (access("/proc/xen", F_OK) < 0) {
357 log_debug("Virtualization XEN not found, /proc/xen does not exist");
358 return VIRTUALIZATION_NONE;
359 }
360 log_debug("Virtualization XEN found (/proc/xen exists)");
361 return VIRTUALIZATION_XEN;
362 }
363
364 static Virtualization detect_vm_hypervisor(void) {
365 _cleanup_free_ char *hvtype = NULL;
366 int r;
367
368 r = read_one_line_file("/sys/hypervisor/type", &hvtype);
369 if (r == -ENOENT)
370 return VIRTUALIZATION_NONE;
371 if (r < 0)
372 return r;
373
374 log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
375
376 if (streq(hvtype, "xen"))
377 return VIRTUALIZATION_XEN;
378 else
379 return VIRTUALIZATION_VM_OTHER;
380 }
381
382 static Virtualization detect_vm_uml(void) {
383 _cleanup_fclose_ FILE *f = NULL;
384 int r;
385
386 /* Detect User-Mode Linux by reading /proc/cpuinfo */
387 f = fopen("/proc/cpuinfo", "re");
388 if (!f) {
389 if (errno == ENOENT) {
390 log_debug("/proc/cpuinfo not found, assuming no UML virtualization.");
391 return VIRTUALIZATION_NONE;
392 }
393 return -errno;
394 }
395
396 for (;;) {
397 _cleanup_free_ char *line = NULL;
398 const char *t;
399
400 r = read_line(f, LONG_LINE_MAX, &line);
401 if (r < 0)
402 return r;
403 if (r == 0)
404 break;
405
406 t = startswith(line, "vendor_id\t: ");
407 if (t) {
408 if (startswith(t, "User Mode Linux")) {
409 log_debug("UML virtualization found in /proc/cpuinfo");
410 return VIRTUALIZATION_UML;
411 }
412
413 break;
414 }
415 }
416
417 log_debug("UML virtualization not found in /proc/cpuinfo.");
418 return VIRTUALIZATION_NONE;
419 }
420
421 static Virtualization detect_vm_zvm(void) {
422
423 #if defined(__s390__)
424 _cleanup_free_ char *t = NULL;
425 int r;
426
427 r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
428 if (r == -ENOENT)
429 return VIRTUALIZATION_NONE;
430 if (r < 0)
431 return r;
432
433 log_debug("Virtualization %s found in /proc/sysinfo", t);
434 if (streq(t, "z/VM"))
435 return VIRTUALIZATION_ZVM;
436 else
437 return VIRTUALIZATION_KVM;
438 #else
439 log_debug("This platform does not support /proc/sysinfo");
440 return VIRTUALIZATION_NONE;
441 #endif
442 }
443
444 /* Returns a short identifier for the various VM implementations */
445 Virtualization detect_vm(void) {
446 static thread_local Virtualization cached_found = _VIRTUALIZATION_INVALID;
447 bool other = false;
448 int xen_dom0 = 0;
449 Virtualization v, dmi;
450
451 if (cached_found >= 0)
452 return cached_found;
453
454 /* We have to use the correct order here:
455 *
456 * → First, try to detect Oracle Virtualbox, Amazon EC2 Nitro, and Parallels, even if they use KVM,
457 * as well as Xen even if it cloaks as Microsoft Hyper-V. Attempt to detect uml at this stage also
458 * since it runs as a user-process nested inside other VMs. Also check for Xen now, because Xen PV
459 * mode does not override CPUID when nested inside another hypervisor.
460 *
461 * → Second, try to detect from CPUID, this will report KVM for whatever software is used even if
462 * info in DMI is overwritten.
463 *
464 * → Third, try to detect from DMI. */
465
466 dmi = detect_vm_dmi();
467 if (IN_SET(dmi,
468 VIRTUALIZATION_ORACLE,
469 VIRTUALIZATION_XEN,
470 VIRTUALIZATION_AMAZON,
471 VIRTUALIZATION_PARALLELS)) {
472 v = dmi;
473 goto finish;
474 }
475
476 /* Detect UML */
477 v = detect_vm_uml();
478 if (v < 0)
479 return v;
480 if (v != VIRTUALIZATION_NONE)
481 goto finish;
482
483 /* Detect Xen */
484 v = detect_vm_xen();
485 if (v < 0)
486 return v;
487 if (v == VIRTUALIZATION_XEN) {
488 /* If we are Dom0, then we expect to not report as a VM. However, as we might be nested
489 * inside another hypervisor which can be detected via the CPUID check, wait to report this
490 * until after the CPUID check. */
491 xen_dom0 = detect_vm_xen_dom0();
492 if (xen_dom0 < 0)
493 return xen_dom0;
494 if (xen_dom0 == 0)
495 goto finish;
496 } else if (v != VIRTUALIZATION_NONE)
497 assert_not_reached();
498
499 /* Detect from CPUID */
500 v = detect_vm_cpuid();
501 if (v < 0)
502 return v;
503 if (v == VIRTUALIZATION_VM_OTHER)
504 other = true;
505 else if (v != VIRTUALIZATION_NONE)
506 goto finish;
507
508 /* If we are in Dom0 and have not yet finished, finish with the result of detect_vm_cpuid */
509 if (xen_dom0 > 0)
510 goto finish;
511
512 /* Now, let's get back to DMI */
513 if (dmi < 0)
514 return dmi;
515 if (dmi == VIRTUALIZATION_VM_OTHER)
516 other = true;
517 else if (dmi != VIRTUALIZATION_NONE) {
518 v = dmi;
519 goto finish;
520 }
521
522 /* Check high-level hypervisor sysfs file */
523 v = detect_vm_hypervisor();
524 if (v < 0)
525 return v;
526 if (v == VIRTUALIZATION_VM_OTHER)
527 other = true;
528 else if (v != VIRTUALIZATION_NONE)
529 goto finish;
530
531 v = detect_vm_device_tree();
532 if (v < 0)
533 return v;
534 if (v == VIRTUALIZATION_VM_OTHER)
535 other = true;
536 else if (v != VIRTUALIZATION_NONE)
537 goto finish;
538
539 v = detect_vm_zvm();
540 if (v < 0)
541 return v;
542
543 finish:
544 if (v == VIRTUALIZATION_NONE && other)
545 v = VIRTUALIZATION_VM_OTHER;
546
547 cached_found = v;
548 log_debug("Found VM virtualization %s", virtualization_to_string(v));
549 return v;
550 }
551
552 static const char *const container_table[_VIRTUALIZATION_MAX] = {
553 [VIRTUALIZATION_LXC] = "lxc",
554 [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
555 [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
556 [VIRTUALIZATION_DOCKER] = "docker",
557 [VIRTUALIZATION_PODMAN] = "podman",
558 [VIRTUALIZATION_RKT] = "rkt",
559 [VIRTUALIZATION_WSL] = "wsl",
560 [VIRTUALIZATION_PROOT] = "proot",
561 [VIRTUALIZATION_POUCH] = "pouch",
562 };
563
564 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(container, int);
565
/* Heuristically determines whether we run inside a cgroup namespace.
 *
 * Returns > 0 if so, 0 if not (or if cgroup namespaces are not supported), and a negative errno
 * if one of the probes fails for a reason other than the file being absent. */
static int running_in_cgroupns(void) {
        int unified;

        if (!cg_ns_supported())
                return false;

        unified = cg_all_unified();
        if (unified < 0)
                return unified;

        if (unified == 0) {
                /* cgroup v1 */

                /* If systemd controller is not mounted, do not even bother. */
                if (access("/sys/fs/cgroup/systemd", F_OK) < 0)
                        return errno == ENOENT ? false : -errno;

                /* release_agent only exists in the root cgroup. */
                if (access("/sys/fs/cgroup/systemd/release_agent", F_OK) < 0)
                        return errno == ENOENT ? true : -errno;

                return false;
        }

        /* cgroup v2 */

        /* All kernel versions have cgroup.events in nested cgroups. */
        if (access("/sys/fs/cgroup/cgroup.events", F_OK) < 0)
                return errno == ENOENT ? false : -errno;

        /* There's no cgroup.type in the root cgroup, and future kernel versions
         * are unlikely to add it since cgroup.type is something that makes no sense
         * whatsoever in the root cgroup. */
        if (access("/sys/fs/cgroup/cgroup.type", F_OK) == 0)
                return true;
        if (errno != ENOENT)
                return -errno;

        /* On older kernel versions, there's no cgroup.type. If the features file is missing too,
         * this is an old kernel that we know for sure has cgroup.events only in nested cgroups. */
        if (access("/sys/kernel/cgroup/features", F_OK) < 0)
                return errno == ENOENT ? true : -errno;

        /* This is a recent kernel, and cgroup.type doesn't exist, so we must be
         * in the root cgroup. */
        return false;
}
631
632 static Virtualization detect_container_files(void) {
633 static const struct {
634 const char *file_path;
635 Virtualization id;
636 } container_file_table[] = {
637 /* https://github.com/containers/podman/issues/6192 */
638 /* https://github.com/containers/podman/issues/3586#issuecomment-661918679 */
639 { "/run/.containerenv", VIRTUALIZATION_PODMAN },
640 /* https://github.com/moby/moby/issues/18355 */
641 /* Docker must be the last in this table, see below. */
642 { "/.dockerenv", VIRTUALIZATION_DOCKER },
643 };
644
645 for (size_t i = 0; i < ELEMENTSOF(container_file_table); i++) {
646 if (access(container_file_table[i].file_path, F_OK) >= 0)
647 return container_file_table[i].id;
648
649 if (errno != ENOENT)
650 log_debug_errno(errno,
651 "Checking if %s exists failed, ignoring: %m",
652 container_file_table[i].file_path);
653 }
654
655 return VIRTUALIZATION_NONE;
656 }
657
658 Virtualization detect_container(void) {
659 static thread_local Virtualization cached_found = _VIRTUALIZATION_INVALID;
660 _cleanup_free_ char *m = NULL, *o = NULL, *p = NULL;
661 const char *e = NULL;
662 Virtualization v;
663 int r;
664
665 if (cached_found >= 0)
666 return cached_found;
667
668 /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
669 if (access("/proc/vz", F_OK) < 0) {
670 if (errno != ENOENT)
671 log_debug_errno(errno, "Failed to check if /proc/vz exists, ignoring: %m");
672 } else if (access("/proc/bc", F_OK) < 0) {
673 if (errno == ENOENT) {
674 v = VIRTUALIZATION_OPENVZ;
675 goto finish;
676 }
677
678 log_debug_errno(errno, "Failed to check if /proc/bc exists, ignoring: %m");
679 }
680
681 /* "Official" way of detecting WSL https://github.com/Microsoft/WSL/issues/423#issuecomment-221627364 */
682 r = read_one_line_file("/proc/sys/kernel/osrelease", &o);
683 if (r < 0)
684 log_debug_errno(r, "Failed to read /proc/sys/kernel/osrelease, ignoring: %m");
685 else if (strstr(o, "Microsoft") || strstr(o, "WSL")) {
686 v = VIRTUALIZATION_WSL;
687 goto finish;
688 }
689
690 /* proot doesn't use PID namespacing, so we can just check if we have a matching tracer for this
691 * invocation without worrying about it being elsewhere.
692 */
693 r = get_proc_field("/proc/self/status", "TracerPid", WHITESPACE, &p);
694 if (r < 0)
695 log_debug_errno(r, "Failed to read our own trace PID, ignoring: %m");
696 else if (!streq(p, "0")) {
697 pid_t ptrace_pid;
698
699 r = parse_pid(p, &ptrace_pid);
700 if (r < 0)
701 log_debug_errno(r, "Failed to parse our own tracer PID, ignoring: %m");
702 else {
703 _cleanup_free_ char *ptrace_comm = NULL;
704 const char *pf;
705
706 pf = procfs_file_alloca(ptrace_pid, "comm");
707 r = read_one_line_file(pf, &ptrace_comm);
708 if (r < 0)
709 log_debug_errno(r, "Failed to read %s, ignoring: %m", pf);
710 else if (startswith(ptrace_comm, "proot")) {
711 v = VIRTUALIZATION_PROOT;
712 goto finish;
713 }
714 }
715 }
716
717 /* The container manager might have placed this in the /run/host/ hierarchy for us, which is best
718 * because we can be consumed just like that, without special privileges. */
719 r = read_one_line_file("/run/host/container-manager", &m);
720 if (r > 0) {
721 e = m;
722 goto translate_name;
723 }
724 if (!IN_SET(r, -ENOENT, 0))
725 return log_debug_errno(r, "Failed to read /run/host/container-manager: %m");
726
727 if (getpid_cached() == 1) {
728 /* If we are PID 1 we can just check our own environment variable, and that's authoritative.
729 * We distinguish three cases:
730 * - the variable is not defined → we jump to other checks
731 * - the variable is defined to an empty value → we are not in a container
732 * - anything else → some container, either one of the known ones or "container-other"
733 */
734 e = getenv("container");
735 if (!e)
736 goto check_files;
737 if (isempty(e)) {
738 v = VIRTUALIZATION_NONE;
739 goto finish;
740 }
741
742 goto translate_name;
743 }
744
745 /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
746 * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
747 r = read_one_line_file("/run/systemd/container", &m);
748 if (r > 0) {
749 e = m;
750 goto translate_name;
751 }
752 if (!IN_SET(r, -ENOENT, 0))
753 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
754
755 /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
756 r = getenv_for_pid(1, "container", &m);
757 if (r > 0) {
758 e = m;
759 goto translate_name;
760 }
761 if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
762 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
763
764 check_files:
765 /* Check for existence of some well-known files. We only do this after checking
766 * for other specific container managers, otherwise we risk mistaking another
767 * container manager for Docker: the /.dockerenv file could inadvertently end up
768 * in a file system image. */
769 v = detect_container_files();
770 if (v < 0)
771 return v;
772 if (v != VIRTUALIZATION_NONE)
773 goto finish;
774
775 r = running_in_cgroupns();
776 if (r > 0) {
777 v = VIRTUALIZATION_CONTAINER_OTHER;
778 goto finish;
779 }
780 if (r < 0)
781 log_debug_errno(r, "Failed to detect cgroup namespace: %m");
782
783 /* If none of that worked, give up, assume no container manager. */
784 v = VIRTUALIZATION_NONE;
785 goto finish;
786
787 translate_name:
788 if (streq(e, "oci")) {
789 /* Some images hardcode container=oci, but OCI is not a specific container manager.
790 * Try to detect one based on well-known files. */
791 v = detect_container_files();
792 if (v == VIRTUALIZATION_NONE)
793 v = VIRTUALIZATION_CONTAINER_OTHER;
794 goto finish;
795 }
796 v = container_from_string(e);
797 if (v < 0)
798 v = VIRTUALIZATION_CONTAINER_OTHER;
799
800 finish:
801 log_debug("Found container virtualization %s.", virtualization_to_string(v));
802 cached_found = v;
803 return v;
804 }
805
806 Virtualization detect_virtualization(void) {
807 int v;
808
809 v = detect_container();
810 if (v != VIRTUALIZATION_NONE)
811 return v;
812
813 return detect_vm();
814 }
815
816 static int userns_has_mapping(const char *name) {
817 _cleanup_fclose_ FILE *f = NULL;
818 uid_t base, shift, range;
819 int r;
820
821 f = fopen(name, "re");
822 if (!f) {
823 log_debug_errno(errno, "Failed to open %s: %m", name);
824 return errno == ENOENT ? false : -errno;
825 }
826
827 r = uid_map_read_one(f, &base, &shift, &range);
828 if (r == -ENOMSG) {
829 log_debug("%s is empty, we're in an uninitialized user namespace.", name);
830 return true;
831 }
832 if (r < 0)
833 return log_debug_errno(r, "Failed to read %s: %m", name);
834
835 if (base == 0 && shift == 0 && range == UINT32_MAX) {
836 /* The kernel calls mappings_overlap() and does not allow overlaps */
837 log_debug("%s has a full 1:1 mapping", name);
838 return false;
839 }
840
841 /* Anything else implies that we are in a user namespace */
842 log_debug("Mapping found in %s, we're in a user namespace.", name);
843 return true;
844 }
845
846 int running_in_userns(void) {
847 _cleanup_free_ char *line = NULL;
848 int r;
849
850 r = userns_has_mapping("/proc/self/uid_map");
851 if (r != 0)
852 return r;
853
854 r = userns_has_mapping("/proc/self/gid_map");
855 if (r != 0)
856 return r;
857
858 /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also possible to compile a
859 * kernel without CONFIG_USER_NS, in which case "setgroups" also does not exist. We cannot
860 * distinguish those two cases, so assume that we're running on a stripped-down recent kernel, rather
861 * than on an old one, and if the file is not found, return false. */
862 r = read_virtual_file("/proc/self/setgroups", SIZE_MAX, &line, NULL);
863 if (r < 0) {
864 log_debug_errno(r, "/proc/self/setgroups: %m");
865 return r == -ENOENT ? false : r;
866 }
867
868 strstrip(line); /* remove trailing newline */
869
870 r = streq(line, "deny");
871 /* See user_namespaces(7) for a description of this "setgroups" contents. */
872 log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
873 return r;
874 }
875
/* Determines whether we run in a chroot by comparing "/" with PID 1's root directory.
 *
 * Returns > 0 if in a chroot, 0 if not (or if $SYSTEMD_IGNORE_CHROOT is true), -ENOSYS if /proc
 * appears to be mounted but /proc/1/root is missing, or another negative error. */
int running_in_chroot(void) {
        int same;

        /* If we're PID1, /proc may not be mounted (and most likely we're not in a chroot). But PID1 will
         * mount /proc, so all other programs can assume that if /proc is *not* available, we're in some
         * chroot. */

        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;

        same = inode_same("/proc/1/root", "/", 0);
        if (same == -ENOENT) {
                int mounted = proc_mounted();

                if (mounted > 0) /* If we have fake /proc/, we can't do the check properly. */
                        return -ENOSYS;
                if (mounted < 0)
                        return mounted;

                if (getpid_cached() == 1)
                        return false; /* We will mount /proc, assuming we're not in a chroot. */

                log_debug("/proc is not mounted, assuming we're in a chroot.");
                return true;
        }
        if (same < 0)
                return same;

        /* Different inodes means PID 1 has another root than we do. */
        return same == 0;
}
904
905 #if defined(__i386__) || defined(__x86_64__)
/* Associates one bit position of a CPUID output register with a CPU flag name. */
struct cpuid_table_entry {
        uint32_t flag_bit;  /* bit index within the 32-bit register */
        const char *name;   /* flag name */
};

/* CPUID leaf 1, EDX */
static const struct cpuid_table_entry leaf1_edx[] = {
        {  0, "fpu"     }, {  1, "vme"   }, {  2, "de"   }, {  3, "pse"  },
        {  4, "tsc"     }, {  5, "msr"   }, {  6, "pae"  }, {  7, "mce"  },
        {  8, "cx8"     }, {  9, "apic"  }, { 11, "sep"  }, { 12, "mtrr" },
        { 13, "pge"     }, { 14, "mca"   }, { 15, "cmov" }, { 16, "pat"  },
        { 17, "pse36"   }, { 19, "clflush" },
        { 23, "mmx"     }, { 24, "fxsr"  }, { 25, "sse"  }, { 26, "sse2" },
        { 28, "ht"      },
};

/* CPUID leaf 1, ECX */
static const struct cpuid_table_entry leaf1_ecx[] = {
        {  0, "pni"     }, {  1, "pclmul" }, {  3, "monitor" }, {  9, "ssse3"  },
        { 12, "fma3"    }, { 13, "cx16"   }, { 19, "sse4_1"  }, { 20, "sse4_2" },
        { 22, "movbe"   }, { 23, "popcnt" }, { 25, "aes"     }, { 26, "xsave"  },
        { 27, "osxsave" }, { 28, "avx"    }, { 29, "f16c"    }, { 30, "rdrand" },
};

/* CPUID leaf 7, EBX */
static const struct cpuid_table_entry leaf7_ebx[] = {
        {  3, "bmi1"   }, {  5, "avx2" }, {  8, "bmi2"   },
        { 18, "rdseed" }, { 19, "adx"  }, { 29, "sha_ni" },
};

/* CPUID leaf 0x80000001, EDX */
static const struct cpuid_table_entry leaf81_edx[] = {
        { 11, "syscall" }, { 27, "rdtscp" }, { 29, "lm" },
};

/* CPUID leaf 0x80000001, ECX */
static const struct cpuid_table_entry leaf81_ecx[] = {
        {  0, "lahf_lm" }, {  5, "abm" },
};

/* CPUID leaf 0x80000007, EDX */
static const struct cpuid_table_entry leaf87_edx[] = {
        {  8, "constant_tsc" },
};
979
980 static bool given_flag_in_set(const char *flag, const struct cpuid_table_entry *set, size_t set_size, uint32_t val) {
981 for (size_t i = 0; i < set_size; i++) {
982 if ((UINT32_C(1) << set[i].flag_bit) & val &&
983 streq(flag, set[i].name))
984 return true;
985 }
986 return false;
987 }
988
989 static bool real_has_cpu_with_flag(const char *flag) {
990 uint32_t eax, ebx, ecx, edx;
991
992 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
993 if (given_flag_in_set(flag, leaf1_ecx, ELEMENTSOF(leaf1_ecx), ecx))
994 return true;
995
996 if (given_flag_in_set(flag, leaf1_edx, ELEMENTSOF(leaf1_edx), edx))
997 return true;
998 }
999
1000 if (__get_cpuid(7, &eax, &ebx, &ecx, &edx)) {
1001 if (given_flag_in_set(flag, leaf7_ebx, ELEMENTSOF(leaf7_ebx), ebx))
1002 return true;
1003 }
1004
1005 if (__get_cpuid(0x80000001U, &eax, &ebx, &ecx, &edx)) {
1006 if (given_flag_in_set(flag, leaf81_ecx, ELEMENTSOF(leaf81_ecx), ecx))
1007 return true;
1008
1009 if (given_flag_in_set(flag, leaf81_edx, ELEMENTSOF(leaf81_edx), edx))
1010 return true;
1011 }
1012
1013 if (__get_cpuid(0x80000007U, &eax, &ebx, &ecx, &edx))
1014 if (given_flag_in_set(flag, leaf87_edx, ELEMENTSOF(leaf87_edx), edx))
1015 return true;
1016
1017 return false;
1018 }
1019 #endif
1020
/* Public entry point for CPU feature flag queries. Always false on non-x86 builds. */
bool has_cpu_with_flag(const char *flag) {
#if !defined(__i386__) && !defined(__x86_64__)
        /* CPUID is an x86 specific interface. Assume on all others that no CPUs have those flags. */
        return false;
#else
        return real_has_cpu_with_flag(flag);
#endif
}
1029
1030 static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
1031 [VIRTUALIZATION_NONE] = "none",
1032 [VIRTUALIZATION_KVM] = "kvm",
1033 [VIRTUALIZATION_AMAZON] = "amazon",
1034 [VIRTUALIZATION_QEMU] = "qemu",
1035 [VIRTUALIZATION_BOCHS] = "bochs",
1036 [VIRTUALIZATION_XEN] = "xen",
1037 [VIRTUALIZATION_UML] = "uml",
1038 [VIRTUALIZATION_VMWARE] = "vmware",
1039 [VIRTUALIZATION_ORACLE] = "oracle",
1040 [VIRTUALIZATION_MICROSOFT] = "microsoft",
1041 [VIRTUALIZATION_ZVM] = "zvm",
1042 [VIRTUALIZATION_PARALLELS] = "parallels",
1043 [VIRTUALIZATION_BHYVE] = "bhyve",
1044 [VIRTUALIZATION_QNX] = "qnx",
1045 [VIRTUALIZATION_ACRN] = "acrn",
1046 [VIRTUALIZATION_POWERVM] = "powervm",
1047 [VIRTUALIZATION_APPLE] = "apple",
1048 [VIRTUALIZATION_SRE] = "sre",
1049 [VIRTUALIZATION_VM_OTHER] = "vm-other",
1050
1051 [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
1052 [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
1053 [VIRTUALIZATION_LXC] = "lxc",
1054 [VIRTUALIZATION_OPENVZ] = "openvz",
1055 [VIRTUALIZATION_DOCKER] = "docker",
1056 [VIRTUALIZATION_PODMAN] = "podman",
1057 [VIRTUALIZATION_RKT] = "rkt",
1058 [VIRTUALIZATION_WSL] = "wsl",
1059 [VIRTUALIZATION_PROOT] = "proot",
1060 [VIRTUALIZATION_POUCH] = "pouch",
1061 [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
1062 };
1063
1064 DEFINE_STRING_TABLE_LOOKUP(virtualization, Virtualization);