]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/virt.c
detect-virt: fix Google Compute Engine support
[thirdparty/systemd.git] / src / basic / virt.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #if defined(__i386__) || defined(__x86_64__)
4 #include <cpuid.h>
5 #endif
6 #include <errno.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #include <unistd.h>
10
11 #include "alloc-util.h"
12 #include "cgroup-util.h"
13 #include "dirent-util.h"
14 #include "env-util.h"
15 #include "errno-util.h"
16 #include "fd-util.h"
17 #include "fileio.h"
18 #include "macro.h"
19 #include "missing_threads.h"
20 #include "process-util.h"
21 #include "stat-util.h"
22 #include "string-table.h"
23 #include "string-util.h"
24 #include "uid-range.h"
25 #include "virt.h"
26
/* Tri-state outcome of looking at the SMBIOS "virtual machine" characteristics bit. */
enum {
        SMBIOS_VM_BIT_SET     = 0, /* the bit was read and is set */
        SMBIOS_VM_BIT_UNSET   = 1, /* the bit was read and is clear */
        SMBIOS_VM_BIT_UNKNOWN = 2, /* the bit could not be read */
};
32
33 static Virtualization detect_vm_cpuid(void) {
34
35 /* CPUID is an x86 specific interface. */
36 #if defined(__i386__) || defined(__x86_64__)
37
38 static const struct {
39 const char sig[13];
40 Virtualization id;
41 } vm_table[] = {
42 { "XenVMMXenVMM", VIRTUALIZATION_XEN },
43 { "KVMKVMKVM", VIRTUALIZATION_KVM }, /* qemu with KVM */
44 { "Linux KVM Hv", VIRTUALIZATION_KVM }, /* qemu with KVM + HyperV Enlightenments */
45 { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU }, /* qemu without KVM */
46 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
47 { "VMwareVMware", VIRTUALIZATION_VMWARE },
48 /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
49 { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
50 /* https://wiki.freebsd.org/bhyve */
51 { "bhyve bhyve ", VIRTUALIZATION_BHYVE },
52 { "QNXQVMBSQG", VIRTUALIZATION_QNX },
53 /* https://projectacrn.org */
54 { "ACRNACRNACRN", VIRTUALIZATION_ACRN },
55 /* https://www.lockheedmartin.com/en-us/products/Hardened-Security-for-Intel-Processors.html */
56 { "SRESRESRESRE", VIRTUALIZATION_SRE },
57 { "Apple VZ", VIRTUALIZATION_APPLE },
58 };
59
60 uint32_t eax, ebx, ecx, edx;
61 bool hypervisor;
62
63 /* http://lwn.net/Articles/301888/ */
64
65 /* First detect whether there is a hypervisor */
66 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
67 return VIRTUALIZATION_NONE;
68
69 hypervisor = ecx & 0x80000000U;
70
71 if (hypervisor) {
72 union {
73 uint32_t sig32[3];
74 char text[13];
75 } sig = {};
76
77 /* There is a hypervisor, see what it is */
78 __cpuid(0x40000000U, eax, ebx, ecx, edx);
79
80 sig.sig32[0] = ebx;
81 sig.sig32[1] = ecx;
82 sig.sig32[2] = edx;
83
84 log_debug("Virtualization found, CPUID=%s", sig.text);
85
86 for (size_t i = 0; i < ELEMENTSOF(vm_table); i++)
87 if (memcmp_nn(sig.text, sizeof(sig.text),
88 vm_table[i].sig, sizeof(vm_table[i].sig)) == 0)
89 return vm_table[i].id;
90
91 log_debug("Unknown virtualization with CPUID=%s. Add to vm_table[]?", sig.text);
92 return VIRTUALIZATION_VM_OTHER;
93 }
94 #endif
95 log_debug("No virtualization found in CPUID");
96
97 return VIRTUALIZATION_NONE;
98 }
99
100 static Virtualization detect_vm_device_tree(void) {
101 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
102 _cleanup_free_ char *hvtype = NULL;
103 int r;
104
105 r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
106 if (r == -ENOENT) {
107 _cleanup_closedir_ DIR *dir = NULL;
108 _cleanup_free_ char *compat = NULL;
109
110 if (access("/proc/device-tree/ibm,partition-name", F_OK) == 0 &&
111 access("/proc/device-tree/hmc-managed?", F_OK) == 0 &&
112 access("/proc/device-tree/chosen/qemu,graphic-width", F_OK) != 0)
113 return VIRTUALIZATION_POWERVM;
114
115 dir = opendir("/proc/device-tree");
116 if (!dir) {
117 if (errno == ENOENT) {
118 log_debug_errno(errno, "/proc/device-tree: %m");
119 return VIRTUALIZATION_NONE;
120 }
121 return -errno;
122 }
123
124 FOREACH_DIRENT(de, dir, return -errno)
125 if (strstr(de->d_name, "fw-cfg")) {
126 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", de->d_name);
127 return VIRTUALIZATION_QEMU;
128 }
129
130 r = read_one_line_file("/proc/device-tree/compatible", &compat);
131 if (r < 0 && r != -ENOENT)
132 return r;
133 if (r >= 0 && streq(compat, "qemu,pseries")) {
134 log_debug("Virtualization %s found in /proc/device-tree/compatible", compat);
135 return VIRTUALIZATION_QEMU;
136 }
137
138 log_debug("No virtualization found in /proc/device-tree/*");
139 return VIRTUALIZATION_NONE;
140 } else if (r < 0)
141 return r;
142
143 log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
144 if (streq(hvtype, "linux,kvm"))
145 return VIRTUALIZATION_KVM;
146 else if (strstr(hvtype, "xen"))
147 return VIRTUALIZATION_XEN;
148 else if (strstr(hvtype, "vmware"))
149 return VIRTUALIZATION_VMWARE;
150 else
151 return VIRTUALIZATION_VM_OTHER;
152 #else
153 log_debug("This platform does not support /proc/device-tree");
154 return VIRTUALIZATION_NONE;
155 #endif
156 }
157
158 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch_lp64)
159 static Virtualization detect_vm_dmi_vendor(void) {
160 static const char* const dmi_vendors[] = {
161 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
162 "/sys/class/dmi/id/sys_vendor",
163 "/sys/class/dmi/id/board_vendor",
164 "/sys/class/dmi/id/bios_vendor",
165 "/sys/class/dmi/id/product_version", /* For Hyper-V VMs test */
166 NULL
167 };
168
169 static const struct {
170 const char *vendor;
171 Virtualization id;
172 } dmi_vendor_table[] = {
173 { "KVM", VIRTUALIZATION_KVM },
174 { "OpenStack", VIRTUALIZATION_KVM }, /* Detect OpenStack instance as KVM in non x86 architecture */
175 { "KubeVirt", VIRTUALIZATION_KVM }, /* Detect KubeVirt instance as KVM in non x86 architecture */
176 { "Amazon EC2", VIRTUALIZATION_AMAZON },
177 { "QEMU", VIRTUALIZATION_QEMU },
178 { "VMware", VIRTUALIZATION_VMWARE }, /* https://kb.vmware.com/s/article/1009458 */
179 { "VMW", VIRTUALIZATION_VMWARE },
180 { "innotek GmbH", VIRTUALIZATION_ORACLE },
181 { "VirtualBox", VIRTUALIZATION_ORACLE },
182 { "Xen", VIRTUALIZATION_XEN },
183 { "Bochs", VIRTUALIZATION_BOCHS },
184 { "Parallels", VIRTUALIZATION_PARALLELS },
185 /* https://wiki.freebsd.org/bhyve */
186 { "BHYVE", VIRTUALIZATION_BHYVE },
187 { "Hyper-V", VIRTUALIZATION_MICROSOFT },
188 { "Apple Virtualization", VIRTUALIZATION_APPLE },
189 { "Google Compute Engine", VIRTUALIZATION_GOOGLE }, /* https://cloud.google.com/run/docs/container-contract#sandbox */
190 };
191 int r;
192
193 STRV_FOREACH(vendor, dmi_vendors) {
194 _cleanup_free_ char *s = NULL;
195
196 r = read_one_line_file(*vendor, &s);
197 if (r < 0) {
198 if (r == -ENOENT)
199 continue;
200
201 return r;
202 }
203
204 for (size_t i = 0; i < ELEMENTSOF(dmi_vendor_table); i++)
205 if (startswith(s, dmi_vendor_table[i].vendor)) {
206 log_debug("Virtualization %s found in DMI (%s)", s, *vendor);
207 return dmi_vendor_table[i].id;
208 }
209 }
210 log_debug("No virtualization found in DMI vendor table.");
211 return VIRTUALIZATION_NONE;
212 }
213
214 static int detect_vm_smbios(void) {
215 /* The SMBIOS BIOS Characteristics Extension Byte 2 (Section 2.1.2.2 of
216 * https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_3.4.0.pdf), specifies that
217 * the 4th bit being set indicates a VM. The BIOS Characteristics table is exposed via the kernel in
218 * /sys/firmware/dmi/entries/0-0. Note that in the general case, this bit being unset should not
219 * imply that the system is running on bare-metal. For example, QEMU 3.1.0 (with or without KVM)
220 * with SeaBIOS does not set this bit. */
221 _cleanup_free_ char *s = NULL;
222 size_t readsize;
223 int r;
224
225 r = read_full_virtual_file("/sys/firmware/dmi/entries/0-0/raw", &s, &readsize);
226 if (r < 0) {
227 log_debug_errno(r, "Unable to read /sys/firmware/dmi/entries/0-0/raw, "
228 "using the virtualization information found in DMI vendor table, ignoring: %m");
229 return SMBIOS_VM_BIT_UNKNOWN;
230 }
231 if (readsize < 20 || s[1] < 20) {
232 /* The spec indicates that byte 1 contains the size of the table, 0x12 + the number of
233 * extension bytes. The data we're interested in is in extension byte 2, which would be at
234 * 0x13. If we didn't read that much data, or if the BIOS indicates that we don't have that
235 * much data, we don't infer anything from the SMBIOS. */
236 log_debug("Only read %zu bytes from /sys/firmware/dmi/entries/0-0/raw (expected 20). "
237 "Using the virtualization information found in DMI vendor table.", readsize);
238 return SMBIOS_VM_BIT_UNKNOWN;
239 }
240
241 uint8_t byte = (uint8_t) s[19];
242 if (byte & (1U<<4)) {
243 log_debug("DMI BIOS Extension table indicates virtualization.");
244 return SMBIOS_VM_BIT_SET;
245 }
246 log_debug("DMI BIOS Extension table does not indicate virtualization.");
247 return SMBIOS_VM_BIT_UNSET;
248 }
249 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch_lp64) */
250
251 static Virtualization detect_vm_dmi(void) {
252 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch_lp64)
253
254 int r;
255 r = detect_vm_dmi_vendor();
256
257 /* The DMI vendor tables in /sys/class/dmi/id don't help us distinguish between Amazon EC2
258 * virtual machines and bare-metal instances, so we need to look at SMBIOS. */
259 if (r == VIRTUALIZATION_AMAZON) {
260 switch (detect_vm_smbios()) {
261 case SMBIOS_VM_BIT_SET:
262 return VIRTUALIZATION_AMAZON;
263 case SMBIOS_VM_BIT_UNSET:
264 return VIRTUALIZATION_NONE;
265 case SMBIOS_VM_BIT_UNKNOWN: {
266 /* The DMI information we are after is only accessible to the root user,
267 * so we fallback to using the product name which is less restricted
268 * to distinguish metal systems from virtualized instances */
269 _cleanup_free_ char *s = NULL;
270 const char *e;
271
272 r = read_full_virtual_file("/sys/class/dmi/id/product_name", &s, NULL);
273 /* In EC2, virtualized is much more common than metal, so if for some reason
274 * we fail to read the DMI data, assume we are virtualized. */
275 if (r < 0) {
276 log_debug_errno(r, "Can't read /sys/class/dmi/id/product_name,"
277 " assuming virtualized: %m");
278 return VIRTUALIZATION_AMAZON;
279 }
280 e = strstrafter(truncate_nl(s), ".metal");
281 if (e && IN_SET(*e, 0, '-')) {
282 log_debug("DMI product name has '.metal', assuming no virtualization");
283 return VIRTUALIZATION_NONE;
284 } else
285 return VIRTUALIZATION_AMAZON;
286 }
287 default:
288 assert_not_reached();
289 }
290 }
291
292 /* If we haven't identified a VM, but the firmware indicates that there is one, indicate as much. We
293 * have no further information about what it is. */
294 if (r == VIRTUALIZATION_NONE && detect_vm_smbios() == SMBIOS_VM_BIT_SET)
295 return VIRTUALIZATION_VM_OTHER;
296 return r;
297 #else
298 return VIRTUALIZATION_NONE;
299 #endif
300 }
301
302 #define XENFEAT_dom0 11 /* xen/include/public/features.h */
303 #define PATH_FEATURES "/sys/hypervisor/properties/features"
304 /* Returns -errno, or 0 for domU, or 1 for dom0 */
305 static int detect_vm_xen_dom0(void) {
306 _cleanup_free_ char *domcap = NULL;
307 int r;
308
309 r = read_one_line_file(PATH_FEATURES, &domcap);
310 if (r < 0 && r != -ENOENT)
311 return r;
312 if (r >= 0) {
313 unsigned long features;
314
315 /* Here, we need to use sscanf() instead of safe_atoul()
316 * as the string lacks the leading "0x". */
317 r = sscanf(domcap, "%lx", &features);
318 if (r == 1) {
319 r = !!(features & (1U << XENFEAT_dom0));
320 log_debug("Virtualization XEN, found %s with value %08lx, "
321 "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
322 PATH_FEATURES, features, r ? "" : " not");
323 return r;
324 }
325 log_debug("Virtualization XEN, found %s, unhandled content '%s'",
326 PATH_FEATURES, domcap);
327 }
328
329 r = read_one_line_file("/proc/xen/capabilities", &domcap);
330 if (r == -ENOENT) {
331 log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
332 return 0;
333 }
334 if (r < 0)
335 return r;
336
337 for (const char *i = domcap;;) {
338 _cleanup_free_ char *cap = NULL;
339
340 r = extract_first_word(&i, &cap, ",", 0);
341 if (r < 0)
342 return r;
343 if (r == 0) {
344 log_debug("Virtualization XEN DomU found (/proc/xen/capabilities)");
345 return 0;
346 }
347
348 if (streq(cap, "control_d")) {
349 log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
350 return 1;
351 }
352 }
353 }
354
355 static Virtualization detect_vm_xen(void) {
356 /* The presence of /proc/xen indicates some form of a Xen domain
357 The check for Dom0 is handled outside this function */
358 if (access("/proc/xen", F_OK) < 0) {
359 log_debug("Virtualization XEN not found, /proc/xen does not exist");
360 return VIRTUALIZATION_NONE;
361 }
362 log_debug("Virtualization XEN found (/proc/xen exists)");
363 return VIRTUALIZATION_XEN;
364 }
365
366 static Virtualization detect_vm_hypervisor(void) {
367 _cleanup_free_ char *hvtype = NULL;
368 int r;
369
370 r = read_one_line_file("/sys/hypervisor/type", &hvtype);
371 if (r == -ENOENT)
372 return VIRTUALIZATION_NONE;
373 if (r < 0)
374 return r;
375
376 log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
377
378 if (streq(hvtype, "xen"))
379 return VIRTUALIZATION_XEN;
380 else
381 return VIRTUALIZATION_VM_OTHER;
382 }
383
384 static Virtualization detect_vm_uml(void) {
385 _cleanup_fclose_ FILE *f = NULL;
386 int r;
387
388 /* Detect User-Mode Linux by reading /proc/cpuinfo */
389 f = fopen("/proc/cpuinfo", "re");
390 if (!f) {
391 if (errno == ENOENT) {
392 log_debug("/proc/cpuinfo not found, assuming no UML virtualization.");
393 return VIRTUALIZATION_NONE;
394 }
395 return -errno;
396 }
397
398 for (;;) {
399 _cleanup_free_ char *line = NULL;
400 const char *t;
401
402 r = read_line(f, LONG_LINE_MAX, &line);
403 if (r < 0)
404 return r;
405 if (r == 0)
406 break;
407
408 t = startswith(line, "vendor_id\t: ");
409 if (t) {
410 if (startswith(t, "User Mode Linux")) {
411 log_debug("UML virtualization found in /proc/cpuinfo");
412 return VIRTUALIZATION_UML;
413 }
414
415 break;
416 }
417 }
418
419 log_debug("UML virtualization not found in /proc/cpuinfo.");
420 return VIRTUALIZATION_NONE;
421 }
422
423 static Virtualization detect_vm_zvm(void) {
424
425 #if defined(__s390__)
426 _cleanup_free_ char *t = NULL;
427 int r;
428
429 r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
430 if (r == -ENOENT)
431 return VIRTUALIZATION_NONE;
432 if (r < 0)
433 return r;
434
435 log_debug("Virtualization %s found in /proc/sysinfo", t);
436 if (streq(t, "z/VM"))
437 return VIRTUALIZATION_ZVM;
438 else
439 return VIRTUALIZATION_KVM;
440 #else
441 log_debug("This platform does not support /proc/sysinfo");
442 return VIRTUALIZATION_NONE;
443 #endif
444 }
445
446 /* Returns a short identifier for the various VM implementations */
447 Virtualization detect_vm(void) {
448 static thread_local Virtualization cached_found = _VIRTUALIZATION_INVALID;
449 bool other = false;
450 int xen_dom0 = 0;
451 Virtualization v, dmi;
452
453 if (cached_found >= 0)
454 return cached_found;
455
456 /* We have to use the correct order here:
457 *
458 * → First, try to detect Oracle Virtualbox, Amazon EC2 Nitro, Parallels, and Google Compute Engine, even if they use KVM,
459 * as well as Xen even if it cloaks as Microsoft Hyper-V. Attempt to detect uml at this stage also
460 * since it runs as a user-process nested inside other VMs. Also check for Xen now, because Xen PV
461 * mode does not override CPUID when nested inside another hypervisor.
462 *
463 * → Second, try to detect from CPUID, this will report KVM for whatever software is used even if
464 * info in DMI is overwritten.
465 *
466 * → Third, try to detect from DMI. */
467
468 dmi = detect_vm_dmi();
469 if (IN_SET(dmi,
470 VIRTUALIZATION_ORACLE,
471 VIRTUALIZATION_XEN,
472 VIRTUALIZATION_AMAZON,
473 VIRTUALIZATION_PARALLELS,
474 VIRTUALIZATION_GOOGLE)) {
475 v = dmi;
476 goto finish;
477 }
478
479 /* Detect UML */
480 v = detect_vm_uml();
481 if (v < 0)
482 return v;
483 if (v != VIRTUALIZATION_NONE)
484 goto finish;
485
486 /* Detect Xen */
487 v = detect_vm_xen();
488 if (v < 0)
489 return v;
490 if (v == VIRTUALIZATION_XEN) {
491 /* If we are Dom0, then we expect to not report as a VM. However, as we might be nested
492 * inside another hypervisor which can be detected via the CPUID check, wait to report this
493 * until after the CPUID check. */
494 xen_dom0 = detect_vm_xen_dom0();
495 if (xen_dom0 < 0)
496 return xen_dom0;
497 if (xen_dom0 == 0)
498 goto finish;
499 } else if (v != VIRTUALIZATION_NONE)
500 assert_not_reached();
501
502 /* Detect from CPUID */
503 v = detect_vm_cpuid();
504 if (v < 0)
505 return v;
506 if (v == VIRTUALIZATION_VM_OTHER)
507 other = true;
508 else if (v != VIRTUALIZATION_NONE)
509 goto finish;
510
511 /* If we are in Dom0 and have not yet finished, finish with the result of detect_vm_cpuid */
512 if (xen_dom0 > 0)
513 goto finish;
514
515 /* Now, let's get back to DMI */
516 if (dmi < 0)
517 return dmi;
518 if (dmi == VIRTUALIZATION_VM_OTHER)
519 other = true;
520 else if (dmi != VIRTUALIZATION_NONE) {
521 v = dmi;
522 goto finish;
523 }
524
525 /* Check high-level hypervisor sysfs file */
526 v = detect_vm_hypervisor();
527 if (v < 0)
528 return v;
529 if (v == VIRTUALIZATION_VM_OTHER)
530 other = true;
531 else if (v != VIRTUALIZATION_NONE)
532 goto finish;
533
534 v = detect_vm_device_tree();
535 if (v < 0)
536 return v;
537 if (v == VIRTUALIZATION_VM_OTHER)
538 other = true;
539 else if (v != VIRTUALIZATION_NONE)
540 goto finish;
541
542 v = detect_vm_zvm();
543 if (v < 0)
544 return v;
545
546 finish:
547 if (v == VIRTUALIZATION_NONE && other)
548 v = VIRTUALIZATION_VM_OTHER;
549
550 cached_found = v;
551 log_debug("Found VM virtualization %s", virtualization_to_string(v));
552 return v;
553 }
554
555 static const char *const container_table[_VIRTUALIZATION_MAX] = {
556 [VIRTUALIZATION_LXC] = "lxc",
557 [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
558 [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
559 [VIRTUALIZATION_DOCKER] = "docker",
560 [VIRTUALIZATION_PODMAN] = "podman",
561 [VIRTUALIZATION_RKT] = "rkt",
562 [VIRTUALIZATION_WSL] = "wsl",
563 [VIRTUALIZATION_PROOT] = "proot",
564 [VIRTUALIZATION_POUCH] = "pouch",
565 };
566
567 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(container, int);
568
/* Guesses whether we run inside a cgroup namespace by checking for files that only exist, or only are
 * missing, in the root cgroup.
 *
 * Returns > 0 if a cgroup namespace is assumed, 0 if not (or if cgroup namespaces are unsupported), and
 * a negative errno-style value if one of the checks failed unexpectedly. */
static int running_in_cgroupns(void) {
        int r;

        if (!cg_ns_supported())
                return false;

        r = cg_all_unified();
        if (r < 0)
                return r;

        if (r == 0) {
                /* cgroup v1 */

                /* If systemd controller is not mounted, do not even bother. */
                if (access("/sys/fs/cgroup/systemd", F_OK) < 0)
                        return errno == ENOENT ? false : -errno;

                /* release_agent only exists in the root cgroup. */
                if (access("/sys/fs/cgroup/systemd/release_agent", F_OK) < 0)
                        return errno == ENOENT ? true : -errno;

                return false;
        }

        /* cgroup v2 */

        /* All kernel versions have cgroup.events in nested cgroups. */
        if (access("/sys/fs/cgroup/cgroup.events", F_OK) < 0)
                return errno == ENOENT ? false : -errno;

        /* There's no cgroup.type in the root cgroup, and future kernel versions
         * are unlikely to add it since cgroup.type is something that makes no sense
         * whatsoever in the root cgroup. */
        if (access("/sys/fs/cgroup/cgroup.type", F_OK) == 0)
                return true;
        if (errno != ENOENT)
                return -errno;

        /* On older kernel versions, there's no cgroup.type. If the features file is missing too, this
         * is an old kernel that we know for sure has cgroup.events only in nested cgroups. */
        if (access("/sys/kernel/cgroup/features", F_OK) < 0)
                return errno == ENOENT ? true : -errno;

        /* This is a recent kernel, and cgroup.type doesn't exist, so we must be
         * in the root cgroup. */
        return false;
}
634
635 static Virtualization detect_container_files(void) {
636 static const struct {
637 const char *file_path;
638 Virtualization id;
639 } container_file_table[] = {
640 /* https://github.com/containers/podman/issues/6192 */
641 /* https://github.com/containers/podman/issues/3586#issuecomment-661918679 */
642 { "/run/.containerenv", VIRTUALIZATION_PODMAN },
643 /* https://github.com/moby/moby/issues/18355 */
644 /* Docker must be the last in this table, see below. */
645 { "/.dockerenv", VIRTUALIZATION_DOCKER },
646 };
647
648 for (size_t i = 0; i < ELEMENTSOF(container_file_table); i++) {
649 if (access(container_file_table[i].file_path, F_OK) >= 0)
650 return container_file_table[i].id;
651
652 if (errno != ENOENT)
653 log_debug_errno(errno,
654 "Checking if %s exists failed, ignoring: %m",
655 container_file_table[i].file_path);
656 }
657
658 return VIRTUALIZATION_NONE;
659 }
660
661 Virtualization detect_container(void) {
662 static thread_local Virtualization cached_found = _VIRTUALIZATION_INVALID;
663 _cleanup_free_ char *m = NULL, *o = NULL, *p = NULL;
664 const char *e = NULL;
665 Virtualization v;
666 int r;
667
668 if (cached_found >= 0)
669 return cached_found;
670
671 /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
672 if (access("/proc/vz", F_OK) < 0) {
673 if (errno != ENOENT)
674 log_debug_errno(errno, "Failed to check if /proc/vz exists, ignoring: %m");
675 } else if (access("/proc/bc", F_OK) < 0) {
676 if (errno == ENOENT) {
677 v = VIRTUALIZATION_OPENVZ;
678 goto finish;
679 }
680
681 log_debug_errno(errno, "Failed to check if /proc/bc exists, ignoring: %m");
682 }
683
684 /* "Official" way of detecting WSL https://github.com/Microsoft/WSL/issues/423#issuecomment-221627364 */
685 r = read_one_line_file("/proc/sys/kernel/osrelease", &o);
686 if (r < 0)
687 log_debug_errno(r, "Failed to read /proc/sys/kernel/osrelease, ignoring: %m");
688 else if (strstr(o, "Microsoft") || strstr(o, "WSL")) {
689 v = VIRTUALIZATION_WSL;
690 goto finish;
691 }
692
693 /* proot doesn't use PID namespacing, so we can just check if we have a matching tracer for this
694 * invocation without worrying about it being elsewhere.
695 */
696 r = get_proc_field("/proc/self/status", "TracerPid", WHITESPACE, &p);
697 if (r < 0)
698 log_debug_errno(r, "Failed to read our own trace PID, ignoring: %m");
699 else if (!streq(p, "0")) {
700 pid_t ptrace_pid;
701
702 r = parse_pid(p, &ptrace_pid);
703 if (r < 0)
704 log_debug_errno(r, "Failed to parse our own tracer PID, ignoring: %m");
705 else {
706 _cleanup_free_ char *ptrace_comm = NULL;
707 const char *pf;
708
709 pf = procfs_file_alloca(ptrace_pid, "comm");
710 r = read_one_line_file(pf, &ptrace_comm);
711 if (r < 0)
712 log_debug_errno(r, "Failed to read %s, ignoring: %m", pf);
713 else if (startswith(ptrace_comm, "proot")) {
714 v = VIRTUALIZATION_PROOT;
715 goto finish;
716 }
717 }
718 }
719
720 /* The container manager might have placed this in the /run/host/ hierarchy for us, which is best
721 * because we can be consumed just like that, without special privileges. */
722 r = read_one_line_file("/run/host/container-manager", &m);
723 if (r > 0) {
724 e = m;
725 goto translate_name;
726 }
727 if (!IN_SET(r, -ENOENT, 0))
728 return log_debug_errno(r, "Failed to read /run/host/container-manager: %m");
729
730 if (getpid_cached() == 1) {
731 /* If we are PID 1 we can just check our own environment variable, and that's authoritative.
732 * We distinguish three cases:
733 * - the variable is not defined → we jump to other checks
734 * - the variable is defined to an empty value → we are not in a container
735 * - anything else → some container, either one of the known ones or "container-other"
736 */
737 e = getenv("container");
738 if (!e)
739 goto check_files;
740 if (isempty(e)) {
741 v = VIRTUALIZATION_NONE;
742 goto finish;
743 }
744
745 goto translate_name;
746 }
747
748 /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
749 * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
750 r = read_one_line_file("/run/systemd/container", &m);
751 if (r > 0) {
752 e = m;
753 goto translate_name;
754 }
755 if (!IN_SET(r, -ENOENT, 0))
756 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
757
758 /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
759 r = getenv_for_pid(1, "container", &m);
760 if (r > 0) {
761 e = m;
762 goto translate_name;
763 }
764 if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
765 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
766
767 check_files:
768 /* Check for existence of some well-known files. We only do this after checking
769 * for other specific container managers, otherwise we risk mistaking another
770 * container manager for Docker: the /.dockerenv file could inadvertently end up
771 * in a file system image. */
772 v = detect_container_files();
773 if (v < 0)
774 return v;
775 if (v != VIRTUALIZATION_NONE)
776 goto finish;
777
778 r = running_in_cgroupns();
779 if (r > 0) {
780 v = VIRTUALIZATION_CONTAINER_OTHER;
781 goto finish;
782 }
783 if (r < 0)
784 log_debug_errno(r, "Failed to detect cgroup namespace: %m");
785
786 /* If none of that worked, give up, assume no container manager. */
787 v = VIRTUALIZATION_NONE;
788 goto finish;
789
790 translate_name:
791 if (streq(e, "oci")) {
792 /* Some images hardcode container=oci, but OCI is not a specific container manager.
793 * Try to detect one based on well-known files. */
794 v = detect_container_files();
795 if (v == VIRTUALIZATION_NONE)
796 v = VIRTUALIZATION_CONTAINER_OTHER;
797 goto finish;
798 }
799 v = container_from_string(e);
800 if (v < 0)
801 v = VIRTUALIZATION_CONTAINER_OTHER;
802
803 finish:
804 log_debug("Found container virtualization %s.", virtualization_to_string(v));
805 cached_found = v;
806 return v;
807 }
808
809 Virtualization detect_virtualization(void) {
810 int v;
811
812 v = detect_container();
813 if (v != VIRTUALIZATION_NONE)
814 return v;
815
816 return detect_vm();
817 }
818
819 static int userns_has_mapping(const char *name) {
820 _cleanup_fclose_ FILE *f = NULL;
821 uid_t base, shift, range;
822 int r;
823
824 f = fopen(name, "re");
825 if (!f) {
826 log_debug_errno(errno, "Failed to open %s: %m", name);
827 return errno == ENOENT ? false : -errno;
828 }
829
830 r = uid_map_read_one(f, &base, &shift, &range);
831 if (r == -ENOMSG) {
832 log_debug("%s is empty, we're in an uninitialized user namespace.", name);
833 return true;
834 }
835 if (r < 0)
836 return log_debug_errno(r, "Failed to read %s: %m", name);
837
838 if (base == 0 && shift == 0 && range == UINT32_MAX) {
839 /* The kernel calls mappings_overlap() and does not allow overlaps */
840 log_debug("%s has a full 1:1 mapping", name);
841 return false;
842 }
843
844 /* Anything else implies that we are in a user namespace */
845 log_debug("Mapping found in %s, we're in a user namespace.", name);
846 return true;
847 }
848
849 int running_in_userns(void) {
850 _cleanup_free_ char *line = NULL;
851 int r;
852
853 r = userns_has_mapping("/proc/self/uid_map");
854 if (r != 0)
855 return r;
856
857 r = userns_has_mapping("/proc/self/gid_map");
858 if (r != 0)
859 return r;
860
861 /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also possible to compile a
862 * kernel without CONFIG_USER_NS, in which case "setgroups" also does not exist. We cannot
863 * distinguish those two cases, so assume that we're running on a stripped-down recent kernel, rather
864 * than on an old one, and if the file is not found, return false. */
865 r = read_virtual_file("/proc/self/setgroups", SIZE_MAX, &line, NULL);
866 if (r < 0) {
867 log_debug_errno(r, "/proc/self/setgroups: %m");
868 return r == -ENOENT ? false : r;
869 }
870
871 strstrip(line); /* remove trailing newline */
872
873 r = streq(line, "deny");
874 /* See user_namespaces(7) for a description of this "setgroups" contents. */
875 log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
876 return r;
877 }
878
/* Checks whether the root directory of this process differs from the one of PID 1.
 *
 * Returns > 0 if we appear to be in a chroot, 0 if not (always 0 when $SYSTEMD_IGNORE_CHROOT is set to
 * true), and a negative errno-style value on failure. -ENOSYS is returned when /proc/1/root is missing
 * although /proc is reported as mounted. */
int running_in_chroot(void) {
        int r;

        /* If we're PID1, /proc may not be mounted (and most likely we're not in a chroot). But PID1 will
         * mount /proc, so all other programs can assume that if /proc is *not* available, we're in some
         * chroot. */

        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;

        r = inode_same("/proc/1/root", "/", 0);
        if (r == -ENOENT) {
                r = proc_mounted();
                if (r > 0) /* If we have fake /proc/, we can't do the check properly. */
                        return -ENOSYS;
                if (r == 0) {
                        if (getpid_cached() == 1)
                                return false; /* We will mount /proc, assuming we're not in a chroot. */

                        log_debug("/proc is not mounted, assuming we're in a chroot.");
                        return true;
                }
                /* proc_mounted() itself failed: reported by the generic error check below. */
        }
        if (r < 0)
                return r;

        /* inode_same() returned 0: the two paths are different inodes, hence a chroot. */
        return r == 0;
}
907
908 #if defined(__i386__) || defined(__x86_64__)
/* Associates one bit of a CPUID output register with the name of the CPU flag it stands for. */
struct cpuid_table_entry {
        uint32_t flag_bit;      /* bit position inside the register value */
        const char *name;       /* flag name compared against the caller's string */
};
913
914 static const struct cpuid_table_entry leaf1_edx[] = {
915 { 0, "fpu" },
916 { 1, "vme" },
917 { 2, "de" },
918 { 3, "pse" },
919 { 4, "tsc" },
920 { 5, "msr" },
921 { 6, "pae" },
922 { 7, "mce" },
923 { 8, "cx8" },
924 { 9, "apic" },
925 { 11, "sep" },
926 { 12, "mtrr" },
927 { 13, "pge" },
928 { 14, "mca" },
929 { 15, "cmov" },
930 { 16, "pat" },
931 { 17, "pse36" },
932 { 19, "clflush" },
933 { 23, "mmx" },
934 { 24, "fxsr" },
935 { 25, "sse" },
936 { 26, "sse2" },
937 { 28, "ht" },
938 };
939
940 static const struct cpuid_table_entry leaf1_ecx[] = {
941 { 0, "pni" },
942 { 1, "pclmul" },
943 { 3, "monitor" },
944 { 9, "ssse3" },
945 { 12, "fma3" },
946 { 13, "cx16" },
947 { 19, "sse4_1" },
948 { 20, "sse4_2" },
949 { 22, "movbe" },
950 { 23, "popcnt" },
951 { 25, "aes" },
952 { 26, "xsave" },
953 { 27, "osxsave" },
954 { 28, "avx" },
955 { 29, "f16c" },
956 { 30, "rdrand" },
957 };
958
959 static const struct cpuid_table_entry leaf7_ebx[] = {
960 { 3, "bmi1" },
961 { 5, "avx2" },
962 { 8, "bmi2" },
963 { 18, "rdseed" },
964 { 19, "adx" },
965 { 29, "sha_ni" },
966 };
967
968 static const struct cpuid_table_entry leaf81_edx[] = {
969 { 11, "syscall" },
970 { 27, "rdtscp" },
971 { 29, "lm" },
972 };
973
974 static const struct cpuid_table_entry leaf81_ecx[] = {
975 { 0, "lahf_lm" },
976 { 5, "abm" },
977 };
978
979 static const struct cpuid_table_entry leaf87_edx[] = {
980 { 8, "constant_tsc" },
981 };
982
983 static bool given_flag_in_set(const char *flag, const struct cpuid_table_entry *set, size_t set_size, uint32_t val) {
984 for (size_t i = 0; i < set_size; i++) {
985 if ((UINT32_C(1) << set[i].flag_bit) & val &&
986 streq(flag, set[i].name))
987 return true;
988 }
989 return false;
990 }
991
992 static bool real_has_cpu_with_flag(const char *flag) {
993 uint32_t eax, ebx, ecx, edx;
994
995 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
996 if (given_flag_in_set(flag, leaf1_ecx, ELEMENTSOF(leaf1_ecx), ecx))
997 return true;
998
999 if (given_flag_in_set(flag, leaf1_edx, ELEMENTSOF(leaf1_edx), edx))
1000 return true;
1001 }
1002
1003 if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
1004 if (given_flag_in_set(flag, leaf7_ebx, ELEMENTSOF(leaf7_ebx), ebx))
1005 return true;
1006 }
1007
1008 if (__get_cpuid(0x80000001U, &eax, &ebx, &ecx, &edx)) {
1009 if (given_flag_in_set(flag, leaf81_ecx, ELEMENTSOF(leaf81_ecx), ecx))
1010 return true;
1011
1012 if (given_flag_in_set(flag, leaf81_edx, ELEMENTSOF(leaf81_edx), edx))
1013 return true;
1014 }
1015
1016 if (__get_cpuid(0x80000007U, &eax, &ebx, &ecx, &edx))
1017 if (given_flag_in_set(flag, leaf87_edx, ELEMENTSOF(leaf87_edx), edx))
1018 return true;
1019
1020 return false;
1021 }
1022 #endif
1023
/* Returns true if the CPU reports the named flag via CPUID. CPUID is an x86 specific interface; on all
 * other architectures we assume that no CPU has the flag. */
bool has_cpu_with_flag(const char *flag) {
#if defined(__i386__) || defined(__x86_64__)
        bool found = real_has_cpu_with_flag(flag);

        return found;
#else
        return false;
#endif
}
1032
1033 static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
1034 [VIRTUALIZATION_NONE] = "none",
1035 [VIRTUALIZATION_KVM] = "kvm",
1036 [VIRTUALIZATION_AMAZON] = "amazon",
1037 [VIRTUALIZATION_QEMU] = "qemu",
1038 [VIRTUALIZATION_BOCHS] = "bochs",
1039 [VIRTUALIZATION_XEN] = "xen",
1040 [VIRTUALIZATION_UML] = "uml",
1041 [VIRTUALIZATION_VMWARE] = "vmware",
1042 [VIRTUALIZATION_ORACLE] = "oracle",
1043 [VIRTUALIZATION_MICROSOFT] = "microsoft",
1044 [VIRTUALIZATION_ZVM] = "zvm",
1045 [VIRTUALIZATION_PARALLELS] = "parallels",
1046 [VIRTUALIZATION_BHYVE] = "bhyve",
1047 [VIRTUALIZATION_QNX] = "qnx",
1048 [VIRTUALIZATION_ACRN] = "acrn",
1049 [VIRTUALIZATION_POWERVM] = "powervm",
1050 [VIRTUALIZATION_APPLE] = "apple",
1051 [VIRTUALIZATION_SRE] = "sre",
1052 [VIRTUALIZATION_GOOGLE] = "google",
1053 [VIRTUALIZATION_VM_OTHER] = "vm-other",
1054
1055 [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
1056 [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
1057 [VIRTUALIZATION_LXC] = "lxc",
1058 [VIRTUALIZATION_OPENVZ] = "openvz",
1059 [VIRTUALIZATION_DOCKER] = "docker",
1060 [VIRTUALIZATION_PODMAN] = "podman",
1061 [VIRTUALIZATION_RKT] = "rkt",
1062 [VIRTUALIZATION_WSL] = "wsl",
1063 [VIRTUALIZATION_PROOT] = "proot",
1064 [VIRTUALIZATION_POUCH] = "pouch",
1065 [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
1066 };
1067
1068 DEFINE_STRING_TABLE_LOOKUP(virtualization, Virtualization);