]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/virt.c
Merge pull request #23962 from keszybz/taint-flag-support-ended
[thirdparty/systemd.git] / src / basic / virt.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
b52aae1d 2
d31b0033
MG
3#if defined(__i386__) || defined(__x86_64__)
4#include <cpuid.h>
5#endif
b52aae1d 6#include <errno.h>
11c3a366
TA
7#include <stdint.h>
8#include <stdlib.h>
b52aae1d
LP
9#include <unistd.h>
10
b5efdb8a 11#include "alloc-util.h"
0e13779d 12#include "cgroup-util.h"
ade61d3b 13#include "dirent-util.h"
295ee984 14#include "env-util.h"
b2a331f2 15#include "errno-util.h"
ade61d3b 16#include "fd-util.h"
07630cea 17#include "fileio.h"
11c3a366 18#include "macro.h"
93cc7779 19#include "process-util.h"
b5efdb8a 20#include "stat-util.h"
8b43440b 21#include "string-table.h"
07630cea 22#include "string-util.h"
b52aae1d
LP
23#include "virt.h"
24
ce350379
NM
/* Tri-state outcome of probing the SMBIOS "virtual machine" characteristics bit. */
enum {
        SMBIOS_VM_BIT_SET     = 0, /* the bit is set */
        SMBIOS_VM_BIT_UNSET   = 1, /* the bit is clear */
        SMBIOS_VM_BIT_UNKNOWN = 2, /* the table could not be read, or is too short to tell */
};
30
1b86c7c5 31static Virtualization detect_vm_cpuid(void) {
b52aae1d 32
2ef8a4c4 33 /* CPUID is an x86 specific interface. */
bdb628ee 34#if defined(__i386__) || defined(__x86_64__)
b52aae1d 35
0f534758
LP
36 static const struct {
37 const char sig[13];
1b86c7c5 38 Virtualization id;
0f534758
LP
39 } vm_table[] = {
40 { "XenVMMXenVMM", VIRTUALIZATION_XEN },
41 { "KVMKVMKVM", VIRTUALIZATION_KVM }, /* qemu with KVM */
42 { "Linux KVM Hv", VIRTUALIZATION_KVM }, /* qemu with KVM + HyperV Enlightenments */
43 { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU }, /* qemu without KVM */
44 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
45 { "VMwareVMware", VIRTUALIZATION_VMWARE },
46 /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
47 { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
48 /* https://wiki.freebsd.org/bhyve */
49 { "bhyve bhyve ", VIRTUALIZATION_BHYVE },
50 { "QNXQVMBSQG", VIRTUALIZATION_QNX },
51 /* https://projectacrn.org */
52 { "ACRNACRNACRN", VIRTUALIZATION_ACRN },
53 };
54
d31b0033 55 uint32_t eax, ebx, ecx, edx;
b52aae1d
LP
56 bool hypervisor;
57
58 /* http://lwn.net/Articles/301888/ */
b52aae1d 59
b52aae1d 60 /* First detect whether there is a hypervisor */
d31b0033
MG
61 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
62 return VIRTUALIZATION_NONE;
b52aae1d 63
5d904a6a 64 hypervisor = ecx & 0x80000000U;
b52aae1d
LP
65
66 if (hypervisor) {
75f86906
LP
67 union {
68 uint32_t sig32[3];
69 char text[13];
70 } sig = {};
b52aae1d
LP
71
72 /* There is a hypervisor, see what it is */
8481e3e7 73 __cpuid(0x40000000U, eax, ebx, ecx, edx);
d31b0033
MG
74
75 sig.sig32[0] = ebx;
76 sig.sig32[1] = ecx;
77 sig.sig32[2] = edx;
b52aae1d 78
9f63a08d
SS
79 log_debug("Virtualization found, CPUID=%s", sig.text);
80
0f534758
LP
81 for (size_t i = 0; i < ELEMENTSOF(vm_table); i++)
82 if (memcmp_nn(sig.text, sizeof(sig.text),
83 vm_table[i].sig, sizeof(vm_table[i].sig)) == 0)
84 return vm_table[i].id;
bdb628ee 85
0f534758
LP
86 log_debug("Unknown virtualization with CPUID=%s. Add to vm_table[]?", sig.text);
87 return VIRTUALIZATION_VM_OTHER;
b52aae1d 88 }
bdb628ee 89#endif
9f63a08d 90 log_debug("No virtualization found in CPUID");
bdb628ee 91
75f86906 92 return VIRTUALIZATION_NONE;
bdb628ee
ZJS
93}
94
1b86c7c5 95static Virtualization detect_vm_device_tree(void) {
db6a8689 96#if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
d831deb5
CA
97 _cleanup_free_ char *hvtype = NULL;
98 int r;
99
b8f1df82 100 r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
75f86906 101 if (r == -ENOENT) {
ce09c71d 102 _cleanup_closedir_ DIR *dir = NULL;
ce09c71d 103
3224e38b
MS
104 if (access("/proc/device-tree/ibm,partition-name", F_OK) == 0 &&
105 access("/proc/device-tree/hmc-managed?", F_OK) == 0 &&
106 access("/proc/device-tree/chosen/qemu,graphic-width", F_OK) != 0)
107 return VIRTUALIZATION_POWERVM;
108
ce09c71d
AJ
109 dir = opendir("/proc/device-tree");
110 if (!dir) {
9f63a08d
SS
111 if (errno == ENOENT) {
112 log_debug_errno(errno, "/proc/device-tree: %m");
75f86906 113 return VIRTUALIZATION_NONE;
9f63a08d 114 }
ce09c71d
AJ
115 return -errno;
116 }
117
af3b864d
ZJS
118 FOREACH_DIRENT(de, dir, return -errno)
119 if (strstr(de->d_name, "fw-cfg")) {
120 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", de->d_name);
75f86906 121 return VIRTUALIZATION_QEMU;
9f63a08d 122 }
75f86906 123
9f63a08d 124 log_debug("No virtualization found in /proc/device-tree/*");
75f86906
LP
125 return VIRTUALIZATION_NONE;
126 } else if (r < 0)
127 return r;
128
9f63a08d 129 log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
75f86906
LP
130 if (streq(hvtype, "linux,kvm"))
131 return VIRTUALIZATION_KVM;
132 else if (strstr(hvtype, "xen"))
133 return VIRTUALIZATION_XEN;
4d4ac92c
CL
134 else if (strstr(hvtype, "vmware"))
135 return VIRTUALIZATION_VMWARE;
75f86906
LP
136 else
137 return VIRTUALIZATION_VM_OTHER;
138#else
9f63a08d 139 log_debug("This platform does not support /proc/device-tree");
75f86906 140 return VIRTUALIZATION_NONE;
d831deb5 141#endif
d831deb5
CA
142}
143
eebbd595 144#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64)
1b86c7c5 145static Virtualization detect_vm_dmi_vendor(void) {
a9d178d2 146 static const char* const dmi_vendors[] = {
3728dcde 147 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
bdb628ee
ZJS
148 "/sys/class/dmi/id/sys_vendor",
149 "/sys/class/dmi/id/board_vendor",
76eec064 150 "/sys/class/dmi/id/bios_vendor",
a9d178d2
ZJS
151 "/sys/class/dmi/id/product_version", /* For Hyper-V VMs test */
152 NULL
bdb628ee
ZJS
153 };
154
75f86906
LP
155 static const struct {
156 const char *vendor;
1b86c7c5 157 Virtualization id;
75f86906 158 } dmi_vendor_table[] = {
87bc4b40 159 { "KVM", VIRTUALIZATION_KVM },
01d9fbcc 160 { "OpenStack", VIRTUALIZATION_KVM }, /* Detect OpenStack instance as KVM in non x86 architecture */
b6eca373 161 { "Amazon EC2", VIRTUALIZATION_AMAZON },
25454a0c 162 { "QEMU", VIRTUALIZATION_QEMU },
a86cbb0f 163 { "VMware", VIRTUALIZATION_VMWARE }, /* https://kb.vmware.com/s/article/1009458 */
87bc4b40
JL
164 { "VMW", VIRTUALIZATION_VMWARE },
165 { "innotek GmbH", VIRTUALIZATION_ORACLE },
cfee6b95 166 { "VirtualBox", VIRTUALIZATION_ORACLE },
87bc4b40
JL
167 { "Xen", VIRTUALIZATION_XEN },
168 { "Bochs", VIRTUALIZATION_BOCHS },
169 { "Parallels", VIRTUALIZATION_PARALLELS },
aa0c3427 170 /* https://wiki.freebsd.org/bhyve */
87bc4b40 171 { "BHYVE", VIRTUALIZATION_BHYVE },
76eec064 172 { "Hyper-V", VIRTUALIZATION_MICROSOFT },
75f86906 173 };
75f86906 174 int r;
b52aae1d 175
a9d178d2 176 STRV_FOREACH(vendor, dmi_vendors) {
b1b8e816 177 _cleanup_free_ char *s = NULL;
b52aae1d 178
a9d178d2 179 r = read_one_line_file(*vendor, &s);
b1b8e816 180 if (r < 0) {
75f86906
LP
181 if (r == -ENOENT)
182 continue;
b52aae1d 183
75f86906 184 return r;
b52aae1d
LP
185 }
186
a9d178d2
ZJS
187 for (size_t i = 0; i < ELEMENTSOF(dmi_vendor_table); i++)
188 if (startswith(s, dmi_vendor_table[i].vendor)) {
189 log_debug("Virtualization %s found in DMI (%s)", s, *vendor);
190 return dmi_vendor_table[i].id;
9f63a08d 191 }
b52aae1d 192 }
932feb79 193 log_debug("No virtualization found in DMI vendor table.");
ce350379
NM
194 return VIRTUALIZATION_NONE;
195}
196
197static int detect_vm_smbios(void) {
198 /* The SMBIOS BIOS Charateristics Extension Byte 2 (Section 2.1.2.2 of
199 * https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_3.4.0.pdf), specifies that
200 * the 4th bit being set indicates a VM. The BIOS Characteristics table is exposed via the kernel in
201 * /sys/firmware/dmi/entries/0-0. Note that in the general case, this bit being unset should not
202 * imply that the system is running on bare-metal. For example, QEMU 3.1.0 (with or without KVM)
203 * with SeaBIOS does not set this bit. */
204 _cleanup_free_ char *s = NULL;
205 size_t readsize;
206 int r;
207
208 r = read_full_virtual_file("/sys/firmware/dmi/entries/0-0/raw", &s, &readsize);
209 if (r < 0) {
932feb79
YW
210 log_debug_errno(r, "Unable to read /sys/firmware/dmi/entries/0-0/raw, "
211 "using the virtualization information found in DMI vendor table, ignoring: %m");
ce350379
NM
212 return SMBIOS_VM_BIT_UNKNOWN;
213 }
214 if (readsize < 20 || s[1] < 20) {
215 /* The spec indicates that byte 1 contains the size of the table, 0x12 + the number of
216 * extension bytes. The data we're interested in is in extension byte 2, which would be at
217 * 0x13. If we didn't read that much data, or if the BIOS indicates that we don't have that
218 * much data, we don't infer anything from the SMBIOS. */
932feb79
YW
219 log_debug("Only read %zu bytes from /sys/firmware/dmi/entries/0-0/raw (expected 20). "
220 "Using the virtualization information found in DMI vendor table.", readsize);
ce350379
NM
221 return SMBIOS_VM_BIT_UNKNOWN;
222 }
bdb628ee 223
ce350379
NM
224 uint8_t byte = (uint8_t) s[19];
225 if (byte & (1U<<4)) {
932feb79 226 log_debug("DMI BIOS Extension table indicates virtualization.");
ce350379
NM
227 return SMBIOS_VM_BIT_SET;
228 }
932feb79 229 log_debug("DMI BIOS Extension table does not indicate virtualization.");
ce350379
NM
230 return SMBIOS_VM_BIT_UNSET;
231}
eebbd595 232#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64) */
ce350379 233
1b86c7c5 234static Virtualization detect_vm_dmi(void) {
eebbd595 235#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64)
ce350379
NM
236
237 int r;
238 r = detect_vm_dmi_vendor();
239
240 /* The DMI vendor tables in /sys/class/dmi/id don't help us distinguish between Amazon EC2
241 * virtual machines and bare-metal instances, so we need to look at SMBIOS. */
f90eea7d
BH
242 if (r == VIRTUALIZATION_AMAZON) {
243 switch (detect_vm_smbios()) {
244 case SMBIOS_VM_BIT_SET:
245 return VIRTUALIZATION_AMAZON;
246 case SMBIOS_VM_BIT_UNSET:
247 return VIRTUALIZATION_NONE;
248 case SMBIOS_VM_BIT_UNKNOWN: {
249 /* The DMI information we are after is only accessible to the root user,
250 * so we fallback to using the product name which is less restricted
251 * to distinguish metal systems from virtualized instances */
252 _cleanup_free_ char *s = NULL;
253
254 r = read_full_virtual_file("/sys/class/dmi/id/product_name", &s, NULL);
255 /* In EC2, virtualized is much more common than metal, so if for some reason
256 * we fail to read the DMI data, assume we are virtualized. */
257 if (r < 0) {
258 log_debug_errno(r, "Can't read /sys/class/dmi/id/product_name,"
259 " assuming virtualized: %m");
260 return VIRTUALIZATION_AMAZON;
261 }
262 if (endswith(truncate_nl(s), ".metal")) {
263 log_debug("DMI product name ends with '.metal', assuming no virtualization");
264 return VIRTUALIZATION_NONE;
265 } else
266 return VIRTUALIZATION_AMAZON;
267 }
268 default:
269 assert_not_reached();
270 }
271 }
9f63a08d 272
ce350379
NM
273 /* If we haven't identified a VM, but the firmware indicates that there is one, indicate as much. We
274 * have no further information about what it is. */
275 if (r == VIRTUALIZATION_NONE && detect_vm_smbios() == SMBIOS_VM_BIT_SET)
276 return VIRTUALIZATION_VM_OTHER;
277 return r;
278#else
75f86906 279 return VIRTUALIZATION_NONE;
ce350379 280#endif
bdb628ee
ZJS
281}
282
575e6588
OH
283#define XENFEAT_dom0 11 /* xen/include/public/features.h */
284#define PATH_FEATURES "/sys/hypervisor/properties/features"
1a8e4148
OH
285/* Returns -errno, or 0 for domU, or 1 for dom0 */
286static int detect_vm_xen_dom0(void) {
75f86906 287 _cleanup_free_ char *domcap = NULL;
bdb628ee 288 int r;
b52aae1d 289
575e6588
OH
290 r = read_one_line_file(PATH_FEATURES, &domcap);
291 if (r < 0 && r != -ENOENT)
292 return r;
d6062e3b 293 if (r >= 0) {
575e6588
OH
294 unsigned long features;
295
47dbb99a
YW
296 /* Here, we need to use sscanf() instead of safe_atoul()
297 * as the string lacks the leading "0x". */
13e0f9fe
OH
298 r = sscanf(domcap, "%lx", &features);
299 if (r == 1) {
575e6588
OH
300 r = !!(features & (1U << XENFEAT_dom0));
301 log_debug("Virtualization XEN, found %s with value %08lx, "
302 "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
303 PATH_FEATURES, features, r ? "" : " not");
304 return r;
305 }
306 log_debug("Virtualization XEN, found %s, unhandled content '%s'",
307 PATH_FEATURES, domcap);
308 }
309
75f86906 310 r = read_one_line_file("/proc/xen/capabilities", &domcap);
9f63a08d 311 if (r == -ENOENT) {
1a8e4148
OH
312 log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
313 return 0;
9f63a08d 314 }
d5b687e7
LP
315 if (r < 0)
316 return r;
bdb628ee 317
31a9be23
YW
318 for (const char *i = domcap;;) {
319 _cleanup_free_ char *cap = NULL;
9f63a08d 320
31a9be23
YW
321 r = extract_first_word(&i, &cap, ",", 0);
322 if (r < 0)
323 return r;
324 if (r == 0) {
325 log_debug("Virtualization XEN DomU found (/proc/xen/capabilities)");
326 return 0;
327 }
328
329 if (streq(cap, "control_d")) {
330 log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
331 return 1;
332 }
333 }
75f86906 334}
37287585 335
1b86c7c5 336static Virtualization detect_vm_xen(void) {
ea583ed5
RN
337 /* The presence of /proc/xen indicates some form of a Xen domain
338 The check for Dom0 is handled outside this function */
599be274
BS
339 if (access("/proc/xen", F_OK) < 0) {
340 log_debug("Virtualization XEN not found, /proc/xen does not exist");
341 return VIRTUALIZATION_NONE;
342 }
343 log_debug("Virtualization XEN found (/proc/xen exists)");
599be274
BS
344 return VIRTUALIZATION_XEN;
345}
346
1b86c7c5 347static Virtualization detect_vm_hypervisor(void) {
75f86906
LP
348 _cleanup_free_ char *hvtype = NULL;
349 int r;
37287585 350
75f86906
LP
351 r = read_one_line_file("/sys/hypervisor/type", &hvtype);
352 if (r == -ENOENT)
353 return VIRTUALIZATION_NONE;
354 if (r < 0)
355 return r;
37287585 356
9f63a08d
SS
357 log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
358
75f86906
LP
359 if (streq(hvtype, "xen"))
360 return VIRTUALIZATION_XEN;
361 else
362 return VIRTUALIZATION_VM_OTHER;
363}
37287585 364
1b86c7c5 365static Virtualization detect_vm_uml(void) {
6058516a 366 _cleanup_fclose_ FILE *f = NULL;
75f86906 367 int r;
37287585 368
75f86906 369 /* Detect User-Mode Linux by reading /proc/cpuinfo */
6058516a
ZJS
370 f = fopen("/proc/cpuinfo", "re");
371 if (!f) {
372 if (errno == ENOENT) {
373 log_debug("/proc/cpuinfo not found, assuming no UML virtualization.");
374 return VIRTUALIZATION_NONE;
375 }
376 return -errno;
ef2a48aa 377 }
9f63a08d 378
6058516a
ZJS
379 for (;;) {
380 _cleanup_free_ char *line = NULL;
381 const char *t;
382
383 r = read_line(f, LONG_LINE_MAX, &line);
384 if (r < 0)
385 return r;
386 if (r == 0)
387 break;
388
389 t = startswith(line, "vendor_id\t: ");
390 if (t) {
391 if (startswith(t, "User Mode Linux")) {
392 log_debug("UML virtualization found in /proc/cpuinfo");
393 return VIRTUALIZATION_UML;
394 }
395
396 break;
397 }
9f63a08d 398 }
bdb628ee 399
ef2a48aa 400 log_debug("UML virtualization not found in /proc/cpuinfo.");
75f86906
LP
401 return VIRTUALIZATION_NONE;
402}
e32886e0 403
1b86c7c5 404static Virtualization detect_vm_zvm(void) {
bdb628ee 405
75f86906
LP
406#if defined(__s390__)
407 _cleanup_free_ char *t = NULL;
408 int r;
e32886e0 409
c4cd1d4d 410 r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
75f86906
LP
411 if (r == -ENOENT)
412 return VIRTUALIZATION_NONE;
413 if (r < 0)
414 return r;
e32886e0 415
9f63a08d 416 log_debug("Virtualization %s found in /proc/sysinfo", t);
75f86906
LP
417 if (streq(t, "z/VM"))
418 return VIRTUALIZATION_ZVM;
419 else
420 return VIRTUALIZATION_KVM;
421#else
9f63a08d 422 log_debug("This platform does not support /proc/sysinfo");
75f86906
LP
423 return VIRTUALIZATION_NONE;
424#endif
425}
e32886e0 426
75f86906 427/* Returns a short identifier for the various VM implementations */
1b86c7c5
LP
428Virtualization detect_vm(void) {
429 static thread_local Virtualization cached_found = _VIRTUALIZATION_INVALID;
530c1c30 430 bool other = false;
1b86c7c5
LP
431 int xen_dom0 = 0;
432 Virtualization v, dmi;
e32886e0 433
75f86906
LP
434 if (cached_found >= 0)
435 return cached_found;
bdb628ee 436
f6875b0a 437 /* We have to use the correct order here:
f6875b0a 438 *
840a49f3
YW
439 * → First, try to detect Oracle Virtualbox, Amazon EC2 Nitro, and Parallels, even if they use KVM,
440 * as well as Xen even if it cloaks as Microsoft Hyper-V. Attempt to detect uml at this stage also
441 * since it runs as a user-process nested inside other VMs. Also check for Xen now, because Xen PV
442 * mode does not override CPUID when nested inside another hypervisor.
f2fe2865 443 *
840a49f3
YW
444 * → Second, try to detect from CPUID, this will report KVM for whatever software is used even if
445 * info in DMI is overwritten.
f2fe2865
LP
446 *
447 * → Third, try to detect from DMI. */
5f1c788c 448
28b1a3ea 449 dmi = detect_vm_dmi();
840a49f3
YW
450 if (IN_SET(dmi,
451 VIRTUALIZATION_ORACLE,
452 VIRTUALIZATION_XEN,
453 VIRTUALIZATION_AMAZON,
454 VIRTUALIZATION_PARALLELS)) {
1b86c7c5 455 v = dmi;
2f8e375d
BR
456 goto finish;
457 }
5f1c788c 458
c8037dbf 459 /* Detect UML */
1b86c7c5
LP
460 v = detect_vm_uml();
461 if (v < 0)
462 return v;
463 if (v != VIRTUALIZATION_NONE)
c8037dbf
CO
464 goto finish;
465
599be274 466 /* Detect Xen */
1b86c7c5
LP
467 v = detect_vm_xen();
468 if (v < 0)
469 return v;
470 if (v == VIRTUALIZATION_XEN) {
ea583ed5
RN
471 /* If we are Dom0, then we expect to not report as a VM. However, as we might be nested
472 * inside another hypervisor which can be detected via the CPUID check, wait to report this
473 * until after the CPUID check. */
474 xen_dom0 = detect_vm_xen_dom0();
475 if (xen_dom0 < 0)
476 return xen_dom0;
477 if (xen_dom0 == 0)
478 goto finish;
479
1b86c7c5
LP
480 v = VIRTUALIZATION_NONE;
481 } else if (v != VIRTUALIZATION_NONE)
ea583ed5 482 assert_not_reached();
599be274 483
c8037dbf 484 /* Detect from CPUID */
1b86c7c5
LP
485 v = detect_vm_cpuid();
486 if (v < 0)
487 return v;
488 if (v == VIRTUALIZATION_VM_OTHER)
c2b19b3c 489 other = true;
1b86c7c5 490 else if (v != VIRTUALIZATION_NONE)
c2b19b3c 491 goto finish;
d831deb5 492
ea583ed5
RN
493 /* If we are in Dom0 and have not yet finished, finish with the result of detect_vm_cpuid */
494 if (xen_dom0 > 0)
495 goto finish;
496
c2b19b3c
LP
497 /* Now, let's get back to DMI */
498 if (dmi < 0)
499 return dmi;
500 if (dmi == VIRTUALIZATION_VM_OTHER)
501 other = true;
502 else if (dmi != VIRTUALIZATION_NONE) {
1b86c7c5 503 v = dmi;
c2b19b3c 504 goto finish;
530c1c30 505 }
b52aae1d 506
599be274 507 /* Check high-level hypervisor sysfs file */
1b86c7c5
LP
508 v = detect_vm_hypervisor();
509 if (v < 0)
510 return v;
511 if (v == VIRTUALIZATION_VM_OTHER)
c2b19b3c 512 other = true;
1b86c7c5 513 else if (v != VIRTUALIZATION_NONE)
c2b19b3c 514 goto finish;
f41925b4 515
1b86c7c5
LP
516 v = detect_vm_device_tree();
517 if (v < 0)
518 return v;
519 if (v == VIRTUALIZATION_VM_OTHER)
c2b19b3c 520 other = true;
1b86c7c5 521 else if (v != VIRTUALIZATION_NONE)
c2b19b3c 522 goto finish;
f41925b4 523
1b86c7c5
LP
524 v = detect_vm_zvm();
525 if (v < 0)
526 return v;
0fb533a5
LP
527
528finish:
1b86c7c5
LP
529 if (v == VIRTUALIZATION_NONE && other)
530 v = VIRTUALIZATION_VM_OTHER;
3f61278b 531
1b86c7c5
LP
532 cached_found = v;
533 log_debug("Found VM virtualization %s", virtualization_to_string(v));
534 return v;
b52aae1d
LP
535}
536
735ea55f
YW
537static const char *const container_table[_VIRTUALIZATION_MAX] = {
538 [VIRTUALIZATION_LXC] = "lxc",
539 [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
540 [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
541 [VIRTUALIZATION_DOCKER] = "docker",
542 [VIRTUALIZATION_PODMAN] = "podman",
543 [VIRTUALIZATION_RKT] = "rkt",
544 [VIRTUALIZATION_WSL] = "wsl",
80cc3e3e 545 [VIRTUALIZATION_PROOT] = "proot",
abac810b 546 [VIRTUALIZATION_POUCH] = "pouch",
735ea55f 547};
0fb533a5 548
735ea55f
YW
549DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(container, int);
550
0e13779d
SB
/* Heuristically determines whether we run inside a cgroup namespace, by checking which attribute files
 * are present at the top of the mounted cgroup hierarchy.
 *
 * Returns > 0 if so, 0 if not (or if cgroup namespaces are unsupported), and a negative errno-style
 * value on failure. */
static int running_in_cgroupns(void) {
        int unified;

        if (!cg_ns_supported())
                return false;

        unified = cg_all_unified();
        if (unified < 0)
                return unified;

        if (!unified) {
                /* cgroup v1 */

                /* If systemd controller is not mounted, do not even bother. */
                if (access("/sys/fs/cgroup/systemd", F_OK) < 0)
                        return errno == ENOENT ? false : -errno;

                /* release_agent only exists in the root cgroup. */
                if (access("/sys/fs/cgroup/systemd/release_agent", F_OK) < 0)
                        return errno == ENOENT ? true : -errno;

                return false;
        }

        /* cgroup v2 */

        /* All kernel versions have cgroup.events in nested cgroups. */
        if (access("/sys/fs/cgroup/cgroup.events", F_OK) < 0)
                return errno == ENOENT ? false : -errno;

        /* There's no cgroup.type in the root cgroup, and future kernel versions
         * are unlikely to add it since cgroup.type is something that makes no sense
         * whatsoever in the root cgroup. */
        if (access("/sys/fs/cgroup/cgroup.type", F_OK) == 0)
                return true;
        if (errno != ENOENT)
                return -errno;

        /* On older kernel versions, there's no cgroup.type. If the features file is missing too, this
         * is an old kernel that we know for sure has cgroup.events only in nested cgroups. */
        if (access("/sys/kernel/cgroup/features", F_OK) < 0)
                return errno == ENOENT ? true : -errno;

        /* This is a recent kernel, and cgroup.type doesn't exist, so we must be
         * in the root cgroup. */
        return false;
}
616
1b86c7c5 617static Virtualization detect_container_files(void) {
a4a9a6f7
SB
618 static const struct {
619 const char *file_path;
1b86c7c5 620 Virtualization id;
a4a9a6f7
SB
621 } container_file_table[] = {
622 /* https://github.com/containers/podman/issues/6192 */
623 /* https://github.com/containers/podman/issues/3586#issuecomment-661918679 */
624 { "/run/.containerenv", VIRTUALIZATION_PODMAN },
625 /* https://github.com/moby/moby/issues/18355 */
626 /* Docker must be the last in this table, see below. */
627 { "/.dockerenv", VIRTUALIZATION_DOCKER },
628 };
629
0ee2d5b2 630 for (size_t i = 0; i < ELEMENTSOF(container_file_table); i++) {
a4a9a6f7
SB
631 if (access(container_file_table[i].file_path, F_OK) >= 0)
632 return container_file_table[i].id;
633
634 if (errno != ENOENT)
635 log_debug_errno(errno,
636 "Checking if %s exists failed, ignoring: %m",
637 container_file_table[i].file_path);
638 }
639
640 return VIRTUALIZATION_NONE;
641}
642
1b86c7c5
LP
643Virtualization detect_container(void) {
644 static thread_local Virtualization cached_found = _VIRTUALIZATION_INVALID;
a7e508f8 645 _cleanup_free_ char *m = NULL, *o = NULL, *p = NULL;
75f86906 646 const char *e = NULL;
1b86c7c5 647 Virtualization v;
ab94af92 648 int r;
b52aae1d 649
75f86906 650 if (cached_found >= 0)
0fb533a5 651 return cached_found;
0fb533a5 652
8d6e8034 653 /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
b7ec9e71
LP
654 if (access("/proc/vz", F_OK) < 0) {
655 if (errno != ENOENT)
656 log_debug_errno(errno, "Failed to check if /proc/vz exists, ignoring: %m");
657 } else if (access("/proc/bc", F_OK) < 0) {
658 if (errno == ENOENT) {
1b86c7c5 659 v = VIRTUALIZATION_OPENVZ;
b7ec9e71
LP
660 goto finish;
661 }
662
663 log_debug_errno(errno, "Failed to check if /proc/bc exists, ignoring: %m");
b52aae1d
LP
664 }
665
4096043f 666 /* "Official" way of detecting WSL https://github.com/Microsoft/WSL/issues/423#issuecomment-221627364 */
6c8a2c67 667 r = read_one_line_file("/proc/sys/kernel/osrelease", &o);
b7ec9e71
LP
668 if (r < 0)
669 log_debug_errno(r, "Failed to read /proc/sys/kernel/osrelease, ignoring: %m");
670 else if (strstr(o, "Microsoft") || strstr(o, "WSL")) {
1b86c7c5 671 v = VIRTUALIZATION_WSL;
a2f838d5 672 goto finish;
6c8a2c67
BR
673 }
674
80cc3e3e
CD
675 /* proot doesn't use PID namespacing, so we can just check if we have a matching tracer for this
676 * invocation without worrying about it being elsewhere.
677 */
678 r = get_proc_field("/proc/self/status", "TracerPid", WHITESPACE, &p);
b7ec9e71
LP
679 if (r < 0)
680 log_debug_errno(r, "Failed to read our own trace PID, ignoring: %m");
681 else if (!streq(p, "0")) {
80cc3e3e 682 pid_t ptrace_pid;
b7ec9e71 683
80cc3e3e 684 r = parse_pid(p, &ptrace_pid);
b7ec9e71
LP
685 if (r < 0)
686 log_debug_errno(r, "Failed to parse our own tracer PID, ignoring: %m");
687 else {
80cc3e3e 688 _cleanup_free_ char *ptrace_comm = NULL;
b7ec9e71
LP
689 const char *pf;
690
691 pf = procfs_file_alloca(ptrace_pid, "comm");
80cc3e3e 692 r = read_one_line_file(pf, &ptrace_comm);
b7ec9e71
LP
693 if (r < 0)
694 log_debug_errno(r, "Failed to read %s, ignoring: %m", pf);
695 else if (startswith(ptrace_comm, "proot")) {
1b86c7c5 696 v = VIRTUALIZATION_PROOT;
9b4f3fa3
CD
697 goto finish;
698 }
80cc3e3e
CD
699 }
700 }
701
79efcd02 702 /* The container manager might have placed this in the /run/host/ hierarchy for us, which is best
0f48ba7b
LP
703 * because we can be consumed just like that, without special privileges. */
704 r = read_one_line_file("/run/host/container-manager", &m);
705 if (r > 0) {
706 e = m;
707 goto translate_name;
708 }
709 if (!IN_SET(r, -ENOENT, 0))
79efcd02 710 return log_debug_errno(r, "Failed to read /run/host/container-manager: %m");
0f48ba7b 711
df0ff127 712 if (getpid_cached() == 1) {
342bed02
ZJS
713 /* If we are PID 1 we can just check our own environment variable, and that's authoritative.
714 * We distinguish three cases:
715 * - the variable is not defined → we jump to other checks
716 * - the variable is defined to an empty value → we are not in a container
717 * - anything else → some container, either one of the known ones or "container-other"
718 */
fdd25311 719 e = getenv("container");
342bed02 720 if (!e)
a4a9a6f7 721 goto check_files;
fdd25311 722 if (isempty(e)) {
1b86c7c5 723 v = VIRTUALIZATION_NONE;
fdd25311
LP
724 goto finish;
725 }
fdd25311 726
8d6e8034
LP
727 goto translate_name;
728 }
729
730 /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
731 * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
732 r = read_one_line_file("/run/systemd/container", &m);
a2176045 733 if (r > 0) {
8d6e8034
LP
734 e = m;
735 goto translate_name;
736 }
a2176045 737 if (!IN_SET(r, -ENOENT, 0))
8d6e8034
LP
738 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
739
740 /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
741 r = getenv_for_pid(1, "container", &m);
742 if (r > 0) {
fdd25311 743 e = m;
8d6e8034 744 goto translate_name;
fdd25311 745 }
8d6e8034
LP
746 if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
747 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
748
a4a9a6f7
SB
749check_files:
750 /* Check for existence of some well-known files. We only do this after checking
751 * for other specific container managers, otherwise we risk mistaking another
752 * container manager for Docker: the /.dockerenv file could inadvertently end up
753 * in a file system image. */
1b86c7c5
LP
754 v = detect_container_files();
755 if (v < 0)
756 return v;
757 if (v != VIRTUALIZATION_NONE)
a4a9a6f7
SB
758 goto finish;
759
0e13779d
SB
760 r = running_in_cgroupns();
761 if (r > 0) {
1b86c7c5 762 v = VIRTUALIZATION_CONTAINER_OTHER;
0e13779d
SB
763 goto finish;
764 }
765 if (r < 0)
766 log_debug_errno(r, "Failed to detect cgroup namespace: %m");
767
a4a9a6f7 768 /* If none of that worked, give up, assume no container manager. */
1b86c7c5 769 v = VIRTUALIZATION_NONE;
8d6e8034 770 goto finish;
b52aae1d 771
8d6e8034 772translate_name:
a4a9a6f7
SB
773 if (streq(e, "oci")) {
774 /* Some images hardcode container=oci, but OCI is not a specific container manager.
775 * Try to detect one based on well-known files. */
1b86c7c5
LP
776 v = detect_container_files();
777 if (v != VIRTUALIZATION_NONE)
778 v = VIRTUALIZATION_CONTAINER_OTHER;
a4a9a6f7
SB
779 goto finish;
780 }
1b86c7c5
LP
781 v = container_from_string(e);
782 if (v < 0)
783 v = VIRTUALIZATION_CONTAINER_OTHER;
fdd25311 784
0fb533a5 785finish:
1b86c7c5
LP
786 log_debug("Found container virtualization %s.", virtualization_to_string(v));
787 cached_found = v;
788 return v;
b52aae1d
LP
789}
790
1b86c7c5
LP
791Virtualization detect_virtualization(void) {
792 int v;
b52aae1d 793
1b86c7c5
LP
794 v = detect_container();
795 if (v != VIRTUALIZATION_NONE)
796 return v;
b52aae1d 797
1b86c7c5 798 return detect_vm();
b52aae1d 799}
75f86906 800
299a34c1
ZJS
801static int userns_has_mapping(const char *name) {
802 _cleanup_fclose_ FILE *f = NULL;
b2a331f2 803 uid_t a, b, c;
299a34c1
ZJS
804 int r;
805
806 f = fopen(name, "re");
807 if (!f) {
808 log_debug_errno(errno, "Failed to open %s: %m", name);
abd67ce7 809 return errno == ENOENT ? false : -errno;
299a34c1
ZJS
810 }
811
b2a331f2
LP
812 errno = 0;
813 r = fscanf(f, UID_FMT " " UID_FMT " " UID_FMT "\n", &a, &b, &c);
814 if (r == EOF) {
815 if (ferror(f))
816 return log_debug_errno(errno_or_else(EIO), "Failed to read %s: %m", name);
299a34c1 817
b2a331f2
LP
818 log_debug("%s is empty, we're in an uninitialized user namespace", name);
819 return true;
299a34c1 820 }
b2a331f2
LP
821 if (r != 3)
822 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "Failed to parse %s: %m", name);
299a34c1
ZJS
823
824 if (a == 0 && b == 0 && c == UINT32_MAX) {
825 /* The kernel calls mappings_overlap() and does not allow overlaps */
826 log_debug("%s has a full 1:1 mapping", name);
827 return false;
828 }
829
830 /* Anything else implies that we are in a user namespace */
831 log_debug("Mapping found in %s, we're in a user namespace", name);
832 return true;
833}
834
835int running_in_userns(void) {
836 _cleanup_free_ char *line = NULL;
837 int r;
838
839 r = userns_has_mapping("/proc/self/uid_map");
840 if (r != 0)
841 return r;
842
843 r = userns_has_mapping("/proc/self/gid_map");
844 if (r != 0)
845 return r;
846
16fa4746
LP
847 /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also possible to compile a
848 * kernel without CONFIG_USER_NS, in which case "setgroups" also does not exist. We cannot
849 * distinguish those two cases, so assume that we're running on a stripped-down recent kernel, rather
850 * than on an old one, and if the file is not found, return false. */
851 r = read_virtual_file("/proc/self/setgroups", SIZE_MAX, &line, NULL);
299a34c1
ZJS
852 if (r < 0) {
853 log_debug_errno(r, "/proc/self/setgroups: %m");
854 return r == -ENOENT ? false : r;
855 }
856
16fa4746
LP
857 strstrip(line); /* remove trailing newline */
858
299a34c1
ZJS
859 r = streq(line, "deny");
860 /* See user_namespaces(7) for a description of this "setgroups" contents. */
861 log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
862 return r;
863}
864
/* Checks whether our root directory differs from the one of PID 1.
 *
 * Returns > 0 if "/" and "/proc/1/root" are different files, 0 if they are the same or if
 * $SYSTEMD_IGNORE_CHROOT is set to a true value, and a negative errno-style value if the comparison
 * fails. */
int running_in_chroot(void) {
        int same;

        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;

        same = files_same("/proc/1/root", "/", 0);

        return same < 0 ? same : same == 0;
}
877
68337e55
GS
878#if defined(__i386__) || defined(__x86_64__)
/* Associates one bit position of a CPUID output register with the name of the CPU flag it stands for. */
struct cpuid_table_entry {
        uint32_t flag_bit;  /* bit index within the 32-bit register */
        const char *name;   /* flag name */
};

/* CPUID leaf 1, register EDX */
static const struct cpuid_table_entry leaf1_edx[] = {
        {  0, "fpu"     },
        {  1, "vme"     },
        {  2, "de"      },
        {  3, "pse"     },
        {  4, "tsc"     },
        {  5, "msr"     },
        {  6, "pae"     },
        {  7, "mce"     },
        {  8, "cx8"     },
        {  9, "apic"    },
        { 11, "sep"     },
        { 12, "mtrr"    },
        { 13, "pge"     },
        { 14, "mca"     },
        { 15, "cmov"    },
        { 16, "pat"     },
        { 17, "pse36"   },
        { 19, "clflush" },
        { 23, "mmx"     },
        { 24, "fxsr"    },
        { 25, "sse"     },
        { 26, "sse2"    },
        { 28, "ht"      },
};

/* CPUID leaf 1, register ECX */
static const struct cpuid_table_entry leaf1_ecx[] = {
        {  0, "pni"     },
        {  1, "pclmul"  },
        {  3, "monitor" },
        {  9, "ssse3"   },
        { 12, "fma3"    },
        { 13, "cx16"    },
        { 19, "sse4_1"  },
        { 20, "sse4_2"  },
        { 22, "movbe"   },
        { 23, "popcnt"  },
        { 25, "aes"     },
        { 26, "xsave"   },
        { 27, "osxsave" },
        { 28, "avx"     },
        { 29, "f16c"    },
        { 30, "rdrand"  },
};

/* CPUID leaf 7, register EBX */
static const struct cpuid_table_entry leaf7_ebx[] = {
        {  3, "bmi1"   },
        {  5, "avx2"   },
        {  8, "bmi2"   },
        { 18, "rdseed" },
        { 19, "adx"    },
        { 29, "sha_ni" },
};

/* Presumably extended leaf 0x80000001, register EDX (going by the name) */
static const struct cpuid_table_entry leaf81_edx[] = {
        { 11, "syscall" },
        { 27, "rdtscp"  },
        { 29, "lm"      },
};

/* Presumably extended leaf 0x80000001, register ECX (going by the name) */
static const struct cpuid_table_entry leaf81_ecx[] = {
        {  0, "lahf_lm" },
        {  5, "abm"     },
};

/* Presumably extended leaf 0x80000007, register EDX (going by the name) */
static const struct cpuid_table_entry leaf87_edx[] = {
        {  8, "constant_tsc" },
};
952
953static bool given_flag_in_set(const char *flag, const struct cpuid_table_entry *set, size_t set_size, uint32_t val) {
954 for (size_t i = 0; i < set_size; i++) {
955 if ((UINT32_C(1) << set[i].flag_bit) & val &&
956 streq(flag, set[i].name))
957 return true;
958 }
959 return false;
960}
961
962static bool real_has_cpu_with_flag(const char *flag) {
963 uint32_t eax, ebx, ecx, edx;
964
965 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
966 if (given_flag_in_set(flag, leaf1_ecx, ELEMENTSOF(leaf1_ecx), ecx))
967 return true;
968
969 if (given_flag_in_set(flag, leaf1_edx, ELEMENTSOF(leaf1_edx), edx))
970 return true;
971 }
972
973 if (__get_cpuid(7, &eax, &ebx, &ecx, &edx)) {
974 if (given_flag_in_set(flag, leaf7_ebx, ELEMENTSOF(leaf7_ebx), ebx))
975 return true;
976 }
977
978 if (__get_cpuid(0x80000001U, &eax, &ebx, &ecx, &edx)) {
979 if (given_flag_in_set(flag, leaf81_ecx, ELEMENTSOF(leaf81_ecx), ecx))
980 return true;
981
982 if (given_flag_in_set(flag, leaf81_edx, ELEMENTSOF(leaf81_edx), edx))
983 return true;
984 }
985
986 if (__get_cpuid(0x80000007U, &eax, &ebx, &ecx, &edx))
987 if (given_flag_in_set(flag, leaf87_edx, ELEMENTSOF(leaf87_edx), edx))
988 return true;
989
990 return false;
991}
992#endif
993
/* Reports whether the CPU advertises the feature named 'flag'.
 *
 * CPUID is an x86 specific interface. Assume on all others that no CPUs have those flags. */
bool has_cpu_with_flag(const char *flag) {
#if !defined(__i386__) && !defined(__x86_64__)
        return false;
#else
        return real_has_cpu_with_flag(flag);
#endif
}
1002
75f86906
LP
1003static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
1004 [VIRTUALIZATION_NONE] = "none",
1005 [VIRTUALIZATION_KVM] = "kvm",
b6eca373 1006 [VIRTUALIZATION_AMAZON] = "amazon",
75f86906
LP
1007 [VIRTUALIZATION_QEMU] = "qemu",
1008 [VIRTUALIZATION_BOCHS] = "bochs",
1009 [VIRTUALIZATION_XEN] = "xen",
1010 [VIRTUALIZATION_UML] = "uml",
1011 [VIRTUALIZATION_VMWARE] = "vmware",
1012 [VIRTUALIZATION_ORACLE] = "oracle",
1013 [VIRTUALIZATION_MICROSOFT] = "microsoft",
1014 [VIRTUALIZATION_ZVM] = "zvm",
1015 [VIRTUALIZATION_PARALLELS] = "parallels",
aa0c3427 1016 [VIRTUALIZATION_BHYVE] = "bhyve",
1fdf07f5 1017 [VIRTUALIZATION_QNX] = "qnx",
095b9cf4 1018 [VIRTUALIZATION_ACRN] = "acrn",
3224e38b 1019 [VIRTUALIZATION_POWERVM] = "powervm",
75f86906
LP
1020 [VIRTUALIZATION_VM_OTHER] = "vm-other",
1021
1022 [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
1023 [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
1024 [VIRTUALIZATION_LXC] = "lxc",
1025 [VIRTUALIZATION_OPENVZ] = "openvz",
1026 [VIRTUALIZATION_DOCKER] = "docker",
90fb1f09 1027 [VIRTUALIZATION_PODMAN] = "podman",
9fb16425 1028 [VIRTUALIZATION_RKT] = "rkt",
6c8a2c67 1029 [VIRTUALIZATION_WSL] = "wsl",
80cc3e3e 1030 [VIRTUALIZATION_PROOT] = "proot",
abac810b 1031 [VIRTUALIZATION_POUCH] = "pouch",
75f86906
LP
1032 [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
1033};
1034
1b86c7c5 1035DEFINE_STRING_TABLE_LOOKUP(virtualization, Virtualization);