]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/virt.c
Merge pull request #5283 from poettering/tighten-sandbox
[thirdparty/systemd.git] / src / basic / virt.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <errno.h>
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <unistd.h>
25
26 #include "alloc-util.h"
27 #include "dirent-util.h"
28 #include "env-util.h"
29 #include "fd-util.h"
30 #include "fileio.h"
31 #include "macro.h"
32 #include "process-util.h"
33 #include "stat-util.h"
34 #include "string-table.h"
35 #include "string-util.h"
36 #include "virt.h"
37
38 static int detect_vm_cpuid(void) {
39
40 /* CPUID is an x86 specific interface. */
41 #if defined(__i386__) || defined(__x86_64__)
42
43 static const struct {
44 const char *cpuid;
45 int id;
46 } cpuid_vendor_table[] = {
47 { "XenVMMXenVMM", VIRTUALIZATION_XEN },
48 { "KVMKVMKVM", VIRTUALIZATION_KVM },
49 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
50 { "VMwareVMware", VIRTUALIZATION_VMWARE },
51 /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
52 { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
53 /* https://wiki.freebsd.org/bhyve */
54 { "bhyve bhyve ", VIRTUALIZATION_BHYVE },
55 };
56
57 uint32_t eax, ecx;
58 bool hypervisor;
59
60 /* http://lwn.net/Articles/301888/ */
61
62 #if defined (__i386__)
63 #define REG_a "eax"
64 #define REG_b "ebx"
65 #elif defined (__amd64__)
66 #define REG_a "rax"
67 #define REG_b "rbx"
68 #endif
69
70 /* First detect whether there is a hypervisor */
71 eax = 1;
72 __asm__ __volatile__ (
73 /* ebx/rbx is being used for PIC! */
74 " push %%"REG_b" \n\t"
75 " cpuid \n\t"
76 " pop %%"REG_b" \n\t"
77
78 : "=a" (eax), "=c" (ecx)
79 : "0" (eax)
80 );
81
82 hypervisor = !!(ecx & 0x80000000U);
83
84 if (hypervisor) {
85 union {
86 uint32_t sig32[3];
87 char text[13];
88 } sig = {};
89 unsigned j;
90
91 /* There is a hypervisor, see what it is */
92 eax = 0x40000000U;
93 __asm__ __volatile__ (
94 /* ebx/rbx is being used for PIC! */
95 " push %%"REG_b" \n\t"
96 " cpuid \n\t"
97 " mov %%ebx, %1 \n\t"
98 " pop %%"REG_b" \n\t"
99
100 : "=a" (eax), "=r" (sig.sig32[0]), "=c" (sig.sig32[1]), "=d" (sig.sig32[2])
101 : "0" (eax)
102 );
103
104 log_debug("Virtualization found, CPUID=%s", sig.text);
105
106 for (j = 0; j < ELEMENTSOF(cpuid_vendor_table); j ++)
107 if (streq(sig.text, cpuid_vendor_table[j].cpuid))
108 return cpuid_vendor_table[j].id;
109
110 return VIRTUALIZATION_VM_OTHER;
111 }
112 #endif
113 log_debug("No virtualization found in CPUID");
114
115 return VIRTUALIZATION_NONE;
116 }
117
118 static int detect_vm_device_tree(void) {
119 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
120 _cleanup_free_ char *hvtype = NULL;
121 int r;
122
123 r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
124 if (r == -ENOENT) {
125 _cleanup_closedir_ DIR *dir = NULL;
126 struct dirent *dent;
127
128 dir = opendir("/proc/device-tree");
129 if (!dir) {
130 if (errno == ENOENT) {
131 log_debug_errno(errno, "/proc/device-tree: %m");
132 return VIRTUALIZATION_NONE;
133 }
134 return -errno;
135 }
136
137 FOREACH_DIRENT(dent, dir, return -errno)
138 if (strstr(dent->d_name, "fw-cfg")) {
139 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", dent->d_name);
140 return VIRTUALIZATION_QEMU;
141 }
142
143 log_debug("No virtualization found in /proc/device-tree/*");
144 return VIRTUALIZATION_NONE;
145 } else if (r < 0)
146 return r;
147
148 log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
149 if (streq(hvtype, "linux,kvm"))
150 return VIRTUALIZATION_KVM;
151 else if (strstr(hvtype, "xen"))
152 return VIRTUALIZATION_XEN;
153 else
154 return VIRTUALIZATION_VM_OTHER;
155 #else
156 log_debug("This platform does not support /proc/device-tree");
157 return VIRTUALIZATION_NONE;
158 #endif
159 }
160
161 static int detect_vm_dmi(void) {
162 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
163
164 static const char *const dmi_vendors[] = {
165 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
166 "/sys/class/dmi/id/sys_vendor",
167 "/sys/class/dmi/id/board_vendor",
168 "/sys/class/dmi/id/bios_vendor"
169 };
170
171 static const struct {
172 const char *vendor;
173 int id;
174 } dmi_vendor_table[] = {
175 { "KVM", VIRTUALIZATION_KVM },
176 { "QEMU", VIRTUALIZATION_QEMU },
177 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
178 { "VMware", VIRTUALIZATION_VMWARE },
179 { "VMW", VIRTUALIZATION_VMWARE },
180 { "innotek GmbH", VIRTUALIZATION_ORACLE },
181 { "Xen", VIRTUALIZATION_XEN },
182 { "Bochs", VIRTUALIZATION_BOCHS },
183 { "Parallels", VIRTUALIZATION_PARALLELS },
184 /* https://wiki.freebsd.org/bhyve */
185 { "BHYVE", VIRTUALIZATION_BHYVE },
186 };
187 unsigned i;
188 int r;
189
190 for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
191 _cleanup_free_ char *s = NULL;
192 unsigned j;
193
194 r = read_one_line_file(dmi_vendors[i], &s);
195 if (r < 0) {
196 if (r == -ENOENT)
197 continue;
198
199 return r;
200 }
201
202
203
204 for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
205 if (startswith(s, dmi_vendor_table[j].vendor)) {
206 log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]);
207 return dmi_vendor_table[j].id;
208 }
209 }
210 #endif
211
212 log_debug("No virtualization found in DMI");
213
214 return VIRTUALIZATION_NONE;
215 }
216
217 static int detect_vm_xen(void) {
218 /* Check for Dom0 will be executed later in detect_vm_xen_dom0
219 Thats why we dont check the content of /proc/xen/capabilities here. */
220 if (access("/proc/xen/capabilities", F_OK) < 0) {
221 log_debug("Virtualization XEN not found, /proc/xen/capabilities does not exist");
222 return VIRTUALIZATION_NONE;
223 }
224
225 log_debug("Virtualization XEN found (/proc/xen/capabilities exists)");
226 return VIRTUALIZATION_XEN;
227
228 }
229
230 static bool detect_vm_xen_dom0(void) {
231 _cleanup_free_ char *domcap = NULL;
232 char *cap, *i;
233 int r;
234
235 r = read_one_line_file("/proc/xen/capabilities", &domcap);
236 if (r == -ENOENT) {
237 log_debug("Virtualization XEN not found, /proc/xen/capabilities does not exist");
238 return false;
239 }
240 if (r < 0)
241 return r;
242
243 i = domcap;
244 while ((cap = strsep(&i, ",")))
245 if (streq(cap, "control_d"))
246 break;
247 if (!cap) {
248 log_debug("Virtualization XEN DomU found (/proc/xen/capabilites)");
249 return false;
250 }
251
252 log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
253 return true;
254 }
255
256 static int detect_vm_hypervisor(void) {
257 _cleanup_free_ char *hvtype = NULL;
258 int r;
259
260 r = read_one_line_file("/sys/hypervisor/type", &hvtype);
261 if (r == -ENOENT)
262 return VIRTUALIZATION_NONE;
263 if (r < 0)
264 return r;
265
266 log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
267
268 if (streq(hvtype, "xen"))
269 return VIRTUALIZATION_XEN;
270 else
271 return VIRTUALIZATION_VM_OTHER;
272 }
273
274 static int detect_vm_uml(void) {
275 _cleanup_free_ char *cpuinfo_contents = NULL;
276 int r;
277
278 /* Detect User-Mode Linux by reading /proc/cpuinfo */
279 r = read_full_file("/proc/cpuinfo", &cpuinfo_contents, NULL);
280 if (r < 0)
281 return r;
282
283 if (strstr(cpuinfo_contents, "\nvendor_id\t: User Mode Linux\n")) {
284 log_debug("UML virtualization found in /proc/cpuinfo");
285 return VIRTUALIZATION_UML;
286 }
287
288 log_debug("No virtualization found in /proc/cpuinfo.");
289 return VIRTUALIZATION_NONE;
290 }
291
292 static int detect_vm_zvm(void) {
293
294 #if defined(__s390__)
295 _cleanup_free_ char *t = NULL;
296 int r;
297
298 r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
299 if (r == -ENOENT)
300 return VIRTUALIZATION_NONE;
301 if (r < 0)
302 return r;
303
304 log_debug("Virtualization %s found in /proc/sysinfo", t);
305 if (streq(t, "z/VM"))
306 return VIRTUALIZATION_ZVM;
307 else
308 return VIRTUALIZATION_KVM;
309 #else
310 log_debug("This platform does not support /proc/sysinfo");
311 return VIRTUALIZATION_NONE;
312 #endif
313 }
314
315 /* Returns a short identifier for the various VM implementations */
316 int detect_vm(void) {
317 static thread_local int cached_found = _VIRTUALIZATION_INVALID;
318 int r, dmi;
319
320 if (cached_found >= 0)
321 return cached_found;
322
323 /* We have to use the correct order here:
324 *
325 * -> First try to detect Oracle Virtualbox, even if it uses KVM.
326 * -> Second try to detect from cpuid, this will report KVM for
327 * whatever software is used even if info in dmi is overwritten.
328 * -> Third try to detect from dmi. */
329
330 dmi = detect_vm_dmi();
331 if (dmi == VIRTUALIZATION_ORACLE) {
332 r = dmi;
333 goto finish;
334 }
335
336 r = detect_vm_cpuid();
337 if (r < 0)
338 return r;
339 if (r != VIRTUALIZATION_NONE)
340 goto finish;
341
342 r = dmi;
343 if (r < 0)
344 return r;
345 if (r != VIRTUALIZATION_NONE)
346 goto finish;
347
348 /* x86 xen will most likely be detected by cpuid. If not (most likely
349 * because we're not an x86 guest), then we should try the xen capabilities
350 * file next. If that's not found, then we check for the high-level
351 * hypervisor sysfs file:
352 *
353 * https://bugs.freedesktop.org/show_bug.cgi?id=77271 */
354
355 r = detect_vm_xen();
356 if (r < 0)
357 return r;
358 if (r != VIRTUALIZATION_NONE)
359 goto finish;
360
361 r = detect_vm_hypervisor();
362 if (r < 0)
363 return r;
364 if (r != VIRTUALIZATION_NONE)
365 goto finish;
366
367 r = detect_vm_device_tree();
368 if (r < 0)
369 return r;
370 if (r != VIRTUALIZATION_NONE)
371 goto finish;
372
373 r = detect_vm_uml();
374 if (r < 0)
375 return r;
376 if (r != VIRTUALIZATION_NONE)
377 goto finish;
378
379 r = detect_vm_zvm();
380 if (r < 0)
381 return r;
382
383 finish:
384 /* x86 xen Dom0 is detected as XEN in hypervisor and maybe others.
385 * In order to detect the Dom0 as not virtualization we need to
386 * double-check it */
387 if (r == VIRTUALIZATION_XEN && detect_vm_xen_dom0())
388 r = VIRTUALIZATION_NONE;
389
390 cached_found = r;
391 log_debug("Found VM virtualization %s", virtualization_to_string(r));
392 return r;
393 }
394
395 int detect_container(void) {
396
397 static const struct {
398 const char *value;
399 int id;
400 } value_table[] = {
401 { "lxc", VIRTUALIZATION_LXC },
402 { "lxc-libvirt", VIRTUALIZATION_LXC_LIBVIRT },
403 { "systemd-nspawn", VIRTUALIZATION_SYSTEMD_NSPAWN },
404 { "docker", VIRTUALIZATION_DOCKER },
405 { "rkt", VIRTUALIZATION_RKT },
406 };
407
408 static thread_local int cached_found = _VIRTUALIZATION_INVALID;
409 _cleanup_free_ char *m = NULL;
410 const char *e = NULL;
411 unsigned j;
412 int r;
413
414 if (cached_found >= 0)
415 return cached_found;
416
417 /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
418 if (access("/proc/vz", F_OK) >= 0 &&
419 access("/proc/bc", F_OK) < 0) {
420 r = VIRTUALIZATION_OPENVZ;
421 goto finish;
422 }
423
424 if (getpid() == 1) {
425 /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */
426
427 e = getenv("container");
428 if (isempty(e)) {
429 r = VIRTUALIZATION_NONE;
430 goto finish;
431 }
432
433 goto translate_name;
434 }
435
436 /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
437 * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
438 r = read_one_line_file("/run/systemd/container", &m);
439 if (r >= 0) {
440 e = m;
441 goto translate_name;
442 }
443 if (r != -ENOENT)
444 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
445
446 /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
447 r = getenv_for_pid(1, "container", &m);
448 if (r > 0) {
449 e = m;
450 goto translate_name;
451 }
452 if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
453 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
454
455 /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown
456 * there is not 1, we know we are in a PID namespace. and hence a container. */
457 r = read_one_line_file("/proc/1/sched", &m);
458 if (r >= 0) {
459 const char *t;
460
461 t = strrchr(m, '(');
462 if (!t)
463 return -EIO;
464
465 if (!startswith(t, "(1,")) {
466 r = VIRTUALIZATION_CONTAINER_OTHER;
467 goto finish;
468 }
469 } else if (r != -ENOENT)
470 return r;
471
472 /* If that didn't work, give up, assume no container manager. */
473 r = VIRTUALIZATION_NONE;
474 goto finish;
475
476 translate_name:
477 for (j = 0; j < ELEMENTSOF(value_table); j++)
478 if (streq(e, value_table[j].value)) {
479 r = value_table[j].id;
480 goto finish;
481 }
482
483 r = VIRTUALIZATION_CONTAINER_OTHER;
484
485 finish:
486 log_debug("Found container virtualization %s.", virtualization_to_string(r));
487 cached_found = r;
488 return r;
489 }
490
/* Combined check: the result of detect_container() is returned unless it is 0, in which case the result of
 * detect_vm() is returned instead. Negative (error) results of either call are passed through unchanged. */
int detect_virtualization(void) {
        int container;

        container = detect_container();
        if (container != 0)
                return container;

        return detect_vm();
}
500
501 static int userns_has_mapping(const char *name) {
502 _cleanup_fclose_ FILE *f = NULL;
503 _cleanup_free_ char *buf = NULL;
504 size_t n_allocated = 0;
505 ssize_t n;
506 uint32_t a, b, c;
507 int r;
508
509 f = fopen(name, "re");
510 if (!f) {
511 log_debug_errno(errno, "Failed to open %s: %m", name);
512 return errno == ENOENT ? false : -errno;
513 }
514
515 n = getline(&buf, &n_allocated, f);
516 if (n < 0) {
517 if (feof(f)) {
518 log_debug("%s is empty, we're in an uninitialized user namespace", name);
519 return true;
520 }
521
522 return log_debug_errno(errno, "Failed to read %s: %m", name);
523 }
524
525 r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c);
526 if (r < 3)
527 return log_debug_errno(errno, "Failed to parse %s: %m", name);
528
529 if (a == 0 && b == 0 && c == UINT32_MAX) {
530 /* The kernel calls mappings_overlap() and does not allow overlaps */
531 log_debug("%s has a full 1:1 mapping", name);
532 return false;
533 }
534
535 /* Anything else implies that we are in a user namespace */
536 log_debug("Mapping found in %s, we're in a user namespace", name);
537 return true;
538 }
539
540 int running_in_userns(void) {
541 _cleanup_free_ char *line = NULL;
542 int r;
543
544 r = userns_has_mapping("/proc/self/uid_map");
545 if (r != 0)
546 return r;
547
548 r = userns_has_mapping("/proc/self/gid_map");
549 if (r != 0)
550 return r;
551
552 /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
553 * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
554 * also does not exist. We cannot distinguish those two cases, so assume that
555 * we're running on a stripped-down recent kernel, rather than on an old one,
556 * and if the file is not found, return false.
557 */
558 r = read_one_line_file("/proc/self/setgroups", &line);
559 if (r < 0) {
560 log_debug_errno(r, "/proc/self/setgroups: %m");
561 return r == -ENOENT ? false : r;
562 }
563
564 truncate_nl(line);
565 r = streq(line, "deny");
566 /* See user_namespaces(7) for a description of this "setgroups" contents. */
567 log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
568 return r;
569 }
570
/* Tells whether we run in a chroot() environment by comparing our root directory with that of PID 1.
 *
 * Returns 1 if "/proc/1/root" and "/" are not the same file, 0 if they are the same or if
 * $SYSTEMD_IGNORE_CHROOT is set to a true value, and the negative error of files_same() on failure. */
int running_in_chroot(void) {
        int same;

        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;

        same = files_same("/proc/1/root", "/");
        if (same < 0)
                return same;

        return same > 0 ? 0 : 1;
}
583
584 static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
585 [VIRTUALIZATION_NONE] = "none",
586 [VIRTUALIZATION_KVM] = "kvm",
587 [VIRTUALIZATION_QEMU] = "qemu",
588 [VIRTUALIZATION_BOCHS] = "bochs",
589 [VIRTUALIZATION_XEN] = "xen",
590 [VIRTUALIZATION_UML] = "uml",
591 [VIRTUALIZATION_VMWARE] = "vmware",
592 [VIRTUALIZATION_ORACLE] = "oracle",
593 [VIRTUALIZATION_MICROSOFT] = "microsoft",
594 [VIRTUALIZATION_ZVM] = "zvm",
595 [VIRTUALIZATION_PARALLELS] = "parallels",
596 [VIRTUALIZATION_BHYVE] = "bhyve",
597 [VIRTUALIZATION_VM_OTHER] = "vm-other",
598
599 [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
600 [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
601 [VIRTUALIZATION_LXC] = "lxc",
602 [VIRTUALIZATION_OPENVZ] = "openvz",
603 [VIRTUALIZATION_DOCKER] = "docker",
604 [VIRTUALIZATION_RKT] = "rkt",
605 [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
606 };
607
608 DEFINE_STRING_TABLE_LOOKUP(virtualization, int);