]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/machine/machine.c
Merge the "boot loader specification" wiki page
[thirdparty/systemd.git] / src / machine / machine.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <string.h>
5 #include <unistd.h>
6 #include <stdio_ext.h>
7
8 #include "sd-messages.h"
9
10 #include "alloc-util.h"
11 #include "bus-error.h"
12 #include "bus-util.h"
13 #include "escape.h"
14 #include "extract-word.h"
15 #include "fd-util.h"
16 #include "fileio.h"
17 #include "format-util.h"
18 #include "hashmap.h"
19 #include "machine-dbus.h"
20 #include "machine.h"
21 #include "mkdir.h"
22 #include "parse-util.h"
23 #include "process-util.h"
24 #include "special.h"
25 #include "stdio-util.h"
26 #include "string-table.h"
27 #include "terminal-util.h"
28 #include "unit-name.h"
29 #include "user-util.h"
30 #include "util.h"
31
32 Machine* machine_new(Manager *manager, MachineClass class, const char *name) {
33 Machine *m;
34
35 assert(manager);
36 assert(class < _MACHINE_CLASS_MAX);
37 assert(name);
38
39 /* Passing class == _MACHINE_CLASS_INVALID here is fine. It
40 * means as much as "we don't know yet", and that we'll figure
41 * it out later when loading the state file. */
42
43 m = new0(Machine, 1);
44 if (!m)
45 return NULL;
46
47 m->name = strdup(name);
48 if (!m->name)
49 goto fail;
50
51 if (class != MACHINE_HOST) {
52 m->state_file = strappend("/run/systemd/machines/", m->name);
53 if (!m->state_file)
54 goto fail;
55 }
56
57 m->class = class;
58
59 if (hashmap_put(manager->machines, m->name, m) < 0)
60 goto fail;
61
62 m->manager = manager;
63
64 return m;
65
66 fail:
67 free(m->state_file);
68 free(m->name);
69 return mfree(m);
70 }
71
72 void machine_free(Machine *m) {
73 assert(m);
74
75 while (m->operations)
76 operation_free(m->operations);
77
78 if (m->in_gc_queue)
79 LIST_REMOVE(gc_queue, m->manager->machine_gc_queue, m);
80
81 machine_release_unit(m);
82
83 free(m->scope_job);
84
85 (void) hashmap_remove(m->manager->machines, m->name);
86
87 if (m->manager->host_machine == m)
88 m->manager->host_machine = NULL;
89
90 if (m->leader > 0)
91 (void) hashmap_remove_value(m->manager->machine_leaders, PID_TO_PTR(m->leader), m);
92
93 sd_bus_message_unref(m->create_message);
94
95 free(m->name);
96 free(m->state_file);
97 free(m->service);
98 free(m->root_directory);
99 free(m->netif);
100 free(m);
101 }
102
103 int machine_save(Machine *m) {
104 _cleanup_free_ char *temp_path = NULL;
105 _cleanup_fclose_ FILE *f = NULL;
106 int r;
107
108 assert(m);
109
110 if (!m->state_file)
111 return 0;
112
113 if (!m->started)
114 return 0;
115
116 r = mkdir_safe_label("/run/systemd/machines", 0755, 0, 0, MKDIR_WARN_MODE);
117 if (r < 0)
118 goto fail;
119
120 r = fopen_temporary(m->state_file, &f, &temp_path);
121 if (r < 0)
122 goto fail;
123
124 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
125 (void) fchmod(fileno(f), 0644);
126
127 fprintf(f,
128 "# This is private data. Do not parse.\n"
129 "NAME=%s\n",
130 m->name);
131
132 if (m->unit) {
133 _cleanup_free_ char *escaped;
134
135 escaped = cescape(m->unit);
136 if (!escaped) {
137 r = -ENOMEM;
138 goto fail;
139 }
140
141 fprintf(f, "SCOPE=%s\n", escaped); /* We continue to call this "SCOPE=" because it is internal only, and we want to stay compatible with old files */
142 }
143
144 if (m->scope_job)
145 fprintf(f, "SCOPE_JOB=%s\n", m->scope_job);
146
147 if (m->service) {
148 _cleanup_free_ char *escaped;
149
150 escaped = cescape(m->service);
151 if (!escaped) {
152 r = -ENOMEM;
153 goto fail;
154 }
155 fprintf(f, "SERVICE=%s\n", escaped);
156 }
157
158 if (m->root_directory) {
159 _cleanup_free_ char *escaped;
160
161 escaped = cescape(m->root_directory);
162 if (!escaped) {
163 r = -ENOMEM;
164 goto fail;
165 }
166 fprintf(f, "ROOT=%s\n", escaped);
167 }
168
169 if (!sd_id128_is_null(m->id))
170 fprintf(f, "ID=" SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(m->id));
171
172 if (m->leader != 0)
173 fprintf(f, "LEADER="PID_FMT"\n", m->leader);
174
175 if (m->class != _MACHINE_CLASS_INVALID)
176 fprintf(f, "CLASS=%s\n", machine_class_to_string(m->class));
177
178 if (dual_timestamp_is_set(&m->timestamp))
179 fprintf(f,
180 "REALTIME="USEC_FMT"\n"
181 "MONOTONIC="USEC_FMT"\n",
182 m->timestamp.realtime,
183 m->timestamp.monotonic);
184
185 if (m->n_netif > 0) {
186 unsigned i;
187
188 fputs("NETIF=", f);
189
190 for (i = 0; i < m->n_netif; i++) {
191 if (i != 0)
192 fputc(' ', f);
193
194 fprintf(f, "%i", m->netif[i]);
195 }
196
197 fputc('\n', f);
198 }
199
200 r = fflush_and_check(f);
201 if (r < 0)
202 goto fail;
203
204 if (rename(temp_path, m->state_file) < 0) {
205 r = -errno;
206 goto fail;
207 }
208
209 if (m->unit) {
210 char *sl;
211
212 /* Create a symlink from the unit name to the machine
213 * name, so that we can quickly find the machine for
214 * each given unit. Ignore error. */
215 sl = strjoina("/run/systemd/machines/unit:", m->unit);
216 (void) symlink(m->name, sl);
217 }
218
219 return 0;
220
221 fail:
222 (void) unlink(m->state_file);
223
224 if (temp_path)
225 (void) unlink(temp_path);
226
227 return log_error_errno(r, "Failed to save machine data %s: %m", m->state_file);
228 }
229
230 static void machine_unlink(Machine *m) {
231 assert(m);
232
233 if (m->unit) {
234 char *sl;
235
236 sl = strjoina("/run/systemd/machines/unit:", m->unit);
237 (void) unlink(sl);
238 }
239
240 if (m->state_file)
241 (void) unlink(m->state_file);
242 }
243
244 int machine_load(Machine *m) {
245 _cleanup_free_ char *realtime = NULL, *monotonic = NULL, *id = NULL, *leader = NULL, *class = NULL, *netif = NULL;
246 int r;
247
248 assert(m);
249
250 if (!m->state_file)
251 return 0;
252
253 r = parse_env_file(NULL, m->state_file, NEWLINE,
254 "SCOPE", &m->unit,
255 "SCOPE_JOB", &m->scope_job,
256 "SERVICE", &m->service,
257 "ROOT", &m->root_directory,
258 "ID", &id,
259 "LEADER", &leader,
260 "CLASS", &class,
261 "REALTIME", &realtime,
262 "MONOTONIC", &monotonic,
263 "NETIF", &netif,
264 NULL);
265 if (r < 0) {
266 if (r == -ENOENT)
267 return 0;
268
269 return log_error_errno(r, "Failed to read %s: %m", m->state_file);
270 }
271
272 if (id)
273 sd_id128_from_string(id, &m->id);
274
275 if (leader)
276 parse_pid(leader, &m->leader);
277
278 if (class) {
279 MachineClass c;
280
281 c = machine_class_from_string(class);
282 if (c >= 0)
283 m->class = c;
284 }
285
286 if (realtime)
287 timestamp_deserialize(realtime, &m->timestamp.realtime);
288 if (monotonic)
289 timestamp_deserialize(monotonic, &m->timestamp.monotonic);
290
291 if (netif) {
292 size_t allocated = 0, nr = 0;
293 const char *p;
294 int *ni = NULL;
295
296 p = netif;
297 for (;;) {
298 _cleanup_free_ char *word = NULL;
299 int ifi;
300
301 r = extract_first_word(&p, &word, NULL, 0);
302 if (r == 0)
303 break;
304 if (r == -ENOMEM)
305 return log_oom();
306 if (r < 0) {
307 log_warning_errno(r, "Failed to parse NETIF: %s", netif);
308 break;
309 }
310
311 if (parse_ifindex(word, &ifi) < 0)
312 continue;
313
314 if (!GREEDY_REALLOC(ni, allocated, nr+1)) {
315 free(ni);
316 return log_oom();
317 }
318
319 ni[nr++] = ifi;
320 }
321
322 free(m->netif);
323 m->netif = ni;
324 m->n_netif = nr;
325 }
326
327 return r;
328 }
329
330 static int machine_start_scope(Machine *m, sd_bus_message *properties, sd_bus_error *error) {
331 int r = 0;
332
333 assert(m);
334 assert(m->class != MACHINE_HOST);
335
336 if (!m->unit) {
337 _cleanup_free_ char *escaped = NULL;
338 char *scope, *description, *job = NULL;
339
340 escaped = unit_name_escape(m->name);
341 if (!escaped)
342 return log_oom();
343
344 scope = strjoin("machine-", escaped, ".scope");
345 if (!scope)
346 return log_oom();
347
348 description = strjoina(m->class == MACHINE_VM ? "Virtual Machine " : "Container ", m->name);
349
350 r = manager_start_scope(m->manager, scope, m->leader, SPECIAL_MACHINE_SLICE, description, properties, error, &job);
351 if (r < 0) {
352 log_error("Failed to start machine scope: %s", bus_error_message(error, r));
353 free(scope);
354 return r;
355 } else {
356 m->unit = scope;
357
358 free(m->scope_job);
359 m->scope_job = job;
360 }
361 }
362
363 if (m->unit)
364 hashmap_put(m->manager->machine_units, m->unit, m);
365
366 return r;
367 }
368
369 int machine_start(Machine *m, sd_bus_message *properties, sd_bus_error *error) {
370 int r;
371
372 assert(m);
373
374 if (!IN_SET(m->class, MACHINE_CONTAINER, MACHINE_VM))
375 return -EOPNOTSUPP;
376
377 if (m->started)
378 return 0;
379
380 r = hashmap_put(m->manager->machine_leaders, PID_TO_PTR(m->leader), m);
381 if (r < 0)
382 return r;
383
384 /* Create cgroup */
385 r = machine_start_scope(m, properties, error);
386 if (r < 0)
387 return r;
388
389 log_struct(LOG_INFO,
390 "MESSAGE_ID=" SD_MESSAGE_MACHINE_START_STR,
391 "NAME=%s", m->name,
392 "LEADER="PID_FMT, m->leader,
393 LOG_MESSAGE("New machine %s.", m->name));
394
395 if (!dual_timestamp_is_set(&m->timestamp))
396 dual_timestamp_get(&m->timestamp);
397
398 m->started = true;
399
400 /* Save new machine data */
401 machine_save(m);
402
403 machine_send_signal(m, true);
404
405 return 0;
406 }
407
408 static int machine_stop_scope(Machine *m) {
409 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
410 char *job = NULL;
411 int r;
412
413 assert(m);
414 assert(m->class != MACHINE_HOST);
415
416 if (!m->unit)
417 return 0;
418
419 r = manager_stop_unit(m->manager, m->unit, &error, &job);
420 if (r < 0) {
421 log_error("Failed to stop machine scope: %s", bus_error_message(&error, r));
422 return r;
423 }
424
425 free(m->scope_job);
426 m->scope_job = job;
427
428 return 0;
429 }
430
431 int machine_stop(Machine *m) {
432 int r;
433 assert(m);
434
435 if (!IN_SET(m->class, MACHINE_CONTAINER, MACHINE_VM))
436 return -EOPNOTSUPP;
437
438 r = machine_stop_scope(m);
439
440 m->stopping = true;
441
442 machine_save(m);
443
444 return r;
445 }
446
447 int machine_finalize(Machine *m) {
448 assert(m);
449
450 if (m->started)
451 log_struct(LOG_INFO,
452 "MESSAGE_ID=" SD_MESSAGE_MACHINE_STOP_STR,
453 "NAME=%s", m->name,
454 "LEADER="PID_FMT, m->leader,
455 LOG_MESSAGE("Machine %s terminated.", m->name));
456
457 machine_unlink(m);
458 machine_add_to_gc_queue(m);
459
460 if (m->started) {
461 machine_send_signal(m, false);
462 m->started = false;
463 }
464
465 return 0;
466 }
467
468 bool machine_may_gc(Machine *m, bool drop_not_started) {
469 assert(m);
470
471 if (m->class == MACHINE_HOST)
472 return false;
473
474 if (drop_not_started && !m->started)
475 return true;
476
477 if (m->scope_job && manager_job_is_active(m->manager, m->scope_job))
478 return false;
479
480 if (m->unit && manager_unit_is_active(m->manager, m->unit))
481 return false;
482
483 return true;
484 }
485
486 void machine_add_to_gc_queue(Machine *m) {
487 assert(m);
488
489 if (m->in_gc_queue)
490 return;
491
492 LIST_PREPEND(gc_queue, m->manager->machine_gc_queue, m);
493 m->in_gc_queue = true;
494 }
495
496 MachineState machine_get_state(Machine *s) {
497 assert(s);
498
499 if (s->class == MACHINE_HOST)
500 return MACHINE_RUNNING;
501
502 if (s->stopping)
503 return MACHINE_CLOSING;
504
505 if (s->scope_job)
506 return MACHINE_OPENING;
507
508 return MACHINE_RUNNING;
509 }
510
511 int machine_kill(Machine *m, KillWho who, int signo) {
512 assert(m);
513
514 if (!IN_SET(m->class, MACHINE_VM, MACHINE_CONTAINER))
515 return -EOPNOTSUPP;
516
517 if (!m->unit)
518 return -ESRCH;
519
520 if (who == KILL_LEADER) {
521 /* If we shall simply kill the leader, do so directly */
522
523 if (kill(m->leader, signo) < 0)
524 return -errno;
525
526 return 0;
527 }
528
529 /* Otherwise, make PID 1 do it for us, for the entire cgroup */
530 return manager_kill_unit(m->manager, m->unit, signo, NULL);
531 }
532
533 int machine_openpt(Machine *m, int flags) {
534 assert(m);
535
536 switch (m->class) {
537
538 case MACHINE_HOST: {
539 int fd;
540
541 fd = posix_openpt(flags);
542 if (fd < 0)
543 return -errno;
544
545 if (unlockpt(fd) < 0)
546 return -errno;
547
548 return fd;
549 }
550
551 case MACHINE_CONTAINER:
552 if (m->leader <= 0)
553 return -EINVAL;
554
555 return openpt_in_namespace(m->leader, flags);
556
557 default:
558 return -EOPNOTSUPP;
559 }
560 }
561
562 int machine_open_terminal(Machine *m, const char *path, int mode) {
563 assert(m);
564
565 switch (m->class) {
566
567 case MACHINE_HOST:
568 return open_terminal(path, mode);
569
570 case MACHINE_CONTAINER:
571 if (m->leader <= 0)
572 return -EINVAL;
573
574 return open_terminal_in_namespace(m->leader, path, mode);
575
576 default:
577 return -EOPNOTSUPP;
578 }
579 }
580
581 void machine_release_unit(Machine *m) {
582 assert(m);
583
584 if (!m->unit)
585 return;
586
587 (void) hashmap_remove(m->manager->machine_units, m->unit);
588 m->unit = mfree(m->unit);
589 }
590
591 int machine_get_uid_shift(Machine *m, uid_t *ret) {
592 char p[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(pid_t) + 1];
593 uid_t uid_base, uid_shift, uid_range;
594 gid_t gid_base, gid_shift, gid_range;
595 _cleanup_fclose_ FILE *f = NULL;
596 int k;
597
598 assert(m);
599 assert(ret);
600
601 /* Return the base UID/GID of the specified machine. Note that this only works for containers with simple
602 * mappings. In most cases setups should be simple like this, and administrators should only care about the
603 * basic offset a container has relative to the host. This is what this function exposes.
604 *
605 * If we encounter any more complex mappings we politely refuse this with ENXIO. */
606
607 if (m->class == MACHINE_HOST) {
608 *ret = 0;
609 return 0;
610 }
611
612 if (m->class != MACHINE_CONTAINER)
613 return -EOPNOTSUPP;
614
615 xsprintf(p, "/proc/" PID_FMT "/uid_map", m->leader);
616 f = fopen(p, "re");
617 if (!f) {
618 if (errno == ENOENT) {
619 /* If the file doesn't exist, user namespacing is off in the kernel, return a zero mapping hence. */
620 *ret = 0;
621 return 0;
622 }
623
624 return -errno;
625 }
626
627 /* Read the first line. There's at least one. */
628 errno = 0;
629 k = fscanf(f, UID_FMT " " UID_FMT " " UID_FMT "\n", &uid_base, &uid_shift, &uid_range);
630 if (k != 3) {
631 if (ferror(f))
632 return -errno;
633
634 return -EBADMSG;
635 }
636
637 /* Not a mapping starting at 0? Then it's a complex mapping we can't expose here. */
638 if (uid_base != 0)
639 return -ENXIO;
640 /* Insist that at least the nobody user is mapped, everything else is weird, and hence complex, and we don't support it */
641 if (uid_range < UID_NOBODY)
642 return -ENXIO;
643
644 /* If there's more than one line, then we don't support this mapping. */
645 if (fgetc(f) != EOF)
646 return -ENXIO;
647
648 fclose(f);
649
650 xsprintf(p, "/proc/" PID_FMT "/gid_map", m->leader);
651 f = fopen(p, "re");
652 if (!f)
653 return -errno;
654
655 /* Read the first line. There's at least one. */
656 errno = 0;
657 k = fscanf(f, GID_FMT " " GID_FMT " " GID_FMT "\n", &gid_base, &gid_shift, &gid_range);
658 if (k != 3) {
659 if (ferror(f))
660 return -errno;
661
662 return -EBADMSG;
663 }
664
665 /* If there's more than one line, then we don't support this file. */
666 if (fgetc(f) != EOF)
667 return -ENXIO;
668
669 /* If the UID and GID mapping doesn't match, we don't support this mapping. */
670 if (uid_base != (uid_t) gid_base)
671 return -ENXIO;
672 if (uid_shift != (uid_t) gid_shift)
673 return -ENXIO;
674 if (uid_range != (uid_t) gid_range)
675 return -ENXIO;
676
677 *ret = uid_shift;
678 return 0;
679 }
680
681 static const char* const machine_class_table[_MACHINE_CLASS_MAX] = {
682 [MACHINE_CONTAINER] = "container",
683 [MACHINE_VM] = "vm",
684 [MACHINE_HOST] = "host",
685 };
686
687 DEFINE_STRING_TABLE_LOOKUP(machine_class, MachineClass);
688
689 static const char* const machine_state_table[_MACHINE_STATE_MAX] = {
690 [MACHINE_OPENING] = "opening",
691 [MACHINE_RUNNING] = "running",
692 [MACHINE_CLOSING] = "closing"
693 };
694
695 DEFINE_STRING_TABLE_LOOKUP(machine_state, MachineState);
696
697 static const char* const kill_who_table[_KILL_WHO_MAX] = {
698 [KILL_LEADER] = "leader",
699 [KILL_ALL] = "all"
700 };
701
702 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);