]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/machine/machine.c
Merge pull request #9712 from filbranden/socket1
[thirdparty/systemd.git] / src / machine / machine.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <string.h>
5 #include <unistd.h>
6 #include <stdio_ext.h>
7
8 #include "sd-messages.h"
9
10 #include "alloc-util.h"
11 #include "bus-error.h"
12 #include "bus-util.h"
13 #include "escape.h"
14 #include "extract-word.h"
15 #include "fd-util.h"
16 #include "fileio.h"
17 #include "format-util.h"
18 #include "hashmap.h"
19 #include "machine-dbus.h"
20 #include "machine.h"
21 #include "mkdir.h"
22 #include "parse-util.h"
23 #include "process-util.h"
24 #include "special.h"
25 #include "stdio-util.h"
26 #include "string-table.h"
27 #include "terminal-util.h"
28 #include "unit-name.h"
29 #include "user-util.h"
30 #include "util.h"
31
32 Machine* machine_new(Manager *manager, MachineClass class, const char *name) {
33 Machine *m;
34
35 assert(manager);
36 assert(class < _MACHINE_CLASS_MAX);
37 assert(name);
38
39 /* Passing class == _MACHINE_CLASS_INVALID here is fine. It
40 * means as much as "we don't know yet", and that we'll figure
41 * it out later when loading the state file. */
42
43 m = new0(Machine, 1);
44 if (!m)
45 return NULL;
46
47 m->name = strdup(name);
48 if (!m->name)
49 goto fail;
50
51 if (class != MACHINE_HOST) {
52 m->state_file = strappend("/run/systemd/machines/", m->name);
53 if (!m->state_file)
54 goto fail;
55 }
56
57 m->class = class;
58
59 if (hashmap_put(manager->machines, m->name, m) < 0)
60 goto fail;
61
62 m->manager = manager;
63
64 return m;
65
66 fail:
67 free(m->state_file);
68 free(m->name);
69 return mfree(m);
70 }
71
72 void machine_free(Machine *m) {
73 assert(m);
74
75 while (m->operations)
76 operation_free(m->operations);
77
78 if (m->in_gc_queue)
79 LIST_REMOVE(gc_queue, m->manager->machine_gc_queue, m);
80
81 machine_release_unit(m);
82
83 free(m->scope_job);
84
85 (void) hashmap_remove(m->manager->machines, m->name);
86
87 if (m->manager->host_machine == m)
88 m->manager->host_machine = NULL;
89
90 if (m->leader > 0)
91 (void) hashmap_remove_value(m->manager->machine_leaders, PID_TO_PTR(m->leader), m);
92
93 sd_bus_message_unref(m->create_message);
94
95 free(m->name);
96 free(m->state_file);
97 free(m->service);
98 free(m->root_directory);
99 free(m->netif);
100 free(m);
101 }
102
103 int machine_save(Machine *m) {
104 _cleanup_free_ char *temp_path = NULL;
105 _cleanup_fclose_ FILE *f = NULL;
106 int r;
107
108 assert(m);
109
110 if (!m->state_file)
111 return 0;
112
113 if (!m->started)
114 return 0;
115
116 r = mkdir_safe_label("/run/systemd/machines", 0755, 0, 0, MKDIR_WARN_MODE);
117 if (r < 0)
118 goto fail;
119
120 r = fopen_temporary(m->state_file, &f, &temp_path);
121 if (r < 0)
122 goto fail;
123
124 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
125 (void) fchmod(fileno(f), 0644);
126
127 fprintf(f,
128 "# This is private data. Do not parse.\n"
129 "NAME=%s\n",
130 m->name);
131
132 if (m->unit) {
133 _cleanup_free_ char *escaped;
134
135 escaped = cescape(m->unit);
136 if (!escaped) {
137 r = -ENOMEM;
138 goto fail;
139 }
140
141 fprintf(f, "SCOPE=%s\n", escaped); /* We continue to call this "SCOPE=" because it is internal only, and we want to stay compatible with old files */
142 }
143
144 if (m->scope_job)
145 fprintf(f, "SCOPE_JOB=%s\n", m->scope_job);
146
147 if (m->service) {
148 _cleanup_free_ char *escaped;
149
150 escaped = cescape(m->service);
151 if (!escaped) {
152 r = -ENOMEM;
153 goto fail;
154 }
155 fprintf(f, "SERVICE=%s\n", escaped);
156 }
157
158 if (m->root_directory) {
159 _cleanup_free_ char *escaped;
160
161 escaped = cescape(m->root_directory);
162 if (!escaped) {
163 r = -ENOMEM;
164 goto fail;
165 }
166 fprintf(f, "ROOT=%s\n", escaped);
167 }
168
169 if (!sd_id128_is_null(m->id))
170 fprintf(f, "ID=" SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(m->id));
171
172 if (m->leader != 0)
173 fprintf(f, "LEADER="PID_FMT"\n", m->leader);
174
175 if (m->class != _MACHINE_CLASS_INVALID)
176 fprintf(f, "CLASS=%s\n", machine_class_to_string(m->class));
177
178 if (dual_timestamp_is_set(&m->timestamp))
179 fprintf(f,
180 "REALTIME="USEC_FMT"\n"
181 "MONOTONIC="USEC_FMT"\n",
182 m->timestamp.realtime,
183 m->timestamp.monotonic);
184
185 if (m->n_netif > 0) {
186 unsigned i;
187
188 fputs("NETIF=", f);
189
190 for (i = 0; i < m->n_netif; i++) {
191 if (i != 0)
192 fputc(' ', f);
193
194 fprintf(f, "%i", m->netif[i]);
195 }
196
197 fputc('\n', f);
198 }
199
200 r = fflush_and_check(f);
201 if (r < 0)
202 goto fail;
203
204 if (rename(temp_path, m->state_file) < 0) {
205 r = -errno;
206 goto fail;
207 }
208
209 if (m->unit) {
210 char *sl;
211
212 /* Create a symlink from the unit name to the machine
213 * name, so that we can quickly find the machine for
214 * each given unit. Ignore error. */
215 sl = strjoina("/run/systemd/machines/unit:", m->unit);
216 (void) symlink(m->name, sl);
217 }
218
219 return 0;
220
221 fail:
222 (void) unlink(m->state_file);
223
224 if (temp_path)
225 (void) unlink(temp_path);
226
227 return log_error_errno(r, "Failed to save machine data %s: %m", m->state_file);
228 }
229
230 static void machine_unlink(Machine *m) {
231 assert(m);
232
233 if (m->unit) {
234 char *sl;
235
236 sl = strjoina("/run/systemd/machines/unit:", m->unit);
237 (void) unlink(sl);
238 }
239
240 if (m->state_file)
241 (void) unlink(m->state_file);
242 }
243
244 int machine_load(Machine *m) {
245 _cleanup_free_ char *realtime = NULL, *monotonic = NULL, *id = NULL, *leader = NULL, *class = NULL, *netif = NULL;
246 int r;
247
248 assert(m);
249
250 if (!m->state_file)
251 return 0;
252
253 r = parse_env_file(NULL, m->state_file, NEWLINE,
254 "SCOPE", &m->unit,
255 "SCOPE_JOB", &m->scope_job,
256 "SERVICE", &m->service,
257 "ROOT", &m->root_directory,
258 "ID", &id,
259 "LEADER", &leader,
260 "CLASS", &class,
261 "REALTIME", &realtime,
262 "MONOTONIC", &monotonic,
263 "NETIF", &netif,
264 NULL);
265 if (r < 0) {
266 if (r == -ENOENT)
267 return 0;
268
269 return log_error_errno(r, "Failed to read %s: %m", m->state_file);
270 }
271
272 if (id)
273 sd_id128_from_string(id, &m->id);
274
275 if (leader)
276 parse_pid(leader, &m->leader);
277
278 if (class) {
279 MachineClass c;
280
281 c = machine_class_from_string(class);
282 if (c >= 0)
283 m->class = c;
284 }
285
286 if (realtime)
287 timestamp_deserialize(realtime, &m->timestamp.realtime);
288 if (monotonic)
289 timestamp_deserialize(monotonic, &m->timestamp.monotonic);
290
291 if (netif) {
292 size_t allocated = 0, nr = 0;
293 const char *p;
294 int *ni = NULL;
295
296 p = netif;
297 for (;;) {
298 _cleanup_free_ char *word = NULL;
299 int ifi;
300
301 r = extract_first_word(&p, &word, NULL, 0);
302 if (r == 0)
303 break;
304 if (r == -ENOMEM)
305 return log_oom();
306 if (r < 0) {
307 log_warning_errno(r, "Failed to parse NETIF: %s", netif);
308 break;
309 }
310
311 if (parse_ifindex(word, &ifi) < 0)
312 continue;
313
314 if (!GREEDY_REALLOC(ni, allocated, nr+1)) {
315 free(ni);
316 return log_oom();
317 }
318
319 ni[nr++] = ifi;
320 }
321
322 free(m->netif);
323 m->netif = ni;
324 m->n_netif = nr;
325 }
326
327 return r;
328 }
329
330 static int machine_start_scope(Machine *m, sd_bus_message *properties, sd_bus_error *error) {
331 assert(m);
332 assert(m->class != MACHINE_HOST);
333
334 if (!m->unit) {
335 _cleanup_free_ char *escaped = NULL, *scope = NULL;
336 char *description, *job = NULL;
337 int r;
338
339 escaped = unit_name_escape(m->name);
340 if (!escaped)
341 return log_oom();
342
343 scope = strjoin("machine-", escaped, ".scope");
344 if (!scope)
345 return log_oom();
346
347 description = strjoina(m->class == MACHINE_VM ? "Virtual Machine " : "Container ", m->name);
348
349 r = manager_start_scope(m->manager, scope, m->leader, SPECIAL_MACHINE_SLICE, description, properties, error, &job);
350 if (r < 0)
351 return log_error_errno(r, "Failed to start machine scope: %s", bus_error_message(error, r));
352
353 m->unit = TAKE_PTR(scope);
354 free_and_replace(m->scope_job, job);
355 }
356
357 if (m->unit)
358 hashmap_put(m->manager->machine_units, m->unit, m);
359
360 return 0;
361 }
362
363 int machine_start(Machine *m, sd_bus_message *properties, sd_bus_error *error) {
364 int r;
365
366 assert(m);
367
368 if (!IN_SET(m->class, MACHINE_CONTAINER, MACHINE_VM))
369 return -EOPNOTSUPP;
370
371 if (m->started)
372 return 0;
373
374 r = hashmap_put(m->manager->machine_leaders, PID_TO_PTR(m->leader), m);
375 if (r < 0)
376 return r;
377
378 /* Create cgroup */
379 r = machine_start_scope(m, properties, error);
380 if (r < 0)
381 return r;
382
383 log_struct(LOG_INFO,
384 "MESSAGE_ID=" SD_MESSAGE_MACHINE_START_STR,
385 "NAME=%s", m->name,
386 "LEADER="PID_FMT, m->leader,
387 LOG_MESSAGE("New machine %s.", m->name));
388
389 if (!dual_timestamp_is_set(&m->timestamp))
390 dual_timestamp_get(&m->timestamp);
391
392 m->started = true;
393
394 /* Save new machine data */
395 machine_save(m);
396
397 machine_send_signal(m, true);
398
399 return 0;
400 }
401
402 static int machine_stop_scope(Machine *m) {
403 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
404 char *job = NULL;
405 int r;
406
407 assert(m);
408 assert(m->class != MACHINE_HOST);
409
410 if (!m->unit)
411 return 0;
412
413 r = manager_stop_unit(m->manager, m->unit, &error, &job);
414 if (r < 0)
415 return log_error_errno(r, "Failed to stop machine scope: %s", bus_error_message(&error, r));
416
417 return free_and_replace(m->scope_job, job);
418 }
419
420 int machine_stop(Machine *m) {
421 int r;
422 assert(m);
423
424 if (!IN_SET(m->class, MACHINE_CONTAINER, MACHINE_VM))
425 return -EOPNOTSUPP;
426
427 r = machine_stop_scope(m);
428
429 m->stopping = true;
430
431 machine_save(m);
432
433 return r;
434 }
435
436 int machine_finalize(Machine *m) {
437 assert(m);
438
439 if (m->started)
440 log_struct(LOG_INFO,
441 "MESSAGE_ID=" SD_MESSAGE_MACHINE_STOP_STR,
442 "NAME=%s", m->name,
443 "LEADER="PID_FMT, m->leader,
444 LOG_MESSAGE("Machine %s terminated.", m->name));
445
446 machine_unlink(m);
447 machine_add_to_gc_queue(m);
448
449 if (m->started) {
450 machine_send_signal(m, false);
451 m->started = false;
452 }
453
454 return 0;
455 }
456
457 bool machine_may_gc(Machine *m, bool drop_not_started) {
458 assert(m);
459
460 if (m->class == MACHINE_HOST)
461 return false;
462
463 if (drop_not_started && !m->started)
464 return true;
465
466 if (m->scope_job && manager_job_is_active(m->manager, m->scope_job))
467 return false;
468
469 if (m->unit && manager_unit_is_active(m->manager, m->unit))
470 return false;
471
472 return true;
473 }
474
475 void machine_add_to_gc_queue(Machine *m) {
476 assert(m);
477
478 if (m->in_gc_queue)
479 return;
480
481 LIST_PREPEND(gc_queue, m->manager->machine_gc_queue, m);
482 m->in_gc_queue = true;
483 }
484
485 MachineState machine_get_state(Machine *s) {
486 assert(s);
487
488 if (s->class == MACHINE_HOST)
489 return MACHINE_RUNNING;
490
491 if (s->stopping)
492 return MACHINE_CLOSING;
493
494 if (s->scope_job)
495 return MACHINE_OPENING;
496
497 return MACHINE_RUNNING;
498 }
499
500 int machine_kill(Machine *m, KillWho who, int signo) {
501 assert(m);
502
503 if (!IN_SET(m->class, MACHINE_VM, MACHINE_CONTAINER))
504 return -EOPNOTSUPP;
505
506 if (!m->unit)
507 return -ESRCH;
508
509 if (who == KILL_LEADER) {
510 /* If we shall simply kill the leader, do so directly */
511
512 if (kill(m->leader, signo) < 0)
513 return -errno;
514
515 return 0;
516 }
517
518 /* Otherwise, make PID 1 do it for us, for the entire cgroup */
519 return manager_kill_unit(m->manager, m->unit, signo, NULL);
520 }
521
522 int machine_openpt(Machine *m, int flags) {
523 assert(m);
524
525 switch (m->class) {
526
527 case MACHINE_HOST: {
528 int fd;
529
530 fd = posix_openpt(flags);
531 if (fd < 0)
532 return -errno;
533
534 if (unlockpt(fd) < 0)
535 return -errno;
536
537 return fd;
538 }
539
540 case MACHINE_CONTAINER:
541 if (m->leader <= 0)
542 return -EINVAL;
543
544 return openpt_in_namespace(m->leader, flags);
545
546 default:
547 return -EOPNOTSUPP;
548 }
549 }
550
551 int machine_open_terminal(Machine *m, const char *path, int mode) {
552 assert(m);
553
554 switch (m->class) {
555
556 case MACHINE_HOST:
557 return open_terminal(path, mode);
558
559 case MACHINE_CONTAINER:
560 if (m->leader <= 0)
561 return -EINVAL;
562
563 return open_terminal_in_namespace(m->leader, path, mode);
564
565 default:
566 return -EOPNOTSUPP;
567 }
568 }
569
570 void machine_release_unit(Machine *m) {
571 assert(m);
572
573 if (!m->unit)
574 return;
575
576 (void) hashmap_remove(m->manager->machine_units, m->unit);
577 m->unit = mfree(m->unit);
578 }
579
580 int machine_get_uid_shift(Machine *m, uid_t *ret) {
581 char p[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(pid_t) + 1];
582 uid_t uid_base, uid_shift, uid_range;
583 gid_t gid_base, gid_shift, gid_range;
584 _cleanup_fclose_ FILE *f = NULL;
585 int k;
586
587 assert(m);
588 assert(ret);
589
590 /* Return the base UID/GID of the specified machine. Note that this only works for containers with simple
591 * mappings. In most cases setups should be simple like this, and administrators should only care about the
592 * basic offset a container has relative to the host. This is what this function exposes.
593 *
594 * If we encounter any more complex mappings we politely refuse this with ENXIO. */
595
596 if (m->class == MACHINE_HOST) {
597 *ret = 0;
598 return 0;
599 }
600
601 if (m->class != MACHINE_CONTAINER)
602 return -EOPNOTSUPP;
603
604 xsprintf(p, "/proc/" PID_FMT "/uid_map", m->leader);
605 f = fopen(p, "re");
606 if (!f) {
607 if (errno == ENOENT) {
608 /* If the file doesn't exist, user namespacing is off in the kernel, return a zero mapping hence. */
609 *ret = 0;
610 return 0;
611 }
612
613 return -errno;
614 }
615
616 /* Read the first line. There's at least one. */
617 errno = 0;
618 k = fscanf(f, UID_FMT " " UID_FMT " " UID_FMT "\n", &uid_base, &uid_shift, &uid_range);
619 if (k != 3) {
620 if (ferror(f))
621 return -errno;
622
623 return -EBADMSG;
624 }
625
626 /* Not a mapping starting at 0? Then it's a complex mapping we can't expose here. */
627 if (uid_base != 0)
628 return -ENXIO;
629 /* Insist that at least the nobody user is mapped, everything else is weird, and hence complex, and we don't support it */
630 if (uid_range < UID_NOBODY)
631 return -ENXIO;
632
633 /* If there's more than one line, then we don't support this mapping. */
634 if (fgetc(f) != EOF)
635 return -ENXIO;
636
637 fclose(f);
638
639 xsprintf(p, "/proc/" PID_FMT "/gid_map", m->leader);
640 f = fopen(p, "re");
641 if (!f)
642 return -errno;
643
644 /* Read the first line. There's at least one. */
645 errno = 0;
646 k = fscanf(f, GID_FMT " " GID_FMT " " GID_FMT "\n", &gid_base, &gid_shift, &gid_range);
647 if (k != 3) {
648 if (ferror(f))
649 return -errno;
650
651 return -EBADMSG;
652 }
653
654 /* If there's more than one line, then we don't support this file. */
655 if (fgetc(f) != EOF)
656 return -ENXIO;
657
658 /* If the UID and GID mapping doesn't match, we don't support this mapping. */
659 if (uid_base != (uid_t) gid_base)
660 return -ENXIO;
661 if (uid_shift != (uid_t) gid_shift)
662 return -ENXIO;
663 if (uid_range != (uid_t) gid_range)
664 return -ENXIO;
665
666 *ret = uid_shift;
667 return 0;
668 }
669
670 static const char* const machine_class_table[_MACHINE_CLASS_MAX] = {
671 [MACHINE_CONTAINER] = "container",
672 [MACHINE_VM] = "vm",
673 [MACHINE_HOST] = "host",
674 };
675
676 DEFINE_STRING_TABLE_LOOKUP(machine_class, MachineClass);
677
678 static const char* const machine_state_table[_MACHINE_STATE_MAX] = {
679 [MACHINE_OPENING] = "opening",
680 [MACHINE_RUNNING] = "running",
681 [MACHINE_CLOSING] = "closing"
682 };
683
684 DEFINE_STRING_TABLE_LOOKUP(machine_state, MachineState);
685
686 static const char* const kill_who_table[_KILL_WHO_MAX] = {
687 [KILL_LEADER] = "leader",
688 [KILL_ALL] = "all"
689 };
690
691 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);