]> git.ipfire.org Git - thirdparty/systemd.git/blame_incremental - src/shared/discover-image.c
Fixes for vscode/intellisense parsing (#38040)
[thirdparty/systemd.git] / src / shared / discover-image.c
... / ...
CommitLineData
1/* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3#include <fcntl.h>
4#include <linux/loop.h>
5#include <linux/magic.h>
6#include <stdio.h>
7#include <sys/file.h>
8#include <sys/ioctl.h>
9#include <sys/stat.h>
10#include <sys/sysmacros.h>
11#include <unistd.h>
12
13#include "sd-json.h"
14#include "sd-path.h"
15
16#include "alloc-util.h"
17#include "blockdev-util.h"
18#include "btrfs-util.h"
19#include "chase.h"
20#include "chattr-util.h"
21#include "copy.h"
22#include "dirent-util.h"
23#include "discover-image.h"
24#include "dissect-image.h"
25#include "env-file.h"
26#include "env-util.h"
27#include "extension-util.h"
28#include "fd-util.h"
29#include "fs-util.h"
30#include "hashmap.h"
31#include "hostname-setup.h"
32#include "id128-util.h"
33#include "initrd-util.h"
34#include "lock-util.h"
35#include "log.h"
36#include "loop-util.h"
37#include "mkdir.h"
38#include "nulstr-util.h"
39#include "os-util.h"
40#include "path-util.h"
41#include "rm-rf.h"
42#include "runtime-scope.h"
43#include "stat-util.h"
44#include "string-table.h"
45#include "string-util.h"
46#include "strv.h"
47#include "time-util.h"
48#include "vpick.h"
49#include "xattr-util.h"
50
51const char* const image_search_path[_IMAGE_CLASS_MAX] = {
52 [IMAGE_MACHINE] = "/etc/machines\0" /* only place symlinks here */
53 "/run/machines\0" /* and here too */
54 "/var/lib/machines\0" /* the main place for images */
55 "/var/lib/container\0" /* legacy */
56 "/usr/local/lib/machines\0"
57 "/usr/lib/machines\0",
58
59 [IMAGE_PORTABLE] = "/etc/portables\0" /* only place symlinks here */
60 "/run/portables\0" /* and here too */
61 "/var/lib/portables\0" /* the main place for images */
62 "/usr/local/lib/portables\0"
63 "/usr/lib/portables\0",
64
65 /* Note that we don't allow storing extensions under /usr/, unlike with other image types. That's
66 * because extension images are supposed to extend /usr/, so you get into recursive races, especially
67 * with directory-based extensions, as the kernel's OverlayFS explicitly checks for this and errors
68 * out with -ELOOP if it finds that a lowerdir= is a child of another lowerdir=. */
69 [IMAGE_SYSEXT] = "/etc/extensions\0" /* only place symlinks here */
70 "/run/extensions\0" /* and here too */
71 "/var/lib/extensions\0", /* the main place for images */
72
73 [IMAGE_CONFEXT] = "/run/confexts\0" /* only place symlinks here */
74 "/var/lib/confexts\0" /* the main place for images */
75 "/usr/local/lib/confexts\0"
76 "/usr/lib/confexts\0",
77};
78
79/* Inside the initrd, use a slightly different set of search path (i.e. include .extra/sysext/ and
80 * .extra/confext/ in extension search dir) */
81static const char* const image_search_path_initrd[_IMAGE_CLASS_MAX] = {
82 /* (entries that aren't listed here will get the same search path as for the non initrd-case) */
83
84 [IMAGE_SYSEXT] = "/etc/extensions\0" /* only place symlinks here */
85 "/run/extensions\0" /* and here too */
86 "/var/lib/extensions\0" /* the main place for images */
87 "/.extra/sysext\0", /* put sysext picked up by systemd-stub last, since not trusted */
88
89 [IMAGE_CONFEXT] = "/run/confexts\0" /* only place symlinks here */
90 "/var/lib/confexts\0" /* the main place for images */
91 "/usr/local/lib/confexts\0"
92 "/.extra/confext\0", /* put confext picked up by systemd-stub last, since not trusted */
93};
94
95static const char* image_class_suffix_table[_IMAGE_CLASS_MAX] = {
96 [IMAGE_SYSEXT] = ".sysext",
97 [IMAGE_CONFEXT] = ".confext",
98};
99
100DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(image_class_suffix, ImageClass);
101
102static const char *const image_root_table[_IMAGE_CLASS_MAX] = {
103 [IMAGE_MACHINE] = "/var/lib/machines",
104 [IMAGE_PORTABLE] = "/var/lib/portables",
105 [IMAGE_SYSEXT] = "/var/lib/extensions",
106 [IMAGE_CONFEXT] = "/var/lib/confexts",
107};
108
109DEFINE_STRING_TABLE_LOOKUP_TO_STRING(image_root, ImageClass);
110
111static const char *const image_root_runtime_table[_IMAGE_CLASS_MAX] = {
112 [IMAGE_MACHINE] = "/run/machines",
113 [IMAGE_PORTABLE] = "/run/portables",
114 [IMAGE_SYSEXT] = "/run/extensions",
115 [IMAGE_CONFEXT] = "/run/confexts",
116};
117
118DEFINE_STRING_TABLE_LOOKUP_TO_STRING(image_root_runtime, ImageClass);
119
120static Image* image_free(Image *i) {
121 assert(i);
122
123 free(i->name);
124 free(i->path);
125
126 free(i->hostname);
127 strv_free(i->machine_info);
128 strv_free(i->os_release);
129 strv_free(i->sysext_release);
130 strv_free(i->confext_release);
131
132 return mfree(i);
133}
134
135DEFINE_TRIVIAL_REF_UNREF_FUNC(Image, image, image_free);
136DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(image_hash_ops, char, string_hash_func, string_compare_func,
137 Image, image_unref);
138
139static char** image_settings_path(Image *image) {
140 _cleanup_strv_free_ char **l = NULL;
141 _cleanup_free_ char *fn = NULL;
142 size_t i = 0;
143 int r;
144
145 assert(image);
146
147 l = new0(char*, 4);
148 if (!l)
149 return NULL;
150
151 fn = strjoin(image->name, ".nspawn");
152 if (!fn)
153 return NULL;
154
155 FOREACH_STRING(s, "/etc/systemd/nspawn", "/run/systemd/nspawn") {
156 l[i] = path_join(s, fn);
157 if (!l[i])
158 return NULL;
159
160 i++;
161 }
162
163 r = file_in_same_dir(image->path, fn, l + i);
164 if (r == -ENOMEM)
165 return NULL;
166 if (r < 0)
167 log_debug_errno(r, "Failed to generate .nspawn settings path from image path, ignoring: %m");
168
169 strv_uniq(l);
170
171 return TAKE_PTR(l);
172}
173
174static int image_roothash_path(Image *image, char **ret) {
175 _cleanup_free_ char *fn = NULL;
176
177 assert(image);
178
179 fn = strjoin(image->name, ".roothash");
180 if (!fn)
181 return -ENOMEM;
182
183 return file_in_same_dir(image->path, fn, ret);
184}
185
186static int image_new(
187 ImageType t,
188 ImageClass c,
189 const char *pretty,
190 const char *path,
191 const char *filename,
192 bool read_only,
193 usec_t crtime,
194 usec_t mtime,
195 Image **ret) {
196
197 _cleanup_(image_unrefp) Image *i = NULL;
198
199 assert(t >= 0);
200 assert(t < _IMAGE_TYPE_MAX);
201 assert(pretty);
202 assert(filename);
203 assert(ret);
204
205 i = new(Image, 1);
206 if (!i)
207 return -ENOMEM;
208
209 *i = (Image) {
210 .n_ref = 1,
211 .type = t,
212 .class = c,
213 .read_only = read_only,
214 .crtime = crtime,
215 .mtime = mtime,
216 .usage = UINT64_MAX,
217 .usage_exclusive = UINT64_MAX,
218 .limit = UINT64_MAX,
219 .limit_exclusive = UINT64_MAX,
220 };
221
222 i->name = strdup(pretty);
223 if (!i->name)
224 return -ENOMEM;
225
226 i->path = path_join(path, filename);
227 if (!i->path)
228 return -ENOMEM;
229
230 path_simplify(i->path);
231
232 *ret = TAKE_PTR(i);
233
234 return 0;
235}
236
237static int extract_image_basename(
238 const char *path,
239 const char *class_suffix, /* e.g. ".sysext" (this is an optional suffix) */
240 char **format_suffixes, /* e.g. ".raw" (one of these will be required) */
241 char **ret_basename,
242 char **ret_suffix) {
243
244 _cleanup_free_ char *name = NULL, *suffix = NULL;
245 int r;
246
247 assert(path);
248
249 r = path_extract_filename(path, &name);
250 if (r < 0)
251 return r;
252
253 if (format_suffixes) {
254 char *e = endswith_strv(name, format_suffixes);
255 if (!e) /* Format suffix is required */
256 return -EINVAL;
257
258 if (ret_suffix) {
259 suffix = strdup(e);
260 if (!suffix)
261 return -ENOMEM;
262 }
263
264 *e = 0;
265 }
266
267 if (class_suffix) {
268 char *e = endswith(name, class_suffix);
269 if (e) { /* Class suffix is optional */
270 if (ret_suffix) {
271 _cleanup_free_ char *j = strjoin(e, suffix);
272 if (!j)
273 return -ENOMEM;
274
275 free_and_replace(suffix, j);
276 }
277
278 *e = 0;
279 }
280 }
281
282 if (!image_name_is_valid(name))
283 return -EINVAL;
284
285 if (ret_suffix)
286 *ret_suffix = TAKE_PTR(suffix);
287
288 if (ret_basename)
289 *ret_basename = TAKE_PTR(name);
290
291 return 0;
292}
293
294static int image_update_quota(Image *i, int fd) {
295 _cleanup_close_ int fd_close = -EBADF;
296 int r;
297
298 assert(i);
299
300 if (image_is_vendor(i) || image_is_host(i))
301 return -EROFS;
302
303 if (i->type != IMAGE_SUBVOLUME)
304 return -EOPNOTSUPP;
305
306 if (fd < 0) {
307 fd_close = open(i->path, O_CLOEXEC|O_DIRECTORY);
308 if (fd_close < 0)
309 return -errno;
310 fd = fd_close;
311 } else {
312 /* Convert from O_PATH to proper fd, if needed */
313 fd = fd_reopen_condition(fd, O_CLOEXEC|O_DIRECTORY, O_PATH, &fd_close);
314 if (fd < 0)
315 return fd;
316 }
317
318 r = btrfs_quota_scan_ongoing(fd);
319 if (r < 0)
320 return r;
321 if (r > 0)
322 return 0;
323
324 BtrfsQuotaInfo quota;
325 r = btrfs_subvol_get_subtree_quota_fd(fd, 0, &quota);
326 if (r < 0)
327 return r;
328
329 i->usage = quota.referenced;
330 i->usage_exclusive = quota.exclusive;
331 i->limit = quota.referenced_max;
332 i->limit_exclusive = quota.exclusive_max;
333
334 return 1;
335}
336
337static int image_make(
338 ImageClass c,
339 const char *pretty,
340 int dir_fd,
341 const char *dir_path,
342 const char *filename,
343 int fd, /* O_PATH fd */
344 const struct stat *st,
345 Image **ret) {
346
347 _cleanup_free_ char *pretty_buffer = NULL;
348 bool read_only;
349 int r;
350
351 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
352 assert(dir_path || dir_fd == AT_FDCWD);
353 assert(filename);
354
355 /* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block
356 * devices into /var/lib/machines/, and treat them normally.
357 *
358 * This function returns -ENOENT if we can't find the image after all, and -EMEDIUMTYPE if it's not a file we
359 * recognize. */
360
361 _cleanup_close_ int _fd = -EBADF;
362 if (fd < 0) {
363 /* If we didn't get an fd passed in, then let's pin it via O_PATH now */
364 _fd = openat(dir_fd, filename, O_PATH|O_CLOEXEC);
365 if (_fd < 0)
366 return -errno;
367
368 fd = _fd;
369 st = NULL; /* refresh stat() data now that we have the inode pinned */
370 }
371
372 struct stat stbuf;
373 if (!st) {
374 if (fstat(fd, &stbuf) < 0)
375 return -errno;
376
377 st = &stbuf;
378 }
379
380 _cleanup_free_ char *parent = NULL;
381 if (!dir_path) {
382 (void) fd_get_path(dir_fd, &parent);
383 dir_path = parent;
384 }
385
386 read_only =
387 (dir_path && path_startswith(dir_path, "/usr")) ||
388 (faccessat(fd, "", W_OK, AT_EACCESS|AT_EMPTY_PATH) < 0 && errno == EROFS);
389
390 if (S_ISDIR(st->st_mode)) {
391 unsigned file_attr = 0;
392 usec_t crtime = 0;
393
394 if (!ret)
395 return 0;
396
397 if (!pretty) {
398 r = extract_image_basename(
399 filename,
400 image_class_suffix_to_string(c),
401 /* format_suffixes= */ NULL,
402 &pretty_buffer,
403 /* ret_suffix= */ NULL);
404 if (r < 0)
405 return r;
406
407 pretty = pretty_buffer;
408 }
409
410 if (btrfs_might_be_subvol(st)) {
411
412 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
413 if (r < 0)
414 return r;
415 if (r > 0) {
416 BtrfsSubvolInfo info;
417
418 /* It's a btrfs subvolume */
419
420 r = btrfs_subvol_get_info_fd(fd, 0, &info);
421 if (r < 0)
422 return r;
423
424 r = image_new(IMAGE_SUBVOLUME,
425 c,
426 pretty,
427 dir_path,
428 filename,
429 info.read_only || read_only,
430 info.otime,
431 0,
432 ret);
433 if (r < 0)
434 return r;
435
436 (void) image_update_quota(*ret, fd);
437 return 0;
438 }
439 }
440
441 /* Get directory creation time (not available everywhere, but that's OK */
442 (void) fd_getcrtime(fd, &crtime);
443
444 /* If the IMMUTABLE bit is set, we consider the directory read-only. Since the ioctl is not
445 * supported everywhere we ignore failures. */
446 (void) read_attr_fd(fd, &file_attr);
447
448 /* It's just a normal directory. */
449 r = image_new(IMAGE_DIRECTORY,
450 c,
451 pretty,
452 dir_path,
453 filename,
454 read_only || (file_attr & FS_IMMUTABLE_FL),
455 crtime,
456 0, /* we don't use mtime of stat() here, since it's not the time of last change of the tree, but only of the top-level dir */
457 ret);
458 if (r < 0)
459 return r;
460
461 return 0;
462
463 } else if (S_ISREG(st->st_mode) && endswith(filename, ".raw")) {
464 usec_t crtime = 0;
465
466 /* It's a RAW disk image */
467
468 if (!ret)
469 return 0;
470
471 (void) fd_getcrtime(fd, &crtime);
472
473 if (!pretty) {
474 r = extract_image_basename(
475 filename,
476 image_class_suffix_to_string(c),
477 STRV_MAKE(".raw"),
478 &pretty_buffer,
479 /* ret_suffix= */ NULL);
480 if (r < 0)
481 return r;
482
483 pretty = pretty_buffer;
484 }
485
486 r = image_new(IMAGE_RAW,
487 c,
488 pretty,
489 dir_path,
490 filename,
491 !(st->st_mode & 0222) || read_only,
492 crtime,
493 timespec_load(&st->st_mtim),
494 ret);
495 if (r < 0)
496 return r;
497
498 (*ret)->usage = (*ret)->usage_exclusive = st->st_blocks * 512;
499 (*ret)->limit = (*ret)->limit_exclusive = st->st_size;
500
501 return 0;
502
503 } else if (S_ISBLK(st->st_mode)) {
504 uint64_t size = UINT64_MAX;
505
506 /* A block device */
507
508 if (!ret)
509 return 0;
510
511 if (!pretty) {
512 r = extract_image_basename(
513 filename,
514 /* class_suffix= */ NULL,
515 /* format_suffix= */ NULL,
516 &pretty_buffer,
517 /* ret_suffix= */ NULL);
518 if (r < 0)
519 return r;
520
521 pretty = pretty_buffer;
522 }
523
524 _cleanup_close_ int block_fd = fd_reopen(fd, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
525 if (block_fd < 0)
526 log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", strnull(dir_path), filename);
527 else {
528 if (!read_only) {
529 int state = 0;
530
531 if (ioctl(block_fd, BLKROGET, &state) < 0)
532 log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", strnull(dir_path), filename);
533 else if (state)
534 read_only = true;
535 }
536
537 r = blockdev_get_device_size(block_fd, &size);
538 if (r < 0)
539 log_debug_errno(r, "Failed to issue BLKGETSIZE64 on device %s/%s, ignoring: %m", strnull(dir_path), filename);
540
541 block_fd = safe_close(block_fd);
542 }
543
544 r = image_new(IMAGE_BLOCK,
545 c,
546 pretty,
547 dir_path,
548 filename,
549 !(st->st_mode & 0222) || read_only,
550 0,
551 0,
552 ret);
553 if (r < 0)
554 return r;
555
556 if (!IN_SET(size, 0, UINT64_MAX))
557 (*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size;
558
559 return 0;
560 }
561
562 return -EMEDIUMTYPE;
563}
564
565static int pick_image_search_path(
566 RuntimeScope scope,
567 ImageClass class,
568 char ***ret) {
569
570 int r;
571
572 assert(scope < _RUNTIME_SCOPE_MAX && scope != RUNTIME_SCOPE_GLOBAL);
573 assert(class < _IMAGE_CLASS_MAX);
574 assert(ret);
575
576 if (class < 0) {
577 *ret = NULL;
578 return 0;
579 }
580
581 if (scope < 0) {
582 _cleanup_strv_free_ char **a = NULL, **b = NULL;
583
584 r = pick_image_search_path(RUNTIME_SCOPE_USER, class, &a);
585 if (r < 0)
586 return r;
587
588 r = pick_image_search_path(RUNTIME_SCOPE_SYSTEM, class, &b);
589 if (r < 0)
590 return r;
591
592 r = strv_extend_strv(&a, b, /* filter_duplicates= */ false);
593 if (r < 0)
594 return r;
595
596 *ret = TAKE_PTR(a);
597 return 0;
598 }
599
600 switch (scope) {
601
602 case RUNTIME_SCOPE_SYSTEM: {
603 const char *ns;
604 /* Use the initrd search path if there is one, otherwise use the common one */
605 ns = in_initrd() && image_search_path_initrd[class] ?
606 image_search_path_initrd[class] :
607 image_search_path[class];
608 if (!ns)
609 break;
610
611 _cleanup_strv_free_ char **search = strv_split_nulstr(ns);
612 if (!search)
613 return -ENOMEM;
614
615 *ret = TAKE_PTR(search);
616 return 0;
617 }
618
619 case RUNTIME_SCOPE_USER: {
620 if (class != IMAGE_MACHINE)
621 break;
622
623 static const uint64_t dirs[] = {
624 SD_PATH_USER_RUNTIME,
625 SD_PATH_USER_STATE_PRIVATE,
626 SD_PATH_USER_LIBRARY_PRIVATE,
627 };
628
629 _cleanup_strv_free_ char **search = NULL;
630 FOREACH_ELEMENT(d, dirs) {
631 _cleanup_free_ char *p = NULL;
632
633 r = sd_path_lookup(*d, "machines", &p);
634 if (r == -ENXIO) /* No XDG_RUNTIME_DIR set */
635 continue;
636 if (r < 0)
637 return r;
638
639 r = strv_consume(&search, TAKE_PTR(p));
640 if (r < 0)
641 return r;
642 }
643
644 *ret = TAKE_PTR(search);
645 return 0;
646 }
647
648 default:
649 assert_not_reached();
650 }
651
652 *ret = NULL;
653 return 0;
654}
655
656static char** make_possible_filenames(ImageClass class, const char *image_name) {
657 _cleanup_strv_free_ char **l = NULL;
658
659 assert(image_name);
660
661 FOREACH_STRING(v_suffix, "", ".v")
662 FOREACH_STRING(format_suffix, "", ".raw") {
663 _cleanup_free_ char *j = NULL;
664 const char *class_suffix;
665
666 class_suffix = image_class_suffix_to_string(class);
667 if (class_suffix) {
668 j = strjoin(image_name, class_suffix, format_suffix, v_suffix);
669 if (!j)
670 return NULL;
671
672 if (strv_consume(&l, TAKE_PTR(j)) < 0)
673 return NULL;
674 }
675
676 j = strjoin(image_name, format_suffix, v_suffix);
677 if (!j)
678 return NULL;
679
680 if (strv_consume(&l, TAKE_PTR(j)) < 0)
681 return NULL;
682 }
683
684 return TAKE_PTR(l);
685}
686
687int image_find(RuntimeScope scope,
688 ImageClass class,
689 const char *name,
690 const char *root,
691 Image **ret) {
692
693 /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people to
694 * symlink block devices into the search path. (For now, we disable that when operating relative to
695 * some root directory.) */
696 int open_flags = root ? O_NOFOLLOW : 0, r;
697
698 assert(scope < _RUNTIME_SCOPE_MAX && scope != RUNTIME_SCOPE_GLOBAL);
699 assert(class >= 0);
700 assert(class < _IMAGE_CLASS_MAX);
701 assert(name);
702
703 /* There are no images with invalid names */
704 if (!image_name_is_valid(name))
705 return -ENOENT;
706
707 _cleanup_strv_free_ char **names = make_possible_filenames(class, name);
708 if (!names)
709 return -ENOMEM;
710
711 _cleanup_strv_free_ char **search = NULL;
712 r = pick_image_search_path(scope, class, &search);
713 if (r < 0)
714 return r;
715
716 STRV_FOREACH(path, search) {
717 _cleanup_free_ char *resolved = NULL;
718 _cleanup_closedir_ DIR *d = NULL;
719
720 r = chase_and_opendir(*path, root, CHASE_PREFIX_ROOT, &resolved, &d);
721 if (r == -ENOENT)
722 continue;
723 if (r < 0)
724 return r;
725
726 STRV_FOREACH(n, names) {
727 _cleanup_free_ char *fname_buf = NULL;
728 const char *fname = *n;
729
730 _cleanup_close_ int fd = openat(dirfd(d), fname, O_PATH|O_CLOEXEC|open_flags);
731 if (fd < 0) {
732 if (errno != ENOENT)
733 return -errno;
734
735 continue;
736 }
737
738 struct stat st;
739 if (fstat(fd, &st) < 0)
740 return -errno;
741
742 if (endswith(fname, ".raw")) {
743 if (!S_ISREG(st.st_mode)) {
744 log_debug("Ignoring non-regular file '%s' with .raw suffix.", fname);
745 continue;
746 }
747
748 } else if (endswith(fname, ".v")) {
749
750 if (!S_ISDIR(st.st_mode)) {
751 log_debug("Ignoring non-directory file '%s' with .v suffix.", fname);
752 continue;
753 }
754
755 _cleanup_free_ char *suffix = NULL;
756 suffix = strdup(ASSERT_PTR(startswith(fname, name)));
757 if (!suffix)
758 return -ENOMEM;
759
760 *ASSERT_PTR(endswith(suffix, ".v")) = 0;
761
762 _cleanup_free_ char *vp = path_join(resolved, fname);
763 if (!vp)
764 return -ENOMEM;
765
766 PickFilter filter = {
767 .type_mask = endswith(suffix, ".raw") ? (UINT32_C(1) << DT_REG) | (UINT32_C(1) << DT_BLK) : (UINT32_C(1) << DT_DIR),
768 .basename = name,
769 .architecture = _ARCHITECTURE_INVALID,
770 .suffix = STRV_MAKE(suffix),
771 };
772
773 _cleanup_(pick_result_done) PickResult result = PICK_RESULT_NULL;
774 r = path_pick(root,
775 /* toplevel_fd= */ AT_FDCWD,
776 vp,
777 &filter,
778 PICK_ARCHITECTURE|PICK_TRIES,
779 &result);
780 if (r < 0) {
781 log_debug_errno(r, "Failed to pick versioned image on '%s', skipping: %m", vp);
782 continue;
783 }
784 if (!result.path) {
785 log_debug("Found versioned directory '%s', without matching entry, skipping: %m", vp);
786 continue;
787 }
788
789 /* Refresh the stat data for the discovered target */
790 st = result.st;
791 fd = safe_close(fd);
792
793 _cleanup_free_ char *bn = NULL;
794 r = path_extract_filename(result.path, &bn);
795 if (r < 0) {
796 log_debug_errno(r, "Failed to extract basename of image path '%s', skipping: %m", result.path);
797 continue;
798 }
799
800 fname_buf = path_join(fname, bn);
801 if (!fname_buf)
802 return log_oom();
803
804 fname = fname_buf;
805
806 } else if (!S_ISDIR(st.st_mode) && !S_ISBLK(st.st_mode)) {
807 log_debug("Ignoring non-directory and non-block device file '%s' without suffix.", fname);
808 continue;
809 }
810
811 r = image_make(class, name, dirfd(d), resolved, fname, fd, &st, ret);
812 if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
813 continue;
814 if (r < 0)
815 return r;
816
817 if (ret)
818 (*ret)->discoverable = true;
819
820 return 1;
821 }
822 }
823
824 if (scope == RUNTIME_SCOPE_SYSTEM && class == IMAGE_MACHINE && streq(name, ".host")) {
825 r = image_make(class,
826 ".host",
827 /* dir_fd= */ AT_FDCWD,
828 /* dir_path= */ NULL,
829 /* filename= */ empty_to_root(root),
830 /* fd= */ -EBADF,
831 /* st= */ NULL,
832 ret);
833 if (r < 0)
834 return r;
835
836 if (ret)
837 (*ret)->discoverable = true;
838
839 return 1;
840 }
841
842 return -ENOENT;
843};
844
845int image_from_path(const char *path, Image **ret) {
846
847 /* Note that we don't set the 'discoverable' field of the returned object, because we don't check here whether
848 * the image is in the image search path. And if it is we don't know if the path we used is actually not
849 * overridden by another, different image earlier in the search path */
850
851 if (path_equal(path, "/"))
852 return image_make(
853 IMAGE_MACHINE,
854 ".host",
855 /* dir_fd= */ AT_FDCWD,
856 /* dir_path= */ NULL,
857 /* filename= */ "/",
858 /* fd= */ -EBADF,
859 /* st= */ NULL,
860 ret);
861
862 return image_make(
863 _IMAGE_CLASS_INVALID,
864 /* pretty= */ NULL,
865 /* dir_fd= */ AT_FDCWD,
866 /* dir_path= */ NULL,
867 /* filename= */ path,
868 /* fd= */ -EBADF,
869 /* st= */ NULL,
870 ret);
871}
872
873int image_find_harder(
874 RuntimeScope scope,
875 ImageClass class,
876 const char *name_or_path,
877 const char *root,
878 Image **ret) {
879
880 if (image_name_is_valid(name_or_path))
881 return image_find(scope, class, name_or_path, root, ret);
882
883 return image_from_path(name_or_path, ret);
884}
885
886int image_discover(
887 RuntimeScope scope,
888 ImageClass class,
889 const char *root,
890 Hashmap **images) {
891
892 /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people to
893 * symlink block devices into the search path. (For now, we disable that when operating relative to
894 * some root directory.) */
895 int open_flags = root ? O_NOFOLLOW : 0, r;
896
897 assert(scope < _RUNTIME_SCOPE_MAX && scope != RUNTIME_SCOPE_GLOBAL);
898 assert(class >= 0);
899 assert(class < _IMAGE_CLASS_MAX);
900 assert(images);
901
902 _cleanup_strv_free_ char **search = NULL;
903 r = pick_image_search_path(scope, class, &search);
904 if (r < 0)
905 return r;
906
907 STRV_FOREACH(path, search) {
908 _cleanup_free_ char *resolved = NULL;
909 _cleanup_closedir_ DIR *d = NULL;
910
911 r = chase_and_opendir(*path, root, CHASE_PREFIX_ROOT, &resolved, &d);
912 if (r == -ENOENT)
913 continue;
914 if (r < 0)
915 return r;
916
917 FOREACH_DIRENT_ALL(de, d, return -errno) {
918 _cleanup_free_ char *pretty = NULL, *fname_buf = NULL;
919 _cleanup_(image_unrefp) Image *image = NULL;
920 const char *fname = de->d_name;
921
922 if (dot_or_dot_dot(fname))
923 continue;
924
925 _cleanup_close_ int fd = openat(dirfd(d), fname, O_PATH|O_CLOEXEC|open_flags);
926 if (fd < 0) {
927 if (errno != ENOENT)
928 return -errno;
929
930 continue; /* Vanished while we were looking at it */
931 }
932
933 struct stat st;
934 if (fstat(fd, &st) < 0)
935 return -errno;
936
937 if (S_ISREG(st.st_mode)) {
938 r = extract_image_basename(
939 fname,
940 image_class_suffix_to_string(class),
941 STRV_MAKE(".raw"),
942 &pretty,
943 /* ret_suffix= */ NULL);
944 if (r < 0) {
945 log_debug_errno(r, "Skipping directory entry '%s', which doesn't look like an image.", fname);
946 continue;
947 }
948 } else if (S_ISDIR(st.st_mode)) {
949 const char *v;
950
951 v = endswith(fname, ".v");
952 if (v) {
953 _cleanup_free_ char *suffix = NULL, *nov = NULL;
954
955 nov = strndup(fname, v - fname); /* Chop off the .v */
956 if (!nov)
957 return -ENOMEM;
958
959 r = extract_image_basename(
960 nov,
961 image_class_suffix_to_string(class),
962 STRV_MAKE(".raw", ""),
963 &pretty,
964 &suffix);
965 if (r < 0) {
966 log_debug_errno(r, "Skipping directory entry '%s', which doesn't look like a versioned image.", fname);
967 continue;
968 }
969
970 _cleanup_free_ char *vp = path_join(resolved, fname);
971 if (!vp)
972 return -ENOMEM;
973
974 PickFilter filter = {
975 .type_mask = endswith(suffix, ".raw") ? (UINT32_C(1) << DT_REG) | (UINT32_C(1) << DT_BLK) : (UINT32_C(1) << DT_DIR),
976 .basename = pretty,
977 .architecture = _ARCHITECTURE_INVALID,
978 .suffix = STRV_MAKE(suffix),
979 };
980
981 _cleanup_(pick_result_done) PickResult result = PICK_RESULT_NULL;
982 r = path_pick(root,
983 /* toplevel_fd= */ AT_FDCWD,
984 vp,
985 &filter,
986 PICK_ARCHITECTURE|PICK_TRIES,
987 &result);
988 if (r < 0) {
989 log_debug_errno(r, "Failed to pick versioned image on '%s', skipping: %m", vp);
990 continue;
991 }
992 if (!result.path) {
993 log_debug("Found versioned directory '%s', without matching entry, skipping: %m", vp);
994 continue;
995 }
996
997 /* Refresh the stat data for the discovered target */
998 st = result.st;
999 fd = safe_close(fd);
1000
1001 _cleanup_free_ char *bn = NULL;
1002 r = path_extract_filename(result.path, &bn);
1003 if (r < 0) {
1004 log_debug_errno(r, "Failed to extract basename of image path '%s', skipping: %m", result.path);
1005 continue;
1006 }
1007
1008 fname_buf = path_join(fname, bn);
1009 if (!fname_buf)
1010 return log_oom();
1011
1012 fname = fname_buf;
1013 } else {
1014 r = extract_image_basename(
1015 fname,
1016 image_class_suffix_to_string(class),
1017 /* format_suffixes= */ NULL,
1018 &pretty,
1019 /* ret_suffix= */ NULL);
1020 if (r < 0) {
1021 log_debug_errno(r, "Skipping directory entry '%s', which doesn't look like an image.", fname);
1022 continue;
1023 }
1024 }
1025
1026 } else if (S_ISBLK(st.st_mode)) {
1027 r = extract_image_basename(
1028 fname,
1029 /* class_suffix= */ NULL,
1030 /* format_suffix= */ NULL,
1031 &pretty,
1032 /* ret_suffix= */ NULL);
1033 if (r < 0) {
1034 log_debug_errno(r, "Skipping directory entry '%s', which doesn't look like an image.", fname);
1035 continue;
1036 }
1037 } else {
1038 log_debug("Skipping directory entry '%s', which is neither regular file, directory nor block device.", fname);
1039 continue;
1040 }
1041
1042 if (hashmap_contains(*images, pretty))
1043 continue;
1044
1045 r = image_make(class, pretty, dirfd(d), resolved, fname, fd, &st, &image);
1046 if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
1047 continue;
1048 if (r < 0)
1049 return r;
1050
1051 image->discoverable = true;
1052
1053 r = hashmap_ensure_put(images, &image_hash_ops, image->name, image);
1054 if (r < 0)
1055 return r;
1056
1057 TAKE_PTR(image);
1058 }
1059 }
1060
1061 if (scope == RUNTIME_SCOPE_SYSTEM && class == IMAGE_MACHINE && !hashmap_contains(*images, ".host")) {
1062 _cleanup_(image_unrefp) Image *image = NULL;
1063
1064 r = image_make(IMAGE_MACHINE,
1065 ".host",
1066 /* dir_fd= */ AT_FDCWD,
1067 /* dir_path= */ NULL,
1068 empty_to_root(root),
1069 /* fd= */ -EBADF,
1070 /* st= */ NULL,
1071 &image);
1072 if (r < 0)
1073 return r;
1074
1075 image->discoverable = true;
1076
1077 r = hashmap_ensure_put(images, &image_hash_ops, image->name, image);
1078 if (r < 0)
1079 return r;
1080
1081 image = NULL;
1082 }
1083
1084 return 0;
1085}
1086
1087int image_remove(Image *i) {
1088 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
1089 _cleanup_strv_free_ char **settings = NULL;
1090 _cleanup_free_ char *roothash = NULL;
1091 int r;
1092
1093 assert(i);
1094
1095 if (image_is_vendor(i) || image_is_host(i))
1096 return -EROFS;
1097
1098 settings = image_settings_path(i);
1099 if (!settings)
1100 return -ENOMEM;
1101
1102 r = image_roothash_path(i, &roothash);
1103 if (r < 0)
1104 return r;
1105
1106 /* Make sure we don't interfere with a running nspawn */
1107 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
1108 if (r < 0)
1109 return r;
1110
1111 switch (i->type) {
1112
1113 case IMAGE_SUBVOLUME:
1114
1115 /* Let's unlink first, maybe it is a symlink? If that works we are happy. Otherwise, let's get out the
1116 * big guns */
1117 if (unlink(i->path) < 0) {
1118 r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
1119 if (r < 0)
1120 return r;
1121 }
1122
1123 break;
1124
1125 case IMAGE_DIRECTORY:
1126 /* Allow deletion of read-only directories */
1127 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL);
1128 r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
1129 if (r < 0)
1130 return r;
1131
1132 break;
1133
1134 case IMAGE_BLOCK:
1135
1136 /* If this is inside of /dev, then it's a real block device, hence let's not touch the device node
1137 * itself (but let's remove the stuff stored alongside it). If it's anywhere else, let's try to unlink
1138 * the thing (it's most likely a symlink after all). */
1139
1140 if (path_startswith(i->path, "/dev"))
1141 break;
1142
1143 _fallthrough_;
1144 case IMAGE_RAW:
1145 if (unlink(i->path) < 0)
1146 return -errno;
1147 break;
1148
1149 default:
1150 return -EOPNOTSUPP;
1151 }
1152
1153 STRV_FOREACH(j, settings)
1154 if (unlink(*j) < 0 && errno != ENOENT)
1155 log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
1156
1157 if (unlink(roothash) < 0 && errno != ENOENT)
1158 log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash);
1159
1160 return 0;
1161}
1162
1163static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
1164 _cleanup_free_ char *fn = NULL, *rs = NULL;
1165 int r;
1166
1167 fn = strjoin(new_name, suffix);
1168 if (!fn)
1169 return -ENOMEM;
1170
1171 r = file_in_same_dir(path, fn, &rs);
1172 if (r < 0)
1173 return r;
1174
1175 return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs);
1176}
1177
1178int image_rename(Image *i, const char *new_name, RuntimeScope scope) {
1179 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
1180 _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL;
1181 _cleanup_strv_free_ char **settings = NULL;
1182 unsigned file_attr = 0;
1183 int r;
1184
1185 assert(i);
1186
1187 if (!image_name_is_valid(new_name))
1188 return -EINVAL;
1189
1190 if (image_is_vendor(i) || image_is_host(i))
1191 return -EROFS;
1192
1193 settings = image_settings_path(i);
1194 if (!settings)
1195 return -ENOMEM;
1196
1197 r = image_roothash_path(i, &roothash);
1198 if (r < 0)
1199 return r;
1200
1201 /* Make sure we don't interfere with a running nspawn */
1202 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
1203 if (r < 0)
1204 return r;
1205
1206 /* Make sure nobody takes the new name, between the time we
1207 * checked it is currently unused in all search paths, and the
1208 * time we take possession of it */
1209 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
1210 if (r < 0)
1211 return r;
1212
1213 r = image_find(scope, IMAGE_MACHINE, new_name, NULL, NULL);
1214 if (r >= 0)
1215 return -EEXIST;
1216 if (r != -ENOENT)
1217 return r;
1218
1219 switch (i->type) {
1220
1221 case IMAGE_DIRECTORY:
1222 /* Turn of the immutable bit while we rename the image, so that we can rename it */
1223 (void) read_attr_at(AT_FDCWD, i->path, &file_attr);
1224
1225 if (file_attr & FS_IMMUTABLE_FL)
1226 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL);
1227
1228 _fallthrough_;
1229 case IMAGE_SUBVOLUME:
1230 r = file_in_same_dir(i->path, new_name, &new_path);
1231 break;
1232
1233 case IMAGE_BLOCK:
1234
1235 /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */
1236 if (path_startswith(i->path, "/dev"))
1237 return -EROFS;
1238
1239 r = file_in_same_dir(i->path, new_name, &new_path);
1240 break;
1241
1242 case IMAGE_RAW: {
1243 const char *fn;
1244
1245 fn = strjoina(new_name, ".raw");
1246
1247 r = file_in_same_dir(i->path, fn, &new_path);
1248 break;
1249 }
1250
1251 default:
1252 return -EOPNOTSUPP;
1253 }
1254 if (r < 0)
1255 return r;
1256
1257 nn = strdup(new_name);
1258 if (!nn)
1259 return -ENOMEM;
1260
1261 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
1262 if (r < 0)
1263 return r;
1264
1265 /* Restore the immutable bit, if it was set before */
1266 if (file_attr & FS_IMMUTABLE_FL)
1267 (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL);
1268
1269 free_and_replace(i->path, new_path);
1270 free_and_replace(i->name, nn);
1271
1272 STRV_FOREACH(j, settings) {
1273 r = rename_auxiliary_file(*j, new_name, ".nspawn");
1274 if (r < 0 && r != -ENOENT)
1275 log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
1276 }
1277
1278 r = rename_auxiliary_file(roothash, new_name, ".roothash");
1279 if (r < 0 && r != -ENOENT)
1280 log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash);
1281
1282 return 0;
1283}
1284
1285static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
1286 _cleanup_free_ char *fn = NULL, *rs = NULL;
1287 int r;
1288
1289 fn = strjoin(new_name, suffix);
1290 if (!fn)
1291 return -ENOMEM;
1292
1293 r = file_in_same_dir(path, fn, &rs);
1294 if (r < 0)
1295 return r;
1296
1297 return copy_file_atomic(path, rs, 0664, COPY_REFLINK);
1298}
1299
1300int image_clone(Image *i, const char *new_name, bool read_only, RuntimeScope scope) {
1301 _cleanup_(release_lock_file) LockFile name_lock = LOCK_FILE_INIT;
1302 _cleanup_strv_free_ char **settings = NULL;
1303 _cleanup_free_ char *roothash = NULL;
1304 const char *new_path;
1305 int r;
1306
1307 assert(i);
1308
1309 if (!image_name_is_valid(new_name))
1310 return -EINVAL;
1311
1312 settings = image_settings_path(i);
1313 if (!settings)
1314 return -ENOMEM;
1315
1316 r = image_roothash_path(i, &roothash);
1317 if (r < 0)
1318 return r;
1319
1320 /* Make sure nobody takes the new name, between the time we
1321 * checked it is currently unused in all search paths, and the
1322 * time we take possession of it */
1323 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
1324 if (r < 0)
1325 return r;
1326
1327 r = image_find(scope, IMAGE_MACHINE, new_name, NULL, NULL);
1328 if (r >= 0)
1329 return -EEXIST;
1330 if (r != -ENOENT)
1331 return r;
1332
1333 switch (i->type) {
1334
1335 case IMAGE_SUBVOLUME:
1336 case IMAGE_DIRECTORY:
1337 /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain
1338 * directory. */
1339
1340 new_path = strjoina("/var/lib/machines/", new_name);
1341
1342 r = btrfs_subvol_snapshot_at(AT_FDCWD, i->path, AT_FDCWD, new_path,
1343 (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
1344 BTRFS_SNAPSHOT_FALLBACK_COPY |
1345 BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
1346 BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
1347 BTRFS_SNAPSHOT_RECURSIVE |
1348 BTRFS_SNAPSHOT_QUOTA);
1349 if (r >= 0)
1350 /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
1351 (void) btrfs_subvol_auto_qgroup(new_path, 0, true);
1352
1353 break;
1354
1355 case IMAGE_RAW:
1356 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
1357
1358 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644,
1359 COPY_REFLINK|COPY_CRTIME|COPY_NOCOW_AFTER);
1360 break;
1361
1362 case IMAGE_BLOCK:
1363 default:
1364 return -EOPNOTSUPP;
1365 }
1366
1367 if (r < 0)
1368 return r;
1369
1370 STRV_FOREACH(j, settings) {
1371 r = clone_auxiliary_file(*j, new_name, ".nspawn");
1372 if (r < 0 && r != -ENOENT)
1373 log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
1374 }
1375
1376 r = clone_auxiliary_file(roothash, new_name, ".roothash");
1377 if (r < 0 && r != -ENOENT)
1378 log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash);
1379
1380 return 0;
1381}
1382
1383int image_read_only(Image *i, bool b) {
1384 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
1385 int r;
1386
1387 assert(i);
1388
1389 if (image_is_vendor(i) || image_is_host(i))
1390 return -EROFS;
1391
1392 /* Make sure we don't interfere with a running nspawn */
1393 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
1394 if (r < 0)
1395 return r;
1396
1397 switch (i->type) {
1398
1399 case IMAGE_SUBVOLUME:
1400
1401 /* Note that we set the flag only on the top-level
1402 * subvolume of the image. */
1403
1404 r = btrfs_subvol_set_read_only(i->path, b);
1405 if (r < 0)
1406 return r;
1407
1408 break;
1409
1410 case IMAGE_DIRECTORY:
1411 /* For simple directory trees we cannot use the access
1412 mode of the top-level directory, since it has an
1413 effect on the container itself. However, we can
1414 use the "immutable" flag, to at least make the
1415 top-level directory read-only. It's not as good as
1416 a read-only subvolume, but at least something, and
1417 we can read the value back. */
1418
1419 r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL);
1420 if (r < 0)
1421 return r;
1422
1423 break;
1424
1425 case IMAGE_RAW: {
1426 struct stat st;
1427
1428 if (stat(i->path, &st) < 0)
1429 return -errno;
1430
1431 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
1432 return -errno;
1433
1434 /* If the images is now read-only, it's a good time to
1435 * defrag it, given that no write patterns will
1436 * fragment it again. */
1437 if (b)
1438 (void) btrfs_defrag(i->path);
1439 break;
1440 }
1441
1442 case IMAGE_BLOCK: {
1443 _cleanup_close_ int fd = -EBADF;
1444 struct stat st;
1445 int state = b;
1446
1447 fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY);
1448 if (fd < 0)
1449 return -errno;
1450
1451 if (fstat(fd, &st) < 0)
1452 return -errno;
1453 if (!S_ISBLK(st.st_mode))
1454 return -ENOTTY;
1455
1456 if (ioctl(fd, BLKROSET, &state) < 0)
1457 return -errno;
1458
1459 break;
1460 }
1461
1462 default:
1463 return -EOPNOTSUPP;
1464 }
1465
1466 i->read_only = b;
1467 return 0;
1468}
1469
/* Creates /run/systemd/nspawn (mode 0755, via mkdir_p()) and the locks/ directory below it
 * (mode 0700). Errors are deliberately ignored; callers find out when creating the lock file itself. */
static void make_lock_dir(void) {
        (void) mkdir_p("/run/systemd/nspawn", 0755);

        /* The parent was handled above, hence a plain mkdir() is enough for the last component. */
        (void) mkdir("/run/systemd/nspawn/locks", 0700);
}
1474
1475int image_path_lock(
1476 const char *path,
1477 int operation,
1478 LockFile *ret_global,
1479 LockFile *ret_local) {
1480
1481 _cleanup_free_ char *p = NULL;
1482 LockFile t = LOCK_FILE_INIT;
1483 struct stat st;
1484 bool exclusive;
1485 int r;
1486
1487 assert(path);
1488 assert(ret_local);
1489
1490 /* Locks an image path. This actually creates two locks: one "local" one, next to the image path
1491 * itself, which might be shared via NFS. And another "global" one, in /run, that uses the
1492 * device/inode number. This has the benefit that we can even lock a tree that is a mount point,
1493 * correctly. */
1494
1495 if (!path_is_absolute(path))
1496 return -EINVAL;
1497
1498 switch (operation & (LOCK_SH|LOCK_EX)) {
1499 case LOCK_SH:
1500 exclusive = false;
1501 break;
1502 case LOCK_EX:
1503 exclusive = true;
1504 break;
1505 default:
1506 return -EINVAL;
1507 }
1508
1509 if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
1510 *ret_local = LOCK_FILE_INIT;
1511 if (ret_global)
1512 *ret_global = LOCK_FILE_INIT;
1513 return 0;
1514 }
1515
1516 /* Prohibit taking exclusive locks on the host image. We can't allow this, since we ourselves are
1517 * running off it after all, and we don't want any images to manipulate the host image. We make an
1518 * exception for shared locks however: we allow those (and make them NOPs since there's no point in
1519 * taking them if there can't be exclusive locks). Strictly speaking these are questionable as well,
1520 * since it means changes made to the host might propagate to the container as they happen (and a
1521 * shared lock kinda suggests that no changes happen at all while it is in place), but it's too
1522 * useful not to allow read-only containers off the host root, hence let's support this, and trust
1523 * the user to do the right thing with this. */
1524 if (path_equal(path, "/")) {
1525 if (exclusive)
1526 return -EBUSY;
1527
1528 *ret_local = LOCK_FILE_INIT;
1529 if (ret_global)
1530 *ret_global = LOCK_FILE_INIT;
1531 return 0;
1532 }
1533
1534 if (ret_global) {
1535 if (stat(path, &st) >= 0) {
1536 if (S_ISBLK(st.st_mode))
1537 r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev));
1538 else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))
1539 r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino);
1540 else
1541 return -ENOTTY;
1542 if (r < 0)
1543 return -ENOMEM;
1544 }
1545 }
1546
1547 /* For block devices we don't need the "local" lock, as the major/minor lock above should be
1548 * sufficient, since block devices are host local anyway. */
1549 if (!path_startswith(path, "/dev/")) {
1550 r = make_lock_file_for(path, operation, &t);
1551 if (r < 0) {
1552 if (!exclusive && r == -EROFS)
1553 log_debug_errno(r, "Failed to create shared lock for '%s', ignoring: %m", path);
1554 else
1555 return r;
1556 }
1557 }
1558
1559 if (p) {
1560 make_lock_dir();
1561
1562 r = make_lock_file(p, operation, ret_global);
1563 if (r < 0) {
1564 release_lock_file(&t);
1565 return r;
1566 }
1567 } else if (ret_global)
1568 *ret_global = LOCK_FILE_INIT;
1569
1570 *ret_local = t;
1571 return 0;
1572}
1573
1574int image_set_limit(Image *i, uint64_t referenced_max) {
1575 int r;
1576
1577 assert(i);
1578
1579 if (image_is_vendor(i) || image_is_host(i))
1580 return -EROFS;
1581
1582 if (i->type != IMAGE_SUBVOLUME)
1583 return -EOPNOTSUPP;
1584
1585 /* We set the quota both for the subvolume as well as for the
1586 * subtree. The latter is mostly for historical reasons, since
1587 * we didn't use to have a concept of subtree quota, and hence
1588 * only modified the subvolume quota. */
1589
1590 (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max);
1591 (void) btrfs_subvol_auto_qgroup(i->path, 0, true);
1592 r = btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max);
1593 if (r < 0)
1594 return r;
1595
1596 (void) image_update_quota(i, -EBADF);
1597 return 0;
1598}
1599
1600int image_set_pool_limit(ImageClass class, uint64_t referenced_max) {
1601 const char *dir;
1602 int r;
1603
1604 assert(class >= 0 && class < _IMAGE_CLASS_MAX);
1605
1606 dir = image_root_to_string(class);
1607
1608 r = btrfs_qgroup_set_limit(dir, /* qgroupid = */ 0, referenced_max);
1609 if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
1610 return r;
1611 if (r < 0)
1612 log_debug_errno(r, "Failed to set limit on btrfs quota group for '%s', ignoring: %m", dir);
1613
1614 r = btrfs_subvol_set_subtree_quota_limit(dir, /* subvol_id = */ 0, referenced_max);
1615 if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
1616 return r;
1617 if (r < 0)
1618 return log_debug_errno(r, "Failed to set subtree quota limit for '%s': %m", dir);
1619
1620 return 0;
1621}
1622
1623int image_read_metadata(Image *i, const ImagePolicy *image_policy) {
1624 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
1625 int r;
1626
1627 assert(i);
1628
1629 r = image_path_lock(i->path, LOCK_SH|LOCK_NB, &global_lock, &local_lock);
1630 if (r < 0)
1631 return r;
1632
1633 switch (i->type) {
1634
1635 case IMAGE_SUBVOLUME:
1636 case IMAGE_DIRECTORY: {
1637 _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL, **sysext_release = NULL, **confext_release = NULL;
1638 _cleanup_free_ char *hostname = NULL, *path = NULL;
1639 sd_id128_t machine_id = SD_ID128_NULL;
1640
1641 if (i->class == IMAGE_SYSEXT) {
1642 r = extension_has_forbidden_content(i->path);
1643 if (r < 0)
1644 return r;
1645 if (r > 0)
1646 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
1647 "Conflicting content found in image %s, refusing.",
1648 i->name);
1649 }
1650
1651 r = chase("/etc/hostname", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
1652 if (r < 0 && r != -ENOENT)
1653 log_debug_errno(r, "Failed to chase /etc/hostname in image %s: %m", i->name);
1654 else if (r >= 0) {
1655 r = read_etc_hostname(path, /* substitute_wildcards= */ false, &hostname);
1656 if (r < 0)
1657 log_debug_errno(r, "Failed to read /etc/hostname of image %s: %m", i->name);
1658 }
1659
1660 path = mfree(path);
1661
1662 r = id128_get_machine(i->path, &machine_id);
1663 if (r < 0)
1664 log_debug_errno(r, "Failed to read machine ID in image %s, ignoring: %m", i->name);
1665
1666 r = chase("/etc/machine-info", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
1667 if (r < 0 && r != -ENOENT)
1668 log_debug_errno(r, "Failed to chase /etc/machine-info in image %s: %m", i->name);
1669 else if (r >= 0) {
1670 r = load_env_file_pairs(NULL, path, &machine_info);
1671 if (r < 0)
1672 log_debug_errno(r, "Failed to parse machine-info data of %s: %m", i->name);
1673 }
1674
1675 r = load_os_release_pairs(i->path, &os_release);
1676 if (r < 0)
1677 log_debug_errno(r, "Failed to read os-release in image, ignoring: %m");
1678
1679 r = load_extension_release_pairs(i->path, IMAGE_SYSEXT, i->name, /* relax_extension_release_check= */ false, &sysext_release);
1680 if (r < 0)
1681 log_debug_errno(r, "Failed to read sysext-release in image, ignoring: %m");
1682
1683 r = load_extension_release_pairs(i->path, IMAGE_CONFEXT, i->name, /* relax_extension_release_check= */ false, &confext_release);
1684 if (r < 0)
1685 log_debug_errno(r, "Failed to read confext-release in image, ignoring: %m");
1686
1687 free_and_replace(i->hostname, hostname);
1688 i->machine_id = machine_id;
1689 strv_free_and_replace(i->machine_info, machine_info);
1690 strv_free_and_replace(i->os_release, os_release);
1691 strv_free_and_replace(i->sysext_release, sysext_release);
1692 strv_free_and_replace(i->confext_release, confext_release);
1693 break;
1694 }
1695
1696 case IMAGE_RAW:
1697 case IMAGE_BLOCK: {
1698 _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
1699 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
1700 DissectImageFlags flags =
1701 DISSECT_IMAGE_GENERIC_ROOT |
1702 DISSECT_IMAGE_REQUIRE_ROOT |
1703 DISSECT_IMAGE_RELAX_VAR_CHECK |
1704 DISSECT_IMAGE_READ_ONLY |
1705 DISSECT_IMAGE_USR_NO_ROOT |
1706 DISSECT_IMAGE_ADD_PARTITION_DEVICES |
1707 DISSECT_IMAGE_PIN_PARTITION_DEVICES |
1708 DISSECT_IMAGE_VALIDATE_OS |
1709 DISSECT_IMAGE_VALIDATE_OS_EXT |
1710 DISSECT_IMAGE_ALLOW_USERSPACE_VERITY;
1711
1712 r = loop_device_make_by_path(
1713 i->path,
1714 O_RDONLY,
1715 /* sector_size= */ UINT32_MAX,
1716 LO_FLAGS_PARTSCAN,
1717 LOCK_SH,
1718 &d);
1719 if (r < 0)
1720 return r;
1721
1722 r = dissect_loop_device(
1723 d,
1724 /* verity= */ NULL,
1725 /* mount_options= */ NULL,
1726 image_policy,
1727 /* image_filter= */ NULL,
1728 flags,
1729 &m);
1730 if (r < 0)
1731 return r;
1732
1733 r = dissected_image_acquire_metadata(
1734 m,
1735 /* userns_fd= */ -EBADF,
1736 flags);
1737 if (r < 0)
1738 return r;
1739
1740 free_and_replace(i->hostname, m->hostname);
1741 i->machine_id = m->machine_id;
1742 strv_free_and_replace(i->machine_info, m->machine_info);
1743 strv_free_and_replace(i->os_release, m->os_release);
1744 strv_free_and_replace(i->sysext_release, m->sysext_release);
1745 strv_free_and_replace(i->confext_release, m->confext_release);
1746
1747 break;
1748 }
1749
1750 default:
1751 return -EOPNOTSUPP;
1752 }
1753
1754 i->metadata_valid = true;
1755
1756 return 0;
1757}
1758
1759int image_name_lock(const char *name, int operation, LockFile *ret) {
1760 const char *p;
1761
1762 assert(name);
1763 assert(ret);
1764
1765 /* Locks an image name, regardless of the precise path used. */
1766
1767 if (streq(name, ".host"))
1768 return -EBUSY;
1769
1770 if (!image_name_is_valid(name))
1771 return -EINVAL;
1772
1773 if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
1774 *ret = (LockFile) LOCK_FILE_INIT;
1775 return 0;
1776 }
1777
1778 make_lock_dir();
1779
1780 p = strjoina("/run/systemd/nspawn/locks/name-", name);
1781 return make_lock_file(p, operation, ret);
1782}
1783
1784bool image_in_search_path(
1785 RuntimeScope scope,
1786 ImageClass class,
1787 const char *root,
1788 const char *image) {
1789
1790 int r;
1791
1792 assert(scope < _RUNTIME_SCOPE_MAX && scope != RUNTIME_SCOPE_GLOBAL);
1793 assert(class >= 0);
1794 assert(class < _IMAGE_CLASS_MAX);
1795 assert(image);
1796
1797 _cleanup_strv_free_ char **search = NULL;
1798 r = pick_image_search_path(scope, class, &search);
1799 if (r < 0)
1800 return r;
1801
1802 STRV_FOREACH(path, search) {
1803 const char *p, *q;
1804 size_t k;
1805
1806 if (!empty_or_root(root)) {
1807 q = path_startswith(*path, root);
1808 if (!q)
1809 continue;
1810 } else
1811 q = *path;
1812
1813 p = path_startswith(q, *path);
1814 if (!p)
1815 continue;
1816
1817 /* Make sure there's a filename following */
1818 k = strcspn(p, "/");
1819 if (k == 0)
1820 continue;
1821
1822 p += k;
1823
1824 /* Accept trailing slashes */
1825 if (p[strspn(p, "/")] == 0)
1826 return true;
1827 }
1828
1829 return false;
1830}
1831
1832bool image_is_vendor(const struct Image *i) {
1833 assert(i);
1834
1835 return i->path && path_startswith(i->path, "/usr");
1836}
1837
1838bool image_is_host(const struct Image *i) {
1839 assert(i);
1840
1841 if (i->name && streq(i->name, ".host"))
1842 return true;
1843
1844 if (i->path && path_equal(i->path, "/"))
1845 return true;
1846
1847 return false;
1848}
1849
1850int image_to_json(const struct Image *img, sd_json_variant **ret) {
1851 assert(img);
1852
1853 return sd_json_buildo(
1854 ret,
1855 SD_JSON_BUILD_PAIR_STRING("Type", image_type_to_string(img->type)),
1856 SD_JSON_BUILD_PAIR_STRING("Class", image_class_to_string(img->class)),
1857 SD_JSON_BUILD_PAIR_STRING("Name", img->name),
1858 SD_JSON_BUILD_PAIR_CONDITION(!!img->path, "Path", SD_JSON_BUILD_STRING(img->path)),
1859 SD_JSON_BUILD_PAIR_BOOLEAN("ReadOnly", img->read_only),
1860 SD_JSON_BUILD_PAIR_CONDITION(img->crtime != 0, "CreationTimestamp", SD_JSON_BUILD_UNSIGNED(img->crtime)),
1861 SD_JSON_BUILD_PAIR_CONDITION(img->mtime != 0, "ModificationTimestamp", SD_JSON_BUILD_UNSIGNED(img->mtime)),
1862 SD_JSON_BUILD_PAIR_CONDITION(img->usage != UINT64_MAX, "Usage", SD_JSON_BUILD_UNSIGNED(img->usage)),
1863 SD_JSON_BUILD_PAIR_CONDITION(img->usage_exclusive != UINT64_MAX, "UsageExclusive", SD_JSON_BUILD_UNSIGNED(img->usage_exclusive)),
1864 SD_JSON_BUILD_PAIR_CONDITION(img->limit != UINT64_MAX, "Limit", SD_JSON_BUILD_UNSIGNED(img->limit)),
1865 SD_JSON_BUILD_PAIR_CONDITION(img->limit_exclusive != UINT64_MAX, "LimitExclusive", SD_JSON_BUILD_UNSIGNED(img->limit_exclusive)));
1866}
1867
1868static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
1869 [IMAGE_DIRECTORY] = "directory",
1870 [IMAGE_SUBVOLUME] = "subvolume",
1871 [IMAGE_RAW] = "raw",
1872 [IMAGE_BLOCK] = "block",
1873};
1874
1875DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);