]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/discover-image.c
hwdb: updated Librem 11 accelerometer (#32772)
[thirdparty/systemd.git] / src / shared / discover-image.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
cd61c3bf 2
a8fbdf54 3#include <errno.h>
ebd93cb6 4#include <fcntl.h>
e08f94ac
LP
5#include <linux/fs.h>
6#include <linux/loop.h>
65ddc2c5 7#include <linux/magic.h>
a8fbdf54
TA
8#include <stdio.h>
9#include <stdlib.h>
e306723e 10#include <sys/file.h>
204f52e3 11#include <sys/ioctl.h>
a8fbdf54
TA
12#include <sys/stat.h>
13#include <unistd.h>
546dbec5 14
b5efdb8a 15#include "alloc-util.h"
01db9c85 16#include "blockdev-util.h"
cd61c3bf 17#include "btrfs-util.h"
f461a28d 18#include "chase.h"
c8b3094d 19#include "chattr-util.h"
ebd93cb6 20#include "copy.h"
a0956174 21#include "dirent-util.h"
57f1b61b 22#include "discover-image.h"
c7664c07 23#include "dissect-image.h"
686d13b9 24#include "env-file.h"
b6e953f2 25#include "env-util.h"
6afa5d86 26#include "extension-util.h"
3ffd4af2 27#include "fd-util.h"
f4f15635 28#include "fs-util.h"
a8fbdf54 29#include "hashmap.h"
e2054217 30#include "hostname-setup.h"
c7664c07 31#include "id128-util.h"
73740c9f 32#include "initrd-util.h"
64e89f56 33#include "lock-util.h"
a8fbdf54 34#include "log.h"
c7664c07 35#include "loop-util.h"
546dbec5 36#include "macro.h"
30535c16 37#include "mkdir.h"
d8b4d14d 38#include "nulstr-util.h"
d58ad743 39#include "os-util.h"
8e0b6570 40#include "path-util.h"
c6878637 41#include "rm-rf.h"
65ddc2c5 42#include "stat-util.h"
8b43440b 43#include "string-table.h"
07630cea 44#include "string-util.h"
8e0b6570 45#include "strv.h"
a8fbdf54 46#include "time-util.h"
8e0b6570 47#include "utf8.h"
a5ecdf7c 48#include "vpick.h"
89a5a90c 49#include "xattr-util.h"
cd61c3bf 50
f7178a04 51const char* const image_search_path[_IMAGE_CLASS_MAX] = {
9bca4ae4
LP
52 [IMAGE_MACHINE] = "/etc/machines\0" /* only place symlinks here */
53 "/run/machines\0" /* and here too */
54 "/var/lib/machines\0" /* the main place for images */
55 "/var/lib/container\0" /* legacy */
56 "/usr/local/lib/machines\0"
57 "/usr/lib/machines\0",
5ef46e5f 58
9bca4ae4
LP
59 [IMAGE_PORTABLE] = "/etc/portables\0" /* only place symlinks here */
60 "/run/portables\0" /* and here too */
61 "/var/lib/portables\0" /* the main place for images */
62 "/usr/local/lib/portables\0"
63 "/usr/lib/portables\0",
64
de862276
LB
65 /* Note that we don't allow storing extensions under /usr/, unlike with other image types. That's
66 * because extension images are supposed to extend /usr/, so you get into recursive races, especially
67 * with directory-based extensions, as the kernel's OverlayFS explicitly checks for this and errors
68 * out with -ELOOP if it finds that a lowerdir= is a child of another lowerdir=. */
b60e0f57 69 [IMAGE_SYSEXT] = "/etc/extensions\0" /* only place symlinks here */
70 "/run/extensions\0" /* and here too */
71 "/var/lib/extensions\0", /* the main place for images */
72
73 [IMAGE_CONFEXT] = "/run/confexts\0" /* only place symlinks here */
74 "/var/lib/confexts\0" /* the main place for images */
75 "/usr/local/lib/confexts\0"
76 "/usr/lib/confexts\0",
5ef46e5f 77};
c2ce6a3d 78
d4fee894
LP
79/* Inside the initrd, use a slightly different set of search path (i.e. include .extra/sysext/ and
80 * .extra/confext/ in extension search dir) */
73740c9f
LP
81static const char* const image_search_path_initrd[_IMAGE_CLASS_MAX] = {
82 /* (entries that aren't listed here will get the same search path as for the non initrd-case) */
83
b151e696
LP
84 [IMAGE_SYSEXT] = "/etc/extensions\0" /* only place symlinks here */
85 "/run/extensions\0" /* and here too */
86 "/var/lib/extensions\0" /* the main place for images */
d4fee894
LP
87 "/.extra/sysext\0", /* put sysext picked up by systemd-stub last, since not trusted */
88
89 [IMAGE_CONFEXT] = "/run/confexts\0" /* only place symlinks here */
90 "/var/lib/confexts\0" /* the main place for images */
91 "/usr/local/lib/confexts\0"
92 "/.extra/confext\0", /* put confext picked up by systemd-stub last, since not trusted */
73740c9f
LP
93};
94
a747994b
LP
95static const char* image_class_suffix_table[_IMAGE_CLASS_MAX] = {
96 [IMAGE_SYSEXT] = ".sysext",
97 [IMAGE_CONFEXT] = ".confext",
98};
99
100DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(image_class_suffix, ImageClass);
101
7af5785d
LP
102static const char *const image_root_table[_IMAGE_CLASS_MAX] = {
103 [IMAGE_MACHINE] = "/var/lib/machines",
104 [IMAGE_PORTABLE] = "/var/lib/portables",
105 [IMAGE_SYSEXT] = "/var/lib/extensions",
106 [IMAGE_CONFEXT] = "/var/lib/confexts",
107};
108
109DEFINE_STRING_TABLE_LOOKUP_TO_STRING(image_root, ImageClass);
110
8301aa0b
YW
111static Image *image_free(Image *i) {
112 assert(i);
9614bb06 113
cd61c3bf
LP
114 free(i->name);
115 free(i->path);
c7664c07
LP
116
117 free(i->hostname);
118 strv_free(i->machine_info);
119 strv_free(i->os_release);
a81fe93e
LP
120 strv_free(i->sysext_release);
121 strv_free(i->confext_release);
c7664c07 122
6b430fdb 123 return mfree(i);
cd61c3bf
LP
124}
125
8301aa0b 126DEFINE_TRIVIAL_REF_UNREF_FUNC(Image, image, image_free);
b07ec5a1
YW
127DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(image_hash_ops, char, string_hash_func, string_compare_func,
128 Image, image_unref);
9614bb06 129
8e0b6570
LP
130static char **image_settings_path(Image *image) {
131 _cleanup_strv_free_ char **l = NULL;
162f6477
LP
132 _cleanup_free_ char *fn = NULL;
133 size_t i = 0;
134 int r;
8e0b6570
LP
135
136 assert(image);
137
138 l = new0(char*, 4);
139 if (!l)
140 return NULL;
141
162f6477
LP
142 fn = strjoin(image->name, ".nspawn");
143 if (!fn)
144 return NULL;
8e0b6570 145
b910cc72
LP
146 FOREACH_STRING(s, "/etc/systemd/nspawn", "/run/systemd/nspawn") {
147 l[i] = path_join(s, fn);
8e0b6570
LP
148 if (!l[i])
149 return NULL;
150
151 i++;
152 }
153
162f6477
LP
154 r = file_in_same_dir(image->path, fn, l + i);
155 if (r == -ENOMEM)
8e0b6570 156 return NULL;
162f6477
LP
157 if (r < 0)
158 log_debug_errno(r, "Failed to generate .nspawn settings path from image path, ignoring: %m");
159
160 strv_uniq(l);
8e0b6570 161
ae2a15bc 162 return TAKE_PTR(l);
8e0b6570
LP
163}
164
162f6477
LP
165static int image_roothash_path(Image *image, char **ret) {
166 _cleanup_free_ char *fn = NULL;
bafbac4e
LP
167
168 assert(image);
169
162f6477
LP
170 fn = strjoin(image->name, ".roothash");
171 if (!fn)
172 return -ENOMEM;
bafbac4e 173
162f6477 174 return file_in_same_dir(image->path, fn, ret);
bafbac4e
LP
175}
176
c2ce6a3d 177static int image_new(
cd61c3bf 178 ImageType t,
3775e141 179 ImageClass c,
5fc7f358 180 const char *pretty,
cd61c3bf 181 const char *path,
5fc7f358 182 const char *filename,
cd61c3bf 183 bool read_only,
10f9c755 184 usec_t crtime,
cd61c3bf 185 usec_t mtime,
c2ce6a3d 186 Image **ret) {
cd61c3bf
LP
187
188 _cleanup_(image_unrefp) Image *i = NULL;
cd61c3bf 189
cd61c3bf
LP
190 assert(t >= 0);
191 assert(t < _IMAGE_TYPE_MAX);
5fc7f358
LP
192 assert(pretty);
193 assert(filename);
c2ce6a3d 194 assert(ret);
cd61c3bf 195
c2108701 196 i = new(Image, 1);
cd61c3bf
LP
197 if (!i)
198 return -ENOMEM;
199
c2108701
LP
200 *i = (Image) {
201 .n_ref = 1,
202 .type = t,
3775e141 203 .class = c,
c2108701
LP
204 .read_only = read_only,
205 .crtime = crtime,
206 .mtime = mtime,
207 .usage = UINT64_MAX,
208 .usage_exclusive = UINT64_MAX,
209 .limit = UINT64_MAX,
210 .limit_exclusive = UINT64_MAX,
211 };
cd61c3bf 212
5fc7f358 213 i->name = strdup(pretty);
cd61c3bf
LP
214 if (!i->name)
215 return -ENOMEM;
216
657ee2d8 217 i->path = path_join(path, filename);
5fc7f358
LP
218 if (!i->path)
219 return -ENOMEM;
220
4ff361cc 221 path_simplify(i->path);
cd61c3bf 222
1cc6c93a 223 *ret = TAKE_PTR(i);
c2ce6a3d 224
cd61c3bf
LP
225 return 0;
226}
227
a5ecdf7c 228static int extract_image_basename(
a747994b 229 const char *path,
a5ecdf7c
LP
230 const char *class_suffix, /* e.g. ".sysext" (this is an optional suffix) */
231 char **format_suffixes, /* e.g. ".raw" (one of these will be required) */
232 char **ret_basename,
233 char **ret_suffix) {
a747994b 234
a5ecdf7c 235 _cleanup_free_ char *name = NULL, *suffix = NULL;
a747994b 236 int r;
4756c94e
LP
237
238 assert(path);
4756c94e 239
a747994b
LP
240 r = path_extract_filename(path, &name);
241 if (r < 0)
242 return r;
4756c94e 243
a5ecdf7c 244 if (format_suffixes) {
2e6f012b 245 char *e = endswith_strv(name, format_suffixes);
a747994b 246 if (!e) /* Format suffix is required */
4756c94e
LP
247 return -EINVAL;
248
a5ecdf7c
LP
249 if (ret_suffix) {
250 suffix = strdup(e);
251 if (!suffix)
252 return -ENOMEM;
253 }
254
4756c94e
LP
255 *e = 0;
256 }
257
a747994b
LP
258 if (class_suffix) {
259 char *e = endswith(name, class_suffix);
a5ecdf7c
LP
260 if (e) { /* Class suffix is optional */
261 if (ret_suffix) {
262 _cleanup_free_ char *j = strjoin(e, suffix);
263 if (!j)
264 return -ENOMEM;
265
266 free_and_replace(suffix, j);
267 }
268
a747994b 269 *e = 0;
a5ecdf7c 270 }
a747994b
LP
271 }
272
4756c94e
LP
273 if (!image_name_is_valid(name))
274 return -EINVAL;
275
a5ecdf7c
LP
276 if (ret_suffix)
277 *ret_suffix = TAKE_PTR(suffix);
278
279 if (ret_basename)
280 *ret_basename = TAKE_PTR(name);
281
4756c94e
LP
282 return 0;
283}
284
5fc7f358 285static int image_make(
3775e141 286 ImageClass c,
5fc7f358
LP
287 const char *pretty,
288 int dfd,
289 const char *path,
290 const char *filename,
3a6ce860 291 const struct stat *st,
5fc7f358
LP
292 Image **ret) {
293
2570578d 294 _cleanup_free_ char *pretty_buffer = NULL, *parent = NULL;
3a6ce860 295 struct stat stbuf;
5fc7f358 296 bool read_only;
cd61c3bf
LP
297 int r;
298
3a6ce860 299 assert(dfd >= 0 || dfd == AT_FDCWD);
bcb846f3 300 assert(path || dfd == AT_FDCWD);
5fc7f358 301 assert(filename);
cd61c3bf 302
eb38edce 303 /* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block
3a6ce860
LP
304 * devices into /var/lib/machines/, and treat them normally.
305 *
306 * This function returns -ENOENT if we can't find the image after all, and -EMEDIUMTYPE if it's not a file we
307 * recognize. */
cd61c3bf 308
3a6ce860
LP
309 if (!st) {
310 if (fstatat(dfd, filename, &stbuf, 0) < 0)
311 return -errno;
312
313 st = &stbuf;
314 }
cd61c3bf 315
2570578d
LP
316 if (!path) {
317 if (dfd == AT_FDCWD)
318 (void) safe_getcwd(&parent);
319 else
320 (void) fd_get_path(dfd, &parent);
321 }
bcb846f3 322
5fc7f358
LP
323 read_only =
324 (path && path_startswith(path, "/usr")) ||
08ff5529 325 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
86e339c8 326
3a6ce860 327 if (S_ISDIR(st->st_mode)) {
254d1313 328 _cleanup_close_ int fd = -EBADF;
01b72568 329 unsigned file_attr = 0;
02dcf215 330 usec_t crtime = 0;
cd61c3bf 331
c2ce6a3d 332 if (!ret)
3a6ce860 333 return 0;
cd61c3bf 334
4756c94e 335 if (!pretty) {
a5ecdf7c
LP
336 r = extract_image_basename(
337 filename,
338 image_class_suffix_to_string(c),
339 /* format_suffix= */ NULL,
340 &pretty_buffer,
341 /* ret_suffix= */ NULL);
4756c94e
LP
342 if (r < 0)
343 return r;
344
345 pretty = pretty_buffer;
346 }
5fc7f358 347
01b72568
LP
348 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
349 if (fd < 0)
350 return -errno;
351
674b04ff 352 if (btrfs_might_be_subvol(st)) {
cd61c3bf 353
65ddc2c5 354 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
21222ea5
LP
355 if (r < 0)
356 return r;
79de6eb1 357 if (r > 0) {
10f9c755 358 BtrfsSubvolInfo info;
cd61c3bf 359
c2ce6a3d 360 /* It's a btrfs subvolume */
cd61c3bf 361
5bcd08db 362 r = btrfs_subvol_get_info_fd(fd, 0, &info);
10f9c755
LP
363 if (r < 0)
364 return r;
c2ce6a3d
LP
365
366 r = image_new(IMAGE_SUBVOLUME,
3775e141 367 c,
5fc7f358 368 pretty,
c2ce6a3d 369 path,
5fc7f358
LP
370 filename,
371 info.read_only || read_only,
10f9c755 372 info.otime,
c2ce6a3d 373 0,
c2ce6a3d
LP
374 ret);
375 if (r < 0)
376 return r;
377
5bcd08db
LP
378 if (btrfs_quota_scan_ongoing(fd) == 0) {
379 BtrfsQuotaInfo quota;
b6b18498 380
5bcd08db
LP
381 r = btrfs_subvol_get_subtree_quota_fd(fd, 0, &quota);
382 if (r >= 0) {
383 (*ret)->usage = quota.referenced;
384 (*ret)->usage_exclusive = quota.exclusive;
385
386 (*ret)->limit = quota.referenced_max;
387 (*ret)->limit_exclusive = quota.exclusive_max;
388 }
b6b18498
LP
389 }
390
3a6ce860 391 return 0;
cd61c3bf 392 }
c2ce6a3d 393 }
cd61c3bf 394
02dcf215 395 /* Get directory creation time (not available everywhere, but that's OK */
12a7f04a 396 (void) fd_getcrtime(fd, &crtime);
02dcf215
LP
397
398 /* If the IMMUTABLE bit is set, we consider the directory read-only. Since the ioctl is not
01b72568
LP
399 * supported everywhere we ignore failures. */
400 (void) read_attr_fd(fd, &file_attr);
cd61c3bf 401
01b72568 402 /* It's just a normal directory. */
c2ce6a3d 403 r = image_new(IMAGE_DIRECTORY,
3775e141 404 c,
5fc7f358 405 pretty,
c2ce6a3d 406 path,
5fc7f358 407 filename,
01b72568 408 read_only || (file_attr & FS_IMMUTABLE_FL),
02dcf215
LP
409 crtime,
410 0, /* we don't use mtime of stat() here, since it's not the time of last change of the tree, but only of the top-level dir */
c2ce6a3d
LP
411 ret);
412 if (r < 0)
413 return r;
cd61c3bf 414
3a6ce860 415 return 0;
cd61c3bf 416
3a6ce860 417 } else if (S_ISREG(st->st_mode) && endswith(filename, ".raw")) {
10f9c755 418 usec_t crtime = 0;
cd61c3bf 419
aceac2f0 420 /* It's a RAW disk image */
cd61c3bf 421
c2ce6a3d 422 if (!ret)
3a6ce860 423 return 0;
cd61c3bf 424
c53e07e2 425 (void) fd_getcrtime_at(dfd, filename, AT_SYMLINK_FOLLOW, &crtime);
10f9c755 426
4756c94e 427 if (!pretty) {
a5ecdf7c
LP
428 r = extract_image_basename(
429 filename,
430 image_class_suffix_to_string(c),
431 STRV_MAKE(".raw"),
432 &pretty_buffer,
433 /* ret_suffix= */ NULL);
4756c94e
LP
434 if (r < 0)
435 return r;
436
437 pretty = pretty_buffer;
438 }
10f9c755 439
aceac2f0 440 r = image_new(IMAGE_RAW,
3775e141 441 c,
5fc7f358 442 pretty,
c2ce6a3d 443 path,
5fc7f358 444 filename,
3a6ce860 445 !(st->st_mode & 0222) || read_only,
10f9c755 446 crtime,
3a6ce860 447 timespec_load(&st->st_mtim),
c2ce6a3d
LP
448 ret);
449 if (r < 0)
450 return r;
cd61c3bf 451
3a6ce860
LP
452 (*ret)->usage = (*ret)->usage_exclusive = st->st_blocks * 512;
453 (*ret)->limit = (*ret)->limit_exclusive = st->st_size;
b6b18498 454
3a6ce860 455 return 0;
eb38edce 456
3a6ce860 457 } else if (S_ISBLK(st->st_mode)) {
254d1313 458 _cleanup_close_ int block_fd = -EBADF;
eb38edce
LP
459 uint64_t size = UINT64_MAX;
460
461 /* A block device */
462
463 if (!ret)
3a6ce860 464 return 0;
eb38edce 465
4756c94e 466 if (!pretty) {
a5ecdf7c
LP
467 r = extract_image_basename(
468 filename,
469 /* class_suffix= */ NULL,
470 /* format_suffix= */ NULL,
471 &pretty_buffer,
472 /* ret_suffix= */ NULL);
4756c94e
LP
473 if (r < 0)
474 return r;
475
476 pretty = pretty_buffer;
477 }
eb38edce
LP
478
479 block_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
480 if (block_fd < 0)
2570578d 481 log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", path ?: strnull(parent), filename);
eb38edce 482 else {
3a6ce860
LP
483 /* Refresh stat data after opening the node */
484 if (fstat(block_fd, &stbuf) < 0)
eb38edce 485 return -errno;
3a6ce860
LP
486 st = &stbuf;
487
488 if (!S_ISBLK(st->st_mode)) /* Verify that what we opened is actually what we think it is */
eb38edce
LP
489 return -ENOTTY;
490
491 if (!read_only) {
492 int state = 0;
493
494 if (ioctl(block_fd, BLKROGET, &state) < 0)
2570578d 495 log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", path ?: strnull(parent), filename);
eb38edce
LP
496 else if (state)
497 read_only = true;
498 }
499
01db9c85
LP
500 r = blockdev_get_device_size(block_fd, &size);
501 if (r < 0)
502 log_debug_errno(r, "Failed to issue BLKGETSIZE64 on device %s/%s, ignoring: %m", path ?: strnull(parent), filename);
eb38edce
LP
503
504 block_fd = safe_close(block_fd);
505 }
506
507 r = image_new(IMAGE_BLOCK,
3775e141 508 c,
eb38edce
LP
509 pretty,
510 path,
511 filename,
3a6ce860 512 !(st->st_mode & 0222) || read_only,
eb38edce
LP
513 0,
514 0,
515 ret);
516 if (r < 0)
517 return r;
518
ed0cb346 519 if (!IN_SET(size, 0, UINT64_MAX))
eb38edce
LP
520 (*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size;
521
3a6ce860 522 return 0;
c2ce6a3d 523 }
cd61c3bf 524
3a6ce860 525 return -EMEDIUMTYPE;
c2ce6a3d 526}
cd61c3bf 527
73740c9f
LP
528static const char *pick_image_search_path(ImageClass class) {
529 if (class < 0 || class >= _IMAGE_CLASS_MAX)
530 return NULL;
531
532 /* Use the initrd search path if there is one, otherwise use the common one */
533 return in_initrd() && image_search_path_initrd[class] ? image_search_path_initrd[class] : image_search_path[class];
534}
535
a5ecdf7c
LP
536static char **make_possible_filenames(ImageClass class, const char *image_name) {
537 _cleanup_strv_free_ char **l = NULL;
538
539 assert(image_name);
540
541 FOREACH_STRING(v_suffix, "", ".v")
542 FOREACH_STRING(format_suffix, "", ".raw") {
543 _cleanup_free_ char *j = NULL;
544 const char *class_suffix;
545
546 class_suffix = image_class_suffix_to_string(class);
547 if (class_suffix) {
548 j = strjoin(image_name, class_suffix, format_suffix, v_suffix);
549 if (!j)
550 return NULL;
551
552 if (strv_consume(&l, TAKE_PTR(j)) < 0)
553 return NULL;
554 }
555
556 j = strjoin(image_name, format_suffix, v_suffix);
557 if (!j)
558 return NULL;
559
560 if (strv_consume(&l, TAKE_PTR(j)) < 0)
561 return NULL;
562 }
563
564 return TAKE_PTR(l);
565}
566
d577d4a4
LP
567int image_find(ImageClass class,
568 const char *name,
569 const char *root,
570 Image **ret) {
571
c2ce6a3d 572 int r;
cd61c3bf 573
5ef46e5f
LP
574 assert(class >= 0);
575 assert(class < _IMAGE_CLASS_MAX);
c2ce6a3d 576 assert(name);
cd61c3bf 577
c2ce6a3d
LP
578 /* There are no images with invalid names */
579 if (!image_name_is_valid(name))
3a6ce860 580 return -ENOENT;
cd61c3bf 581
a5ecdf7c
LP
582 _cleanup_strv_free_ char **names = make_possible_filenames(class, name);
583 if (!names)
584 return -ENOMEM;
585
73740c9f 586 NULSTR_FOREACH(path, pick_image_search_path(class)) {
d577d4a4 587 _cleanup_free_ char *resolved = NULL;
c2ce6a3d 588 _cleanup_closedir_ DIR *d = NULL;
3a6ce860 589 struct stat st;
d577d4a4 590 int flags;
cd61c3bf 591
f461a28d 592 r = chase_and_opendir(path, root, CHASE_PREFIX_ROOT, &resolved, &d);
d577d4a4
LP
593 if (r == -ENOENT)
594 continue;
595 if (r < 0)
596 return r;
cd61c3bf 597
d577d4a4
LP
598 /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people
599 * to symlink block devices into the search path. (For now, we disable that when operating
600 * relative to some root directory.) */
601 flags = root ? AT_SYMLINK_NOFOLLOW : 0;
5fc7f358 602
a5ecdf7c
LP
603 STRV_FOREACH(n, names) {
604 _cleanup_free_ char *fname_buf = NULL;
605 const char *fname = *n;
3a6ce860 606
a5ecdf7c
LP
607 if (fstatat(dirfd(d), fname, &st, flags) < 0) {
608 if (errno != ENOENT)
609 return -errno;
5fc7f358 610
a5ecdf7c
LP
611 continue; /* Vanished while we were looking at it */
612 }
613
614 if (endswith(fname, ".raw")) {
615 if (!S_ISREG(st.st_mode)) {
616 log_debug("Ignoring non-regular file '%s' with .raw suffix.", fname);
3a6ce860 617 continue;
a5ecdf7c 618 }
3a6ce860 619
a5ecdf7c 620 } else if (endswith(fname, ".v")) {
3a6ce860 621
a5ecdf7c
LP
622 if (!S_ISDIR(st.st_mode)) {
623 log_debug("Ignoring non-directory file '%s' with .v suffix.", fname);
624 continue;
625 }
626
627 _cleanup_free_ char *suffix = NULL;
628 suffix = strdup(ASSERT_PTR(startswith(fname, name)));
629 if (!suffix)
630 return -ENOMEM;
631
632 *ASSERT_PTR(endswith(suffix, ".v")) = 0;
633
634 _cleanup_free_ char *vp = path_join(resolved, fname);
635 if (!vp)
636 return -ENOMEM;
637
638 PickFilter filter = {
639 .type_mask = endswith(suffix, ".raw") ? (UINT32_C(1) << DT_REG) | (UINT32_C(1) << DT_BLK) : (UINT32_C(1) << DT_DIR),
640 .basename = name,
641 .architecture = _ARCHITECTURE_INVALID,
421a4ba7 642 .suffix = STRV_MAKE(suffix),
a5ecdf7c
LP
643 };
644
645 _cleanup_(pick_result_done) PickResult result = PICK_RESULT_NULL;
646 r = path_pick(root,
647 /* toplevel_fd= */ AT_FDCWD,
648 vp,
649 &filter,
650 PICK_ARCHITECTURE|PICK_TRIES,
651 &result);
652 if (r < 0) {
653 log_debug_errno(r, "Failed to pick versioned image on '%s', skipping: %m", vp);
654 continue;
655 }
656 if (!result.path) {
657 log_debug("Found versioned directory '%s', without matching entry, skipping: %m", vp);
658 continue;
659 }
660
661 /* Refresh the stat data for the discovered target */
662 st = result.st;
663
664 _cleanup_free_ char *bn = NULL;
665 r = path_extract_filename(result.path, &bn);
666 if (r < 0) {
667 log_debug_errno(r, "Failed to extract basename of image path '%s', skipping: %m", result.path);
668 continue;
669 }
670
671 fname_buf = path_join(fname, bn);
672 if (!fname_buf)
673 return log_oom();
3a6ce860 674
a5ecdf7c 675 fname = fname_buf;
3a6ce860 676
a5ecdf7c
LP
677 } else if (!S_ISDIR(st.st_mode) && !S_ISBLK(st.st_mode)) {
678 log_debug("Ignoring non-directory and non-block device file '%s' without suffix.", fname);
3a6ce860 679 continue;
a5ecdf7c 680 }
3a6ce860 681
a5ecdf7c
LP
682 r = image_make(class, name, dirfd(d), resolved, fname, &st, ret);
683 if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
684 continue;
685 if (r < 0)
686 return r;
cd61c3bf 687
a5ecdf7c
LP
688 if (ret)
689 (*ret)->discoverable = true;
cf604fd4 690
a5ecdf7c
LP
691 return 1;
692 }
c2ce6a3d
LP
693 }
694
cf604fd4 695 if (class == IMAGE_MACHINE && streq(name, ".host")) {
3775e141 696 r = image_make(class, ".host", AT_FDCWD, NULL, empty_to_root(root), NULL, ret);
cf604fd4
LP
697 if (r < 0)
698 return r;
699
700 if (ret)
701 (*ret)->discoverable = true;
702
a5ecdf7c 703 return 1;
cf604fd4 704 }
5fc7f358 705
3a6ce860 706 return -ENOENT;
c2ce6a3d
LP
707};
708
2ddf182b 709int image_from_path(const char *path, Image **ret) {
cf604fd4
LP
710
711 /* Note that we don't set the 'discoverable' field of the returned object, because we don't check here whether
712 * the image is in the image search path. And if it is we don't know if the path we used is actually not
3fe91079 713 * overridden by another, different image earlier in the search path */
cf604fd4 714
2ddf182b 715 if (path_equal(path, "/"))
3775e141 716 return image_make(IMAGE_MACHINE, ".host", AT_FDCWD, NULL, "/", NULL, ret);
2ddf182b 717
3775e141 718 return image_make(_IMAGE_CLASS_INVALID, NULL, AT_FDCWD, NULL, path, NULL, ret);
2ddf182b
LP
719}
720
d577d4a4 721int image_find_harder(ImageClass class, const char *name_or_path, const char *root, Image **ret) {
2ddf182b 722 if (image_name_is_valid(name_or_path))
d577d4a4 723 return image_find(class, name_or_path, root, ret);
2ddf182b
LP
724
725 return image_from_path(name_or_path, ret);
726}
727
d577d4a4
LP
728int image_discover(
729 ImageClass class,
730 const char *root,
731 Hashmap *h) {
732
c2ce6a3d
LP
733 int r;
734
5ef46e5f
LP
735 assert(class >= 0);
736 assert(class < _IMAGE_CLASS_MAX);
c2ce6a3d
LP
737 assert(h);
738
73740c9f 739 NULSTR_FOREACH(path, pick_image_search_path(class)) {
d577d4a4 740 _cleanup_free_ char *resolved = NULL;
c2ce6a3d 741 _cleanup_closedir_ DIR *d = NULL;
c2ce6a3d 742
f461a28d 743 r = chase_and_opendir(path, root, CHASE_PREFIX_ROOT, &resolved, &d);
d577d4a4
LP
744 if (r == -ENOENT)
745 continue;
746 if (r < 0)
747 return r;
c2ce6a3d
LP
748
749 FOREACH_DIRENT_ALL(de, d, return -errno) {
a5ecdf7c 750 _cleanup_free_ char *pretty = NULL, *fname_buf = NULL;
c2ce6a3d 751 _cleanup_(image_unrefp) Image *image = NULL;
a5ecdf7c 752 const char *fname = de->d_name;
3a6ce860 753 struct stat st;
d577d4a4 754 int flags;
c2ce6a3d 755
a5ecdf7c 756 if (dot_or_dot_dot(fname))
c2ce6a3d
LP
757 continue;
758
d577d4a4
LP
759 /* As mentioned above, we follow symlinks on this fstatat(), because we want to
760 * permit people to symlink block devices into the search path. */
761 flags = root ? AT_SYMLINK_NOFOLLOW : 0;
a5ecdf7c 762 if (fstatat(dirfd(d), fname, &st, flags) < 0) {
3a6ce860
LP
763 if (errno == ENOENT)
764 continue;
765
766 return -errno;
767 }
768
a5ecdf7c
LP
769 if (S_ISREG(st.st_mode)) {
770 r = extract_image_basename(
771 fname,
772 image_class_suffix_to_string(class),
773 STRV_MAKE(".raw"),
774 &pretty,
775 /* suffix= */ NULL);
776 if (r < 0) {
777 log_debug_errno(r, "Skipping directory entry '%s', which doesn't look like an image.", fname);
778 continue;
779 }
780 } else if (S_ISDIR(st.st_mode)) {
781 const char *v;
782
783 v = endswith(fname, ".v");
784 if (v) {
785 _cleanup_free_ char *suffix = NULL, *nov = NULL;
786
787 nov = strndup(fname, v - fname); /* Chop off the .v */
788 if (!nov)
789 return -ENOMEM;
790
791 r = extract_image_basename(
792 nov,
793 image_class_suffix_to_string(class),
794 STRV_MAKE(".raw", ""),
795 &pretty,
796 &suffix);
797 if (r < 0) {
798 log_debug_errno(r, "Skipping directory entry '%s', which doesn't look like a versioned image.", fname);
799 continue;
800 }
801
802 _cleanup_free_ char *vp = path_join(resolved, fname);
803 if (!vp)
804 return -ENOMEM;
805
806 PickFilter filter = {
807 .type_mask = endswith(suffix, ".raw") ? (UINT32_C(1) << DT_REG) | (UINT32_C(1) << DT_BLK) : (UINT32_C(1) << DT_DIR),
808 .basename = pretty,
809 .architecture = _ARCHITECTURE_INVALID,
421a4ba7 810 .suffix = STRV_MAKE(suffix),
a5ecdf7c
LP
811 };
812
813 _cleanup_(pick_result_done) PickResult result = PICK_RESULT_NULL;
814 r = path_pick(root,
815 /* toplevel_fd= */ AT_FDCWD,
816 vp,
817 &filter,
818 PICK_ARCHITECTURE|PICK_TRIES,
819 &result);
820 if (r < 0) {
821 log_debug_errno(r, "Failed to pick versioned image on '%s', skipping: %m", vp);
822 continue;
823 }
824 if (!result.path) {
825 log_debug("Found versioned directory '%s', without matching entry, skipping: %m", vp);
826 continue;
827 }
828
829 /* Refresh the stat data for the discovered target */
830 st = result.st;
831
832 _cleanup_free_ char *bn = NULL;
833 r = path_extract_filename(result.path, &bn);
834 if (r < 0) {
835 log_debug_errno(r, "Failed to extract basename of image path '%s', skipping: %m", result.path);
836 continue;
837 }
838
839 fname_buf = path_join(fname, bn);
840 if (!fname_buf)
841 return log_oom();
842
843 fname = fname_buf;
844 } else {
845 r = extract_image_basename(
846 fname,
847 image_class_suffix_to_string(class),
848 /* format_suffix= */ NULL,
849 &pretty,
850 /* ret_suffix= */ NULL);
851 if (r < 0) {
852 log_debug_errno(r, "Skipping directory entry '%s', which doesn't look like an image.", fname);
853 continue;
854 }
855 }
856
857 } else if (S_ISBLK(st.st_mode)) {
858 r = extract_image_basename(
859 fname,
860 /* class_suffix= */ NULL,
861 /* format_suffix= */ NULL,
862 &pretty,
863 /* ret_v_suffix= */ NULL);
864 if (r < 0) {
865 log_debug_errno(r, "Skipping directory entry '%s', which doesn't look like an image.", fname);
866 continue;
867 }
868 } else {
869 log_debug("Skipping directory entry '%s', which is neither regular file, directory nor block device.", fname);
1bf36bf9 870 continue;
e7df707b 871 }
1bf36bf9
LP
872
873 if (hashmap_contains(h, pretty))
c2ce6a3d
LP
874 continue;
875
a5ecdf7c 876 r = image_make(class, pretty, dirfd(d), resolved, fname, &st, &image);
3a6ce860 877 if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
c2ce6a3d
LP
878 continue;
879 if (r < 0)
880 return r;
881
cf604fd4
LP
882 image->discoverable = true;
883
c2ce6a3d
LP
884 r = hashmap_put(h, image->name, image);
885 if (r < 0)
886 return r;
887
5df1a553 888 TAKE_PTR(image);
cd61c3bf
LP
889 }
890 }
891
5ef46e5f 892 if (class == IMAGE_MACHINE && !hashmap_contains(h, ".host")) {
5fc7f358
LP
893 _cleanup_(image_unrefp) Image *image = NULL;
894
3775e141 895 r = image_make(IMAGE_MACHINE, ".host", AT_FDCWD, NULL, empty_to_root("/"), NULL, &image);
5fc7f358
LP
896 if (r < 0)
897 return r;
898
cf604fd4
LP
899 image->discoverable = true;
900
5fc7f358
LP
901 r = hashmap_put(h, image->name, image);
902 if (r < 0)
903 return r;
904
905 image = NULL;
5fc7f358
LP
906 }
907
cd61c3bf
LP
908 return 0;
909}
910
08682124 911int image_remove(Image *i) {
8e766630 912 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
8e0b6570 913 _cleanup_strv_free_ char **settings = NULL;
bafbac4e 914 _cleanup_free_ char *roothash = NULL;
30535c16
LP
915 int r;
916
08682124
LP
917 assert(i);
918
d94c2b06 919 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
08682124
LP
920 return -EROFS;
921
8e0b6570
LP
922 settings = image_settings_path(i);
923 if (!settings)
924 return -ENOMEM;
925
162f6477
LP
926 r = image_roothash_path(i, &roothash);
927 if (r < 0)
928 return r;
bafbac4e 929
30535c16
LP
930 /* Make sure we don't interfere with a running nspawn */
931 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
932 if (r < 0)
933 return r;
934
ebd93cb6
LP
935 switch (i->type) {
936
937 case IMAGE_SUBVOLUME:
9fb0b9c7
LP
938
939 /* Let's unlink first, maybe it is a symlink? If that works we are happy. Otherwise, let's get out the
940 * big guns */
941 if (unlink(i->path) < 0) {
942 r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
943 if (r < 0)
944 return r;
945 }
946
8e0b6570 947 break;
ebd93cb6
LP
948
949 case IMAGE_DIRECTORY:
01b72568 950 /* Allow deletion of read-only directories */
db9a4254 951 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);
8e0b6570
LP
952 r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
953 if (r < 0)
954 return r;
955
956 break;
01b72568 957
eb38edce
LP
958 case IMAGE_BLOCK:
959
960 /* If this is inside of /dev, then it's a real block device, hence let's not touch the device node
961 * itself (but let's remove the stuff stored alongside it). If it's anywhere else, let's try to unlink
962 * the thing (it's most likely a symlink after all). */
963
964 if (path_startswith(i->path, "/dev"))
965 break;
966
4831981d 967 _fallthrough_;
aceac2f0 968 case IMAGE_RAW:
41d1ed05
LP
969 if (unlink(i->path) < 0)
970 return -errno;
8e0b6570 971 break;
ebd93cb6
LP
972
973 default:
15411c0c 974 return -EOPNOTSUPP;
ebd93cb6 975 }
8e0b6570 976
de010b0b 977 STRV_FOREACH(j, settings)
8e0b6570
LP
978 if (unlink(*j) < 0 && errno != ENOENT)
979 log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
8e0b6570 980
bafbac4e
LP
981 if (unlink(roothash) < 0 && errno != ENOENT)
982 log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash);
983
8e0b6570
LP
984 return 0;
985}
986
bafbac4e 987static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
162f6477
LP
988 _cleanup_free_ char *fn = NULL, *rs = NULL;
989 int r;
8e0b6570 990
162f6477
LP
991 fn = strjoin(new_name, suffix);
992 if (!fn)
8e0b6570
LP
993 return -ENOMEM;
994
162f6477
LP
995 r = file_in_same_dir(path, fn, &rs);
996 if (r < 0)
997 return r;
998
8e0b6570 999 return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs);
ebd93cb6
LP
1000}
1001
1002int image_rename(Image *i, const char *new_name) {
8e766630 1003 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
bafbac4e 1004 _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL;
8e0b6570 1005 _cleanup_strv_free_ char **settings = NULL;
01b72568 1006 unsigned file_attr = 0;
ebd93cb6
LP
1007 int r;
1008
1009 assert(i);
1010
1011 if (!image_name_is_valid(new_name))
1012 return -EINVAL;
1013
d94c2b06 1014 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
ebd93cb6
LP
1015 return -EROFS;
1016
8e0b6570
LP
1017 settings = image_settings_path(i);
1018 if (!settings)
1019 return -ENOMEM;
1020
162f6477
LP
1021 r = image_roothash_path(i, &roothash);
1022 if (r < 0)
1023 return r;
bafbac4e 1024
30535c16
LP
1025 /* Make sure we don't interfere with a running nspawn */
1026 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
1027 if (r < 0)
1028 return r;
1029
1030 /* Make sure nobody takes the new name, between the time we
1031 * checked it is currently unused in all search paths, and the
f8e2f4d6 1032 * time we take possession of it */
30535c16
LP
1033 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
1034 if (r < 0)
1035 return r;
1036
d577d4a4 1037 r = image_find(IMAGE_MACHINE, new_name, NULL, NULL);
3a6ce860 1038 if (r >= 0)
ebd93cb6 1039 return -EEXIST;
3a6ce860
LP
1040 if (r != -ENOENT)
1041 return r;
ebd93cb6
LP
1042
1043 switch (i->type) {
1044
ebd93cb6 1045 case IMAGE_DIRECTORY:
01b72568
LP
1046 /* Turn of the immutable bit while we rename the image, so that we can rename it */
1047 (void) read_attr_path(i->path, &file_attr);
1048
1049 if (file_attr & FS_IMMUTABLE_FL)
db9a4254 1050 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);
01b72568 1051
4831981d 1052 _fallthrough_;
01b72568 1053 case IMAGE_SUBVOLUME:
162f6477 1054 r = file_in_same_dir(i->path, new_name, &new_path);
ebd93cb6
LP
1055 break;
1056
eb38edce
LP
1057 case IMAGE_BLOCK:
1058
1059 /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */
1060 if (path_startswith(i->path, "/dev"))
1061 return -EROFS;
1062
162f6477 1063 r = file_in_same_dir(i->path, new_name, &new_path);
eb38edce
LP
1064 break;
1065
aceac2f0 1066 case IMAGE_RAW: {
ebd93cb6
LP
1067 const char *fn;
1068
63c372cb 1069 fn = strjoina(new_name, ".raw");
162f6477
LP
1070
1071 r = file_in_same_dir(i->path, fn, &new_path);
ebd93cb6
LP
1072 break;
1073 }
1074
1075 default:
15411c0c 1076 return -EOPNOTSUPP;
ebd93cb6 1077 }
162f6477
LP
1078 if (r < 0)
1079 return r;
ebd93cb6
LP
1080
1081 nn = strdup(new_name);
1082 if (!nn)
1083 return -ENOMEM;
1084
f85ef957
AC
1085 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
1086 if (r < 0)
1087 return r;
ebd93cb6 1088
01b72568
LP
1089 /* Restore the immutable bit, if it was set before */
1090 if (file_attr & FS_IMMUTABLE_FL)
db9a4254 1091 (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);
01b72568 1092
f9ecfd3b
DL
1093 free_and_replace(i->path, new_path);
1094 free_and_replace(i->name, nn);
ebd93cb6 1095
8e0b6570 1096 STRV_FOREACH(j, settings) {
bafbac4e 1097 r = rename_auxiliary_file(*j, new_name, ".nspawn");
8e0b6570
LP
1098 if (r < 0 && r != -ENOENT)
1099 log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
1100 }
1101
bafbac4e
LP
1102 r = rename_auxiliary_file(roothash, new_name, ".roothash");
1103 if (r < 0 && r != -ENOENT)
1104 log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash);
1105
ebd93cb6
LP
1106 return 0;
1107}
1108
bafbac4e 1109static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
162f6477
LP
1110 _cleanup_free_ char *fn = NULL, *rs = NULL;
1111 int r;
8e0b6570 1112
162f6477
LP
1113 fn = strjoin(new_name, suffix);
1114 if (!fn)
8e0b6570
LP
1115 return -ENOMEM;
1116
162f6477
LP
1117 r = file_in_same_dir(path, fn, &rs);
1118 if (r < 0)
1119 return r;
1120
7c2f5495 1121 return copy_file_atomic(path, rs, 0664, COPY_REFLINK);
8e0b6570
LP
1122}
1123
ebd93cb6 1124int image_clone(Image *i, const char *new_name, bool read_only) {
8e766630 1125 _cleanup_(release_lock_file) LockFile name_lock = LOCK_FILE_INIT;
8e0b6570 1126 _cleanup_strv_free_ char **settings = NULL;
bafbac4e 1127 _cleanup_free_ char *roothash = NULL;
ebd93cb6
LP
1128 const char *new_path;
1129 int r;
1130
1131 assert(i);
1132
1133 if (!image_name_is_valid(new_name))
1134 return -EINVAL;
1135
8e0b6570
LP
1136 settings = image_settings_path(i);
1137 if (!settings)
1138 return -ENOMEM;
1139
162f6477
LP
1140 r = image_roothash_path(i, &roothash);
1141 if (r < 0)
1142 return r;
bafbac4e 1143
30535c16
LP
1144 /* Make sure nobody takes the new name, between the time we
1145 * checked it is currently unused in all search paths, and the
f8e2f4d6 1146 * time we take possession of it */
30535c16
LP
1147 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
1148 if (r < 0)
1149 return r;
1150
d577d4a4 1151 r = image_find(IMAGE_MACHINE, new_name, NULL, NULL);
3a6ce860 1152 if (r >= 0)
ebd93cb6 1153 return -EEXIST;
3a6ce860
LP
1154 if (r != -ENOENT)
1155 return r;
ebd93cb6
LP
1156
1157 switch (i->type) {
1158
1159 case IMAGE_SUBVOLUME:
1160 case IMAGE_DIRECTORY:
9a50e3ca 1161 /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain
13e785f7 1162 * directory. */
9a50e3ca 1163
63c372cb 1164 new_path = strjoina("/var/lib/machines/", new_name);
ebd93cb6 1165
fab4ef72
DDM
1166 r = btrfs_subvol_snapshot_at(AT_FDCWD, i->path, AT_FDCWD, new_path,
1167 (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
1168 BTRFS_SNAPSHOT_FALLBACK_COPY |
1169 BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
1170 BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
1171 BTRFS_SNAPSHOT_RECURSIVE |
1172 BTRFS_SNAPSHOT_QUOTA);
17cbb288 1173 if (r >= 0)
9a50e3ca 1174 /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
8120ee28 1175 (void) btrfs_subvol_auto_qgroup(new_path, 0, true);
5bcd08db 1176
ebd93cb6
LP
1177 break;
1178
aceac2f0 1179 case IMAGE_RAW:
63c372cb 1180 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
ebd93cb6 1181
7c2f5495
DDM
1182 r = copy_file_atomic_full(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, FS_NOCOW_FL,
1183 COPY_REFLINK|COPY_CRTIME, NULL, NULL);
ebd93cb6
LP
1184 break;
1185
eb38edce 1186 case IMAGE_BLOCK:
ebd93cb6 1187 default:
15411c0c 1188 return -EOPNOTSUPP;
ebd93cb6
LP
1189 }
1190
1191 if (r < 0)
1192 return r;
1193
8e0b6570 1194 STRV_FOREACH(j, settings) {
bafbac4e 1195 r = clone_auxiliary_file(*j, new_name, ".nspawn");
8e0b6570
LP
1196 if (r < 0 && r != -ENOENT)
1197 log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
1198 }
1199
bafbac4e
LP
1200 r = clone_auxiliary_file(roothash, new_name, ".roothash");
1201 if (r < 0 && r != -ENOENT)
1202 log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash);
1203
ebd93cb6
LP
1204 return 0;
1205}
1206
1207int image_read_only(Image *i, bool b) {
8e766630 1208 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
ebd93cb6 1209 int r;
c7664c07 1210
ebd93cb6
LP
1211 assert(i);
1212
d94c2b06 1213 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
ebd93cb6
LP
1214 return -EROFS;
1215
30535c16
LP
1216 /* Make sure we don't interfere with a running nspawn */
1217 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
1218 if (r < 0)
1219 return r;
1220
ebd93cb6
LP
1221 switch (i->type) {
1222
1223 case IMAGE_SUBVOLUME:
5bcd08db
LP
1224
1225 /* Note that we set the flag only on the top-level
1226 * subvolume of the image. */
1227
ebd93cb6
LP
1228 r = btrfs_subvol_set_read_only(i->path, b);
1229 if (r < 0)
1230 return r;
01b72568
LP
1231
1232 break;
1233
1234 case IMAGE_DIRECTORY:
1235 /* For simple directory trees we cannot use the access
1236 mode of the top-level directory, since it has an
1237 effect on the container itself. However, we can
1238 use the "immutable" flag, to at least make the
1239 top-level directory read-only. It's not as good as
1240 a read-only subvolume, but at least something, and
13e785f7 1241 we can read the value back. */
01b72568 1242
db9a4254 1243 r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL, NULL);
01b72568
LP
1244 if (r < 0)
1245 return r;
1246
ebd93cb6
LP
1247 break;
1248
aceac2f0 1249 case IMAGE_RAW: {
ebd93cb6
LP
1250 struct stat st;
1251
1252 if (stat(i->path, &st) < 0)
1253 return -errno;
1254
1255 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
1256 return -errno;
f2068bcc
LP
1257
1258 /* If the images is now read-only, it's a good time to
1259 * defrag it, given that no write patterns will
1260 * fragment it again. */
1261 if (b)
1262 (void) btrfs_defrag(i->path);
ebd93cb6
LP
1263 break;
1264 }
1265
eb38edce 1266 case IMAGE_BLOCK: {
254d1313 1267 _cleanup_close_ int fd = -EBADF;
eb38edce
LP
1268 struct stat st;
1269 int state = b;
1270
1271 fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY);
1272 if (fd < 0)
1273 return -errno;
1274
1275 if (fstat(fd, &st) < 0)
1276 return -errno;
1277 if (!S_ISBLK(st.st_mode))
1278 return -ENOTTY;
1279
1280 if (ioctl(fd, BLKROSET, &state) < 0)
1281 return -errno;
1282
1283 break;
1284 }
1285
ebd93cb6 1286 default:
15411c0c 1287 return -EOPNOTSUPP;
ebd93cb6
LP
1288 }
1289
1290 return 0;
08682124
LP
1291}
1292
8759bc95
LP
/* Makes sure the directory that holds the lock files exists. This is best-effort: both results are
 * deliberately ignored, so a failure only shows up later when a lock file is created. */
static void make_lock_dir(void) {
        /* Parent directory, including any missing ancestors ... */
        (void) mkdir_p("/run/systemd/nspawn", 0755);

        /* ... and the lock directory proper, accessible to its owner only. */
        (void) mkdir("/run/systemd/nspawn/locks", 0700);
}
1297
cb1b813f
LP
1298int image_path_lock(
1299 const char *path,
1300 int operation,
1301 LockFile *ret_global,
1302 LockFile *ret_local) {
1303
30535c16
LP
1304 _cleanup_free_ char *p = NULL;
1305 LockFile t = LOCK_FILE_INIT;
1306 struct stat st;
f25bed67 1307 bool exclusive;
30535c16
LP
1308 int r;
1309
1310 assert(path);
cb1b813f 1311 assert(ret_local);
30535c16 1312
f25bed67
LP
1313 /* Locks an image path. This actually creates two locks: one "local" one, next to the image path
1314 * itself, which might be shared via NFS. And another "global" one, in /run, that uses the
1315 * device/inode number. This has the benefit that we can even lock a tree that is a mount point,
1316 * correctly. */
30535c16 1317
30535c16
LP
1318 if (!path_is_absolute(path))
1319 return -EINVAL;
1320
f25bed67
LP
1321 switch (operation & (LOCK_SH|LOCK_EX)) {
1322 case LOCK_SH:
1323 exclusive = false;
1324 break;
1325 case LOCK_EX:
1326 exclusive = true;
1327 break;
1328 default:
1329 return -EINVAL;
1330 }
1331
b6e953f2 1332 if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
cb1b813f
LP
1333 *ret_local = LOCK_FILE_INIT;
1334 if (ret_global)
1335 *ret_global = LOCK_FILE_INIT;
b6e953f2
LP
1336 return 0;
1337 }
1338
f25bed67
LP
1339 /* Prohibit taking exclusive locks on the host image. We can't allow this, since we ourselves are
1340 * running off it after all, and we don't want any images to manipulate the host image. We make an
1341 * exception for shared locks however: we allow those (and make them NOPs since there's no point in
1342 * taking them if there can't be exclusive locks). Strictly speaking these are questionable as well,
1343 * since it means changes made to the host might propagate to the container as they happen (and a
1344 * shared lock kinda suggests that no changes happen at all while it is in place), but it's too
1345 * useful not to allow read-only containers off the host root, hence let's support this, and trust
1346 * the user to do the right thing with this. */
1347 if (path_equal(path, "/")) {
1348 if (exclusive)
1349 return -EBUSY;
1350
cb1b813f
LP
1351 *ret_local = LOCK_FILE_INIT;
1352 if (ret_global)
1353 *ret_global = LOCK_FILE_INIT;
f25bed67
LP
1354 return 0;
1355 }
b6e953f2 1356
cb1b813f
LP
1357 if (ret_global) {
1358 if (stat(path, &st) >= 0) {
1359 if (S_ISBLK(st.st_mode))
1360 r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev));
1361 else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))
1362 r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino);
1363 else
1364 return -ENOTTY;
1365 if (r < 0)
1366 return -ENOMEM;
1367 }
30535c16
LP
1368 }
1369
f25bed67
LP
1370 /* For block devices we don't need the "local" lock, as the major/minor lock above should be
1371 * sufficient, since block devices are host local anyway. */
1372 if (!path_startswith(path, "/dev/")) {
eb38edce 1373 r = make_lock_file_for(path, operation, &t);
8be17c9b 1374 if (r < 0) {
f25bed67 1375 if (!exclusive && r == -EROFS)
771b7ead 1376 log_debug_errno(r, "Failed to create shared lock for '%s', ignoring: %m", path);
8be17c9b
LT
1377 else
1378 return r;
1379 }
eb38edce 1380 }
30535c16
LP
1381
1382 if (p) {
8759bc95 1383 make_lock_dir();
30535c16 1384
cb1b813f 1385 r = make_lock_file(p, operation, ret_global);
30535c16
LP
1386 if (r < 0) {
1387 release_lock_file(&t);
1388 return r;
1389 }
cb1b813f
LP
1390 } else if (ret_global)
1391 *ret_global = LOCK_FILE_INIT;
30535c16 1392
cb1b813f 1393 *ret_local = t;
30535c16
LP
1394 return 0;
1395}
1396
cb81cd80 1397int image_set_limit(Image *i, uint64_t referenced_max) {
d6ce17c7
LP
1398 assert(i);
1399
d94c2b06 1400 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
d6ce17c7
LP
1401 return -EROFS;
1402
1403 if (i->type != IMAGE_SUBVOLUME)
15411c0c 1404 return -EOPNOTSUPP;
d6ce17c7 1405
5bcd08db
LP
1406 /* We set the quota both for the subvolume as well as for the
1407 * subtree. The latter is mostly for historical reasons, since
1408 * we didn't use to have a concept of subtree quota, and hence
1409 * only modified the subvolume quota. */
1410
1411 (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max);
1412 (void) btrfs_subvol_auto_qgroup(i->path, 0, true);
1413 return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max);
d6ce17c7
LP
1414}
1415
84be0c71 1416int image_read_metadata(Image *i, const ImagePolicy *image_policy) {
8e766630 1417 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
c7664c07
LP
1418 int r;
1419
1420 assert(i);
1421
1422 r = image_path_lock(i->path, LOCK_SH|LOCK_NB, &global_lock, &local_lock);
1423 if (r < 0)
1424 return r;
1425
1426 switch (i->type) {
1427
1428 case IMAGE_SUBVOLUME:
1429 case IMAGE_DIRECTORY: {
a81fe93e
LP
1430 _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL, **sysext_release = NULL, **confext_release = NULL;
1431 _cleanup_free_ char *hostname = NULL, *path = NULL;
c7664c07 1432 sd_id128_t machine_id = SD_ID128_NULL;
c7664c07 1433
b60e0f57 1434 if (i->class == IMAGE_SYSEXT) {
6afa5d86
LB
1435 r = extension_has_forbidden_content(i->path);
1436 if (r < 0)
1437 return r;
1438 if (r > 0)
1439 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
1440 "Conflicting content found in image %s, refusing.",
1441 i->name);
1442 }
1443
f461a28d 1444 r = chase("/etc/hostname", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
c7664c07
LP
1445 if (r < 0 && r != -ENOENT)
1446 log_debug_errno(r, "Failed to chase /etc/hostname in image %s: %m", i->name);
1447 else if (r >= 0) {
1448 r = read_etc_hostname(path, &hostname);
1449 if (r < 0)
cc4482ee 1450 log_debug_errno(r, "Failed to read /etc/hostname of image %s: %m", i->name);
c7664c07
LP
1451 }
1452
1453 path = mfree(path);
1454
f7b5f399
YW
1455 r = id128_get_machine(i->path, &machine_id);
1456 if (r < 0)
1457 log_debug_errno(r, "Failed to read machine ID in image %s, ignoring: %m", i->name);
c7664c07 1458
f461a28d 1459 r = chase("/etc/machine-info", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
c7664c07
LP
1460 if (r < 0 && r != -ENOENT)
1461 log_debug_errno(r, "Failed to chase /etc/machine-info in image %s: %m", i->name);
1462 else if (r >= 0) {
aa8fbc74 1463 r = load_env_file_pairs(NULL, path, &machine_info);
c7664c07
LP
1464 if (r < 0)
1465 log_debug_errno(r, "Failed to parse machine-info data of %s: %m", i->name);
1466 }
1467
d58ad743
LP
1468 r = load_os_release_pairs(i->path, &os_release);
1469 if (r < 0)
1470 log_debug_errno(r, "Failed to read os-release in image, ignoring: %m");
c7664c07 1471
a81fe93e 1472 r = load_extension_release_pairs(i->path, IMAGE_SYSEXT, i->name, /* relax_extension_release_check= */ false, &sysext_release);
bcf94222 1473 if (r < 0)
a81fe93e
LP
1474 log_debug_errno(r, "Failed to read sysext-release in image, ignoring: %m");
1475
1476 r = load_extension_release_pairs(i->path, IMAGE_CONFEXT, i->name, /* relax_extension_release_check= */ false, &confext_release);
1477 if (r < 0)
1478 log_debug_errno(r, "Failed to read confext-release in image, ignoring: %m");
bcf94222 1479
c7664c07
LP
1480 free_and_replace(i->hostname, hostname);
1481 i->machine_id = machine_id;
1482 strv_free_and_replace(i->machine_info, machine_info);
1483 strv_free_and_replace(i->os_release, os_release);
a81fe93e
LP
1484 strv_free_and_replace(i->sysext_release, sysext_release);
1485 strv_free_and_replace(i->confext_release, confext_release);
c7664c07
LP
1486 break;
1487 }
1488
1489 case IMAGE_RAW:
1490 case IMAGE_BLOCK: {
1491 _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
1492 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
f4a63ce2
LP
1493 DissectImageFlags flags =
1494 DISSECT_IMAGE_GENERIC_ROOT |
1495 DISSECT_IMAGE_REQUIRE_ROOT |
1496 DISSECT_IMAGE_RELAX_VAR_CHECK |
1497 DISSECT_IMAGE_READ_ONLY |
1498 DISSECT_IMAGE_USR_NO_ROOT |
1499 DISSECT_IMAGE_ADD_PARTITION_DEVICES |
1500 DISSECT_IMAGE_PIN_PARTITION_DEVICES |
1501 DISSECT_IMAGE_VALIDATE_OS |
1502 DISSECT_IMAGE_VALIDATE_OS_EXT |
1503 DISSECT_IMAGE_ALLOW_USERSPACE_VERITY;
1504
1505 r = loop_device_make_by_path(
1506 i->path,
1507 O_RDONLY,
1508 /* sector_size= */ UINT32_MAX,
1509 LO_FLAGS_PARTSCAN,
1510 LOCK_SH,
1511 &d);
41bc4849
LP
1512 if (r < 0)
1513 return r;
1514
bad31660
YW
1515 r = dissect_loop_device(
1516 d,
84be0c71
LP
1517 /* verity= */ NULL,
1518 /* mount_options= */ NULL,
1519 image_policy,
f4a63ce2 1520 flags,
75dc190d 1521 &m);
c7664c07
LP
1522 if (r < 0)
1523 return r;
1524
44e3097d
LP
1525 r = dissected_image_acquire_metadata(
1526 m,
1527 /* userns_fd= */ -EBADF,
1528 flags);
c7664c07
LP
1529 if (r < 0)
1530 return r;
1531
1532 free_and_replace(i->hostname, m->hostname);
1533 i->machine_id = m->machine_id;
1534 strv_free_and_replace(i->machine_info, m->machine_info);
1535 strv_free_and_replace(i->os_release, m->os_release);
a81fe93e
LP
1536 strv_free_and_replace(i->sysext_release, m->sysext_release);
1537 strv_free_and_replace(i->confext_release, m->confext_release);
c7664c07
LP
1538
1539 break;
1540 }
1541
1542 default:
1543 return -EOPNOTSUPP;
1544 }
1545
1546 i->metadata_valid = true;
1547
1548 return 0;
1549}
1550
30535c16 1551int image_name_lock(const char *name, int operation, LockFile *ret) {
99d97afc
LP
1552 const char *p;
1553
30535c16
LP
1554 assert(name);
1555 assert(ret);
1556
1557 /* Locks an image name, regardless of the precise path used. */
1558
99d97afc
LP
1559 if (streq(name, ".host"))
1560 return -EBUSY;
1561
30535c16
LP
1562 if (!image_name_is_valid(name))
1563 return -EINVAL;
1564
b6e953f2
LP
1565 if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
1566 *ret = (LockFile) LOCK_FILE_INIT;
1567 return 0;
1568 }
1569
8759bc95 1570 make_lock_dir();
99d97afc
LP
1571
1572 p = strjoina("/run/systemd/nspawn/locks/name-", name);
30535c16
LP
1573 return make_lock_file(p, operation, ret);
1574}
1575
d577d4a4
LP
1576bool image_in_search_path(
1577 ImageClass class,
1578 const char *root,
1579 const char *image) {
1580
ace9ab19
LP
1581 assert(image);
1582
73740c9f 1583 NULSTR_FOREACH(path, pick_image_search_path(class)) {
d577d4a4 1584 const char *p, *q;
ace9ab19
LP
1585 size_t k;
1586
d577d4a4
LP
1587 if (!empty_or_root(root)) {
1588 q = path_startswith(path, root);
1589 if (!q)
1590 continue;
1591 } else
1592 q = path;
1593
1594 p = path_startswith(q, path);
ace9ab19
LP
1595 if (!p)
1596 continue;
1597
1598 /* Make sure there's a filename following */
1599 k = strcspn(p, "/");
1600 if (k == 0)
1601 continue;
1602
1603 p += k;
1604
1605 /* Accept trailing slashes */
1606 if (p[strspn(p, "/")] == 0)
1607 return true;
ace9ab19
LP
1608 }
1609
1610 return false;
1611}
1612
f5151fb4
LP
1613int image_to_json(const struct Image *img, JsonVariant **ret) {
1614 assert(img);
1615
1616 return json_build(ret,
1617 JSON_BUILD_OBJECT(
1618 JSON_BUILD_PAIR_STRING("Type", image_type_to_string(img->type)),
1619 JSON_BUILD_PAIR_STRING("Class", image_class_to_string(img->class)),
1620 JSON_BUILD_PAIR_STRING("Name", img->name),
1621 JSON_BUILD_PAIR_CONDITION(img->path, "Path", JSON_BUILD_STRING(img->path)),
1622 JSON_BUILD_PAIR_BOOLEAN("ReadOnly", img->read_only),
1623 JSON_BUILD_PAIR_CONDITION(img->crtime != 0, "CreationTimestamp", JSON_BUILD_UNSIGNED(img->crtime)),
1624 JSON_BUILD_PAIR_CONDITION(img->mtime != 0, "ModificationTimestamp", JSON_BUILD_UNSIGNED(img->mtime)),
1625 JSON_BUILD_PAIR_CONDITION(img->usage != UINT64_MAX, "Usage", JSON_BUILD_UNSIGNED(img->usage)),
1626 JSON_BUILD_PAIR_CONDITION(img->usage_exclusive != UINT64_MAX, "UsageExclusive", JSON_BUILD_UNSIGNED(img->usage_exclusive)),
1627 JSON_BUILD_PAIR_CONDITION(img->limit != UINT64_MAX, "Limit", JSON_BUILD_UNSIGNED(img->limit)),
1628 JSON_BUILD_PAIR_CONDITION(img->limit_exclusive != UINT64_MAX, "LimitExclusive", JSON_BUILD_UNSIGNED(img->limit_exclusive))));
1629}
1630
cd61c3bf
LP
1631static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
1632 [IMAGE_DIRECTORY] = "directory",
1633 [IMAGE_SUBVOLUME] = "subvolume",
25cdbd04
LP
1634 [IMAGE_RAW] = "raw",
1635 [IMAGE_BLOCK] = "block",
cd61c3bf
LP
1636};
1637
1638DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);