]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/discover-image.c
cryptsetup: mention correct action in log message
[thirdparty/systemd.git] / src / shared / discover-image.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
cd61c3bf 2
a8fbdf54 3#include <errno.h>
ebd93cb6 4#include <fcntl.h>
e08f94ac
LP
5#include <linux/fs.h>
6#include <linux/loop.h>
65ddc2c5 7#include <linux/magic.h>
a8fbdf54
TA
8#include <stdio.h>
9#include <stdlib.h>
e306723e 10#include <sys/file.h>
204f52e3 11#include <sys/ioctl.h>
a8fbdf54
TA
12#include <sys/stat.h>
13#include <unistd.h>
546dbec5 14
b5efdb8a 15#include "alloc-util.h"
01db9c85 16#include "blockdev-util.h"
cd61c3bf 17#include "btrfs-util.h"
f461a28d 18#include "chase.h"
c8b3094d 19#include "chattr-util.h"
ebd93cb6 20#include "copy.h"
a0956174 21#include "dirent-util.h"
57f1b61b 22#include "discover-image.h"
c7664c07 23#include "dissect-image.h"
686d13b9 24#include "env-file.h"
b6e953f2 25#include "env-util.h"
6afa5d86 26#include "extension-util.h"
3ffd4af2 27#include "fd-util.h"
f4f15635 28#include "fs-util.h"
a8fbdf54 29#include "hashmap.h"
e2054217 30#include "hostname-setup.h"
c7664c07 31#include "id128-util.h"
73740c9f 32#include "initrd-util.h"
64e89f56 33#include "lock-util.h"
a8fbdf54 34#include "log.h"
c7664c07 35#include "loop-util.h"
546dbec5 36#include "macro.h"
30535c16 37#include "mkdir.h"
d8b4d14d 38#include "nulstr-util.h"
d58ad743 39#include "os-util.h"
8e0b6570 40#include "path-util.h"
c6878637 41#include "rm-rf.h"
65ddc2c5 42#include "stat-util.h"
8b43440b 43#include "string-table.h"
07630cea 44#include "string-util.h"
8e0b6570 45#include "strv.h"
a8fbdf54 46#include "time-util.h"
8e0b6570 47#include "utf8.h"
a5ecdf7c 48#include "vpick.h"
89a5a90c 49#include "xattr-util.h"
cd61c3bf 50
f7178a04 51const char* const image_search_path[_IMAGE_CLASS_MAX] = {
9bca4ae4
LP
52 [IMAGE_MACHINE] = "/etc/machines\0" /* only place symlinks here */
53 "/run/machines\0" /* and here too */
54 "/var/lib/machines\0" /* the main place for images */
55 "/var/lib/container\0" /* legacy */
56 "/usr/local/lib/machines\0"
57 "/usr/lib/machines\0",
5ef46e5f 58
9bca4ae4
LP
59 [IMAGE_PORTABLE] = "/etc/portables\0" /* only place symlinks here */
60 "/run/portables\0" /* and here too */
61 "/var/lib/portables\0" /* the main place for images */
62 "/usr/local/lib/portables\0"
63 "/usr/lib/portables\0",
64
de862276
LB
65 /* Note that we don't allow storing extensions under /usr/, unlike with other image types. That's
66 * because extension images are supposed to extend /usr/, so you get into recursive races, especially
67 * with directory-based extensions, as the kernel's OverlayFS explicitly checks for this and errors
68 * out with -ELOOP if it finds that a lowerdir= is a child of another lowerdir=. */
b60e0f57 69 [IMAGE_SYSEXT] = "/etc/extensions\0" /* only place symlinks here */
70 "/run/extensions\0" /* and here too */
71 "/var/lib/extensions\0", /* the main place for images */
72
73 [IMAGE_CONFEXT] = "/run/confexts\0" /* only place symlinks here */
74 "/var/lib/confexts\0" /* the main place for images */
75 "/usr/local/lib/confexts\0"
76 "/usr/lib/confexts\0",
5ef46e5f 77};
c2ce6a3d 78
d4fee894
LP
79/* Inside the initrd, use a slightly different set of search path (i.e. include .extra/sysext/ and
80 * .extra/confext/ in extension search dir) */
73740c9f
LP
81static const char* const image_search_path_initrd[_IMAGE_CLASS_MAX] = {
82 /* (entries that aren't listed here will get the same search path as for the non initrd-case) */
83
b151e696
LP
84 [IMAGE_SYSEXT] = "/etc/extensions\0" /* only place symlinks here */
85 "/run/extensions\0" /* and here too */
86 "/var/lib/extensions\0" /* the main place for images */
d4fee894
LP
87 "/.extra/sysext\0", /* put sysext picked up by systemd-stub last, since not trusted */
88
89 [IMAGE_CONFEXT] = "/run/confexts\0" /* only place symlinks here */
90 "/var/lib/confexts\0" /* the main place for images */
91 "/usr/local/lib/confexts\0"
92 "/.extra/confext\0", /* put confext picked up by systemd-stub last, since not trusted */
73740c9f
LP
93};
94
a747994b
LP
95static const char* image_class_suffix_table[_IMAGE_CLASS_MAX] = {
96 [IMAGE_SYSEXT] = ".sysext",
97 [IMAGE_CONFEXT] = ".confext",
98};
99
100DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(image_class_suffix, ImageClass);
101
7af5785d
LP
102static const char *const image_root_table[_IMAGE_CLASS_MAX] = {
103 [IMAGE_MACHINE] = "/var/lib/machines",
104 [IMAGE_PORTABLE] = "/var/lib/portables",
105 [IMAGE_SYSEXT] = "/var/lib/extensions",
106 [IMAGE_CONFEXT] = "/var/lib/confexts",
107};
108
109DEFINE_STRING_TABLE_LOOKUP_TO_STRING(image_root, ImageClass);
110
8301aa0b
YW
111static Image *image_free(Image *i) {
112 assert(i);
9614bb06 113
cd61c3bf
LP
114 free(i->name);
115 free(i->path);
c7664c07
LP
116
117 free(i->hostname);
118 strv_free(i->machine_info);
119 strv_free(i->os_release);
a81fe93e
LP
120 strv_free(i->sysext_release);
121 strv_free(i->confext_release);
c7664c07 122
6b430fdb 123 return mfree(i);
cd61c3bf
LP
124}
125
8301aa0b 126DEFINE_TRIVIAL_REF_UNREF_FUNC(Image, image, image_free);
b07ec5a1
YW
127DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(image_hash_ops, char, string_hash_func, string_compare_func,
128 Image, image_unref);
9614bb06 129
8e0b6570
LP
130static char **image_settings_path(Image *image) {
131 _cleanup_strv_free_ char **l = NULL;
162f6477
LP
132 _cleanup_free_ char *fn = NULL;
133 size_t i = 0;
134 int r;
8e0b6570
LP
135
136 assert(image);
137
138 l = new0(char*, 4);
139 if (!l)
140 return NULL;
141
162f6477
LP
142 fn = strjoin(image->name, ".nspawn");
143 if (!fn)
144 return NULL;
8e0b6570 145
b910cc72
LP
146 FOREACH_STRING(s, "/etc/systemd/nspawn", "/run/systemd/nspawn") {
147 l[i] = path_join(s, fn);
8e0b6570
LP
148 if (!l[i])
149 return NULL;
150
151 i++;
152 }
153
162f6477
LP
154 r = file_in_same_dir(image->path, fn, l + i);
155 if (r == -ENOMEM)
8e0b6570 156 return NULL;
162f6477
LP
157 if (r < 0)
158 log_debug_errno(r, "Failed to generate .nspawn settings path from image path, ignoring: %m");
159
160 strv_uniq(l);
8e0b6570 161
ae2a15bc 162 return TAKE_PTR(l);
8e0b6570
LP
163}
164
162f6477
LP
165static int image_roothash_path(Image *image, char **ret) {
166 _cleanup_free_ char *fn = NULL;
bafbac4e
LP
167
168 assert(image);
169
162f6477
LP
170 fn = strjoin(image->name, ".roothash");
171 if (!fn)
172 return -ENOMEM;
bafbac4e 173
162f6477 174 return file_in_same_dir(image->path, fn, ret);
bafbac4e
LP
175}
176
c2ce6a3d 177static int image_new(
cd61c3bf 178 ImageType t,
3775e141 179 ImageClass c,
5fc7f358 180 const char *pretty,
cd61c3bf 181 const char *path,
5fc7f358 182 const char *filename,
cd61c3bf 183 bool read_only,
10f9c755 184 usec_t crtime,
cd61c3bf 185 usec_t mtime,
c2ce6a3d 186 Image **ret) {
cd61c3bf
LP
187
188 _cleanup_(image_unrefp) Image *i = NULL;
cd61c3bf 189
cd61c3bf
LP
190 assert(t >= 0);
191 assert(t < _IMAGE_TYPE_MAX);
5fc7f358
LP
192 assert(pretty);
193 assert(filename);
c2ce6a3d 194 assert(ret);
cd61c3bf 195
c2108701 196 i = new(Image, 1);
cd61c3bf
LP
197 if (!i)
198 return -ENOMEM;
199
c2108701
LP
200 *i = (Image) {
201 .n_ref = 1,
202 .type = t,
3775e141 203 .class = c,
c2108701
LP
204 .read_only = read_only,
205 .crtime = crtime,
206 .mtime = mtime,
207 .usage = UINT64_MAX,
208 .usage_exclusive = UINT64_MAX,
209 .limit = UINT64_MAX,
210 .limit_exclusive = UINT64_MAX,
211 };
cd61c3bf 212
5fc7f358 213 i->name = strdup(pretty);
cd61c3bf
LP
214 if (!i->name)
215 return -ENOMEM;
216
657ee2d8 217 i->path = path_join(path, filename);
5fc7f358
LP
218 if (!i->path)
219 return -ENOMEM;
220
4ff361cc 221 path_simplify(i->path);
cd61c3bf 222
1cc6c93a 223 *ret = TAKE_PTR(i);
c2ce6a3d 224
cd61c3bf
LP
225 return 0;
226}
227
a5ecdf7c 228static int extract_image_basename(
a747994b 229 const char *path,
a5ecdf7c
LP
230 const char *class_suffix, /* e.g. ".sysext" (this is an optional suffix) */
231 char **format_suffixes, /* e.g. ".raw" (one of these will be required) */
232 char **ret_basename,
233 char **ret_suffix) {
a747994b 234
a5ecdf7c 235 _cleanup_free_ char *name = NULL, *suffix = NULL;
a747994b 236 int r;
4756c94e
LP
237
238 assert(path);
4756c94e 239
a747994b
LP
240 r = path_extract_filename(path, &name);
241 if (r < 0)
242 return r;
4756c94e 243
a5ecdf7c 244 if (format_suffixes) {
2e6f012b 245 char *e = endswith_strv(name, format_suffixes);
a747994b 246 if (!e) /* Format suffix is required */
4756c94e
LP
247 return -EINVAL;
248
a5ecdf7c
LP
249 if (ret_suffix) {
250 suffix = strdup(e);
251 if (!suffix)
252 return -ENOMEM;
253 }
254
4756c94e
LP
255 *e = 0;
256 }
257
a747994b
LP
258 if (class_suffix) {
259 char *e = endswith(name, class_suffix);
a5ecdf7c
LP
260 if (e) { /* Class suffix is optional */
261 if (ret_suffix) {
262 _cleanup_free_ char *j = strjoin(e, suffix);
263 if (!j)
264 return -ENOMEM;
265
266 free_and_replace(suffix, j);
267 }
268
a747994b 269 *e = 0;
a5ecdf7c 270 }
a747994b
LP
271 }
272
4756c94e
LP
273 if (!image_name_is_valid(name))
274 return -EINVAL;
275
a5ecdf7c
LP
276 if (ret_suffix)
277 *ret_suffix = TAKE_PTR(suffix);
278
279 if (ret_basename)
280 *ret_basename = TAKE_PTR(name);
281
4756c94e
LP
282 return 0;
283}
284
96ac6d3f
YW
285static int image_update_quota(Image *i, int fd) {
286 _cleanup_close_ int fd_close = -EBADF;
287 int r;
288
289 assert(i);
290
291 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
292 return -EROFS;
293
294 if (i->type != IMAGE_SUBVOLUME)
295 return -EOPNOTSUPP;
296
297 if (fd < 0) {
298 fd_close = open(i->path, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
299 if (fd_close < 0)
300 return -errno;
301 fd = fd_close;
302 }
303
304 r = btrfs_quota_scan_ongoing(fd);
305 if (r < 0)
306 return r;
307 if (r > 0)
308 return 0;
309
310 BtrfsQuotaInfo quota;
311 r = btrfs_subvol_get_subtree_quota_fd(fd, 0, &quota);
312 if (r < 0)
313 return r;
314
315 i->usage = quota.referenced;
316 i->usage_exclusive = quota.exclusive;
317 i->limit = quota.referenced_max;
318 i->limit_exclusive = quota.exclusive_max;
319
320 return 1;
321}
322
5fc7f358 323static int image_make(
3775e141 324 ImageClass c,
5fc7f358
LP
325 const char *pretty,
326 int dfd,
327 const char *path,
328 const char *filename,
3a6ce860 329 const struct stat *st,
5fc7f358
LP
330 Image **ret) {
331
2570578d 332 _cleanup_free_ char *pretty_buffer = NULL, *parent = NULL;
3a6ce860 333 struct stat stbuf;
5fc7f358 334 bool read_only;
cd61c3bf
LP
335 int r;
336
3a6ce860 337 assert(dfd >= 0 || dfd == AT_FDCWD);
bcb846f3 338 assert(path || dfd == AT_FDCWD);
5fc7f358 339 assert(filename);
cd61c3bf 340
eb38edce 341 /* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block
3a6ce860
LP
342 * devices into /var/lib/machines/, and treat them normally.
343 *
344 * This function returns -ENOENT if we can't find the image after all, and -EMEDIUMTYPE if it's not a file we
345 * recognize. */
cd61c3bf 346
3a6ce860
LP
347 if (!st) {
348 if (fstatat(dfd, filename, &stbuf, 0) < 0)
349 return -errno;
350
351 st = &stbuf;
352 }
cd61c3bf 353
2570578d
LP
354 if (!path) {
355 if (dfd == AT_FDCWD)
356 (void) safe_getcwd(&parent);
357 else
358 (void) fd_get_path(dfd, &parent);
359 }
bcb846f3 360
5fc7f358
LP
361 read_only =
362 (path && path_startswith(path, "/usr")) ||
08ff5529 363 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
86e339c8 364
3a6ce860 365 if (S_ISDIR(st->st_mode)) {
254d1313 366 _cleanup_close_ int fd = -EBADF;
01b72568 367 unsigned file_attr = 0;
02dcf215 368 usec_t crtime = 0;
cd61c3bf 369
c2ce6a3d 370 if (!ret)
3a6ce860 371 return 0;
cd61c3bf 372
4756c94e 373 if (!pretty) {
a5ecdf7c
LP
374 r = extract_image_basename(
375 filename,
376 image_class_suffix_to_string(c),
377 /* format_suffix= */ NULL,
378 &pretty_buffer,
379 /* ret_suffix= */ NULL);
4756c94e
LP
380 if (r < 0)
381 return r;
382
383 pretty = pretty_buffer;
384 }
5fc7f358 385
01b72568
LP
386 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
387 if (fd < 0)
388 return -errno;
389
674b04ff 390 if (btrfs_might_be_subvol(st)) {
cd61c3bf 391
65ddc2c5 392 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
21222ea5
LP
393 if (r < 0)
394 return r;
79de6eb1 395 if (r > 0) {
10f9c755 396 BtrfsSubvolInfo info;
cd61c3bf 397
c2ce6a3d 398 /* It's a btrfs subvolume */
cd61c3bf 399
5bcd08db 400 r = btrfs_subvol_get_info_fd(fd, 0, &info);
10f9c755
LP
401 if (r < 0)
402 return r;
c2ce6a3d
LP
403
404 r = image_new(IMAGE_SUBVOLUME,
3775e141 405 c,
5fc7f358 406 pretty,
c2ce6a3d 407 path,
5fc7f358
LP
408 filename,
409 info.read_only || read_only,
10f9c755 410 info.otime,
c2ce6a3d 411 0,
c2ce6a3d
LP
412 ret);
413 if (r < 0)
414 return r;
415
96ac6d3f 416 (void) image_update_quota(*ret, fd);
3a6ce860 417 return 0;
cd61c3bf 418 }
c2ce6a3d 419 }
cd61c3bf 420
02dcf215 421 /* Get directory creation time (not available everywhere, but that's OK */
12a7f04a 422 (void) fd_getcrtime(fd, &crtime);
02dcf215
LP
423
424 /* If the IMMUTABLE bit is set, we consider the directory read-only. Since the ioctl is not
01b72568
LP
425 * supported everywhere we ignore failures. */
426 (void) read_attr_fd(fd, &file_attr);
cd61c3bf 427
01b72568 428 /* It's just a normal directory. */
c2ce6a3d 429 r = image_new(IMAGE_DIRECTORY,
3775e141 430 c,
5fc7f358 431 pretty,
c2ce6a3d 432 path,
5fc7f358 433 filename,
01b72568 434 read_only || (file_attr & FS_IMMUTABLE_FL),
02dcf215
LP
435 crtime,
436 0, /* we don't use mtime of stat() here, since it's not the time of last change of the tree, but only of the top-level dir */
c2ce6a3d
LP
437 ret);
438 if (r < 0)
439 return r;
cd61c3bf 440
3a6ce860 441 return 0;
cd61c3bf 442
3a6ce860 443 } else if (S_ISREG(st->st_mode) && endswith(filename, ".raw")) {
10f9c755 444 usec_t crtime = 0;
cd61c3bf 445
aceac2f0 446 /* It's a RAW disk image */
cd61c3bf 447
c2ce6a3d 448 if (!ret)
3a6ce860 449 return 0;
cd61c3bf 450
c53e07e2 451 (void) fd_getcrtime_at(dfd, filename, AT_SYMLINK_FOLLOW, &crtime);
10f9c755 452
4756c94e 453 if (!pretty) {
a5ecdf7c
LP
454 r = extract_image_basename(
455 filename,
456 image_class_suffix_to_string(c),
457 STRV_MAKE(".raw"),
458 &pretty_buffer,
459 /* ret_suffix= */ NULL);
4756c94e
LP
460 if (r < 0)
461 return r;
462
463 pretty = pretty_buffer;
464 }
10f9c755 465
aceac2f0 466 r = image_new(IMAGE_RAW,
3775e141 467 c,
5fc7f358 468 pretty,
c2ce6a3d 469 path,
5fc7f358 470 filename,
3a6ce860 471 !(st->st_mode & 0222) || read_only,
10f9c755 472 crtime,
3a6ce860 473 timespec_load(&st->st_mtim),
c2ce6a3d
LP
474 ret);
475 if (r < 0)
476 return r;
cd61c3bf 477
3a6ce860
LP
478 (*ret)->usage = (*ret)->usage_exclusive = st->st_blocks * 512;
479 (*ret)->limit = (*ret)->limit_exclusive = st->st_size;
b6b18498 480
3a6ce860 481 return 0;
eb38edce 482
3a6ce860 483 } else if (S_ISBLK(st->st_mode)) {
254d1313 484 _cleanup_close_ int block_fd = -EBADF;
eb38edce
LP
485 uint64_t size = UINT64_MAX;
486
487 /* A block device */
488
489 if (!ret)
3a6ce860 490 return 0;
eb38edce 491
4756c94e 492 if (!pretty) {
a5ecdf7c
LP
493 r = extract_image_basename(
494 filename,
495 /* class_suffix= */ NULL,
496 /* format_suffix= */ NULL,
497 &pretty_buffer,
498 /* ret_suffix= */ NULL);
4756c94e
LP
499 if (r < 0)
500 return r;
501
502 pretty = pretty_buffer;
503 }
eb38edce
LP
504
505 block_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
506 if (block_fd < 0)
2570578d 507 log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", path ?: strnull(parent), filename);
eb38edce 508 else {
3a6ce860
LP
509 /* Refresh stat data after opening the node */
510 if (fstat(block_fd, &stbuf) < 0)
eb38edce 511 return -errno;
3a6ce860
LP
512 st = &stbuf;
513
514 if (!S_ISBLK(st->st_mode)) /* Verify that what we opened is actually what we think it is */
eb38edce
LP
515 return -ENOTTY;
516
517 if (!read_only) {
518 int state = 0;
519
520 if (ioctl(block_fd, BLKROGET, &state) < 0)
2570578d 521 log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", path ?: strnull(parent), filename);
eb38edce
LP
522 else if (state)
523 read_only = true;
524 }
525
01db9c85
LP
526 r = blockdev_get_device_size(block_fd, &size);
527 if (r < 0)
528 log_debug_errno(r, "Failed to issue BLKGETSIZE64 on device %s/%s, ignoring: %m", path ?: strnull(parent), filename);
eb38edce
LP
529
530 block_fd = safe_close(block_fd);
531 }
532
533 r = image_new(IMAGE_BLOCK,
3775e141 534 c,
eb38edce
LP
535 pretty,
536 path,
537 filename,
3a6ce860 538 !(st->st_mode & 0222) || read_only,
eb38edce
LP
539 0,
540 0,
541 ret);
542 if (r < 0)
543 return r;
544
ed0cb346 545 if (!IN_SET(size, 0, UINT64_MAX))
eb38edce
LP
546 (*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size;
547
3a6ce860 548 return 0;
c2ce6a3d 549 }
cd61c3bf 550
3a6ce860 551 return -EMEDIUMTYPE;
c2ce6a3d 552}
cd61c3bf 553
73740c9f
LP
554static const char *pick_image_search_path(ImageClass class) {
555 if (class < 0 || class >= _IMAGE_CLASS_MAX)
556 return NULL;
557
558 /* Use the initrd search path if there is one, otherwise use the common one */
559 return in_initrd() && image_search_path_initrd[class] ? image_search_path_initrd[class] : image_search_path[class];
560}
561
a5ecdf7c
LP
562static char **make_possible_filenames(ImageClass class, const char *image_name) {
563 _cleanup_strv_free_ char **l = NULL;
564
565 assert(image_name);
566
567 FOREACH_STRING(v_suffix, "", ".v")
568 FOREACH_STRING(format_suffix, "", ".raw") {
569 _cleanup_free_ char *j = NULL;
570 const char *class_suffix;
571
572 class_suffix = image_class_suffix_to_string(class);
573 if (class_suffix) {
574 j = strjoin(image_name, class_suffix, format_suffix, v_suffix);
575 if (!j)
576 return NULL;
577
578 if (strv_consume(&l, TAKE_PTR(j)) < 0)
579 return NULL;
580 }
581
582 j = strjoin(image_name, format_suffix, v_suffix);
583 if (!j)
584 return NULL;
585
586 if (strv_consume(&l, TAKE_PTR(j)) < 0)
587 return NULL;
588 }
589
590 return TAKE_PTR(l);
591}
592
d577d4a4
LP
593int image_find(ImageClass class,
594 const char *name,
595 const char *root,
596 Image **ret) {
597
c2ce6a3d 598 int r;
cd61c3bf 599
5ef46e5f
LP
600 assert(class >= 0);
601 assert(class < _IMAGE_CLASS_MAX);
c2ce6a3d 602 assert(name);
cd61c3bf 603
c2ce6a3d
LP
604 /* There are no images with invalid names */
605 if (!image_name_is_valid(name))
3a6ce860 606 return -ENOENT;
cd61c3bf 607
a5ecdf7c
LP
608 _cleanup_strv_free_ char **names = make_possible_filenames(class, name);
609 if (!names)
610 return -ENOMEM;
611
73740c9f 612 NULSTR_FOREACH(path, pick_image_search_path(class)) {
d577d4a4 613 _cleanup_free_ char *resolved = NULL;
c2ce6a3d 614 _cleanup_closedir_ DIR *d = NULL;
3a6ce860 615 struct stat st;
d577d4a4 616 int flags;
cd61c3bf 617
f461a28d 618 r = chase_and_opendir(path, root, CHASE_PREFIX_ROOT, &resolved, &d);
d577d4a4
LP
619 if (r == -ENOENT)
620 continue;
621 if (r < 0)
622 return r;
cd61c3bf 623
d577d4a4
LP
624 /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people
625 * to symlink block devices into the search path. (For now, we disable that when operating
626 * relative to some root directory.) */
627 flags = root ? AT_SYMLINK_NOFOLLOW : 0;
5fc7f358 628
a5ecdf7c
LP
629 STRV_FOREACH(n, names) {
630 _cleanup_free_ char *fname_buf = NULL;
631 const char *fname = *n;
3a6ce860 632
a5ecdf7c
LP
633 if (fstatat(dirfd(d), fname, &st, flags) < 0) {
634 if (errno != ENOENT)
635 return -errno;
5fc7f358 636
a5ecdf7c
LP
637 continue; /* Vanished while we were looking at it */
638 }
639
640 if (endswith(fname, ".raw")) {
641 if (!S_ISREG(st.st_mode)) {
642 log_debug("Ignoring non-regular file '%s' with .raw suffix.", fname);
3a6ce860 643 continue;
a5ecdf7c 644 }
3a6ce860 645
a5ecdf7c 646 } else if (endswith(fname, ".v")) {
3a6ce860 647
a5ecdf7c
LP
648 if (!S_ISDIR(st.st_mode)) {
649 log_debug("Ignoring non-directory file '%s' with .v suffix.", fname);
650 continue;
651 }
652
653 _cleanup_free_ char *suffix = NULL;
654 suffix = strdup(ASSERT_PTR(startswith(fname, name)));
655 if (!suffix)
656 return -ENOMEM;
657
658 *ASSERT_PTR(endswith(suffix, ".v")) = 0;
659
660 _cleanup_free_ char *vp = path_join(resolved, fname);
661 if (!vp)
662 return -ENOMEM;
663
664 PickFilter filter = {
665 .type_mask = endswith(suffix, ".raw") ? (UINT32_C(1) << DT_REG) | (UINT32_C(1) << DT_BLK) : (UINT32_C(1) << DT_DIR),
666 .basename = name,
667 .architecture = _ARCHITECTURE_INVALID,
421a4ba7 668 .suffix = STRV_MAKE(suffix),
a5ecdf7c
LP
669 };
670
671 _cleanup_(pick_result_done) PickResult result = PICK_RESULT_NULL;
672 r = path_pick(root,
673 /* toplevel_fd= */ AT_FDCWD,
674 vp,
675 &filter,
676 PICK_ARCHITECTURE|PICK_TRIES,
677 &result);
678 if (r < 0) {
679 log_debug_errno(r, "Failed to pick versioned image on '%s', skipping: %m", vp);
680 continue;
681 }
682 if (!result.path) {
683 log_debug("Found versioned directory '%s', without matching entry, skipping: %m", vp);
684 continue;
685 }
686
687 /* Refresh the stat data for the discovered target */
688 st = result.st;
689
690 _cleanup_free_ char *bn = NULL;
691 r = path_extract_filename(result.path, &bn);
692 if (r < 0) {
693 log_debug_errno(r, "Failed to extract basename of image path '%s', skipping: %m", result.path);
694 continue;
695 }
696
697 fname_buf = path_join(fname, bn);
698 if (!fname_buf)
699 return log_oom();
3a6ce860 700
a5ecdf7c 701 fname = fname_buf;
3a6ce860 702
a5ecdf7c
LP
703 } else if (!S_ISDIR(st.st_mode) && !S_ISBLK(st.st_mode)) {
704 log_debug("Ignoring non-directory and non-block device file '%s' without suffix.", fname);
3a6ce860 705 continue;
a5ecdf7c 706 }
3a6ce860 707
a5ecdf7c
LP
708 r = image_make(class, name, dirfd(d), resolved, fname, &st, ret);
709 if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
710 continue;
711 if (r < 0)
712 return r;
cd61c3bf 713
a5ecdf7c
LP
714 if (ret)
715 (*ret)->discoverable = true;
cf604fd4 716
a5ecdf7c
LP
717 return 1;
718 }
c2ce6a3d
LP
719 }
720
cf604fd4 721 if (class == IMAGE_MACHINE && streq(name, ".host")) {
3775e141 722 r = image_make(class, ".host", AT_FDCWD, NULL, empty_to_root(root), NULL, ret);
cf604fd4
LP
723 if (r < 0)
724 return r;
725
726 if (ret)
727 (*ret)->discoverable = true;
728
a5ecdf7c 729 return 1;
cf604fd4 730 }
5fc7f358 731
3a6ce860 732 return -ENOENT;
c2ce6a3d
LP
733};
734
2ddf182b 735int image_from_path(const char *path, Image **ret) {
cf604fd4
LP
736
737 /* Note that we don't set the 'discoverable' field of the returned object, because we don't check here whether
738 * the image is in the image search path. And if it is we don't know if the path we used is actually not
3fe91079 739 * overridden by another, different image earlier in the search path */
cf604fd4 740
2ddf182b 741 if (path_equal(path, "/"))
3775e141 742 return image_make(IMAGE_MACHINE, ".host", AT_FDCWD, NULL, "/", NULL, ret);
2ddf182b 743
3775e141 744 return image_make(_IMAGE_CLASS_INVALID, NULL, AT_FDCWD, NULL, path, NULL, ret);
2ddf182b
LP
745}
746
d577d4a4 747int image_find_harder(ImageClass class, const char *name_or_path, const char *root, Image **ret) {
2ddf182b 748 if (image_name_is_valid(name_or_path))
d577d4a4 749 return image_find(class, name_or_path, root, ret);
2ddf182b
LP
750
751 return image_from_path(name_or_path, ret);
752}
753
d577d4a4
LP
754int image_discover(
755 ImageClass class,
756 const char *root,
757 Hashmap *h) {
758
c2ce6a3d
LP
759 int r;
760
5ef46e5f
LP
761 assert(class >= 0);
762 assert(class < _IMAGE_CLASS_MAX);
c2ce6a3d
LP
763 assert(h);
764
73740c9f 765 NULSTR_FOREACH(path, pick_image_search_path(class)) {
d577d4a4 766 _cleanup_free_ char *resolved = NULL;
c2ce6a3d 767 _cleanup_closedir_ DIR *d = NULL;
c2ce6a3d 768
f461a28d 769 r = chase_and_opendir(path, root, CHASE_PREFIX_ROOT, &resolved, &d);
d577d4a4
LP
770 if (r == -ENOENT)
771 continue;
772 if (r < 0)
773 return r;
c2ce6a3d
LP
774
775 FOREACH_DIRENT_ALL(de, d, return -errno) {
a5ecdf7c 776 _cleanup_free_ char *pretty = NULL, *fname_buf = NULL;
c2ce6a3d 777 _cleanup_(image_unrefp) Image *image = NULL;
a5ecdf7c 778 const char *fname = de->d_name;
3a6ce860 779 struct stat st;
d577d4a4 780 int flags;
c2ce6a3d 781
a5ecdf7c 782 if (dot_or_dot_dot(fname))
c2ce6a3d
LP
783 continue;
784
d577d4a4
LP
785 /* As mentioned above, we follow symlinks on this fstatat(), because we want to
786 * permit people to symlink block devices into the search path. */
787 flags = root ? AT_SYMLINK_NOFOLLOW : 0;
a5ecdf7c 788 if (fstatat(dirfd(d), fname, &st, flags) < 0) {
3a6ce860
LP
789 if (errno == ENOENT)
790 continue;
791
792 return -errno;
793 }
794
a5ecdf7c
LP
795 if (S_ISREG(st.st_mode)) {
796 r = extract_image_basename(
797 fname,
798 image_class_suffix_to_string(class),
799 STRV_MAKE(".raw"),
800 &pretty,
801 /* suffix= */ NULL);
802 if (r < 0) {
803 log_debug_errno(r, "Skipping directory entry '%s', which doesn't look like an image.", fname);
804 continue;
805 }
806 } else if (S_ISDIR(st.st_mode)) {
807 const char *v;
808
809 v = endswith(fname, ".v");
810 if (v) {
811 _cleanup_free_ char *suffix = NULL, *nov = NULL;
812
813 nov = strndup(fname, v - fname); /* Chop off the .v */
814 if (!nov)
815 return -ENOMEM;
816
817 r = extract_image_basename(
818 nov,
819 image_class_suffix_to_string(class),
820 STRV_MAKE(".raw", ""),
821 &pretty,
822 &suffix);
823 if (r < 0) {
824 log_debug_errno(r, "Skipping directory entry '%s', which doesn't look like a versioned image.", fname);
825 continue;
826 }
827
828 _cleanup_free_ char *vp = path_join(resolved, fname);
829 if (!vp)
830 return -ENOMEM;
831
832 PickFilter filter = {
833 .type_mask = endswith(suffix, ".raw") ? (UINT32_C(1) << DT_REG) | (UINT32_C(1) << DT_BLK) : (UINT32_C(1) << DT_DIR),
834 .basename = pretty,
835 .architecture = _ARCHITECTURE_INVALID,
421a4ba7 836 .suffix = STRV_MAKE(suffix),
a5ecdf7c
LP
837 };
838
839 _cleanup_(pick_result_done) PickResult result = PICK_RESULT_NULL;
840 r = path_pick(root,
841 /* toplevel_fd= */ AT_FDCWD,
842 vp,
843 &filter,
844 PICK_ARCHITECTURE|PICK_TRIES,
845 &result);
846 if (r < 0) {
847 log_debug_errno(r, "Failed to pick versioned image on '%s', skipping: %m", vp);
848 continue;
849 }
850 if (!result.path) {
851 log_debug("Found versioned directory '%s', without matching entry, skipping: %m", vp);
852 continue;
853 }
854
855 /* Refresh the stat data for the discovered target */
856 st = result.st;
857
858 _cleanup_free_ char *bn = NULL;
859 r = path_extract_filename(result.path, &bn);
860 if (r < 0) {
861 log_debug_errno(r, "Failed to extract basename of image path '%s', skipping: %m", result.path);
862 continue;
863 }
864
865 fname_buf = path_join(fname, bn);
866 if (!fname_buf)
867 return log_oom();
868
869 fname = fname_buf;
870 } else {
871 r = extract_image_basename(
872 fname,
873 image_class_suffix_to_string(class),
874 /* format_suffix= */ NULL,
875 &pretty,
876 /* ret_suffix= */ NULL);
877 if (r < 0) {
878 log_debug_errno(r, "Skipping directory entry '%s', which doesn't look like an image.", fname);
879 continue;
880 }
881 }
882
883 } else if (S_ISBLK(st.st_mode)) {
884 r = extract_image_basename(
885 fname,
886 /* class_suffix= */ NULL,
887 /* format_suffix= */ NULL,
888 &pretty,
889 /* ret_v_suffix= */ NULL);
890 if (r < 0) {
891 log_debug_errno(r, "Skipping directory entry '%s', which doesn't look like an image.", fname);
892 continue;
893 }
894 } else {
895 log_debug("Skipping directory entry '%s', which is neither regular file, directory nor block device.", fname);
1bf36bf9 896 continue;
e7df707b 897 }
1bf36bf9
LP
898
899 if (hashmap_contains(h, pretty))
c2ce6a3d
LP
900 continue;
901
a5ecdf7c 902 r = image_make(class, pretty, dirfd(d), resolved, fname, &st, &image);
3a6ce860 903 if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
c2ce6a3d
LP
904 continue;
905 if (r < 0)
906 return r;
907
cf604fd4
LP
908 image->discoverable = true;
909
c2ce6a3d
LP
910 r = hashmap_put(h, image->name, image);
911 if (r < 0)
912 return r;
913
5df1a553 914 TAKE_PTR(image);
cd61c3bf
LP
915 }
916 }
917
5ef46e5f 918 if (class == IMAGE_MACHINE && !hashmap_contains(h, ".host")) {
5fc7f358
LP
919 _cleanup_(image_unrefp) Image *image = NULL;
920
3775e141 921 r = image_make(IMAGE_MACHINE, ".host", AT_FDCWD, NULL, empty_to_root("/"), NULL, &image);
5fc7f358
LP
922 if (r < 0)
923 return r;
924
cf604fd4
LP
925 image->discoverable = true;
926
5fc7f358
LP
927 r = hashmap_put(h, image->name, image);
928 if (r < 0)
929 return r;
930
931 image = NULL;
5fc7f358
LP
932 }
933
cd61c3bf
LP
934 return 0;
935}
936
08682124 937int image_remove(Image *i) {
8e766630 938 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
8e0b6570 939 _cleanup_strv_free_ char **settings = NULL;
bafbac4e 940 _cleanup_free_ char *roothash = NULL;
30535c16
LP
941 int r;
942
08682124
LP
943 assert(i);
944
d94c2b06 945 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
08682124
LP
946 return -EROFS;
947
8e0b6570
LP
948 settings = image_settings_path(i);
949 if (!settings)
950 return -ENOMEM;
951
162f6477
LP
952 r = image_roothash_path(i, &roothash);
953 if (r < 0)
954 return r;
bafbac4e 955
30535c16
LP
956 /* Make sure we don't interfere with a running nspawn */
957 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
958 if (r < 0)
959 return r;
960
ebd93cb6
LP
961 switch (i->type) {
962
963 case IMAGE_SUBVOLUME:
9fb0b9c7
LP
964
965 /* Let's unlink first, maybe it is a symlink? If that works we are happy. Otherwise, let's get out the
966 * big guns */
967 if (unlink(i->path) < 0) {
968 r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
969 if (r < 0)
970 return r;
971 }
972
8e0b6570 973 break;
ebd93cb6
LP
974
975 case IMAGE_DIRECTORY:
01b72568 976 /* Allow deletion of read-only directories */
db9a4254 977 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);
8e0b6570
LP
978 r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
979 if (r < 0)
980 return r;
981
982 break;
01b72568 983
eb38edce
LP
984 case IMAGE_BLOCK:
985
986 /* If this is inside of /dev, then it's a real block device, hence let's not touch the device node
987 * itself (but let's remove the stuff stored alongside it). If it's anywhere else, let's try to unlink
988 * the thing (it's most likely a symlink after all). */
989
990 if (path_startswith(i->path, "/dev"))
991 break;
992
4831981d 993 _fallthrough_;
aceac2f0 994 case IMAGE_RAW:
41d1ed05
LP
995 if (unlink(i->path) < 0)
996 return -errno;
8e0b6570 997 break;
ebd93cb6
LP
998
999 default:
15411c0c 1000 return -EOPNOTSUPP;
ebd93cb6 1001 }
8e0b6570 1002
de010b0b 1003 STRV_FOREACH(j, settings)
8e0b6570
LP
1004 if (unlink(*j) < 0 && errno != ENOENT)
1005 log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
8e0b6570 1006
bafbac4e
LP
1007 if (unlink(roothash) < 0 && errno != ENOENT)
1008 log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash);
1009
8e0b6570
LP
1010 return 0;
1011}
1012
bafbac4e 1013static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
162f6477
LP
1014 _cleanup_free_ char *fn = NULL, *rs = NULL;
1015 int r;
8e0b6570 1016
162f6477
LP
1017 fn = strjoin(new_name, suffix);
1018 if (!fn)
8e0b6570
LP
1019 return -ENOMEM;
1020
162f6477
LP
1021 r = file_in_same_dir(path, fn, &rs);
1022 if (r < 0)
1023 return r;
1024
8e0b6570 1025 return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs);
ebd93cb6
LP
1026}
1027
1028int image_rename(Image *i, const char *new_name) {
8e766630 1029 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
bafbac4e 1030 _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL;
8e0b6570 1031 _cleanup_strv_free_ char **settings = NULL;
01b72568 1032 unsigned file_attr = 0;
ebd93cb6
LP
1033 int r;
1034
1035 assert(i);
1036
1037 if (!image_name_is_valid(new_name))
1038 return -EINVAL;
1039
d94c2b06 1040 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
ebd93cb6
LP
1041 return -EROFS;
1042
8e0b6570
LP
1043 settings = image_settings_path(i);
1044 if (!settings)
1045 return -ENOMEM;
1046
162f6477
LP
1047 r = image_roothash_path(i, &roothash);
1048 if (r < 0)
1049 return r;
bafbac4e 1050
30535c16
LP
1051 /* Make sure we don't interfere with a running nspawn */
1052 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
1053 if (r < 0)
1054 return r;
1055
1056 /* Make sure nobody takes the new name, between the time we
1057 * checked it is currently unused in all search paths, and the
f8e2f4d6 1058 * time we take possession of it */
30535c16
LP
1059 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
1060 if (r < 0)
1061 return r;
1062
d577d4a4 1063 r = image_find(IMAGE_MACHINE, new_name, NULL, NULL);
3a6ce860 1064 if (r >= 0)
ebd93cb6 1065 return -EEXIST;
3a6ce860
LP
1066 if (r != -ENOENT)
1067 return r;
ebd93cb6
LP
1068
1069 switch (i->type) {
1070
ebd93cb6 1071 case IMAGE_DIRECTORY:
01b72568
LP
1072 /* Turn of the immutable bit while we rename the image, so that we can rename it */
1073 (void) read_attr_path(i->path, &file_attr);
1074
1075 if (file_attr & FS_IMMUTABLE_FL)
db9a4254 1076 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);
01b72568 1077
4831981d 1078 _fallthrough_;
01b72568 1079 case IMAGE_SUBVOLUME:
162f6477 1080 r = file_in_same_dir(i->path, new_name, &new_path);
ebd93cb6
LP
1081 break;
1082
eb38edce
LP
1083 case IMAGE_BLOCK:
1084
1085 /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */
1086 if (path_startswith(i->path, "/dev"))
1087 return -EROFS;
1088
162f6477 1089 r = file_in_same_dir(i->path, new_name, &new_path);
eb38edce
LP
1090 break;
1091
aceac2f0 1092 case IMAGE_RAW: {
ebd93cb6
LP
1093 const char *fn;
1094
63c372cb 1095 fn = strjoina(new_name, ".raw");
162f6477
LP
1096
1097 r = file_in_same_dir(i->path, fn, &new_path);
ebd93cb6
LP
1098 break;
1099 }
1100
1101 default:
15411c0c 1102 return -EOPNOTSUPP;
ebd93cb6 1103 }
162f6477
LP
1104 if (r < 0)
1105 return r;
ebd93cb6
LP
1106
1107 nn = strdup(new_name);
1108 if (!nn)
1109 return -ENOMEM;
1110
f85ef957
AC
1111 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
1112 if (r < 0)
1113 return r;
ebd93cb6 1114
01b72568
LP
1115 /* Restore the immutable bit, if it was set before */
1116 if (file_attr & FS_IMMUTABLE_FL)
db9a4254 1117 (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);
01b72568 1118
f9ecfd3b
DL
1119 free_and_replace(i->path, new_path);
1120 free_and_replace(i->name, nn);
ebd93cb6 1121
8e0b6570 1122 STRV_FOREACH(j, settings) {
bafbac4e 1123 r = rename_auxiliary_file(*j, new_name, ".nspawn");
8e0b6570
LP
1124 if (r < 0 && r != -ENOENT)
1125 log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
1126 }
1127
bafbac4e
LP
1128 r = rename_auxiliary_file(roothash, new_name, ".roothash");
1129 if (r < 0 && r != -ENOENT)
1130 log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash);
1131
ebd93cb6
LP
1132 return 0;
1133}
1134
bafbac4e 1135static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
162f6477
LP
1136 _cleanup_free_ char *fn = NULL, *rs = NULL;
1137 int r;
8e0b6570 1138
162f6477
LP
1139 fn = strjoin(new_name, suffix);
1140 if (!fn)
8e0b6570
LP
1141 return -ENOMEM;
1142
162f6477
LP
1143 r = file_in_same_dir(path, fn, &rs);
1144 if (r < 0)
1145 return r;
1146
7c2f5495 1147 return copy_file_atomic(path, rs, 0664, COPY_REFLINK);
8e0b6570
LP
1148}
1149
ebd93cb6 1150int image_clone(Image *i, const char *new_name, bool read_only) {
8e766630 1151 _cleanup_(release_lock_file) LockFile name_lock = LOCK_FILE_INIT;
8e0b6570 1152 _cleanup_strv_free_ char **settings = NULL;
bafbac4e 1153 _cleanup_free_ char *roothash = NULL;
ebd93cb6
LP
1154 const char *new_path;
1155 int r;
1156
1157 assert(i);
1158
1159 if (!image_name_is_valid(new_name))
1160 return -EINVAL;
1161
8e0b6570
LP
1162 settings = image_settings_path(i);
1163 if (!settings)
1164 return -ENOMEM;
1165
162f6477
LP
1166 r = image_roothash_path(i, &roothash);
1167 if (r < 0)
1168 return r;
bafbac4e 1169
30535c16
LP
1170 /* Make sure nobody takes the new name, between the time we
1171 * checked it is currently unused in all search paths, and the
f8e2f4d6 1172 * time we take possession of it */
30535c16
LP
1173 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
1174 if (r < 0)
1175 return r;
1176
d577d4a4 1177 r = image_find(IMAGE_MACHINE, new_name, NULL, NULL);
3a6ce860 1178 if (r >= 0)
ebd93cb6 1179 return -EEXIST;
3a6ce860
LP
1180 if (r != -ENOENT)
1181 return r;
ebd93cb6
LP
1182
1183 switch (i->type) {
1184
1185 case IMAGE_SUBVOLUME:
1186 case IMAGE_DIRECTORY:
9a50e3ca 1187 /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain
13e785f7 1188 * directory. */
9a50e3ca 1189
63c372cb 1190 new_path = strjoina("/var/lib/machines/", new_name);
ebd93cb6 1191
fab4ef72
DDM
1192 r = btrfs_subvol_snapshot_at(AT_FDCWD, i->path, AT_FDCWD, new_path,
1193 (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
1194 BTRFS_SNAPSHOT_FALLBACK_COPY |
1195 BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
1196 BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
1197 BTRFS_SNAPSHOT_RECURSIVE |
1198 BTRFS_SNAPSHOT_QUOTA);
17cbb288 1199 if (r >= 0)
9a50e3ca 1200 /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
8120ee28 1201 (void) btrfs_subvol_auto_qgroup(new_path, 0, true);
5bcd08db 1202
ebd93cb6
LP
1203 break;
1204
aceac2f0 1205 case IMAGE_RAW:
63c372cb 1206 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
ebd93cb6 1207
7c2f5495
DDM
1208 r = copy_file_atomic_full(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, FS_NOCOW_FL,
1209 COPY_REFLINK|COPY_CRTIME, NULL, NULL);
ebd93cb6
LP
1210 break;
1211
eb38edce 1212 case IMAGE_BLOCK:
ebd93cb6 1213 default:
15411c0c 1214 return -EOPNOTSUPP;
ebd93cb6
LP
1215 }
1216
1217 if (r < 0)
1218 return r;
1219
8e0b6570 1220 STRV_FOREACH(j, settings) {
bafbac4e 1221 r = clone_auxiliary_file(*j, new_name, ".nspawn");
8e0b6570
LP
1222 if (r < 0 && r != -ENOENT)
1223 log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
1224 }
1225
bafbac4e
LP
1226 r = clone_auxiliary_file(roothash, new_name, ".roothash");
1227 if (r < 0 && r != -ENOENT)
1228 log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash);
1229
ebd93cb6
LP
1230 return 0;
1231}
1232
1233int image_read_only(Image *i, bool b) {
8e766630 1234 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
ebd93cb6 1235 int r;
c7664c07 1236
ebd93cb6
LP
1237 assert(i);
1238
d94c2b06 1239 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
ebd93cb6
LP
1240 return -EROFS;
1241
30535c16
LP
1242 /* Make sure we don't interfere with a running nspawn */
1243 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
1244 if (r < 0)
1245 return r;
1246
ebd93cb6
LP
1247 switch (i->type) {
1248
1249 case IMAGE_SUBVOLUME:
5bcd08db
LP
1250
1251 /* Note that we set the flag only on the top-level
1252 * subvolume of the image. */
1253
ebd93cb6
LP
1254 r = btrfs_subvol_set_read_only(i->path, b);
1255 if (r < 0)
1256 return r;
01b72568
LP
1257
1258 break;
1259
1260 case IMAGE_DIRECTORY:
1261 /* For simple directory trees we cannot use the access
1262 mode of the top-level directory, since it has an
1263 effect on the container itself. However, we can
1264 use the "immutable" flag, to at least make the
1265 top-level directory read-only. It's not as good as
1266 a read-only subvolume, but at least something, and
13e785f7 1267 we can read the value back. */
01b72568 1268
db9a4254 1269 r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL, NULL);
01b72568
LP
1270 if (r < 0)
1271 return r;
1272
ebd93cb6
LP
1273 break;
1274
aceac2f0 1275 case IMAGE_RAW: {
ebd93cb6
LP
1276 struct stat st;
1277
1278 if (stat(i->path, &st) < 0)
1279 return -errno;
1280
1281 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
1282 return -errno;
f2068bcc
LP
1283
1284 /* If the images is now read-only, it's a good time to
1285 * defrag it, given that no write patterns will
1286 * fragment it again. */
1287 if (b)
1288 (void) btrfs_defrag(i->path);
ebd93cb6
LP
1289 break;
1290 }
1291
eb38edce 1292 case IMAGE_BLOCK: {
254d1313 1293 _cleanup_close_ int fd = -EBADF;
eb38edce
LP
1294 struct stat st;
1295 int state = b;
1296
1297 fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY);
1298 if (fd < 0)
1299 return -errno;
1300
1301 if (fstat(fd, &st) < 0)
1302 return -errno;
1303 if (!S_ISBLK(st.st_mode))
1304 return -ENOTTY;
1305
1306 if (ioctl(fd, BLKROSET, &state) < 0)
1307 return -errno;
1308
1309 break;
1310 }
1311
ebd93cb6 1312 default:
15411c0c 1313 return -EOPNOTSUPP;
ebd93cb6
LP
1314 }
1315
608c321f 1316 i->read_only = b;
ebd93cb6 1317 return 0;
08682124
LP
1318}
1319
8759bc95
LP
/* Creates the directory the image lock files are placed in. Failures are deliberately ignored here. */
static void make_lock_dir(void) {
        /* The parent directory, including any missing ancestors */
        (void) mkdir_p("/run/systemd/nspawn", 0755);

        /* The lock directory itself, accessible to its owner only */
        (void) mkdir("/run/systemd/nspawn/locks", 0700);
}
1324
cb1b813f
LP
1325int image_path_lock(
1326 const char *path,
1327 int operation,
1328 LockFile *ret_global,
1329 LockFile *ret_local) {
1330
30535c16
LP
1331 _cleanup_free_ char *p = NULL;
1332 LockFile t = LOCK_FILE_INIT;
1333 struct stat st;
f25bed67 1334 bool exclusive;
30535c16
LP
1335 int r;
1336
1337 assert(path);
cb1b813f 1338 assert(ret_local);
30535c16 1339
f25bed67
LP
1340 /* Locks an image path. This actually creates two locks: one "local" one, next to the image path
1341 * itself, which might be shared via NFS. And another "global" one, in /run, that uses the
1342 * device/inode number. This has the benefit that we can even lock a tree that is a mount point,
1343 * correctly. */
30535c16 1344
30535c16
LP
1345 if (!path_is_absolute(path))
1346 return -EINVAL;
1347
f25bed67
LP
1348 switch (operation & (LOCK_SH|LOCK_EX)) {
1349 case LOCK_SH:
1350 exclusive = false;
1351 break;
1352 case LOCK_EX:
1353 exclusive = true;
1354 break;
1355 default:
1356 return -EINVAL;
1357 }
1358
b6e953f2 1359 if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
cb1b813f
LP
1360 *ret_local = LOCK_FILE_INIT;
1361 if (ret_global)
1362 *ret_global = LOCK_FILE_INIT;
b6e953f2
LP
1363 return 0;
1364 }
1365
f25bed67
LP
1366 /* Prohibit taking exclusive locks on the host image. We can't allow this, since we ourselves are
1367 * running off it after all, and we don't want any images to manipulate the host image. We make an
1368 * exception for shared locks however: we allow those (and make them NOPs since there's no point in
1369 * taking them if there can't be exclusive locks). Strictly speaking these are questionable as well,
1370 * since it means changes made to the host might propagate to the container as they happen (and a
1371 * shared lock kinda suggests that no changes happen at all while it is in place), but it's too
1372 * useful not to allow read-only containers off the host root, hence let's support this, and trust
1373 * the user to do the right thing with this. */
1374 if (path_equal(path, "/")) {
1375 if (exclusive)
1376 return -EBUSY;
1377
cb1b813f
LP
1378 *ret_local = LOCK_FILE_INIT;
1379 if (ret_global)
1380 *ret_global = LOCK_FILE_INIT;
f25bed67
LP
1381 return 0;
1382 }
b6e953f2 1383
cb1b813f
LP
1384 if (ret_global) {
1385 if (stat(path, &st) >= 0) {
1386 if (S_ISBLK(st.st_mode))
1387 r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev));
1388 else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))
1389 r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino);
1390 else
1391 return -ENOTTY;
1392 if (r < 0)
1393 return -ENOMEM;
1394 }
30535c16
LP
1395 }
1396
f25bed67
LP
1397 /* For block devices we don't need the "local" lock, as the major/minor lock above should be
1398 * sufficient, since block devices are host local anyway. */
1399 if (!path_startswith(path, "/dev/")) {
eb38edce 1400 r = make_lock_file_for(path, operation, &t);
8be17c9b 1401 if (r < 0) {
f25bed67 1402 if (!exclusive && r == -EROFS)
771b7ead 1403 log_debug_errno(r, "Failed to create shared lock for '%s', ignoring: %m", path);
8be17c9b
LT
1404 else
1405 return r;
1406 }
eb38edce 1407 }
30535c16
LP
1408
1409 if (p) {
8759bc95 1410 make_lock_dir();
30535c16 1411
cb1b813f 1412 r = make_lock_file(p, operation, ret_global);
30535c16
LP
1413 if (r < 0) {
1414 release_lock_file(&t);
1415 return r;
1416 }
cb1b813f
LP
1417 } else if (ret_global)
1418 *ret_global = LOCK_FILE_INIT;
30535c16 1419
cb1b813f 1420 *ret_local = t;
30535c16
LP
1421 return 0;
1422}
1423
cb81cd80 1424int image_set_limit(Image *i, uint64_t referenced_max) {
96ac6d3f
YW
1425 int r;
1426
d6ce17c7
LP
1427 assert(i);
1428
d94c2b06 1429 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
d6ce17c7
LP
1430 return -EROFS;
1431
1432 if (i->type != IMAGE_SUBVOLUME)
15411c0c 1433 return -EOPNOTSUPP;
d6ce17c7 1434
5bcd08db
LP
1435 /* We set the quota both for the subvolume as well as for the
1436 * subtree. The latter is mostly for historical reasons, since
1437 * we didn't use to have a concept of subtree quota, and hence
1438 * only modified the subvolume quota. */
1439
1440 (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max);
1441 (void) btrfs_subvol_auto_qgroup(i->path, 0, true);
96ac6d3f
YW
1442 r = btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max);
1443 if (r < 0)
1444 return r;
1445
1446 (void) image_update_quota(i, -EBADF);
1447 return 0;
d6ce17c7
LP
1448}
1449
84be0c71 1450int image_read_metadata(Image *i, const ImagePolicy *image_policy) {
8e766630 1451 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
c7664c07
LP
1452 int r;
1453
1454 assert(i);
1455
1456 r = image_path_lock(i->path, LOCK_SH|LOCK_NB, &global_lock, &local_lock);
1457 if (r < 0)
1458 return r;
1459
1460 switch (i->type) {
1461
1462 case IMAGE_SUBVOLUME:
1463 case IMAGE_DIRECTORY: {
a81fe93e
LP
1464 _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL, **sysext_release = NULL, **confext_release = NULL;
1465 _cleanup_free_ char *hostname = NULL, *path = NULL;
c7664c07 1466 sd_id128_t machine_id = SD_ID128_NULL;
c7664c07 1467
b60e0f57 1468 if (i->class == IMAGE_SYSEXT) {
6afa5d86
LB
1469 r = extension_has_forbidden_content(i->path);
1470 if (r < 0)
1471 return r;
1472 if (r > 0)
1473 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
1474 "Conflicting content found in image %s, refusing.",
1475 i->name);
1476 }
1477
f461a28d 1478 r = chase("/etc/hostname", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
c7664c07
LP
1479 if (r < 0 && r != -ENOENT)
1480 log_debug_errno(r, "Failed to chase /etc/hostname in image %s: %m", i->name);
1481 else if (r >= 0) {
1482 r = read_etc_hostname(path, &hostname);
1483 if (r < 0)
cc4482ee 1484 log_debug_errno(r, "Failed to read /etc/hostname of image %s: %m", i->name);
c7664c07
LP
1485 }
1486
1487 path = mfree(path);
1488
f7b5f399
YW
1489 r = id128_get_machine(i->path, &machine_id);
1490 if (r < 0)
1491 log_debug_errno(r, "Failed to read machine ID in image %s, ignoring: %m", i->name);
c7664c07 1492
f461a28d 1493 r = chase("/etc/machine-info", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
c7664c07
LP
1494 if (r < 0 && r != -ENOENT)
1495 log_debug_errno(r, "Failed to chase /etc/machine-info in image %s: %m", i->name);
1496 else if (r >= 0) {
aa8fbc74 1497 r = load_env_file_pairs(NULL, path, &machine_info);
c7664c07
LP
1498 if (r < 0)
1499 log_debug_errno(r, "Failed to parse machine-info data of %s: %m", i->name);
1500 }
1501
d58ad743
LP
1502 r = load_os_release_pairs(i->path, &os_release);
1503 if (r < 0)
1504 log_debug_errno(r, "Failed to read os-release in image, ignoring: %m");
c7664c07 1505
a81fe93e 1506 r = load_extension_release_pairs(i->path, IMAGE_SYSEXT, i->name, /* relax_extension_release_check= */ false, &sysext_release);
bcf94222 1507 if (r < 0)
a81fe93e
LP
1508 log_debug_errno(r, "Failed to read sysext-release in image, ignoring: %m");
1509
1510 r = load_extension_release_pairs(i->path, IMAGE_CONFEXT, i->name, /* relax_extension_release_check= */ false, &confext_release);
1511 if (r < 0)
1512 log_debug_errno(r, "Failed to read confext-release in image, ignoring: %m");
bcf94222 1513
c7664c07
LP
1514 free_and_replace(i->hostname, hostname);
1515 i->machine_id = machine_id;
1516 strv_free_and_replace(i->machine_info, machine_info);
1517 strv_free_and_replace(i->os_release, os_release);
a81fe93e
LP
1518 strv_free_and_replace(i->sysext_release, sysext_release);
1519 strv_free_and_replace(i->confext_release, confext_release);
c7664c07
LP
1520 break;
1521 }
1522
1523 case IMAGE_RAW:
1524 case IMAGE_BLOCK: {
1525 _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
1526 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
f4a63ce2
LP
1527 DissectImageFlags flags =
1528 DISSECT_IMAGE_GENERIC_ROOT |
1529 DISSECT_IMAGE_REQUIRE_ROOT |
1530 DISSECT_IMAGE_RELAX_VAR_CHECK |
1531 DISSECT_IMAGE_READ_ONLY |
1532 DISSECT_IMAGE_USR_NO_ROOT |
1533 DISSECT_IMAGE_ADD_PARTITION_DEVICES |
1534 DISSECT_IMAGE_PIN_PARTITION_DEVICES |
1535 DISSECT_IMAGE_VALIDATE_OS |
1536 DISSECT_IMAGE_VALIDATE_OS_EXT |
1537 DISSECT_IMAGE_ALLOW_USERSPACE_VERITY;
1538
1539 r = loop_device_make_by_path(
1540 i->path,
1541 O_RDONLY,
1542 /* sector_size= */ UINT32_MAX,
1543 LO_FLAGS_PARTSCAN,
1544 LOCK_SH,
1545 &d);
41bc4849
LP
1546 if (r < 0)
1547 return r;
1548
bad31660
YW
1549 r = dissect_loop_device(
1550 d,
84be0c71
LP
1551 /* verity= */ NULL,
1552 /* mount_options= */ NULL,
1553 image_policy,
f4a63ce2 1554 flags,
75dc190d 1555 &m);
c7664c07
LP
1556 if (r < 0)
1557 return r;
1558
44e3097d
LP
1559 r = dissected_image_acquire_metadata(
1560 m,
1561 /* userns_fd= */ -EBADF,
1562 flags);
c7664c07
LP
1563 if (r < 0)
1564 return r;
1565
1566 free_and_replace(i->hostname, m->hostname);
1567 i->machine_id = m->machine_id;
1568 strv_free_and_replace(i->machine_info, m->machine_info);
1569 strv_free_and_replace(i->os_release, m->os_release);
a81fe93e
LP
1570 strv_free_and_replace(i->sysext_release, m->sysext_release);
1571 strv_free_and_replace(i->confext_release, m->confext_release);
c7664c07
LP
1572
1573 break;
1574 }
1575
1576 default:
1577 return -EOPNOTSUPP;
1578 }
1579
1580 i->metadata_valid = true;
1581
1582 return 0;
1583}
1584
30535c16 1585int image_name_lock(const char *name, int operation, LockFile *ret) {
99d97afc
LP
1586 const char *p;
1587
30535c16
LP
1588 assert(name);
1589 assert(ret);
1590
1591 /* Locks an image name, regardless of the precise path used. */
1592
99d97afc
LP
1593 if (streq(name, ".host"))
1594 return -EBUSY;
1595
30535c16
LP
1596 if (!image_name_is_valid(name))
1597 return -EINVAL;
1598
b6e953f2
LP
1599 if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
1600 *ret = (LockFile) LOCK_FILE_INIT;
1601 return 0;
1602 }
1603
8759bc95 1604 make_lock_dir();
99d97afc
LP
1605
1606 p = strjoina("/run/systemd/nspawn/locks/name-", name);
30535c16
LP
1607 return make_lock_file(p, operation, ret);
1608}
1609
d577d4a4
LP
1610bool image_in_search_path(
1611 ImageClass class,
1612 const char *root,
1613 const char *image) {
1614
ace9ab19
LP
1615 assert(image);
1616
73740c9f 1617 NULSTR_FOREACH(path, pick_image_search_path(class)) {
d577d4a4 1618 const char *p, *q;
ace9ab19
LP
1619 size_t k;
1620
d577d4a4
LP
1621 if (!empty_or_root(root)) {
1622 q = path_startswith(path, root);
1623 if (!q)
1624 continue;
1625 } else
1626 q = path;
1627
1628 p = path_startswith(q, path);
ace9ab19
LP
1629 if (!p)
1630 continue;
1631
1632 /* Make sure there's a filename following */
1633 k = strcspn(p, "/");
1634 if (k == 0)
1635 continue;
1636
1637 p += k;
1638
1639 /* Accept trailing slashes */
1640 if (p[strspn(p, "/")] == 0)
1641 return true;
ace9ab19
LP
1642 }
1643
1644 return false;
1645}
1646
f5151fb4
LP
1647int image_to_json(const struct Image *img, JsonVariant **ret) {
1648 assert(img);
1649
1650 return json_build(ret,
1651 JSON_BUILD_OBJECT(
1652 JSON_BUILD_PAIR_STRING("Type", image_type_to_string(img->type)),
1653 JSON_BUILD_PAIR_STRING("Class", image_class_to_string(img->class)),
1654 JSON_BUILD_PAIR_STRING("Name", img->name),
1655 JSON_BUILD_PAIR_CONDITION(img->path, "Path", JSON_BUILD_STRING(img->path)),
1656 JSON_BUILD_PAIR_BOOLEAN("ReadOnly", img->read_only),
1657 JSON_BUILD_PAIR_CONDITION(img->crtime != 0, "CreationTimestamp", JSON_BUILD_UNSIGNED(img->crtime)),
1658 JSON_BUILD_PAIR_CONDITION(img->mtime != 0, "ModificationTimestamp", JSON_BUILD_UNSIGNED(img->mtime)),
1659 JSON_BUILD_PAIR_CONDITION(img->usage != UINT64_MAX, "Usage", JSON_BUILD_UNSIGNED(img->usage)),
1660 JSON_BUILD_PAIR_CONDITION(img->usage_exclusive != UINT64_MAX, "UsageExclusive", JSON_BUILD_UNSIGNED(img->usage_exclusive)),
1661 JSON_BUILD_PAIR_CONDITION(img->limit != UINT64_MAX, "Limit", JSON_BUILD_UNSIGNED(img->limit)),
1662 JSON_BUILD_PAIR_CONDITION(img->limit_exclusive != UINT64_MAX, "LimitExclusive", JSON_BUILD_UNSIGNED(img->limit_exclusive))));
1663}
1664
cd61c3bf
LP
1665static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
1666 [IMAGE_DIRECTORY] = "directory",
1667 [IMAGE_SUBVOLUME] = "subvolume",
25cdbd04
LP
1668 [IMAGE_RAW] = "raw",
1669 [IMAGE_BLOCK] = "block",
cd61c3bf
LP
1670};
1671
1672DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);