]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/dissect-image.c
resolved: reply using unicast mDNS when appropriate
[thirdparty/systemd.git] / src / shared / dissect-image.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #if HAVE_VALGRIND_MEMCHECK_H
4 #include <valgrind/memcheck.h>
5 #endif
6
7 #include <linux/dm-ioctl.h>
8 #include <linux/loop.h>
9 #include <sys/mount.h>
10 #include <sys/prctl.h>
11 #include <sys/wait.h>
12 #include <sysexits.h>
13
14 #include "sd-device.h"
15 #include "sd-id128.h"
16
17 #include "architecture.h"
18 #include "ask-password-api.h"
19 #include "blkid-util.h"
20 #include "blockdev-util.h"
21 #include "copy.h"
22 #include "cryptsetup-util.h"
23 #include "def.h"
24 #include "device-nodes.h"
25 #include "device-util.h"
26 #include "discover-image.h"
27 #include "dissect-image.h"
28 #include "dm-util.h"
29 #include "env-file.h"
30 #include "extension-release.h"
31 #include "fd-util.h"
32 #include "fileio.h"
33 #include "fs-util.h"
34 #include "fsck-util.h"
35 #include "gpt.h"
36 #include "hexdecoct.h"
37 #include "hostname-setup.h"
38 #include "id128-util.h"
39 #include "import-util.h"
40 #include "mkdir.h"
41 #include "mount-util.h"
42 #include "mountpoint-util.h"
43 #include "namespace-util.h"
44 #include "nulstr-util.h"
45 #include "os-util.h"
46 #include "path-util.h"
47 #include "process-util.h"
48 #include "raw-clone.h"
49 #include "signal-util.h"
50 #include "stat-util.h"
51 #include "stdio-util.h"
52 #include "string-table.h"
53 #include "string-util.h"
54 #include "strv.h"
55 #include "tmpfile-util.h"
56 #include "udev-util.h"
57 #include "user-util.h"
58 #include "xattr-util.h"
59
60 /* how many times to wait for the device nodes to appear */
61 #define N_DEVICE_NODE_LIST_ATTEMPTS 10
62
int probe_filesystem(const char *node, char **ret_fstype) {
        /* Try to find device content type and return it in *ret_fstype. If nothing is found,
         * 0/NULL will be returned. If a type is found, 1 is returned and *ret_fstype is set to a
         * newly strdup()ed copy of the type string. -EUCLEAN will be returned for ambiguous
         * results, -EOPNOTSUPP if built without libblkid, and a different error otherwise. */

#if HAVE_BLKID
        _cleanup_(blkid_free_probep) blkid_probe b = NULL;
        /* Must start out as NULL: the result of the "TYPE" lookup below is deliberately ignored,
         * hence if the lookup fails this variable might never be assigned, and we'd otherwise
         * test an indeterminate pointer. */
        const char *fstype = NULL;
        int r;

        errno = 0;
        b = blkid_new_probe_from_filename(node);
        if (!b)
                return errno_or_else(ENOMEM);

        /* We are only interested in the superblock type, nothing else */
        blkid_probe_enable_superblocks(b, 1);
        blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);

        errno = 0;
        r = blkid_do_safeprobe(b);
        if (r == 1) {
                log_debug("No type detected on partition %s", node);
                goto not_found;
        }
        if (r == -2)
                return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN),
                                       "Results ambiguous for partition %s", node);
        if (r != 0)
                return errno_or_else(EIO);

        (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);

        if (fstype) {
                char *t;

                t = strdup(fstype);
                if (!t)
                        return -ENOMEM;

                *ret_fstype = t;
                return 1;
        }

not_found:
        *ret_fstype = NULL;
        return 0;
#else
        return -EOPNOTSUPP;
#endif
}
113
114 #if HAVE_BLKID
115 static int enumerator_for_parent(sd_device *d, sd_device_enumerator **ret) {
116 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
117 int r;
118
119 assert(d);
120 assert(ret);
121
122 r = sd_device_enumerator_new(&e);
123 if (r < 0)
124 return r;
125
126 r = sd_device_enumerator_allow_uninitialized(e);
127 if (r < 0)
128 return r;
129
130 r = sd_device_enumerator_add_match_parent(e, d);
131 if (r < 0)
132 return r;
133
134 *ret = TAKE_PTR(e);
135 return 0;
136 }
137
138 static int device_is_partition(sd_device *d, blkid_partition pp) {
139 blkid_loff_t bsize, bstart;
140 uint64_t size, start;
141 int partno, bpartno, r;
142 const char *ss, *v;
143
144 assert(d);
145 assert(pp);
146
147 r = sd_device_get_subsystem(d, &ss);
148 if (r < 0)
149 return r;
150 if (!streq(ss, "block"))
151 return false;
152
153 r = sd_device_get_sysattr_value(d, "partition", &v);
154 if (r == -ENOENT || /* Not a partition device */
155 ERRNO_IS_PRIVILEGE(r)) /* Not ready to access? */
156 return false;
157 if (r < 0)
158 return r;
159 r = safe_atoi(v, &partno);
160 if (r < 0)
161 return r;
162
163 errno = 0;
164 bpartno = blkid_partition_get_partno(pp);
165 if (bpartno < 0)
166 return errno_or_else(EIO);
167
168 if (partno != bpartno)
169 return false;
170
171 r = sd_device_get_sysattr_value(d, "start", &v);
172 if (r < 0)
173 return r;
174 r = safe_atou64(v, &start);
175 if (r < 0)
176 return r;
177
178 errno = 0;
179 bstart = blkid_partition_get_start(pp);
180 if (bstart < 0)
181 return errno_or_else(EIO);
182
183 if (start != (uint64_t) bstart)
184 return false;
185
186 r = sd_device_get_sysattr_value(d, "size", &v);
187 if (r < 0)
188 return r;
189 r = safe_atou64(v, &size);
190 if (r < 0)
191 return r;
192
193 errno = 0;
194 bsize = blkid_partition_get_size(pp);
195 if (bsize < 0)
196 return errno_or_else(EIO);
197
198 if (size != (uint64_t) bsize)
199 return false;
200
201 return true;
202 }
203
204 static int find_partition(
205 sd_device *parent,
206 blkid_partition pp,
207 sd_device **ret) {
208
209 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
210 sd_device *q;
211 int r;
212
213 assert(parent);
214 assert(pp);
215 assert(ret);
216
217 r = enumerator_for_parent(parent, &e);
218 if (r < 0)
219 return r;
220
221 FOREACH_DEVICE(e, q) {
222 r = device_is_partition(q, pp);
223 if (r < 0)
224 return r;
225 if (r > 0) {
226 *ret = sd_device_ref(q);
227 return 0;
228 }
229 }
230
231 return -ENXIO;
232 }
233
234 struct wait_data {
235 sd_device *parent_device;
236 blkid_partition blkidp;
237 sd_device *found;
238 };
239
240 static inline void wait_data_done(struct wait_data *d) {
241 sd_device_unref(d->found);
242 }
243
244 static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
245 const char *parent1_path, *parent2_path;
246 struct wait_data *w = userdata;
247 sd_device *pp;
248 int r;
249
250 assert(w);
251
252 if (device_for_action(device, SD_DEVICE_REMOVE))
253 return 0;
254
255 r = sd_device_get_parent(device, &pp);
256 if (r < 0)
257 return 0; /* Doesn't have a parent? No relevant to us */
258
259 r = sd_device_get_syspath(pp, &parent1_path); /* Check parent of device of this action */
260 if (r < 0)
261 goto finish;
262
263 r = sd_device_get_syspath(w->parent_device, &parent2_path); /* Check parent of device we are looking for */
264 if (r < 0)
265 goto finish;
266
267 if (!path_equal(parent1_path, parent2_path))
268 return 0; /* Has a different parent than what we need, not interesting to us */
269
270 r = device_is_partition(device, w->blkidp);
271 if (r < 0)
272 goto finish;
273 if (r == 0) /* Not the one we need */
274 return 0;
275
276 /* It's the one we need! Yay! */
277 assert(!w->found);
278 w->found = sd_device_ref(device);
279 r = 0;
280
281 finish:
282 return sd_event_exit(sd_device_monitor_get_event(monitor), r);
283 }
284
285 static int wait_for_partition_device(
286 sd_device *parent,
287 blkid_partition pp,
288 usec_t deadline,
289 sd_device **ret) {
290
291 _cleanup_(sd_event_source_unrefp) sd_event_source *timeout_source = NULL;
292 _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL;
293 _cleanup_(sd_event_unrefp) sd_event *event = NULL;
294 int r;
295
296 assert(parent);
297 assert(pp);
298 assert(ret);
299
300 r = find_partition(parent, pp, ret);
301 if (r != -ENXIO)
302 return r;
303
304 r = sd_event_new(&event);
305 if (r < 0)
306 return r;
307
308 r = sd_device_monitor_new(&monitor);
309 if (r < 0)
310 return r;
311
312 r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, "block", "partition");
313 if (r < 0)
314 return r;
315
316 r = sd_device_monitor_attach_event(monitor, event);
317 if (r < 0)
318 return r;
319
320 _cleanup_(wait_data_done) struct wait_data w = {
321 .parent_device = parent,
322 .blkidp = pp,
323 };
324
325 r = sd_device_monitor_start(monitor, device_monitor_handler, &w);
326 if (r < 0)
327 return r;
328
329 /* Check again, the partition might have appeared in the meantime */
330 r = find_partition(parent, pp, ret);
331 if (r != -ENXIO)
332 return r;
333
334 if (deadline != USEC_INFINITY) {
335 r = sd_event_add_time(
336 event, &timeout_source,
337 CLOCK_MONOTONIC, deadline, 0,
338 NULL, INT_TO_PTR(-ETIMEDOUT));
339 if (r < 0)
340 return r;
341 }
342
343 r = sd_event_loop(event);
344 if (r < 0)
345 return r;
346
347 assert(w.found);
348 *ret = TAKE_PTR(w.found);
349 return 0;
350 }
351
352 static void check_partition_flags(
353 const char *node,
354 unsigned long long pflags,
355 unsigned long long supported) {
356
357 assert(node);
358
359 /* Mask away all flags supported by this partition's type and the three flags the UEFI spec defines generically */
360 pflags &= ~(supported | GPT_FLAG_REQUIRED_PARTITION | GPT_FLAG_NO_BLOCK_IO_PROTOCOL | GPT_FLAG_LEGACY_BIOS_BOOTABLE);
361
362 if (pflags == 0)
363 return;
364
365 /* If there are other bits set, then log about it, to make things discoverable */
366 for (unsigned i = 0; i < sizeof(pflags) * 8; i++) {
367 unsigned long long bit = 1ULL << i;
368 if (!FLAGS_SET(pflags, bit))
369 continue;
370
371 log_debug("Unexpected partition flag %llu set on %s!", bit, node);
372 }
373 }
374
375 static int device_wait_for_initialization_harder(
376 sd_device *device,
377 const char *subsystem,
378 usec_t deadline,
379 sd_device **ret) {
380
381 _cleanup_free_ char *uevent = NULL;
382 usec_t start, left, retrigger_timeout;
383 int r;
384
385 start = now(CLOCK_MONOTONIC);
386 left = usec_sub_unsigned(deadline, start);
387
388 if (DEBUG_LOGGING) {
389 char buf[FORMAT_TIMESPAN_MAX];
390 const char *sn = NULL;
391
392 (void) sd_device_get_sysname(device, &sn);
393 log_debug("Waiting for device '%s' to initialize for %s.", strna(sn), format_timespan(buf, sizeof(buf), left, 0));
394 }
395
396 if (left != USEC_INFINITY)
397 retrigger_timeout = CLAMP(left / 4, 1 * USEC_PER_SEC, 5 * USEC_PER_SEC); /* A fourth of the total timeout, but let's clamp to 1s…5s range */
398 else
399 retrigger_timeout = 2 * USEC_PER_SEC;
400
401 for (;;) {
402 usec_t local_deadline, n;
403 bool last_try;
404
405 n = now(CLOCK_MONOTONIC);
406 assert(n >= start);
407
408 /* Find next deadline, when we'll retrigger */
409 local_deadline = start +
410 DIV_ROUND_UP(n - start, retrigger_timeout) * retrigger_timeout;
411
412 if (deadline != USEC_INFINITY && deadline <= local_deadline) {
413 local_deadline = deadline;
414 last_try = true;
415 } else
416 last_try = false;
417
418 r = device_wait_for_initialization(device, subsystem, local_deadline, ret);
419 if (r >= 0 && DEBUG_LOGGING) {
420 char buf[FORMAT_TIMESPAN_MAX];
421 const char *sn = NULL;
422
423 (void) sd_device_get_sysname(device, &sn);
424 log_debug("Successfully waited for device '%s' to initialize for %s.", strna(sn), format_timespan(buf, sizeof(buf), usec_sub_unsigned(now(CLOCK_MONOTONIC), start), 0));
425
426 }
427 if (r != -ETIMEDOUT || last_try)
428 return r;
429
430 if (!uevent) {
431 const char *syspath;
432
433 r = sd_device_get_syspath(device, &syspath);
434 if (r < 0)
435 return r;
436
437 uevent = path_join(syspath, "uevent");
438 if (!uevent)
439 return -ENOMEM;
440 }
441
442 if (DEBUG_LOGGING) {
443 char buf[FORMAT_TIMESPAN_MAX];
444
445 log_debug("Device didn't initialize within %s, assuming lost event. Retriggering device through %s.",
446 format_timespan(buf, sizeof(buf), usec_sub_unsigned(now(CLOCK_MONOTONIC), start), 0),
447 uevent);
448 }
449
450 r = write_string_file(uevent, "change", WRITE_STRING_FILE_DISABLE_BUFFER);
451 if (r < 0)
452 return r;
453 }
454 }
455 #endif
456
457 #define DEVICE_TIMEOUT_USEC (45 * USEC_PER_SEC)
458
459 static void dissected_partition_done(DissectedPartition *p) {
460 assert(p);
461
462 free(p->fstype);
463 free(p->node);
464 free(p->label);
465 free(p->decrypted_fstype);
466 free(p->decrypted_node);
467 free(p->mount_options);
468
469 *p = (DissectedPartition) {
470 .partno = -1,
471 .architecture = -1
472 };
473 }
474
475 int dissect_image(
476 int fd,
477 const VeritySettings *verity,
478 const MountOptions *mount_options,
479 DissectImageFlags flags,
480 DissectedImage **ret) {
481
482 #if HAVE_BLKID
483 #ifdef GPT_ROOT_NATIVE
484 sd_id128_t root_uuid = SD_ID128_NULL, root_verity_uuid = SD_ID128_NULL;
485 #endif
486 #ifdef GPT_USR_NATIVE
487 sd_id128_t usr_uuid = SD_ID128_NULL, usr_verity_uuid = SD_ID128_NULL;
488 #endif
489 bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
490 _cleanup_(sd_device_unrefp) sd_device *d = NULL;
491 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
492 _cleanup_(blkid_free_probep) blkid_probe b = NULL;
493 _cleanup_free_ char *generic_node = NULL;
494 sd_id128_t generic_uuid = SD_ID128_NULL;
495 const char *pttype = NULL, *sysname = NULL;
496 blkid_partlist pl;
497 int r, generic_nr, n_partitions;
498 struct stat st;
499 usec_t deadline;
500
501 assert(fd >= 0);
502 assert(ret);
503 assert(!verity || verity->root_hash || verity->root_hash_size == 0);
504 assert(!((flags & DISSECT_IMAGE_GPT_ONLY) && (flags & DISSECT_IMAGE_NO_PARTITION_TABLE)));
505
506 /* Probes a disk image, and returns information about what it found in *ret.
507 *
508 * Returns -ENOPKG if no suitable partition table or file system could be found.
509 * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found.
510 * Returns -ENXIO if we couldn't find any partition suitable as root or /usr partition
511 * Returns -ENOTUNIQ if we only found multiple generic partitions and thus don't know what to do with that */
512
513 if (verity && verity->root_hash) {
514 sd_id128_t fsuuid, vuuid;
515
516 /* If a root hash is supplied, then we use the root partition that has a UUID that match the
517 * first 128bit of the root hash. And we use the verity partition that has a UUID that match
518 * the final 128bit. */
519
520 if (verity->root_hash_size < sizeof(sd_id128_t))
521 return -EINVAL;
522
523 memcpy(&fsuuid, verity->root_hash, sizeof(sd_id128_t));
524 memcpy(&vuuid, (const uint8_t*) verity->root_hash + verity->root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
525
526 if (sd_id128_is_null(fsuuid))
527 return -EINVAL;
528 if (sd_id128_is_null(vuuid))
529 return -EINVAL;
530
531 /* If the verity data declares it's for the /usr partition, then search for that, in all
532 * other cases assume it's for the root partition. */
533 #ifdef GPT_USR_NATIVE
534 if (verity->designator == PARTITION_USR) {
535 usr_uuid = fsuuid;
536 usr_verity_uuid = vuuid;
537 } else {
538 #endif
539 #ifdef GPT_ROOT_NATIVE
540 root_uuid = fsuuid;
541 root_verity_uuid = vuuid;
542 #endif
543 #ifdef GPT_USR_NATIVE
544 }
545 #endif
546 }
547
548 if (fstat(fd, &st) < 0)
549 return -errno;
550
551 if (!S_ISBLK(st.st_mode))
552 return -ENOTBLK;
553
554 r = sd_device_new_from_stat_rdev(&d, &st);
555 if (r < 0)
556 return r;
557
558 if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
559 _cleanup_(sd_device_unrefp) sd_device *initialized = NULL;
560
561 /* If udev support is enabled, then let's wait for the device to be initialized before we doing anything. */
562
563 r = device_wait_for_initialization_harder(
564 d,
565 "block",
566 usec_add(now(CLOCK_MONOTONIC), DEVICE_TIMEOUT_USEC),
567 &initialized);
568 if (r < 0)
569 return r;
570
571 sd_device_unref(d);
572 d = TAKE_PTR(initialized);
573 }
574
575 b = blkid_new_probe();
576 if (!b)
577 return -ENOMEM;
578
579 errno = 0;
580 r = blkid_probe_set_device(b, fd, 0, 0);
581 if (r != 0)
582 return errno_or_else(ENOMEM);
583
584 if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
585 /* Look for file system superblocks, unless we only shall look for GPT partition tables */
586 blkid_probe_enable_superblocks(b, 1);
587 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
588 }
589
590 blkid_probe_enable_partitions(b, 1);
591 blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
592
593 errno = 0;
594 r = blkid_do_safeprobe(b);
595 if (IN_SET(r, -2, 1))
596 return log_debug_errno(SYNTHETIC_ERRNO(ENOPKG), "Failed to identify any partition table.");
597 if (r != 0)
598 return errno_or_else(EIO);
599
600 m = new0(DissectedImage, 1);
601 if (!m)
602 return -ENOMEM;
603
604 r = sd_device_get_sysname(d, &sysname);
605 if (r < 0)
606 return log_debug_errno(r, "Failed to get device sysname: %m");
607 if (startswith(sysname, "loop")) {
608 _cleanup_free_ char *name_stripped = NULL;
609 const char *full_path;
610
611 r = sd_device_get_sysattr_value(d, "loop/backing_file", &full_path);
612 if (r < 0)
613 log_debug_errno(r, "Failed to lookup image name via loop device backing file sysattr, ignoring: %m");
614 else {
615 r = raw_strip_suffixes(basename(full_path), &name_stripped);
616 if (r < 0)
617 return r;
618 }
619
620 free_and_replace(m->image_name, name_stripped);
621 } else {
622 r = free_and_strdup(&m->image_name, sysname);
623 if (r < 0)
624 return r;
625 }
626
627 if (!image_name_is_valid(m->image_name)) {
628 log_debug("Image name %s is not valid, ignoring", strempty(m->image_name));
629 m->image_name = mfree(m->image_name);
630 }
631
632 if ((!(flags & DISSECT_IMAGE_GPT_ONLY) &&
633 (flags & DISSECT_IMAGE_GENERIC_ROOT)) ||
634 (flags & DISSECT_IMAGE_NO_PARTITION_TABLE)) {
635 const char *usage = NULL;
636
637 /* If flags permit this, also allow using non-partitioned single-filesystem images */
638
639 (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
640 if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
641 const char *fstype = NULL, *options = NULL, *devname = NULL;
642 _cleanup_free_ char *t = NULL, *n = NULL, *o = NULL;
643
644 /* OK, we have found a file system, that's our root partition then. */
645 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
646
647 if (fstype) {
648 t = strdup(fstype);
649 if (!t)
650 return -ENOMEM;
651 }
652
653 r = sd_device_get_devname(d, &devname);
654 if (r < 0)
655 return r;
656
657 n = strdup(devname);
658 if (!n)
659 return -ENOMEM;
660
661 m->single_file_system = true;
662 m->verity = verity && verity->root_hash && verity->data_path && (verity->designator < 0 || verity->designator == PARTITION_ROOT);
663 m->can_verity = verity && verity->data_path;
664
665 options = mount_options_from_designator(mount_options, PARTITION_ROOT);
666 if (options) {
667 o = strdup(options);
668 if (!o)
669 return -ENOMEM;
670 }
671
672 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
673 .found = true,
674 .rw = !m->verity,
675 .partno = -1,
676 .architecture = _ARCHITECTURE_INVALID,
677 .fstype = TAKE_PTR(t),
678 .node = TAKE_PTR(n),
679 .mount_options = TAKE_PTR(o),
680 };
681
682 m->encrypted = streq_ptr(fstype, "crypto_LUKS");
683
684 *ret = TAKE_PTR(m);
685 return 0;
686 }
687 }
688
689 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
690 if (!pttype)
691 return -ENOPKG;
692
693 is_gpt = streq_ptr(pttype, "gpt");
694 is_mbr = streq_ptr(pttype, "dos");
695
696 if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
697 return -ENOPKG;
698
699 /* Safety check: refuse block devices that carry a partition table but for which the kernel doesn't
700 * do partition scanning. */
701 r = blockdev_partscan_enabled(fd);
702 if (r < 0)
703 return r;
704 if (r == 0)
705 return -EPROTONOSUPPORT;
706
707 errno = 0;
708 pl = blkid_probe_get_partitions(b);
709 if (!pl)
710 return errno_or_else(ENOMEM);
711
712 errno = 0;
713 n_partitions = blkid_partlist_numof_partitions(pl);
714 if (n_partitions < 0)
715 return errno_or_else(EIO);
716
717 deadline = usec_add(now(CLOCK_MONOTONIC), DEVICE_TIMEOUT_USEC);
718 for (int i = 0; i < n_partitions; i++) {
719 _cleanup_(sd_device_unrefp) sd_device *q = NULL;
720 unsigned long long pflags;
721 blkid_partition pp;
722 const char *node;
723 int nr;
724
725 errno = 0;
726 pp = blkid_partlist_get_partition(pl, i);
727 if (!pp)
728 return errno_or_else(EIO);
729
730 r = wait_for_partition_device(d, pp, deadline, &q);
731 if (r < 0)
732 return r;
733
734 r = sd_device_get_devname(q, &node);
735 if (r < 0)
736 return r;
737
738 pflags = blkid_partition_get_flags(pp);
739
740 errno = 0;
741 nr = blkid_partition_get_partno(pp);
742 if (nr < 0)
743 return errno_or_else(EIO);
744
745 if (is_gpt) {
746 PartitionDesignator designator = _PARTITION_DESIGNATOR_INVALID;
747 int architecture = _ARCHITECTURE_INVALID;
748 const char *stype, *sid, *fstype = NULL, *label;
749 sd_id128_t type_id, id;
750 bool rw = true;
751
752 sid = blkid_partition_get_uuid(pp);
753 if (!sid)
754 continue;
755 if (sd_id128_from_string(sid, &id) < 0)
756 continue;
757
758 stype = blkid_partition_get_type_string(pp);
759 if (!stype)
760 continue;
761 if (sd_id128_from_string(stype, &type_id) < 0)
762 continue;
763
764 label = blkid_partition_get_name(pp); /* libblkid returns NULL here if empty */
765
766 if (sd_id128_equal(type_id, GPT_HOME)) {
767
768 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
769
770 if (pflags & GPT_FLAG_NO_AUTO)
771 continue;
772
773 designator = PARTITION_HOME;
774 rw = !(pflags & GPT_FLAG_READ_ONLY);
775
776 } else if (sd_id128_equal(type_id, GPT_SRV)) {
777
778 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
779
780 if (pflags & GPT_FLAG_NO_AUTO)
781 continue;
782
783 designator = PARTITION_SRV;
784 rw = !(pflags & GPT_FLAG_READ_ONLY);
785
786 } else if (sd_id128_equal(type_id, GPT_ESP)) {
787
788 /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is
789 * not defined there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as
790 * recommended by the UEFI spec (See "12.3.3 Number and Location of System
791 * Partitions"). */
792
793 if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
794 continue;
795
796 designator = PARTITION_ESP;
797 fstype = "vfat";
798
799 } else if (sd_id128_equal(type_id, GPT_XBOOTLDR)) {
800
801 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
802
803 if (pflags & GPT_FLAG_NO_AUTO)
804 continue;
805
806 designator = PARTITION_XBOOTLDR;
807 rw = !(pflags & GPT_FLAG_READ_ONLY);
808 }
809 #ifdef GPT_ROOT_NATIVE
810 else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
811
812 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
813
814 if (pflags & GPT_FLAG_NO_AUTO)
815 continue;
816
817 /* If a root ID is specified, ignore everything but the root id */
818 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
819 continue;
820
821 designator = PARTITION_ROOT;
822 architecture = native_architecture();
823 rw = !(pflags & GPT_FLAG_READ_ONLY);
824
825 } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
826
827 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
828
829 if (pflags & GPT_FLAG_NO_AUTO)
830 continue;
831
832 m->can_verity = true;
833
834 /* Ignore verity unless a root hash is specified */
835 if (sd_id128_is_null(root_verity_uuid) || !sd_id128_equal(root_verity_uuid, id))
836 continue;
837
838 designator = PARTITION_ROOT_VERITY;
839 fstype = "DM_verity_hash";
840 architecture = native_architecture();
841 rw = false;
842 }
843 #endif
844 #ifdef GPT_ROOT_SECONDARY
845 else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
846
847 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
848
849 if (pflags & GPT_FLAG_NO_AUTO)
850 continue;
851
852 /* If a root ID is specified, ignore everything but the root id */
853 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
854 continue;
855
856 designator = PARTITION_ROOT_SECONDARY;
857 architecture = SECONDARY_ARCHITECTURE;
858 rw = !(pflags & GPT_FLAG_READ_ONLY);
859
860 } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
861
862 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
863
864 if (pflags & GPT_FLAG_NO_AUTO)
865 continue;
866
867 m->can_verity = true;
868
869 /* Ignore verity unless root has is specified */
870 if (sd_id128_is_null(root_verity_uuid) || !sd_id128_equal(root_verity_uuid, id))
871 continue;
872
873 designator = PARTITION_ROOT_SECONDARY_VERITY;
874 fstype = "DM_verity_hash";
875 architecture = SECONDARY_ARCHITECTURE;
876 rw = false;
877 }
878 #endif
879 #ifdef GPT_USR_NATIVE
880 else if (sd_id128_equal(type_id, GPT_USR_NATIVE)) {
881
882 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
883
884 if (pflags & GPT_FLAG_NO_AUTO)
885 continue;
886
887 /* If a usr ID is specified, ignore everything but the usr id */
888 if (!sd_id128_is_null(usr_uuid) && !sd_id128_equal(usr_uuid, id))
889 continue;
890
891 designator = PARTITION_USR;
892 architecture = native_architecture();
893 rw = !(pflags & GPT_FLAG_READ_ONLY);
894
895 } else if (sd_id128_equal(type_id, GPT_USR_NATIVE_VERITY)) {
896
897 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
898
899 if (pflags & GPT_FLAG_NO_AUTO)
900 continue;
901
902 m->can_verity = true;
903
904 /* Ignore verity unless a usr hash is specified */
905 if (sd_id128_is_null(usr_verity_uuid) || !sd_id128_equal(usr_verity_uuid, id))
906 continue;
907
908 designator = PARTITION_USR_VERITY;
909 fstype = "DM_verity_hash";
910 architecture = native_architecture();
911 rw = false;
912 }
913 #endif
914 #ifdef GPT_USR_SECONDARY
915 else if (sd_id128_equal(type_id, GPT_USR_SECONDARY)) {
916
917 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
918
919 if (pflags & GPT_FLAG_NO_AUTO)
920 continue;
921
922 /* If a usr ID is specified, ignore everything but the usr id */
923 if (!sd_id128_is_null(usr_uuid) && !sd_id128_equal(usr_uuid, id))
924 continue;
925
926 designator = PARTITION_USR_SECONDARY;
927 architecture = SECONDARY_ARCHITECTURE;
928 rw = !(pflags & GPT_FLAG_READ_ONLY);
929
930 } else if (sd_id128_equal(type_id, GPT_USR_SECONDARY_VERITY)) {
931
932 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
933
934 if (pflags & GPT_FLAG_NO_AUTO)
935 continue;
936
937 m->can_verity = true;
938
939 /* Ignore verity unless usr has is specified */
940 if (sd_id128_is_null(usr_verity_uuid) || !sd_id128_equal(usr_verity_uuid, id))
941 continue;
942
943 designator = PARTITION_USR_SECONDARY_VERITY;
944 fstype = "DM_verity_hash";
945 architecture = SECONDARY_ARCHITECTURE;
946 rw = false;
947 }
948 #endif
949 else if (sd_id128_equal(type_id, GPT_SWAP)) {
950
951 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO);
952
953 if (pflags & GPT_FLAG_NO_AUTO)
954 continue;
955
956 designator = PARTITION_SWAP;
957 fstype = "swap";
958
959 } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
960
961 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
962
963 if (pflags & GPT_FLAG_NO_AUTO)
964 continue;
965
966 if (generic_node)
967 multiple_generic = true;
968 else {
969 generic_nr = nr;
970 generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
971 generic_uuid = id;
972 generic_node = strdup(node);
973 if (!generic_node)
974 return -ENOMEM;
975 }
976
977 } else if (sd_id128_equal(type_id, GPT_TMP)) {
978
979 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
980
981 if (pflags & GPT_FLAG_NO_AUTO)
982 continue;
983
984 designator = PARTITION_TMP;
985 rw = !(pflags & GPT_FLAG_READ_ONLY);
986
987 } else if (sd_id128_equal(type_id, GPT_VAR)) {
988
989 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
990
991 if (pflags & GPT_FLAG_NO_AUTO)
992 continue;
993
994 if (!FLAGS_SET(flags, DISSECT_IMAGE_RELAX_VAR_CHECK)) {
995 sd_id128_t var_uuid;
996
997 /* For /var we insist that the uuid of the partition matches the
998 * HMAC-SHA256 of the /var GPT partition type uuid, keyed by machine
999 * ID. Why? Unlike the other partitions /var is inherently
1000 * installation specific, hence we need to be careful not to mount it
1001 * in the wrong installation. By hashing the partition UUID from
1002 * /etc/machine-id we can securely bind the partition to the
1003 * installation. */
1004
1005 r = sd_id128_get_machine_app_specific(GPT_VAR, &var_uuid);
1006 if (r < 0)
1007 return r;
1008
1009 if (!sd_id128_equal(var_uuid, id)) {
1010 log_debug("Found a /var/ partition, but its UUID didn't match our expectations, ignoring.");
1011 continue;
1012 }
1013 }
1014
1015 designator = PARTITION_VAR;
1016 rw = !(pflags & GPT_FLAG_READ_ONLY);
1017 }
1018
1019 if (designator != _PARTITION_DESIGNATOR_INVALID) {
1020 _cleanup_free_ char *t = NULL, *n = NULL, *o = NULL, *l = NULL;
1021 const char *options = NULL;
1022
1023 if (m->partitions[designator].found) {
1024 /* For most partition types the first one we see wins. Except for the
1025 * rootfs and /usr, where we do a version compare of the label, and
1026 * let the newest version win. This permits a simple A/B versioning
1027 * scheme in OS images. */
1028
1029 if (!PARTITION_DESIGNATOR_VERSIONED(designator) ||
1030 strverscmp_improved(m->partitions[designator].label, label) >= 0)
1031 continue;
1032
1033 dissected_partition_done(m->partitions + designator);
1034 }
1035
1036 if (fstype) {
1037 t = strdup(fstype);
1038 if (!t)
1039 return -ENOMEM;
1040 }
1041
1042 n = strdup(node);
1043 if (!n)
1044 return -ENOMEM;
1045
1046 if (label) {
1047 l = strdup(label);
1048 if (!l)
1049 return -ENOMEM;
1050 }
1051
1052 options = mount_options_from_designator(mount_options, designator);
1053 if (options) {
1054 o = strdup(options);
1055 if (!o)
1056 return -ENOMEM;
1057 }
1058
1059 m->partitions[designator] = (DissectedPartition) {
1060 .found = true,
1061 .partno = nr,
1062 .rw = rw,
1063 .architecture = architecture,
1064 .node = TAKE_PTR(n),
1065 .fstype = TAKE_PTR(t),
1066 .label = TAKE_PTR(l),
1067 .uuid = id,
1068 .mount_options = TAKE_PTR(o),
1069 };
1070 }
1071
1072 } else if (is_mbr) {
1073
1074 switch (blkid_partition_get_type(pp)) {
1075
1076 case 0x83: /* Linux partition */
1077
1078 if (pflags != 0x80) /* Bootable flag */
1079 continue;
1080
1081 if (generic_node)
1082 multiple_generic = true;
1083 else {
1084 generic_nr = nr;
1085 generic_rw = true;
1086 generic_node = strdup(node);
1087 if (!generic_node)
1088 return -ENOMEM;
1089 }
1090
1091 break;
1092
1093 case 0xEA: { /* Boot Loader Spec extended $BOOT partition */
1094 _cleanup_free_ char *n = NULL, *o = NULL;
1095 sd_id128_t id = SD_ID128_NULL;
1096 const char *sid, *options = NULL;
1097
1098 /* First one wins */
1099 if (m->partitions[PARTITION_XBOOTLDR].found)
1100 continue;
1101
1102 sid = blkid_partition_get_uuid(pp);
1103 if (sid)
1104 (void) sd_id128_from_string(sid, &id);
1105
1106 n = strdup(node);
1107 if (!n)
1108 return -ENOMEM;
1109
1110 options = mount_options_from_designator(mount_options, PARTITION_XBOOTLDR);
1111 if (options) {
1112 o = strdup(options);
1113 if (!o)
1114 return -ENOMEM;
1115 }
1116
1117 m->partitions[PARTITION_XBOOTLDR] = (DissectedPartition) {
1118 .found = true,
1119 .partno = nr,
1120 .rw = true,
1121 .architecture = _ARCHITECTURE_INVALID,
1122 .node = TAKE_PTR(n),
1123 .uuid = id,
1124 .mount_options = TAKE_PTR(o),
1125 };
1126
1127 break;
1128 }}
1129 }
1130 }
1131
1132 if (m->partitions[PARTITION_ROOT].found) {
1133 /* If we found the primary arch, then invalidate the secondary arch to avoid any ambiguities,
1134 * since we never want to mount the secondary arch in this case. */
1135 m->partitions[PARTITION_ROOT_SECONDARY].found = false;
1136 m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
1137 m->partitions[PARTITION_USR_SECONDARY].found = false;
1138 m->partitions[PARTITION_USR_SECONDARY_VERITY].found = false;
1139
1140 } else if (m->partitions[PARTITION_ROOT_VERITY].found)
1141 return -EADDRNOTAVAIL; /* Verity found but no matching rootfs? Something is off, refuse. */
1142
1143 else if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
1144
1145 /* No root partition found but there's one for the secondary architecture? Then upgrade
1146 * secondary arch to first */
1147
1148 m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
1149 zero(m->partitions[PARTITION_ROOT_SECONDARY]);
1150 m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
1151 zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
1152
1153 m->partitions[PARTITION_USR] = m->partitions[PARTITION_USR_SECONDARY];
1154 zero(m->partitions[PARTITION_USR_SECONDARY]);
1155 m->partitions[PARTITION_USR_VERITY] = m->partitions[PARTITION_USR_SECONDARY_VERITY];
1156 zero(m->partitions[PARTITION_USR_SECONDARY_VERITY]);
1157
1158 } else if (m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found)
1159 return -EADDRNOTAVAIL; /* as above */
1160
1161 else if (m->partitions[PARTITION_USR].found) {
1162
1163 /* Invalidate secondary arch /usr/ if we found the primary arch */
1164 m->partitions[PARTITION_USR_SECONDARY].found = false;
1165 m->partitions[PARTITION_USR_SECONDARY_VERITY].found = false;
1166
1167 } else if (m->partitions[PARTITION_USR_VERITY].found)
1168 return -EADDRNOTAVAIL; /* as above */
1169
1170 else if (m->partitions[PARTITION_USR_SECONDARY].found) {
1171
1172 /* Upgrade secondary arch to primary */
1173 m->partitions[PARTITION_USR] = m->partitions[PARTITION_USR_SECONDARY];
1174 zero(m->partitions[PARTITION_USR_SECONDARY]);
1175 m->partitions[PARTITION_USR_VERITY] = m->partitions[PARTITION_USR_SECONDARY_VERITY];
1176 zero(m->partitions[PARTITION_USR_SECONDARY_VERITY]);
1177
1178 } else if (m->partitions[PARTITION_USR_SECONDARY_VERITY].found)
1179 return -EADDRNOTAVAIL; /* as above */
1180
1181 else if ((flags & DISSECT_IMAGE_GENERIC_ROOT) &&
1182 (!verity || !verity->root_hash)) {
1183
1184 /* OK, we found nothing usable, then check if there's a single generic one distro, and use
1185 * that. If the root hash was set however, then we won't fall back to a generic node, because
1186 * the root hash decides. */
1187
1188 /* If we didn't find a properly marked root partition, but we did find a single suitable
1189 * generic Linux partition, then use this as root partition, if the caller asked for it. */
1190 if (multiple_generic)
1191 return -ENOTUNIQ;
1192
1193 /* If we didn't find a generic node, then we can't fix this up either */
1194 if (generic_node) {
1195 _cleanup_free_ char *o = NULL;
1196 const char *options;
1197
1198 options = mount_options_from_designator(mount_options, PARTITION_ROOT);
1199 if (options) {
1200 o = strdup(options);
1201 if (!o)
1202 return -ENOMEM;
1203 }
1204
1205 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
1206 .found = true,
1207 .rw = generic_rw,
1208 .partno = generic_nr,
1209 .architecture = _ARCHITECTURE_INVALID,
1210 .node = TAKE_PTR(generic_node),
1211 .uuid = generic_uuid,
1212 .mount_options = TAKE_PTR(o),
1213 };
1214 }
1215 }
1216
1217 /* Check if we have a root fs if we are told to do check. /usr alone is fine too, but only if appropriate flag for that is set too */
1218 if (FLAGS_SET(flags, DISSECT_IMAGE_REQUIRE_ROOT) &&
1219 !(m->partitions[PARTITION_ROOT].found || (m->partitions[PARTITION_USR].found && FLAGS_SET(flags, DISSECT_IMAGE_USR_NO_ROOT))))
1220 return -ENXIO;
1221
1222 /* Refuse if we found a verity partition for /usr but no matching file system partition */
1223 if (!m->partitions[PARTITION_USR].found && m->partitions[PARTITION_USR_VERITY].found)
1224 return -EADDRNOTAVAIL;
1225
1226 /* Combinations of verity /usr with verity-less root is OK, but the reverse is not */
1227 if (m->partitions[PARTITION_ROOT_VERITY].found && m->partitions[PARTITION_USR].found && !m->partitions[PARTITION_USR_VERITY].found)
1228 return -EADDRNOTAVAIL;
1229
1230 if (verity && verity->root_hash) {
1231 if (verity->designator < 0 || verity->designator == PARTITION_ROOT) {
1232 if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
1233 return -EADDRNOTAVAIL;
1234
1235 /* If we found a verity setup, then the root partition is necessarily read-only. */
1236 m->partitions[PARTITION_ROOT].rw = false;
1237 m->verity = true;
1238 }
1239
1240 if (verity->designator == PARTITION_USR) {
1241 if (!m->partitions[PARTITION_USR_VERITY].found || !m->partitions[PARTITION_USR].found)
1242 return -EADDRNOTAVAIL;
1243
1244 m->partitions[PARTITION_USR].rw = false;
1245 m->verity = true;
1246 }
1247 }
1248
1249 blkid_free_probe(b);
1250 b = NULL;
1251
1252 /* Fill in file system types if we don't know them yet. */
1253 for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
1254 DissectedPartition *p = m->partitions + i;
1255
1256 if (!p->found)
1257 continue;
1258
1259 if (!p->fstype && p->node) {
1260 r = probe_filesystem(p->node, &p->fstype);
1261 if (r < 0 && r != -EUCLEAN)
1262 return r;
1263 }
1264
1265 if (streq_ptr(p->fstype, "crypto_LUKS"))
1266 m->encrypted = true;
1267
1268 if (p->fstype && fstype_is_ro(p->fstype))
1269 p->rw = false;
1270 }
1271
1272 *ret = TAKE_PTR(m);
1273 return 0;
1274 #else
1275 return -EOPNOTSUPP;
1276 #endif
1277 }
1278
1279 DissectedImage* dissected_image_unref(DissectedImage *m) {
1280 if (!m)
1281 return NULL;
1282
1283 for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++)
1284 dissected_partition_done(m->partitions + i);
1285
1286 free(m->image_name);
1287 free(m->hostname);
1288 strv_free(m->machine_info);
1289 strv_free(m->os_release);
1290 strv_free(m->extension_release);
1291
1292 return mfree(m);
1293 }
1294
/* Checks whether the block device node at 'path' is a loop device, or a partition whose parent is one, by
 * probing for a "loop" directory below the device's sysfs entry.
 *
 * Returns > 0 if it is, 0 if it is not, -ENOTBLK if 'path' is not a block device node, and another
 * negative errno-style error if stat() or access() fail unexpectedly. */
static int is_loop_device(const char *path) {
        char s[SYS_BLOCK_PATH_MAX("/../loop/")];
        struct stat st;

        assert(path);

        if (stat(path, &st) < 0)
                return -errno;

        if (!S_ISBLK(st.st_mode))
                return -ENOTBLK;

        /* Use st_rdev here: for a device node that is the device number the node represents. st_dev would
         * be the device of the file system the node happens to be stored on, which is unrelated. */
        xsprintf_sys_block_path(s, "/loop/", st.st_rdev);
        if (access(s, F_OK) < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
                xsprintf_sys_block_path(s, "/../loop/", st.st_rdev);
                if (access(s, F_OK) < 0)
                        return errno == ENOENT ? false : -errno;
        }

        return true;
}
1320
1321 static int run_fsck(const char *node, const char *fstype) {
1322 int r, exit_status;
1323 pid_t pid;
1324
1325 assert(node);
1326 assert(fstype);
1327
1328 r = fsck_exists(fstype);
1329 if (r < 0) {
1330 log_debug_errno(r, "Couldn't determine whether fsck for %s exists, proceeding anyway.", fstype);
1331 return 0;
1332 }
1333 if (r == 0) {
1334 log_debug("Not checking partition %s, as fsck for %s does not exist.", node, fstype);
1335 return 0;
1336 }
1337
1338 r = safe_fork("(fsck)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_NULL_STDIO, &pid);
1339 if (r < 0)
1340 return log_debug_errno(r, "Failed to fork off fsck: %m");
1341 if (r == 0) {
1342 /* Child */
1343 execl("/sbin/fsck", "/sbin/fsck", "-aT", node, NULL);
1344 log_open();
1345 log_debug_errno(errno, "Failed to execl() fsck: %m");
1346 _exit(FSCK_OPERATIONAL_ERROR);
1347 }
1348
1349 exit_status = wait_for_terminate_and_check("fsck", pid, 0);
1350 if (exit_status < 0)
1351 return log_debug_errno(exit_status, "Failed to fork off /sbin/fsck: %m");
1352
1353 if ((exit_status & ~FSCK_ERROR_CORRECTED) != FSCK_SUCCESS) {
1354 log_debug("fsck failed with exit status %i.", exit_status);
1355
1356 if ((exit_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0)
1357 return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN), "File system is corrupted, refusing.");
1358
1359 log_debug("Ignoring fsck error.");
1360 }
1361
1362 return 0;
1363 }
1364
/* Mounts a single dissected partition.
 *
 * m         – the partition to mount; if it was not found or has no device node this is a NOP
 * where     – the root directory of the mount hierarchy being assembled
 * directory – path below 'where' to mount onto, or NULL to mount directly onto 'where'
 * uid_shift – if valid and non-zero, passed as uid=/gid= mount options for file systems that support it
 * flags     – DISSECT_IMAGE_* flags controlling read-only mode, fsck, mkdir and discard
 *
 * Returns 1 if a file system was mounted, 0 if there was nothing to mount, -EAFNOSUPPORT if the file
 * system type is unknown, -EUNATCH if the partition is still LUKS-encrypted, or another negative
 * errno-style error. */
static int mount_partition(
                DissectedPartition *m,
                const char *where,
                const char *directory,
                uid_t uid_shift,
                DissectImageFlags flags) {

        _cleanup_free_ char *chased = NULL, *options = NULL;
        const char *p, *node, *fstype;
        bool rw;
        int r;

        assert(m);
        assert(where);

        /* Use decrypted node and matching fstype if available, otherwise use the original device */
        node = m->decrypted_node ?: m->node;
        fstype = m->decrypted_node ? m->decrypted_fstype: m->fstype;

        if (!m->found || !node)
                return 0;
        if (!fstype)
                return -EAFNOSUPPORT;

        /* We are looking at an encrypted partition? This either means stacked encryption, or the caller didn't call dissected_image_decrypt() beforehand. Let's return a recognizable error for this case. */
        if (streq(fstype, "crypto_LUKS"))
                return -EUNATCH;

        /* Writable only if both the partition allows it and the caller didn't ask for read-only. */
        rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY);

        /* Only check file systems we are going to write to. */
        if (FLAGS_SET(flags, DISSECT_IMAGE_FSCK) && rw) {
                r = run_fsck(node, fstype);
                if (r < 0)
                        return r;
        }

        if (directory) {
                /* Automatically create missing mount points inside the image, if necessary. */
                r = mkdir_p_root(where, directory, uid_shift, (gid_t) uid_shift, 0755);
                if (r < 0 && r != -EROFS)
                        return r;

                /* Resolve the mount point relative to 'where', treating 'where' as the root. */
                r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased, NULL);
                if (r < 0)
                        return r;

                p = chased;
        } else {
                /* Create top-level mount if missing – but only if this is asked for. This won't modify the
                 * image (as the branch above does) but the host hierarchy, and the created directory might
                 * survive our mount in the host hierarchy hence. */
                if (FLAGS_SET(flags, DISSECT_IMAGE_MKDIR)) {
                        r = mkdir_p(where, 0755);
                        if (r < 0)
                                return r;
                }

                p = where;
        }

        /* If requested, turn on discard support. */
        if (fstype_can_discard(fstype) &&
            ((flags & DISSECT_IMAGE_DISCARD) ||
             ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node) > 0))) {
                options = strdup("discard");
                if (!options)
                        return -ENOMEM;
        }

        /* Append uid=/gid= options, comma-separated from whatever was collected so far. */
        if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) {
                _cleanup_free_ char *uid_option = NULL;

                if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
                        return -ENOMEM;

                if (!strextend_with_separator(&options, ",", uid_option))
                        return -ENOMEM;
        }

        /* Per-partition mount options recorded on the partition come last. */
        if (!isempty(m->mount_options))
                if (!strextend_with_separator(&options, ",", m->mount_options))
                        return -ENOMEM;

        r = mount_nofollow_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
        if (r < 0)
                return r;

        return 1;
}
1454
1455 static int mount_root_tmpfs(const char *where, uid_t uid_shift, DissectImageFlags flags) {
1456 _cleanup_free_ char *options = NULL;
1457 int r;
1458
1459 assert(where);
1460
1461 /* For images that contain /usr/ but no rootfs, let's mount rootfs as tmpfs */
1462
1463 if (FLAGS_SET(flags, DISSECT_IMAGE_MKDIR)) {
1464 r = mkdir_p(where, 0755);
1465 if (r < 0)
1466 return r;
1467 }
1468
1469 if (uid_is_valid(uid_shift)) {
1470 if (asprintf(&options, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
1471 return -ENOMEM;
1472 }
1473
1474 r = mount_nofollow_verbose(LOG_DEBUG, "rootfs", where, "tmpfs", MS_NODEV, options);
1475 if (r < 0)
1476 return r;
1477
1478 return 1;
1479 }
1480
/* Mounts the partitions of a dissected image below 'where': first the root file system (or a tmpfs if there
 * is only /usr/), then /usr, and afterwards – unless DISSECT_IMAGE_MOUNT_ROOT_ONLY is set – /home, /srv,
 * /var, /var/tmp, the XBOOTLDR partition on /boot and the ESP on /efi or /boot.
 * DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY skips the root and /usr step. Returns 0 on success. */
int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
        int r, xbootldr_mounted;

        assert(m);
        assert(where);

        /* Returns:
         *
         *  -ENXIO        → No root partition found
         *  -EMEDIUMTYPE  → DISSECT_IMAGE_VALIDATE_OS set but no os-release/extension-release file found
         *  -EUNATCH      → Encrypted partition found for which no dm-crypt was set up yet
         *  -EUCLEAN      → fsck for file system failed
         *  -EBUSY        → File system already mounted/used elsewhere (kernel)
         *  -EAFNOSUPPORT → File system type not supported or not known
         */

        if (!(m->partitions[PARTITION_ROOT].found ||
              (m->partitions[PARTITION_USR].found && FLAGS_SET(flags, DISSECT_IMAGE_USR_NO_ROOT))))
                return -ENXIO; /* Require a root fs or at least a /usr/ fs (the latter is subject to a flag of its own) */

        if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {

                /* First mount the root fs. If there's none we use a tmpfs. */
                if (m->partitions[PARTITION_ROOT].found)
                        r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
                else
                        r = mount_root_tmpfs(where, uid_shift, flags);
                if (r < 0)
                        return r;

                /* For us mounting root always means mounting /usr as well */
                r = mount_partition(m->partitions + PARTITION_USR, where, "/usr", uid_shift, flags);
                if (r < 0)
                        return r;

                /* If requested, insist that what we mounted is an OS tree or, failing that, an extension
                 * tree matching the image name. */
                if (flags & DISSECT_IMAGE_VALIDATE_OS) {
                        r = path_is_os_tree(where);
                        if (r < 0)
                                return r;
                        if (r == 0) {
                                r = path_is_extension_tree(where, m->image_name);
                                if (r < 0)
                                        return r;
                                if (r == 0)
                                        return -EMEDIUMTYPE;
                        }
                }
        }

        if (flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY)
                return 0;

        /* The calls below are NOPs for partitions that were not found in the image. */
        r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
        if (r < 0)
                return r;

        r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags);
        if (r < 0)
                return r;

        r = mount_partition(m->partitions + PARTITION_VAR, where, "/var", uid_shift, flags);
        if (r < 0)
                return r;

        r = mount_partition(m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, flags);
        if (r < 0)
                return r;

        /* Remember whether /boot got occupied (> 0), so that the ESP logic below doesn't reuse it. */
        xbootldr_mounted = mount_partition(m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, flags);
        if (xbootldr_mounted < 0)
                return xbootldr_mounted;

        if (m->partitions[PARTITION_ESP].found) {
                int esp_done = false;

                /* Mount the ESP to /efi if it exists. If it doesn't exist, use /boot instead, but only if it
                 * exists and is empty, and we didn't already mount the XBOOTLDR partition into it. */

                r = chase_symlinks("/efi", where, CHASE_PREFIX_ROOT, NULL, NULL);
                if (r < 0) {
                        if (r != -ENOENT)
                                return r;

                        /* /efi doesn't exist. Let's see if /boot is suitable then */

                        if (!xbootldr_mounted) {
                                _cleanup_free_ char *p = NULL;

                                r = chase_symlinks("/boot", where, CHASE_PREFIX_ROOT, &p, NULL);
                                if (r < 0) {
                                        if (r != -ENOENT)
                                                return r;
                                } else if (dir_is_empty(p) > 0) {
                                        /* It exists and is an empty directory. Let's mount the ESP there. */
                                        r = mount_partition(m->partitions + PARTITION_ESP, where, "/boot", uid_shift, flags);
                                        if (r < 0)
                                                return r;

                                        esp_done = true;
                                }
                        }
                }

                if (!esp_done) {
                        /* OK, let's mount the ESP now to /efi (possibly creating the dir if missing) */

                        r = mount_partition(m->partitions + PARTITION_ESP, where, "/efi", uid_shift, flags);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
1595
1596 int dissected_image_mount_and_warn(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
1597 int r;
1598
1599 assert(m);
1600 assert(where);
1601
1602 r = dissected_image_mount(m, where, uid_shift, flags);
1603 if (r == -ENXIO)
1604 return log_error_errno(r, "Not root file system found in image.");
1605 if (r == -EMEDIUMTYPE)
1606 return log_error_errno(r, "No suitable os-release/extension-release file in image found.");
1607 if (r == -EUNATCH)
1608 return log_error_errno(r, "Encrypted file system discovered, but decryption not requested.");
1609 if (r == -EUCLEAN)
1610 return log_error_errno(r, "File system check on image failed.");
1611 if (r == -EBUSY)
1612 return log_error_errno(r, "File system already mounted elsewhere.");
1613 if (r == -EAFNOSUPPORT)
1614 return log_error_errno(r, "File system type not supported or not known.");
1615 if (r < 0)
1616 return log_error_errno(r, "Failed to mount image: %m");
1617
1618 return r;
1619 }
1620
#if HAVE_LIBCRYPTSETUP
/* One device-mapper device (dm-crypt or dm-verity) that was activated for a partition of an image. */
typedef struct DecryptedPartition {
        struct crypt_device *device; /* libcryptsetup handle of the activated device */
        char *name;                  /* device-mapper name the device was activated under */
        bool relinquished;           /* if true, decrypted_image_unref() will not deactivate the device */
} DecryptedPartition;

/* The set of device-mapper devices activated for one image. */
struct DecryptedImage {
        DecryptedPartition *decrypted; /* array of activated devices */
        size_t n_decrypted;            /* number of entries in use */
        size_t n_allocated;            /* allocation bookkeeping for GREEDY_REALLOC0() */
};
#endif
1634
1635 DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
1636 #if HAVE_LIBCRYPTSETUP
1637 int r;
1638
1639 if (!d)
1640 return NULL;
1641
1642 for (size_t i = 0; i < d->n_decrypted; i++) {
1643 DecryptedPartition *p = d->decrypted + i;
1644
1645 if (p->device && p->name && !p->relinquished) {
1646 r = sym_crypt_deactivate_by_name(p->device, p->name, 0);
1647 if (r < 0)
1648 log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
1649 }
1650
1651 if (p->device)
1652 sym_crypt_free(p->device);
1653 free(p->name);
1654 }
1655
1656 free(d->decrypted);
1657 free(d);
1658 #endif
1659 return NULL;
1660 }
1661
1662 #if HAVE_LIBCRYPTSETUP
1663
1664 static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
1665 _cleanup_free_ char *name = NULL, *node = NULL;
1666 const char *base;
1667
1668 assert(original_node);
1669 assert(suffix);
1670 assert(ret_name);
1671 assert(ret_node);
1672
1673 base = strrchr(original_node, '/');
1674 if (!base)
1675 base = original_node;
1676 else
1677 base++;
1678 if (isempty(base))
1679 return -EINVAL;
1680
1681 name = strjoin(base, suffix);
1682 if (!name)
1683 return -ENOMEM;
1684 if (!filename_is_valid(name))
1685 return -EINVAL;
1686
1687 node = path_join(sym_crypt_get_dir(), name);
1688 if (!node)
1689 return -ENOMEM;
1690
1691 *ret_name = TAKE_PTR(name);
1692 *ret_node = TAKE_PTR(node);
1693
1694 return 0;
1695 }
1696
1697 static int decrypt_partition(
1698 DissectedPartition *m,
1699 const char *passphrase,
1700 DissectImageFlags flags,
1701 DecryptedImage *d) {
1702
1703 _cleanup_free_ char *node = NULL, *name = NULL;
1704 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
1705 int r;
1706
1707 assert(m);
1708 assert(d);
1709
1710 if (!m->found || !m->node || !m->fstype)
1711 return 0;
1712
1713 if (!streq(m->fstype, "crypto_LUKS"))
1714 return 0;
1715
1716 if (!passphrase)
1717 return -ENOKEY;
1718
1719 r = dlopen_cryptsetup();
1720 if (r < 0)
1721 return r;
1722
1723 r = make_dm_name_and_node(m->node, "-decrypted", &name, &node);
1724 if (r < 0)
1725 return r;
1726
1727 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
1728 return -ENOMEM;
1729
1730 r = sym_crypt_init(&cd, m->node);
1731 if (r < 0)
1732 return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
1733
1734 cryptsetup_enable_logging(cd);
1735
1736 r = sym_crypt_load(cd, CRYPT_LUKS, NULL);
1737 if (r < 0)
1738 return log_debug_errno(r, "Failed to load LUKS metadata: %m");
1739
1740 r = sym_crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
1741 ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
1742 ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
1743 if (r < 0) {
1744 log_debug_errno(r, "Failed to activate LUKS device: %m");
1745 return r == -EPERM ? -EKEYREJECTED : r;
1746 }
1747
1748 d->decrypted[d->n_decrypted++] = (DecryptedPartition) {
1749 .name = TAKE_PTR(name),
1750 .device = TAKE_PTR(cd),
1751 };
1752
1753 m->decrypted_node = TAKE_PTR(node);
1754
1755 return 0;
1756 }
1757
1758 static int verity_can_reuse(
1759 const VeritySettings *verity,
1760 const char *name,
1761 struct crypt_device **ret_cd) {
1762
1763 /* If the same volume was already open, check that the root hashes match, and reuse it if they do */
1764 _cleanup_free_ char *root_hash_existing = NULL;
1765 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
1766 struct crypt_params_verity crypt_params = {};
1767 size_t root_hash_existing_size;
1768 int r;
1769
1770 assert(verity);
1771 assert(name);
1772 assert(ret_cd);
1773
1774 r = sym_crypt_init_by_name(&cd, name);
1775 if (r < 0)
1776 return log_debug_errno(r, "Error opening verity device, crypt_init_by_name failed: %m");
1777
1778 r = sym_crypt_get_verity_info(cd, &crypt_params);
1779 if (r < 0)
1780 return log_debug_errno(r, "Error opening verity device, crypt_get_verity_info failed: %m");
1781
1782 root_hash_existing_size = verity->root_hash_size;
1783 root_hash_existing = malloc0(root_hash_existing_size);
1784 if (!root_hash_existing)
1785 return -ENOMEM;
1786
1787 r = sym_crypt_volume_key_get(cd, CRYPT_ANY_SLOT, root_hash_existing, &root_hash_existing_size, NULL, 0);
1788 if (r < 0)
1789 return log_debug_errno(r, "Error opening verity device, crypt_volume_key_get failed: %m");
1790 if (verity->root_hash_size != root_hash_existing_size ||
1791 memcmp(root_hash_existing, verity->root_hash, verity->root_hash_size) != 0)
1792 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Error opening verity device, it already exists but root hashes are different.");
1793
1794 #if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
1795 /* Ensure that, if signatures are supported, we only reuse the device if the previous mount used the
1796 * same settings, so that a previous unsigned mount will not be reused if the user asks to use
1797 * signing for the new one, and vice versa. */
1798 if (!!verity->root_hash_sig != !!(crypt_params.flags & CRYPT_VERITY_ROOT_HASH_SIGNATURE))
1799 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Error opening verity device, it already exists but signature settings are not the same.");
1800 #endif
1801
1802 *ret_cd = TAKE_PTR(cd);
1803 return 0;
1804 }
1805
1806 static inline char* dm_deferred_remove_clean(char *name) {
1807 if (!name)
1808 return NULL;
1809
1810 (void) sym_crypt_deactivate_by_name(NULL, name, CRYPT_DEACTIVATE_DEFERRED);
1811 return mfree(name);
1812 }
1813 DEFINE_TRIVIAL_CLEANUP_FUNC(char *, dm_deferred_remove_clean);
1814
1815 static int verity_partition(
1816 PartitionDesignator designator,
1817 DissectedPartition *m,
1818 DissectedPartition *v,
1819 const VeritySettings *verity,
1820 DissectImageFlags flags,
1821 DecryptedImage *d) {
1822
1823 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
1824 _cleanup_(dm_deferred_remove_cleanp) char *restore_deferred_remove = NULL;
1825 _cleanup_free_ char *node = NULL, *name = NULL;
1826 int r;
1827
1828 assert(m);
1829 assert(v || (verity && verity->data_path));
1830
1831 if (!verity || !verity->root_hash)
1832 return 0;
1833 if (!((verity->designator < 0 && designator == PARTITION_ROOT) ||
1834 (verity->designator == designator)))
1835 return 0;
1836
1837 if (!m->found || !m->node || !m->fstype)
1838 return 0;
1839 if (!verity->data_path) {
1840 if (!v->found || !v->node || !v->fstype)
1841 return 0;
1842
1843 if (!streq(v->fstype, "DM_verity_hash"))
1844 return 0;
1845 }
1846
1847 r = dlopen_cryptsetup();
1848 if (r < 0)
1849 return r;
1850
1851 if (FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE)) {
1852 /* Use the roothash, which is unique per volume, as the device node name, so that it can be reused */
1853 _cleanup_free_ char *root_hash_encoded = NULL;
1854
1855 root_hash_encoded = hexmem(verity->root_hash, verity->root_hash_size);
1856 if (!root_hash_encoded)
1857 return -ENOMEM;
1858
1859 r = make_dm_name_and_node(root_hash_encoded, "-verity", &name, &node);
1860 } else
1861 r = make_dm_name_and_node(m->node, "-verity", &name, &node);
1862 if (r < 0)
1863 return r;
1864
1865 r = sym_crypt_init(&cd, verity->data_path ?: v->node);
1866 if (r < 0)
1867 return r;
1868
1869 cryptsetup_enable_logging(cd);
1870
1871 r = sym_crypt_load(cd, CRYPT_VERITY, NULL);
1872 if (r < 0)
1873 return r;
1874
1875 r = sym_crypt_set_data_device(cd, m->node);
1876 if (r < 0)
1877 return r;
1878
1879 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
1880 return -ENOMEM;
1881
1882 /* If activating fails because the device already exists, check the metadata and reuse it if it matches.
1883 * In case of ENODEV/ENOENT, which can happen if another process is activating at the exact same time,
1884 * retry a few times before giving up. */
1885 for (unsigned i = 0; i < N_DEVICE_NODE_LIST_ATTEMPTS; i++) {
1886 if (verity->root_hash_sig) {
1887 #if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
1888 r = sym_crypt_activate_by_signed_key(
1889 cd,
1890 name,
1891 verity->root_hash,
1892 verity->root_hash_size,
1893 verity->root_hash_sig,
1894 verity->root_hash_sig_size,
1895 CRYPT_ACTIVATE_READONLY);
1896 #else
1897 r = log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
1898 "Activation of verity device with signature requested, but not supported by %s due to missing crypt_activate_by_signed_key().", program_invocation_short_name);
1899 #endif
1900 } else
1901 r = sym_crypt_activate_by_volume_key(
1902 cd,
1903 name,
1904 verity->root_hash,
1905 verity->root_hash_size,
1906 CRYPT_ACTIVATE_READONLY);
1907 /* libdevmapper can return EINVAL when the device is already in the activation stage.
1908 * There's no way to distinguish this situation from a genuine error due to invalid
1909 * parameters, so immediately fall back to activating the device with a unique name.
1910 * Improvements in libcrypsetup can ensure this never happens:
1911 * https://gitlab.com/cryptsetup/cryptsetup/-/merge_requests/96 */
1912 if (r == -EINVAL && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
1913 return verity_partition(designator, m, v, verity, flags & ~DISSECT_IMAGE_VERITY_SHARE, d);
1914 if (!IN_SET(r,
1915 0, /* Success */
1916 -EEXIST, /* Volume is already open and ready to be used */
1917 -EBUSY, /* Volume is being opened but not ready, crypt_init_by_name can fetch details */
1918 -ENODEV /* Volume is being opened but not ready, crypt_init_by_name would fail, try to open again */))
1919 return r;
1920 if (IN_SET(r, -EEXIST, -EBUSY)) {
1921 struct crypt_device *existing_cd = NULL;
1922
1923 if (!restore_deferred_remove){
1924 /* To avoid races, disable automatic removal on umount while setting up the new device. Restore it on failure. */
1925 r = dm_deferred_remove_cancel(name);
1926 /* If activation returns EBUSY there might be no deferred removal to cancel, that's fine */
1927 if (r < 0 && r != -ENXIO)
1928 return log_debug_errno(r, "Disabling automated deferred removal for verity device %s failed: %m", node);
1929 if (r == 0) {
1930 restore_deferred_remove = strdup(name);
1931 if (!restore_deferred_remove)
1932 return -ENOMEM;
1933 }
1934 }
1935
1936 r = verity_can_reuse(verity, name, &existing_cd);
1937 /* Same as above, -EINVAL can randomly happen when it actually means -EEXIST */
1938 if (r == -EINVAL && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
1939 return verity_partition(designator, m, v, verity, flags & ~DISSECT_IMAGE_VERITY_SHARE, d);
1940 if (!IN_SET(r, 0, -ENODEV, -ENOENT, -EBUSY))
1941 return log_debug_errno(r, "Checking whether existing verity device %s can be reused failed: %m", node);
1942 if (r == 0) {
1943 /* devmapper might say that the device exists, but the devlink might not yet have been
1944 * created. Check and wait for the udev event in that case. */
1945 r = device_wait_for_devlink(node, "block", usec_add(now(CLOCK_MONOTONIC), 100 * USEC_PER_MSEC), NULL);
1946 /* Fallback to activation with a unique device if it's taking too long */
1947 if (r == -ETIMEDOUT)
1948 break;
1949 if (r < 0)
1950 return r;
1951
1952 if (cd)
1953 sym_crypt_free(cd);
1954 cd = existing_cd;
1955 }
1956 }
1957 if (r == 0)
1958 break;
1959
1960 /* Device is being opened by another process, but it has not finished yet, yield for 2ms */
1961 (void) usleep(2 * USEC_PER_MSEC);
1962 }
1963
1964 /* An existing verity device was reported by libcryptsetup/libdevmapper, but we can't use it at this time.
1965 * Fall back to activating it with a unique device name. */
1966 if (r != 0 && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
1967 return verity_partition(designator, m, v, verity, flags & ~DISSECT_IMAGE_VERITY_SHARE, d);
1968
1969 /* Everything looks good and we'll be able to mount the device, so deferred remove will be re-enabled at that point. */
1970 restore_deferred_remove = mfree(restore_deferred_remove);
1971
1972 d->decrypted[d->n_decrypted++] = (DecryptedPartition) {
1973 .name = TAKE_PTR(name),
1974 .device = TAKE_PTR(cd),
1975 };
1976
1977 m->decrypted_node = TAKE_PTR(node);
1978
1979 return 0;
1980 }
1981 #endif
1982
/* Sets up dm-crypt and dm-verity devices for all partitions of the dissected image that need them, and
 * probes the file system type on each newly created device.
 *
 * m          – the dissected image
 * passphrase – LUKS passphrase, may be NULL (in which case -ENOKEY is returned if one is needed)
 * verity     – optional verity settings (root hash, signature, external hash data path)
 * flags      – DISSECT_IMAGE_* flags; DISSECT_IMAGE_VERITY_SHARE is always added for the verity step
 * ret        – receives the set of activated devices, or NULL if there was nothing to do
 *
 * Without libcryptsetup support, -EOPNOTSUPP is returned if anything would need to be set up. */
int dissected_image_decrypt(
                DissectedImage *m,
                const char *passphrase,
                const VeritySettings *verity,
                DissectImageFlags flags,
                DecryptedImage **ret) {

#if HAVE_LIBCRYPTSETUP
        _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
        int r;
#endif

        assert(m);
        assert(!verity || verity->root_hash || verity->root_hash_size == 0);

        /* Returns:
         *
         *      = 0           → There was nothing to decrypt
         *      > 0           → Decrypted successfully
         *      -ENOKEY       → There's something to decrypt but no key was supplied
         *      -EKEYREJECTED → Passed key was not correct
         */

        /* Refuse root hashes that are shorter than a 128-bit ID. */
        if (verity && verity->root_hash && verity->root_hash_size < sizeof(sd_id128_t))
                return -EINVAL;

        if (!m->encrypted && !m->verity) {
                *ret = NULL;
                return 0;
        }

#if HAVE_LIBCRYPTSETUP
        d = new0(DecryptedImage, 1);
        if (!d)
                return -ENOMEM;

        for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
                DissectedPartition *p = m->partitions + i;
                PartitionDesignator k;

                if (!p->found)
                        continue;

                /* NOP unless the partition is LUKS */
                r = decrypt_partition(p, passphrase, flags, d);
                if (r < 0)
                        return r;

                /* If this designator has an associated verity designator, try to set up verity for it */
                k = PARTITION_VERITY_OF(i);
                if (k >= 0) {
                        r = verity_partition(i, p, m->partitions + k, verity, flags | DISSECT_IMAGE_VERITY_SHARE, d);
                        if (r < 0)
                                return r;
                }

                /* Figure out what is inside the device we just set up; an ambiguous probe result
                 * (-EUCLEAN) is tolerated here. */
                if (!p->decrypted_fstype && p->decrypted_node) {
                        r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype);
                        if (r < 0 && r != -EUCLEAN)
                                return r;
                }
        }

        *ret = TAKE_PTR(d);

        return 1;
#else
        return -EOPNOTSUPP;
#endif
}
2051
2052 int dissected_image_decrypt_interactively(
2053 DissectedImage *m,
2054 const char *passphrase,
2055 const VeritySettings *verity,
2056 DissectImageFlags flags,
2057 DecryptedImage **ret) {
2058
2059 _cleanup_strv_free_erase_ char **z = NULL;
2060 int n = 3, r;
2061
2062 if (passphrase)
2063 n--;
2064
2065 for (;;) {
2066 r = dissected_image_decrypt(m, passphrase, verity, flags, ret);
2067 if (r >= 0)
2068 return r;
2069 if (r == -EKEYREJECTED)
2070 log_error_errno(r, "Incorrect passphrase, try again!");
2071 else if (r != -ENOKEY)
2072 return log_error_errno(r, "Failed to decrypt image: %m");
2073
2074 if (--n < 0)
2075 return log_error_errno(SYNTHETIC_ERRNO(EKEYREJECTED),
2076 "Too many retries.");
2077
2078 z = strv_free(z);
2079
2080 r = ask_password_auto("Please enter image passphrase:", NULL, "dissect", "dissect", "dissect.passphrase", USEC_INFINITY, 0, &z);
2081 if (r < 0)
2082 return log_error_errno(r, "Failed to query for passphrase: %m");
2083
2084 passphrase = z[0];
2085 }
2086 }
2087
2088 int decrypted_image_relinquish(DecryptedImage *d) {
2089 assert(d);
2090
2091 /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a
2092 * boolean so that we don't clean it up ourselves either anymore */
2093
2094 #if HAVE_LIBCRYPTSETUP
2095 int r;
2096
2097 for (size_t i = 0; i < d->n_decrypted; i++) {
2098 DecryptedPartition *p = d->decrypted + i;
2099
2100 if (p->relinquished)
2101 continue;
2102
2103 r = sym_crypt_deactivate_by_name(NULL, p->name, CRYPT_DEACTIVATE_DEFERRED);
2104 if (r < 0)
2105 return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
2106
2107 p->relinquished = true;
2108 }
2109 #endif
2110
2111 return 0;
2112 }
2113
/* Builds the path of an auxiliary file belonging to an image, e.g. "foo.roothash" for "foo.raw".
 *
 * If 'image' ends in ".raw" that extension is replaced by 'suffix', otherwise 'suffix' is simply appended
 * to the full image path.
 *
 * Returns a newly allocated string the caller must free(), or NULL if memory is exhausted. */
static char *build_auxiliary_path(const char *image, const char *suffix) {
        static const char raw_extension[] = ".raw";
        const size_t raw_extension_len = sizeof(raw_extension) - 1;
        size_t stem_len, suffix_len;
        char *n;

        assert(image);
        assert(suffix);

        stem_len = strlen(image);
        suffix_len = strlen(suffix);

        /* Strip a trailing ".raw", if there is one. In all other cases the complete image path is kept as
         * the stem (it must not be dropped). */
        if (stem_len >= raw_extension_len &&
            memcmp(image + stem_len - raw_extension_len, raw_extension, raw_extension_len) == 0)
                stem_len -= raw_extension_len;

        n = malloc(stem_len + suffix_len + 1);
        if (!n)
                return NULL;

        memcpy(n, image, stem_len);
        memcpy(n + stem_len, suffix, suffix_len + 1); /* includes the trailing NUL */

        return n;
}
2132
2133 void verity_settings_done(VeritySettings *v) {
2134 assert(v);
2135
2136 v->root_hash = mfree(v->root_hash);
2137 v->root_hash_size = 0;
2138
2139 v->root_hash_sig = mfree(v->root_hash_sig);
2140 v->root_hash_sig_size = 0;
2141
2142 v->data_path = mfree(v->data_path);
2143 }
2144
2145 int verity_settings_load(
2146 VeritySettings *verity,
2147 const char *image,
2148 const char *root_hash_path,
2149 const char *root_hash_sig_path) {
2150
2151 _cleanup_free_ void *root_hash = NULL, *root_hash_sig = NULL;
2152 size_t root_hash_size = 0, root_hash_sig_size = 0;
2153 _cleanup_free_ char *verity_data_path = NULL;
2154 PartitionDesignator designator;
2155 int r;
2156
2157 assert(verity);
2158 assert(image);
2159 assert(verity->designator < 0 || IN_SET(verity->designator, PARTITION_ROOT, PARTITION_USR));
2160
2161 /* If we are asked to load the root hash for a device node, exit early */
2162 if (is_device_path(image))
2163 return 0;
2164
2165 designator = verity->designator;
2166
2167 /* We only fill in what isn't already filled in */
2168
2169 if (!verity->root_hash) {
2170 _cleanup_free_ char *text = NULL;
2171
2172 if (root_hash_path) {
2173 /* If explicitly specified it takes precedence */
2174 r = read_one_line_file(root_hash_path, &text);
2175 if (r < 0)
2176 return r;
2177
2178 if (designator < 0)
2179 designator = PARTITION_ROOT;
2180 } else {
2181 /* Otherwise look for xattr and separate file, and first for the data for root and if
2182 * that doesn't exist for /usr */
2183
2184 if (designator < 0 || designator == PARTITION_ROOT) {
2185 r = getxattr_malloc(image, "user.verity.roothash", &text, true);
2186 if (r < 0) {
2187 _cleanup_free_ char *p = NULL;
2188
2189 if (!IN_SET(r, -ENODATA, -ENOENT) && !ERRNO_IS_NOT_SUPPORTED(r))
2190 return r;
2191
2192 p = build_auxiliary_path(image, ".roothash");
2193 if (!p)
2194 return -ENOMEM;
2195
2196 r = read_one_line_file(p, &text);
2197 if (r < 0 && r != -ENOENT)
2198 return r;
2199 }
2200
2201 if (text)
2202 designator = PARTITION_ROOT;
2203 }
2204
2205 if (!text && (designator < 0 || designator == PARTITION_USR)) {
2206 /* So in the "roothash" xattr/file name above the "root" of course primarily
2207 * refers to the root of the Verity Merkle tree. But coincidentally it also
2208 * is the hash for the *root* file system, i.e. the "root" neatly refers to
2209 * two distinct concepts called "root". Taking benefit of this happy
2210 * coincidence we call the file with the root hash for the /usr/ file system
2211 * `usrhash`, because `usrroothash` or `rootusrhash` would just be too
2212 * confusing. We thus drop the reference to the root of the Merkle tree, and
2213 * just indicate which file system it's about. */
2214 r = getxattr_malloc(image, "user.verity.usrhash", &text, true);
2215 if (r < 0) {
2216 _cleanup_free_ char *p = NULL;
2217
2218 if (!IN_SET(r, -ENODATA, -ENOENT) && !ERRNO_IS_NOT_SUPPORTED(r))
2219 return r;
2220
2221 p = build_auxiliary_path(image, ".usrhash");
2222 if (!p)
2223 return -ENOMEM;
2224
2225 r = read_one_line_file(p, &text);
2226 if (r < 0 && r != -ENOENT)
2227 return r;
2228 }
2229
2230 if (text)
2231 designator = PARTITION_USR;
2232 }
2233 }
2234
2235 if (text) {
2236 r = unhexmem(text, strlen(text), &root_hash, &root_hash_size);
2237 if (r < 0)
2238 return r;
2239 if (root_hash_size < sizeof(sd_id128_t))
2240 return -EINVAL;
2241 }
2242 }
2243
2244 if ((root_hash || verity->root_hash) && !verity->root_hash_sig) {
2245 if (root_hash_sig_path) {
2246 r = read_full_file(root_hash_sig_path, (char**) &root_hash_sig, &root_hash_sig_size);
2247 if (r < 0 && r != -ENOENT)
2248 return r;
2249
2250 if (designator < 0)
2251 designator = PARTITION_ROOT;
2252 } else {
2253 if (designator < 0 || designator == PARTITION_ROOT) {
2254 _cleanup_free_ char *p = NULL;
2255
2256 /* Follow naming convention recommended by the relevant RFC:
2257 * https://tools.ietf.org/html/rfc5751#section-3.2.1 */
2258 p = build_auxiliary_path(image, ".roothash.p7s");
2259 if (!p)
2260 return -ENOMEM;
2261
2262 r = read_full_file(p, (char**) &root_hash_sig, &root_hash_sig_size);
2263 if (r < 0 && r != -ENOENT)
2264 return r;
2265 if (r >= 0)
2266 designator = PARTITION_ROOT;
2267 }
2268
2269 if (!root_hash_sig && (designator < 0 || designator == PARTITION_USR)) {
2270 _cleanup_free_ char *p = NULL;
2271
2272 p = build_auxiliary_path(image, ".usrhash.p7s");
2273 if (!p)
2274 return -ENOMEM;
2275
2276 r = read_full_file(p, (char**) &root_hash_sig, &root_hash_sig_size);
2277 if (r < 0 && r != -ENOENT)
2278 return r;
2279 if (r >= 0)
2280 designator = PARTITION_USR;
2281 }
2282 }
2283
2284 if (root_hash_sig && root_hash_sig_size == 0) /* refuse empty size signatures */
2285 return -EINVAL;
2286 }
2287
2288 if (!verity->data_path) {
2289 _cleanup_free_ char *p = NULL;
2290
2291 p = build_auxiliary_path(image, ".verity");
2292 if (!p)
2293 return -ENOMEM;
2294
2295 if (access(p, F_OK) < 0) {
2296 if (errno != ENOENT)
2297 return -errno;
2298 } else
2299 verity_data_path = TAKE_PTR(p);
2300 }
2301
2302 if (root_hash) {
2303 verity->root_hash = TAKE_PTR(root_hash);
2304 verity->root_hash_size = root_hash_size;
2305 }
2306
2307 if (root_hash_sig) {
2308 verity->root_hash_sig = TAKE_PTR(root_hash_sig);
2309 verity->root_hash_sig_size = root_hash_sig_size;
2310 }
2311
2312 if (verity_data_path)
2313 verity->data_path = TAKE_PTR(verity_data_path);
2314
2315 if (verity->designator < 0)
2316 verity->designator = designator;
2317
2318 return 1;
2319 }
2320
2321 int dissected_image_acquire_metadata(DissectedImage *m) {
2322
2323 enum {
2324 META_HOSTNAME,
2325 META_MACHINE_ID,
2326 META_MACHINE_INFO,
2327 META_OS_RELEASE,
2328 META_EXTENSION_RELEASE,
2329 _META_MAX,
2330 };
2331
2332 static const char *paths[_META_MAX] = {
2333 [META_HOSTNAME] = "/etc/hostname\0",
2334 [META_MACHINE_ID] = "/etc/machine-id\0",
2335 [META_MACHINE_INFO] = "/etc/machine-info\0",
2336 [META_OS_RELEASE] = "/etc/os-release\0"
2337 "/usr/lib/os-release\0",
2338 [META_EXTENSION_RELEASE] = NULL,
2339 };
2340
2341 _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL, **extension_release = NULL;
2342 _cleanup_close_pair_ int error_pipe[2] = { -1, -1 };
2343 _cleanup_(rmdir_and_freep) char *t = NULL;
2344 _cleanup_(sigkill_waitp) pid_t child = 0;
2345 sd_id128_t machine_id = SD_ID128_NULL;
2346 _cleanup_free_ char *hostname = NULL;
2347 unsigned n_meta_initialized = 0;
2348 int fds[2 * _META_MAX], r, v;
2349 ssize_t n;
2350
2351 BLOCK_SIGNALS(SIGCHLD);
2352
2353 assert(m);
2354
2355 /* As per the os-release spec, if the image is an extension it will have a file
2356 * named after the image name in extension-release.d/ */
2357 if (m->image_name) {
2358 char *ext;
2359
2360 ext = strjoina("/usr/lib/extension-release.d/extension-release.", m->image_name, "0");
2361 ext[strlen(ext) - 1] = '\0'; /* Extra \0 for NULSTR_FOREACH using placeholder from above */
2362 paths[META_EXTENSION_RELEASE] = ext;
2363 } else
2364 log_debug("No image name available, will skip extension-release metadata");
2365
2366 for (; n_meta_initialized < _META_MAX; n_meta_initialized ++) {
2367 if (!paths[n_meta_initialized]) {
2368 fds[2*n_meta_initialized] = fds[2*n_meta_initialized+1] = -1;
2369 continue;
2370 }
2371
2372 if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
2373 r = -errno;
2374 goto finish;
2375 }
2376 }
2377
2378 r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
2379 if (r < 0)
2380 goto finish;
2381
2382 if (pipe2(error_pipe, O_CLOEXEC) < 0) {
2383 r = -errno;
2384 goto finish;
2385 }
2386
2387 r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, &child);
2388 if (r < 0)
2389 goto finish;
2390 if (r == 0) {
2391 error_pipe[0] = safe_close(error_pipe[0]);
2392
2393 r = dissected_image_mount(
2394 m,
2395 t,
2396 UID_INVALID,
2397 DISSECT_IMAGE_READ_ONLY|
2398 DISSECT_IMAGE_MOUNT_ROOT_ONLY|
2399 DISSECT_IMAGE_VALIDATE_OS|
2400 DISSECT_IMAGE_USR_NO_ROOT);
2401 if (r < 0) {
2402 /* Let parent know the error */
2403 (void) write(error_pipe[1], &r, sizeof(r));
2404
2405 log_debug_errno(r, "Failed to mount dissected image: %m");
2406 _exit(EXIT_FAILURE);
2407 }
2408
2409 for (unsigned k = 0; k < _META_MAX; k++) {
2410 _cleanup_close_ int fd = -ENOENT;
2411 const char *p;
2412
2413 if (!paths[k])
2414 continue;
2415
2416 fds[2*k] = safe_close(fds[2*k]);
2417
2418 NULSTR_FOREACH(p, paths[k]) {
2419 fd = chase_symlinks_and_open(p, t, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
2420 if (fd >= 0)
2421 break;
2422 }
2423 if (fd < 0) {
2424 log_debug_errno(fd, "Failed to read %s file of image, ignoring: %m", paths[k]);
2425 fds[2*k+1] = safe_close(fds[2*k+1]);
2426 continue;
2427 }
2428
2429 r = copy_bytes(fd, fds[2*k+1], UINT64_MAX, 0);
2430 if (r < 0) {
2431 (void) write(error_pipe[1], &r, sizeof(r));
2432 _exit(EXIT_FAILURE);
2433 }
2434
2435 fds[2*k+1] = safe_close(fds[2*k+1]);
2436 }
2437
2438 _exit(EXIT_SUCCESS);
2439 }
2440
2441 error_pipe[1] = safe_close(error_pipe[1]);
2442
2443 for (unsigned k = 0; k < _META_MAX; k++) {
2444 _cleanup_fclose_ FILE *f = NULL;
2445
2446 if (!paths[k])
2447 continue;
2448
2449 fds[2*k+1] = safe_close(fds[2*k+1]);
2450
2451 f = take_fdopen(&fds[2*k], "r");
2452 if (!f) {
2453 r = -errno;
2454 goto finish;
2455 }
2456
2457 switch (k) {
2458
2459 case META_HOSTNAME:
2460 r = read_etc_hostname_stream(f, &hostname);
2461 if (r < 0)
2462 log_debug_errno(r, "Failed to read /etc/hostname: %m");
2463
2464 break;
2465
2466 case META_MACHINE_ID: {
2467 _cleanup_free_ char *line = NULL;
2468
2469 r = read_line(f, LONG_LINE_MAX, &line);
2470 if (r < 0)
2471 log_debug_errno(r, "Failed to read /etc/machine-id: %m");
2472 else if (r == 33) {
2473 r = sd_id128_from_string(line, &machine_id);
2474 if (r < 0)
2475 log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
2476 } else if (r == 0)
2477 log_debug("/etc/machine-id file is empty.");
2478 else if (streq(line, "uninitialized"))
2479 log_debug("/etc/machine-id file is uninitialized (likely aborted first boot).");
2480 else
2481 log_debug("/etc/machine-id has unexpected length %i.", r);
2482
2483 break;
2484 }
2485
2486 case META_MACHINE_INFO:
2487 r = load_env_file_pairs(f, "machine-info", &machine_info);
2488 if (r < 0)
2489 log_debug_errno(r, "Failed to read /etc/machine-info: %m");
2490
2491 break;
2492
2493 case META_OS_RELEASE:
2494 r = load_env_file_pairs(f, "os-release", &os_release);
2495 if (r < 0)
2496 log_debug_errno(r, "Failed to read OS release file: %m");
2497
2498 break;
2499
2500 case META_EXTENSION_RELEASE:
2501 r = load_env_file_pairs(f, "extension-release", &extension_release);
2502 if (r < 0)
2503 log_debug_errno(r, "Failed to read extension release file: %m");
2504
2505 break;
2506 }
2507 }
2508
2509 r = wait_for_terminate_and_check("(sd-dissect)", child, 0);
2510 child = 0;
2511 if (r < 0)
2512 return r;
2513
2514 n = read(error_pipe[0], &v, sizeof(v));
2515 if (n < 0)
2516 return -errno;
2517 if (n == sizeof(v))
2518 return v; /* propagate error sent to us from child */
2519 if (n != 0)
2520 return -EIO;
2521
2522 if (r != EXIT_SUCCESS)
2523 return -EPROTO;
2524
2525 free_and_replace(m->hostname, hostname);
2526 m->machine_id = machine_id;
2527 strv_free_and_replace(m->machine_info, machine_info);
2528 strv_free_and_replace(m->os_release, os_release);
2529 strv_free_and_replace(m->extension_release, extension_release);
2530
2531 finish:
2532 for (unsigned k = 0; k < n_meta_initialized; k++)
2533 safe_close_pair(fds + 2*k);
2534
2535 return r;
2536 }
2537
2538 int dissect_image_and_warn(
2539 int fd,
2540 const char *name,
2541 const VeritySettings *verity,
2542 const MountOptions *mount_options,
2543 DissectImageFlags flags,
2544 DissectedImage **ret) {
2545
2546 _cleanup_free_ char *buffer = NULL;
2547 int r;
2548
2549 if (!name) {
2550 r = fd_get_path(fd, &buffer);
2551 if (r < 0)
2552 return r;
2553
2554 name = buffer;
2555 }
2556
2557 r = dissect_image(fd, verity, mount_options, flags, ret);
2558 switch (r) {
2559
2560 case -EOPNOTSUPP:
2561 return log_error_errno(r, "Dissecting images is not supported, compiled without blkid support.");
2562
2563 case -ENOPKG:
2564 return log_error_errno(r, "Couldn't identify a suitable partition table or file system in '%s'.", name);
2565
2566 case -EADDRNOTAVAIL:
2567 return log_error_errno(r, "No root partition for specified root hash found in '%s'.", name);
2568
2569 case -ENOTUNIQ:
2570 return log_error_errno(r, "Multiple suitable root partitions found in image '%s'.", name);
2571
2572 case -ENXIO:
2573 return log_error_errno(r, "No suitable root partition found in image '%s'.", name);
2574
2575 case -EPROTONOSUPPORT:
2576 return log_error_errno(r, "Device '%s' is loopback block device with partition scanning turned off, please turn it on.", name);
2577
2578 default:
2579 if (r < 0)
2580 return log_error_errno(r, "Failed to dissect image '%s': %m", name);
2581
2582 return r;
2583 }
2584 }
2585
2586 bool dissected_image_can_do_verity(const DissectedImage *image, PartitionDesignator partition_designator) {
2587 if (image->single_file_system)
2588 return partition_designator == PARTITION_ROOT && image->can_verity;
2589
2590 return PARTITION_VERITY_OF(partition_designator) >= 0;
2591 }
2592
2593 bool dissected_image_has_verity(const DissectedImage *image, PartitionDesignator partition_designator) {
2594 int k;
2595
2596 if (image->single_file_system)
2597 return partition_designator == PARTITION_ROOT && image->verity;
2598
2599 k = PARTITION_VERITY_OF(partition_designator);
2600 return k >= 0 && image->partitions[k].found;
2601 }
2602
2603 MountOptions* mount_options_free_all(MountOptions *options) {
2604 MountOptions *m;
2605
2606 while ((m = options)) {
2607 LIST_REMOVE(mount_options, options, m);
2608 free(m->options);
2609 free(m);
2610 }
2611
2612 return NULL;
2613 }
2614
2615 const char* mount_options_from_designator(const MountOptions *options, PartitionDesignator designator) {
2616 const MountOptions *m;
2617
2618 LIST_FOREACH(mount_options, m, options)
2619 if (designator == m->partition_designator && !isempty(m->options))
2620 return m->options;
2621
2622 return NULL;
2623 }
2624
2625 int mount_image_privately_interactively(
2626 const char *image,
2627 DissectImageFlags flags,
2628 char **ret_directory,
2629 LoopDevice **ret_loop_device,
2630 DecryptedImage **ret_decrypted_image) {
2631
2632 _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
2633 _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
2634 _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
2635 _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
2636 _cleanup_(rmdir_and_freep) char *created_dir = NULL;
2637 _cleanup_free_ char *temp = NULL;
2638 int r;
2639
2640 /* Mounts an OS image at a temporary place, inside a newly created mount namespace of our own. This
2641 * is used by tools such as systemd-tmpfiles or systemd-firstboot to operate on some disk image
2642 * easily. */
2643
2644 assert(image);
2645 assert(ret_directory);
2646 assert(ret_loop_device);
2647 assert(ret_decrypted_image);
2648
2649 r = verity_settings_load(&verity, image, NULL, NULL);
2650 if (r < 0)
2651 return log_error_errno(r, "Failed to load root hash data: %m");
2652
2653 r = tempfn_random_child(NULL, program_invocation_short_name, &temp);
2654 if (r < 0)
2655 return log_error_errno(r, "Failed to generate temporary mount directory: %m");
2656
2657 r = loop_device_make_by_path(
2658 image,
2659 FLAGS_SET(flags, DISSECT_IMAGE_READ_ONLY) ? O_RDONLY : O_RDWR,
2660 FLAGS_SET(flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
2661 &d);
2662 if (r < 0)
2663 return log_error_errno(r, "Failed to set up loopback device: %m");
2664
2665 r = dissect_image_and_warn(d->fd, image, &verity, NULL, flags, &dissected_image);
2666 if (r < 0)
2667 return r;
2668
2669 r = dissected_image_decrypt_interactively(dissected_image, NULL, &verity, flags, &decrypted_image);
2670 if (r < 0)
2671 return r;
2672
2673 r = detach_mount_namespace();
2674 if (r < 0)
2675 return log_error_errno(r, "Failed to detach mount namespace: %m");
2676
2677 r = mkdir_p(temp, 0700);
2678 if (r < 0)
2679 return log_error_errno(r, "Failed to create mount point: %m");
2680
2681 created_dir = TAKE_PTR(temp);
2682
2683 r = dissected_image_mount_and_warn(dissected_image, created_dir, UID_INVALID, flags);
2684 if (r < 0)
2685 return r;
2686
2687 if (decrypted_image) {
2688 r = decrypted_image_relinquish(decrypted_image);
2689 if (r < 0)
2690 return log_error_errno(r, "Failed to relinquish DM devices: %m");
2691 }
2692
2693 loop_device_relinquish(d);
2694
2695 *ret_directory = TAKE_PTR(created_dir);
2696 *ret_loop_device = TAKE_PTR(d);
2697 *ret_decrypted_image = TAKE_PTR(decrypted_image);
2698
2699 return 0;
2700 }
2701
2702 static const char *const partition_designator_table[] = {
2703 [PARTITION_ROOT] = "root",
2704 [PARTITION_ROOT_SECONDARY] = "root-secondary",
2705 [PARTITION_USR] = "usr",
2706 [PARTITION_USR_SECONDARY] = "usr-secondary",
2707 [PARTITION_HOME] = "home",
2708 [PARTITION_SRV] = "srv",
2709 [PARTITION_ESP] = "esp",
2710 [PARTITION_XBOOTLDR] = "xbootldr",
2711 [PARTITION_SWAP] = "swap",
2712 [PARTITION_ROOT_VERITY] = "root-verity",
2713 [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
2714 [PARTITION_USR_VERITY] = "usr-verity",
2715 [PARTITION_USR_SECONDARY_VERITY] = "usr-secondary-verity",
2716 [PARTITION_TMP] = "tmp",
2717 [PARTITION_VAR] = "var",
2718 };
2719
2720 int verity_dissect_and_mount(
2721 const char *src,
2722 const char *dest,
2723 const MountOptions *options,
2724 const char *required_host_os_release_id,
2725 const char *required_host_os_release_version_id,
2726 const char *required_host_os_release_sysext_level) {
2727
2728 _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
2729 _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
2730 _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
2731 _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
2732 DissectImageFlags dissect_image_flags;
2733 int r;
2734
2735 assert(src);
2736 assert(dest);
2737
2738 r = verity_settings_load(&verity, src, NULL, NULL);
2739 if (r < 0)
2740 return log_debug_errno(r, "Failed to load root hash: %m");
2741
2742 dissect_image_flags = verity.data_path ? DISSECT_IMAGE_NO_PARTITION_TABLE : 0;
2743
2744 r = loop_device_make_by_path(
2745 src,
2746 -1,
2747 verity.data_path ? 0 : LO_FLAGS_PARTSCAN,
2748 &loop_device);
2749 if (r < 0)
2750 return log_debug_errno(r, "Failed to create loop device for image: %m");
2751
2752 r = dissect_image(
2753 loop_device->fd,
2754 &verity,
2755 options,
2756 dissect_image_flags,
2757 &dissected_image);
2758 /* No partition table? Might be a single-filesystem image, try again */
2759 if (!verity.data_path && r == -ENOPKG)
2760 r = dissect_image(
2761 loop_device->fd,
2762 &verity,
2763 options,
2764 dissect_image_flags|DISSECT_IMAGE_NO_PARTITION_TABLE,
2765 &dissected_image);
2766 if (r < 0)
2767 return log_debug_errno(r, "Failed to dissect image: %m");
2768
2769 r = dissected_image_decrypt(
2770 dissected_image,
2771 NULL,
2772 &verity,
2773 dissect_image_flags,
2774 &decrypted_image);
2775 if (r < 0)
2776 return log_debug_errno(r, "Failed to decrypt dissected image: %m");
2777
2778 r = mkdir_p_label(dest, 0755);
2779 if (r < 0)
2780 return log_debug_errno(r, "Failed to create destination directory %s: %m", dest);
2781 r = umount_recursive(dest, 0);
2782 if (r < 0)
2783 return log_debug_errno(r, "Failed to umount under destination directory %s: %m", dest);
2784
2785 r = dissected_image_mount(dissected_image, dest, UID_INVALID, dissect_image_flags);
2786 if (r < 0)
2787 return log_debug_errno(r, "Failed to mount image: %m");
2788
2789 /* If we got os-release values from the caller, then we need to match them with the image's
2790 * extension-release.d/ content. Return -EINVAL if there's any mismatch.
2791 * First, check the distro ID. If that matches, then check the new SYSEXT_LEVEL value if
2792 * available, or else fallback to VERSION_ID. */
2793 if (required_host_os_release_id &&
2794 (required_host_os_release_version_id || required_host_os_release_sysext_level)) {
2795 _cleanup_strv_free_ char **extension_release = NULL;
2796
2797 r = load_extension_release_pairs(dest, dissected_image->image_name, &extension_release);
2798 if (r < 0)
2799 return log_debug_errno(r, "Failed to parse image %s extension-release metadata: %m", dissected_image->image_name);
2800
2801 r = extension_release_validate(
2802 dissected_image->image_name,
2803 required_host_os_release_id,
2804 required_host_os_release_version_id,
2805 required_host_os_release_sysext_level,
2806 extension_release);
2807 if (r == 0)
2808 return log_debug_errno(SYNTHETIC_ERRNO(ESTALE), "Image %s extension-release metadata does not match the root's", dissected_image->image_name);
2809 if (r < 0)
2810 return log_debug_errno(r, "Failed to compare image %s extension-release metadata with the root's os-release: %m", dissected_image->image_name);
2811 }
2812
2813 if (decrypted_image) {
2814 r = decrypted_image_relinquish(decrypted_image);
2815 if (r < 0)
2816 return log_debug_errno(r, "Failed to relinquish decrypted image: %m");
2817 }
2818
2819 loop_device_relinquish(loop_device);
2820
2821 return 0;
2822 }
2823
2824 DEFINE_STRING_TABLE_LOOKUP(partition_designator, PartitionDesignator);