git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/dissect-image.c
dissect-image: optionally, validate dm-verity signatures in userspace
[thirdparty/systemd.git] / src / shared / dissect-image.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #if HAVE_VALGRIND_MEMCHECK_H
4 #include <valgrind/memcheck.h>
5 #endif
6
7 #include <linux/dm-ioctl.h>
8 #include <linux/loop.h>
9 #include <sys/mount.h>
10 #include <sys/prctl.h>
11 #include <sys/wait.h>
12 #include <sysexits.h>
13
14 #if HAVE_OPENSSL
15 #include <openssl/err.h>
16 #include <openssl/pem.h>
17 #include <openssl/x509.h>
18 #endif
19
20 #include "sd-device.h"
21 #include "sd-id128.h"
22
23 #include "architecture.h"
24 #include "ask-password-api.h"
25 #include "blkid-util.h"
26 #include "blockdev-util.h"
27 #include "conf-files.h"
28 #include "copy.h"
29 #include "cryptsetup-util.h"
30 #include "def.h"
31 #include "device-nodes.h"
32 #include "device-util.h"
33 #include "discover-image.h"
34 #include "dissect-image.h"
35 #include "dm-util.h"
36 #include "env-file.h"
37 #include "env-util.h"
38 #include "extension-release.h"
39 #include "fd-util.h"
40 #include "fileio.h"
41 #include "fs-util.h"
42 #include "fsck-util.h"
43 #include "gpt.h"
44 #include "hexdecoct.h"
45 #include "hostname-setup.h"
46 #include "id128-util.h"
47 #include "import-util.h"
48 #include "mkdir.h"
49 #include "mount-util.h"
50 #include "mountpoint-util.h"
51 #include "namespace-util.h"
52 #include "nulstr-util.h"
53 #include "openssl-util.h"
54 #include "os-util.h"
55 #include "path-util.h"
56 #include "process-util.h"
57 #include "raw-clone.h"
58 #include "resize-fs.h"
59 #include "signal-util.h"
60 #include "stat-util.h"
61 #include "stdio-util.h"
62 #include "string-table.h"
63 #include "string-util.h"
64 #include "strv.h"
65 #include "tmpfile-util.h"
66 #include "udev-util.h"
67 #include "user-util.h"
68 #include "xattr-util.h"
69
70 /* how many times to wait for the device nodes to appear */
71 #define N_DEVICE_NODE_LIST_ATTEMPTS 10
72
/* Probes 'node' with libblkid and reports the detected content type.
 *
 * Returns 1 and stores a newly allocated string in *ret_fstype (to be freed by the caller) if a type
 * was detected. Returns 0 and stores NULL in *ret_fstype if nothing was found. Returns -EUCLEAN for
 * ambiguous results, -ENOMEM if the type string cannot be duplicated, and a different negative error
 * otherwise. When built without libblkid, -EOPNOTSUPP is returned and *ret_fstype is left untouched. */
int probe_filesystem(const char *node, char **ret_fstype) {
#if HAVE_BLKID
        _cleanup_(blkid_free_probep) blkid_probe b = NULL;
        /* Must start out as NULL: the return value of the TYPE lookup below is deliberately ignored,
         * hence we cannot rely on the lookup having written to this variable. */
        const char *fstype = NULL;
        int r;

        errno = 0;
        b = blkid_new_probe_from_filename(node);
        if (!b)
                return errno_or_else(ENOMEM);

        /* We are only interested in the superblock type, nothing else */
        blkid_probe_enable_superblocks(b, 1);
        blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);

        errno = 0;
        r = blkid_do_safeprobe(b);
        if (r == 1) {
                log_debug("No type detected on partition %s", node);
                goto not_found;
        }
        if (r == -2)
                return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN),
                                       "Results ambiguous for partition %s", node);
        if (r != 0)
                return errno_or_else(EIO);

        (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);

        if (fstype) {
                char *t;

                t = strdup(fstype);
                if (!t)
                        return -ENOMEM;

                *ret_fstype = t;
                return 1;
        }

not_found:
        *ret_fstype = NULL;
        return 0;
#else
        return -EOPNOTSUPP;
#endif
}
123
124 #if HAVE_BLKID
125 static int enumerator_for_parent(sd_device *d, sd_device_enumerator **ret) {
126 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
127 int r;
128
129 assert(d);
130 assert(ret);
131
132 r = sd_device_enumerator_new(&e);
133 if (r < 0)
134 return r;
135
136 r = sd_device_enumerator_add_match_subsystem(e, "block", true);
137 if (r < 0)
138 return r;
139
140 r = sd_device_enumerator_add_match_parent(e, d);
141 if (r < 0)
142 return r;
143
144 r = sd_device_enumerator_add_match_sysattr(e, "partition", NULL, true);
145 if (r < 0)
146 return r;
147
148 *ret = TAKE_PTR(e);
149 return 0;
150 }
151
152 static int device_is_partition(
153 sd_device *d,
154 sd_device *expected_parent,
155 blkid_partition pp) {
156
157 const char *v, *parent_syspath, *expected_parent_syspath;
158 blkid_loff_t bsize, bstart;
159 uint64_t size, start;
160 int partno, bpartno, r;
161 sd_device *parent;
162
163 assert(d);
164 assert(expected_parent);
165 assert(pp);
166
167 r = sd_device_get_subsystem(d, &v);
168 if (r < 0)
169 return r;
170 if (!streq(v, "block"))
171 return false;
172
173 if (sd_device_get_devtype(d, &v) < 0 || !streq(v, "partition"))
174 return false;
175
176 r = sd_device_get_parent(d, &parent);
177 if (r < 0)
178 return false; /* Doesn't have a parent? No relevant to us */
179
180 r = sd_device_get_syspath(parent, &parent_syspath); /* Check parent of device of this action */
181 if (r < 0)
182 return r;
183
184 r = sd_device_get_syspath(expected_parent, &expected_parent_syspath); /* Check parent of device we are looking for */
185 if (r < 0)
186 return r;
187
188 if (!path_equal(parent_syspath, expected_parent_syspath))
189 return false; /* Has a different parent than what we need, not interesting to us */
190
191 /* On kernel uevents we may find the partition number in the PARTN= field. Let's use that preferably,
192 * since it's cheaper and more importantly: the sysfs attribute "partition" appears to become
193 * available late, hence let's use the property instead, which is available at the moment we see the
194 * uevent. */
195 r = sd_device_get_property_value(d, "PARTN", &v);
196 if (r == -ENOENT)
197 r = sd_device_get_sysattr_value(d, "partition", &v);
198 if (r < 0)
199 return r;
200
201 r = safe_atoi(v, &partno);
202 if (r < 0)
203 return r;
204
205 errno = 0;
206 bpartno = blkid_partition_get_partno(pp);
207 if (bpartno < 0)
208 return errno_or_else(EIO);
209
210 if (partno != bpartno)
211 return false;
212
213 r = sd_device_get_sysattr_value(d, "start", &v);
214 if (r < 0)
215 return r;
216 r = safe_atou64(v, &start);
217 if (r < 0)
218 return r;
219
220 errno = 0;
221 bstart = blkid_partition_get_start(pp);
222 if (bstart < 0)
223 return errno_or_else(EIO);
224
225 if (start != (uint64_t) bstart)
226 return false;
227
228 r = sd_device_get_sysattr_value(d, "size", &v);
229 if (r < 0)
230 return r;
231 r = safe_atou64(v, &size);
232 if (r < 0)
233 return r;
234
235 errno = 0;
236 bsize = blkid_partition_get_size(pp);
237 if (bsize < 0)
238 return errno_or_else(EIO);
239
240 if (size != (uint64_t) bsize)
241 return false;
242
243 return true;
244 }
245
246 static int find_partition(
247 sd_device *parent,
248 blkid_partition pp,
249 usec_t timestamp_not_before,
250 DissectImageFlags flags,
251 sd_device **ret) {
252
253 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
254 sd_device *q;
255 int r;
256
257 assert(parent);
258 assert(pp);
259 assert(ret);
260
261 r = enumerator_for_parent(parent, &e);
262 if (r < 0)
263 return r;
264
265 FOREACH_DEVICE(e, q) {
266 uint64_t usec;
267
268 if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
269 r = sd_device_get_usec_initialized(q, &usec);
270 if (r == -EBUSY) /* Not initialized yet */
271 continue;
272 if (r < 0)
273 return r;
274
275 if (timestamp_not_before != USEC_INFINITY &&
276 usec < timestamp_not_before) /* udev database entry older than our attachment? Then it's not ours */
277 continue;
278 }
279
280 r = device_is_partition(q, parent, pp);
281 if (r < 0)
282 return r;
283 if (r > 0) {
284 *ret = sd_device_ref(q);
285 return 0;
286 }
287 }
288
289 return -ENXIO;
290 }
291
292 struct wait_data {
293 sd_device *parent_device;
294 blkid_partition blkidp;
295 sd_device *found;
296 uint64_t diskseq;
297 uint64_t uevent_seqnum_not_before;
298 usec_t timestamp_not_before;
299 DissectImageFlags flags;
300 };
301
302 static inline void wait_data_done(struct wait_data *d) {
303 sd_device_unref(d->found);
304 }
305
306 static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
307 struct wait_data *w = userdata;
308 int r;
309
310 assert(w);
311
312 if (device_for_action(device, SD_DEVICE_REMOVE))
313 return 0;
314
315 if (w->diskseq != 0) {
316 uint64_t diskseq;
317
318 /* If w->diskseq is non-zero, then we must have a disk seqnum */
319 r = sd_device_get_diskseq(device, &diskseq);
320 if (r < 0) {
321 log_debug_errno(r, "Dropping event because it has no diskseq, but waiting for %" PRIu64, w->diskseq);
322 return 0;
323 }
324 if (diskseq < w->diskseq) {
325 log_debug("Dropping event because diskseq too old (%" PRIu64 " < %" PRIu64 ")",
326 diskseq, w->diskseq);
327 return 0;
328 }
329 if (diskseq > w->diskseq) {
330 r = -EBUSY;
331 goto finish; /* Newer than what we were expecting, so we missed it, stop waiting */
332 }
333 } else if (w->uevent_seqnum_not_before != UINT64_MAX) {
334 uint64_t seqnum;
335
336 r = sd_device_get_seqnum(device, &seqnum);
337 if (r < 0)
338 goto finish;
339
340 if (seqnum <= w->uevent_seqnum_not_before) { /* From an older use of this loop device */
341 log_debug("Dropping event because seqnum too old (%" PRIu64 " <= %" PRIu64 ")",
342 seqnum, w->uevent_seqnum_not_before);
343 return 0;
344 }
345 }
346
347 r = device_is_partition(device, w->parent_device, w->blkidp);
348 if (r < 0)
349 goto finish;
350 if (r == 0) /* Not the one we need */
351 return 0;
352
353 /* It's the one we need! Yay! */
354 assert(!w->found);
355 w->found = sd_device_ref(device);
356 r = 0;
357
358 finish:
359 return sd_event_exit(sd_device_monitor_get_event(monitor), r);
360 }
361
362 static int timeout_handler(sd_event_source *s, uint64_t usec, void *userdata) {
363 struct wait_data *w = userdata;
364 int r;
365
366 assert(w);
367
368 /* Why partition not appeared within the timeout? We may lost some uevent, as some properties
369 * were not ready when we received uevent... Not sure, but anyway, let's try to find the
370 * partition again before give up. */
371
372 r = find_partition(w->parent_device, w->blkidp, w->timestamp_not_before, w->flags, &w->found);
373 if (r == -ENXIO)
374 return log_debug_errno(SYNTHETIC_ERRNO(ETIMEDOUT),
375 "Partition still not appeared after timeout reached.");
376 if (r < 0)
377 return log_debug_errno(r, "Failed to find partition: %m");
378
379 log_debug("Partition appeared after timeout reached.");
380 return sd_event_exit(sd_event_source_get_event(s), 0);
381 }
382
383 static int retry_handler(sd_event_source *s, uint64_t usec, void *userdata) {
384 struct wait_data *w = userdata;
385 int r;
386
387 assert(w);
388
389 r = find_partition(w->parent_device, w->blkidp, w->timestamp_not_before, w->flags, &w->found);
390 if (r != -ENXIO) {
391 if (r < 0)
392 return log_debug_errno(r, "Failed to find partition: %m");
393
394 log_debug("Partition found by a periodic search.");
395 return sd_event_exit(sd_event_source_get_event(s), 0);
396 }
397
398 r = sd_event_source_set_time_relative(s, 500 * USEC_PER_MSEC);
399 if (r < 0)
400 return r;
401
402 return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
403 }
404
405 static int wait_for_partition_device(
406 sd_device *parent,
407 blkid_partition pp,
408 usec_t deadline,
409 uint64_t diskseq,
410 uint64_t uevent_seqnum_not_before,
411 usec_t timestamp_not_before,
412 DissectImageFlags flags,
413 sd_device **ret) {
414
415 _cleanup_(sd_event_source_unrefp) sd_event_source *timeout_source = NULL, *retry_source = NULL;
416 _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL;
417 _cleanup_(sd_event_unrefp) sd_event *event = NULL;
418 int r;
419
420 assert(parent);
421 assert(pp);
422 assert(ret);
423
424 r = find_partition(parent, pp, timestamp_not_before, flags, ret);
425 if (r != -ENXIO)
426 return r;
427
428 r = sd_event_new(&event);
429 if (r < 0)
430 return r;
431
432 r = sd_device_monitor_new(&monitor);
433 if (r < 0)
434 return r;
435
436 r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, "block", "partition");
437 if (r < 0)
438 return r;
439
440 r = sd_device_monitor_filter_add_match_parent(monitor, parent, true);
441 if (r < 0)
442 return r;
443
444 r = sd_device_monitor_filter_add_match_sysattr(monitor, "partition", NULL, true);
445 if (r < 0)
446 return r;
447
448 r = sd_device_monitor_attach_event(monitor, event);
449 if (r < 0)
450 return r;
451
452 _cleanup_(wait_data_done) struct wait_data w = {
453 .parent_device = parent,
454 .blkidp = pp,
455 .diskseq = diskseq,
456 .uevent_seqnum_not_before = uevent_seqnum_not_before,
457 .timestamp_not_before = timestamp_not_before,
458 .flags = flags,
459 };
460
461 r = sd_device_monitor_start(monitor, device_monitor_handler, &w);
462 if (r < 0)
463 return r;
464
465 /* Check again, the partition might have appeared in the meantime */
466 r = find_partition(parent, pp, timestamp_not_before, flags, ret);
467 if (r != -ENXIO)
468 return r;
469
470 if (deadline != USEC_INFINITY) {
471 r = sd_event_add_time(
472 event, &timeout_source,
473 CLOCK_MONOTONIC, deadline, 0,
474 timeout_handler, &w);
475 if (r < 0)
476 return r;
477
478 r = sd_event_source_set_exit_on_failure(timeout_source, true);
479 if (r < 0)
480 return r;
481 }
482
483 /* If we don't have a disk sequence number then we cannot do exact matching,
484 * and we cannot know if we missed it or if it has not been sent yet, so set
485 * up additional retries to increase the chances of receiving the event. */
486 if (diskseq == 0) {
487 r = sd_event_add_time_relative(
488 event, &retry_source,
489 CLOCK_MONOTONIC, 500 * USEC_PER_MSEC, 0,
490 retry_handler, &w);
491 if (r < 0)
492 return r;
493
494 r = sd_event_source_set_exit_on_failure(retry_source, true);
495 if (r < 0)
496 return r;
497 }
498
499 r = sd_event_loop(event);
500 if (r < 0)
501 return r;
502
503 assert(w.found);
504 *ret = TAKE_PTR(w.found);
505 return 0;
506 }
507
508 static void check_partition_flags(
509 const char *node,
510 unsigned long long pflags,
511 unsigned long long supported) {
512
513 assert(node);
514
515 /* Mask away all flags supported by this partition's type and the three flags the UEFI spec defines generically */
516 pflags &= ~(supported | GPT_FLAG_REQUIRED_PARTITION | GPT_FLAG_NO_BLOCK_IO_PROTOCOL | GPT_FLAG_LEGACY_BIOS_BOOTABLE);
517
518 if (pflags == 0)
519 return;
520
521 /* If there are other bits set, then log about it, to make things discoverable */
522 for (unsigned i = 0; i < sizeof(pflags) * 8; i++) {
523 unsigned long long bit = 1ULL << i;
524 if (!FLAGS_SET(pflags, bit))
525 continue;
526
527 log_debug("Unexpected partition flag %llu set on %s!", bit, node);
528 }
529 }
530
531 static int device_wait_for_initialization_harder(
532 sd_device *device,
533 const char *subsystem,
534 usec_t deadline,
535 sd_device **ret) {
536
537 usec_t start, left, retrigger_timeout;
538 int r;
539
540 start = now(CLOCK_MONOTONIC);
541 left = usec_sub_unsigned(deadline, start);
542
543 if (DEBUG_LOGGING) {
544 const char *sn = NULL;
545
546 (void) sd_device_get_sysname(device, &sn);
547 log_device_debug(device,
548 "Waiting for device '%s' to initialize for %s.", strna(sn), FORMAT_TIMESPAN(left, 0));
549 }
550
551 if (left != USEC_INFINITY)
552 retrigger_timeout = CLAMP(left / 4, 1 * USEC_PER_SEC, 5 * USEC_PER_SEC); /* A fourth of the total timeout, but let's clamp to 1s…5s range */
553 else
554 retrigger_timeout = 2 * USEC_PER_SEC;
555
556 for (;;) {
557 usec_t local_deadline, n;
558 bool last_try;
559
560 n = now(CLOCK_MONOTONIC);
561 assert(n >= start);
562
563 /* Find next deadline, when we'll retrigger */
564 local_deadline = start +
565 DIV_ROUND_UP(n - start, retrigger_timeout) * retrigger_timeout;
566
567 if (deadline != USEC_INFINITY && deadline <= local_deadline) {
568 local_deadline = deadline;
569 last_try = true;
570 } else
571 last_try = false;
572
573 r = device_wait_for_initialization(device, subsystem, local_deadline, ret);
574 if (r >= 0 && DEBUG_LOGGING) {
575 const char *sn = NULL;
576
577 (void) sd_device_get_sysname(device, &sn);
578 log_device_debug(device,
579 "Successfully waited for device '%s' to initialize for %s.",
580 strna(sn),
581 FORMAT_TIMESPAN(usec_sub_unsigned(now(CLOCK_MONOTONIC), start), 0));
582
583 }
584 if (r != -ETIMEDOUT || last_try)
585 return r;
586
587 if (DEBUG_LOGGING)
588 log_device_debug(device,
589 "Device didn't initialize within %s, assuming lost event. Retriggering device.",
590 FORMAT_TIMESPAN(usec_sub_unsigned(now(CLOCK_MONOTONIC), start), 0));
591
592 r = sd_device_trigger(device, SD_DEVICE_CHANGE);
593 if (r < 0)
594 return r;
595 }
596 }
597 #endif
598
599 #define DEVICE_TIMEOUT_USEC (45 * USEC_PER_SEC)
600
601 static void dissected_partition_done(DissectedPartition *p) {
602 assert(p);
603
604 free(p->fstype);
605 free(p->node);
606 free(p->label);
607 free(p->decrypted_fstype);
608 free(p->decrypted_node);
609 free(p->mount_options);
610
611 *p = (DissectedPartition) {
612 .partno = -1,
613 .architecture = -1
614 };
615 }
616
617 int dissect_image(
618 int fd,
619 const VeritySettings *verity,
620 const MountOptions *mount_options,
621 uint64_t diskseq,
622 uint64_t uevent_seqnum_not_before,
623 usec_t timestamp_not_before,
624 DissectImageFlags flags,
625 DissectedImage **ret) {
626
627 #if HAVE_BLKID
628 #ifdef GPT_ROOT_NATIVE
629 sd_id128_t root_uuid = SD_ID128_NULL, root_verity_uuid = SD_ID128_NULL;
630 #endif
631 #ifdef GPT_USR_NATIVE
632 sd_id128_t usr_uuid = SD_ID128_NULL, usr_verity_uuid = SD_ID128_NULL;
633 #endif
634 bool is_gpt, is_mbr, multiple_generic = false,
635 generic_rw = false, /* initialize to appease gcc */
636 generic_growfs = false;
637 _cleanup_(sd_device_unrefp) sd_device *d = NULL;
638 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
639 _cleanup_(blkid_free_probep) blkid_probe b = NULL;
640 _cleanup_free_ char *generic_node = NULL;
641 sd_id128_t generic_uuid = SD_ID128_NULL;
642 const char *pttype = NULL, *sysname = NULL;
643 blkid_partlist pl;
644 int r, generic_nr = -1, n_partitions;
645 struct stat st;
646 usec_t deadline;
647
648 assert(fd >= 0);
649 assert(ret);
650 assert(!verity || verity->designator < 0 || IN_SET(verity->designator, PARTITION_ROOT, PARTITION_USR));
651 assert(!verity || verity->root_hash || verity->root_hash_size == 0);
652 assert(!verity || verity->root_hash_sig || verity->root_hash_sig_size == 0);
653 assert(!verity || (verity->root_hash || !verity->root_hash_sig));
654 assert(!((flags & DISSECT_IMAGE_GPT_ONLY) && (flags & DISSECT_IMAGE_NO_PARTITION_TABLE)));
655
656 /* Probes a disk image, and returns information about what it found in *ret.
657 *
658 * Returns -ENOPKG if no suitable partition table or file system could be found.
659 * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found.
660 * Returns -ENXIO if we couldn't find any partition suitable as root or /usr partition
661 * Returns -ENOTUNIQ if we only found multiple generic partitions and thus don't know what to do with that */
662
663 if (verity && verity->root_hash) {
664 sd_id128_t fsuuid, vuuid;
665
666 /* If a root hash is supplied, then we use the root partition that has a UUID that match the
667 * first 128bit of the root hash. And we use the verity partition that has a UUID that match
668 * the final 128bit. */
669
670 if (verity->root_hash_size < sizeof(sd_id128_t))
671 return -EINVAL;
672
673 memcpy(&fsuuid, verity->root_hash, sizeof(sd_id128_t));
674 memcpy(&vuuid, (const uint8_t*) verity->root_hash + verity->root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
675
676 if (sd_id128_is_null(fsuuid))
677 return -EINVAL;
678 if (sd_id128_is_null(vuuid))
679 return -EINVAL;
680
681 /* If the verity data declares it's for the /usr partition, then search for that, in all
682 * other cases assume it's for the root partition. */
683 #ifdef GPT_USR_NATIVE
684 if (verity->designator == PARTITION_USR) {
685 usr_uuid = fsuuid;
686 usr_verity_uuid = vuuid;
687 } else {
688 #endif
689 #ifdef GPT_ROOT_NATIVE
690 root_uuid = fsuuid;
691 root_verity_uuid = vuuid;
692 #endif
693 #ifdef GPT_USR_NATIVE
694 }
695 #endif
696 }
697
698 if (fstat(fd, &st) < 0)
699 return -errno;
700
701 if (!S_ISBLK(st.st_mode))
702 return -ENOTBLK;
703
704 r = sd_device_new_from_stat_rdev(&d, &st);
705 if (r < 0)
706 return r;
707
708 if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
709 _cleanup_(sd_device_unrefp) sd_device *initialized = NULL;
710
711 /* If udev support is enabled, then let's wait for the device to be initialized before we doing anything. */
712
713 r = device_wait_for_initialization_harder(
714 d,
715 "block",
716 usec_add(now(CLOCK_MONOTONIC), DEVICE_TIMEOUT_USEC),
717 &initialized);
718 if (r < 0)
719 return r;
720
721 sd_device_unref(d);
722 d = TAKE_PTR(initialized);
723 }
724
725 b = blkid_new_probe();
726 if (!b)
727 return -ENOMEM;
728
729 errno = 0;
730 r = blkid_probe_set_device(b, fd, 0, 0);
731 if (r != 0)
732 return errno_or_else(ENOMEM);
733
734 if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
735 /* Look for file system superblocks, unless we only shall look for GPT partition tables */
736 blkid_probe_enable_superblocks(b, 1);
737 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
738 }
739
740 blkid_probe_enable_partitions(b, 1);
741 blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
742
743 errno = 0;
744 r = blkid_do_safeprobe(b);
745 if (IN_SET(r, -2, 1))
746 return log_debug_errno(SYNTHETIC_ERRNO(ENOPKG), "Failed to identify any partition table.");
747 if (r != 0)
748 return errno_or_else(EIO);
749
750 m = new0(DissectedImage, 1);
751 if (!m)
752 return -ENOMEM;
753
754 r = sd_device_get_sysname(d, &sysname);
755 if (r < 0)
756 return log_debug_errno(r, "Failed to get device sysname: %m");
757 if (startswith(sysname, "loop")) {
758 _cleanup_free_ char *name_stripped = NULL;
759 const char *full_path;
760
761 r = sd_device_get_sysattr_value(d, "loop/backing_file", &full_path);
762 if (r < 0)
763 log_debug_errno(r, "Failed to lookup image name via loop device backing file sysattr, ignoring: %m");
764 else {
765 r = raw_strip_suffixes(basename(full_path), &name_stripped);
766 if (r < 0)
767 return r;
768 }
769
770 free_and_replace(m->image_name, name_stripped);
771 } else {
772 r = free_and_strdup(&m->image_name, sysname);
773 if (r < 0)
774 return r;
775 }
776
777 if (!image_name_is_valid(m->image_name)) {
778 log_debug("Image name %s is not valid, ignoring", strempty(m->image_name));
779 m->image_name = mfree(m->image_name);
780 }
781
782 if ((!(flags & DISSECT_IMAGE_GPT_ONLY) &&
783 (flags & DISSECT_IMAGE_GENERIC_ROOT)) ||
784 (flags & DISSECT_IMAGE_NO_PARTITION_TABLE)) {
785 const char *usage = NULL;
786
787 /* If flags permit this, also allow using non-partitioned single-filesystem images */
788
789 (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
790 if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
791 const char *fstype = NULL, *options = NULL, *devname = NULL;
792 _cleanup_free_ char *t = NULL, *n = NULL, *o = NULL;
793
794 /* OK, we have found a file system, that's our root partition then. */
795 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
796
797 if (fstype) {
798 t = strdup(fstype);
799 if (!t)
800 return -ENOMEM;
801 }
802
803 r = sd_device_get_devname(d, &devname);
804 if (r < 0)
805 return r;
806
807 n = strdup(devname);
808 if (!n)
809 return -ENOMEM;
810
811 m->single_file_system = true;
812 m->encrypted = streq_ptr(fstype, "crypto_LUKS");
813
814 m->has_verity = verity && verity->data_path;
815 m->verity_ready = m->has_verity &&
816 verity->root_hash &&
817 (verity->designator < 0 || verity->designator == PARTITION_ROOT);
818
819 m->has_verity_sig = false; /* signature not embedded, must be specified */
820 m->verity_sig_ready = m->verity_ready &&
821 verity->root_hash_sig;
822
823 options = mount_options_from_designator(mount_options, PARTITION_ROOT);
824 if (options) {
825 o = strdup(options);
826 if (!o)
827 return -ENOMEM;
828 }
829
830 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
831 .found = true,
832 .rw = !m->verity_ready && !fstype_is_ro(fstype),
833 .partno = -1,
834 .architecture = _ARCHITECTURE_INVALID,
835 .fstype = TAKE_PTR(t),
836 .node = TAKE_PTR(n),
837 .mount_options = TAKE_PTR(o),
838 .offset = 0,
839 .size = UINT64_MAX,
840 };
841
842 *ret = TAKE_PTR(m);
843 return 0;
844 }
845 }
846
847 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
848 if (!pttype)
849 return -ENOPKG;
850
851 is_gpt = streq_ptr(pttype, "gpt");
852 is_mbr = streq_ptr(pttype, "dos");
853
854 if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
855 return -ENOPKG;
856
857 /* We support external verity data partitions only if the image has no partition table */
858 if (verity && verity->data_path)
859 return -EBADR;
860
861 /* Safety check: refuse block devices that carry a partition table but for which the kernel doesn't
862 * do partition scanning. */
863 r = blockdev_partscan_enabled(fd);
864 if (r < 0)
865 return r;
866 if (r == 0)
867 return -EPROTONOSUPPORT;
868
869 errno = 0;
870 pl = blkid_probe_get_partitions(b);
871 if (!pl)
872 return errno_or_else(ENOMEM);
873
874 errno = 0;
875 n_partitions = blkid_partlist_numof_partitions(pl);
876 if (n_partitions < 0)
877 return errno_or_else(EIO);
878
879 deadline = usec_add(now(CLOCK_MONOTONIC), DEVICE_TIMEOUT_USEC);
880 for (int i = 0; i < n_partitions; i++) {
881 _cleanup_(sd_device_unrefp) sd_device *q = NULL;
882 unsigned long long pflags;
883 blkid_loff_t start, size;
884 blkid_partition pp;
885 const char *node;
886 int nr;
887
888 errno = 0;
889 pp = blkid_partlist_get_partition(pl, i);
890 if (!pp)
891 return errno_or_else(EIO);
892
893 r = wait_for_partition_device(d, pp, deadline, diskseq, uevent_seqnum_not_before, timestamp_not_before, flags, &q);
894 if (r < 0)
895 return r;
896
897 r = sd_device_get_devname(q, &node);
898 if (r < 0)
899 return r;
900
901 pflags = blkid_partition_get_flags(pp);
902
903 errno = 0;
904 nr = blkid_partition_get_partno(pp);
905 if (nr < 0)
906 return errno_or_else(EIO);
907
908 errno = 0;
909 start = blkid_partition_get_start(pp);
910 if (start < 0)
911 return errno_or_else(EIO);
912
913 assert((uint64_t) start < UINT64_MAX/512);
914
915 errno = 0;
916 size = blkid_partition_get_size(pp);
917 if (size < 0)
918 return errno_or_else(EIO);
919
920 assert((uint64_t) size < UINT64_MAX/512);
921
922 if (is_gpt) {
923 PartitionDesignator designator = _PARTITION_DESIGNATOR_INVALID;
924 int architecture = _ARCHITECTURE_INVALID;
925 const char *stype, *sid, *fstype = NULL, *label;
926 sd_id128_t type_id, id;
927 bool rw = true, growfs = false;
928
929 sid = blkid_partition_get_uuid(pp);
930 if (!sid)
931 continue;
932 if (sd_id128_from_string(sid, &id) < 0)
933 continue;
934
935 stype = blkid_partition_get_type_string(pp);
936 if (!stype)
937 continue;
938 if (sd_id128_from_string(stype, &type_id) < 0)
939 continue;
940
941 label = blkid_partition_get_name(pp); /* libblkid returns NULL here if empty */
942
943 if (sd_id128_equal(type_id, GPT_HOME)) {
944
945 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY|GPT_FLAG_GROWFS);
946
947 if (pflags & GPT_FLAG_NO_AUTO)
948 continue;
949
950 designator = PARTITION_HOME;
951 rw = !(pflags & GPT_FLAG_READ_ONLY);
952 growfs = FLAGS_SET(pflags, GPT_FLAG_GROWFS);
953
954 } else if (sd_id128_equal(type_id, GPT_SRV)) {
955
956 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY|GPT_FLAG_GROWFS);
957
958 if (pflags & GPT_FLAG_NO_AUTO)
959 continue;
960
961 designator = PARTITION_SRV;
962 rw = !(pflags & GPT_FLAG_READ_ONLY);
963 growfs = FLAGS_SET(pflags, GPT_FLAG_GROWFS);
964
965 } else if (sd_id128_equal(type_id, GPT_ESP)) {
966
967 /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is
968 * not defined there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as
969 * recommended by the UEFI spec (See "12.3.3 Number and Location of System
970 * Partitions"). */
971
972 if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
973 continue;
974
975 designator = PARTITION_ESP;
976 fstype = "vfat";
977
978 } else if (sd_id128_equal(type_id, GPT_XBOOTLDR)) {
979
980 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY|GPT_FLAG_GROWFS);
981
982 if (pflags & GPT_FLAG_NO_AUTO)
983 continue;
984
985 designator = PARTITION_XBOOTLDR;
986 rw = !(pflags & GPT_FLAG_READ_ONLY);
987 growfs = FLAGS_SET(pflags, GPT_FLAG_GROWFS);
988 }
989 #ifdef GPT_ROOT_NATIVE
990 else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
991
992 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY|GPT_FLAG_GROWFS);
993
994 if (pflags & GPT_FLAG_NO_AUTO)
995 continue;
996
997 /* If a root ID is specified, ignore everything but the root id */
998 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
999 continue;
1000
1001 designator = PARTITION_ROOT;
1002 architecture = native_architecture();
1003 rw = !(pflags & GPT_FLAG_READ_ONLY);
1004 growfs = FLAGS_SET(pflags, GPT_FLAG_GROWFS);
1005
1006 } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
1007
1008 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
1009
1010 if (pflags & GPT_FLAG_NO_AUTO)
1011 continue;
1012
1013 m->has_verity = true;
1014
1015 /* If no verity configuration is specified, then don't do verity */
1016 if (!verity)
1017 continue;
1018 if (verity->designator >= 0 && verity->designator != PARTITION_ROOT)
1019 continue;
1020
1021 /* If root hash is specified, then ignore everything but the root id */
1022 if (!sd_id128_is_null(root_verity_uuid) && !sd_id128_equal(root_verity_uuid, id))
1023 continue;
1024
1025 designator = PARTITION_ROOT_VERITY;
1026 fstype = "DM_verity_hash";
1027 architecture = native_architecture();
1028 rw = false;
1029
1030 } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY_SIG)) {
1031
1032 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
1033
1034 if (pflags & GPT_FLAG_NO_AUTO)
1035 continue;
1036
1037 m->has_verity_sig = true;
1038
1039 /* If root hash is specified explicitly, then ignore any embedded signature */
1040 if (!verity)
1041 continue;
1042 if (verity->designator >= 0 && verity->designator != PARTITION_ROOT)
1043 continue;
1044 if (verity->root_hash)
1045 continue;
1046
1047 designator = PARTITION_ROOT_VERITY_SIG;
1048 fstype = "verity_hash_signature";
1049 architecture = native_architecture();
1050 rw = false;
1051 }
1052 #endif
1053 #ifdef GPT_ROOT_SECONDARY
1054 else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
1055
1056 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY|GPT_FLAG_GROWFS);
1057
1058 if (pflags & GPT_FLAG_NO_AUTO)
1059 continue;
1060
1061 /* If a root ID is specified, ignore everything but the root id */
1062 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
1063 continue;
1064
1065 designator = PARTITION_ROOT_SECONDARY;
1066 architecture = SECONDARY_ARCHITECTURE;
1067 rw = !(pflags & GPT_FLAG_READ_ONLY);
1068 growfs = FLAGS_SET(pflags, GPT_FLAG_GROWFS);
1069
1070 } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
1071
1072 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
1073
1074 if (pflags & GPT_FLAG_NO_AUTO)
1075 continue;
1076
1077 m->has_verity = true;
1078
1079 /* Don't do verity if no verity config is passed in */
1080 if (!verity)
1081 continue;
1082 if (verity->designator >= 0 && verity->designator != PARTITION_ROOT)
1083 continue;
1084
1085 /* If root hash is specified, then ignore everything but the root id */
1086 if (!sd_id128_is_null(root_verity_uuid) && !sd_id128_equal(root_verity_uuid, id))
1087 continue;
1088
1089 designator = PARTITION_ROOT_SECONDARY_VERITY;
1090 fstype = "DM_verity_hash";
1091 architecture = SECONDARY_ARCHITECTURE;
1092 rw = false;
1093
1094 } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY_SIG)) {
1095
1096 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
1097
1098 if (pflags & GPT_FLAG_NO_AUTO)
1099 continue;
1100
1101 m->has_verity_sig = true;
1102
1103 /* If root hash is specified explicitly, then ignore any embedded signature */
1104 if (!verity)
1105 continue;
1106 if (verity->designator >= 0 && verity->designator != PARTITION_ROOT)
1107 continue;
1108 if (verity->root_hash)
1109 continue;
1110
1111 designator = PARTITION_ROOT_SECONDARY_VERITY_SIG;
1112 fstype = "verity_hash_signature";
1113 architecture = native_architecture();
1114 rw = false;
1115 }
1116 #endif
1117 #ifdef GPT_USR_NATIVE
1118 else if (sd_id128_equal(type_id, GPT_USR_NATIVE)) {
1119
1120 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY|GPT_FLAG_GROWFS);
1121
1122 if (pflags & GPT_FLAG_NO_AUTO)
1123 continue;
1124
1125 /* If a usr ID is specified, ignore everything but the usr id */
1126 if (!sd_id128_is_null(usr_uuid) && !sd_id128_equal(usr_uuid, id))
1127 continue;
1128
1129 designator = PARTITION_USR;
1130 architecture = native_architecture();
1131 rw = !(pflags & GPT_FLAG_READ_ONLY);
1132 growfs = FLAGS_SET(pflags, GPT_FLAG_GROWFS);
1133
1134 } else if (sd_id128_equal(type_id, GPT_USR_NATIVE_VERITY)) {
1135
1136 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
1137
1138 if (pflags & GPT_FLAG_NO_AUTO)
1139 continue;
1140
1141 m->has_verity = true;
1142
1143 if (!verity)
1144 continue;
1145 if (verity->designator >= 0 && verity->designator != PARTITION_USR)
1146 continue;
1147
1148 /* If usr hash is specified, then ignore everything but the usr id */
1149 if (!sd_id128_is_null(usr_verity_uuid) && !sd_id128_equal(usr_verity_uuid, id))
1150 continue;
1151
1152 designator = PARTITION_USR_VERITY;
1153 fstype = "DM_verity_hash";
1154 architecture = native_architecture();
1155 rw = false;
1156
1157 } else if (sd_id128_equal(type_id, GPT_USR_NATIVE_VERITY_SIG)) {
1158
1159 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
1160
1161 if (pflags & GPT_FLAG_NO_AUTO)
1162 continue;
1163
1164 m->has_verity_sig = true;
1165
1166 /* If usr hash is specified explicitly, then ignore any embedded signature */
1167 if (!verity)
1168 continue;
1169 if (verity->designator >= 0 && verity->designator != PARTITION_USR)
1170 continue;
1171 if (verity->root_hash)
1172 continue;
1173
1174 designator = PARTITION_USR_VERITY_SIG;
1175 fstype = "verity_hash_signature";
1176 architecture = native_architecture();
1177 rw = false;
1178 }
1179 #endif
1180 #ifdef GPT_USR_SECONDARY
1181 else if (sd_id128_equal(type_id, GPT_USR_SECONDARY)) {
1182
1183 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY|GPT_FLAG_GROWFS);
1184
1185 if (pflags & GPT_FLAG_NO_AUTO)
1186 continue;
1187
1188 /* If a usr ID is specified, ignore everything but the usr id */
1189 if (!sd_id128_is_null(usr_uuid) && !sd_id128_equal(usr_uuid, id))
1190 continue;
1191
1192 designator = PARTITION_USR_SECONDARY;
1193 architecture = SECONDARY_ARCHITECTURE;
1194 rw = !(pflags & GPT_FLAG_READ_ONLY);
1195 growfs = FLAGS_SET(pflags, GPT_FLAG_GROWFS);
1196
1197 } else if (sd_id128_equal(type_id, GPT_USR_SECONDARY_VERITY)) {
1198
1199 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
1200
1201 if (pflags & GPT_FLAG_NO_AUTO)
1202 continue;
1203
1204 m->has_verity = true;
1205
1206 if (!verity)
1207 continue;
1208 if (verity->designator >= 0 && verity->designator != PARTITION_USR)
1209 continue;
1210
1211 /* If usr hash is specified, then ignore everything but the usr id */
1212 if (!sd_id128_is_null(usr_verity_uuid) && !sd_id128_equal(usr_verity_uuid, id))
1213 continue;
1214
1215 designator = PARTITION_USR_SECONDARY_VERITY;
1216 fstype = "DM_verity_hash";
1217 architecture = SECONDARY_ARCHITECTURE;
1218 rw = false;
1219
1220 } else if (sd_id128_equal(type_id, GPT_USR_SECONDARY_VERITY_SIG)) {
1221
1222 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
1223
1224 if (pflags & GPT_FLAG_NO_AUTO)
1225 continue;
1226
1227 m->has_verity_sig = true;
1228
1229 /* If usr hash is specified explicitly, then ignore any embedded signature */
1230 if (!verity)
1231 continue;
1232 if (verity->designator >= 0 && verity->designator != PARTITION_USR)
1233 continue;
1234 if (verity->root_hash)
1235 continue;
1236
1237 designator = PARTITION_USR_SECONDARY_VERITY_SIG;
1238 fstype = "verity_hash_signature";
1239 architecture = native_architecture();
1240 rw = false;
1241 }
1242 #endif
1243 else if (sd_id128_equal(type_id, GPT_SWAP)) {
1244
1245 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO);
1246
1247 if (pflags & GPT_FLAG_NO_AUTO)
1248 continue;
1249
1250 designator = PARTITION_SWAP;
1251
1252 } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
1253
1254 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY|GPT_FLAG_GROWFS);
1255
1256 if (pflags & GPT_FLAG_NO_AUTO)
1257 continue;
1258
1259 if (generic_node)
1260 multiple_generic = true;
1261 else {
1262 generic_nr = nr;
1263 generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
1264 generic_growfs = FLAGS_SET(pflags, GPT_FLAG_GROWFS);
1265 generic_uuid = id;
1266 generic_node = strdup(node);
1267 if (!generic_node)
1268 return -ENOMEM;
1269 }
1270
1271 } else if (sd_id128_equal(type_id, GPT_TMP)) {
1272
1273 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY|GPT_FLAG_GROWFS);
1274
1275 if (pflags & GPT_FLAG_NO_AUTO)
1276 continue;
1277
1278 designator = PARTITION_TMP;
1279 rw = !(pflags & GPT_FLAG_READ_ONLY);
1280 growfs = FLAGS_SET(pflags, GPT_FLAG_GROWFS);
1281
1282 } else if (sd_id128_equal(type_id, GPT_VAR)) {
1283
1284 check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY|GPT_FLAG_GROWFS);
1285
1286 if (pflags & GPT_FLAG_NO_AUTO)
1287 continue;
1288
1289 if (!FLAGS_SET(flags, DISSECT_IMAGE_RELAX_VAR_CHECK)) {
1290 sd_id128_t var_uuid;
1291
1292 /* For /var we insist that the uuid of the partition matches the
1293 * HMAC-SHA256 of the /var GPT partition type uuid, keyed by machine
1294 * ID. Why? Unlike the other partitions /var is inherently
1295 * installation specific, hence we need to be careful not to mount it
1296 * in the wrong installation. By hashing the partition UUID from
1297 * /etc/machine-id we can securely bind the partition to the
1298 * installation. */
1299
1300 r = sd_id128_get_machine_app_specific(GPT_VAR, &var_uuid);
1301 if (r < 0)
1302 return r;
1303
1304 if (!sd_id128_equal(var_uuid, id)) {
1305 log_debug("Found a /var/ partition, but its UUID didn't match our expectations, ignoring.");
1306 continue;
1307 }
1308 }
1309
1310 designator = PARTITION_VAR;
1311 rw = !(pflags & GPT_FLAG_READ_ONLY);
1312 growfs = FLAGS_SET(pflags, GPT_FLAG_GROWFS);
1313 }
1314
1315 if (designator != _PARTITION_DESIGNATOR_INVALID) {
1316 _cleanup_free_ char *t = NULL, *n = NULL, *o = NULL, *l = NULL;
1317 const char *options = NULL;
1318
1319 if (m->partitions[designator].found) {
1320 /* For most partition types the first one we see wins. Except for the
1321 * rootfs and /usr, where we do a version compare of the label, and
1322 * let the newest version win. This permits a simple A/B versioning
1323 * scheme in OS images. */
1324
1325 if (!PARTITION_DESIGNATOR_VERSIONED(designator) ||
1326 strverscmp_improved(m->partitions[designator].label, label) >= 0)
1327 continue;
1328
1329 dissected_partition_done(m->partitions + designator);
1330 }
1331
1332 if (fstype) {
1333 t = strdup(fstype);
1334 if (!t)
1335 return -ENOMEM;
1336 }
1337
1338 n = strdup(node);
1339 if (!n)
1340 return -ENOMEM;
1341
1342 if (label) {
1343 l = strdup(label);
1344 if (!l)
1345 return -ENOMEM;
1346 }
1347
1348 options = mount_options_from_designator(mount_options, designator);
1349 if (options) {
1350 o = strdup(options);
1351 if (!o)
1352 return -ENOMEM;
1353 }
1354
1355 m->partitions[designator] = (DissectedPartition) {
1356 .found = true,
1357 .partno = nr,
1358 .rw = rw,
1359 .growfs = growfs,
1360 .architecture = architecture,
1361 .node = TAKE_PTR(n),
1362 .fstype = TAKE_PTR(t),
1363 .label = TAKE_PTR(l),
1364 .uuid = id,
1365 .mount_options = TAKE_PTR(o),
1366 .offset = (uint64_t) start * 512,
1367 .size = (uint64_t) size * 512,
1368 };
1369 }
1370
1371 } else if (is_mbr) {
1372
1373 switch (blkid_partition_get_type(pp)) {
1374
1375 case 0x83: /* Linux partition */
1376
1377 if (pflags != 0x80) /* Bootable flag */
1378 continue;
1379
1380 if (generic_node)
1381 multiple_generic = true;
1382 else {
1383 generic_nr = nr;
1384 generic_rw = true;
1385 generic_growfs = false;
1386 generic_node = strdup(node);
1387 if (!generic_node)
1388 return -ENOMEM;
1389 }
1390
1391 break;
1392
1393 case 0xEA: { /* Boot Loader Spec extended $BOOT partition */
1394 _cleanup_free_ char *n = NULL, *o = NULL;
1395 sd_id128_t id = SD_ID128_NULL;
1396 const char *sid, *options = NULL;
1397
1398 /* First one wins */
1399 if (m->partitions[PARTITION_XBOOTLDR].found)
1400 continue;
1401
1402 sid = blkid_partition_get_uuid(pp);
1403 if (sid)
1404 (void) sd_id128_from_string(sid, &id);
1405
1406 n = strdup(node);
1407 if (!n)
1408 return -ENOMEM;
1409
1410 options = mount_options_from_designator(mount_options, PARTITION_XBOOTLDR);
1411 if (options) {
1412 o = strdup(options);
1413 if (!o)
1414 return -ENOMEM;
1415 }
1416
1417 m->partitions[PARTITION_XBOOTLDR] = (DissectedPartition) {
1418 .found = true,
1419 .partno = nr,
1420 .rw = true,
1421 .growfs = false,
1422 .architecture = _ARCHITECTURE_INVALID,
1423 .node = TAKE_PTR(n),
1424 .uuid = id,
1425 .mount_options = TAKE_PTR(o),
1426 .offset = (uint64_t) start * 512,
1427 .size = (uint64_t) size * 512,
1428 };
1429
1430 break;
1431 }}
1432 }
1433 }
1434
1435 if (m->partitions[PARTITION_ROOT].found) {
1436 /* If we found the primary arch, then invalidate the secondary arch to avoid any ambiguities,
1437 * since we never want to mount the secondary arch in this case. */
1438 m->partitions[PARTITION_ROOT_SECONDARY].found = false;
1439 m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
1440 m->partitions[PARTITION_ROOT_SECONDARY_VERITY_SIG].found = false;
1441 m->partitions[PARTITION_USR_SECONDARY].found = false;
1442 m->partitions[PARTITION_USR_SECONDARY_VERITY].found = false;
1443 m->partitions[PARTITION_USR_SECONDARY_VERITY_SIG].found = false;
1444
1445 } else if (m->partitions[PARTITION_ROOT_VERITY].found ||
1446 m->partitions[PARTITION_ROOT_VERITY_SIG].found)
1447 return -EADDRNOTAVAIL; /* Verity found but no matching rootfs? Something is off, refuse. */
1448
1449 else if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
1450
1451 /* No root partition found but there's one for the secondary architecture? Then upgrade
1452 * secondary arch to first */
1453
1454 m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
1455 zero(m->partitions[PARTITION_ROOT_SECONDARY]);
1456 m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
1457 zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
1458 m->partitions[PARTITION_ROOT_VERITY_SIG] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY_SIG];
1459 zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY_SIG]);
1460
1461 m->partitions[PARTITION_USR] = m->partitions[PARTITION_USR_SECONDARY];
1462 zero(m->partitions[PARTITION_USR_SECONDARY]);
1463 m->partitions[PARTITION_USR_VERITY] = m->partitions[PARTITION_USR_SECONDARY_VERITY];
1464 zero(m->partitions[PARTITION_USR_SECONDARY_VERITY]);
1465 m->partitions[PARTITION_USR_VERITY_SIG] = m->partitions[PARTITION_USR_SECONDARY_VERITY_SIG];
1466 zero(m->partitions[PARTITION_USR_SECONDARY_VERITY_SIG]);
1467
1468 } else if (m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found ||
1469 m->partitions[PARTITION_ROOT_SECONDARY_VERITY_SIG].found)
1470 return -EADDRNOTAVAIL; /* as above */
1471
1472 /* Hmm, we found a signature partition but no Verity data? Something is off. */
1473 if (m->partitions[PARTITION_ROOT_VERITY_SIG].found && !m->partitions[PARTITION_ROOT_VERITY].found)
1474 return -EADDRNOTAVAIL;
1475
1476 if (m->partitions[PARTITION_USR].found) {
1477 /* Invalidate secondary arch /usr/ if we found the primary arch */
1478 m->partitions[PARTITION_USR_SECONDARY].found = false;
1479 m->partitions[PARTITION_USR_SECONDARY_VERITY].found = false;
1480 m->partitions[PARTITION_USR_SECONDARY_VERITY_SIG].found = false;
1481
1482 } else if (m->partitions[PARTITION_USR_VERITY].found ||
1483 m->partitions[PARTITION_USR_VERITY_SIG].found)
1484 return -EADDRNOTAVAIL; /* as above */
1485
1486 else if (m->partitions[PARTITION_USR_SECONDARY].found) {
1487
1488 /* Upgrade secondary arch to primary */
1489 m->partitions[PARTITION_USR] = m->partitions[PARTITION_USR_SECONDARY];
1490 zero(m->partitions[PARTITION_USR_SECONDARY]);
1491 m->partitions[PARTITION_USR_VERITY] = m->partitions[PARTITION_USR_SECONDARY_VERITY];
1492 zero(m->partitions[PARTITION_USR_SECONDARY_VERITY]);
1493 m->partitions[PARTITION_USR_VERITY_SIG] = m->partitions[PARTITION_USR_SECONDARY_VERITY_SIG];
1494 zero(m->partitions[PARTITION_USR_SECONDARY_VERITY_SIG]);
1495
1496 } else if (m->partitions[PARTITION_USR_SECONDARY_VERITY].found ||
1497 m->partitions[PARTITION_USR_SECONDARY_VERITY_SIG].found)
1498 return -EADDRNOTAVAIL; /* as above */
1499
1500 /* Hmm, we found a signature partition but no Verity data? Something is off. */
1501 if (m->partitions[PARTITION_USR_VERITY_SIG].found && !m->partitions[PARTITION_USR_VERITY].found)
1502 return -EADDRNOTAVAIL;
1503
1504 /* If root and /usr are combined then insist that the architecture matches */
1505 if (m->partitions[PARTITION_ROOT].found &&
1506 m->partitions[PARTITION_USR].found &&
1507 (m->partitions[PARTITION_ROOT].architecture >= 0 &&
1508 m->partitions[PARTITION_USR].architecture >= 0 &&
1509 m->partitions[PARTITION_ROOT].architecture != m->partitions[PARTITION_USR].architecture))
1510 return -EADDRNOTAVAIL;
1511
1512 if (!m->partitions[PARTITION_ROOT].found &&
1513 !m->partitions[PARTITION_USR].found &&
1514 (flags & DISSECT_IMAGE_GENERIC_ROOT) &&
1515 (!verity || !verity->root_hash || verity->designator != PARTITION_USR)) {
1516
1517 /* OK, we found nothing usable, then check if there's a single generic partition, and use
1518 * that. If the root hash was set however, then we won't fall back to a generic node, because
1519 * the root hash decides. */
1520
1521 /* If we didn't find a properly marked root partition, but we did find a single suitable
1522 * generic Linux partition, then use this as root partition, if the caller asked for it. */
1523 if (multiple_generic)
1524 return -ENOTUNIQ;
1525
1526 /* If we didn't find a generic node, then we can't fix this up either */
1527 if (generic_node) {
1528 _cleanup_free_ char *o = NULL;
1529 const char *options;
1530
1531 options = mount_options_from_designator(mount_options, PARTITION_ROOT);
1532 if (options) {
1533 o = strdup(options);
1534 if (!o)
1535 return -ENOMEM;
1536 }
1537
1538 assert(generic_nr >= 0);
1539 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
1540 .found = true,
1541 .rw = generic_rw,
1542 .growfs = generic_growfs,
1543 .partno = generic_nr,
1544 .architecture = _ARCHITECTURE_INVALID,
1545 .node = TAKE_PTR(generic_node),
1546 .uuid = generic_uuid,
1547 .mount_options = TAKE_PTR(o),
1548 .offset = UINT64_MAX,
1549 .size = UINT64_MAX,
1550 };
1551 }
1552 }
1553
1554 /* Check if we have a root fs if we are told to do check. /usr alone is fine too, but only if appropriate flag for that is set too */
1555 if (FLAGS_SET(flags, DISSECT_IMAGE_REQUIRE_ROOT) &&
1556 !(m->partitions[PARTITION_ROOT].found || (m->partitions[PARTITION_USR].found && FLAGS_SET(flags, DISSECT_IMAGE_USR_NO_ROOT))))
1557 return -ENXIO;
1558
1559 if (m->partitions[PARTITION_ROOT_VERITY].found) {
1560 /* We only support one verity partition per image, i.e. can't do for both /usr and root fs */
1561 if (m->partitions[PARTITION_USR_VERITY].found)
1562 return -ENOTUNIQ;
1563
1564 /* We don't support verity enabled root with a split out /usr. Neither with nor without
1565 * verity there. (Note that we do support verity-less root with verity-full /usr, though.) */
1566 if (m->partitions[PARTITION_USR].found)
1567 return -EADDRNOTAVAIL;
1568 }
1569
1570 if (verity) {
1571 /* If a verity designator is specified, then insist that the matching partition exists */
1572 if (verity->designator >= 0 && !m->partitions[verity->designator].found)
1573 return -EADDRNOTAVAIL;
1574
1575 if (verity->root_hash) {
1576 /* If we have an explicit root hash and found the partitions for it, then we are ready to use
1577 * Verity, set things up for it */
1578
1579 if (verity->designator < 0 || verity->designator == PARTITION_ROOT) {
1580 if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
1581 return -EADDRNOTAVAIL;
1582
1583 /* If we found a verity setup, then the root partition is necessarily read-only. */
1584 m->partitions[PARTITION_ROOT].rw = false;
1585 m->verity_ready = true;
1586
1587 } else {
1588 assert(verity->designator == PARTITION_USR);
1589
1590 if (!m->partitions[PARTITION_USR_VERITY].found || !m->partitions[PARTITION_USR].found)
1591 return -EADDRNOTAVAIL;
1592
1593 m->partitions[PARTITION_USR].rw = false;
1594 m->verity_ready = true;
1595 }
1596
1597 if (m->verity_ready)
1598 m->verity_sig_ready = !!verity->root_hash_sig;
1599
1600 } else if (m->partitions[verity->designator == PARTITION_USR ? PARTITION_USR_VERITY_SIG : PARTITION_ROOT_VERITY_SIG].found) {
1601
1602 /* If we found an embedded signature partition, we are ready, too. */
1603
1604 m->verity_ready = m->verity_sig_ready = true;
1605 m->partitions[verity->designator == PARTITION_USR ? PARTITION_USR : PARTITION_ROOT].rw = false;
1606 }
1607 }
1608
1609 blkid_free_probe(b);
1610 b = NULL;
1611
1612 /* Fill in file system types if we don't know them yet. */
1613 for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
1614 DissectedPartition *p = m->partitions + i;
1615
1616 if (!p->found)
1617 continue;
1618
1619 if (!p->fstype && p->node) {
1620 r = probe_filesystem(p->node, &p->fstype);
1621 if (r < 0 && r != -EUCLEAN)
1622 return r;
1623 }
1624
1625 if (streq_ptr(p->fstype, "crypto_LUKS"))
1626 m->encrypted = true;
1627
1628 if (p->fstype && fstype_is_ro(p->fstype))
1629 p->rw = false;
1630
1631 if (!p->rw)
1632 p->growfs = false;
1633 }
1634
1635 *ret = TAKE_PTR(m);
1636 return 0;
1637 #else
1638 return -EOPNOTSUPP;
1639 #endif
1640 }
1641
1642 DissectedImage* dissected_image_unref(DissectedImage *m) {
1643 if (!m)
1644 return NULL;
1645
1646 for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++)
1647 dissected_partition_done(m->partitions + i);
1648
1649 free(m->image_name);
1650 free(m->hostname);
1651 strv_free(m->machine_info);
1652 strv_free(m->os_release);
1653 strv_free(m->extension_release);
1654
1655 return mfree(m);
1656 }
1657
/* Checks whether the block device node at 'path' is a loopback device, or a partition of one.
 *
 * Returns > 0 if it is, 0 if it is not, -ENOTBLK if 'path' is not a block device node, and another
 * negative errno value if stat() or access() fail for a reason other than ENOENT. */
static int is_loop_device(const char *path) {
        char s[SYS_BLOCK_PATH_MAX("/../loop/")];
        struct stat st;

        assert(path);

        if (stat(path, &st) < 0)
                return -errno;

        if (!S_ISBLK(st.st_mode))
                return -ENOTBLK;

        /* Note: for a device node st_dev is the device of the file system the node is stored on, while
         * st_rdev is the device the node actually refers to. We are interested in the latter here. */
        xsprintf_sys_block_path(s, "/loop/", st.st_rdev);
        if (access(s, F_OK) < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
                xsprintf_sys_block_path(s, "/../loop/", st.st_rdev);
                if (access(s, F_OK) < 0)
                        return errno == ENOENT ? false : -errno;
        }

        return true;
}
1683
1684 static int run_fsck(const char *node, const char *fstype) {
1685 int r, exit_status;
1686 pid_t pid;
1687
1688 assert(node);
1689 assert(fstype);
1690
1691 r = fsck_exists(fstype);
1692 if (r < 0) {
1693 log_debug_errno(r, "Couldn't determine whether fsck for %s exists, proceeding anyway.", fstype);
1694 return 0;
1695 }
1696 if (r == 0) {
1697 log_debug("Not checking partition %s, as fsck for %s does not exist.", node, fstype);
1698 return 0;
1699 }
1700
1701 r = safe_fork("(fsck)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_NULL_STDIO, &pid);
1702 if (r < 0)
1703 return log_debug_errno(r, "Failed to fork off fsck: %m");
1704 if (r == 0) {
1705 /* Child */
1706 execl("/sbin/fsck", "/sbin/fsck", "-aT", node, NULL);
1707 log_open();
1708 log_debug_errno(errno, "Failed to execl() fsck: %m");
1709 _exit(FSCK_OPERATIONAL_ERROR);
1710 }
1711
1712 exit_status = wait_for_terminate_and_check("fsck", pid, 0);
1713 if (exit_status < 0)
1714 return log_debug_errno(exit_status, "Failed to fork off /sbin/fsck: %m");
1715
1716 if ((exit_status & ~FSCK_ERROR_CORRECTED) != FSCK_SUCCESS) {
1717 log_debug("fsck failed with exit status %i.", exit_status);
1718
1719 if ((exit_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0)
1720 return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN), "File system is corrupted, refusing.");
1721
1722 log_debug("Ignoring fsck error.");
1723 }
1724
1725 return 0;
1726 }
1727
1728 static int fs_grow(const char *node_path, const char *mount_path) {
1729 _cleanup_close_ int mount_fd = -1, node_fd = -1;
1730 uint64_t size, newsize;
1731 int r;
1732
1733 node_fd = open(node_path, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
1734 if (node_fd < 0)
1735 return log_debug_errno(errno, "Failed to open node device %s: %m", node_path);
1736
1737 if (ioctl(node_fd, BLKGETSIZE64, &size) != 0)
1738 return log_debug_errno(errno, "Failed to get block device size of %s: %m", node_path);
1739
1740 mount_fd = open(mount_path, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
1741 if (mount_fd < 0)
1742 return log_debug_errno(errno, "Failed to open mountd file system %s: %m", mount_path);
1743
1744 log_debug("Resizing \"%s\" to %"PRIu64" bytes...", mount_path, size);
1745 r = resize_fs(mount_fd, size, &newsize);
1746 if (r < 0)
1747 return log_debug_errno(r, "Failed to resize \"%s\" to %"PRIu64" bytes: %m", mount_path, size);
1748
1749 if (newsize == size)
1750 log_debug("Successfully resized \"%s\" to %s bytes.",
1751 mount_path, FORMAT_BYTES(newsize));
1752 else {
1753 assert(newsize < size);
1754 log_debug("Successfully resized \"%s\" to %s bytes (%"PRIu64" bytes lost due to blocksize).",
1755 mount_path, FORMAT_BYTES(newsize), size - newsize);
1756 }
1757
1758 return 0;
1759 }
1760
1761 static int mount_partition(
1762 DissectedPartition *m,
1763 const char *where,
1764 const char *directory,
1765 uid_t uid_shift,
1766 uid_t uid_range,
1767 DissectImageFlags flags) {
1768
1769 _cleanup_free_ char *chased = NULL, *options = NULL;
1770 const char *p, *node, *fstype;
1771 bool rw, remap_uid_gid = false;
1772 int r;
1773
1774 assert(m);
1775 assert(where);
1776
1777 /* Use decrypted node and matching fstype if available, otherwise use the original device */
1778 node = m->decrypted_node ?: m->node;
1779 fstype = m->decrypted_node ? m->decrypted_fstype: m->fstype;
1780
1781 if (!m->found || !node)
1782 return 0;
1783 if (!fstype)
1784 return -EAFNOSUPPORT;
1785
1786 /* We are looking at an encrypted partition? This either means stacked encryption, or the caller didn't call dissected_image_decrypt() beforehand. Let's return a recognizable error for this case. */
1787 if (streq(fstype, "crypto_LUKS"))
1788 return -EUNATCH;
1789
1790 rw = m->rw && !(flags & DISSECT_IMAGE_MOUNT_READ_ONLY);
1791
1792 if (FLAGS_SET(flags, DISSECT_IMAGE_FSCK) && rw) {
1793 r = run_fsck(node, fstype);
1794 if (r < 0)
1795 return r;
1796 }
1797
1798 if (directory) {
1799 /* Automatically create missing mount points inside the image, if necessary. */
1800 r = mkdir_p_root(where, directory, uid_shift, (gid_t) uid_shift, 0755);
1801 if (r < 0 && r != -EROFS)
1802 return r;
1803
1804 r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased, NULL);
1805 if (r < 0)
1806 return r;
1807
1808 p = chased;
1809 } else {
1810 /* Create top-level mount if missing – but only if this is asked for. This won't modify the
1811 * image (as the branch above does) but the host hierarchy, and the created directory might
1812 * survive our mount in the host hierarchy hence. */
1813 if (FLAGS_SET(flags, DISSECT_IMAGE_MKDIR)) {
1814 r = mkdir_p(where, 0755);
1815 if (r < 0)
1816 return r;
1817 }
1818
1819 p = where;
1820 }
1821
1822 /* If requested, turn on discard support. */
1823 if (fstype_can_discard(fstype) &&
1824 ((flags & DISSECT_IMAGE_DISCARD) ||
1825 ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node) > 0))) {
1826 options = strdup("discard");
1827 if (!options)
1828 return -ENOMEM;
1829 }
1830
1831 if (uid_is_valid(uid_shift) && uid_shift != 0) {
1832
1833 if (fstype_can_uid_gid(fstype)) {
1834 _cleanup_free_ char *uid_option = NULL;
1835
1836 if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
1837 return -ENOMEM;
1838
1839 if (!strextend_with_separator(&options, ",", uid_option))
1840 return -ENOMEM;
1841 } else if (FLAGS_SET(flags, DISSECT_IMAGE_MOUNT_IDMAPPED))
1842 remap_uid_gid = true;
1843 }
1844
1845 if (!isempty(m->mount_options))
1846 if (!strextend_with_separator(&options, ",", m->mount_options))
1847 return -ENOMEM;
1848
1849 /* So, when you request MS_RDONLY from ext4, then this means nothing. It happily still writes to the
1850 * backing storage. What's worse, the BLKRO[GS]ET flag and (in case of loopback devices)
1851 * LO_FLAGS_READ_ONLY don't mean anything, they affect userspace accesses only, and write accesses
1852 * from the upper file system still get propagated through to the underlying file system,
1853 * unrestricted. To actually get ext4/xfs/btrfs to stop writing to the device we need to specify
1854 * "norecovery" as mount option, in addition to MS_RDONLY. Yes, this sucks, since it means we need to
1855 * carry a per file system table here.
1856 *
1857 * Note that this means that we might not be able to mount corrupted file systems as read-only
1858 * anymore (since in some cases the kernel implementations will refuse mounting when corrupted,
1859 * read-only and "norecovery" is specified). But I think for the case of automatically determined
1860 * mount options for loopback devices this is the right choice, since otherwise using the same
1861 * loopback file twice even in read-only mode, is going to fail badly sooner or later. The usecase of
1862 * making reuse of the immutable images "just work" is more relevant to us than having read-only
1863 * access that actually modifies stuff work on such image files. Or to say this differently: if
1864 * people want their file systems to be fixed up they should just open them in writable mode, where
1865 * all these problems don't exist. */
1866 if (!rw && STRPTR_IN_SET(fstype, "ext3", "ext4", "xfs", "btrfs"))
1867 if (!strextend_with_separator(&options, ",", "norecovery"))
1868 return -ENOMEM;
1869
1870 r = mount_nofollow_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
1871 if (r < 0)
1872 return r;
1873
1874 if (rw && m->growfs && FLAGS_SET(flags, DISSECT_IMAGE_GROWFS))
1875 (void) fs_grow(node, p);
1876
1877 if (remap_uid_gid) {
1878 r = remount_idmap(p, uid_shift, uid_range);
1879 if (r < 0)
1880 return r;
1881 }
1882
1883 return 1;
1884 }
1885
1886 static int mount_root_tmpfs(const char *where, uid_t uid_shift, DissectImageFlags flags) {
1887 _cleanup_free_ char *options = NULL;
1888 int r;
1889
1890 assert(where);
1891
1892 /* For images that contain /usr/ but no rootfs, let's mount rootfs as tmpfs */
1893
1894 if (FLAGS_SET(flags, DISSECT_IMAGE_MKDIR)) {
1895 r = mkdir_p(where, 0755);
1896 if (r < 0)
1897 return r;
1898 }
1899
1900 if (uid_is_valid(uid_shift)) {
1901 if (asprintf(&options, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
1902 return -ENOMEM;
1903 }
1904
1905 r = mount_nofollow_verbose(LOG_DEBUG, "rootfs", where, "tmpfs", MS_NODEV, options);
1906 if (r < 0)
1907 return r;
1908
1909 return 1;
1910 }
1911
1912 int dissected_image_mount(
1913 DissectedImage *m,
1914 const char *where,
1915 uid_t uid_shift,
1916 uid_t uid_range,
1917 DissectImageFlags flags) {
1918
1919 int r, xbootldr_mounted;
1920
1921 assert(m);
1922 assert(where);
1923
1924 /* Returns:
1925 *
1926 * -ENXIO → No root partition found
1927 * -EMEDIUMTYPE → DISSECT_IMAGE_VALIDATE_OS set but no os-release/extension-release file found
1928 * -EUNATCH → Encrypted partition found for which no dm-crypt was set up yet
1929 * -EUCLEAN → fsck for file system failed
1930 * -EBUSY → File system already mounted/used elsewhere (kernel)
1931 * -EAFNOSUPPORT → File system type not supported or not known
1932 */
1933
1934 if (!(m->partitions[PARTITION_ROOT].found ||
1935 (m->partitions[PARTITION_USR].found && FLAGS_SET(flags, DISSECT_IMAGE_USR_NO_ROOT))))
1936 return -ENXIO; /* Require a root fs or at least a /usr/ fs (the latter is subject to a flag of its own) */
1937
1938 if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
1939
1940 /* First mount the root fs. If there's none we use a tmpfs. */
1941 if (m->partitions[PARTITION_ROOT].found)
1942 r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, uid_range, flags);
1943 else
1944 r = mount_root_tmpfs(where, uid_shift, flags);
1945 if (r < 0)
1946 return r;
1947
1948 /* For us mounting root always means mounting /usr as well */
1949 r = mount_partition(m->partitions + PARTITION_USR, where, "/usr", uid_shift, uid_range, flags);
1950 if (r < 0)
1951 return r;
1952
1953 if ((flags & (DISSECT_IMAGE_VALIDATE_OS|DISSECT_IMAGE_VALIDATE_OS_EXT)) != 0) {
1954 /* If either one of the validation flags are set, ensure that the image qualifies
1955 * as one or the other (or both). */
1956 bool ok = false;
1957
1958 if (FLAGS_SET(flags, DISSECT_IMAGE_VALIDATE_OS)) {
1959 r = path_is_os_tree(where);
1960 if (r < 0)
1961 return r;
1962 if (r > 0)
1963 ok = true;
1964 }
1965 if (!ok && FLAGS_SET(flags, DISSECT_IMAGE_VALIDATE_OS_EXT)) {
1966 r = path_is_extension_tree(where, m->image_name);
1967 if (r < 0)
1968 return r;
1969 if (r > 0)
1970 ok = true;
1971 }
1972
1973 if (!ok)
1974 return -ENOMEDIUM;
1975 }
1976 }
1977
1978 if (flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY)
1979 return 0;
1980
1981 r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, uid_range, flags);
1982 if (r < 0)
1983 return r;
1984
1985 r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, uid_range, flags);
1986 if (r < 0)
1987 return r;
1988
1989 r = mount_partition(m->partitions + PARTITION_VAR, where, "/var", uid_shift, uid_range, flags);
1990 if (r < 0)
1991 return r;
1992
1993 r = mount_partition(m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, uid_range, flags);
1994 if (r < 0)
1995 return r;
1996
1997 xbootldr_mounted = mount_partition(m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, uid_range, flags);
1998 if (xbootldr_mounted < 0)
1999 return xbootldr_mounted;
2000
2001 if (m->partitions[PARTITION_ESP].found) {
2002 int esp_done = false;
2003
2004 /* Mount the ESP to /efi if it exists. If it doesn't exist, use /boot instead, but only if it
2005 * exists and is empty, and we didn't already mount the XBOOTLDR partition into it. */
2006
2007 r = chase_symlinks("/efi", where, CHASE_PREFIX_ROOT, NULL, NULL);
2008 if (r < 0) {
2009 if (r != -ENOENT)
2010 return r;
2011
2012 /* /efi doesn't exist. Let's see if /boot is suitable then */
2013
2014 if (!xbootldr_mounted) {
2015 _cleanup_free_ char *p = NULL;
2016
2017 r = chase_symlinks("/boot", where, CHASE_PREFIX_ROOT, &p, NULL);
2018 if (r < 0) {
2019 if (r != -ENOENT)
2020 return r;
2021 } else if (dir_is_empty(p) > 0) {
2022 /* It exists and is an empty directory. Let's mount the ESP there. */
2023 r = mount_partition(m->partitions + PARTITION_ESP, where, "/boot", uid_shift, uid_range, flags);
2024 if (r < 0)
2025 return r;
2026
2027 esp_done = true;
2028 }
2029 }
2030 }
2031
2032 if (!esp_done) {
2033 /* OK, let's mount the ESP now to /efi (possibly creating the dir if missing) */
2034
2035 r = mount_partition(m->partitions + PARTITION_ESP, where, "/efi", uid_shift, uid_range, flags);
2036 if (r < 0)
2037 return r;
2038 }
2039 }
2040
2041 return 0;
2042 }
2043
2044 int dissected_image_mount_and_warn(
2045 DissectedImage *m,
2046 const char *where,
2047 uid_t uid_shift,
2048 uid_t uid_range,
2049 DissectImageFlags flags) {
2050
2051 int r;
2052
2053 assert(m);
2054 assert(where);
2055
2056 r = dissected_image_mount(m, where, uid_shift, uid_range, flags);
2057 if (r == -ENXIO)
2058 return log_error_errno(r, "Not root file system found in image.");
2059 if (r == -EMEDIUMTYPE)
2060 return log_error_errno(r, "No suitable os-release/extension-release file in image found.");
2061 if (r == -EUNATCH)
2062 return log_error_errno(r, "Encrypted file system discovered, but decryption not requested.");
2063 if (r == -EUCLEAN)
2064 return log_error_errno(r, "File system check on image failed.");
2065 if (r == -EBUSY)
2066 return log_error_errno(r, "File system already mounted elsewhere.");
2067 if (r == -EAFNOSUPPORT)
2068 return log_error_errno(r, "File system type not supported or not known.");
2069 if (r < 0)
2070 return log_error_errno(r, "Failed to mount image: %m");
2071
2072 return r;
2073 }
2074
#if HAVE_LIBCRYPTSETUP
typedef struct DecryptedPartition DecryptedPartition;

/* One device-mapper device that was set up for a partition of an image */
struct DecryptedPartition {
        struct crypt_device *device; /* libcryptsetup context of the device */
        char *name;                  /* device-mapper name of the device */
        bool relinquished;           /* if true, decrypted_image_unref() leaves the device active */
};

/* All device-mapper devices that were set up for one image */
struct DecryptedImage {
        DecryptedPartition *decrypted; /* array of devices */
        size_t n_decrypted;            /* number of valid entries in 'decrypted' */
};
#endif
2087
2088 DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
2089 #if HAVE_LIBCRYPTSETUP
2090 int r;
2091
2092 if (!d)
2093 return NULL;
2094
2095 for (size_t i = 0; i < d->n_decrypted; i++) {
2096 DecryptedPartition *p = d->decrypted + i;
2097
2098 if (p->device && p->name && !p->relinquished) {
2099 r = sym_crypt_deactivate_by_name(p->device, p->name, 0);
2100 if (r < 0)
2101 log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
2102 }
2103
2104 if (p->device)
2105 sym_crypt_free(p->device);
2106 free(p->name);
2107 }
2108
2109 free(d->decrypted);
2110 free(d);
2111 #endif
2112 return NULL;
2113 }
2114
2115 #if HAVE_LIBCRYPTSETUP
2116
2117 static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
2118 _cleanup_free_ char *name = NULL, *node = NULL;
2119 const char *base;
2120
2121 assert(original_node);
2122 assert(suffix);
2123 assert(ret_name);
2124 assert(ret_node);
2125
2126 base = strrchr(original_node, '/');
2127 if (!base)
2128 base = original_node;
2129 else
2130 base++;
2131 if (isempty(base))
2132 return -EINVAL;
2133
2134 name = strjoin(base, suffix);
2135 if (!name)
2136 return -ENOMEM;
2137 if (!filename_is_valid(name))
2138 return -EINVAL;
2139
2140 node = path_join(sym_crypt_get_dir(), name);
2141 if (!node)
2142 return -ENOMEM;
2143
2144 *ret_name = TAKE_PTR(name);
2145 *ret_node = TAKE_PTR(node);
2146
2147 return 0;
2148 }
2149
2150 static int decrypt_partition(
2151 DissectedPartition *m,
2152 const char *passphrase,
2153 DissectImageFlags flags,
2154 DecryptedImage *d) {
2155
2156 _cleanup_free_ char *node = NULL, *name = NULL;
2157 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
2158 int r;
2159
2160 assert(m);
2161 assert(d);
2162
2163 if (!m->found || !m->node || !m->fstype)
2164 return 0;
2165
2166 if (!streq(m->fstype, "crypto_LUKS"))
2167 return 0;
2168
2169 if (!passphrase)
2170 return -ENOKEY;
2171
2172 r = dlopen_cryptsetup();
2173 if (r < 0)
2174 return r;
2175
2176 r = make_dm_name_and_node(m->node, "-decrypted", &name, &node);
2177 if (r < 0)
2178 return r;
2179
2180 if (!GREEDY_REALLOC0(d->decrypted, d->n_decrypted + 1))
2181 return -ENOMEM;
2182
2183 r = sym_crypt_init(&cd, m->node);
2184 if (r < 0)
2185 return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
2186
2187 cryptsetup_enable_logging(cd);
2188
2189 r = sym_crypt_load(cd, CRYPT_LUKS, NULL);
2190 if (r < 0)
2191 return log_debug_errno(r, "Failed to load LUKS metadata: %m");
2192
2193 r = sym_crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
2194 ((flags & DISSECT_IMAGE_DEVICE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
2195 ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
2196 if (r < 0) {
2197 log_debug_errno(r, "Failed to activate LUKS device: %m");
2198 return r == -EPERM ? -EKEYREJECTED : r;
2199 }
2200
2201 d->decrypted[d->n_decrypted++] = (DecryptedPartition) {
2202 .name = TAKE_PTR(name),
2203 .device = TAKE_PTR(cd),
2204 };
2205
2206 m->decrypted_node = TAKE_PTR(node);
2207
2208 return 0;
2209 }
2210
2211 static int verity_can_reuse(
2212 const VeritySettings *verity,
2213 const char *name,
2214 struct crypt_device **ret_cd) {
2215
2216 /* If the same volume was already open, check that the root hashes match, and reuse it if they do */
2217 _cleanup_free_ char *root_hash_existing = NULL;
2218 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
2219 struct crypt_params_verity crypt_params = {};
2220 size_t root_hash_existing_size;
2221 int r;
2222
2223 assert(verity);
2224 assert(name);
2225 assert(ret_cd);
2226
2227 r = sym_crypt_init_by_name(&cd, name);
2228 if (r < 0)
2229 return log_debug_errno(r, "Error opening verity device, crypt_init_by_name failed: %m");
2230
2231 cryptsetup_enable_logging(cd);
2232
2233 r = sym_crypt_get_verity_info(cd, &crypt_params);
2234 if (r < 0)
2235 return log_debug_errno(r, "Error opening verity device, crypt_get_verity_info failed: %m");
2236
2237 root_hash_existing_size = verity->root_hash_size;
2238 root_hash_existing = malloc0(root_hash_existing_size);
2239 if (!root_hash_existing)
2240 return -ENOMEM;
2241
2242 r = sym_crypt_volume_key_get(cd, CRYPT_ANY_SLOT, root_hash_existing, &root_hash_existing_size, NULL, 0);
2243 if (r < 0)
2244 return log_debug_errno(r, "Error opening verity device, crypt_volume_key_get failed: %m");
2245 if (verity->root_hash_size != root_hash_existing_size ||
2246 memcmp(root_hash_existing, verity->root_hash, verity->root_hash_size) != 0)
2247 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Error opening verity device, it already exists but root hashes are different.");
2248
2249 #if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
2250 /* Ensure that, if signatures are supported, we only reuse the device if the previous mount used the
2251 * same settings, so that a previous unsigned mount will not be reused if the user asks to use
2252 * signing for the new one, and vice versa. */
2253 if (!!verity->root_hash_sig != !!(crypt_params.flags & CRYPT_VERITY_ROOT_HASH_SIGNATURE))
2254 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Error opening verity device, it already exists but signature settings are not the same.");
2255 #endif
2256
2257 *ret_cd = TAKE_PTR(cd);
2258 return 0;
2259 }
2260
2261 static inline char* dm_deferred_remove_clean(char *name) {
2262 if (!name)
2263 return NULL;
2264
2265 (void) sym_crypt_deactivate_by_name(NULL, name, CRYPT_DEACTIVATE_DEFERRED);
2266 return mfree(name);
2267 }
2268 DEFINE_TRIVIAL_CLEANUP_FUNC(char *, dm_deferred_remove_clean);
2269
2270 static int validate_signature_userspace(const VeritySettings *verity) {
2271 #if HAVE_OPENSSL
2272 _cleanup_(sk_X509_free_allp) STACK_OF(X509) *sk = NULL;
2273 _cleanup_strv_free_ char **certs = NULL;
2274 _cleanup_(PKCS7_freep) PKCS7 *p7 = NULL;
2275 _cleanup_free_ char *s = NULL;
2276 _cleanup_(BIO_freep) BIO *bio = NULL; /* 'bio' must be freed first, 's' second, hence keep this order
2277 * of declaration in place, please */
2278 const unsigned char *d;
2279 char **i;
2280 int r;
2281
2282 assert(verity);
2283 assert(verity->root_hash);
2284 assert(verity->root_hash_sig);
2285
2286 /* Because installing a signature certificate into the kernel chain is so messy, let's optionally do
2287 * userspace validation. */
2288
2289 r = conf_files_list_nulstr(&certs, ".crt", NULL, CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED, CONF_PATHS_NULSTR("verity.d"));
2290 if (r < 0)
2291 return log_debug_errno(r, "Failed to enumerate certificates: %m");
2292 if (strv_isempty(certs)) {
2293 log_debug("No userspace dm-verity certificates found.");
2294 return 0;
2295 }
2296
2297 d = verity->root_hash_sig;
2298 p7 = d2i_PKCS7(NULL, &d, (long) verity->root_hash_sig_size);
2299 if (!p7)
2300 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse PKCS7 DER signature data.");
2301
2302 s = hexmem(verity->root_hash, verity->root_hash_size);
2303 if (!s)
2304 return log_oom_debug();
2305
2306 bio = BIO_new_mem_buf(s, strlen(s));
2307 if (!bio)
2308 return log_oom_debug();
2309
2310 sk = sk_X509_new_null();
2311 if (!sk)
2312 return log_oom_debug();
2313
2314 STRV_FOREACH(i, certs) {
2315 _cleanup_(X509_freep) X509 *c = NULL;
2316 _cleanup_fclose_ FILE *f = NULL;
2317
2318 f = fopen(*i, "re");
2319 if (!f) {
2320 log_debug_errno(errno, "Failed to open '%s', ignoring: %m", *i);
2321 continue;
2322 }
2323
2324 c = PEM_read_X509(f, NULL, NULL, NULL);
2325 if (!c) {
2326 log_debug("Failed to load X509 certificate '%s', ignoring.", *i);
2327 continue;
2328 }
2329
2330 if (sk_X509_push(sk, c) == 0)
2331 return log_oom_debug();
2332
2333 TAKE_PTR(c);
2334 }
2335
2336 r = PKCS7_verify(p7, sk, NULL, bio, NULL, PKCS7_NOINTERN|PKCS7_NOVERIFY);
2337 if (r)
2338 log_debug("Userspace PKCS#7 validation succeeded.");
2339 else
2340 log_debug("Userspace PKCS#7 validation failed: %s", ERR_error_string(ERR_get_error(), NULL));
2341
2342 return r;
2343 #else
2344 log_debug("Not doing client-side validation of dm-verity root hash signatures, OpenSSL support disabled.");
2345 return 0;
2346 #endif
2347 }
2348
2349 static int do_crypt_activate_verity(
2350 struct crypt_device *cd,
2351 const char *name,
2352 const VeritySettings *verity) {
2353
2354 bool check_signature;
2355 int r;
2356
2357 assert(cd);
2358 assert(name);
2359 assert(verity);
2360
2361 if (verity->root_hash_sig) {
2362 r = getenv_bool_secure("SYSTEMD_DISSECT_VERITY_SIGNATURE");
2363 if (r < 0 && r != -ENXIO)
2364 log_debug_errno(r, "Failed to parse $SYSTEMD_DISSECT_VERITY_SIGNATURE");
2365
2366 check_signature = r != 0;
2367 } else
2368 check_signature = false;
2369
2370 if (check_signature) {
2371
2372 #if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
2373 /* First, if we have support for signed keys in the kernel, then try that first. */
2374 r = sym_crypt_activate_by_signed_key(
2375 cd,
2376 name,
2377 verity->root_hash,
2378 verity->root_hash_size,
2379 verity->root_hash_sig,
2380 verity->root_hash_sig_size,
2381 CRYPT_ACTIVATE_READONLY);
2382 if (r >= 0)
2383 return r;
2384
2385 log_debug("Validation of dm-verity signature failed via the kernel, trying userspace validation instead.");
2386 #else
2387 log_debug("Activation of verity device with signature requested, but not supported via the kernel by %s due to missing crypt_activate_by_signed_key(), trying userspace validation instead.",
2388 program_invocation_short_name);
2389 #endif
2390
2391 /* So this didn't work via the kernel, then let's try userspace validation instead. If that
2392 * works we'll try to activate without telling the kernel the signature. */
2393
2394 r = validate_signature_userspace(verity);
2395 if (r < 0)
2396 return r;
2397 if (r == 0)
2398 return log_debug_errno(SYNTHETIC_ERRNO(ENOKEY),
2399 "Activation of signed Verity volume worked neither via the kernel nor in userspace, can't activate.");
2400 }
2401
2402 return sym_crypt_activate_by_volume_key(
2403 cd,
2404 name,
2405 verity->root_hash,
2406 verity->root_hash_size,
2407 CRYPT_ACTIVATE_READONLY);
2408 }
2409
2410 static int verity_partition(
2411 PartitionDesignator designator,
2412 DissectedPartition *m,
2413 DissectedPartition *v,
2414 const VeritySettings *verity,
2415 DissectImageFlags flags,
2416 DecryptedImage *d) {
2417
2418 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
2419 _cleanup_(dm_deferred_remove_cleanp) char *restore_deferred_remove = NULL;
2420 _cleanup_free_ char *node = NULL, *name = NULL;
2421 int r;
2422
2423 assert(m);
2424 assert(v || (verity && verity->data_path));
2425
2426 if (!verity || !verity->root_hash)
2427 return 0;
2428 if (!((verity->designator < 0 && designator == PARTITION_ROOT) ||
2429 (verity->designator == designator)))
2430 return 0;
2431
2432 if (!m->found || !m->node || !m->fstype)
2433 return 0;
2434 if (!verity->data_path) {
2435 if (!v->found || !v->node || !v->fstype)
2436 return 0;
2437
2438 if (!streq(v->fstype, "DM_verity_hash"))
2439 return 0;
2440 }
2441
2442 r = dlopen_cryptsetup();
2443 if (r < 0)
2444 return r;
2445
2446 if (FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE)) {
2447 /* Use the roothash, which is unique per volume, as the device node name, so that it can be reused */
2448 _cleanup_free_ char *root_hash_encoded = NULL;
2449
2450 root_hash_encoded = hexmem(verity->root_hash, verity->root_hash_size);
2451 if (!root_hash_encoded)
2452 return -ENOMEM;
2453
2454 r = make_dm_name_and_node(root_hash_encoded, "-verity", &name, &node);
2455 } else
2456 r = make_dm_name_and_node(m->node, "-verity", &name, &node);
2457 if (r < 0)
2458 return r;
2459
2460 r = sym_crypt_init(&cd, verity->data_path ?: v->node);
2461 if (r < 0)
2462 return r;
2463
2464 cryptsetup_enable_logging(cd);
2465
2466 r = sym_crypt_load(cd, CRYPT_VERITY, NULL);
2467 if (r < 0)
2468 return r;
2469
2470 r = sym_crypt_set_data_device(cd, m->node);
2471 if (r < 0)
2472 return r;
2473
2474 if (!GREEDY_REALLOC0(d->decrypted, d->n_decrypted + 1))
2475 return -ENOMEM;
2476
2477 /* If activating fails because the device already exists, check the metadata and reuse it if it matches.
2478 * In case of ENODEV/ENOENT, which can happen if another process is activating at the exact same time,
2479 * retry a few times before giving up. */
2480 for (unsigned i = 0; i < N_DEVICE_NODE_LIST_ATTEMPTS; i++) {
2481
2482 r = do_crypt_activate_verity(cd, name, verity);
2483 /* libdevmapper can return EINVAL when the device is already in the activation stage.
2484 * There's no way to distinguish this situation from a genuine error due to invalid
2485 * parameters, so immediately fall back to activating the device with a unique name.
2486 * Improvements in libcrypsetup can ensure this never happens:
2487 * https://gitlab.com/cryptsetup/cryptsetup/-/merge_requests/96 */
2488 if (r == -EINVAL && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
2489 return verity_partition(designator, m, v, verity, flags & ~DISSECT_IMAGE_VERITY_SHARE, d);
2490 if (!IN_SET(r,
2491 0, /* Success */
2492 -EEXIST, /* Volume is already open and ready to be used */
2493 -EBUSY, /* Volume is being opened but not ready, crypt_init_by_name can fetch details */
2494 -ENODEV /* Volume is being opened but not ready, crypt_init_by_name would fail, try to open again */))
2495 return r;
2496 if (IN_SET(r, -EEXIST, -EBUSY)) {
2497 struct crypt_device *existing_cd = NULL;
2498
2499 if (!restore_deferred_remove){
2500 /* To avoid races, disable automatic removal on umount while setting up the new device. Restore it on failure. */
2501 r = dm_deferred_remove_cancel(name);
2502 /* If activation returns EBUSY there might be no deferred removal to cancel, that's fine */
2503 if (r < 0 && r != -ENXIO)
2504 return log_debug_errno(r, "Disabling automated deferred removal for verity device %s failed: %m", node);
2505 if (r == 0) {
2506 restore_deferred_remove = strdup(name);
2507 if (!restore_deferred_remove)
2508 return -ENOMEM;
2509 }
2510 }
2511
2512 r = verity_can_reuse(verity, name, &existing_cd);
2513 /* Same as above, -EINVAL can randomly happen when it actually means -EEXIST */
2514 if (r == -EINVAL && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
2515 return verity_partition(designator, m, v, verity, flags & ~DISSECT_IMAGE_VERITY_SHARE, d);
2516 if (!IN_SET(r, 0, -ENODEV, -ENOENT, -EBUSY))
2517 return log_debug_errno(r, "Checking whether existing verity device %s can be reused failed: %m", node);
2518 if (r == 0) {
2519 /* devmapper might say that the device exists, but the devlink might not yet have been
2520 * created. Check and wait for the udev event in that case. */
2521 r = device_wait_for_devlink(node, "block", usec_add(now(CLOCK_MONOTONIC), 100 * USEC_PER_MSEC), NULL);
2522 /* Fallback to activation with a unique device if it's taking too long */
2523 if (r == -ETIMEDOUT)
2524 break;
2525 if (r < 0)
2526 return r;
2527
2528 if (cd)
2529 sym_crypt_free(cd);
2530 cd = existing_cd;
2531 }
2532 }
2533 if (r == 0)
2534 break;
2535
2536 /* Device is being opened by another process, but it has not finished yet, yield for 2ms */
2537 (void) usleep(2 * USEC_PER_MSEC);
2538 }
2539
2540 /* An existing verity device was reported by libcryptsetup/libdevmapper, but we can't use it at this time.
2541 * Fall back to activating it with a unique device name. */
2542 if (r != 0 && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
2543 return verity_partition(designator, m, v, verity, flags & ~DISSECT_IMAGE_VERITY_SHARE, d);
2544
2545 /* Everything looks good and we'll be able to mount the device, so deferred remove will be re-enabled at that point. */
2546 restore_deferred_remove = mfree(restore_deferred_remove);
2547
2548 d->decrypted[d->n_decrypted++] = (DecryptedPartition) {
2549 .name = TAKE_PTR(name),
2550 .device = TAKE_PTR(cd),
2551 };
2552
2553 m->decrypted_node = TAKE_PTR(node);
2554
2555 return 0;
2556 }
2557 #endif
2558
2559 int dissected_image_decrypt(
2560 DissectedImage *m,
2561 const char *passphrase,
2562 const VeritySettings *verity,
2563 DissectImageFlags flags,
2564 DecryptedImage **ret) {
2565
2566 #if HAVE_LIBCRYPTSETUP
2567 _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
2568 int r;
2569 #endif
2570
2571 assert(m);
2572 assert(!verity || verity->root_hash || verity->root_hash_size == 0);
2573
2574 /* Returns:
2575 *
2576 * = 0 → There was nothing to decrypt
2577 * > 0 → Decrypted successfully
2578 * -ENOKEY → There's something to decrypt but no key was supplied
2579 * -EKEYREJECTED → Passed key was not correct
2580 */
2581
2582 if (verity && verity->root_hash && verity->root_hash_size < sizeof(sd_id128_t))
2583 return -EINVAL;
2584
2585 if (!m->encrypted && !m->verity_ready) {
2586 *ret = NULL;
2587 return 0;
2588 }
2589
2590 #if HAVE_LIBCRYPTSETUP
2591 d = new0(DecryptedImage, 1);
2592 if (!d)
2593 return -ENOMEM;
2594
2595 for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
2596 DissectedPartition *p = m->partitions + i;
2597 PartitionDesignator k;
2598
2599 if (!p->found)
2600 continue;
2601
2602 r = decrypt_partition(p, passphrase, flags, d);
2603 if (r < 0)
2604 return r;
2605
2606 k = PARTITION_VERITY_OF(i);
2607 if (k >= 0) {
2608 r = verity_partition(i, p, m->partitions + k, verity, flags | DISSECT_IMAGE_VERITY_SHARE, d);
2609 if (r < 0)
2610 return r;
2611 }
2612
2613 if (!p->decrypted_fstype && p->decrypted_node) {
2614 r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype);
2615 if (r < 0 && r != -EUCLEAN)
2616 return r;
2617 }
2618 }
2619
2620 *ret = TAKE_PTR(d);
2621
2622 return 1;
2623 #else
2624 return -EOPNOTSUPP;
2625 #endif
2626 }
2627
2628 int dissected_image_decrypt_interactively(
2629 DissectedImage *m,
2630 const char *passphrase,
2631 const VeritySettings *verity,
2632 DissectImageFlags flags,
2633 DecryptedImage **ret) {
2634
2635 _cleanup_strv_free_erase_ char **z = NULL;
2636 int n = 3, r;
2637
2638 if (passphrase)
2639 n--;
2640
2641 for (;;) {
2642 r = dissected_image_decrypt(m, passphrase, verity, flags, ret);
2643 if (r >= 0)
2644 return r;
2645 if (r == -EKEYREJECTED)
2646 log_error_errno(r, "Incorrect passphrase, try again!");
2647 else if (r != -ENOKEY)
2648 return log_error_errno(r, "Failed to decrypt image: %m");
2649
2650 if (--n < 0)
2651 return log_error_errno(SYNTHETIC_ERRNO(EKEYREJECTED),
2652 "Too many retries.");
2653
2654 z = strv_free(z);
2655
2656 r = ask_password_auto("Please enter image passphrase:", NULL, "dissect", "dissect", "dissect.passphrase", USEC_INFINITY, 0, &z);
2657 if (r < 0)
2658 return log_error_errno(r, "Failed to query for passphrase: %m");
2659
2660 passphrase = z[0];
2661 }
2662 }
2663
2664 int decrypted_image_relinquish(DecryptedImage *d) {
2665 assert(d);
2666
2667 /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a
2668 * boolean so that we don't clean it up ourselves either anymore */
2669
2670 #if HAVE_LIBCRYPTSETUP
2671 int r;
2672
2673 for (size_t i = 0; i < d->n_decrypted; i++) {
2674 DecryptedPartition *p = d->decrypted + i;
2675
2676 if (p->relinquished)
2677 continue;
2678
2679 r = sym_crypt_deactivate_by_name(NULL, p->name, CRYPT_DEACTIVATE_DEFERRED);
2680 if (r < 0)
2681 return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
2682
2683 p->relinquished = true;
2684 }
2685 #endif
2686
2687 return 0;
2688 }
2689
/* Derives the path of an auxiliary file that accompanies an image: if the image path ends in ".raw"
 * that extension is replaced by 'suffix', otherwise 'suffix' is appended to the complete image path.
 * Returns a newly allocated string the caller has to free, or NULL if memory allocation failed. */
static char *build_auxiliary_path(const char *image, const char *suffix) {
        static const char raw_extension[] = ".raw";
        const size_t raw_extension_len = sizeof(raw_extension) - 1;
        size_t stem_len, suffix_len;
        char *n;

        assert(image);
        assert(suffix);

        stem_len = strlen(image);
        suffix_len = strlen(suffix);

        /* Cut off a trailing ".raw". Any other image name is used in full; it must not be dropped, as
         * otherwise the resulting path would consist of the suffix alone. */
        if (stem_len >= raw_extension_len &&
            memcmp(image + stem_len - raw_extension_len, raw_extension, raw_extension_len) == 0)
                stem_len -= raw_extension_len;

        n = malloc(stem_len + suffix_len + 1);
        if (!n)
                return NULL;

        memcpy(n, image, stem_len);
        memcpy(n + stem_len, suffix, suffix_len + 1); /* the +1 copies the trailing NUL, too */
        return n;
}
2708
2709 void verity_settings_done(VeritySettings *v) {
2710 assert(v);
2711
2712 v->root_hash = mfree(v->root_hash);
2713 v->root_hash_size = 0;
2714
2715 v->root_hash_sig = mfree(v->root_hash_sig);
2716 v->root_hash_sig_size = 0;
2717
2718 v->data_path = mfree(v->data_path);
2719 }
2720
2721 int verity_settings_load(
2722 VeritySettings *verity,
2723 const char *image,
2724 const char *root_hash_path,
2725 const char *root_hash_sig_path) {
2726
2727 _cleanup_free_ void *root_hash = NULL, *root_hash_sig = NULL;
2728 size_t root_hash_size = 0, root_hash_sig_size = 0;
2729 _cleanup_free_ char *verity_data_path = NULL;
2730 PartitionDesignator designator;
2731 int r;
2732
2733 assert(verity);
2734 assert(image);
2735 assert(verity->designator < 0 || IN_SET(verity->designator, PARTITION_ROOT, PARTITION_USR));
2736
2737 /* If we are asked to load the root hash for a device node, exit early */
2738 if (is_device_path(image))
2739 return 0;
2740
2741 r = getenv_bool_secure("SYSTEMD_DISSECT_VERITY_SIDECAR");
2742 if (r < 0 && r != -ENXIO)
2743 log_debug_errno(r, "Failed to parse $SYSTEMD_DISSECT_VERITY_SIDECAR, ignoring: %m");
2744 if (r == 0)
2745 return 0;
2746
2747 designator = verity->designator;
2748
2749 /* We only fill in what isn't already filled in */
2750
2751 if (!verity->root_hash) {
2752 _cleanup_free_ char *text = NULL;
2753
2754 if (root_hash_path) {
2755 /* If explicitly specified it takes precedence */
2756 r = read_one_line_file(root_hash_path, &text);
2757 if (r < 0)
2758 return r;
2759
2760 if (designator < 0)
2761 designator = PARTITION_ROOT;
2762 } else {
2763 /* Otherwise look for xattr and separate file, and first for the data for root and if
2764 * that doesn't exist for /usr */
2765
2766 if (designator < 0 || designator == PARTITION_ROOT) {
2767 r = getxattr_malloc(image, "user.verity.roothash", &text, true);
2768 if (r < 0) {
2769 _cleanup_free_ char *p = NULL;
2770
2771 if (!IN_SET(r, -ENODATA, -ENOENT) && !ERRNO_IS_NOT_SUPPORTED(r))
2772 return r;
2773
2774 p = build_auxiliary_path(image, ".roothash");
2775 if (!p)
2776 return -ENOMEM;
2777
2778 r = read_one_line_file(p, &text);
2779 if (r < 0 && r != -ENOENT)
2780 return r;
2781 }
2782
2783 if (text)
2784 designator = PARTITION_ROOT;
2785 }
2786
2787 if (!text && (designator < 0 || designator == PARTITION_USR)) {
2788 /* So in the "roothash" xattr/file name above the "root" of course primarily
2789 * refers to the root of the Verity Merkle tree. But coincidentally it also
2790 * is the hash for the *root* file system, i.e. the "root" neatly refers to
2791 * two distinct concepts called "root". Taking benefit of this happy
2792 * coincidence we call the file with the root hash for the /usr/ file system
2793 * `usrhash`, because `usrroothash` or `rootusrhash` would just be too
2794 * confusing. We thus drop the reference to the root of the Merkle tree, and
2795 * just indicate which file system it's about. */
2796 r = getxattr_malloc(image, "user.verity.usrhash", &text, true);
2797 if (r < 0) {
2798 _cleanup_free_ char *p = NULL;
2799
2800 if (!IN_SET(r, -ENODATA, -ENOENT) && !ERRNO_IS_NOT_SUPPORTED(r))
2801 return r;
2802
2803 p = build_auxiliary_path(image, ".usrhash");
2804 if (!p)
2805 return -ENOMEM;
2806
2807 r = read_one_line_file(p, &text);
2808 if (r < 0 && r != -ENOENT)
2809 return r;
2810 }
2811
2812 if (text)
2813 designator = PARTITION_USR;
2814 }
2815 }
2816
2817 if (text) {
2818 r = unhexmem(text, strlen(text), &root_hash, &root_hash_size);
2819 if (r < 0)
2820 return r;
2821 if (root_hash_size < sizeof(sd_id128_t))
2822 return -EINVAL;
2823 }
2824 }
2825
2826 if ((root_hash || verity->root_hash) && !verity->root_hash_sig) {
2827 if (root_hash_sig_path) {
2828 r = read_full_file(root_hash_sig_path, (char**) &root_hash_sig, &root_hash_sig_size);
2829 if (r < 0 && r != -ENOENT)
2830 return r;
2831
2832 if (designator < 0)
2833 designator = PARTITION_ROOT;
2834 } else {
2835 if (designator < 0 || designator == PARTITION_ROOT) {
2836 _cleanup_free_ char *p = NULL;
2837
2838 /* Follow naming convention recommended by the relevant RFC:
2839 * https://tools.ietf.org/html/rfc5751#section-3.2.1 */
2840 p = build_auxiliary_path(image, ".roothash.p7s");
2841 if (!p)
2842 return -ENOMEM;
2843
2844 r = read_full_file(p, (char**) &root_hash_sig, &root_hash_sig_size);
2845 if (r < 0 && r != -ENOENT)
2846 return r;
2847 if (r >= 0)
2848 designator = PARTITION_ROOT;
2849 }
2850
2851 if (!root_hash_sig && (designator < 0 || designator == PARTITION_USR)) {
2852 _cleanup_free_ char *p = NULL;
2853
2854 p = build_auxiliary_path(image, ".usrhash.p7s");
2855 if (!p)
2856 return -ENOMEM;
2857
2858 r = read_full_file(p, (char**) &root_hash_sig, &root_hash_sig_size);
2859 if (r < 0 && r != -ENOENT)
2860 return r;
2861 if (r >= 0)
2862 designator = PARTITION_USR;
2863 }
2864 }
2865
2866 if (root_hash_sig && root_hash_sig_size == 0) /* refuse empty size signatures */
2867 return -EINVAL;
2868 }
2869
2870 if (!verity->data_path) {
2871 _cleanup_free_ char *p = NULL;
2872
2873 p = build_auxiliary_path(image, ".verity");
2874 if (!p)
2875 return -ENOMEM;
2876
2877 if (access(p, F_OK) < 0) {
2878 if (errno != ENOENT)
2879 return -errno;
2880 } else
2881 verity_data_path = TAKE_PTR(p);
2882 }
2883
2884 if (root_hash) {
2885 verity->root_hash = TAKE_PTR(root_hash);
2886 verity->root_hash_size = root_hash_size;
2887 }
2888
2889 if (root_hash_sig) {
2890 verity->root_hash_sig = TAKE_PTR(root_hash_sig);
2891 verity->root_hash_sig_size = root_hash_sig_size;
2892 }
2893
2894 if (verity_data_path)
2895 verity->data_path = TAKE_PTR(verity_data_path);
2896
2897 if (verity->designator < 0)
2898 verity->designator = designator;
2899
2900 return 1;
2901 }
2902
2903 int dissected_image_load_verity_sig_partition(
2904 DissectedImage *m,
2905 int fd,
2906 VeritySettings *verity) {
2907
2908 _cleanup_free_ void *root_hash = NULL, *root_hash_sig = NULL;
2909 _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
2910 size_t root_hash_size, root_hash_sig_size;
2911 _cleanup_free_ char *buf = NULL;
2912 PartitionDesignator d;
2913 DissectedPartition *p;
2914 JsonVariant *rh, *sig;
2915 ssize_t n;
2916 char *e;
2917 int r;
2918
2919 assert(m);
2920 assert(fd >= 0);
2921 assert(verity);
2922
2923 if (verity->root_hash && verity->root_hash_sig) /* Already loaded? */
2924 return 0;
2925
2926 r = getenv_bool_secure("SYSTEMD_DISSECT_VERITY_EMBEDDED");
2927 if (r < 0 && r != -ENXIO)
2928 log_debug_errno(r, "Failed to parse $SYSTEMD_DISSECT_VERITY_EMBEDDED, ignoring: %m");
2929 if (r == 0)
2930 return 0;
2931
2932 d = PARTITION_VERITY_SIG_OF(verity->designator < 0 ? PARTITION_ROOT : verity->designator);
2933 assert(d >= 0);
2934
2935 p = m->partitions + d;
2936 if (!p->found)
2937 return 0;
2938 if (p->offset == UINT64_MAX || p->size == UINT64_MAX)
2939 return -EINVAL;
2940
2941 if (p->size > 4*1024*1024) /* Signature data cannot possible be larger than 4M, refuse that */
2942 return -EFBIG;
2943
2944 buf = new(char, p->size+1);
2945 if (!buf)
2946 return -ENOMEM;
2947
2948 n = pread(fd, buf, p->size, p->offset);
2949 if (n < 0)
2950 return -ENOMEM;
2951 if ((uint64_t) n != p->size)
2952 return -EIO;
2953
2954 e = memchr(buf, 0, p->size);
2955 if (e) {
2956 /* If we found a NUL byte then the rest of the data must be NUL too */
2957 if (!memeqzero(e, p->size - (e - buf)))
2958 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Signature data contains embedded NUL byte.");
2959 } else
2960 buf[p->size] = 0;
2961
2962 r = json_parse(buf, 0, &v, NULL, NULL);
2963 if (r < 0)
2964 return log_debug_errno(r, "Failed to parse signature JSON data: %m");
2965
2966 rh = json_variant_by_key(v, "rootHash");
2967 if (!rh)
2968 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Signature JSON object lacks 'rootHash' field.");
2969 if (!json_variant_is_string(rh))
2970 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "'rootHash' field of signature JSON object is not a string.");
2971
2972 r = unhexmem(json_variant_string(rh), SIZE_MAX, &root_hash, &root_hash_size);
2973 if (r < 0)
2974 return log_debug_errno(r, "Failed to parse root hash field: %m");
2975
2976 /* Check if specified root hash matches if it is specified */
2977 if (verity->root_hash &&
2978 memcmp_nn(verity->root_hash, verity->root_hash_size, root_hash, root_hash_size) != 0) {
2979 _cleanup_free_ char *a = NULL, *b = NULL;
2980
2981 a = hexmem(root_hash, root_hash_size);
2982 b = hexmem(verity->root_hash, verity->root_hash_size);
2983
2984 return log_debug_errno(r, "Root hash in signature JSON data (%s) doesn't match configured hash (%s).", strna(a), strna(b));
2985 }
2986
2987 sig = json_variant_by_key(v, "signature");
2988 if (!sig)
2989 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Signature JSON object lacks 'signature' field.");
2990 if (!json_variant_is_string(sig))
2991 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "'signature' field of signature JSON object is not a string.");
2992
2993 r = unbase64mem(json_variant_string(sig), SIZE_MAX, &root_hash_sig, &root_hash_sig_size);
2994 if (r < 0)
2995 return log_debug_errno(r, "Failed to parse signature field: %m");
2996
2997 free_and_replace(verity->root_hash, root_hash);
2998 verity->root_hash_size = root_hash_size;
2999
3000 free_and_replace(verity->root_hash_sig, root_hash_sig);
3001 verity->root_hash_sig_size = root_hash_sig_size;
3002
3003 return 1;
3004 }
3005
3006 int dissected_image_acquire_metadata(DissectedImage *m) {
3007
3008 enum {
3009 META_HOSTNAME,
3010 META_MACHINE_ID,
3011 META_MACHINE_INFO,
3012 META_OS_RELEASE,
3013 META_EXTENSION_RELEASE,
3014 _META_MAX,
3015 };
3016
3017 static const char *const paths[_META_MAX] = {
3018 [META_HOSTNAME] = "/etc/hostname\0",
3019 [META_MACHINE_ID] = "/etc/machine-id\0",
3020 [META_MACHINE_INFO] = "/etc/machine-info\0",
3021 [META_OS_RELEASE] = ("/etc/os-release\0"
3022 "/usr/lib/os-release\0"),
3023 [META_EXTENSION_RELEASE] = "extension-release\0", /* Used only for logging. */
3024 };
3025
3026 _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL, **extension_release = NULL;
3027 _cleanup_close_pair_ int error_pipe[2] = { -1, -1 };
3028 _cleanup_(rmdir_and_freep) char *t = NULL;
3029 _cleanup_(sigkill_waitp) pid_t child = 0;
3030 sd_id128_t machine_id = SD_ID128_NULL;
3031 _cleanup_free_ char *hostname = NULL;
3032 unsigned n_meta_initialized = 0;
3033 int fds[2 * _META_MAX], r, v;
3034 ssize_t n;
3035
3036 BLOCK_SIGNALS(SIGCHLD);
3037
3038 assert(m);
3039
3040 for (; n_meta_initialized < _META_MAX; n_meta_initialized ++) {
3041 if (!paths[n_meta_initialized]) {
3042 fds[2*n_meta_initialized] = fds[2*n_meta_initialized+1] = -1;
3043 continue;
3044 }
3045
3046 if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
3047 r = -errno;
3048 goto finish;
3049 }
3050 }
3051
3052 r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
3053 if (r < 0)
3054 goto finish;
3055
3056 if (pipe2(error_pipe, O_CLOEXEC) < 0) {
3057 r = -errno;
3058 goto finish;
3059 }
3060
3061 r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, &child);
3062 if (r < 0)
3063 goto finish;
3064 if (r == 0) {
3065 error_pipe[0] = safe_close(error_pipe[0]);
3066
3067 r = dissected_image_mount(
3068 m,
3069 t,
3070 UID_INVALID,
3071 UID_INVALID,
3072 DISSECT_IMAGE_READ_ONLY|
3073 DISSECT_IMAGE_MOUNT_ROOT_ONLY|
3074 DISSECT_IMAGE_VALIDATE_OS|
3075 DISSECT_IMAGE_VALIDATE_OS_EXT|
3076 DISSECT_IMAGE_USR_NO_ROOT);
3077 if (r < 0) {
3078 /* Let parent know the error */
3079 (void) write(error_pipe[1], &r, sizeof(r));
3080
3081 log_debug_errno(r, "Failed to mount dissected image: %m");
3082 _exit(EXIT_FAILURE);
3083 }
3084
3085 for (unsigned k = 0; k < _META_MAX; k++) {
3086 _cleanup_close_ int fd = -ENOENT;
3087 const char *p;
3088
3089 if (!paths[k])
3090 continue;
3091
3092 fds[2*k] = safe_close(fds[2*k]);
3093
3094 if (k == META_EXTENSION_RELEASE) {
3095 /* As per the os-release spec, if the image is an extension it will have a file
3096 * named after the image name in extension-release.d/ - we use the image name
3097 * and try to resolve it with the extension-release helpers, as sometimes
3098 * the image names are mangled on deployment and do not match anymore.
3099 * Unlike other paths this is not fixed, and the image name
3100 * can be mangled on deployment, so by calling into the helper
3101 * we allow a fallback that matches on the first extension-release
3102 * file found in the directory, if one named after the image cannot
3103 * be found first. */
3104 r = open_extension_release(t, m->image_name, NULL, &fd);
3105 if (r < 0)
3106 fd = r; /* Propagate the error. */
3107 } else
3108 NULSTR_FOREACH(p, paths[k]) {
3109 fd = chase_symlinks_and_open(p, t, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
3110 if (fd >= 0)
3111 break;
3112 }
3113 if (fd < 0) {
3114 log_debug_errno(fd, "Failed to read %s file of image, ignoring: %m", paths[k]);
3115 fds[2*k+1] = safe_close(fds[2*k+1]);
3116 continue;
3117 }
3118
3119 r = copy_bytes(fd, fds[2*k+1], UINT64_MAX, 0);
3120 if (r < 0) {
3121 (void) write(error_pipe[1], &r, sizeof(r));
3122 _exit(EXIT_FAILURE);
3123 }
3124
3125 fds[2*k+1] = safe_close(fds[2*k+1]);
3126 }
3127
3128 _exit(EXIT_SUCCESS);
3129 }
3130
3131 error_pipe[1] = safe_close(error_pipe[1]);
3132
3133 for (unsigned k = 0; k < _META_MAX; k++) {
3134 _cleanup_fclose_ FILE *f = NULL;
3135
3136 if (!paths[k])
3137 continue;
3138
3139 fds[2*k+1] = safe_close(fds[2*k+1]);
3140
3141 f = take_fdopen(&fds[2*k], "r");
3142 if (!f) {
3143 r = -errno;
3144 goto finish;
3145 }
3146
3147 switch (k) {
3148
3149 case META_HOSTNAME:
3150 r = read_etc_hostname_stream(f, &hostname);
3151 if (r < 0)
3152 log_debug_errno(r, "Failed to read /etc/hostname: %m");
3153
3154 break;
3155
3156 case META_MACHINE_ID: {
3157 _cleanup_free_ char *line = NULL;
3158
3159 r = read_line(f, LONG_LINE_MAX, &line);
3160 if (r < 0)
3161 log_debug_errno(r, "Failed to read /etc/machine-id: %m");
3162 else if (r == 33) {
3163 r = sd_id128_from_string(line, &machine_id);
3164 if (r < 0)
3165 log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
3166 } else if (r == 0)
3167 log_debug("/etc/machine-id file is empty.");
3168 else if (streq(line, "uninitialized"))
3169 log_debug("/etc/machine-id file is uninitialized (likely aborted first boot).");
3170 else
3171 log_debug("/etc/machine-id has unexpected length %i.", r);
3172
3173 break;
3174 }
3175
3176 case META_MACHINE_INFO:
3177 r = load_env_file_pairs(f, "machine-info", &machine_info);
3178 if (r < 0)
3179 log_debug_errno(r, "Failed to read /etc/machine-info: %m");
3180
3181 break;
3182
3183 case META_OS_RELEASE:
3184 r = load_env_file_pairs(f, "os-release", &os_release);
3185 if (r < 0)
3186 log_debug_errno(r, "Failed to read OS release file: %m");
3187
3188 break;
3189
3190 case META_EXTENSION_RELEASE:
3191 r = load_env_file_pairs(f, "extension-release", &extension_release);
3192 if (r < 0)
3193 log_debug_errno(r, "Failed to read extension release file: %m");
3194
3195 break;
3196 }
3197 }
3198
3199 r = wait_for_terminate_and_check("(sd-dissect)", child, 0);
3200 child = 0;
3201 if (r < 0)
3202 return r;
3203
3204 n = read(error_pipe[0], &v, sizeof(v));
3205 if (n < 0)
3206 return -errno;
3207 if (n == sizeof(v))
3208 return v; /* propagate error sent to us from child */
3209 if (n != 0)
3210 return -EIO;
3211
3212 if (r != EXIT_SUCCESS)
3213 return -EPROTO;
3214
3215 free_and_replace(m->hostname, hostname);
3216 m->machine_id = machine_id;
3217 strv_free_and_replace(m->machine_info, machine_info);
3218 strv_free_and_replace(m->os_release, os_release);
3219 strv_free_and_replace(m->extension_release, extension_release);
3220
3221 finish:
3222 for (unsigned k = 0; k < n_meta_initialized; k++)
3223 safe_close_pair(fds + 2*k);
3224
3225 return r;
3226 }
3227
3228 int dissect_image_and_warn(
3229 int fd,
3230 const char *name,
3231 const VeritySettings *verity,
3232 const MountOptions *mount_options,
3233 uint64_t diskseq,
3234 uint64_t uevent_seqnum_not_before,
3235 usec_t timestamp_not_before,
3236 DissectImageFlags flags,
3237 DissectedImage **ret) {
3238
3239 _cleanup_free_ char *buffer = NULL;
3240 int r;
3241
3242 if (!name) {
3243 r = fd_get_path(fd, &buffer);
3244 if (r < 0)
3245 return r;
3246
3247 name = buffer;
3248 }
3249
3250 r = dissect_image(fd, verity, mount_options, diskseq, uevent_seqnum_not_before, timestamp_not_before, flags, ret);
3251 switch (r) {
3252
3253 case -EOPNOTSUPP:
3254 return log_error_errno(r, "Dissecting images is not supported, compiled without blkid support.");
3255
3256 case -ENOPKG:
3257 return log_error_errno(r, "Couldn't identify a suitable partition table or file system in '%s'.", name);
3258
3259 case -EADDRNOTAVAIL:
3260 return log_error_errno(r, "No root partition for specified root hash found in '%s'.", name);
3261
3262 case -ENOTUNIQ:
3263 return log_error_errno(r, "Multiple suitable root partitions found in image '%s'.", name);
3264
3265 case -ENXIO:
3266 return log_error_errno(r, "No suitable root partition found in image '%s'.", name);
3267
3268 case -EPROTONOSUPPORT:
3269 return log_error_errno(r, "Device '%s' is loopback block device with partition scanning turned off, please turn it on.", name);
3270
3271 default:
3272 if (r < 0)
3273 return log_error_errno(r, "Failed to dissect image '%s': %m", name);
3274
3275 return r;
3276 }
3277 }
3278
3279 bool dissected_image_verity_candidate(const DissectedImage *image, PartitionDesignator partition_designator) {
3280 assert(image);
3281
3282 /* Checks if this partition could theoretically do Verity. For non-partitioned images this only works
3283 * if there's an external verity file supplied, for which we can consult .has_verity. For partitioned
3284 * images we only check the partition type.
3285 *
3286 * This call is used to decide whether to suppress or show a verity column in tabular output of the
3287 * image. */
3288
3289 if (image->single_file_system)
3290 return partition_designator == PARTITION_ROOT && image->has_verity;
3291
3292 return PARTITION_VERITY_OF(partition_designator) >= 0;
3293 }
3294
3295 bool dissected_image_verity_ready(const DissectedImage *image, PartitionDesignator partition_designator) {
3296 PartitionDesignator k;
3297
3298 assert(image);
3299
3300 /* Checks if this partition has verity data available that we can activate. For non-partitioned this
3301 * works for the root partition, for others only if the associated verity partition was found. */
3302
3303 if (!image->verity_ready)
3304 return false;
3305
3306 if (image->single_file_system)
3307 return partition_designator == PARTITION_ROOT;
3308
3309 k = PARTITION_VERITY_OF(partition_designator);
3310 return k >= 0 && image->partitions[k].found;
3311 }
3312
3313 bool dissected_image_verity_sig_ready(const DissectedImage *image, PartitionDesignator partition_designator) {
3314 PartitionDesignator k;
3315
3316 assert(image);
3317
3318 /* Checks if this partition has verity signature data available that we can use. */
3319
3320 if (!image->verity_sig_ready)
3321 return false;
3322
3323 if (image->single_file_system)
3324 return partition_designator == PARTITION_ROOT;
3325
3326 k = PARTITION_VERITY_SIG_OF(partition_designator);
3327 return k >= 0 && image->partitions[k].found;
3328 }
3329
3330 MountOptions* mount_options_free_all(MountOptions *options) {
3331 MountOptions *m;
3332
3333 while ((m = options)) {
3334 LIST_REMOVE(mount_options, options, m);
3335 free(m->options);
3336 free(m);
3337 }
3338
3339 return NULL;
3340 }
3341
3342 const char* mount_options_from_designator(const MountOptions *options, PartitionDesignator designator) {
3343 const MountOptions *m;
3344
3345 LIST_FOREACH(mount_options, m, options)
3346 if (designator == m->partition_designator && !isempty(m->options))
3347 return m->options;
3348
3349 return NULL;
3350 }
3351
3352 int mount_image_privately_interactively(
3353 const char *image,
3354 DissectImageFlags flags,
3355 char **ret_directory,
3356 LoopDevice **ret_loop_device,
3357 DecryptedImage **ret_decrypted_image) {
3358
3359 _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
3360 _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
3361 _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
3362 _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
3363 _cleanup_(rmdir_and_freep) char *created_dir = NULL;
3364 _cleanup_free_ char *temp = NULL;
3365 int r;
3366
3367 /* Mounts an OS image at a temporary place, inside a newly created mount namespace of our own. This
3368 * is used by tools such as systemd-tmpfiles or systemd-firstboot to operate on some disk image
3369 * easily. */
3370
3371 assert(image);
3372 assert(ret_directory);
3373 assert(ret_loop_device);
3374 assert(ret_decrypted_image);
3375
3376 r = verity_settings_load(&verity, image, NULL, NULL);
3377 if (r < 0)
3378 return log_error_errno(r, "Failed to load root hash data: %m");
3379
3380 r = tempfn_random_child(NULL, program_invocation_short_name, &temp);
3381 if (r < 0)
3382 return log_error_errno(r, "Failed to generate temporary mount directory: %m");
3383
3384 r = loop_device_make_by_path(
3385 image,
3386 FLAGS_SET(flags, DISSECT_IMAGE_DEVICE_READ_ONLY) ? O_RDONLY : O_RDWR,
3387 FLAGS_SET(flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
3388 &d);
3389 if (r < 0)
3390 return log_error_errno(r, "Failed to set up loopback device for %s: %m", image);
3391
3392 r = dissect_image_and_warn(d->fd, image, &verity, NULL, d->diskseq, d->uevent_seqnum_not_before, d->timestamp_not_before, flags, &dissected_image);
3393 if (r < 0)
3394 return r;
3395
3396 r = dissected_image_load_verity_sig_partition(dissected_image, d->fd, &verity);
3397 if (r < 0)
3398 return r;
3399
3400 r = dissected_image_decrypt_interactively(dissected_image, NULL, &verity, flags, &decrypted_image);
3401 if (r < 0)
3402 return r;
3403
3404 r = detach_mount_namespace();
3405 if (r < 0)
3406 return log_error_errno(r, "Failed to detach mount namespace: %m");
3407
3408 r = mkdir_p(temp, 0700);
3409 if (r < 0)
3410 return log_error_errno(r, "Failed to create mount point: %m");
3411
3412 created_dir = TAKE_PTR(temp);
3413
3414 r = dissected_image_mount_and_warn(dissected_image, created_dir, UID_INVALID, UID_INVALID, flags);
3415 if (r < 0)
3416 return r;
3417
3418 if (decrypted_image) {
3419 r = decrypted_image_relinquish(decrypted_image);
3420 if (r < 0)
3421 return log_error_errno(r, "Failed to relinquish DM devices: %m");
3422 }
3423
3424 loop_device_relinquish(d);
3425
3426 *ret_directory = TAKE_PTR(created_dir);
3427 *ret_loop_device = TAKE_PTR(d);
3428 *ret_decrypted_image = TAKE_PTR(decrypted_image);
3429
3430 return 0;
3431 }
3432
3433 static const char *const partition_designator_table[] = {
3434 [PARTITION_ROOT] = "root",
3435 [PARTITION_ROOT_SECONDARY] = "root-secondary",
3436 [PARTITION_USR] = "usr",
3437 [PARTITION_USR_SECONDARY] = "usr-secondary",
3438 [PARTITION_HOME] = "home",
3439 [PARTITION_SRV] = "srv",
3440 [PARTITION_ESP] = "esp",
3441 [PARTITION_XBOOTLDR] = "xbootldr",
3442 [PARTITION_SWAP] = "swap",
3443 [PARTITION_ROOT_VERITY] = "root-verity",
3444 [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
3445 [PARTITION_USR_VERITY] = "usr-verity",
3446 [PARTITION_USR_SECONDARY_VERITY] = "usr-secondary-verity",
3447 [PARTITION_ROOT_VERITY_SIG] = "root-verity-sig",
3448 [PARTITION_ROOT_SECONDARY_VERITY_SIG] = "root-secondary-verity-sig",
3449 [PARTITION_USR_VERITY_SIG] = "usr-verity-sig",
3450 [PARTITION_USR_SECONDARY_VERITY_SIG] = "usr-secondary-verity-sig",
3451 [PARTITION_TMP] = "tmp",
3452 [PARTITION_VAR] = "var",
3453 };
3454
3455 int verity_dissect_and_mount(
3456 const char *src,
3457 const char *dest,
3458 const MountOptions *options,
3459 const char *required_host_os_release_id,
3460 const char *required_host_os_release_version_id,
3461 const char *required_host_os_release_sysext_level) {
3462
3463 _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
3464 _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
3465 _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
3466 _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
3467 DissectImageFlags dissect_image_flags;
3468 int r;
3469
3470 assert(src);
3471 assert(dest);
3472
3473 r = verity_settings_load(&verity, src, NULL, NULL);
3474 if (r < 0)
3475 return log_debug_errno(r, "Failed to load root hash: %m");
3476
3477 dissect_image_flags = verity.data_path ? DISSECT_IMAGE_NO_PARTITION_TABLE : 0;
3478
3479 r = loop_device_make_by_path(
3480 src,
3481 -1,
3482 verity.data_path ? 0 : LO_FLAGS_PARTSCAN,
3483 &loop_device);
3484 if (r < 0)
3485 return log_debug_errno(r, "Failed to create loop device for image: %m");
3486
3487 r = dissect_image(
3488 loop_device->fd,
3489 &verity,
3490 options,
3491 loop_device->diskseq,
3492 loop_device->uevent_seqnum_not_before,
3493 loop_device->timestamp_not_before,
3494 dissect_image_flags,
3495 &dissected_image);
3496 /* No partition table? Might be a single-filesystem image, try again */
3497 if (!verity.data_path && r == -ENOPKG)
3498 r = dissect_image(
3499 loop_device->fd,
3500 &verity,
3501 options,
3502 loop_device->diskseq,
3503 loop_device->uevent_seqnum_not_before,
3504 loop_device->timestamp_not_before,
3505 dissect_image_flags | DISSECT_IMAGE_NO_PARTITION_TABLE,
3506 &dissected_image);
3507 if (r < 0)
3508 return log_debug_errno(r, "Failed to dissect image: %m");
3509
3510 r = dissected_image_load_verity_sig_partition(dissected_image, loop_device->fd, &verity);
3511 if (r < 0)
3512 return r;
3513
3514 r = dissected_image_decrypt(
3515 dissected_image,
3516 NULL,
3517 &verity,
3518 dissect_image_flags,
3519 &decrypted_image);
3520 if (r < 0)
3521 return log_debug_errno(r, "Failed to decrypt dissected image: %m");
3522
3523 r = mkdir_p_label(dest, 0755);
3524 if (r < 0)
3525 return log_debug_errno(r, "Failed to create destination directory %s: %m", dest);
3526 r = umount_recursive(dest, 0);
3527 if (r < 0)
3528 return log_debug_errno(r, "Failed to umount under destination directory %s: %m", dest);
3529
3530 r = dissected_image_mount(dissected_image, dest, UID_INVALID, UID_INVALID, dissect_image_flags);
3531 if (r < 0)
3532 return log_debug_errno(r, "Failed to mount image: %m");
3533
3534 /* If we got os-release values from the caller, then we need to match them with the image's
3535 * extension-release.d/ content. Return -EINVAL if there's any mismatch.
3536 * First, check the distro ID. If that matches, then check the new SYSEXT_LEVEL value if
3537 * available, or else fallback to VERSION_ID. */
3538 if (required_host_os_release_id &&
3539 (required_host_os_release_version_id || required_host_os_release_sysext_level)) {
3540 _cleanup_strv_free_ char **extension_release = NULL;
3541
3542 r = load_extension_release_pairs(dest, dissected_image->image_name, &extension_release);
3543 if (r < 0)
3544 return log_debug_errno(r, "Failed to parse image %s extension-release metadata: %m", dissected_image->image_name);
3545
3546 r = extension_release_validate(
3547 dissected_image->image_name,
3548 required_host_os_release_id,
3549 required_host_os_release_version_id,
3550 required_host_os_release_sysext_level,
3551 extension_release);
3552 if (r == 0)
3553 return log_debug_errno(SYNTHETIC_ERRNO(ESTALE), "Image %s extension-release metadata does not match the root's", dissected_image->image_name);
3554 if (r < 0)
3555 return log_debug_errno(r, "Failed to compare image %s extension-release metadata with the root's os-release: %m", dissected_image->image_name);
3556 }
3557
3558 if (decrypted_image) {
3559 r = decrypted_image_relinquish(decrypted_image);
3560 if (r < 0)
3561 return log_debug_errno(r, "Failed to relinquish decrypted image: %m");
3562 }
3563
3564 loop_device_relinquish(loop_device);
3565
3566 return 0;
3567 }
3568
/* Instantiates the string lookup helpers for PartitionDesignator. NOTE(review): the macro is defined
 * outside this chunk; presumably it generates partition_designator_to_string() and
 * partition_designator_from_string() on top of partition_designator_table[] — confirm in
 * string-table.h. */
3569 DEFINE_STRING_TABLE_LOOKUP(partition_designator, PartitionDesignator);