1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
3 #if HAVE_VALGRIND_MEMCHECK_H
4 #include <valgrind/memcheck.h>
7 #include <linux/dm-ioctl.h>
8 #include <linux/loop.h>
10 #include <sys/prctl.h>
15 #include <openssl/err.h>
16 #include <openssl/pem.h>
17 #include <openssl/x509.h>
20 #include "sd-device.h"
23 #include "architecture.h"
24 #include "ask-password-api.h"
25 #include "blkid-util.h"
26 #include "blockdev-util.h"
27 #include "chase-symlinks.h"
28 #include "conf-files.h"
30 #include "cryptsetup-util.h"
32 #include "device-nodes.h"
33 #include "device-util.h"
34 #include "discover-image.h"
35 #include "dissect-image.h"
39 #include "extension-release.h"
43 #include "fsck-util.h"
45 #include "hexdecoct.h"
46 #include "hostname-setup.h"
47 #include "id128-util.h"
48 #include "import-util.h"
50 #include "mkdir-label.h"
51 #include "mount-util.h"
52 #include "mountpoint-util.h"
53 #include "namespace-util.h"
54 #include "nulstr-util.h"
55 #include "openssl-util.h"
57 #include "path-util.h"
58 #include "process-util.h"
59 #include "raw-clone.h"
60 #include "resize-fs.h"
61 #include "signal-util.h"
62 #include "stat-util.h"
63 #include "stdio-util.h"
64 #include "string-table.h"
65 #include "string-util.h"
67 #include "tmpfile-util.h"
68 #include "udev-util.h"
69 #include "user-util.h"
70 #include "xattr-util.h"
72 /* how many times to wait for the device nodes to appear */
73 #define N_DEVICE_NODE_LIST_ATTEMPTS 10
/* Opens a libblkid probe on node via blkid_new_probe_from_filename() and runs a safe probe that is
 * restricted to superblock TYPE information. The detected type is then looked up under the TYPE key. */
75 int probe_filesystem(const char *node
, char **ret_fstype
) {
76 /* Try to find device content type and return it in *ret_fstype. If nothing is found,
77 * 0/NULL will be returned. -EUCLEAN will be returned for ambiguous results, and a
78 * different error otherwise. */
81 _cleanup_(blkid_free_probep
) blkid_probe b
= NULL
;
86 b
= blkid_new_probe_from_filename(node
);
88 return errno_or_else(ENOMEM
);
/* Only superblock probing is enabled, and only the TYPE field is requested */
90 blkid_probe_enable_superblocks(b
, 1);
91 blkid_probe_set_superblocks_flags(b
, BLKID_SUBLKS_TYPE
);
94 r
= blkid_do_safeprobe(b
);
96 log_debug("No type detected on partition %s", node
);
100 return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN
),
101 "Results ambiguous for partition %s", node
);
103 return errno_or_else(EIO
);
/* The return value of the lookup is explicitly discarded via the (void) cast */
105 (void) blkid_probe_lookup_value(b
, "TYPE", &fstype
, NULL
);
/* Allocates a device enumerator and narrows it down with three filters: subsystem block, parent
 * device d, and the partition sysattr (matched with a NULL value).
 * NOTE(review): presumably the enumerator is handed to the caller through ret on success — confirm. */
127 static int enumerator_for_parent(sd_device
*d
, sd_device_enumerator
**ret
) {
128 _cleanup_(sd_device_enumerator_unrefp
) sd_device_enumerator
*e
= NULL
;
134 r
= sd_device_enumerator_new(&e
);
138 r
= sd_device_enumerator_add_match_subsystem(e
, "block", true);
142 r
= sd_device_enumerator_add_match_parent(e
, d
);
146 r
= sd_device_enumerator_add_match_sysattr(e
, "partition", NULL
, true);
/* Decides whether device d is the kernel device for the libblkid partition pp below expected_parent.
 * Checked in this order: subsystem must be block, devtype must be partition, the syspath of the parent
 * of d must equal the syspath of expected_parent, and finally the partition number (PARTN property or
 * partition sysattr), the start sysattr and the size sysattr must equal what libblkid reports for pp.
 * Returns false for devices with no parent or a different parent; errno_or_else(EIO) follows each
 * libblkid getter (apparently its failure path).
 * NOTE(review): presumably returns true on a full match and a negative errno on other failures — confirm. */
154 static int device_is_partition(
156 sd_device
*expected_parent
,
157 blkid_partition pp
) {
159 const char *v
, *parent_syspath
, *expected_parent_syspath
;
160 blkid_loff_t bsize
, bstart
;
161 uint64_t size
, start
;
162 int partno
, bpartno
, r
;
166 assert(expected_parent
);
169 r
= sd_device_get_subsystem(d
, &v
);
172 if (!streq(v
, "block"))
175 if (sd_device_get_devtype(d
, &v
) < 0 || !streq(v
, "partition"))
178 r
= sd_device_get_parent(d
, &parent
);
180 return false; /* Doesn't have a parent? Not relevant to us */
182 r
= sd_device_get_syspath(parent
, &parent_syspath
); /* Check parent of device of this action */
186 r
= sd_device_get_syspath(expected_parent
, &expected_parent_syspath
); /* Check parent of device we are looking for */
190 if (!path_equal(parent_syspath
, expected_parent_syspath
))
191 return false; /* Has a different parent than what we need, not interesting to us */
193 /* On kernel uevents we may find the partition number in the PARTN= field. Let's use that preferably,
194 * since it's cheaper and more importantly: the sysfs attribute "partition" appears to become
195 * available late, hence let's use the property instead, which is available at the moment we see the
197 r
= sd_device_get_property_value(d
, "PARTN", &v
);
199 r
= sd_device_get_sysattr_value(d
, "partition", &v
);
203 r
= safe_atoi(v
, &partno
);
208 bpartno
= blkid_partition_get_partno(pp
);
210 return errno_or_else(EIO
);
212 if (partno
!= bpartno
)
215 r
= sd_device_get_sysattr_value(d
, "start", &v
);
218 r
= safe_atou64(v
, &start
);
223 bstart
= blkid_partition_get_start(pp
);
225 return errno_or_else(EIO
);
227 if (start
!= (uint64_t) bstart
)
230 r
= sd_device_get_sysattr_value(d
, "size", &v
);
233 r
= safe_atou64(v
, &size
);
238 bsize
= blkid_partition_get_size(pp
);
240 return errno_or_else(EIO
);
242 if (size
!= (uint64_t) bsize
)
/* Enumerates the partition block devices below parent (enumerator_for_parent()) and tests each one
 * against pp with device_is_partition(); a new reference to the matching device is stored in *ret.
 * Unless DISSECT_IMAGE_NO_UDEV is set in flags, the udev initialization timestamp of each candidate
 * is consulted: -EBUSY means udev has not initialized the device yet, and with a timestamp_not_before
 * other than USEC_INFINITY an entry initialized before that time is considered not ours.
 * NOTE(review): such candidates are presumably skipped — confirm. */
248 static int find_partition(
251 usec_t timestamp_not_before
,
252 DissectImageFlags flags
,
255 _cleanup_(sd_device_enumerator_unrefp
) sd_device_enumerator
*e
= NULL
;
263 r
= enumerator_for_parent(parent
, &e
);
267 FOREACH_DEVICE(e
, q
) {
270 if (!FLAGS_SET(flags
, DISSECT_IMAGE_NO_UDEV
)) {
271 r
= sd_device_get_usec_initialized(q
, &usec
);
272 if (r
== -EBUSY
) /* Not initialized yet */
277 if (timestamp_not_before
!= USEC_INFINITY
&&
278 usec
< timestamp_not_before
) /* udev database entry older than our attachment? Then it's not ours */
282 r
= device_is_partition(q
, parent
, pp
);
286 *ret
= sd_device_ref(q
);
295 sd_device
*parent_device
;
296 blkid_partition blkidp
;
299 uint64_t uevent_seqnum_not_before
;
300 usec_t timestamp_not_before
;
301 DissectImageFlags flags
;
/* Destructor for struct wait_data, suitable for use with _cleanup_(): releases the device reference
 * held in d->found. */
304 static inline void wait_data_done(struct wait_data
*d
) {
305 sd_device_unref(d
->found
);
/* Device monitor callback; userdata is the struct wait_data of the waiter.
 * Incoming uevents are filtered before they are looked at: remove actions are special-cased, and then
 * either the diskseq of the device is compared with w->diskseq (when that is non-zero) or, failing
 * that, the uevent seqnum is compared with w->uevent_seqnum_not_before (unless that is UINT64_MAX).
 * Events that pass are tested with device_is_partition(); on a match a reference to the device is
 * stored in w->found. The event loop of the monitor is left via sd_event_exit() with r as exit code. */
308 static int device_monitor_handler(sd_device_monitor
*monitor
, sd_device
*device
, void *userdata
) {
309 struct wait_data
*w
= userdata
;
314 if (device_for_action(device
, SD_DEVICE_REMOVE
))
317 if (w
->diskseq
!= 0) {
320 /* If w->diskseq is non-zero, then we must have a disk seqnum */
321 r
= sd_device_get_diskseq(device
, &diskseq
);
323 log_debug_errno(r
, "Dropping event because it has no diskseq, but waiting for %" PRIu64
, w
->diskseq
);
326 if (diskseq
< w
->diskseq
) {
327 log_debug("Dropping event because diskseq too old (%" PRIu64
" < %" PRIu64
")",
328 diskseq
, w
->diskseq
);
331 if (diskseq
> w
->diskseq
) {
333 goto finish
; /* Newer than what we were expecting, so we missed it, stop waiting */
335 } else if (w
->uevent_seqnum_not_before
!= UINT64_MAX
) {
338 r
= sd_device_get_seqnum(device
, &seqnum
);
342 if (seqnum
<= w
->uevent_seqnum_not_before
) { /* From an older use of this loop device */
343 log_debug("Dropping event because seqnum too old (%" PRIu64
" <= %" PRIu64
")",
344 seqnum
, w
->uevent_seqnum_not_before
);
349 r
= device_is_partition(device
, w
->parent_device
, w
->blkidp
);
352 if (r
== 0) /* Not the one we need */
355 /* It's the one we need! Yay! */
357 w
->found
= sd_device_ref(device
);
361 return sd_event_exit(sd_device_monitor_get_event(monitor
), r
);
/* Timer callback for the overall deadline; userdata is the struct wait_data of the waiter.
 * Makes one last find_partition() attempt, storing a hit in w->found. The visible outcomes are:
 * a synthetic -ETIMEDOUT when the partition is still missing, the error from find_partition() when
 * the lookup itself failed, or sd_event_exit() with code 0 when the partition showed up after all. */
364 static int timeout_handler(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
365 struct wait_data
*w
= userdata
;
370 /* Why has the partition not appeared within the timeout? We may have lost some uevents, as some
371 * properties were not ready when we received the uevent... Not sure, but anyway, let's try to find the
372 * partition again before giving up. */
374 r
= find_partition(w
->parent_device
, w
->blkidp
, w
->timestamp_not_before
, w
->flags
, &w
->found
);
376 return log_debug_errno(SYNTHETIC_ERRNO(ETIMEDOUT
),
377 "Partition still not appeared after timeout reached.");
379 return log_debug_errno(r
, "Failed to find partition: %m");
381 log_debug("Partition appeared after timeout reached.");
382 return sd_event_exit(sd_event_source_get_event(s
), 0);
/* Periodic timer callback; userdata is the struct wait_data of the waiter.
 * Re-runs find_partition(), storing a hit in w->found. When the partition is found the event loop is
 * left via sd_event_exit() with code 0; a lookup failure is logged and returned. Otherwise the timer
 * is moved 500ms into the future and re-enabled as a one-shot source, so that it fires again. */
385 static int retry_handler(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
386 struct wait_data
*w
= userdata
;
391 r
= find_partition(w
->parent_device
, w
->blkidp
, w
->timestamp_not_before
, w
->flags
, &w
->found
);
394 return log_debug_errno(r
, "Failed to find partition: %m");
396 log_debug("Partition found by a periodic search.");
397 return sd_event_exit(sd_event_source_get_event(s
), 0);
400 r
= sd_event_source_set_time_relative(s
, 500 * USEC_PER_MSEC
);
404 return sd_event_source_set_enabled(s
, SD_EVENT_ONESHOT
);
/* Waits for the kernel partition device that corresponds to pp below parent and hands a reference to
 * it back in *ret.
 *
 * Sequence: look for the device right away with find_partition(); then create an event loop and a
 * device monitor filtered to block/partition devices below parent that carry the partition sysattr,
 * attach and start the monitor with device_monitor_handler(), and look once more to cover a partition
 * that appeared between the first lookup and the monitor start. A timer running timeout_handler() at
 * deadline is installed unless deadline is USEC_INFINITY, and a 500ms retry timer is added as explained
 * in the comment further down. Both timers are marked exit-on-failure. After sd_event_loop() returns,
 * the device collected in w.found is moved into *ret. */
407 static int wait_for_partition_device(
412 uint64_t uevent_seqnum_not_before
,
413 usec_t timestamp_not_before
,
414 DissectImageFlags flags
,
417 _cleanup_(sd_event_source_unrefp
) sd_event_source
*timeout_source
= NULL
, *retry_source
= NULL
;
418 _cleanup_(sd_device_monitor_unrefp
) sd_device_monitor
*monitor
= NULL
;
419 _cleanup_(sd_event_unrefp
) sd_event
*event
= NULL
;
426 r
= find_partition(parent
, pp
, timestamp_not_before
, flags
, ret
);
430 r
= sd_event_new(&event
);
434 r
= sd_device_monitor_new(&monitor
);
438 r
= sd_device_monitor_filter_add_match_subsystem_devtype(monitor
, "block", "partition");
442 r
= sd_device_monitor_filter_add_match_parent(monitor
, parent
, true);
446 r
= sd_device_monitor_filter_add_match_sysattr(monitor
, "partition", NULL
, true);
450 r
= sd_device_monitor_attach_event(monitor
, event
);
/* State shared with the monitor and timer callbacks; wait_data_done() runs when w goes out of scope */
454 _cleanup_(wait_data_done
) struct wait_data w
= {
455 .parent_device
= parent
,
458 .uevent_seqnum_not_before
= uevent_seqnum_not_before
,
459 .timestamp_not_before
= timestamp_not_before
,
463 r
= sd_device_monitor_start(monitor
, device_monitor_handler
, &w
);
467 /* Check again, the partition might have appeared in the meantime */
468 r
= find_partition(parent
, pp
, timestamp_not_before
, flags
, ret
);
472 if (deadline
!= USEC_INFINITY
) {
473 r
= sd_event_add_time(
474 event
, &timeout_source
,
475 CLOCK_MONOTONIC
, deadline
, 0,
476 timeout_handler
, &w
);
480 r
= sd_event_source_set_exit_on_failure(timeout_source
, true);
485 /* If we don't have a disk sequence number then we cannot do exact matching,
486 * and we cannot know if we missed it or if it has not been sent yet, so set
487 * up additional retries to increase the chances of receiving the event. */
489 r
= sd_event_add_time_relative(
490 event
, &retry_source
,
491 CLOCK_MONOTONIC
, 500 * USEC_PER_MSEC
, 0,
496 r
= sd_event_source_set_exit_on_failure(retry_source
, true);
501 r
= sd_event_loop(event
);
/* Ownership of the found device moves to the caller; w.found is left empty for wait_data_done() */
506 *ret
= TAKE_PTR(w
.found
);
/* Debug helper: reports every bit of pflags that is neither listed in supported nor one of the three
 * generic UEFI flags. Nothing is returned; node only appears in the log message. Note that the value
 * printed with %llu is the bit mask (1ULL << i), not the bit index. */
510 static void check_partition_flags(
512 unsigned long long pflags
,
513 unsigned long long supported
) {
517 /* Mask away all flags supported by this partition's type and the three flags the UEFI spec defines generically */
518 pflags
&= ~(supported
| GPT_FLAG_REQUIRED_PARTITION
| GPT_FLAG_NO_BLOCK_IO_PROTOCOL
| GPT_FLAG_LEGACY_BIOS_BOOTABLE
);
523 /* If there are other bits set, then log about it, to make things discoverable */
524 for (unsigned i
= 0; i
< sizeof(pflags
) * 8; i
++) {
525 unsigned long long bit
= 1ULL << i
;
526 if (!FLAGS_SET(pflags
, bit
))
529 log_debug("Unexpected partition flag %llu set on %s!", bit
, node
);
/* Wrapper around device_wait_for_initialization() that does not rely on a single wait. The time
 * between start and deadline is cut into slices of retrigger_timeout; each wait runs until the end of
 * the current slice (or until deadline, whichever comes first). When a wait ends with -ETIMEDOUT and
 * it was not the last try, a change uevent is triggered for the device via sd_device_trigger(), on
 * the assumption that the original event got lost.
 * retrigger_timeout is a quarter of the time left, clamped to 1s…5s; 2s is used otherwise.
 * NOTE(review): any other result presumably ends the function by returning r — confirm. */
533 static int device_wait_for_initialization_harder(
535 const char *subsystem
,
539 usec_t start
, left
, retrigger_timeout
;
542 start
= now(CLOCK_MONOTONIC
);
543 left
= usec_sub_unsigned(deadline
, start
);
546 const char *sn
= NULL
;
548 (void) sd_device_get_sysname(device
, &sn
);
549 log_device_debug(device
,
550 "Will wait up to %s for '%s' to initialize…", FORMAT_TIMESPAN(left
, 0), strna(sn
));
553 if (left
!= USEC_INFINITY
)
554 retrigger_timeout
= CLAMP(left
/ 4, 1 * USEC_PER_SEC
, 5 * USEC_PER_SEC
); /* A fourth of the total timeout, but let's clamp to 1s…5s range */
556 retrigger_timeout
= 2 * USEC_PER_SEC
;
559 usec_t local_deadline
, n
;
562 n
= now(CLOCK_MONOTONIC
);
565 /* Find next deadline, when we'll retrigger */
/* I.e. round the elapsed time up to the next multiple of retrigger_timeout, counted from start */
566 local_deadline
= start
+
567 DIV_ROUND_UP(n
- start
, retrigger_timeout
) * retrigger_timeout
;
/* Never wait past the overall deadline */
569 if (deadline
!= USEC_INFINITY
&& deadline
<= local_deadline
) {
570 local_deadline
= deadline
;
575 r
= device_wait_for_initialization(device
, subsystem
, local_deadline
, ret
);
576 if (r
>= 0 && DEBUG_LOGGING
) {
577 const char *sn
= NULL
;
579 (void) sd_device_get_sysname(device
, &sn
);
580 log_device_debug(device
,
581 "Successfully waited for device '%s' to initialize for %s.",
583 FORMAT_TIMESPAN(usec_sub_unsigned(now(CLOCK_MONOTONIC
), start
), 0));
586 if (r
!= -ETIMEDOUT
|| last_try
)
590 log_device_debug(device
,
591 "Device didn't initialize within %s, assuming lost event. Retriggering device.",
592 FORMAT_TIMESPAN(usec_sub_unsigned(now(CLOCK_MONOTONIC
), start
), 0));
594 r
= sd_device_trigger(device
, SD_DEVICE_CHANGE
);
601 #define DEVICE_TIMEOUT_USEC (45 * USEC_PER_SEC)
/* Frees the decrypted_fstype, decrypted_node and mount_options strings of *p, then resets *p by
 * assigning a DissectedPartition compound literal, so no stale pointers stay behind in the struct. */
603 static void dissected_partition_done(DissectedPartition
*p
) {
609 free(p
->decrypted_fstype
);
610 free(p
->decrypted_node
);
611 free(p
->mount_options
);
613 *p
= (DissectedPartition
) {
621 const VeritySettings
*verity
,
622 const MountOptions
*mount_options
,
624 uint64_t uevent_seqnum_not_before
,
625 usec_t timestamp_not_before
,
626 DissectImageFlags flags
,
627 DissectedImage
**ret
) {
630 sd_id128_t root_uuid
= SD_ID128_NULL
, root_verity_uuid
= SD_ID128_NULL
;
631 sd_id128_t usr_uuid
= SD_ID128_NULL
, usr_verity_uuid
= SD_ID128_NULL
;
632 bool is_gpt
, is_mbr
, multiple_generic
= false,
633 generic_rw
= false, /* initialize to appease gcc */
634 generic_growfs
= false;
635 _cleanup_(sd_device_unrefp
) sd_device
*d
= NULL
;
636 _cleanup_(dissected_image_unrefp
) DissectedImage
*m
= NULL
;
637 _cleanup_(blkid_free_probep
) blkid_probe b
= NULL
;
638 _cleanup_free_
char *generic_node
= NULL
;
639 sd_id128_t generic_uuid
= SD_ID128_NULL
;
640 const char *pttype
= NULL
, *sysname
= NULL
;
642 int r
, generic_nr
= -1, n_partitions
;
648 assert(!verity
|| verity
->designator
< 0 || IN_SET(verity
->designator
, PARTITION_ROOT
, PARTITION_USR
));
649 assert(!verity
|| verity
->root_hash
|| verity
->root_hash_size
== 0);
650 assert(!verity
|| verity
->root_hash_sig
|| verity
->root_hash_sig_size
== 0);
651 assert(!verity
|| (verity
->root_hash
|| !verity
->root_hash_sig
));
652 assert(!((flags
& DISSECT_IMAGE_GPT_ONLY
) && (flags
& DISSECT_IMAGE_NO_PARTITION_TABLE
)));
654 /* Probes a disk image, and returns information about what it found in *ret.
656 * Returns -ENOPKG if no suitable partition table or file system could be found.
657 * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found.
658 * Returns -ENXIO if we couldn't find any partition suitable as root or /usr partition
659 * Returns -ENOTUNIQ if we only found multiple generic partitions and thus don't know what to do with that */
661 if (verity
&& verity
->root_hash
) {
662 sd_id128_t fsuuid
, vuuid
;
664 /* If a root hash is supplied, then we use the root partition that has a UUID that matches the
665 * first 128bit of the root hash. And we use the verity partition that has a UUID that matches
666 * the final 128bit. */
668 if (verity
->root_hash_size
< sizeof(sd_id128_t
))
671 memcpy(&fsuuid
, verity
->root_hash
, sizeof(sd_id128_t
));
672 memcpy(&vuuid
, (const uint8_t*) verity
->root_hash
+ verity
->root_hash_size
- sizeof(sd_id128_t
), sizeof(sd_id128_t
));
674 if (sd_id128_is_null(fsuuid
))
676 if (sd_id128_is_null(vuuid
))
679 /* If the verity data declares it's for the /usr partition, then search for that, in all
680 * other cases assume it's for the root partition. */
681 if (verity
->designator
== PARTITION_USR
) {
683 usr_verity_uuid
= vuuid
;
686 root_verity_uuid
= vuuid
;
690 if (fstat(fd
, &st
) < 0)
693 if (!S_ISBLK(st
.st_mode
))
696 r
= sd_device_new_from_stat_rdev(&d
, &st
);
700 if (!FLAGS_SET(flags
, DISSECT_IMAGE_NO_UDEV
)) {
701 _cleanup_(sd_device_unrefp
) sd_device
*initialized
= NULL
;
703 /* If udev support is enabled, then let's wait for the device to be initialized before we do anything. */
705 r
= device_wait_for_initialization_harder(
708 usec_add(now(CLOCK_MONOTONIC
), DEVICE_TIMEOUT_USEC
),
714 d
= TAKE_PTR(initialized
);
717 b
= blkid_new_probe();
722 r
= blkid_probe_set_device(b
, fd
, 0, 0);
724 return errno_or_else(ENOMEM
);
726 if ((flags
& DISSECT_IMAGE_GPT_ONLY
) == 0) {
727 /* Look for file system superblocks, unless we only shall look for GPT partition tables */
728 blkid_probe_enable_superblocks(b
, 1);
729 blkid_probe_set_superblocks_flags(b
, BLKID_SUBLKS_TYPE
|BLKID_SUBLKS_USAGE
);
732 blkid_probe_enable_partitions(b
, 1);
733 blkid_probe_set_partitions_flags(b
, BLKID_PARTS_ENTRY_DETAILS
);
736 r
= blkid_do_safeprobe(b
);
737 if (IN_SET(r
, -2, 1))
738 return log_debug_errno(SYNTHETIC_ERRNO(ENOPKG
), "Failed to identify any partition table.");
740 return errno_or_else(EIO
);
742 m
= new(DissectedImage
, 1);
746 *m
= (DissectedImage
) {
747 .has_init_system
= -1,
750 r
= sd_device_get_sysname(d
, &sysname
);
752 return log_debug_errno(r
, "Failed to get device sysname: %m");
753 if (startswith(sysname
, "loop")) {
754 _cleanup_free_
char *name_stripped
= NULL
;
755 const char *full_path
;
757 r
= sd_device_get_sysattr_value(d
, "loop/backing_file", &full_path
);
759 log_debug_errno(r
, "Failed to lookup image name via loop device backing file sysattr, ignoring: %m");
761 r
= raw_strip_suffixes(basename(full_path
), &name_stripped
);
766 free_and_replace(m
->image_name
, name_stripped
);
768 r
= free_and_strdup(&m
->image_name
, sysname
);
773 if (!image_name_is_valid(m
->image_name
)) {
774 log_debug("Image name %s is not valid, ignoring", strempty(m
->image_name
));
775 m
->image_name
= mfree(m
->image_name
);
778 if ((!(flags
& DISSECT_IMAGE_GPT_ONLY
) &&
779 (flags
& DISSECT_IMAGE_GENERIC_ROOT
)) ||
780 (flags
& DISSECT_IMAGE_NO_PARTITION_TABLE
)) {
781 const char *usage
= NULL
;
783 /* If flags permit this, also allow using non-partitioned single-filesystem images */
785 (void) blkid_probe_lookup_value(b
, "USAGE", &usage
, NULL
);
786 if (STRPTR_IN_SET(usage
, "filesystem", "crypto")) {
787 const char *fstype
= NULL
, *options
= NULL
, *devname
= NULL
;
788 _cleanup_free_
char *t
= NULL
, *n
= NULL
, *o
= NULL
;
790 /* OK, we have found a file system, that's our root partition then. */
791 (void) blkid_probe_lookup_value(b
, "TYPE", &fstype
, NULL
);
799 r
= sd_device_get_devname(d
, &devname
);
807 m
->single_file_system
= true;
808 m
->encrypted
= streq_ptr(fstype
, "crypto_LUKS");
810 m
->has_verity
= verity
&& verity
->data_path
;
811 m
->verity_ready
= m
->has_verity
&&
813 (verity
->designator
< 0 || verity
->designator
== PARTITION_ROOT
);
815 m
->has_verity_sig
= false; /* signature not embedded, must be specified */
816 m
->verity_sig_ready
= m
->verity_ready
&&
817 verity
->root_hash_sig
;
819 options
= mount_options_from_designator(mount_options
, PARTITION_ROOT
);
826 m
->partitions
[PARTITION_ROOT
] = (DissectedPartition
) {
828 .rw
= !m
->verity_ready
&& !fstype_is_ro(fstype
),
830 .architecture
= _ARCHITECTURE_INVALID
,
831 .fstype
= TAKE_PTR(t
),
833 .mount_options
= TAKE_PTR(o
),
843 (void) blkid_probe_lookup_value(b
, "PTTYPE", &pttype
, NULL
);
847 is_gpt
= streq_ptr(pttype
, "gpt");
848 is_mbr
= streq_ptr(pttype
, "dos");
850 if (!is_gpt
&& ((flags
& DISSECT_IMAGE_GPT_ONLY
) || !is_mbr
))
853 /* We support external verity data partitions only if the image has no partition table */
854 if (verity
&& verity
->data_path
)
857 /* Safety check: refuse block devices that carry a partition table but for which the kernel doesn't
858 * do partition scanning. */
859 r
= blockdev_partscan_enabled(fd
);
863 return -EPROTONOSUPPORT
;
866 pl
= blkid_probe_get_partitions(b
);
868 return errno_or_else(ENOMEM
);
871 n_partitions
= blkid_partlist_numof_partitions(pl
);
872 if (n_partitions
< 0)
873 return errno_or_else(EIO
);
875 deadline
= usec_add(now(CLOCK_MONOTONIC
), DEVICE_TIMEOUT_USEC
);
876 for (int i
= 0; i
< n_partitions
; i
++) {
877 _cleanup_(sd_device_unrefp
) sd_device
*q
= NULL
;
878 unsigned long long pflags
;
879 blkid_loff_t start
, size
;
885 pp
= blkid_partlist_get_partition(pl
, i
);
887 return errno_or_else(EIO
);
889 r
= wait_for_partition_device(d
, pp
, deadline
, diskseq
, uevent_seqnum_not_before
, timestamp_not_before
, flags
, &q
);
893 r
= sd_device_get_devname(q
, &node
);
897 pflags
= blkid_partition_get_flags(pp
);
900 nr
= blkid_partition_get_partno(pp
);
902 return errno_or_else(EIO
);
905 start
= blkid_partition_get_start(pp
);
907 return errno_or_else(EIO
);
909 assert((uint64_t) start
< UINT64_MAX
/512);
912 size
= blkid_partition_get_size(pp
);
914 return errno_or_else(EIO
);
916 assert((uint64_t) size
< UINT64_MAX
/512);
919 PartitionDesignator designator
= _PARTITION_DESIGNATOR_INVALID
;
920 int architecture
= _ARCHITECTURE_INVALID
;
921 const char *stype
, *sid
, *fstype
= NULL
, *label
;
922 sd_id128_t type_id
, id
;
923 bool rw
= true, growfs
= false;
925 sid
= blkid_partition_get_uuid(pp
);
928 if (sd_id128_from_string(sid
, &id
) < 0)
931 stype
= blkid_partition_get_type_string(pp
);
934 if (sd_id128_from_string(stype
, &type_id
) < 0)
937 label
= blkid_partition_get_name(pp
); /* libblkid returns NULL here if empty */
939 if (sd_id128_equal(type_id
, GPT_HOME
)) {
941 check_partition_flags(node
, pflags
, GPT_FLAG_NO_AUTO
|GPT_FLAG_READ_ONLY
|GPT_FLAG_GROWFS
);
943 if (pflags
& GPT_FLAG_NO_AUTO
)
946 designator
= PARTITION_HOME
;
947 rw
= !(pflags
& GPT_FLAG_READ_ONLY
);
948 growfs
= FLAGS_SET(pflags
, GPT_FLAG_GROWFS
);
950 } else if (sd_id128_equal(type_id
, GPT_SRV
)) {
952 check_partition_flags(node
, pflags
, GPT_FLAG_NO_AUTO
|GPT_FLAG_READ_ONLY
|GPT_FLAG_GROWFS
);
954 if (pflags
& GPT_FLAG_NO_AUTO
)
957 designator
= PARTITION_SRV
;
958 rw
= !(pflags
& GPT_FLAG_READ_ONLY
);
959 growfs
= FLAGS_SET(pflags
, GPT_FLAG_GROWFS
);
961 } else if (sd_id128_equal(type_id
, GPT_ESP
)) {
963 /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is
964 * not defined there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as
965 * recommended by the UEFI spec (See "12.3.3 Number and Location of System
968 if (pflags
& GPT_FLAG_NO_BLOCK_IO_PROTOCOL
)
971 designator
= PARTITION_ESP
;
974 } else if (sd_id128_equal(type_id
, GPT_XBOOTLDR
)) {
976 check_partition_flags(node
, pflags
, GPT_FLAG_NO_AUTO
|GPT_FLAG_READ_ONLY
|GPT_FLAG_GROWFS
);
978 if (pflags
& GPT_FLAG_NO_AUTO
)
981 designator
= PARTITION_XBOOTLDR
;
982 rw
= !(pflags
& GPT_FLAG_READ_ONLY
);
983 growfs
= FLAGS_SET(pflags
, GPT_FLAG_GROWFS
);
985 } else if (gpt_partition_type_is_root(type_id
)) {
987 check_partition_flags(node
, pflags
, GPT_FLAG_NO_AUTO
|GPT_FLAG_READ_ONLY
|GPT_FLAG_GROWFS
);
989 if (pflags
& GPT_FLAG_NO_AUTO
)
992 /* If a root ID is specified, ignore everything but the root id */
993 if (!sd_id128_is_null(root_uuid
) && !sd_id128_equal(root_uuid
, id
))
996 assert_se((architecture
= gpt_partition_type_uuid_to_arch(type_id
)) >= 0);
997 designator
= PARTITION_ROOT_OF_ARCH(architecture
);
998 rw
= !(pflags
& GPT_FLAG_READ_ONLY
);
999 growfs
= FLAGS_SET(pflags
, GPT_FLAG_GROWFS
);
1001 } else if (gpt_partition_type_is_root_verity(type_id
)) {
1003 check_partition_flags(node
, pflags
, GPT_FLAG_NO_AUTO
|GPT_FLAG_READ_ONLY
);
1005 if (pflags
& GPT_FLAG_NO_AUTO
)
1008 m
->has_verity
= true;
1010 /* If no verity configuration is specified, then don't do verity */
1013 if (verity
->designator
>= 0 && verity
->designator
!= PARTITION_ROOT
)
1016 /* If root hash is specified, then ignore everything but the root id */
1017 if (!sd_id128_is_null(root_verity_uuid
) && !sd_id128_equal(root_verity_uuid
, id
))
1020 assert_se((architecture
= gpt_partition_type_uuid_to_arch(type_id
)) >= 0);
1021 designator
= PARTITION_VERITY_OF(PARTITION_ROOT_OF_ARCH(architecture
));
1022 fstype
= "DM_verity_hash";
1025 } else if (gpt_partition_type_is_root_verity_sig(type_id
)) {
1027 check_partition_flags(node
, pflags
, GPT_FLAG_NO_AUTO
|GPT_FLAG_READ_ONLY
);
1029 if (pflags
& GPT_FLAG_NO_AUTO
)
1032 m
->has_verity_sig
= true;
1034 /* If root hash is specified explicitly, then ignore any embedded signature */
1037 if (verity
->designator
>= 0 && verity
->designator
!= PARTITION_ROOT
)
1039 if (verity
->root_hash
)
1042 assert_se((architecture
= gpt_partition_type_uuid_to_arch(type_id
)) >= 0);
1043 designator
= PARTITION_VERITY_SIG_OF(PARTITION_ROOT_OF_ARCH(architecture
));
1044 fstype
= "verity_hash_signature";
1047 } else if (gpt_partition_type_is_usr(type_id
)) {
1049 check_partition_flags(node
, pflags
, GPT_FLAG_NO_AUTO
|GPT_FLAG_READ_ONLY
|GPT_FLAG_GROWFS
);
1051 if (pflags
& GPT_FLAG_NO_AUTO
)
1054 /* If a usr ID is specified, ignore everything but the usr id */
1055 if (!sd_id128_is_null(usr_uuid
) && !sd_id128_equal(usr_uuid
, id
))
1058 assert_se((architecture
= gpt_partition_type_uuid_to_arch(type_id
)) >= 0);
1059 designator
= PARTITION_USR_OF_ARCH(architecture
);
1060 rw
= !(pflags
& GPT_FLAG_READ_ONLY
);
1061 growfs
= FLAGS_SET(pflags
, GPT_FLAG_GROWFS
);
1063 } else if (gpt_partition_type_is_usr_verity(type_id
)) {
1065 check_partition_flags(node
, pflags
, GPT_FLAG_NO_AUTO
|GPT_FLAG_READ_ONLY
);
1067 if (pflags
& GPT_FLAG_NO_AUTO
)
1070 m
->has_verity
= true;
1074 if (verity
->designator
>= 0 && verity
->designator
!= PARTITION_USR
)
1077 /* If usr hash is specified, then ignore everything but the usr id */
1078 if (!sd_id128_is_null(usr_verity_uuid
) && !sd_id128_equal(usr_verity_uuid
, id
))
1081 assert_se((architecture
= gpt_partition_type_uuid_to_arch(type_id
)) >= 0);
1082 designator
= PARTITION_VERITY_OF(PARTITION_USR_OF_ARCH(architecture
));
1083 fstype
= "DM_verity_hash";
1086 } else if (gpt_partition_type_is_usr_verity_sig(type_id
)) {
1088 check_partition_flags(node
, pflags
, GPT_FLAG_NO_AUTO
|GPT_FLAG_READ_ONLY
);
1090 if (pflags
& GPT_FLAG_NO_AUTO
)
1093 m
->has_verity_sig
= true;
1095 /* If usr hash is specified explicitly, then ignore any embedded signature */
1098 if (verity
->designator
>= 0 && verity
->designator
!= PARTITION_USR
)
1100 if (verity
->root_hash
)
1103 assert_se((architecture
= gpt_partition_type_uuid_to_arch(type_id
)) >= 0);
1104 designator
= PARTITION_VERITY_SIG_OF(PARTITION_USR_OF_ARCH(architecture
));
1105 fstype
= "verity_hash_signature";
1108 } else if (sd_id128_equal(type_id
, GPT_SWAP
)) {
1110 check_partition_flags(node
, pflags
, GPT_FLAG_NO_AUTO
);
1112 if (pflags
& GPT_FLAG_NO_AUTO
)
1115 designator
= PARTITION_SWAP
;
1117 } else if (sd_id128_equal(type_id
, GPT_LINUX_GENERIC
)) {
1119 check_partition_flags(node
, pflags
, GPT_FLAG_NO_AUTO
|GPT_FLAG_READ_ONLY
|GPT_FLAG_GROWFS
);
1121 if (pflags
& GPT_FLAG_NO_AUTO
)
1125 multiple_generic
= true;
1128 generic_rw
= !(pflags
& GPT_FLAG_READ_ONLY
);
1129 generic_growfs
= FLAGS_SET(pflags
, GPT_FLAG_GROWFS
);
1131 generic_node
= strdup(node
);
1136 } else if (sd_id128_equal(type_id
, GPT_TMP
)) {
1138 check_partition_flags(node
, pflags
, GPT_FLAG_NO_AUTO
|GPT_FLAG_READ_ONLY
|GPT_FLAG_GROWFS
);
1140 if (pflags
& GPT_FLAG_NO_AUTO
)
1143 designator
= PARTITION_TMP
;
1144 rw
= !(pflags
& GPT_FLAG_READ_ONLY
);
1145 growfs
= FLAGS_SET(pflags
, GPT_FLAG_GROWFS
);
1147 } else if (sd_id128_equal(type_id
, GPT_VAR
)) {
1149 check_partition_flags(node
, pflags
, GPT_FLAG_NO_AUTO
|GPT_FLAG_READ_ONLY
|GPT_FLAG_GROWFS
);
1151 if (pflags
& GPT_FLAG_NO_AUTO
)
1154 if (!FLAGS_SET(flags
, DISSECT_IMAGE_RELAX_VAR_CHECK
)) {
1155 sd_id128_t var_uuid
;
1157 /* For /var we insist that the uuid of the partition matches the
1158 * HMAC-SHA256 of the /var GPT partition type uuid, keyed by machine
1159 * ID. Why? Unlike the other partitions /var is inherently
1160 * installation specific, hence we need to be careful not to mount it
1161 * in the wrong installation. By hashing the partition UUID from
1162 * /etc/machine-id we can securely bind the partition to the
1165 r
= sd_id128_get_machine_app_specific(GPT_VAR
, &var_uuid
);
1169 if (!sd_id128_equal(var_uuid
, id
)) {
1170 log_debug("Found a /var/ partition, but its UUID didn't match our expectations, ignoring.");
1175 designator
= PARTITION_VAR
;
1176 rw
= !(pflags
& GPT_FLAG_READ_ONLY
);
1177 growfs
= FLAGS_SET(pflags
, GPT_FLAG_GROWFS
);
1180 if (designator
!= _PARTITION_DESIGNATOR_INVALID
) {
1181 _cleanup_free_
char *t
= NULL
, *n
= NULL
, *o
= NULL
, *l
= NULL
;
1182 const char *options
= NULL
;
1184 if (m
->partitions
[designator
].found
) {
1185 /* For most partition types the first one we see wins. Except for the
1186 * rootfs and /usr, where we do a version compare of the label, and
1187 * let the newest version win. This permits a simple A/B versioning
1188 * scheme in OS images. */
1190 if (!PARTITION_DESIGNATOR_VERSIONED(designator
) ||
1191 strverscmp_improved(m
->partitions
[designator
].label
, label
) >= 0)
1194 dissected_partition_done(m
->partitions
+ designator
);
1213 options
= mount_options_from_designator(mount_options
, designator
);
1215 o
= strdup(options
);
1220 m
->partitions
[designator
] = (DissectedPartition
) {
1225 .architecture
= architecture
,
1226 .node
= TAKE_PTR(n
),
1227 .fstype
= TAKE_PTR(t
),
1228 .label
= TAKE_PTR(l
),
1230 .mount_options
= TAKE_PTR(o
),
1231 .offset
= (uint64_t) start
* 512,
1232 .size
= (uint64_t) size
* 512,
1236 } else if (is_mbr
) {
1238 switch (blkid_partition_get_type(pp
)) {
1240 case 0x83: /* Linux partition */
1242 if (pflags
!= 0x80) /* Bootable flag */
1246 multiple_generic
= true;
1250 generic_growfs
= false;
1251 generic_node
= strdup(node
);
1258 case 0xEA: { /* Boot Loader Spec extended $BOOT partition */
1259 _cleanup_free_
char *n
= NULL
, *o
= NULL
;
1260 sd_id128_t id
= SD_ID128_NULL
;
1261 const char *sid
, *options
= NULL
;
1263 /* First one wins */
1264 if (m
->partitions
[PARTITION_XBOOTLDR
].found
)
1267 sid
= blkid_partition_get_uuid(pp
);
1269 (void) sd_id128_from_string(sid
, &id
);
1275 options
= mount_options_from_designator(mount_options
, PARTITION_XBOOTLDR
);
1277 o
= strdup(options
);
1282 m
->partitions
[PARTITION_XBOOTLDR
] = (DissectedPartition
) {
1287 .architecture
= _ARCHITECTURE_INVALID
,
1288 .node
= TAKE_PTR(n
),
1290 .mount_options
= TAKE_PTR(o
),
1291 .offset
= (uint64_t) start
* 512,
1292 .size
= (uint64_t) size
* 512,
1300 if (m
->partitions
[PARTITION_ROOT
].found
) {
1301 /* If we found the primary arch, then invalidate the secondary and other arch to avoid any
1302 * ambiguities, since we never want to mount the secondary or other arch in this case. */
1303 m
->partitions
[PARTITION_ROOT_SECONDARY
].found
= false;
1304 m
->partitions
[PARTITION_ROOT_SECONDARY_VERITY
].found
= false;
1305 m
->partitions
[PARTITION_ROOT_SECONDARY_VERITY_SIG
].found
= false;
1306 m
->partitions
[PARTITION_USR_SECONDARY
].found
= false;
1307 m
->partitions
[PARTITION_USR_SECONDARY_VERITY
].found
= false;
1308 m
->partitions
[PARTITION_USR_SECONDARY_VERITY_SIG
].found
= false;
1310 m
->partitions
[PARTITION_ROOT_OTHER
].found
= false;
1311 m
->partitions
[PARTITION_ROOT_OTHER_VERITY
].found
= false;
1312 m
->partitions
[PARTITION_ROOT_OTHER_VERITY_SIG
].found
= false;
1313 m
->partitions
[PARTITION_USR_OTHER
].found
= false;
1314 m
->partitions
[PARTITION_USR_OTHER_VERITY
].found
= false;
1315 m
->partitions
[PARTITION_USR_OTHER_VERITY_SIG
].found
= false;
1317 } else if (m
->partitions
[PARTITION_ROOT_VERITY
].found
||
1318 m
->partitions
[PARTITION_ROOT_VERITY_SIG
].found
)
1319 return -EADDRNOTAVAIL
; /* Verity found but no matching rootfs? Something is off, refuse. */
1321 else if (m
->partitions
[PARTITION_ROOT_SECONDARY
].found
) {
1323 /* No root partition found but there's one for the secondary architecture? Then upgrade
1324 * secondary arch to first and invalidate the other arch. */
1326 log_debug("No root partition found of the native architecture, falling back to a root "
1327 "partition of the secondary architecture.");
1329 m
->partitions
[PARTITION_ROOT
] = m
->partitions
[PARTITION_ROOT_SECONDARY
];
1330 zero(m
->partitions
[PARTITION_ROOT_SECONDARY
]);
1331 m
->partitions
[PARTITION_ROOT_VERITY
] = m
->partitions
[PARTITION_ROOT_SECONDARY_VERITY
];
1332 zero(m
->partitions
[PARTITION_ROOT_SECONDARY_VERITY
]);
1333 m
->partitions
[PARTITION_ROOT_VERITY_SIG
] = m
->partitions
[PARTITION_ROOT_SECONDARY_VERITY_SIG
];
1334 zero(m
->partitions
[PARTITION_ROOT_SECONDARY_VERITY_SIG
]);
1336 m
->partitions
[PARTITION_USR
] = m
->partitions
[PARTITION_USR_SECONDARY
];
1337 zero(m
->partitions
[PARTITION_USR_SECONDARY
]);
1338 m
->partitions
[PARTITION_USR_VERITY
] = m
->partitions
[PARTITION_USR_SECONDARY_VERITY
];
1339 zero(m
->partitions
[PARTITION_USR_SECONDARY_VERITY
]);
1340 m
->partitions
[PARTITION_USR_VERITY_SIG
] = m
->partitions
[PARTITION_USR_SECONDARY_VERITY_SIG
];
1341 zero(m
->partitions
[PARTITION_USR_SECONDARY_VERITY_SIG
]);
1343 m
->partitions
[PARTITION_ROOT_OTHER
].found
= false;
1344 m
->partitions
[PARTITION_ROOT_OTHER_VERITY
].found
= false;
1345 m
->partitions
[PARTITION_ROOT_OTHER_VERITY_SIG
].found
= false;
1346 m
->partitions
[PARTITION_USR_OTHER
].found
= false;
1347 m
->partitions
[PARTITION_USR_OTHER_VERITY
].found
= false;
1348 m
->partitions
[PARTITION_USR_OTHER_VERITY_SIG
].found
= false;
1350 } else if (m
->partitions
[PARTITION_ROOT_SECONDARY_VERITY
].found
||
1351 m
->partitions
[PARTITION_ROOT_SECONDARY_VERITY_SIG
].found
)
1352 return -EADDRNOTAVAIL
; /* as above */
1354 else if (m
->partitions
[PARTITION_ROOT_OTHER
].found
) {
1356 /* No root or secondary partition found but there's one for another architecture? Then
1357 * upgrade the other architecture to first. */
1359 log_debug("No root partition found of the native architecture or the secondary architecture, "
1360 "falling back to a root partition of a non-native architecture (%s).",
1361 architecture_to_string(m
->partitions
[PARTITION_ROOT_OTHER
].architecture
));
1363 m
->partitions
[PARTITION_ROOT
] = m
->partitions
[PARTITION_ROOT_OTHER
];
1364 zero(m
->partitions
[PARTITION_ROOT_OTHER
]);
1365 m
->partitions
[PARTITION_ROOT_VERITY
] = m
->partitions
[PARTITION_ROOT_OTHER_VERITY
];
1366 zero(m
->partitions
[PARTITION_ROOT_OTHER_VERITY
]);
1367 m
->partitions
[PARTITION_ROOT_VERITY_SIG
] = m
->partitions
[PARTITION_ROOT_OTHER_VERITY_SIG
];
1368 zero(m
->partitions
[PARTITION_ROOT_OTHER_VERITY_SIG
]);
1370 m
->partitions
[PARTITION_USR
] = m
->partitions
[PARTITION_USR_OTHER
];
1371 zero(m
->partitions
[PARTITION_USR_OTHER
]);
1372 m
->partitions
[PARTITION_USR_VERITY
] = m
->partitions
[PARTITION_USR_OTHER_VERITY
];
1373 zero(m
->partitions
[PARTITION_USR_OTHER_VERITY
]);
1374 m
->partitions
[PARTITION_USR_VERITY_SIG
] = m
->partitions
[PARTITION_USR_OTHER_VERITY_SIG
];
1375 zero(m
->partitions
[PARTITION_USR_OTHER_VERITY_SIG
]);
1378 /* Hmm, we found a signature partition but no Verity data? Something is off. */
1379 if (m
->partitions
[PARTITION_ROOT_VERITY_SIG
].found
&& !m
->partitions
[PARTITION_ROOT_VERITY
].found
)
1380 return -EADDRNOTAVAIL
;
1382 if (m
->partitions
[PARTITION_USR
].found
) {
1383 /* Invalidate secondary and other arch /usr/ if we found the primary arch */
1384 m
->partitions
[PARTITION_USR_SECONDARY
].found
= false;
1385 m
->partitions
[PARTITION_USR_SECONDARY_VERITY
].found
= false;
1386 m
->partitions
[PARTITION_USR_SECONDARY_VERITY_SIG
].found
= false;
1388 m
->partitions
[PARTITION_USR_OTHER
].found
= false;
1389 m
->partitions
[PARTITION_USR_OTHER_VERITY
].found
= false;
1390 m
->partitions
[PARTITION_USR_OTHER_VERITY_SIG
].found
= false;
1392 } else if (m
->partitions
[PARTITION_USR_VERITY
].found
||
1393 m
->partitions
[PARTITION_USR_VERITY_SIG
].found
)
1394 return -EADDRNOTAVAIL
; /* as above */
1396 else if (m
->partitions
[PARTITION_USR_SECONDARY
].found
) {
1398 log_debug("No usr partition found of the native architecture, falling back to a usr "
1399 "partition of the secondary architecture.");
1401 /* Upgrade secondary arch to primary */
1402 m
->partitions
[PARTITION_USR
] = m
->partitions
[PARTITION_USR_SECONDARY
];
1403 zero(m
->partitions
[PARTITION_USR_SECONDARY
]);
1404 m
->partitions
[PARTITION_USR_VERITY
] = m
->partitions
[PARTITION_USR_SECONDARY_VERITY
];
1405 zero(m
->partitions
[PARTITION_USR_SECONDARY_VERITY
]);
1406 m
->partitions
[PARTITION_USR_VERITY_SIG
] = m
->partitions
[PARTITION_USR_SECONDARY_VERITY_SIG
];
1407 zero(m
->partitions
[PARTITION_USR_SECONDARY_VERITY_SIG
]);
1409 m
->partitions
[PARTITION_USR_OTHER
].found
= false;
1410 m
->partitions
[PARTITION_USR_OTHER_VERITY
].found
= false;
1411 m
->partitions
[PARTITION_USR_OTHER_VERITY_SIG
].found
= false;
1413 } else if (m
->partitions
[PARTITION_USR_SECONDARY_VERITY
].found
||
1414 m
->partitions
[PARTITION_USR_SECONDARY_VERITY_SIG
].found
)
1415 return -EADDRNOTAVAIL
; /* as above */
1417 else if (m
->partitions
[PARTITION_USR_OTHER
].found
) {
1419 log_debug("No usr partition found of the native architecture or the secondary architecture, "
1420 "falling back to a usr partition of a non-native architecture (%s).",
1421 architecture_to_string(m
->partitions
[PARTITION_ROOT_OTHER
].architecture
));
1423 /* Upgrade other arch to primary */
1424 m
->partitions
[PARTITION_USR
] = m
->partitions
[PARTITION_USR_OTHER
];
1425 zero(m
->partitions
[PARTITION_USR_OTHER
]);
1426 m
->partitions
[PARTITION_USR_VERITY
] = m
->partitions
[PARTITION_USR_OTHER_VERITY
];
1427 zero(m
->partitions
[PARTITION_USR_OTHER_VERITY
]);
1428 m
->partitions
[PARTITION_USR_VERITY_SIG
] = m
->partitions
[PARTITION_USR_OTHER_VERITY_SIG
];
1429 zero(m
->partitions
[PARTITION_USR_OTHER_VERITY_SIG
]);
1432 /* Hmm, we found a signature partition but no Verity data? Something is off. */
1433 if (m
->partitions
[PARTITION_USR_VERITY_SIG
].found
&& !m
->partitions
[PARTITION_USR_VERITY
].found
)
1434 return -EADDRNOTAVAIL
;
1436 /* If root and /usr are combined then insist that the architecture matches */
1437 if (m
->partitions
[PARTITION_ROOT
].found
&&
1438 m
->partitions
[PARTITION_USR
].found
&&
1439 (m
->partitions
[PARTITION_ROOT
].architecture
>= 0 &&
1440 m
->partitions
[PARTITION_USR
].architecture
>= 0 &&
1441 m
->partitions
[PARTITION_ROOT
].architecture
!= m
->partitions
[PARTITION_USR
].architecture
))
1442 return -EADDRNOTAVAIL
;
1444 if (!m
->partitions
[PARTITION_ROOT
].found
&&
1445 !m
->partitions
[PARTITION_USR
].found
&&
1446 (flags
& DISSECT_IMAGE_GENERIC_ROOT
) &&
1447 (!verity
|| !verity
->root_hash
|| verity
->designator
!= PARTITION_USR
)) {
1449 /* OK, we found nothing usable, then check if there's a single generic partition, and use
1450 * that. If the root hash was set however, then we won't fall back to a generic node, because
1451 * the root hash decides. */
1453 /* If we didn't find a properly marked root partition, but we did find a single suitable
1454 * generic Linux partition, then use this as root partition, if the caller asked for it. */
1455 if (multiple_generic
)
1458 /* If we didn't find a generic node, then we can't fix this up either */
1460 _cleanup_free_
char *o
= NULL
;
1461 const char *options
;
1463 options
= mount_options_from_designator(mount_options
, PARTITION_ROOT
);
1465 o
= strdup(options
);
1470 assert(generic_nr
>= 0);
1471 m
->partitions
[PARTITION_ROOT
] = (DissectedPartition
) {
1474 .growfs
= generic_growfs
,
1475 .partno
= generic_nr
,
1476 .architecture
= _ARCHITECTURE_INVALID
,
1477 .node
= TAKE_PTR(generic_node
),
1478 .uuid
= generic_uuid
,
1479 .mount_options
= TAKE_PTR(o
),
1480 .offset
= UINT64_MAX
,
1486 /* Check if we have a root fs if we are told to do check. /usr alone is fine too, but only if appropriate flag for that is set too */
1487 if (FLAGS_SET(flags
, DISSECT_IMAGE_REQUIRE_ROOT
) &&
1488 !(m
->partitions
[PARTITION_ROOT
].found
|| (m
->partitions
[PARTITION_USR
].found
&& FLAGS_SET(flags
, DISSECT_IMAGE_USR_NO_ROOT
))))
1491 if (m
->partitions
[PARTITION_ROOT_VERITY
].found
) {
1492 /* We only support one verity partition per image, i.e. can't do for both /usr and root fs */
1493 if (m
->partitions
[PARTITION_USR_VERITY
].found
)
1496 /* We don't support verity enabled root with a split out /usr. Neither with nor without
1497 * verity there. (Note that we do support verity-less root with verity-full /usr, though.) */
1498 if (m
->partitions
[PARTITION_USR
].found
)
1499 return -EADDRNOTAVAIL
;
1503 /* If a verity designator is specified, then insist that the matching partition exists */
1504 if (verity
->designator
>= 0 && !m
->partitions
[verity
->designator
].found
)
1505 return -EADDRNOTAVAIL
;
1507 if (verity
->root_hash
) {
1508 /* If we have an explicit root hash and found the partitions for it, then we are ready to use
1509 * Verity, set things up for it */
1511 if (verity
->designator
< 0 || verity
->designator
== PARTITION_ROOT
) {
1512 if (!m
->partitions
[PARTITION_ROOT_VERITY
].found
|| !m
->partitions
[PARTITION_ROOT
].found
)
1513 return -EADDRNOTAVAIL
;
1515 /* If we found a verity setup, then the root partition is necessarily read-only. */
1516 m
->partitions
[PARTITION_ROOT
].rw
= false;
1517 m
->verity_ready
= true;
1520 assert(verity
->designator
== PARTITION_USR
);
1522 if (!m
->partitions
[PARTITION_USR_VERITY
].found
|| !m
->partitions
[PARTITION_USR
].found
)
1523 return -EADDRNOTAVAIL
;
1525 m
->partitions
[PARTITION_USR
].rw
= false;
1526 m
->verity_ready
= true;
1529 if (m
->verity_ready
)
1530 m
->verity_sig_ready
= verity
->root_hash_sig
;
1532 } else if (m
->partitions
[verity
->designator
== PARTITION_USR
? PARTITION_USR_VERITY_SIG
: PARTITION_ROOT_VERITY_SIG
].found
) {
1534 /* If we found an embedded signature partition, we are ready, too. */
1536 m
->verity_ready
= m
->verity_sig_ready
= true;
1537 m
->partitions
[verity
->designator
== PARTITION_USR
? PARTITION_USR
: PARTITION_ROOT
].rw
= false;
1541 blkid_free_probe(b
);
1544 /* Fill in file system types if we don't know them yet. */
1545 for (PartitionDesignator i
= 0; i
< _PARTITION_DESIGNATOR_MAX
; i
++) {
1546 DissectedPartition
*p
= m
->partitions
+ i
;
1551 if (!p
->fstype
&& p
->node
) {
1552 r
= probe_filesystem(p
->node
, &p
->fstype
);
1553 if (r
< 0 && r
!= -EUCLEAN
)
1557 if (streq_ptr(p
->fstype
, "crypto_LUKS"))
1558 m
->encrypted
= true;
1560 if (p
->fstype
&& fstype_is_ro(p
->fstype
))
/* Tear down a DissectedImage: every partition slot is released through
 * dissected_partition_done(), then the image name and the three string vectors
 * (machine_info, os_release, extension_release) are freed. */
1574 DissectedImage
* dissected_image_unref(DissectedImage
*m
) {
/* Walk all designators, regardless of whether a partition was actually found for them. */
1578 for (PartitionDesignator i
= 0; i
< _PARTITION_DESIGNATOR_MAX
; i
++)
1579 dissected_partition_done(m
->partitions
+ i
);
1581 free(m
->image_name
);
1583 strv_free(m
->machine_info
);
1584 strv_free(m
->os_release
);
1585 strv_free(m
->extension_release
);
/* Check whether the block device node at 'path' is a loop device, or a partition whose
 * parent is one, by probing for a "loop" directory below the device's sysfs entry.
 *
 * Evaluates to false when neither sysfs directory exists, and to a negative errno when
 * access() fails for any reason other than ENOENT. */
1590 static int is_loop_device(const char *path
) {
1591 char s
[SYS_BLOCK_PATH_MAX("/../loop/")];
1596 if (stat(path
, &st
) < 0)
1599 if (!S_ISBLK(st
.st_mode
))
/* 'path' is a device node here, hence the device it refers to is st_rdev. st_dev would
 * be the device of the file system the node itself is stored on. */
1602 xsprintf_sys_block_path(s
, "/loop/", st
.st_rdev
);
1603 if (access(s
, F_OK
) < 0) {
1604 if (errno
!= ENOENT
)
1607 /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
1608 xsprintf_sys_block_path(s
, "/../loop/", st
.st_rdev
);
1609 if (access(s
, F_OK
) < 0)
1610 return errno
== ENOENT
? false : -errno
;
/* Run /sbin/fsck in a forked child on 'node' and evaluate its exit status.
 *
 * fsck_exists() is consulted first for 'fstype'; failure to determine that is only logged.
 * An exit status carrying FSCK_SYSTEM_SHOULD_REBOOT or FSCK_ERRORS_LEFT_UNCORRECTED is
 * turned into -EUCLEAN, any other non-success status is logged and ignored. */
1616 static int run_fsck(const char *node
, const char *fstype
) {
1623 r
= fsck_exists(fstype
);
1625 log_debug_errno(r
, "Couldn't determine whether fsck for %s exists, proceeding anyway.", fstype
);
1629 log_debug("Not checking partition %s, as fsck for %s does not exist.", node
, fstype
);
1633 r
= safe_fork("(fsck)", FORK_RESET_SIGNALS
|FORK_CLOSE_ALL_FDS
|FORK_RLIMIT_NOFILE_SAFE
|FORK_DEATHSIG
|FORK_NULL_STDIO
, &pid
);
1635 return log_debug_errno(r
, "Failed to fork off fsck: %m");
/* "-a" asks fsck to repair automatically, "-T" suppresses the title line. */
1638 execl("/sbin/fsck", "/sbin/fsck", "-aT", node
, NULL
);
1640 log_debug_errno(errno
, "Failed to execl() fsck: %m");
1641 _exit(FSCK_OPERATIONAL_ERROR
);
1644 exit_status
= wait_for_terminate_and_check("fsck", pid
, 0);
1645 if (exit_status
< 0)
/* The fork already succeeded at this point, what failed is waiting for the child. */
1646 return log_debug_errno(exit_status
, "Failed to wait for /sbin/fsck: %m");
/* "Errors corrected" counts as success, hence mask that bit before comparing. */
1648 if ((exit_status
& ~FSCK_ERROR_CORRECTED
) != FSCK_SUCCESS
) {
1649 log_debug("fsck failed with exit status %i.", exit_status
);
1651 if ((exit_status
& (FSCK_SYSTEM_SHOULD_REBOOT
|FSCK_ERRORS_LEFT_UNCORRECTED
)) != 0)
1652 return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN
), "File system is corrupted, refusing.");
1654 log_debug("Ignoring fsck error.");
/* Grow the file system mounted at 'mount_path' to the size of the block device
 * 'node_path'. The device size is queried with BLKGETSIZE64 and handed to resize_fs(),
 * which reports the size actually applied. */
1660 static int fs_grow(const char *node_path
, const char *mount_path
) {
1661 _cleanup_close_
int mount_fd
= -1, node_fd
= -1;
1662 uint64_t size
, newsize
;
1665 node_fd
= open(node_path
, O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
);
1667 return log_debug_errno(errno
, "Failed to open node device %s: %m", node_path
);
1669 if (ioctl(node_fd
, BLKGETSIZE64
, &size
) != 0)
1670 return log_debug_errno(errno
, "Failed to get block device size of %s: %m", node_path
);
1672 mount_fd
= open(mount_path
, O_RDONLY
|O_DIRECTORY
|O_CLOEXEC
);
1674 return log_debug_errno(errno
, "Failed to open mounted file system %s: %m", mount_path
);
1676 log_debug("Resizing \"%s\" to %"PRIu64
" bytes...", mount_path
, size
);
1677 r
= resize_fs(mount_fd
, size
, &newsize
);
1679 return log_debug_errno(r
, "Failed to resize \"%s\" to %"PRIu64
" bytes: %m", mount_path
, size
);
1681 if (newsize
== size
)
1682 log_debug("Successfully resized \"%s\" to %s bytes.",
1683 mount_path
, FORMAT_BYTES(newsize
));
/* Otherwise the file system may only have become smaller than the device, never larger. */
1685 assert(newsize
< size
);
1686 log_debug("Successfully resized \"%s\" to %s bytes (%"PRIu64
" bytes lost due to blocksize).",
1687 mount_path
, FORMAT_BYTES(newsize
), size
- newsize
);
/* Mount one dissected partition below 'where' (optionally at sub-path 'directory').
 *
 * Visible steps: select the decrypted node/fstype when present, compute the effective
 * read-write state from the partition and DISSECT_IMAGE_MOUNT_READ_ONLY, optionally fsck,
 * create/resolve the mount point, assemble the mount option string (discard, uid=/gid=,
 * per-partition options, "norecovery"), mount, and finally optionally grow the file
 * system and apply an ID-mapped remount. */
1693 static int mount_partition(
1694 DissectedPartition
*m
,
1696 const char *directory
,
1699 DissectImageFlags flags
) {
1701 _cleanup_free_
char *chased
= NULL
, *options
= NULL
;
1702 const char *p
, *node
, *fstype
;
1703 bool rw
, remap_uid_gid
= false;
1709 /* Use decrypted node and matching fstype if available, otherwise use the original device */
1710 node
= m
->decrypted_node
?: m
->node
;
1711 fstype
= m
->decrypted_node
? m
->decrypted_fstype
: m
->fstype
;
1713 if (!m
->found
|| !node
)
1716 return -EAFNOSUPPORT
;
1718 /* We are looking at an encrypted partition? This either means stacked encryption, or the caller didn't call dissected_image_decrypt() beforehand. Let's return a recognizable error for this case. */
1719 if (streq(fstype
, "crypto_LUKS"))
/* Writable only if the partition itself is writable and the caller did not ask for a read-only mount. */
1722 rw
= m
->rw
&& !(flags
& DISSECT_IMAGE_MOUNT_READ_ONLY
);
/* fsck is only run for mounts that will be writable. */
1724 if (FLAGS_SET(flags
, DISSECT_IMAGE_FSCK
) && rw
) {
1725 r
= run_fsck(node
, fstype
);
1731 /* Automatically create missing mount points inside the image, if necessary. */
1732 r
= mkdir_p_root(where
, directory
, uid_shift
, (gid_t
) uid_shift
, 0755);
1733 if (r
< 0 && r
!= -EROFS
)
1736 r
= chase_symlinks(directory
, where
, CHASE_PREFIX_ROOT
, &chased
, NULL
);
1742 /* Create top-level mount if missing – but only if this is asked for. This won't modify the
1743 * image (as the branch above does) but the host hierarchy, and the created directory might
1744 * survive our mount in the host hierarchy hence. */
1745 if (FLAGS_SET(flags
, DISSECT_IMAGE_MKDIR
)) {
1746 r
= mkdir_p(where
, 0755);
1754 /* If requested, turn on discard support. */
1755 if (fstype_can_discard(fstype
) &&
1756 ((flags
& DISSECT_IMAGE_DISCARD
) ||
1757 ((flags
& DISSECT_IMAGE_DISCARD_ON_LOOP
) && is_loop_device(m
->node
) > 0))) {
1758 options
= strdup("discard");
/* A non-zero UID shift is applied either via uid=/gid= mount options (when the file
 * system type supports them) or, if requested, via an ID-mapped remount afterwards. */
1763 if (uid_is_valid(uid_shift
) && uid_shift
!= 0) {
1765 if (fstype_can_uid_gid(fstype
)) {
1766 _cleanup_free_
char *uid_option
= NULL
;
1768 if (asprintf(&uid_option
, "uid=" UID_FMT
",gid=" GID_FMT
, uid_shift
, (gid_t
) uid_shift
) < 0)
1771 if (!strextend_with_separator(&options
, ",", uid_option
))
1773 } else if (FLAGS_SET(flags
, DISSECT_IMAGE_MOUNT_IDMAPPED
))
1774 remap_uid_gid
= true;
/* Append the options recorded for this partition, if any. */
1777 if (!isempty(m
->mount_options
))
1778 if (!strextend_with_separator(&options
, ",", m
->mount_options
))
1781 /* So, when you request MS_RDONLY from ext4, then this means nothing. It happily still writes to the
1782 * backing storage. What's worse, the BLKRO[GS]ET flag and (in case of loopback devices)
1783 * LO_FLAGS_READ_ONLY don't mean anything, they affect userspace accesses only, and write accesses
1784 * from the upper file system still get propagated through to the underlying file system,
1785 * unrestricted. To actually get ext4/xfs/btrfs to stop writing to the device we need to specify
1786 * "norecovery" as mount option, in addition to MS_RDONLY. Yes, this sucks, since it means we need to
1787 * carry a per file system table here.
1789 * Note that this means that we might not be able to mount corrupted file systems as read-only
1790 * anymore (since in some cases the kernel implementations will refuse mounting when corrupted,
1791 * read-only and "norecovery" is specified). But I think for the case of automatically determined
1792 * mount options for loopback devices this is the right choice, since otherwise using the same
1793 * loopback file twice even in read-only mode, is going to fail badly sooner or later. The usecase of
1794 * making reuse of the immutable images "just work" is more relevant to us than having read-only
1795 * access that actually modifies stuff work on such image files. Or to say this differently: if
1796 * people want their file systems to be fixed up they should just open them in writable mode, where
1797 * all these problems don't exist. */
1798 if (!rw
&& STRPTR_IN_SET(fstype
, "ext3", "ext4", "xfs", "btrfs"))
1799 if (!strextend_with_separator(&options
, ",", "norecovery"))
1802 r
= mount_nofollow_verbose(LOG_DEBUG
, node
, p
, fstype
, MS_NODEV
|(rw
? 0 : MS_RDONLY
), options
);
/* Growing is best-effort: the result of fs_grow() is deliberately discarded. */
1806 if (rw
&& m
->growfs
&& FLAGS_SET(flags
, DISSECT_IMAGE_GROWFS
))
1807 (void) fs_grow(node
, p
);
1809 if (remap_uid_gid
) {
1810 r
= remount_idmap(p
, uid_shift
, uid_range
);
/* Mount a tmpfs (source name "rootfs") at 'where' with MS_NODEV. When 'uid_shift' is a
 * valid UID, it is passed as both uid= and gid= mount option. With DISSECT_IMAGE_MKDIR
 * set, 'where' is created first. */
1818 static int mount_root_tmpfs(const char *where
, uid_t uid_shift
, DissectImageFlags flags
) {
1819 _cleanup_free_
char *options
= NULL
;
1824 /* For images that contain /usr/ but no rootfs, let's mount rootfs as tmpfs */
1826 if (FLAGS_SET(flags
, DISSECT_IMAGE_MKDIR
)) {
1827 r
= mkdir_p(where
, 0755);
1832 if (uid_is_valid(uid_shift
)) {
1833 if (asprintf(&options
, "uid=" UID_FMT
",gid=" GID_FMT
, uid_shift
, (gid_t
) uid_shift
) < 0)
1837 r
= mount_nofollow_verbose(LOG_DEBUG
, "rootfs", where
, "tmpfs", MS_NODEV
, options
);
/* Mount the partitions of a dissected image below 'where'.
 *
 * Order visible here: root (or a tmpfs stand-in) and /usr, optional OS/extension tree
 * validation, then /home, /srv, /var, /var/tmp, XBOOTLDR on /boot and finally the ESP on
 * /efi or /boot. Fails with -ENXIO if there is neither a root partition nor a /usr
 * partition permitted by DISSECT_IMAGE_USR_NO_ROOT. */
1844 int dissected_image_mount(
1849 DissectImageFlags flags
) {
1851 int r
, xbootldr_mounted
;
1858 * -ENXIO → No root partition found
1859 * -EMEDIUMTYPE → DISSECT_IMAGE_VALIDATE_OS set but no os-release/extension-release file found
1860 * -EUNATCH → Encrypted partition found for which no dm-crypt was set up yet
1861 * -EUCLEAN → fsck for file system failed
1862 * -EBUSY → File system already mounted/used elsewhere (kernel)
1863 * -EAFNOSUPPORT → File system type not supported or not known
1866 if (!(m
->partitions
[PARTITION_ROOT
].found
||
1867 (m
->partitions
[PARTITION_USR
].found
&& FLAGS_SET(flags
, DISSECT_IMAGE_USR_NO_ROOT
))))
1868 return -ENXIO
; /* Require a root fs or at least a /usr/ fs (the latter is subject to a flag of its own) */
1870 if ((flags
& DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY
) == 0) {
1872 /* First mount the root fs. If there's none we use a tmpfs. */
1873 if (m
->partitions
[PARTITION_ROOT
].found
)
1874 r
= mount_partition(m
->partitions
+ PARTITION_ROOT
, where
, NULL
, uid_shift
, uid_range
, flags
);
1876 r
= mount_root_tmpfs(where
, uid_shift
, flags
);
1880 /* For us mounting root always means mounting /usr as well */
1881 r
= mount_partition(m
->partitions
+ PARTITION_USR
, where
, "/usr", uid_shift
, uid_range
, flags
);
1885 if ((flags
& (DISSECT_IMAGE_VALIDATE_OS
|DISSECT_IMAGE_VALIDATE_OS_EXT
)) != 0) {
1886 /* If either one of the validation flags are set, ensure that the image qualifies
1887 * as one or the other (or both). */
1890 if (FLAGS_SET(flags
, DISSECT_IMAGE_VALIDATE_OS
)) {
1891 r
= path_is_os_tree(where
);
/* The extension check is only tried if the OS tree check did not already succeed. */
1897 if (!ok
&& FLAGS_SET(flags
, DISSECT_IMAGE_VALIDATE_OS_EXT
)) {
1898 r
= path_is_extension_tree(where
, m
->image_name
);
1910 if (flags
& DISSECT_IMAGE_MOUNT_ROOT_ONLY
)
1913 r
= mount_partition(m
->partitions
+ PARTITION_HOME
, where
, "/home", uid_shift
, uid_range
, flags
);
1917 r
= mount_partition(m
->partitions
+ PARTITION_SRV
, where
, "/srv", uid_shift
, uid_range
, flags
);
1921 r
= mount_partition(m
->partitions
+ PARTITION_VAR
, where
, "/var", uid_shift
, uid_range
, flags
);
1925 r
= mount_partition(m
->partitions
+ PARTITION_TMP
, where
, "/var/tmp", uid_shift
, uid_range
, flags
);
/* The result is kept separately, since the ESP logic below needs to know whether /boot is taken. */
1929 xbootldr_mounted
= mount_partition(m
->partitions
+ PARTITION_XBOOTLDR
, where
, "/boot", uid_shift
, uid_range
, flags
);
1930 if (xbootldr_mounted
< 0)
1931 return xbootldr_mounted
;
1933 if (m
->partitions
[PARTITION_ESP
].found
) {
1934 int esp_done
= false;
1936 /* Mount the ESP to /efi if it exists. If it doesn't exist, use /boot instead, but only if it
1937 * exists and is empty, and we didn't already mount the XBOOTLDR partition into it. */
1939 r
= chase_symlinks("/efi", where
, CHASE_PREFIX_ROOT
, NULL
, NULL
);
1944 /* /efi doesn't exist. Let's see if /boot is suitable then */
1946 if (!xbootldr_mounted
) {
1947 _cleanup_free_
char *p
= NULL
;
1949 r
= chase_symlinks("/boot", where
, CHASE_PREFIX_ROOT
, &p
, NULL
);
1953 } else if (dir_is_empty(p
) > 0) {
1954 /* It exists and is an empty directory. Let's mount the ESP there. */
1955 r
= mount_partition(m
->partitions
+ PARTITION_ESP
, where
, "/boot", uid_shift
, uid_range
, flags
);
1965 /* OK, let's mount the ESP now to /efi (possibly creating the dir if missing) */
1967 r
= mount_partition(m
->partitions
+ PARTITION_ESP
, where
, "/efi", uid_shift
, uid_range
, flags
);
/* Wrapper around dissected_image_mount() that logs a human-readable message at error
 * level for the error codes it recognizes, and a generic message otherwise. The error
 * returned by dissected_image_mount() is propagated. */
1976 int dissected_image_mount_and_warn(
1981 DissectImageFlags flags
) {
1988 r
= dissected_image_mount(m
, where
, uid_shift
, uid_range
, flags
);
1990 return log_error_errno(r
, "No root file system found in image.");
1991 if (r
== -EMEDIUMTYPE
)
1992 return log_error_errno(r
, "No suitable os-release/extension-release file in image found.");
1994 return log_error_errno(r
, "Encrypted file system discovered, but decryption not requested.");
1996 return log_error_errno(r
, "File system check on image failed.");
1998 return log_error_errno(r
, "File system already mounted elsewhere.");
1999 if (r
== -EAFNOSUPPORT
)
2000 return log_error_errno(r
, "File system type not supported or not known.");
/* Anything else is reported generically, with the error string appended. */
2002 return log_error_errno(r
, "Failed to mount image: %m");
2007 #if HAVE_LIBCRYPTSETUP
/* Bookkeeping for one device-mapper volume set up by this file. */
2008 typedef struct DecryptedPartition
{
/* libcryptsetup handle of the volume; released with sym_crypt_free() on teardown. */
2009 struct crypt_device
*device
;
2012 } DecryptedPartition
;
/* Collection of the volumes activated for one image. */
2014 struct DecryptedImage
{
/* Array of activated volumes; grown with GREEDY_REALLOC0() as volumes are added. */
2015 DecryptedPartition
*decrypted
;
/* Tear down a DecryptedImage: every volume that has a device and a name and was not
 * relinquished is deactivated by name, and its libcryptsetup handle is freed.
 * Deactivation failures are only logged. */
2020 DecryptedImage
* decrypted_image_unref(DecryptedImage
* d
) {
2021 #if HAVE_LIBCRYPTSETUP
2027 for (size_t i
= 0; i
< d
->n_decrypted
; i
++) {
2028 DecryptedPartition
*p
= d
->decrypted
+ i
;
/* Relinquished volumes are left active on purpose. */
2030 if (p
->device
&& p
->name
&& !p
->relinquished
) {
2031 r
= sym_crypt_deactivate_by_name(p
->device
, p
->name
, 0);
/* Include the error reason, it is passed in but would otherwise not be printed. */
2033 log_debug_errno(r
, "Failed to deactivate encrypted partition %s: %m", p
->name
);
2037 sym_crypt_free(p
->device
);
2047 #if HAVE_LIBCRYPTSETUP
/* Derive a device-mapper name and the matching device node path from 'original_node'.
 *
 * The name is a base string taken from 'original_node' with 'suffix' appended, and must
 * pass filename_is_valid(). The node path is that name joined onto the directory
 * reported by sym_crypt_get_dir(). Both strings are handed to the caller through
 * 'ret_name' and 'ret_node'. */
2049 static int make_dm_name_and_node(const void *original_node
, const char *suffix
, char **ret_name
, char **ret_node
) {
2050 _cleanup_free_
char *name
= NULL
, *node
= NULL
;
2053 assert(original_node
);
/* Locate the last '/' so that only the final path component is used.
 * NOTE(review): the fallback below presumably applies when no '/' is present — confirm. */
2058 base
= strrchr(original_node
, '/');
2060 base
= original_node
;
2066 name
= strjoin(base
, suffix
);
2069 if (!filename_is_valid(name
))
2072 node
= path_join(sym_crypt_get_dir(), name
);
/* Success: transfer ownership of both strings to the caller. */
2076 *ret_name
= TAKE_PTR(name
);
2077 *ret_node
= TAKE_PTR(node
);
/* Unlock one LUKS partition with 'passphrase' and record the result in 'd'.
 *
 * Only partitions whose fstype is "crypto_LUKS" are handled. The volume is activated
 * under a name derived from the partition node plus "-decrypted". On success the
 * activated volume is appended to d->decrypted and the node path is stored in
 * m->decrypted_node. -EPERM from activation is mapped to -EKEYREJECTED. */
2082 static int decrypt_partition(
2083 DissectedPartition
*m
,
2084 const char *passphrase
,
2085 DissectImageFlags flags
,
2086 DecryptedImage
*d
) {
2088 _cleanup_free_
char *node
= NULL
, *name
= NULL
;
2089 _cleanup_(sym_crypt_freep
) struct crypt_device
*cd
= NULL
;
2095 if (!m
->found
|| !m
->node
|| !m
->fstype
)
2098 if (!streq(m
->fstype
, "crypto_LUKS"))
2104 r
= dlopen_cryptsetup();
2108 r
= make_dm_name_and_node(m
->node
, "-decrypted", &name
, &node
);
/* Make room for the new entry up front, before anything is activated. */
2112 if (!GREEDY_REALLOC0(d
->decrypted
, d
->n_decrypted
+ 1))
2115 r
= sym_crypt_init(&cd
, m
->node
);
2117 return log_debug_errno(r
, "Failed to initialize dm-crypt: %m");
2119 cryptsetup_enable_logging(cd
);
2121 r
= sym_crypt_load(cd
, CRYPT_LUKS
, NULL
);
2123 return log_debug_errno(r
, "Failed to load LUKS metadata: %m");
/* Read-only and discard behaviour of the activated volume follow the dissection flags. */
2125 r
= sym_crypt_activate_by_passphrase(cd
, name
, CRYPT_ANY_SLOT
, passphrase
, strlen(passphrase
),
2126 ((flags
& DISSECT_IMAGE_DEVICE_READ_ONLY
) ? CRYPT_ACTIVATE_READONLY
: 0) |
2127 ((flags
& DISSECT_IMAGE_DISCARD_ON_CRYPTO
) ? CRYPT_ACTIVATE_ALLOW_DISCARDS
: 0));
2129 log_debug_errno(r
, "Failed to activate LUKS device: %m");
2130 return r
== -EPERM
? -EKEYREJECTED
: r
;
/* Ownership of the name and the crypt handle moves into the bookkeeping array. */
2133 d
->decrypted
[d
->n_decrypted
++] = (DecryptedPartition
) {
2134 .name
= TAKE_PTR(name
),
2135 .device
= TAKE_PTR(cd
),
2138 m
->decrypted_node
= TAKE_PTR(node
);
/* Decide whether an already active verity volume called 'name' may be reused for
 * 'verity'.
 *
 * The volume is opened by name, its volume key is read back and compared, in size and
 * content, against verity->root_hash. Where signed-key activation is compiled in, the
 * presence of a root hash signature must also match the existing volume's flags. On
 * success the opened crypt handle is passed to the caller via 'ret_cd'. */
2143 static int verity_can_reuse(
2144 const VeritySettings
*verity
,
2146 struct crypt_device
**ret_cd
) {
2148 /* If the same volume was already open, check that the root hashes match, and reuse it if they do */
2149 _cleanup_free_
char *root_hash_existing
= NULL
;
2150 _cleanup_(sym_crypt_freep
) struct crypt_device
*cd
= NULL
;
2151 struct crypt_params_verity crypt_params
= {};
2152 size_t root_hash_existing_size
;
2159 r
= sym_crypt_init_by_name(&cd
, name
);
2161 return log_debug_errno(r
, "Error opening verity device, crypt_init_by_name failed: %m");
2163 cryptsetup_enable_logging(cd
);
2165 r
= sym_crypt_get_verity_info(cd
, &crypt_params
);
2167 return log_debug_errno(r
, "Error opening verity device, crypt_get_verity_info failed: %m");
/* The buffer is sized after the expected root hash; the call below updates the size variable. */
2169 root_hash_existing_size
= verity
->root_hash_size
;
2170 root_hash_existing
= malloc0(root_hash_existing_size
);
2171 if (!root_hash_existing
)
2174 r
= sym_crypt_volume_key_get(cd
, CRYPT_ANY_SLOT
, root_hash_existing
, &root_hash_existing_size
, NULL
, 0);
2176 return log_debug_errno(r
, "Error opening verity device, crypt_volume_key_get failed: %m");
2177 if (verity
->root_hash_size
!= root_hash_existing_size
||
2178 memcmp(root_hash_existing
, verity
->root_hash
, verity
->root_hash_size
) != 0)
2179 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL
), "Error opening verity device, it already exists but root hashes are different.");
2181 #if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
2182 /* Ensure that, if signatures are supported, we only reuse the device if the previous mount used the
2183 * same settings, so that a previous unsigned mount will not be reused if the user asks to use
2184 * signing for the new one, and vice versa. */
2185 if (!!verity
->root_hash_sig
!= !!(crypt_params
.flags
& CRYPT_VERITY_ROOT_HASH_SIGNATURE
))
2186 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL
), "Error opening verity device, it already exists but signature settings are not the same.");
2189 *ret_cd
= TAKE_PTR(cd
);
/* Cleanup helper: request deferred deactivation (CRYPT_DEACTIVATE_DEFERRED) of the
 * device-mapper volume 'name'. The result of the deactivation call is ignored. */
2193 static inline char* dm_deferred_remove_clean(char *name
) {
2197 (void) sym_crypt_deactivate_by_name(NULL
, name
, CRYPT_DEACTIVATE_DEFERRED
);
/* Generates the dm_deferred_remove_cleanp() variant used with _cleanup_(). */
2200 DEFINE_TRIVIAL_CLEANUP_FUNC(char *, dm_deferred_remove_clean
);
/* Validate the root hash signature of 'verity' in userspace.
 *
 * Certificates are collected from "*.crt" files in the "verity.d" configuration
 * directories. The signature is parsed as DER-encoded PKCS#7 and verified against the
 * hex-encoded root hash, using only the collected certificates. Certificates that cannot
 * be opened or parsed are skipped with a debug message. */
2202 static int validate_signature_userspace(const VeritySettings
*verity
) {
2204 _cleanup_(sk_X509_free_allp
) STACK_OF(X509
) *sk
= NULL
;
2205 _cleanup_strv_free_
char **certs
= NULL
;
2206 _cleanup_(PKCS7_freep
) PKCS7
*p7
= NULL
;
2207 _cleanup_free_
char *s
= NULL
;
2208 _cleanup_(BIO_freep
) BIO
*bio
= NULL
; /* 'bio' must be freed first, 's' second, hence keep this order
2209 * of declaration in place, please */
2210 const unsigned char *d
;
2215 assert(verity
->root_hash
);
2216 assert(verity
->root_hash_sig
);
2218 /* Because installing a signature certificate into the kernel chain is so messy, let's optionally do
2219 * userspace validation. */
2221 r
= conf_files_list_nulstr(&certs
, ".crt", NULL
, CONF_FILES_REGULAR
|CONF_FILES_FILTER_MASKED
, CONF_PATHS_NULSTR("verity.d"));
2223 return log_debug_errno(r
, "Failed to enumerate certificates: %m");
2224 if (strv_isempty(certs
)) {
2225 log_debug("No userspace dm-verity certificates found.");
/* d2i_PKCS7() advances the pointer it is given, hence a scratch copy is used. */
2229 d
= verity
->root_hash_sig
;
2230 p7
= d2i_PKCS7(NULL
, &d
, (long) verity
->root_hash_sig_size
);
2232 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL
), "Failed to parse PKCS7 DER signature data.");
/* The signed payload is the hex string of the root hash, not the binary hash. */
2234 s
= hexmem(verity
->root_hash
, verity
->root_hash_size
);
2236 return log_oom_debug();
2238 bio
= BIO_new_mem_buf(s
, strlen(s
));
2240 return log_oom_debug();
2242 sk
= sk_X509_new_null();
2244 return log_oom_debug();
2246 STRV_FOREACH(i
, certs
) {
2247 _cleanup_(X509_freep
) X509
*c
= NULL
;
2248 _cleanup_fclose_
FILE *f
= NULL
;
2250 f
= fopen(*i
, "re");
2252 log_debug_errno(errno
, "Failed to open '%s', ignoring: %m", *i
);
2256 c
= PEM_read_X509(f
, NULL
, NULL
, NULL
);
2258 log_debug("Failed to load X509 certificate '%s', ignoring.", *i
);
2262 if (sk_X509_push(sk
, c
) == 0)
2263 return log_oom_debug();
/* PKCS7_NOINTERN: only look for the signer in 'sk'. PKCS7_NOVERIFY: do not verify the
 * signer certificate's chain. */
2268 r
= PKCS7_verify(p7
, sk
, NULL
, bio
, NULL
, PKCS7_NOINTERN
|PKCS7_NOVERIFY
);
2270 log_debug("Userspace PKCS#7 validation succeeded.");
2272 log_debug("Userspace PKCS#7 validation failed: %s", ERR_error_string(ERR_get_error(), NULL
));
2276 log_debug("Not doing client-side validation of dm-verity root hash signatures, OpenSSL support disabled.");
/* Activate a verity volume on 'cd', honouring a root hash signature if one is set.
 *
 * Signature checking is on whenever verity->root_hash_sig is set, unless
 * $SYSTEMD_DISSECT_VERITY_SIGNATURE parses as false. With checking on, activation by
 * signed key is tried first where compiled in, then userspace validation. If neither
 * works the result is -ENOKEY. Otherwise the volume is activated read-only by
 * volume key. */
2281 static int do_crypt_activate_verity(
2282 struct crypt_device
*cd
,
2284 const VeritySettings
*verity
) {
2286 bool check_signature
;
2293 if (verity
->root_hash_sig
) {
2294 r
= getenv_bool_secure("SYSTEMD_DISSECT_VERITY_SIGNATURE");
2295 if (r
< 0 && r
!= -ENXIO
)
/* Include the error reason, it is passed in but would otherwise not be printed. */
2296 log_debug_errno(r
, "Failed to parse $SYSTEMD_DISSECT_VERITY_SIGNATURE: %m");
/* Anything but an explicit "false" (including unset and unparsable) keeps checking on. */
2298 check_signature
= r
!= 0;
2300 check_signature
= false;
2302 if (check_signature
) {
2304 #if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
2305 /* First, if we have support for signed keys in the kernel, then try that first. */
2306 r
= sym_crypt_activate_by_signed_key(
2310 verity
->root_hash_size
,
2311 verity
->root_hash_sig
,
2312 verity
->root_hash_sig_size
,
2313 CRYPT_ACTIVATE_READONLY
);
2317 log_debug("Validation of dm-verity signature failed via the kernel, trying userspace validation instead.");
2319 log_debug("Activation of verity device with signature requested, but not supported via the kernel by %s due to missing crypt_activate_by_signed_key(), trying userspace validation instead.",
2320 program_invocation_short_name
);
2323 /* So this didn't work via the kernel, then let's try userspace validation instead. If that
2324 * works we'll try to activate without telling the kernel the signature. */
2326 r
= validate_signature_userspace(verity
);
2330 return log_debug_errno(SYNTHETIC_ERRNO(ENOKEY
),
2331 "Activation of signed Verity volume worked neither via the kernel nor in userspace, can't activate.");
2334 return sym_crypt_activate_by_volume_key(
2338 verity
->root_hash_size
,
2339 CRYPT_ACTIVATE_READONLY
);
/* Sets up a dm-verity device for the data partition 'm', using the hash data from partition 'v' or,
 * if set, from the external file verity->data_path, and records the result in 'd'.
 *
 * Visible flow:
 *   - sanity checks on 'verity', the designator and the found partitions (the statements following
 *     these checks are not part of this excerpt);
 *   - with DISSECT_IMAGE_VERITY_SHARE the DM name is derived from the hex-encoded root hash,
 *     otherwise from m->node;
 *   - the crypt device is initialized on the hash device, loaded as CRYPT_VERITY and pointed at
 *     m->node as data device;
 *   - activation is attempted up to N_DEVICE_NODE_LIST_ATTEMPTS times; on several failure modes the
 *     function calls itself again with DISSECT_IMAGE_VERITY_SHARE cleared;
 *   - on success the name and crypt device are appended to d->decrypted and the device node path is
 *     stored in m->decrypted_node. */
2342 static int verity_partition(
2343 PartitionDesignator designator
,
2344 DissectedPartition
*m
,
2345 DissectedPartition
*v
,
2346 const VeritySettings
*verity
,
2347 DissectImageFlags flags
,
2348 DecryptedImage
*d
) {
2350 _cleanup_(sym_crypt_freep
) struct crypt_device
*cd
= NULL
;
2351 _cleanup_(dm_deferred_remove_cleanp
) char *restore_deferred_remove
= NULL
;
2352 _cleanup_free_
char *node
= NULL
, *name
= NULL
;
/* Hash data must come from somewhere: either a hash partition or an external data file */
2356 assert(v
|| (verity
&& verity
->data_path
));
2358 if (!verity
|| !verity
->root_hash
)
/* An unset (negative) designator in the settings is treated as referring to the root partition */
2360 if (!((verity
->designator
< 0 && designator
== PARTITION_ROOT
) ||
2361 (verity
->designator
== designator
)))
2364 if (!m
->found
|| !m
->node
|| !m
->fstype
)
/* The hash partition is only inspected when no external data file was given */
2366 if (!verity
->data_path
) {
2367 if (!v
->found
|| !v
->node
|| !v
->fstype
)
2370 if (!streq(v
->fstype
, "DM_verity_hash"))
2374 r
= dlopen_cryptsetup();
2378 if (FLAGS_SET(flags
, DISSECT_IMAGE_VERITY_SHARE
)) {
2379 /* Use the roothash, which is unique per volume, as the device node name, so that it can be reused */
2380 _cleanup_free_
char *root_hash_encoded
= NULL
;
2382 root_hash_encoded
= hexmem(verity
->root_hash
, verity
->root_hash_size
);
2383 if (!root_hash_encoded
)
2386 r
= make_dm_name_and_node(root_hash_encoded
, "-verity", &name
, &node
);
2388 r
= make_dm_name_and_node(m
->node
, "-verity", &name
, &node
);
/* The external data file takes precedence over the hash partition */
2392 r
= sym_crypt_init(&cd
, verity
->data_path
?: v
->node
);
2396 cryptsetup_enable_logging(cd
);
2398 r
= sym_crypt_load(cd
, CRYPT_VERITY
, NULL
);
2402 r
= sym_crypt_set_data_device(cd
, m
->node
);
/* Make room for one more entry up front, the entry itself is filled in at the very end */
2406 if (!GREEDY_REALLOC0(d
->decrypted
, d
->n_decrypted
+ 1))
2409 /* If activating fails because the device already exists, check the metadata and reuse it if it matches.
2410 * In case of ENODEV/ENOENT, which can happen if another process is activating at the exact same time,
2411 * retry a few times before giving up. */
2412 for (unsigned i
= 0; i
< N_DEVICE_NODE_LIST_ATTEMPTS
; i
++) {
2414 r
= do_crypt_activate_verity(cd
, name
, verity
);
2415 /* libdevmapper can return EINVAL when the device is already in the activation stage.
2416 * There's no way to distinguish this situation from a genuine error due to invalid
2417 * parameters, so immediately fall back to activating the device with a unique name.
2418 * Improvements in libcryptsetup can ensure this never happens:
2419 * https://gitlab.com/cryptsetup/cryptsetup/-/merge_requests/96 */
2420 if (r
== -EINVAL
&& FLAGS_SET(flags
, DISSECT_IMAGE_VERITY_SHARE
))
2421 return verity_partition(designator
, m
, v
, verity
, flags
& ~DISSECT_IMAGE_VERITY_SHARE
, d
);
2424 -EEXIST
, /* Volume is already open and ready to be used */
2425 -EBUSY
, /* Volume is being opened but not ready, crypt_init_by_name can fetch details */
2426 -ENODEV
/* Volume is being opened but not ready, crypt_init_by_name would fail, try to open again */))
2428 if (IN_SET(r
, -EEXIST
, -EBUSY
)) {
2429 struct crypt_device
*existing_cd
= NULL
;
/* Deferred removal is cancelled only once, i.e. while restore_deferred_remove is still unset */
2431 if (!restore_deferred_remove
){
2432 /* To avoid races, disable automatic removal on umount while setting up the new device. Restore it on failure. */
2433 r
= dm_deferred_remove_cancel(name
);
2434 /* If activation returns EBUSY there might be no deferred removal to cancel, that's fine */
2435 if (r
< 0 && r
!= -ENXIO
)
2436 return log_debug_errno(r
, "Disabling automated deferred removal for verity device %s failed: %m", node
);
2438 restore_deferred_remove
= strdup(name
);
2439 if (!restore_deferred_remove
)
2444 r
= verity_can_reuse(verity
, name
, &existing_cd
);
2445 /* Same as above, -EINVAL can randomly happen when it actually means -EEXIST */
2446 if (r
== -EINVAL
&& FLAGS_SET(flags
, DISSECT_IMAGE_VERITY_SHARE
))
2447 return verity_partition(designator
, m
, v
, verity
, flags
& ~DISSECT_IMAGE_VERITY_SHARE
, d
);
2448 if (!IN_SET(r
, 0, -ENODEV
, -ENOENT
, -EBUSY
))
2449 return log_debug_errno(r
, "Checking whether existing verity device %s can be reused failed: %m", node
);
2451 /* devmapper might say that the device exists, but the devlink might not yet have been
2452 * created. Check and wait for the udev event in that case. */
2453 r
= device_wait_for_devlink(node
, "block", usec_add(now(CLOCK_MONOTONIC
), 100 * USEC_PER_MSEC
), NULL
);
2454 /* Fallback to activation with a unique device if it's taking too long */
2455 if (r
== -ETIMEDOUT
)
2468 /* Device is being opened by another process, but it has not finished yet, yield for 2ms */
2469 (void) usleep(2 * USEC_PER_MSEC
);
2472 /* An existing verity device was reported by libcryptsetup/libdevmapper, but we can't use it at this time.
2473 * Fall back to activating it with a unique device name. */
2474 if (r
!= 0 && FLAGS_SET(flags
, DISSECT_IMAGE_VERITY_SHARE
))
2475 return verity_partition(designator
, m
, v
, verity
, flags
& ~DISSECT_IMAGE_VERITY_SHARE
, d
);
2477 /* Everything looks good and we'll be able to mount the device, so deferred remove will be re-enabled at that point. */
2478 restore_deferred_remove
= mfree(restore_deferred_remove
);
/* Hand ownership of the DM name and the crypt device over to the DecryptedImage */
2480 d
->decrypted
[d
->n_decrypted
++] = (DecryptedPartition
) {
2481 .name
= TAKE_PTR(name
),
2482 .device
= TAKE_PTR(cd
),
2485 m
->decrypted_node
= TAKE_PTR(node
);
/* Unlocks all partitions of a dissected image that need it.
 *
 * For every partition designator the visible code calls decrypt_partition() with the supplied
 * passphrase, then verity_partition() on the partition and its PARTITION_VERITY_OF() counterpart
 * (always adding DISSECT_IMAGE_VERITY_SHARE to the flags), and finally probes the file system type
 * of a newly created decrypted node if none is known yet (-EUCLEAN from probe_filesystem() is
 * tolerated by the visible check).
 *
 * The return value conventions are listed in the comment inside the body. If 'verity' carries a
 * root hash, the code compares its size against sizeof(sd_id128_t) before doing anything else. */
2491 int dissected_image_decrypt(
2493 const char *passphrase
,
2494 const VeritySettings
*verity
,
2495 DissectImageFlags flags
,
2496 DecryptedImage
**ret
) {
2498 #if HAVE_LIBCRYPTSETUP
2499 _cleanup_(decrypted_image_unrefp
) DecryptedImage
*d
= NULL
;
2504 assert(!verity
|| verity
->root_hash
|| verity
->root_hash_size
== 0);
2508 * = 0 → There was nothing to decrypt
2509 * > 0 → Decrypted successfully
2510 * -ENOKEY → There's something to decrypt but no key was supplied
2511 * -EKEYREJECTED → Passed key was not correct
2514 if (verity
&& verity
->root_hash
&& verity
->root_hash_size
< sizeof(sd_id128_t
))
2517 if (!m
->encrypted
&& !m
->verity_ready
) {
2522 #if HAVE_LIBCRYPTSETUP
2523 d
= new0(DecryptedImage
, 1);
2527 for (PartitionDesignator i
= 0; i
< _PARTITION_DESIGNATOR_MAX
; i
++) {
2528 DissectedPartition
*p
= m
->partitions
+ i
;
2529 PartitionDesignator k
;
2534 r
= decrypt_partition(p
, passphrase
, flags
, d
);
2538 k
= PARTITION_VERITY_OF(i
);
2540 r
= verity_partition(i
, p
, m
->partitions
+ k
, verity
, flags
| DISSECT_IMAGE_VERITY_SHARE
, d
);
2545 if (!p
->decrypted_fstype
&& p
->decrypted_node
) {
2546 r
= probe_filesystem(p
->decrypted_node
, &p
->decrypted_fstype
);
2547 if (r
< 0 && r
!= -EUCLEAN
)
/* Interactive wrapper around dissected_image_decrypt().
 *
 * Calls dissected_image_decrypt() and, depending on the outcome, queries the user for a passphrase
 * through ask_password_auto(). -EKEYREJECTED is logged as "Incorrect passphrase, try again!", any
 * other error except -ENOKEY is logged and returned, and once the retry budget is used up
 * -EKEYREJECTED ("Too many retries.") is returned. The retry counter and loop header are not part
 * of this excerpt. */
2560 int dissected_image_decrypt_interactively(
2562 const char *passphrase
,
2563 const VeritySettings
*verity
,
2564 DissectImageFlags flags
,
2565 DecryptedImage
**ret
) {
/* Receives the passwords from ask_password_auto(); the cleanup attribute name suggests the strings
 * are erased before being freed */
2567 _cleanup_strv_free_erase_
char **z
= NULL
;
2574 r
= dissected_image_decrypt(m
, passphrase
, verity
, flags
, ret
);
2577 if (r
== -EKEYREJECTED
)
2578 log_error_errno(r
, "Incorrect passphrase, try again!");
2579 else if (r
!= -ENOKEY
)
2580 return log_error_errno(r
, "Failed to decrypt image: %m");
2583 return log_error_errno(SYNTHETIC_ERRNO(EKEYREJECTED
),
2584 "Too many retries.");
2588 r
= ask_password_auto("Please enter image passphrase:", NULL
, "dissect", "dissect", "dissect.passphrase", USEC_INFINITY
, 0, &z
);
2590 return log_error_errno(r
, "Failed to query for passphrase: %m");
/* Marks every DM device recorded in 'd' for deferred removal.
 *
 * Walks d->decrypted[0..n_decrypted), checks each entry's 'relinquished' flag, requests
 * CRYPT_DEACTIVATE_DEFERRED deactivation by name and sets the flag afterwards. A failure of the
 * deactivation call is logged at debug level and returned. */
2596 int decrypted_image_relinquish(DecryptedImage
*d
) {
2599 /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a
2600 * boolean so that we don't clean it up ourselves either anymore */
2602 #if HAVE_LIBCRYPTSETUP
2605 for (size_t i
= 0; i
< d
->n_decrypted
; i
++) {
2606 DecryptedPartition
*p
= d
->decrypted
+ i
;
2608 if (p
->relinquished
)
/* No crypt_device handle is passed (NULL), the device is addressed by its DM name only */
2611 r
= sym_crypt_deactivate_by_name(NULL
, p
->name
, CRYPT_DEACTIVATE_DEFERRED
);
2613 return log_debug_errno(r
, "Failed to mark %s for auto-removal: %m", p
->name
);
2615 p
->relinquished
= true;
/* Builds the path of a sidecar file belonging to 'image' by appending 'suffix'.
 *
 * If 'image' ends in ".raw" that extension is replaced by 'suffix' ("foo.raw" + ".roothash" →
 * "foo.roothash"), otherwise 'suffix' is simply appended to the full image path ("foo.img" +
 * ".roothash" → "foo.img.roothash"). Returns a newly allocated string. */
2622 static char *build_auxiliary_path(const char *image
, const char *suffix
) {
2629 e
= endswith(image
, ".raw");
/* This return is taken when there is no ".raw" extension, i.e. when 'e' is NULL (the code further
 * down computes 'e - image' and hence needs a valid 'e'). Joining 'e' here would pass NULL as the
 * first string, so the suffix would never be appended to the image path. Join the image path
 * itself instead. */
2631 return strjoin(image
, suffix
);
/* Room for the image path without ".raw", the suffix and the trailing NUL */
2633 n
= new(char, e
- image
+ strlen(suffix
) + 1);
2637 strcpy(mempcpy(n
, image
, e
- image
), suffix
);
/* Releases the dynamically allocated members of *v: root_hash, root_hash_sig and data_path are
 * freed and reset to NULL (via mfree()), and both size fields are reset to zero. */
2641 void verity_settings_done(VeritySettings
*v
) {
2644 v
->root_hash
= mfree(v
->root_hash
);
2645 v
->root_hash_size
= 0;
2647 v
->root_hash_sig
= mfree(v
->root_hash_sig
);
2648 v
->root_hash_sig_size
= 0;
2650 v
->data_path
= mfree(v
->data_path
);
/* Completes '*verity' with data found next to 'image', without overriding what is already set.
 *
 * Sources, in the order the visible code consults them:
 *   - root hash: 'root_hash_path' if given, else the "user.verity.roothash" xattr or the
 *     ".roothash" sidecar file, else the "user.verity.usrhash" xattr or the ".usrhash" sidecar file;
 *   - root hash signature: 'root_hash_sig_path' if given, else the ".roothash.p7s" or
 *     ".usrhash.p7s" sidecar file;
 *   - verity data: the ".verity" sidecar file, if it exists.
 *
 * Which variant is looked at depends on verity->designator (negative means "not decided yet"); the
 * designator that matched is written back if it was unset. Device node paths are handled by an
 * early exit, and $SYSTEMD_DISSECT_VERITY_SIDECAR is consulted before any sidecar lookup. */
2653 int verity_settings_load(
2654 VeritySettings
*verity
,
2656 const char *root_hash_path
,
2657 const char *root_hash_sig_path
) {
/* Everything is collected in locals first and only moved into *verity at the end */
2659 _cleanup_free_
void *root_hash
= NULL
, *root_hash_sig
= NULL
;
2660 size_t root_hash_size
= 0, root_hash_sig_size
= 0;
2661 _cleanup_free_
char *verity_data_path
= NULL
;
2662 PartitionDesignator designator
;
2667 assert(verity
->designator
< 0 || IN_SET(verity
->designator
, PARTITION_ROOT
, PARTITION_USR
));
2669 /* If we are asked to load the root hash for a device node, exit early */
2670 if (is_device_path(image
))
2673 r
= getenv_bool_secure("SYSTEMD_DISSECT_VERITY_SIDECAR");
2674 if (r
< 0 && r
!= -ENXIO
)
2675 log_debug_errno(r
, "Failed to parse $SYSTEMD_DISSECT_VERITY_SIDECAR, ignoring: %m");
2679 designator
= verity
->designator
;
2681 /* We only fill in what isn't already filled in */
2683 if (!verity
->root_hash
) {
2684 _cleanup_free_
char *text
= NULL
;
2686 if (root_hash_path
) {
2687 /* If explicitly specified it takes precedence */
2688 r
= read_one_line_file(root_hash_path
, &text
);
2693 designator
= PARTITION_ROOT
;
2695 /* Otherwise look for xattr and separate file, and first for the data for root and if
2696 * that doesn't exist for /usr */
2698 if (designator
< 0 || designator
== PARTITION_ROOT
) {
2699 r
= getxattr_malloc(image
, "user.verity.roothash", &text
);
2701 _cleanup_free_
char *p
= NULL
;
2703 if (!IN_SET(r
, -ENODATA
, -ENOENT
) && !ERRNO_IS_NOT_SUPPORTED(r
))
2706 p
= build_auxiliary_path(image
, ".roothash");
2710 r
= read_one_line_file(p
, &text
);
2711 if (r
< 0 && r
!= -ENOENT
)
2716 designator
= PARTITION_ROOT
;
2719 if (!text
&& (designator
< 0 || designator
== PARTITION_USR
)) {
2720 /* So in the "roothash" xattr/file name above the "root" of course primarily
2721 * refers to the root of the Verity Merkle tree. But coincidentally it also
2722 * is the hash for the *root* file system, i.e. the "root" neatly refers to
2723 * two distinct concepts called "root". Taking benefit of this happy
2724 * coincidence we call the file with the root hash for the /usr/ file system
2725 * `usrhash`, because `usrroothash` or `rootusrhash` would just be too
2726 * confusing. We thus drop the reference to the root of the Merkle tree, and
2727 * just indicate which file system it's about. */
2728 r
= getxattr_malloc(image
, "user.verity.usrhash", &text
);
2730 _cleanup_free_
char *p
= NULL
;
2732 if (!IN_SET(r
, -ENODATA
, -ENOENT
) && !ERRNO_IS_NOT_SUPPORTED(r
))
2735 p
= build_auxiliary_path(image
, ".usrhash");
2739 r
= read_one_line_file(p
, &text
);
2740 if (r
< 0 && r
!= -ENOENT
)
2745 designator
= PARTITION_USR
;
/* The textual root hash is hex encoded; its decoded size is checked against sizeof(sd_id128_t) */
2750 r
= unhexmem(text
, strlen(text
), &root_hash
, &root_hash_size
);
2753 if (root_hash_size
< sizeof(sd_id128_t
))
/* A signature is only looked for if a root hash is known (old or newly loaded) and no signature
 * is set yet */
2758 if ((root_hash
|| verity
->root_hash
) && !verity
->root_hash_sig
) {
2759 if (root_hash_sig_path
) {
2760 r
= read_full_file(root_hash_sig_path
, (char**) &root_hash_sig
, &root_hash_sig_size
);
2761 if (r
< 0 && r
!= -ENOENT
)
2765 designator
= PARTITION_ROOT
;
2767 if (designator
< 0 || designator
== PARTITION_ROOT
) {
2768 _cleanup_free_
char *p
= NULL
;
2770 /* Follow naming convention recommended by the relevant RFC:
2771 * https://tools.ietf.org/html/rfc5751#section-3.2.1 */
2772 p
= build_auxiliary_path(image
, ".roothash.p7s");
2776 r
= read_full_file(p
, (char**) &root_hash_sig
, &root_hash_sig_size
);
2777 if (r
< 0 && r
!= -ENOENT
)
2780 designator
= PARTITION_ROOT
;
2783 if (!root_hash_sig
&& (designator
< 0 || designator
== PARTITION_USR
)) {
2784 _cleanup_free_
char *p
= NULL
;
2786 p
= build_auxiliary_path(image
, ".usrhash.p7s");
2790 r
= read_full_file(p
, (char**) &root_hash_sig
, &root_hash_sig_size
);
2791 if (r
< 0 && r
!= -ENOENT
)
2794 designator
= PARTITION_USR
;
2798 if (root_hash_sig
&& root_hash_sig_size
== 0) /* refuse empty size signatures */
2802 if (!verity
->data_path
) {
2803 _cleanup_free_
char *p
= NULL
;
2805 p
= build_auxiliary_path(image
, ".verity");
/* Only existence is tested here; a missing file (ENOENT) is distinguished from other errors */
2809 if (access(p
, F_OK
) < 0) {
2810 if (errno
!= ENOENT
)
2813 verity_data_path
= TAKE_PTR(p
);
/* Commit phase: move the collected values into *verity */
2817 verity
->root_hash
= TAKE_PTR(root_hash
);
2818 verity
->root_hash_size
= root_hash_size
;
2821 if (root_hash_sig
) {
2822 verity
->root_hash_sig
= TAKE_PTR(root_hash_sig
);
2823 verity
->root_hash_sig_size
= root_hash_sig_size
;
2826 if (verity_data_path
)
2827 verity
->data_path
= TAKE_PTR(verity_data_path
);
2829 if (verity
->designator
< 0)
2830 verity
->designator
= designator
;
/* Loads root hash and root hash signature from the image's embedded verity signature partition.
 *
 * The signature partition matching verity->designator (root if unset) is read in full from 'fd',
 * checked for embedded NUL bytes and parsed as JSON. The "rootHash" field (hex) and the
 * "signature" field (base64) are decoded. If a root hash is already configured in *verity it must
 * be identical to the one from the partition, otherwise -EINVAL is returned. On success both
 * values replace whatever *verity held before.
 *
 * Nothing is loaded if both root hash and signature are already set; the function also consults
 * $SYSTEMD_DISSECT_VERITY_EMBEDDED. */
2835 int dissected_image_load_verity_sig_partition(
2838 VeritySettings
*verity
) {
2840 _cleanup_free_
void *root_hash
= NULL
, *root_hash_sig
= NULL
;
2841 _cleanup_(json_variant_unrefp
) JsonVariant
*v
= NULL
;
2842 size_t root_hash_size
, root_hash_sig_size
;
2843 _cleanup_free_
char *buf
= NULL
;
2844 PartitionDesignator d
;
2845 DissectedPartition
*p
;
2846 JsonVariant
*rh
, *sig
;
2855 if (verity
->root_hash
&& verity
->root_hash_sig
) /* Already loaded? */
2858 r
= getenv_bool_secure("SYSTEMD_DISSECT_VERITY_EMBEDDED");
2859 if (r
< 0 && r
!= -ENXIO
)
2860 log_debug_errno(r
, "Failed to parse $SYSTEMD_DISSECT_VERITY_EMBEDDED, ignoring: %m");
/* An unset designator defaults to the root partition's signature partition */
2864 d
= PARTITION_VERITY_SIG_OF(verity
->designator
< 0 ? PARTITION_ROOT
: verity
->designator
);
2867 p
= m
->partitions
+ d
;
2870 if (p
->offset
== UINT64_MAX
|| p
->size
== UINT64_MAX
)
2873 if (p
->size
> 4*1024*1024) /* Signature data cannot possibly be larger than 4M, refuse that */
/* One extra byte so that the buffer can always be NUL terminated */
2876 buf
= new(char, p
->size
+1);
2880 n
= pread(fd
, buf
, p
->size
, p
->offset
);
2883 if ((uint64_t) n
!= p
->size
)
2886 e
= memchr(buf
, 0, p
->size
);
2888 /* If we found a NUL byte then the rest of the data must be NUL too */
2889 if (!memeqzero(e
, p
->size
- (e
- buf
)))
2890 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL
), "Signature data contains embedded NUL byte.");
2894 r
= json_parse(buf
, 0, &v
, NULL
, NULL
);
2896 return log_debug_errno(r
, "Failed to parse signature JSON data: %m");
2898 rh
= json_variant_by_key(v
, "rootHash");
2900 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL
), "Signature JSON object lacks 'rootHash' field.");
2901 if (!json_variant_is_string(rh
))
2902 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL
), "'rootHash' field of signature JSON object is not a string.");
2904 r
= unhexmem(json_variant_string(rh
), SIZE_MAX
, &root_hash
, &root_hash_size
);
2906 return log_debug_errno(r
, "Failed to parse root hash field: %m");
2908 /* Check if specified root hash matches if it is specified */
2909 if (verity
->root_hash
&&
2910 memcmp_nn(verity
->root_hash
, verity
->root_hash_size
, root_hash
, root_hash_size
) != 0) {
2911 _cleanup_free_
char *a
= NULL
, *b
= NULL
;
2913 a
= hexmem(root_hash
, root_hash_size
);
2914 b
= hexmem(verity
->root_hash
, verity
->root_hash_size
);
/* 'r' still holds the successful result of unhexmem() at this point, hence returning it would
 * report the mismatch as success. Use an explicit error code, like the other validation failures
 * in this function do. */
2916 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL
), "Root hash in signature JSON data (%s) doesn't match configured hash (%s).", strna(a
), strna(b
));
2919 sig
= json_variant_by_key(v
, "signature");
2921 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL
), "Signature JSON object lacks 'signature' field.");
2922 if (!json_variant_is_string(sig
))
2923 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL
), "'signature' field of signature JSON object is not a string.");
2925 r
= unbase64mem(json_variant_string(sig
), SIZE_MAX
, &root_hash_sig
, &root_hash_sig_size
);
2927 return log_debug_errno(r
, "Failed to parse signature field: %m");
/* Everything validated, replace the old values (if any) by the freshly loaded ones */
2929 free_and_replace(verity
->root_hash
, root_hash
);
2930 verity
->root_hash_size
= root_hash_size
;
2932 free_and_replace(verity
->root_hash_sig
, root_hash_sig
);
2933 verity
->root_hash_sig_size
= root_hash_sig_size
;
/* Extracts OS metadata (hostname, machine ID, machine-info, os-release, extension-release and an
 * "init system present" flag) from a dissected image and stores it in 'm'.
 *
 * Visible flow: one pipe per metadata item is created, a temporary directory is made, and a child
 * process is forked into a new mount namespace. The child mounts the image read-only on the
 * temporary directory and writes the content of each item into the matching pipe; a failure code
 * is sent back through 'error_pipe'. The parent reads and parses each pipe, waits for the child,
 * and only then moves the collected values into 'm'. */
2938 int dissected_image_acquire_metadata(DissectedImage
*m
, DissectImageFlags extra_flags
) {
2945 META_EXTENSION_RELEASE
,
2946 META_HAS_INIT_SYSTEM
,
/* NUL-separated lists of candidate paths per metadata item, iterated with NULSTR_FOREACH() below */
2950 static const char *const paths
[_META_MAX
] = {
2951 [META_HOSTNAME
] = "/etc/hostname\0",
2952 [META_MACHINE_ID
] = "/etc/machine-id\0",
2953 [META_MACHINE_INFO
] = "/etc/machine-info\0",
2954 [META_OS_RELEASE
] = ("/etc/os-release\0"
2955 "/usr/lib/os-release\0"),
2956 [META_EXTENSION_RELEASE
] = "extension-release\0", /* Used only for logging. */
2957 [META_HAS_INIT_SYSTEM
] = "has-init-system\0", /* ditto */
2960 _cleanup_strv_free_
char **machine_info
= NULL
, **os_release
= NULL
, **extension_release
= NULL
;
2961 _cleanup_close_pair_
int error_pipe
[2] = { -1, -1 };
2962 _cleanup_(rmdir_and_freep
) char *t
= NULL
;
2963 _cleanup_(sigkill_waitp
) pid_t child
= 0;
2964 sd_id128_t machine_id
= SD_ID128_NULL
;
2965 _cleanup_free_
char *hostname
= NULL
;
2966 unsigned n_meta_initialized
= 0;
/* fds[2*k] is the read end and fds[2*k+1] the write end of the pipe for item k */
2967 int fds
[2 * _META_MAX
], r
, v
;
2968 int has_init_system
= -1;
2971 BLOCK_SIGNALS(SIGCHLD
);
/* n_meta_initialized counts how many fd pairs were set up, the cleanup loop at the end relies on it */
2975 for (; n_meta_initialized
< _META_MAX
; n_meta_initialized
++) {
2976 if (!paths
[n_meta_initialized
]) {
2977 fds
[2*n_meta_initialized
] = fds
[2*n_meta_initialized
+1] = -1;
2981 if (pipe2(fds
+ 2*n_meta_initialized
, O_CLOEXEC
) < 0) {
2987 r
= mkdtemp_malloc("/tmp/dissect-XXXXXX", &t
);
2991 if (pipe2(error_pipe
, O_CLOEXEC
) < 0) {
2996 r
= safe_fork("(sd-dissect)", FORK_RESET_SIGNALS
|FORK_DEATHSIG
|FORK_NEW_MOUNTNS
|FORK_MOUNTNS_SLAVE
, &child
);
3000 /* Child in a new mount namespace */
3001 error_pipe
[0] = safe_close(error_pipe
[0]);
3003 r
= dissected_image_mount(
3009 DISSECT_IMAGE_READ_ONLY
|
3010 DISSECT_IMAGE_MOUNT_ROOT_ONLY
|
3011 DISSECT_IMAGE_USR_NO_ROOT
);
3013 log_debug_errno(r
, "Failed to mount dissected image: %m");
3017 for (unsigned k
= 0; k
< _META_MAX
; k
++) {
3018 _cleanup_close_
int fd
= -ENOENT
;
/* The child only writes, so it closes the read end of each pipe */
3024 fds
[2*k
] = safe_close(fds
[2*k
]);
3028 case META_EXTENSION_RELEASE
:
3029 /* As per the os-release spec, if the image is an extension it will have a file
3030 * named after the image name in extension-release.d/ - we use the image name
3031 * and try to resolve it with the extension-release helpers, as sometimes
3032 * the image names are mangled on deployment and do not match anymore.
3033 * Unlike other paths this is not fixed, and the image name
3034 * can be mangled on deployment, so by calling into the helper
3035 * we allow a fallback that matches on the first extension-release
3036 * file found in the directory, if one named after the image cannot
3037 * be found first. */
3038 r
= open_extension_release(t
, m
->image_name
, NULL
, &fd
);
3040 fd
= r
; /* Propagate the error. */
3043 case META_HAS_INIT_SYSTEM
: {
3047 FOREACH_STRING(init
,
3048 "/usr/lib/systemd/systemd", /* systemd on /usr merged system */
3049 "/lib/systemd/systemd", /* systemd on /usr non-merged systems */
3050 "/sbin/init") { /* traditional path the Linux kernel invokes */
3052 r
= chase_symlinks(init
, t
, CHASE_PREFIX_ROOT
, NULL
, NULL
);
3055 log_debug_errno(r
, "Failed to resolve %s, ignoring: %m", init
);
/* The result is transferred as the raw bytes of 'found' and read back the same way by the parent */
3062 r
= loop_write(fds
[2*k
+1], &found
, sizeof(found
), false);
3070 NULSTR_FOREACH(p
, paths
[k
]) {
3071 fd
= chase_symlinks_and_open(p
, t
, CHASE_PREFIX_ROOT
, O_RDONLY
|O_CLOEXEC
|O_NOCTTY
, NULL
);
3078 log_debug_errno(fd
, "Failed to read %s file of image, ignoring: %m", paths
[k
]);
3079 fds
[2*k
+1] = safe_close(fds
[2*k
+1]);
3083 r
= copy_bytes(fd
, fds
[2*k
+1], UINT64_MAX
, 0);
3087 fds
[2*k
+1] = safe_close(fds
[2*k
+1]);
3090 _exit(EXIT_SUCCESS
);
3093 /* Let parent know the error */
3094 (void) write(error_pipe
[1], &r
, sizeof(r
));
3095 _exit(EXIT_FAILURE
);
/* Parent side from here on: it only reads, so the write ends get closed */
3098 error_pipe
[1] = safe_close(error_pipe
[1]);
3100 for (unsigned k
= 0; k
< _META_MAX
; k
++) {
3101 _cleanup_fclose_
FILE *f
= NULL
;
3106 fds
[2*k
+1] = safe_close(fds
[2*k
+1]);
3108 f
= take_fdopen(&fds
[2*k
], "r");
3117 r
= read_etc_hostname_stream(f
, &hostname
);
3119 log_debug_errno(r
, "Failed to read /etc/hostname of image: %m");
3123 case META_MACHINE_ID
: {
3124 _cleanup_free_
char *line
= NULL
;
3126 r
= read_line(f
, LONG_LINE_MAX
, &line
);
3128 log_debug_errno(r
, "Failed to read /etc/machine-id of image: %m");
3130 r
= sd_id128_from_string(line
, &machine_id
);
3132 log_debug_errno(r
, "Image contains invalid /etc/machine-id: %s", line
);
3134 log_debug("/etc/machine-id file of image is empty.");
3135 else if (streq(line
, "uninitialized"))
3136 log_debug("/etc/machine-id file of image is uninitialized (likely aborted first boot).");
3138 log_debug("/etc/machine-id file of image has unexpected length %i.", r
);
3143 case META_MACHINE_INFO
:
3144 r
= load_env_file_pairs(f
, "machine-info", &machine_info
);
3146 log_debug_errno(r
, "Failed to read /etc/machine-info of image: %m");
3150 case META_OS_RELEASE
:
3151 r
= load_env_file_pairs(f
, "os-release", &os_release
);
3153 log_debug_errno(r
, "Failed to read OS release file of image: %m");
3157 case META_EXTENSION_RELEASE
:
3158 r
= load_env_file_pairs(f
, "extension-release", &extension_release
);
3160 log_debug_errno(r
, "Failed to read extension release file of image: %m");
3164 case META_HAS_INIT_SYSTEM
: {
3169 nr
= fread(&b
, 1, sizeof(b
), f
);
3170 if (nr
!= sizeof(b
))
3171 log_debug_errno(errno_or_else(EIO
), "Failed to read has-init-system boolean: %m");
3173 has_init_system
= b
;
3179 r
= wait_for_terminate_and_check("(sd-dissect)", child
, 0);
/* Pick up the error code the child may have sent before exiting */
3184 n
= read(error_pipe
[0], &v
, sizeof(v
));
3188 return v
; /* propagate error sent to us from child */
3192 if (r
!= EXIT_SUCCESS
)
/* Commit the collected metadata to the DissectedImage object */
3195 free_and_replace(m
->hostname
, hostname
);
3196 m
->machine_id
= machine_id
;
3197 strv_free_and_replace(m
->machine_info
, machine_info
);
3198 strv_free_and_replace(m
->os_release
, os_release
);
3199 strv_free_and_replace(m
->extension_release
, extension_release
);
3200 m
->has_init_system
= has_init_system
;
/* Close whatever pipe ends are still open, but only for the pairs that were actually initialized */
3203 for (unsigned k
= 0; k
< n_meta_initialized
; k
++)
3204 safe_close_pair(fds
+ 2*k
);
/* Wrapper around dissect_image() that turns its error codes into error-level log messages.
 *
 * All arguments are passed through to dissect_image() unchanged. Each known failure gets its own
 * explanatory message that includes the image name, anything else is reported with the generic
 * "Failed to dissect image" message. fd_get_path() is called on 'fd' into a local buffer; how the
 * displayed name is chosen from it is not part of this excerpt. */
3209 int dissect_image_and_warn(
3212 const VeritySettings
*verity
,
3213 const MountOptions
*mount_options
,
3215 uint64_t uevent_seqnum_not_before
,
3216 usec_t timestamp_not_before
,
3217 DissectImageFlags flags
,
3218 DissectedImage
**ret
) {
3220 _cleanup_free_
char *buffer
= NULL
;
3224 r
= fd_get_path(fd
, &buffer
);
3231 r
= dissect_image(fd
, verity
, mount_options
, diskseq
, uevent_seqnum_not_before
, timestamp_not_before
, flags
, ret
);
3235 return log_error_errno(r
, "Dissecting images is not supported, compiled without blkid support.");
3238 return log_error_errno(r
, "%s: Couldn't identify a suitable partition table or file system.", name
);
3241 return log_error_errno(r
, "%s: The image does not pass validation.", name
);
3243 case -EADDRNOTAVAIL
:
3244 return log_error_errno(r
, "%s: No root partition for specified root hash found.", name
);
3247 return log_error_errno(r
, "%s: Multiple suitable root partitions found in image.", name
);
3250 return log_error_errno(r
, "%s: No suitable root partition found in image.", name
);
3252 case -EPROTONOSUPPORT
:
3253 return log_error_errno(r
, "Device '%s' is loopback block device with partition scanning turned off, please turn it on.", name
);
3256 return log_error_errno(r
, "%s: Image is not a block device.", name
);
/* 'verity' may be NULL here, hence the guarded access and strna() */
3259 return log_error_errno(r
,
3260 "Combining partitioned images (such as '%s') with external Verity data (such as '%s') not supported. "
3261 "(Consider setting $SYSTEMD_DISSECT_VERITY_SIDECAR=0 to disable automatic discovery of external Verity data.)",
3262 name
, strna(verity
? verity
->data_path
: NULL
));
3266 return log_error_errno(r
, "Failed to dissect image '%s': %m", name
);
/* Tells whether the given partition of 'image' could in principle be protected by Verity.
 *
 * For single-file-system images this is true only for the root designator and only if
 * image->has_verity is set. For all other images it is true if PARTITION_VERITY_OF() yields a
 * valid (non-negative) designator for the partition. */
3272 bool dissected_image_verity_candidate(const DissectedImage
*image
, PartitionDesignator partition_designator
) {
3275 /* Checks if this partition could theoretically do Verity. For non-partitioned images this only works
3276 * if there's an external verity file supplied, for which we can consult .has_verity. For partitioned
3277 * images we only check the partition type.
3279 * This call is used to decide whether to suppress or show a verity column in tabular output of the
3282 if (image
->single_file_system
)
3283 return partition_designator
== PARTITION_ROOT
&& image
->has_verity
;
3285 return PARTITION_VERITY_OF(partition_designator
) >= 0;
/* Tells whether Verity data that can be activated is available for the given partition.
 *
 * image->verity_ready is checked first. For single-file-system images the answer is then true for
 * the root designator only; otherwise the partition's Verity counterpart must exist as designator
 * and must have been found in the image. */
3288 bool dissected_image_verity_ready(const DissectedImage
*image
, PartitionDesignator partition_designator
) {
3289 PartitionDesignator k
;
3293 /* Checks if this partition has verity data available that we can activate. For non-partitioned this
3294 * works for the root partition, for others only if the associated verity partition was found. */
3296 if (!image
->verity_ready
)
3299 if (image
->single_file_system
)
3300 return partition_designator
== PARTITION_ROOT
;
/* k is negative if the partition type has no Verity counterpart */
3302 k
= PARTITION_VERITY_OF(partition_designator
);
3303 return k
>= 0 && image
->partitions
[k
].found
;
/* Tells whether Verity signature data is available for the given partition.
 *
 * Mirrors dissected_image_verity_ready(), but looks at image->verity_sig_ready and at the
 * partition's signature counterpart as returned by PARTITION_VERITY_SIG_OF(). */
3306 bool dissected_image_verity_sig_ready(const DissectedImage
*image
, PartitionDesignator partition_designator
) {
3307 PartitionDesignator k
;
3311 /* Checks if this partition has verity signature data available that we can use. */
3313 if (!image
->verity_sig_ready
)
3316 if (image
->single_file_system
)
3317 return partition_designator
== PARTITION_ROOT
;
/* k is negative if the partition type has no signature counterpart */
3319 k
= PARTITION_VERITY_SIG_OF(partition_designator
);
3320 return k
>= 0 && image
->partitions
[k
].found
;
/* Tears down a whole MountOptions list: as long as the list head 'options' is non-NULL, the head
 * entry is unlinked with LIST_REMOVE(). Releasing the unlinked entry and the return statement are
 * not part of this excerpt. */
3323 MountOptions
* mount_options_free_all(MountOptions
*options
) {
3326 while ((m
= options
)) {
3327 LIST_REMOVE(mount_options
, options
, m
);
/* Looks through the 'options' list for an entry whose partition_designator equals 'designator' and
 * whose options string is non-empty. The values returned for a hit and for a miss are not part of
 * this excerpt (presumably the entry's options string and NULL, to be confirmed). */
3335 const char* mount_options_from_designator(const MountOptions
*options
, PartitionDesignator designator
) {
3336 const MountOptions
*m
;
3338 LIST_FOREACH(mount_options
, m
, options
)
3339 if (designator
== m
->partition_designator
&& !isempty(m
->options
))
/* Mounts a disk image on a freshly created temporary directory inside a private mount namespace.
 *
 * Visible steps, in order: load verity sidecar data for 'image', generate a temporary directory
 * name, attach the image to a loop device (read-only or read-write and with or without partition
 * scanning, depending on 'flags'), dissect it, load the embedded verity signature partition,
 * decrypt interactively, detach the mount namespace, create the mount point, mount the image, and
 * finally relinquish the DM devices and the loop device.
 *
 * On success the mount directory, the loop device and the decrypted image object are handed to the
 * caller through the three (mandatory) return parameters. */
3345 int mount_image_privately_interactively(
3347 DissectImageFlags flags
,
3348 char **ret_directory
,
3349 LoopDevice
**ret_loop_device
,
3350 DecryptedImage
**ret_decrypted_image
) {
3352 _cleanup_(verity_settings_done
) VeritySettings verity
= VERITY_SETTINGS_DEFAULT
;
3353 _cleanup_(loop_device_unrefp
) LoopDevice
*d
= NULL
;
3354 _cleanup_(decrypted_image_unrefp
) DecryptedImage
*decrypted_image
= NULL
;
3355 _cleanup_(dissected_image_unrefp
) DissectedImage
*dissected_image
= NULL
;
3356 _cleanup_(rmdir_and_freep
) char *created_dir
= NULL
;
3357 _cleanup_free_
char *temp
= NULL
;
3360 /* Mounts an OS image at a temporary place, inside a newly created mount namespace of our own. This
3361 * is used by tools such as systemd-tmpfiles or systemd-firstboot to operate on some disk image
3365 assert(ret_directory
);
3366 assert(ret_loop_device
);
3367 assert(ret_decrypted_image
);
3369 r
= verity_settings_load(&verity
, image
, NULL
, NULL
);
3371 return log_error_errno(r
, "Failed to load root hash data: %m");
3373 r
= tempfn_random_child(NULL
, program_invocation_short_name
, &temp
);
3375 return log_error_errno(r
, "Failed to generate temporary mount directory: %m");
3377 r
= loop_device_make_by_path(
3379 FLAGS_SET(flags
, DISSECT_IMAGE_DEVICE_READ_ONLY
) ? O_RDONLY
: O_RDWR
,
3380 FLAGS_SET(flags
, DISSECT_IMAGE_NO_PARTITION_TABLE
) ? 0 : LO_FLAGS_PARTSCAN
,
3383 return log_error_errno(r
, "Failed to set up loopback device for %s: %m", image
);
3385 r
= dissect_image_and_warn(d
->fd
, image
, &verity
, NULL
, d
->diskseq
, d
->uevent_seqnum_not_before
, d
->timestamp_not_before
, flags
, &dissected_image
);
3389 r
= dissected_image_load_verity_sig_partition(dissected_image
, d
->fd
, &verity
);
3393 r
= dissected_image_decrypt_interactively(dissected_image
, NULL
, &verity
, flags
, &decrypted_image
);
3397 r
= detach_mount_namespace();
3399 return log_error_errno(r
, "Failed to detach mount namespace: %m");
3401 r
= mkdir_p(temp
, 0700);
3403 return log_error_errno(r
, "Failed to create mount point: %m");
3405 created_dir
= TAKE_PTR(temp
);
3407 r
= dissected_image_mount_and_warn(dissected_image
, created_dir
, UID_INVALID
, UID_INVALID
, flags
);
3411 if (decrypted_image
) {
3412 r
= decrypted_image_relinquish(decrypted_image
);
3414 return log_error_errno(r
, "Failed to relinquish DM devices: %m");
3417 loop_device_relinquish(d
);
3419 *ret_directory
= TAKE_PTR(created_dir
);
3420 *ret_loop_device
= TAKE_PTR(d
);
3421 *ret_decrypted_image
= TAKE_PTR(decrypted_image
);
/* Maps every PartitionDesignator value to its textual name. Data partitions come first, followed
 * by the "-verity" hash partitions and the "-verity-sig" signature partitions of the root and usr
 * variants, then tmp and var. The table is consumed by the DEFINE_STRING_TABLE_LOOKUP() invocation
 * at the end of this file. */
3426 static const char *const partition_designator_table
[] = {
3427 [PARTITION_ROOT
] = "root",
3428 [PARTITION_ROOT_SECONDARY
] = "root-secondary",
3429 [PARTITION_ROOT_OTHER
] = "root-other",
3430 [PARTITION_USR
] = "usr",
3431 [PARTITION_USR_SECONDARY
] = "usr-secondary",
3432 [PARTITION_USR_OTHER
] = "usr-other",
3433 [PARTITION_HOME
] = "home",
3434 [PARTITION_SRV
] = "srv",
3435 [PARTITION_ESP
] = "esp",
3436 [PARTITION_XBOOTLDR
] = "xbootldr",
3437 [PARTITION_SWAP
] = "swap",
3438 [PARTITION_ROOT_VERITY
] = "root-verity",
3439 [PARTITION_ROOT_SECONDARY_VERITY
] = "root-secondary-verity",
3440 [PARTITION_ROOT_OTHER_VERITY
] = "root-other-verity",
3441 [PARTITION_USR_VERITY
] = "usr-verity",
3442 [PARTITION_USR_SECONDARY_VERITY
] = "usr-secondary-verity",
3443 [PARTITION_USR_OTHER_VERITY
] = "usr-other-verity",
3444 [PARTITION_ROOT_VERITY_SIG
] = "root-verity-sig",
3445 [PARTITION_ROOT_SECONDARY_VERITY_SIG
] = "root-secondary-verity-sig",
3446 [PARTITION_ROOT_OTHER_VERITY_SIG
] = "root-other-verity-sig",
3447 [PARTITION_USR_VERITY_SIG
] = "usr-verity-sig",
3448 [PARTITION_USR_SECONDARY_VERITY_SIG
] = "usr-secondary-verity-sig",
3449 [PARTITION_USR_OTHER_VERITY_SIG
] = "usr-other-verity-sig",
3450 [PARTITION_TMP
] = "tmp",
3451 [PARTITION_VAR
] = "var",
/* Non-interactive helper that attaches, dissects, unlocks and mounts an image on 'dest'.
 *
 * Visible steps, in order: load verity sidecar data, create a loop device (with partition scanning
 * only if no external verity data file is used), dissect the image (retrying as single file
 * system image on -ENOPKG), load the embedded verity signature partition, decrypt, create 'dest',
 * unmount whatever is mounted below it, and mount the image.
 *
 * If 'required_host_os_release_id' plus a version ID or sysext level are given, the image's
 * extension-release data is then validated against them; a mismatch is reported as -ESTALE.
 * Finally the DM devices and the loop device are relinquished. All failures are logged at debug
 * level only. */
3454 int verity_dissect_and_mount(
3457 const MountOptions
*options
,
3458 const char *required_host_os_release_id
,
3459 const char *required_host_os_release_version_id
,
3460 const char *required_host_os_release_sysext_level
,
3461 const char *required_sysext_scope
) {
3463 _cleanup_(loop_device_unrefp
) LoopDevice
*loop_device
= NULL
;
3464 _cleanup_(decrypted_image_unrefp
) DecryptedImage
*decrypted_image
= NULL
;
3465 _cleanup_(dissected_image_unrefp
) DissectedImage
*dissected_image
= NULL
;
3466 _cleanup_(verity_settings_done
) VeritySettings verity
= VERITY_SETTINGS_DEFAULT
;
3467 DissectImageFlags dissect_image_flags
;
3473 r
= verity_settings_load(&verity
, src
, NULL
, NULL
);
3475 return log_debug_errno(r
, "Failed to load root hash: %m");
/* With an external verity data file the image is treated as having no partition table */
3477 dissect_image_flags
= verity
.data_path
? DISSECT_IMAGE_NO_PARTITION_TABLE
: 0;
3479 r
= loop_device_make_by_path(
3482 verity
.data_path
? 0 : LO_FLAGS_PARTSCAN
,
3485 return log_debug_errno(r
, "Failed to create loop device for image: %m");
3491 loop_device
->diskseq
,
3492 loop_device
->uevent_seqnum_not_before
,
3493 loop_device
->timestamp_not_before
,
3494 dissect_image_flags
,
3496 /* No partition table? Might be a single-filesystem image, try again */
3497 if (!verity
.data_path
&& r
== -ENOPKG
)
3502 loop_device
->diskseq
,
3503 loop_device
->uevent_seqnum_not_before
,
3504 loop_device
->timestamp_not_before
,
3505 dissect_image_flags
| DISSECT_IMAGE_NO_PARTITION_TABLE
,
3508 return log_debug_errno(r
, "Failed to dissect image: %m");
3510 r
= dissected_image_load_verity_sig_partition(dissected_image
, loop_device
->fd
, &verity
);
3514 r
= dissected_image_decrypt(
3518 dissect_image_flags
,
3521 return log_debug_errno(r
, "Failed to decrypt dissected image: %m");
3523 r
= mkdir_p_label(dest
, 0755);
3525 return log_debug_errno(r
, "Failed to create destination directory %s: %m", dest
);
3526 r
= umount_recursive(dest
, 0);
3528 return log_debug_errno(r
, "Failed to umount under destination directory %s: %m", dest
);
3530 r
= dissected_image_mount(dissected_image
, dest
, UID_INVALID
, UID_INVALID
, dissect_image_flags
);
3532 return log_debug_errno(r
, "Failed to mount image: %m");
3534 /* If we got os-release values from the caller, then we need to match them with the image's
3535 * extension-release.d/ content. Return -ESTALE if there's any mismatch.
3536 * First, check the distro ID. If that matches, then check the new SYSEXT_LEVEL value if
3537 * available, or else fallback to VERSION_ID. */
3538 if (required_host_os_release_id
&&
3539 (required_host_os_release_version_id
|| required_host_os_release_sysext_level
)) {
3540 _cleanup_strv_free_
char **extension_release
= NULL
;
3542 r
= load_extension_release_pairs(dest
, dissected_image
->image_name
, &extension_release
);
3544 return log_debug_errno(r
, "Failed to parse image %s extension-release metadata: %m", dissected_image
->image_name
);
3546 r
= extension_release_validate(
3547 dissected_image
->image_name
,
3548 required_host_os_release_id
,
3549 required_host_os_release_version_id
,
3550 required_host_os_release_sysext_level
,
3551 required_sysext_scope
,
3554 return log_debug_errno(SYNTHETIC_ERRNO(ESTALE
), "Image %s extension-release metadata does not match the root's", dissected_image
->image_name
);
3556 return log_debug_errno(r
, "Failed to compare image %s extension-release metadata with the root's os-release: %m", dissected_image
->image_name
);
/* The mount now keeps the devices busy, so they can be marked for automatic removal */
3559 if (decrypted_image
) {
3560 r
= decrypted_image_relinquish(decrypted_image
);
3562 return log_debug_errno(r
, "Failed to relinquish decrypted image: %m");
3565 loop_device_relinquish(loop_device
);
/* Instantiates the string lookup helpers for PartitionDesignator on top of
 * partition_designator_table (presumably the to-string and from-string functions; the macro
 * definition is not part of this file excerpt). */
3570 DEFINE_STRING_TABLE_LOOKUP(partition_designator
, PartitionDesignator
);