]> git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/shared/dissect-image.c
tree-wide: use UINT64_MAX or friends
[thirdparty/systemd.git] / src / shared / dissect-image.c
index 91120d72194f490dcc6c51542937e8e0d9d643ef..997b265259bde7a1c5584499569322c3038ac7e8 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: LGPL-2.1+ */
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
 
 #if HAVE_VALGRIND_MEMCHECK_H
 #include <valgrind/memcheck.h>
 #include "def.h"
 #include "device-nodes.h"
 #include "device-util.h"
+#include "discover-image.h"
 #include "dissect-image.h"
 #include "dm-util.h"
 #include "env-file.h"
+#include "extension-release.h"
 #include "fd-util.h"
 #include "fileio.h"
 #include "fs-util.h"
 #include "fsck-util.h"
 #include "gpt.h"
 #include "hexdecoct.h"
-#include "hostname-util.h"
+#include "hostname-setup.h"
 #include "id128-util.h"
+#include "import-util.h"
 #include "mkdir.h"
 #include "mount-util.h"
 #include "mountpoint-util.h"
@@ -109,172 +112,241 @@ not_found:
 }
 
 #if HAVE_BLKID
-/* Detect RPMB and Boot partitions, which are not listed by blkid.
- * See https://github.com/systemd/systemd/issues/5806. */
-static bool device_is_mmc_special_partition(sd_device *d) {
-        const char *sysname;
+static int enumerator_for_parent(sd_device *d, sd_device_enumerator **ret) {
+        _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+        int r;
 
         assert(d);
+        assert(ret);
 
-        if (sd_device_get_sysname(d, &sysname) < 0)
-                return false;
+        r = sd_device_enumerator_new(&e);
+        if (r < 0)
+                return r;
+
+        r = sd_device_enumerator_allow_uninitialized(e);
+        if (r < 0)
+                return r;
 
-        return startswith(sysname, "mmcblk") &&
-                (endswith(sysname, "rpmb") || endswith(sysname, "boot0") || endswith(sysname, "boot1"));
+        r = sd_device_enumerator_add_match_parent(e, d);
+        if (r < 0)
+                return r;
+
+        *ret = TAKE_PTR(e);
+        return 0;
 }
 
-static bool device_is_block(sd_device *d) {
-        const char *ss;
+static int device_is_partition(sd_device *d, blkid_partition pp) {
+        blkid_loff_t bsize, bstart;
+        uint64_t size, start;
+        int partno, bpartno, r;
+        const char *ss, *v;
 
         assert(d);
+        assert(pp);
 
-        if (sd_device_get_subsystem(d, &ss) < 0)
+        r = sd_device_get_subsystem(d, &ss);
+        if (r < 0)
+                return r;
+        if (!streq(ss, "block"))
                 return false;
 
-        return streq(ss, "block");
-}
+        r = sd_device_get_sysattr_value(d, "partition", &v);
+        if (r == -ENOENT ||        /* Not a partition device */
+            ERRNO_IS_PRIVILEGE(r)) /* Not ready to access? */
+                return false;
+        if (r < 0)
+                return r;
+        r = safe_atoi(v, &partno);
+        if (r < 0)
+                return r;
 
-static int enumerator_for_parent(sd_device *d, sd_device_enumerator **ret) {
-        _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
-        int r;
+        errno = 0;
+        bpartno = blkid_partition_get_partno(pp);
+        if (bpartno < 0)
+                return errno_or_else(EIO);
 
-        assert(d);
-        assert(ret);
+        if (partno != bpartno)
+                return false;
 
-        r = sd_device_enumerator_new(&e);
+        r = sd_device_get_sysattr_value(d, "start", &v);
         if (r < 0)
                 return r;
-
-        r = sd_device_enumerator_allow_uninitialized(e);
+        r = safe_atou64(v, &start);
         if (r < 0)
                 return r;
 
-        r = sd_device_enumerator_add_match_parent(e, d);
+        errno = 0;
+        bstart = blkid_partition_get_start(pp);
+        if (bstart < 0)
+                return errno_or_else(EIO);
+
+        if (start != (uint64_t) bstart)
+                return false;
+
+        r = sd_device_get_sysattr_value(d, "size", &v);
+        if (r < 0)
+                return r;
+        r = safe_atou64(v, &size);
         if (r < 0)
                 return r;
 
-        *ret = TAKE_PTR(e);
-        return 0;
+        errno = 0;
+        bsize = blkid_partition_get_size(pp);
+        if (bsize < 0)
+                return errno_or_else(EIO);
+
+        if (size != (uint64_t) bsize)
+                return false;
+
+        return true;
 }
 
-static int wait_for_partitions_to_appear(
-                int fd,
-                sd_device *d,
-                unsigned num_partitions,
-                DissectImageFlags flags,
-                sd_device_enumerator **ret_enumerator) {
+static int find_partition(
+                sd_device *parent,
+                blkid_partition pp,
+                sd_device **ret) {
 
         _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
         sd_device *q;
-        unsigned n;
         int r;
 
-        assert(fd >= 0);
-        assert(d);
-        assert(ret_enumerator);
+        assert(parent);
+        assert(pp);
+        assert(ret);
 
-        r = enumerator_for_parent(d, &e);
+        r = enumerator_for_parent(parent, &e);
         if (r < 0)
                 return r;
 
-        /* Count the partitions enumerated by the kernel */
-        n = 0;
         FOREACH_DEVICE(e, q) {
-                if (sd_device_get_devnum(q, NULL) < 0)
-                        continue;
-                if (!device_is_block(q))
-                        continue;
-                if (device_is_mmc_special_partition(q))
-                        continue;
-
-                if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
-                        r = device_wait_for_initialization(q, "block", USEC_INFINITY, NULL);
-                        if (r < 0)
-                                return r;
+                r = device_is_partition(q, pp);
+                if (r < 0)
+                        return r;
+                if (r > 0) {
+                        *ret = sd_device_ref(q);
+                        return 0;
                 }
-
-                n++;
         }
 
-        if (n == num_partitions + 1) {
-                *ret_enumerator = TAKE_PTR(e);
-                return 0; /* success! */
-        }
-        if (n > num_partitions + 1)
-                return log_debug_errno(SYNTHETIC_ERRNO(EIO),
-                                       "blkid and kernel partition lists do not match.");
+        return -ENXIO;
+}
+
+struct wait_data {
+        sd_device *parent_device;
+        blkid_partition blkidp;
+        sd_device *found;
+};
 
-        /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running or it
-         * got EBUSY because udev already opened the device. Let's reprobe the device, which is a synchronous
-         * call that waits until probing is complete. */
+static inline void wait_data_done(struct wait_data *d) {
+        sd_device_unref(d->found);
+}
 
-        for (unsigned j = 0; ; j++) {
-                if (j++ > 20)
-                        return -EBUSY;
+static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+        const char *parent1_path, *parent2_path;
+        struct wait_data *w = userdata;
+        sd_device *pp;
+        int r;
 
-                if (ioctl(fd, BLKRRPART, 0) >= 0)
-                        break;
-                r = -errno;
-                if (r == -EINVAL) {
-                        /* If we are running on a block device that has partition scanning off, return an
-                         * explicit recognizable error about this, so that callers can generate a proper
-                         * message explaining the situation. */
+        assert(w);
 
-                        r = blockdev_partscan_enabled(fd);
-                        if (r < 0)
-                                return r;
-                        if (r == 0)
-                                return log_debug_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT),
-                                                       "Device is a loop device and partition scanning is off!");
+        if (device_for_action(device, SD_DEVICE_REMOVE))
+                return 0;
 
-                        return -EINVAL; /* original error */
-                }
-                if (r != -EBUSY)
-                        return r;
+        r = sd_device_get_parent(device, &pp);
+        if (r < 0)
+                return 0; /* Doesn't have a parent? Not relevant to us */
 
-                /* If something else has the device open, such as an udev rule, the ioctl will return
-                 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a bit,
-                 * and try again.
-                 *
-                 * This is really something they should fix in the kernel! */
-                (void) usleep(50 * USEC_PER_MSEC);
+        r = sd_device_get_syspath(pp, &parent1_path); /* Check parent of device of this action */
+        if (r < 0)
+                goto finish;
 
-        }
+        r = sd_device_get_syspath(w->parent_device, &parent2_path); /* Check parent of device we are looking for */
+        if (r < 0)
+                goto finish;
+
+        if (!path_equal(parent1_path, parent2_path))
+                return 0; /* Has a different parent than what we need, not interesting to us */
+
+        r = device_is_partition(device, w->blkidp);
+        if (r < 0)
+                goto finish;
+        if (r == 0) /* Not the one we need */
+                return 0;
+
+        /* It's the one we need! Yay! */
+        assert(!w->found);
+        w->found = sd_device_ref(device);
+        r = 0;
 
-        return -EAGAIN; /* no success yet, try again */
+finish:
+        return sd_event_exit(sd_device_monitor_get_event(monitor), r);
 }
 
-static int loop_wait_for_partitions_to_appear(
-                int fd,
-                sd_device *d,
-                unsigned num_partitions,
-                DissectImageFlags flags,
-                sd_device_enumerator **ret_enumerator) {
-        _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+static int wait_for_partition_device(
+                sd_device *parent,
+                blkid_partition pp,
+                usec_t deadline,
+                sd_device **ret) {
+
+        _cleanup_(sd_event_source_unrefp) sd_event_source *timeout_source = NULL;
+        _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL;
+        _cleanup_(sd_event_unrefp) sd_event *event = NULL;
         int r;
 
-        assert(fd >= 0);
-        assert(d);
-        assert(ret_enumerator);
+        assert(parent);
+        assert(pp);
+        assert(ret);
 
-        log_debug("Waiting for device (parent + %d partitions) to appear...", num_partitions);
+        r = find_partition(parent, pp, ret);
+        if (r != -ENXIO)
+                return r;
 
-        if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
-                r = device_wait_for_initialization(d, "block", USEC_INFINITY, &device);
-                if (r < 0)
-                        return r;
-        } else
-                device = sd_device_ref(d);
+        r = sd_event_new(&event);
+        if (r < 0)
+                return r;
 
-        for (unsigned i = 0; i < N_DEVICE_NODE_LIST_ATTEMPTS; i++) {
-                r = wait_for_partitions_to_appear(fd, device, num_partitions, flags, ret_enumerator);
-                if (r != -EAGAIN)
+        r = sd_device_monitor_new(&monitor);
+        if (r < 0)
+                return r;
+
+        r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, "block", "partition");
+        if (r < 0)
+                return r;
+
+        r = sd_device_monitor_attach_event(monitor, event);
+        if (r < 0)
+                return r;
+
+        _cleanup_(wait_data_done) struct wait_data w = {
+                .parent_device = parent,
+                .blkidp = pp,
+        };
+
+        r = sd_device_monitor_start(monitor, device_monitor_handler, &w);
+        if (r < 0)
+                return r;
+
+        /* Check again, the partition might have appeared in the meantime */
+        r = find_partition(parent, pp, ret);
+        if (r != -ENXIO)
+                return r;
+
+        if (deadline != USEC_INFINITY) {
+                r = sd_event_add_time(
+                                event, &timeout_source,
+                                CLOCK_MONOTONIC, deadline, 0,
+                                NULL, INT_TO_PTR(-ETIMEDOUT));
+                if (r < 0)
                         return r;
         }
 
-        return log_debug_errno(SYNTHETIC_ERRNO(ENXIO),
-                               "Kernel partitions dit not appear within %d attempts",
-                               N_DEVICE_NODE_LIST_ATTEMPTS);
+        r = sd_event_loop(event);
+        if (r < 0)
+                return r;
+
+        assert(w.found);
+        *ret = TAKE_PTR(w.found);
+        return 0;
 }
 
 static void check_partition_flags(
@@ -300,8 +372,90 @@ static void check_partition_flags(
         }
 }
 
+static int device_wait_for_initialization_harder(
+                sd_device *device,
+                const char *subsystem,
+                usec_t deadline,
+                sd_device **ret) {
+
+        _cleanup_free_ char *uevent = NULL;
+        usec_t start, left, retrigger_timeout;
+        int r;
+
+        start = now(CLOCK_MONOTONIC);
+        left = usec_sub_unsigned(deadline, start);
+
+        if (DEBUG_LOGGING) {
+                char buf[FORMAT_TIMESPAN_MAX];
+                const char *sn = NULL;
+
+                (void) sd_device_get_sysname(device, &sn);
+                log_debug("Waiting for device '%s' to initialize for %s.", strna(sn), format_timespan(buf, sizeof(buf), left, 0));
+        }
+
+        if (left != USEC_INFINITY)
+                retrigger_timeout = CLAMP(left / 4, 1 * USEC_PER_SEC, 5 * USEC_PER_SEC); /* A fourth of the total timeout, but let's clamp to 1s…5s range */
+        else
+                retrigger_timeout = 2 * USEC_PER_SEC;
+
+        for (;;) {
+                usec_t local_deadline, n;
+                bool last_try;
+
+                n = now(CLOCK_MONOTONIC);
+                assert(n >= start);
+
+                /* Find next deadline, when we'll retrigger */
+                local_deadline = start +
+                        DIV_ROUND_UP(n - start, retrigger_timeout) * retrigger_timeout;
+
+                if (deadline != USEC_INFINITY && deadline <= local_deadline) {
+                        local_deadline = deadline;
+                        last_try = true;
+                } else
+                        last_try = false;
+
+                r = device_wait_for_initialization(device, subsystem, local_deadline, ret);
+                if (r >= 0 && DEBUG_LOGGING) {
+                        char buf[FORMAT_TIMESPAN_MAX];
+                        const char *sn = NULL;
+
+                        (void) sd_device_get_sysname(device, &sn);
+                        log_debug("Successfully waited for device '%s' to initialize for %s.", strna(sn), format_timespan(buf, sizeof(buf), usec_sub_unsigned(now(CLOCK_MONOTONIC), start), 0));
+
+                }
+                if (r != -ETIMEDOUT || last_try)
+                        return r;
+
+                if (!uevent) {
+                        const char *syspath;
+
+                        r = sd_device_get_syspath(device, &syspath);
+                        if (r < 0)
+                                return r;
+
+                        uevent = path_join(syspath, "uevent");
+                        if (!uevent)
+                                return -ENOMEM;
+                }
+
+                if (DEBUG_LOGGING) {
+                        char buf[FORMAT_TIMESPAN_MAX];
+
+                        log_debug("Device didn't initialize within %s, assuming lost event. Retriggering device through %s.",
+                                  format_timespan(buf, sizeof(buf), usec_sub_unsigned(now(CLOCK_MONOTONIC), start), 0),
+                                  uevent);
+                }
+
+                r = write_string_file(uevent, "change", WRITE_STRING_FILE_DISABLE_BUFFER);
+                if (r < 0)
+                        return r;
+        }
+}
 #endif
 
+#define DEVICE_TIMEOUT_USEC (45 * USEC_PER_SEC)
+
 int dissect_image(
                 int fd,
                 const VeritySettings *verity,
@@ -310,20 +464,23 @@ int dissect_image(
                 DissectedImage **ret) {
 
 #if HAVE_BLKID
-        sd_id128_t root_uuid = SD_ID128_NULL, root_verity_uuid = SD_ID128_NULL,
-                usr_uuid = SD_ID128_NULL, usr_verity_uuid = SD_ID128_NULL;
-        _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+#ifdef GPT_ROOT_NATIVE
+        sd_id128_t root_uuid = SD_ID128_NULL, root_verity_uuid = SD_ID128_NULL;
+#endif
+#ifdef GPT_USR_NATIVE
+        sd_id128_t usr_uuid = SD_ID128_NULL, usr_verity_uuid = SD_ID128_NULL;
+#endif
         bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
         _cleanup_(sd_device_unrefp) sd_device *d = NULL;
         _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
         _cleanup_(blkid_free_probep) blkid_probe b = NULL;
         _cleanup_free_ char *generic_node = NULL;
         sd_id128_t generic_uuid = SD_ID128_NULL;
-        const char *pttype = NULL;
+        const char *pttype = NULL, *sysname = NULL;
         blkid_partlist pl;
-        int r, generic_nr;
+        int r, generic_nr, n_partitions;
         struct stat st;
-        sd_device *q;
+        usec_t deadline;
 
         assert(fd >= 0);
         assert(ret);
@@ -355,13 +512,19 @@ int dissect_image(
 
                 /* If the verity data declares it's for the /usr partition, then search for that, in all
                  * other cases assume it's for the root partition. */
+#ifdef GPT_USR_NATIVE
                 if (verity->designator == PARTITION_USR) {
                         usr_uuid = fsuuid;
                         usr_verity_uuid = vuuid;
                 } else {
+#endif
+#ifdef GPT_ROOT_NATIVE
                         root_uuid = fsuuid;
                         root_verity_uuid = vuuid;
+#endif
+#ifdef GPT_USR_NATIVE
                 }
+#endif
         }
 
         if (fstat(fd, &st) < 0)
@@ -370,6 +533,27 @@ int dissect_image(
         if (!S_ISBLK(st.st_mode))
                 return -ENOTBLK;
 
+        r = sd_device_new_from_stat_rdev(&d, &st);
+        if (r < 0)
+                return r;
+
+        if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
+                _cleanup_(sd_device_unrefp) sd_device *initialized = NULL;
+
+                /* If udev support is enabled, then let's wait for the device to be initialized before we do anything. */
+
+                r = device_wait_for_initialization_harder(
+                                d,
+                                "block",
+                                usec_add(now(CLOCK_MONOTONIC), DEVICE_TIMEOUT_USEC),
+                                &initialized);
+                if (r < 0)
+                        return r;
+
+                sd_device_unref(d);
+                d = TAKE_PTR(initialized);
+        }
+
         b = blkid_new_probe();
         if (!b)
                 return -ENOMEM;
@@ -399,9 +583,33 @@ int dissect_image(
         if (!m)
                 return -ENOMEM;
 
-        r = sd_device_new_from_devnum(&d, 'b', st.st_rdev);
+        r = sd_device_get_sysname(d, &sysname);
         if (r < 0)
-                return r;
+                return log_debug_errno(r, "Failed to get device sysname: %m");
+        if (startswith(sysname, "loop")) {
+                _cleanup_free_ char *name_stripped = NULL;
+                const char *full_path;
+
+                r = sd_device_get_sysattr_value(d, "loop/backing_file", &full_path);
+                if (r < 0)
+                        log_debug_errno(r, "Failed to lookup image name via loop device backing file sysattr, ignoring: %m");
+                else {
+                        r = raw_strip_suffixes(basename(full_path), &name_stripped);
+                        if (r < 0)
+                                return r;
+                }
+
+                free_and_replace(m->image_name, name_stripped);
+        } else {
+                r = free_and_strdup(&m->image_name, sysname);
+                if (r < 0)
+                        return r;
+        }
+
+        if (!image_name_is_valid(m->image_name)) {
+                log_debug("Image name %s is not valid, ignoring", strempty(m->image_name));
+                m->image_name = mfree(m->image_name);
+        }
 
         if ((!(flags & DISSECT_IMAGE_GPT_ONLY) &&
             (flags & DISSECT_IMAGE_REQUIRE_ROOT)) ||
@@ -412,8 +620,8 @@ int dissect_image(
 
                 (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
                 if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
+                        const char *fstype = NULL, *options = NULL, *devname = NULL;
                         _cleanup_free_ char *t = NULL, *n = NULL, *o = NULL;
-                        const char *fstype = NULL, *options = NULL;
 
                         /* OK, we have found a file system, that's our root partition then. */
                         (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
@@ -424,10 +632,14 @@ int dissect_image(
                                         return -ENOMEM;
                         }
 
-                        r = device_path_make_major_minor(st.st_mode, st.st_rdev, &n);
+                        r = sd_device_get_devname(d, &devname);
                         if (r < 0)
                                 return r;
 
+                        n = strdup(devname);
+                        if (!n)
+                                return -ENOMEM;
+
                         m->single_file_system = true;
                         m->verity = verity && verity->root_hash && verity->data_path && (verity->designator < 0 || verity->designator == PARTITION_ROOT);
                         m->can_verity = verity && verity->data_path;
@@ -451,13 +663,7 @@ int dissect_image(
 
                         m->encrypted = streq_ptr(fstype, "crypto_LUKS");
 
-                        /* Even on a single partition we need to wait for udev to create the
-                         * /dev/block/X:Y symlink to /dev/loopZ */
-                        r = loop_wait_for_partitions_to_appear(fd, d, 0, flags, &e);
-                        if (r < 0)
-                                return r;
                         *ret = TAKE_PTR(m);
-
                         return 0;
                 }
         }
@@ -472,48 +678,51 @@ int dissect_image(
         if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
                 return -ENOPKG;
 
+        /* Safety check: refuse block devices that carry a partition table but for which the kernel doesn't
+         * do partition scanning. */
+        r = blockdev_partscan_enabled(fd);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return -EPROTONOSUPPORT;
+
         errno = 0;
         pl = blkid_probe_get_partitions(b);
         if (!pl)
                 return errno_or_else(ENOMEM);
 
-        r = loop_wait_for_partitions_to_appear(fd, d, blkid_partlist_numof_partitions(pl), flags, &e);
-        if (r < 0)
-                return r;
+        errno = 0;
+        n_partitions = blkid_partlist_numof_partitions(pl);
+        if (n_partitions < 0)
+                return errno_or_else(EIO);
 
-        FOREACH_DEVICE(e, q) {
+        deadline = usec_add(now(CLOCK_MONOTONIC), DEVICE_TIMEOUT_USEC);
+        for (int i = 0; i < n_partitions; i++) {
+                _cleanup_(sd_device_unrefp) sd_device *q = NULL;
                 unsigned long long pflags;
                 blkid_partition pp;
                 const char *node;
-                dev_t qn;
                 int nr;
 
-                r = sd_device_get_devnum(q, &qn);
-                if (r < 0)
-                        continue;
-
-                if (st.st_rdev == qn)
-                        continue;
-
-                if (!device_is_block(q))
-                        continue;
+                errno = 0;
+                pp = blkid_partlist_get_partition(pl, i);
+                if (!pp)
+                        return errno_or_else(EIO);
 
-                if (device_is_mmc_special_partition(q))
-                        continue;
+                r = wait_for_partition_device(d, pp, deadline, &q);
+                if (r < 0)
+                        return r;
 
                 r = sd_device_get_devname(q, &node);
                 if (r < 0)
-                        continue;
-
-                pp = blkid_partlist_devno_to_partition(pl, qn);
-                if (!pp)
-                        continue;
+                        return r;
 
                 pflags = blkid_partition_get_flags(pp);
 
+                errno = 0;
                 nr = blkid_partition_get_partno(pp);
                 if (nr < 0)
-                        continue;
+                        return errno_or_else(EIO);
 
                 if (is_gpt) {
                         PartitionDesignator designator = _PARTITION_DESIGNATOR_INVALID;
@@ -1020,9 +1229,11 @@ DissectedImage* dissected_image_unref(DissectedImage *m) {
                 free(m->partitions[i].mount_options);
         }
 
+        free(m->image_name);
         free(m->hostname);
         strv_free(m->machine_info);
         strv_free(m->os_release);
+        strv_free(m->extension_release);
 
         return mfree(m);
 }
@@ -1163,12 +1374,12 @@ static int mount_partition(
                 if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
                         return -ENOMEM;
 
-                if (!strextend_with_separator(&options, ",", uid_option, NULL))
+                if (!strextend_with_separator(&options, ",", uid_option))
                         return -ENOMEM;
         }
 
         if (!isempty(m->mount_options))
-                if (!strextend_with_separator(&options, ",", m->mount_options, NULL))
+                if (!strextend_with_separator(&options, ",", m->mount_options))
                         return -ENOMEM;
 
         if (FLAGS_SET(flags, DISSECT_IMAGE_MKDIR)) {
@@ -1193,7 +1404,7 @@ int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift,
         /* Returns:
          *
          *  -ENXIO        → No root partition found
-         *  -EMEDIUMTYPE  → DISSECT_IMAGE_VALIDATE_OS set but no os-release file found
+         *  -EMEDIUMTYPE  → DISSECT_IMAGE_VALIDATE_OS set but no os-release/extension-release file found
          *  -EUNATCH      → Encrypted partition found for which no dm-crypt was set up yet
          *  -EUCLEAN      → fsck for file system failed
          *  -EBUSY        → File system already mounted/used elsewhere (kernel)
@@ -1223,8 +1434,13 @@ int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift,
                         r = path_is_os_tree(where);
                         if (r < 0)
                                 return r;
-                        if (r == 0)
-                                return -EMEDIUMTYPE;
+                        if (r == 0) {
+                                r = path_is_extension_tree(where, m->image_name);
+                                if (r < 0)
+                                        return r;
+                                if (r == 0)
+                                        return -EMEDIUMTYPE;
+                        }
                 }
         }
 
@@ -1304,7 +1520,7 @@ int dissected_image_mount_and_warn(DissectedImage *m, const char *where, uid_t u
         if (r == -ENXIO)
                 return log_error_errno(r, "Not root file system found in image.");
         if (r == -EMEDIUMTYPE)
-                return log_error_errno(r, "No suitable os-release file in image found.");
+                return log_error_errno(r, "No suitable os-release/extension-release file in image found.");
         if (r == -EUNATCH)
                 return log_error_errno(r, "Encrypted file system discovered, but decryption not requested.");
         if (r == -EUCLEAN)
@@ -1335,13 +1551,12 @@ struct DecryptedImage {
 
 DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
 #if HAVE_LIBCRYPTSETUP
-        size_t i;
         int r;
 
         if (!d)
                 return NULL;
 
-        for (i = 0; i < d->n_decrypted; i++) {
+        for (size_t i = 0; i < d->n_decrypted; i++) {
                 DecryptedPartition *p = d->decrypted + i;
 
                 if (p->device && p->name && !p->relinquished) {
@@ -1495,7 +1710,7 @@ static int verity_can_reuse(
 #if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
         /* Ensure that, if signatures are supported, we only reuse the device if the previous mount used the
          * same settings, so that a previous unsigned mount will not be reused if the user asks to use
-         * signing for the new one, and viceversa. */
+         * signing for the new one, and vice versa. */
         if (!!verity->root_hash_sig != !!(crypt_params.flags & CRYPT_VERITY_ROOT_HASH_SIGNATURE))
                 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Error opening verity device, it already exists but signature settings are not the same.");
 #endif
@@ -1504,12 +1719,12 @@ static int verity_can_reuse(
         return 0;
 }
 
-static inline void dm_deferred_remove_clean(char *name) {
+static inline char* dm_deferred_remove_clean(char *name) {
         if (!name)
-                return;
+                return NULL;
 
         (void) sym_crypt_deactivate_by_name(NULL, name, CRYPT_DEACTIVATE_DEFERRED);
-        free(name);
+        return mfree(name);
 }
 DEFINE_TRIVIAL_CLEANUP_FUNC(char *, dm_deferred_remove_clean);
 
@@ -1643,7 +1858,7 @@ static int verity_partition(
                         if (r == 0) {
                                 /* devmapper might say that the device exists, but the devlink might not yet have been
                                  * created. Check and wait for the udev event in that case. */
-                                r = device_wait_for_devlink(node, "block", 100 * USEC_PER_MSEC, NULL);
+                                r = device_wait_for_devlink(node, "block", usec_add(now(CLOCK_MONOTONIC), 100 * USEC_PER_MSEC), NULL);
                                 /* Fallback to activation with a unique device if it's taking too long */
                                 if (r == -ETIMEDOUT)
                                         break;
@@ -1787,19 +2002,15 @@ int dissected_image_decrypt_interactively(
 }
 
 int decrypted_image_relinquish(DecryptedImage *d) {
-
-#if HAVE_LIBCRYPTSETUP
-        size_t i;
-        int r;
-#endif
-
         assert(d);
 
-        /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so
-         * that we don't clean it up ourselves either anymore */
+        /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a
+         * boolean so that we don't clean it up ourselves either anymore */
 
 #if HAVE_LIBCRYPTSETUP
-        for (i = 0; i < d->n_decrypted; i++) {
+        int r;
+
+        for (size_t i = 0; i < d->n_decrypted; i++) {
                 DecryptedPartition *p = d->decrypted + i;
 
                 if (p->relinquished)
@@ -1946,9 +2157,9 @@ int verity_settings_load(
                 }
         }
 
-        if (verity->root_hash && !verity->root_hash_sig) {
+        if ((root_hash || verity->root_hash) && !verity->root_hash_sig) {
                 if (root_hash_sig_path) {
-                        r = read_full_file_full(AT_FDCWD, root_hash_sig_path, 0, (char**) &root_hash_sig, &root_hash_sig_size);
+                        r = read_full_file(root_hash_sig_path, (char**) &root_hash_sig, &root_hash_sig_size);
                         if (r < 0 && r != -ENOENT)
                                 return r;
 
@@ -1964,7 +2175,7 @@ int verity_settings_load(
                                 if (!p)
                                         return -ENOMEM;
 
-                                r = read_full_file_full(AT_FDCWD, p, 0, (char**) &root_hash_sig, &root_hash_sig_size);
+                                r = read_full_file(p, (char**) &root_hash_sig, &root_hash_sig_size);
                                 if (r < 0 && r != -ENOENT)
                                         return r;
                                 if (r >= 0)
@@ -1978,7 +2189,7 @@ int verity_settings_load(
                                 if (!p)
                                         return -ENOMEM;
 
-                                r = read_full_file_full(AT_FDCWD, p, 0, (char**) &root_hash_sig, &root_hash_sig_size);
+                                r = read_full_file(p, (char**) &root_hash_sig, &root_hash_sig_size);
                                 if (r < 0 && r != -ENOENT)
                                         return r;
                                 if (r >= 0)
@@ -2030,24 +2241,26 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
                 META_MACHINE_ID,
                 META_MACHINE_INFO,
                 META_OS_RELEASE,
+                META_EXTENSION_RELEASE,
                 _META_MAX,
         };
 
-        static const char *const paths[_META_MAX] = {
-                [META_HOSTNAME]     = "/etc/hostname\0",
-                [META_MACHINE_ID]   = "/etc/machine-id\0",
-                [META_MACHINE_INFO] = "/etc/machine-info\0",
-                [META_OS_RELEASE]   = "/etc/os-release\0"
-                                      "/usr/lib/os-release\0",
+        const char *paths[_META_MAX] = { /* not static: one slot gets a per-call strjoina() stack string */
+                [META_HOSTNAME]          = "/etc/hostname\0",
+                [META_MACHINE_ID]        = "/etc/machine-id\0",
+                [META_MACHINE_INFO]      = "/etc/machine-info\0",
+                [META_OS_RELEASE]        = ("/etc/os-release\0"
+                                           "/usr/lib/os-release\0"),
+                [META_EXTENSION_RELEASE] = NULL, /* filled in only if the image name is known */
+        };
 
-        _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
+        _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL, **extension_release = NULL;
         _cleanup_close_pair_ int error_pipe[2] = { -1, -1 };
         _cleanup_(rmdir_and_freep) char *t = NULL;
         _cleanup_(sigkill_waitp) pid_t child = 0;
         sd_id128_t machine_id = SD_ID128_NULL;
         _cleanup_free_ char *hostname = NULL;
-        unsigned n_meta_initialized = 0, k;
+        unsigned n_meta_initialized = 0;
         int fds[2 * _META_MAX], r, v;
         ssize_t n;
 
@@ -2055,11 +2268,24 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
 
         assert(m);
 
-        for (; n_meta_initialized < _META_MAX; n_meta_initialized ++)
+        /* As per the os-release spec, if the image is an extension it will have a file
+         * named after the image name in extension-release.d/ */
+        if (m->image_name)
+                paths[META_EXTENSION_RELEASE] = strjoina("/usr/lib/extension-release.d/extension-release.", m->image_name);
+        else
+                log_debug("No image name available, will skip extension-release metadata");
+
+        for (; n_meta_initialized < _META_MAX; n_meta_initialized ++) {
+                if (!paths[n_meta_initialized]) {
+                        fds[2*n_meta_initialized] = fds[2*n_meta_initialized+1] = -1;
+                        continue;
+                }
+
                 if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
                         r = -errno;
                         goto finish;
                 }
+        }
 
         r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
         if (r < 0)
@@ -2085,10 +2311,13 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
                         _exit(EXIT_FAILURE);
                 }
 
-                for (k = 0; k < _META_MAX; k++) {
+                for (unsigned k = 0; k < _META_MAX; k++) {
                         _cleanup_close_ int fd = -ENOENT;
                         const char *p;
 
+                        if (!paths[k])
+                                continue;
+
                         fds[2*k] = safe_close(fds[2*k]);
 
                         NULSTR_FOREACH(p, paths[k]) {
@@ -2102,7 +2331,7 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
                                 continue;
                         }
 
-                        r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0);
+                        r = copy_bytes(fd, fds[2*k+1], UINT64_MAX, 0);
                         if (r < 0) {
                                 (void) write(error_pipe[1], &r, sizeof(r));
                                 _exit(EXIT_FAILURE);
@@ -2116,9 +2345,12 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
 
         error_pipe[1] = safe_close(error_pipe[1]);
 
-        for (k = 0; k < _META_MAX; k++) {
+        for (unsigned k = 0; k < _META_MAX; k++) {
                 _cleanup_fclose_ FILE *f = NULL;
 
+                if (!paths[k])
+                        continue;
+
                 fds[2*k+1] = safe_close(fds[2*k+1]);
 
                 f = take_fdopen(&fds[2*k], "r");
@@ -2148,6 +2380,8 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
                                         log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
                         } else if (r == 0)
                                 log_debug("/etc/machine-id file is empty.");
+                        else if (streq(line, "uninitialized"))
+                                log_debug("/etc/machine-id file is uninitialized (likely aborted first boot).");
                         else
                                 log_debug("/etc/machine-id has unexpected length %i.", r);
 
@@ -2167,6 +2401,13 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
                                 log_debug_errno(r, "Failed to read OS release file: %m");
 
                         break;
+
+                case META_EXTENSION_RELEASE:
+                        r = load_env_file_pairs(f, "extension-release", &extension_release);
+                        if (r < 0)
+                                log_debug_errno(r, "Failed to read extension release file: %m");
+
+                        break;
                 }
         }
 
@@ -2190,9 +2431,10 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
         m->machine_id = machine_id;
         strv_free_and_replace(m->machine_info, machine_info);
         strv_free_and_replace(m->os_release, os_release);
+        strv_free_and_replace(m->extension_release, extension_release);
 
 finish:
-        for (k = 0; k < n_meta_initialized; k++)
+        for (unsigned k = 0; k < n_meta_initialized; k++)
                 safe_close_pair(fds + 2*k);
 
         return r;
@@ -2375,4 +2617,108 @@ static const char *const partition_designator_table[] = {
         [PARTITION_VAR] = "var",
 };
 
+int verity_dissect_and_mount(
+                const char *src,
+                const char *dest,
+                const MountOptions *options,
+                const char *required_host_os_release_id,
+                const char *required_host_os_release_version_id,
+                const char *required_host_os_release_sysext_level) {
+        /* Attach 'src' to a loop device, dissect and decrypt it, then mount it on 'dest'. Returns 0 or -errno. */
+        _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
+        _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
+        _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
+        _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
+        DissectImageFlags dissect_image_flags;
+        int r;
+
+        assert(src);
+        assert(dest);
+
+        r = verity_settings_load(&verity, src, NULL, NULL);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to load root hash: %m");
+
+        dissect_image_flags = verity.data_path ? DISSECT_IMAGE_NO_PARTITION_TABLE : 0;
+
+        r = loop_device_make_by_path(
+                        src,
+                        -1,
+                        verity.data_path ? 0 : LO_FLAGS_PARTSCAN,
+                        &loop_device);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to create loop device for image: %m");
+
+        r = dissect_image(
+                        loop_device->fd,
+                        &verity,
+                        options,
+                        dissect_image_flags,
+                        &dissected_image);
+        /* No partition table (-ENOPKG)? Might be a single-filesystem image, retry without expecting one */
+        if (!verity.data_path && r == -ENOPKG)
+                 r = dissect_image(
+                                loop_device->fd,
+                                &verity,
+                                options,
+                                dissect_image_flags|DISSECT_IMAGE_NO_PARTITION_TABLE,
+                                &dissected_image);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to dissect image: %m");
+
+        r = dissected_image_decrypt(
+                        dissected_image,
+                        NULL,
+                        &verity,
+                        dissect_image_flags,
+                        &decrypted_image);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to decrypt dissected image: %m");
+
+        r = mkdir_p_label(dest, 0755);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to create destination directory %s: %m", dest);
+        r = umount_recursive(dest, 0);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to umount under destination directory %s: %m", dest);
+
+        r = dissected_image_mount(dissected_image, dest, UID_INVALID, dissect_image_flags);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to mount image: %m");
+
+        /* If we got os-release values from the caller (an ID plus VERSION_ID and/or SYSEXT_LEVEL), then
+         * we need to match them with the image's extension-release.d/ content. Fail with -ESTALE if
+         * there's any mismatch. First, check the distro ID. If that matches, then check the new
+         * SYSEXT_LEVEL value if available, or else fallback to VERSION_ID. */
+        if (required_host_os_release_id &&
+            (required_host_os_release_version_id || required_host_os_release_sysext_level)) {
+                _cleanup_strv_free_ char **extension_release = NULL;
+
+                r = load_extension_release_pairs(dest, dissected_image->image_name, &extension_release);
+                if (r < 0)
+                        return log_debug_errno(r, "Failed to parse image %s extension-release metadata: %m", dissected_image->image_name);
+
+                r = extension_release_validate(
+                        dissected_image->image_name,
+                        required_host_os_release_id,
+                        required_host_os_release_version_id,
+                        required_host_os_release_sysext_level,
+                        extension_release);
+                if (r == 0)
+                        return log_debug_errno(SYNTHETIC_ERRNO(ESTALE), "Image %s extension-release metadata does not match the root's", dissected_image->image_name);
+                if (r < 0)
+                        return log_debug_errno(r, "Failed to compare image %s extension-release metadata with the root's os-release: %m", dissected_image->image_name);
+        }
+
+        if (decrypted_image) {
+                r = decrypted_image_relinquish(decrypted_image);
+                if (r < 0)
+                        return log_debug_errno(r, "Failed to relinquish decrypted image: %m");
+        }
+
+        loop_device_relinquish(loop_device);
+
+        return 0;
+}
+
 DEFINE_STRING_TABLE_LOOKUP(partition_designator, PartitionDesignator);