]> git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/shared/dissect-image.c
Merge pull request #12750 from keszybz/tmpfiles-c-copy
[thirdparty/systemd.git] / src / shared / dissect-image.c
index 4e572ac861c5a18c24b633f593ee46888b9b16b9..030e12e4ee2e688aa07167838348b26973491e32 100644 (file)
@@ -1,5 +1,7 @@
 /* SPDX-License-Identifier: LGPL-2.1+ */
 
+#include <linux/dm-ioctl.h>
+#include <linux/loop.h>
 #include <sys/mount.h>
 #include <sys/prctl.h>
 #include <sys/wait.h>
@@ -17,6 +19,7 @@
 #include "device-nodes.h"
 #include "device-util.h"
 #include "dissect-image.h"
+#include "dm-util.h"
 #include "env-file.h"
 #include "fd-util.h"
 #include "fileio.h"
 #include "hexdecoct.h"
 #include "hostname-util.h"
 #include "id128-util.h"
-#include "linux-3.13/dm-ioctl.h"
 #include "missing.h"
 #include "mount-util.h"
 #include "mountpoint-util.h"
+#include "nulstr-util.h"
 #include "os-util.h"
 #include "path-util.h"
 #include "process-util.h"
 #include "string-util.h"
 #include "strv.h"
 #include "tmpfile-util.h"
+#include "udev-util.h"
 #include "user-util.h"
 #include "xattr-util.h"
 
 int probe_filesystem(const char *node, char **ret_fstype) {
         /* Try to find device content type and return it in *ret_fstype. If nothing is found,
-         * 0/NULL will be returned. -EUCLEAN will be returned for ambigous results, and an
+         * 0/NULL will be returned. -EUCLEAN will be returned for ambiguous results, and an
          * different error otherwise. */
 
 #if HAVE_BLKID
@@ -101,6 +105,8 @@ not_found:
 static bool device_is_mmc_special_partition(sd_device *d) {
         const char *sysname;
 
+        assert(d);
+
         if (sd_device_get_sysname(d, &sysname) < 0)
                 return false;
 
@@ -111,11 +117,158 @@ static bool device_is_mmc_special_partition(sd_device *d) {
 static bool device_is_block(sd_device *d) {
         const char *ss;
 
+        assert(d);
+
         if (sd_device_get_subsystem(d, &ss) < 0)
                 return false;
 
         return streq(ss, "block");
 }
+
+static int enumerator_for_parent(sd_device *d, sd_device_enumerator **ret) {
+        _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+        int r;
+
+        assert(d);
+        assert(ret);
+
+        r = sd_device_enumerator_new(&e);
+        if (r < 0)
+                return r;
+
+        r = sd_device_enumerator_allow_uninitialized(e);
+        if (r < 0)
+                return r;
+
+        r = sd_device_enumerator_add_match_parent(e, d);
+        if (r < 0)
+                return r;
+
+        *ret = TAKE_PTR(e);
+        return 0;
+}
+
+/* how many times to wait for the device nodes to appear */
+#define N_DEVICE_NODE_LIST_ATTEMPTS 10
+
+static int wait_for_partitions_to_appear(
+                int fd,
+                sd_device *d,
+                unsigned num_partitions,
+                DissectImageFlags flags,
+                sd_device_enumerator **ret_enumerator) {
+
+        _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+        sd_device *q;
+        unsigned n;
+        int r;
+
+        assert(fd >= 0);
+        assert(d);
+        assert(ret_enumerator);
+
+        r = enumerator_for_parent(d, &e);
+        if (r < 0)
+                return r;
+
+        /* Count the partitions enumerated by the kernel */
+        n = 0;
+        FOREACH_DEVICE(e, q) {
+                if (sd_device_get_devnum(q, NULL) < 0)
+                        continue;
+                if (!device_is_block(q))
+                        continue;
+                if (device_is_mmc_special_partition(q))
+                        continue;
+
+                if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
+                        r = device_wait_for_initialization(q, "block", USEC_INFINITY, NULL);
+                        if (r < 0)
+                                return r;
+                }
+
+                n++;
+        }
+
+        if (n == num_partitions + 1) {
+                *ret_enumerator = TAKE_PTR(e);
+                return 0; /* success! */
+        }
+        if (n > num_partitions + 1)
+                return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+                                       "blkid and kernel partition lists do not match.");
+
+        /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running or it
+         * got EBUSY because udev already opened the device. Let's reprobe the device, which is a synchronous
+         * call that waits until probing is complete. */
+
+        for (unsigned j = 0; ; j++) {
+                if (j++ > 20)
+                        return -EBUSY;
+
+                if (ioctl(fd, BLKRRPART, 0) >= 0)
+                        break;
+                r = -errno;
+                if (r == -EINVAL) {
+                        struct loop_info64 info;
+
+                        /* If we are running on a loop device that has partition scanning off, return
+                         * an explicit recognizable error about this, so that callers can generate a
+                         * proper message explaining the situation. */
+
+                        if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
+                                log_debug("Device is a loop device and partition scanning is off!");
+                                return -EPROTONOSUPPORT;
+                        }
+                }
+                if (r != -EBUSY)
+                        return r;
+
+                /* If something else has the device open, such as an udev rule, the ioctl will return
+                 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a bit,
+                 * and try again.
+                 *
+                 * This is really something they should fix in the kernel! */
+                (void) usleep(50 * USEC_PER_MSEC);
+
+        }
+
+        return -EAGAIN; /* no success yet, try again */
+}
+
+static int loop_wait_for_partitions_to_appear(
+                int fd,
+                sd_device *d,
+                unsigned num_partitions,
+                DissectImageFlags flags,
+                sd_device_enumerator **ret_enumerator) {
+        _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+        int r;
+
+        assert(fd >= 0);
+        assert(d);
+        assert(ret_enumerator);
+
+        log_debug("Waiting for device (parent + %d partitions) to appear...", num_partitions);
+
+        if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
+                r = device_wait_for_initialization(d, "block", USEC_INFINITY, &device);
+                if (r < 0)
+                        return r;
+        } else
+                device = sd_device_ref(d);
+
+        for (unsigned i = 0; i < N_DEVICE_NODE_LIST_ATTEMPTS; i++) {
+                r = wait_for_partitions_to_appear(fd, device, num_partitions, flags, ret_enumerator);
+                if (r != -EAGAIN)
+                        return r;
+        }
+
+        return log_debug_errno(SYNTHETIC_ERRNO(ENXIO),
+                               "Kernel partitions dit not appear within %d attempts",
+                               N_DEVICE_NODE_LIST_ATTEMPTS);
+}
+
 #endif
 
 int dissect_image(
@@ -193,10 +346,8 @@ int dissect_image(
 
         errno = 0;
         r = blkid_do_safeprobe(b);
-        if (IN_SET(r, -2, 1)) {
-                log_debug("Failed to identify any partition table.");
-                return -ENOPKG;
-        }
+        if (IN_SET(r, -2, 1))
+                return log_debug_errno(SYNTHETIC_ERRNO(ENOPKG), "Failed to identify any partition table.");
         if (r != 0)
                 return -errno ?: -EIO;
 
@@ -204,6 +355,10 @@ int dissect_image(
         if (!m)
                 return -ENOMEM;
 
+        r = sd_device_new_from_devnum(&d, 'b', st.st_rdev);
+        if (r < 0)
+                return r;
+
         if (!(flags & DISSECT_IMAGE_GPT_ONLY) &&
             (flags & DISSECT_IMAGE_REQUIRE_ROOT)) {
                 const char *usage = NULL;
@@ -237,6 +392,10 @@ int dissect_image(
 
                         m->encrypted = streq_ptr(fstype, "crypto_LUKS");
 
+                        r = loop_wait_for_partitions_to_appear(fd, d, 0, flags, &e);
+                        if (r < 0)
+                                return r;
+
                         *ret = TAKE_PTR(m);
 
                         return 0;
@@ -258,96 +417,10 @@ int dissect_image(
         if (!pl)
                 return -errno ?: -ENOMEM;
 
-        r = sd_device_new_from_devnum(&d, 'b', st.st_rdev);
+        r = loop_wait_for_partitions_to_appear(fd, d, blkid_partlist_numof_partitions(pl), flags, &e);
         if (r < 0)
                 return r;
 
-        for (i = 0;; i++) {
-                int n, z;
-
-                if (i >= 10) {
-                        log_debug("Kernel partitions never appeared.");
-                        return -ENXIO;
-                }
-
-                r = sd_device_enumerator_new(&e);
-                if (r < 0)
-                        return r;
-
-                r = sd_device_enumerator_allow_uninitialized(e);
-                if (r < 0)
-                        return r;
-
-                r = sd_device_enumerator_add_match_parent(e, d);
-                if (r < 0)
-                        return r;
-
-                /* Count the partitions enumerated by the kernel */
-                n = 0;
-                FOREACH_DEVICE(e, q) {
-                        if (sd_device_get_devnum(q, NULL) < 0)
-                                continue;
-
-                        if (!device_is_block(q))
-                                continue;
-
-                        if (device_is_mmc_special_partition(q))
-                                continue;
-                        n++;
-                }
-
-                /* Count the partitions enumerated by blkid */
-                z = blkid_partlist_numof_partitions(pl);
-                if (n == z + 1)
-                        break;
-                if (n > z + 1) {
-                        log_debug("blkid and kernel partition list do not match.");
-                        return -EIO;
-                }
-                if (n < z + 1) {
-                        unsigned j = 0;
-
-                        /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running
-                         * or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a
-                         * synchronous call that waits until probing is complete. */
-
-                        for (;;) {
-                                if (j++ > 20)
-                                        return -EBUSY;
-
-                                if (ioctl(fd, BLKRRPART, 0) < 0) {
-                                        r = -errno;
-
-                                        if (r == -EINVAL) {
-                                                struct loop_info64 info;
-
-                                                /* If we are running on a loop device that has partition scanning off,
-                                                 * return an explicit recognizable error about this, so that callers
-                                                 * can generate a proper message explaining the situation. */
-
-                                                if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
-                                                        log_debug("Device is loop device and partition scanning is off!");
-                                                        return -EPROTONOSUPPORT;
-                                                }
-                                        }
-                                        if (r != -EBUSY)
-                                                return r;
-                                } else
-                                        break;
-
-                                /* If something else has the device open, such as an udev rule, the ioctl will return
-                                 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a
-                                 * bit, and try again.
-                                 *
-                                 * This is really something they should fix in the kernel! */
-
-                                (void) usleep(50 * USEC_PER_MSEC);
-                        }
-                }
-
-                e = sd_device_enumerator_unref(e);
-        }
-
         FOREACH_DEVICE(e, q) {
                 unsigned long long pflags;
                 blkid_partition pp;
@@ -425,6 +498,14 @@ int dissect_image(
 
                                 designator = PARTITION_ESP;
                                 fstype = "vfat";
+
+                        } else if (sd_id128_equal(type_id, GPT_XBOOTLDR)) {
+
+                                if (pflags & GPT_FLAG_NO_AUTO)
+                                        continue;
+
+                                designator = PARTITION_XBOOTLDR;
+                                rw = !(pflags & GPT_FLAG_READ_ONLY);
                         }
 #ifdef GPT_ROOT_NATIVE
                         else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
@@ -540,21 +621,53 @@ int dissect_image(
 
                 } else if (is_mbr) {
 
-                        if (pflags != 0x80) /* Bootable flag */
-                                continue;
+                        switch (blkid_partition_get_type(pp)) {
 
-                        if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */
-                                continue;
+                        case 0x83: /* Linux partition */
 
-                        if (generic_node)
-                                multiple_generic = true;
-                        else {
-                                generic_nr = nr;
-                                generic_rw = true;
-                                generic_node = strdup(node);
-                                if (!generic_node)
+                                if (pflags != 0x80) /* Bootable flag */
+                                        continue;
+
+                                if (generic_node)
+                                        multiple_generic = true;
+                                else {
+                                        generic_nr = nr;
+                                        generic_rw = true;
+                                        generic_node = strdup(node);
+                                        if (!generic_node)
+                                                return -ENOMEM;
+                                }
+
+                                break;
+
+                        case 0xEA: { /* Boot Loader Spec extended $BOOT partition */
+                                _cleanup_free_ char *n = NULL;
+                                sd_id128_t id = SD_ID128_NULL;
+                                const char *sid;
+
+                                /* First one wins */
+                                if (m->partitions[PARTITION_XBOOTLDR].found)
+                                        continue;
+
+                                sid = blkid_partition_get_uuid(pp);
+                                if (sid)
+                                        (void) sd_id128_from_string(sid, &id);
+
+                                n = strdup(node);
+                                if (!n)
                                         return -ENOMEM;
-                        }
+
+                                m->partitions[PARTITION_XBOOTLDR] = (DissectedPartition) {
+                                        .found = true,
+                                        .partno = nr,
+                                        .rw = true,
+                                        .architecture = _ARCHITECTURE_INVALID,
+                                        .node = TAKE_PTR(n),
+                                        .uuid = id,
+                                };
+
+                                break;
+                        }}
                 }
         }
 
@@ -747,11 +860,15 @@ static int mount_partition(
                         return -ENOMEM;
         }
 
-        return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
+        r = mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
+        if (r < 0)
+                return r;
+
+        return 1;
 }
 
 int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
-        int r;
+        int r, boot_mounted;
 
         assert(m);
         assert(where);
@@ -784,21 +901,26 @@ int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift,
         if (r < 0)
                 return r;
 
+        boot_mounted = mount_partition(m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, flags);
+        if (boot_mounted < 0)
+                return boot_mounted;
+
         if (m->partitions[PARTITION_ESP].found) {
-                const char *mp;
+                /* Mount the ESP to /efi if it exists. If it doesn't exist, use /boot instead, but only if it
+                 * exists and is empty, and we didn't already mount the XBOOTLDR partition into it. */
 
-                /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
+                r = chase_symlinks("/efi", where, CHASE_PREFIX_ROOT, NULL);
+                if (r >= 0) {
+                        r = mount_partition(m->partitions + PARTITION_ESP, where, "/efi", uid_shift, flags);
+                        if (r < 0)
+                                return r;
 
-                FOREACH_STRING(mp, "/efi", "/boot") {
+                } else if (boot_mounted <= 0) {
                         _cleanup_free_ char *p = NULL;
 
-                        r = chase_symlinks(mp, where, CHASE_PREFIX_ROOT, &p);
-                        if (r < 0)
-                                continue;
-
-                        r = dir_is_empty(p);
-                        if (r > 0) {
-                                r = mount_partition(m->partitions + PARTITION_ESP, where, mp, uid_shift, flags);
+                        r = chase_symlinks("/boot", where, CHASE_PREFIX_ROOT, &p);
+                        if (r >= 0 && dir_is_empty(p) > 0) {
+                                r = mount_partition(m->partitions + PARTITION_ESP, where, "/boot", uid_shift, flags);
                                 if (r < 0)
                                         return r;
                         }
@@ -873,7 +995,7 @@ static int make_dm_name_and_node(const void *original_node, const char *suffix,
         if (!filename_is_valid(name))
                 return -EINVAL;
 
-        node = strjoin(crypt_get_dir(), "/", name);
+        node = path_join(crypt_get_dir(), name);
         if (!node)
                 return -ENOMEM;
 
@@ -1104,38 +1226,6 @@ int dissected_image_decrypt_interactively(
         }
 }
 
-#if HAVE_LIBCRYPTSETUP
-static int deferred_remove(DecryptedPartition *p) {
-
-        struct dm_ioctl dm = {
-                .version = {
-                        DM_VERSION_MAJOR,
-                        DM_VERSION_MINOR,
-                        DM_VERSION_PATCHLEVEL
-                },
-                .data_size = sizeof(dm),
-                .flags = DM_DEFERRED_REMOVE,
-        };
-
-        _cleanup_close_ int fd = -1;
-
-        assert(p);
-
-        /* Unfortunately, libcryptsetup doesn't provide a proper API for this, hence call the ioctl() directly. */
-
-        fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
-        if (fd < 0)
-                return -errno;
-
-        strncpy(dm.name, p->name, sizeof(dm.name));
-
-        if (ioctl(fd, DM_DEV_REMOVE, &dm))
-                return -errno;
-
-        return 0;
-}
-#endif
-
 int decrypted_image_relinquish(DecryptedImage *d) {
 
 #if HAVE_LIBCRYPTSETUP
@@ -1155,7 +1245,7 @@ int decrypted_image_relinquish(DecryptedImage *d) {
                 if (p->relinquished)
                         continue;
 
-                r = deferred_remove(p);
+                r = dm_deferred_remove(p->name);
                 if (r < 0)
                         return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
 
@@ -1300,7 +1390,7 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
 
                 fds[2*k+1] = safe_close(fds[2*k+1]);
 
-                f = fdopen(fds[2*k], "re");
+                f = fdopen(fds[2*k], "r");
                 if (!f) {
                         r = -errno;
                         goto finish;
@@ -1425,6 +1515,7 @@ static const char *const partition_designator_table[] = {
         [PARTITION_HOME] = "home",
         [PARTITION_SRV] = "srv",
         [PARTITION_ESP] = "esp",
+        [PARTITION_XBOOTLDR] = "xbootldr",
         [PARTITION_SWAP] = "swap",
         [PARTITION_ROOT_VERITY] = "root-verity",
         [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",