]> git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/shared/dissect-image.c
dissect-image: wait for the main device and all partitions to be known by udev
[thirdparty/systemd.git] / src / shared / dissect-image.c
index 11c792f4a88d9cdecae6211c67d9f2af231ee216..4c2e41c8b8ee16b57d7cab7807b981d6f4273e56 100644 (file)
@@ -1,27 +1,10 @@
 /* SPDX-License-Identifier: LGPL-2.1+ */
-/***
-  This file is part of systemd.
-
-  Copyright 2016 Lennart Poettering
-
-  systemd is free software; you can redistribute it and/or modify it
-  under the terms of the GNU Lesser General Public License as published by
-  the Free Software Foundation; either version 2.1 of the License, or
-  (at your option) any later version.
-
-  systemd is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-  Lesser General Public License for more details.
-
-  You should have received a copy of the GNU Lesser General Public License
-  along with systemd; If not, see <http://www.gnu.org/licenses/>.
-***/
 
 #include <sys/mount.h>
 #include <sys/prctl.h>
 #include <sys/wait.h>
 
+#include "sd-device.h"
 #include "sd-id128.h"
 
 #include "architecture.h"
@@ -32,7 +15,9 @@
 #include "crypt-util.h"
 #include "def.h"
 #include "device-nodes.h"
+#include "device-util.h"
 #include "dissect-image.h"
+#include "env-file.h"
 #include "fd-util.h"
 #include "fileio.h"
 #include "fs-util.h"
@@ -43,6 +28,8 @@
 #include "linux-3.13/dm-ioctl.h"
 #include "missing.h"
 #include "mount-util.h"
+#include "mountpoint-util.h"
+#include "os-util.h"
 #include "path-util.h"
 #include "process-util.h"
 #include "raw-clone.h"
@@ -52,6 +39,7 @@
 #include "string-table.h"
 #include "string-util.h"
 #include "strv.h"
+#include "tmpfile-util.h"
 #include "udev-util.h"
 #include "user-util.h"
 #include "xattr-util.h"
@@ -62,7 +50,7 @@ int probe_filesystem(const char *node, char **ret_fstype) {
          * different error otherwise. */
 
 #if HAVE_BLKID
-        _cleanup_blkid_free_probe_ blkid_probe b = NULL;
+        _cleanup_(blkid_free_probep) blkid_probe b = NULL;
         const char *fstype;
         int r;
 
@@ -108,6 +96,156 @@ not_found:
 #endif
 }
 
+#if HAVE_BLKID
+/* Detect RPMB and Boot partitions, which are not listed by blkid.
+ * See https://github.com/systemd/systemd/issues/5806. */
+static bool device_is_mmc_special_partition(sd_device *d) {
+        const char *sysname;
+
+        if (sd_device_get_sysname(d, &sysname) < 0)
+                return false;
+
+        return startswith(sysname, "mmcblk") &&
+                (endswith(sysname, "rpmb") || endswith(sysname, "boot0") || endswith(sysname, "boot1"));
+}
+
+static bool device_is_block(sd_device *d) {
+        const char *ss;
+
+        if (sd_device_get_subsystem(d, &ss) < 0)
+                return false;
+
+        return streq(ss, "block");
+}
+
+static int enumerator_for_parent(sd_device *d, sd_device_enumerator **ret) {
+        _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+        int r;
+
+        r = sd_device_enumerator_new(&e);
+        if (r < 0)
+                return r;
+
+        r = sd_device_enumerator_allow_uninitialized(e);
+        if (r < 0)
+                return r;
+
+        r = sd_device_enumerator_add_match_parent(e, d);
+        if (r < 0)
+                return r;
+
+        *ret = TAKE_PTR(e);
+        return 0;
+}
+
+/* how many times to wait for the device nodes to appear */
+#define N_DEVICE_NODE_LIST_ATTEMPTS 10
+
+static int wait_for_partitions_to_appear(
+                int fd,
+                sd_device *d,
+                unsigned num_partitions,
+                sd_device_enumerator **ret_enumerator) {
+
+        _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+        sd_device *q;
+        unsigned n;
+        int r;
+
+        r = enumerator_for_parent(d, &e);
+        if (r < 0)
+                return r;
+
+        /* Count the partitions enumerated by the kernel */
+        n = 0;
+        FOREACH_DEVICE(e, q) {
+                if (sd_device_get_devnum(q, NULL) < 0)
+                        continue;
+                if (!device_is_block(q))
+                        continue;
+                if (device_is_mmc_special_partition(q))
+                        continue;
+
+                r = device_wait_for_initialization(q, "block", NULL);
+                if (r < 0)
+                        return r;
+
+                n++;
+        }
+
+        if (n == num_partitions + 1) {
+                *ret_enumerator = TAKE_PTR(e);
+                return 0; /* success! */
+        }
+        if (n > num_partitions + 1)
+                return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+                                       "blkid and kernel partition lists do not match.");
+
+        /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running or it
+         * got EBUSY because udev already opened the device. Let's reprobe the device, which is a synchronous
+         * call that waits until probing is complete. */
+
+        for (unsigned j = 0; ; j++) {
+                if (j++ > 20)
+                        return -EBUSY;
+
+                if (ioctl(fd, BLKRRPART, 0) >= 0)
+                        break;
+                r = -errno;
+                if (r == -EINVAL) {
+                        struct loop_info64 info;
+
+                        /* If we are running on a loop device that has partition scanning off, return
+                         * an explicit recognizable error about this, so that callers can generate a
+                         * proper message explaining the situation. */
+
+                        if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
+                                log_debug("Device is a loop device and partition scanning is off!");
+                                return -EPROTONOSUPPORT;
+                        }
+                }
+                if (r != -EBUSY)
+                        return r;
+
+                /* If something else has the device open, such as an udev rule, the ioctl will return
+                 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a bit,
+                 * and try again.
+                 *
+                 * This is really something they should fix in the kernel! */
+                (void) usleep(50 * USEC_PER_MSEC);
+
+        }
+
+        return -EAGAIN; /* no success yet, try again */
+}
+
+static int loop_wait_for_partitions_to_appear(
+                int fd,
+                sd_device *d,
+                unsigned num_partitions,
+                sd_device_enumerator **ret_enumerator) {
+        _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+        int r;
+
+        log_debug("Waiting for device (parent + %d partitions) to appear...", num_partitions);
+
+        r = device_wait_for_initialization(d, "block", &device);
+        if (r < 0)
+                return r;
+
+        for (unsigned i = 0; i < N_DEVICE_NODE_LIST_ATTEMPTS; i++) {
+                r = wait_for_partitions_to_appear(fd, device, num_partitions, ret_enumerator);
+                if (r != -EAGAIN)
+                        return r;
+        }
+
+        return log_debug_errno(SYNTHETIC_ERRNO(ENXIO),
+                               "Kernel partitions dit not appear within %d attempts",
+                               N_DEVICE_NODE_LIST_ATTEMPTS);
+}
+
+#endif
+
 int dissect_image(
                 int fd,
                 const void *root_hash,
@@ -117,19 +255,18 @@ int dissect_image(
 
 #if HAVE_BLKID
         sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL;
-        _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
+        _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
         bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
-        _cleanup_udev_device_unref_ struct udev_device *d = NULL;
+        _cleanup_(sd_device_unrefp) sd_device *d = NULL;
         _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
-        _cleanup_blkid_free_probe_ blkid_probe b = NULL;
-        _cleanup_udev_unref_ struct udev *udev = NULL;
+        _cleanup_(blkid_free_probep) blkid_probe b = NULL;
         _cleanup_free_ char *generic_node = NULL;
         sd_id128_t generic_uuid = SD_ID128_NULL;
         const char *pttype = NULL;
-        struct udev_list_entry *first, *item;
         blkid_partlist pl;
         int r, generic_nr;
         struct stat st;
+        sd_device *q;
         unsigned i;
 
         assert(fd >= 0);
@@ -195,6 +332,10 @@ int dissect_image(
         if (!m)
                 return -ENOMEM;
 
+        r = sd_device_new_from_devnum(&d, 'b', st.st_rdev);
+        if (r < 0)
+                return r;
+
         if (!(flags & DISSECT_IMAGE_GPT_ONLY) &&
             (flags & DISSECT_IMAGE_REQUIRE_ROOT)) {
                 const char *usage = NULL;
@@ -213,24 +354,26 @@ int dissect_image(
                                         return -ENOMEM;
                         }
 
-                        if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0)
-                                return -ENOMEM;
+                        r = device_path_make_major_minor(st.st_mode, st.st_rdev, &n);
+                        if (r < 0)
+                                return r;
 
                         m->partitions[PARTITION_ROOT] = (DissectedPartition) {
                                 .found = true,
                                 .rw = true,
                                 .partno = -1,
                                 .architecture = _ARCHITECTURE_INVALID,
-                                .fstype = t,
-                                .node = n,
+                                .fstype = TAKE_PTR(t),
+                                .node = TAKE_PTR(n),
                         };
 
-                        t = n = NULL;
+                        m->encrypted = streq_ptr(fstype, "crypto_LUKS");
 
-                        m->encrypted = streq(fstype, "crypto_LUKS");
+                        r = loop_wait_for_partitions_to_appear(fd, d, 0, &e);
+                        if (r < 0)
+                                return r;
 
-                        *ret = m;
-                        m = NULL;
+                        *ret = TAKE_PTR(m);
 
                         return 0;
                 }
@@ -251,121 +394,32 @@ int dissect_image(
         if (!pl)
                 return -errno ?: -ENOMEM;
 
-        udev = udev_new();
-        if (!udev)
-                return -errno;
-
-        d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
-        if (!d)
-                return -ENOMEM;
-
-        for (i = 0;; i++) {
-                int n, z;
-
-                if (i >= 10) {
-                        log_debug("Kernel partitions never appeared.");
-                        return -ENXIO;
-                }
-
-                e = udev_enumerate_new(udev);
-                if (!e)
-                        return -errno;
-
-                r = udev_enumerate_add_match_parent(e, d);
-                if (r < 0)
-                        return r;
-
-                r = udev_enumerate_scan_devices(e);
-                if (r < 0)
-                        return r;
-
-                /* Count the partitions enumerated by the kernel */
-                n = 0;
-                first = udev_enumerate_get_list_entry(e);
-                udev_list_entry_foreach(item, first)
-                        n++;
-
-                /* Count the partitions enumerated by blkid */
-                z = blkid_partlist_numof_partitions(pl);
-                if (n == z + 1)
-                        break;
-                if (n > z + 1) {
-                        log_debug("blkid and kernel partition list do not match.");
-                        return -EIO;
-                }
-                if (n < z + 1) {
-                        unsigned j = 0;
-
-                        /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running
-                         * or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a
-                         * synchronous call that waits until probing is complete. */
-
-                        for (;;) {
-                                if (j++ > 20)
-                                        return -EBUSY;
-
-                                if (ioctl(fd, BLKRRPART, 0) < 0) {
-                                        r = -errno;
-
-                                        if (r == -EINVAL) {
-                                                struct loop_info64 info;
-
-                                                /* If we are running on a loop device that has partition scanning off,
-                                                 * return an explicit recognizable error about this, so that callers
-                                                 * can generate a proper message explaining the situation. */
-
-                                                if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
-                                                        log_debug("Device is loop device and partition scanning is off!");
-                                                        return -EPROTONOSUPPORT;
-                                                }
-                                        }
-                                        if (r != -EBUSY)
-                                                return r;
-                                } else
-                                        break;
-
-                                /* If something else has the device open, such as an udev rule, the ioctl will return
-                                 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a
-                                 * bit, and try again.
-                                 *
-                                 * This is really something they should fix in the kernel! */
-
-                                (void) usleep(50 * USEC_PER_MSEC);
-                        }
-                }
-
-                e = udev_enumerate_unref(e);
-        }
+        r = loop_wait_for_partitions_to_appear(fd, d, blkid_partlist_numof_partitions(pl), &e);
+        if (r < 0)
+                return r;
 
-        first = udev_enumerate_get_list_entry(e);
-        udev_list_entry_foreach(item, first) {
-                _cleanup_udev_device_unref_ struct udev_device *q;
+        FOREACH_DEVICE(e, q) {
                 unsigned long long pflags;
                 blkid_partition pp;
-                const char *node, *sysname;
+                const char *node;
                 dev_t qn;
                 int nr;
 
-                q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
-                if (!q)
-                        return -errno;
-
-                qn = udev_device_get_devnum(q);
-                if (major(qn) == 0)
+                r = sd_device_get_devnum(q, &qn);
+                if (r < 0)
                         continue;
 
                 if (st.st_rdev == qn)
                         continue;
 
-                /* Filter out weird MMC RPMB partitions, which cannot reasonably be read, see
-                 * https://github.com/systemd/systemd/issues/5806 */
-                sysname = udev_device_get_sysname(q);
-                if (sysname && startswith(sysname, "mmcblk") &&
-                        (endswith(sysname, "rpmb") || endswith(sysname, "boot0" ) || endswith(sysname, "boot1")))
+                if (!device_is_block(q))
+                        continue;
+
+                if (device_is_mmc_special_partition(q))
                         continue;
 
-                node = udev_device_get_devnode(q);
-                if (!node)
+                r = sd_device_get_devname(q, &node);
+                if (r < 0)
                         continue;
 
                 pp = blkid_partlist_devno_to_partition(pl, qn);
@@ -528,12 +582,10 @@ int dissect_image(
                                         .partno = nr,
                                         .rw = rw,
                                         .architecture = architecture,
-                                        .node = n,
-                                        .fstype = t,
+                                        .node = TAKE_PTR(n),
+                                        .fstype = TAKE_PTR(t),
                                         .uuid = id,
                                 };
-
-                                n = t = NULL;
                         }
 
                 } else if (is_mbr) {
@@ -591,11 +643,9 @@ int dissect_image(
                                 .rw = generic_rw,
                                 .partno = generic_nr,
                                 .architecture = _ARCHITECTURE_INVALID,
-                                .node = generic_node,
+                                .node = TAKE_PTR(generic_node),
                                 .uuid = generic_uuid,
                         };
-
-                        generic_node = NULL;
                 }
         }
 
@@ -638,8 +688,7 @@ int dissect_image(
                         p->rw = false;
         }
 
-        *ret = m;
-        m = NULL;
+        *ret = TAKE_PTR(m);
 
         return 0;
 #else
@@ -764,9 +813,17 @@ int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift,
                 r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
                 if (r < 0)
                         return r;
+
+                if (flags & DISSECT_IMAGE_VALIDATE_OS) {
+                        r = path_is_os_tree(where);
+                        if (r < 0)
+                                return r;
+                        if (r == 0)
+                                return -EMEDIUMTYPE;
+                }
         }
 
-        if ((flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY))
+        if (flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY)
                 return 0;
 
         r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
@@ -870,10 +927,9 @@ static int make_dm_name_and_node(const void *original_node, const char *suffix,
         if (!node)
                 return -ENOMEM;
 
-        *ret_name = name;
-        *ret_node = node;
+        *ret_name = TAKE_PTR(name);
+        *ret_node = TAKE_PTR(node);
 
-        name = node = NULL;
         return 0;
 }
 
@@ -922,15 +978,11 @@ static int decrypt_partition(
                 return r == -EPERM ? -EKEYREJECTED : r;
         }
 
-        d->decrypted[d->n_decrypted].name = name;
-        name = NULL;
-
-        d->decrypted[d->n_decrypted].device = cd;
-        cd = NULL;
+        d->decrypted[d->n_decrypted].name = TAKE_PTR(name);
+        d->decrypted[d->n_decrypted].device = TAKE_PTR(cd);
         d->n_decrypted++;
 
-        m->decrypted_node = node;
-        node = NULL;
+        m->decrypted_node = TAKE_PTR(node);
 
         return 0;
 }
@@ -984,15 +1036,11 @@ static int verity_partition(
         if (r < 0)
                 return r;
 
-        d->decrypted[d->n_decrypted].name = name;
-        name = NULL;
-
-        d->decrypted[d->n_decrypted].device = cd;
-        cd = NULL;
+        d->decrypted[d->n_decrypted].name = TAKE_PTR(name);
+        d->decrypted[d->n_decrypted].device = TAKE_PTR(cd);
         d->n_decrypted++;
 
-        m->decrypted_node = node;
-        node = NULL;
+        m->decrypted_node = TAKE_PTR(node);
 
         return 0;
 }
@@ -1061,8 +1109,7 @@ int dissected_image_decrypt(
                 }
         }
 
-        *ret = d;
-        d = NULL;
+        *ret = TAKE_PTR(d);
 
         return 1;
 #else
@@ -1090,19 +1137,16 @@ int dissected_image_decrypt_interactively(
                         return r;
                 if (r == -EKEYREJECTED)
                         log_error_errno(r, "Incorrect passphrase, try again!");
-                else if (r != -ENOKEY) {
-                        log_error_errno(r, "Failed to decrypt image: %m");
-                        return r;
-                }
+                else if (r != -ENOKEY)
+                        return log_error_errno(r, "Failed to decrypt image: %m");
 
-                if (--n < 0) {
-                        log_error("Too many retries.");
-                        return -EKEYREJECTED;
-                }
+                if (--n < 0)
+                        return log_error_errno(SYNTHETIC_ERRNO(EKEYREJECTED),
+                                               "Too many retries.");
 
                 z = strv_free(z);
 
-                r = ask_password_auto("Please enter image passphrase!", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z);
+                r = ask_password_auto("Please enter image passphrase:", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z);
                 if (r < 0)
                         return log_error_errno(r, "Failed to query for passphrase: %m");
 
@@ -1220,11 +1264,9 @@ int root_hash_load(const char *image, void **ret, size_t *ret_size) {
         if (l < sizeof(sd_id128_t))
                 return -EINVAL;
 
-        *ret = k;
+        *ret = TAKE_PTR(k);
         *ret_size = l;
 
-        k = NULL;
-
         return 1;
 }
 
@@ -1267,17 +1309,15 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
         if (r < 0)
                 goto finish;
 
-        r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS, &child);
+        r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, &child);
         if (r < 0)
                 goto finish;
         if (r == 0) {
-                /* Make sure we never propagate to the host */
-                if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
-                        _exit(EXIT_FAILURE);
-
-                r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY);
-                if (r < 0)
+                r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_MOUNT_ROOT_ONLY|DISSECT_IMAGE_VALIDATE_OS);
+                if (r < 0) {
+                        log_debug_errno(r, "Failed to mount dissected image: %m");
                         _exit(EXIT_FAILURE);
+                }
 
                 for (k = 0; k < _META_MAX; k++) {
                         _cleanup_close_ int fd = -1;
@@ -1310,7 +1350,7 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
 
                 fds[2*k+1] = safe_close(fds[2*k+1]);
 
-                f = fdopen(fds[2*k], "re");
+                f = fdopen(fds[2*k], "r");
                 if (!f) {
                         r = -errno;
                         goto finish;
@@ -1346,14 +1386,14 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
                 }
 
                 case META_MACHINE_INFO:
-                        r = load_env_file_pairs(f, "machine-info", NULL, &machine_info);
+                        r = load_env_file_pairs(f, "machine-info", &machine_info);
                         if (r < 0)
                                 log_debug_errno(r, "Failed to read /etc/machine-info: %m");
 
                         break;
 
                 case META_OS_RELEASE:
-                        r = load_env_file_pairs(f, "os-release", NULL, &os_release);
+                        r = load_env_file_pairs(f, "os-release", &os_release);
                         if (r < 0)
                                 log_debug_errno(r, "Failed to read OS release file: %m");