git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/shared/dissect-image.c
tree-wide: "<n>bit" → "<n>-bit"
[thirdparty/systemd.git] / src / shared / dissect-image.c
index 46c42cfc953bc554648d56001d505115f7f93c0d..44e77f932825004f99b8284c73be463af233cf0e 100644 (file)
@@ -252,6 +252,21 @@ int probe_filesystem_full(
         if (!b)
                 return -ENOMEM;
 
+        /* The Linux kernel maintains separate block device caches for main ("whole") and partition block
+         * devices, which means making a change to one might not be reflected immediately when reading via
+         * the other. That's massively confusing when mixing accesses to such devices. Let's address this in
+         * a limited way: when probing a file system that is not at the beginning of the block device we
+         * apparently probe a partition via the main block device, and in that case let's first flush the
+         * main block device cache, so that we get the data that the per-partition block device last
+         * sync'ed on.
+         *
+         * This only works under the assumption that any tools that write to the partition block devices
+         * issue a syncfs()/fsync() on the device after making changes. Typically file system formatting
+         * tools that write a superblock onto a partition block device do that, however. */
+        if (offset != 0)
+                if (ioctl(fd, BLKFLSBUF, 0) < 0)
+                        log_debug_errno(errno, "Failed to flush block device cache, ignoring: %m");
+
         errno = 0;
         r = blkid_probe_set_device(
                         b,
@@ -696,8 +711,8 @@ static int dissect_image(
                 sd_id128_t fsuuid, vuuid;
 
                 /* If a root hash is supplied, then we use the root partition that has a UUID that match the
-                 * first 128bit of the root hash. And we use the verity partition that has a UUID that match
-                 * the final 128bit. */
+                 * first 128 bits of the root hash. And we use the verity partition that has a UUID that match
+                 * the final 128 bits. */
 
                 if (verity->root_hash_size < sizeof(sd_id128_t))
                         return -EINVAL;
@@ -1202,16 +1217,19 @@ static int dissect_image(
                                 }
 
                                 if (m->partitions[type.designator].found) {
+                                        int c;
+
                                         /* For most partition types the first one we see wins. Except for the
                                          * rootfs and /usr, where we do a version compare of the label, and
                                          * let the newest version win. This permits a simple A/B versioning
                                          * scheme in OS images. */
 
-                                        if (compare_arch(type.arch, m->partitions[type.designator].architecture) <= 0)
+                                        c = compare_arch(type.arch, m->partitions[type.designator].architecture);
+                                        if (c < 0) /* the arch we already found is better than the one we found now */
                                                 continue;
-
-                                        if (!partition_designator_is_versioned(type.designator) ||
-                                            strverscmp_improved(m->partitions[type.designator].label, label) >= 0)
+                                        if (c == 0 && /* same arch? then go by version in label */
+                                            (!partition_designator_is_versioned(type.designator) ||
+                                             strverscmp_improved(label, m->partitions[type.designator].label) <= 0))
                                                 continue;
 
                                         dissected_partition_done(m->partitions + type.designator);
@@ -1575,7 +1593,8 @@ int dissect_image_file(
 #endif
 }
 
-static int dissect_log_error(int r, const char *name, const VeritySettings *verity) {
+int dissect_log_error(int log_level, int r, const char *name, const VeritySettings *verity) {
+        assert(log_level >= 0 && log_level <= LOG_DEBUG);
         assert(name);
 
         switch (r) {
@@ -1584,43 +1603,43 @@ static int dissect_log_error(int r, const char *name, const VeritySettings *veri
                 return r;
 
         case -EOPNOTSUPP:
-                return log_error_errno(r, "Dissecting images is not supported, compiled without blkid support.");
+                return log_full_errno(log_level, r, "Dissecting images is not supported, compiled without blkid support.");
 
         case -ENOPKG:
-                return log_error_errno(r, "%s: Couldn't identify a suitable partition table or file system.", name);
+                return log_full_errno(log_level, r, "%s: Couldn't identify a suitable partition table or file system.", name);
 
         case -ENOMEDIUM:
-                return log_error_errno(r, "%s: The image does not pass os-release/extension-release validation.", name);
+                return log_full_errno(log_level, r, "%s: The image does not pass os-release/extension-release validation.", name);
 
         case -EADDRNOTAVAIL:
-                return log_error_errno(r, "%s: No root partition for specified root hash found.", name);
+                return log_full_errno(log_level, r, "%s: No root partition for specified root hash found.", name);
 
         case -ENOTUNIQ:
-                return log_error_errno(r, "%s: Multiple suitable root partitions found in image.", name);
+                return log_full_errno(log_level, r, "%s: Multiple suitable root partitions found in image.", name);
 
         case -ENXIO:
-                return log_error_errno(r, "%s: No suitable root partition found in image.", name);
+                return log_full_errno(log_level, r, "%s: No suitable root partition found in image.", name);
 
         case -EPROTONOSUPPORT:
-                return log_error_errno(r, "Device '%s' is a loopback block device with partition scanning turned off, please turn it on.", name);
+                return log_full_errno(log_level, r, "Device '%s' is a loopback block device with partition scanning turned off, please turn it on.", name);
 
         case -ENOTBLK:
-                return log_error_errno(r, "%s: Image is not a block device.", name);
+                return log_full_errno(log_level, r, "%s: Image is not a block device.", name);
 
         case -EBADR:
-                return log_error_errno(r,
-                                       "Combining partitioned images (such as '%s') with external Verity data (such as '%s') not supported. "
-                                       "(Consider setting $SYSTEMD_DISSECT_VERITY_SIDECAR=0 to disable automatic discovery of external Verity data.)",
-                                       name, strna(verity ? verity->data_path : NULL));
+                return log_full_errno(log_level, r,
+                                      "Combining partitioned images (such as '%s') with external Verity data (such as '%s') not supported. "
+                                      "(Consider setting $SYSTEMD_DISSECT_VERITY_SIDECAR=0 to disable automatic discovery of external Verity data.)",
+                                      name, strna(verity ? verity->data_path : NULL));
 
         case -ERFKILL:
-                return log_error_errno(r, "%s: image does not match image policy.", name);
+                return log_full_errno(log_level, r, "%s: image does not match image policy.", name);
 
         case -ENOMSG:
-                return log_error_errno(r, "%s: no suitable partitions found.", name);
+                return log_full_errno(log_level, r, "%s: no suitable partitions found.", name);
 
         default:
-                return log_error_errno(r, "Failed to dissect image '%s': %m", name);
+                return log_full_errno(log_level, r, "%s: cannot dissect image: %m", name);
         }
 }
 
@@ -1633,6 +1652,7 @@ int dissect_image_file_and_warn(
                 DissectedImage **ret) {
 
         return dissect_log_error(
+                        LOG_ERR,
                         dissect_image_file(path, verity, mount_options, image_policy, flags, ret),
                         path,
                         verity);
@@ -1717,7 +1737,7 @@ static int run_fsck(int node_fd, const char *fstype) {
                 return log_debug_errno(r, "Failed to fork off fsck: %m");
         if (r == 0) {
                 /* Child */
-                execl("/sbin/fsck", "/sbin/fsck", "-aT", FORMAT_PROC_FD_PATH(node_fd), NULL);
+                execlp("fsck", "fsck", "-aT", FORMAT_PROC_FD_PATH(node_fd), NULL);
                 log_open();
                 log_debug_errno(errno, "Failed to execl() fsck: %m");
                 _exit(FSCK_OPERATIONAL_ERROR);
@@ -1725,7 +1745,7 @@ static int run_fsck(int node_fd, const char *fstype) {
 
         exit_status = wait_for_terminate_and_check("fsck", pid, 0);
         if (exit_status < 0)
-                return log_debug_errno(exit_status, "Failed to fork off /sbin/fsck: %m");
+                return log_debug_errno(exit_status, "Failed to fork off fsck: %m");
 
         if ((exit_status & ~FSCK_ERROR_CORRECTED) != FSCK_SUCCESS) {
                 log_debug("fsck failed with exit status %i.", exit_status);
@@ -1890,11 +1910,6 @@ static int mount_partition(
 
         if (!fstype)
                 return -EAFNOSUPPORT;
-        r = dissect_fstype_ok(fstype);
-        if (r < 0)
-                return r;
-        if (!r)
-                return -EIDRM; /* Recognizable error */
 
         /* We are looking at an encrypted partition? This either means stacked encryption, or the caller
          * didn't call dissected_image_decrypt() beforehand. Let's return a recognizable error for this
@@ -1902,6 +1917,12 @@ static int mount_partition(
         if (streq(fstype, "crypto_LUKS"))
                 return -EUNATCH;
 
+        r = dissect_fstype_ok(fstype);
+        if (r < 0)
+                return r;
+        if (!r)
+                return -EIDRM; /* Recognizable error */
+
         rw = m->rw && !(flags & DISSECT_IMAGE_MOUNT_READ_ONLY);
 
         discard = ((flags & DISSECT_IMAGE_DISCARD) ||
@@ -2001,6 +2022,27 @@ static int mount_root_tmpfs(const char *where, uid_t uid_shift, DissectImageFlag
         return 1;
 }
 
+static int mount_point_is_available(const char *where, const char *path, bool missing_ok) {
+        _cleanup_free_ char *p = NULL;
+        int r;
+
+        /* Returns > 0 if <path> below <where> is suitable as a mount point, i.e. is an empty directory or
+         * is missing (when missing_ok); 0 if it is not suitable; a negative errno-style value on error. */
+
+        r = chase(path, where, CHASE_PREFIX_ROOT, &p, NULL);
+        if (r == -ENOENT)
+                return missing_ok;
+        if (r < 0)
+                return log_debug_errno(r, "Failed to chase \"%s\": %m", path);
+
+        r = dir_is_empty(p, /* ignore_hidden_or_backup= */ false);
+        if (r == -ENOTDIR)
+                return false;
+        if (r < 0)
+                return log_debug_errno(r, "Failed to check directory \"%s\": %m", p);
+        return r > 0; /* only a positive dir_is_empty() result means "empty"; 0 (non-empty) is not available */
+}
+
 int dissected_image_mount(
                 DissectedImage *m,
                 const char *where,
@@ -2008,7 +2050,7 @@ int dissected_image_mount(
                 uid_t uid_range,
                 DissectImageFlags flags) {
 
-        int r, xbootldr_mounted;
+        int r;
 
         assert(m);
         assert(where);
@@ -2092,45 +2134,42 @@ int dissected_image_mount(
         if (r < 0)
                 return r;
 
-        xbootldr_mounted = mount_partition(PARTITION_XBOOTLDR, m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, uid_range, flags);
-        if (xbootldr_mounted < 0)
-                return xbootldr_mounted;
+        int slash_boot_is_available;
+        r = slash_boot_is_available = mount_point_is_available(where, "/boot", /* missing_ok = */ true);
+        if (r < 0)
+                return r;
+        if (r > 0) {
+                r = mount_partition(PARTITION_XBOOTLDR, m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, uid_range, flags);
+                if (r < 0)
+                        return r;
+                slash_boot_is_available = !r;
+        }
 
         if (m->partitions[PARTITION_ESP].found) {
-                int esp_done = false;
+                const char *esp_path = NULL;
 
-                /* Mount the ESP to /efi if it exists. If it doesn't exist, use /boot instead, but only if it
-                 * exists and is empty, and we didn't already mount the XBOOTLDR partition into it. */
+                /* Mount the ESP to /boot/ if it exists and is empty and we didn't already mount the XBOOTLDR
+                 * partition into it. Otherwise, use /efi instead, but only if it is empty or doesn't exist. */
 
-                r = chase("/efi", where, CHASE_PREFIX_ROOT, NULL, NULL);
-                if (r < 0) {
-                        if (r != -ENOENT)
+                if (slash_boot_is_available) {
+                        r = mount_point_is_available(where, "/boot", /* missing_ok = */ false);
+                        if (r < 0)
                                 return r;
-
-                        /* /efi doesn't exist. Let's see if /boot is suitable then */
-
-                        if (!xbootldr_mounted) {
-                                _cleanup_free_ char *p = NULL;
-
-                                r = chase("/boot", where, CHASE_PREFIX_ROOT, &p, NULL);
-                                if (r < 0) {
-                                        if (r != -ENOENT)
-                                                return r;
-                                } else if (dir_is_empty(p, /* ignore_hidden_or_backup= */ false) > 0) {
-                                        /* It exists and is an empty directory. Let's mount the ESP there. */
-                                        r = mount_partition(PARTITION_ESP, m->partitions + PARTITION_ESP, where, "/boot", uid_shift, uid_range, flags);
-                                        if (r < 0)
-                                                return r;
-
-                                        esp_done = true;
-                                }
-                        }
+                        if (r > 0)
+                                esp_path = "/boot";
                 }
 
-                if (!esp_done) {
-                        /* OK, let's mount the ESP now to /efi (possibly creating the dir if missing) */
+                if (!esp_path) {
+                        r = mount_point_is_available(where, "/efi", /* missing_ok = */ true);
+                        if (r < 0)
+                                return r;
+                        if (r > 0)
+                                esp_path = "/efi";
+                }
 
-                        r = mount_partition(PARTITION_ESP, m->partitions + PARTITION_ESP, where, "/efi", uid_shift, uid_range, flags);
+                if (esp_path) {
+                        /* OK, let's mount the ESP now (possibly creating the dir if missing) */
+                        r = mount_partition(PARTITION_ESP, m->partitions + PARTITION_ESP, where, esp_path, uid_shift, uid_range, flags);
                         if (r < 0)
                                 return r;
                 }
@@ -2727,7 +2766,7 @@ static int verity_partition(
 
         try_again:
                 /* Device is being removed by another process. Let's wait for a while. */
-                (void) usleep(2 * USEC_PER_MSEC);
+                (void) usleep_safe(2 * USEC_PER_MSEC);
         }
 
         /* All trials failed or a conflicting verity device exists. Let's try to activate with a unique name. */
@@ -3157,7 +3196,7 @@ int dissected_image_load_verity_sig_partition(
                 return -EINVAL;
 
         if (p->size > 4*1024*1024) /* Signature data cannot possible be larger than 4M, refuse that */
-                return -EFBIG;
+                return log_debug_errno(SYNTHETIC_ERRNO(EFBIG), "Verity signature partition is larger than 4M, refusing.");
 
         buf = new(char, p->size+1);
         if (!buf)
@@ -3560,6 +3599,7 @@ int dissect_loop_device_and_warn(
         assert(loop);
 
         return dissect_log_error(
+                        LOG_ERR,
                         dissect_loop_device(loop, verity, mount_options, image_policy, flags, ret),
                         loop->backing_file ?: loop->node,
                         verity);
@@ -3648,8 +3688,7 @@ int mount_image_privately_interactively(
         _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
         _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
         _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
-        _cleanup_(rmdir_and_freep) char *created_dir = NULL;
-        _cleanup_free_ char *temp = NULL;
+        _cleanup_free_ char *dir = NULL;
         int r;
 
         /* Mounts an OS image at a temporary place, inside a newly created mount namespace of our own. This
@@ -3657,7 +3696,6 @@ int mount_image_privately_interactively(
          * easily. */
 
         assert(image);
-        assert(ret_directory);
         assert(ret_loop_device);
 
         /* We intend to mount this right-away, hence add the partitions if needed and pin them. */
@@ -3668,10 +3706,6 @@ int mount_image_privately_interactively(
         if (r < 0)
                 return log_error_errno(r, "Failed to load root hash data: %m");
 
-        r = tempfn_random_child(NULL, program_invocation_short_name, &temp);
-        if (r < 0)
-                return log_error_errno(r, "Failed to generate temporary mount directory: %m");
-
         r = loop_device_make_by_path(
                         image,
                         FLAGS_SET(flags, DISSECT_IMAGE_DEVICE_READ_ONLY) ? O_RDONLY : O_RDWR,
@@ -3704,13 +3738,16 @@ int mount_image_privately_interactively(
         if (r < 0)
                 return log_error_errno(r, "Failed to detach mount namespace: %m");
 
-        r = mkdir_p(temp, 0700);
+        r = mkdir_p("/run/systemd/mount-rootfs", 0555);
         if (r < 0)
                 return log_error_errno(r, "Failed to create mount point: %m");
 
-        created_dir = TAKE_PTR(temp);
-
-        r = dissected_image_mount_and_warn(dissected_image, created_dir, UID_INVALID, UID_INVALID, flags);
+        r = dissected_image_mount_and_warn(
+                        dissected_image,
+                        "/run/systemd/mount-rootfs",
+                        /* uid_shift= */ UID_INVALID,
+                        /* uid_range= */ UID_INVALID,
+                        flags);
         if (r < 0)
                 return r;
 
@@ -3722,19 +3759,26 @@ int mount_image_privately_interactively(
         if (r < 0)
                 return log_error_errno(r, "Failed to relinquish DM and loopback block devices: %m");
 
+        if (ret_directory) {
+                dir = strdup("/run/systemd/mount-rootfs");
+                if (!dir)
+                        return log_oom();
+        }
+
         if (ret_dir_fd) {
                 _cleanup_close_ int dir_fd = -EBADF;
 
-                dir_fd = open(created_dir, O_CLOEXEC|O_DIRECTORY);
+                dir_fd = open("/run/systemd/mount-rootfs", O_CLOEXEC|O_DIRECTORY);
                 if (dir_fd < 0)
                         return log_error_errno(errno, "Failed to open mount point directory: %m");
 
                 *ret_dir_fd = TAKE_FD(dir_fd);
         }
 
-        *ret_directory = TAKE_PTR(created_dir);
-        *ret_loop_device = TAKE_PTR(d);
+        if (ret_directory)
+                *ret_directory = TAKE_PTR(dir);
 
+        *ret_loop_device = TAKE_PTR(d);
         return 0;
 }