]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
loop-util: always tell kernel explicitly about loopback sector size
authorLennart Poettering <lennart@poettering.net>
Tue, 17 Jan 2023 17:50:59 +0000 (18:50 +0100)
committerLennart Poettering <lennart@poettering.net>
Wed, 18 Jan 2023 09:47:17 +0000 (10:47 +0100)
Let's not leave the sector size unspecified: either set a user supplied
value, or auto-detect the right size by probing the disk image
accordingly.

src/core/namespace.c
src/dissect/dissect.c
src/home/homework-luks.c
src/nspawn/nspawn.c
src/portable/portable.c
src/shared/discover-image.c
src/shared/dissect-image.c
src/shared/dissect-image.h
src/shared/loop-util.c
src/shared/loop-util.h
src/sysext/sysext.c

index d46daa3c05c8c0a663745731c44f0e800901ba1b..feae4dcbbf084cc6ce1b13ec7e6e39e8fe6bda7b 100644 (file)
@@ -2093,6 +2093,7 @@ int setup_namespace(
                 r = loop_device_make_by_path(
                                 root_image,
                                 FLAGS_SET(dissect_image_flags, DISSECT_IMAGE_DEVICE_READ_ONLY) ? O_RDONLY : -1 /* < 0 means writable if possible, read-only as fallback */,
+                                /* sector_size= */ UINT32_MAX,
                                 FLAGS_SET(dissect_image_flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
                                 LOCK_SH,
                                 &loop_device);
index 25272c5e1b9ceb44d23fcf345325a4af268370b2..df08cc2b7439794175c925d23937fa91ac105496 100644 (file)
@@ -1437,9 +1437,9 @@ static int run(int argc, char *argv[]) {
         loop_flags = FLAGS_SET(arg_flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN;
 
         if (arg_in_memory)
-                r = loop_device_make_by_path_memory(arg_image, open_flags, loop_flags, LOCK_SH, &d);
+                r = loop_device_make_by_path_memory(arg_image, open_flags, /* sector_size= */ UINT32_MAX, loop_flags, LOCK_SH, &d);
         else
-                r = loop_device_make_by_path(arg_image, open_flags, loop_flags, LOCK_SH, &d);
+                r = loop_device_make_by_path(arg_image, open_flags, /* sector_size= */ UINT32_MAX, loop_flags, LOCK_SH, &d);
         if (r < 0)
                 return log_error_errno(r, "Failed to set up loopback device for %s: %m", arg_image);
 
index 3433cf7d40f4598e3f1473811340f31ffdddf29e..5ad7706636d47bc8354ed6e6826077984cd25845 100644 (file)
@@ -1378,7 +1378,15 @@ int home_setup_luks(
                                 return r;
                 }
 
-                r = loop_device_make(setup->image_fd, O_RDWR, offset, size, user_record_luks_sector_size(h), 0, LOCK_UN, &setup->loop);
+                r = loop_device_make(
+                                setup->image_fd,
+                                O_RDWR,
+                                offset,
+                                size,
+                                h->luks_sector_size == UINT64_MAX ? UINT32_MAX : user_record_luks_sector_size(h), /* if sector size is not specified, select UINT32_MAX, i.e. auto-probe */
+                                /* loop_flags= */ 0,
+                                LOCK_UN,
+                                &setup->loop);
                 if (r == -ENOENT) {
                         log_error_errno(r, "Loopback block device support is not available on this system.");
                         return -ENOLINK; /* make recognizable */
@@ -2319,7 +2327,15 @@ int home_create_luks(
 
         log_info("Writing of partition table completed.");
 
-        r = loop_device_make(setup->image_fd, O_RDWR, partition_offset, partition_size, user_record_luks_sector_size(h), 0, LOCK_EX, &setup->loop);
+        r = loop_device_make(
+                        setup->image_fd,
+                        O_RDWR,
+                        partition_offset,
+                        partition_size,
+                        user_record_luks_sector_size(h),
+                        0,
+                        LOCK_EX,
+                        &setup->loop);
         if (r < 0) {
                 if (r == -ENOENT) { /* this means /dev/loop-control doesn't exist, i.e. we are in a container
                                      * or similar and loopback bock devices are not available, return a
index 551f8f975c30e00c6408972977781c46e959f773..9f747a72b1968c94d5011297cd2cb767722f2b00 100644 (file)
@@ -5726,6 +5726,7 @@ static int run(int argc, char *argv[]) {
                 r = loop_device_make_by_path(
                                 arg_image,
                                 arg_read_only ? O_RDONLY : O_RDWR,
+                                /* sector_size= */ UINT32_MAX,
                                 FLAGS_SET(dissect_image_flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
                                 LOCK_SH,
                                 &loop);
index 7be1afbcc4bd8d4969bfc6265019945b1c44c62e..7811833fac0a49fb18142418db1c7e1e32571faf 100644 (file)
@@ -335,7 +335,7 @@ static int portable_extract_by_path(
 
         assert(path);
 
-        r = loop_device_make_by_path(path, O_RDONLY, LO_FLAGS_PARTSCAN, LOCK_SH, &d);
+        r = loop_device_make_by_path(path, O_RDONLY, /* sector_size= */ UINT32_MAX, LO_FLAGS_PARTSCAN, LOCK_SH, &d);
         if (r == -EISDIR) {
                 _cleanup_free_ char *image_name = NULL;
 
index 1f0f7ca7da0d02235d6cabab78533dff9fed925c..7f1fe4f4dcda0dcda476d0d879c8d61daa8d7b38 100644 (file)
@@ -1197,7 +1197,7 @@ int image_read_metadata(Image *i) {
                 _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
                 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
 
-                r = loop_device_make_by_path(i->path, O_RDONLY, LO_FLAGS_PARTSCAN, LOCK_SH, &d);
+                r = loop_device_make_by_path(i->path, O_RDONLY, /* sector_size= */ UINT32_MAX, LO_FLAGS_PARTSCAN, LOCK_SH, &d);
                 if (r < 0)
                         return r;
 
index d78568d3440bb06059846c7c491cc45be9fe3032..da8eba83deab4f0b35870605fa8093ce12df8e70 100644 (file)
@@ -546,6 +546,7 @@ static int dissect_image(
         assert(!verity || verity->root_hash_sig || verity->root_hash_sig_size == 0);
         assert(!verity || (verity->root_hash || !verity->root_hash_sig));
         assert(!((flags & DISSECT_IMAGE_GPT_ONLY) && (flags & DISSECT_IMAGE_NO_PARTITION_TABLE)));
+        assert(m->sector_size > 0);
 
         /* Probes a disk image, and returns information about what it found in *ret.
          *
@@ -594,6 +595,11 @@ static int dissect_image(
         if (r != 0)
                 return errno_or_else(ENOMEM);
 
+        errno = 0;
+        r = blkid_probe_set_sectorsize(b, m->sector_size);
+        if (r != 0)
+                return errno_or_else(EIO);
+
         if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
                 /* Look for file system superblocks, unless we only shall look for GPT partition tables */
                 blkid_probe_enable_superblocks(b, 1);
@@ -1326,6 +1332,10 @@ int dissect_image_file(
         if (r < 0)
                 return r;
 
+        r = probe_sector_size(fd, &m->sector_size);
+        if (r < 0)
+                return r;
+
         r = dissect_image(m, fd, path, verity, mount_options, flags);
         if (r < 0)
                 return r;
@@ -3143,6 +3153,7 @@ int dissect_loop_device(
                 return r;
 
         m->loop = loop_device_ref(loop);
+        m->sector_size = m->loop->sector_size;
 
         r = dissect_image(m, loop->fd, loop->node, verity, mount_options, flags);
         if (r < 0)
@@ -3318,6 +3329,7 @@ int mount_image_privately_interactively(
         r = loop_device_make_by_path(
                         image,
                         FLAGS_SET(flags, DISSECT_IMAGE_DEVICE_READ_ONLY) ? O_RDONLY : O_RDWR,
+                        /* sector_size= */ UINT32_MAX,
                         FLAGS_SET(flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
                         LOCK_SH,
                         &d);
@@ -3409,7 +3421,8 @@ int verity_dissect_and_mount(
          * accepted by LOOP_CONFIGURE, so just let loop_device_make_by_path reopen it as a regular FD. */
         r = loop_device_make_by_path(
                         src_fd >= 0 ? FORMAT_PROC_FD_PATH(src_fd) : src,
-                        -1,
+                        /* open_flags= */ -1,
+                        /* sector_size= */ UINT32_MAX,
                         verity.data_path ? 0 : LO_FLAGS_PARTSCAN,
                         LOCK_SH,
                         &loop_device);
index 638524f71612da17c1bacaf9bedd5cd09da6b18b..a69482d237147c7d7548d7b2975b11280ebdf6d1 100644 (file)
@@ -93,6 +93,8 @@ struct DissectedImage {
         DissectedPartition partitions[_PARTITION_DESIGNATOR_MAX];
         DecryptedImage *decrypted_image;
 
+        uint32_t sector_size;
+
         /* Meta information extracted from /etc/os-release and similar */
         char *image_name;
         sd_id128_t image_uuid;
index f6ae769d89201b34a9c364dd4aa238dc04c854ae..5945ba2ad459638748f15b5e6fa49c5bc98a6524 100644 (file)
@@ -20,6 +20,7 @@
 #include "data-fd-util.h"
 #include "device-util.h"
 #include "devnum-util.h"
+#include "dissect-image.h"
 #include "env-util.h"
 #include "errno-util.h"
 #include "fd-util.h"
@@ -409,6 +410,7 @@ static int loop_configure(
                 .diskseq = diskseq,
                 .uevent_seqnum_not_before = seqnum,
                 .timestamp_not_before = timestamp,
+                .sector_size = c->block_size,
         };
 
         *ret = TAKE_PTR(d);
@@ -421,7 +423,7 @@ static int loop_device_make_internal(
                 int open_flags,
                 uint64_t offset,
                 uint64_t size,
-                uint32_t block_size,
+                uint32_t sector_size,
                 uint32_t loop_flags,
                 int lock_op,
                 LoopDevice **ret) {
@@ -492,9 +494,50 @@ static int loop_device_make_internal(
         if (control < 0)
                 return -errno;
 
+        if (sector_size == 0)
+                /* If no sector size is specified, default to the classic default */
+                sector_size = 512;
+        else if (sector_size == UINT32_MAX) {
+
+                if (S_ISBLK(st.st_mode))
+                        /* If the sector size is specified as UINT32_MAX we'll propagate the sector size of
+                         * the underlying block device. */
+                        r = blockdev_get_sector_size(fd, &sector_size);
+                else {
+                        _cleanup_close_ int non_direct_io_fd = -1;
+                        int probe_fd;
+
+                        assert(S_ISREG(st.st_mode));
+
+                        /* If sector size is specified as UINT32_MAX, we'll try to probe the right sector
+                         * size of the image in question by looking for the GPT partition header at various
+                         * offsets. This of course only works if the image already has a disk label.
+                         *
+                         * So here we actually want to read the file contents ourselves. This is quite likely
+                         * not going to work if we managed to enable O_DIRECT, because in such a case there
+                         * are some pretty strict alignment requirements to offset, size and target, but
+                         * there's no way to query what alignment specifically is actually required. Hence,
+                         * let's avoid the mess, and temporarily open an fd without O_DIRECT for the probing
+                         * logic. */
+
+                        if (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO)) {
+                                non_direct_io_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+                                if (non_direct_io_fd < 0)
+                                        return non_direct_io_fd;
+
+                                probe_fd = non_direct_io_fd;
+                        } else
+                                probe_fd = fd;
+
+                        r = probe_sector_size(probe_fd, &sector_size);
+                }
+                if (r < 0)
+                        return r;
+        }
+
         config = (struct loop_config) {
                 .fd = fd,
-                .block_size = block_size,
+                .block_size = sector_size,
                 .info = {
                         /* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
                         .lo_flags = (loop_flags & ~LO_FLAGS_READ_ONLY) | ((open_flags & O_ACCMODE) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR,
@@ -578,7 +621,7 @@ int loop_device_make(
                 int open_flags,
                 uint64_t offset,
                 uint64_t size,
-                uint32_t block_size,
+                uint32_t sector_size,
                 uint32_t loop_flags,
                 int lock_op,
                 LoopDevice **ret) {
@@ -592,7 +635,7 @@ int loop_device_make(
                         open_flags,
                         offset,
                         size,
-                        block_size,
+                        sector_size,
                         loop_flags_mangle(loop_flags),
                         lock_op,
                         ret);
@@ -601,6 +644,7 @@ int loop_device_make(
 int loop_device_make_by_path(
                 const char *path,
                 int open_flags,
+                uint32_t sector_size,
                 uint32_t loop_flags,
                 int lock_op,
                 LoopDevice **ret) {
@@ -656,12 +700,13 @@ int loop_device_make_by_path(
                   direct ? "enabled" : "disabled",
                   direct != (direct_flags != 0) ? " (O_DIRECT was requested but not supported)" : "");
 
-        return loop_device_make_internal(path, fd, open_flags, 0, 0, 0, loop_flags, lock_op, ret);
+        return loop_device_make_internal(path, fd, open_flags, 0, 0, sector_size, loop_flags, lock_op, ret);
 }
 
 int loop_device_make_by_path_memory(
                 const char *path,
                 int open_flags,
+                uint32_t sector_size,
                 uint32_t loop_flags,
                 int lock_op,
                 LoopDevice **ret) {
@@ -697,7 +742,7 @@ int loop_device_make_by_path_memory(
 
         fd = safe_close(fd); /* Let's close the original early */
 
-        return loop_device_make_internal(NULL, mfd, open_flags, 0, 0, 0, loop_flags, lock_op, ret);
+        return loop_device_make_internal(NULL, mfd, open_flags, 0, 0, sector_size, loop_flags, lock_op, ret);
 }
 
 static LoopDevice* loop_device_free(LoopDevice *d) {
@@ -839,6 +884,11 @@ int loop_device_open(
         if (r < 0 && r != -EOPNOTSUPP)
                 return r;
 
+        uint32_t sector_size;
+        r = blockdev_get_sector_size(fd, &sector_size);
+        if (r < 0)
+                return r;
+
         r = sd_device_get_devnum(dev, &devnum);
         if (r < 0)
                 return r;
@@ -868,6 +918,7 @@ int loop_device_open(
                 .diskseq = diskseq,
                 .uevent_seqnum_not_before = UINT64_MAX,
                 .timestamp_not_before = USEC_INFINITY,
+                .sector_size = sector_size,
         };
 
         *ret = d;
index 512b5ab4a22d6510b5388bf07a0fb42130d06995..5c79ed7ccee90710b8b0185d5948e948f5ce51ac 100644 (file)
@@ -23,14 +23,15 @@ struct LoopDevice {
         uint64_t diskseq; /* Block device sequence number, monothonically incremented by the kernel on create/attach, or 0 if we don't know */
         uint64_t uevent_seqnum_not_before; /* uevent sequm right before we attached the loopback device, or UINT64_MAX if we don't know */
         usec_t timestamp_not_before; /* CLOCK_MONOTONIC timestamp taken immediately before attaching the loopback device, or USEC_INFINITY if we don't know */
+        uint32_t sector_size;
 };
 
 /* Returns true if LoopDevice object is not actually a loopback device but some other block device we just wrap */
 #define LOOP_DEVICE_IS_FOREIGN(d) ((d)->nr < 0)
 
-int loop_device_make(int fd, int open_flags, uint64_t offset, uint64_t size, uint32_t block_size, uint32_t loop_flags, int lock_op, LoopDevice **ret);
-int loop_device_make_by_path(const char *path, int open_flags, uint32_t loop_flags, int lock_op, LoopDevice **ret);
-int loop_device_make_by_path_memory(const char *path, int open_flags, uint32_t loop_flags, int lock_op, LoopDevice **ret);
+int loop_device_make(int fd, int open_flags, uint64_t offset, uint64_t size, uint32_t sector_size, uint32_t loop_flags, int lock_op, LoopDevice **ret);
+int loop_device_make_by_path(const char *path, int open_flags, uint32_t sector_size, uint32_t loop_flags, int lock_op, LoopDevice **ret);
+int loop_device_make_by_path_memory(const char *path, int open_flags, uint32_t sector_size, uint32_t loop_flags, int lock_op, LoopDevice **ret);
 int loop_device_open(sd_device *dev, int open_flags, int lock_op, LoopDevice **ret);
 int loop_device_open_from_fd(int fd, int open_flags, int lock_op, LoopDevice **ret);
 int loop_device_open_from_path(const char *path, int open_flags, int lock_op, LoopDevice **ret);
index 4fbc5e877e813d0470447e73e6d8f839c18a61cb..0c4391991be4fa7ef6caa163c8e9dad0a2c6ffdd 100644 (file)
@@ -540,6 +540,7 @@ static int merge_subprocess(Hashmap *images, const char *workspace) {
                         r = loop_device_make_by_path(
                                         img->path,
                                         O_RDONLY,
+                                        /* sector_size= */ UINT32_MAX,
                                         FLAGS_SET(flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
                                         LOCK_SH,
                                         &d);