return write_string_filef(sysfs_path, WRITE_STRING_FILE_DISABLE_BUFFER, "%" PRIu64, max_discard);
}
+static int probe_sector_size_harder(int fd, uint32_t *ret) {
+ _cleanup_close_ int non_direct_io_fd = -EBADF;
+ int probe_fd, f_flags;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ /* Wraps probe_sector_size() but handles O_DIRECT: if the fd is opened with O_DIRECT there are
+ * strict alignment requirements for reads, so we temporarily reopen it without O_DIRECT for the
+ * probing logic. */
+
+ f_flags = fcntl(fd, F_GETFL);
+ if (f_flags < 0)
+ return -errno;
+
+ if (FLAGS_SET(f_flags, O_DIRECT)) {
+ non_direct_io_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (non_direct_io_fd < 0)
+ return non_direct_io_fd;
+
+ probe_fd = non_direct_io_fd;
+ } else
+ probe_fd = fd;
+
+ return probe_sector_size(probe_fd, ret);
+}
+
static int loop_device_make_internal(
const char *path,
int fd,
assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));
assert(ret);
+ /* sector_size interpretation:
+ * 0 → use device sector size for block devices, 512 for regular files
+ * UINT32_MAX → probe GPT header to find the right sector size, fall back to 0 behavior
+ * other → use the specified sector size explicitly */
+
f_flags = fcntl(fd, F_GETFL);
if (f_flags < 0)
return -errno;
return log_debug_errno(SYNTHETIC_ERRNO(EBADFD), "Access mode of image file is write only (?)");
}
+ if (sector_size == UINT32_MAX) {
+ /* If sector size is specified as UINT32_MAX, we'll try to probe the right sector size
+ * by looking for the GPT partition header at various offsets. This of course only works
+ * if the image already has a disk label. */
+
+ r = probe_sector_size_harder(fd, §or_size);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ sector_size = 0; /* If we can't probe anything, use default sector size. */
+ }
+
if (fstat(fd, &st) < 0)
return -errno;
if (S_ISBLK(st.st_mode)) {
- if (offset == 0 && IN_SET(size, 0, UINT64_MAX))
+ uint32_t device_ssz;
+ r = blockdev_get_sector_size(fd, &device_ssz);
+ if (r < 0)
+ return r;
+
+ if (sector_size == 0)
+ sector_size = device_ssz;
+
+ if (offset == 0 && IN_SET(size, 0, UINT64_MAX) && sector_size == device_ssz)
/* If this is already a block device and we are supposed to cover the whole of it
- * then store an fd to the original open device node — and do not actually create an
- * unnecessary loopback device for it. */
+ * then store an fd to the original open device node — and do not actually create
+ * an unnecessary loopback device for it. If an explicit sector size was requested
+ * that differs from the device sector size, or if the probed GPT sector size
+ * differs (e.g. CD-ROMs with 2048-byte blocks but a 512-byte sector GPT), create
+ * a real loop device to change the sector size. */
return loop_device_open_from_fd(fd, open_flags, lock_op, ret);
} else {
r = stat_verify_regular(&st);
if (r < 0)
return r;
+
+ if (sector_size == 0)
+ sector_size = 512;
}
if (path) {
if (control < 0)
return -errno;
- if (sector_size == 0)
- /* If no sector size is specified, default to the classic default */
- sector_size = 512;
- else if (sector_size == UINT32_MAX) {
-
- if (S_ISBLK(st.st_mode))
- /* If the sector size is specified as UINT32_MAX we'll propagate the sector size of
- * the underlying block device. */
- r = blockdev_get_sector_size(fd, §or_size);
- else {
- _cleanup_close_ int non_direct_io_fd = -EBADF;
- int probe_fd;
-
- assert(S_ISREG(st.st_mode));
-
- /* If sector size is specified as UINT32_MAX, we'll try to probe the right sector
- * size of the image in question by looking for the GPT partition header at various
- * offsets. This of course only works if the image already has a disk label.
- *
- * So here we actually want to read the file contents ourselves. This is quite likely
- * not going to work if we managed to enable O_DIRECT, because in such a case there
- * are some pretty strict alignment requirements to offset, size and target, but
- * there's no way to query what alignment specifically is actually required. Hence,
- * let's avoid the mess, and temporarily open an fd without O_DIRECT for the probing
- * logic. */
-
- if (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO)) {
- non_direct_io_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
- if (non_direct_io_fd < 0)
- return non_direct_io_fd;
-
- probe_fd = non_direct_io_fd;
- } else
- probe_fd = fd;
-
- r = probe_sector_size(probe_fd, §or_size);
- }
- if (r < 0)
- return r;
- }
-
/* Strip LO_FLAGS_PARTSCAN from LOOP_CONFIGURE and enable it afterwards via
* LOOP_SET_STATUS64 to work around a kernel race: LOOP_CONFIGURE sends a uevent with
* GD_NEED_PART_SCAN set before calling loop_reread_partitions(). If udev opens the device in
#endif
}
+static int make_test_image(int *ret_fd) {
+ _cleanup_free_ char *p = NULL, *cmd = NULL;
+ _cleanup_pclose_ FILE *sfdisk = NULL;
+
+ ASSERT_OK(tempfn_random_child("/var/tmp", "sfdisk", &p));
+ int fd = ASSERT_OK_ERRNO(open(p, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC|O_NOFOLLOW, 0666));
+ ASSERT_OK_ERRNO(ftruncate(fd, 256*1024*1024));
+
+ cmd = ASSERT_NOT_NULL(strjoin("sfdisk ", p));
+ sfdisk = ASSERT_NOT_NULL(popen(cmd, "we"));
+
+ fputs("label: gpt\n"
+ "size=32M, type=C12A7328-F81F-11D2-BA4B-00A0C93EC93B\n", sfdisk);
+
+ ASSERT_EQ(pclose(sfdisk), 0);
+ sfdisk = NULL;
+
+ (void) unlink(p);
+
+ *ret_fd = fd;
+ return 0;
+}
+
+TEST(sector_size_regular_file) {
+ _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
+ _cleanup_close_ int fd = -EBADF;
+
+ if (have_effective_cap(CAP_SYS_ADMIN) <= 0) {
+ log_tests_skipped("not running privileged");
+ return;
+ }
+
+ if (detect_container() != 0 || running_in_chroot() != 0) {
+ log_tests_skipped("Test not supported in a container/chroot");
+ return;
+ }
+
+ ASSERT_OK(make_test_image(&fd));
+
+ /* sector_size=0 on regular file: should default to 512 */
+ ASSERT_OK(loop_device_make(fd, O_RDWR, 0, UINT64_MAX, 0, 0, LOCK_EX, &loop));
+ ASSERT_EQ(loop->sector_size, 512u);
+ loop = loop_device_unref(loop);
+
+ /* sector_size=UINT32_MAX on regular file with GPT: should probe and find 512 */
+ ASSERT_OK(loop_device_make(fd, O_RDWR, 0, UINT64_MAX, UINT32_MAX, 0, LOCK_EX, &loop));
+ ASSERT_EQ(loop->sector_size, 512u);
+ loop = loop_device_unref(loop);
+
+ /* Explicit sector_size=512 on regular file */
+ ASSERT_OK(loop_device_make(fd, O_RDWR, 0, UINT64_MAX, 512, 0, LOCK_EX, &loop));
+ ASSERT_EQ(loop->sector_size, 512u);
+ loop = loop_device_unref(loop);
+
+ /* Explicit sector_size=4096 on regular file */
+ ASSERT_OK(loop_device_make(fd, O_RDWR, 0, UINT64_MAX, 4096, 0, LOCK_EX, &loop));
+ ASSERT_EQ(loop->sector_size, 4096u);
+ loop = loop_device_unref(loop);
+}
+
+TEST(sector_size_block_device) {
+ _cleanup_(loop_device_unrefp) LoopDevice *block_loop = NULL, *loop = NULL;
+ _cleanup_close_ int fd = -EBADF;
+
+ if (have_effective_cap(CAP_SYS_ADMIN) <= 0) {
+ log_tests_skipped("not running privileged");
+ return;
+ }
+
+ if (detect_container() != 0 || running_in_chroot() != 0) {
+ log_tests_skipped("Test not supported in a container/chroot, requires udev/uevent notifications");
+ return;
+ }
+
+ ASSERT_OK(make_test_image(&fd));
+
+ /* Create a loop device to use as our block device */
+ ASSERT_OK(loop_device_make(fd, O_RDWR, 0, UINT64_MAX, 0, LO_FLAGS_PARTSCAN, LOCK_EX, &block_loop));
+ ASSERT_FALSE(LOOP_DEVICE_IS_FOREIGN(block_loop));
+ ASSERT_OK(loop_device_flock(block_loop, LOCK_SH));
+
+ uint32_t device_ssz = block_loop->sector_size;
+
+ /* sector_size=0 on block device: should use device directly */
+ ASSERT_OK(loop_device_make(block_loop->fd, O_RDWR, 0, UINT64_MAX, 0, 0, LOCK_SH, &loop));
+ ASSERT_FALSE(loop->created);
+ ASSERT_EQ(loop->sector_size, device_ssz);
+ loop = loop_device_unref(loop);
+
+ /* sector_size=UINT32_MAX on block device: should probe, match device, use directly */
+ ASSERT_OK(loop_device_make(block_loop->fd, O_RDWR, 0, UINT64_MAX, UINT32_MAX, 0, LOCK_SH, &loop));
+ ASSERT_FALSE(loop->created);
+ ASSERT_EQ(loop->sector_size, device_ssz);
+ loop = loop_device_unref(loop);
+
+ /* sector_size=UINT32_MAX with LO_FLAGS_PARTSCAN: should probe, match, use directly */
+ ASSERT_OK(loop_device_make(block_loop->fd, O_RDWR, 0, UINT64_MAX, UINT32_MAX, LO_FLAGS_PARTSCAN, LOCK_SH, &loop));
+ ASSERT_FALSE(loop->created);
+ ASSERT_EQ(loop->sector_size, device_ssz);
+ loop = loop_device_unref(loop);
+
+ /* Explicit sector_size matching device: should use device directly */
+ ASSERT_OK(loop_device_make(block_loop->fd, O_RDWR, 0, UINT64_MAX, device_ssz, 0, LOCK_SH, &loop));
+ ASSERT_FALSE(loop->created);
+ ASSERT_EQ(loop->sector_size, device_ssz);
+ loop = loop_device_unref(loop);
+
+ /* Explicit sector_size=4096 (differs from device 512): should create a real loop device */
+ if (device_ssz != 4096) {
+ ASSERT_OK(loop_device_make(block_loop->fd, O_RDWR, 0, UINT64_MAX, 4096, 0, LOCK_SH, &loop));
+ ASSERT_TRUE(loop->created);
+ ASSERT_EQ(loop->sector_size, 4096u);
+ loop = loop_device_unref(loop);
+ }
+}
+
+TEST(sector_size_mismatch) {
+ _cleanup_(loop_device_unrefp) LoopDevice *block_loop = NULL, *loop = NULL;
+ _cleanup_close_ int fd = -EBADF;
+
+ if (have_effective_cap(CAP_SYS_ADMIN) <= 0) {
+ log_tests_skipped("not running privileged");
+ return;
+ }
+
+ if (detect_container() != 0 || running_in_chroot() != 0) {
+ log_tests_skipped("Test not supported in a container/chroot");
+ return;
+ }
+
+ /* Create an image with a GPT written at 512-byte sectors, then create a loop device with
+ * 4096-byte sectors on top. This simulates the CD-ROM scenario where the device has large
+ * blocks but the GPT uses 512-byte sectors. */
+ ASSERT_OK(make_test_image(&fd));
+
+ /* Create a loop device with 4096-byte sector size — GPT was written at 512 */
+ ASSERT_OK(loop_device_make(fd, O_RDWR, 0, UINT64_MAX, 4096, 0, LOCK_EX, &block_loop));
+ ASSERT_TRUE(block_loop->created);
+ ASSERT_EQ(block_loop->sector_size, 4096u);
+ ASSERT_OK(loop_device_flock(block_loop, LOCK_SH));
+
+ /* sector_size=0: no preference, should use block device directly despite GPT mismatch */
+ ASSERT_OK(loop_device_make(block_loop->fd, O_RDWR, 0, UINT64_MAX, 0, 0, LOCK_SH, &loop));
+ ASSERT_FALSE(loop->created);
+ ASSERT_EQ(loop->sector_size, 4096u);
+ loop = loop_device_unref(loop);
+
+ /* sector_size=UINT32_MAX: should probe GPT at 512, detect mismatch with device 4096,
+ * and create a new loop device with 512-byte sectors */
+ ASSERT_OK(loop_device_make(block_loop->fd, O_RDWR, 0, UINT64_MAX, UINT32_MAX, 0, LOCK_SH, &loop));
+ ASSERT_TRUE(loop->created);
+ ASSERT_EQ(loop->sector_size, 512u);
+ loop = loop_device_unref(loop);
+
+ /* Explicit sector_size=512: differs from device 4096, should create a new loop device */
+ ASSERT_OK(loop_device_make(block_loop->fd, O_RDWR, 0, UINT64_MAX, 512, 0, LOCK_SH, &loop));
+ ASSERT_TRUE(loop->created);
+ ASSERT_EQ(loop->sector_size, 512u);
+ loop = loop_device_unref(loop);
+
+ /* Explicit sector_size=4096: matches device, should use directly */
+ ASSERT_OK(loop_device_make(block_loop->fd, O_RDWR, 0, UINT64_MAX, 4096, 0, LOCK_SH, &loop));
+ ASSERT_FALSE(loop->created);
+ ASSERT_EQ(loop->sector_size, 4096u);
+ loop = loop_device_unref(loop);
+}
+
DEFINE_TEST_MAIN_WITH_INTRO(LOG_DEBUG, intro);