From: Daan De Meyer Date: Mon, 15 Jun 2026 09:06:42 +0000 (+0000) Subject: loop-util: shortcut block device fd when it carries no partition table X-Git-Tag: v261-rc4~15 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3978b076afbaca9b65543b629a27e807de2765df;p=thirdparty%2Fsystemd.git loop-util: shortcut block device fd when it carries no partition table 663f0bf5cb stopped reusing the original block device fd whenever partition scanning was requested (LO_FLAGS_PARTSCAN) but couldn't be enabled on the device, so that nested partition tables on devices the kernel won't scan (e.g. the pmOS/android case) get exposed via a real loop device. However that also forced a pointless loop device for any partition that carries a file system directly, e.g. a btrfs subvolume mounted via MountImages=. For multi-device btrfs this is fatal: the kernel rejects seeing the same member via both the original partition and the loop device, and the mount fails. A loop device is only ever needed here to expose a nested partition table. So only refuse the shortcut when the device actually carries one (GPT or MBR), probed with libblkid via the new probe_partition_table() helper, instead of whenever partition scanning is disabled. Devices carrying a file system directly (or nothing) take the shortcut as before. Add an integration test to cover the failure scenario of the original issue. Fixes: https://github.com/systemd/systemd/issues/42520 Replaces: https://github.com/systemd/systemd/pull/42576 Follow-up for 663f0bf5cb79ecaf6dd71441ecdc9dc401e7eae6 Co-Authored-By: Luca Boccassi Co-developed-by: Claude Opus 4.8 --- diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c index d30749117ce..e70263a9fd4 100644 --- a/src/dissect/dissect.c +++ b/src/dissect/dissect.c @@ -2043,6 +2043,11 @@ static int run(int argc, char *argv[]) { open_flags = FLAGS_SET(arg_flags, DISSECT_IMAGE_DEVICE_READ_ONLY) ? O_RDONLY : -1; loop_flags = FLAGS_SET(arg_flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 
0 : LO_FLAGS_PARTSCAN; + /* --attach hands a loop device to the user, who may populate it with a (nested) + * partition table afterwards, so force a real loopback device with partition + * scanning even if the image is currently unpartitioned. */ + if (arg_action == ACTION_ATTACH) + loop_flags |= LOOP_DEVICE_MAY_POPULATE_PARTITION_TABLE; if (arg_in_memory) r = loop_device_make_by_path_memory(arg_image, open_flags, /* sector_size= */ UINT32_MAX, loop_flags, LOCK_SH, &d); diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c index b3423dd97d3..14744a4c8d1 100644 --- a/src/shared/dissect-image.c +++ b/src/shared/dissect-image.c @@ -320,6 +320,63 @@ not_found: #endif } +int probe_partition_table(int fd, char **ret_pttype) { + + /* Probes the whole device referenced by fd for a partition table and returns its blkid type (e.g. + * "gpt" or "dos") in *ret_pttype, or NULL if none is found. Returns a negative error on failure + * (including -EUCLEAN for ambiguous results). */ + +#if HAVE_BLKID + _cleanup_(blkid_free_probep) blkid_probe b = NULL; + const char *pttype = NULL; + int r; + + assert(fd >= 0); + assert(ret_pttype); + + r = dlopen_libblkid(LOG_DEBUG); + if (r < 0) + return r; + + b = sym_blkid_new_probe(); + if (!b) + return -ENOMEM; + + errno = 0; + r = sym_blkid_probe_set_device(b, fd, /* offset= */ 0, /* size= */ 0 /* i.e. 
everything */); + if (r != 0) + return errno_or_else(ENOMEM); + + sym_blkid_probe_enable_partitions(b, 1); + + errno = 0; + r = sym_blkid_do_safeprobe(b); + if (r == _BLKID_SAFEPROBE_NOT_FOUND) { + log_debug("No partition table detected."); + *ret_pttype = NULL; + return 0; + } + if (r == _BLKID_SAFEPROBE_AMBIGUOUS) + return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN), "Partition table results ambiguous."); + if (r == _BLKID_SAFEPROBE_ERROR) + return log_debug_errno(errno_or_else(EIO), "Failed to probe for partition table: %m"); + + assert(r == _BLKID_SAFEPROBE_FOUND); + + (void) sym_blkid_probe_lookup_value(b, "PTTYPE", &pttype, /* len= */ NULL); + if (!pttype) { + log_debug("No partition table detected."); + *ret_pttype = NULL; + return 0; + } + + log_debug("Probed partition table type '%s'.", pttype); + return strdup_to_full(ret_pttype, pttype); +#else + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Compiled without blkid support, cannot probe for partition table."); +#endif +} + #if HAVE_BLKID static int image_policy_may_use( const ImagePolicy *policy, diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h index 09d7db5952b..fe6af790bfb 100644 --- a/src/shared/dissect-image.h +++ b/src/shared/dissect-image.h @@ -160,6 +160,8 @@ static inline int probe_filesystem(const char *path, char **ret_fstype) { return probe_filesystem_full(-1, path, 0, UINT64_MAX, /* bool restrict_fstypes= */ false, ret_fstype); } +int probe_partition_table(int fd, char **ret_pttype); + int dissect_log_error(int log_level, int r, const char *name, const VeritySettings *verity); int dissect_image_file(const char *path, const VeritySettings *verity, const MountOptions *mount_options, const ImagePolicy *image_policy, const ImageFilter *filter, DissectImageFlags flags, DissectedImage **ret); int dissect_image_file_and_warn(const char *path, const VeritySettings *verity, const MountOptions *mount_options, const ImagePolicy *image_policy, const ImageFilter *filter, 
DissectImageFlags flags, DissectedImage **ret); diff --git a/src/shared/loop-util.c b/src/shared/loop-util.c index 1debc4267f4..4e2b2f70c11 100644 --- a/src/shared/loop-util.c +++ b/src/shared/loop-util.c @@ -31,6 +31,7 @@ #include "stat-util.h" #include "stdio-util.h" #include "string-util.h" +#include "strv.h" #include "time-util.h" static void cleanup_clear_loop_close(int *fd) { @@ -419,31 +420,48 @@ static int fd_set_max_discard(int fd, uint64_t max_discard) { return write_string_filef(sysfs_path, WRITE_STRING_FILE_DISABLE_BUFFER, "%" PRIu64, max_discard); } -static int probe_sector_size_harder(int fd, uint32_t *ret) { - _cleanup_close_ int non_direct_io_fd = -EBADF; - int probe_fd, f_flags; +static int probe_fd_open(int fd, int f_flags, int *ret_to_close) { + int r; assert(fd >= 0); - assert(ret); + assert(ret_to_close); - /* Wraps probe_sector_size() but handles O_DIRECT: if the fd is opened with O_DIRECT there are - * strict alignment requirements for reads, so we temporarily reopen it without O_DIRECT for the - * probing logic. */ + /* blkid- and pread-based probing has no special handling for the strict alignment requirements of + * O_DIRECT, so if fd was opened with O_DIRECT we reopen it without for the probing logic. Returns the + * fd to use for probing; when a new fd had to be opened it is also stored in *ret_to_close for the + * caller to close, otherwise *ret_to_close is set to -EBADF and the original fd is returned. 
*/ - f_flags = fcntl(fd, F_GETFL); - if (f_flags < 0) - return -errno; + if (!FLAGS_SET(f_flags, O_DIRECT)) { + *ret_to_close = -EBADF; + return fd; + } - if (FLAGS_SET(f_flags, O_DIRECT)) { - non_direct_io_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK); - if (non_direct_io_fd < 0) - return non_direct_io_fd; + r = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK); + if (r < 0) + return r; - probe_fd = non_direct_io_fd; - } else - probe_fd = fd; + return (*ret_to_close = r); +} + +static int fd_has_partition_table(int fd) { + _cleanup_free_ char *pttype = NULL; + int r; - return probe_sector_size(probe_fd, ret); + assert(fd >= 0); + + /* Checks whether the device carries a partition table the image dissection logic acts upon. We use + * this to decide whether wrapping the device in a loopback device with partition scanning enabled + * actually serves a purpose: if there are no partitions to expose we can hand back the original fd + * instead. Expects an fd suitable for probing, i.e. opened without O_DIRECT (see probe_fd_open()). */ + + r = probe_partition_table(fd, &pttype); /* already logs on error */ + if (r < 0) + return r; + + /* Only GPT and MBR ("dos") tables are understood by the dissection logic and require partition + * scanning to expose their partitions; anything else it treats as unpartitioned, so a loopback + * device wouldn't help (and STRPTR_IN_SET() handles a NULL pttype, i.e. no table, as false). */ + return STRPTR_IN_SET(pttype, "gpt", "dos"); } static int loop_device_can_shortcut( @@ -458,9 +476,10 @@ static int loop_device_can_shortcut( /* Returns whether we can hand back the original block device fd instead of allocating a real * loopback device for it: it must cover the whole device, the requested sector size must match the - * device's sector size, and if partscan was requested it must already be enabled on the device - * (otherwise e.g. 
partition block devices or loop devices created without LO_FLAGS_PARTSCAN would - * be reused even though they cannot expose nested partitions). */ + * device's sector size, and if partscan was requested the device must either already have it enabled + * or — unless the caller declared it may populate the image via LOOP_DEVICE_MAY_POPULATE_PARTITION_TABLE + * — carry no partition table at all (in which case there are no partitions to scan and the loopback + * would serve no purpose). */ assert(fd >= 0); @@ -475,8 +494,26 @@ static int loop_device_can_shortcut( r = blockdev_partscan_enabled_fd(fd); if (r < 0) return r; - if (r == 0) - return false; + if (r == 0) { + /* Partition scanning was requested but cannot be enabled on this device (e.g. it's a + * partition itself). If the caller might write a (nested) partition table into the + * device, it must get a real loopback device so scanning works once the table is + * there. */ + if (FLAGS_SET(loop_flags, LOOP_DEVICE_MAY_POPULATE_PARTITION_TABLE)) + return false; + + /* Otherwise we shortcut when the device carries no partition table: there are then no + * partitions to scan, and routing e.g. a multi-device btrfs member through a loop + * device breaks it, see https://github.com/systemd/systemd/issues/42520. + * + * If we can't probe the device, fall back to allocating a real loop device rather than + * failing the whole operation: we can't prove there's no partition table, and the + * image is potentially untrusted (a crafted or corrupt partition table can make the + * probe fail, e.g. with -EUCLEAN), so failing here would be a fail-unsafe DoS. 
*/ + r = fd_has_partition_table(fd); + if (r != 0) + return false; + } } return true; @@ -494,10 +531,10 @@ static int loop_device_make_internal( LoopDevice **ret) { _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; - _cleanup_close_ int reopened_fd = -EBADF, control = -EBADF; + _cleanup_close_ int reopened_fd = -EBADF, control = -EBADF, probe_close_fd = -EBADF; _cleanup_free_ char *backing_file = NULL; struct loop_config config; - int r, f_flags; + int r, f_flags, probe_fd = -EBADF; struct stat st; assert(fd >= 0); @@ -528,7 +565,13 @@ static int loop_device_make_internal( * by looking for the GPT partition header at various offsets. This of course only works * if the image already has a disk label. */ - r = probe_sector_size_harder(fd, &sector_size); + if (probe_fd < 0) { + probe_fd = probe_fd_open(fd, f_flags, &probe_close_fd); + if (probe_fd < 0) + return probe_fd; + } + + r = probe_sector_size(probe_fd, &sector_size); if (r < 0) return r; if (r == 0) @@ -547,7 +590,13 @@ static int loop_device_make_internal( if (sector_size == 0) sector_size = device_ssz; - r = loop_device_can_shortcut(fd, offset, size, sector_size, device_ssz, loop_flags); + if (probe_fd < 0) { + probe_fd = probe_fd_open(fd, f_flags, &probe_close_fd); + if (probe_fd < 0) + return probe_fd; + } + + r = loop_device_can_shortcut(probe_fd, offset, size, sector_size, device_ssz, loop_flags); if (r < 0) return r; if (r > 0) @@ -613,8 +662,9 @@ static int loop_device_make_internal( .fd = fd, .block_size = sector_size, .info = { - /* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */ - .lo_flags = ((loop_flags & ~(LO_FLAGS_READ_ONLY|LO_FLAGS_PARTSCAN)) | + /* Use the specified flags, but strip our systemd-internal flags and the read-only and + * partscan flags (the latter handled separately below/above), and force autoclear */ + .lo_flags = ((loop_flags & ~(LOOP_DEVICE_MAY_POPULATE_PARTITION_TABLE|LO_FLAGS_READ_ONLY|LO_FLAGS_PARTSCAN)) | ((open_flags & 
O_ACCMODE_STRICT) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR), .lo_offset = offset, diff --git a/src/shared/loop-util.h b/src/shared/loop-util.h index ed02a69d878..ae8374d17bd 100644 --- a/src/shared/loop-util.h +++ b/src/shared/loop-util.h @@ -26,6 +26,18 @@ typedef struct LoopDevice { /* Returns true if LoopDevice object is not actually a loopback device but some other block device we just wrap */ #define LOOP_DEVICE_IS_FOREIGN(d) ((d)->nr < 0) +/* systemd-internal flags OR'd into the loop_flags argument of loop_device_make() and friends, in addition to + * the kernel's LO_FLAGS_*. These live in high bits to stay clear of the kernel values and are masked out + * before the flags reach the kernel. + * + * LOOP_DEVICE_MAY_POPULATE_PARTITION_TABLE: by default, when LO_FLAGS_PARTSCAN is requested but cannot be + * enabled on the device and the device carries no partition table, we hand back the original fd instead of + * allocating a loopback device — there's nothing to scan, and routing e.g. a multi-device btrfs member + * through a loopback breaks it (https://github.com/systemd/systemd/issues/42520). Callers that might write a + * (nested) partition table into the device and rely on partition scanning to pick it up afterwards must set + * this flag to force a real loopback device even when the device is currently unpartitioned. 
*/ +#define LOOP_DEVICE_MAY_POPULATE_PARTITION_TABLE (UINT32_C(1) << 16) + int loop_device_make(int fd, int open_flags, uint64_t offset, uint64_t size, uint32_t sector_size, uint32_t loop_flags, int lock_op, LoopDevice **ret); int loop_device_make_by_path_at(int dir_fd, const char *path, int open_flags, uint32_t sector_size, uint32_t loop_flags, int lock_op, LoopDevice **ret); static inline int loop_device_make_by_path(const char *path, int open_flags, uint32_t sector_size, uint32_t loop_flags, int lock_op, LoopDevice **ret) { diff --git a/src/test/test-loop-util.c b/src/test/test-loop-util.c index fca125564a1..00fe1a33f61 100644 --- a/src/test/test-loop-util.c +++ b/src/test/test-loop-util.c @@ -571,4 +571,139 @@ TEST(partscan_required) { loop = loop_device_unref(loop); } +TEST(partscan_not_needed_without_partition_table) { +#if HAVE_BLKID + _cleanup_(loop_device_unrefp) LoopDevice *block_loop = NULL, *loop = NULL; + _cleanup_free_ char *p = NULL; + _cleanup_close_ int fd = -EBADF; + + if (have_effective_cap(CAP_SYS_ADMIN) <= 0) { + log_tests_skipped("not running privileged"); + return; + } + + if (detect_container() != 0 || running_in_chroot() != 0) { + log_tests_skipped("Test not supported in a container/chroot, requires udev/uevent notifications"); + return; + } + + /* The regression in 663f0bf5cb allocated a loop device whenever partition scanning was requested but + * couldn't be enabled, regardless of the device's contents. That's harmless for most file systems but + * fatal for multi-device btrfs, which rejects seeing the same member via both the original device and + * the loop device (https://github.com/systemd/systemd/issues/42520). A loop device is only ever needed + * to expose a nested partition table though, so any device without one — here simply an empty device — + * must take the shortcut. 
*/ + ASSERT_OK(tempfn_random_child("/var/tmp", "loop-util", &p)); + fd = ASSERT_OK_ERRNO(open(p, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC|O_NOFOLLOW, 0666)); + ASSERT_OK_ERRNO(ftruncate(fd, 256*1024*1024)); + (void) unlink(p); + + /* Set up a backing loop device without LO_FLAGS_PARTSCAN. */ + ASSERT_OK(loop_device_make(fd, O_RDWR, /* offset= */ 0, UINT64_MAX, /* sector_size= */ 0, /* loop_flags= */ 0, LOCK_EX, &block_loop)); + ASSERT_TRUE(block_loop->created); + ASSERT_OK(loop_device_flock(block_loop, LOCK_SH)); + + /* By default LO_FLAGS_PARTSCAN is requested but there's no partition table to scan, so the shortcut + * must be taken (reuse the device) rather than allocating a new loop device. */ + ASSERT_OK(loop_device_make(block_loop->fd, O_RDWR, /* offset= */ 0, UINT64_MAX, /* sector_size= */ 0, LO_FLAGS_PARTSCAN, LOCK_SH, &loop)); + ASSERT_FALSE(loop->created); + loop = loop_device_unref(loop); + + /* But a caller that declares it may populate the image with a partition table must get a real loop + * device even though the device is currently unpartitioned. 
*/ + ASSERT_OK(loop_device_make(block_loop->fd, O_RDWR, /* offset= */ 0, UINT64_MAX, /* sector_size= */ 0, LO_FLAGS_PARTSCAN|LOOP_DEVICE_MAY_POPULATE_PARTITION_TABLE, LOCK_SH, &loop)); + ASSERT_TRUE(loop->created); +#else + log_tests_skipped("blkid not available"); +#endif +} + +static void test_nested_partition_table_one(const char *nested_table) { +#if HAVE_BLKID + _cleanup_(dissected_image_unrefp) DissectedImage *dissected = NULL; + _cleanup_(loop_device_unrefp) LoopDevice *outer_loop = NULL, *loop = NULL; + _cleanup_pclose_ FILE *sfdisk = NULL; + _cleanup_close_ int fd = -EBADF, part_fd = -EBADF; + _cleanup_free_ char *p = NULL, *cmd = NULL; + const char *node; + + assert(nested_table); + + if (have_effective_cap(CAP_SYS_ADMIN) <= 0) { + log_tests_skipped("not running privileged"); + return; + } + + if (detect_container() != 0 || running_in_chroot() != 0) { + log_tests_skipped("Test not supported in a container/chroot, requires udev/uevent notifications"); + return; + } + + /* Build an image with a single root partition spanning the whole disk. */ + ASSERT_OK(tempfn_random_child("/var/tmp", "sfdisk", &p)); + fd = ASSERT_OK_ERRNO(open(p, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC|O_NOFOLLOW, 0666)); + ASSERT_OK_ERRNO(ftruncate(fd, 256*1024*1024)); + + cmd = ASSERT_NOT_NULL(strjoin("sfdisk ", p)); + sfdisk = ASSERT_NOT_NULL(popen(cmd, "we")); + fputs("label: gpt\n" + "type=", sfdisk); +#ifdef SD_GPT_ROOT_NATIVE + fprintf(sfdisk, SD_ID128_UUID_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(SD_GPT_ROOT_NATIVE)); +#else + fprintf(sfdisk, SD_ID128_UUID_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(SD_GPT_ROOT_X86_64)); +#endif + ASSERT_EQ(pclose(sfdisk), 0); + sfdisk = NULL; + (void) unlink(p); + + /* Wrap it in a loopback device with partition scanning and let the dissection logic materialize the + * partition block device via BLKPG. That's synchronous, so unlike waiting for udev to create the + * node this is not racy. 
*/ + ASSERT_OK(loop_device_make(fd, O_RDWR, /* offset= */ 0, UINT64_MAX, /* sector_size= */ 0, LO_FLAGS_PARTSCAN, LOCK_EX, &outer_loop)); + ASSERT_OK(dissect_loop_device( + outer_loop, + /* verity= */ NULL, + /* mount_options= */ NULL, + /* image_policy= */ NULL, + /* image_filter= */ NULL, + DISSECT_IMAGE_ADD_PARTITION_DEVICES|DISSECT_IMAGE_PIN_PARTITION_DEVICES, + &dissected)); + ASSERT_TRUE(dissected->partitions[PARTITION_ROOT].found); + node = ASSERT_NOT_NULL(dissected->partitions[PARTITION_ROOT].node); + + /* Carve a nested partition table into that partition, mimicking the pmOS/android case (663f0bf5cb) + * where a partition carries a partition table the kernel won't scan, as partition devices don't + * support partition scanning. We write and read it back through the same partition node, so the + * buffer cache stays coherent. */ + cmd = mfree(cmd); + cmd = ASSERT_NOT_NULL(strjoin("sfdisk --no-reread --no-tell-kernel ", node)); + sfdisk = ASSERT_NOT_NULL(popen(cmd, "we")); + fputs(nested_table, sfdisk); + ASSERT_EQ(pclose(sfdisk), 0); + sfdisk = NULL; + + /* The partition has partition scanning disabled but now carries a partition table, so the shortcut + * must be refused and a real loop device with partition scanning allocated (even without + * LOOP_DEVICE_MAY_POPULATE_PARTITION_TABLE, since there genuinely is a table to scan). */ + part_fd = ASSERT_OK_ERRNO(open(node, O_RDWR|O_CLOEXEC|O_NOCTTY)); + ASSERT_OK(loop_device_make(part_fd, O_RDWR, /* offset= */ 0, UINT64_MAX, /* sector_size= */ 0, LO_FLAGS_PARTSCAN, LOCK_SH, &loop)); + ASSERT_TRUE(loop->created); +#else + log_tests_skipped("blkid not available"); +#endif +} + +TEST(partscan_required_for_nested_gpt) { + test_nested_partition_table_one("label: gpt\n" + "size=1MiB, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4\n"); +} + +TEST(partscan_required_for_nested_mbr) { + /* Make sure an MBR ("dos") table — which the dissection logic acts on just like GPT — likewise + * prevents the shortcut, i.e. 
partition-table detection isn't limited to GPT. */ + test_nested_partition_table_one("label: dos\n" + "size=1MiB, type=83\n"); +} + DEFINE_TEST_MAIN_WITH_INTRO(LOG_DEBUG, intro); diff --git a/test/units/TEST-50-DISSECT.dissect.sh b/test/units/TEST-50-DISSECT.dissect.sh index 136444532d3..2fe6bf19d95 100755 --- a/test/units/TEST-50-DISSECT.dissect.sh +++ b/test/units/TEST-50-DISSECT.dissect.sh @@ -1250,3 +1250,66 @@ rm -rf "$defs" "$imgs" (! systemd-run -P -p ExtensionImages="/this/should/definitely/not/exist.img" false) (! systemd-run -P -p RootImage="/this/should/definitely/not/exist.img" false) (! systemd-run -P -p ExtensionDirectories="/foo/bar /foo/baz" false) + +# Ensure a multi-device btrfs doesn't fail to mount due to loopdev +# https://github.com/systemd/systemd/issues/42520: +if [[ -f "${BTRFS_MEMBER1:-}" ]]; then + img="" mnt="" loop="" + + # This block runs under 'set -e'; register cleanup up front so a failure in between can't leak the + # attached loop device or the image and perturb later device enumeration / udevadm settle. + # shellcheck disable=SC2317 + cleanup_btrfs_mountimages() { + if [[ -n "$mnt" ]]; then + umount -R "$mnt" || true + fi + if [[ -n "$loop" ]]; then + # Drop the members from the kernel's global, boot-wide btrfs device cache before detaching; + # otherwise the cached entries dangle at recycled loop minors and trip multi-device detection in + # later tests reusing the same /dev/loopN. + btrfs device scan --forget "${loop}p1" "${loop}p2" || true + losetup -d "$loop" || true + # Pair the detach with a settle, like every other losetup -d in this test, so teardown isn't + # still in flight when control returns to the broader TEST-50 run. + udevadm settle --timeout=60 || true + fi + rm -f "$img" + # Only remove the mountpoint once it is confirmed unmounted, so a failed unmount above doesn't make + # rm -rf recurse through the mountpoint into the still-mounted filesystem. + if [[ -n "$mnt" ]] && ! 
mountpoint -q "$mnt"; then + rm -rf "$mnt" + fi + } + trap cleanup_btrfs_mountimages EXIT + + img="$(mktemp /var/tmp/test-50-mountimages-btrfs.img.XXXXXXXXXX)" + mnt="$(mktemp -d "$IMAGE_DIR/test-50-mountimages-btrfs.mnt.XXXXXXXXXX")" + truncate -s 600M "$img" + echo -e 'label: gpt\nsize=280MiB, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, name=data1\ntype=0FC63DAF-8483-4772-8E79-3D69D8477DE4, name=data2' | sfdisk "$img" + loop="$(losetup --show -P -f "$img")" + udevadm wait --timeout=60 --settle --initialized=no "${loop}p1" "${loop}p2" + udevadm lock --timeout=60 --device="$loop" dd if="$BTRFS_MEMBER1" of="${loop}p1" bs=4M + udevadm lock --timeout=60 --device="$loop" dd if="$BTRFS_MEMBER2" of="${loop}p2" bs=4M + udevadm settle --timeout=60 + btrfs device scan "${loop}p1" "${loop}p2" + + mount -t btrfs "${loop}p1" "$mnt" + btrfs subvolume create "$mnt/@demo" + echo "MARKER=1" >"$mnt/@demo/os-release" + btrfs subvolume create "$mnt/@" + btrfs subvolume set-default "$mnt/@" + umount "$mnt" + mount -t btrfs "${loop}p1" "$mnt" + + systemd-run -P \ + -p MountImages="${loop}p1:/run/img-btrfs:subvol=@demo" \ + cat /run/img-btrfs/os-release | grep -F "MARKER=1" >/dev/null + # Double check that there's no loopdev + src="$(systemd-run -P \ + -p MountImages="${loop}p1:/run/img-btrfs:subvol=@demo" \ + findmnt -n -o SOURCE /run/img-btrfs)" + assert_eq "${src%%\[*}" "${loop}p1" + + trap - EXIT + cleanup_btrfs_mountimages +fi diff --git a/test/units/TEST-50-DISSECT.sh b/test/units/TEST-50-DISSECT.sh index 973f1848378..07bed11adcb 100755 --- a/test/units/TEST-50-DISSECT.sh +++ b/test/units/TEST-50-DISSECT.sh @@ -14,6 +14,8 @@ set -o pipefail at_exit() { set +e + rm -f "${BTRFS_MEMBER1:-}" "${BTRFS_MEMBER2:-}" + if [[ -z "${IMAGE_DIR:-}" ]]; then return fi @@ -266,6 +268,22 @@ udevadm lock --timeout=60 --device="${loop}p3" dd if="$MINIMAL_IMAGE.verity-sig" losetup -d "$loop" udevadm settle --timeout=60 +# Pre-build the multi-device (raid1) btrfs members as mkfs.btrfs barfs later when 
there's a bunch of mounts +# for some reason +if command -v mkfs.btrfs >/dev/null; then + BTRFS_MEMBER1="$(mktemp /var/tmp/test-50-btrfs-member1.XXXXXXXXXX)" + BTRFS_MEMBER2="$(mktemp /var/tmp/test-50-btrfs-member2.XXXXXXXXXX)" + export BTRFS_MEMBER1 + export BTRFS_MEMBER2 + # mkfs.btrfs is known to be flaky in this environment (see above), so under 'set -e' tolerate a setup + # failure by degrading to skipping the btrfs MountImages= subtest (the consumer guards on -f) rather + # than aborting the whole TEST-50 run and taking down every unrelated subtest with it. + if ! { truncate -s 256M "$BTRFS_MEMBER1" "$BTRFS_MEMBER2" && mkfs.btrfs -draid1 -mraid1 "$BTRFS_MEMBER1" "$BTRFS_MEMBER2"; }; then + rm -f "$BTRFS_MEMBER1" "$BTRFS_MEMBER2" + BTRFS_MEMBER1='' BTRFS_MEMBER2='' + fi +fi + : "Run subtests" run_subtests