#include "alloc-util.h"
#include "blockdev-util.h"
+#include "data-fd-util.h"
#include "device-util.h"
#include "devnum-util.h"
#include "env-util.h"
}
static int open_lock_fd(int primary_fd, int operation) {
- _cleanup_close_ int lock_fd = -1;
+ _cleanup_close_ int lock_fd = -EBADF;
assert(primary_fd >= 0);
assert(IN_SET(operation & ~LOCK_NB, LOCK_SH, LOCK_EX));
- lock_fd = fd_reopen(primary_fd, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ lock_fd = fd_reopen(primary_fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
if (lock_fd < 0)
return lock_fd;
assert(fd >= 0);
assert(c);
+ if (c->block_size != 0) {
+ int z;
+
+ if (ioctl(fd, BLKSSZGET, &z) < 0)
+ return -errno;
+
+ assert(z >= 0);
+ if ((uint32_t) z != c->block_size)
+ log_debug("LOOP_CONFIGURE didn't honour requested block size %u, got %i instead. Ignoring.", c->block_size, z);
+ }
+
if (c->info.lo_sizelimit != 0) {
/* Kernel 5.8 vanilla doesn't properly propagate the size limit into the
* block device. If it's used, let's immediately check if it had the desired
return -errno;
if (z != c->info.lo_sizelimit) {
- log_debug("LOOP_CONFIGURE is broken, doesn't honour .lo_sizelimit. Falling back to LOOP_SET_STATUS64.");
+ log_debug("LOOP_CONFIGURE is broken, doesn't honour .info.lo_sizelimit. Falling back to LOOP_SET_STATUS64.");
broken = true;
}
}
info_copy.lo_flags &= LOOP_SET_STATUS_SETTABLE_FLAGS;
/* Since kernel commit 5db470e229e22b7eda6e23b5566e532c96fb5bc3 (kernel v5.0) the LOOP_SET_STATUS64
- * ioctl can return EAGAIN in case we change the lo_offset field, if someone else is accessing the
+ * ioctl can return EAGAIN in case we change the info.lo_offset field, if someone else is accessing the
* block device while we try to reconfigure it. This is a pretty common case, since udev might
* instantly start probing the device as soon as we attach an fd to it. Hence handle it in two ways:
* first, let's take the BSD lock to ensure that udev will not step in between the point in
}
static int loop_configure(
- sd_device *dev,
- int fd,
int nr,
+ int open_flags,
+ int lock_op,
const struct loop_config *c,
- uint64_t *ret_seqnum_not_before,
- usec_t *ret_timestamp_not_before,
- int *ret_lock_fd) {
+ LoopDevice **ret) {
static bool loop_configure_broken = false;
- _cleanup_close_ int lock_fd = -1;
- uint64_t seqnum;
- usec_t timestamp;
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ _cleanup_(cleanup_clear_loop_close) int loop_with_fd = -EBADF; /* This must be declared before lock_fd. */
+ _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
+ _cleanup_free_ char *node = NULL;
+ uint64_t diskseq = 0, seqnum = UINT64_MAX;
+ usec_t timestamp = USEC_INFINITY;
+ dev_t devno;
int r;
- assert(fd >= 0);
assert(nr >= 0);
assert(c);
+ assert(ret);
+
+ if (asprintf(&node, "/dev/loop%i", nr) < 0)
+ return -ENOMEM;
+
+ r = sd_device_new_from_devname(&dev, node);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devnum(dev, &devno);
+ if (r < 0)
+ return r;
+
+ fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
+ if (fd < 0)
+ return fd;
/* Let's lock the device before we do anything. We take the BSD lock on a second, separately opened
* fd for the device. udev after all watches for close() events (specifically IN_CLOSE_WRITE) on
if (lock_fd < 0)
return lock_fd;
+ /* Let's see if the backing file is really unattached. Someone may have already attached a backing
+ * file without taking the BSD lock. */
+ r = loop_is_bound(fd);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return -EBUSY;
+
/* Let's see if the device is really detached, i.e. currently has no associated partition block
* devices. On various kernels (such as 5.8) it is possible to have a loopback block device that
* superficially is detached but still has partition block devices associated for it. Let's then
* manually remove the partitions via BLKPG, and tell the caller we did that via EUCLEAN, so they try
* again. */
- r = block_device_has_partitions(dev);
+ r = block_device_remove_all_partitions(dev, fd);
if (r < 0)
return r;
- if (r > 0) {
- r = loop_is_bound(fd);
- if (r < 0)
- return r;
- if (r > 0)
- return -EBUSY;
-
- /* Unbound but has children? Remove all partitions, and report this to the caller, to try
- * again, and count this as an attempt. */
-
- r = block_device_remove_all_partitions(dev, fd);
- if (r < 0)
- return r;
-
+ if (r > 0)
+ /* Removed all partitions. Let's report this to the caller, to try again, and count this as
+ * an attempt. */
return -EUCLEAN;
- }
if (!loop_configure_broken) {
/* Acquire uevent seqnum immediately before attaching the loopback device. This allows
loop_configure_broken = true;
} else {
- r = loop_configure_verify(fd, c);
+ loop_with_fd = TAKE_FD(fd);
+
+ r = loop_configure_verify(loop_with_fd, c);
if (r < 0)
- goto fail;
+ return r;
if (r == 0) {
/* LOOP_CONFIGURE doesn't work. Remember that. */
loop_configure_broken = true;
* good chance we cannot actually reuse the loopback device right-away. Hence
* let's assume it's busy, avoid the trouble and let the calling loop call us
* again with a new, likely unused device. */
- r = -EBUSY;
- goto fail;
+ return -EBUSY;
}
}
}
if (ioctl(fd, LOOP_SET_FD, c->fd) < 0)
return -errno;
- r = loop_configure_fallback(fd, c);
+ loop_with_fd = TAKE_FD(fd);
+
+ r = loop_configure_fallback(loop_with_fd, c);
if (r < 0)
- goto fail;
+ return r;
}
- if (ret_seqnum_not_before)
- *ret_seqnum_not_before = seqnum;
- if (ret_timestamp_not_before)
- *ret_timestamp_not_before = timestamp;
- if (ret_lock_fd)
- *ret_lock_fd = TAKE_FD(lock_fd);
+ r = fd_get_diskseq(loop_with_fd, &diskseq);
+ if (r < 0 && r != -EOPNOTSUPP)
+ return r;
- return 0;
+ switch (lock_op & ~LOCK_NB) {
+ case LOCK_EX: /* Already in effect */
+ break;
+ case LOCK_SH: /* Downgrade */
+ if (flock(lock_fd, lock_op) < 0)
+ return -errno;
+ break;
+ case LOCK_UN: /* Release */
+ lock_fd = safe_close(lock_fd);
+ break;
+ default:
+ assert_not_reached();
+ }
-fail:
- /* Close the lock fd explicitly before clearing the loopback block device, since an additional open
- * fd would block the clearing to succeed */
- lock_fd = safe_close(lock_fd);
- (void) ioctl(fd, LOOP_CLR_FD);
- return r;
+ LoopDevice *d = new(LoopDevice, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (LoopDevice) {
+ .n_ref = 1,
+ .fd = TAKE_FD(loop_with_fd),
+ .lock_fd = TAKE_FD(lock_fd),
+ .node = TAKE_PTR(node),
+ .nr = nr,
+ .devno = devno,
+ .dev = TAKE_PTR(dev),
+ .diskseq = diskseq,
+ .uevent_seqnum_not_before = seqnum,
+ .timestamp_not_before = timestamp,
+ };
+
+ *ret = TAKE_PTR(d);
+ return 0;
}
static int loop_device_make_internal(
int open_flags,
uint64_t offset,
uint64_t size,
+ uint32_t block_size,
uint32_t loop_flags,
int lock_op,
LoopDevice **ret) {
- _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
- _cleanup_close_ int direct_io_fd = -1;
- _cleanup_free_ char *node = NULL, *backing_file = NULL;
+ _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+ _cleanup_close_ int direct_io_fd = -EBADF, control = -EBADF;
+ _cleanup_free_ char *backing_file = NULL;
struct loop_config config;
- LoopDevice *d;
- uint64_t seqnum = UINT64_MAX;
- usec_t timestamp = USEC_INFINITY;
- int nr, r, f_flags;
+ int r, f_flags;
struct stat st;
assert(fd >= 0);
/* If this is already a block device and we are supposed to cover the whole of it
* then store an fd to the original open device node — and do not actually create an
* unnecessary loopback device for it. */
- return loop_device_open_full(NULL, fd, open_flags, lock_op, ret);
+ return loop_device_open_from_fd(fd, open_flags, lock_op, ret);
} else {
r = stat_verify_regular(&st);
if (r < 0)
fd = direct_io_fd; /* From now on, operate on our new O_DIRECT fd */
}
- /* On failure, lock_fd must be closed at first, otherwise LOOP_CLR_FD will fail. */
- _cleanup_close_ int control = -1;
- _cleanup_(cleanup_clear_loop_close) int loop_with_fd = -1;
- _cleanup_close_ int lock_fd = -1;
-
control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
if (control < 0)
return -errno;
config = (struct loop_config) {
.fd = fd,
+ .block_size = block_size,
.info = {
/* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
.lo_flags = (loop_flags & ~LO_FLAGS_READ_ONLY) | ((open_flags & O_ACCMODE) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR,
/* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might
* be gone already, taken by somebody else racing against us. */
for (unsigned n_attempts = 0;;) {
- _cleanup_close_ int loop = -1;
+ int nr;
/* Let's take a lock on the control device first. On a busy system, where many programs
* attempt to allocate a loopback device at the same time, we might otherwise keep looping
if (nr < 0)
return -errno;
- node = mfree(node);
- if (asprintf(&node, "/dev/loop%i", nr) < 0)
- return -ENOMEM;
-
- dev = sd_device_unref(dev);
- r = sd_device_new_from_devname(&dev, node);
- if (r < 0)
- return r;
+ r = loop_configure(nr, open_flags, lock_op, &config, &d);
+ if (r >= 0)
+ break;
- loop = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
- if (loop < 0) {
- /* Somebody might've gotten the same number from the kernel, used the device,
- * and called LOOP_CTL_REMOVE on it. Let's retry with a new number. */
- if (!ERRNO_IS_DEVICE_ABSENT(errno))
- return -errno;
- } else {
- r = loop_configure(dev, loop, nr, &config, &seqnum, &timestamp, &lock_fd);
- if (r >= 0) {
- loop_with_fd = TAKE_FD(loop);
- break;
- }
- if (!IN_SET(r, -EBUSY, -EUCLEAN)) /* Busy, or some left-over partition devices that
- * were cleaned up. */
- return r;
- }
+ /* loop_configure() reports failures through its negative return value, not through errno, hence
+ * decide on 'r' here:
+ * -ENODEV or friends: Somebody might've gotten the same number from the kernel, used the
+ * device, and called LOOP_CTL_REMOVE on it. Let's retry with a new number.
+ * -EBUSY: a file descriptor is already bound to the loopback block device.
+ * -EUCLEAN: some left-over partition devices that were cleaned up. */
+ if (!ERRNO_IS_DEVICE_ABSENT(r) && !IN_SET(r, -EBUSY, -EUCLEAN))
+ return r;
/* OK, this didn't work, let's try again a bit later, but first release the lock on the
* control device */
if (++n_attempts >= 64) /* Give up eventually */
return -EBUSY;
- /* Now close the loop device explicitly. This will release any lock acquired by
- * attach_empty_file() or similar, while we sleep below. */
- loop = safe_close(loop);
-
/* Wait some random time, to make collision less likely. Let's pick a random time in the
* range 0ms…250ms, linearly scaled by the number of failed attempts. */
(void) usleep(random_u64_range(UINT64_C(10) * USEC_PER_MSEC +
UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
}
- if (fstat(loop_with_fd, &st) < 0)
- return -errno;
- assert(S_ISBLK(st.st_mode));
-
- uint64_t diskseq = 0;
- r = fd_get_diskseq(loop_with_fd, &diskseq);
- if (r < 0 && r != -EOPNOTSUPP)
- return r;
-
- switch (lock_op & ~LOCK_NB) {
- case LOCK_EX: /* Already in effect */
- break;
- case LOCK_SH: /* Downgrade */
- if (flock(lock_fd, lock_op) < 0)
- return -errno;
- break;
- case LOCK_UN: /* Release */
- lock_fd = safe_close(lock_fd);
- break;
- default:
- assert_not_reached();
- }
-
- d = new(LoopDevice, 1);
- if (!d)
- return -ENOMEM;
- *d = (LoopDevice) {
- .fd = TAKE_FD(loop_with_fd),
- .lock_fd = TAKE_FD(lock_fd),
- .node = TAKE_PTR(node),
- .nr = nr,
- .devno = st.st_rdev,
- .dev = TAKE_PTR(dev),
- .backing_file = TAKE_PTR(backing_file),
- .diskseq = diskseq,
- .uevent_seqnum_not_before = seqnum,
- .timestamp_not_before = timestamp,
- };
+ d->backing_file = TAKE_PTR(backing_file);
log_debug("Successfully acquired %s, devno=%u:%u, nr=%i, diskseq=%" PRIu64,
d->node,
d->nr,
d->diskseq);
- *ret = d;
- return d->fd;
+ *ret = TAKE_PTR(d);
+ return 0;
}
static uint32_t loop_flags_mangle(uint32_t loop_flags) {
int open_flags,
uint64_t offset,
uint64_t size,
+ uint32_t block_size,
uint32_t loop_flags,
int lock_op,
LoopDevice **ret) {
open_flags,
offset,
size,
+ block_size,
loop_flags_mangle(loop_flags),
lock_op,
ret);
LoopDevice **ret) {
int r, basic_flags, direct_flags, rdwr_flags;
- _cleanup_close_ int fd = -1;
+ _cleanup_close_ int fd = -EBADF;
bool direct = false;
assert(path);
direct ? "enabled" : "disabled",
direct != (direct_flags != 0) ? " (O_DIRECT was requested but not supported)" : "");
- return loop_device_make_internal(path, fd, open_flags, 0, 0, loop_flags, lock_op, ret);
+ return loop_device_make_internal(path, fd, open_flags, 0, 0, 0, loop_flags, lock_op, ret);
}
-LoopDevice* loop_device_unref(LoopDevice *d) {
+int loop_device_make_by_path_memory(
+                const char *path,
+                int open_flags,
+                uint32_t loop_flags,
+                int lock_op,
+                LoopDevice **ret) {
+
+        _cleanup_close_ int source_fd = -EBADF, memory_fd = -EBADF;
+        _cleanup_free_ char *filename = NULL;
+        struct stat sb;
+        int r;
+
+        assert(path);
+        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
+        assert(ret);
+
+        /* Opens 'path' read-only, clones it into a memfd via memfd_clone_fd(), and then sets up the
+         * loopback device on top of that memfd instead of the original inode. Returns what
+         * loop_device_make_internal() returns, or a negative errno-style error from an earlier step. */
+
+        source_fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|O_RDONLY);
+        if (source_fd < 0)
+                return -errno;
+
+        if (fstat(source_fd, &sb) < 0)
+                return -errno;
+
+        /* Only regular files and block devices are accepted as source. */
+        if (!(S_ISREG(sb.st_mode) || S_ISBLK(sb.st_mode)))
+                return -EBADF;
+
+        /* The last component of the path is handed to memfd_clone_fd() as the name argument. */
+        r = path_extract_filename(path, &filename);
+        if (r < 0)
+                return r;
+
+        memory_fd = memfd_clone_fd(source_fd, filename, open_flags|O_CLOEXEC);
+        if (memory_fd < 0)
+                return memory_fd;
+
+        /* The original is not needed anymore once the clone exists, hence close it early. */
+        source_fd = safe_close(source_fd);
+
+        /* memfds don't support O_DIRECT, hence LO_FLAGS_DIRECT_IO can't be used either: mask it out. */
+        return loop_device_make_internal(
+                        NULL,
+                        memory_fd,
+                        open_flags,
+                        0,
+                        0,
+                        0,
+                        loop_flags & ~LO_FLAGS_DIRECT_IO,
+                        lock_op,
+                        ret);
+}
+
+static LoopDevice* loop_device_free(LoopDevice *d) {
_cleanup_close_ int control = -1;
int r;
return mfree(d);
}
+DEFINE_TRIVIAL_REF_UNREF_FUNC(LoopDevice, loop_device, loop_device_free);
+
void loop_device_relinquish(LoopDevice *d) {
        assert(d);

        /* Mark the device as relinquished. Elsewhere in this file the flag being 'true' means
         * "it's not ours, don't try to destroy it when this object is freed", hence relinquishing
         * must set it, not clear it. */
        d->relinquished = true;
}
-int loop_device_open_full(
- const char *loop_path,
- int loop_fd,
+int loop_device_open(
+ sd_device *dev,
int open_flags,
int lock_op,
LoopDevice **ret) {
- _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
- _cleanup_close_ int fd = -1, lock_fd = -1;
- _cleanup_free_ char *p = NULL, *backing_file = NULL;
+ _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
+ _cleanup_free_ char *node = NULL, *backing_file = NULL;
struct loop_info64 info;
uint64_t diskseq = 0;
- struct stat st;
LoopDevice *d;
+ const char *s;
+ dev_t devnum;
int r, nr = -1;
- assert(loop_path || loop_fd >= 0);
+ assert(dev);
assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
assert(ret);
- if (loop_fd < 0) {
- fd = open(loop_path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
- if (fd < 0)
- return -errno;
- loop_fd = fd;
- }
-
- if (fstat(loop_fd, &st) < 0)
- return -errno;
- if (!S_ISBLK(st.st_mode))
- return -ENOTBLK;
-
- r = sd_device_new_from_stat_rdev(&dev, &st);
- if (r < 0)
- return r;
+ /* Even if fd is provided through the argument in loop_device_open_from_fd(), we reopen the inode
+ * here, instead of keeping just a dup() clone of it around, since we want to ensure that the
+ * O_DIRECT flag of the handle we keep is off, we have our own file index, and have the right
+ * read/write mode in effect. */
+ fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
+ if (fd < 0)
+ return fd;
- if (fd < 0) {
- /* If loop_fd is provided through the argument, then we reopen the inode here, instead of
- * keeping just a dup() clone of it around, since we want to ensure that the O_DIRECT
- * flag of the handle we keep is off, we have our own file index, and have the right
- * read/write mode in effect.*/
- fd = fd_reopen(loop_fd, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
- if (fd < 0)
- return fd;
- loop_fd = fd;
+ if ((lock_op & ~LOCK_NB) != LOCK_UN) {
+ lock_fd = open_lock_fd(fd, lock_op);
+ if (lock_fd < 0)
+ return lock_fd;
}
- if (ioctl(loop_fd, LOOP_GET_STATUS64, &info) >= 0) {
- const char *s;
-
+ if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0) {
#if HAVE_VALGRIND_MEMCHECK_H
/* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
}
}
- r = fd_get_diskseq(loop_fd, &diskseq);
+ r = fd_get_diskseq(fd, &diskseq);
if (r < 0 && r != -EOPNOTSUPP)
return r;
- if ((lock_op & ~LOCK_NB) != LOCK_UN) {
- lock_fd = open_lock_fd(loop_fd, lock_op);
- if (lock_fd < 0)
- return lock_fd;
- }
+ r = sd_device_get_devnum(dev, &devnum);
+ if (r < 0)
+ return r;
- r = sd_device_get_devname(dev, &loop_path);
+ r = sd_device_get_devname(dev, &s);
if (r < 0)
return r;
- p = strdup(loop_path);
- if (!p)
+ node = strdup(s);
+ if (!node)
return -ENOMEM;
d = new(LoopDevice, 1);
return -ENOMEM;
*d = (LoopDevice) {
+ .n_ref = 1,
.fd = TAKE_FD(fd),
.lock_fd = TAKE_FD(lock_fd),
.nr = nr,
- .node = TAKE_PTR(p),
- .dev = TAKE_PTR(dev),
+ .node = TAKE_PTR(node),
+ .dev = sd_device_ref(dev),
.backing_file = TAKE_PTR(backing_file),
.relinquished = true, /* It's not ours, don't try to destroy it when this object is freed */
- .devno = st.st_rdev,
+ .devno = devnum,
.diskseq = diskseq,
.uevent_seqnum_not_before = UINT64_MAX,
.timestamp_not_before = USEC_INFINITY,
};
*ret = d;
- return d->fd;
+ return 0;
+}
+
+int loop_device_open_from_fd(
+                int fd,
+                int open_flags,
+                int lock_op,
+                LoopDevice **ret) {
+
+        _cleanup_(sd_device_unrefp) sd_device *block_dev = NULL;
+        int q;
+
+        assert(fd >= 0);
+
+        /* Look up the sd_device object for the block device behind 'fd', then hand it to
+         * loop_device_open(), which does the actual work. A lookup failure is returned as-is. */
+        q = block_device_new_from_fd(fd, 0, &block_dev);
+        if (q >= 0)
+                q = loop_device_open(block_dev, open_flags, lock_op, ret);
+
+        return q;
+}
+
+int loop_device_open_from_path(
+                const char *path,
+                int open_flags,
+                int lock_op,
+                LoopDevice **ret) {
+
+        _cleanup_(sd_device_unrefp) sd_device *block_dev = NULL;
+        int q;
+
+        assert(path);
+
+        /* Look up the sd_device object for the block device at 'path', then hand it to
+         * loop_device_open(), which does the actual work. A lookup failure is returned as-is. */
+        q = block_device_new_from_path(path, 0, &block_dev);
+        if (q >= 0)
+                q = loop_device_open(block_dev, open_flags, lock_op, ret);
+
+        return q;
}
static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) {
char sysfs[STRLEN("/sys/dev/block/:/partition") + 2*DECIMAL_STR_MAX(dev_t) + 1];
_cleanup_free_ char *buffer = NULL;
uint64_t current_offset, current_size, partno;
- _cleanup_close_ int whole_fd = -1;
+ _cleanup_close_ int whole_fd = -EBADF;
struct stat st;
dev_t devno;
int r;