#include "env-util.h"
#include "errno-util.h"
#include "fd-util.h"
+#include "fs-util.h"
#include "fileio.h"
#include "loop-util.h"
#include "missing_loop.h"
static int loop_is_bound(int fd) {
struct loop_info64 info;
- assert(fd >= 0);
-
- if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0) {
+ if (ioctl(ASSERT_FD(fd), LOOP_GET_STATUS64, &info) < 0) {
if (errno == ENXIO)
return false; /* not bound! */
static int open_lock_fd(int primary_fd, int operation) {
_cleanup_close_ int lock_fd = -EBADF;
- assert(primary_fd >= 0);
assert(IN_SET(operation & ~LOCK_NB, LOCK_SH, LOCK_EX));
- lock_fd = fd_reopen(primary_fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ lock_fd = fd_reopen(ASSERT_FD(primary_fd), O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
if (lock_fd < 0)
return lock_fd;
}
static int loop_configure_verify_direct_io(int fd, const struct loop_config *c) {
- assert(fd);
+ assert(fd >= 0);
assert(c);
if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO)) {
* check here if enabling direct IO worked, to make this easily debuggable however.
*
* (Should anyone really care and actually wants direct IO on old kernels: it might be worth
- * enabling direct IO with iteratively larger block sizes until it eventually works.) */
+ * enabling direct IO with iteratively larger block sizes until it eventually works.)
+ *
+ * On older kernels (e.g.: 5.10) when this is attempted on a file stored on a dm-crypt
+ * backed partition the kernel will start returning I/O errors when accessing the mounted
+ * loop device, so return a recognizable error that causes the operation to be started
+ * from scratch without the LO_FLAGS_DIRECT_IO flag. */
if (!FLAGS_SET(info.lo_flags, LO_FLAGS_DIRECT_IO))
- log_debug("Could not enable direct IO mode, proceeding in buffered IO mode.");
+ return log_debug_errno(
+ SYNTHETIC_ERRNO(ENOANO),
+ "Could not enable direct IO mode, retrying in buffered IO mode.");
}
return 0;
* effect hence. And if not use classic LOOP_SET_STATUS64. */
uint64_t z;
- if (ioctl(fd, BLKGETSIZE64, &z) < 0)
- return -errno;
+ r = blockdev_get_device_size(fd, &z);
+ if (r < 0)
+ return r;
if (z != c->info.lo_sizelimit) {
log_debug("LOOP_CONFIGURE is broken, doesn't honour .info.lo_sizelimit. Falling back to LOOP_SET_STATUS64.");
/* Sleep some random time, but at least 10ms, at most 250ms. Increase the delay the more
* failed attempts we see */
- (void) usleep(UINT64_C(10) * USEC_PER_MSEC +
+ (void) usleep_safe(UINT64_C(10) * USEC_PER_MSEC +
random_u64_range(UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
}
assert(ret);
if (asprintf(&node, "/dev/loop%i", nr) < 0)
- return -ENOMEM;
+ return log_oom_debug();
r = sd_device_new_from_devname(&dev, node);
if (r < 0)
- return r;
+ return log_debug_errno(r, "Failed to create sd_device object for \"%s\": %m", node);
r = sd_device_get_devnum(dev, &devno);
if (r < 0)
- return r;
+ return log_device_debug_errno(dev, r, "Failed to get devnum: %m");
fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
if (fd < 0)
- return fd;
+ return log_device_debug_errno(dev, fd, "Failed to open device: %m");
/* Let's lock the device before we do anything. We take the BSD lock on a second, separately opened
* fd for the device. udev after all watches for close() events (specifically IN_CLOSE_WRITE) on
* automatically release the lock, after we are done. */
lock_fd = open_lock_fd(fd, LOCK_EX);
if (lock_fd < 0)
- return lock_fd;
+ return log_device_debug_errno(dev, lock_fd, "Failed to acquire lock: %m");
+
+ log_device_debug(dev, "Acquired exclusive lock.");
/* Let's see if backing file is really unattached. Someone may already attach a backing file without
* taking BSD lock. */
r = loop_is_bound(fd);
if (r < 0)
- return r;
+ return log_device_debug_errno(dev, r, "Failed to check if the loopback block device is bound: %m");
if (r > 0)
- return -EBUSY;
+ return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EBUSY),
+ "The loopback block device is already bound, ignoring.");
/* Let's see if the device is really detached, i.e. currently has no associated partition block
* devices. On various kernels (such as 5.8) it is possible to have a loopback block device that
* again. */
r = block_device_remove_all_partitions(dev, fd);
if (r < 0)
- return r;
+ return log_device_debug_errno(dev, r, "Failed to remove partitions on the loopback block device: %m");
if (r > 0)
/* Removed all partitions. Let's report this to the caller, to try again, and count this as
* an attempt. */
- return -EUCLEAN;
+ return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EUCLEAN),
+ "Removed partitions on the loopback block device.");
if (!loop_configure_broken) {
/* Acquire uevent seqnum immediately before attaching the loopback device. This allows
* use. But doing this at least shortens the race window a bit. */
r = get_current_uevent_seqnum(&seqnum);
if (r < 0)
- return r;
+ return log_device_debug_errno(dev, r, "Failed to get the current uevent seqnum: %m");
timestamp = now(CLOCK_MONOTONIC);
* rather than ENOTTY on loopback block devices. They should fix that in the kernel,
* but in the meantime we accept both here. */
if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EINVAL)
- return -errno;
+ return log_device_debug_errno(dev, errno, "ioctl(LOOP_CONFIGURE) failed: %m");
loop_configure_broken = true;
} else {
r = loop_configure_verify(loop_with_fd, c);
if (r < 0)
- return r;
+ return log_device_debug_errno(dev, r, "Failed to verify if loopback block device is correctly configured: %m");
if (r == 0) {
/* LOOP_CONFIGURE doesn't work. Remember that. */
loop_configure_broken = true;
/* Let's read the seqnum again, to shorten the window. */
r = get_current_uevent_seqnum(&seqnum);
if (r < 0)
- return r;
+ return log_device_debug_errno(dev, r, "Failed to get the current uevent seqnum: %m");
timestamp = now(CLOCK_MONOTONIC);
if (ioctl(fd, LOOP_SET_FD, c->fd) < 0)
- return -errno;
+ return log_device_debug_errno(dev, errno, "ioctl(LOOP_SET_FD) failed: %m");
loop_with_fd = TAKE_FD(fd);
r = fd_get_diskseq(loop_with_fd, &diskseq);
if (r < 0 && r != -EOPNOTSUPP)
- return r;
+ return log_device_debug_errno(dev, r, "Failed to get diskseq: %m");
switch (lock_op & ~LOCK_NB) {
case LOCK_EX: /* Already in effect */
break;
case LOCK_SH: /* Downgrade */
if (flock(lock_fd, lock_op) < 0)
- return -errno;
+ return log_device_debug_errno(dev, errno, "Failed to downgrade lock level: %m");
break;
case LOCK_UN: /* Release */
lock_fd = safe_close(lock_fd);
assert_not_reached();
}
+ uint64_t device_size;
+ r = blockdev_get_device_size(loop_with_fd, &device_size);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get loopback device size: %m");
+
LoopDevice *d = new(LoopDevice, 1);
if (!d)
- return -ENOMEM;
+ return log_oom_debug();
*d = (LoopDevice) {
.n_ref = 1,
.uevent_seqnum_not_before = seqnum,
.timestamp_not_before = timestamp,
.sector_size = c->block_size,
+ .device_size = device_size,
+ .created = true,
};
*ret = TAKE_PTR(d);
LoopDevice **ret) {
_cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
- _cleanup_close_ int direct_io_fd = -EBADF, control = -EBADF;
+ _cleanup_close_ int reopened_fd = -EBADF, control = -EBADF;
_cleanup_free_ char *backing_file = NULL;
struct loop_config config;
int r, f_flags;
struct stat st;
- assert(fd >= 0);
assert(ret);
assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
- if (fstat(fd, &st) < 0)
+ if (fstat(ASSERT_FD(fd), &st) < 0)
return -errno;
if (S_ISBLK(st.st_mode)) {
* Our intention here is that LO_FLAGS_DIRECT_IO is the primary knob, and O_DIRECT derived
* from that automatically. */
- direct_io_fd = fd_reopen(fd, (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0)|O_CLOEXEC|O_NONBLOCK|open_flags);
- if (direct_io_fd < 0) {
+ reopened_fd = fd_reopen(fd, (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0)|O_CLOEXEC|O_NONBLOCK|open_flags);
+ if (reopened_fd < 0) {
if (!FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO))
- return log_debug_errno(errno, "Failed to reopen file descriptor without O_DIRECT: %m");
+ return log_debug_errno(reopened_fd, "Failed to reopen file descriptor without O_DIRECT: %m");
/* Some file systems might not support O_DIRECT, let's gracefully continue without it then. */
- log_debug_errno(errno, "Failed to enable O_DIRECT for backing file descriptor for loopback device. Continuing without.");
+ log_debug_errno(reopened_fd, "Failed to enable O_DIRECT for backing file descriptor for loopback device. Continuing without.");
loop_flags &= ~LO_FLAGS_DIRECT_IO;
} else
- fd = direct_io_fd; /* From now on, operate on our new O_DIRECT fd */
+ fd = reopened_fd; /* From now on, operate on our new O_DIRECT fd */
}
control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
/* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might
* be gone already, taken by somebody else racing against us. */
for (unsigned n_attempts = 0;;) {
+ usec_t usec;
int nr;
/* Let's take a lock on the control device first. On a busy system, where many programs
/* -ENODEV or friends: Somebody might've gotten the same number from the kernel, used the
* device, and called LOOP_CTL_REMOVE on it. Let's retry with a new number.
* -EBUSY: a file descriptor is already bound to the loopback block device.
- * -EUCLEAN: some left-over partition devices that were cleaned up. */
- if (!ERRNO_IS_DEVICE_ABSENT(r) && !IN_SET(r, -EBUSY, -EUCLEAN))
+ * -EUCLEAN: some left-over partition devices that were cleaned up.
+ * -ENOANO: we tried to use LO_FLAGS_DIRECT_IO but the kernel rejected it. */
+ if (!ERRNO_IS_DEVICE_ABSENT(r) && !IN_SET(r, -EBUSY, -EUCLEAN, -ENOANO))
return r;
/* OK, this didn't work, let's try again a bit later, but first release the lock on the
if (++n_attempts >= 64) /* Give up eventually */
return -EBUSY;
+ /* If we failed to enable direct IO mode, let's retry without it. We restart the process as
+ * on some combination of kernel version and storage filesystem, the kernel is very unhappy
+ * about a failed DIRECT_IO enablement and throws I/O errors. */
+ if (r == -ENOANO && FLAGS_SET(config.info.lo_flags, LO_FLAGS_DIRECT_IO)) {
+ config.info.lo_flags &= ~LO_FLAGS_DIRECT_IO;
+ open_flags &= ~O_DIRECT;
+
+ int non_direct_io_fd = fd_reopen(config.fd, O_CLOEXEC|O_NONBLOCK|open_flags);
+ if (non_direct_io_fd < 0)
+ return log_debug_errno(
+ non_direct_io_fd,
+ "Failed to reopen file descriptor without O_DIRECT: %m");
+
+ safe_close(reopened_fd);
+ fd = config.fd = /* For cleanups */ reopened_fd = non_direct_io_fd;
+ }
+
/* Wait some random time, to make collision less likely. Let's pick a random time in the
* range 0ms…250ms, linearly scaled by the number of failed attempts. */
- (void) usleep(random_u64_range(UINT64_C(10) * USEC_PER_MSEC +
- UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
+ usec = random_u64_range(UINT64_C(10) * USEC_PER_MSEC +
+ UINT64_C(240) * USEC_PER_MSEC * n_attempts/64);
+ log_debug("Trying again after %s.", FORMAT_TIMESPAN(usec, USEC_PER_MSEC));
+ (void) usleep_safe(usec);
}
d->backing_file = TAKE_PTR(backing_file);
ret);
}
-int loop_device_make_by_path(
+int loop_device_make_by_path_at(
+ int dir_fd,
const char *path,
int open_flags,
uint32_t sector_size,
_cleanup_close_ int fd = -EBADF;
bool direct = false;
+ assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
assert(path);
assert(ret);
assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));
direct_flags = FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0;
rdwr_flags = open_flags >= 0 ? open_flags : O_RDWR;
- fd = open(path, basic_flags|direct_flags|rdwr_flags);
+ fd = xopenat(dir_fd, path, basic_flags|direct_flags|rdwr_flags, /* xopen_flags = */ 0, /* mode = */ 0);
if (fd < 0 && direct_flags != 0) /* If we had O_DIRECT on, and things failed with that, let's immediately try again without */
- fd = open(path, basic_flags|rdwr_flags);
+ fd = xopenat(dir_fd, path, basic_flags|rdwr_flags, /* xopen_flags = */ 0, /* mode = */ 0);
else
direct = direct_flags != 0;
if (fd < 0) {
if (open_flags >= 0 || !(ERRNO_IS_PRIVILEGE(r) || r == -EROFS))
return r;
- fd = open(path, basic_flags|direct_flags|O_RDONLY);
+ fd = xopenat(dir_fd, path, basic_flags|direct_flags|O_RDONLY, /* xopen_flags = */ 0, /* mode = */ 0);
if (fd < 0 && direct_flags != 0) /* as above */
- fd = open(path, basic_flags|O_RDONLY);
+ fd = xopenat(dir_fd, path, basic_flags|O_RDONLY, /* xopen_flags = */ 0, /* mode = */ 0);
else
direct = direct_flags != 0;
if (fd < 0)
direct ? "enabled" : "disabled",
direct != (direct_flags != 0) ? " (O_DIRECT was requested but not supported)" : "");
- return loop_device_make_internal(path, fd, open_flags, 0, 0, sector_size, loop_flags, lock_op, ret);
+ return loop_device_make_internal(
+ dir_fd == AT_FDCWD ? path : NULL,
+ fd,
+ open_flags,
+ /* offset = */ 0,
+ /* size = */ 0,
+ sector_size,
+ loop_flags,
+ lock_op,
+ ret);
}
int loop_device_make_by_path_memory(
}
/* Now that the block device is released, let's also try to remove it */
- if (control >= 0)
- for (unsigned n_attempts = 0;;) {
+ if (control >= 0) {
+ useconds_t delay = 5 * USEC_PER_MSEC; /* A total delay of 5090 ms between 39 attempts,
+ * (4*5 + 5*10 + 5*20 + … + 3*640) = 5090. */
+
+ for (unsigned attempt = 1;; attempt++) {
if (ioctl(control, LOOP_CTL_REMOVE, d->nr) >= 0)
break;
- if (errno != EBUSY || ++n_attempts >= 64) {
+ if (errno != EBUSY || attempt > 38) {
log_debug_errno(errno, "Failed to remove device %s: %m", strna(d->node));
break;
}
- (void) usleep(50 * USEC_PER_MSEC);
+ if (attempt % 5 == 0) {
+ log_debug("Device is still busy after %u attempts…", attempt);
+ delay *= 2;
+ }
+
+ (void) usleep_safe(delay);
}
+ }
free(d->node);
sd_device_unref(d->dev);
if (r < 0)
return r;
+ uint64_t device_size;
+ r = blockdev_get_device_size(fd, &device_size);
+ if (r < 0)
+ return r;
+
r = sd_device_get_devnum(dev, &devnum);
if (r < 0)
return r;
.uevent_seqnum_not_before = UINT64_MAX,
.timestamp_not_before = USEC_INFINITY,
.sector_size = sector_size,
+ .device_size = device_size,
+ .created = false,
};
*ret = d;
_cleanup_(sd_device_unrefp) sd_device *dev = NULL;
int r;
- assert(fd >= 0);
-
- r = block_device_new_from_fd(fd, 0, &dev);
+ r = block_device_new_from_fd(ASSERT_FD(fd), 0, &dev);
if (r < 0)
return r;
dev_t devno;
int r;
- assert(partition_fd >= 0);
-
/* Resizes the partition the loopback device refer to (assuming it refers to one instead of an actual
* loopback device), and changes the offset, if needed. This is a fancy wrapper around
* BLKPG_RESIZE_PARTITION. */
- if (fstat(partition_fd, &st) < 0)
+ if (fstat(ASSERT_FD(partition_fd), &st) < 0)
return -errno;
assert(S_ISBLK(st.st_mode));
return -EINVAL;
current_offset *= 512U;
- if (ioctl(partition_fd, BLKGETSIZE64, ¤t_size) < 0)
- return -EINVAL;
+ r = blockdev_get_device_size(partition_fd, ¤t_size);
+ if (r < 0)
+ return r;
if (size == UINT64_MAX && offset == UINT64_MAX)
return 0;
/* If we had no lock fd so far, create one and lock it right-away */
if (d->lock_fd < 0) {
- assert(d->fd >= 0);
-
- d->lock_fd = open_lock_fd(d->fd, operation);
+ d->lock_fd = open_lock_fd(ASSERT_FD(d->fd), operation);
if (d->lock_fd < 0)
return d->lock_fd;
int loop_device_sync(LoopDevice *d) {
assert(d);
- assert(d->fd >= 0);
/* We also do this implicitly in loop_device_unref(). Doing this explicitly here has the benefit that
* we can check the return value though. */
- return RET_NERRNO(fsync(d->fd));
+ return RET_NERRNO(fsync(ASSERT_FD(d->fd)));
}
int loop_device_set_autoclear(LoopDevice *d, bool autoclear) {
assert(d);
- if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0)
+ if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
return -errno;
if (autoclear == FLAGS_SET(info.lo_flags, LO_FLAGS_AUTOCLEAR))
* which is a kernel generated string, subject to file system namespaces and such.
*
* .lo_file_name is useful since userspace can select it freely when creating a loopback block
- * device, and we can use it for /dev/loop/by-ref/ symlinks, and similar, so that apps can recognize
- * their own loopback files. */
+ * device, and we can use it for /dev/disk/by-loop-ref/ symlinks, and similar, so that apps can
+ * recognize their own loopback files. */
if (name && strlen(name) >= sizeof(info.lo_file_name))
return -ENOBUFS;
- if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0)
+ if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
return -errno;
if (strneq((char*) info.lo_file_name, strempty(name), sizeof(info.lo_file_name)))