git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/shared/loop-util.c
Merge pull request #30284 from YHNdnzj/fstab-wantedby-defaultdeps
[thirdparty/systemd.git] / src / shared / loop-util.c
index a85cff09142b2dcdbd758c8b02f3283a6d5c89d8..8434587286cff9701ff21ba13e6e1cc3777bacc7 100644 (file)
@@ -88,7 +88,7 @@ static int open_lock_fd(int primary_fd, int operation) {
 }
 
 static int loop_configure_verify_direct_io(int fd, const struct loop_config *c) {
-        assert(fd);
+        assert(fd >= 0);
         assert(c);
 
         if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO)) {
@@ -108,9 +108,16 @@ static int loop_configure_verify_direct_io(int fd, const struct loop_config *c)
                  * check here if enabling direct IO worked, to make this easily debuggable however.
                  *
                  * (Should anyone really care and actually wants direct IO on old kernels: it might be worth
-                 * enabling direct IO with iteratively larger block sizes until it eventually works.) */
+                 * enabling direct IO with iteratively larger block sizes until it eventually works.)
+                 *
+                 * On older kernels (e.g. 5.10), when this is attempted on a file stored on a dm-crypt
+                 * backed partition, the kernel will start returning I/O errors when accessing the mounted
+                 * loop device, so return a recognizable error that causes the operation to be started
+                 * from scratch without the LO_FLAGS_DIRECT_IO flag. */
                 if (!FLAGS_SET(info.lo_flags, LO_FLAGS_DIRECT_IO))
-                        log_debug("Could not enable direct IO mode, proceeding in buffered IO mode.");
+                        return log_debug_errno(
+                                        SYNTHETIC_ERRNO(ENOANO),
+                                        "Could not enable direct IO mode, retrying in buffered IO mode.");
         }
 
         return 0;
@@ -142,8 +149,9 @@ static int loop_configure_verify(int fd, const struct loop_config *c) {
                  * effect hence. And if not use classic LOOP_SET_STATUS64. */
                 uint64_t z;
 
-                if (ioctl(fd, BLKGETSIZE64, &z) < 0)
-                        return -errno;
+                r = blockdev_get_device_size(fd, &z);
+                if (r < 0)
+                        return r;
 
                 if (z != c->info.lo_sizelimit) {
                         log_debug("LOOP_CONFIGURE is broken, doesn't honour .info.lo_sizelimit. Falling back to LOOP_SET_STATUS64.");
@@ -203,7 +211,7 @@ static int loop_configure_fallback(int fd, const struct loop_config *c) {
 
                 /* Sleep some random time, but at least 10ms, at most 250ms. Increase the delay the more
                  * failed attempts we see */
-                (void) usleep(UINT64_C(10) * USEC_PER_MSEC +
+                (void) usleep_safe(UINT64_C(10) * USEC_PER_MSEC +
                               random_u64_range(UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
         }
 
@@ -397,6 +405,11 @@ static int loop_configure(
                 assert_not_reached();
         }
 
+        uint64_t device_size;
+        r = blockdev_get_device_size(loop_with_fd, &device_size);
+        if (r < 0)
+                return log_device_debug_errno(dev, r, "Failed to get loopback device size: %m");
+
         LoopDevice *d = new(LoopDevice, 1);
         if (!d)
                 return log_oom_debug();
@@ -413,6 +426,8 @@ static int loop_configure(
                 .uevent_seqnum_not_before = seqnum,
                 .timestamp_not_before = timestamp,
                 .sector_size = c->block_size,
+                .device_size = device_size,
+                .created = true,
         };
 
         *ret = TAKE_PTR(d);
@@ -431,7 +446,7 @@ static int loop_device_make_internal(
                 LoopDevice **ret) {
 
         _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
-        _cleanup_close_ int direct_io_fd = -EBADF, control = -EBADF;
+        _cleanup_close_ int reopened_fd = -EBADF, control = -EBADF;
         _cleanup_free_ char *backing_file = NULL;
         struct loop_config config;
         int r, f_flags;
@@ -479,16 +494,16 @@ static int loop_device_make_internal(
                  * Our intention here is that LO_FLAGS_DIRECT_IO is the primary knob, and O_DIRECT derived
                  * from that automatically. */
 
-                direct_io_fd = fd_reopen(fd, (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0)|O_CLOEXEC|O_NONBLOCK|open_flags);
-                if (direct_io_fd < 0) {
+                reopened_fd = fd_reopen(fd, (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0)|O_CLOEXEC|O_NONBLOCK|open_flags);
+                if (reopened_fd < 0) {
                         if (!FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO))
-                                return log_debug_errno(errno, "Failed to reopen file descriptor without O_DIRECT: %m");
+                                return log_debug_errno(reopened_fd, "Failed to reopen file descriptor without O_DIRECT: %m");
 
                         /* Some file systems might not support O_DIRECT, let's gracefully continue without it then. */
-                        log_debug_errno(errno, "Failed to enable O_DIRECT for backing file descriptor for loopback device. Continuing without.");
+                        log_debug_errno(reopened_fd, "Failed to enable O_DIRECT for backing file descriptor for loopback device. Continuing without.");
                         loop_flags &= ~LO_FLAGS_DIRECT_IO;
                 } else
-                        fd = direct_io_fd; /* From now on, operate on our new O_DIRECT fd */
+                        fd = reopened_fd; /* From now on, operate on our new O_DIRECT fd */
         }
 
         control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
@@ -578,8 +593,9 @@ static int loop_device_make_internal(
                 /* -ENODEV or friends: Somebody might've gotten the same number from the kernel, used the
                  * device, and called LOOP_CTL_REMOVE on it. Let's retry with a new number.
                  * -EBUSY: a file descriptor is already bound to the loopback block device.
-                 * -EUCLEAN: some left-over partition devices that were cleaned up. */
-                if (!ERRNO_IS_DEVICE_ABSENT(r) && !IN_SET(r, -EBUSY, -EUCLEAN))
+                 * -EUCLEAN: some left-over partition devices that were cleaned up.
+                 * -ENOANO: we tried to use LO_FLAGS_DIRECT_IO but the kernel rejected it. */
+                if (!ERRNO_IS_DEVICE_ABSENT(r) && !IN_SET(r, -EBUSY, -EUCLEAN, -ENOANO))
                         return r;
 
                 /* OK, this didn't work, let's try again a bit later, but first release the lock on the
@@ -590,12 +606,29 @@ static int loop_device_make_internal(
                 if (++n_attempts >= 64) /* Give up eventually */
                         return -EBUSY;
 
+                /* If we failed to enable direct IO mode, let's retry without it. We restart the process as
+                 * on some combination of kernel version and storage filesystem, the kernel is very unhappy
+                 * about a failed DIRECT_IO enablement and throws I/O errors. */
+                if (r == -ENOANO && FLAGS_SET(config.info.lo_flags, LO_FLAGS_DIRECT_IO)) {
+                        config.info.lo_flags &= ~LO_FLAGS_DIRECT_IO;
+                        open_flags &= ~O_DIRECT;
+
+                        int non_direct_io_fd = fd_reopen(config.fd, O_CLOEXEC|O_NONBLOCK|open_flags);
+                        if (non_direct_io_fd < 0)
+                                return log_debug_errno(
+                                                non_direct_io_fd,
+                                                "Failed to reopen file descriptor without O_DIRECT: %m");
+
+                        safe_close(reopened_fd);
+                        fd = config.fd = /* For cleanups */ reopened_fd = non_direct_io_fd;
+                }
+
                 /* Wait some random time, to make collision less likely. Let's pick a random time in the
                  * range 0ms…250ms, linearly scaled by the number of failed attempts. */
                 usec = random_u64_range(UINT64_C(10) * USEC_PER_MSEC +
                                         UINT64_C(240) * USEC_PER_MSEC * n_attempts/64);
                 log_debug("Trying again after %s.", FORMAT_TIMESPAN(usec, USEC_PER_MSEC));
-                (void) usleep(usec);
+                (void) usleep_safe(usec);
         }
 
         d->backing_file = TAKE_PTR(backing_file);
@@ -677,9 +710,9 @@ int loop_device_make_by_path_at(
         direct_flags = FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0;
         rdwr_flags = open_flags >= 0 ? open_flags : O_RDWR;
 
-        fd = xopenat(dir_fd, path, basic_flags|direct_flags|rdwr_flags, 0);
+        fd = xopenat(dir_fd, path, basic_flags|direct_flags|rdwr_flags, /* xopen_flags = */ 0, /* mode = */ 0);
         if (fd < 0 && direct_flags != 0) /* If we had O_DIRECT on, and things failed with that, let's immediately try again without */
-                fd = xopenat(dir_fd, path, basic_flags|rdwr_flags, 0);
+                fd = xopenat(dir_fd, path, basic_flags|rdwr_flags, /* xopen_flags = */ 0, /* mode = */ 0);
         else
                 direct = direct_flags != 0;
         if (fd < 0) {
@@ -689,9 +722,9 @@ int loop_device_make_by_path_at(
                 if (open_flags >= 0 || !(ERRNO_IS_PRIVILEGE(r) || r == -EROFS))
                         return r;
 
-                fd = xopenat(dir_fd, path, basic_flags|direct_flags|O_RDONLY, 0);
+                fd = xopenat(dir_fd, path, basic_flags|direct_flags|O_RDONLY, /* xopen_flags = */ 0, /* mode = */ 0);
                 if (fd < 0 && direct_flags != 0) /* as above */
-                        fd = xopenat(dir_fd, path, basic_flags|O_RDONLY, 0);
+                        fd = xopenat(dir_fd, path, basic_flags|O_RDONLY, /* xopen_flags = */ 0, /* mode = */ 0);
                 else
                         direct = direct_flags != 0;
                 if (fd < 0)
@@ -817,16 +850,25 @@ static LoopDevice* loop_device_free(LoopDevice *d) {
         }
 
         /* Now that the block device is released, let's also try to remove it */
-        if (control >= 0)
-                for (unsigned n_attempts = 0;;) {
+        if (control >= 0) {
+                useconds_t delay = 5 * USEC_PER_MSEC;  /* A total delay of 5730 ms between 39 attempts,
+                                                        * (4*5 + 5*10 + 5*20 + … + 4*640) = 5730. */
+
+                for (unsigned attempt = 1;; attempt++) {
                         if (ioctl(control, LOOP_CTL_REMOVE, d->nr) >= 0)
                                 break;
-                        if (errno != EBUSY || ++n_attempts >= 64) {
+                        if (errno != EBUSY || attempt > 38) {
                                 log_debug_errno(errno, "Failed to remove device %s: %m", strna(d->node));
                                 break;
                         }
-                        (void) usleep(50 * USEC_PER_MSEC);
+                        if (attempt % 5 == 0) {
+                                log_debug("Device is still busy after %u attempts…", attempt);
+                                delay *= 2;
+                        }
+
+                        (void) usleep_safe(delay);
                 }
+        }
 
         free(d->node);
         sd_device_unref(d->dev);
@@ -910,6 +952,11 @@ int loop_device_open(
         if (r < 0)
                 return r;
 
+        uint64_t device_size;
+        r = blockdev_get_device_size(fd, &device_size);
+        if (r < 0)
+                return r;
+
         r = sd_device_get_devnum(dev, &devnum);
         if (r < 0)
                 return r;
@@ -942,6 +989,8 @@ int loop_device_open(
                 .uevent_seqnum_not_before = UINT64_MAX,
                 .timestamp_not_before = USEC_INFINITY,
                 .sector_size = sector_size,
+                .device_size = device_size,
+                .created = false,
         };
 
         *ret = d;
@@ -1023,8 +1072,9 @@ static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) {
                 return -EINVAL;
         current_offset *= 512U;
 
-        if (ioctl(partition_fd, BLKGETSIZE64, &current_size) < 0)
-                return -EINVAL;
+        r = blockdev_get_device_size(partition_fd, &current_size);
+        if (r < 0)
+                return r;
 
         if (size == UINT64_MAX && offset == UINT64_MAX)
                 return 0;
@@ -1150,8 +1200,8 @@ int loop_device_set_filename(LoopDevice *d, const char *name) {
          * which is a kernel generated string, subject to file system namespaces and such.
          *
          * .lo_file_name is useful since userspace can select it freely when creating a loopback block
-         * device, and we can use it for /dev/loop/by-ref/ symlinks, and similar, so that apps can recognize
-         * their own loopback files. */
+         * device, and we can use it for /dev/disk/by-loop-ref/ symlinks, and similar, so that apps can
+         * recognize their own loopback files. */
 
         if (name && strlen(name) >= sizeof(info.lo_file_name))
                 return -ENOBUFS;