/* src/shared/loop-util.c (systemd) */
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #if HAVE_VALGRIND_MEMCHECK_H
4 #include <valgrind/memcheck.h>
5 #endif
6
7 #include <errno.h>
8 #include <fcntl.h>
9 #include <linux/blkpg.h>
10 #include <linux/fs.h>
11 #include <linux/loop.h>
12 #include <sys/file.h>
13 #include <sys/ioctl.h>
14 #include <unistd.h>
15
16 #include "sd-device.h"
17
18 #include "alloc-util.h"
19 #include "blockdev-util.h"
20 #include "data-fd-util.h"
21 #include "device-util.h"
22 #include "devnum-util.h"
23 #include "dissect-image.h"
24 #include "env-util.h"
25 #include "errno-util.h"
26 #include "fd-util.h"
27 #include "fs-util.h"
28 #include "fileio.h"
29 #include "loop-util.h"
30 #include "missing_loop.h"
31 #include "parse-util.h"
32 #include "path-util.h"
33 #include "random-util.h"
34 #include "stat-util.h"
35 #include "stdio-util.h"
36 #include "string-util.h"
37 #include "tmpfile-util.h"
38
/* _cleanup_ helper: detach the backing file from a loopback block device and close the
 * fd. A negative fd (never opened, or already taken over) is silently skipped; errors
 * are deliberately ignored since this only runs on error/cleanup paths. */
static void cleanup_clear_loop_close(int *fd) {
        if (*fd >= 0) {
                (void) ioctl(*fd, LOOP_CLR_FD);
                (void) safe_close(*fd);
        }
}
46
47 static int loop_is_bound(int fd) {
48 struct loop_info64 info;
49
50 if (ioctl(ASSERT_FD(fd), LOOP_GET_STATUS64, &info) < 0) {
51 if (errno == ENXIO)
52 return false; /* not bound! */
53
54 return -errno;
55 }
56
57 return true; /* bound! */
58 }
59
60 static int open_lock_fd(int primary_fd, int operation) {
61 _cleanup_close_ int lock_fd = -EBADF;
62
63 assert(IN_SET(operation & ~LOCK_NB, LOCK_SH, LOCK_EX));
64
65 lock_fd = fd_reopen(ASSERT_FD(primary_fd), O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
66 if (lock_fd < 0)
67 return lock_fd;
68
69 if (flock(lock_fd, operation) < 0)
70 return -errno;
71
72 return TAKE_FD(lock_fd);
73 }
74
/* If LO_FLAGS_DIRECT_IO was requested in 'c', verify the kernel actually enabled it on
 * the device. Returns 0 on success (or if direct IO was not requested at all), -ENOANO
 * if it was requested but is not in effect (callers retry without the flag), other
 * negative errno on ioctl failure. */
static int loop_configure_verify_direct_io(int fd, const struct loop_config *c) {
        assert(fd >= 0);
        assert(c);

        if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO)) {
                struct loop_info64 info;

                if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0)
                        return log_debug_errno(errno, "Failed to issue LOOP_GET_STATUS64: %m");

#if HAVE_VALGRIND_MEMCHECK_H
                /* Valgrind doesn't know LOOP_GET_STATUS64; mark the output buffer as defined. */
                VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif

                /* On older kernels (<= 5.3) it was necessary to set the block size of the loopback block
                 * device to the logical block size of the underlying file system. Since there was no nice
                 * way to query the value, we are not bothering to do this however. On newer kernels the
                 * block size is propagated automatically and does not require intervention from us. We'll
                 * check here if enabling direct IO worked, to make this easily debuggable however.
                 *
                 * (Should anyone really care and actually wants direct IO on old kernels: it might be worth
                 * enabling direct IO with iteratively larger block sizes until it eventually works.)
                 *
                 * On older kernels (e.g.: 5.10) when this is attempted on a file stored on a dm-crypt
                 * backed partition the kernel will start returning I/O errors when accessing the mounted
                 * loop device, so return a recognizable error that causes the operation to be started
                 * from scratch without the LO_FLAGS_DIRECT_IO flag. */
                if (!FLAGS_SET(info.lo_flags, LO_FLAGS_DIRECT_IO))
                        return log_debug_errno(
                                        SYNTHETIC_ERRNO(ENOANO),
                                        "Could not enable direct IO mode, retrying in buffered IO mode.");
        }

        return 0;
}
110
/* Verify that a LOOP_CONFIGURE call actually applied everything we asked for. Several
 * kernel versions accept the ioctl but silently ignore the block size, the size limit
 * or the partition-scanning flag. Returns > 0 if everything took effect, 0 if the
 * caller should fall back to LOOP_SET_STATUS64, -ENOANO if direct IO couldn't be
 * enabled, and other negative errno on error. */
static int loop_configure_verify(int fd, const struct loop_config *c) {
        bool need_fallback = false;
        int r;

        assert(fd >= 0);
        assert(c);

        if (c->block_size != 0) {
                uint32_t sector_size;

                r = blockdev_get_sector_size(fd, &sector_size);
                if (r < 0)
                        return r;

                if (sector_size != c->block_size) {
                        log_debug("LOOP_CONFIGURE didn't honour requested block size %" PRIu32 ", got %" PRIu32 " instead. Ignoring.", c->block_size, sector_size);
                        need_fallback = true;
                }
        }

        if (c->info.lo_sizelimit != 0) {
                /* Vanilla kernel 5.8 doesn't properly propagate the size limit into the block
                 * device. If one was requested, immediately check whether it took effect, and
                 * if not, fall back to classic LOOP_SET_STATUS64. */
                uint64_t device_size;

                r = blockdev_get_device_size(fd, &device_size);
                if (r < 0)
                        return r;

                if (device_size != c->info.lo_sizelimit) {
                        log_debug("LOOP_CONFIGURE is broken, doesn't honour .info.lo_sizelimit. Falling back to LOOP_SET_STATUS64.");
                        need_fallback = true;
                }
        }

        if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_PARTSCAN)) {
                /* Vanilla kernel 5.8 likewise doesn't properly propagate the partition-scanning
                 * flag into the block device, hence verify it before returning. */
                r = blockdev_partscan_enabled(fd);
                if (r < 0)
                        return r;
                if (r == 0) {
                        log_debug("LOOP_CONFIGURE is broken, doesn't honour LO_FLAGS_PARTSCAN. Falling back to LOOP_SET_STATUS64.");
                        need_fallback = true;
                }
        }

        r = loop_configure_verify_direct_io(fd, c);
        if (r < 0)
                return r;

        return !need_fallback;
}
167
/* Configure the loopback device the pre-LOOP_CONFIGURE way: attach the settings with
 * LOOP_SET_STATUS64, then apply block size and direct IO via their separate ioctls.
 * Used when LOOP_CONFIGURE is unavailable or detected as broken. Returns 0 on success,
 * -ENOANO if direct IO was requested but could not be enabled, other negative errno on
 * failure. */
static int loop_configure_fallback(int fd, const struct loop_config *c) {
        struct loop_info64 info_copy;
        int r;

        assert(fd >= 0);
        assert(c);

        /* Only some of the flags LOOP_CONFIGURE can set are also settable via LOOP_SET_STATUS64, hence mask
         * them out. */
        info_copy = c->info;
        info_copy.lo_flags &= LOOP_SET_STATUS_SETTABLE_FLAGS;

        /* Since kernel commit 5db470e229e22b7eda6e23b5566e532c96fb5bc3 (kernel v5.0) the LOOP_SET_STATUS64
         * ioctl can return EAGAIN in case we change the info.lo_offset field, if someone else is accessing the
         * block device while we try to reconfigure it. This is a pretty common case, since udev might
         * instantly start probing the device as soon as we attach an fd to it. Hence handle it in two ways:
         * first, let's take the BSD lock to ensure that udev will not step in between the point in
         * time where we attach the fd and where we reconfigure the device. Secondly, let's wait 50ms on
         * EAGAIN and retry. The former should be an efficient mechanism to avoid we have to wait 50ms
         * needlessly if we are just racing against udev. The latter is protection against all other cases,
         * i.e. peers that do not take the BSD lock. */

        for (unsigned n_attempts = 0;;) {
                if (ioctl(fd, LOOP_SET_STATUS64, &info_copy) >= 0)
                        break;

                if (errno != EAGAIN || ++n_attempts >= 64)
                        return log_debug_errno(errno, "Failed to configure loopback block device: %m");

                /* Sleep some random time, but at least 10ms, at most 250ms. Increase the delay the more
                 * failed attempts we see */
                (void) usleep_safe(UINT64_C(10) * USEC_PER_MSEC +
                                   random_u64_range(UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
        }

        /* Work around a kernel bug, where changing offset/size of the loopback device doesn't correctly
         * invalidate the buffer cache. For details see:
         *
         * https://android.googlesource.com/platform/system/apex/+/bef74542fbbb4cd629793f4efee8e0053b360570
         *
         * This was fixed in kernel 5.0, see:
         *
         * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5db470e229e22b7eda6e23b5566e532c96fb5bc3
         *
         * We'll run the work-around here in the legacy LOOP_SET_STATUS64 codepath. In the LOOP_CONFIGURE
         * codepath above it should not be necessary. */
        if (c->info.lo_offset != 0 || c->info.lo_sizelimit != 0)
                if (ioctl(fd, BLKFLSBUF, 0) < 0)
                        log_debug_errno(errno, "Failed to issue BLKFLSBUF ioctl, ignoring: %m");

        /* If a block size is requested then try to configure it. If that doesn't work, ignore errors, but
         * afterwards, let's validate what is in effect, and if it doesn't match what we want, fail */
        if (c->block_size != 0) {
                uint32_t ssz;

                if (ioctl(fd, LOOP_SET_BLOCK_SIZE, (unsigned long) c->block_size) < 0)
                        log_debug_errno(errno, "Failed to set sector size, ignoring: %m");

                r = blockdev_get_sector_size(fd, &ssz);
                if (r < 0)
                        return log_debug_errno(r, "Failed to read sector size: %m");
                if (ssz != c->block_size)
                        return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Sector size of loopback device doesn't match what we requested, refusing.");
        }

        /* LO_FLAGS_DIRECT_IO is a flag we need to configure via an explicit ioctl. */
        if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO))
                if (ioctl(fd, LOOP_SET_DIRECT_IO, 1UL) < 0)
                        log_debug_errno(errno, "Failed to enable direct IO mode, ignoring: %m");

        return loop_configure_verify_direct_io(fd, c);
}
240
/* Open /dev/loop<nr>, take an exclusive BSD lock on it, attach the backing fd and
 * settings from 'c', and on success return a new LoopDevice object in *ret with the
 * requested lock level ('lock_op') left in effect.
 *
 * Error returns the caller's retry loop relies on:
 *   -EBUSY    the device is already bound, or LOOP_CONFIGURE half-worked and the device
 *             can't be reused right away — try the next free device;
 *   -EUCLEAN  stale partition devices had to be removed first — try again;
 *   -ENOANO   direct IO could not be enabled — retry without LO_FLAGS_DIRECT_IO;
 *   device-absent errnos if the device got removed underneath us. */
static int loop_configure(
                int nr,
                int open_flags,
                int lock_op,
                const struct loop_config *c,
                LoopDevice **ret) {

        /* Set once LOOP_CONFIGURE was detected to be unsupported or broken; from then on
         * all devices in this process are configured via the LOOP_SET_STATUS64 fallback. */
        static bool loop_configure_broken = false;

        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
        _cleanup_(cleanup_clear_loop_close) int loop_with_fd = -EBADF; /* This must be declared before lock_fd. */
        _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
        _cleanup_free_ char *node = NULL;
        uint64_t diskseq = 0;
        dev_t devno;
        int r;

        assert(nr >= 0);
        assert(c);
        assert(ret);

        if (asprintf(&node, "/dev/loop%i", nr) < 0)
                return log_oom_debug();

        r = sd_device_new_from_devname(&dev, node);
        if (r < 0)
                return log_debug_errno(r, "Failed to create sd_device object for \"%s\": %m", node);

        r = sd_device_get_devnum(dev, &devno);
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to get devnum: %m");

        fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
        if (fd < 0)
                return log_device_debug_errno(dev, fd, "Failed to open device: %m");

        /* Let's lock the device before we do anything. We take the BSD lock on a second, separately opened
         * fd for the device. udev after all watches for close() events (specifically IN_CLOSE_WRITE) on
         * block devices to reprobe them, hence by having a separate fd we will later close() we can ensure
         * we trigger udev after everything is done. If we'd lock our own fd instead and keep it open for a
         * long time udev would possibly never run on it again, even though the fd is unlocked, simply
         * because we never close() it. It also has the nice benefit we can use the _cleanup_close_ logic to
         * automatically release the lock, after we are done. */
        lock_fd = open_lock_fd(fd, LOCK_EX);
        if (lock_fd < 0)
                return log_device_debug_errno(dev, lock_fd, "Failed to acquire lock: %m");

        log_device_debug(dev, "Acquired exclusive lock.");

        /* Let's see if backing file is really unattached. Someone may already attach a backing file without
         * taking BSD lock. */
        r = loop_is_bound(fd);
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to check if the loopback block device is bound: %m");
        if (r > 0)
                return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EBUSY),
                                              "The loopback block device is already bound, ignoring.");

        /* Let's see if the device is really detached, i.e. currently has no associated partition block
         * devices. On various kernels (such as 5.8) it is possible to have a loopback block device that
         * superficially is detached but still has partition block devices associated for it. Let's then
         * manually remove the partitions via BLKPG, and tell the caller we did that via EUCLEAN, so they try
         * again. */
        r = block_device_remove_all_partitions(dev, fd);
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to remove partitions on the loopback block device: %m");
        if (r > 0)
                /* Removed all partitions. Let's report this to the caller, to try again, and count this as
                 * an attempt. */
                return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EUCLEAN),
                                              "Removed partitions on the loopback block device.");

        if (!loop_configure_broken) {
                if (ioctl(fd, LOOP_CONFIGURE, c) < 0) {
                        /* Do fallback only if LOOP_CONFIGURE is not supported, propagate all other
                         * errors. Note that the kernel is weird: non-existing ioctls currently return EINVAL
                         * rather than ENOTTY on loopback block devices. They should fix that in the kernel,
                         * but in the meantime we accept both here. */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EINVAL)
                                return log_device_debug_errno(dev, errno, "ioctl(LOOP_CONFIGURE) failed: %m");

                        loop_configure_broken = true;
                } else {
                        /* From here on the cleanup handler also issues LOOP_CLR_FD on failure. */
                        loop_with_fd = TAKE_FD(fd);

                        r = loop_configure_verify(loop_with_fd, c);
                        if (r < 0)
                                return log_device_debug_errno(dev, r, "Failed to verify if loopback block device is correctly configured: %m");
                        if (r == 0) {
                                /* LOOP_CONFIGURE doesn't work. Remember that. */
                                loop_configure_broken = true;

                                /* We return EBUSY here instead of retrying immediately with LOOP_SET_FD,
                                 * because LOOP_CLR_FD is async: if the operation cannot be executed right
                                 * away it just sets the autoclear flag on the device. This means there's a
                                 * good chance we cannot actually reuse the loopback device right-away. Hence
                                 * let's assume it's busy, avoid the trouble and let the calling loop call us
                                 * again with a new, likely unused device. */
                                return -EBUSY;
                        }
                }
        }

        if (loop_configure_broken) {
                if (ioctl(fd, LOOP_SET_FD, c->fd) < 0)
                        return log_device_debug_errno(dev, errno, "ioctl(LOOP_SET_FD) failed: %m");

                loop_with_fd = TAKE_FD(fd);

                r = loop_configure_fallback(loop_with_fd, c);
                if (r < 0)
                        return r;
        }

        r = fd_get_diskseq(loop_with_fd, &diskseq);
        if (r < 0 && r != -EOPNOTSUPP)
                return log_device_debug_errno(dev, r, "Failed to get diskseq: %m");

        /* Adjust the lock from the exclusive one taken above to what the caller asked for. */
        switch (lock_op & ~LOCK_NB) {
        case LOCK_EX: /* Already in effect */
                break;
        case LOCK_SH: /* Downgrade */
                if (flock(lock_fd, lock_op) < 0)
                        return log_device_debug_errno(dev, errno, "Failed to downgrade lock level: %m");
                break;
        case LOCK_UN: /* Release */
                lock_fd = safe_close(lock_fd);
                break;
        default:
                assert_not_reached();
        }

        uint64_t device_size;
        r = blockdev_get_device_size(loop_with_fd, &device_size);
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to get loopback device size: %m");

        LoopDevice *d = new(LoopDevice, 1);
        if (!d)
                return log_oom_debug();

        *d = (LoopDevice) {
                .n_ref = 1,
                .fd = TAKE_FD(loop_with_fd),
                .lock_fd = TAKE_FD(lock_fd),
                .node = TAKE_PTR(node),
                .nr = nr,
                .devno = devno,
                .dev = TAKE_PTR(dev),
                .diskseq = diskseq,
                .sector_size = c->block_size,
                .device_size = device_size,
                .created = true,
        };

        *ret = TAKE_PTR(d);
        return 0;
}
399
/* Core implementation behind the loop_device_make*() family: attach the range
 * [offset, offset+size) of 'fd' (a regular file, or a block device when only a
 * sub-range is wanted) to a freshly allocated loopback block device and return it as a
 * LoopDevice in *ret. 'path', if non-NULL, is only recorded as the backing file name.
 * A 'sector_size' of 0 means the classic 512-byte default; UINT32_MAX means "detect"
 * (propagated from the underlying block device, or probed from the image's GPT label).
 * Loops over LOOP_CTL_GET_FREE + loop_configure() until a device is successfully
 * allocated, since other allocators may race us for the device number. */
static int loop_device_make_internal(
                const char *path,
                int fd,
                int open_flags,
                uint64_t offset,
                uint64_t size,
                uint32_t sector_size,
                uint32_t loop_flags,
                int lock_op,
                LoopDevice **ret) {

        _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
        _cleanup_close_ int reopened_fd = -EBADF, control = -EBADF;
        _cleanup_free_ char *backing_file = NULL;
        struct loop_config config;
        int r, f_flags;
        struct stat st;

        assert(ret);
        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));

        if (fstat(ASSERT_FD(fd), &st) < 0)
                return -errno;

        if (S_ISBLK(st.st_mode)) {
                if (offset == 0 && IN_SET(size, 0, UINT64_MAX))
                        /* If this is already a block device and we are supposed to cover the whole of it
                         * then store an fd to the original open device node — and do not actually create an
                         * unnecessary loopback device for it. */
                        return loop_device_open_from_fd(fd, open_flags, lock_op, ret);
        } else {
                /* Anything other than a block device must be a regular file. */
                r = stat_verify_regular(&st);
                if (r < 0)
                        return r;
        }

        /* Record the absolute backing file path, either from the caller or from the fd. */
        if (path) {
                r = path_make_absolute_cwd(path, &backing_file);
                if (r < 0)
                        return r;

                path_simplify(backing_file);
        } else {
                r = fd_get_path(fd, &backing_file);
                if (r < 0)
                        return r;
        }

        f_flags = fcntl(fd, F_GETFL);
        if (f_flags < 0)
                return -errno;

        if (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) != FLAGS_SET(f_flags, O_DIRECT)) {
                /* If LO_FLAGS_DIRECT_IO is requested, then make sure we have the fd open with O_DIRECT, as
                 * that's required. Conversely, if it's off require that O_DIRECT is off too (that's because
                 * new kernels will implicitly enable LO_FLAGS_DIRECT_IO if O_DIRECT is set).
                 *
                 * Our intention here is that LO_FLAGS_DIRECT_IO is the primary knob, and O_DIRECT derived
                 * from that automatically. */

                reopened_fd = fd_reopen(fd, (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0)|O_CLOEXEC|O_NONBLOCK|open_flags);
                if (reopened_fd < 0) {
                        if (!FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO))
                                return log_debug_errno(reopened_fd, "Failed to reopen file descriptor without O_DIRECT: %m");

                        /* Some file systems might not support O_DIRECT, let's gracefully continue without it then. */
                        log_debug_errno(reopened_fd, "Failed to enable O_DIRECT for backing file descriptor for loopback device. Continuing without.");
                        loop_flags &= ~LO_FLAGS_DIRECT_IO;
                } else
                        fd = reopened_fd; /* From now on, operate on our new O_DIRECT fd */
        }

        control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
        if (control < 0)
                return -errno;

        if (sector_size == 0)
                /* If no sector size is specified, default to the classic default */
                sector_size = 512;
        else if (sector_size == UINT32_MAX) {

                if (S_ISBLK(st.st_mode))
                        /* If the sector size is specified as UINT32_MAX we'll propagate the sector size of
                         * the underlying block device. */
                        r = blockdev_get_sector_size(fd, &sector_size);
                else {
                        _cleanup_close_ int non_direct_io_fd = -EBADF;
                        int probe_fd;

                        assert(S_ISREG(st.st_mode));

                        /* If sector size is specified as UINT32_MAX, we'll try to probe the right sector
                         * size of the image in question by looking for the GPT partition header at various
                         * offsets. This of course only works if the image already has a disk label.
                         *
                         * So here we actually want to read the file contents ourselves. This is quite likely
                         * not going to work if we managed to enable O_DIRECT, because in such a case there
                         * are some pretty strict alignment requirements to offset, size and target, but
                         * there's no way to query what alignment specifically is actually required. Hence,
                         * let's avoid the mess, and temporarily open an fd without O_DIRECT for the probing
                         * logic. */

                        if (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO)) {
                                non_direct_io_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
                                if (non_direct_io_fd < 0)
                                        return non_direct_io_fd;

                                probe_fd = non_direct_io_fd;
                        } else
                                probe_fd = fd;

                        r = probe_sector_size(probe_fd, &sector_size);
                }
                if (r < 0)
                        return r;
        }

        config = (struct loop_config) {
                .fd = fd,
                .block_size = sector_size,
                .info = {
                        /* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
                        .lo_flags = (loop_flags & ~LO_FLAGS_READ_ONLY) | ((open_flags & O_ACCMODE) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR,
                        .lo_offset = offset,
                        .lo_sizelimit = size == UINT64_MAX ? 0 : size,
                },
        };

        /* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might
         * be gone already, taken by somebody else racing against us. */
        for (unsigned n_attempts = 0;;) {
                usec_t usec;
                int nr;

                /* Let's take a lock on the control device first. On a busy system, where many programs
                 * attempt to allocate a loopback device at the same time, we might otherwise keep looping
                 * around relatively heavy operations: asking for a free loopback device, then opening it,
                 * validating it, attaching something to it. Let's serialize this whole operation, to make
                 * unnecessary busywork less likely. Note that this is just something we do to optimize our
                 * own code (and whoever else decides to use LOCK_EX locks for this), taking this lock is not
                 * necessary, it just means it's less likely we have to iterate through this loop again and
                 * again if our own code races against our own code.
                 *
                 * Note: our lock protocol is to take the /dev/loop-control lock first, and the block device
                 * lock second, if both are taken, and always in this order, to avoid ABBA locking issues. */
                if (flock(control, LOCK_EX) < 0)
                        return -errno;

                nr = ioctl(control, LOOP_CTL_GET_FREE);
                if (nr < 0)
                        return -errno;

                r = loop_configure(nr, open_flags, lock_op, &config, &d);
                if (r >= 0)
                        break;

                /* -ENODEV or friends: Somebody might've gotten the same number from the kernel, used the
                 * device, and called LOOP_CTL_REMOVE on it. Let's retry with a new number.
                 * -EBUSY: a file descriptor is already bound to the loopback block device.
                 * -EUCLEAN: some left-over partition devices that were cleaned up.
                 * -ENOANO: we tried to use LO_FLAGS_DIRECT_IO but the kernel rejected it. */
                if (!ERRNO_IS_DEVICE_ABSENT(r) && !IN_SET(r, -EBUSY, -EUCLEAN, -ENOANO))
                        return r;

                /* OK, this didn't work, let's try again a bit later, but first release the lock on the
                 * control device */
                if (flock(control, LOCK_UN) < 0)
                        return -errno;

                if (++n_attempts >= 64) /* Give up eventually */
                        return -EBUSY;

                /* If we failed to enable direct IO mode, let's retry without it. We restart the process as
                 * on some combination of kernel version and storage filesystem, the kernel is very unhappy
                 * about a failed DIRECT_IO enablement and throws I/O errors. */
                if (r == -ENOANO && FLAGS_SET(config.info.lo_flags, LO_FLAGS_DIRECT_IO)) {
                        config.info.lo_flags &= ~LO_FLAGS_DIRECT_IO;
                        open_flags &= ~O_DIRECT;

                        int non_direct_io_fd = fd_reopen(config.fd, O_CLOEXEC|O_NONBLOCK|open_flags);
                        if (non_direct_io_fd < 0)
                                return log_debug_errno(
                                                non_direct_io_fd,
                                                "Failed to reopen file descriptor without O_DIRECT: %m");

                        safe_close(reopened_fd);
                        fd = config.fd = /* For cleanups */ reopened_fd = non_direct_io_fd;
                }

                /* Wait some random time, to make collision less likely. Let's pick a random time in the
                 * range 0ms…250ms, linearly scaled by the number of failed attempts. */
                usec = random_u64_range(UINT64_C(10) * USEC_PER_MSEC +
                                        UINT64_C(240) * USEC_PER_MSEC * n_attempts/64);
                log_debug("Trying again after %s.", FORMAT_TIMESPAN(usec, USEC_PER_MSEC));
                (void) usleep_safe(usec);
        }

        d->backing_file = TAKE_PTR(backing_file);
        d->backing_inode = st.st_ino;
        d->backing_devno = st.st_dev;

        log_debug("Successfully acquired %s, devno=%u:%u, nr=%i, diskseq=%" PRIu64,
                  d->node,
                  major(d->devno), minor(d->devno),
                  d->nr,
                  d->diskseq);

        *ret = TAKE_PTR(d);
        return 0;
}
610
/* Apply the $SYSTEMD_LOOP_DIRECT_IO override to 'loop_flags': LO_FLAGS_DIRECT_IO is on
 * by default, and only turned off when the variable is explicitly set to a false value.
 * Unset (-ENXIO) keeps the default; an unparsable value is logged and treated as on. */
static uint32_t loop_flags_mangle(uint32_t loop_flags) {
        int e = getenv_bool("SYSTEMD_LOOP_DIRECT_IO");
        if (e < 0 && e != -ENXIO)
                log_debug_errno(e, "Failed to parse $SYSTEMD_LOOP_DIRECT_IO, ignoring: %m");

        /* Turn on LO_FLAGS_DIRECT_IO by default, unless explicitly configured to off. */
        return UPDATE_FLAG(loop_flags, LO_FLAGS_DIRECT_IO, e != 0);
}
620
621 int loop_device_make(
622 int fd,
623 int open_flags,
624 uint64_t offset,
625 uint64_t size,
626 uint32_t sector_size,
627 uint32_t loop_flags,
628 int lock_op,
629 LoopDevice **ret) {
630
631 assert(fd >= 0);
632 assert(ret);
633
634 return loop_device_make_internal(
635 NULL,
636 fd,
637 open_flags,
638 offset,
639 size,
640 sector_size,
641 loop_flags_mangle(loop_flags),
642 lock_op,
643 ret);
644 }
645
/* Open the file at dir_fd/path and attach the whole of it to a loopback block device.
 * 'open_flags' may be O_RDWR, O_RDONLY, or negative meaning "writable if possible,
 * falling back to read-only". O_DIRECT is attempted when LO_FLAGS_DIRECT_IO is in
 * effect, with automatic fallback to buffered IO if the file system refuses it. */
int loop_device_make_by_path_at(
                int dir_fd,
                const char *path,
                int open_flags,
                uint32_t sector_size,
                uint32_t loop_flags,
                int lock_op,
                LoopDevice **ret) {

        int r, basic_flags, direct_flags, rdwr_flags;
        _cleanup_close_ int fd = -EBADF;
        bool direct = false;

        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
        assert(path);
        assert(ret);
        assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));

        /* Passing < 0 as open_flags here means we'll try to open the device writable if we can, retrying
         * read-only if we cannot. */

        loop_flags = loop_flags_mangle(loop_flags);

        /* Let's open with O_DIRECT if we can. But not all file systems support that, hence fall back to
         * non-O_DIRECT mode automatically, if it fails. */

        basic_flags = O_CLOEXEC|O_NONBLOCK|O_NOCTTY;
        direct_flags = FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0;
        rdwr_flags = open_flags >= 0 ? open_flags : O_RDWR;

        /* First attempt: requested access mode, with O_DIRECT if direct IO is on. If that fails
         * and O_DIRECT was in the mix, retry once without it ('direct' then stays false). */
        fd = xopenat(dir_fd, path, basic_flags|direct_flags|rdwr_flags);
        if (fd < 0 && direct_flags != 0) /* If we had O_DIRECT on, and things failed with that, let's immediately try again without */
                fd = xopenat(dir_fd, path, basic_flags|rdwr_flags);
        else
                direct = direct_flags != 0;
        if (fd < 0) {
                r = fd;

                /* Retry read-only? Only if the caller allowed it (open_flags < 0) and the failure
                 * looks permission/read-only related. */
                if (open_flags >= 0 || !(ERRNO_IS_PRIVILEGE(r) || r == -EROFS))
                        return r;

                fd = xopenat(dir_fd, path, basic_flags|direct_flags|O_RDONLY);
                if (fd < 0 && direct_flags != 0) /* as above */
                        fd = xopenat(dir_fd, path, basic_flags|O_RDONLY);
                else
                        direct = direct_flags != 0;
                if (fd < 0)
                        return r; /* Propagate original error */

                open_flags = O_RDONLY;
        } else if (open_flags < 0)
                open_flags = O_RDWR;

        log_debug("Opened '%s' in %s access mode%s, with O_DIRECT %s%s.",
                  path,
                  open_flags == O_RDWR ? "O_RDWR" : "O_RDONLY",
                  open_flags != rdwr_flags ? " (O_RDWR was requested but not allowed)" : "",
                  direct ? "enabled" : "disabled",
                  direct != (direct_flags != 0) ? " (O_DIRECT was requested but not supported)" : "");

        return loop_device_make_internal(
                        dir_fd == AT_FDCWD ? path : NULL,
                        fd,
                        open_flags,
                        /* offset = */ 0,
                        /* size = */ 0,
                        sector_size,
                        loop_flags,
                        lock_op,
                        ret);
}
718
719 int loop_device_make_by_path_memory(
720 const char *path,
721 int open_flags,
722 uint32_t sector_size,
723 uint32_t loop_flags,
724 int lock_op,
725 LoopDevice **ret) {
726
727 _cleanup_close_ int fd = -EBADF, mfd = -EBADF;
728 _cleanup_free_ char *fn = NULL;
729 struct stat st;
730 int r;
731
732 assert(path);
733 assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
734 assert(ret);
735
736 loop_flags &= ~LO_FLAGS_DIRECT_IO; /* memfds don't support O_DIRECT, hence LO_FLAGS_DIRECT_IO can't be used either */
737
738 fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|O_RDONLY);
739 if (fd < 0)
740 return -errno;
741
742 if (fstat(fd, &st) < 0)
743 return -errno;
744
745 if (!S_ISREG(st.st_mode) && !S_ISBLK(st.st_mode))
746 return -EBADF;
747
748 r = path_extract_filename(path, &fn);
749 if (r < 0)
750 return r;
751
752 mfd = memfd_clone_fd(fd, fn, open_flags|O_CLOEXEC);
753 if (mfd < 0)
754 return mfd;
755
756 fd = safe_close(fd); /* Let's close the original early */
757
758 return loop_device_make_internal(NULL, mfd, open_flags, 0, 0, sector_size, loop_flags, lock_op, ret);
759 }
760
761 static LoopDevice* loop_device_free(LoopDevice *d) {
762 _cleanup_close_ int control = -EBADF;
763 int r;
764
765 if (!d)
766 return NULL;
767
768 /* Release any lock we might have on the device first. We want to open+lock the /dev/loop-control
769 * device below, but our lock protocol says that if both control and block device locks are taken,
770 * the control lock needs to be taken first, the block device lock second — in order to avoid ABBA
771 * locking issues. Moreover, we want to issue LOOP_CLR_FD on the block device further down, and that
772 * would fail if we had another fd open to the device. */
773 d->lock_fd = safe_close(d->lock_fd);
774
775 /* Let's open the control device early, and lock it, so that we can release our block device and
776 * delete it in a synchronized fashion, and allocators won't needlessly see the block device as free
777 * while we are about to delete it. */
778 if (!LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished) {
779 control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
780 if (control < 0)
781 log_debug_errno(errno, "Failed to open loop control device, cannot remove loop device '%s', ignoring: %m", strna(d->node));
782 else if (flock(control, LOCK_EX) < 0)
783 log_debug_errno(errno, "Failed to lock loop control device, ignoring: %m");
784 }
785
786 /* Then let's release the loopback block device */
787 if (d->fd >= 0) {
788 /* Implicitly sync the device, since otherwise in-flight blocks might not get written */
789 if (fsync(d->fd) < 0)
790 log_debug_errno(errno, "Failed to sync loop block device, ignoring: %m");
791
792 if (!LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished) {
793 /* We are supposed to clear the loopback device. Let's do this synchronously: lock
794 * the device, manually remove all partitions and then clear it. This should ensure
795 * udev doesn't concurrently access the devices, and we can be reasonably sure that
796 * once we are done here the device is cleared and all its partition children
797 * removed. Note that we lock our primary device fd here (and not a separate locking
798 * fd, as we do during allocation, since we want to keep the lock all the way through
799 * the LOOP_CLR_FD, but that call would fail if we had more than one fd open.) */
800
801 if (flock(d->fd, LOCK_EX) < 0)
802 log_debug_errno(errno, "Failed to lock loop block device, ignoring: %m");
803
804 r = block_device_remove_all_partitions(d->dev, d->fd);
805 if (r < 0)
806 log_debug_errno(r, "Failed to remove partitions of loopback block device, ignoring: %m");
807
808 if (ioctl(d->fd, LOOP_CLR_FD) < 0)
809 log_debug_errno(errno, "Failed to clear loop device, ignoring: %m");
810 }
811
812 safe_close(d->fd);
813 }
814
815 /* Now that the block device is released, let's also try to remove it */
816 if (control >= 0) {
817 useconds_t delay = 5 * USEC_PER_MSEC; /* A total delay of 5090 ms between 39 attempts,
818 * (4*5 + 5*10 + 5*20 + … + 3*640) = 5090. */
819
820 for (unsigned attempt = 1;; attempt++) {
821 if (ioctl(control, LOOP_CTL_REMOVE, d->nr) >= 0)
822 break;
823 if (errno != EBUSY || attempt > 38) {
824 log_debug_errno(errno, "Failed to remove device %s: %m", strna(d->node));
825 break;
826 }
827 if (attempt % 5 == 0) {
828 log_debug("Device is still busy after %u attempts…", attempt);
829 delay *= 2;
830 }
831
832 (void) usleep_safe(delay);
833 }
834 }
835
836 free(d->node);
837 sd_device_unref(d->dev);
838 free(d->backing_file);
839 return mfree(d);
840 }
841
/* Generates loop_device_ref()/loop_device_unref(); the device is released via loop_device_free() on last unref. */
DEFINE_TRIVIAL_REF_UNREF_FUNC(LoopDevice, loop_device, loop_device_free);
843
844 void loop_device_relinquish(LoopDevice *d) {
845 assert(d);
846
847 /* Don't attempt to clean up the loop device anymore from this point on. Leave the clean-ing up to the kernel
848 * itself, using the loop device "auto-clear" logic we already turned on when creating the device. */
849
850 d->relinquished = true;
851 }
852
853 void loop_device_unrelinquish(LoopDevice *d) {
854 assert(d);
855 d->relinquished = false;
856 }
857
858 int loop_device_open(
859 sd_device *dev,
860 int open_flags,
861 int lock_op,
862 LoopDevice **ret) {
863
864 _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
865 _cleanup_free_ char *node = NULL, *backing_file = NULL;
866 dev_t devnum, backing_devno = 0;
867 struct loop_info64 info;
868 ino_t backing_inode = 0;
869 uint64_t diskseq = 0;
870 LoopDevice *d;
871 const char *s;
872 int r, nr = -1;
873
874 assert(dev);
875 assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
876 assert(ret);
877
878 /* Even if fd is provided through the argument in loop_device_open_from_fd(), we reopen the inode
879 * here, instead of keeping just a dup() clone of it around, since we want to ensure that the
880 * O_DIRECT flag of the handle we keep is off, we have our own file index, and have the right
881 * read/write mode in effect. */
882 fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
883 if (fd < 0)
884 return fd;
885
886 if ((lock_op & ~LOCK_NB) != LOCK_UN) {
887 lock_fd = open_lock_fd(fd, lock_op);
888 if (lock_fd < 0)
889 return lock_fd;
890 }
891
892 if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0) {
893 #if HAVE_VALGRIND_MEMCHECK_H
894 /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
895 VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
896 #endif
897 nr = info.lo_number;
898
899 if (sd_device_get_sysattr_value(dev, "loop/backing_file", &s) >= 0) {
900 backing_file = strdup(s);
901 if (!backing_file)
902 return -ENOMEM;
903 }
904
905 backing_devno = info.lo_device;
906 backing_inode = info.lo_inode;
907 }
908
909 r = fd_get_diskseq(fd, &diskseq);
910 if (r < 0 && r != -EOPNOTSUPP)
911 return r;
912
913 uint32_t sector_size;
914 r = blockdev_get_sector_size(fd, &sector_size);
915 if (r < 0)
916 return r;
917
918 uint64_t device_size;
919 r = blockdev_get_device_size(fd, &device_size);
920 if (r < 0)
921 return r;
922
923 r = sd_device_get_devnum(dev, &devnum);
924 if (r < 0)
925 return r;
926
927 r = sd_device_get_devname(dev, &s);
928 if (r < 0)
929 return r;
930
931 node = strdup(s);
932 if (!node)
933 return -ENOMEM;
934
935 d = new(LoopDevice, 1);
936 if (!d)
937 return -ENOMEM;
938
939 *d = (LoopDevice) {
940 .n_ref = 1,
941 .fd = TAKE_FD(fd),
942 .lock_fd = TAKE_FD(lock_fd),
943 .nr = nr,
944 .node = TAKE_PTR(node),
945 .dev = sd_device_ref(dev),
946 .backing_file = TAKE_PTR(backing_file),
947 .backing_inode = backing_inode,
948 .backing_devno = backing_devno,
949 .relinquished = true, /* It's not ours, don't try to destroy it when this object is freed */
950 .devno = devnum,
951 .diskseq = diskseq,
952 .sector_size = sector_size,
953 .device_size = device_size,
954 .created = false,
955 };
956
957 *ret = d;
958 return 0;
959 }
960
961 int loop_device_open_from_fd(
962 int fd,
963 int open_flags,
964 int lock_op,
965 LoopDevice **ret) {
966
967 _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
968 int r;
969
970 r = block_device_new_from_fd(ASSERT_FD(fd), 0, &dev);
971 if (r < 0)
972 return r;
973
974 return loop_device_open(dev, open_flags, lock_op, ret);
975 }
976
977 int loop_device_open_from_path(
978 const char *path,
979 int open_flags,
980 int lock_op,
981 LoopDevice **ret) {
982
983 _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
984 int r;
985
986 assert(path);
987
988 r = block_device_new_from_path(path, 0, &dev);
989 if (r < 0)
990 return r;
991
992 return loop_device_open(dev, open_flags, lock_op, ret);
993 }
994
/* Resizes the partition the given fd refers to (assuming it refers to a kernel partition block device
 * rather than a loopback device), and changes its start offset if needed. This is a fancy wrapper
 * around BLKPG_RESIZE_PARTITION: the partition number, current start and parent ("whole") device are
 * all discovered via sysfs. offset/size are in bytes; UINT64_MAX for either means "keep current value".
 * Returns 0 on success, -ENOTTY if the device is not a partition, other negative errno on failure. */
static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) {
        char sysfs[STRLEN("/sys/dev/block/:/partition") + 2*DECIMAL_STR_MAX(dev_t) + 1];
        _cleanup_free_ char *buffer = NULL;
        uint64_t current_offset, current_size, partno;
        _cleanup_close_ int whole_fd = -EBADF;
        struct stat st;
        dev_t devno;
        int r;

        /* Resizes the partition the loopback device refer to (assuming it refers to one instead of an actual
         * loopback device), and changes the offset, if needed. This is a fancy wrapper around
         * BLKPG_RESIZE_PARTITION. */

        if (fstat(ASSERT_FD(partition_fd), &st) < 0)
                return -errno;

        assert(S_ISBLK(st.st_mode));

        /* The "partition" attribute only exists for partition devices — its absence tells us this is not
         * one, hence not resizable this way. */
        xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev));
        r = read_one_line_file(sysfs, &buffer);
        if (r == -ENOENT) /* not a partition, cannot resize */
                return -ENOTTY;
        if (r < 0)
                return r;
        r = safe_atou64(buffer, &partno);
        if (r < 0)
                return r;

        /* The "start" attribute is in 512-byte sectors; convert to bytes below. */
        xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/start", DEVNUM_FORMAT_VAL(st.st_rdev));

        buffer = mfree(buffer); /* reuse the buffer for the next sysfs read */
        r = read_one_line_file(sysfs, &buffer);
        if (r < 0)
                return r;
        r = safe_atou64(buffer, &current_offset);
        if (r < 0)
                return r;
        if (current_offset > UINT64_MAX/512U) /* would overflow when converted to bytes */
                return -EINVAL;
        current_offset *= 512U;

        r = blockdev_get_device_size(partition_fd, &current_size);
        if (r < 0)
                return r;

        /* Nothing requested, or already in the requested state? Then we are done. (Note: these checks
         * deliberately come after the sysfs validation above, so malformed devices still report errors.) */
        if (size == UINT64_MAX && offset == UINT64_MAX)
                return 0;
        if (current_size == size && current_offset == offset)
                return 0;

        /* Look up the devnum of the parent ("whole") block device, which is where BLKPG must be issued. */
        xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/../dev", DEVNUM_FORMAT_VAL(st.st_rdev));

        buffer = mfree(buffer);
        r = read_one_line_file(sysfs, &buffer);
        if (r < 0)
                return r;
        r = parse_devnum(buffer, &devno);
        if (r < 0)
                return r;

        whole_fd = r = device_open_from_devnum(S_IFBLK, devno, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY, NULL);
        if (r < 0)
                return r;

        return block_device_resize_partition(
                        whole_fd,
                        partno,
                        offset == UINT64_MAX ? current_offset : offset,
                        size == UINT64_MAX ? current_size : size);
}
1065
1066 int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) {
1067 struct loop_info64 info;
1068
1069 assert(d);
1070 assert(d->fd >= 0);
1071
1072 /* Changes the offset/start of the loop device relative to the beginning of the underlying file or
1073 * block device. If this loop device actually refers to a partition and not a loopback device, we'll
1074 * try to adjust the partition offsets instead.
1075 *
1076 * If either offset or size is UINT64_MAX we won't change that parameter. */
1077
1078 if (d->nr < 0) /* not a loopback device */
1079 return resize_partition(d->fd, offset, size);
1080
1081 if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0)
1082 return -errno;
1083
1084 #if HAVE_VALGRIND_MEMCHECK_H
1085 /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
1086 VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
1087 #endif
1088
1089 if (size == UINT64_MAX && offset == UINT64_MAX)
1090 return 0;
1091 if (info.lo_sizelimit == size && info.lo_offset == offset)
1092 return 0;
1093
1094 if (size != UINT64_MAX)
1095 info.lo_sizelimit = size;
1096 if (offset != UINT64_MAX)
1097 info.lo_offset = offset;
1098
1099 return RET_NERRNO(ioctl(d->fd, LOOP_SET_STATUS64, &info));
1100 }
1101
1102 int loop_device_flock(LoopDevice *d, int operation) {
1103 assert(IN_SET(operation & ~LOCK_NB, LOCK_UN, LOCK_SH, LOCK_EX));
1104 assert(d);
1105
1106 /* When unlocking just close the lock fd */
1107 if ((operation & ~LOCK_NB) == LOCK_UN) {
1108 d->lock_fd = safe_close(d->lock_fd);
1109 return 0;
1110 }
1111
1112 /* If we had no lock fd so far, create one and lock it right-away */
1113 if (d->lock_fd < 0) {
1114 d->lock_fd = open_lock_fd(ASSERT_FD(d->fd), operation);
1115 if (d->lock_fd < 0)
1116 return d->lock_fd;
1117
1118 return 0;
1119 }
1120
1121 /* Otherwise change the current lock mode on the existing fd */
1122 return RET_NERRNO(flock(d->lock_fd, operation));
1123 }
1124
/* Flushes the loop device to stable storage. Returns 0 on success, negative errno on failure. */
int loop_device_sync(LoopDevice *d) {
        assert(d);

        /* We also do this implicitly in loop_device_unref(). Doing this explicitly here has the benefit that
         * we can check the return value though. */

        return RET_NERRNO(fsync(ASSERT_FD(d->fd)));
}
1133
1134 int loop_device_set_autoclear(LoopDevice *d, bool autoclear) {
1135 struct loop_info64 info;
1136
1137 assert(d);
1138
1139 if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
1140 return -errno;
1141
1142 if (autoclear == FLAGS_SET(info.lo_flags, LO_FLAGS_AUTOCLEAR))
1143 return 0;
1144
1145 SET_FLAG(info.lo_flags, LO_FLAGS_AUTOCLEAR, autoclear);
1146
1147 if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
1148 return -errno;
1149
1150 return 1;
1151 }
1152
1153 int loop_device_set_filename(LoopDevice *d, const char *name) {
1154 struct loop_info64 info;
1155
1156 assert(d);
1157
1158 /* Sets the .lo_file_name of the loopback device. This is supposed to contain the path to the file
1159 * backing the block device, but is actually just a free-form string you can pass to the kernel. Most
1160 * tools that actually care for the backing file path use the sysfs attribute file loop/backing_file
1161 * which is a kernel generated string, subject to file system namespaces and such.
1162 *
1163 * .lo_file_name is useful since userspace can select it freely when creating a loopback block
1164 * device, and we can use it for /dev/disk/by-loop-ref/ symlinks, and similar, so that apps can
1165 * recognize their own loopback files. */
1166
1167 if (name && strlen(name) >= sizeof(info.lo_file_name))
1168 return -ENOBUFS;
1169
1170 if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
1171 return -errno;
1172
1173 if (strneq((char*) info.lo_file_name, strempty(name), sizeof(info.lo_file_name)))
1174 return 0;
1175
1176 if (name) {
1177 strncpy((char*) info.lo_file_name, name, sizeof(info.lo_file_name)-1);
1178 info.lo_file_name[sizeof(info.lo_file_name)-1] = 0;
1179 } else
1180 memzero(info.lo_file_name, sizeof(info.lo_file_name));
1181
1182 if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
1183 return -errno;
1184
1185 return 1;
1186 }