src/shared/loop-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
   2
   3 #if HAVE_VALGRIND_MEMCHECK_H
   4 #include <valgrind/memcheck.h>
   5 #endif
   6
   7 #include <errno.h>
   8 #include <fcntl.h>
   9 #include <linux/blkpg.h>
  10 #include <linux/fs.h>
  11 #include <linux/loop.h>
  12 #include <sys/file.h>
  13 #include <sys/ioctl.h>
  14 #include <unistd.h>
  15
  16 #include "sd-device.h"
  17
  18 #include "alloc-util.h"
  19 #include "blockdev-util.h"
  20 #include "data-fd-util.h"
  21 #include "device-util.h"
  22 #include "devnum-util.h"
  23 #include "dissect-image.h"
  24 #include "env-util.h"
  25 #include "errno-util.h"
  26 #include "fd-util.h"
  27 #include "fs-util.h"
  28 #include "fileio.h"
  29 #include "loop-util.h"
  30 #include "missing_loop.h"
  31 #include "parse-util.h"
  32 #include "path-util.h"
  33 #include "random-util.h"
  34 #include "stat-util.h"
  35 #include "stdio-util.h"
  36 #include "string-util.h"
  37 #include "tmpfile-util.h"
  38
  39 static void cleanup_clear_loop_close(int *fd) {
  40         if (*fd < 0)
  41                 return;
  42
  43         (void) ioctl(*fd, LOOP_CLR_FD);
  44         (void) safe_close(*fd);
  45 }
  46
  47 static int loop_is_bound(int fd) {
  48         struct loop_info64 info;
  49
  50         if (ioctl(ASSERT_FD(fd), LOOP_GET_STATUS64, &info) < 0) {
  51                 if (errno == ENXIO)
  52                         return false; /* not bound! */
  53
  54                 return -errno;
  55         }
  56
  57         return true; /* bound! */
  58 }
  59
  60 static int get_current_uevent_seqnum(uint64_t *ret) {
  61         _cleanup_free_ char *p = NULL;
  62         int r;
  63
  64         r = read_full_virtual_file("/sys/kernel/uevent_seqnum", &p, NULL);
  65         if (r < 0)
  66                 return log_debug_errno(r, "Failed to read current uevent sequence number: %m");
  67
  68         r = safe_atou64(strstrip(p), ret);
  69         if (r < 0)
  70                 return log_debug_errno(r, "Failed to parse current uevent sequence number: %s", p);
  71
  72         return 0;
  73 }
  74
  75 static int open_lock_fd(int primary_fd, int operation) {
  76         _cleanup_close_ int lock_fd = -EBADF;
  77
  78         assert(IN_SET(operation & ~LOCK_NB, LOCK_SH, LOCK_EX));
  79
  80         lock_fd = fd_reopen(ASSERT_FD(primary_fd), O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
  81         if (lock_fd < 0)
  82                 return lock_fd;
  83
  84         if (flock(lock_fd, operation) < 0)
  85                 return -errno;
  86
  87         return TAKE_FD(lock_fd);
  88 }
  89
  90 static int loop_configure_verify_direct_io(int fd, const struct loop_config *c) {
  91         assert(fd >= 0);
  92         assert(c);
  93
  94         if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO)) {
  95                 struct loop_info64 info;
  96
  97                 if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0)
  98                         return log_debug_errno(errno, "Failed to issue LOOP_GET_STATUS64: %m");
  99
 100 #if HAVE_VALGRIND_MEMCHECK_H
 101                 VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
 102 #endif
 103
 104                 /* On older kernels (<= 5.3) it was necessary to set the block size of the loopback block
 105                  * device to the logical block size of the underlying file system. Since there was no nice
 106                  * way to query the value, we are not bothering to do this however. On newer kernels the
 107                  * block size is propagated automatically and does not require intervention from us. We'll
 108                  * check here if enabling direct IO worked, to make this easily debuggable however.
 109                  *
 110                  * (Should anyone really care and actually wants direct IO on old kernels: it might be worth
 111                  * enabling direct IO with iteratively larger block sizes until it eventually works.) */
 112                 if (!FLAGS_SET(info.lo_flags, LO_FLAGS_DIRECT_IO))
 113                         log_debug("Could not enable direct IO mode, proceeding in buffered IO mode.");
 114         }
 115
 116         return 0;
 117 }
 118
 119 static int loop_configure_verify(int fd, const struct loop_config *c) {
 120         bool broken = false;
 121         int r;
 122
 123         assert(fd >= 0);
 124         assert(c);
 125
 126         if (c->block_size != 0) {
 127                 uint32_t ssz;
 128
 129                 r = blockdev_get_sector_size(fd, &ssz);
 130                 if (r < 0)
 131                         return r;
 132
 133                 if (ssz != c->block_size) {
 134                         log_debug("LOOP_CONFIGURE didn't honour requested block size %" PRIu32 ", got %" PRIu32 " instead. Ignoring.", c->block_size, ssz);
 135                         broken = true;
 136                 }
 137         }
 138
 139         if (c->info.lo_sizelimit != 0) {
 140                 /* Kernel 5.8 vanilla doesn't properly propagate the size limit into the
 141                  * block device. If it's used, let's immediately check if it had the desired
 142                  * effect hence. And if not use classic LOOP_SET_STATUS64. */
 143                 uint64_t z;
 144
 145                 if (ioctl(fd, BLKGETSIZE64, &z) < 0)
 146                         return -errno;
 147
 148                 if (z != c->info.lo_sizelimit) {
 149                         log_debug("LOOP_CONFIGURE is broken, doesn't honour .info.lo_sizelimit. Falling back to LOOP_SET_STATUS64.");
 150                         broken = true;
 151                 }
 152         }
 153
 154         if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_PARTSCAN)) {
 155                 /* Kernel 5.8 vanilla doesn't properly propagate the partition scanning flag
 156                  * into the block device. Let's hence verify if things work correctly here
 157                  * before returning. */
 158
 159                 r = blockdev_partscan_enabled(fd);
 160                 if (r < 0)
 161                         return r;
 162                 if (r == 0) {
 163                         log_debug("LOOP_CONFIGURE is broken, doesn't honour LO_FLAGS_PARTSCAN. Falling back to LOOP_SET_STATUS64.");
 164                         broken = true;
 165                 }
 166         }
 167
 168         r = loop_configure_verify_direct_io(fd, c);
 169         if (r < 0)
 170                 return r;
 171
 172         return !broken;
 173 }
 174
 175 static int loop_configure_fallback(int fd, const struct loop_config *c) {
 176         struct loop_info64 info_copy;
 177         int r;
 178
 179         assert(fd >= 0);
 180         assert(c);
 181
 182         /* Only some of the flags LOOP_CONFIGURE can set are also settable via LOOP_SET_STATUS64, hence mask
 183          * them out. */
 184         info_copy = c->info;
 185         info_copy.lo_flags &= LOOP_SET_STATUS_SETTABLE_FLAGS;
 186
 187         /* Since kernel commit 5db470e229e22b7eda6e23b5566e532c96fb5bc3 (kernel v5.0) the LOOP_SET_STATUS64
 188          * ioctl can return EAGAIN in case we change the info.lo_offset field, if someone else is accessing the
 189          * block device while we try to reconfigure it. This is a pretty common case, since udev might
 190          * instantly start probing the device as soon as we attach an fd to it. Hence handle it in two ways:
 191          * first, let's take the BSD lock to ensure that udev will not step in between the point in
 192          * time where we attach the fd and where we reconfigure the device. Secondly, let's wait 50ms on
 193          * EAGAIN and retry. The former should be an efficient mechanism to avoid we have to wait 50ms
 194          * needlessly if we are just racing against udev. The latter is protection against all other cases,
 195          * i.e. peers that do not take the BSD lock. */
 196
 197         for (unsigned n_attempts = 0;;) {
 198                 if (ioctl(fd, LOOP_SET_STATUS64, &info_copy) >= 0)
 199                         break;
 200
 201                 if (errno != EAGAIN || ++n_attempts >= 64)
 202                         return log_debug_errno(errno, "Failed to configure loopback block device: %m");
 203
 204                 /* Sleep some random time, but at least 10ms, at most 250ms. Increase the delay the more
 205                  * failed attempts we see */
 206                 (void) usleep_safe(UINT64_C(10) * USEC_PER_MSEC +
 207                               random_u64_range(UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
 208         }
 209
 210         /* Work around a kernel bug, where changing offset/size of the loopback device doesn't correctly
 211          * invalidate the buffer cache. For details see:
 212          *
 213          *     https://android.googlesource.com/platform/system/apex/+/bef74542fbbb4cd629793f4efee8e0053b360570
 214          *
 215          * This was fixed in kernel 5.0, see:
 216          *
 217          *     https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5db470e229e22b7eda6e23b5566e532c96fb5bc3
 218          *
 219          * We'll run the work-around here in the legacy LOOP_SET_STATUS64 codepath. In the LOOP_CONFIGURE
 220          * codepath above it should not be necessary. */
 221         if (c->info.lo_offset != 0 || c->info.lo_sizelimit != 0)
 222                 if (ioctl(fd, BLKFLSBUF, 0) < 0)
 223                         log_debug_errno(errno, "Failed to issue BLKFLSBUF ioctl, ignoring: %m");
 224
 225         /* If a block size is requested then try to configure it. If that doesn't work, ignore errors, but
 226          * afterwards, let's validate what is in effect, and if it doesn't match what we want, fail */
 227         if (c->block_size != 0) {
 228                 uint32_t ssz;
 229
 230                 if (ioctl(fd, LOOP_SET_BLOCK_SIZE, (unsigned long) c->block_size) < 0)
 231                         log_debug_errno(errno, "Failed to set sector size, ignoring: %m");
 232
 233                 r = blockdev_get_sector_size(fd, &ssz);
 234                 if (r < 0)
 235                         return log_debug_errno(r, "Failed to read sector size: %m");
 236                 if (ssz != c->block_size)
 237                         return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Sector size of loopback device doesn't match what we requested, refusing.");
 238         }
 239
 240         /* LO_FLAGS_DIRECT_IO is a flags we need to configure via explicit ioctls. */
 241         if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO))
 242                 if (ioctl(fd, LOOP_SET_DIRECT_IO, 1UL) < 0)
 243                         log_debug_errno(errno, "Failed to enable direct IO mode, ignoring: %m");
 244
 245         return loop_configure_verify_direct_io(fd, c);
 246 }
 247
 248 static int loop_configure(
 249                 int nr,
 250                 int open_flags,
 251                 int lock_op,
 252                 const struct loop_config *c,
 253                 LoopDevice **ret) {
 254
 255         static bool loop_configure_broken = false;
 256
 257         _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
 258         _cleanup_(cleanup_clear_loop_close) int loop_with_fd = -EBADF; /* This must be declared before lock_fd. */
 259         _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
 260         _cleanup_free_ char *node = NULL;
 261         uint64_t diskseq = 0, seqnum = UINT64_MAX;
 262         usec_t timestamp = USEC_INFINITY;
 263         dev_t devno;
 264         int r;
 265
 266         assert(nr >= 0);
 267         assert(c);
 268         assert(ret);
 269
 270         if (asprintf(&node, "/dev/loop%i", nr) < 0)
 271                 return log_oom_debug();
 272
 273         r = sd_device_new_from_devname(&dev, node);
 274         if (r < 0)
 275                 return log_debug_errno(r, "Failed to create sd_device object for \"%s\": %m", node);
 276
 277         r = sd_device_get_devnum(dev, &devno);
 278         if (r < 0)
 279                 return log_device_debug_errno(dev, r, "Failed to get devnum: %m");
 280
 281         fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
 282         if (fd < 0)
 283                 return log_device_debug_errno(dev, fd, "Failed to open device: %m");
 284
 285         /* Let's lock the device before we do anything. We take the BSD lock on a second, separately opened
 286          * fd for the device. udev after all watches for close() events (specifically IN_CLOSE_WRITE) on
 287          * block devices to reprobe them, hence by having a separate fd we will later close() we can ensure
 288          * we trigger udev after everything is done. If we'd lock our own fd instead and keep it open for a
 289          * long time udev would possibly never run on it again, even though the fd is unlocked, simply
 290          * because we never close() it. It also has the nice benefit we can use the _cleanup_close_ logic to
 291          * automatically release the lock, after we are done. */
 292         lock_fd = open_lock_fd(fd, LOCK_EX);
 293         if (lock_fd < 0)
 294                 return log_device_debug_errno(dev, lock_fd, "Failed to acquire lock: %m");
 295
 296         log_device_debug(dev, "Acquired exclusive lock.");
 297
 298         /* Let's see if backing file is really unattached. Someone may already attach a backing file without
 299          * taking BSD lock. */
 300         r = loop_is_bound(fd);
 301         if (r < 0)
 302                 return log_device_debug_errno(dev, r, "Failed to check if the loopback block device is bound: %m");
 303         if (r > 0)
 304                 return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EBUSY),
 305                                               "The loopback block device is already bound, ignoring.");
 306
 307         /* Let's see if the device is really detached, i.e. currently has no associated partition block
 308          * devices. On various kernels (such as 5.8) it is possible to have a loopback block device that
 309          * superficially is detached but still has partition block devices associated for it. Let's then
 310          * manually remove the partitions via BLKPG, and tell the caller we did that via EUCLEAN, so they try
 311          * again. */
 312         r = block_device_remove_all_partitions(dev, fd);
 313         if (r < 0)
 314                 return log_device_debug_errno(dev, r, "Failed to remove partitions on the loopback block device: %m");
 315         if (r > 0)
 316                 /* Removed all partitions. Let's report this to the caller, to try again, and count this as
 317                  * an attempt. */
 318                 return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EUCLEAN),
 319                                               "Removed partitions on the loopback block device.");
 320
 321         if (!loop_configure_broken) {
 322                 /* Acquire uevent seqnum immediately before attaching the loopback device. This allows
 323                  * callers to ignore all uevents with a seqnum before this one, if they need to associate
 324                  * uevent with this attachment. Doing so isn't race-free though, as uevents that happen in
 325                  * the window between this reading of the seqnum, and the LOOP_CONFIGURE call might still be
 326                  * mistaken as originating from our attachment, even though might be caused by an earlier
 327                  * use. But doing this at least shortens the race window a bit. */
 328                 r = get_current_uevent_seqnum(&seqnum);
 329                 if (r < 0)
 330                         return log_device_debug_errno(dev, r, "Failed to get the current uevent seqnum: %m");
 331
 332                 timestamp = now(CLOCK_MONOTONIC);
 333
 334                 if (ioctl(fd, LOOP_CONFIGURE, c) < 0) {
 335                         /* Do fallback only if LOOP_CONFIGURE is not supported, propagate all other
 336                          * errors. Note that the kernel is weird: non-existing ioctls currently return EINVAL
 337                          * rather than ENOTTY on loopback block devices. They should fix that in the kernel,
 338                          * but in the meantime we accept both here. */
 339                         if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EINVAL)
 340                                 return log_device_debug_errno(dev, errno, "ioctl(LOOP_CONFIGURE) failed: %m");
 341
 342                         loop_configure_broken = true;
 343                 } else {
 344                         loop_with_fd = TAKE_FD(fd);
 345
 346                         r = loop_configure_verify(loop_with_fd, c);
 347                         if (r < 0)
 348                                 return log_device_debug_errno(dev, r, "Failed to verify if loopback block device is correctly configured: %m");
 349                         if (r == 0) {
 350                                 /* LOOP_CONFIGURE doesn't work. Remember that. */
 351                                 loop_configure_broken = true;
 352
 353                                 /* We return EBUSY here instead of retrying immediately with LOOP_SET_FD,
 354                                  * because LOOP_CLR_FD is async: if the operation cannot be executed right
 355                                  * away it just sets the autoclear flag on the device. This means there's a
 356                                  * good chance we cannot actually reuse the loopback device right-away. Hence
 357                                  * let's assume it's busy, avoid the trouble and let the calling loop call us
 358                                  * again with a new, likely unused device. */
 359                                 return -EBUSY;
 360                         }
 361                 }
 362         }
 363
 364         if (loop_configure_broken) {
 365                 /* Let's read the seqnum again, to shorten the window. */
 366                 r = get_current_uevent_seqnum(&seqnum);
 367                 if (r < 0)
 368                         return log_device_debug_errno(dev, r, "Failed to get the current uevent seqnum: %m");
 369
 370                 timestamp = now(CLOCK_MONOTONIC);
 371
 372                 if (ioctl(fd, LOOP_SET_FD, c->fd) < 0)
 373                         return log_device_debug_errno(dev, errno, "ioctl(LOOP_SET_FD) failed: %m");
 374
 375                 loop_with_fd = TAKE_FD(fd);
 376
 377                 r = loop_configure_fallback(loop_with_fd, c);
 378                 if (r < 0)
 379                         return r;
 380         }
 381
 382         r = fd_get_diskseq(loop_with_fd, &diskseq);
 383         if (r < 0 && r != -EOPNOTSUPP)
 384                 return log_device_debug_errno(dev, r, "Failed to get diskseq: %m");
 385
 386         switch (lock_op & ~LOCK_NB) {
 387         case LOCK_EX: /* Already in effect */
 388                 break;
 389         case LOCK_SH: /* Downgrade */
 390                 if (flock(lock_fd, lock_op) < 0)
 391                         return log_device_debug_errno(dev, errno, "Failed to downgrade lock level: %m");
 392                 break;
 393         case LOCK_UN: /* Release */
 394                 lock_fd = safe_close(lock_fd);
 395                 break;
 396         default:
 397                 assert_not_reached();
 398         }
 399
 400         LoopDevice *d = new(LoopDevice, 1);
 401         if (!d)
 402                 return log_oom_debug();
 403
 404         *d = (LoopDevice) {
 405                 .n_ref = 1,
 406                 .fd = TAKE_FD(loop_with_fd),
 407                 .lock_fd = TAKE_FD(lock_fd),
 408                 .node = TAKE_PTR(node),
 409                 .nr = nr,
 410                 .devno = devno,
 411                 .dev = TAKE_PTR(dev),
 412                 .diskseq = diskseq,
 413                 .uevent_seqnum_not_before = seqnum,
 414                 .timestamp_not_before = timestamp,
 415                 .sector_size = c->block_size,
 416         };
 417
 418         *ret = TAKE_PTR(d);
 419         return 0;
 420 }
 421
 422 static int loop_device_make_internal(
 423                 const char *path,
 424                 int fd,
 425                 int open_flags,
 426                 uint64_t offset,
 427                 uint64_t size,
 428                 uint32_t sector_size,
 429                 uint32_t loop_flags,
 430                 int lock_op,
 431                 LoopDevice **ret) {
 432
 433         _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
 434         _cleanup_close_ int direct_io_fd = -EBADF, control = -EBADF;
 435         _cleanup_free_ char *backing_file = NULL;
 436         struct loop_config config;
 437         int r, f_flags;
 438         struct stat st;
 439
 440         assert(ret);
 441         assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
 442
 443         if (fstat(ASSERT_FD(fd), &st) < 0)
 444                 return -errno;
 445
 446         if (S_ISBLK(st.st_mode)) {
 447                 if (offset == 0 && IN_SET(size, 0, UINT64_MAX))
 448                         /* If this is already a block device and we are supposed to cover the whole of it
 449                          * then store an fd to the original open device node — and do not actually create an
 450                          * unnecessary loopback device for it. */
 451                         return loop_device_open_from_fd(fd, open_flags, lock_op, ret);
 452         } else {
 453                 r = stat_verify_regular(&st);
 454                 if (r < 0)
 455                         return r;
 456         }
 457
 458         if (path) {
 459                 r = path_make_absolute_cwd(path, &backing_file);
 460                 if (r < 0)
 461                         return r;
 462
 463                 path_simplify(backing_file);
 464         } else {
 465                 r = fd_get_path(fd, &backing_file);
 466                 if (r < 0)
 467                         return r;
 468         }
 469
 470         f_flags = fcntl(fd, F_GETFL);
 471         if (f_flags < 0)
 472                 return -errno;
 473
 474         if (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) != FLAGS_SET(f_flags, O_DIRECT)) {
 475                 /* If LO_FLAGS_DIRECT_IO is requested, then make sure we have the fd open with O_DIRECT, as
 476                  * that's required. Conversely, if it's off require that O_DIRECT is off too (that's because
 477                  * new kernels will implicitly enable LO_FLAGS_DIRECT_IO if O_DIRECT is set).
 478                  *
 479                  * Our intention here is that LO_FLAGS_DIRECT_IO is the primary knob, and O_DIRECT derived
 480                  * from that automatically. */
 481
 482                 direct_io_fd = fd_reopen(fd, (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0)|O_CLOEXEC|O_NONBLOCK|open_flags);
 483                 if (direct_io_fd < 0) {
 484                         if (!FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO))
 485                                 return log_debug_errno(errno, "Failed to reopen file descriptor without O_DIRECT: %m");
 486
 487                         /* Some file systems might not support O_DIRECT, let's gracefully continue without it then. */
 488                         log_debug_errno(errno, "Failed to enable O_DIRECT for backing file descriptor for loopback device. Continuing without.");
 489                         loop_flags &= ~LO_FLAGS_DIRECT_IO;
 490                 } else
 491                         fd = direct_io_fd; /* From now on, operate on our new O_DIRECT fd */
 492         }
 493
 494         control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
 495         if (control < 0)
 496                 return -errno;
 497
 498         if (sector_size == 0)
 499                 /* If no sector size is specified, default to the classic default */
 500                 sector_size = 512;
 501         else if (sector_size == UINT32_MAX) {
 502
 503                 if (S_ISBLK(st.st_mode))
 504                         /* If the sector size is specified as UINT32_MAX we'll propagate the sector size of
 505                          * the underlying block device. */
 506                         r = blockdev_get_sector_size(fd, &sector_size);
 507                 else {
 508                         _cleanup_close_ int non_direct_io_fd = -EBADF;
 509                         int probe_fd;
 510
 511                         assert(S_ISREG(st.st_mode));
 512
 513                         /* If sector size is specified as UINT32_MAX, we'll try to probe the right sector
 514                          * size of the image in question by looking for the GPT partition header at various
 515                          * offsets. This of course only works if the image already has a disk label.
 516                          *
 517                          * So here we actually want to read the file contents ourselves. This is quite likely
 518                          * not going to work if we managed to enable O_DIRECT, because in such a case there
 519                          * are some pretty strict alignment requirements to offset, size and target, but
 520                          * there's no way to query what alignment specifically is actually required. Hence,
 521                          * let's avoid the mess, and temporarily open an fd without O_DIRECT for the probing
 522                          * logic. */
 523
 524                         if (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO)) {
 525                                 non_direct_io_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
 526                                 if (non_direct_io_fd < 0)
 527                                         return non_direct_io_fd;
 528
 529                                 probe_fd = non_direct_io_fd;
 530                         } else
 531                                 probe_fd = fd;
 532
 533                         r = probe_sector_size(probe_fd, &sector_size);
 534                 }
 535                 if (r < 0)
 536                         return r;
 537         }
 538
 539         config = (struct loop_config) {
 540                 .fd = fd,
 541                 .block_size = sector_size,
 542                 .info = {
 543                         /* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
 544                         .lo_flags = (loop_flags & ~LO_FLAGS_READ_ONLY) | ((open_flags & O_ACCMODE) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR,
 545                         .lo_offset = offset,
 546                         .lo_sizelimit = size == UINT64_MAX ? 0 : size,
 547                 },
 548         };
 549
 550         /* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might
 551          * be gone already, taken by somebody else racing against us. */
 552         for (unsigned n_attempts = 0;;) {
 553                 usec_t usec;
 554                 int nr;
 555
 556                 /* Let's take a lock on the control device first. On a busy system, where many programs
 557                  * attempt to allocate a loopback device at the same time, we might otherwise keep looping
 558                  * around relatively heavy operations: asking for a free loopback device, then opening it,
 559                  * validating it, attaching something to it. Let's serialize this whole operation, to make
 560                  * unnecessary busywork less likely. Note that this is just something we do to optimize our
 561                  * own code (and whoever else decides to use LOCK_EX locks for this), taking this lock is not
 562                  * necessary, it just means it's less likely we have to iterate through this loop again and
 563                  * again if our own code races against our own code.
 564                  *
 565                  * Note: our lock protocol is to take the /dev/loop-control lock first, and the block device
 566                  * lock second, if both are taken, and always in this order, to avoid ABBA locking issues. */
 567                 if (flock(control, LOCK_EX) < 0)
 568                         return -errno;
 569
 570                 nr = ioctl(control, LOOP_CTL_GET_FREE);
 571                 if (nr < 0)
 572                         return -errno;
 573
 574                 r = loop_configure(nr, open_flags, lock_op, &config, &d);
 575                 if (r >= 0)
 576                         break;
 577
 578                 /* -ENODEV or friends: Somebody might've gotten the same number from the kernel, used the
 579                  * device, and called LOOP_CTL_REMOVE on it. Let's retry with a new number.
 580                  * -EBUSY: a file descriptor is already bound to the loopback block device.
 581                  * -EUCLEAN: some left-over partition devices that were cleaned up. */
 582                 if (!ERRNO_IS_DEVICE_ABSENT(r) && !IN_SET(r, -EBUSY, -EUCLEAN))
 583                         return r;
 584
 585                 /* OK, this didn't work, let's try again a bit later, but first release the lock on the
 586                  * control device */
 587                 if (flock(control, LOCK_UN) < 0)
 588                         return -errno;
 589
 590                 if (++n_attempts >= 64) /* Give up eventually */
 591                         return -EBUSY;
 592
 593                 /* Wait some random time, to make collision less likely. Let's pick a random time in the
 594                  * range 0ms…250ms, linearly scaled by the number of failed attempts. */
 595                 usec = random_u64_range(UINT64_C(10) * USEC_PER_MSEC +
 596                                         UINT64_C(240) * USEC_PER_MSEC * n_attempts/64);
 597                 log_debug("Trying again after %s.", FORMAT_TIMESPAN(usec, USEC_PER_MSEC));
 598                 (void) usleep_safe(usec);
 599         }
 600
 601         d->backing_file = TAKE_PTR(backing_file);
 602         d->backing_inode = st.st_ino;
 603         d->backing_devno = st.st_dev;
 604
 605         log_debug("Successfully acquired %s, devno=%u:%u, nr=%i, diskseq=%" PRIu64,
 606                   d->node,
 607                   major(d->devno), minor(d->devno),
 608                   d->nr,
 609                   d->diskseq);
 610
 611         *ret = TAKE_PTR(d);
 612         return 0;
 613 }
 614
 615 static uint32_t loop_flags_mangle(uint32_t loop_flags) {
 616         int r;
 617
 618         r = getenv_bool("SYSTEMD_LOOP_DIRECT_IO");
 619         if (r < 0 && r != -ENXIO)
 620                 log_debug_errno(r, "Failed to parse $SYSTEMD_LOOP_DIRECT_IO, ignoring: %m");
 621
 622         return UPDATE_FLAG(loop_flags, LO_FLAGS_DIRECT_IO, r != 0); /* Turn on LO_FLAGS_DIRECT_IO by default, unless explicitly configured to off. */
 623 }
 624
 625 int loop_device_make(
 626                 int fd,
 627                 int open_flags,
 628                 uint64_t offset,
 629                 uint64_t size,
 630                 uint32_t sector_size,
 631                 uint32_t loop_flags,
 632                 int lock_op,
 633                 LoopDevice **ret) {
 634
 635         assert(fd >= 0);
 636         assert(ret);
 637
 638         return loop_device_make_internal(
 639                         NULL,
 640                         fd,
 641                         open_flags,
 642                         offset,
 643                         size,
 644                         sector_size,
 645                         loop_flags_mangle(loop_flags),
 646                         lock_op,
 647                         ret);
 648 }
 649
 650 int loop_device_make_by_path_at(
 651                 int dir_fd,
 652                 const char *path,
 653                 int open_flags,
 654                 uint32_t sector_size,
 655                 uint32_t loop_flags,
 656                 int lock_op,
 657                 LoopDevice **ret) {
 658
 659         int r, basic_flags, direct_flags, rdwr_flags;
 660         _cleanup_close_ int fd = -EBADF;
 661         bool direct = false;
 662
 663         assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
 664         assert(path);
 665         assert(ret);
 666         assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));
 667
 668         /* Passing < 0 as open_flags here means we'll try to open the device writable if we can, retrying
 669          * read-only if we cannot. */
 670
 671         loop_flags = loop_flags_mangle(loop_flags);
 672
 673         /* Let's open with O_DIRECT if we can. But not all file systems support that, hence fall back to
 674          * non-O_DIRECT mode automatically, if it fails. */
 675
 676         basic_flags = O_CLOEXEC|O_NONBLOCK|O_NOCTTY;
 677         direct_flags = FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0;
 678         rdwr_flags = open_flags >= 0 ? open_flags : O_RDWR;
 679
 680         fd = xopenat(dir_fd, path, basic_flags|direct_flags|rdwr_flags, /* xopen_flags = */ 0, /* mode = */ 0);
 681         if (fd < 0 && direct_flags != 0) /* If we had O_DIRECT on, and things failed with that, let's immediately try again without */
 682                 fd = xopenat(dir_fd, path, basic_flags|rdwr_flags, /* xopen_flags = */ 0, /* mode = */ 0);
 683         else
 684                 direct = direct_flags != 0;
 685         if (fd < 0) {
 686                 r = -errno;
 687
 688                 /* Retry read-only? */
 689                 if (open_flags >= 0 || !(ERRNO_IS_PRIVILEGE(r) || r == -EROFS))
 690                         return r;
 691
 692                 fd = xopenat(dir_fd, path, basic_flags|direct_flags|O_RDONLY, /* xopen_flags = */ 0, /* mode = */ 0);
 693                 if (fd < 0 && direct_flags != 0) /* as above */
 694                         fd = xopenat(dir_fd, path, basic_flags|O_RDONLY, /* xopen_flags = */ 0, /* mode = */ 0);
 695                 else
 696                         direct = direct_flags != 0;
 697                 if (fd < 0)
 698                         return r; /* Propagate original error */
 699
 700                 open_flags = O_RDONLY;
 701         } else if (open_flags < 0)
 702                 open_flags = O_RDWR;
 703
 704         log_debug("Opened '%s' in %s access mode%s, with O_DIRECT %s%s.",
 705                   path,
 706                   open_flags == O_RDWR ? "O_RDWR" : "O_RDONLY",
 707                   open_flags != rdwr_flags ? " (O_RDWR was requested but not allowed)" : "",
 708                   direct ? "enabled" : "disabled",
 709                   direct != (direct_flags != 0) ? " (O_DIRECT was requested but not supported)" : "");
 710
 711         return loop_device_make_internal(
 712                         dir_fd == AT_FDCWD ? path : NULL,
 713                         fd,
 714                         open_flags,
 715                         /* offset = */ 0,
 716                         /* size = */ 0,
 717                         sector_size,
 718                         loop_flags,
 719                         lock_op,
 720                         ret);
 721 }
 722
 723 int loop_device_make_by_path_memory(
 724                 const char *path,
 725                 int open_flags,
 726                 uint32_t sector_size,
 727                 uint32_t loop_flags,
 728                 int lock_op,
 729                 LoopDevice **ret) {
 730
 731         _cleanup_close_ int fd = -EBADF, mfd = -EBADF;
 732         _cleanup_free_ char *fn = NULL;
 733         struct stat st;
 734         int r;
 735
 736         assert(path);
 737         assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
 738         assert(ret);
 739
 740         loop_flags &= ~LO_FLAGS_DIRECT_IO; /* memfds don't support O_DIRECT, hence LO_FLAGS_DIRECT_IO can't be used either */
 741
 742         fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|O_RDONLY);
 743         if (fd < 0)
 744                 return -errno;
 745
 746         if (fstat(fd, &st) < 0)
 747                 return -errno;
 748
 749         if (!S_ISREG(st.st_mode) && !S_ISBLK(st.st_mode))
 750                 return -EBADF;
 751
 752         r = path_extract_filename(path, &fn);
 753         if (r < 0)
 754                 return r;
 755
 756         mfd = memfd_clone_fd(fd, fn, open_flags|O_CLOEXEC);
 757         if (mfd < 0)
 758                 return mfd;
 759
 760         fd = safe_close(fd); /* Let's close the original early */
 761
 762         return loop_device_make_internal(NULL, mfd, open_flags, 0, 0, sector_size, loop_flags, lock_op, ret);
 763 }
 764
/* Destroys a LoopDevice object and releases everything it references. Accepts NULL (in which case nothing
 * happens) and always returns NULL.
 *
 * Teardown happens in this order: the lock fd is closed, the block device is synced and closed, and —
 * only if the device is neither foreign nor relinquished — its partitions are removed, the loop device is
 * cleared and finally its removal is requested via /dev/loop-control. All failures along the way are
 * logged at debug level and otherwise ignored. */
static LoopDevice* loop_device_free(LoopDevice *d) {
        _cleanup_close_ int control = -EBADF;
        int r;

        if (!d)
                return NULL;

        /* Release any lock we might have on the device first. We want to open+lock the /dev/loop-control
         * device below, but our lock protocol says that if both control and block device locks are taken,
         * the control lock needs to be taken first, the block device lock second — in order to avoid ABBA
         * locking issues. Moreover, we want to issue LOOP_CLR_FD on the block device further down, and that
         * would fail if we had another fd open to the device. */
        d->lock_fd = safe_close(d->lock_fd);

        /* Let's open the control device early, and lock it, so that we can release our block device and
         * delete it in a synchronized fashion, and allocators won't needlessly see the block device as free
         * while we are about to delete it. */
        if (!LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished) {
                control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
                if (control < 0)
                        log_debug_errno(errno, "Failed to open loop control device, cannot remove loop device '%s', ignoring: %m", strna(d->node));
                else if (flock(control, LOCK_EX) < 0)
                        /* Note that we carry on with the (unlocked) control fd in this case */
                        log_debug_errno(errno, "Failed to lock loop control device, ignoring: %m");
        }

        /* Then let's release the loopback block device */
        if (d->fd >= 0) {
                /* Implicitly sync the device, since otherwise in-flight blocks might not get written */
                if (fsync(d->fd) < 0)
                        log_debug_errno(errno, "Failed to sync loop block device, ignoring: %m");

                if (!LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished) {
                        /* We are supposed to clear the loopback device. Let's do this synchronously: lock
                         * the device, manually remove all partitions and then clear it. This should ensure
                         * udev doesn't concurrently access the devices, and we can be reasonably sure that
                         * once we are done here the device is cleared and all its partition children
                         * removed. Note that we lock our primary device fd here (and not a separate locking
                         * fd, as we do during allocation, since we want to keep the lock all the way through
                         * the LOOP_CLR_FD, but that call would fail if we had more than one fd open.) */

                        if (flock(d->fd, LOCK_EX) < 0)
                                log_debug_errno(errno, "Failed to lock loop block device, ignoring: %m");

                        r = block_device_remove_all_partitions(d->dev, d->fd);
                        if (r < 0)
                                log_debug_errno(r, "Failed to remove partitions of loopback block device, ignoring: %m");

                        if (ioctl(d->fd, LOOP_CLR_FD) < 0)
                                log_debug_errno(errno, "Failed to clear loop device, ignoring: %m");
                }

                /* No need to reset d->fd, the whole object is freed below */
                safe_close(d->fd);
        }

        /* Now that the block device is released, let's also try to remove it */
        if (control >= 0) {
                useconds_t delay = 5 * USEC_PER_MSEC;  /* We sleep after each of the first 38 failed attempts,
                                                        * doubling the delay on every 5th attempt, i.e. a
                                                        * total delay of at most 5730 ms spread over at most
                                                        * 39 attempts:
                                                        * (4*5 + 5*10 + 5*20 + … + 5*320 + 4*640) = 5730. */

                for (unsigned attempt = 1;; attempt++) {
                        if (ioctl(control, LOOP_CTL_REMOVE, d->nr) >= 0)
                                break;
                        /* Only EBUSY is worth retrying, and only until the 39th attempt has failed */
                        if (errno != EBUSY || attempt > 38) {
                                log_debug_errno(errno, "Failed to remove device %s: %m", strna(d->node));
                                break;
                        }
                        if (attempt % 5 == 0) {
                                log_debug("Device is still busy after %u attempts…", attempt);
                                delay *= 2;
                        }

                        (void) usleep_safe(delay);
                }
        }

        free(d->node);
        sd_device_unref(d->dev);
        free(d->backing_file);
        return mfree(d);
}
 845
/* Reference counting entry points for LoopDevice, generated by macro. NOTE(review): presumably this
 * defines loop_device_ref() and loop_device_unref(), with loop_device_free() being invoked once the last
 * reference is dropped — confirm against the definition of DEFINE_TRIVIAL_REF_UNREF_FUNC(). */
DEFINE_TRIVIAL_REF_UNREF_FUNC(LoopDevice, loop_device, loop_device_free);
 847
/* Marks the loop device as relinquished. loop_device_free() checks this flag and, if it is set, neither
 * clears the loop device nor tries to remove it via /dev/loop-control. */
void loop_device_relinquish(LoopDevice *d) {
        assert(d);

        /* Don't attempt to clean up the loop device anymore from this point on. Leave the cleaning up to
         * the kernel itself, using the loop device "auto-clear" logic we already turned on when creating
         * the device. */

        d->relinquished = true;
}
 856
/* Reverses loop_device_relinquish(): clears the 'relinquished' flag again, so that loop_device_free() no
 * longer skips clearing and removing the device on account of that flag. */
void loop_device_unrelinquish(LoopDevice *d) {
        assert(d);
        d->relinquished = false;
}
 861
 862 int loop_device_open(
 863                 sd_device *dev,
 864                 int open_flags,
 865                 int lock_op,
 866                 LoopDevice **ret) {
 867
 868         _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
 869         _cleanup_free_ char *node = NULL, *backing_file = NULL;
 870         dev_t devnum, backing_devno = 0;
 871         struct loop_info64 info;
 872         ino_t backing_inode = 0;
 873         uint64_t diskseq = 0;
 874         LoopDevice *d;
 875         const char *s;
 876         int r, nr = -1;
 877
 878         assert(dev);
 879         assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
 880         assert(ret);
 881
 882         /* Even if fd is provided through the argument in loop_device_open_from_fd(), we reopen the inode
 883          * here, instead of keeping just a dup() clone of it around, since we want to ensure that the
 884          * O_DIRECT flag of the handle we keep is off, we have our own file index, and have the right
 885          * read/write mode in effect. */
 886         fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
 887         if (fd < 0)
 888                 return fd;
 889
 890         if ((lock_op & ~LOCK_NB) != LOCK_UN) {
 891                 lock_fd = open_lock_fd(fd, lock_op);
 892                 if (lock_fd < 0)
 893                         return lock_fd;
 894         }
 895
 896         if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0) {
 897 #if HAVE_VALGRIND_MEMCHECK_H
 898                 /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
 899                 VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
 900 #endif
 901                 nr = info.lo_number;
 902
 903                 if (sd_device_get_sysattr_value(dev, "loop/backing_file", &s) >= 0) {
 904                         backing_file = strdup(s);
 905                         if (!backing_file)
 906                                 return -ENOMEM;
 907                 }
 908
 909                 backing_devno = info.lo_device;
 910                 backing_inode = info.lo_inode;
 911         }
 912
 913         r = fd_get_diskseq(fd, &diskseq);
 914         if (r < 0 && r != -EOPNOTSUPP)
 915                 return r;
 916
 917         uint32_t sector_size;
 918         r = blockdev_get_sector_size(fd, &sector_size);
 919         if (r < 0)
 920                 return r;
 921
 922         r = sd_device_get_devnum(dev, &devnum);
 923         if (r < 0)
 924                 return r;
 925
 926         r = sd_device_get_devname(dev, &s);
 927         if (r < 0)
 928                 return r;
 929
 930         node = strdup(s);
 931         if (!node)
 932                 return -ENOMEM;
 933
 934         d = new(LoopDevice, 1);
 935         if (!d)
 936                 return -ENOMEM;
 937
 938         *d = (LoopDevice) {
 939                 .n_ref = 1,
 940                 .fd = TAKE_FD(fd),
 941                 .lock_fd = TAKE_FD(lock_fd),
 942                 .nr = nr,
 943                 .node = TAKE_PTR(node),
 944                 .dev = sd_device_ref(dev),
 945                 .backing_file = TAKE_PTR(backing_file),
 946                 .backing_inode = backing_inode,
 947                 .backing_devno = backing_devno,
 948                 .relinquished = true, /* It's not ours, don't try to destroy it when this object is freed */
 949                 .devno = devnum,
 950                 .diskseq = diskseq,
 951                 .uevent_seqnum_not_before = UINT64_MAX,
 952                 .timestamp_not_before = USEC_INFINITY,
 953                 .sector_size = sector_size,
 954         };
 955
 956         *ret = d;
 957         return 0;
 958 }
 959
 960 int loop_device_open_from_fd(
 961                 int fd,
 962                 int open_flags,
 963                 int lock_op,
 964                 LoopDevice **ret) {
 965
 966         _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
 967         int r;
 968
 969         r = block_device_new_from_fd(ASSERT_FD(fd), 0, &dev);
 970         if (r < 0)
 971                 return r;
 972
 973         return loop_device_open(dev, open_flags, lock_op, ret);
 974 }
 975
 976 int loop_device_open_from_path(
 977                 const char *path,
 978                 int open_flags,
 979                 int lock_op,
 980                 LoopDevice **ret) {
 981
 982         _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
 983         int r;
 984
 985         assert(path);
 986
 987         r = block_device_new_from_path(path, 0, &dev);
 988         if (r < 0)
 989                 return r;
 990
 991         return loop_device_open(dev, open_flags, lock_op, ret);
 992 }
 993
 994 static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) {
 995         char sysfs[STRLEN("/sys/dev/block/:/partition") + 2*DECIMAL_STR_MAX(dev_t) + 1];
 996         _cleanup_free_ char *buffer = NULL;
 997         uint64_t current_offset, current_size, partno;
 998         _cleanup_close_ int whole_fd = -EBADF;
 999         struct stat st;
1000         dev_t devno;
1001         int r;
1002
1003         /* Resizes the partition the loopback device refer to (assuming it refers to one instead of an actual
1004          * loopback device), and changes the offset, if needed. This is a fancy wrapper around
1005          * BLKPG_RESIZE_PARTITION. */
1006
1007         if (fstat(ASSERT_FD(partition_fd), &st) < 0)
1008                 return -errno;
1009
1010         assert(S_ISBLK(st.st_mode));
1011
1012         xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev));
1013         r = read_one_line_file(sysfs, &buffer);
1014         if (r == -ENOENT) /* not a partition, cannot resize */
1015                 return -ENOTTY;
1016         if (r < 0)
1017                 return r;
1018         r = safe_atou64(buffer, &partno);
1019         if (r < 0)
1020                 return r;
1021
1022         xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/start", DEVNUM_FORMAT_VAL(st.st_rdev));
1023
1024         buffer = mfree(buffer);
1025         r = read_one_line_file(sysfs, &buffer);
1026         if (r < 0)
1027                 return r;
1028         r = safe_atou64(buffer, &current_offset);
1029         if (r < 0)
1030                 return r;
1031         if (current_offset > UINT64_MAX/512U)
1032                 return -EINVAL;
1033         current_offset *= 512U;
1034
1035         if (ioctl(partition_fd, BLKGETSIZE64, &current_size) < 0)
1036                 return -EINVAL;
1037
1038         if (size == UINT64_MAX && offset == UINT64_MAX)
1039                 return 0;
1040         if (current_size == size && current_offset == offset)
1041                 return 0;
1042
1043         xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/../dev", DEVNUM_FORMAT_VAL(st.st_rdev));
1044
1045         buffer = mfree(buffer);
1046         r = read_one_line_file(sysfs, &buffer);
1047         if (r < 0)
1048                 return r;
1049         r = parse_devnum(buffer, &devno);
1050         if (r < 0)
1051                 return r;
1052
1053         whole_fd = r = device_open_from_devnum(S_IFBLK, devno, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY, NULL);
1054         if (r < 0)
1055                 return r;
1056
1057         return block_device_resize_partition(
1058                         whole_fd,
1059                         partno,
1060                         offset == UINT64_MAX ? current_offset : offset,
1061                         size == UINT64_MAX ? current_size : size);
1062 }
1063
1064 int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) {
1065         struct loop_info64 info;
1066
1067         assert(d);
1068         assert(d->fd >= 0);
1069
1070         /* Changes the offset/start of the loop device relative to the beginning of the underlying file or
1071          * block device. If this loop device actually refers to a partition and not a loopback device, we'll
1072          * try to adjust the partition offsets instead.
1073          *
1074          * If either offset or size is UINT64_MAX we won't change that parameter. */
1075
1076         if (d->nr < 0) /* not a loopback device */
1077                 return resize_partition(d->fd, offset, size);
1078
1079         if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0)
1080                 return -errno;
1081
1082 #if HAVE_VALGRIND_MEMCHECK_H
1083         /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
1084         VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
1085 #endif
1086
1087         if (size == UINT64_MAX && offset == UINT64_MAX)
1088                 return 0;
1089         if (info.lo_sizelimit == size && info.lo_offset == offset)
1090                 return 0;
1091
1092         if (size != UINT64_MAX)
1093                 info.lo_sizelimit = size;
1094         if (offset != UINT64_MAX)
1095                 info.lo_offset = offset;
1096
1097         return RET_NERRNO(ioctl(d->fd, LOOP_SET_STATUS64, &info));
1098 }
1099
1100 int loop_device_flock(LoopDevice *d, int operation) {
1101         assert(IN_SET(operation & ~LOCK_NB, LOCK_UN, LOCK_SH, LOCK_EX));
1102         assert(d);
1103
1104         /* When unlocking just close the lock fd */
1105         if ((operation & ~LOCK_NB) == LOCK_UN) {
1106                 d->lock_fd = safe_close(d->lock_fd);
1107                 return 0;
1108         }
1109
1110         /* If we had no lock fd so far, create one and lock it right-away */
1111         if (d->lock_fd < 0) {
1112                 d->lock_fd = open_lock_fd(ASSERT_FD(d->fd), operation);
1113                 if (d->lock_fd < 0)
1114                         return d->lock_fd;
1115
1116                 return 0;
1117         }
1118
1119         /* Otherwise change the current lock mode on the existing fd */
1120         return RET_NERRNO(flock(d->lock_fd, operation));
1121 }
1122
1123 int loop_device_sync(LoopDevice *d) {
1124         assert(d);
1125
1126         /* We also do this implicitly in loop_device_unref(). Doing this explicitly here has the benefit that
1127          * we can check the return value though. */
1128
1129         return RET_NERRNO(fsync(ASSERT_FD(d->fd)));
1130 }
1131
1132 int loop_device_set_autoclear(LoopDevice *d, bool autoclear) {
1133         struct loop_info64 info;
1134
1135         assert(d);
1136
1137         if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
1138                 return -errno;
1139
1140         if (autoclear == FLAGS_SET(info.lo_flags, LO_FLAGS_AUTOCLEAR))
1141                 return 0;
1142
1143         SET_FLAG(info.lo_flags, LO_FLAGS_AUTOCLEAR, autoclear);
1144
1145         if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
1146                 return -errno;
1147
1148         return 1;
1149 }
1150
1151 int loop_device_set_filename(LoopDevice *d, const char *name) {
1152         struct loop_info64 info;
1153
1154         assert(d);
1155
1156         /* Sets the .lo_file_name of the loopback device. This is supposed to contain the path to the file
1157          * backing the block device, but is actually just a free-form string you can pass to the kernel. Most
1158          * tools that actually care for the backing file path use the sysfs attribute file loop/backing_file
1159          * which is a kernel generated string, subject to file system namespaces and such.
1160          *
1161          * .lo_file_name is useful since userspace can select it freely when creating a loopback block
1162          * device, and we can use it for /dev/disk/by-loop-ref/ symlinks, and similar, so that apps can
1163          * recognize their own loopback files. */
1164
1165         if (name && strlen(name) >= sizeof(info.lo_file_name))
1166                 return -ENOBUFS;
1167
1168         if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
1169                 return -errno;
1170
1171         if (strneq((char*) info.lo_file_name, strempty(name), sizeof(info.lo_file_name)))
1172                 return 0;
1173
1174         if (name) {
1175                 strncpy((char*) info.lo_file_name, name, sizeof(info.lo_file_name)-1);
1176                 info.lo_file_name[sizeof(info.lo_file_name)-1] = 0;
1177         } else
1178                 memzero(info.lo_file_name, sizeof(info.lo_file_name));
1179
1180         if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
1181                 return -errno;
1182
1183         return 1;
1184 }