src/shared/loop-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
   2
   3 #if HAVE_VALGRIND_MEMCHECK_H
   4 #include <valgrind/memcheck.h>
   5 #endif
   6
   7 #include <errno.h>
   8 #include <fcntl.h>
   9 #include <linux/blkpg.h>
  10 #include <linux/fs.h>
  11 #include <linux/loop.h>
  12 #include <sys/file.h>
  13 #include <sys/ioctl.h>
  14 #include <unistd.h>
  15
  16 #include "sd-device.h"
  17
  18 #include "alloc-util.h"
  19 #include "blockdev-util.h"
  20 #include "data-fd-util.h"
  21 #include "device-util.h"
  22 #include "devnum-util.h"
  23 #include "dissect-image.h"
  24 #include "env-util.h"
  25 #include "errno-util.h"
  26 #include "fd-util.h"
  27 #include "fs-util.h"
  28 #include "fileio.h"
  29 #include "loop-util.h"
  30 #include "missing_loop.h"
  31 #include "parse-util.h"
  32 #include "path-util.h"
  33 #include "random-util.h"
  34 #include "stat-util.h"
  35 #include "stdio-util.h"
  36 #include "string-util.h"
  37 #include "tmpfile-util.h"
  38
  39 static void cleanup_clear_loop_close(int *fd) {
  40         if (*fd < 0)
  41                 return;
  42
  43         (void) ioctl(*fd, LOOP_CLR_FD);
  44         (void) safe_close(*fd);
  45 }
  46
  47 static int loop_is_bound(int fd) {
  48         struct loop_info64 info;
  49
  50         if (ioctl(ASSERT_FD(fd), LOOP_GET_STATUS64, &info) < 0) {
  51                 if (errno == ENXIO)
  52                         return false; /* not bound! */
  53
  54                 return -errno;
  55         }
  56
  57         return true; /* bound! */
  58 }
  59
  60 static int get_current_uevent_seqnum(uint64_t *ret) {
  61         _cleanup_free_ char *p = NULL;
  62         int r;
  63
  64         r = read_full_virtual_file("/sys/kernel/uevent_seqnum", &p, NULL);
  65         if (r < 0)
  66                 return log_debug_errno(r, "Failed to read current uevent sequence number: %m");
  67
  68         r = safe_atou64(strstrip(p), ret);
  69         if (r < 0)
  70                 return log_debug_errno(r, "Failed to parse current uevent sequence number: %s", p);
  71
  72         return 0;
  73 }
  74
  75 static int open_lock_fd(int primary_fd, int operation) {
  76         _cleanup_close_ int lock_fd = -EBADF;
  77
  78         assert(IN_SET(operation & ~LOCK_NB, LOCK_SH, LOCK_EX));
  79
  80         lock_fd = fd_reopen(ASSERT_FD(primary_fd), O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
  81         if (lock_fd < 0)
  82                 return lock_fd;
  83
  84         if (flock(lock_fd, operation) < 0)
  85                 return -errno;
  86
  87         return TAKE_FD(lock_fd);
  88 }
  89
  90 static int loop_configure_verify_direct_io(int fd, const struct loop_config *c) {
  91         assert(fd >= 0);
  92         assert(c);
  93
  94         if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO)) {
  95                 struct loop_info64 info;
  96
  97                 if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0)
  98                         return log_debug_errno(errno, "Failed to issue LOOP_GET_STATUS64: %m");
  99
 100 #if HAVE_VALGRIND_MEMCHECK_H
 101                 VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
 102 #endif
 103
 104                 /* On older kernels (<= 5.3) it was necessary to set the block size of the loopback block
 105                  * device to the logical block size of the underlying file system. Since there was no nice
 106                  * way to query the value, we are not bothering to do this however. On newer kernels the
 107                  * block size is propagated automatically and does not require intervention from us. We'll
 108                  * check here if enabling direct IO worked, to make this easily debuggable however.
 109                  *
 110                  * (Should anyone really care and actually wants direct IO on old kernels: it might be worth
 111                  * enabling direct IO with iteratively larger block sizes until it eventually works.)
 112                  *
 113                  * On older kernels (e.g.: 5.10) when this is attempted on a file stored on a dm-crypt
 114                  * backed partition the kernel will start returning I/O errors when accessing the mounted
 115                  * loop device, so return a recognizable error that causes the operation to be started
 116                  * from scratch without the LO_FLAGS_DIRECT_IO flag. */
 117                 if (!FLAGS_SET(info.lo_flags, LO_FLAGS_DIRECT_IO))
 118                         return log_debug_errno(
 119                                         SYNTHETIC_ERRNO(ENOANO),
 120                                         "Could not enable direct IO mode, retrying in buffered IO mode.");
 121         }
 122
 123         return 0;
 124 }
 125
 126 static int loop_configure_verify(int fd, const struct loop_config *c) {
 127         bool broken = false;
 128         int r;
 129
 130         assert(fd >= 0);
 131         assert(c);
 132
 133         if (c->block_size != 0) {
 134                 uint32_t ssz;
 135
 136                 r = blockdev_get_sector_size(fd, &ssz);
 137                 if (r < 0)
 138                         return r;
 139
 140                 if (ssz != c->block_size) {
 141                         log_debug("LOOP_CONFIGURE didn't honour requested block size %" PRIu32 ", got %" PRIu32 " instead. Ignoring.", c->block_size, ssz);
 142                         broken = true;
 143                 }
 144         }
 145
 146         if (c->info.lo_sizelimit != 0) {
 147                 /* Kernel 5.8 vanilla doesn't properly propagate the size limit into the
 148                  * block device. If it's used, let's immediately check if it had the desired
 149                  * effect hence. And if not use classic LOOP_SET_STATUS64. */
 150                 uint64_t z;
 151
 152                 if (ioctl(fd, BLKGETSIZE64, &z) < 0)
 153                         return -errno;
 154
 155                 if (z != c->info.lo_sizelimit) {
 156                         log_debug("LOOP_CONFIGURE is broken, doesn't honour .info.lo_sizelimit. Falling back to LOOP_SET_STATUS64.");
 157                         broken = true;
 158                 }
 159         }
 160
 161         if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_PARTSCAN)) {
 162                 /* Kernel 5.8 vanilla doesn't properly propagate the partition scanning flag
 163                  * into the block device. Let's hence verify if things work correctly here
 164                  * before returning. */
 165
 166                 r = blockdev_partscan_enabled(fd);
 167                 if (r < 0)
 168                         return r;
 169                 if (r == 0) {
 170                         log_debug("LOOP_CONFIGURE is broken, doesn't honour LO_FLAGS_PARTSCAN. Falling back to LOOP_SET_STATUS64.");
 171                         broken = true;
 172                 }
 173         }
 174
 175         r = loop_configure_verify_direct_io(fd, c);
 176         if (r < 0)
 177                 return r;
 178
 179         return !broken;
 180 }
 181
 182 static int loop_configure_fallback(int fd, const struct loop_config *c) {
 183         struct loop_info64 info_copy;
 184         int r;
 185
 186         assert(fd >= 0);
 187         assert(c);
 188
 189         /* Only some of the flags LOOP_CONFIGURE can set are also settable via LOOP_SET_STATUS64, hence mask
 190          * them out. */
 191         info_copy = c->info;
 192         info_copy.lo_flags &= LOOP_SET_STATUS_SETTABLE_FLAGS;
 193
 194         /* Since kernel commit 5db470e229e22b7eda6e23b5566e532c96fb5bc3 (kernel v5.0) the LOOP_SET_STATUS64
 195          * ioctl can return EAGAIN in case we change the info.lo_offset field, if someone else is accessing the
 196          * block device while we try to reconfigure it. This is a pretty common case, since udev might
 197          * instantly start probing the device as soon as we attach an fd to it. Hence handle it in two ways:
 198          * first, let's take the BSD lock to ensure that udev will not step in between the point in
 199          * time where we attach the fd and where we reconfigure the device. Secondly, let's wait 50ms on
 200          * EAGAIN and retry. The former should be an efficient mechanism to avoid we have to wait 50ms
 201          * needlessly if we are just racing against udev. The latter is protection against all other cases,
 202          * i.e. peers that do not take the BSD lock. */
 203
 204         for (unsigned n_attempts = 0;;) {
 205                 if (ioctl(fd, LOOP_SET_STATUS64, &info_copy) >= 0)
 206                         break;
 207
 208                 if (errno != EAGAIN || ++n_attempts >= 64)
 209                         return log_debug_errno(errno, "Failed to configure loopback block device: %m");
 210
 211                 /* Sleep some random time, but at least 10ms, at most 250ms. Increase the delay the more
 212                  * failed attempts we see */
 213                 (void) usleep_safe(UINT64_C(10) * USEC_PER_MSEC +
 214                               random_u64_range(UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
 215         }
 216
 217         /* Work around a kernel bug, where changing offset/size of the loopback device doesn't correctly
 218          * invalidate the buffer cache. For details see:
 219          *
 220          *     https://android.googlesource.com/platform/system/apex/+/bef74542fbbb4cd629793f4efee8e0053b360570
 221          *
 222          * This was fixed in kernel 5.0, see:
 223          *
 224          *     https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5db470e229e22b7eda6e23b5566e532c96fb5bc3
 225          *
 226          * We'll run the work-around here in the legacy LOOP_SET_STATUS64 codepath. In the LOOP_CONFIGURE
 227          * codepath above it should not be necessary. */
 228         if (c->info.lo_offset != 0 || c->info.lo_sizelimit != 0)
 229                 if (ioctl(fd, BLKFLSBUF, 0) < 0)
 230                         log_debug_errno(errno, "Failed to issue BLKFLSBUF ioctl, ignoring: %m");
 231
 232         /* If a block size is requested then try to configure it. If that doesn't work, ignore errors, but
 233          * afterwards, let's validate what is in effect, and if it doesn't match what we want, fail */
 234         if (c->block_size != 0) {
 235                 uint32_t ssz;
 236
 237                 if (ioctl(fd, LOOP_SET_BLOCK_SIZE, (unsigned long) c->block_size) < 0)
 238                         log_debug_errno(errno, "Failed to set sector size, ignoring: %m");
 239
 240                 r = blockdev_get_sector_size(fd, &ssz);
 241                 if (r < 0)
 242                         return log_debug_errno(r, "Failed to read sector size: %m");
 243                 if (ssz != c->block_size)
 244                         return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Sector size of loopback device doesn't match what we requested, refusing.");
 245         }
 246
 247         /* LO_FLAGS_DIRECT_IO is a flags we need to configure via explicit ioctls. */
 248         if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO))
 249                 if (ioctl(fd, LOOP_SET_DIRECT_IO, 1UL) < 0)
 250                         log_debug_errno(errno, "Failed to enable direct IO mode, ignoring: %m");
 251
 252         return loop_configure_verify_direct_io(fd, c);
 253 }
 254
/* Attaches the backing fd and settings described by 'c' to the loopback block device /dev/loop<nr> and,
 * on success, returns a freshly allocated LoopDevice object in *ret.
 *
 * nr:         index of the loopback device to use (must be >= 0)
 * open_flags: flags OR'ed into O_CLOEXEC|O_NONBLOCK|O_NOCTTY when opening the device node
 * lock_op:    BSD lock state the device is left in on success: LOCK_EX keeps the exclusive lock taken
 *             here, LOCK_SH downgrades it, LOCK_UN releases it (LOCK_NB may be OR'ed in)
 * c:          the loop configuration to apply; c->fd is the backing file descriptor
 * ret:        where to store the new LoopDevice
 *
 * Returns 0 on success, a negative errno on failure. In particular -EBUSY is returned if the device
 * already is bound, or if LOOP_CONFIGURE turned out not to work properly, and -EUCLEAN if left-over
 * partitions had to be removed first; in these cases the caller is expected to try again.
 *
 * Once a backing file is attached the device fd is kept in 'loop_with_fd', so that any later failure
 * detaches the backing file again (via cleanup_clear_loop_close()) before the fd is closed. */
static int loop_configure(
                int nr,
                int open_flags,
                int lock_op,
                const struct loop_config *c,
                LoopDevice **ret) {

        /* Remembers across calls that LOOP_CONFIGURE is unsupported or misbehaves, so that later calls
         * go straight for the LOOP_SET_FD + LOOP_SET_STATUS64 path. */
        static bool loop_configure_broken = false;

        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
        _cleanup_(cleanup_clear_loop_close) int loop_with_fd = -EBADF; /* This must be declared before lock_fd. */
        _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
        _cleanup_free_ char *node = NULL;
        uint64_t diskseq = 0, seqnum = UINT64_MAX;
        usec_t timestamp = USEC_INFINITY;
        dev_t devno;
        int r;

        assert(nr >= 0);
        assert(c);
        assert(ret);

        if (asprintf(&node, "/dev/loop%i", nr) < 0)
                return log_oom_debug();

        r = sd_device_new_from_devname(&dev, node);
        if (r < 0)
                return log_debug_errno(r, "Failed to create sd_device object for \"%s\": %m", node);

        r = sd_device_get_devnum(dev, &devno);
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to get devnum: %m");

        fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
        if (fd < 0)
                return log_device_debug_errno(dev, fd, "Failed to open device: %m");

        /* Let's lock the device before we do anything. We take the BSD lock on a second, separately opened
         * fd for the device. udev after all watches for close() events (specifically IN_CLOSE_WRITE) on
         * block devices to reprobe them, hence by having a separate fd we will later close() we can ensure
         * we trigger udev after everything is done. If we'd lock our own fd instead and keep it open for a
         * long time udev would possibly never run on it again, even though the fd is unlocked, simply
         * because we never close() it. It also has the nice benefit we can use the _cleanup_close_ logic to
         * automatically release the lock, after we are done. */
        lock_fd = open_lock_fd(fd, LOCK_EX);
        if (lock_fd < 0)
                return log_device_debug_errno(dev, lock_fd, "Failed to acquire lock: %m");

        log_device_debug(dev, "Acquired exclusive lock.");

        /* Let's see if backing file is really unattached. Someone may already attach a backing file without
         * taking BSD lock. */
        r = loop_is_bound(fd);
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to check if the loopback block device is bound: %m");
        if (r > 0)
                return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EBUSY),
                                              "The loopback block device is already bound, ignoring.");

        /* Let's see if the device is really detached, i.e. currently has no associated partition block
         * devices. On various kernels (such as 5.8) it is possible to have a loopback block device that
         * superficially is detached but still has partition block devices associated for it. Let's then
         * manually remove the partitions via BLKPG, and tell the caller we did that via EUCLEAN, so they try
         * again. */
        r = block_device_remove_all_partitions(dev, fd);
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to remove partitions on the loopback block device: %m");
        if (r > 0)
                /* Removed all partitions. Let's report this to the caller, to try again, and count this as
                 * an attempt. */
                return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EUCLEAN),
                                              "Removed partitions on the loopback block device.");

        if (!loop_configure_broken) {
                /* Acquire uevent seqnum immediately before attaching the loopback device. This allows
                 * callers to ignore all uevents with a seqnum before this one, if they need to associate
                 * uevent with this attachment. Doing so isn't race-free though, as uevents that happen in
                 * the window between this reading of the seqnum, and the LOOP_CONFIGURE call might still be
                 * mistaken as originating from our attachment, even though might be caused by an earlier
                 * use. But doing this at least shortens the race window a bit. */
                r = get_current_uevent_seqnum(&seqnum);
                if (r < 0)
                        return log_device_debug_errno(dev, r, "Failed to get the current uevent seqnum: %m");

                timestamp = now(CLOCK_MONOTONIC);

                if (ioctl(fd, LOOP_CONFIGURE, c) < 0) {
                        /* Do fallback only if LOOP_CONFIGURE is not supported, propagate all other
                         * errors. Note that the kernel is weird: non-existing ioctls currently return EINVAL
                         * rather than ENOTTY on loopback block devices. They should fix that in the kernel,
                         * but in the meantime we accept both here. */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EINVAL)
                                return log_device_debug_errno(dev, errno, "ioctl(LOOP_CONFIGURE) failed: %m");

                        /* Nothing was attached, 'fd' remains valid and is reused by the fallback
                         * path below. */
                        loop_configure_broken = true;
                } else {
                        /* The backing file is attached now: hand the fd over to the variable whose
                         * cleanup handler detaches it again on failure. */
                        loop_with_fd = TAKE_FD(fd);

                        r = loop_configure_verify(loop_with_fd, c);
                        if (r < 0)
                                return log_device_debug_errno(dev, r, "Failed to verify if loopback block device is correctly configured: %m");
                        if (r == 0) {
                                /* LOOP_CONFIGURE doesn't work. Remember that. */
                                loop_configure_broken = true;

                                /* We return EBUSY here instead of retrying immediately with LOOP_SET_FD,
                                 * because LOOP_CLR_FD is async: if the operation cannot be executed right
                                 * away it just sets the autoclear flag on the device. This means there's a
                                 * good chance we cannot actually reuse the loopback device right-away. Hence
                                 * let's assume it's busy, avoid the trouble and let the calling loop call us
                                 * again with a new, likely unused device. */
                                return -EBUSY;
                        }
                }
        }

        /* Reached either because an earlier call already found LOOP_CONFIGURE to be broken, or because
         * the LOOP_CONFIGURE ioctl above turned out to be unsupported. In both cases nothing is attached
         * yet and 'fd' is still valid. */
        if (loop_configure_broken) {
                /* Let's read the seqnum again, to shorten the window. */
                r = get_current_uevent_seqnum(&seqnum);
                if (r < 0)
                        return log_device_debug_errno(dev, r, "Failed to get the current uevent seqnum: %m");

                timestamp = now(CLOCK_MONOTONIC);

                if (ioctl(fd, LOOP_SET_FD, c->fd) < 0)
                        return log_device_debug_errno(dev, errno, "ioctl(LOOP_SET_FD) failed: %m");

                loop_with_fd = TAKE_FD(fd);

                r = loop_configure_fallback(loop_with_fd, c);
                if (r < 0)
                        return r;
        }

        /* -EOPNOTSUPP is tolerated here; 'diskseq' then keeps its initial value of 0. */
        r = fd_get_diskseq(loop_with_fd, &diskseq);
        if (r < 0 && r != -EOPNOTSUPP)
                return log_device_debug_errno(dev, r, "Failed to get diskseq: %m");

        /* Bring the lock into the state the caller asked for. */
        switch (lock_op & ~LOCK_NB) {
        case LOCK_EX: /* Already in effect */
                break;
        case LOCK_SH: /* Downgrade */
                if (flock(lock_fd, lock_op) < 0)
                        return log_device_debug_errno(dev, errno, "Failed to downgrade lock level: %m");
                break;
        case LOCK_UN: /* Release */
                lock_fd = safe_close(lock_fd);
                break;
        default:
                assert_not_reached();
        }

        LoopDevice *d = new(LoopDevice, 1);
        if (!d)
                return log_oom_debug();

        /* Transfer ownership of all resources into the new object, so that the cleanup handlers above
         * become no-ops. */
        *d = (LoopDevice) {
                .n_ref = 1,
                .fd = TAKE_FD(loop_with_fd),
                .lock_fd = TAKE_FD(lock_fd),
                .node = TAKE_PTR(node),
                .nr = nr,
                .devno = devno,
                .dev = TAKE_PTR(dev),
                .diskseq = diskseq,
                .uevent_seqnum_not_before = seqnum,
                .timestamp_not_before = timestamp,
                .sector_size = c->block_size,
        };

        *ret = TAKE_PTR(d);
        return 0;
}
 428
/* Allocates a free loopback block device, attaches 'fd' to it and returns a LoopDevice object for it in
 * *ret.
 *
 * path:        optional path of the backing file; if NULL the path is derived from 'fd'. Only used to
 *              fill in the 'backing_file' field of the returned object.
 * fd:          the backing regular file or block device
 * open_flags:  O_RDWR or O_RDONLY; O_RDONLY makes the loopback device read-only
 * offset:      start offset into the backing file
 * size:        size limit; UINT64_MAX means "no limit"
 * sector_size: sector size to configure; 0 selects 512, UINT32_MAX means "take over from the backing
 *              block device, or probe from the image contents"
 * loop_flags:  LO_FLAGS_xyz flags; the read-only flag is derived from open_flags and autoclear is
 *              always turned on
 * lock_op:     passed on to loop_configure() and loop_device_open_from_fd()
 *
 * If 'fd' refers to a block device and the whole of it is requested (offset 0, size 0 or UINT64_MAX),
 * no loopback device is allocated; the request is passed to loop_device_open_from_fd() instead.
 *
 * Returns 0 on success, a negative errno on failure; -EBUSY if no device could be set up within 64
 * attempts. */
static int loop_device_make_internal(
                const char *path,
                int fd,
                int open_flags,
                uint64_t offset,
                uint64_t size,
                uint32_t sector_size,
                uint32_t loop_flags,
                int lock_op,
                LoopDevice **ret) {

        _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
        _cleanup_close_ int reopened_fd = -EBADF, control = -EBADF;
        _cleanup_free_ char *backing_file = NULL;
        struct loop_config config;
        int r, f_flags;
        struct stat st;

        assert(ret);
        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));

        if (fstat(ASSERT_FD(fd), &st) < 0)
                return -errno;

        if (S_ISBLK(st.st_mode)) {
                if (offset == 0 && IN_SET(size, 0, UINT64_MAX))
                        /* If this is already a block device and we are supposed to cover the whole of it
                         * then store an fd to the original open device node — and do not actually create an
                         * unnecessary loopback device for it. */
                        return loop_device_open_from_fd(fd, open_flags, lock_op, ret);
        } else {
                r = stat_verify_regular(&st);
                if (r < 0)
                        return r;
        }

        /* Determine the path to record as backing file in the returned object. */
        if (path) {
                r = path_make_absolute_cwd(path, &backing_file);
                if (r < 0)
                        return r;

                path_simplify(backing_file);
        } else {
                r = fd_get_path(fd, &backing_file);
                if (r < 0)
                        return r;
        }

        f_flags = fcntl(fd, F_GETFL);
        if (f_flags < 0)
                return -errno;

        if (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) != FLAGS_SET(f_flags, O_DIRECT)) {
                /* If LO_FLAGS_DIRECT_IO is requested, then make sure we have the fd open with O_DIRECT, as
                 * that's required. Conversely, if it's off require that O_DIRECT is off too (that's because
                 * new kernels will implicitly enable LO_FLAGS_DIRECT_IO if O_DIRECT is set).
                 *
                 * Our intention here is that LO_FLAGS_DIRECT_IO is the primary knob, and O_DIRECT derived
                 * from that automatically. */

                reopened_fd = fd_reopen(fd, (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0)|O_CLOEXEC|O_NONBLOCK|open_flags);
                if (reopened_fd < 0) {
                        if (!FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO))
                                return log_debug_errno(reopened_fd, "Failed to reopen file descriptor without O_DIRECT: %m");

                        /* Some file systems might not support O_DIRECT, let's gracefully continue without it then. */
                        log_debug_errno(reopened_fd, "Failed to enable O_DIRECT for backing file descriptor for loopback device. Continuing without.");
                        loop_flags &= ~LO_FLAGS_DIRECT_IO;
                } else
                        fd = reopened_fd; /* From now on, operate on our new O_DIRECT fd */
        }

        control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
        if (control < 0)
                return -errno;

        if (sector_size == 0)
                /* If no sector size is specified, default to the classic default */
                sector_size = 512;
        else if (sector_size == UINT32_MAX) {

                if (S_ISBLK(st.st_mode))
                        /* If the sector size is specified as UINT32_MAX we'll propagate the sector size of
                         * the underlying block device. */
                        r = blockdev_get_sector_size(fd, &sector_size);
                else {
                        _cleanup_close_ int non_direct_io_fd = -EBADF;
                        int probe_fd;

                        assert(S_ISREG(st.st_mode));

                        /* If sector size is specified as UINT32_MAX, we'll try to probe the right sector
                         * size of the image in question by looking for the GPT partition header at various
                         * offsets. This of course only works if the image already has a disk label.
                         *
                         * So here we actually want to read the file contents ourselves. This is quite likely
                         * not going to work if we managed to enable O_DIRECT, because in such a case there
                         * are some pretty strict alignment requirements to offset, size and target, but
                         * there's no way to query what alignment specifically is actually required. Hence,
                         * let's avoid the mess, and temporarily open an fd without O_DIRECT for the probing
                         * logic. */

                        if (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO)) {
                                non_direct_io_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
                                if (non_direct_io_fd < 0)
                                        return non_direct_io_fd;

                                probe_fd = non_direct_io_fd;
                        } else
                                probe_fd = fd;

                        r = probe_sector_size(probe_fd, &sector_size);
                }
                /* Covers the result of both branches above. */
                if (r < 0)
                        return r;
        }

        config = (struct loop_config) {
                .fd = fd,
                .block_size = sector_size,
                .info = {
                        /* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
                        .lo_flags = (loop_flags & ~LO_FLAGS_READ_ONLY) | ((open_flags & O_ACCMODE) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR,
                        .lo_offset = offset,
                        .lo_sizelimit = size == UINT64_MAX ? 0 : size,
                },
        };

        /* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might
         * be gone already, taken by somebody else racing against us. */
        for (unsigned n_attempts = 0;;) {
                usec_t usec;
                int nr;

                /* Let's take a lock on the control device first. On a busy system, where many programs
                 * attempt to allocate a loopback device at the same time, we might otherwise keep looping
                 * around relatively heavy operations: asking for a free loopback device, then opening it,
                 * validating it, attaching something to it. Let's serialize this whole operation, to make
                 * unnecessary busywork less likely. Note that this is just something we do to optimize our
                 * own code (and whoever else decides to use LOCK_EX locks for this), taking this lock is not
                 * necessary, it just means it's less likely we have to iterate through this loop again and
                 * again if our own code races against our own code.
                 *
                 * Note: our lock protocol is to take the /dev/loop-control lock first, and the block device
                 * lock second, if both are taken, and always in this order, to avoid ABBA locking issues. */
                if (flock(control, LOCK_EX) < 0)
                        return -errno;

                nr = ioctl(control, LOOP_CTL_GET_FREE);
                if (nr < 0)
                        return -errno;

                /* On success we leave the loop with the control device still locked; the lock goes away
                 * when 'control' is closed as this function returns. */
                r = loop_configure(nr, open_flags, lock_op, &config, &d);
                if (r >= 0)
                        break;

                /* -ENODEV or friends: Somebody might've gotten the same number from the kernel, used the
                 * device, and called LOOP_CTL_REMOVE on it. Let's retry with a new number.
                 * -EBUSY: a file descriptor is already bound to the loopback block device.
                 * -EUCLEAN: some left-over partition devices that were cleaned up.
                 * -ENOANO: we tried to use LO_FLAGS_DIRECT_IO but the kernel rejected it. */
                if (!ERRNO_IS_DEVICE_ABSENT(r) && !IN_SET(r, -EBUSY, -EUCLEAN, -ENOANO))
                        return r;

                /* OK, this didn't work, let's try again a bit later, but first release the lock on the
                 * control device */
                if (flock(control, LOCK_UN) < 0)
                        return -errno;

                if (++n_attempts >= 64) /* Give up eventually */
                        return -EBUSY;

                /* If we failed to enable direct IO mode, let's retry without it. We restart the process as
                 * on some combination of kernel version and storage filesystem, the kernel is very unhappy
                 * about a failed DIRECT_IO enablement and throws I/O errors. */
                if (r == -ENOANO && FLAGS_SET(config.info.lo_flags, LO_FLAGS_DIRECT_IO)) {
                        config.info.lo_flags &= ~LO_FLAGS_DIRECT_IO;
                        open_flags &= ~O_DIRECT;

                        /* Reopen before closing the previous fd, since config.fd may be that very fd. */
                        int non_direct_io_fd = fd_reopen(config.fd, O_CLOEXEC|O_NONBLOCK|open_flags);
                        if (non_direct_io_fd < 0)
                                return log_debug_errno(
                                                non_direct_io_fd,
                                                "Failed to reopen file descriptor without O_DIRECT: %m");

                        safe_close(reopened_fd);
                        fd = config.fd = /* For cleanups */ reopened_fd = non_direct_io_fd;
                }

                /* Wait some random time, to make collision less likely. Let's pick a random time in the
                 * range 0ms…250ms, linearly scaled by the number of failed attempts. */
                usec = random_u64_range(UINT64_C(10) * USEC_PER_MSEC +
                                        UINT64_C(240) * USEC_PER_MSEC * n_attempts/64);
                log_debug("Trying again after %s.", FORMAT_TIMESPAN(usec, USEC_PER_MSEC));
                (void) usleep_safe(usec);
        }

        /* Record what backs the device: the path, plus inode and device number as reported by the
         * fstat() at the top of this function. */
        d->backing_file = TAKE_PTR(backing_file);
        d->backing_inode = st.st_ino;
        d->backing_devno = st.st_dev;

        log_debug("Successfully acquired %s, devno=%u:%u, nr=%i, diskseq=%" PRIu64,
                  d->node,
                  major(d->devno), minor(d->devno),
                  d->nr,
                  d->diskseq);

        *ret = TAKE_PTR(d);
        return 0;
}
 639
 640 static uint32_t loop_flags_mangle(uint32_t loop_flags) {
 641         int r;
 642
 643         r = getenv_bool("SYSTEMD_LOOP_DIRECT_IO");
 644         if (r < 0 && r != -ENXIO)
 645                 log_debug_errno(r, "Failed to parse $SYSTEMD_LOOP_DIRECT_IO, ignoring: %m");
 646
 647         return UPDATE_FLAG(loop_flags, LO_FLAGS_DIRECT_IO, r != 0); /* Turn on LO_FLAGS_DIRECT_IO by default, unless explicitly configured to off. */
 648 }
 649
 650 int loop_device_make(
 651                 int fd,
 652                 int open_flags,
 653                 uint64_t offset,
 654                 uint64_t size,
 655                 uint32_t sector_size,
 656                 uint32_t loop_flags,
 657                 int lock_op,
 658                 LoopDevice **ret) {
 659
 660         assert(fd >= 0);
 661         assert(ret);
 662
 663         return loop_device_make_internal(
 664                         NULL,
 665                         fd,
 666                         open_flags,
 667                         offset,
 668                         size,
 669                         sector_size,
 670                         loop_flags_mangle(loop_flags),
 671                         lock_op,
 672                         ret);
 673 }
 674
 675 int loop_device_make_by_path_at(
 676                 int dir_fd,
 677                 const char *path,
 678                 int open_flags,
 679                 uint32_t sector_size,
 680                 uint32_t loop_flags,
 681                 int lock_op,
 682                 LoopDevice **ret) {
 683
 684         int r, basic_flags, direct_flags, rdwr_flags;
 685         _cleanup_close_ int fd = -EBADF;
 686         bool direct = false;
 687
 688         assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
 689         assert(path);
 690         assert(ret);
 691         assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));
 692
 693         /* Passing < 0 as open_flags here means we'll try to open the device writable if we can, retrying
 694          * read-only if we cannot. */
 695
 696         loop_flags = loop_flags_mangle(loop_flags);
 697
 698         /* Let's open with O_DIRECT if we can. But not all file systems support that, hence fall back to
 699          * non-O_DIRECT mode automatically, if it fails. */
 700
 701         basic_flags = O_CLOEXEC|O_NONBLOCK|O_NOCTTY;
 702         direct_flags = FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0;
 703         rdwr_flags = open_flags >= 0 ? open_flags : O_RDWR;
 704
 705         fd = xopenat(dir_fd, path, basic_flags|direct_flags|rdwr_flags, /* xopen_flags = */ 0, /* mode = */ 0);
 706         if (fd < 0 && direct_flags != 0) /* If we had O_DIRECT on, and things failed with that, let's immediately try again without */
 707                 fd = xopenat(dir_fd, path, basic_flags|rdwr_flags, /* xopen_flags = */ 0, /* mode = */ 0);
 708         else
 709                 direct = direct_flags != 0;
 710         if (fd < 0) {
 711                 r = -errno;
 712
 713                 /* Retry read-only? */
 714                 if (open_flags >= 0 || !(ERRNO_IS_PRIVILEGE(r) || r == -EROFS))
 715                         return r;
 716
 717                 fd = xopenat(dir_fd, path, basic_flags|direct_flags|O_RDONLY, /* xopen_flags = */ 0, /* mode = */ 0);
 718                 if (fd < 0 && direct_flags != 0) /* as above */
 719                         fd = xopenat(dir_fd, path, basic_flags|O_RDONLY, /* xopen_flags = */ 0, /* mode = */ 0);
 720                 else
 721                         direct = direct_flags != 0;
 722                 if (fd < 0)
 723                         return r; /* Propagate original error */
 724
 725                 open_flags = O_RDONLY;
 726         } else if (open_flags < 0)
 727                 open_flags = O_RDWR;
 728
 729         log_debug("Opened '%s' in %s access mode%s, with O_DIRECT %s%s.",
 730                   path,
 731                   open_flags == O_RDWR ? "O_RDWR" : "O_RDONLY",
 732                   open_flags != rdwr_flags ? " (O_RDWR was requested but not allowed)" : "",
 733                   direct ? "enabled" : "disabled",
 734                   direct != (direct_flags != 0) ? " (O_DIRECT was requested but not supported)" : "");
 735
 736         return loop_device_make_internal(
 737                         dir_fd == AT_FDCWD ? path : NULL,
 738                         fd,
 739                         open_flags,
 740                         /* offset = */ 0,
 741                         /* size = */ 0,
 742                         sector_size,
 743                         loop_flags,
 744                         lock_op,
 745                         ret);
 746 }
 747
 748 int loop_device_make_by_path_memory(
 749                 const char *path,
 750                 int open_flags,
 751                 uint32_t sector_size,
 752                 uint32_t loop_flags,
 753                 int lock_op,
 754                 LoopDevice **ret) {
 755
 756         _cleanup_close_ int fd = -EBADF, mfd = -EBADF;
 757         _cleanup_free_ char *fn = NULL;
 758         struct stat st;
 759         int r;
 760
 761         assert(path);
 762         assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
 763         assert(ret);
 764
 765         loop_flags &= ~LO_FLAGS_DIRECT_IO; /* memfds don't support O_DIRECT, hence LO_FLAGS_DIRECT_IO can't be used either */
 766
 767         fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|O_RDONLY);
 768         if (fd < 0)
 769                 return -errno;
 770
 771         if (fstat(fd, &st) < 0)
 772                 return -errno;
 773
 774         if (!S_ISREG(st.st_mode) && !S_ISBLK(st.st_mode))
 775                 return -EBADF;
 776
 777         r = path_extract_filename(path, &fn);
 778         if (r < 0)
 779                 return r;
 780
 781         mfd = memfd_clone_fd(fd, fn, open_flags|O_CLOEXEC);
 782         if (mfd < 0)
 783                 return mfd;
 784
 785         fd = safe_close(fd); /* Let's close the original early */
 786
 787         return loop_device_make_internal(NULL, mfd, open_flags, 0, 0, sector_size, loop_flags, lock_op, ret);
 788 }
 789
 790 static LoopDevice* loop_device_free(LoopDevice *d) {
 791         _cleanup_close_ int control = -EBADF;
 792         int r;
 793
 794         if (!d)
 795                 return NULL;
 796
 797         /* Release any lock we might have on the device first. We want to open+lock the /dev/loop-control
 798          * device below, but our lock protocol says that if both control and block device locks are taken,
 799          * the control lock needs to be taken first, the block device lock second — in order to avoid ABBA
 800          * locking issues. Moreover, we want to issue LOOP_CLR_FD on the block device further down, and that
 801          * would fail if we had another fd open to the device. */
 802         d->lock_fd = safe_close(d->lock_fd);
 803
 804         /* Let's open the control device early, and lock it, so that we can release our block device and
 805          * delete it in a synchronized fashion, and allocators won't needlessly see the block device as free
 806          * while we are about to delete it. */
 807         if (!LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished) {
 808                 control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
 809                 if (control < 0)
 810                         log_debug_errno(errno, "Failed to open loop control device, cannot remove loop device '%s', ignoring: %m", strna(d->node));
 811                 else if (flock(control, LOCK_EX) < 0)
 812                         log_debug_errno(errno, "Failed to lock loop control device, ignoring: %m");
 813         }
 814
 815         /* Then let's release the loopback block device */
 816         if (d->fd >= 0) {
 817                 /* Implicitly sync the device, since otherwise in-flight blocks might not get written */
 818                 if (fsync(d->fd) < 0)
 819                         log_debug_errno(errno, "Failed to sync loop block device, ignoring: %m");
 820
 821                 if (!LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished) {
 822                         /* We are supposed to clear the loopback device. Let's do this synchronously: lock
 823                          * the device, manually remove all partitions and then clear it. This should ensure
 824                          * udev doesn't concurrently access the devices, and we can be reasonably sure that
 825                          * once we are done here the device is cleared and all its partition children
 826                          * removed. Note that we lock our primary device fd here (and not a separate locking
 827                          * fd, as we do during allocation, since we want to keep the lock all the way through
 828                          * the LOOP_CLR_FD, but that call would fail if we had more than one fd open.) */
 829
 830                         if (flock(d->fd, LOCK_EX) < 0)
 831                                 log_debug_errno(errno, "Failed to lock loop block device, ignoring: %m");
 832
 833                         r = block_device_remove_all_partitions(d->dev, d->fd);
 834                         if (r < 0)
 835                                 log_debug_errno(r, "Failed to remove partitions of loopback block device, ignoring: %m");
 836
 837                         if (ioctl(d->fd, LOOP_CLR_FD) < 0)
 838                                 log_debug_errno(errno, "Failed to clear loop device, ignoring: %m");
 839                 }
 840
 841                 safe_close(d->fd);
 842         }
 843
 844         /* Now that the block device is released, let's also try to remove it */
 845         if (control >= 0) {
 846                 useconds_t delay = 5 * USEC_PER_MSEC;  /* A total delay of 5090 ms between 39 attempts,
 847                                                         * (4*5 + 5*10 + 5*20 + … + 3*640) = 5090. */
 848
 849                 for (unsigned attempt = 1;; attempt++) {
 850                         if (ioctl(control, LOOP_CTL_REMOVE, d->nr) >= 0)
 851                                 break;
 852                         if (errno != EBUSY || attempt > 38) {
 853                                 log_debug_errno(errno, "Failed to remove device %s: %m", strna(d->node));
 854                                 break;
 855                         }
 856                         if (attempt % 5 == 0) {
 857                                 log_debug("Device is still busy after %u attempts…", attempt);
 858                                 delay *= 2;
 859                         }
 860
 861                         (void) usleep_safe(delay);
 862                 }
 863         }
 864
 865         free(d->node);
 866         sd_device_unref(d->dev);
 867         free(d->backing_file);
 868         return mfree(d);
 869 }
 870
/* Instantiates the reference counting helpers for LoopDevice under the "loop_device" name prefix, with
 * loop_device_free() as the destructor (presumably invoked once the last reference is dropped — see the
 * macro definition to confirm). */
DEFINE_TRIVIAL_REF_UNREF_FUNC(LoopDevice, loop_device, loop_device_free);
 872
 873 void loop_device_relinquish(LoopDevice *d) {
 874         assert(d);
 875
 876         /* Don't attempt to clean up the loop device anymore from this point on. Leave the clean-ing up to the kernel
 877          * itself, using the loop device "auto-clear" logic we already turned on when creating the device. */
 878
 879         d->relinquished = true;
 880 }
 881
 882 void loop_device_unrelinquish(LoopDevice *d) {
 883         assert(d);
 884         d->relinquished = false;
 885 }
 886
 887 int loop_device_open(
 888                 sd_device *dev,
 889                 int open_flags,
 890                 int lock_op,
 891                 LoopDevice **ret) {
 892
 893         _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
 894         _cleanup_free_ char *node = NULL, *backing_file = NULL;
 895         dev_t devnum, backing_devno = 0;
 896         struct loop_info64 info;
 897         ino_t backing_inode = 0;
 898         uint64_t diskseq = 0;
 899         LoopDevice *d;
 900         const char *s;
 901         int r, nr = -1;
 902
 903         assert(dev);
 904         assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
 905         assert(ret);
 906
 907         /* Even if fd is provided through the argument in loop_device_open_from_fd(), we reopen the inode
 908          * here, instead of keeping just a dup() clone of it around, since we want to ensure that the
 909          * O_DIRECT flag of the handle we keep is off, we have our own file index, and have the right
 910          * read/write mode in effect. */
 911         fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
 912         if (fd < 0)
 913                 return fd;
 914
 915         if ((lock_op & ~LOCK_NB) != LOCK_UN) {
 916                 lock_fd = open_lock_fd(fd, lock_op);
 917                 if (lock_fd < 0)
 918                         return lock_fd;
 919         }
 920
 921         if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0) {
 922 #if HAVE_VALGRIND_MEMCHECK_H
 923                 /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
 924                 VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
 925 #endif
 926                 nr = info.lo_number;
 927
 928                 if (sd_device_get_sysattr_value(dev, "loop/backing_file", &s) >= 0) {
 929                         backing_file = strdup(s);
 930                         if (!backing_file)
 931                                 return -ENOMEM;
 932                 }
 933
 934                 backing_devno = info.lo_device;
 935                 backing_inode = info.lo_inode;
 936         }
 937
 938         r = fd_get_diskseq(fd, &diskseq);
 939         if (r < 0 && r != -EOPNOTSUPP)
 940                 return r;
 941
 942         uint32_t sector_size;
 943         r = blockdev_get_sector_size(fd, &sector_size);
 944         if (r < 0)
 945                 return r;
 946
 947         r = sd_device_get_devnum(dev, &devnum);
 948         if (r < 0)
 949                 return r;
 950
 951         r = sd_device_get_devname(dev, &s);
 952         if (r < 0)
 953                 return r;
 954
 955         node = strdup(s);
 956         if (!node)
 957                 return -ENOMEM;
 958
 959         d = new(LoopDevice, 1);
 960         if (!d)
 961                 return -ENOMEM;
 962
 963         *d = (LoopDevice) {
 964                 .n_ref = 1,
 965                 .fd = TAKE_FD(fd),
 966                 .lock_fd = TAKE_FD(lock_fd),
 967                 .nr = nr,
 968                 .node = TAKE_PTR(node),
 969                 .dev = sd_device_ref(dev),
 970                 .backing_file = TAKE_PTR(backing_file),
 971                 .backing_inode = backing_inode,
 972                 .backing_devno = backing_devno,
 973                 .relinquished = true, /* It's not ours, don't try to destroy it when this object is freed */
 974                 .devno = devnum,
 975                 .diskseq = diskseq,
 976                 .uevent_seqnum_not_before = UINT64_MAX,
 977                 .timestamp_not_before = USEC_INFINITY,
 978                 .sector_size = sector_size,
 979         };
 980
 981         *ret = d;
 982         return 0;
 983 }
 984
 985 int loop_device_open_from_fd(
 986                 int fd,
 987                 int open_flags,
 988                 int lock_op,
 989                 LoopDevice **ret) {
 990
 991         _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
 992         int r;
 993
 994         r = block_device_new_from_fd(ASSERT_FD(fd), 0, &dev);
 995         if (r < 0)
 996                 return r;
 997
 998         return loop_device_open(dev, open_flags, lock_op, ret);
 999 }
1000
1001 int loop_device_open_from_path(
1002                 const char *path,
1003                 int open_flags,
1004                 int lock_op,
1005                 LoopDevice **ret) {
1006
1007         _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
1008         int r;
1009
1010         assert(path);
1011
1012         r = block_device_new_from_path(path, 0, &dev);
1013         if (r < 0)
1014                 return r;
1015
1016         return loop_device_open(dev, open_flags, lock_op, ret);
1017 }
1018
1019 static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) {
1020         char sysfs[STRLEN("/sys/dev/block/:/partition") + 2*DECIMAL_STR_MAX(dev_t) + 1];
1021         _cleanup_free_ char *buffer = NULL;
1022         uint64_t current_offset, current_size, partno;
1023         _cleanup_close_ int whole_fd = -EBADF;
1024         struct stat st;
1025         dev_t devno;
1026         int r;
1027
1028         /* Resizes the partition the loopback device refer to (assuming it refers to one instead of an actual
1029          * loopback device), and changes the offset, if needed. This is a fancy wrapper around
1030          * BLKPG_RESIZE_PARTITION. */
1031
1032         if (fstat(ASSERT_FD(partition_fd), &st) < 0)
1033                 return -errno;
1034
1035         assert(S_ISBLK(st.st_mode));
1036
1037         xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev));
1038         r = read_one_line_file(sysfs, &buffer);
1039         if (r == -ENOENT) /* not a partition, cannot resize */
1040                 return -ENOTTY;
1041         if (r < 0)
1042                 return r;
1043         r = safe_atou64(buffer, &partno);
1044         if (r < 0)
1045                 return r;
1046
1047         xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/start", DEVNUM_FORMAT_VAL(st.st_rdev));
1048
1049         buffer = mfree(buffer);
1050         r = read_one_line_file(sysfs, &buffer);
1051         if (r < 0)
1052                 return r;
1053         r = safe_atou64(buffer, &current_offset);
1054         if (r < 0)
1055                 return r;
1056         if (current_offset > UINT64_MAX/512U)
1057                 return -EINVAL;
1058         current_offset *= 512U;
1059
1060         if (ioctl(partition_fd, BLKGETSIZE64, &current_size) < 0)
1061                 return -EINVAL;
1062
1063         if (size == UINT64_MAX && offset == UINT64_MAX)
1064                 return 0;
1065         if (current_size == size && current_offset == offset)
1066                 return 0;
1067
1068         xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/../dev", DEVNUM_FORMAT_VAL(st.st_rdev));
1069
1070         buffer = mfree(buffer);
1071         r = read_one_line_file(sysfs, &buffer);
1072         if (r < 0)
1073                 return r;
1074         r = parse_devnum(buffer, &devno);
1075         if (r < 0)
1076                 return r;
1077
1078         whole_fd = r = device_open_from_devnum(S_IFBLK, devno, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY, NULL);
1079         if (r < 0)
1080                 return r;
1081
1082         return block_device_resize_partition(
1083                         whole_fd,
1084                         partno,
1085                         offset == UINT64_MAX ? current_offset : offset,
1086                         size == UINT64_MAX ? current_size : size);
1087 }
1088
1089 int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) {
1090         struct loop_info64 info;
1091
1092         assert(d);
1093         assert(d->fd >= 0);
1094
1095         /* Changes the offset/start of the loop device relative to the beginning of the underlying file or
1096          * block device. If this loop device actually refers to a partition and not a loopback device, we'll
1097          * try to adjust the partition offsets instead.
1098          *
1099          * If either offset or size is UINT64_MAX we won't change that parameter. */
1100
1101         if (d->nr < 0) /* not a loopback device */
1102                 return resize_partition(d->fd, offset, size);
1103
1104         if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0)
1105                 return -errno;
1106
1107 #if HAVE_VALGRIND_MEMCHECK_H
1108         /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
1109         VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
1110 #endif
1111
1112         if (size == UINT64_MAX && offset == UINT64_MAX)
1113                 return 0;
1114         if (info.lo_sizelimit == size && info.lo_offset == offset)
1115                 return 0;
1116
1117         if (size != UINT64_MAX)
1118                 info.lo_sizelimit = size;
1119         if (offset != UINT64_MAX)
1120                 info.lo_offset = offset;
1121
1122         return RET_NERRNO(ioctl(d->fd, LOOP_SET_STATUS64, &info));
1123 }
1124
1125 int loop_device_flock(LoopDevice *d, int operation) {
1126         assert(IN_SET(operation & ~LOCK_NB, LOCK_UN, LOCK_SH, LOCK_EX));
1127         assert(d);
1128
1129         /* When unlocking just close the lock fd */
1130         if ((operation & ~LOCK_NB) == LOCK_UN) {
1131                 d->lock_fd = safe_close(d->lock_fd);
1132                 return 0;
1133         }
1134
1135         /* If we had no lock fd so far, create one and lock it right-away */
1136         if (d->lock_fd < 0) {
1137                 d->lock_fd = open_lock_fd(ASSERT_FD(d->fd), operation);
1138                 if (d->lock_fd < 0)
1139                         return d->lock_fd;
1140
1141                 return 0;
1142         }
1143
1144         /* Otherwise change the current lock mode on the existing fd */
1145         return RET_NERRNO(flock(d->lock_fd, operation));
1146 }
1147
1148 int loop_device_sync(LoopDevice *d) {
1149         assert(d);
1150
1151         /* We also do this implicitly in loop_device_unref(). Doing this explicitly here has the benefit that
1152          * we can check the return value though. */
1153
1154         return RET_NERRNO(fsync(ASSERT_FD(d->fd)));
1155 }
1156
1157 int loop_device_set_autoclear(LoopDevice *d, bool autoclear) {
1158         struct loop_info64 info;
1159
1160         assert(d);
1161
1162         if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
1163                 return -errno;
1164
1165         if (autoclear == FLAGS_SET(info.lo_flags, LO_FLAGS_AUTOCLEAR))
1166                 return 0;
1167
1168         SET_FLAG(info.lo_flags, LO_FLAGS_AUTOCLEAR, autoclear);
1169
1170         if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
1171                 return -errno;
1172
1173         return 1;
1174 }
1175
1176 int loop_device_set_filename(LoopDevice *d, const char *name) {
1177         struct loop_info64 info;
1178
1179         assert(d);
1180
1181         /* Sets the .lo_file_name of the loopback device. This is supposed to contain the path to the file
1182          * backing the block device, but is actually just a free-form string you can pass to the kernel. Most
1183          * tools that actually care for the backing file path use the sysfs attribute file loop/backing_file
1184          * which is a kernel generated string, subject to file system namespaces and such.
1185          *
1186          * .lo_file_name is useful since userspace can select it freely when creating a loopback block
1187          * device, and we can use it for /dev/disk/by-loop-ref/ symlinks, and similar, so that apps can
1188          * recognize their own loopback files. */
1189
1190         if (name && strlen(name) >= sizeof(info.lo_file_name))
1191                 return -ENOBUFS;
1192
1193         if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
1194                 return -errno;
1195
1196         if (strneq((char*) info.lo_file_name, strempty(name), sizeof(info.lo_file_name)))
1197                 return 0;
1198
1199         if (name) {
1200                 strncpy((char*) info.lo_file_name, name, sizeof(info.lo_file_name)-1);
1201                 info.lo_file_name[sizeof(info.lo_file_name)-1] = 0;
1202         } else
1203                 memzero(info.lo_file_name, sizeof(info.lo_file_name));
1204
1205         if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
1206                 return -errno;
1207
1208         return 1;
1209 }