1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
3 #if HAVE_VALGRIND_MEMCHECK_H
4 #include <valgrind/memcheck.h>
9 #include <linux/blkpg.h>
11 #include <linux/loop.h>
13 #include <sys/ioctl.h>
16 #include "sd-device.h"
18 #include "alloc-util.h"
19 #include "blockdev-util.h"
20 #include "device-util.h"
21 #include "devnum-util.h"
23 #include "errno-util.h"
26 #include "loop-util.h"
27 #include "missing_loop.h"
28 #include "parse-util.h"
29 #include "path-util.h"
30 #include "random-util.h"
31 #include "stat-util.h"
32 #include "stdio-util.h"
33 #include "string-util.h"
34 #include "tmpfile-util.h"
/* To be used with _cleanup_(): detaches the backing file from the loopback device and closes the fd.
 * Runs on error/exit paths, hence both operations are deliberately best-effort. */
static void cleanup_clear_loop_close(int *fd) {
        if (*fd < 0) /* Nothing was opened/attached, nothing to clean up. */
                return;

        (void) ioctl(*fd, LOOP_CLR_FD);
        (void) safe_close(*fd);
}
/* Checks whether the loopback block device has a backing file attached.
 * Returns > 0 if bound, 0 if not bound, negative errno-style error if the state cannot be
 * determined. The kernel reports ENXIO from LOOP_GET_STATUS64 for an unbound device; any other
 * error is a real failure and must not be mistaken for "not bound". */
static int loop_is_bound(int fd) {
        struct loop_info64 info;

        if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0) {
                if (errno == ENXIO)
                        return false; /* not bound! */

                return -errno;
        }

        return true; /* bound! */
}
59 static int get_current_uevent_seqnum(uint64_t *ret
) {
60 _cleanup_free_
char *p
= NULL
;
63 r
= read_full_virtual_file("/sys/kernel/uevent_seqnum", &p
, NULL
);
65 return log_debug_errno(r
, "Failed to read current uevent sequence number: %m");
67 r
= safe_atou64(strstrip(p
), ret
);
69 return log_debug_errno(r
, "Failed to parse current uevent sequence number: %s", p
);
74 static int open_lock_fd(int primary_fd
, int operation
) {
75 _cleanup_close_
int lock_fd
= -1;
77 assert(primary_fd
>= 0);
78 assert(IN_SET(operation
& ~LOCK_NB
, LOCK_SH
, LOCK_EX
));
80 lock_fd
= fd_reopen(primary_fd
, O_RDWR
|O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
);
84 if (flock(lock_fd
, operation
) < 0)
87 return TAKE_FD(lock_fd
);
/* If direct IO was requested in the config, checks whether the kernel actually enabled it, logging a
 * debug message if not. Returns 0 on success (even if direct IO could not be enabled — that is not
 * fatal), negative errno-style error if the device cannot be queried. */
static int loop_configure_verify_direct_io(int fd, const struct loop_config *c) {

        if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO)) {
                struct loop_info64 info;

                if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0)
                        return log_debug_errno(errno, "Failed to issue LOOP_GET_STATUS64: %m");

#if HAVE_VALGRIND_MEMCHECK_H
                /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
                VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif

                /* On older kernels (<= 5.3) it was necessary to set the block size of the loopback block
                 * device to the logical block size of the underlying file system. Since there was no nice
                 * way to query the value, we are not bothering to do this however. On newer kernels the
                 * block size is propagated automatically and does not require intervention from us. We'll
                 * check here if enabling direct IO worked, to make this easily debuggable however.
                 *
                 * (Should anyone really care and actually wants direct IO on old kernels: it might be worth
                 * enabling direct IO with iteratively larger block sizes until it eventually works.) */
                if (!FLAGS_SET(info.lo_flags, LO_FLAGS_DIRECT_IO))
                        log_debug("Could not enable direct IO mode, proceeding in buffered IO mode.");
        }

        return 0;
}
119 static int loop_configure_verify(int fd
, const struct loop_config
*c
) {
126 if (c
->info
.lo_sizelimit
!= 0) {
127 /* Kernel 5.8 vanilla doesn't properly propagate the size limit into the
128 * block device. If it's used, let's immediately check if it had the desired
129 * effect hence. And if not use classic LOOP_SET_STATUS64. */
132 if (ioctl(fd
, BLKGETSIZE64
, &z
) < 0)
135 if (z
!= c
->info
.lo_sizelimit
) {
136 log_debug("LOOP_CONFIGURE is broken, doesn't honour .lo_sizelimit. Falling back to LOOP_SET_STATUS64.");
141 if (FLAGS_SET(c
->info
.lo_flags
, LO_FLAGS_PARTSCAN
)) {
142 /* Kernel 5.8 vanilla doesn't properly propagate the partition scanning flag
143 * into the block device. Let's hence verify if things work correctly here
144 * before returning. */
146 r
= blockdev_partscan_enabled(fd
);
150 log_debug("LOOP_CONFIGURE is broken, doesn't honour LO_FLAGS_PARTSCAN. Falling back to LOOP_SET_STATUS64.");
155 r
= loop_configure_verify_direct_io(fd
, c
);
162 static int loop_configure_fallback(int fd
, const struct loop_config
*c
) {
163 struct loop_info64 info_copy
;
168 /* Only some of the flags LOOP_CONFIGURE can set are also settable via LOOP_SET_STATUS64, hence mask
171 info_copy
.lo_flags
&= LOOP_SET_STATUS_SETTABLE_FLAGS
;
173 /* Since kernel commit 5db470e229e22b7eda6e23b5566e532c96fb5bc3 (kernel v5.0) the LOOP_SET_STATUS64
174 * ioctl can return EAGAIN in case we change the lo_offset field, if someone else is accessing the
175 * block device while we try to reconfigure it. This is a pretty common case, since udev might
176 * instantly start probing the device as soon as we attach an fd to it. Hence handle it in two ways:
177 * first, let's take the BSD lock to ensure that udev will not step in between the point in
178 * time where we attach the fd and where we reconfigure the device. Secondly, let's wait 50ms on
179 * EAGAIN and retry. The former should be an efficient mechanism to avoid we have to wait 50ms
180 * needlessly if we are just racing against udev. The latter is protection against all other cases,
181 * i.e. peers that do not take the BSD lock. */
183 for (unsigned n_attempts
= 0;;) {
184 if (ioctl(fd
, LOOP_SET_STATUS64
, &info_copy
) >= 0)
187 if (errno
!= EAGAIN
|| ++n_attempts
>= 64)
188 return log_debug_errno(errno
, "Failed to configure loopback block device: %m");
190 /* Sleep some random time, but at least 10ms, at most 250ms. Increase the delay the more
191 * failed attempts we see */
192 (void) usleep(UINT64_C(10) * USEC_PER_MSEC
+
193 random_u64_range(UINT64_C(240) * USEC_PER_MSEC
* n_attempts
/64));
196 /* Work around a kernel bug, where changing offset/size of the loopback device doesn't correctly
197 * invalidate the buffer cache. For details see:
199 * https://android.googlesource.com/platform/system/apex/+/bef74542fbbb4cd629793f4efee8e0053b360570
201 * This was fixed in kernel 5.0, see:
203 * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5db470e229e22b7eda6e23b5566e532c96fb5bc3
205 * We'll run the work-around here in the legacy LOOP_SET_STATUS64 codepath. In the LOOP_CONFIGURE
206 * codepath above it should not be necessary. */
207 if (c
->info
.lo_offset
!= 0 || c
->info
.lo_sizelimit
!= 0)
208 if (ioctl(fd
, BLKFLSBUF
, 0) < 0)
209 log_debug_errno(errno
, "Failed to issue BLKFLSBUF ioctl, ignoring: %m");
211 /* LO_FLAGS_DIRECT_IO is a flags we need to configure via explicit ioctls. */
212 if (FLAGS_SET(c
->info
.lo_flags
, LO_FLAGS_DIRECT_IO
))
213 if (ioctl(fd
, LOOP_SET_DIRECT_IO
, 1UL) < 0)
214 log_debug_errno(errno
, "Failed to enable direct IO mode, ignoring: %m");
216 return loop_configure_verify_direct_io(fd
, c
);
219 static int loop_configure(
223 const struct loop_config
*c
,
224 bool *try_loop_configure
,
225 uint64_t *ret_seqnum_not_before
,
226 usec_t
*ret_timestamp_not_before
,
229 _cleanup_close_
int lock_fd
= -1;
237 assert(try_loop_configure
);
239 /* Let's lock the device before we do anything. We take the BSD lock on a second, separately opened
240 * fd for the device. udev after all watches for close() events (specifically IN_CLOSE_WRITE) on
241 * block devices to reprobe them, hence by having a separate fd we will later close() we can ensure
242 * we trigger udev after everything is done. If we'd lock our own fd instead and keep it open for a
243 * long time udev would possibly never run on it again, even though the fd is unlocked, simply
244 * because we never close() it. It also has the nice benefit we can use the _cleanup_close_ logic to
245 * automatically release the lock, after we are done. */
246 lock_fd
= open_lock_fd(fd
, LOCK_EX
);
250 /* Let's see if the device is really detached, i.e. currently has no associated partition block
251 * devices. On various kernels (such as 5.8) it is possible to have a loopback block device that
252 * superficially is detached but still has partition block devices associated for it. Let's then
253 * manually remove the partitions via BLKPG, and tell the caller we did that via EUCLEAN, so they try
255 r
= block_device_has_partitions(dev
);
259 r
= loop_is_bound(fd
);
265 /* Unbound but has children? Remove all partitions, and report this to the caller, to try
266 * again, and count this as an attempt. */
268 r
= block_device_remove_all_partitions(dev
, fd
);
275 if (*try_loop_configure
) {
276 /* Acquire uevent seqnum immediately before attaching the loopback device. This allows
277 * callers to ignore all uevents with a seqnum before this one, if they need to associate
278 * uevent with this attachment. Doing so isn't race-free though, as uevents that happen in
279 * the window between this reading of the seqnum, and the LOOP_CONFIGURE call might still be
280 * mistaken as originating from our attachment, even though might be caused by an earlier
281 * use. But doing this at least shortens the race window a bit. */
282 r
= get_current_uevent_seqnum(&seqnum
);
286 timestamp
= now(CLOCK_MONOTONIC
);
288 if (ioctl(fd
, LOOP_CONFIGURE
, c
) < 0) {
289 /* Do fallback only if LOOP_CONFIGURE is not supported, propagate all other
290 * errors. Note that the kernel is weird: non-existing ioctls currently return EINVAL
291 * rather than ENOTTY on loopback block devices. They should fix that in the kernel,
292 * but in the meantime we accept both here. */
293 if (!ERRNO_IS_NOT_SUPPORTED(errno
) && errno
!= EINVAL
)
296 *try_loop_configure
= false;
298 r
= loop_configure_verify(fd
, c
);
302 /* LOOP_CONFIGURE doesn't work. Remember that. */
303 *try_loop_configure
= false;
305 /* We return EBUSY here instead of retrying immediately with LOOP_SET_FD,
306 * because LOOP_CLR_FD is async: if the operation cannot be executed right
307 * away it just sets the autoclear flag on the device. This means there's a
308 * good chance we cannot actually reuse the loopback device right-away. Hence
309 * let's assume it's busy, avoid the trouble and let the calling loop call us
310 * again with a new, likely unused device. */
319 /* Let's read the seqnum again, to shorten the window. */
320 r
= get_current_uevent_seqnum(&seqnum
);
324 timestamp
= now(CLOCK_MONOTONIC
);
326 if (ioctl(fd
, LOOP_SET_FD
, c
->fd
) < 0)
329 r
= loop_configure_fallback(fd
, c
);
334 if (ret_seqnum_not_before
)
335 *ret_seqnum_not_before
= seqnum
;
336 if (ret_timestamp_not_before
)
337 *ret_timestamp_not_before
= timestamp
;
339 *ret_lock_fd
= TAKE_FD(lock_fd
);
344 /* Close the lock fd explicitly before clearing the loopback block device, since an additional open
345 * fd would block the clearing to succeed */
346 lock_fd
= safe_close(lock_fd
);
347 (void) ioctl(fd
, LOOP_CLR_FD
);
351 static int loop_device_make_internal(
361 _cleanup_(sd_device_unrefp
) sd_device
*dev
= NULL
;
362 _cleanup_close_
int direct_io_fd
= -1;
363 _cleanup_free_
char *node
= NULL
, *backing_file
= NULL
;
364 bool try_loop_configure
= true;
365 struct loop_config config
;
367 uint64_t seqnum
= UINT64_MAX
;
368 usec_t timestamp
= USEC_INFINITY
;
374 assert(IN_SET(open_flags
, O_RDWR
, O_RDONLY
));
376 if (fstat(fd
, &st
) < 0)
379 if (S_ISBLK(st
.st_mode
)) {
380 if (offset
== 0 && IN_SET(size
, 0, UINT64_MAX
))
381 /* If this is already a block device and we are supposed to cover the whole of it
382 * then store an fd to the original open device node — and do not actually create an
383 * unnecessary loopback device for it. */
384 return loop_device_open_full(NULL
, fd
, open_flags
, lock_op
, ret
);
386 r
= stat_verify_regular(&st
);
392 r
= path_make_absolute_cwd(path
, &backing_file
);
396 path_simplify(backing_file
);
398 r
= fd_get_path(fd
, &backing_file
);
403 f_flags
= fcntl(fd
, F_GETFL
);
407 if (FLAGS_SET(loop_flags
, LO_FLAGS_DIRECT_IO
) != FLAGS_SET(f_flags
, O_DIRECT
)) {
408 /* If LO_FLAGS_DIRECT_IO is requested, then make sure we have the fd open with O_DIRECT, as
409 * that's required. Conversely, if it's off require that O_DIRECT is off too (that's because
410 * new kernels will implicitly enable LO_FLAGS_DIRECT_IO if O_DIRECT is set).
412 * Our intention here is that LO_FLAGS_DIRECT_IO is the primary knob, and O_DIRECT derived
413 * from that automatically. */
415 direct_io_fd
= fd_reopen(fd
, (FLAGS_SET(loop_flags
, LO_FLAGS_DIRECT_IO
) ? O_DIRECT
: 0)|O_CLOEXEC
|O_NONBLOCK
|open_flags
);
416 if (direct_io_fd
< 0) {
417 if (!FLAGS_SET(loop_flags
, LO_FLAGS_DIRECT_IO
))
418 return log_debug_errno(errno
, "Failed to reopen file descriptor without O_DIRECT: %m");
420 /* Some file systems might not support O_DIRECT, let's gracefully continue without it then. */
421 log_debug_errno(errno
, "Failed to enable O_DIRECT for backing file descriptor for loopback device. Continuing without.");
422 loop_flags
&= ~LO_FLAGS_DIRECT_IO
;
424 fd
= direct_io_fd
; /* From now on, operate on our new O_DIRECT fd */
427 /* On failure, lock_fd must be closed at first, otherwise LOOP_CLR_FD will fail. */
428 _cleanup_close_
int control
= -1;
429 _cleanup_(cleanup_clear_loop_close
) int loop_with_fd
= -1;
430 _cleanup_close_
int lock_fd
= -1;
432 control
= open("/dev/loop-control", O_RDWR
|O_CLOEXEC
|O_NOCTTY
|O_NONBLOCK
);
436 config
= (struct loop_config
) {
439 /* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
440 .lo_flags
= (loop_flags
& ~LO_FLAGS_READ_ONLY
) | ((open_flags
& O_ACCMODE
) == O_RDONLY
? LO_FLAGS_READ_ONLY
: 0) | LO_FLAGS_AUTOCLEAR
,
442 .lo_sizelimit
= size
== UINT64_MAX
? 0 : size
,
446 /* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might
447 * be gone already, taken by somebody else racing against us. */
448 for (unsigned n_attempts
= 0;;) {
449 _cleanup_close_
int loop
= -1;
451 /* Let's take a lock on the control device first. On a busy system, where many programs
452 * attempt to allocate a loopback device at the same time, we might otherwise keep looping
453 * around relatively heavy operations: asking for a free loopback device, then opening it,
454 * validating it, attaching something to it. Let's serialize this whole operation, to make
455 * unnecessary busywork less likely. Note that this is just something we do to optimize our
456 * own code (and whoever else decides to use LOCK_EX locks for this), taking this lock is not
457 * necessary, it just means it's less likely we have to iterate through this loop again and
458 * again if our own code races against our own code.
460 * Note: our lock protocol is to take the /dev/loop-control lock first, and the block device
461 * lock second, if both are taken, and always in this order, to avoid ABBA locking issues. */
462 if (flock(control
, LOCK_EX
) < 0)
465 nr
= ioctl(control
, LOOP_CTL_GET_FREE
);
470 if (asprintf(&node
, "/dev/loop%i", nr
) < 0)
473 dev
= sd_device_unref(dev
);
474 r
= sd_device_new_from_devname(&dev
, node
);
478 loop
= sd_device_open(dev
, O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
|open_flags
);
480 /* Somebody might've gotten the same number from the kernel, used the device,
481 * and called LOOP_CTL_REMOVE on it. Let's retry with a new number. */
482 if (!ERRNO_IS_DEVICE_ABSENT(errno
))
485 r
= loop_configure(dev
, loop
, nr
, &config
, &try_loop_configure
, &seqnum
, ×tamp
, &lock_fd
);
487 loop_with_fd
= TAKE_FD(loop
);
490 if (!IN_SET(r
, -EBUSY
, -EUCLEAN
)) /* Busy, or some left-over partition devices that
491 * were cleaned up. */
495 /* OK, this didn't work, let's try again a bit later, but first release the lock on the
497 if (flock(control
, LOCK_UN
) < 0)
500 if (++n_attempts
>= 64) /* Give up eventually */
503 /* Now close the loop device explicitly. This will release any lock acquired by
504 * attach_empty_file() or similar, while we sleep below. */
505 loop
= safe_close(loop
);
507 /* Wait some random time, to make collision less likely. Let's pick a random time in the
508 * range 0ms…250ms, linearly scaled by the number of failed attempts. */
509 (void) usleep(random_u64_range(UINT64_C(10) * USEC_PER_MSEC
+
510 UINT64_C(240) * USEC_PER_MSEC
* n_attempts
/64));
513 if (fstat(loop_with_fd
, &st
) < 0)
515 assert(S_ISBLK(st
.st_mode
));
517 uint64_t diskseq
= 0;
518 r
= fd_get_diskseq(loop_with_fd
, &diskseq
);
519 if (r
< 0 && r
!= -EOPNOTSUPP
)
522 switch (lock_op
& ~LOCK_NB
) {
523 case LOCK_EX
: /* Already in effect */
525 case LOCK_SH
: /* Downgrade */
526 if (flock(lock_fd
, lock_op
) < 0)
529 case LOCK_UN
: /* Release */
530 lock_fd
= safe_close(lock_fd
);
533 assert_not_reached();
536 d
= new(LoopDevice
, 1);
540 .fd
= TAKE_FD(loop_with_fd
),
541 .lock_fd
= TAKE_FD(lock_fd
),
542 .node
= TAKE_PTR(node
),
545 .dev
= TAKE_PTR(dev
),
546 .backing_file
= TAKE_PTR(backing_file
),
548 .uevent_seqnum_not_before
= seqnum
,
549 .timestamp_not_before
= timestamp
,
552 log_debug("Successfully acquired %s, devno=%u:%u, nr=%i, diskseq=%" PRIu64
,
554 major(d
->devno
), minor(d
->devno
),
/* Applies the $SYSTEMD_LOOP_DIRECT_IO environment override to the requested loop flags: direct IO is
 * enabled by default, and only disabled if the variable is explicitly set to a false value. */
static uint32_t loop_flags_mangle(uint32_t loop_flags) {
        int r;

        r = getenv_bool("SYSTEMD_LOOP_DIRECT_IO");
        if (r < 0 && r != -ENXIO) /* -ENXIO == variable unset, which is fine */
                log_debug_errno(r, "Failed to parse $SYSTEMD_LOOP_DIRECT_IO, ignoring: %m");

        return UPDATE_FLAG(loop_flags, LO_FLAGS_DIRECT_IO, r != 0); /* Turn on LO_FLAGS_DIRECT_IO by default, unless explicitly configured to off. */
}
572 int loop_device_make(
584 return loop_device_make_internal(
590 loop_flags_mangle(loop_flags
),
595 int loop_device_make_by_path(
602 int r
, basic_flags
, direct_flags
, rdwr_flags
;
603 _cleanup_close_
int fd
= -1;
608 assert(open_flags
< 0 || IN_SET(open_flags
, O_RDWR
, O_RDONLY
));
610 /* Passing < 0 as open_flags here means we'll try to open the device writable if we can, retrying
611 * read-only if we cannot. */
613 loop_flags
= loop_flags_mangle(loop_flags
);
615 /* Let's open with O_DIRECT if we can. But not all file systems support that, hence fall back to
616 * non-O_DIRECT mode automatically, if it fails. */
618 basic_flags
= O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
;
619 direct_flags
= FLAGS_SET(loop_flags
, LO_FLAGS_DIRECT_IO
) ? O_DIRECT
: 0;
620 rdwr_flags
= open_flags
>= 0 ? open_flags
: O_RDWR
;
622 fd
= open(path
, basic_flags
|direct_flags
|rdwr_flags
);
623 if (fd
< 0 && direct_flags
!= 0) /* If we had O_DIRECT on, and things failed with that, let's immediately try again without */
624 fd
= open(path
, basic_flags
|rdwr_flags
);
626 direct
= direct_flags
!= 0;
630 /* Retry read-only? */
631 if (open_flags
>= 0 || !(ERRNO_IS_PRIVILEGE(r
) || r
== -EROFS
))
634 fd
= open(path
, basic_flags
|direct_flags
|O_RDONLY
);
635 if (fd
< 0 && direct_flags
!= 0) /* as above */
636 fd
= open(path
, basic_flags
|O_RDONLY
);
638 direct
= direct_flags
!= 0;
640 return r
; /* Propagate original error */
642 open_flags
= O_RDONLY
;
643 } else if (open_flags
< 0)
646 log_debug("Opened '%s' in %s access mode%s, with O_DIRECT %s%s.",
648 open_flags
== O_RDWR
? "O_RDWR" : "O_RDONLY",
649 open_flags
!= rdwr_flags
? " (O_RDWR was requested but not allowed)" : "",
650 direct
? "enabled" : "disabled",
651 direct
!= (direct_flags
!= 0) ? " (O_DIRECT was requested but not supported)" : "");
653 return loop_device_make_internal(path
, fd
, open_flags
, 0, 0, loop_flags
, lock_op
, ret
);
656 LoopDevice
* loop_device_unref(LoopDevice
*d
) {
657 _cleanup_close_
int control
= -1;
663 /* Release any lock we might have on the device first. We want to open+lock the /dev/loop-control
664 * device below, but our lock protocol says that if both control and block device locks are taken,
665 * the control lock needs to be taken first, the block device lock second — in order to avoid ABBA
666 * locking issues. Moreover, we want to issue LOOP_CLR_FD on the block device further down, and that
667 * would fail if we had another fd open to the device. */
668 d
->lock_fd
= safe_close(d
->lock_fd
);
670 /* Let's open the control device early, and lock it, so that we can release our block device and
671 * delete it in a synchronized fashion, and allocators won't needlessly see the block device as free
672 * while we are about to delete it. */
673 if (!LOOP_DEVICE_IS_FOREIGN(d
) && !d
->relinquished
) {
674 control
= open("/dev/loop-control", O_RDWR
|O_CLOEXEC
|O_NOCTTY
|O_NONBLOCK
);
676 log_debug_errno(errno
, "Failed to open loop control device, cannot remove loop device '%s', ignoring: %m", strna(d
->node
));
677 else if (flock(control
, LOCK_EX
) < 0)
678 log_debug_errno(errno
, "Failed to lock loop control device, ignoring: %m");
681 /* Then let's release the loopback block device */
683 /* Implicitly sync the device, since otherwise in-flight blocks might not get written */
684 if (fsync(d
->fd
) < 0)
685 log_debug_errno(errno
, "Failed to sync loop block device, ignoring: %m");
687 if (!LOOP_DEVICE_IS_FOREIGN(d
) && !d
->relinquished
) {
688 /* We are supposed to clear the loopback device. Let's do this synchronously: lock
689 * the device, manually remove all partitions and then clear it. This should ensure
690 * udev doesn't concurrently access the devices, and we can be reasonably sure that
691 * once we are done here the device is cleared and all its partition children
692 * removed. Note that we lock our primary device fd here (and not a separate locking
693 * fd, as we do during allocation, since we want to keep the lock all the way through
694 * the LOOP_CLR_FD, but that call would fail if we had more than one fd open.) */
696 if (flock(d
->fd
, LOCK_EX
) < 0)
697 log_debug_errno(errno
, "Failed to lock loop block device, ignoring: %m");
699 r
= block_device_remove_all_partitions(d
->dev
, d
->fd
);
701 log_debug_errno(r
, "Failed to remove partitions of loopback block device, ignoring: %m");
703 if (ioctl(d
->fd
, LOOP_CLR_FD
) < 0)
704 log_debug_errno(errno
, "Failed to clear loop device, ignoring: %m");
710 /* Now that the block device is released, let's also try to remove it */
712 for (unsigned n_attempts
= 0;;) {
713 if (ioctl(control
, LOOP_CTL_REMOVE
, d
->nr
) >= 0)
715 if (errno
!= EBUSY
|| ++n_attempts
>= 64) {
716 log_debug_errno(errno
, "Failed to remove device %s: %m", strna(d
->node
));
719 (void) usleep(50 * USEC_PER_MSEC
);
723 sd_device_unref(d
->dev
);
724 free(d
->backing_file
);
728 void loop_device_relinquish(LoopDevice
*d
) {
731 /* Don't attempt to clean up the loop device anymore from this point on. Leave the clean-ing up to the kernel
732 * itself, using the loop device "auto-clear" logic we already turned on when creating the device. */
734 d
->relinquished
= true;
737 void loop_device_unrelinquish(LoopDevice
*d
) {
739 d
->relinquished
= false;
742 int loop_device_open_full(
743 const char *loop_path
,
749 _cleanup_(sd_device_unrefp
) sd_device
*dev
= NULL
;
750 _cleanup_close_
int fd
= -1, lock_fd
= -1;
751 _cleanup_free_
char *p
= NULL
, *backing_file
= NULL
;
752 struct loop_info64 info
;
753 uint64_t diskseq
= 0;
758 assert(loop_path
|| loop_fd
>= 0);
759 assert(IN_SET(open_flags
, O_RDWR
, O_RDONLY
));
763 fd
= open(loop_path
, O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
|open_flags
);
769 if (fstat(loop_fd
, &st
) < 0)
771 if (!S_ISBLK(st
.st_mode
))
774 r
= sd_device_new_from_stat_rdev(&dev
, &st
);
779 /* If loop_fd is provided through the argument, then we reopen the inode here, instead of
780 * keeping just a dup() clone of it around, since we want to ensure that the O_DIRECT
781 * flag of the handle we keep is off, we have our own file index, and have the right
782 * read/write mode in effect.*/
783 fd
= fd_reopen(loop_fd
, O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
|open_flags
);
789 if (ioctl(loop_fd
, LOOP_GET_STATUS64
, &info
) >= 0) {
792 #if HAVE_VALGRIND_MEMCHECK_H
793 /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
794 VALGRIND_MAKE_MEM_DEFINED(&info
, sizeof(info
));
798 if (sd_device_get_sysattr_value(dev
, "loop/backing_file", &s
) >= 0) {
799 backing_file
= strdup(s
);
805 r
= fd_get_diskseq(loop_fd
, &diskseq
);
806 if (r
< 0 && r
!= -EOPNOTSUPP
)
809 if ((lock_op
& ~LOCK_NB
) != LOCK_UN
) {
810 lock_fd
= open_lock_fd(loop_fd
, lock_op
);
815 r
= sd_device_get_devname(dev
, &loop_path
);
819 p
= strdup(loop_path
);
823 d
= new(LoopDevice
, 1);
829 .lock_fd
= TAKE_FD(lock_fd
),
832 .dev
= TAKE_PTR(dev
),
833 .backing_file
= TAKE_PTR(backing_file
),
834 .relinquished
= true, /* It's not ours, don't try to destroy it when this object is freed */
837 .uevent_seqnum_not_before
= UINT64_MAX
,
838 .timestamp_not_before
= USEC_INFINITY
,
845 static int resize_partition(int partition_fd
, uint64_t offset
, uint64_t size
) {
846 char sysfs
[STRLEN("/sys/dev/block/:/partition") + 2*DECIMAL_STR_MAX(dev_t
) + 1];
847 _cleanup_free_
char *buffer
= NULL
;
848 uint64_t current_offset
, current_size
, partno
;
849 _cleanup_close_
int whole_fd
= -1;
854 assert(partition_fd
>= 0);
856 /* Resizes the partition the loopback device refer to (assuming it refers to one instead of an actual
857 * loopback device), and changes the offset, if needed. This is a fancy wrapper around
858 * BLKPG_RESIZE_PARTITION. */
860 if (fstat(partition_fd
, &st
) < 0)
863 assert(S_ISBLK(st
.st_mode
));
865 xsprintf(sysfs
, "/sys/dev/block/" DEVNUM_FORMAT_STR
"/partition", DEVNUM_FORMAT_VAL(st
.st_rdev
));
866 r
= read_one_line_file(sysfs
, &buffer
);
867 if (r
== -ENOENT
) /* not a partition, cannot resize */
871 r
= safe_atou64(buffer
, &partno
);
875 xsprintf(sysfs
, "/sys/dev/block/" DEVNUM_FORMAT_STR
"/start", DEVNUM_FORMAT_VAL(st
.st_rdev
));
877 buffer
= mfree(buffer
);
878 r
= read_one_line_file(sysfs
, &buffer
);
881 r
= safe_atou64(buffer
, ¤t_offset
);
884 if (current_offset
> UINT64_MAX
/512U)
886 current_offset
*= 512U;
888 if (ioctl(partition_fd
, BLKGETSIZE64
, ¤t_size
) < 0)
891 if (size
== UINT64_MAX
&& offset
== UINT64_MAX
)
893 if (current_size
== size
&& current_offset
== offset
)
896 xsprintf(sysfs
, "/sys/dev/block/" DEVNUM_FORMAT_STR
"/../dev", DEVNUM_FORMAT_VAL(st
.st_rdev
));
898 buffer
= mfree(buffer
);
899 r
= read_one_line_file(sysfs
, &buffer
);
902 r
= parse_devnum(buffer
, &devno
);
906 whole_fd
= r
= device_open_from_devnum(S_IFBLK
, devno
, O_RDWR
|O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
, NULL
);
910 return block_device_resize_partition(
913 offset
== UINT64_MAX
? current_offset
: offset
,
914 size
== UINT64_MAX
? current_size
: size
);
917 int loop_device_refresh_size(LoopDevice
*d
, uint64_t offset
, uint64_t size
) {
918 struct loop_info64 info
;
923 /* Changes the offset/start of the loop device relative to the beginning of the underlying file or
924 * block device. If this loop device actually refers to a partition and not a loopback device, we'll
925 * try to adjust the partition offsets instead.
927 * If either offset or size is UINT64_MAX we won't change that parameter. */
929 if (d
->nr
< 0) /* not a loopback device */
930 return resize_partition(d
->fd
, offset
, size
);
932 if (ioctl(d
->fd
, LOOP_GET_STATUS64
, &info
) < 0)
935 #if HAVE_VALGRIND_MEMCHECK_H
936 /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
937 VALGRIND_MAKE_MEM_DEFINED(&info
, sizeof(info
));
940 if (size
== UINT64_MAX
&& offset
== UINT64_MAX
)
942 if (info
.lo_sizelimit
== size
&& info
.lo_offset
== offset
)
945 if (size
!= UINT64_MAX
)
946 info
.lo_sizelimit
= size
;
947 if (offset
!= UINT64_MAX
)
948 info
.lo_offset
= offset
;
950 return RET_NERRNO(ioctl(d
->fd
, LOOP_SET_STATUS64
, &info
));
953 int loop_device_flock(LoopDevice
*d
, int operation
) {
954 assert(IN_SET(operation
& ~LOCK_NB
, LOCK_UN
, LOCK_SH
, LOCK_EX
));
957 /* When unlocking just close the lock fd */
958 if ((operation
& ~LOCK_NB
) == LOCK_UN
) {
959 d
->lock_fd
= safe_close(d
->lock_fd
);
963 /* If we had no lock fd so far, create one and lock it right-away */
964 if (d
->lock_fd
< 0) {
967 d
->lock_fd
= open_lock_fd(d
->fd
, operation
);
974 /* Otherwise change the current lock mode on the existing fd */
975 return RET_NERRNO(flock(d
->lock_fd
, operation
));
978 int loop_device_sync(LoopDevice
*d
) {
982 /* We also do this implicitly in loop_device_unref(). Doing this explicitly here has the benefit that
983 * we can check the return value though. */
985 return RET_NERRNO(fsync(d
->fd
));