]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
8c1be37e | 2 | |
10c1b188 LP |
3 | #if HAVE_VALGRIND_MEMCHECK_H |
4 | #include <valgrind/memcheck.h> | |
5 | #endif | |
6 | ||
dccca82b | 7 | #include <errno.h> |
8c1be37e | 8 | #include <fcntl.h> |
f1443709 LP |
9 | #include <linux/blkpg.h> |
10 | #include <linux/fs.h> | |
8c1be37e | 11 | #include <linux/loop.h> |
441ec804 | 12 | #include <sys/file.h> |
8c1be37e | 13 | #include <sys/ioctl.h> |
f2d9213f | 14 | #include <unistd.h> |
8c1be37e | 15 | |
021bf175 LP |
16 | #include "sd-device.h" |
17 | ||
8c1be37e | 18 | #include "alloc-util.h" |
86c1c1f3 | 19 | #include "blockdev-util.h" |
021bf175 | 20 | #include "device-util.h" |
7176f06c | 21 | #include "devnum-util.h" |
e8c7c4d9 | 22 | #include "env-util.h" |
b0a94268 | 23 | #include "errno-util.h" |
8c1be37e | 24 | #include "fd-util.h" |
f1443709 | 25 | #include "fileio.h" |
8c1be37e | 26 | #include "loop-util.h" |
86c1c1f3 | 27 | #include "missing_loop.h" |
f1443709 | 28 | #include "parse-util.h" |
e77cab82 | 29 | #include "path-util.h" |
b202ec20 | 30 | #include "random-util.h" |
3cc44114 | 31 | #include "stat-util.h" |
f1443709 | 32 | #include "stdio-util.h" |
f2d9213f | 33 | #include "string-util.h" |
021bf175 | 34 | #include "tmpfile-util.h" |
8c1be37e | 35 | |
/* Cleanup handler for a loop-device fd: detaches the backing file (LOOP_CLR_FD) and closes the fd.
 * Both operations are best-effort; a negative fd means "nothing to do". */
static void cleanup_clear_loop_close(int *fd) {
        if (*fd >= 0) {
                (void) ioctl(*fd, LOOP_CLR_FD);
                (void) safe_close(*fd);
        }
}
43 | ||
/* Checks whether the loop device behind 'fd' currently has a backing file attached.
 * Returns true if bound, false if not (kernel reports ENXIO), negative errno on any other error. */
static int loop_is_bound(int fd) {
        struct loop_info64 info;

        assert(fd >= 0);

        if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0)
                return true; /* bound! */

        /* ENXIO is how the kernel says "no backing file configured". */
        return errno == ENXIO ? false : -errno;
}
58 | ||
/* Reads the kernel's current uevent sequence number from /sys/kernel/uevent_seqnum.
 * Callers use this to discard uevents older than a loop-device attachment.
 * Returns 0 on success and stores the value in *ret; negative errno on read/parse failure. */
static int get_current_uevent_seqnum(uint64_t *ret) {
        _cleanup_free_ char *p = NULL;
        int r;

        r = read_full_virtual_file("/sys/kernel/uevent_seqnum", &p, NULL);
        if (r < 0)
                return log_debug_errno(r, "Failed to read current uevent sequence number: %m");

        /* The file contents carry a trailing newline; strip before parsing. */
        r = safe_atou64(strstrip(p), ret);
        if (r < 0)
                return log_debug_errno(r, "Failed to parse current uevent sequence number: %s", p);

        return 0;
}
73 | ||
7f52206a | 74 | static int open_lock_fd(int primary_fd, int operation) { |
10719a6f | 75 | _cleanup_close_ int lock_fd = -1; |
7f52206a LP |
76 | |
77 | assert(primary_fd >= 0); | |
10719a6f | 78 | assert(IN_SET(operation & ~LOCK_NB, LOCK_SH, LOCK_EX)); |
7f52206a LP |
79 | |
80 | lock_fd = fd_reopen(primary_fd, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY); | |
81 | if (lock_fd < 0) | |
82 | return lock_fd; | |
10719a6f | 83 | |
7f52206a LP |
84 | if (flock(lock_fd, operation) < 0) |
85 | return -errno; | |
86 | ||
10719a6f | 87 | return TAKE_FD(lock_fd); |
7f52206a LP |
88 | } |
89 | ||
/* After configuring a loop device, verifies whether the kernel actually honoured a requested
 * LO_FLAGS_DIRECT_IO. Failure to enable direct IO is not an error — we only log it, since the
 * device remains usable in buffered mode. Returns 0 on success, negative errno if the status
 * query itself fails. */
static int loop_configure_verify_direct_io(int fd, const struct loop_config *c) {
        /* BUGFIX: was 'assert(fd)', which rejects fd 0 (a valid descriptor) and accepts any
         * negative fd. Use the same validity check as the sibling functions. */
        assert(fd >= 0);
        assert(c);

        if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO)) {
                struct loop_info64 info;

                if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0)
                        return log_debug_errno(errno, "Failed to issue LOOP_GET_STATUS64: %m");

#if HAVE_VALGRIND_MEMCHECK_H
                /* The kernel may not fill the whole struct; tell valgrind it is initialized. */
                VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif

                /* On older kernels (<= 5.3) it was necessary to set the block size of the loopback block
                 * device to the logical block size of the underlying file system. Since there was no nice
                 * way to query the value, we are not bothering to do this however. On newer kernels the
                 * block size is propagated automatically and does not require intervention from us. We'll
                 * check here if enabling direct IO worked, to make this easily debuggable however.
                 *
                 * (Should anyone really care and actually wants direct IO on old kernels: it might be worth
                 * enabling direct IO with iteratively larger block sizes until it eventually works.) */
                if (!FLAGS_SET(info.lo_flags, LO_FLAGS_DIRECT_IO))
                        log_debug("Could not enable direct IO mode, proceeding in buffered IO mode.");
        }

        return 0;
}
118 | ||
119 | static int loop_configure_verify(int fd, const struct loop_config *c) { | |
120 | bool broken = false; | |
121 | int r; | |
122 | ||
123 | assert(fd >= 0); | |
124 | assert(c); | |
125 | ||
126 | if (c->info.lo_sizelimit != 0) { | |
127 | /* Kernel 5.8 vanilla doesn't properly propagate the size limit into the | |
128 | * block device. If it's used, let's immediately check if it had the desired | |
129 | * effect hence. And if not use classic LOOP_SET_STATUS64. */ | |
130 | uint64_t z; | |
131 | ||
132 | if (ioctl(fd, BLKGETSIZE64, &z) < 0) | |
133 | return -errno; | |
134 | ||
135 | if (z != c->info.lo_sizelimit) { | |
136 | log_debug("LOOP_CONFIGURE is broken, doesn't honour .lo_sizelimit. Falling back to LOOP_SET_STATUS64."); | |
137 | broken = true; | |
138 | } | |
139 | } | |
140 | ||
141 | if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_PARTSCAN)) { | |
142 | /* Kernel 5.8 vanilla doesn't properly propagate the partition scanning flag | |
143 | * into the block device. Let's hence verify if things work correctly here | |
144 | * before returning. */ | |
145 | ||
146 | r = blockdev_partscan_enabled(fd); | |
147 | if (r < 0) | |
148 | return r; | |
149 | if (r == 0) { | |
150 | log_debug("LOOP_CONFIGURE is broken, doesn't honour LO_FLAGS_PARTSCAN. Falling back to LOOP_SET_STATUS64."); | |
151 | broken = true; | |
152 | } | |
153 | } | |
154 | ||
155 | r = loop_configure_verify_direct_io(fd, c); | |
156 | if (r < 0) | |
157 | return r; | |
158 | ||
159 | return !broken; | |
160 | } | |
161 | ||
/* Legacy configuration path for kernels without (or with broken) LOOP_CONFIGURE: applies the
 * settings via LOOP_SET_STATUS64, with an EAGAIN retry loop and two kernel-bug work-arounds.
 * Returns 0 on success, negative errno on failure. */
static int loop_configure_fallback(int fd, const struct loop_config *c) {
        struct loop_info64 info_copy;

        assert(fd >= 0);
        assert(c);

        /* Only some of the flags LOOP_CONFIGURE can set are also settable via LOOP_SET_STATUS64, hence mask
         * them out. */
        info_copy = c->info;
        info_copy.lo_flags &= LOOP_SET_STATUS_SETTABLE_FLAGS;

        /* Since kernel commit 5db470e229e22b7eda6e23b5566e532c96fb5bc3 (kernel v5.0) the LOOP_SET_STATUS64
         * ioctl can return EAGAIN in case we change the lo_offset field, if someone else is accessing the
         * block device while we try to reconfigure it. This is a pretty common case, since udev might
         * instantly start probing the device as soon as we attach an fd to it. Hence handle it in two ways:
         * first, let's take the BSD lock to ensure that udev will not step in between the point in
         * time where we attach the fd and where we reconfigure the device. Secondly, let's wait 50ms on
         * EAGAIN and retry. The former should be an efficient mechanism to avoid we have to wait 50ms
         * needlessly if we are just racing against udev. The latter is protection against all other cases,
         * i.e. peers that do not take the BSD lock. */

        for (unsigned n_attempts = 0;;) {
                if (ioctl(fd, LOOP_SET_STATUS64, &info_copy) >= 0)
                        break;

                /* Give up after 64 failed attempts, or on any error other than EAGAIN. */
                if (errno != EAGAIN || ++n_attempts >= 64)
                        return log_debug_errno(errno, "Failed to configure loopback block device: %m");

                /* Sleep some random time, but at least 10ms, at most 250ms. Increase the delay the more
                 * failed attempts we see */
                (void) usleep(UINT64_C(10) * USEC_PER_MSEC +
                              random_u64_range(UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
        }

        /* Work around a kernel bug, where changing offset/size of the loopback device doesn't correctly
         * invalidate the buffer cache. For details see:
         *
         * https://android.googlesource.com/platform/system/apex/+/bef74542fbbb4cd629793f4efee8e0053b360570
         *
         * This was fixed in kernel 5.0, see:
         *
         * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5db470e229e22b7eda6e23b5566e532c96fb5bc3
         *
         * We'll run the work-around here in the legacy LOOP_SET_STATUS64 codepath. In the LOOP_CONFIGURE
         * codepath above it should not be necessary. */
        if (c->info.lo_offset != 0 || c->info.lo_sizelimit != 0)
                if (ioctl(fd, BLKFLSBUF, 0) < 0)
                        log_debug_errno(errno, "Failed to issue BLKFLSBUF ioctl, ignoring: %m");

        /* LO_FLAGS_DIRECT_IO is a flag we need to configure via an explicit ioctl. */
        if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO))
                if (ioctl(fd, LOOP_SET_DIRECT_IO, 1UL) < 0)
                        log_debug_errno(errno, "Failed to enable direct IO mode, ignoring: %m");

        /* Check (and log) whether direct IO actually stuck; failures there are non-fatal. */
        return loop_configure_verify_direct_io(fd, c);
}
218 | ||
/* Attaches the backing fd described by 'c' to the loop device 'fd' (/dev/loop<nr>), preferring the
 * atomic LOOP_CONFIGURE ioctl and falling back to LOOP_SET_FD + LOOP_SET_STATUS64 where needed.
 *
 * On success returns 0 and optionally hands out: the uevent seqnum and monotonic timestamp taken
 * just before attachment (so callers can discard older uevents), and a locked fd holding a BSD
 * LOCK_EX on the device (via *ret_lock_fd, ownership transferred).
 *
 * Returns -EBUSY if the device is already bound or cannot be reused right away, -EUCLEAN if stale
 * partition devices were removed and the caller should retry with a fresh device, other negative
 * errno on error. *try_loop_configure is cleared when LOOP_CONFIGURE turns out to be unsupported
 * or broken, so callers skip it on subsequent attempts. */
static int loop_configure(
                sd_device *dev,
                int fd,
                int nr,
                const struct loop_config *c,
                bool *try_loop_configure,
                uint64_t *ret_seqnum_not_before,
                usec_t *ret_timestamp_not_before,
                int *ret_lock_fd) {

        _cleanup_close_ int lock_fd = -1;
        uint64_t seqnum;
        usec_t timestamp;
        int r;

        assert(fd >= 0);
        assert(nr >= 0);
        assert(c);
        assert(try_loop_configure);

        /* Let's lock the device before we do anything. We take the BSD lock on a second, separately opened
         * fd for the device. udev after all watches for close() events (specifically IN_CLOSE_WRITE) on
         * block devices to reprobe them, hence by having a separate fd we will later close() we can ensure
         * we trigger udev after everything is done. If we'd lock our own fd instead and keep it open for a
         * long time udev would possibly never run on it again, even though the fd is unlocked, simply
         * because we never close() it. It also has the nice benefit we can use the _cleanup_close_ logic to
         * automatically release the lock, after we are done. */
        lock_fd = open_lock_fd(fd, LOCK_EX);
        if (lock_fd < 0)
                return lock_fd;

        /* Let's see if the device is really detached, i.e. currently has no associated partition block
         * devices. On various kernels (such as 5.8) it is possible to have a loopback block device that
         * superficially is detached but still has partition block devices associated for it. Let's then
         * manually remove the partitions via BLKPG, and tell the caller we did that via EUCLEAN, so they try
         * again. */
        r = block_device_has_partitions(dev);
        if (r < 0)
                return r;
        if (r > 0) {
                r = loop_is_bound(fd);
                if (r < 0)
                        return r;
                if (r > 0)
                        return -EBUSY;

                /* Unbound but has children? Remove all partitions, and report this to the caller, to try
                 * again, and count this as an attempt. */

                r = block_device_remove_all_partitions(dev, fd);
                if (r < 0)
                        return r;

                return -EUCLEAN;
        }

        if (*try_loop_configure) {
                /* Acquire uevent seqnum immediately before attaching the loopback device. This allows
                 * callers to ignore all uevents with a seqnum before this one, if they need to associate
                 * uevent with this attachment. Doing so isn't race-free though, as uevents that happen in
                 * the window between this reading of the seqnum, and the LOOP_CONFIGURE call might still be
                 * mistaken as originating from our attachment, even though might be caused by an earlier
                 * use. But doing this at least shortens the race window a bit. */
                r = get_current_uevent_seqnum(&seqnum);
                if (r < 0)
                        return r;

                timestamp = now(CLOCK_MONOTONIC);

                if (ioctl(fd, LOOP_CONFIGURE, c) < 0) {
                        /* Do fallback only if LOOP_CONFIGURE is not supported, propagate all other
                         * errors. Note that the kernel is weird: non-existing ioctls currently return EINVAL
                         * rather than ENOTTY on loopback block devices. They should fix that in the kernel,
                         * but in the meantime we accept both here. */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EINVAL)
                                return -errno;

                        /* Remember the ioctl is unavailable, then fall through to the legacy path below. */
                        *try_loop_configure = false;
                } else {
                        r = loop_configure_verify(fd, c);
                        if (r < 0)
                                goto fail;
                        if (r == 0) {
                                /* LOOP_CONFIGURE doesn't work. Remember that. */
                                *try_loop_configure = false;

                                /* We return EBUSY here instead of retrying immediately with LOOP_SET_FD,
                                 * because LOOP_CLR_FD is async: if the operation cannot be executed right
                                 * away it just sets the autoclear flag on the device. This means there's a
                                 * good chance we cannot actually reuse the loopback device right-away. Hence
                                 * let's assume it's busy, avoid the trouble and let the calling loop call us
                                 * again with a new, likely unused device. */
                                r = -EBUSY;
                                goto fail;
                        }

                        goto success;
                }
        }

        /* Legacy path: LOOP_SET_FD + LOOP_SET_STATUS64.
         * Let's read the seqnum again, to shorten the window. */
        r = get_current_uevent_seqnum(&seqnum);
        if (r < 0)
                return r;

        timestamp = now(CLOCK_MONOTONIC);

        if (ioctl(fd, LOOP_SET_FD, c->fd) < 0)
                return -errno;

        r = loop_configure_fallback(fd, c);
        if (r < 0)
                goto fail;

success:
        if (ret_seqnum_not_before)
                *ret_seqnum_not_before = seqnum;
        if (ret_timestamp_not_before)
                *ret_timestamp_not_before = timestamp;
        if (ret_lock_fd)
                *ret_lock_fd = TAKE_FD(lock_fd);

        return 0;

fail:
        /* Close the lock fd explicitly before clearing the loopback block device, since an additional open
         * fd would block the clearing to succeed */
        lock_fd = safe_close(lock_fd);
        (void) ioctl(fd, LOOP_CLR_FD);
        return r;
}
350 | ||
/* Core allocator: wraps the file behind 'fd' (or, if it is already a whole block device, the device
 * itself) in a LoopDevice. Handles O_DIRECT reopening, races against other loop-device allocators
 * (retry loop over LOOP_CTL_GET_FREE), and the requested final lock state 'lock_op'
 * (LOCK_EX/LOCK_SH/LOCK_UN, optionally with LOCK_NB).
 *
 * 'path' may be NULL; it is only used to record an absolute backing-file path in the result.
 * On success stores the new LoopDevice in *ret (ownership transferred) and returns its fd
 * (still owned by the LoopDevice — do not close). Returns negative errno on failure. */
static int loop_device_make_internal(
                const char *path,
                int fd,
                int open_flags,
                uint64_t offset,
                uint64_t size,
                uint32_t loop_flags,
                int lock_op,
                LoopDevice **ret) {

        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
        _cleanup_close_ int direct_io_fd = -1;
        _cleanup_free_ char *node = NULL, *backing_file = NULL;
        bool try_loop_configure = true;
        struct loop_config config;
        LoopDevice *d;
        uint64_t seqnum = UINT64_MAX;
        usec_t timestamp = USEC_INFINITY;
        int nr, r, f_flags;
        struct stat st;

        assert(fd >= 0);
        assert(ret);
        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));

        if (fstat(fd, &st) < 0)
                return -errno;

        if (S_ISBLK(st.st_mode)) {
                if (offset == 0 && IN_SET(size, 0, UINT64_MAX))
                        /* If this is already a block device and we are supposed to cover the whole of it
                         * then store an fd to the original open device node — and do not actually create an
                         * unnecessary loopback device for it. */
                        return loop_device_open_full(NULL, fd, open_flags, lock_op, ret);
        } else {
                r = stat_verify_regular(&st);
                if (r < 0)
                        return r;
        }

        /* Record the backing file path: either normalize the caller-supplied one, or resolve it from the fd. */
        if (path) {
                r = path_make_absolute_cwd(path, &backing_file);
                if (r < 0)
                        return r;

                path_simplify(backing_file);
        } else {
                r = fd_get_path(fd, &backing_file);
                if (r < 0)
                        return r;
        }

        f_flags = fcntl(fd, F_GETFL);
        if (f_flags < 0)
                return -errno;

        if (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) != FLAGS_SET(f_flags, O_DIRECT)) {
                /* If LO_FLAGS_DIRECT_IO is requested, then make sure we have the fd open with O_DIRECT, as
                 * that's required. Conversely, if it's off require that O_DIRECT is off too (that's because
                 * new kernels will implicitly enable LO_FLAGS_DIRECT_IO if O_DIRECT is set).
                 *
                 * Our intention here is that LO_FLAGS_DIRECT_IO is the primary knob, and O_DIRECT derived
                 * from that automatically. */

                direct_io_fd = fd_reopen(fd, (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0)|O_CLOEXEC|O_NONBLOCK|open_flags);
                if (direct_io_fd < 0) {
                        if (!FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO))
                                return log_debug_errno(errno, "Failed to reopen file descriptor without O_DIRECT: %m");

                        /* Some file systems might not support O_DIRECT, let's gracefully continue without it then. */
                        log_debug_errno(errno, "Failed to enable O_DIRECT for backing file descriptor for loopback device. Continuing without.");
                        loop_flags &= ~LO_FLAGS_DIRECT_IO;
                } else
                        fd = direct_io_fd; /* From now on, operate on our new O_DIRECT fd */
        }

        /* On failure, lock_fd must be closed at first, otherwise LOOP_CLR_FD will fail.
         * (Cleanup attributes run in reverse declaration order, hence lock_fd is declared last.) */
        _cleanup_close_ int control = -1;
        _cleanup_(cleanup_clear_loop_close) int loop_with_fd = -1;
        _cleanup_close_ int lock_fd = -1;

        control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
        if (control < 0)
                return -errno;

        config = (struct loop_config) {
                .fd = fd,
                .info = {
                        /* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
                        .lo_flags = (loop_flags & ~LO_FLAGS_READ_ONLY) | ((open_flags & O_ACCMODE) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR,
                        .lo_offset = offset,
                        .lo_sizelimit = size == UINT64_MAX ? 0 : size,
                },
        };

        /* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might
         * be gone already, taken by somebody else racing against us. */
        for (unsigned n_attempts = 0;;) {
                _cleanup_close_ int loop = -1;

                /* Let's take a lock on the control device first. On a busy system, where many programs
                 * attempt to allocate a loopback device at the same time, we might otherwise keep looping
                 * around relatively heavy operations: asking for a free loopback device, then opening it,
                 * validating it, attaching something to it. Let's serialize this whole operation, to make
                 * unnecessary busywork less likely. Note that this is just something we do to optimize our
                 * own code (and whoever else decides to use LOCK_EX locks for this), taking this lock is not
                 * necessary, it just means it's less likely we have to iterate through this loop again and
                 * again if our own code races against our own code.
                 *
                 * Note: our lock protocol is to take the /dev/loop-control lock first, and the block device
                 * lock second, if both are taken, and always in this order, to avoid ABBA locking issues. */
                if (flock(control, LOCK_EX) < 0)
                        return -errno;

                nr = ioctl(control, LOOP_CTL_GET_FREE);
                if (nr < 0)
                        return -errno;

                /* Reset per-iteration state from any previous attempt. */
                node = mfree(node);
                if (asprintf(&node, "/dev/loop%i", nr) < 0)
                        return -ENOMEM;

                dev = sd_device_unref(dev);
                r = sd_device_new_from_devname(&dev, node);
                if (r < 0)
                        return r;

                loop = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
                if (loop < 0) {
                        /* Somebody might've gotten the same number from the kernel, used the device,
                         * and called LOOP_CTL_REMOVE on it. Let's retry with a new number. */
                        if (!ERRNO_IS_DEVICE_ABSENT(errno))
                                return -errno;
                } else {
                        r = loop_configure(dev, loop, nr, &config, &try_loop_configure, &seqnum, &timestamp, &lock_fd);
                        if (r >= 0) {
                                loop_with_fd = TAKE_FD(loop);
                                break;
                        }
                        if (!IN_SET(r, -EBUSY, -EUCLEAN)) /* Busy, or some left-over partition devices that
                                                           * were cleaned up. */
                                return r;
                }

                /* OK, this didn't work, let's try again a bit later, but first release the lock on the
                 * control device */
                if (flock(control, LOCK_UN) < 0)
                        return -errno;

                if (++n_attempts >= 64) /* Give up eventually */
                        return -EBUSY;

                /* Now close the loop device explicitly. This will release any lock acquired by
                 * attach_empty_file() or similar, while we sleep below. */
                loop = safe_close(loop);

                /* Wait some random time, to make collision less likely. Let's pick a random time in the
                 * range 0ms…250ms, linearly scaled by the number of failed attempts. */
                (void) usleep(random_u64_range(UINT64_C(10) * USEC_PER_MSEC +
                                               UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
        }

        if (fstat(loop_with_fd, &st) < 0)
                return -errno;
        assert(S_ISBLK(st.st_mode));

        uint64_t diskseq = 0;
        r = fd_get_diskseq(loop_with_fd, &diskseq);
        if (r < 0 && r != -EOPNOTSUPP) /* older kernels lack diskseq support; treat as zero */
                return r;

        /* loop_configure() left us holding LOCK_EX; adjust to the caller's requested lock state. */
        switch (lock_op & ~LOCK_NB) {
        case LOCK_EX: /* Already in effect */
                break;
        case LOCK_SH: /* Downgrade */
                if (flock(lock_fd, lock_op) < 0)
                        return -errno;
                break;
        case LOCK_UN: /* Release */
                lock_fd = safe_close(lock_fd);
                break;
        default:
                assert_not_reached();
        }

        d = new(LoopDevice, 1);
        if (!d)
                return -ENOMEM;
        *d = (LoopDevice) {
                .fd = TAKE_FD(loop_with_fd),
                .lock_fd = TAKE_FD(lock_fd),
                .node = TAKE_PTR(node),
                .nr = nr,
                .devno = st.st_rdev,
                .dev = TAKE_PTR(dev),
                .backing_file = TAKE_PTR(backing_file),
                .diskseq = diskseq,
                .uevent_seqnum_not_before = seqnum,
                .timestamp_not_before = timestamp,
        };

        log_debug("Successfully acquired %s, devno=%u:%u, nr=%i, diskseq=%" PRIu64,
                  d->node,
                  major(d->devno), minor(d->devno),
                  d->nr,
                  d->diskseq);

        *ret = d;
        return d->fd;
}
561 | ||
/* Applies the $SYSTEMD_LOOP_DIRECT_IO override to 'loop_flags': direct IO stays on unless the
 * environment variable is explicitly set to a false value. Parse errors are logged and ignored. */
static uint32_t loop_flags_mangle(uint32_t loop_flags) {
        int r = getenv_bool("SYSTEMD_LOOP_DIRECT_IO");

        if (r < 0 && r != -ENXIO) /* ENXIO == variable unset, which is fine */
                log_debug_errno(r, "Failed to parse $SYSTEMD_LOOP_DIRECT_IO, ignoring: %m");

        return UPDATE_FLAG(loop_flags, LO_FLAGS_DIRECT_IO, r != 0); /* Turn on LO_FLAGS_DIRECT_IO by default, unless explicitly configured to off. */
}
571 | ||
572 | int loop_device_make( | |
573 | int fd, | |
574 | int open_flags, | |
575 | uint64_t offset, | |
576 | uint64_t size, | |
577 | uint32_t loop_flags, | |
7f52206a | 578 | int lock_op, |
e8c7c4d9 LP |
579 | LoopDevice **ret) { |
580 | ||
581 | assert(fd >= 0); | |
582 | assert(ret); | |
e8c7c4d9 LP |
583 | |
584 | return loop_device_make_internal( | |
e77cab82 | 585 | NULL, |
e8c7c4d9 LP |
586 | fd, |
587 | open_flags, | |
588 | offset, | |
589 | size, | |
bfd08445 | 590 | loop_flags_mangle(loop_flags), |
7f52206a | 591 | lock_op, |
e8c7c4d9 LP |
592 | ret); |
593 | } | |
594 | ||
/* Opens the file at 'path' and wraps it in a loop device. open_flags may be O_RDWR, O_RDONLY, or
 * negative meaning "writable if possible, read-only otherwise". O_DIRECT is attempted per the
 * (mangled) loop_flags and dropped gracefully where unsupported. Returns the loop device fd via
 * loop_device_make_internal() on success, negative errno on failure. */
int loop_device_make_by_path(
                const char *path,
                int open_flags,
                uint32_t loop_flags,
                int lock_op,
                LoopDevice **ret) {

        int r, basic_flags, direct_flags, rdwr_flags;
        _cleanup_close_ int fd = -1;
        bool direct = false;

        assert(path);
        assert(ret);
        assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));

        /* Passing < 0 as open_flags here means we'll try to open the device writable if we can, retrying
         * read-only if we cannot. */

        loop_flags = loop_flags_mangle(loop_flags);

        /* Let's open with O_DIRECT if we can. But not all file systems support that, hence fall back to
         * non-O_DIRECT mode automatically, if it fails. */

        basic_flags = O_CLOEXEC|O_NONBLOCK|O_NOCTTY;
        direct_flags = FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0;
        rdwr_flags = open_flags >= 0 ? open_flags : O_RDWR;

        fd = open(path, basic_flags|direct_flags|rdwr_flags);
        if (fd < 0 && direct_flags != 0) /* If we had O_DIRECT on, and things failed with that, let's immediately try again without */
                fd = open(path, basic_flags|rdwr_flags);
        else
                direct = direct_flags != 0; /* first open succeeded (or O_DIRECT wasn't requested) */
        if (fd < 0) {
                r = -errno;

                /* Retry read-only? Only if the caller allowed it and the failure looks permission-related. */
                if (open_flags >= 0 || !(ERRNO_IS_PRIVILEGE(r) || r == -EROFS))
                        return r;

                fd = open(path, basic_flags|direct_flags|O_RDONLY);
                if (fd < 0 && direct_flags != 0) /* as above */
                        fd = open(path, basic_flags|O_RDONLY);
                else
                        direct = direct_flags != 0;
                if (fd < 0)
                        return r; /* Propagate original error */

                open_flags = O_RDONLY;
        } else if (open_flags < 0)
                open_flags = O_RDWR;

        log_debug("Opened '%s' in %s access mode%s, with O_DIRECT %s%s.",
                  path,
                  open_flags == O_RDWR ? "O_RDWR" : "O_RDONLY",
                  open_flags != rdwr_flags ? " (O_RDWR was requested but not allowed)" : "",
                  direct ? "enabled" : "disabled",
                  direct != (direct_flags != 0) ? " (O_DIRECT was requested but not supported)" : "");

        return loop_device_make_internal(path, fd, open_flags, 0, 0, loop_flags, lock_op, ret);
}
655 | ||
/* Releases a LoopDevice: drops our lock, and — unless the device is foreign or was relinquished —
 * synchronously removes its partitions, clears the backing file (LOOP_CLR_FD) and asks the kernel
 * to delete the device (LOOP_CTL_REMOVE). Always frees the structure; returns NULL. NULL-safe. */
LoopDevice* loop_device_unref(LoopDevice *d) {
        _cleanup_close_ int control = -1;
        int r;

        if (!d)
                return NULL;

        /* Release any lock we might have on the device first. We want to open+lock the /dev/loop-control
         * device below, but our lock protocol says that if both control and block device locks are taken,
         * the control lock needs to be taken first, the block device lock second — in order to avoid ABBA
         * locking issues. Moreover, we want to issue LOOP_CLR_FD on the block device further down, and that
         * would fail if we had another fd open to the device. */
        d->lock_fd = safe_close(d->lock_fd);

        /* Let's open the control device early, and lock it, so that we can release our block device and
         * delete it in a synchronized fashion, and allocators won't needlessly see the block device as free
         * while we are about to delete it. */
        if (!LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished) {
                control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
                if (control < 0)
                        log_debug_errno(errno, "Failed to open loop control device, cannot remove loop device '%s', ignoring: %m", strna(d->node));
                else if (flock(control, LOCK_EX) < 0)
                        log_debug_errno(errno, "Failed to lock loop control device, ignoring: %m");
        }

        /* Then let's release the loopback block device */
        if (d->fd >= 0) {
                /* Implicitly sync the device, since otherwise in-flight blocks might not get written */
                if (fsync(d->fd) < 0)
                        log_debug_errno(errno, "Failed to sync loop block device, ignoring: %m");

                if (!LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished) {
                        /* We are supposed to clear the loopback device. Let's do this synchronously: lock
                         * the device, manually remove all partitions and then clear it. This should ensure
                         * udev doesn't concurrently access the devices, and we can be reasonably sure that
                         * once we are done here the device is cleared and all its partition children
                         * removed. Note that we lock our primary device fd here (and not a separate locking
                         * fd, as we do during allocation, since we want to keep the lock all the way through
                         * the LOOP_CLR_FD, but that call would fail if we had more than one fd open.) */

                        if (flock(d->fd, LOCK_EX) < 0)
                                log_debug_errno(errno, "Failed to lock loop block device, ignoring: %m");

                        r = block_device_remove_all_partitions(d->dev, d->fd);
                        if (r < 0)
                                log_debug_errno(r, "Failed to remove partitions of loopback block device, ignoring: %m");

                        if (ioctl(d->fd, LOOP_CLR_FD) < 0)
                                log_debug_errno(errno, "Failed to clear loop device, ignoring: %m");
                }

                safe_close(d->fd);
        }

        /* Now that the block device is released, let's also try to remove it.
         * EBUSY can happen transiently while udev still has the device open, hence retry with a delay. */
        if (control >= 0)
                for (unsigned n_attempts = 0;;) {
                        if (ioctl(control, LOOP_CTL_REMOVE, d->nr) >= 0)
                                break;
                        if (errno != EBUSY || ++n_attempts >= 64) {
                                log_debug_errno(errno, "Failed to remove device %s: %m", strna(d->node));
                                break;
                        }
                        (void) usleep(50 * USEC_PER_MSEC);
                }

        free(d->node);
        sd_device_unref(d->dev);
        free(d->backing_file);
        return mfree(d);
}
a2ea3b2f LP |
727 | |
728 | void loop_device_relinquish(LoopDevice *d) { | |
729 | assert(d); | |
730 | ||
731 | /* Don't attempt to clean up the loop device anymore from this point on. Leave the clean-ing up to the kernel | |
732 | * itself, using the loop device "auto-clear" logic we already turned on when creating the device. */ | |
733 | ||
734 | d->relinquished = true; | |
735 | } | |
9dabc4fd | 736 | |
24d59aee DDM |
737 | void loop_device_unrelinquish(LoopDevice *d) { |
738 | assert(d); | |
739 | d->relinquished = false; | |
740 | } | |
741 | ||
/* Wraps an already-existing loop (or other) block device, given either by path or by fd, in a
 * LoopDevice object. The device is treated as foreign: .relinquished is set so that unref will not
 * destroy it. Optionally takes a BSD lock on it (lock_op). On success stores the new object in *ret
 * and returns the (non-negative) block device fd we keep open; on failure returns a negative errno. */
int loop_device_open_full(
                const char *loop_path,
                int loop_fd,
                int open_flags,
                int lock_op,
                LoopDevice **ret) {

        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
        _cleanup_close_ int fd = -1, lock_fd = -1;
        _cleanup_free_ char *p = NULL, *backing_file = NULL;
        struct loop_info64 info;
        uint64_t diskseq = 0;
        struct stat st;
        LoopDevice *d;
        int r, nr = -1; /* nr stays -1 if this is not actually a loopback device */

        assert(loop_path || loop_fd >= 0);
        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
        assert(ret);

        /* No fd given: open the node by path. */
        if (loop_fd < 0) {
                fd = open(loop_path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
                if (fd < 0)
                        return -errno;
                loop_fd = fd;
        }

        /* Whatever we got must refer to a block device. */
        if (fstat(loop_fd, &st) < 0)
                return -errno;
        if (!S_ISBLK(st.st_mode))
                return -ENOTBLK;

        r = sd_device_new_from_stat_rdev(&dev, &st);
        if (r < 0)
                return r;

        if (fd < 0) {
                /* If loop_fd is provided through the argument, then we reopen the inode here, instead of
                 * keeping just a dup() clone of it around, since we want to ensure that the O_DIRECT
                 * flag of the handle we keep is off, we have our own file index, and have the right
                 * read/write mode in effect.*/
                fd = fd_reopen(loop_fd, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
                if (fd < 0)
                        return fd;
                loop_fd = fd;
        }

        /* Probe whether this really is a loopback device; failure (e.g. ENXIO/ENOTTY) is not fatal,
         * we then simply record no loop number and no backing file. */
        if (ioctl(loop_fd, LOOP_GET_STATUS64, &info) >= 0) {
                const char *s;

#if HAVE_VALGRIND_MEMCHECK_H
                /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
                VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif
                nr = info.lo_number;

                if (sd_device_get_sysattr_value(dev, "loop/backing_file", &s) >= 0) {
                        backing_file = strdup(s);
                        if (!backing_file)
                                return -ENOMEM;
                }
        }

        /* Old kernels may not support disk sequence numbers; tolerate that. */
        r = fd_get_diskseq(loop_fd, &diskseq);
        if (r < 0 && r != -EOPNOTSUPP)
                return r;

        /* Take the requested BSD lock on a separate fd before handing out the object. */
        if ((lock_op & ~LOCK_NB) != LOCK_UN) {
                lock_fd = open_lock_fd(loop_fd, lock_op);
                if (lock_fd < 0)
                        return lock_fd;
        }

        /* Use the canonical device node name from udev, not the caller-supplied path. */
        r = sd_device_get_devname(dev, &loop_path);
        if (r < 0)
                return r;

        p = strdup(loop_path);
        if (!p)
                return -ENOMEM;

        d = new(LoopDevice, 1);
        if (!d)
                return -ENOMEM;

        *d = (LoopDevice) {
                .fd = TAKE_FD(fd),
                .lock_fd = TAKE_FD(lock_fd),
                .nr = nr,
                .node = TAKE_PTR(p),
                .dev = TAKE_PTR(dev),
                .backing_file = TAKE_PTR(backing_file),
                .relinquished = true, /* It's not ours, don't try to destroy it when this object is freed */
                .devno = st.st_rdev,
                .diskseq = diskseq,
                .uevent_seqnum_not_before = UINT64_MAX,
                .timestamp_not_before = USEC_INFINITY,
        };

        *ret = d;
        return d->fd;
}
844 | ||
f1443709 LP |
845 | static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) { |
846 | char sysfs[STRLEN("/sys/dev/block/:/partition") + 2*DECIMAL_STR_MAX(dev_t) + 1]; | |
ca822829 | 847 | _cleanup_free_ char *buffer = NULL; |
f1443709 LP |
848 | uint64_t current_offset, current_size, partno; |
849 | _cleanup_close_ int whole_fd = -1; | |
850 | struct stat st; | |
851 | dev_t devno; | |
852 | int r; | |
853 | ||
854 | assert(partition_fd >= 0); | |
855 | ||
856 | /* Resizes the partition the loopback device refer to (assuming it refers to one instead of an actual | |
857 | * loopback device), and changes the offset, if needed. This is a fancy wrapper around | |
858 | * BLKPG_RESIZE_PARTITION. */ | |
859 | ||
860 | if (fstat(partition_fd, &st) < 0) | |
861 | return -errno; | |
862 | ||
863 | assert(S_ISBLK(st.st_mode)); | |
864 | ||
ed13feff | 865 | xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev)); |
f1443709 LP |
866 | r = read_one_line_file(sysfs, &buffer); |
867 | if (r == -ENOENT) /* not a partition, cannot resize */ | |
868 | return -ENOTTY; | |
869 | if (r < 0) | |
870 | return r; | |
871 | r = safe_atou64(buffer, &partno); | |
872 | if (r < 0) | |
873 | return r; | |
874 | ||
ed13feff | 875 | xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/start", DEVNUM_FORMAT_VAL(st.st_rdev)); |
f1443709 LP |
876 | |
877 | buffer = mfree(buffer); | |
878 | r = read_one_line_file(sysfs, &buffer); | |
879 | if (r < 0) | |
880 | return r; | |
881 | r = safe_atou64(buffer, ¤t_offset); | |
882 | if (r < 0) | |
883 | return r; | |
884 | if (current_offset > UINT64_MAX/512U) | |
885 | return -EINVAL; | |
886 | current_offset *= 512U; | |
887 | ||
888 | if (ioctl(partition_fd, BLKGETSIZE64, ¤t_size) < 0) | |
889 | return -EINVAL; | |
890 | ||
891 | if (size == UINT64_MAX && offset == UINT64_MAX) | |
892 | return 0; | |
893 | if (current_size == size && current_offset == offset) | |
894 | return 0; | |
895 | ||
ed13feff | 896 | xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/../dev", DEVNUM_FORMAT_VAL(st.st_rdev)); |
f1443709 LP |
897 | |
898 | buffer = mfree(buffer); | |
899 | r = read_one_line_file(sysfs, &buffer); | |
900 | if (r < 0) | |
901 | return r; | |
7176f06c | 902 | r = parse_devnum(buffer, &devno); |
f1443709 LP |
903 | if (r < 0) |
904 | return r; | |
905 | ||
ca822829 | 906 | whole_fd = r = device_open_from_devnum(S_IFBLK, devno, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY, NULL); |
f1443709 LP |
907 | if (r < 0) |
908 | return r; | |
909 | ||
91e1ce1a LP |
910 | return block_device_resize_partition( |
911 | whole_fd, | |
912 | partno, | |
913 | offset == UINT64_MAX ? current_offset : offset, | |
914 | size == UINT64_MAX ? current_size : size); | |
f1443709 LP |
915 | } |
916 | ||
c37878fc LP |
917 | int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) { |
918 | struct loop_info64 info; | |
ff27ef4b | 919 | |
9dabc4fd | 920 | assert(d); |
ff27ef4b | 921 | assert(d->fd >= 0); |
9dabc4fd | 922 | |
f1443709 LP |
923 | /* Changes the offset/start of the loop device relative to the beginning of the underlying file or |
924 | * block device. If this loop device actually refers to a partition and not a loopback device, we'll | |
925 | * try to adjust the partition offsets instead. | |
926 | * | |
927 | * If either offset or size is UINT64_MAX we won't change that parameter. */ | |
928 | ||
f1443709 LP |
929 | if (d->nr < 0) /* not a loopback device */ |
930 | return resize_partition(d->fd, offset, size); | |
931 | ||
c37878fc LP |
932 | if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0) |
933 | return -errno; | |
934 | ||
10c1b188 LP |
935 | #if HAVE_VALGRIND_MEMCHECK_H |
936 | /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */ | |
937 | VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info)); | |
938 | #endif | |
939 | ||
c37878fc LP |
940 | if (size == UINT64_MAX && offset == UINT64_MAX) |
941 | return 0; | |
942 | if (info.lo_sizelimit == size && info.lo_offset == offset) | |
943 | return 0; | |
944 | ||
945 | if (size != UINT64_MAX) | |
946 | info.lo_sizelimit = size; | |
947 | if (offset != UINT64_MAX) | |
948 | info.lo_offset = offset; | |
949 | ||
7c248223 | 950 | return RET_NERRNO(ioctl(d->fd, LOOP_SET_STATUS64, &info)); |
9dabc4fd | 951 | } |
441ec804 LP |
952 | |
953 | int loop_device_flock(LoopDevice *d, int operation) { | |
7f52206a | 954 | assert(IN_SET(operation & ~LOCK_NB, LOCK_UN, LOCK_SH, LOCK_EX)); |
441ec804 LP |
955 | assert(d); |
956 | ||
7f52206a LP |
957 | /* When unlocking just close the lock fd */ |
958 | if ((operation & ~LOCK_NB) == LOCK_UN) { | |
959 | d->lock_fd = safe_close(d->lock_fd); | |
960 | return 0; | |
961 | } | |
962 | ||
963 | /* If we had no lock fd so far, create one and lock it right-away */ | |
964 | if (d->lock_fd < 0) { | |
965 | assert(d->fd >= 0); | |
966 | ||
967 | d->lock_fd = open_lock_fd(d->fd, operation); | |
968 | if (d->lock_fd < 0) | |
969 | return d->lock_fd; | |
970 | ||
971 | return 0; | |
972 | } | |
441ec804 | 973 | |
7f52206a LP |
974 | /* Otherwise change the current lock mode on the existing fd */ |
975 | return RET_NERRNO(flock(d->lock_fd, operation)); | |
441ec804 | 976 | } |
8dbc208c LP |
977 | |
978 | int loop_device_sync(LoopDevice *d) { | |
979 | assert(d); | |
ff27ef4b | 980 | assert(d->fd >= 0); |
8dbc208c LP |
981 | |
982 | /* We also do this implicitly in loop_device_unref(). Doing this explicitly here has the benefit that | |
983 | * we can check the return value though. */ | |
984 | ||
7c248223 | 985 | return RET_NERRNO(fsync(d->fd)); |
8dbc208c | 986 | } |