]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/loop-util.c
7aee239e33a81c82b2e314407dc62758aa50be45
[thirdparty/systemd.git] / src / shared / loop-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #if HAVE_VALGRIND_MEMCHECK_H
4 #include <valgrind/memcheck.h>
5 #endif
6
7 #include <errno.h>
8 #include <fcntl.h>
9 #include <linux/blkpg.h>
10 #include <linux/fs.h>
11 #include <linux/loop.h>
12 #include <sys/file.h>
13 #include <sys/ioctl.h>
14 #include <unistd.h>
15
16 #include "alloc-util.h"
17 #include "errno-util.h"
18 #include "fd-util.h"
19 #include "fileio.h"
20 #include "loop-util.h"
21 #include "parse-util.h"
22 #include "stat-util.h"
23 #include "stdio-util.h"
24 #include "string-util.h"
25
/* Cleanup handler for a loop device fd we already attached a backing fd to: issues LOOP_CLR_FD on it
 * (result deliberately ignored) and then closes it. A negative *fd means "nothing to clean up". */
static void cleanup_clear_loop_close(int *fd) {
        if (*fd < 0)
                return;

        (void) ioctl(*fd, LOOP_CLR_FD);
        (void) safe_close(*fd);
}
32
33 int loop_device_make(
34 int fd,
35 int open_flags,
36 uint64_t offset,
37 uint64_t size,
38 uint32_t loop_flags,
39 LoopDevice **ret) {
40
41 _cleanup_free_ char *loopdev = NULL;
42 struct loop_info64 info;
43 LoopDevice *d = NULL;
44 struct stat st;
45 int nr = -1, r;
46
47 assert(fd >= 0);
48 assert(ret);
49 assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
50
51 if (fstat(fd, &st) < 0)
52 return -errno;
53
54 if (S_ISBLK(st.st_mode)) {
55 if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0) {
56 /* Oh! This is a loopback device? That's interesting! */
57
58 #if HAVE_VALGRIND_MEMCHECK_H
59 /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
60 VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
61 #endif
62 nr = info.lo_number;
63
64 if (asprintf(&loopdev, "/dev/loop%i", nr) < 0)
65 return -ENOMEM;
66 }
67
68 if (offset == 0 && IN_SET(size, 0, UINT64_MAX)) {
69 _cleanup_close_ int copy = -1;
70
71 /* If this is already a block device, store a copy of the fd as it is */
72
73 copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
74 if (copy < 0)
75 return -errno;
76
77 d = new(LoopDevice, 1);
78 if (!d)
79 return -ENOMEM;
80 *d = (LoopDevice) {
81 .fd = TAKE_FD(copy),
82 .nr = nr,
83 .node = TAKE_PTR(loopdev),
84 .relinquished = true, /* It's not allocated by us, don't destroy it when this object is freed */
85 };
86
87 *ret = d;
88 return d->fd;
89 }
90 } else {
91 r = stat_verify_regular(&st);
92 if (r < 0)
93 return r;
94 }
95
96 _cleanup_close_ int control = -1;
97 _cleanup_(cleanup_clear_loop_close) int loop_with_fd = -1;
98
99 control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
100 if (control < 0)
101 return -errno;
102
103 /* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might
104 * be gone already, taken by somebody else racing against us. */
105 for (unsigned n_attempts = 0;;) {
106 _cleanup_close_ int loop = -1;
107
108 nr = ioctl(control, LOOP_CTL_GET_FREE);
109 if (nr < 0)
110 return -errno;
111
112 if (asprintf(&loopdev, "/dev/loop%i", nr) < 0)
113 return -ENOMEM;
114
115 loop = open(loopdev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
116 if (loop < 0) {
117 /* Somebody might've gotten the same number from the kernel, used the device,
118 * and called LOOP_CTL_REMOVE on it. Let's retry with a new number. */
119 if (errno != ENOENT)
120 return -errno;
121 } else {
122 if (ioctl(loop, LOOP_SET_FD, fd) >= 0) {
123 loop_with_fd = TAKE_FD(loop);
124 break;
125 }
126 if (errno != EBUSY)
127 return -errno;
128 }
129
130 if (++n_attempts >= 64) /* Give up eventually */
131 return -EBUSY;
132
133 loopdev = mfree(loopdev);
134 }
135
136 info = (struct loop_info64) {
137 /* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
138 .lo_flags = (loop_flags & ~LO_FLAGS_READ_ONLY) | ((loop_flags & O_ACCMODE) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR,
139 .lo_offset = offset,
140 .lo_sizelimit = size == UINT64_MAX ? 0 : size,
141 };
142
143 if (ioctl(loop_with_fd, LOOP_SET_STATUS64, &info) < 0)
144 return -errno;
145
146 d = new(LoopDevice, 1);
147 if (!d)
148 return -ENOMEM;
149 *d = (LoopDevice) {
150 .fd = TAKE_FD(loop_with_fd),
151 .node = TAKE_PTR(loopdev),
152 .nr = nr,
153 };
154
155 *ret = d;
156 return 0;
157 }
158
159 int loop_device_make_by_path(const char *path, int open_flags, uint32_t loop_flags, LoopDevice **ret) {
160 _cleanup_close_ int fd = -1;
161 int r;
162
163 assert(path);
164 assert(ret);
165 assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));
166
167 /* Passing < 0 as open_flags here means we'll try to open the device writable if we can, retrying
168 * read-only if we cannot. */
169
170 fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|(open_flags >= 0 ? open_flags : O_RDWR));
171 if (fd < 0) {
172 r = -errno;
173
174 /* Retry read-only? */
175 if (open_flags >= 0 || !(ERRNO_IS_PRIVILEGE(r) || r == -EROFS))
176 return r;
177
178 fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|O_RDONLY);
179 if (fd < 0)
180 return r; /* Propagate original error */
181
182 open_flags = O_RDONLY;
183 } else if (open_flags < 0)
184 open_flags = O_RDWR;
185
186 return loop_device_make(fd, open_flags, 0, 0, loop_flags, ret);
187 }
188
189 LoopDevice* loop_device_unref(LoopDevice *d) {
190 if (!d)
191 return NULL;
192
193 if (d->fd >= 0) {
194 if (d->nr >= 0 && !d->relinquished) {
195 if (ioctl(d->fd, LOOP_CLR_FD) < 0)
196 log_debug_errno(errno, "Failed to clear loop device: %m");
197
198 }
199
200 safe_close(d->fd);
201 }
202
203 if (d->nr >= 0 && !d->relinquished) {
204 _cleanup_close_ int control = -1;
205
206 control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
207 if (control < 0)
208 log_warning_errno(errno,
209 "Failed to open loop control device, cannot remove loop device %s: %m",
210 strna(d->node));
211 else
212 for (unsigned n_attempts = 0;;) {
213 if (ioctl(control, LOOP_CTL_REMOVE, d->nr) >= 0)
214 break;
215 if (errno != EBUSY || ++n_attempts >= 64) {
216 log_warning_errno(errno, "Failed to remove device %s: %m", strna(d->node));
217 break;
218 }
219 usleep(50 * USEC_PER_MSEC);
220 }
221 }
222
223 free(d->node);
224 return mfree(d);
225 }
226
227 void loop_device_relinquish(LoopDevice *d) {
228 assert(d);
229
230 /* Don't attempt to clean up the loop device anymore from this point on. Leave the clean-ing up to the kernel
231 * itself, using the loop device "auto-clear" logic we already turned on when creating the device. */
232
233 d->relinquished = true;
234 }
235
236 int loop_device_open(const char *loop_path, int open_flags, LoopDevice **ret) {
237 _cleanup_close_ int loop_fd = -1;
238 _cleanup_free_ char *p = NULL;
239 struct loop_info64 info;
240 struct stat st;
241 LoopDevice *d;
242 int nr;
243
244 assert(loop_path);
245 assert(ret);
246
247 loop_fd = open(loop_path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
248 if (loop_fd < 0)
249 return -errno;
250
251 if (fstat(loop_fd, &st) < 0)
252 return -errno;
253 if (!S_ISBLK(st.st_mode))
254 return -ENOTBLK;
255
256 if (ioctl(loop_fd, LOOP_GET_STATUS64, &info) >= 0) {
257 #if HAVE_VALGRIND_MEMCHECK_H
258 /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
259 VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
260 #endif
261 nr = info.lo_number;
262 } else
263 nr = -1;
264
265 p = strdup(loop_path);
266 if (!p)
267 return -ENOMEM;
268
269 d = new(LoopDevice, 1);
270 if (!d)
271 return -ENOMEM;
272
273 *d = (LoopDevice) {
274 .fd = TAKE_FD(loop_fd),
275 .nr = nr,
276 .node = TAKE_PTR(p),
277 .relinquished = true, /* It's not ours, don't try to destroy it when this object is freed */
278 };
279
280 *ret = d;
281 return d->fd;
282 }
283
284 static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) {
285 char sysfs[STRLEN("/sys/dev/block/:/partition") + 2*DECIMAL_STR_MAX(dev_t) + 1];
286 _cleanup_free_ char *whole = NULL, *buffer = NULL;
287 uint64_t current_offset, current_size, partno;
288 _cleanup_close_ int whole_fd = -1;
289 struct stat st;
290 dev_t devno;
291 int r;
292
293 assert(partition_fd >= 0);
294
295 /* Resizes the partition the loopback device refer to (assuming it refers to one instead of an actual
296 * loopback device), and changes the offset, if needed. This is a fancy wrapper around
297 * BLKPG_RESIZE_PARTITION. */
298
299 if (fstat(partition_fd, &st) < 0)
300 return -errno;
301
302 assert(S_ISBLK(st.st_mode));
303
304 xsprintf(sysfs, "/sys/dev/block/%u:%u/partition", major(st.st_rdev), minor(st.st_rdev));
305 r = read_one_line_file(sysfs, &buffer);
306 if (r == -ENOENT) /* not a partition, cannot resize */
307 return -ENOTTY;
308 if (r < 0)
309 return r;
310 r = safe_atou64(buffer, &partno);
311 if (r < 0)
312 return r;
313
314 xsprintf(sysfs, "/sys/dev/block/%u:%u/start", major(st.st_rdev), minor(st.st_rdev));
315
316 buffer = mfree(buffer);
317 r = read_one_line_file(sysfs, &buffer);
318 if (r < 0)
319 return r;
320 r = safe_atou64(buffer, &current_offset);
321 if (r < 0)
322 return r;
323 if (current_offset > UINT64_MAX/512U)
324 return -EINVAL;
325 current_offset *= 512U;
326
327 if (ioctl(partition_fd, BLKGETSIZE64, &current_size) < 0)
328 return -EINVAL;
329
330 if (size == UINT64_MAX && offset == UINT64_MAX)
331 return 0;
332 if (current_size == size && current_offset == offset)
333 return 0;
334
335 xsprintf(sysfs, "/sys/dev/block/%u:%u/../dev", major(st.st_rdev), minor(st.st_rdev));
336
337 buffer = mfree(buffer);
338 r = read_one_line_file(sysfs, &buffer);
339 if (r < 0)
340 return r;
341 r = parse_dev(buffer, &devno);
342 if (r < 0)
343 return r;
344
345 r = device_path_make_major_minor(S_IFBLK, devno, &whole);
346 if (r < 0)
347 return r;
348
349 whole_fd = open(whole, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
350 if (whole_fd < 0)
351 return -errno;
352
353 struct blkpg_partition bp = {
354 .pno = partno,
355 .start = offset == UINT64_MAX ? current_offset : offset,
356 .length = size == UINT64_MAX ? current_size : size,
357 };
358
359 struct blkpg_ioctl_arg ba = {
360 .op = BLKPG_RESIZE_PARTITION,
361 .data = &bp,
362 .datalen = sizeof(bp),
363 };
364
365 if (ioctl(whole_fd, BLKPG, &ba) < 0)
366 return -errno;
367
368 return 0;
369 }
370
371 int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) {
372 struct loop_info64 info;
373 assert(d);
374
375 /* Changes the offset/start of the loop device relative to the beginning of the underlying file or
376 * block device. If this loop device actually refers to a partition and not a loopback device, we'll
377 * try to adjust the partition offsets instead.
378 *
379 * If either offset or size is UINT64_MAX we won't change that parameter. */
380
381 if (d->fd < 0)
382 return -EBADF;
383
384 if (d->nr < 0) /* not a loopback device */
385 return resize_partition(d->fd, offset, size);
386
387 if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0)
388 return -errno;
389
390 #if HAVE_VALGRIND_MEMCHECK_H
391 /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
392 VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
393 #endif
394
395 if (size == UINT64_MAX && offset == UINT64_MAX)
396 return 0;
397 if (info.lo_sizelimit == size && info.lo_offset == offset)
398 return 0;
399
400 if (size != UINT64_MAX)
401 info.lo_sizelimit = size;
402 if (offset != UINT64_MAX)
403 info.lo_offset = offset;
404
405 if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
406 return -errno;
407
408 return 0;
409 }
410
411 int loop_device_flock(LoopDevice *d, int operation) {
412 assert(d);
413
414 if (d->fd < 0)
415 return -EBADF;
416
417 if (flock(d->fd, operation) < 0)
418 return -errno;
419
420 return 0;
421 }