]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/blockdev-util.c
Merge pull request #24877 from brauner/namespace_utils
[thirdparty/systemd.git] / src / shared / blockdev-util.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <linux/blkpg.h>
4 #include <sys/file.h>
5 #include <sys/ioctl.h>
6 #include <sys/mount.h>
7 #include <unistd.h>
8
9 #include "sd-device.h"
10
11 #include "alloc-util.h"
12 #include "blockdev-util.h"
13 #include "btrfs-util.h"
14 #include "device-util.h"
15 #include "devnum-util.h"
16 #include "dirent-util.h"
17 #include "errno-util.h"
18 #include "fd-util.h"
19 #include "fileio.h"
20 #include "missing_magic.h"
21 #include "parse-util.h"
22
23 static int fd_get_devnum(int fd, BlockDeviceLookupFlag flags, dev_t *ret) {
24 struct stat st;
25 dev_t devnum;
26 int r;
27
28 assert(fd >= 0);
29 assert(ret);
30
31 if (fstat(fd, &st) < 0)
32 return -errno;
33
34 if (S_ISBLK(st.st_mode))
35 devnum = st.st_rdev;
36 else if (!FLAGS_SET(flags, BLOCK_DEVICE_LOOKUP_BACKING))
37 return -ENOTBLK;
38 else if (!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode))
39 return -ENOTBLK;
40 else if (major(st.st_dev) != 0)
41 devnum = st.st_dev;
42 else {
43 /* If major(st.st_dev) is zero, this might mean we are backed by btrfs, which needs special
44 * handing, to get the backing device node. */
45
46 r = fcntl(fd, F_GETFL);
47 if (r < 0)
48 return -errno;
49
50 if (FLAGS_SET(r, O_PATH)) {
51 _cleanup_close_ int regfd = -1;
52
53 /* The fstat() above we can execute on an O_PATH fd. But the btrfs ioctl we cannot.
54 * Hence acquire a "real" fd first, without the O_PATH flag. */
55
56 regfd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
57 if (regfd < 0)
58 return regfd;
59
60 r = btrfs_get_block_device_fd(regfd, &devnum);
61 } else
62 r = btrfs_get_block_device_fd(fd, &devnum);
63 if (r == -ENOTTY) /* not btrfs */
64 return -ENOTBLK;
65 if (r < 0)
66 return r;
67 }
68
69 *ret = devnum;
70 return 0;
71 }
72
73 int block_device_is_whole_disk(sd_device *dev) {
74 const char *s;
75 int r;
76
77 assert(dev);
78
79 r = sd_device_get_subsystem(dev, &s);
80 if (r < 0)
81 return r;
82
83 if (!streq(s, "block"))
84 return -ENOTBLK;
85
86 r = sd_device_get_devtype(dev, &s);
87 if (r < 0)
88 return r;
89
90 return streq(s, "disk");
91 }
92
93 int block_device_get_whole_disk(sd_device *dev, sd_device **ret) {
94 int r;
95
96 assert(dev);
97 assert(ret);
98
99 /* Do not unref returned sd_device object. */
100
101 r = block_device_is_whole_disk(dev);
102 if (r < 0)
103 return r;
104 if (r == 0) {
105 r = sd_device_get_parent(dev, &dev);
106 if (r == -ENOENT) /* Already removed? Let's return a recognizable error. */
107 return -ENODEV;
108 if (r < 0)
109 return r;
110
111 r = block_device_is_whole_disk(dev);
112 if (r < 0)
113 return r;
114 if (r == 0)
115 return -ENXIO;
116 }
117
118 *ret = dev;
119 return 0;
120 }
121
122 static int block_device_get_originating(sd_device *dev, sd_device **ret) {
123 _cleanup_(sd_device_unrefp) sd_device *first_found = NULL;
124 const char *suffix;
125 sd_device *child;
126 dev_t devnum = 0; /* avoid false maybe-uninitialized warning */
127
128 /* For the specified block device tries to chase it through the layers, in case LUKS-style DM
129 * stacking is used, trying to find the next underlying layer. */
130
131 assert(dev);
132 assert(ret);
133
134 FOREACH_DEVICE_CHILD_WITH_SUFFIX(dev, child, suffix) {
135 sd_device *child_whole_disk;
136 dev_t n;
137
138 if (!path_startswith(suffix, "slaves"))
139 continue;
140
141 if (block_device_get_whole_disk(child, &child_whole_disk) < 0)
142 continue;
143
144 if (sd_device_get_devnum(child_whole_disk, &n) < 0)
145 continue;
146
147 if (!first_found) {
148 first_found = sd_device_ref(child);
149 devnum = n;
150 continue;
151 }
152
153 /* We found a device backed by multiple other devices. We don't really support automatic
154 * discovery on such setups, with the exception of dm-verity partitions. In this case there
155 * are two backing devices: the data partition and the hash partition. We are fine with such
156 * setups, however, only if both partitions are on the same physical device. Hence, let's
157 * verify this by iterating over every node in the 'slaves/' directory and comparing them with
158 * the first that gets returned by readdir(), to ensure they all point to the same device. */
159 if (n != devnum)
160 return -ENOTUNIQ;
161 }
162
163 if (!first_found)
164 return -ENOENT;
165
166 *ret = TAKE_PTR(first_found);
167 return 1; /* found */
168 }
169
170 int block_device_new_from_fd(int fd, BlockDeviceLookupFlag flags, sd_device **ret) {
171 _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
172 dev_t devnum;
173 int r;
174
175 assert(fd >= 0);
176 assert(ret);
177
178 r = fd_get_devnum(fd, flags, &devnum);
179 if (r < 0)
180 return r;
181
182 r = sd_device_new_from_devnum(&dev, 'b', devnum);
183 if (r < 0)
184 return r;
185
186 if (FLAGS_SET(flags, BLOCK_DEVICE_LOOKUP_ORIGINATING)) {
187 _cleanup_(sd_device_unrefp) sd_device *dev_origin = NULL;
188 sd_device *dev_whole_disk;
189
190 r = block_device_get_whole_disk(dev, &dev_whole_disk);
191 if (r < 0)
192 return r;
193
194 r = block_device_get_originating(dev_whole_disk, &dev_origin);
195 if (r < 0 && r != -ENOENT)
196 return r;
197 if (r > 0)
198 device_unref_and_replace(dev, dev_origin);
199 }
200
201 if (FLAGS_SET(flags, BLOCK_DEVICE_LOOKUP_WHOLE_DISK)) {
202 sd_device *dev_whole_disk;
203
204 r = block_device_get_whole_disk(dev, &dev_whole_disk);
205 if (r < 0)
206 return r;
207
208 *ret = sd_device_ref(dev_whole_disk);
209 return 0;
210 }
211
212 *ret = sd_device_ref(dev);
213 return 0;
214 }
215
216 int block_device_new_from_path(const char *path, BlockDeviceLookupFlag flags, sd_device **ret) {
217 _cleanup_close_ int fd = -1;
218
219 assert(path);
220 assert(ret);
221
222 fd = open(path, O_CLOEXEC|O_PATH);
223 if (fd < 0)
224 return -errno;
225
226 return block_device_new_from_fd(fd, flags, ret);
227 }
228
229 int block_get_whole_disk(dev_t d, dev_t *ret) {
230 char p[SYS_BLOCK_PATH_MAX("/partition")];
231 _cleanup_free_ char *s = NULL;
232 dev_t devt;
233 int r;
234
235 assert(ret);
236
237 if (major(d) == 0)
238 return -ENODEV;
239
240 /* If it has a queue this is good enough for us */
241 xsprintf_sys_block_path(p, "/queue", d);
242 if (access(p, F_OK) >= 0) {
243 *ret = d;
244 return 0;
245 }
246 if (errno != ENOENT)
247 return -errno;
248
249 /* If it is a partition find the originating device */
250 xsprintf_sys_block_path(p, "/partition", d);
251 if (access(p, F_OK) < 0)
252 return -errno;
253
254 /* Get parent dev_t */
255 xsprintf_sys_block_path(p, "/../dev", d);
256 r = read_one_line_file(p, &s);
257 if (r < 0)
258 return r;
259
260 r = parse_devnum(s, &devt);
261 if (r < 0)
262 return r;
263
264 /* Only return this if it is really good enough for us. */
265 xsprintf_sys_block_path(p, "/queue", devt);
266 if (access(p, F_OK) < 0)
267 return -errno;
268
269 *ret = devt;
270 return 1;
271 }
272
273 int get_block_device_fd(int fd, dev_t *ret) {
274 struct stat st;
275 int r;
276
277 assert(fd >= 0);
278 assert(ret);
279
280 /* Gets the block device directly backing a file system. If the block device is encrypted, returns
281 * the device mapper block device. */
282
283 if (fstat(fd, &st))
284 return -errno;
285
286 if (major(st.st_dev) != 0) {
287 *ret = st.st_dev;
288 return 1;
289 }
290
291 r = fcntl(fd, F_GETFL);
292 if (r < 0)
293 return -errno;
294 if (FLAGS_SET(r, O_PATH) && (S_ISREG(st.st_mode) || S_ISDIR(st.st_mode))) {
295 _cleanup_close_ int real_fd = -1;
296
297 /* The fstat() above we can execute on an O_PATH fd. But the btrfs ioctl we cannot. Hence
298 * acquire a "real" fd first, without the O_PATH flag. */
299
300 real_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC);
301 if (real_fd < 0)
302 return real_fd;
303 r = btrfs_get_block_device_fd(real_fd, ret);
304 } else
305 r = btrfs_get_block_device_fd(fd, ret);
306 if (r > 0)
307 return 1;
308 if (r != -ENOTTY) /* not btrfs */
309 return r;
310
311 *ret = 0;
312 return 0;
313 }
314
315 int get_block_device(const char *path, dev_t *ret) {
316 _cleanup_close_ int fd = -1;
317
318 assert(path);
319 assert(ret);
320
321 fd = open(path, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);
322 if (fd < 0)
323 return -errno;
324
325 return get_block_device_fd(fd, ret);
326 }
327
328 int block_get_originating(dev_t dt, dev_t *ret) {
329 _cleanup_(sd_device_unrefp) sd_device *dev = NULL, *origin = NULL;
330 int r;
331
332 assert(ret);
333
334 r = sd_device_new_from_devnum(&dev, 'b', dt);
335 if (r < 0)
336 return r;
337
338 r = block_device_get_originating(dev, &origin);
339 if (r < 0)
340 return r;
341
342 return sd_device_get_devnum(origin, ret);
343 }
344
/* Gets the backing block device for a file system, and handles LUKS encrypted file systems, looking
 * for its immediate parent, if there is one.
 *
 * Return value is that of get_block_device_fd() if it is <= 0. Otherwise 1 is returned; a failure
 * to chase the device further down is only logged at debug level and otherwise ignored. */
int get_block_device_harder_fd(int fd, dev_t *ret) {
        int r;

        assert(fd >= 0);
        assert(ret);

        r = get_block_device_fd(fd, ret);
        if (r <= 0)
                return r;

        /* Best effort: try to replace what we found by the device it originates from. */
        r = block_get_originating(*ret, ret);
        if (r < 0)
                log_debug_errno(r, "Failed to chase block device, ignoring: %m");

        return 1;
}
364
365 int get_block_device_harder(const char *path, dev_t *ret) {
366 _cleanup_close_ int fd = -1;
367
368 assert(path);
369 assert(ret);
370
371 fd = open(path, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);
372 if (fd < 0)
373 return -errno;
374
375 return get_block_device_harder_fd(fd, ret);
376 }
377
378 int lock_whole_block_device(dev_t devt, int operation) {
379 _cleanup_close_ int lock_fd = -1;
380 dev_t whole_devt;
381 int r;
382
383 /* Let's get a BSD file lock on the whole block device, as per: https://systemd.io/BLOCK_DEVICE_LOCKING */
384
385 r = block_get_whole_disk(devt, &whole_devt);
386 if (r < 0)
387 return r;
388
389 lock_fd = r = device_open_from_devnum(S_IFBLK, whole_devt, O_RDONLY|O_CLOEXEC|O_NONBLOCK, NULL);
390 if (r < 0)
391 return r;
392
393 if (flock(lock_fd, operation) < 0)
394 return -errno;
395
396 return TAKE_FD(lock_fd);
397 }
398
399 int blockdev_partscan_enabled(int fd) {
400 _cleanup_free_ char *p = NULL, *buf = NULL;
401 unsigned long long ull;
402 struct stat st;
403 int r;
404
405 /* Checks if partition scanning is correctly enabled on the block device */
406
407 if (fstat(fd, &st) < 0)
408 return -errno;
409
410 if (!S_ISBLK(st.st_mode))
411 return -ENOTBLK;
412
413 if (asprintf(&p, "/sys/dev/block/%u:%u/capability", major(st.st_rdev), minor(st.st_rdev)) < 0)
414 return -ENOMEM;
415
416 r = read_one_line_file(p, &buf);
417 if (r == -ENOENT) /* If the capability file doesn't exist then we are most likely looking at a
418 * partition block device, not the whole block device. And that means we have no
419 * partition scanning on for it (we do for its parent, but not for the partition
420 * itself). */
421 return false;
422 if (r < 0)
423 return r;
424
425 r = safe_atollu_full(buf, 16, &ull);
426 if (r < 0)
427 return r;
428
429 #ifndef GENHD_FL_NO_PART_SCAN
430 #define GENHD_FL_NO_PART_SCAN (0x0200)
431 #endif
432
433 return !FLAGS_SET(ull, GENHD_FL_NO_PART_SCAN);
434 }
435
436 static int blockdev_is_encrypted(const char *sysfs_path, unsigned depth_left) {
437 _cleanup_free_ char *p = NULL, *uuids = NULL;
438 _cleanup_closedir_ DIR *d = NULL;
439 int r, found_encrypted = false;
440
441 assert(sysfs_path);
442
443 if (depth_left == 0)
444 return -EINVAL;
445
446 p = path_join(sysfs_path, "dm/uuid");
447 if (!p)
448 return -ENOMEM;
449
450 r = read_one_line_file(p, &uuids);
451 if (r != -ENOENT) {
452 if (r < 0)
453 return r;
454
455 /* The DM device's uuid attribute is prefixed with "CRYPT-" if this is a dm-crypt device. */
456 if (startswith(uuids, "CRYPT-"))
457 return true;
458 }
459
460 /* Not a dm-crypt device itself. But maybe it is on top of one? Follow the links in the "slaves/"
461 * subdir. */
462
463 p = mfree(p);
464 p = path_join(sysfs_path, "slaves");
465 if (!p)
466 return -ENOMEM;
467
468 d = opendir(p);
469 if (!d) {
470 if (errno == ENOENT) /* Doesn't have underlying devices */
471 return false;
472
473 return -errno;
474 }
475
476 for (;;) {
477 _cleanup_free_ char *q = NULL;
478 struct dirent *de;
479
480 errno = 0;
481 de = readdir_no_dot(d);
482 if (!de) {
483 if (errno != 0)
484 return -errno;
485
486 break; /* No more underlying devices */
487 }
488
489 q = path_join(p, de->d_name);
490 if (!q)
491 return -ENOMEM;
492
493 r = blockdev_is_encrypted(q, depth_left - 1);
494 if (r < 0)
495 return r;
496 if (r == 0) /* we found one that is not encrypted? then propagate that immediately */
497 return false;
498
499 found_encrypted = true;
500 }
501
502 return found_encrypted;
503 }
504
/* Checks whether the file system fd lives on is backed by an encrypted block device.
 *
 * Returns false if get_block_device_fd() finds no block device, a negative error if that lookup
 * fails, and otherwise the verdict of blockdev_is_encrypted() for the device's sysfs directory. */
int fd_is_encrypted(int fd) {
        char sysfs[SYS_BLOCK_PATH_MAX(NULL)];
        dev_t backing;
        int r;

        r = get_block_device_fd(fd, &backing);
        if (r < 0)
                return r;
        if (r == 0) /* doesn't have a block device */
                return false;

        xsprintf_sys_block_path(sysfs, NULL, backing);

        return blockdev_is_encrypted(sysfs, 10 /* safety net: maximum recursion depth */);
}
520
/* Checks whether the file system the path lives on is backed by an encrypted block device.
 *
 * Returns false if get_block_device() finds no block device, a negative error if that lookup fails,
 * and otherwise the verdict of blockdev_is_encrypted() for the device's sysfs directory. */
int path_is_encrypted(const char *path) {
        char sysfs[SYS_BLOCK_PATH_MAX(NULL)];
        dev_t backing;
        int r;

        r = get_block_device(path, &backing);
        if (r < 0)
                return r;
        if (r == 0) /* doesn't have a block device */
                return false;

        xsprintf_sys_block_path(sysfs, NULL, backing);

        return blockdev_is_encrypted(sysfs, 10 /* safety net: maximum recursion depth */);
}
536
537 int fd_get_whole_disk(int fd, bool backing, dev_t *ret) {
538 dev_t devt;
539 int r;
540
541 assert(fd >= 0);
542 assert(ret);
543
544 r = fd_get_devnum(fd, backing ? BLOCK_DEVICE_LOOKUP_BACKING : 0, &devt);
545 if (r < 0)
546 return r;
547
548 return block_get_whole_disk(devt, ret);
549 }
550
551 int path_get_whole_disk(const char *path, bool backing, dev_t *ret) {
552 _cleanup_close_ int fd = -1;
553
554 fd = open(path, O_CLOEXEC|O_PATH);
555 if (fd < 0)
556 return -errno;
557
558 return fd_get_whole_disk(fd, backing, ret);
559 }
560
561 int block_device_add_partition(
562 int fd,
563 const char *name,
564 int nr,
565 uint64_t start,
566 uint64_t size) {
567
568 assert(fd >= 0);
569 assert(name);
570 assert(nr > 0);
571
572 struct blkpg_partition bp = {
573 .pno = nr,
574 .start = start,
575 .length = size,
576 };
577
578 struct blkpg_ioctl_arg ba = {
579 .op = BLKPG_ADD_PARTITION,
580 .data = &bp,
581 .datalen = sizeof(bp),
582 };
583
584 if (strlen(name) >= sizeof(bp.devname))
585 return -EINVAL;
586
587 strcpy(bp.devname, name);
588
589 return RET_NERRNO(ioctl(fd, BLKPG, &ba));
590 }
591
/* Issues a BLKPG_DEL_PARTITION request on fd for partition number nr with the given device name.
 *
 * Returns -EINVAL if name does not fit into the request's devname field (including the terminating
 * NUL), otherwise the ioctl() result converted to a negative errno on failure. */
int block_device_remove_partition(
                int fd,
                const char *name,
                int nr) {

        assert(fd >= 0);
        assert(name);
        assert(nr > 0);

        struct blkpg_partition part = {
                .pno = nr,
        };
        size_t name_len = strlen(name);

        if (name_len >= sizeof(part.devname))
                return -EINVAL;

        /* Copies the terminating NUL as well; the length was validated above. */
        memcpy(part.devname, name, name_len + 1);

        struct blkpg_ioctl_arg request = {
                .op = BLKPG_DEL_PARTITION,
                .data = &part,
                .datalen = sizeof(part),
        };

        return RET_NERRNO(ioctl(fd, BLKPG, &request));
}
618
619 int block_device_resize_partition(
620 int fd,
621 int nr,
622 uint64_t start,
623 uint64_t size) {
624
625 assert(fd >= 0);
626 assert(nr > 0);
627
628 struct blkpg_partition bp = {
629 .pno = nr,
630 .start = start,
631 .length = size,
632 };
633
634 struct blkpg_ioctl_arg ba = {
635 .op = BLKPG_RESIZE_PARTITION,
636 .data = &bp,
637 .datalen = sizeof(bp),
638 };
639
640 return RET_NERRNO(ioctl(fd, BLKPG, &ba));
641 }
642
643 int partition_enumerator_new(sd_device *dev, sd_device_enumerator **ret) {
644 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
645 const char *s;
646 int r;
647
648 assert(dev);
649 assert(ret);
650
651 /* Refuse invocation on partition block device, insist on "whole" device */
652 r = block_device_is_whole_disk(dev);
653 if (r < 0)
654 return r;
655 if (r == 0)
656 return -ENXIO; /* return a recognizable error */
657
658 r = sd_device_enumerator_new(&e);
659 if (r < 0)
660 return r;
661
662 r = sd_device_enumerator_allow_uninitialized(e);
663 if (r < 0)
664 return r;
665
666 r = sd_device_enumerator_add_match_parent(e, dev);
667 if (r < 0)
668 return r;
669
670 r = sd_device_get_sysname(dev, &s);
671 if (r < 0)
672 return r;
673
674 /* Also add sysname check for safety. Hopefully, this also improves performance. */
675 s = strjoina(s, "*");
676 r = sd_device_enumerator_add_match_sysname(e, s);
677 if (r < 0)
678 return r;
679
680 r = sd_device_enumerator_add_match_subsystem(e, "block", /* match = */ true);
681 if (r < 0)
682 return r;
683
684 r = sd_device_enumerator_add_match_property(e, "DEVTYPE", "partition");
685 if (r < 0)
686 return r;
687
688 *ret = TAKE_PTR(e);
689 return 0;
690 }
691
692 int block_device_remove_all_partitions(sd_device *dev, int fd) {
693 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
694 _cleanup_(sd_device_unrefp) sd_device *dev_unref = NULL;
695 _cleanup_close_ int fd_close = -1;
696 bool has_partitions = false;
697 sd_device *part;
698 int r, k = 0;
699
700 assert(dev || fd >= 0);
701
702 if (!dev) {
703 r = block_device_new_from_fd(fd, 0, &dev_unref);
704 if (r < 0)
705 return r;
706
707 dev = dev_unref;
708 }
709
710 r = partition_enumerator_new(dev, &e);
711 if (r < 0)
712 return r;
713
714 if (fd < 0) {
715 fd_close = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|O_RDONLY);
716 if (fd_close < 0)
717 return fd_close;
718
719 fd = fd_close;
720 }
721
722 FOREACH_DEVICE(e, part) {
723 const char *v, *devname;
724 int nr;
725
726 has_partitions = true;
727
728 r = sd_device_get_devname(part, &devname);
729 if (r < 0)
730 return r;
731
732 r = sd_device_get_property_value(part, "PARTN", &v);
733 if (r < 0)
734 return r;
735
736 r = safe_atoi(v, &nr);
737 if (r < 0)
738 return r;
739
740 r = block_device_remove_partition(fd, devname, nr);
741 if (r == -ENODEV) {
742 log_debug("Kernel removed partition %s before us, ignoring", devname);
743 continue;
744 }
745 if (r < 0) {
746 log_debug_errno(r, "Failed to remove partition %s: %m", devname);
747 k = k < 0 ? k : r;
748 continue;
749 }
750
751 log_debug("Removed partition %s", devname);
752 }
753
754 return k < 0 ? k : has_partitions;
755 }
756
757 int block_device_has_partitions(sd_device *dev) {
758 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
759 int r;
760
761 assert(dev);
762
763 /* Checks if the specified device currently has partitions. */
764
765 r = partition_enumerator_new(dev, &e);
766 if (r < 0)
767 return r;
768
769 return !!sd_device_enumerator_get_device_first(e);
770 }
771
772 int blockdev_reread_partition_table(sd_device *dev) {
773 _cleanup_close_ int fd = -1;
774
775 assert(dev);
776
777 /* Try to re-read the partition table. This only succeeds if none of the devices is busy. */
778
779 fd = sd_device_open(dev, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
780 if (fd < 0)
781 return fd;
782
783 if (flock(fd, LOCK_EX|LOCK_NB) < 0)
784 return -errno;
785
786 if (ioctl(fd, BLKRRPART, 0) < 0)
787 return -errno;
788
789 return 0;
790 }