1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
3 #include <linux/blkpg.h>
11 #include "alloc-util.h"
12 #include "blockdev-util.h"
13 #include "btrfs-util.h"
14 #include "device-private.h"
15 #include "device-util.h"
16 #include "devnum-util.h"
17 #include "dirent-util.h"
18 #include "errno-util.h"
22 #include "missing_magic.h"
23 #include "parse-util.h"
25 static int fd_get_devnum(int fd
, BlockDeviceLookupFlag flags
, dev_t
*ret
) {
33 if (fstat(fd
, &st
) < 0)
36 if (S_ISBLK(st
.st_mode
))
38 else if (!FLAGS_SET(flags
, BLOCK_DEVICE_LOOKUP_BACKING
))
40 else if (!S_ISREG(st
.st_mode
) && !S_ISDIR(st
.st_mode
))
42 else if (major(st
.st_dev
) != 0)
45 /* If major(st.st_dev) is zero, this might mean we are backed by btrfs, which needs special
46 * handling, to get the backing device node. */
48 r
= btrfs_get_block_device_fd(fd
, &devnum
);
49 if (r
== -ENOTTY
) /* not btrfs */
59 int block_device_is_whole_disk(sd_device
*dev
) {
62 if (!device_in_subsystem(dev
, "block"))
65 return device_is_devtype(dev
, "disk");
68 int block_device_get_whole_disk(sd_device
*dev
, sd_device
**ret
) {
74 /* Do not unref returned sd_device object. */
76 r
= block_device_is_whole_disk(dev
);
80 r
= sd_device_get_parent(dev
, &dev
);
81 if (r
== -ENOENT
) /* Already removed? Let's return a recognizable error. */
86 r
= block_device_is_whole_disk(dev
);
97 int block_device_get_originating(sd_device
*dev
, sd_device
**ret
) {
98 _cleanup_(sd_device_unrefp
) sd_device
*first_found
= NULL
;
100 dev_t devnum
= 0; /* avoid false maybe-uninitialized warning */
102 /* For the specified block device tries to chase it through the layers, in case LUKS-style DM
103 * stacking is used, trying to find the next underlying layer. */
108 FOREACH_DEVICE_CHILD_WITH_SUFFIX(dev
, child
, suffix
) {
109 sd_device
*child_whole_disk
;
112 if (!path_startswith(suffix
, "slaves"))
115 if (block_device_get_whole_disk(child
, &child_whole_disk
) < 0)
118 if (sd_device_get_devnum(child_whole_disk
, &n
) < 0)
122 first_found
= sd_device_ref(child
);
127 /* We found a device backed by multiple other devices. We don't really support automatic
128 * discovery on such setups, with the exception of dm-verity partitions. In this case there
129 * are two backing devices: the data partition and the hash partition. We are fine with such
130 * setups, however, only if both partitions are on the same physical device. Hence, let's
131 * verify this by iterating over every node in the 'slaves/' directory and comparing them with
132 * the first that gets returned by readdir(), to ensure they all point to the same device. */
140 *ret
= TAKE_PTR(first_found
);
141 return 1; /* found */
144 int block_device_new_from_fd(int fd
, BlockDeviceLookupFlag flags
, sd_device
**ret
) {
145 _cleanup_(sd_device_unrefp
) sd_device
*dev
= NULL
;
152 r
= fd_get_devnum(fd
, flags
, &devnum
);
156 r
= sd_device_new_from_devnum(&dev
, 'b', devnum
);
160 if (FLAGS_SET(flags
, BLOCK_DEVICE_LOOKUP_ORIGINATING
)) {
161 _cleanup_(sd_device_unrefp
) sd_device
*dev_origin
= NULL
;
162 sd_device
*dev_whole_disk
;
164 r
= block_device_get_whole_disk(dev
, &dev_whole_disk
);
168 r
= block_device_get_originating(dev_whole_disk
, &dev_origin
);
169 if (r
< 0 && r
!= -ENOENT
)
172 device_unref_and_replace(dev
, dev_origin
);
175 if (FLAGS_SET(flags
, BLOCK_DEVICE_LOOKUP_WHOLE_DISK
)) {
176 sd_device
*dev_whole_disk
;
178 r
= block_device_get_whole_disk(dev
, &dev_whole_disk
);
182 *ret
= sd_device_ref(dev_whole_disk
);
186 *ret
= sd_device_ref(dev
);
190 int block_device_new_from_path(const char *path
, BlockDeviceLookupFlag flags
, sd_device
**ret
) {
191 _cleanup_close_
int fd
= -EBADF
;
196 fd
= open(path
, O_CLOEXEC
|O_PATH
);
200 return block_device_new_from_fd(fd
, flags
, ret
);
203 int block_get_whole_disk(dev_t d
, dev_t
*ret
) {
204 char p
[SYS_BLOCK_PATH_MAX("/partition")];
205 _cleanup_free_
char *s
= NULL
;
214 /* If it has a queue this is good enough for us */
215 xsprintf_sys_block_path(p
, "/queue", d
);
216 if (access(p
, F_OK
) >= 0) {
223 /* If it is a partition find the originating device */
224 xsprintf_sys_block_path(p
, "/partition", d
);
225 if (access(p
, F_OK
) < 0)
228 /* Get parent dev_t */
229 xsprintf_sys_block_path(p
, "/../dev", d
);
230 r
= read_one_line_file(p
, &s
);
234 r
= parse_devnum(s
, &devt
);
238 /* Only return this if it is really good enough for us. */
239 xsprintf_sys_block_path(p
, "/queue", devt
);
240 if (access(p
, F_OK
) < 0)
247 int get_block_device_fd(int fd
, dev_t
*ret
) {
254 /* Gets the block device directly backing a file system. If the block device is encrypted, returns
255 * the device mapper block device. */
260 if (major(st
.st_dev
) != 0) {
265 r
= btrfs_get_block_device_fd(fd
, ret
);
268 if (r
!= -ENOTTY
) /* not btrfs */
275 int get_block_device(const char *path
, dev_t
*ret
) {
276 _cleanup_close_
int fd
= -EBADF
;
281 fd
= open(path
, O_RDONLY
|O_NOFOLLOW
|O_CLOEXEC
);
285 return get_block_device_fd(fd
, ret
);
288 int block_get_originating(dev_t dt
, dev_t
*ret
) {
289 _cleanup_(sd_device_unrefp
) sd_device
*dev
= NULL
, *origin
= NULL
;
294 r
= sd_device_new_from_devnum(&dev
, 'b', dt
);
298 r
= block_device_get_originating(dev
, &origin
);
302 return sd_device_get_devnum(origin
, ret
);
305 int get_block_device_harder_fd(int fd
, dev_t
*ret
) {
311 /* Gets the backing block device for a file system, and handles LUKS encrypted file systems, looking for its
312 * immediate parent, if there is one. */
314 r
= get_block_device_fd(fd
, ret
);
318 r
= block_get_originating(*ret
, ret
);
320 log_debug_errno(r
, "Failed to chase block device, ignoring: %m");
325 int get_block_device_harder(const char *path
, dev_t
*ret
) {
326 _cleanup_close_
int fd
= -EBADF
;
331 fd
= open(path
, O_RDONLY
|O_NOFOLLOW
|O_CLOEXEC
);
335 return get_block_device_harder_fd(fd
, ret
);
338 int lock_whole_block_device(dev_t devt
, int operation
) {
339 _cleanup_close_
int lock_fd
= -EBADF
;
343 /* Let's get a BSD file lock on the whole block device, as per: https://systemd.io/BLOCK_DEVICE_LOCKING */
345 r
= block_get_whole_disk(devt
, &whole_devt
);
349 lock_fd
= r
= device_open_from_devnum(S_IFBLK
, whole_devt
, O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
, NULL
);
353 if (flock(lock_fd
, operation
) < 0)
356 return TAKE_FD(lock_fd
);
359 int blockdev_partscan_enabled(sd_device
*dev
) {
363 /* Checks if partition scanning is correctly enabled on the block device.
365 * The 'GENHD_FL_NO_PART_SCAN' flag was introduced by
366 * https://github.com/torvalds/linux/commit/d27769ec3df1a8de9ca450d2dcd72d1ab259ba32 (v3.2).
367 * But at that time, the flag is also effectively implied when 'minors' element of 'struct gendisk'
368 * is 1, which can be checked with 'ext_range' sysfs attribute. Explicit flag ('GENHD_FL_NO_PART_SCAN')
369 * can be obtained from 'capability' sysattr.
371 * With https://github.com/torvalds/linux/commit/46e7eac647b34ed4106a8262f8bedbb90801fadd (v5.17),
372 * the flag is renamed to GENHD_FL_NO_PART.
374 * With https://github.com/torvalds/linux/commit/1ebe2e5f9d68e94c524aba876f27b945669a7879 (v5.17),
375 * we can check the flag from 'ext_range' sysfs attribute directly.
377 * With https://github.com/torvalds/linux/commit/430cc5d3ab4d0ba0bd011cfbb0035e46ba92920c (v5.17),
378 * the value of GENHD_FL_NO_PART is changed from 0x0200 to 0x0004. 💣💣💣
379 * Note, the new value was used by the GENHD_FL_MEDIA_CHANGE_NOTIFY flag, which was introduced by
380 * 86ce18d7b7925bfd6b64c061828ca2a857ee83b8 (v2.6.22), and removed by
381 * 9243c6f3e012a92dd900d97ef45efaf8a8edc448 (v5.7). If we believe the commit message of
382 * e81cd5a983bb35dabd38ee472cf3fea1c63e0f23, the flag was never used. So, fortunately, we can use
383 * both the new and old values safely.
385 * With https://github.com/torvalds/linux/commit/b9684a71fca793213378dd410cd11675d973eaa1 (v5.19),
386 * another flag GD_SUPPRESS_PART_SCAN is introduced for loopback block device, and partition scanning
387 * is done only when both GENHD_FL_NO_PART and GD_SUPPRESS_PART_SCAN are not set. Before the commit,
388 * LO_FLAGS_PARTSCAN flag was directly tied with GENHD_FL_NO_PART. But with this change now it is
389 * tied with GD_SUPPRESS_PART_SCAN. So, LO_FLAGS_PARTSCAN cannot be obtained from 'ext_range'
390 * sysattr, which corresponds to GENHD_FL_NO_PART, and we need to read 'loop/partscan'. 💣💣💣
392 * With https://github.com/torvalds/linux/commit/73a166d9749230d598320fdae3b687cdc0e2e205 (v6.3),
393 * the GD_SUPPRESS_PART_SCAN flag is also introduced for userspace block device (ublk). Though, not
394 * sure if we should support the device...
396 * With https://github.com/torvalds/linux/commit/e81cd5a983bb35dabd38ee472cf3fea1c63e0f23 (v6.3),
397 * the 'capability' sysfs attribute is deprecated, hence we cannot check flags from it. 💣💣💣
399 * With https://github.com/torvalds/linux/commit/a4217c6740dc64a3eb6815868a9260825e8c68c6 (v6.10,
400 * backported to v6.6+), the partscan status is directly exposed as 'partscan' sysattr.
402 * To support both old and new kernels, we need to do the following:
403 * 1) check 'partscan' sysfs attribute where the information is made directly available,
404 * 2) check if the blockdev refers to a partition, where partscan is not supported,
405 * 3) check 'loop/partscan' sysfs attribute for loopback block devices, and if '0' we can conclude
406 * partition scanning is disabled,
407 * 4) check 'ext_range' sysfs attribute, and if '1' we can conclude partition scanning is disabled,
408 * 5) otherwise check 'capability' sysfs attribute for ancient kernel versions. */
412 /* For v6.10 or newer. */
413 r
= device_get_sysattr_bool(dev
, "partscan");
417 /* Partition block devices never have partition scanning on, there's no concept of sub-partitions for
419 if (device_is_devtype(dev
, "partition"))
422 /* For loopback block device, especially for v5.19 or newer. Even if this is enabled, we also need to
423 * check GENHD_FL_NO_PART flag through 'ext_range' and 'capability' sysfs attributes below. */
424 if (device_get_sysattr_bool(dev
, "loop/partscan") == 0)
427 r
= device_get_sysattr_int(dev
, "ext_range", &ext_range
);
428 if (r
== -ENOENT
) /* If the ext_range file doesn't exist then we are most likely looking at a
429 * partition block device, not the whole block device. And that means we have no
430 * partition scanning on for it (we do for its parent, but not for the partition
436 if (ext_range
<= 1) /* The value should be always positive, but the kernel uses '%d' for the
437 * attribute. Let's gracefully handle zero or negative. */
440 r
= device_get_sysattr_unsigned_full(dev
, "capability", 16, &capability
);
446 #define GENHD_FL_NO_PART_OLD 0x0200
447 #define GENHD_FL_NO_PART_NEW 0x0004
448 /* If one of the NO_PART flags is set, part scanning is definitely off. */
449 if ((capability
& (GENHD_FL_NO_PART_OLD
| GENHD_FL_NO_PART_NEW
)) != 0)
452 /* Otherwise, assume part scanning is on, we have no further checks available. Assume the best. */
456 int blockdev_partscan_enabled_fd(int fd
) {
457 _cleanup_(sd_device_unrefp
) sd_device
*dev
= NULL
;
462 r
= block_device_new_from_fd(fd
, 0, &dev
);
466 return blockdev_partscan_enabled(dev
);
469 static int blockdev_is_encrypted(const char *sysfs_path
, unsigned depth_left
) {
470 _cleanup_free_
char *p
= NULL
, *uuids
= NULL
;
471 _cleanup_closedir_
DIR *d
= NULL
;
472 int r
, found_encrypted
= false;
479 p
= path_join(sysfs_path
, "dm/uuid");
483 r
= read_one_line_file(p
, &uuids
);
488 /* The DM device's uuid attribute is prefixed with "CRYPT-" if this is a dm-crypt device. */
489 if (startswith(uuids
, "CRYPT-"))
493 /* Not a dm-crypt device itself. But maybe it is on top of one? Follow the links in the "slaves/"
497 p
= path_join(sysfs_path
, "slaves");
503 if (errno
== ENOENT
) /* Doesn't have underlying devices */
510 _cleanup_free_
char *q
= NULL
;
514 de
= readdir_no_dot(d
);
519 break; /* No more underlying devices */
522 q
= path_join(p
, de
->d_name
);
526 r
= blockdev_is_encrypted(q
, depth_left
- 1);
529 if (r
== 0) /* we found one that is not encrypted? then propagate that immediately */
532 found_encrypted
= true;
535 return found_encrypted
;
538 int fd_is_encrypted(int fd
) {
539 char p
[SYS_BLOCK_PATH_MAX(NULL
)];
543 r
= get_block_device_fd(fd
, &devt
);
546 if (r
== 0) /* doesn't have a block device */
549 xsprintf_sys_block_path(p
, NULL
, devt
);
551 return blockdev_is_encrypted(p
, 10 /* safety net: maximum recursion depth */);
554 int path_is_encrypted(const char *path
) {
555 char p
[SYS_BLOCK_PATH_MAX(NULL
)];
559 r
= get_block_device(path
, &devt
);
562 if (r
== 0) /* doesn't have a block device */
565 xsprintf_sys_block_path(p
, NULL
, devt
);
567 return blockdev_is_encrypted(p
, 10 /* safety net: maximum recursion depth */);
570 int fd_get_whole_disk(int fd
, bool backing
, dev_t
*ret
) {
577 r
= fd_get_devnum(fd
, backing
? BLOCK_DEVICE_LOOKUP_BACKING
: 0, &devt
);
581 return block_get_whole_disk(devt
, ret
);
584 int path_get_whole_disk(const char *path
, bool backing
, dev_t
*ret
) {
585 _cleanup_close_
int fd
= -EBADF
;
587 fd
= open(path
, O_CLOEXEC
|O_PATH
);
591 return fd_get_whole_disk(fd
, backing
, ret
);
594 int block_device_add_partition(
605 struct blkpg_partition bp
= {
611 struct blkpg_ioctl_arg ba
= {
612 .op
= BLKPG_ADD_PARTITION
,
614 .datalen
= sizeof(bp
),
617 if (strlen(name
) >= sizeof(bp
.devname
))
620 strcpy(bp
.devname
, name
);
622 return RET_NERRNO(ioctl(fd
, BLKPG
, &ba
));
625 int block_device_remove_partition(
634 struct blkpg_partition bp
= {
638 struct blkpg_ioctl_arg ba
= {
639 .op
= BLKPG_DEL_PARTITION
,
641 .datalen
= sizeof(bp
),
644 if (strlen(name
) >= sizeof(bp
.devname
))
647 strcpy(bp
.devname
, name
);
649 return RET_NERRNO(ioctl(fd
, BLKPG
, &ba
));
652 int block_device_resize_partition(
661 struct blkpg_partition bp
= {
667 struct blkpg_ioctl_arg ba
= {
668 .op
= BLKPG_RESIZE_PARTITION
,
670 .datalen
= sizeof(bp
),
673 return RET_NERRNO(ioctl(fd
, BLKPG
, &ba
));
676 int partition_enumerator_new(sd_device
*dev
, sd_device_enumerator
**ret
) {
677 _cleanup_(sd_device_enumerator_unrefp
) sd_device_enumerator
*e
= NULL
;
684 /* Refuse invocation on partition block device, insist on "whole" device */
685 r
= block_device_is_whole_disk(dev
);
689 return -ENXIO
; /* return a recognizable error */
691 r
= sd_device_enumerator_new(&e
);
695 r
= sd_device_enumerator_allow_uninitialized(e
);
699 r
= sd_device_enumerator_add_match_parent(e
, dev
);
703 r
= sd_device_get_sysname(dev
, &s
);
707 /* Also add sysname check for safety. Hopefully, this also improves performance. */
708 s
= strjoina(s
, "*");
709 r
= sd_device_enumerator_add_match_sysname(e
, s
);
713 r
= sd_device_enumerator_add_match_subsystem(e
, "block", /* match = */ true);
717 r
= sd_device_enumerator_add_match_property(e
, "DEVTYPE", "partition");
725 int block_device_remove_all_partitions(sd_device
*dev
, int fd
) {
726 _cleanup_(sd_device_enumerator_unrefp
) sd_device_enumerator
*e
= NULL
;
727 _cleanup_(sd_device_unrefp
) sd_device
*dev_unref
= NULL
;
728 _cleanup_close_
int fd_close
= -EBADF
;
729 bool has_partitions
= false;
732 assert(dev
|| fd
>= 0);
735 r
= block_device_new_from_fd(fd
, 0, &dev_unref
);
742 r
= partition_enumerator_new(dev
, &e
);
747 fd_close
= sd_device_open(dev
, O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
|O_RDONLY
);
754 FOREACH_DEVICE(e
, part
) {
755 const char *v
, *devname
;
758 has_partitions
= true;
760 r
= sd_device_get_devname(part
, &devname
);
764 r
= sd_device_get_property_value(part
, "PARTN", &v
);
768 r
= safe_atoi(v
, &nr
);
772 r
= btrfs_forget_device(devname
);
773 if (r
< 0 && r
!= -ENOENT
)
774 log_debug_errno(r
, "Failed to forget btrfs device %s, ignoring: %m", devname
);
776 r
= block_device_remove_partition(fd
, devname
, nr
);
778 log_debug("Kernel removed partition %s before us, ignoring", devname
);
782 log_debug_errno(r
, "Failed to remove partition %s: %m", devname
);
787 log_debug("Removed partition %s", devname
);
790 return k
< 0 ? k
: has_partitions
;
793 int block_device_has_partitions(sd_device
*dev
) {
794 _cleanup_(sd_device_enumerator_unrefp
) sd_device_enumerator
*e
= NULL
;
799 /* Checks if the specified device currently has partitions. */
801 r
= partition_enumerator_new(dev
, &e
);
805 return !!sd_device_enumerator_get_device_first(e
);
808 int blockdev_reread_partition_table(sd_device
*dev
) {
809 _cleanup_close_
int fd
= -EBADF
;
813 /* Try to re-read the partition table. This only succeeds if none of the devices is busy. */
815 fd
= sd_device_open(dev
, O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
);
819 if (flock(fd
, LOCK_EX
|LOCK_NB
) < 0)
822 if (ioctl(fd
, BLKRRPART
, 0) < 0)
828 int blockdev_get_sector_size(int fd
, uint32_t *ret
) {
834 if (ioctl(fd
, BLKSSZGET
, &ssz
) < 0)
836 if (ssz
<= 0) /* make sure the field is initialized */
837 return log_debug_errno(SYNTHETIC_ERRNO(EIO
), "Block device reported invalid sector size %i.", ssz
);
843 int blockdev_get_device_size(int fd
, uint64_t *ret
) {
849 /* This is just a type-safe wrapper around BLKGETSIZE64 that gets us around having to include messy linux/fs.h in various clients */
851 if (ioctl(fd
, BLKGETSIZE64
, &sz
) < 0)
858 int blockdev_get_root(int level
, dev_t
*ret
) {
859 _cleanup_free_
char *p
= NULL
;
863 /* Returns the device node backing the root file system. Traces through
864 * dm-crypt/dm-verity/... Returns > 0 and the devno of the device on success. If there's no block
865 * device (or multiple) returns 0 and a devno of 0. Failure otherwise.
867 * If the root mount has been replaced by some form of volatile file system (overlayfs), the original
868 * root block device node is symlinked in /run/systemd/volatile-root. Let's read that here. */
869 r
= readlink_malloc("/run/systemd/volatile-root", &p
);
870 if (r
== -ENOENT
) { /* volatile-root not found */
871 r
= get_block_device_harder("/", &devno
);
873 return btrfs_log_dev_root(level
, r
, "root file system");
875 return log_full_errno(level
, r
, "Failed to determine block device of root file system: %m");
876 if (r
== 0) { /* Not backed by a single block device. (Could be NFS or so, or could be multi-device RAID or so) */
877 r
= get_block_device_harder("/usr", &devno
);
879 return btrfs_log_dev_root(level
, r
, "/usr");
881 return log_full_errno(level
, r
, "Failed to determine block device of /usr/ file system: %m");
882 if (r
== 0) { /* /usr/ not backed by single block device, either. */
883 log_debug("Neither root nor /usr/ file system are on a (single) block device.");
892 return log_full_errno(level
, r
, "Failed to read symlink /run/systemd/volatile-root: %m");
895 r
= device_path_parse_major_minor(p
, &m
, &devno
);
897 return log_full_errno(level
, r
, "Failed to parse major/minor device node: %m");
899 return log_full_errno(level
, SYNTHETIC_ERRNO(ENOTBLK
), "Volatile root device is of wrong type.");