]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/blockdev-util.c
man/systemd-sysext: list ephemeral/ephemeral-import in the list of options
[thirdparty/systemd.git] / src / shared / blockdev-util.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <linux/blkpg.h>
4 #include <linux/fs.h>
5 #include <sys/file.h>
6 #include <sys/ioctl.h>
7 #include <sys/stat.h>
8 #include <unistd.h>
9
10 #include "sd-device.h"
11
12 #include "alloc-util.h"
13 #include "blockdev-util.h"
14 #include "btrfs-util.h"
15 #include "device-private.h"
16 #include "device-util.h"
17 #include "devnum-util.h"
18 #include "dirent-util.h"
19 #include "errno-util.h"
20 #include "fd-util.h"
21 #include "fileio.h"
22 #include "fs-util.h"
23 #include "parse-util.h"
24 #include "path-util.h"
25 #include "string-util.h"
26
27 static int fd_get_devnum(int fd, BlockDeviceLookupFlag flags, dev_t *ret) {
28 struct stat st;
29 dev_t devnum;
30 int r;
31
32 assert(fd >= 0);
33 assert(ret);
34
35 if (fstat(fd, &st) < 0)
36 return -errno;
37
38 if (S_ISBLK(st.st_mode))
39 devnum = st.st_rdev;
40 else if (!FLAGS_SET(flags, BLOCK_DEVICE_LOOKUP_BACKING))
41 return -ENOTBLK;
42 else if (!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode))
43 return -ENOTBLK;
44 else if (major(st.st_dev) != 0)
45 devnum = st.st_dev;
46 else {
47 /* If major(st.st_dev) is zero, this might mean we are backed by btrfs, which needs special
48 * handing, to get the backing device node. */
49
50 r = btrfs_get_block_device_fd(fd, &devnum);
51 if (r == -ENOTTY) /* not btrfs */
52 return -ENOTBLK;
53 if (r < 0)
54 return r;
55 }
56
57 *ret = devnum;
58 return 0;
59 }
60
61 int block_device_is_whole_disk(sd_device *dev) {
62 int r;
63
64 assert(dev);
65
66 r = device_in_subsystem(dev, "block");
67 if (r < 0)
68 return r;
69 if (r == 0)
70 return -ENOTBLK;
71
72 return device_is_devtype(dev, "disk");
73 }
74
75 int block_device_get_whole_disk(sd_device *dev, sd_device **ret) {
76 int r;
77
78 assert(dev);
79 assert(ret);
80
81 /* Do not unref returned sd_device object. */
82
83 r = block_device_is_whole_disk(dev);
84 if (r < 0)
85 return r;
86 if (r == 0) {
87 r = sd_device_get_parent(dev, &dev);
88 if (r == -ENOENT) /* Already removed? Let's return a recognizable error. */
89 return -ENODEV;
90 if (r < 0)
91 return r;
92
93 r = block_device_is_whole_disk(dev);
94 if (r < 0)
95 return r;
96 if (r == 0)
97 return -ENXIO;
98 }
99
100 *ret = dev;
101 return 0;
102 }
103
104 int block_device_get_originating(sd_device *dev, sd_device **ret) {
105 _cleanup_(sd_device_unrefp) sd_device *first_found = NULL;
106 const char *suffix;
107 dev_t devnum = 0; /* avoid false maybe-uninitialized warning */
108
109 /* For the specified block device tries to chase it through the layers, in case LUKS-style DM
110 * stacking is used, trying to find the next underlying layer. */
111
112 assert(dev);
113 assert(ret);
114
115 FOREACH_DEVICE_CHILD_WITH_SUFFIX(dev, child, suffix) {
116 sd_device *child_whole_disk;
117 dev_t n;
118
119 if (!path_startswith(suffix, "slaves"))
120 continue;
121
122 if (block_device_get_whole_disk(child, &child_whole_disk) < 0)
123 continue;
124
125 if (sd_device_get_devnum(child_whole_disk, &n) < 0)
126 continue;
127
128 if (!first_found) {
129 first_found = sd_device_ref(child);
130 devnum = n;
131 continue;
132 }
133
134 /* We found a device backed by multiple other devices. We don't really support automatic
135 * discovery on such setups, with the exception of dm-verity partitions. In this case there
136 * are two backing devices: the data partition and the hash partition. We are fine with such
137 * setups, however, only if both partitions are on the same physical device. Hence, let's
138 * verify this by iterating over every node in the 'slaves/' directory and comparing them with
139 * the first that gets returned by readdir(), to ensure they all point to the same device. */
140 if (n != devnum)
141 return -ENOTUNIQ;
142 }
143
144 if (!first_found)
145 return -ENOENT;
146
147 *ret = TAKE_PTR(first_found);
148 return 0;
149 }
150
151 int block_device_new_from_fd(int fd, BlockDeviceLookupFlag flags, sd_device **ret) {
152 _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
153 dev_t devnum;
154 int r;
155
156 assert(fd >= 0);
157 assert(ret);
158
159 r = fd_get_devnum(fd, flags, &devnum);
160 if (r < 0)
161 return r;
162
163 r = sd_device_new_from_devnum(&dev, 'b', devnum);
164 if (r < 0)
165 return r;
166
167 if (FLAGS_SET(flags, BLOCK_DEVICE_LOOKUP_ORIGINATING)) {
168 _cleanup_(sd_device_unrefp) sd_device *dev_origin = NULL;
169 sd_device *dev_whole_disk;
170
171 r = block_device_get_whole_disk(dev, &dev_whole_disk);
172 if (r < 0)
173 return r;
174
175 r = block_device_get_originating(dev_whole_disk, &dev_origin);
176 if (r >= 0)
177 device_unref_and_replace(dev, dev_origin);
178 else if (r != -ENOENT)
179 return r;
180 }
181
182 if (FLAGS_SET(flags, BLOCK_DEVICE_LOOKUP_WHOLE_DISK)) {
183 sd_device *dev_whole_disk;
184
185 r = block_device_get_whole_disk(dev, &dev_whole_disk);
186 if (r < 0)
187 return r;
188
189 *ret = sd_device_ref(dev_whole_disk);
190 return 0;
191 }
192
193 *ret = sd_device_ref(dev);
194 return 0;
195 }
196
197 int block_device_new_from_path(const char *path, BlockDeviceLookupFlag flags, sd_device **ret) {
198 _cleanup_close_ int fd = -EBADF;
199
200 assert(path);
201 assert(ret);
202
203 fd = open(path, O_CLOEXEC|O_PATH);
204 if (fd < 0)
205 return -errno;
206
207 return block_device_new_from_fd(fd, flags, ret);
208 }
209
210 int block_get_whole_disk(dev_t d, dev_t *ret) {
211 char p[SYS_BLOCK_PATH_MAX("/partition")];
212 _cleanup_free_ char *s = NULL;
213 dev_t devt;
214 int r;
215
216 assert(ret);
217
218 if (major(d) == 0)
219 return -ENODEV;
220
221 /* If it has a queue this is good enough for us */
222 xsprintf_sys_block_path(p, "/queue", d);
223 if (access(p, F_OK) >= 0) {
224 *ret = d;
225 return 0;
226 }
227 if (errno != ENOENT)
228 return -errno;
229
230 /* If it is a partition find the originating device */
231 xsprintf_sys_block_path(p, "/partition", d);
232 if (access(p, F_OK) < 0)
233 return -errno;
234
235 /* Get parent dev_t */
236 xsprintf_sys_block_path(p, "/../dev", d);
237 r = read_one_line_file(p, &s);
238 if (r < 0)
239 return r;
240
241 r = parse_devnum(s, &devt);
242 if (r < 0)
243 return r;
244
245 /* Only return this if it is really good enough for us. */
246 xsprintf_sys_block_path(p, "/queue", devt);
247 if (access(p, F_OK) < 0)
248 return -errno;
249
250 *ret = devt;
251 return 1;
252 }
253
/* Gets the block device directly backing a file system. If the block device is encrypted, returns
 * the device mapper block device.
 *
 * Returns 1 and the file system's st_dev if its major number is non-zero. Otherwise the btrfs helper
 * is consulted and its result is returned as-is, unless it reports -ENOTTY (not btrfs), in which case
 * 0 is returned and *ret is set to 0. */
int get_block_device_fd(int fd, dev_t *ret) {
        struct stat st;
        int r;

        assert(fd >= 0);
        assert(ret);

        if (fstat(fd, &st) < 0)
                return -errno;

        if (major(st.st_dev) == 0) {
                r = btrfs_get_block_device_fd(fd, ret);
                if (r != -ENOTTY) /* ENOTTY: not btrfs */
                        return r;

                *ret = 0;
                return 0;
        }

        *ret = st.st_dev;
        return 1;
}
279
280 int get_block_device(const char *path, dev_t *ret) {
281 _cleanup_close_ int fd = -EBADF;
282
283 assert(path);
284 assert(ret);
285
286 fd = open(path, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);
287 if (fd < 0)
288 return -errno;
289
290 return get_block_device_fd(fd, ret);
291 }
292
293 int block_get_originating(dev_t dt, dev_t *ret) {
294 _cleanup_(sd_device_unrefp) sd_device *dev = NULL, *origin = NULL;
295 int r;
296
297 assert(ret);
298
299 r = sd_device_new_from_devnum(&dev, 'b', dt);
300 if (r < 0)
301 return r;
302
303 r = block_device_get_originating(dev, &origin);
304 if (r < 0)
305 return r;
306
307 return sd_device_get_devnum(origin, ret);
308 }
309
/* Gets the backing block device for a file system, and handles LUKS encrypted file systems, looking
 * for its immediate parent, if there is one.
 *
 * Results <= 0 of get_block_device_fd() are passed through unchanged. Otherwise returns 1; a failure
 * to chase the device to its originating device is only logged at debug level. */
int get_block_device_harder_fd(int fd, dev_t *ret) {
        int r;

        assert(fd >= 0);
        assert(ret);

        r = get_block_device_fd(fd, ret);
        if (r <= 0)
                return r;

        /* Try to replace the device by the one it originates from; this is best-effort only. */
        r = block_get_originating(*ret, ret);
        if (r < 0)
                log_debug_errno(r, "Failed to chase block device, ignoring: %m");

        return 1;
}
329
330 int get_block_device_harder(const char *path, dev_t *ret) {
331 _cleanup_close_ int fd = -EBADF;
332
333 assert(path);
334 assert(ret);
335
336 fd = open(path, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);
337 if (fd < 0)
338 return -errno;
339
340 return get_block_device_harder_fd(fd, ret);
341 }
342
343 int lock_whole_block_device(dev_t devt, int operation) {
344 _cleanup_close_ int lock_fd = -EBADF;
345 dev_t whole_devt;
346 int r;
347
348 /* Let's get a BSD file lock on the whole block device, as per: https://systemd.io/BLOCK_DEVICE_LOCKING */
349
350 r = block_get_whole_disk(devt, &whole_devt);
351 if (r < 0)
352 return r;
353
354 lock_fd = r = device_open_from_devnum(S_IFBLK, whole_devt, O_RDONLY|O_CLOEXEC|O_NONBLOCK, NULL);
355 if (r < 0)
356 return r;
357
358 if (flock(lock_fd, operation) < 0)
359 return -errno;
360
361 return TAKE_FD(lock_fd);
362 }
363
364 int blockdev_partscan_enabled(sd_device *dev) {
365 unsigned capability;
366 int r, ext_range;
367
368 /* Checks if partition scanning is correctly enabled on the block device.
369 *
370 * The 'GENHD_FL_NO_PART_SCAN' flag was introduced by
371 * https://github.com/torvalds/linux/commit/d27769ec3df1a8de9ca450d2dcd72d1ab259ba32 (v3.2).
372 * But at that time, the flag is also effectively implied when 'minors' element of 'struct gendisk'
373 * is 1, which can be check with 'ext_range' sysfs attribute. Explicit flag ('GENHD_FL_NO_PART_SCAN')
374 * can be obtained from 'capability' sysattr.
375 *
376 * With https://github.com/torvalds/linux/commit/46e7eac647b34ed4106a8262f8bedbb90801fadd (v5.17),
377 * the flag is renamed to GENHD_FL_NO_PART.
378 *
379 * With https://github.com/torvalds/linux/commit/1ebe2e5f9d68e94c524aba876f27b945669a7879 (v5.17),
380 * we can check the flag from 'ext_range' sysfs attribute directly.
381 *
382 * With https://github.com/torvalds/linux/commit/430cc5d3ab4d0ba0bd011cfbb0035e46ba92920c (v5.17),
383 * the value of GENHD_FL_NO_PART is changed from 0x0200 to 0x0004. 💣💣💣
384 * Note, the new value was used by the GENHD_FL_MEDIA_CHANGE_NOTIFY flag, which was introduced by
385 * 86ce18d7b7925bfd6b64c061828ca2a857ee83b8 (v2.6.22), and removed by
386 * 9243c6f3e012a92dd900d97ef45efaf8a8edc448 (v5.7). If we believe the commit message of
387 * e81cd5a983bb35dabd38ee472cf3fea1c63e0f23, the flag was never used. So, fortunately, we can use
388 * both the new and old values safely.
389 *
390 * With https://github.com/torvalds/linux/commit/b9684a71fca793213378dd410cd11675d973eaa1 (v5.19),
391 * another flag GD_SUPPRESS_PART_SCAN is introduced for loopback block device, and partition scanning
392 * is done only when both GENHD_FL_NO_PART and GD_SUPPRESS_PART_SCAN are not set. Before the commit,
393 * LO_FLAGS_PARTSCAN flag was directly tied with GENHD_FL_NO_PART. But with this change now it is
394 * tied with GD_SUPPRESS_PART_SCAN. So, LO_FLAGS_PARTSCAN cannot be obtained from 'ext_range'
395 * sysattr, which corresponds to GENHD_FL_NO_PART, and we need to read 'loop/partscan'. 💣💣💣
396 *
397 * With https://github.com/torvalds/linux/commit/73a166d9749230d598320fdae3b687cdc0e2e205 (v6.3),
398 * the GD_SUPPRESS_PART_SCAN flag is also introduced for userspace block device (ublk). Though, not
399 * sure if we should support the device...
400 *
401 * With https://github.com/torvalds/linux/commit/e81cd5a983bb35dabd38ee472cf3fea1c63e0f23 (v6.3),
402 * the 'capability' sysfs attribute is deprecated, hence we cannot check flags from it. 💣💣💣
403 *
404 * With https://github.com/torvalds/linux/commit/a4217c6740dc64a3eb6815868a9260825e8c68c6 (v6.10,
405 * backported to v6.6+), the partscan status is directly exposed as 'partscan' sysattr.
406 *
407 * To support both old and new kernels, we need to do the following:
408 * 1) check 'partscan' sysfs attribute where the information is made directly available,
409 * 2) check if the blockdev refers to a partition, where partscan is not supported,
410 * 3) check 'loop/partscan' sysfs attribute for loopback block devices, and if '0' we can conclude
411 * partition scanning is disabled,
412 * 4) check 'ext_range' sysfs attribute, and if '1' we can conclude partition scanning is disabled,
413 * 5) otherwise check 'capability' sysfs attribute for ancient version. */
414
415 assert(dev);
416
417 /* For v6.10 or newer. */
418 r = device_get_sysattr_bool(dev, "partscan");
419 if (r != -ENOENT)
420 return r;
421
422 /* Partition block devices never have partition scanning on, there's no concept of sub-partitions for
423 * partitions. */
424 r = device_is_devtype(dev, "partition");
425 if (r < 0)
426 return r;
427 if (r > 0)
428 return false;
429
430 /* For loopback block device, especially for v5.19 or newer. Even if this is enabled, we also need to
431 * check GENHD_FL_NO_PART flag through 'ext_range' and 'capability' sysfs attributes below. */
432 if (device_get_sysattr_bool(dev, "loop/partscan") == 0)
433 return false;
434
435 r = device_get_sysattr_int(dev, "ext_range", &ext_range);
436 if (r == -ENOENT) /* If the ext_range file doesn't exist then we are most likely looking at a
437 * partition block device, not the whole block device. And that means we have no
438 * partition scanning on for it (we do for its parent, but not for the partition
439 * itself). */
440 return false;
441 if (r < 0)
442 return r;
443
444 if (ext_range <= 1) /* The value should be always positive, but the kernel uses '%d' for the
445 * attribute. Let's gracefully handle zero or negative. */
446 return false;
447
448 r = device_get_sysattr_unsigned_full(dev, "capability", 16, &capability);
449 if (r == -ENOENT)
450 return false;
451 if (r < 0)
452 return r;
453
454 #define GENHD_FL_NO_PART_OLD 0x0200
455 #define GENHD_FL_NO_PART_NEW 0x0004
456 /* If one of the NO_PART flags is set, part scanning is definitely off. */
457 if ((capability & (GENHD_FL_NO_PART_OLD | GENHD_FL_NO_PART_NEW)) != 0)
458 return false;
459
460 /* Otherwise, assume part scanning is on, we have no further checks available. Assume the best. */
461 return true;
462 }
463
464 int blockdev_partscan_enabled_fd(int fd) {
465 _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
466 int r;
467
468 assert(fd >= 0);
469
470 r = block_device_new_from_fd(fd, 0, &dev);
471 if (r < 0)
472 return r;
473
474 return blockdev_partscan_enabled(dev);
475 }
476
477 static int blockdev_is_encrypted(const char *sysfs_path, unsigned depth_left) {
478 _cleanup_free_ char *p = NULL, *uuids = NULL;
479 _cleanup_closedir_ DIR *d = NULL;
480 int r, found_encrypted = false;
481
482 assert(sysfs_path);
483
484 if (depth_left == 0)
485 return -EINVAL;
486
487 p = path_join(sysfs_path, "dm/uuid");
488 if (!p)
489 return -ENOMEM;
490
491 r = read_one_line_file(p, &uuids);
492 if (r != -ENOENT) {
493 if (r < 0)
494 return r;
495
496 /* The DM device's uuid attribute is prefixed with "CRYPT-" if this is a dm-crypt device. */
497 if (startswith(uuids, "CRYPT-"))
498 return true;
499 }
500
501 /* Not a dm-crypt device itself. But maybe it is on top of one? Follow the links in the "slaves/"
502 * subdir. */
503
504 p = mfree(p);
505 p = path_join(sysfs_path, "slaves");
506 if (!p)
507 return -ENOMEM;
508
509 d = opendir(p);
510 if (!d) {
511 if (errno == ENOENT) /* Doesn't have underlying devices */
512 return false;
513
514 return -errno;
515 }
516
517 for (;;) {
518 _cleanup_free_ char *q = NULL;
519 struct dirent *de;
520
521 errno = 0;
522 de = readdir_no_dot(d);
523 if (!de) {
524 if (errno != 0)
525 return -errno;
526
527 break; /* No more underlying devices */
528 }
529
530 q = path_join(p, de->d_name);
531 if (!q)
532 return -ENOMEM;
533
534 r = blockdev_is_encrypted(q, depth_left - 1);
535 if (r < 0)
536 return r;
537 if (r == 0) /* we found one that is not encrypted? then propagate that immediately */
538 return false;
539
540 found_encrypted = true;
541 }
542
543 return found_encrypted;
544 }
545
/* Checks whether the file system that 'fd' lives on is backed by an encrypted block device.
 * Returns false if there is no backing block device, otherwise the result of the sysfs check, or a
 * negative error. */
int fd_is_encrypted(int fd) {
        char syspath[SYS_BLOCK_PATH_MAX("")];
        dev_t backing;
        int r;

        r = get_block_device_fd(fd, &backing);
        if (r < 0)
                return r;
        if (r == 0) /* doesn't have a block device */
                return false;

        xsprintf_sys_block_path(syspath, NULL, backing);

        return blockdev_is_encrypted(syspath, 10 /* safety net: maximum recursion depth */);
}
561
/* Checks whether the file system that 'path' lives on is backed by an encrypted block device.
 * Returns false if there is no backing block device, otherwise the result of the sysfs check, or a
 * negative error. */
int path_is_encrypted(const char *path) {
        char syspath[SYS_BLOCK_PATH_MAX("")];
        dev_t backing;
        int r;

        r = get_block_device(path, &backing);
        if (r < 0)
                return r;
        if (r == 0) /* doesn't have a block device */
                return false;

        xsprintf_sys_block_path(syspath, NULL, backing);

        return blockdev_is_encrypted(syspath, 10 /* safety net: maximum recursion depth */);
}
577
578 int fd_get_whole_disk(int fd, bool backing, dev_t *ret) {
579 dev_t devt;
580 int r;
581
582 assert(fd >= 0);
583 assert(ret);
584
585 r = fd_get_devnum(fd, backing ? BLOCK_DEVICE_LOOKUP_BACKING : 0, &devt);
586 if (r < 0)
587 return r;
588
589 return block_get_whole_disk(devt, ret);
590 }
591
592 int path_get_whole_disk(const char *path, bool backing, dev_t *ret) {
593 _cleanup_close_ int fd = -EBADF;
594
595 fd = open(path, O_CLOEXEC|O_PATH);
596 if (fd < 0)
597 return -errno;
598
599 return fd_get_whole_disk(fd, backing, ret);
600 }
601
/* Asks the kernel, via the BLKPG ioctl on 'fd', to add partition number 'nr' with device name 'name',
 * using 'start' and 'size' as the partition's start and length fields.
 *
 * Returns -EINVAL if 'name' does not fit into the devname field, otherwise the ioctl result converted
 * to a negative errno-style error on failure. */
int block_device_add_partition(
                int fd,
                const char *name,
                int nr,
                uint64_t start,
                uint64_t size) {

        assert(fd >= 0);
        assert(name);
        assert(nr > 0);

        struct blkpg_partition part = {
                .pno = nr,
                .start = start,
                .length = size,
        };

        /* The name including its terminating NUL byte has to fit into the fixed-size field. */
        size_t name_len = strlen(name);
        if (name_len >= sizeof(part.devname))
                return -EINVAL;

        memcpy(part.devname, name, name_len + 1);

        struct blkpg_ioctl_arg arg = {
                .op = BLKPG_ADD_PARTITION,
                .data = &part,
                .datalen = sizeof(part),
        };

        return RET_NERRNO(ioctl(fd, BLKPG, &arg));
}
632
/* Asks the kernel, via the BLKPG ioctl on 'fd', to delete partition number 'nr' with device name
 * 'name'.
 *
 * Returns -EINVAL if 'name' does not fit into the devname field, otherwise the ioctl result converted
 * to a negative errno-style error on failure. */
int block_device_remove_partition(
                int fd,
                const char *name,
                int nr) {

        assert(fd >= 0);
        assert(name);
        assert(nr > 0);

        struct blkpg_partition part = {
                .pno = nr,
        };

        /* The name including its terminating NUL byte has to fit into the fixed-size field. */
        size_t name_len = strlen(name);
        if (name_len >= sizeof(part.devname))
                return -EINVAL;

        memcpy(part.devname, name, name_len + 1);

        struct blkpg_ioctl_arg arg = {
                .op = BLKPG_DEL_PARTITION,
                .data = &part,
                .datalen = sizeof(part),
        };

        return RET_NERRNO(ioctl(fd, BLKPG, &arg));
}
659
/* Asks the kernel, via the BLKPG ioctl on 'fd', to resize partition number 'nr', using 'start' and
 * 'size' as the partition's start and length fields. Returns the ioctl result converted to a negative
 * errno-style error on failure. */
int block_device_resize_partition(
                int fd,
                int nr,
                uint64_t start,
                uint64_t size) {

        assert(fd >= 0);
        assert(nr > 0);

        struct blkpg_partition part = {
                .pno = nr,
                .start = start,
                .length = size,
        };

        struct blkpg_ioctl_arg arg = {
                .op = BLKPG_RESIZE_PARTITION,
                .data = &part,
                .datalen = sizeof(part),
        };

        return RET_NERRNO(ioctl(fd, BLKPG, &arg));
}
683
684 int partition_enumerator_new(sd_device *dev, sd_device_enumerator **ret) {
685 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
686 const char *s;
687 int r;
688
689 assert(dev);
690 assert(ret);
691
692 /* Refuse invocation on partition block device, insist on "whole" device */
693 r = block_device_is_whole_disk(dev);
694 if (r < 0)
695 return r;
696 if (r == 0)
697 return -ENXIO; /* return a recognizable error */
698
699 r = sd_device_enumerator_new(&e);
700 if (r < 0)
701 return r;
702
703 r = sd_device_enumerator_allow_uninitialized(e);
704 if (r < 0)
705 return r;
706
707 r = sd_device_enumerator_add_match_parent(e, dev);
708 if (r < 0)
709 return r;
710
711 r = sd_device_get_sysname(dev, &s);
712 if (r < 0)
713 return r;
714
715 /* Also add sysname check for safety. Hopefully, this also improves performance. */
716 s = strjoina(s, "*");
717 r = sd_device_enumerator_add_match_sysname(e, s);
718 if (r < 0)
719 return r;
720
721 r = sd_device_enumerator_add_match_subsystem(e, "block", /* match = */ true);
722 if (r < 0)
723 return r;
724
725 r = sd_device_enumerator_add_match_property(e, "DEVTYPE", "partition");
726 if (r < 0)
727 return r;
728
729 *ret = TAKE_PTR(e);
730 return 0;
731 }
732
733 int block_device_remove_all_partitions(sd_device *dev, int fd) {
734 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
735 _cleanup_(sd_device_unrefp) sd_device *dev_unref = NULL;
736 _cleanup_close_ int fd_close = -EBADF;
737 bool has_partitions = false;
738 int r, k = 0;
739
740 assert(dev || fd >= 0);
741
742 if (!dev) {
743 r = block_device_new_from_fd(fd, 0, &dev_unref);
744 if (r < 0)
745 return r;
746
747 dev = dev_unref;
748 }
749
750 r = partition_enumerator_new(dev, &e);
751 if (r < 0)
752 return r;
753
754 if (fd < 0) {
755 fd_close = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|O_RDONLY);
756 if (fd_close < 0)
757 return fd_close;
758
759 fd = fd_close;
760 }
761
762 FOREACH_DEVICE(e, part) {
763 const char *v, *devname;
764 int nr;
765
766 has_partitions = true;
767
768 r = sd_device_get_devname(part, &devname);
769 if (r < 0)
770 return r;
771
772 r = sd_device_get_property_value(part, "PARTN", &v);
773 if (r < 0)
774 return r;
775
776 r = safe_atoi(v, &nr);
777 if (r < 0)
778 return r;
779
780 r = btrfs_forget_device(devname);
781 if (r < 0 && r != -ENOENT)
782 log_debug_errno(r, "Failed to forget btrfs device %s, ignoring: %m", devname);
783
784 r = block_device_remove_partition(fd, devname, nr);
785 if (r == -ENODEV) {
786 log_debug("Kernel removed partition %s before us, ignoring", devname);
787 continue;
788 }
789 if (r < 0) {
790 log_debug_errno(r, "Failed to remove partition %s: %m", devname);
791 k = k < 0 ? k : r;
792 continue;
793 }
794
795 log_debug("Removed partition %s", devname);
796 }
797
798 return k < 0 ? k : has_partitions;
799 }
800
801
802 int blockdev_reread_partition_table(sd_device *dev) {
803 _cleanup_close_ int fd = -EBADF;
804
805 assert(dev);
806
807 /* Try to re-read the partition table. This only succeeds if none of the devices is busy. */
808
809 fd = sd_device_open(dev, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
810 if (fd < 0)
811 return fd;
812
813 if (flock(fd, LOCK_EX|LOCK_NB) < 0)
814 return -errno;
815
816 if (ioctl(fd, BLKRRPART, 0) < 0)
817 return -errno;
818
819 return 0;
820 }
821
/* Queries the sector size of the block device 'fd' via BLKSSZGET.
 * Returns 0 and the size in *ret on success, -errno if the ioctl fails, and -EIO (logged at debug
 * level) if the reported value is not positive. */
int blockdev_get_sector_size(int fd, uint32_t *ret) {
        int ssz = 0;

        assert(fd >= 0);
        assert(ret);

        if (ioctl(fd, BLKSSZGET, &ssz) < 0)
                return -errno;

        /* 'ssz' starts out as zero, hence this also catches an ioctl that left the field untouched. */
        if (ssz <= 0)
                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Block device reported invalid sector size %i.", ssz);

        *ret = ssz;
        return 0;
}
836
837 int blockdev_get_device_size(int fd, uint64_t *ret) {
838 uint64_t sz = 0;
839
840 assert(fd >= 0);
841 assert(ret);
842
843 /* This is just a type-safe wrapper around BLKGETSIZE64 that gets us around having to include messy linux/fs.h in various clients */
844
845 if (ioctl(fd, BLKGETSIZE64, &sz) < 0)
846 return -errno;
847
848 *ret = sz;
849 return 0;
850 }
851
852 int blockdev_get_root(int level, dev_t *ret) {
853 _cleanup_free_ char *p = NULL;
854 dev_t devno;
855 int r;
856
857 /* Returns the device node backing the root file system. Traces through
858 * dm-crypt/dm-verity/... Returns > 0 and the devno of the device on success. If there's no block
859 * device (or multiple) returns 0 and a devno of 0. Failure otherwise.
860 *
861 * If the root mount has been replaced by some form of volatile file system (overlayfs), the original
862 * root block device node is symlinked in /run/systemd/volatile-root. Let's read that here. */
863 r = readlink_malloc("/run/systemd/volatile-root", &p);
864 if (r == -ENOENT) { /* volatile-root not found */
865 r = get_block_device_harder("/", &devno);
866 if (r == -EUCLEAN)
867 return btrfs_log_dev_root(level, r, "root file system");
868 if (r < 0)
869 return log_full_errno(level, r, "Failed to determine block device of root file system: %m");
870 if (r == 0) { /* Not backed by a single block device. (Could be NFS or so, or could be multi-device RAID or so) */
871 r = get_block_device_harder("/usr", &devno);
872 if (r == -EUCLEAN)
873 return btrfs_log_dev_root(level, r, "/usr");
874 if (r < 0)
875 return log_full_errno(level, r, "Failed to determine block device of /usr/ file system: %m");
876 if (r == 0) { /* /usr/ not backed by single block device, either. */
877 log_debug("Neither root nor /usr/ file system are on a (single) block device.");
878
879 if (ret)
880 *ret = 0;
881
882 return 0;
883 }
884 }
885 } else if (r < 0)
886 return log_full_errno(level, r, "Failed to read symlink /run/systemd/volatile-root: %m");
887 else {
888 mode_t m;
889 r = device_path_parse_major_minor(p, &m, &devno);
890 if (r < 0)
891 return log_full_errno(level, r, "Failed to parse major/minor device node: %m");
892 if (!S_ISBLK(m))
893 return log_full_errno(level, SYNTHETIC_ERRNO(ENOTBLK), "Volatile root device is of wrong type.");
894 }
895
896 if (ret)
897 *ret = devno;
898
899 return 1;
900 }