]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/dissect-image.c
249f043f96ad83f5c0a312230dc8cecdc3463f3a
[thirdparty/systemd.git] / src / shared / dissect-image.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 Copyright 2016 Lennart Poettering
4 ***/
5
6 #include <sys/mount.h>
7 #include <sys/prctl.h>
8 #include <sys/wait.h>
9
10 #include "sd-id128.h"
11
12 #include "architecture.h"
13 #include "ask-password-api.h"
14 #include "blkid-util.h"
15 #include "blockdev-util.h"
16 #include "copy.h"
17 #include "crypt-util.h"
18 #include "def.h"
19 #include "device-nodes.h"
20 #include "dissect-image.h"
21 #include "fd-util.h"
22 #include "fileio.h"
23 #include "fs-util.h"
24 #include "gpt.h"
25 #include "hexdecoct.h"
26 #include "hostname-util.h"
27 #include "id128-util.h"
28 #include "linux-3.13/dm-ioctl.h"
29 #include "missing.h"
30 #include "mount-util.h"
31 #include "os-util.h"
32 #include "path-util.h"
33 #include "process-util.h"
34 #include "raw-clone.h"
35 #include "signal-util.h"
36 #include "stat-util.h"
37 #include "stdio-util.h"
38 #include "string-table.h"
39 #include "string-util.h"
40 #include "strv.h"
41 #include "udev-util.h"
42 #include "user-util.h"
43 #include "xattr-util.h"
44
int probe_filesystem(const char *node, char **ret_fstype) {
        /* Try to find device content type and return it in *ret_fstype. If nothing is found,
         * 0/NULL will be returned. -EUCLEAN will be returned for ambiguous results, and a
         * different error otherwise.
         *
         * Returns 1 and a newly allocated string in *ret_fstype (to be freed by the caller) if a
         * type was detected. Returns -EOPNOTSUPP if built without libblkid. On error *ret_fstype
         * is left untouched. */

#if HAVE_BLKID
        _cleanup_(blkid_free_probep) blkid_probe b = NULL;
        /* Must start out as NULL: the lookup below is best-effort (its result is ignored), hence
         * the variable might never be written to. */
        const char *fstype = NULL;
        int r;

        errno = 0;
        b = blkid_new_probe_from_filename(node);
        if (!b)
                return -errno ?: -ENOMEM;

        blkid_probe_enable_superblocks(b, 1);
        blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);

        errno = 0;
        r = blkid_do_safeprobe(b);
        if (r == 1) {
                log_debug("No type detected on partition %s", node);
                goto not_found;
        }
        if (r == -2) {
                log_debug("Results ambiguous for partition %s", node);
                return -EUCLEAN;
        }
        if (r != 0)
                return -errno ?: -EIO;

        (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);

        if (fstype) {
                char *t;

                /* The string is owned by the probe, which is released on return, hence copy it. */
                t = strdup(fstype);
                if (!t)
                        return -ENOMEM;

                *ret_fstype = t;
                return 1;
        }

not_found:
        *ret_fstype = NULL;
        return 0;
#else
        return -EOPNOTSUPP;
#endif
}
96
97 #if HAVE_BLKID
/* Detect RPMB and Boot partitions, which are not listed by blkid.
 * See https://github.com/systemd/systemd/issues/5806. */
static bool device_is_mmc_special_partition(struct udev_device *d) {
        const char *name = udev_device_get_sysname(d);

        /* Only devices whose kernel name begins with "mmcblk" are candidates. */
        if (!name || !startswith(name, "mmcblk"))
                return false;

        /* The special areas are recognized by the suffix of the kernel name. */
        return endswith(name, "rpmb") ||
               endswith(name, "boot0") ||
               endswith(name, "boot1");
}
107
/* Returns true if the udev device belongs to the "block" subsystem. */
static bool device_is_block(struct udev_device *d) {
        const char *subsystem = udev_device_get_subsystem(d);

        /* A device for which no subsystem is reported is not treated as block device. */
        return subsystem && streq(subsystem, "block");
}
117 #endif
118
119 int dissect_image(
120 int fd,
121 const void *root_hash,
122 size_t root_hash_size,
123 DissectImageFlags flags,
124 DissectedImage **ret) {
125
126 #if HAVE_BLKID
127 sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL;
128 _cleanup_(udev_enumerate_unrefp) struct udev_enumerate *e = NULL;
129 bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
130 _cleanup_(udev_device_unrefp) struct udev_device *d = NULL;
131 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
132 _cleanup_(blkid_free_probep) blkid_probe b = NULL;
133 _cleanup_(udev_unrefp) struct udev *udev = NULL;
134 _cleanup_free_ char *generic_node = NULL;
135 sd_id128_t generic_uuid = SD_ID128_NULL;
136 const char *pttype = NULL;
137 struct udev_list_entry *first, *item;
138 blkid_partlist pl;
139 int r, generic_nr;
140 struct stat st;
141 unsigned i;
142
143 assert(fd >= 0);
144 assert(ret);
145 assert(root_hash || root_hash_size == 0);
146
147 /* Probes a disk image, and returns information about what it found in *ret.
148 *
149 * Returns -ENOPKG if no suitable partition table or file system could be found.
150 * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */
151
152 if (root_hash) {
153 /* If a root hash is supplied, then we use the root partition that has a UUID that match the first
154 * 128bit of the root hash. And we use the verity partition that has a UUID that match the final
155 * 128bit. */
156
157 if (root_hash_size < sizeof(sd_id128_t))
158 return -EINVAL;
159
160 memcpy(&root_uuid, root_hash, sizeof(sd_id128_t));
161 memcpy(&verity_uuid, (const uint8_t*) root_hash + root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
162
163 if (sd_id128_is_null(root_uuid))
164 return -EINVAL;
165 if (sd_id128_is_null(verity_uuid))
166 return -EINVAL;
167 }
168
169 if (fstat(fd, &st) < 0)
170 return -errno;
171
172 if (!S_ISBLK(st.st_mode))
173 return -ENOTBLK;
174
175 b = blkid_new_probe();
176 if (!b)
177 return -ENOMEM;
178
179 errno = 0;
180 r = blkid_probe_set_device(b, fd, 0, 0);
181 if (r != 0)
182 return -errno ?: -ENOMEM;
183
184 if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
185 /* Look for file system superblocks, unless we only shall look for GPT partition tables */
186 blkid_probe_enable_superblocks(b, 1);
187 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
188 }
189
190 blkid_probe_enable_partitions(b, 1);
191 blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
192
193 errno = 0;
194 r = blkid_do_safeprobe(b);
195 if (IN_SET(r, -2, 1)) {
196 log_debug("Failed to identify any partition table.");
197 return -ENOPKG;
198 }
199 if (r != 0)
200 return -errno ?: -EIO;
201
202 m = new0(DissectedImage, 1);
203 if (!m)
204 return -ENOMEM;
205
206 if (!(flags & DISSECT_IMAGE_GPT_ONLY) &&
207 (flags & DISSECT_IMAGE_REQUIRE_ROOT)) {
208 const char *usage = NULL;
209
210 (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
211 if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
212 _cleanup_free_ char *t = NULL, *n = NULL;
213 const char *fstype = NULL;
214
215 /* OK, we have found a file system, that's our root partition then. */
216 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
217
218 if (fstype) {
219 t = strdup(fstype);
220 if (!t)
221 return -ENOMEM;
222 }
223
224 if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0)
225 return -ENOMEM;
226
227 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
228 .found = true,
229 .rw = true,
230 .partno = -1,
231 .architecture = _ARCHITECTURE_INVALID,
232 .fstype = TAKE_PTR(t),
233 .node = TAKE_PTR(n),
234 };
235
236 m->encrypted = streq(fstype, "crypto_LUKS");
237
238 *ret = TAKE_PTR(m);
239
240 return 0;
241 }
242 }
243
244 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
245 if (!pttype)
246 return -ENOPKG;
247
248 is_gpt = streq_ptr(pttype, "gpt");
249 is_mbr = streq_ptr(pttype, "dos");
250
251 if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
252 return -ENOPKG;
253
254 errno = 0;
255 pl = blkid_probe_get_partitions(b);
256 if (!pl)
257 return -errno ?: -ENOMEM;
258
259 udev = udev_new();
260 if (!udev)
261 return -errno;
262
263 d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
264 if (!d)
265 return -ENOMEM;
266
267 for (i = 0;; i++) {
268 int n, z;
269
270 if (i >= 10) {
271 log_debug("Kernel partitions never appeared.");
272 return -ENXIO;
273 }
274
275 e = udev_enumerate_new(udev);
276 if (!e)
277 return -errno;
278
279 r = udev_enumerate_add_match_parent(e, d);
280 if (r < 0)
281 return r;
282
283 r = udev_enumerate_scan_devices(e);
284 if (r < 0)
285 return r;
286
287 /* Count the partitions enumerated by the kernel */
288 n = 0;
289 first = udev_enumerate_get_list_entry(e);
290 udev_list_entry_foreach(item, first) {
291 _cleanup_(udev_device_unrefp) struct udev_device *q;
292 dev_t qn;
293
294 q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
295 if (!q)
296 return -errno;
297
298 qn = udev_device_get_devnum(q);
299 if (major(qn) == 0)
300 continue;
301
302 if (!device_is_block(q))
303 continue;
304
305 if (device_is_mmc_special_partition(q))
306 continue;
307 n++;
308 }
309
310 /* Count the partitions enumerated by blkid */
311 z = blkid_partlist_numof_partitions(pl);
312 if (n == z + 1)
313 break;
314 if (n > z + 1) {
315 log_debug("blkid and kernel partition list do not match.");
316 return -EIO;
317 }
318 if (n < z + 1) {
319 unsigned j = 0;
320
321 /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running
322 * or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a
323 * synchronous call that waits until probing is complete. */
324
325 for (;;) {
326 if (j++ > 20)
327 return -EBUSY;
328
329 if (ioctl(fd, BLKRRPART, 0) < 0) {
330 r = -errno;
331
332 if (r == -EINVAL) {
333 struct loop_info64 info;
334
335 /* If we are running on a loop device that has partition scanning off,
336 * return an explicit recognizable error about this, so that callers
337 * can generate a proper message explaining the situation. */
338
339 if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
340 log_debug("Device is loop device and partition scanning is off!");
341 return -EPROTONOSUPPORT;
342 }
343 }
344 if (r != -EBUSY)
345 return r;
346 } else
347 break;
348
349 /* If something else has the device open, such as an udev rule, the ioctl will return
350 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a
351 * bit, and try again.
352 *
353 * This is really something they should fix in the kernel! */
354
355 (void) usleep(50 * USEC_PER_MSEC);
356 }
357 }
358
359 e = udev_enumerate_unref(e);
360 }
361
362 first = udev_enumerate_get_list_entry(e);
363 udev_list_entry_foreach(item, first) {
364 _cleanup_(udev_device_unrefp) struct udev_device *q;
365 unsigned long long pflags;
366 blkid_partition pp;
367 const char *node;
368 dev_t qn;
369 int nr;
370
371 q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
372 if (!q)
373 return -errno;
374
375 qn = udev_device_get_devnum(q);
376 if (major(qn) == 0)
377 continue;
378
379 if (st.st_rdev == qn)
380 continue;
381
382 if (!device_is_block(q))
383 continue;
384
385 if (device_is_mmc_special_partition(q))
386 continue;
387
388 node = udev_device_get_devnode(q);
389 if (!node)
390 continue;
391
392 pp = blkid_partlist_devno_to_partition(pl, qn);
393 if (!pp)
394 continue;
395
396 pflags = blkid_partition_get_flags(pp);
397
398 nr = blkid_partition_get_partno(pp);
399 if (nr < 0)
400 continue;
401
402 if (is_gpt) {
403 int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID;
404 const char *stype, *sid, *fstype = NULL;
405 sd_id128_t type_id, id;
406 bool rw = true;
407
408 sid = blkid_partition_get_uuid(pp);
409 if (!sid)
410 continue;
411 if (sd_id128_from_string(sid, &id) < 0)
412 continue;
413
414 stype = blkid_partition_get_type_string(pp);
415 if (!stype)
416 continue;
417 if (sd_id128_from_string(stype, &type_id) < 0)
418 continue;
419
420 if (sd_id128_equal(type_id, GPT_HOME)) {
421
422 if (pflags & GPT_FLAG_NO_AUTO)
423 continue;
424
425 designator = PARTITION_HOME;
426 rw = !(pflags & GPT_FLAG_READ_ONLY);
427 } else if (sd_id128_equal(type_id, GPT_SRV)) {
428
429 if (pflags & GPT_FLAG_NO_AUTO)
430 continue;
431
432 designator = PARTITION_SRV;
433 rw = !(pflags & GPT_FLAG_READ_ONLY);
434 } else if (sd_id128_equal(type_id, GPT_ESP)) {
435
436 /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is not defined
437 * there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as recommended by the
438 * UEFI spec (See "12.3.3 Number and Location of System Partitions"). */
439
440 if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
441 continue;
442
443 designator = PARTITION_ESP;
444 fstype = "vfat";
445 }
446 #ifdef GPT_ROOT_NATIVE
447 else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
448
449 if (pflags & GPT_FLAG_NO_AUTO)
450 continue;
451
452 /* If a root ID is specified, ignore everything but the root id */
453 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
454 continue;
455
456 designator = PARTITION_ROOT;
457 architecture = native_architecture();
458 rw = !(pflags & GPT_FLAG_READ_ONLY);
459 } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
460
461 if (pflags & GPT_FLAG_NO_AUTO)
462 continue;
463
464 m->can_verity = true;
465
466 /* Ignore verity unless a root hash is specified */
467 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
468 continue;
469
470 designator = PARTITION_ROOT_VERITY;
471 fstype = "DM_verity_hash";
472 architecture = native_architecture();
473 rw = false;
474 }
475 #endif
476 #ifdef GPT_ROOT_SECONDARY
477 else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
478
479 if (pflags & GPT_FLAG_NO_AUTO)
480 continue;
481
482 /* If a root ID is specified, ignore everything but the root id */
483 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
484 continue;
485
486 designator = PARTITION_ROOT_SECONDARY;
487 architecture = SECONDARY_ARCHITECTURE;
488 rw = !(pflags & GPT_FLAG_READ_ONLY);
489 } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
490
491 if (pflags & GPT_FLAG_NO_AUTO)
492 continue;
493
494 m->can_verity = true;
495
496 /* Ignore verity unless root has is specified */
497 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
498 continue;
499
500 designator = PARTITION_ROOT_SECONDARY_VERITY;
501 fstype = "DM_verity_hash";
502 architecture = SECONDARY_ARCHITECTURE;
503 rw = false;
504 }
505 #endif
506 else if (sd_id128_equal(type_id, GPT_SWAP)) {
507
508 if (pflags & GPT_FLAG_NO_AUTO)
509 continue;
510
511 designator = PARTITION_SWAP;
512 fstype = "swap";
513 } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
514
515 if (pflags & GPT_FLAG_NO_AUTO)
516 continue;
517
518 if (generic_node)
519 multiple_generic = true;
520 else {
521 generic_nr = nr;
522 generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
523 generic_uuid = id;
524 generic_node = strdup(node);
525 if (!generic_node)
526 return -ENOMEM;
527 }
528 }
529
530 if (designator != _PARTITION_DESIGNATOR_INVALID) {
531 _cleanup_free_ char *t = NULL, *n = NULL;
532
533 /* First one wins */
534 if (m->partitions[designator].found)
535 continue;
536
537 if (fstype) {
538 t = strdup(fstype);
539 if (!t)
540 return -ENOMEM;
541 }
542
543 n = strdup(node);
544 if (!n)
545 return -ENOMEM;
546
547 m->partitions[designator] = (DissectedPartition) {
548 .found = true,
549 .partno = nr,
550 .rw = rw,
551 .architecture = architecture,
552 .node = TAKE_PTR(n),
553 .fstype = TAKE_PTR(t),
554 .uuid = id,
555 };
556 }
557
558 } else if (is_mbr) {
559
560 if (pflags != 0x80) /* Bootable flag */
561 continue;
562
563 if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */
564 continue;
565
566 if (generic_node)
567 multiple_generic = true;
568 else {
569 generic_nr = nr;
570 generic_rw = true;
571 generic_node = strdup(node);
572 if (!generic_node)
573 return -ENOMEM;
574 }
575 }
576 }
577
578 if (!m->partitions[PARTITION_ROOT].found) {
579 /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not
580 * either, then check if there's a single generic one, and use that. */
581
582 if (m->partitions[PARTITION_ROOT_VERITY].found)
583 return -EADDRNOTAVAIL;
584
585 if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
586 m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
587 zero(m->partitions[PARTITION_ROOT_SECONDARY]);
588
589 m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
590 zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
591
592 } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) {
593
594 /* If the root has was set, then we won't fallback to a generic node, because the root hash
595 * decides */
596 if (root_hash)
597 return -EADDRNOTAVAIL;
598
599 /* If we didn't find a generic node, then we can't fix this up either */
600 if (!generic_node)
601 return -ENXIO;
602
603 /* If we didn't find a properly marked root partition, but we did find a single suitable
604 * generic Linux partition, then use this as root partition, if the caller asked for it. */
605 if (multiple_generic)
606 return -ENOTUNIQ;
607
608 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
609 .found = true,
610 .rw = generic_rw,
611 .partno = generic_nr,
612 .architecture = _ARCHITECTURE_INVALID,
613 .node = TAKE_PTR(generic_node),
614 .uuid = generic_uuid,
615 };
616 }
617 }
618
619 if (root_hash) {
620 if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
621 return -EADDRNOTAVAIL;
622
623 /* If we found the primary root with the hash, then we definitely want to suppress any secondary root
624 * (which would be weird, after all the root hash should only be assigned to one pair of
625 * partitions... */
626 m->partitions[PARTITION_ROOT_SECONDARY].found = false;
627 m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
628
629 /* If we found a verity setup, then the root partition is necessarily read-only. */
630 m->partitions[PARTITION_ROOT].rw = false;
631
632 m->verity = true;
633 }
634
635 blkid_free_probe(b);
636 b = NULL;
637
638 /* Fill in file system types if we don't know them yet. */
639 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
640 DissectedPartition *p = m->partitions + i;
641
642 if (!p->found)
643 continue;
644
645 if (!p->fstype && p->node) {
646 r = probe_filesystem(p->node, &p->fstype);
647 if (r < 0 && r != -EUCLEAN)
648 return r;
649 }
650
651 if (streq_ptr(p->fstype, "crypto_LUKS"))
652 m->encrypted = true;
653
654 if (p->fstype && fstype_is_ro(p->fstype))
655 p->rw = false;
656 }
657
658 *ret = TAKE_PTR(m);
659
660 return 0;
661 #else
662 return -EOPNOTSUPP;
663 #endif
664 }
665
666 DissectedImage* dissected_image_unref(DissectedImage *m) {
667 unsigned i;
668
669 if (!m)
670 return NULL;
671
672 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
673 free(m->partitions[i].fstype);
674 free(m->partitions[i].node);
675 free(m->partitions[i].decrypted_fstype);
676 free(m->partitions[i].decrypted_node);
677 }
678
679 free(m->hostname);
680 strv_free(m->machine_info);
681 strv_free(m->os_release);
682
683 return mfree(m);
684 }
685
/* Checks whether the block device node at the specified path is a loop device, or a partition of
 * one, by looking for a "loop" directory in the device's sysfs block path.
 *
 * Returns > 0 if so, 0 if not, -ENOTBLK if the path does not refer to a block device node, and
 * another negative errno-style error if stat() or access() fail. */
static int is_loop_device(const char *path) {
        char s[SYS_BLOCK_PATH_MAX("/../loop/")];
        struct stat st;

        assert(path);

        if (stat(path, &st) < 0)
                return -errno;

        if (!S_ISBLK(st.st_mode))
                return -ENOTBLK;

        /* Note that we need st_rdev here, i.e. the device number the node refers to. st_dev is the
         * device number of the file system the device node is stored on, which is not what we are
         * interested in. */
        xsprintf_sys_block_path(s, "/loop/", st.st_rdev);
        if (access(s, F_OK) < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
                xsprintf_sys_block_path(s, "/../loop/", st.st_rdev);
                if (access(s, F_OK) < 0)
                        return errno == ENOENT ? false : -errno;
        }

        return true;
}
711
712 static int mount_partition(
713 DissectedPartition *m,
714 const char *where,
715 const char *directory,
716 uid_t uid_shift,
717 DissectImageFlags flags) {
718
719 _cleanup_free_ char *chased = NULL, *options = NULL;
720 const char *p, *node, *fstype;
721 bool rw;
722 int r;
723
724 assert(m);
725 assert(where);
726
727 node = m->decrypted_node ?: m->node;
728 fstype = m->decrypted_fstype ?: m->fstype;
729
730 if (!m->found || !node || !fstype)
731 return 0;
732
733 /* Stacked encryption? Yuck */
734 if (streq_ptr(fstype, "crypto_LUKS"))
735 return -ELOOP;
736
737 rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY);
738
739 if (directory) {
740 r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased);
741 if (r < 0)
742 return r;
743
744 p = chased;
745 } else
746 p = where;
747
748 /* If requested, turn on discard support. */
749 if (fstype_can_discard(fstype) &&
750 ((flags & DISSECT_IMAGE_DISCARD) ||
751 ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node)))) {
752 options = strdup("discard");
753 if (!options)
754 return -ENOMEM;
755 }
756
757 if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) {
758 _cleanup_free_ char *uid_option = NULL;
759
760 if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
761 return -ENOMEM;
762
763 if (!strextend_with_separator(&options, ",", uid_option, NULL))
764 return -ENOMEM;
765 }
766
767 return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
768 }
769
770 int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
771 int r;
772
773 assert(m);
774 assert(where);
775
776 if (!m->partitions[PARTITION_ROOT].found)
777 return -ENXIO;
778
779 if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
780 r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
781 if (r < 0)
782 return r;
783
784 if (flags & DISSECT_IMAGE_VALIDATE_OS) {
785 r = path_is_os_tree(where);
786 if (r < 0)
787 return r;
788 if (r == 0)
789 return -EMEDIUMTYPE;
790 }
791 }
792
793 if ((flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY))
794 return 0;
795
796 r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
797 if (r < 0)
798 return r;
799
800 r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags);
801 if (r < 0)
802 return r;
803
804 if (m->partitions[PARTITION_ESP].found) {
805 const char *mp;
806
807 /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
808
809 FOREACH_STRING(mp, "/efi", "/boot") {
810 _cleanup_free_ char *p = NULL;
811
812 r = chase_symlinks(mp, where, CHASE_PREFIX_ROOT, &p);
813 if (r < 0)
814 continue;
815
816 r = dir_is_empty(p);
817 if (r > 0) {
818 r = mount_partition(m->partitions + PARTITION_ESP, where, mp, uid_shift, flags);
819 if (r < 0)
820 return r;
821 }
822 }
823 }
824
825 return 0;
826 }
827
828 #if HAVE_LIBCRYPTSETUP
/* One device-mapper device that was activated for a partition of an image. */
typedef struct DecryptedPartition {
        struct crypt_device *device; /* libcryptsetup context; released with crypt_free() by decrypted_image_unref() */
        char *name;                  /* name the device was activated under; used for crypt_deactivate() and deferred removal */
        bool relinquished;           /* set by decrypted_image_relinquish(); if true the device is not deactivated on unref */
} DecryptedPartition;
834
/* The set of device-mapper devices that were activated for one image. */
struct DecryptedImage {
        DecryptedPartition *decrypted; /* array of activated devices, grown with GREEDY_REALLOC0() */
        size_t n_decrypted;            /* number of entries of the array that are in use */
        size_t n_allocated;            /* allocation bookkeeping handed to GREEDY_REALLOC0() */
};
840 #endif
841
842 DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
843 #if HAVE_LIBCRYPTSETUP
844 size_t i;
845 int r;
846
847 if (!d)
848 return NULL;
849
850 for (i = 0; i < d->n_decrypted; i++) {
851 DecryptedPartition *p = d->decrypted + i;
852
853 if (p->device && p->name && !p->relinquished) {
854 r = crypt_deactivate(p->device, p->name);
855 if (r < 0)
856 log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
857 }
858
859 if (p->device)
860 crypt_free(p->device);
861 free(p->name);
862 }
863
864 free(d);
865 #endif
866 return NULL;
867 }
868
869 #if HAVE_LIBCRYPTSETUP
870
871 static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
872 _cleanup_free_ char *name = NULL, *node = NULL;
873 const char *base;
874
875 assert(original_node);
876 assert(suffix);
877 assert(ret_name);
878 assert(ret_node);
879
880 base = strrchr(original_node, '/');
881 if (!base)
882 return -EINVAL;
883 base++;
884 if (isempty(base))
885 return -EINVAL;
886
887 name = strjoin(base, suffix);
888 if (!name)
889 return -ENOMEM;
890 if (!filename_is_valid(name))
891 return -EINVAL;
892
893 node = strjoin(crypt_get_dir(), "/", name);
894 if (!node)
895 return -ENOMEM;
896
897 *ret_name = TAKE_PTR(name);
898 *ret_node = TAKE_PTR(node);
899
900 return 0;
901 }
902
903 static int decrypt_partition(
904 DissectedPartition *m,
905 const char *passphrase,
906 DissectImageFlags flags,
907 DecryptedImage *d) {
908
909 _cleanup_free_ char *node = NULL, *name = NULL;
910 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
911 int r;
912
913 assert(m);
914 assert(d);
915
916 if (!m->found || !m->node || !m->fstype)
917 return 0;
918
919 if (!streq(m->fstype, "crypto_LUKS"))
920 return 0;
921
922 if (!passphrase)
923 return -ENOKEY;
924
925 r = make_dm_name_and_node(m->node, "-decrypted", &name, &node);
926 if (r < 0)
927 return r;
928
929 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
930 return -ENOMEM;
931
932 r = crypt_init(&cd, m->node);
933 if (r < 0)
934 return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
935
936 r = crypt_load(cd, CRYPT_LUKS, NULL);
937 if (r < 0)
938 return log_debug_errno(r, "Failed to load LUKS metadata: %m");
939
940 r = crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
941 ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
942 ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
943 if (r < 0) {
944 log_debug_errno(r, "Failed to activate LUKS device: %m");
945 return r == -EPERM ? -EKEYREJECTED : r;
946 }
947
948 d->decrypted[d->n_decrypted].name = TAKE_PTR(name);
949 d->decrypted[d->n_decrypted].device = TAKE_PTR(cd);
950 d->n_decrypted++;
951
952 m->decrypted_node = TAKE_PTR(node);
953
954 return 0;
955 }
956
957 static int verity_partition(
958 DissectedPartition *m,
959 DissectedPartition *v,
960 const void *root_hash,
961 size_t root_hash_size,
962 DissectImageFlags flags,
963 DecryptedImage *d) {
964
965 _cleanup_free_ char *node = NULL, *name = NULL;
966 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
967 int r;
968
969 assert(m);
970 assert(v);
971
972 if (!root_hash)
973 return 0;
974
975 if (!m->found || !m->node || !m->fstype)
976 return 0;
977 if (!v->found || !v->node || !v->fstype)
978 return 0;
979
980 if (!streq(v->fstype, "DM_verity_hash"))
981 return 0;
982
983 r = make_dm_name_and_node(m->node, "-verity", &name, &node);
984 if (r < 0)
985 return r;
986
987 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
988 return -ENOMEM;
989
990 r = crypt_init(&cd, v->node);
991 if (r < 0)
992 return r;
993
994 r = crypt_load(cd, CRYPT_VERITY, NULL);
995 if (r < 0)
996 return r;
997
998 r = crypt_set_data_device(cd, m->node);
999 if (r < 0)
1000 return r;
1001
1002 r = crypt_activate_by_volume_key(cd, name, root_hash, root_hash_size, CRYPT_ACTIVATE_READONLY);
1003 if (r < 0)
1004 return r;
1005
1006 d->decrypted[d->n_decrypted].name = TAKE_PTR(name);
1007 d->decrypted[d->n_decrypted].device = TAKE_PTR(cd);
1008 d->n_decrypted++;
1009
1010 m->decrypted_node = TAKE_PTR(node);
1011
1012 return 0;
1013 }
1014 #endif
1015
1016 int dissected_image_decrypt(
1017 DissectedImage *m,
1018 const char *passphrase,
1019 const void *root_hash,
1020 size_t root_hash_size,
1021 DissectImageFlags flags,
1022 DecryptedImage **ret) {
1023
1024 #if HAVE_LIBCRYPTSETUP
1025 _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
1026 unsigned i;
1027 int r;
1028 #endif
1029
1030 assert(m);
1031 assert(root_hash || root_hash_size == 0);
1032
1033 /* Returns:
1034 *
1035 * = 0 → There was nothing to decrypt
1036 * > 0 → Decrypted successfully
1037 * -ENOKEY → There's something to decrypt but no key was supplied
1038 * -EKEYREJECTED → Passed key was not correct
1039 */
1040
1041 if (root_hash && root_hash_size < sizeof(sd_id128_t))
1042 return -EINVAL;
1043
1044 if (!m->encrypted && !m->verity) {
1045 *ret = NULL;
1046 return 0;
1047 }
1048
1049 #if HAVE_LIBCRYPTSETUP
1050 d = new0(DecryptedImage, 1);
1051 if (!d)
1052 return -ENOMEM;
1053
1054 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
1055 DissectedPartition *p = m->partitions + i;
1056 int k;
1057
1058 if (!p->found)
1059 continue;
1060
1061 r = decrypt_partition(p, passphrase, flags, d);
1062 if (r < 0)
1063 return r;
1064
1065 k = PARTITION_VERITY_OF(i);
1066 if (k >= 0) {
1067 r = verity_partition(p, m->partitions + k, root_hash, root_hash_size, flags, d);
1068 if (r < 0)
1069 return r;
1070 }
1071
1072 if (!p->decrypted_fstype && p->decrypted_node) {
1073 r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype);
1074 if (r < 0 && r != -EUCLEAN)
1075 return r;
1076 }
1077 }
1078
1079 *ret = TAKE_PTR(d);
1080
1081 return 1;
1082 #else
1083 return -EOPNOTSUPP;
1084 #endif
1085 }
1086
1087 int dissected_image_decrypt_interactively(
1088 DissectedImage *m,
1089 const char *passphrase,
1090 const void *root_hash,
1091 size_t root_hash_size,
1092 DissectImageFlags flags,
1093 DecryptedImage **ret) {
1094
1095 _cleanup_strv_free_erase_ char **z = NULL;
1096 int n = 3, r;
1097
1098 if (passphrase)
1099 n--;
1100
1101 for (;;) {
1102 r = dissected_image_decrypt(m, passphrase, root_hash, root_hash_size, flags, ret);
1103 if (r >= 0)
1104 return r;
1105 if (r == -EKEYREJECTED)
1106 log_error_errno(r, "Incorrect passphrase, try again!");
1107 else if (r != -ENOKEY) {
1108 log_error_errno(r, "Failed to decrypt image: %m");
1109 return r;
1110 }
1111
1112 if (--n < 0) {
1113 log_error("Too many retries.");
1114 return -EKEYREJECTED;
1115 }
1116
1117 z = strv_free(z);
1118
1119 r = ask_password_auto("Please enter image passphrase!", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z);
1120 if (r < 0)
1121 return log_error_errno(r, "Failed to query for passphrase: %m");
1122
1123 passphrase = z[0];
1124 }
1125 }
1126
1127 #if HAVE_LIBCRYPTSETUP
1128 static int deferred_remove(DecryptedPartition *p) {
1129
1130 struct dm_ioctl dm = {
1131 .version = {
1132 DM_VERSION_MAJOR,
1133 DM_VERSION_MINOR,
1134 DM_VERSION_PATCHLEVEL
1135 },
1136 .data_size = sizeof(dm),
1137 .flags = DM_DEFERRED_REMOVE,
1138 };
1139
1140 _cleanup_close_ int fd = -1;
1141
1142 assert(p);
1143
1144 /* Unfortunately, libcryptsetup doesn't provide a proper API for this, hence call the ioctl() directly. */
1145
1146 fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
1147 if (fd < 0)
1148 return -errno;
1149
1150 strncpy(dm.name, p->name, sizeof(dm.name));
1151
1152 if (ioctl(fd, DM_DEV_REMOVE, &dm))
1153 return -errno;
1154
1155 return 0;
1156 }
1157 #endif
1158
1159 int decrypted_image_relinquish(DecryptedImage *d) {
1160
1161 #if HAVE_LIBCRYPTSETUP
1162 size_t i;
1163 int r;
1164 #endif
1165
1166 assert(d);
1167
1168 /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so
1169 * that we don't clean it up ourselves either anymore */
1170
1171 #if HAVE_LIBCRYPTSETUP
1172 for (i = 0; i < d->n_decrypted; i++) {
1173 DecryptedPartition *p = d->decrypted + i;
1174
1175 if (p->relinquished)
1176 continue;
1177
1178 r = deferred_remove(p);
1179 if (r < 0)
1180 return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
1181
1182 p->relinquished = true;
1183 }
1184 #endif
1185
1186 return 0;
1187 }
1188
1189 int root_hash_load(const char *image, void **ret, size_t *ret_size) {
1190 _cleanup_free_ char *text = NULL;
1191 _cleanup_free_ void *k = NULL;
1192 size_t l;
1193 int r;
1194
1195 assert(image);
1196 assert(ret);
1197 assert(ret_size);
1198
1199 if (is_device_path(image)) {
1200 /* If we are asked to load the root hash for a device node, exit early */
1201 *ret = NULL;
1202 *ret_size = 0;
1203 return 0;
1204 }
1205
1206 r = getxattr_malloc(image, "user.verity.roothash", &text, true);
1207 if (r < 0) {
1208 char *fn, *e, *n;
1209
1210 if (!IN_SET(r, -ENODATA, -EOPNOTSUPP, -ENOENT))
1211 return r;
1212
1213 fn = newa(char, strlen(image) + STRLEN(".roothash") + 1);
1214 n = stpcpy(fn, image);
1215 e = endswith(fn, ".raw");
1216 if (e)
1217 n = e;
1218
1219 strcpy(n, ".roothash");
1220
1221 r = read_one_line_file(fn, &text);
1222 if (r == -ENOENT) {
1223 *ret = NULL;
1224 *ret_size = 0;
1225 return 0;
1226 }
1227 if (r < 0)
1228 return r;
1229 }
1230
1231 r = unhexmem(text, strlen(text), &k, &l);
1232 if (r < 0)
1233 return r;
1234 if (l < sizeof(sd_id128_t))
1235 return -EINVAL;
1236
1237 *ret = TAKE_PTR(k);
1238 *ret_size = l;
1239
1240 return 1;
1241 }
1242
1243 int dissected_image_acquire_metadata(DissectedImage *m) {
1244
1245 enum {
1246 META_HOSTNAME,
1247 META_MACHINE_ID,
1248 META_MACHINE_INFO,
1249 META_OS_RELEASE,
1250 _META_MAX,
1251 };
1252
1253 static const char *const paths[_META_MAX] = {
1254 [META_HOSTNAME] = "/etc/hostname\0",
1255 [META_MACHINE_ID] = "/etc/machine-id\0",
1256 [META_MACHINE_INFO] = "/etc/machine-info\0",
1257 [META_OS_RELEASE] = "/etc/os-release\0/usr/lib/os-release\0",
1258 };
1259
1260 _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
1261 _cleanup_(rmdir_and_freep) char *t = NULL;
1262 _cleanup_(sigkill_waitp) pid_t child = 0;
1263 sd_id128_t machine_id = SD_ID128_NULL;
1264 _cleanup_free_ char *hostname = NULL;
1265 unsigned n_meta_initialized = 0, k;
1266 int fds[2 * _META_MAX], r;
1267
1268 BLOCK_SIGNALS(SIGCHLD);
1269
1270 assert(m);
1271
1272 for (; n_meta_initialized < _META_MAX; n_meta_initialized ++)
1273 if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
1274 r = -errno;
1275 goto finish;
1276 }
1277
1278 r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
1279 if (r < 0)
1280 goto finish;
1281
1282 r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, &child);
1283 if (r < 0)
1284 goto finish;
1285 if (r == 0) {
1286 r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_MOUNT_ROOT_ONLY|DISSECT_IMAGE_VALIDATE_OS);
1287 if (r < 0) {
1288 log_debug_errno(r, "Failed to mount dissected image: %m");
1289 _exit(EXIT_FAILURE);
1290 }
1291
1292 for (k = 0; k < _META_MAX; k++) {
1293 _cleanup_close_ int fd = -1;
1294 const char *p;
1295
1296 fds[2*k] = safe_close(fds[2*k]);
1297
1298 NULSTR_FOREACH(p, paths[k]) {
1299 fd = chase_symlinks_and_open(p, t, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
1300 if (fd >= 0)
1301 break;
1302 }
1303 if (fd < 0) {
1304 log_debug_errno(fd, "Failed to read %s file of image, ignoring: %m", paths[k]);
1305 continue;
1306 }
1307
1308 r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0);
1309 if (r < 0)
1310 _exit(EXIT_FAILURE);
1311
1312 fds[2*k+1] = safe_close(fds[2*k+1]);
1313 }
1314
1315 _exit(EXIT_SUCCESS);
1316 }
1317
1318 for (k = 0; k < _META_MAX; k++) {
1319 _cleanup_fclose_ FILE *f = NULL;
1320
1321 fds[2*k+1] = safe_close(fds[2*k+1]);
1322
1323 f = fdopen(fds[2*k], "re");
1324 if (!f) {
1325 r = -errno;
1326 goto finish;
1327 }
1328
1329 fds[2*k] = -1;
1330
1331 switch (k) {
1332
1333 case META_HOSTNAME:
1334 r = read_etc_hostname_stream(f, &hostname);
1335 if (r < 0)
1336 log_debug_errno(r, "Failed to read /etc/hostname: %m");
1337
1338 break;
1339
1340 case META_MACHINE_ID: {
1341 _cleanup_free_ char *line = NULL;
1342
1343 r = read_line(f, LONG_LINE_MAX, &line);
1344 if (r < 0)
1345 log_debug_errno(r, "Failed to read /etc/machine-id: %m");
1346 else if (r == 33) {
1347 r = sd_id128_from_string(line, &machine_id);
1348 if (r < 0)
1349 log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
1350 } else if (r == 0)
1351 log_debug("/etc/machine-id file is empty.");
1352 else
1353 log_debug("/etc/machine-id has unexpected length %i.", r);
1354
1355 break;
1356 }
1357
1358 case META_MACHINE_INFO:
1359 r = load_env_file_pairs(f, "machine-info", NULL, &machine_info);
1360 if (r < 0)
1361 log_debug_errno(r, "Failed to read /etc/machine-info: %m");
1362
1363 break;
1364
1365 case META_OS_RELEASE:
1366 r = load_env_file_pairs(f, "os-release", NULL, &os_release);
1367 if (r < 0)
1368 log_debug_errno(r, "Failed to read OS release file: %m");
1369
1370 break;
1371 }
1372 }
1373
1374 r = wait_for_terminate_and_check("(sd-dissect)", child, 0);
1375 child = 0;
1376 if (r < 0)
1377 goto finish;
1378 if (r != EXIT_SUCCESS)
1379 return -EPROTO;
1380
1381 free_and_replace(m->hostname, hostname);
1382 m->machine_id = machine_id;
1383 strv_free_and_replace(m->machine_info, machine_info);
1384 strv_free_and_replace(m->os_release, os_release);
1385
1386 finish:
1387 for (k = 0; k < n_meta_initialized; k++)
1388 safe_close_pair(fds + 2*k);
1389
1390 return r;
1391 }
1392
1393 int dissect_image_and_warn(
1394 int fd,
1395 const char *name,
1396 const void *root_hash,
1397 size_t root_hash_size,
1398 DissectImageFlags flags,
1399 DissectedImage **ret) {
1400
1401 _cleanup_free_ char *buffer = NULL;
1402 int r;
1403
1404 if (!name) {
1405 r = fd_get_path(fd, &buffer);
1406 if (r < 0)
1407 return r;
1408
1409 name = buffer;
1410 }
1411
1412 r = dissect_image(fd, root_hash, root_hash_size, flags, ret);
1413
1414 switch (r) {
1415
1416 case -EOPNOTSUPP:
1417 return log_error_errno(r, "Dissecting images is not supported, compiled without blkid support.");
1418
1419 case -ENOPKG:
1420 return log_error_errno(r, "Couldn't identify a suitable partition table or file system in '%s'.", name);
1421
1422 case -EADDRNOTAVAIL:
1423 return log_error_errno(r, "No root partition for specified root hash found in '%s'.", name);
1424
1425 case -ENOTUNIQ:
1426 return log_error_errno(r, "Multiple suitable root partitions found in image '%s'.", name);
1427
1428 case -ENXIO:
1429 return log_error_errno(r, "No suitable root partition found in image '%s'.", name);
1430
1431 case -EPROTONOSUPPORT:
1432 return log_error_errno(r, "Device '%s' is loopback block device with partition scanning turned off, please turn it on.", name);
1433
1434 default:
1435 if (r < 0)
1436 return log_error_errno(r, "Failed to dissect image '%s': %m", name);
1437
1438 return r;
1439 }
1440 }
1441
1442 static const char *const partition_designator_table[] = {
1443 [PARTITION_ROOT] = "root",
1444 [PARTITION_ROOT_SECONDARY] = "root-secondary",
1445 [PARTITION_HOME] = "home",
1446 [PARTITION_SRV] = "srv",
1447 [PARTITION_ESP] = "esp",
1448 [PARTITION_SWAP] = "swap",
1449 [PARTITION_ROOT_VERITY] = "root-verity",
1450 [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
1451 };
1452
1453 DEFINE_STRING_TABLE_LOOKUP(partition_designator, int);