]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/dissect-image.c
Merge pull request #8417 from brauner/2018-03-09/add_bind_mount_fallback_to_private_d...
[thirdparty/systemd.git] / src / shared / dissect-image.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2016 Lennart Poettering
6 ***/
7
8 #include <sys/mount.h>
9 #include <sys/prctl.h>
10 #include <sys/wait.h>
11
12 #include "sd-id128.h"
13
14 #include "architecture.h"
15 #include "ask-password-api.h"
16 #include "blkid-util.h"
17 #include "blockdev-util.h"
18 #include "copy.h"
19 #include "crypt-util.h"
20 #include "def.h"
21 #include "device-nodes.h"
22 #include "dissect-image.h"
23 #include "fd-util.h"
24 #include "fileio.h"
25 #include "fs-util.h"
26 #include "gpt.h"
27 #include "hexdecoct.h"
28 #include "hostname-util.h"
29 #include "id128-util.h"
30 #include "linux-3.13/dm-ioctl.h"
31 #include "missing.h"
32 #include "mount-util.h"
33 #include "path-util.h"
34 #include "process-util.h"
35 #include "raw-clone.h"
36 #include "signal-util.h"
37 #include "stat-util.h"
38 #include "stdio-util.h"
39 #include "string-table.h"
40 #include "string-util.h"
41 #include "strv.h"
42 #include "udev-util.h"
43 #include "user-util.h"
44 #include "xattr-util.h"
45
/* Probes the content type (file system, LUKS, ...) of the block device or image file 'node'.
 *
 * Returns 1 and stores a newly allocated type string in *ret_fstype if a type was detected.
 * Returns 0 and stores NULL in *ret_fstype if nothing was detected.
 * Returns -EUCLEAN if the probing results are ambiguous, -EOPNOTSUPP if built without libblkid,
 * and a different negative errno-style error otherwise. */
int probe_filesystem(const char *node, char **ret_fstype) {
#if HAVE_BLKID
        _cleanup_blkid_free_probe_ blkid_probe b = NULL;
        /* Must start out as NULL: the lookup below leaves it untouched when no TYPE value exists. */
        const char *fstype = NULL;
        int r;

        errno = 0;
        b = blkid_new_probe_from_filename(node);
        if (!b)
                return -errno ?: -ENOMEM;

        blkid_probe_enable_superblocks(b, 1);
        blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);

        errno = 0;
        r = blkid_do_safeprobe(b);
        if (r == 1) {
                log_debug("No type detected on partition %s", node);
                goto not_found;
        }
        if (r == -2) {
                log_debug("Results ambiguous for partition %s", node);
                return -EUCLEAN;
        }
        if (r != 0)
                return -errno ?: -EIO;

        (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);

        if (fstype) {
                char *t;

                t = strdup(fstype);
                if (!t)
                        return -ENOMEM;

                *ret_fstype = t;
                return 1;
        }

not_found:
        *ret_fstype = NULL;
        return 0;
#else
        return -EOPNOTSUPP;
#endif
}
97
/* Tells whether the device is one of the special MMC areas ("rpmb", "boot0", "boot1") that show up
 * below an "mmcblk" device but are not listed by blkid.
 * See https://github.com/systemd/systemd/issues/5806. */
static bool device_is_mmc_special_partition(struct udev_device *d) {
        const char *name = udev_device_get_sysname(d);

        if (!name || !startswith(name, "mmcblk"))
                return false;

        if (endswith(name, "rpmb"))
                return true;
        if (endswith(name, "boot0"))
                return true;

        return !!endswith(name, "boot1");
}
107
/* Tells whether the udev device belongs to the "block" subsystem. Devices for which udev reports no
 * subsystem at all are treated as non-block. */
static bool device_is_block(struct udev_device *d) {
        const char *subsystem = udev_device_get_subsystem(d);

        return subsystem ? streq(subsystem, "block") : false;
}
117
118 int dissect_image(
119 int fd,
120 const void *root_hash,
121 size_t root_hash_size,
122 DissectImageFlags flags,
123 DissectedImage **ret) {
124
125 #if HAVE_BLKID
126 sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL;
127 _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
128 bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
129 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
130 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
131 _cleanup_blkid_free_probe_ blkid_probe b = NULL;
132 _cleanup_udev_unref_ struct udev *udev = NULL;
133 _cleanup_free_ char *generic_node = NULL;
134 sd_id128_t generic_uuid = SD_ID128_NULL;
135 const char *pttype = NULL;
136 struct udev_list_entry *first, *item;
137 blkid_partlist pl;
138 int r, generic_nr;
139 struct stat st;
140 unsigned i;
141
142 assert(fd >= 0);
143 assert(ret);
144 assert(root_hash || root_hash_size == 0);
145
146 /* Probes a disk image, and returns information about what it found in *ret.
147 *
148 * Returns -ENOPKG if no suitable partition table or file system could be found.
149 * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */
150
151 if (root_hash) {
152 /* If a root hash is supplied, then we use the root partition that has a UUID that match the first
153 * 128bit of the root hash. And we use the verity partition that has a UUID that match the final
154 * 128bit. */
155
156 if (root_hash_size < sizeof(sd_id128_t))
157 return -EINVAL;
158
159 memcpy(&root_uuid, root_hash, sizeof(sd_id128_t));
160 memcpy(&verity_uuid, (const uint8_t*) root_hash + root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
161
162 if (sd_id128_is_null(root_uuid))
163 return -EINVAL;
164 if (sd_id128_is_null(verity_uuid))
165 return -EINVAL;
166 }
167
168 if (fstat(fd, &st) < 0)
169 return -errno;
170
171 if (!S_ISBLK(st.st_mode))
172 return -ENOTBLK;
173
174 b = blkid_new_probe();
175 if (!b)
176 return -ENOMEM;
177
178 errno = 0;
179 r = blkid_probe_set_device(b, fd, 0, 0);
180 if (r != 0)
181 return -errno ?: -ENOMEM;
182
183 if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
184 /* Look for file system superblocks, unless we only shall look for GPT partition tables */
185 blkid_probe_enable_superblocks(b, 1);
186 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
187 }
188
189 blkid_probe_enable_partitions(b, 1);
190 blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
191
192 errno = 0;
193 r = blkid_do_safeprobe(b);
194 if (IN_SET(r, -2, 1)) {
195 log_debug("Failed to identify any partition table.");
196 return -ENOPKG;
197 }
198 if (r != 0)
199 return -errno ?: -EIO;
200
201 m = new0(DissectedImage, 1);
202 if (!m)
203 return -ENOMEM;
204
205 if (!(flags & DISSECT_IMAGE_GPT_ONLY) &&
206 (flags & DISSECT_IMAGE_REQUIRE_ROOT)) {
207 const char *usage = NULL;
208
209 (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
210 if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
211 _cleanup_free_ char *t = NULL, *n = NULL;
212 const char *fstype = NULL;
213
214 /* OK, we have found a file system, that's our root partition then. */
215 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
216
217 if (fstype) {
218 t = strdup(fstype);
219 if (!t)
220 return -ENOMEM;
221 }
222
223 if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0)
224 return -ENOMEM;
225
226 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
227 .found = true,
228 .rw = true,
229 .partno = -1,
230 .architecture = _ARCHITECTURE_INVALID,
231 .fstype = TAKE_PTR(t),
232 .node = TAKE_PTR(n),
233 };
234
235 m->encrypted = streq(fstype, "crypto_LUKS");
236
237 *ret = TAKE_PTR(m);
238
239 return 0;
240 }
241 }
242
243 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
244 if (!pttype)
245 return -ENOPKG;
246
247 is_gpt = streq_ptr(pttype, "gpt");
248 is_mbr = streq_ptr(pttype, "dos");
249
250 if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
251 return -ENOPKG;
252
253 errno = 0;
254 pl = blkid_probe_get_partitions(b);
255 if (!pl)
256 return -errno ?: -ENOMEM;
257
258 udev = udev_new();
259 if (!udev)
260 return -errno;
261
262 d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
263 if (!d)
264 return -ENOMEM;
265
266 for (i = 0;; i++) {
267 int n, z;
268
269 if (i >= 10) {
270 log_debug("Kernel partitions never appeared.");
271 return -ENXIO;
272 }
273
274 e = udev_enumerate_new(udev);
275 if (!e)
276 return -errno;
277
278 r = udev_enumerate_add_match_parent(e, d);
279 if (r < 0)
280 return r;
281
282 r = udev_enumerate_scan_devices(e);
283 if (r < 0)
284 return r;
285
286 /* Count the partitions enumerated by the kernel */
287 n = 0;
288 first = udev_enumerate_get_list_entry(e);
289 udev_list_entry_foreach(item, first) {
290 _cleanup_udev_device_unref_ struct udev_device *q;
291 dev_t qn;
292
293 q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
294 if (!q)
295 return -errno;
296
297 qn = udev_device_get_devnum(q);
298 if (major(qn) == 0)
299 continue;
300
301 if (!device_is_block(q))
302 continue;
303
304 if (device_is_mmc_special_partition(q))
305 continue;
306 n++;
307 }
308
309 /* Count the partitions enumerated by blkid */
310 z = blkid_partlist_numof_partitions(pl);
311 if (n == z + 1)
312 break;
313 if (n > z + 1) {
314 log_debug("blkid and kernel partition list do not match.");
315 return -EIO;
316 }
317 if (n < z + 1) {
318 unsigned j = 0;
319
320 /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running
321 * or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a
322 * synchronous call that waits until probing is complete. */
323
324 for (;;) {
325 if (j++ > 20)
326 return -EBUSY;
327
328 if (ioctl(fd, BLKRRPART, 0) < 0) {
329 r = -errno;
330
331 if (r == -EINVAL) {
332 struct loop_info64 info;
333
334 /* If we are running on a loop device that has partition scanning off,
335 * return an explicit recognizable error about this, so that callers
336 * can generate a proper message explaining the situation. */
337
338 if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
339 log_debug("Device is loop device and partition scanning is off!");
340 return -EPROTONOSUPPORT;
341 }
342 }
343 if (r != -EBUSY)
344 return r;
345 } else
346 break;
347
348 /* If something else has the device open, such as an udev rule, the ioctl will return
349 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a
350 * bit, and try again.
351 *
352 * This is really something they should fix in the kernel! */
353
354 (void) usleep(50 * USEC_PER_MSEC);
355 }
356 }
357
358 e = udev_enumerate_unref(e);
359 }
360
361 first = udev_enumerate_get_list_entry(e);
362 udev_list_entry_foreach(item, first) {
363 _cleanup_udev_device_unref_ struct udev_device *q;
364 unsigned long long pflags;
365 blkid_partition pp;
366 const char *node;
367 dev_t qn;
368 int nr;
369
370 q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
371 if (!q)
372 return -errno;
373
374 qn = udev_device_get_devnum(q);
375 if (major(qn) == 0)
376 continue;
377
378 if (st.st_rdev == qn)
379 continue;
380
381 if (!device_is_block(q))
382 continue;
383
384 if (device_is_mmc_special_partition(q))
385 continue;
386
387 node = udev_device_get_devnode(q);
388 if (!node)
389 continue;
390
391 pp = blkid_partlist_devno_to_partition(pl, qn);
392 if (!pp)
393 continue;
394
395 pflags = blkid_partition_get_flags(pp);
396
397 nr = blkid_partition_get_partno(pp);
398 if (nr < 0)
399 continue;
400
401 if (is_gpt) {
402 int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID;
403 const char *stype, *sid, *fstype = NULL;
404 sd_id128_t type_id, id;
405 bool rw = true;
406
407 sid = blkid_partition_get_uuid(pp);
408 if (!sid)
409 continue;
410 if (sd_id128_from_string(sid, &id) < 0)
411 continue;
412
413 stype = blkid_partition_get_type_string(pp);
414 if (!stype)
415 continue;
416 if (sd_id128_from_string(stype, &type_id) < 0)
417 continue;
418
419 if (sd_id128_equal(type_id, GPT_HOME)) {
420
421 if (pflags & GPT_FLAG_NO_AUTO)
422 continue;
423
424 designator = PARTITION_HOME;
425 rw = !(pflags & GPT_FLAG_READ_ONLY);
426 } else if (sd_id128_equal(type_id, GPT_SRV)) {
427
428 if (pflags & GPT_FLAG_NO_AUTO)
429 continue;
430
431 designator = PARTITION_SRV;
432 rw = !(pflags & GPT_FLAG_READ_ONLY);
433 } else if (sd_id128_equal(type_id, GPT_ESP)) {
434
435 /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is not defined
436 * there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as recommended by the
437 * UEFI spec (See "12.3.3 Number and Location of System Partitions"). */
438
439 if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
440 continue;
441
442 designator = PARTITION_ESP;
443 fstype = "vfat";
444 }
445 #ifdef GPT_ROOT_NATIVE
446 else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
447
448 if (pflags & GPT_FLAG_NO_AUTO)
449 continue;
450
451 /* If a root ID is specified, ignore everything but the root id */
452 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
453 continue;
454
455 designator = PARTITION_ROOT;
456 architecture = native_architecture();
457 rw = !(pflags & GPT_FLAG_READ_ONLY);
458 } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
459
460 if (pflags & GPT_FLAG_NO_AUTO)
461 continue;
462
463 m->can_verity = true;
464
465 /* Ignore verity unless a root hash is specified */
466 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
467 continue;
468
469 designator = PARTITION_ROOT_VERITY;
470 fstype = "DM_verity_hash";
471 architecture = native_architecture();
472 rw = false;
473 }
474 #endif
475 #ifdef GPT_ROOT_SECONDARY
476 else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
477
478 if (pflags & GPT_FLAG_NO_AUTO)
479 continue;
480
481 /* If a root ID is specified, ignore everything but the root id */
482 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
483 continue;
484
485 designator = PARTITION_ROOT_SECONDARY;
486 architecture = SECONDARY_ARCHITECTURE;
487 rw = !(pflags & GPT_FLAG_READ_ONLY);
488 } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
489
490 if (pflags & GPT_FLAG_NO_AUTO)
491 continue;
492
493 m->can_verity = true;
494
495 /* Ignore verity unless root has is specified */
496 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
497 continue;
498
499 designator = PARTITION_ROOT_SECONDARY_VERITY;
500 fstype = "DM_verity_hash";
501 architecture = SECONDARY_ARCHITECTURE;
502 rw = false;
503 }
504 #endif
505 else if (sd_id128_equal(type_id, GPT_SWAP)) {
506
507 if (pflags & GPT_FLAG_NO_AUTO)
508 continue;
509
510 designator = PARTITION_SWAP;
511 fstype = "swap";
512 } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
513
514 if (pflags & GPT_FLAG_NO_AUTO)
515 continue;
516
517 if (generic_node)
518 multiple_generic = true;
519 else {
520 generic_nr = nr;
521 generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
522 generic_uuid = id;
523 generic_node = strdup(node);
524 if (!generic_node)
525 return -ENOMEM;
526 }
527 }
528
529 if (designator != _PARTITION_DESIGNATOR_INVALID) {
530 _cleanup_free_ char *t = NULL, *n = NULL;
531
532 /* First one wins */
533 if (m->partitions[designator].found)
534 continue;
535
536 if (fstype) {
537 t = strdup(fstype);
538 if (!t)
539 return -ENOMEM;
540 }
541
542 n = strdup(node);
543 if (!n)
544 return -ENOMEM;
545
546 m->partitions[designator] = (DissectedPartition) {
547 .found = true,
548 .partno = nr,
549 .rw = rw,
550 .architecture = architecture,
551 .node = TAKE_PTR(n),
552 .fstype = TAKE_PTR(t),
553 .uuid = id,
554 };
555 }
556
557 } else if (is_mbr) {
558
559 if (pflags != 0x80) /* Bootable flag */
560 continue;
561
562 if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */
563 continue;
564
565 if (generic_node)
566 multiple_generic = true;
567 else {
568 generic_nr = nr;
569 generic_rw = true;
570 generic_node = strdup(node);
571 if (!generic_node)
572 return -ENOMEM;
573 }
574 }
575 }
576
577 if (!m->partitions[PARTITION_ROOT].found) {
578 /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not
579 * either, then check if there's a single generic one, and use that. */
580
581 if (m->partitions[PARTITION_ROOT_VERITY].found)
582 return -EADDRNOTAVAIL;
583
584 if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
585 m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
586 zero(m->partitions[PARTITION_ROOT_SECONDARY]);
587
588 m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
589 zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
590
591 } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) {
592
593 /* If the root has was set, then we won't fallback to a generic node, because the root hash
594 * decides */
595 if (root_hash)
596 return -EADDRNOTAVAIL;
597
598 /* If we didn't find a generic node, then we can't fix this up either */
599 if (!generic_node)
600 return -ENXIO;
601
602 /* If we didn't find a properly marked root partition, but we did find a single suitable
603 * generic Linux partition, then use this as root partition, if the caller asked for it. */
604 if (multiple_generic)
605 return -ENOTUNIQ;
606
607 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
608 .found = true,
609 .rw = generic_rw,
610 .partno = generic_nr,
611 .architecture = _ARCHITECTURE_INVALID,
612 .node = TAKE_PTR(generic_node),
613 .uuid = generic_uuid,
614 };
615 }
616 }
617
618 if (root_hash) {
619 if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
620 return -EADDRNOTAVAIL;
621
622 /* If we found the primary root with the hash, then we definitely want to suppress any secondary root
623 * (which would be weird, after all the root hash should only be assigned to one pair of
624 * partitions... */
625 m->partitions[PARTITION_ROOT_SECONDARY].found = false;
626 m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
627
628 /* If we found a verity setup, then the root partition is necessarily read-only. */
629 m->partitions[PARTITION_ROOT].rw = false;
630
631 m->verity = true;
632 }
633
634 blkid_free_probe(b);
635 b = NULL;
636
637 /* Fill in file system types if we don't know them yet. */
638 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
639 DissectedPartition *p = m->partitions + i;
640
641 if (!p->found)
642 continue;
643
644 if (!p->fstype && p->node) {
645 r = probe_filesystem(p->node, &p->fstype);
646 if (r < 0 && r != -EUCLEAN)
647 return r;
648 }
649
650 if (streq_ptr(p->fstype, "crypto_LUKS"))
651 m->encrypted = true;
652
653 if (p->fstype && fstype_is_ro(p->fstype))
654 p->rw = false;
655 }
656
657 *ret = TAKE_PTR(m);
658
659 return 0;
660 #else
661 return -EOPNOTSUPP;
662 #endif
663 }
664
665 DissectedImage* dissected_image_unref(DissectedImage *m) {
666 unsigned i;
667
668 if (!m)
669 return NULL;
670
671 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
672 free(m->partitions[i].fstype);
673 free(m->partitions[i].node);
674 free(m->partitions[i].decrypted_fstype);
675 free(m->partitions[i].decrypted_node);
676 }
677
678 free(m->hostname);
679 strv_free(m->machine_info);
680 strv_free(m->os_release);
681
682 return mfree(m);
683 }
684
/* Checks whether the block device node 'path' is a loop device, or a partition of one.
 *
 * Returns > 0 if so, 0 if not, -ENOTBLK if 'path' is not a block device node, and another negative
 * errno-style error if the check itself failed. */
static int is_loop_device(const char *path) {
        char s[SYS_BLOCK_PATH_MAX("/../loop/")];
        struct stat st;

        assert(path);

        if (stat(path, &st) < 0)
                return -errno;

        if (!S_ISBLK(st.st_mode))
                return -ENOTBLK;

        /* Use st_rdev here, i.e. the device the node refers to. st_dev would be the device of the file
         * system the device node happens to be stored on. */
        xsprintf_sys_block_path(s, "/loop/", st.st_rdev);
        if (access(s, F_OK) < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
                xsprintf_sys_block_path(s, "/../loop/", st.st_rdev);
                if (access(s, F_OK) < 0)
                        return errno == ENOENT ? false : -errno;
        }

        return true;
}
710
711 static int mount_partition(
712 DissectedPartition *m,
713 const char *where,
714 const char *directory,
715 uid_t uid_shift,
716 DissectImageFlags flags) {
717
718 _cleanup_free_ char *chased = NULL, *options = NULL;
719 const char *p, *node, *fstype;
720 bool rw;
721 int r;
722
723 assert(m);
724 assert(where);
725
726 node = m->decrypted_node ?: m->node;
727 fstype = m->decrypted_fstype ?: m->fstype;
728
729 if (!m->found || !node || !fstype)
730 return 0;
731
732 /* Stacked encryption? Yuck */
733 if (streq_ptr(fstype, "crypto_LUKS"))
734 return -ELOOP;
735
736 rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY);
737
738 if (directory) {
739 r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased);
740 if (r < 0)
741 return r;
742
743 p = chased;
744 } else
745 p = where;
746
747 /* If requested, turn on discard support. */
748 if (fstype_can_discard(fstype) &&
749 ((flags & DISSECT_IMAGE_DISCARD) ||
750 ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node)))) {
751 options = strdup("discard");
752 if (!options)
753 return -ENOMEM;
754 }
755
756 if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) {
757 _cleanup_free_ char *uid_option = NULL;
758
759 if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
760 return -ENOMEM;
761
762 if (!strextend_with_separator(&options, ",", uid_option, NULL))
763 return -ENOMEM;
764 }
765
766 return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
767 }
768
769 int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
770 int r;
771
772 assert(m);
773 assert(where);
774
775 if (!m->partitions[PARTITION_ROOT].found)
776 return -ENXIO;
777
778 if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
779 r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
780 if (r < 0)
781 return r;
782 }
783
784 if ((flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY))
785 return 0;
786
787 r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
788 if (r < 0)
789 return r;
790
791 r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags);
792 if (r < 0)
793 return r;
794
795 if (m->partitions[PARTITION_ESP].found) {
796 const char *mp;
797
798 /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
799
800 FOREACH_STRING(mp, "/efi", "/boot") {
801 _cleanup_free_ char *p = NULL;
802
803 r = chase_symlinks(mp, where, CHASE_PREFIX_ROOT, &p);
804 if (r < 0)
805 continue;
806
807 r = dir_is_empty(p);
808 if (r > 0) {
809 r = mount_partition(m->partitions + PARTITION_ESP, where, mp, uid_shift, flags);
810 if (r < 0)
811 return r;
812 }
813 }
814 }
815
816 return 0;
817 }
818
#if HAVE_LIBCRYPTSETUP
/* A single device-mapper device that was activated for a partition of an image. */
struct DecryptedPartition {
        struct crypt_device *device;    /* libcryptsetup context, released with crypt_free() */
        char *name;                     /* device-mapper name the device was activated under */
        bool relinquished;              /* if true, the device is not deactivated when the image is freed */
};
typedef struct DecryptedPartition DecryptedPartition;

/* All device-mapper devices that were activated for one image. */
struct DecryptedImage {
        DecryptedPartition *decrypted;  /* array of activated devices */
        size_t n_decrypted;             /* number of entries of the array in use */
        size_t n_allocated;             /* allocation bookkeeping for GREEDY_REALLOC0() */
};
#endif
832
833 DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
834 #if HAVE_LIBCRYPTSETUP
835 size_t i;
836 int r;
837
838 if (!d)
839 return NULL;
840
841 for (i = 0; i < d->n_decrypted; i++) {
842 DecryptedPartition *p = d->decrypted + i;
843
844 if (p->device && p->name && !p->relinquished) {
845 r = crypt_deactivate(p->device, p->name);
846 if (r < 0)
847 log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
848 }
849
850 if (p->device)
851 crypt_free(p->device);
852 free(p->name);
853 }
854
855 free(d);
856 #endif
857 return NULL;
858 }
859
860 #if HAVE_LIBCRYPTSETUP
861
862 static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
863 _cleanup_free_ char *name = NULL, *node = NULL;
864 const char *base;
865
866 assert(original_node);
867 assert(suffix);
868 assert(ret_name);
869 assert(ret_node);
870
871 base = strrchr(original_node, '/');
872 if (!base)
873 return -EINVAL;
874 base++;
875 if (isempty(base))
876 return -EINVAL;
877
878 name = strjoin(base, suffix);
879 if (!name)
880 return -ENOMEM;
881 if (!filename_is_valid(name))
882 return -EINVAL;
883
884 node = strjoin(crypt_get_dir(), "/", name);
885 if (!node)
886 return -ENOMEM;
887
888 *ret_name = TAKE_PTR(name);
889 *ret_node = TAKE_PTR(node);
890
891 return 0;
892 }
893
894 static int decrypt_partition(
895 DissectedPartition *m,
896 const char *passphrase,
897 DissectImageFlags flags,
898 DecryptedImage *d) {
899
900 _cleanup_free_ char *node = NULL, *name = NULL;
901 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
902 int r;
903
904 assert(m);
905 assert(d);
906
907 if (!m->found || !m->node || !m->fstype)
908 return 0;
909
910 if (!streq(m->fstype, "crypto_LUKS"))
911 return 0;
912
913 if (!passphrase)
914 return -ENOKEY;
915
916 r = make_dm_name_and_node(m->node, "-decrypted", &name, &node);
917 if (r < 0)
918 return r;
919
920 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
921 return -ENOMEM;
922
923 r = crypt_init(&cd, m->node);
924 if (r < 0)
925 return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
926
927 r = crypt_load(cd, CRYPT_LUKS, NULL);
928 if (r < 0)
929 return log_debug_errno(r, "Failed to load LUKS metadata: %m");
930
931 r = crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
932 ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
933 ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
934 if (r < 0) {
935 log_debug_errno(r, "Failed to activate LUKS device: %m");
936 return r == -EPERM ? -EKEYREJECTED : r;
937 }
938
939 d->decrypted[d->n_decrypted].name = TAKE_PTR(name);
940 d->decrypted[d->n_decrypted].device = TAKE_PTR(cd);
941 d->n_decrypted++;
942
943 m->decrypted_node = TAKE_PTR(node);
944
945 return 0;
946 }
947
948 static int verity_partition(
949 DissectedPartition *m,
950 DissectedPartition *v,
951 const void *root_hash,
952 size_t root_hash_size,
953 DissectImageFlags flags,
954 DecryptedImage *d) {
955
956 _cleanup_free_ char *node = NULL, *name = NULL;
957 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
958 int r;
959
960 assert(m);
961 assert(v);
962
963 if (!root_hash)
964 return 0;
965
966 if (!m->found || !m->node || !m->fstype)
967 return 0;
968 if (!v->found || !v->node || !v->fstype)
969 return 0;
970
971 if (!streq(v->fstype, "DM_verity_hash"))
972 return 0;
973
974 r = make_dm_name_and_node(m->node, "-verity", &name, &node);
975 if (r < 0)
976 return r;
977
978 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
979 return -ENOMEM;
980
981 r = crypt_init(&cd, v->node);
982 if (r < 0)
983 return r;
984
985 r = crypt_load(cd, CRYPT_VERITY, NULL);
986 if (r < 0)
987 return r;
988
989 r = crypt_set_data_device(cd, m->node);
990 if (r < 0)
991 return r;
992
993 r = crypt_activate_by_volume_key(cd, name, root_hash, root_hash_size, CRYPT_ACTIVATE_READONLY);
994 if (r < 0)
995 return r;
996
997 d->decrypted[d->n_decrypted].name = TAKE_PTR(name);
998 d->decrypted[d->n_decrypted].device = TAKE_PTR(cd);
999 d->n_decrypted++;
1000
1001 m->decrypted_node = TAKE_PTR(node);
1002
1003 return 0;
1004 }
1005 #endif
1006
1007 int dissected_image_decrypt(
1008 DissectedImage *m,
1009 const char *passphrase,
1010 const void *root_hash,
1011 size_t root_hash_size,
1012 DissectImageFlags flags,
1013 DecryptedImage **ret) {
1014
1015 #if HAVE_LIBCRYPTSETUP
1016 _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
1017 unsigned i;
1018 int r;
1019 #endif
1020
1021 assert(m);
1022 assert(root_hash || root_hash_size == 0);
1023
1024 /* Returns:
1025 *
1026 * = 0 → There was nothing to decrypt
1027 * > 0 → Decrypted successfully
1028 * -ENOKEY → There's something to decrypt but no key was supplied
1029 * -EKEYREJECTED → Passed key was not correct
1030 */
1031
1032 if (root_hash && root_hash_size < sizeof(sd_id128_t))
1033 return -EINVAL;
1034
1035 if (!m->encrypted && !m->verity) {
1036 *ret = NULL;
1037 return 0;
1038 }
1039
1040 #if HAVE_LIBCRYPTSETUP
1041 d = new0(DecryptedImage, 1);
1042 if (!d)
1043 return -ENOMEM;
1044
1045 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
1046 DissectedPartition *p = m->partitions + i;
1047 int k;
1048
1049 if (!p->found)
1050 continue;
1051
1052 r = decrypt_partition(p, passphrase, flags, d);
1053 if (r < 0)
1054 return r;
1055
1056 k = PARTITION_VERITY_OF(i);
1057 if (k >= 0) {
1058 r = verity_partition(p, m->partitions + k, root_hash, root_hash_size, flags, d);
1059 if (r < 0)
1060 return r;
1061 }
1062
1063 if (!p->decrypted_fstype && p->decrypted_node) {
1064 r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype);
1065 if (r < 0 && r != -EUCLEAN)
1066 return r;
1067 }
1068 }
1069
1070 *ret = TAKE_PTR(d);
1071
1072 return 1;
1073 #else
1074 return -EOPNOTSUPP;
1075 #endif
1076 }
1077
1078 int dissected_image_decrypt_interactively(
1079 DissectedImage *m,
1080 const char *passphrase,
1081 const void *root_hash,
1082 size_t root_hash_size,
1083 DissectImageFlags flags,
1084 DecryptedImage **ret) {
1085
1086 _cleanup_strv_free_erase_ char **z = NULL;
1087 int n = 3, r;
1088
1089 if (passphrase)
1090 n--;
1091
1092 for (;;) {
1093 r = dissected_image_decrypt(m, passphrase, root_hash, root_hash_size, flags, ret);
1094 if (r >= 0)
1095 return r;
1096 if (r == -EKEYREJECTED)
1097 log_error_errno(r, "Incorrect passphrase, try again!");
1098 else if (r != -ENOKEY) {
1099 log_error_errno(r, "Failed to decrypt image: %m");
1100 return r;
1101 }
1102
1103 if (--n < 0) {
1104 log_error("Too many retries.");
1105 return -EKEYREJECTED;
1106 }
1107
1108 z = strv_free(z);
1109
1110 r = ask_password_auto("Please enter image passphrase!", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z);
1111 if (r < 0)
1112 return log_error_errno(r, "Failed to query for passphrase: %m");
1113
1114 passphrase = z[0];
1115 }
1116 }
1117
1118 #if HAVE_LIBCRYPTSETUP
1119 static int deferred_remove(DecryptedPartition *p) {
1120
1121 struct dm_ioctl dm = {
1122 .version = {
1123 DM_VERSION_MAJOR,
1124 DM_VERSION_MINOR,
1125 DM_VERSION_PATCHLEVEL
1126 },
1127 .data_size = sizeof(dm),
1128 .flags = DM_DEFERRED_REMOVE,
1129 };
1130
1131 _cleanup_close_ int fd = -1;
1132
1133 assert(p);
1134
1135 /* Unfortunately, libcryptsetup doesn't provide a proper API for this, hence call the ioctl() directly. */
1136
1137 fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
1138 if (fd < 0)
1139 return -errno;
1140
1141 strncpy(dm.name, p->name, sizeof(dm.name));
1142
1143 if (ioctl(fd, DM_DEV_REMOVE, &dm))
1144 return -errno;
1145
1146 return 0;
1147 }
1148 #endif
1149
1150 int decrypted_image_relinquish(DecryptedImage *d) {
1151
1152 #if HAVE_LIBCRYPTSETUP
1153 size_t i;
1154 int r;
1155 #endif
1156
1157 assert(d);
1158
1159 /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so
1160 * that we don't clean it up ourselves either anymore */
1161
1162 #if HAVE_LIBCRYPTSETUP
1163 for (i = 0; i < d->n_decrypted; i++) {
1164 DecryptedPartition *p = d->decrypted + i;
1165
1166 if (p->relinquished)
1167 continue;
1168
1169 r = deferred_remove(p);
1170 if (r < 0)
1171 return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
1172
1173 p->relinquished = true;
1174 }
1175 #endif
1176
1177 return 0;
1178 }
1179
1180 int root_hash_load(const char *image, void **ret, size_t *ret_size) {
1181 _cleanup_free_ char *text = NULL;
1182 _cleanup_free_ void *k = NULL;
1183 size_t l;
1184 int r;
1185
1186 assert(image);
1187 assert(ret);
1188 assert(ret_size);
1189
1190 if (is_device_path(image)) {
1191 /* If we are asked to load the root hash for a device node, exit early */
1192 *ret = NULL;
1193 *ret_size = 0;
1194 return 0;
1195 }
1196
1197 r = getxattr_malloc(image, "user.verity.roothash", &text, true);
1198 if (r < 0) {
1199 char *fn, *e, *n;
1200
1201 if (!IN_SET(r, -ENODATA, -EOPNOTSUPP, -ENOENT))
1202 return r;
1203
1204 fn = newa(char, strlen(image) + STRLEN(".roothash") + 1);
1205 n = stpcpy(fn, image);
1206 e = endswith(fn, ".raw");
1207 if (e)
1208 n = e;
1209
1210 strcpy(n, ".roothash");
1211
1212 r = read_one_line_file(fn, &text);
1213 if (r == -ENOENT) {
1214 *ret = NULL;
1215 *ret_size = 0;
1216 return 0;
1217 }
1218 if (r < 0)
1219 return r;
1220 }
1221
1222 r = unhexmem(text, strlen(text), &k, &l);
1223 if (r < 0)
1224 return r;
1225 if (l < sizeof(sd_id128_t))
1226 return -EINVAL;
1227
1228 *ret = TAKE_PTR(k);
1229 *ret_size = l;
1230
1231 return 1;
1232 }
1233
1234 int dissected_image_acquire_metadata(DissectedImage *m) {
1235
1236 enum {
1237 META_HOSTNAME,
1238 META_MACHINE_ID,
1239 META_MACHINE_INFO,
1240 META_OS_RELEASE,
1241 _META_MAX,
1242 };
1243
1244 static const char *const paths[_META_MAX] = {
1245 [META_HOSTNAME] = "/etc/hostname\0",
1246 [META_MACHINE_ID] = "/etc/machine-id\0",
1247 [META_MACHINE_INFO] = "/etc/machine-info\0",
1248 [META_OS_RELEASE] = "/etc/os-release\0/usr/lib/os-release\0",
1249 };
1250
1251 _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
1252 _cleanup_(rmdir_and_freep) char *t = NULL;
1253 _cleanup_(sigkill_waitp) pid_t child = 0;
1254 sd_id128_t machine_id = SD_ID128_NULL;
1255 _cleanup_free_ char *hostname = NULL;
1256 unsigned n_meta_initialized = 0, k;
1257 int fds[2 * _META_MAX], r;
1258
1259 BLOCK_SIGNALS(SIGCHLD);
1260
1261 assert(m);
1262
1263 for (; n_meta_initialized < _META_MAX; n_meta_initialized ++)
1264 if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
1265 r = -errno;
1266 goto finish;
1267 }
1268
1269 r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
1270 if (r < 0)
1271 goto finish;
1272
1273 r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS, &child);
1274 if (r < 0)
1275 goto finish;
1276 if (r == 0) {
1277 /* Make sure we never propagate to the host */
1278 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
1279 _exit(EXIT_FAILURE);
1280
1281 r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_MOUNT_ROOT_ONLY);
1282 if (r < 0) {
1283 log_debug_errno(r, "Failed to mount dissected image: %m");
1284 _exit(EXIT_FAILURE);
1285 }
1286
1287 for (k = 0; k < _META_MAX; k++) {
1288 _cleanup_close_ int fd = -1;
1289 const char *p;
1290
1291 fds[2*k] = safe_close(fds[2*k]);
1292
1293 NULSTR_FOREACH(p, paths[k]) {
1294 fd = chase_symlinks_and_open(p, t, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
1295 if (fd >= 0)
1296 break;
1297 }
1298 if (fd < 0) {
1299 log_debug_errno(fd, "Failed to read %s file of image, ignoring: %m", paths[k]);
1300 continue;
1301 }
1302
1303 r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0);
1304 if (r < 0)
1305 _exit(EXIT_FAILURE);
1306
1307 fds[2*k+1] = safe_close(fds[2*k+1]);
1308 }
1309
1310 _exit(EXIT_SUCCESS);
1311 }
1312
1313 for (k = 0; k < _META_MAX; k++) {
1314 _cleanup_fclose_ FILE *f = NULL;
1315
1316 fds[2*k+1] = safe_close(fds[2*k+1]);
1317
1318 f = fdopen(fds[2*k], "re");
1319 if (!f) {
1320 r = -errno;
1321 goto finish;
1322 }
1323
1324 fds[2*k] = -1;
1325
1326 switch (k) {
1327
1328 case META_HOSTNAME:
1329 r = read_etc_hostname_stream(f, &hostname);
1330 if (r < 0)
1331 log_debug_errno(r, "Failed to read /etc/hostname: %m");
1332
1333 break;
1334
1335 case META_MACHINE_ID: {
1336 _cleanup_free_ char *line = NULL;
1337
1338 r = read_line(f, LONG_LINE_MAX, &line);
1339 if (r < 0)
1340 log_debug_errno(r, "Failed to read /etc/machine-id: %m");
1341 else if (r == 33) {
1342 r = sd_id128_from_string(line, &machine_id);
1343 if (r < 0)
1344 log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
1345 } else if (r == 0)
1346 log_debug("/etc/machine-id file is empty.");
1347 else
1348 log_debug("/etc/machine-id has unexpected length %i.", r);
1349
1350 break;
1351 }
1352
1353 case META_MACHINE_INFO:
1354 r = load_env_file_pairs(f, "machine-info", NULL, &machine_info);
1355 if (r < 0)
1356 log_debug_errno(r, "Failed to read /etc/machine-info: %m");
1357
1358 break;
1359
1360 case META_OS_RELEASE:
1361 r = load_env_file_pairs(f, "os-release", NULL, &os_release);
1362 if (r < 0)
1363 log_debug_errno(r, "Failed to read OS release file: %m");
1364
1365 break;
1366 }
1367 }
1368
1369 r = wait_for_terminate_and_check("(sd-dissect)", child, 0);
1370 child = 0;
1371 if (r < 0)
1372 goto finish;
1373 if (r != EXIT_SUCCESS)
1374 return -EPROTO;
1375
1376 free_and_replace(m->hostname, hostname);
1377 m->machine_id = machine_id;
1378 strv_free_and_replace(m->machine_info, machine_info);
1379 strv_free_and_replace(m->os_release, os_release);
1380
1381 finish:
1382 for (k = 0; k < n_meta_initialized; k++)
1383 safe_close_pair(fds + 2*k);
1384
1385 return r;
1386 }
1387
1388 int dissect_image_and_warn(
1389 int fd,
1390 const char *name,
1391 const void *root_hash,
1392 size_t root_hash_size,
1393 DissectImageFlags flags,
1394 DissectedImage **ret) {
1395
1396 _cleanup_free_ char *buffer = NULL;
1397 int r;
1398
1399 if (!name) {
1400 r = fd_get_path(fd, &buffer);
1401 if (r < 0)
1402 return r;
1403
1404 name = buffer;
1405 }
1406
1407 r = dissect_image(fd, root_hash, root_hash_size, flags, ret);
1408
1409 switch (r) {
1410
1411 case -EOPNOTSUPP:
1412 return log_error_errno(r, "Dissecting images is not supported, compiled without blkid support.");
1413
1414 case -ENOPKG:
1415 return log_error_errno(r, "Couldn't identify a suitable partition table or file system in '%s'.", name);
1416
1417 case -EADDRNOTAVAIL:
1418 return log_error_errno(r, "No root partition for specified root hash found in '%s'.", name);
1419
1420 case -ENOTUNIQ:
1421 return log_error_errno(r, "Multiple suitable root partitions found in image '%s'.", name);
1422
1423 case -ENXIO:
1424 return log_error_errno(r, "No suitable root partition found in image '%s'.", name);
1425
1426 case -EPROTONOSUPPORT:
1427 return log_error_errno(r, "Device '%s' is loopback block device with partition scanning turned off, please turn it on.", name);
1428
1429 default:
1430 if (r < 0)
1431 return log_error_errno(r, "Failed to dissect image '%s': %m", name);
1432
1433 return r;
1434 }
1435 }
1436
1437 static const char *const partition_designator_table[] = {
1438 [PARTITION_ROOT] = "root",
1439 [PARTITION_ROOT_SECONDARY] = "root-secondary",
1440 [PARTITION_HOME] = "home",
1441 [PARTITION_SRV] = "srv",
1442 [PARTITION_ESP] = "esp",
1443 [PARTITION_SWAP] = "swap",
1444 [PARTITION_ROOT_VERITY] = "root-verity",
1445 [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
1446 };
1447
1448 DEFINE_STRING_TABLE_LOOKUP(partition_designator, int);