]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/dissect-image.c
Merge pull request #7398 from keszybz/udev-list
[thirdparty/systemd.git] / src / shared / dissect-image.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2016 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #if HAVE_LIBCRYPTSETUP
22 #include <libcryptsetup.h>
23 #ifndef CRYPT_LUKS
24 #define CRYPT_LUKS NULL
25 #endif
26 #endif
27 #include <sys/mount.h>
28 #include <sys/prctl.h>
29 #include <sys/wait.h>
30
31 #include "architecture.h"
32 #include "ask-password-api.h"
33 #include "blkid-util.h"
34 #include "copy.h"
35 #include "def.h"
36 #include "dissect-image.h"
37 #include "fd-util.h"
38 #include "fileio.h"
39 #include "fs-util.h"
40 #include "gpt.h"
41 #include "hexdecoct.h"
42 #include "hostname-util.h"
43 #include "id128-util.h"
44 #include "linux-3.13/dm-ioctl.h"
45 #include "mount-util.h"
46 #include "path-util.h"
47 #include "process-util.h"
48 #include "raw-clone.h"
49 #include "signal-util.h"
50 #include "stat-util.h"
51 #include "stdio-util.h"
52 #include "string-table.h"
53 #include "string-util.h"
54 #include "strv.h"
55 #include "udev-util.h"
56 #include "xattr-util.h"
57
58 _unused_ static int probe_filesystem(const char *node, char **ret_fstype) {
59 #if HAVE_BLKID
60 _cleanup_blkid_free_probe_ blkid_probe b = NULL;
61 const char *fstype;
62 int r;
63
64 b = blkid_new_probe_from_filename(node);
65 if (!b)
66 return -ENOMEM;
67
68 blkid_probe_enable_superblocks(b, 1);
69 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
70
71 errno = 0;
72 r = blkid_do_safeprobe(b);
73 if (IN_SET(r, -2, 1)) {
74 log_debug("Failed to identify any partition type on partition %s", node);
75 goto not_found;
76 }
77 if (r != 0)
78 return -errno ?: -EIO;
79
80 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
81
82 if (fstype) {
83 char *t;
84
85 t = strdup(fstype);
86 if (!t)
87 return -ENOMEM;
88
89 *ret_fstype = t;
90 return 1;
91 }
92
93 not_found:
94 *ret_fstype = NULL;
95 return 0;
96 #else
97 return -EOPNOTSUPP;
98 #endif
99 }
100
101 int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DissectedImage **ret) {
102
103 #if HAVE_BLKID
104 sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL;
105 _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
106 bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
107 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
108 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
109 _cleanup_blkid_free_probe_ blkid_probe b = NULL;
110 _cleanup_udev_unref_ struct udev *udev = NULL;
111 _cleanup_free_ char *generic_node = NULL;
112 sd_id128_t generic_uuid = SD_ID128_NULL;
113 const char *pttype = NULL;
114 struct udev_list_entry *first, *item;
115 blkid_partlist pl;
116 int r, generic_nr;
117 struct stat st;
118 unsigned i;
119
120 assert(fd >= 0);
121 assert(ret);
122 assert(root_hash || root_hash_size == 0);
123
124 /* Probes a disk image, and returns information about what it found in *ret.
125 *
126 * Returns -ENOPKG if no suitable partition table or file system could be found.
127 * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */
128
129 if (root_hash) {
130 /* If a root hash is supplied, then we use the root partition that has a UUID that match the first
131 * 128bit of the root hash. And we use the verity partition that has a UUID that match the final
132 * 128bit. */
133
134 if (root_hash_size < sizeof(sd_id128_t))
135 return -EINVAL;
136
137 memcpy(&root_uuid, root_hash, sizeof(sd_id128_t));
138 memcpy(&verity_uuid, (const uint8_t*) root_hash + root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
139
140 if (sd_id128_is_null(root_uuid))
141 return -EINVAL;
142 if (sd_id128_is_null(verity_uuid))
143 return -EINVAL;
144 }
145
146 if (fstat(fd, &st) < 0)
147 return -errno;
148
149 if (!S_ISBLK(st.st_mode))
150 return -ENOTBLK;
151
152 b = blkid_new_probe();
153 if (!b)
154 return -ENOMEM;
155
156 errno = 0;
157 r = blkid_probe_set_device(b, fd, 0, 0);
158 if (r != 0)
159 return -errno ?: -ENOMEM;
160
161 if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
162 /* Look for file system superblocks, unless we only shall look for GPT partition tables */
163 blkid_probe_enable_superblocks(b, 1);
164 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
165 }
166
167 blkid_probe_enable_partitions(b, 1);
168 blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
169
170 errno = 0;
171 r = blkid_do_safeprobe(b);
172 if (IN_SET(r, -2, 1)) {
173 log_debug("Failed to identify any partition table.");
174 return -ENOPKG;
175 }
176 if (r != 0)
177 return -errno ?: -EIO;
178
179 m = new0(DissectedImage, 1);
180 if (!m)
181 return -ENOMEM;
182
183 if (!(flags & DISSECT_IMAGE_GPT_ONLY) &&
184 (flags & DISSECT_IMAGE_REQUIRE_ROOT)) {
185 const char *usage = NULL;
186
187 (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
188 if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
189 _cleanup_free_ char *t = NULL, *n = NULL;
190 const char *fstype = NULL;
191
192 /* OK, we have found a file system, that's our root partition then. */
193 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
194
195 if (fstype) {
196 t = strdup(fstype);
197 if (!t)
198 return -ENOMEM;
199 }
200
201 if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0)
202 return -ENOMEM;
203
204 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
205 .found = true,
206 .rw = true,
207 .partno = -1,
208 .architecture = _ARCHITECTURE_INVALID,
209 .fstype = t,
210 .node = n,
211 };
212
213 t = n = NULL;
214
215 m->encrypted = streq(fstype, "crypto_LUKS");
216
217 *ret = m;
218 m = NULL;
219
220 return 0;
221 }
222 }
223
224 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
225 if (!pttype)
226 return -ENOPKG;
227
228 is_gpt = streq_ptr(pttype, "gpt");
229 is_mbr = streq_ptr(pttype, "dos");
230
231 if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
232 return -ENOPKG;
233
234 errno = 0;
235 pl = blkid_probe_get_partitions(b);
236 if (!pl)
237 return -errno ?: -ENOMEM;
238
239 udev = udev_new();
240 if (!udev)
241 return -errno;
242
243 d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
244 if (!d)
245 return -ENOMEM;
246
247 for (i = 0;; i++) {
248 int n, z;
249
250 if (i >= 10) {
251 log_debug("Kernel partitions never appeared.");
252 return -ENXIO;
253 }
254
255 e = udev_enumerate_new(udev);
256 if (!e)
257 return -errno;
258
259 r = udev_enumerate_add_match_parent(e, d);
260 if (r < 0)
261 return r;
262
263 r = udev_enumerate_scan_devices(e);
264 if (r < 0)
265 return r;
266
267 /* Count the partitions enumerated by the kernel */
268 n = 0;
269 first = udev_enumerate_get_list_entry(e);
270 udev_list_entry_foreach(item, first)
271 n++;
272
273 /* Count the partitions enumerated by blkid */
274 z = blkid_partlist_numof_partitions(pl);
275 if (n == z + 1)
276 break;
277 if (n > z + 1) {
278 log_debug("blkid and kernel partition list do not match.");
279 return -EIO;
280 }
281 if (n < z + 1) {
282 unsigned j = 0;
283
284 /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running
285 * or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a
286 * synchronous call that waits until probing is complete. */
287
288 for (;;) {
289 if (j++ > 20)
290 return -EBUSY;
291
292 if (ioctl(fd, BLKRRPART, 0) < 0) {
293 r = -errno;
294
295 if (r == -EINVAL) {
296 struct loop_info64 info;
297
298 /* If we are running on a loop device that has partition scanning off,
299 * return an explicit recognizable error about this, so that callers
300 * can generate a proper message explaining the situation. */
301
302 if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
303 log_debug("Device is loop device and partition scanning is off!");
304 return -EPROTONOSUPPORT;
305 }
306 }
307 if (r != -EBUSY)
308 return r;
309 } else
310 break;
311
312 /* If something else has the device open, such as an udev rule, the ioctl will return
313 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a
314 * bit, and try again.
315 *
316 * This is really something they should fix in the kernel! */
317
318 (void) usleep(50 * USEC_PER_MSEC);
319 }
320 }
321
322 e = udev_enumerate_unref(e);
323 }
324
325 first = udev_enumerate_get_list_entry(e);
326 udev_list_entry_foreach(item, first) {
327 _cleanup_udev_device_unref_ struct udev_device *q;
328 unsigned long long pflags;
329 blkid_partition pp;
330 const char *node, *sysname;
331 dev_t qn;
332 int nr;
333
334 q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
335 if (!q)
336 return -errno;
337
338 qn = udev_device_get_devnum(q);
339 if (major(qn) == 0)
340 continue;
341
342 if (st.st_rdev == qn)
343 continue;
344
345 /* Filter out weird MMC RPMB partitions, which cannot reasonably be read, see
346 * https://github.com/systemd/systemd/issues/5806 */
347 sysname = udev_device_get_sysname(q);
348 if (sysname && startswith(sysname, "mmcblk") && endswith(sysname, "rpmb"))
349 continue;
350
351 node = udev_device_get_devnode(q);
352 if (!node)
353 continue;
354
355 pp = blkid_partlist_devno_to_partition(pl, qn);
356 if (!pp)
357 continue;
358
359 pflags = blkid_partition_get_flags(pp);
360
361 nr = blkid_partition_get_partno(pp);
362 if (nr < 0)
363 continue;
364
365 if (is_gpt) {
366 int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID;
367 const char *stype, *sid, *fstype = NULL;
368 sd_id128_t type_id, id;
369 bool rw = true;
370
371 sid = blkid_partition_get_uuid(pp);
372 if (!sid)
373 continue;
374 if (sd_id128_from_string(sid, &id) < 0)
375 continue;
376
377 stype = blkid_partition_get_type_string(pp);
378 if (!stype)
379 continue;
380 if (sd_id128_from_string(stype, &type_id) < 0)
381 continue;
382
383 if (sd_id128_equal(type_id, GPT_HOME)) {
384
385 if (pflags & GPT_FLAG_NO_AUTO)
386 continue;
387
388 designator = PARTITION_HOME;
389 rw = !(pflags & GPT_FLAG_READ_ONLY);
390 } else if (sd_id128_equal(type_id, GPT_SRV)) {
391
392 if (pflags & GPT_FLAG_NO_AUTO)
393 continue;
394
395 designator = PARTITION_SRV;
396 rw = !(pflags & GPT_FLAG_READ_ONLY);
397 } else if (sd_id128_equal(type_id, GPT_ESP)) {
398
399 /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is not defined
400 * there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as recommended by the
401 * UEFI spec (See "12.3.3 Number and Location of System Partitions"). */
402
403 if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
404 continue;
405
406 designator = PARTITION_ESP;
407 fstype = "vfat";
408 }
409 #ifdef GPT_ROOT_NATIVE
410 else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
411
412 if (pflags & GPT_FLAG_NO_AUTO)
413 continue;
414
415 /* If a root ID is specified, ignore everything but the root id */
416 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
417 continue;
418
419 designator = PARTITION_ROOT;
420 architecture = native_architecture();
421 rw = !(pflags & GPT_FLAG_READ_ONLY);
422 } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
423
424 if (pflags & GPT_FLAG_NO_AUTO)
425 continue;
426
427 m->can_verity = true;
428
429 /* Ignore verity unless a root hash is specified */
430 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
431 continue;
432
433 designator = PARTITION_ROOT_VERITY;
434 fstype = "DM_verity_hash";
435 architecture = native_architecture();
436 rw = false;
437 }
438 #endif
439 #ifdef GPT_ROOT_SECONDARY
440 else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
441
442 if (pflags & GPT_FLAG_NO_AUTO)
443 continue;
444
445 /* If a root ID is specified, ignore everything but the root id */
446 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
447 continue;
448
449 designator = PARTITION_ROOT_SECONDARY;
450 architecture = SECONDARY_ARCHITECTURE;
451 rw = !(pflags & GPT_FLAG_READ_ONLY);
452 } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
453
454 if (pflags & GPT_FLAG_NO_AUTO)
455 continue;
456
457 m->can_verity = true;
458
459 /* Ignore verity unless root has is specified */
460 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
461 continue;
462
463 designator = PARTITION_ROOT_SECONDARY_VERITY;
464 fstype = "DM_verity_hash";
465 architecture = SECONDARY_ARCHITECTURE;
466 rw = false;
467 }
468 #endif
469 else if (sd_id128_equal(type_id, GPT_SWAP)) {
470
471 if (pflags & GPT_FLAG_NO_AUTO)
472 continue;
473
474 designator = PARTITION_SWAP;
475 fstype = "swap";
476 } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
477
478 if (pflags & GPT_FLAG_NO_AUTO)
479 continue;
480
481 if (generic_node)
482 multiple_generic = true;
483 else {
484 generic_nr = nr;
485 generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
486 generic_uuid = id;
487 generic_node = strdup(node);
488 if (!generic_node)
489 return -ENOMEM;
490 }
491 }
492
493 if (designator != _PARTITION_DESIGNATOR_INVALID) {
494 _cleanup_free_ char *t = NULL, *n = NULL;
495
496 /* First one wins */
497 if (m->partitions[designator].found)
498 continue;
499
500 if (fstype) {
501 t = strdup(fstype);
502 if (!t)
503 return -ENOMEM;
504 }
505
506 n = strdup(node);
507 if (!n)
508 return -ENOMEM;
509
510 m->partitions[designator] = (DissectedPartition) {
511 .found = true,
512 .partno = nr,
513 .rw = rw,
514 .architecture = architecture,
515 .node = n,
516 .fstype = t,
517 .uuid = id,
518 };
519
520 n = t = NULL;
521 }
522
523 } else if (is_mbr) {
524
525 if (pflags != 0x80) /* Bootable flag */
526 continue;
527
528 if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */
529 continue;
530
531 if (generic_node)
532 multiple_generic = true;
533 else {
534 generic_nr = nr;
535 generic_rw = true;
536 generic_node = strdup(node);
537 if (!generic_node)
538 return -ENOMEM;
539 }
540 }
541 }
542
543 if (!m->partitions[PARTITION_ROOT].found) {
544 /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not
545 * either, then check if there's a single generic one, and use that. */
546
547 if (m->partitions[PARTITION_ROOT_VERITY].found)
548 return -EADDRNOTAVAIL;
549
550 if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
551 m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
552 zero(m->partitions[PARTITION_ROOT_SECONDARY]);
553
554 m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
555 zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
556
557 } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) {
558
559 /* If the root has was set, then we won't fallback to a generic node, because the root hash
560 * decides */
561 if (root_hash)
562 return -EADDRNOTAVAIL;
563
564 /* If we didn't find a generic node, then we can't fix this up either */
565 if (!generic_node)
566 return -ENXIO;
567
568 /* If we didn't find a properly marked root partition, but we did find a single suitable
569 * generic Linux partition, then use this as root partition, if the caller asked for it. */
570 if (multiple_generic)
571 return -ENOTUNIQ;
572
573 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
574 .found = true,
575 .rw = generic_rw,
576 .partno = generic_nr,
577 .architecture = _ARCHITECTURE_INVALID,
578 .node = generic_node,
579 .uuid = generic_uuid,
580 };
581
582 generic_node = NULL;
583 }
584 }
585
586 if (root_hash) {
587 if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
588 return -EADDRNOTAVAIL;
589
590 /* If we found the primary root with the hash, then we definitely want to suppress any secondary root
591 * (which would be weird, after all the root hash should only be assigned to one pair of
592 * partitions... */
593 m->partitions[PARTITION_ROOT_SECONDARY].found = false;
594 m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
595
596 /* If we found a verity setup, then the root partition is necessarily read-only. */
597 m->partitions[PARTITION_ROOT].rw = false;
598
599 m->verity = true;
600 }
601
602 blkid_free_probe(b);
603 b = NULL;
604
605 /* Fill in file system types if we don't know them yet. */
606 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
607 DissectedPartition *p = m->partitions + i;
608
609 if (!p->found)
610 continue;
611
612 if (!p->fstype && p->node) {
613 r = probe_filesystem(p->node, &p->fstype);
614 if (r < 0)
615 return r;
616 }
617
618 if (streq_ptr(p->fstype, "crypto_LUKS"))
619 m->encrypted = true;
620
621 if (p->fstype && fstype_is_ro(p->fstype))
622 p->rw = false;
623 }
624
625 *ret = m;
626 m = NULL;
627
628 return 0;
629 #else
630 return -EOPNOTSUPP;
631 #endif
632 }
633
634 DissectedImage* dissected_image_unref(DissectedImage *m) {
635 unsigned i;
636
637 if (!m)
638 return NULL;
639
640 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
641 free(m->partitions[i].fstype);
642 free(m->partitions[i].node);
643 free(m->partitions[i].decrypted_fstype);
644 free(m->partitions[i].decrypted_node);
645 }
646
647 free(m->hostname);
648 strv_free(m->machine_info);
649 strv_free(m->os_release);
650
651 return mfree(m);
652 }
653
654 static int is_loop_device(const char *path) {
655 char s[strlen("/sys/dev/block/") + DECIMAL_STR_MAX(dev_t) + 1 + DECIMAL_STR_MAX(dev_t) + strlen("/../loop/")];
656 struct stat st;
657
658 assert(path);
659
660 if (stat(path, &st) < 0)
661 return -errno;
662
663 if (!S_ISBLK(st.st_mode))
664 return -ENOTBLK;
665
666 xsprintf(s, "/sys/dev/block/%u:%u/loop/", major(st.st_rdev), minor(st.st_rdev));
667 if (access(s, F_OK) < 0) {
668 if (errno != ENOENT)
669 return -errno;
670
671 /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
672 xsprintf(s, "/sys/dev/block/%u:%u/../loop/", major(st.st_rdev), minor(st.st_rdev));
673 if (access(s, F_OK) < 0)
674 return errno == ENOENT ? false : -errno;
675 }
676
677 return true;
678 }
679
680 static int mount_partition(
681 DissectedPartition *m,
682 const char *where,
683 const char *directory,
684 DissectImageFlags flags) {
685
686 const char *p, *options = NULL, *node, *fstype;
687 _cleanup_free_ char *chased = NULL;
688 bool rw;
689 int r;
690
691 assert(m);
692 assert(where);
693
694 node = m->decrypted_node ?: m->node;
695 fstype = m->decrypted_fstype ?: m->fstype;
696
697 if (!m->found || !node || !fstype)
698 return 0;
699
700 /* Stacked encryption? Yuck */
701 if (streq_ptr(fstype, "crypto_LUKS"))
702 return -ELOOP;
703
704 rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY);
705
706 if (directory) {
707 r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased);
708 if (r < 0)
709 return r;
710
711 p = chased;
712 } else
713 p = where;
714
715 /* If requested, turn on discard support. */
716 if (fstype_can_discard(fstype) &&
717 ((flags & DISSECT_IMAGE_DISCARD) ||
718 ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node))))
719 options = "discard";
720
721 return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
722 }
723
724 int dissected_image_mount(DissectedImage *m, const char *where, DissectImageFlags flags) {
725 int r;
726
727 assert(m);
728 assert(where);
729
730 if (!m->partitions[PARTITION_ROOT].found)
731 return -ENXIO;
732
733 r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, flags);
734 if (r < 0)
735 return r;
736
737 r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", flags);
738 if (r < 0)
739 return r;
740
741 r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", flags);
742 if (r < 0)
743 return r;
744
745 if (m->partitions[PARTITION_ESP].found) {
746 const char *mp;
747
748 /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
749
750 FOREACH_STRING(mp, "/efi", "/boot") {
751 _cleanup_free_ char *p = NULL;
752
753 r = chase_symlinks(mp, where, CHASE_PREFIX_ROOT, &p);
754 if (r < 0)
755 continue;
756
757 r = dir_is_empty(p);
758 if (r > 0) {
759 r = mount_partition(m->partitions + PARTITION_ESP, where, mp, flags);
760 if (r < 0)
761 return r;
762 }
763 }
764 }
765
766 return 0;
767 }
768
769 #if HAVE_LIBCRYPTSETUP
/* Book-keeping for one device-mapper device that was activated through libcryptsetup (used by both the
 * LUKS and the verity code paths in this file). */
typedef struct DecryptedPartition {
        struct crypt_device *device; /* libcryptsetup context, released with crypt_free() */
        char *name;                  /* device-mapper name the device was activated under */
        bool relinquished;           /* if true, the device is not deactivated anymore when the image is freed */
} DecryptedPartition;

/* Dynamic array of all device-mapper devices set up for one dissected image. */
struct DecryptedImage {
        DecryptedPartition *decrypted; /* array, grown with GREEDY_REALLOC0() */
        size_t n_decrypted;            /* number of entries in use */
        size_t n_allocated;            /* allocation book-keeping for GREEDY_REALLOC0() */
};
781 #endif
782
783 DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
784 #if HAVE_LIBCRYPTSETUP
785 size_t i;
786 int r;
787
788 if (!d)
789 return NULL;
790
791 for (i = 0; i < d->n_decrypted; i++) {
792 DecryptedPartition *p = d->decrypted + i;
793
794 if (p->device && p->name && !p->relinquished) {
795 r = crypt_deactivate(p->device, p->name);
796 if (r < 0)
797 log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
798 }
799
800 if (p->device)
801 crypt_free(p->device);
802 free(p->name);
803 }
804
805 free(d);
806 #endif
807 return NULL;
808 }
809
810 #if HAVE_LIBCRYPTSETUP
811
812 static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
813 _cleanup_free_ char *name = NULL, *node = NULL;
814 const char *base;
815
816 assert(original_node);
817 assert(suffix);
818 assert(ret_name);
819 assert(ret_node);
820
821 base = strrchr(original_node, '/');
822 if (!base)
823 return -EINVAL;
824 base++;
825 if (isempty(base))
826 return -EINVAL;
827
828 name = strjoin(base, suffix);
829 if (!name)
830 return -ENOMEM;
831 if (!filename_is_valid(name))
832 return -EINVAL;
833
834 node = strjoin(crypt_get_dir(), "/", name);
835 if (!node)
836 return -ENOMEM;
837
838 *ret_name = name;
839 *ret_node = node;
840
841 name = node = NULL;
842 return 0;
843 }
844
845 static int decrypt_partition(
846 DissectedPartition *m,
847 const char *passphrase,
848 DissectImageFlags flags,
849 DecryptedImage *d) {
850
851 _cleanup_free_ char *node = NULL, *name = NULL;
852 struct crypt_device *cd;
853 int r;
854
855 assert(m);
856 assert(d);
857
858 if (!m->found || !m->node || !m->fstype)
859 return 0;
860
861 if (!streq(m->fstype, "crypto_LUKS"))
862 return 0;
863
864 r = make_dm_name_and_node(m->node, "-decrypted", &name, &node);
865 if (r < 0)
866 return r;
867
868 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
869 return -ENOMEM;
870
871 r = crypt_init(&cd, m->node);
872 if (r < 0)
873 return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
874
875 r = crypt_load(cd, CRYPT_LUKS, NULL);
876 if (r < 0) {
877 log_debug_errno(r, "Failed to load LUKS metadata: %m");
878 goto fail;
879 }
880
881 r = crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
882 ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
883 ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
884 if (r < 0)
885 log_debug_errno(r, "Failed to activate LUKS device: %m");
886 if (r == -EPERM) {
887 r = -EKEYREJECTED;
888 goto fail;
889 }
890 if (r < 0)
891 goto fail;
892
893 d->decrypted[d->n_decrypted].name = name;
894 name = NULL;
895
896 d->decrypted[d->n_decrypted].device = cd;
897 d->n_decrypted++;
898
899 m->decrypted_node = node;
900 node = NULL;
901
902 return 0;
903
904 fail:
905 crypt_free(cd);
906 return r;
907 }
908
909 static int verity_partition(
910 DissectedPartition *m,
911 DissectedPartition *v,
912 const void *root_hash,
913 size_t root_hash_size,
914 DissectImageFlags flags,
915 DecryptedImage *d) {
916
917 _cleanup_free_ char *node = NULL, *name = NULL;
918 struct crypt_device *cd;
919 int r;
920
921 assert(m);
922 assert(v);
923
924 if (!root_hash)
925 return 0;
926
927 if (!m->found || !m->node || !m->fstype)
928 return 0;
929 if (!v->found || !v->node || !v->fstype)
930 return 0;
931
932 if (!streq(v->fstype, "DM_verity_hash"))
933 return 0;
934
935 r = make_dm_name_and_node(m->node, "-verity", &name, &node);
936 if (r < 0)
937 return r;
938
939 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
940 return -ENOMEM;
941
942 r = crypt_init(&cd, v->node);
943 if (r < 0)
944 return r;
945
946 r = crypt_load(cd, CRYPT_VERITY, NULL);
947 if (r < 0)
948 goto fail;
949
950 r = crypt_set_data_device(cd, m->node);
951 if (r < 0)
952 goto fail;
953
954 r = crypt_activate_by_volume_key(cd, name, root_hash, root_hash_size, CRYPT_ACTIVATE_READONLY);
955 if (r < 0)
956 goto fail;
957
958 d->decrypted[d->n_decrypted].name = name;
959 name = NULL;
960
961 d->decrypted[d->n_decrypted].device = cd;
962 d->n_decrypted++;
963
964 m->decrypted_node = node;
965 node = NULL;
966
967 return 0;
968
969 fail:
970 crypt_free(cd);
971 return r;
972 }
973 #endif
974
975 int dissected_image_decrypt(
976 DissectedImage *m,
977 const char *passphrase,
978 const void *root_hash,
979 size_t root_hash_size,
980 DissectImageFlags flags,
981 DecryptedImage **ret) {
982
983 _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
984 #if HAVE_LIBCRYPTSETUP
985 unsigned i;
986 int r;
987 #endif
988
989 assert(m);
990 assert(root_hash || root_hash_size == 0);
991
992 /* Returns:
993 *
994 * = 0 → There was nothing to decrypt
995 * > 0 → Decrypted successfully
996 * -ENOKEY → There's something to decrypt but no key was supplied
997 * -EKEYREJECTED → Passed key was not correct
998 */
999
1000 if (root_hash && root_hash_size < sizeof(sd_id128_t))
1001 return -EINVAL;
1002
1003 if (!m->encrypted && !m->verity) {
1004 *ret = NULL;
1005 return 0;
1006 }
1007
1008 #if HAVE_LIBCRYPTSETUP
1009 if (m->encrypted && !passphrase)
1010 return -ENOKEY;
1011
1012 d = new0(DecryptedImage, 1);
1013 if (!d)
1014 return -ENOMEM;
1015
1016 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
1017 DissectedPartition *p = m->partitions + i;
1018 int k;
1019
1020 if (!p->found)
1021 continue;
1022
1023 r = decrypt_partition(p, passphrase, flags, d);
1024 if (r < 0)
1025 return r;
1026
1027 k = PARTITION_VERITY_OF(i);
1028 if (k >= 0) {
1029 r = verity_partition(p, m->partitions + k, root_hash, root_hash_size, flags, d);
1030 if (r < 0)
1031 return r;
1032 }
1033
1034 if (!p->decrypted_fstype && p->decrypted_node) {
1035 r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype);
1036 if (r < 0)
1037 return r;
1038 }
1039 }
1040
1041 *ret = d;
1042 d = NULL;
1043
1044 return 1;
1045 #else
1046 return -EOPNOTSUPP;
1047 #endif
1048 }
1049
1050 int dissected_image_decrypt_interactively(
1051 DissectedImage *m,
1052 const char *passphrase,
1053 const void *root_hash,
1054 size_t root_hash_size,
1055 DissectImageFlags flags,
1056 DecryptedImage **ret) {
1057
1058 _cleanup_strv_free_erase_ char **z = NULL;
1059 int n = 3, r;
1060
1061 if (passphrase)
1062 n--;
1063
1064 for (;;) {
1065 r = dissected_image_decrypt(m, passphrase, root_hash, root_hash_size, flags, ret);
1066 if (r >= 0)
1067 return r;
1068 if (r == -EKEYREJECTED)
1069 log_error_errno(r, "Incorrect passphrase, try again!");
1070 else if (r != -ENOKEY) {
1071 log_error_errno(r, "Failed to decrypt image: %m");
1072 return r;
1073 }
1074
1075 if (--n < 0) {
1076 log_error("Too many retries.");
1077 return -EKEYREJECTED;
1078 }
1079
1080 z = strv_free(z);
1081
1082 r = ask_password_auto("Please enter image passphrase!", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z);
1083 if (r < 0)
1084 return log_error_errno(r, "Failed to query for passphrase: %m");
1085
1086 passphrase = z[0];
1087 }
1088 }
1089
1090 #if HAVE_LIBCRYPTSETUP
1091 static int deferred_remove(DecryptedPartition *p) {
1092
1093 struct dm_ioctl dm = {
1094 .version = {
1095 DM_VERSION_MAJOR,
1096 DM_VERSION_MINOR,
1097 DM_VERSION_PATCHLEVEL
1098 },
1099 .data_size = sizeof(dm),
1100 .flags = DM_DEFERRED_REMOVE,
1101 };
1102
1103 _cleanup_close_ int fd = -1;
1104
1105 assert(p);
1106
1107 /* Unfortunately, libcryptsetup doesn't provide a proper API for this, hence call the ioctl() directly. */
1108
1109 fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
1110 if (fd < 0)
1111 return -errno;
1112
1113 strncpy(dm.name, p->name, sizeof(dm.name));
1114
1115 if (ioctl(fd, DM_DEV_REMOVE, &dm))
1116 return -errno;
1117
1118 return 0;
1119 }
1120 #endif
1121
1122 int decrypted_image_relinquish(DecryptedImage *d) {
1123
1124 #if HAVE_LIBCRYPTSETUP
1125 size_t i;
1126 int r;
1127 #endif
1128
1129 assert(d);
1130
1131 /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so
1132 * that we don't clean it up ourselves either anymore */
1133
1134 #if HAVE_LIBCRYPTSETUP
1135 for (i = 0; i < d->n_decrypted; i++) {
1136 DecryptedPartition *p = d->decrypted + i;
1137
1138 if (p->relinquished)
1139 continue;
1140
1141 r = deferred_remove(p);
1142 if (r < 0)
1143 return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
1144
1145 p->relinquished = true;
1146 }
1147 #endif
1148
1149 return 0;
1150 }
1151
1152 int root_hash_load(const char *image, void **ret, size_t *ret_size) {
1153 _cleanup_free_ char *text = NULL;
1154 _cleanup_free_ void *k = NULL;
1155 size_t l;
1156 int r;
1157
1158 assert(image);
1159 assert(ret);
1160 assert(ret_size);
1161
1162 if (is_device_path(image)) {
1163 /* If we are asked to load the root hash for a device node, exit early */
1164 *ret = NULL;
1165 *ret_size = 0;
1166 return 0;
1167 }
1168
1169 r = getxattr_malloc(image, "user.verity.roothash", &text, true);
1170 if (r < 0) {
1171 char *fn, *e, *n;
1172
1173 if (!IN_SET(r, -ENODATA, -EOPNOTSUPP, -ENOENT))
1174 return r;
1175
1176 fn = newa(char, strlen(image) + strlen(".roothash") + 1);
1177 n = stpcpy(fn, image);
1178 e = endswith(fn, ".raw");
1179 if (e)
1180 n = e;
1181
1182 strcpy(n, ".roothash");
1183
1184 r = read_one_line_file(fn, &text);
1185 if (r == -ENOENT) {
1186 *ret = NULL;
1187 *ret_size = 0;
1188 return 0;
1189 }
1190 if (r < 0)
1191 return r;
1192 }
1193
1194 r = unhexmem(text, strlen(text), &k, &l);
1195 if (r < 0)
1196 return r;
1197 if (l < sizeof(sd_id128_t))
1198 return -EINVAL;
1199
1200 *ret = k;
1201 *ret_size = l;
1202
1203 k = NULL;
1204
1205 return 1;
1206 }
1207
/* Reads hostname, machine ID, machine info and OS release data from inside a dissected image, and stores
 * them in the respective fields of 'm'.
 *
 * The image is mounted read-only on a temporary directory from within a child process that runs in its own
 * mount namespace. The child streams the contents of the metadata files to the parent through one pipe per
 * file; the parent parses them.
 *
 * Returns 0 on success. Returns a negative errno-style error if setting up pipes, temporary directory or
 * child failed, and -EPROTO if the child did not exit successfully. Failures to parse an individual
 * metadata file are only logged. 'm' is only updated on success. */
int dissected_image_acquire_metadata(DissectedImage *m) {

        enum {
                META_HOSTNAME,
                META_MACHINE_ID,
                META_MACHINE_INFO,
                META_OS_RELEASE,
                _META_MAX,
        };

        /* For each kind of metadata a NUL-separated list of candidate paths inside the image; the first one
         * that can be opened is used. */
        static const char *const paths[_META_MAX] = {
                [META_HOSTNAME]     = "/etc/hostname\0",
                [META_MACHINE_ID]   = "/etc/machine-id\0",
                [META_MACHINE_INFO] = "/etc/machine-info\0",
                [META_OS_RELEASE]   = "/etc/os-release\0/usr/lib/os-release\0",
        };

        _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
        _cleanup_(rmdir_and_freep) char *t = NULL;
        _cleanup_(sigkill_waitp) pid_t child = 0;
        sd_id128_t machine_id = SD_ID128_NULL;
        _cleanup_free_ char *hostname = NULL;
        unsigned n_meta_initialized = 0, k;
        int fds[2 * _META_MAX], r; /* one pipe per metadata kind: fds[2*k] is the read end, fds[2*k+1] the write end */
        siginfo_t si;

        BLOCK_SIGNALS(SIGCHLD);

        assert(m);

        /* Create the pipes. n_meta_initialized counts how many were set up, so that the cleanup code at
         * "finish" only touches pipes that actually exist. */
        for (; n_meta_initialized < _META_MAX; n_meta_initialized ++)
                if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
                        r = -errno;
                        goto finish;
                }

        r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
        if (r < 0)
                goto finish;

        /* CLONE_NEWNS: the child gets its own mount namespace for the mounts it establishes below. */
        child = raw_clone(SIGCHLD|CLONE_NEWNS);
        if (child < 0) {
                r = -errno;
                goto finish;
        }

        if (child == 0) {
                /* Child: mount the image and stream the metadata files into the pipes. */

                (void) reset_all_signal_handlers();
                (void) reset_signal_mask();
                assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0);

                /* Make sure we never propagate to the host */
                if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
                        _exit(EXIT_FAILURE);

                r = dissected_image_mount(m, t, DISSECT_IMAGE_READ_ONLY);
                if (r < 0)
                        _exit(EXIT_FAILURE);

                for (k = 0; k < _META_MAX; k++) {
                        _cleanup_close_ int fd = -1;
                        const char *p;

                        /* The child only writes, hence close the read end */
                        fds[2*k] = safe_close(fds[2*k]);

                        NULSTR_FOREACH(p, paths[k]) {
                                _cleanup_free_ char *q = NULL;

                                r = chase_symlinks(p, t, CHASE_PREFIX_ROOT, &q);
                                if (r < 0)
                                        continue;

                                fd = open(q, O_RDONLY|O_CLOEXEC|O_NOCTTY);
                                if (fd >= 0)
                                        break;
                        }
                        /* None of the candidates could be opened: leave this pipe without data. Its write
                         * end stays open until the child exits. */
                        if (fd < 0)
                                continue;

                        r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0);
                        if (r < 0)
                                _exit(EXIT_FAILURE);

                        /* Close the write end right away, so that the parent sees EOF for this file */
                        fds[2*k+1] = safe_close(fds[2*k+1]);
                }

                _exit(EXIT_SUCCESS);
        }

        /* Parent: read the pipes in the same order in which the child fills them. */
        for (k = 0; k < _META_MAX; k++) {
                _cleanup_fclose_ FILE *f = NULL;

                /* The parent only reads, hence close the write end */
                fds[2*k+1] = safe_close(fds[2*k+1]);

                f = fdopen(fds[2*k], "re");
                if (!f) {
                        r = -errno;
                        goto finish;
                }

                /* The stream owns the descriptor now; make sure "finish" doesn't close it a second time */
                fds[2*k] = -1;

                switch (k) {

                case META_HOSTNAME:
                        r = read_etc_hostname_stream(f, &hostname);
                        if (r < 0)
                                log_debug_errno(r, "Failed to read /etc/hostname: %m");

                        break;

                case META_MACHINE_ID: {
                        _cleanup_free_ char *line = NULL;

                        r = read_line(f, LONG_LINE_MAX, &line);
                        if (r < 0)
                                log_debug_errno(r, "Failed to read /etc/machine-id: %m");
                        else if (r == 33) { /* presumably 32 hex characters plus the newline — TODO confirm against read_line() */
                                r = sd_id128_from_string(line, &machine_id);
                                if (r < 0)
                                        log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
                        } else if (r == 0)
                                log_debug("/etc/machine-id file is empty.");
                        else
                                log_debug("/etc/machine-id has unexpected length %i.", r);

                        break;
                }

                case META_MACHINE_INFO:
                        r = load_env_file_pairs(f, "machine-info", NULL, &machine_info);
                        if (r < 0)
                                log_debug_errno(r, "Failed to read /etc/machine-info: %m");

                        break;

                case META_OS_RELEASE:
                        r = load_env_file_pairs(f, "os-release", NULL, &os_release);
                        if (r < 0)
                                log_debug_errno(r, "Failed to read OS release file: %m");

                        break;
                }
        }

        r = wait_for_terminate(child, &si);
        if (r < 0)
                goto finish;
        child = 0; /* the child is gone, nothing left for the sigkill_waitp cleanup handler to do */

        if (si.si_code != CLD_EXITED || si.si_status != EXIT_SUCCESS) {
                r = -EPROTO;
                goto finish;
        }

        /* Everything went fine, now commit the results to the image object */
        free_and_replace(m->hostname, hostname);
        m->machine_id = machine_id;
        strv_free_and_replace(m->machine_info, machine_info);
        strv_free_and_replace(m->os_release, os_release);

finish:
        /* Close whatever pipe ends are still open */
        for (k = 0; k < n_meta_initialized; k++)
                safe_close_pair(fds + 2*k);

        return r;
}
1375
/* String names of the partition designators, indexed by designator value. */
static const char *const partition_designator_table[] = {
        [PARTITION_ROOT] = "root",
        [PARTITION_ROOT_SECONDARY] = "root-secondary",
        [PARTITION_HOME] = "home",
        [PARTITION_SRV] = "srv",
        [PARTITION_ESP] = "esp",
        [PARTITION_SWAP] = "swap",
        [PARTITION_ROOT_VERITY] = "root-verity",
        [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
};

/* Generates the lookup functions for the table above. */
DEFINE_STRING_TABLE_LOOKUP(partition_designator, int);