]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/dissect-image.c
Define CRYPT_LUKS in crypt-util.h
[thirdparty/systemd.git] / src / shared / dissect-image.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2016 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <sys/mount.h>
22 #include <sys/prctl.h>
23 #include <sys/wait.h>
24
25 #include "architecture.h"
26 #include "ask-password-api.h"
27 #include "blkid-util.h"
28 #include "copy.h"
29 #include "crypt-util.h"
30 #include "def.h"
31 #include "device-nodes.h"
32 #include "dissect-image.h"
33 #include "fd-util.h"
34 #include "fileio.h"
35 #include "fs-util.h"
36 #include "gpt.h"
37 #include "hexdecoct.h"
38 #include "hostname-util.h"
39 #include "id128-util.h"
40 #include "linux-3.13/dm-ioctl.h"
41 #include "mount-util.h"
42 #include "path-util.h"
43 #include "process-util.h"
44 #include "raw-clone.h"
45 #include "signal-util.h"
46 #include "stat-util.h"
47 #include "stdio-util.h"
48 #include "string-table.h"
49 #include "string-util.h"
50 #include "strv.h"
51 #include "udev-util.h"
52 #include "xattr-util.h"
53
54 _unused_ static int probe_filesystem(const char *node, char **ret_fstype) {
55 #if HAVE_BLKID
56 _cleanup_blkid_free_probe_ blkid_probe b = NULL;
57 const char *fstype;
58 int r;
59
60 b = blkid_new_probe_from_filename(node);
61 if (!b)
62 return -ENOMEM;
63
64 blkid_probe_enable_superblocks(b, 1);
65 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
66
67 errno = 0;
68 r = blkid_do_safeprobe(b);
69 if (IN_SET(r, -2, 1)) {
70 log_debug("Failed to identify any partition type on partition %s", node);
71 goto not_found;
72 }
73 if (r != 0)
74 return -errno ?: -EIO;
75
76 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
77
78 if (fstype) {
79 char *t;
80
81 t = strdup(fstype);
82 if (!t)
83 return -ENOMEM;
84
85 *ret_fstype = t;
86 return 1;
87 }
88
89 not_found:
90 *ret_fstype = NULL;
91 return 0;
92 #else
93 return -EOPNOTSUPP;
94 #endif
95 }
96
97 int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DissectedImage **ret) {
98
99 #if HAVE_BLKID
100 sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL;
101 _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
102 bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
103 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
104 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
105 _cleanup_blkid_free_probe_ blkid_probe b = NULL;
106 _cleanup_udev_unref_ struct udev *udev = NULL;
107 _cleanup_free_ char *generic_node = NULL;
108 sd_id128_t generic_uuid = SD_ID128_NULL;
109 const char *pttype = NULL;
110 struct udev_list_entry *first, *item;
111 blkid_partlist pl;
112 int r, generic_nr;
113 struct stat st;
114 unsigned i;
115
116 assert(fd >= 0);
117 assert(ret);
118 assert(root_hash || root_hash_size == 0);
119
120 /* Probes a disk image, and returns information about what it found in *ret.
121 *
122 * Returns -ENOPKG if no suitable partition table or file system could be found.
123 * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */
124
125 if (root_hash) {
126 /* If a root hash is supplied, then we use the root partition that has a UUID that match the first
127 * 128bit of the root hash. And we use the verity partition that has a UUID that match the final
128 * 128bit. */
129
130 if (root_hash_size < sizeof(sd_id128_t))
131 return -EINVAL;
132
133 memcpy(&root_uuid, root_hash, sizeof(sd_id128_t));
134 memcpy(&verity_uuid, (const uint8_t*) root_hash + root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
135
136 if (sd_id128_is_null(root_uuid))
137 return -EINVAL;
138 if (sd_id128_is_null(verity_uuid))
139 return -EINVAL;
140 }
141
142 if (fstat(fd, &st) < 0)
143 return -errno;
144
145 if (!S_ISBLK(st.st_mode))
146 return -ENOTBLK;
147
148 b = blkid_new_probe();
149 if (!b)
150 return -ENOMEM;
151
152 errno = 0;
153 r = blkid_probe_set_device(b, fd, 0, 0);
154 if (r != 0)
155 return -errno ?: -ENOMEM;
156
157 if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
158 /* Look for file system superblocks, unless we only shall look for GPT partition tables */
159 blkid_probe_enable_superblocks(b, 1);
160 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
161 }
162
163 blkid_probe_enable_partitions(b, 1);
164 blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
165
166 errno = 0;
167 r = blkid_do_safeprobe(b);
168 if (IN_SET(r, -2, 1)) {
169 log_debug("Failed to identify any partition table.");
170 return -ENOPKG;
171 }
172 if (r != 0)
173 return -errno ?: -EIO;
174
175 m = new0(DissectedImage, 1);
176 if (!m)
177 return -ENOMEM;
178
179 if (!(flags & DISSECT_IMAGE_GPT_ONLY) &&
180 (flags & DISSECT_IMAGE_REQUIRE_ROOT)) {
181 const char *usage = NULL;
182
183 (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
184 if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
185 _cleanup_free_ char *t = NULL, *n = NULL;
186 const char *fstype = NULL;
187
188 /* OK, we have found a file system, that's our root partition then. */
189 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
190
191 if (fstype) {
192 t = strdup(fstype);
193 if (!t)
194 return -ENOMEM;
195 }
196
197 if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0)
198 return -ENOMEM;
199
200 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
201 .found = true,
202 .rw = true,
203 .partno = -1,
204 .architecture = _ARCHITECTURE_INVALID,
205 .fstype = t,
206 .node = n,
207 };
208
209 t = n = NULL;
210
211 m->encrypted = streq(fstype, "crypto_LUKS");
212
213 *ret = m;
214 m = NULL;
215
216 return 0;
217 }
218 }
219
220 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
221 if (!pttype)
222 return -ENOPKG;
223
224 is_gpt = streq_ptr(pttype, "gpt");
225 is_mbr = streq_ptr(pttype, "dos");
226
227 if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
228 return -ENOPKG;
229
230 errno = 0;
231 pl = blkid_probe_get_partitions(b);
232 if (!pl)
233 return -errno ?: -ENOMEM;
234
235 udev = udev_new();
236 if (!udev)
237 return -errno;
238
239 d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
240 if (!d)
241 return -ENOMEM;
242
243 for (i = 0;; i++) {
244 int n, z;
245
246 if (i >= 10) {
247 log_debug("Kernel partitions never appeared.");
248 return -ENXIO;
249 }
250
251 e = udev_enumerate_new(udev);
252 if (!e)
253 return -errno;
254
255 r = udev_enumerate_add_match_parent(e, d);
256 if (r < 0)
257 return r;
258
259 r = udev_enumerate_scan_devices(e);
260 if (r < 0)
261 return r;
262
263 /* Count the partitions enumerated by the kernel */
264 n = 0;
265 first = udev_enumerate_get_list_entry(e);
266 udev_list_entry_foreach(item, first)
267 n++;
268
269 /* Count the partitions enumerated by blkid */
270 z = blkid_partlist_numof_partitions(pl);
271 if (n == z + 1)
272 break;
273 if (n > z + 1) {
274 log_debug("blkid and kernel partition list do not match.");
275 return -EIO;
276 }
277 if (n < z + 1) {
278 unsigned j = 0;
279
280 /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running
281 * or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a
282 * synchronous call that waits until probing is complete. */
283
284 for (;;) {
285 if (j++ > 20)
286 return -EBUSY;
287
288 if (ioctl(fd, BLKRRPART, 0) < 0) {
289 r = -errno;
290
291 if (r == -EINVAL) {
292 struct loop_info64 info;
293
294 /* If we are running on a loop device that has partition scanning off,
295 * return an explicit recognizable error about this, so that callers
296 * can generate a proper message explaining the situation. */
297
298 if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
299 log_debug("Device is loop device and partition scanning is off!");
300 return -EPROTONOSUPPORT;
301 }
302 }
303 if (r != -EBUSY)
304 return r;
305 } else
306 break;
307
308 /* If something else has the device open, such as an udev rule, the ioctl will return
309 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a
310 * bit, and try again.
311 *
312 * This is really something they should fix in the kernel! */
313
314 (void) usleep(50 * USEC_PER_MSEC);
315 }
316 }
317
318 e = udev_enumerate_unref(e);
319 }
320
321 first = udev_enumerate_get_list_entry(e);
322 udev_list_entry_foreach(item, first) {
323 _cleanup_udev_device_unref_ struct udev_device *q;
324 unsigned long long pflags;
325 blkid_partition pp;
326 const char *node, *sysname;
327 dev_t qn;
328 int nr;
329
330 q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
331 if (!q)
332 return -errno;
333
334 qn = udev_device_get_devnum(q);
335 if (major(qn) == 0)
336 continue;
337
338 if (st.st_rdev == qn)
339 continue;
340
341 /* Filter out weird MMC RPMB partitions, which cannot reasonably be read, see
342 * https://github.com/systemd/systemd/issues/5806 */
343 sysname = udev_device_get_sysname(q);
344 if (sysname && startswith(sysname, "mmcblk") && endswith(sysname, "rpmb"))
345 continue;
346
347 node = udev_device_get_devnode(q);
348 if (!node)
349 continue;
350
351 pp = blkid_partlist_devno_to_partition(pl, qn);
352 if (!pp)
353 continue;
354
355 pflags = blkid_partition_get_flags(pp);
356
357 nr = blkid_partition_get_partno(pp);
358 if (nr < 0)
359 continue;
360
361 if (is_gpt) {
362 int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID;
363 const char *stype, *sid, *fstype = NULL;
364 sd_id128_t type_id, id;
365 bool rw = true;
366
367 sid = blkid_partition_get_uuid(pp);
368 if (!sid)
369 continue;
370 if (sd_id128_from_string(sid, &id) < 0)
371 continue;
372
373 stype = blkid_partition_get_type_string(pp);
374 if (!stype)
375 continue;
376 if (sd_id128_from_string(stype, &type_id) < 0)
377 continue;
378
379 if (sd_id128_equal(type_id, GPT_HOME)) {
380
381 if (pflags & GPT_FLAG_NO_AUTO)
382 continue;
383
384 designator = PARTITION_HOME;
385 rw = !(pflags & GPT_FLAG_READ_ONLY);
386 } else if (sd_id128_equal(type_id, GPT_SRV)) {
387
388 if (pflags & GPT_FLAG_NO_AUTO)
389 continue;
390
391 designator = PARTITION_SRV;
392 rw = !(pflags & GPT_FLAG_READ_ONLY);
393 } else if (sd_id128_equal(type_id, GPT_ESP)) {
394
395 /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is not defined
396 * there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as recommended by the
397 * UEFI spec (See "12.3.3 Number and Location of System Partitions"). */
398
399 if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
400 continue;
401
402 designator = PARTITION_ESP;
403 fstype = "vfat";
404 }
405 #ifdef GPT_ROOT_NATIVE
406 else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
407
408 if (pflags & GPT_FLAG_NO_AUTO)
409 continue;
410
411 /* If a root ID is specified, ignore everything but the root id */
412 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
413 continue;
414
415 designator = PARTITION_ROOT;
416 architecture = native_architecture();
417 rw = !(pflags & GPT_FLAG_READ_ONLY);
418 } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
419
420 if (pflags & GPT_FLAG_NO_AUTO)
421 continue;
422
423 m->can_verity = true;
424
425 /* Ignore verity unless a root hash is specified */
426 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
427 continue;
428
429 designator = PARTITION_ROOT_VERITY;
430 fstype = "DM_verity_hash";
431 architecture = native_architecture();
432 rw = false;
433 }
434 #endif
435 #ifdef GPT_ROOT_SECONDARY
436 else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
437
438 if (pflags & GPT_FLAG_NO_AUTO)
439 continue;
440
441 /* If a root ID is specified, ignore everything but the root id */
442 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
443 continue;
444
445 designator = PARTITION_ROOT_SECONDARY;
446 architecture = SECONDARY_ARCHITECTURE;
447 rw = !(pflags & GPT_FLAG_READ_ONLY);
448 } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
449
450 if (pflags & GPT_FLAG_NO_AUTO)
451 continue;
452
453 m->can_verity = true;
454
455 /* Ignore verity unless root has is specified */
456 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
457 continue;
458
459 designator = PARTITION_ROOT_SECONDARY_VERITY;
460 fstype = "DM_verity_hash";
461 architecture = SECONDARY_ARCHITECTURE;
462 rw = false;
463 }
464 #endif
465 else if (sd_id128_equal(type_id, GPT_SWAP)) {
466
467 if (pflags & GPT_FLAG_NO_AUTO)
468 continue;
469
470 designator = PARTITION_SWAP;
471 fstype = "swap";
472 } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
473
474 if (pflags & GPT_FLAG_NO_AUTO)
475 continue;
476
477 if (generic_node)
478 multiple_generic = true;
479 else {
480 generic_nr = nr;
481 generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
482 generic_uuid = id;
483 generic_node = strdup(node);
484 if (!generic_node)
485 return -ENOMEM;
486 }
487 }
488
489 if (designator != _PARTITION_DESIGNATOR_INVALID) {
490 _cleanup_free_ char *t = NULL, *n = NULL;
491
492 /* First one wins */
493 if (m->partitions[designator].found)
494 continue;
495
496 if (fstype) {
497 t = strdup(fstype);
498 if (!t)
499 return -ENOMEM;
500 }
501
502 n = strdup(node);
503 if (!n)
504 return -ENOMEM;
505
506 m->partitions[designator] = (DissectedPartition) {
507 .found = true,
508 .partno = nr,
509 .rw = rw,
510 .architecture = architecture,
511 .node = n,
512 .fstype = t,
513 .uuid = id,
514 };
515
516 n = t = NULL;
517 }
518
519 } else if (is_mbr) {
520
521 if (pflags != 0x80) /* Bootable flag */
522 continue;
523
524 if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */
525 continue;
526
527 if (generic_node)
528 multiple_generic = true;
529 else {
530 generic_nr = nr;
531 generic_rw = true;
532 generic_node = strdup(node);
533 if (!generic_node)
534 return -ENOMEM;
535 }
536 }
537 }
538
539 if (!m->partitions[PARTITION_ROOT].found) {
540 /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not
541 * either, then check if there's a single generic one, and use that. */
542
543 if (m->partitions[PARTITION_ROOT_VERITY].found)
544 return -EADDRNOTAVAIL;
545
546 if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
547 m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
548 zero(m->partitions[PARTITION_ROOT_SECONDARY]);
549
550 m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
551 zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
552
553 } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) {
554
555 /* If the root has was set, then we won't fallback to a generic node, because the root hash
556 * decides */
557 if (root_hash)
558 return -EADDRNOTAVAIL;
559
560 /* If we didn't find a generic node, then we can't fix this up either */
561 if (!generic_node)
562 return -ENXIO;
563
564 /* If we didn't find a properly marked root partition, but we did find a single suitable
565 * generic Linux partition, then use this as root partition, if the caller asked for it. */
566 if (multiple_generic)
567 return -ENOTUNIQ;
568
569 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
570 .found = true,
571 .rw = generic_rw,
572 .partno = generic_nr,
573 .architecture = _ARCHITECTURE_INVALID,
574 .node = generic_node,
575 .uuid = generic_uuid,
576 };
577
578 generic_node = NULL;
579 }
580 }
581
582 if (root_hash) {
583 if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
584 return -EADDRNOTAVAIL;
585
586 /* If we found the primary root with the hash, then we definitely want to suppress any secondary root
587 * (which would be weird, after all the root hash should only be assigned to one pair of
588 * partitions... */
589 m->partitions[PARTITION_ROOT_SECONDARY].found = false;
590 m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
591
592 /* If we found a verity setup, then the root partition is necessarily read-only. */
593 m->partitions[PARTITION_ROOT].rw = false;
594
595 m->verity = true;
596 }
597
598 blkid_free_probe(b);
599 b = NULL;
600
601 /* Fill in file system types if we don't know them yet. */
602 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
603 DissectedPartition *p = m->partitions + i;
604
605 if (!p->found)
606 continue;
607
608 if (!p->fstype && p->node) {
609 r = probe_filesystem(p->node, &p->fstype);
610 if (r < 0)
611 return r;
612 }
613
614 if (streq_ptr(p->fstype, "crypto_LUKS"))
615 m->encrypted = true;
616
617 if (p->fstype && fstype_is_ro(p->fstype))
618 p->rw = false;
619 }
620
621 *ret = m;
622 m = NULL;
623
624 return 0;
625 #else
626 return -EOPNOTSUPP;
627 #endif
628 }
629
630 DissectedImage* dissected_image_unref(DissectedImage *m) {
631 unsigned i;
632
633 if (!m)
634 return NULL;
635
636 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
637 free(m->partitions[i].fstype);
638 free(m->partitions[i].node);
639 free(m->partitions[i].decrypted_fstype);
640 free(m->partitions[i].decrypted_node);
641 }
642
643 free(m->hostname);
644 strv_free(m->machine_info);
645 strv_free(m->os_release);
646
647 return mfree(m);
648 }
649
/* Checks whether the block device node at 'path' is a loop device, or a partition of one, by looking for
 * a "loop" directory in the sysfs entry of the device or of its parent.
 *
 * Returns > 0 if it is, 0 if it is not, -ENOTBLK if 'path' is not a block device node, and another
 * negative errno-style error if the check itself failed. */
static int is_loop_device(const char *path) {
        char s[SYS_BLOCK_PATH_MAX("/../loop/")];
        struct stat st;

        assert(path);

        if (stat(path, &st) < 0)
                return -errno;

        if (!S_ISBLK(st.st_mode))
                return -ENOTBLK;

        /* Note: st_rdev is the device number the node *represents*; st_dev would be the device of the file
         * system the node is stored on, which is not what we want to look up in sysfs. */
        xsprintf_sys_block_path(s, "/loop/", st.st_rdev);
        if (access(s, F_OK) < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
                xsprintf_sys_block_path(s, "/../loop/", st.st_rdev);
                if (access(s, F_OK) < 0)
                        return errno == ENOENT ? false : -errno;
        }

        return true;
}
675
676 static int mount_partition(
677 DissectedPartition *m,
678 const char *where,
679 const char *directory,
680 DissectImageFlags flags) {
681
682 const char *p, *options = NULL, *node, *fstype;
683 _cleanup_free_ char *chased = NULL;
684 bool rw;
685 int r;
686
687 assert(m);
688 assert(where);
689
690 node = m->decrypted_node ?: m->node;
691 fstype = m->decrypted_fstype ?: m->fstype;
692
693 if (!m->found || !node || !fstype)
694 return 0;
695
696 /* Stacked encryption? Yuck */
697 if (streq_ptr(fstype, "crypto_LUKS"))
698 return -ELOOP;
699
700 rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY);
701
702 if (directory) {
703 r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased);
704 if (r < 0)
705 return r;
706
707 p = chased;
708 } else
709 p = where;
710
711 /* If requested, turn on discard support. */
712 if (fstype_can_discard(fstype) &&
713 ((flags & DISSECT_IMAGE_DISCARD) ||
714 ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node))))
715 options = "discard";
716
717 return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
718 }
719
720 int dissected_image_mount(DissectedImage *m, const char *where, DissectImageFlags flags) {
721 int r;
722
723 assert(m);
724 assert(where);
725
726 if (!m->partitions[PARTITION_ROOT].found)
727 return -ENXIO;
728
729 r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, flags);
730 if (r < 0)
731 return r;
732
733 r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", flags);
734 if (r < 0)
735 return r;
736
737 r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", flags);
738 if (r < 0)
739 return r;
740
741 if (m->partitions[PARTITION_ESP].found) {
742 const char *mp;
743
744 /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
745
746 FOREACH_STRING(mp, "/efi", "/boot") {
747 _cleanup_free_ char *p = NULL;
748
749 r = chase_symlinks(mp, where, CHASE_PREFIX_ROOT, &p);
750 if (r < 0)
751 continue;
752
753 r = dir_is_empty(p);
754 if (r > 0) {
755 r = mount_partition(m->partitions + PARTITION_ESP, where, mp, flags);
756 if (r < 0)
757 return r;
758 }
759 }
760 }
761
762 return 0;
763 }
764
765 #if HAVE_LIBCRYPTSETUP
/* Book-keeping for one device-mapper device set up on behalf of an image. */
struct DecryptedPartition {
        struct crypt_device *device;    /* libcryptsetup handle; released with crypt_free() */
        char *name;                     /* device-mapper name, used for deactivation and removal */
        bool relinquished;              /* if true, the device is not deactivated when the image is freed */
};

typedef struct DecryptedPartition DecryptedPartition;

/* All device-mapper devices that were activated for one image. */
struct DecryptedImage {
        DecryptedPartition *decrypted;  /* array of entries, grown with GREEDY_REALLOC0() */
        size_t n_decrypted;             /* number of entries in use */
        size_t n_allocated;             /* allocation book-keeping for GREEDY_REALLOC0() */
};
777 #endif
778
779 DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
780 #if HAVE_LIBCRYPTSETUP
781 size_t i;
782 int r;
783
784 if (!d)
785 return NULL;
786
787 for (i = 0; i < d->n_decrypted; i++) {
788 DecryptedPartition *p = d->decrypted + i;
789
790 if (p->device && p->name && !p->relinquished) {
791 r = crypt_deactivate(p->device, p->name);
792 if (r < 0)
793 log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
794 }
795
796 if (p->device)
797 crypt_free(p->device);
798 free(p->name);
799 }
800
801 free(d);
802 #endif
803 return NULL;
804 }
805
806 #if HAVE_LIBCRYPTSETUP
807
808 static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
809 _cleanup_free_ char *name = NULL, *node = NULL;
810 const char *base;
811
812 assert(original_node);
813 assert(suffix);
814 assert(ret_name);
815 assert(ret_node);
816
817 base = strrchr(original_node, '/');
818 if (!base)
819 return -EINVAL;
820 base++;
821 if (isempty(base))
822 return -EINVAL;
823
824 name = strjoin(base, suffix);
825 if (!name)
826 return -ENOMEM;
827 if (!filename_is_valid(name))
828 return -EINVAL;
829
830 node = strjoin(crypt_get_dir(), "/", name);
831 if (!node)
832 return -ENOMEM;
833
834 *ret_name = name;
835 *ret_node = node;
836
837 name = node = NULL;
838 return 0;
839 }
840
841 static int decrypt_partition(
842 DissectedPartition *m,
843 const char *passphrase,
844 DissectImageFlags flags,
845 DecryptedImage *d) {
846
847 _cleanup_free_ char *node = NULL, *name = NULL;
848 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
849 int r;
850
851 assert(m);
852 assert(d);
853
854 if (!m->found || !m->node || !m->fstype)
855 return 0;
856
857 if (!streq(m->fstype, "crypto_LUKS"))
858 return 0;
859
860 if (!passphrase)
861 return -ENOKEY;
862
863 r = make_dm_name_and_node(m->node, "-decrypted", &name, &node);
864 if (r < 0)
865 return r;
866
867 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
868 return -ENOMEM;
869
870 r = crypt_init(&cd, m->node);
871 if (r < 0)
872 return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
873
874 r = crypt_load(cd, CRYPT_LUKS, NULL);
875 if (r < 0)
876 return log_debug_errno(r, "Failed to load LUKS metadata: %m");
877
878 r = crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
879 ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
880 ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
881 if (r < 0) {
882 log_debug_errno(r, "Failed to activate LUKS device: %m");
883 return r == -EPERM ? -EKEYREJECTED : r;
884 }
885
886 d->decrypted[d->n_decrypted].name = name;
887 name = NULL;
888
889 d->decrypted[d->n_decrypted].device = cd;
890 cd = NULL;
891 d->n_decrypted++;
892
893 m->decrypted_node = node;
894 node = NULL;
895
896 return 0;
897 }
898
899 static int verity_partition(
900 DissectedPartition *m,
901 DissectedPartition *v,
902 const void *root_hash,
903 size_t root_hash_size,
904 DissectImageFlags flags,
905 DecryptedImage *d) {
906
907 _cleanup_free_ char *node = NULL, *name = NULL;
908 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
909 int r;
910
911 assert(m);
912 assert(v);
913
914 if (!root_hash)
915 return 0;
916
917 if (!m->found || !m->node || !m->fstype)
918 return 0;
919 if (!v->found || !v->node || !v->fstype)
920 return 0;
921
922 if (!streq(v->fstype, "DM_verity_hash"))
923 return 0;
924
925 r = make_dm_name_and_node(m->node, "-verity", &name, &node);
926 if (r < 0)
927 return r;
928
929 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
930 return -ENOMEM;
931
932 r = crypt_init(&cd, v->node);
933 if (r < 0)
934 return r;
935
936 r = crypt_load(cd, CRYPT_VERITY, NULL);
937 if (r < 0)
938 return r;
939
940 r = crypt_set_data_device(cd, m->node);
941 if (r < 0)
942 return r;
943
944 r = crypt_activate_by_volume_key(cd, name, root_hash, root_hash_size, CRYPT_ACTIVATE_READONLY);
945 if (r < 0)
946 return r;
947
948 d->decrypted[d->n_decrypted].name = name;
949 name = NULL;
950
951 d->decrypted[d->n_decrypted].device = cd;
952 cd = NULL;
953 d->n_decrypted++;
954
955 m->decrypted_node = node;
956 node = NULL;
957
958 return 0;
959 }
960 #endif
961
962 int dissected_image_decrypt(
963 DissectedImage *m,
964 const char *passphrase,
965 const void *root_hash,
966 size_t root_hash_size,
967 DissectImageFlags flags,
968 DecryptedImage **ret) {
969
970 _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
971 #if HAVE_LIBCRYPTSETUP
972 unsigned i;
973 int r;
974 #endif
975
976 assert(m);
977 assert(root_hash || root_hash_size == 0);
978
979 /* Returns:
980 *
981 * = 0 → There was nothing to decrypt
982 * > 0 → Decrypted successfully
983 * -ENOKEY → There's something to decrypt but no key was supplied
984 * -EKEYREJECTED → Passed key was not correct
985 */
986
987 if (root_hash && root_hash_size < sizeof(sd_id128_t))
988 return -EINVAL;
989
990 if (!m->encrypted && !m->verity) {
991 *ret = NULL;
992 return 0;
993 }
994
995 #if HAVE_LIBCRYPTSETUP
996 d = new0(DecryptedImage, 1);
997 if (!d)
998 return -ENOMEM;
999
1000 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
1001 DissectedPartition *p = m->partitions + i;
1002 int k;
1003
1004 if (!p->found)
1005 continue;
1006
1007 r = decrypt_partition(p, passphrase, flags, d);
1008 if (r < 0)
1009 return r;
1010
1011 k = PARTITION_VERITY_OF(i);
1012 if (k >= 0) {
1013 r = verity_partition(p, m->partitions + k, root_hash, root_hash_size, flags, d);
1014 if (r < 0)
1015 return r;
1016 }
1017
1018 if (!p->decrypted_fstype && p->decrypted_node) {
1019 r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype);
1020 if (r < 0)
1021 return r;
1022 }
1023 }
1024
1025 *ret = d;
1026 d = NULL;
1027
1028 return 1;
1029 #else
1030 return -EOPNOTSUPP;
1031 #endif
1032 }
1033
1034 int dissected_image_decrypt_interactively(
1035 DissectedImage *m,
1036 const char *passphrase,
1037 const void *root_hash,
1038 size_t root_hash_size,
1039 DissectImageFlags flags,
1040 DecryptedImage **ret) {
1041
1042 _cleanup_strv_free_erase_ char **z = NULL;
1043 int n = 3, r;
1044
1045 if (passphrase)
1046 n--;
1047
1048 for (;;) {
1049 r = dissected_image_decrypt(m, passphrase, root_hash, root_hash_size, flags, ret);
1050 if (r >= 0)
1051 return r;
1052 if (r == -EKEYREJECTED)
1053 log_error_errno(r, "Incorrect passphrase, try again!");
1054 else if (r != -ENOKEY) {
1055 log_error_errno(r, "Failed to decrypt image: %m");
1056 return r;
1057 }
1058
1059 if (--n < 0) {
1060 log_error("Too many retries.");
1061 return -EKEYREJECTED;
1062 }
1063
1064 z = strv_free(z);
1065
1066 r = ask_password_auto("Please enter image passphrase!", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z);
1067 if (r < 0)
1068 return log_error_errno(r, "Failed to query for passphrase: %m");
1069
1070 passphrase = z[0];
1071 }
1072 }
1073
1074 #if HAVE_LIBCRYPTSETUP
1075 static int deferred_remove(DecryptedPartition *p) {
1076
1077 struct dm_ioctl dm = {
1078 .version = {
1079 DM_VERSION_MAJOR,
1080 DM_VERSION_MINOR,
1081 DM_VERSION_PATCHLEVEL
1082 },
1083 .data_size = sizeof(dm),
1084 .flags = DM_DEFERRED_REMOVE,
1085 };
1086
1087 _cleanup_close_ int fd = -1;
1088
1089 assert(p);
1090
1091 /* Unfortunately, libcryptsetup doesn't provide a proper API for this, hence call the ioctl() directly. */
1092
1093 fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
1094 if (fd < 0)
1095 return -errno;
1096
1097 strncpy(dm.name, p->name, sizeof(dm.name));
1098
1099 if (ioctl(fd, DM_DEV_REMOVE, &dm))
1100 return -errno;
1101
1102 return 0;
1103 }
1104 #endif
1105
1106 int decrypted_image_relinquish(DecryptedImage *d) {
1107
1108 #if HAVE_LIBCRYPTSETUP
1109 size_t i;
1110 int r;
1111 #endif
1112
1113 assert(d);
1114
1115 /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so
1116 * that we don't clean it up ourselves either anymore */
1117
1118 #if HAVE_LIBCRYPTSETUP
1119 for (i = 0; i < d->n_decrypted; i++) {
1120 DecryptedPartition *p = d->decrypted + i;
1121
1122 if (p->relinquished)
1123 continue;
1124
1125 r = deferred_remove(p);
1126 if (r < 0)
1127 return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
1128
1129 p->relinquished = true;
1130 }
1131 #endif
1132
1133 return 0;
1134 }
1135
1136 int root_hash_load(const char *image, void **ret, size_t *ret_size) {
1137 _cleanup_free_ char *text = NULL;
1138 _cleanup_free_ void *k = NULL;
1139 size_t l;
1140 int r;
1141
1142 assert(image);
1143 assert(ret);
1144 assert(ret_size);
1145
1146 if (is_device_path(image)) {
1147 /* If we are asked to load the root hash for a device node, exit early */
1148 *ret = NULL;
1149 *ret_size = 0;
1150 return 0;
1151 }
1152
1153 r = getxattr_malloc(image, "user.verity.roothash", &text, true);
1154 if (r < 0) {
1155 char *fn, *e, *n;
1156
1157 if (!IN_SET(r, -ENODATA, -EOPNOTSUPP, -ENOENT))
1158 return r;
1159
1160 fn = newa(char, strlen(image) + strlen(".roothash") + 1);
1161 n = stpcpy(fn, image);
1162 e = endswith(fn, ".raw");
1163 if (e)
1164 n = e;
1165
1166 strcpy(n, ".roothash");
1167
1168 r = read_one_line_file(fn, &text);
1169 if (r == -ENOENT) {
1170 *ret = NULL;
1171 *ret_size = 0;
1172 return 0;
1173 }
1174 if (r < 0)
1175 return r;
1176 }
1177
1178 r = unhexmem(text, strlen(text), &k, &l);
1179 if (r < 0)
1180 return r;
1181 if (l < sizeof(sd_id128_t))
1182 return -EINVAL;
1183
1184 *ret = k;
1185 *ret_size = l;
1186
1187 k = NULL;
1188
1189 return 1;
1190 }
1191
1192 int dissected_image_acquire_metadata(DissectedImage *m) {
1193
1194 enum {
1195 META_HOSTNAME,
1196 META_MACHINE_ID,
1197 META_MACHINE_INFO,
1198 META_OS_RELEASE,
1199 _META_MAX,
1200 };
1201
1202 static const char *const paths[_META_MAX] = {
1203 [META_HOSTNAME] = "/etc/hostname\0",
1204 [META_MACHINE_ID] = "/etc/machine-id\0",
1205 [META_MACHINE_INFO] = "/etc/machine-info\0",
1206 [META_OS_RELEASE] = "/etc/os-release\0/usr/lib/os-release\0",
1207 };
1208
1209 _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
1210 _cleanup_(rmdir_and_freep) char *t = NULL;
1211 _cleanup_(sigkill_waitp) pid_t child = 0;
1212 sd_id128_t machine_id = SD_ID128_NULL;
1213 _cleanup_free_ char *hostname = NULL;
1214 unsigned n_meta_initialized = 0, k;
1215 int fds[2 * _META_MAX], r;
1216 siginfo_t si;
1217
1218 BLOCK_SIGNALS(SIGCHLD);
1219
1220 assert(m);
1221
1222 for (; n_meta_initialized < _META_MAX; n_meta_initialized ++)
1223 if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
1224 r = -errno;
1225 goto finish;
1226 }
1227
1228 r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
1229 if (r < 0)
1230 goto finish;
1231
1232 child = raw_clone(SIGCHLD|CLONE_NEWNS);
1233 if (child < 0) {
1234 r = -errno;
1235 goto finish;
1236 }
1237
1238 if (child == 0) {
1239
1240 (void) reset_all_signal_handlers();
1241 (void) reset_signal_mask();
1242 assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0);
1243
1244 /* Make sure we never propagate to the host */
1245 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
1246 _exit(EXIT_FAILURE);
1247
1248 r = dissected_image_mount(m, t, DISSECT_IMAGE_READ_ONLY);
1249 if (r < 0)
1250 _exit(EXIT_FAILURE);
1251
1252 for (k = 0; k < _META_MAX; k++) {
1253 _cleanup_close_ int fd = -1;
1254 const char *p;
1255
1256 fds[2*k] = safe_close(fds[2*k]);
1257
1258 NULSTR_FOREACH(p, paths[k]) {
1259 _cleanup_free_ char *q = NULL;
1260
1261 r = chase_symlinks(p, t, CHASE_PREFIX_ROOT, &q);
1262 if (r < 0)
1263 continue;
1264
1265 fd = open(q, O_RDONLY|O_CLOEXEC|O_NOCTTY);
1266 if (fd >= 0)
1267 break;
1268 }
1269 if (fd < 0)
1270 continue;
1271
1272 r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0);
1273 if (r < 0)
1274 _exit(EXIT_FAILURE);
1275
1276 fds[2*k+1] = safe_close(fds[2*k+1]);
1277 }
1278
1279 _exit(EXIT_SUCCESS);
1280 }
1281
1282 for (k = 0; k < _META_MAX; k++) {
1283 _cleanup_fclose_ FILE *f = NULL;
1284
1285 fds[2*k+1] = safe_close(fds[2*k+1]);
1286
1287 f = fdopen(fds[2*k], "re");
1288 if (!f) {
1289 r = -errno;
1290 goto finish;
1291 }
1292
1293 fds[2*k] = -1;
1294
1295 switch (k) {
1296
1297 case META_HOSTNAME:
1298 r = read_etc_hostname_stream(f, &hostname);
1299 if (r < 0)
1300 log_debug_errno(r, "Failed to read /etc/hostname: %m");
1301
1302 break;
1303
1304 case META_MACHINE_ID: {
1305 _cleanup_free_ char *line = NULL;
1306
1307 r = read_line(f, LONG_LINE_MAX, &line);
1308 if (r < 0)
1309 log_debug_errno(r, "Failed to read /etc/machine-id: %m");
1310 else if (r == 33) {
1311 r = sd_id128_from_string(line, &machine_id);
1312 if (r < 0)
1313 log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
1314 } else if (r == 0)
1315 log_debug("/etc/machine-id file is empty.");
1316 else
1317 log_debug("/etc/machine-id has unexpected length %i.", r);
1318
1319 break;
1320 }
1321
1322 case META_MACHINE_INFO:
1323 r = load_env_file_pairs(f, "machine-info", NULL, &machine_info);
1324 if (r < 0)
1325 log_debug_errno(r, "Failed to read /etc/machine-info: %m");
1326
1327 break;
1328
1329 case META_OS_RELEASE:
1330 r = load_env_file_pairs(f, "os-release", NULL, &os_release);
1331 if (r < 0)
1332 log_debug_errno(r, "Failed to read OS release file: %m");
1333
1334 break;
1335 }
1336 }
1337
1338 r = wait_for_terminate(child, &si);
1339 if (r < 0)
1340 goto finish;
1341 child = 0;
1342
1343 if (si.si_code != CLD_EXITED || si.si_status != EXIT_SUCCESS) {
1344 r = -EPROTO;
1345 goto finish;
1346 }
1347
1348 free_and_replace(m->hostname, hostname);
1349 m->machine_id = machine_id;
1350 strv_free_and_replace(m->machine_info, machine_info);
1351 strv_free_and_replace(m->os_release, os_release);
1352
1353 finish:
1354 for (k = 0; k < n_meta_initialized; k++)
1355 safe_close_pair(fds + 2*k);
1356
1357 return r;
1358 }
1359
1360 static const char *const partition_designator_table[] = {
1361 [PARTITION_ROOT] = "root",
1362 [PARTITION_ROOT_SECONDARY] = "root-secondary",
1363 [PARTITION_HOME] = "home",
1364 [PARTITION_SRV] = "srv",
1365 [PARTITION_ESP] = "esp",
1366 [PARTITION_SWAP] = "swap",
1367 [PARTITION_ROOT_VERITY] = "root-verity",
1368 [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
1369 };
1370
1371 DEFINE_STRING_TABLE_LOOKUP(partition_designator, int);