]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/dissect-image.c
Merge pull request #7608 from poettering/more-news-v236
[thirdparty/systemd.git] / src / shared / dissect-image.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2016 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <sys/mount.h>
22 #include <sys/prctl.h>
23 #include <sys/wait.h>
24
25 #include "architecture.h"
26 #include "ask-password-api.h"
27 #include "blkid-util.h"
28 #include "copy.h"
29 #include "crypt-util.h"
30 #include "def.h"
31 #include "device-nodes.h"
32 #include "dissect-image.h"
33 #include "fd-util.h"
34 #include "fileio.h"
35 #include "fs-util.h"
36 #include "gpt.h"
37 #include "hexdecoct.h"
38 #include "hostname-util.h"
39 #include "id128-util.h"
40 #include "linux-3.13/dm-ioctl.h"
41 #include "mount-util.h"
42 #include "path-util.h"
43 #include "process-util.h"
44 #include "raw-clone.h"
45 #include "signal-util.h"
46 #include "stat-util.h"
47 #include "stdio-util.h"
48 #include "string-table.h"
49 #include "string-util.h"
50 #include "strv.h"
51 #include "udev-util.h"
52 #include "user-util.h"
53 #include "xattr-util.h"
54
/* Probes the device or image file 'node' with libblkid and reports the detected content type.
 *
 * Returns 1 and stores a newly allocated type string in *ret_fstype (to be freed by the caller) if a
 * type was detected. Returns 0 and stores NULL in *ret_fstype if nothing was found. Returns -EUCLEAN
 * for ambiguous results, and a different negative errno-style error otherwise. If built without
 * libblkid, -EOPNOTSUPP is returned and *ret_fstype is left untouched. */
int probe_filesystem(const char *node, char **ret_fstype) {
#if HAVE_BLKID
        _cleanup_blkid_free_probe_ blkid_probe b = NULL;
        /* Must start out as NULL: the result of the lookup below is deliberately ignored, hence the
         * variable needs a defined value even if the lookup does not fill it in. */
        const char *fstype = NULL;
        int r;

        errno = 0;
        b = blkid_new_probe_from_filename(node);
        if (!b)
                return -errno ?: -ENOMEM;

        blkid_probe_enable_superblocks(b, 1);
        blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);

        errno = 0;
        r = blkid_do_safeprobe(b);
        if (r == 1) {
                log_debug("No type detected on partition %s", node);
                goto not_found;
        }
        if (r == -2) {
                log_debug("Results ambiguous for partition %s", node);
                return -EUCLEAN;
        }
        if (r != 0)
                return -errno ?: -EIO;

        (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);

        if (fstype) {
                char *t;

                t = strdup(fstype);
                if (!t)
                        return -ENOMEM;

                *ret_fstype = t;
                return 1;
        }

not_found:
        *ret_fstype = NULL;
        return 0;
#else
        return -EOPNOTSUPP;
#endif
}
106
107 int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DissectedImage **ret) {
108
109 #if HAVE_BLKID
110 sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL;
111 _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
112 bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
113 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
114 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
115 _cleanup_blkid_free_probe_ blkid_probe b = NULL;
116 _cleanup_udev_unref_ struct udev *udev = NULL;
117 _cleanup_free_ char *generic_node = NULL;
118 sd_id128_t generic_uuid = SD_ID128_NULL;
119 const char *pttype = NULL;
120 struct udev_list_entry *first, *item;
121 blkid_partlist pl;
122 int r, generic_nr;
123 struct stat st;
124 unsigned i;
125
126 assert(fd >= 0);
127 assert(ret);
128 assert(root_hash || root_hash_size == 0);
129
130 /* Probes a disk image, and returns information about what it found in *ret.
131 *
132 * Returns -ENOPKG if no suitable partition table or file system could be found.
133 * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */
134
135 if (root_hash) {
136 /* If a root hash is supplied, then we use the root partition that has a UUID that match the first
137 * 128bit of the root hash. And we use the verity partition that has a UUID that match the final
138 * 128bit. */
139
140 if (root_hash_size < sizeof(sd_id128_t))
141 return -EINVAL;
142
143 memcpy(&root_uuid, root_hash, sizeof(sd_id128_t));
144 memcpy(&verity_uuid, (const uint8_t*) root_hash + root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
145
146 if (sd_id128_is_null(root_uuid))
147 return -EINVAL;
148 if (sd_id128_is_null(verity_uuid))
149 return -EINVAL;
150 }
151
152 if (fstat(fd, &st) < 0)
153 return -errno;
154
155 if (!S_ISBLK(st.st_mode))
156 return -ENOTBLK;
157
158 b = blkid_new_probe();
159 if (!b)
160 return -ENOMEM;
161
162 errno = 0;
163 r = blkid_probe_set_device(b, fd, 0, 0);
164 if (r != 0)
165 return -errno ?: -ENOMEM;
166
167 if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
168 /* Look for file system superblocks, unless we only shall look for GPT partition tables */
169 blkid_probe_enable_superblocks(b, 1);
170 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
171 }
172
173 blkid_probe_enable_partitions(b, 1);
174 blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
175
176 errno = 0;
177 r = blkid_do_safeprobe(b);
178 if (IN_SET(r, -2, 1)) {
179 log_debug("Failed to identify any partition table.");
180 return -ENOPKG;
181 }
182 if (r != 0)
183 return -errno ?: -EIO;
184
185 m = new0(DissectedImage, 1);
186 if (!m)
187 return -ENOMEM;
188
189 if (!(flags & DISSECT_IMAGE_GPT_ONLY) &&
190 (flags & DISSECT_IMAGE_REQUIRE_ROOT)) {
191 const char *usage = NULL;
192
193 (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
194 if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
195 _cleanup_free_ char *t = NULL, *n = NULL;
196 const char *fstype = NULL;
197
198 /* OK, we have found a file system, that's our root partition then. */
199 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
200
201 if (fstype) {
202 t = strdup(fstype);
203 if (!t)
204 return -ENOMEM;
205 }
206
207 if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0)
208 return -ENOMEM;
209
210 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
211 .found = true,
212 .rw = true,
213 .partno = -1,
214 .architecture = _ARCHITECTURE_INVALID,
215 .fstype = t,
216 .node = n,
217 };
218
219 t = n = NULL;
220
221 m->encrypted = streq(fstype, "crypto_LUKS");
222
223 *ret = m;
224 m = NULL;
225
226 return 0;
227 }
228 }
229
230 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
231 if (!pttype)
232 return -ENOPKG;
233
234 is_gpt = streq_ptr(pttype, "gpt");
235 is_mbr = streq_ptr(pttype, "dos");
236
237 if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
238 return -ENOPKG;
239
240 errno = 0;
241 pl = blkid_probe_get_partitions(b);
242 if (!pl)
243 return -errno ?: -ENOMEM;
244
245 udev = udev_new();
246 if (!udev)
247 return -errno;
248
249 d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
250 if (!d)
251 return -ENOMEM;
252
253 for (i = 0;; i++) {
254 int n, z;
255
256 if (i >= 10) {
257 log_debug("Kernel partitions never appeared.");
258 return -ENXIO;
259 }
260
261 e = udev_enumerate_new(udev);
262 if (!e)
263 return -errno;
264
265 r = udev_enumerate_add_match_parent(e, d);
266 if (r < 0)
267 return r;
268
269 r = udev_enumerate_scan_devices(e);
270 if (r < 0)
271 return r;
272
273 /* Count the partitions enumerated by the kernel */
274 n = 0;
275 first = udev_enumerate_get_list_entry(e);
276 udev_list_entry_foreach(item, first)
277 n++;
278
279 /* Count the partitions enumerated by blkid */
280 z = blkid_partlist_numof_partitions(pl);
281 if (n == z + 1)
282 break;
283 if (n > z + 1) {
284 log_debug("blkid and kernel partition list do not match.");
285 return -EIO;
286 }
287 if (n < z + 1) {
288 unsigned j = 0;
289
290 /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running
291 * or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a
292 * synchronous call that waits until probing is complete. */
293
294 for (;;) {
295 if (j++ > 20)
296 return -EBUSY;
297
298 if (ioctl(fd, BLKRRPART, 0) < 0) {
299 r = -errno;
300
301 if (r == -EINVAL) {
302 struct loop_info64 info;
303
304 /* If we are running on a loop device that has partition scanning off,
305 * return an explicit recognizable error about this, so that callers
306 * can generate a proper message explaining the situation. */
307
308 if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
309 log_debug("Device is loop device and partition scanning is off!");
310 return -EPROTONOSUPPORT;
311 }
312 }
313 if (r != -EBUSY)
314 return r;
315 } else
316 break;
317
318 /* If something else has the device open, such as an udev rule, the ioctl will return
319 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a
320 * bit, and try again.
321 *
322 * This is really something they should fix in the kernel! */
323
324 (void) usleep(50 * USEC_PER_MSEC);
325 }
326 }
327
328 e = udev_enumerate_unref(e);
329 }
330
331 first = udev_enumerate_get_list_entry(e);
332 udev_list_entry_foreach(item, first) {
333 _cleanup_udev_device_unref_ struct udev_device *q;
334 unsigned long long pflags;
335 blkid_partition pp;
336 const char *node, *sysname;
337 dev_t qn;
338 int nr;
339
340 q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
341 if (!q)
342 return -errno;
343
344 qn = udev_device_get_devnum(q);
345 if (major(qn) == 0)
346 continue;
347
348 if (st.st_rdev == qn)
349 continue;
350
351 /* Filter out weird MMC RPMB partitions, which cannot reasonably be read, see
352 * https://github.com/systemd/systemd/issues/5806 */
353 sysname = udev_device_get_sysname(q);
354 if (sysname && startswith(sysname, "mmcblk") && endswith(sysname, "rpmb"))
355 continue;
356
357 node = udev_device_get_devnode(q);
358 if (!node)
359 continue;
360
361 pp = blkid_partlist_devno_to_partition(pl, qn);
362 if (!pp)
363 continue;
364
365 pflags = blkid_partition_get_flags(pp);
366
367 nr = blkid_partition_get_partno(pp);
368 if (nr < 0)
369 continue;
370
371 if (is_gpt) {
372 int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID;
373 const char *stype, *sid, *fstype = NULL;
374 sd_id128_t type_id, id;
375 bool rw = true;
376
377 sid = blkid_partition_get_uuid(pp);
378 if (!sid)
379 continue;
380 if (sd_id128_from_string(sid, &id) < 0)
381 continue;
382
383 stype = blkid_partition_get_type_string(pp);
384 if (!stype)
385 continue;
386 if (sd_id128_from_string(stype, &type_id) < 0)
387 continue;
388
389 if (sd_id128_equal(type_id, GPT_HOME)) {
390
391 if (pflags & GPT_FLAG_NO_AUTO)
392 continue;
393
394 designator = PARTITION_HOME;
395 rw = !(pflags & GPT_FLAG_READ_ONLY);
396 } else if (sd_id128_equal(type_id, GPT_SRV)) {
397
398 if (pflags & GPT_FLAG_NO_AUTO)
399 continue;
400
401 designator = PARTITION_SRV;
402 rw = !(pflags & GPT_FLAG_READ_ONLY);
403 } else if (sd_id128_equal(type_id, GPT_ESP)) {
404
405 /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is not defined
406 * there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as recommended by the
407 * UEFI spec (See "12.3.3 Number and Location of System Partitions"). */
408
409 if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
410 continue;
411
412 designator = PARTITION_ESP;
413 fstype = "vfat";
414 }
415 #ifdef GPT_ROOT_NATIVE
416 else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
417
418 if (pflags & GPT_FLAG_NO_AUTO)
419 continue;
420
421 /* If a root ID is specified, ignore everything but the root id */
422 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
423 continue;
424
425 designator = PARTITION_ROOT;
426 architecture = native_architecture();
427 rw = !(pflags & GPT_FLAG_READ_ONLY);
428 } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
429
430 if (pflags & GPT_FLAG_NO_AUTO)
431 continue;
432
433 m->can_verity = true;
434
435 /* Ignore verity unless a root hash is specified */
436 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
437 continue;
438
439 designator = PARTITION_ROOT_VERITY;
440 fstype = "DM_verity_hash";
441 architecture = native_architecture();
442 rw = false;
443 }
444 #endif
445 #ifdef GPT_ROOT_SECONDARY
446 else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
447
448 if (pflags & GPT_FLAG_NO_AUTO)
449 continue;
450
451 /* If a root ID is specified, ignore everything but the root id */
452 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
453 continue;
454
455 designator = PARTITION_ROOT_SECONDARY;
456 architecture = SECONDARY_ARCHITECTURE;
457 rw = !(pflags & GPT_FLAG_READ_ONLY);
458 } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
459
460 if (pflags & GPT_FLAG_NO_AUTO)
461 continue;
462
463 m->can_verity = true;
464
465 /* Ignore verity unless root has is specified */
466 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
467 continue;
468
469 designator = PARTITION_ROOT_SECONDARY_VERITY;
470 fstype = "DM_verity_hash";
471 architecture = SECONDARY_ARCHITECTURE;
472 rw = false;
473 }
474 #endif
475 else if (sd_id128_equal(type_id, GPT_SWAP)) {
476
477 if (pflags & GPT_FLAG_NO_AUTO)
478 continue;
479
480 designator = PARTITION_SWAP;
481 fstype = "swap";
482 } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
483
484 if (pflags & GPT_FLAG_NO_AUTO)
485 continue;
486
487 if (generic_node)
488 multiple_generic = true;
489 else {
490 generic_nr = nr;
491 generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
492 generic_uuid = id;
493 generic_node = strdup(node);
494 if (!generic_node)
495 return -ENOMEM;
496 }
497 }
498
499 if (designator != _PARTITION_DESIGNATOR_INVALID) {
500 _cleanup_free_ char *t = NULL, *n = NULL;
501
502 /* First one wins */
503 if (m->partitions[designator].found)
504 continue;
505
506 if (fstype) {
507 t = strdup(fstype);
508 if (!t)
509 return -ENOMEM;
510 }
511
512 n = strdup(node);
513 if (!n)
514 return -ENOMEM;
515
516 m->partitions[designator] = (DissectedPartition) {
517 .found = true,
518 .partno = nr,
519 .rw = rw,
520 .architecture = architecture,
521 .node = n,
522 .fstype = t,
523 .uuid = id,
524 };
525
526 n = t = NULL;
527 }
528
529 } else if (is_mbr) {
530
531 if (pflags != 0x80) /* Bootable flag */
532 continue;
533
534 if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */
535 continue;
536
537 if (generic_node)
538 multiple_generic = true;
539 else {
540 generic_nr = nr;
541 generic_rw = true;
542 generic_node = strdup(node);
543 if (!generic_node)
544 return -ENOMEM;
545 }
546 }
547 }
548
549 if (!m->partitions[PARTITION_ROOT].found) {
550 /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not
551 * either, then check if there's a single generic one, and use that. */
552
553 if (m->partitions[PARTITION_ROOT_VERITY].found)
554 return -EADDRNOTAVAIL;
555
556 if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
557 m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
558 zero(m->partitions[PARTITION_ROOT_SECONDARY]);
559
560 m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
561 zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
562
563 } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) {
564
565 /* If the root has was set, then we won't fallback to a generic node, because the root hash
566 * decides */
567 if (root_hash)
568 return -EADDRNOTAVAIL;
569
570 /* If we didn't find a generic node, then we can't fix this up either */
571 if (!generic_node)
572 return -ENXIO;
573
574 /* If we didn't find a properly marked root partition, but we did find a single suitable
575 * generic Linux partition, then use this as root partition, if the caller asked for it. */
576 if (multiple_generic)
577 return -ENOTUNIQ;
578
579 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
580 .found = true,
581 .rw = generic_rw,
582 .partno = generic_nr,
583 .architecture = _ARCHITECTURE_INVALID,
584 .node = generic_node,
585 .uuid = generic_uuid,
586 };
587
588 generic_node = NULL;
589 }
590 }
591
592 if (root_hash) {
593 if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
594 return -EADDRNOTAVAIL;
595
596 /* If we found the primary root with the hash, then we definitely want to suppress any secondary root
597 * (which would be weird, after all the root hash should only be assigned to one pair of
598 * partitions... */
599 m->partitions[PARTITION_ROOT_SECONDARY].found = false;
600 m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
601
602 /* If we found a verity setup, then the root partition is necessarily read-only. */
603 m->partitions[PARTITION_ROOT].rw = false;
604
605 m->verity = true;
606 }
607
608 blkid_free_probe(b);
609 b = NULL;
610
611 /* Fill in file system types if we don't know them yet. */
612 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
613 DissectedPartition *p = m->partitions + i;
614
615 if (!p->found)
616 continue;
617
618 if (!p->fstype && p->node) {
619 r = probe_filesystem(p->node, &p->fstype);
620 if (r < 0 && r != -EUCLEAN)
621 return r;
622 }
623
624 if (streq_ptr(p->fstype, "crypto_LUKS"))
625 m->encrypted = true;
626
627 if (p->fstype && fstype_is_ro(p->fstype))
628 p->rw = false;
629 }
630
631 *ret = m;
632 m = NULL;
633
634 return 0;
635 #else
636 return -EOPNOTSUPP;
637 #endif
638 }
639
640 DissectedImage* dissected_image_unref(DissectedImage *m) {
641 unsigned i;
642
643 if (!m)
644 return NULL;
645
646 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
647 free(m->partitions[i].fstype);
648 free(m->partitions[i].node);
649 free(m->partitions[i].decrypted_fstype);
650 free(m->partitions[i].decrypted_node);
651 }
652
653 free(m->hostname);
654 strv_free(m->machine_info);
655 strv_free(m->os_release);
656
657 return mfree(m);
658 }
659
/* Checks whether the block device node at 'path' is a loop device, or a partition of one.
 *
 * Returns > 0 if it is, 0 if it is not, -ENOTBLK if 'path' is not a block device node, and another
 * negative errno-style error if the check itself failed. */
static int is_loop_device(const char *path) {
        char s[SYS_BLOCK_PATH_MAX("/../loop/")];
        struct stat st;

        assert(path);

        if (stat(path, &st) < 0)
                return -errno;

        if (!S_ISBLK(st.st_mode))
                return -ENOTBLK;

        /* Use st_rdev here, i.e. the device the node refers to. st_dev would be the device of the file
         * system the device node is stored on, which is not what we are interested in. */
        xsprintf_sys_block_path(s, "/loop/", st.st_rdev);
        if (access(s, F_OK) < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
                xsprintf_sys_block_path(s, "/../loop/", st.st_rdev);
                if (access(s, F_OK) < 0)
                        return errno == ENOENT ? false : -errno;
        }

        return true;
}
685
686 static int mount_partition(
687 DissectedPartition *m,
688 const char *where,
689 const char *directory,
690 uid_t uid_shift,
691 DissectImageFlags flags) {
692
693 _cleanup_free_ char *chased = NULL, *options = NULL;
694 const char *p, *node, *fstype;
695 bool rw;
696 int r;
697
698 assert(m);
699 assert(where);
700
701 node = m->decrypted_node ?: m->node;
702 fstype = m->decrypted_fstype ?: m->fstype;
703
704 if (!m->found || !node || !fstype)
705 return 0;
706
707 /* Stacked encryption? Yuck */
708 if (streq_ptr(fstype, "crypto_LUKS"))
709 return -ELOOP;
710
711 rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY);
712
713 if (directory) {
714 r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased);
715 if (r < 0)
716 return r;
717
718 p = chased;
719 } else
720 p = where;
721
722 /* If requested, turn on discard support. */
723 if (fstype_can_discard(fstype) &&
724 ((flags & DISSECT_IMAGE_DISCARD) ||
725 ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node)))) {
726 options = strdup("discard");
727 if (!options)
728 return -ENOMEM;
729 }
730
731 if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) {
732 _cleanup_free_ char *uid_option = NULL;
733
734 if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
735 return -ENOMEM;
736
737 if (!strextend_with_separator(&options, ",", uid_option, NULL))
738 return -ENOMEM;
739 }
740
741 return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
742 }
743
744 int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
745 int r;
746
747 assert(m);
748 assert(where);
749
750 if (!m->partitions[PARTITION_ROOT].found)
751 return -ENXIO;
752
753 if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
754 r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
755 if (r < 0)
756 return r;
757 }
758
759 if ((flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY))
760 return 0;
761
762 r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
763 if (r < 0)
764 return r;
765
766 r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags);
767 if (r < 0)
768 return r;
769
770 if (m->partitions[PARTITION_ESP].found) {
771 const char *mp;
772
773 /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
774
775 FOREACH_STRING(mp, "/efi", "/boot") {
776 _cleanup_free_ char *p = NULL;
777
778 r = chase_symlinks(mp, where, CHASE_PREFIX_ROOT, &p);
779 if (r < 0)
780 continue;
781
782 r = dir_is_empty(p);
783 if (r > 0) {
784 r = mount_partition(m->partitions + PARTITION_ESP, where, mp, uid_shift, flags);
785 if (r < 0)
786 return r;
787 }
788 }
789 }
790
791 return 0;
792 }
793
794 #if HAVE_LIBCRYPTSETUP
typedef struct DecryptedPartition DecryptedPartition;

/* Book-keeping for one device-mapper device set up on top of a partition of an image. */
struct DecryptedPartition {
        struct crypt_device *device; /* libcryptsetup context, released with crypt_free() */
        char *name;                  /* name the DM device was activated under, owned by this object */
        bool relinquished;           /* if true, the device is no longer deactivated when the image is unref'ed */
};

/* The set of device-mapper devices set up for one image. */
struct DecryptedImage {
        DecryptedPartition *decrypted; /* array of entries, grown with GREEDY_REALLOC0() */
        size_t n_decrypted;            /* number of entries in use */
        size_t n_allocated;            /* allocation size tracked for GREEDY_REALLOC0() */
};
806 #endif
807
808 DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
809 #if HAVE_LIBCRYPTSETUP
810 size_t i;
811 int r;
812
813 if (!d)
814 return NULL;
815
816 for (i = 0; i < d->n_decrypted; i++) {
817 DecryptedPartition *p = d->decrypted + i;
818
819 if (p->device && p->name && !p->relinquished) {
820 r = crypt_deactivate(p->device, p->name);
821 if (r < 0)
822 log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
823 }
824
825 if (p->device)
826 crypt_free(p->device);
827 free(p->name);
828 }
829
830 free(d);
831 #endif
832 return NULL;
833 }
834
835 #if HAVE_LIBCRYPTSETUP
836
837 static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
838 _cleanup_free_ char *name = NULL, *node = NULL;
839 const char *base;
840
841 assert(original_node);
842 assert(suffix);
843 assert(ret_name);
844 assert(ret_node);
845
846 base = strrchr(original_node, '/');
847 if (!base)
848 return -EINVAL;
849 base++;
850 if (isempty(base))
851 return -EINVAL;
852
853 name = strjoin(base, suffix);
854 if (!name)
855 return -ENOMEM;
856 if (!filename_is_valid(name))
857 return -EINVAL;
858
859 node = strjoin(crypt_get_dir(), "/", name);
860 if (!node)
861 return -ENOMEM;
862
863 *ret_name = name;
864 *ret_node = node;
865
866 name = node = NULL;
867 return 0;
868 }
869
870 static int decrypt_partition(
871 DissectedPartition *m,
872 const char *passphrase,
873 DissectImageFlags flags,
874 DecryptedImage *d) {
875
876 _cleanup_free_ char *node = NULL, *name = NULL;
877 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
878 int r;
879
880 assert(m);
881 assert(d);
882
883 if (!m->found || !m->node || !m->fstype)
884 return 0;
885
886 if (!streq(m->fstype, "crypto_LUKS"))
887 return 0;
888
889 if (!passphrase)
890 return -ENOKEY;
891
892 r = make_dm_name_and_node(m->node, "-decrypted", &name, &node);
893 if (r < 0)
894 return r;
895
896 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
897 return -ENOMEM;
898
899 r = crypt_init(&cd, m->node);
900 if (r < 0)
901 return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
902
903 r = crypt_load(cd, CRYPT_LUKS, NULL);
904 if (r < 0)
905 return log_debug_errno(r, "Failed to load LUKS metadata: %m");
906
907 r = crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
908 ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
909 ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
910 if (r < 0) {
911 log_debug_errno(r, "Failed to activate LUKS device: %m");
912 return r == -EPERM ? -EKEYREJECTED : r;
913 }
914
915 d->decrypted[d->n_decrypted].name = name;
916 name = NULL;
917
918 d->decrypted[d->n_decrypted].device = cd;
919 cd = NULL;
920 d->n_decrypted++;
921
922 m->decrypted_node = node;
923 node = NULL;
924
925 return 0;
926 }
927
928 static int verity_partition(
929 DissectedPartition *m,
930 DissectedPartition *v,
931 const void *root_hash,
932 size_t root_hash_size,
933 DissectImageFlags flags,
934 DecryptedImage *d) {
935
936 _cleanup_free_ char *node = NULL, *name = NULL;
937 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
938 int r;
939
940 assert(m);
941 assert(v);
942
943 if (!root_hash)
944 return 0;
945
946 if (!m->found || !m->node || !m->fstype)
947 return 0;
948 if (!v->found || !v->node || !v->fstype)
949 return 0;
950
951 if (!streq(v->fstype, "DM_verity_hash"))
952 return 0;
953
954 r = make_dm_name_and_node(m->node, "-verity", &name, &node);
955 if (r < 0)
956 return r;
957
958 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
959 return -ENOMEM;
960
961 r = crypt_init(&cd, v->node);
962 if (r < 0)
963 return r;
964
965 r = crypt_load(cd, CRYPT_VERITY, NULL);
966 if (r < 0)
967 return r;
968
969 r = crypt_set_data_device(cd, m->node);
970 if (r < 0)
971 return r;
972
973 r = crypt_activate_by_volume_key(cd, name, root_hash, root_hash_size, CRYPT_ACTIVATE_READONLY);
974 if (r < 0)
975 return r;
976
977 d->decrypted[d->n_decrypted].name = name;
978 name = NULL;
979
980 d->decrypted[d->n_decrypted].device = cd;
981 cd = NULL;
982 d->n_decrypted++;
983
984 m->decrypted_node = node;
985 node = NULL;
986
987 return 0;
988 }
989 #endif
990
991 int dissected_image_decrypt(
992 DissectedImage *m,
993 const char *passphrase,
994 const void *root_hash,
995 size_t root_hash_size,
996 DissectImageFlags flags,
997 DecryptedImage **ret) {
998
999 #if HAVE_LIBCRYPTSETUP
1000 _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
1001 unsigned i;
1002 int r;
1003 #endif
1004
1005 assert(m);
1006 assert(root_hash || root_hash_size == 0);
1007
1008 /* Returns:
1009 *
1010 * = 0 → There was nothing to decrypt
1011 * > 0 → Decrypted successfully
1012 * -ENOKEY → There's something to decrypt but no key was supplied
1013 * -EKEYREJECTED → Passed key was not correct
1014 */
1015
1016 if (root_hash && root_hash_size < sizeof(sd_id128_t))
1017 return -EINVAL;
1018
1019 if (!m->encrypted && !m->verity) {
1020 *ret = NULL;
1021 return 0;
1022 }
1023
1024 #if HAVE_LIBCRYPTSETUP
1025 d = new0(DecryptedImage, 1);
1026 if (!d)
1027 return -ENOMEM;
1028
1029 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
1030 DissectedPartition *p = m->partitions + i;
1031 int k;
1032
1033 if (!p->found)
1034 continue;
1035
1036 r = decrypt_partition(p, passphrase, flags, d);
1037 if (r < 0)
1038 return r;
1039
1040 k = PARTITION_VERITY_OF(i);
1041 if (k >= 0) {
1042 r = verity_partition(p, m->partitions + k, root_hash, root_hash_size, flags, d);
1043 if (r < 0)
1044 return r;
1045 }
1046
1047 if (!p->decrypted_fstype && p->decrypted_node) {
1048 r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype);
1049 if (r < 0 && r != -EUCLEAN)
1050 return r;
1051 }
1052 }
1053
1054 *ret = d;
1055 d = NULL;
1056
1057 return 1;
1058 #else
1059 return -EOPNOTSUPP;
1060 #endif
1061 }
1062
1063 int dissected_image_decrypt_interactively(
1064 DissectedImage *m,
1065 const char *passphrase,
1066 const void *root_hash,
1067 size_t root_hash_size,
1068 DissectImageFlags flags,
1069 DecryptedImage **ret) {
1070
1071 _cleanup_strv_free_erase_ char **z = NULL;
1072 int n = 3, r;
1073
1074 if (passphrase)
1075 n--;
1076
1077 for (;;) {
1078 r = dissected_image_decrypt(m, passphrase, root_hash, root_hash_size, flags, ret);
1079 if (r >= 0)
1080 return r;
1081 if (r == -EKEYREJECTED)
1082 log_error_errno(r, "Incorrect passphrase, try again!");
1083 else if (r != -ENOKEY) {
1084 log_error_errno(r, "Failed to decrypt image: %m");
1085 return r;
1086 }
1087
1088 if (--n < 0) {
1089 log_error("Too many retries.");
1090 return -EKEYREJECTED;
1091 }
1092
1093 z = strv_free(z);
1094
1095 r = ask_password_auto("Please enter image passphrase!", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z);
1096 if (r < 0)
1097 return log_error_errno(r, "Failed to query for passphrase: %m");
1098
1099 passphrase = z[0];
1100 }
1101 }
1102
1103 #if HAVE_LIBCRYPTSETUP
1104 static int deferred_remove(DecryptedPartition *p) {
1105
1106 struct dm_ioctl dm = {
1107 .version = {
1108 DM_VERSION_MAJOR,
1109 DM_VERSION_MINOR,
1110 DM_VERSION_PATCHLEVEL
1111 },
1112 .data_size = sizeof(dm),
1113 .flags = DM_DEFERRED_REMOVE,
1114 };
1115
1116 _cleanup_close_ int fd = -1;
1117
1118 assert(p);
1119
1120 /* Unfortunately, libcryptsetup doesn't provide a proper API for this, hence call the ioctl() directly. */
1121
1122 fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
1123 if (fd < 0)
1124 return -errno;
1125
1126 strncpy(dm.name, p->name, sizeof(dm.name));
1127
1128 if (ioctl(fd, DM_DEV_REMOVE, &dm))
1129 return -errno;
1130
1131 return 0;
1132 }
1133 #endif
1134
1135 int decrypted_image_relinquish(DecryptedImage *d) {
1136
1137 #if HAVE_LIBCRYPTSETUP
1138 size_t i;
1139 int r;
1140 #endif
1141
1142 assert(d);
1143
1144 /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so
1145 * that we don't clean it up ourselves either anymore */
1146
1147 #if HAVE_LIBCRYPTSETUP
1148 for (i = 0; i < d->n_decrypted; i++) {
1149 DecryptedPartition *p = d->decrypted + i;
1150
1151 if (p->relinquished)
1152 continue;
1153
1154 r = deferred_remove(p);
1155 if (r < 0)
1156 return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
1157
1158 p->relinquished = true;
1159 }
1160 #endif
1161
1162 return 0;
1163 }
1164
1165 int root_hash_load(const char *image, void **ret, size_t *ret_size) {
1166 _cleanup_free_ char *text = NULL;
1167 _cleanup_free_ void *k = NULL;
1168 size_t l;
1169 int r;
1170
1171 assert(image);
1172 assert(ret);
1173 assert(ret_size);
1174
1175 if (is_device_path(image)) {
1176 /* If we are asked to load the root hash for a device node, exit early */
1177 *ret = NULL;
1178 *ret_size = 0;
1179 return 0;
1180 }
1181
1182 r = getxattr_malloc(image, "user.verity.roothash", &text, true);
1183 if (r < 0) {
1184 char *fn, *e, *n;
1185
1186 if (!IN_SET(r, -ENODATA, -EOPNOTSUPP, -ENOENT))
1187 return r;
1188
1189 fn = newa(char, strlen(image) + STRLEN(".roothash") + 1);
1190 n = stpcpy(fn, image);
1191 e = endswith(fn, ".raw");
1192 if (e)
1193 n = e;
1194
1195 strcpy(n, ".roothash");
1196
1197 r = read_one_line_file(fn, &text);
1198 if (r == -ENOENT) {
1199 *ret = NULL;
1200 *ret_size = 0;
1201 return 0;
1202 }
1203 if (r < 0)
1204 return r;
1205 }
1206
1207 r = unhexmem(text, strlen(text), &k, &l);
1208 if (r < 0)
1209 return r;
1210 if (l < sizeof(sd_id128_t))
1211 return -EINVAL;
1212
1213 *ret = k;
1214 *ret_size = l;
1215
1216 k = NULL;
1217
1218 return 1;
1219 }
1220
1221 int dissected_image_acquire_metadata(DissectedImage *m) {
1222
1223 enum {
1224 META_HOSTNAME,
1225 META_MACHINE_ID,
1226 META_MACHINE_INFO,
1227 META_OS_RELEASE,
1228 _META_MAX,
1229 };
1230
1231 static const char *const paths[_META_MAX] = {
1232 [META_HOSTNAME] = "/etc/hostname\0",
1233 [META_MACHINE_ID] = "/etc/machine-id\0",
1234 [META_MACHINE_INFO] = "/etc/machine-info\0",
1235 [META_OS_RELEASE] = "/etc/os-release\0/usr/lib/os-release\0",
1236 };
1237
1238 _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
1239 _cleanup_(rmdir_and_freep) char *t = NULL;
1240 _cleanup_(sigkill_waitp) pid_t child = 0;
1241 sd_id128_t machine_id = SD_ID128_NULL;
1242 _cleanup_free_ char *hostname = NULL;
1243 unsigned n_meta_initialized = 0, k;
1244 int fds[2 * _META_MAX], r;
1245 siginfo_t si;
1246
1247 BLOCK_SIGNALS(SIGCHLD);
1248
1249 assert(m);
1250
1251 for (; n_meta_initialized < _META_MAX; n_meta_initialized ++)
1252 if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
1253 r = -errno;
1254 goto finish;
1255 }
1256
1257 r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
1258 if (r < 0)
1259 goto finish;
1260
1261 child = raw_clone(SIGCHLD|CLONE_NEWNS);
1262 if (child < 0) {
1263 r = -errno;
1264 goto finish;
1265 }
1266
1267 if (child == 0) {
1268
1269 (void) reset_all_signal_handlers();
1270 (void) reset_signal_mask();
1271 assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0);
1272
1273 /* Make sure we never propagate to the host */
1274 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
1275 _exit(EXIT_FAILURE);
1276
1277 r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY);
1278 if (r < 0)
1279 _exit(EXIT_FAILURE);
1280
1281 for (k = 0; k < _META_MAX; k++) {
1282 _cleanup_close_ int fd = -1;
1283 const char *p;
1284
1285 fds[2*k] = safe_close(fds[2*k]);
1286
1287 NULSTR_FOREACH(p, paths[k]) {
1288 _cleanup_free_ char *q = NULL;
1289
1290 r = chase_symlinks(p, t, CHASE_PREFIX_ROOT, &q);
1291 if (r < 0)
1292 continue;
1293
1294 fd = open(q, O_RDONLY|O_CLOEXEC|O_NOCTTY);
1295 if (fd >= 0)
1296 break;
1297 }
1298 if (fd < 0)
1299 continue;
1300
1301 r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0);
1302 if (r < 0)
1303 _exit(EXIT_FAILURE);
1304
1305 fds[2*k+1] = safe_close(fds[2*k+1]);
1306 }
1307
1308 _exit(EXIT_SUCCESS);
1309 }
1310
1311 for (k = 0; k < _META_MAX; k++) {
1312 _cleanup_fclose_ FILE *f = NULL;
1313
1314 fds[2*k+1] = safe_close(fds[2*k+1]);
1315
1316 f = fdopen(fds[2*k], "re");
1317 if (!f) {
1318 r = -errno;
1319 goto finish;
1320 }
1321
1322 fds[2*k] = -1;
1323
1324 switch (k) {
1325
1326 case META_HOSTNAME:
1327 r = read_etc_hostname_stream(f, &hostname);
1328 if (r < 0)
1329 log_debug_errno(r, "Failed to read /etc/hostname: %m");
1330
1331 break;
1332
1333 case META_MACHINE_ID: {
1334 _cleanup_free_ char *line = NULL;
1335
1336 r = read_line(f, LONG_LINE_MAX, &line);
1337 if (r < 0)
1338 log_debug_errno(r, "Failed to read /etc/machine-id: %m");
1339 else if (r == 33) {
1340 r = sd_id128_from_string(line, &machine_id);
1341 if (r < 0)
1342 log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
1343 } else if (r == 0)
1344 log_debug("/etc/machine-id file is empty.");
1345 else
1346 log_debug("/etc/machine-id has unexpected length %i.", r);
1347
1348 break;
1349 }
1350
1351 case META_MACHINE_INFO:
1352 r = load_env_file_pairs(f, "machine-info", NULL, &machine_info);
1353 if (r < 0)
1354 log_debug_errno(r, "Failed to read /etc/machine-info: %m");
1355
1356 break;
1357
1358 case META_OS_RELEASE:
1359 r = load_env_file_pairs(f, "os-release", NULL, &os_release);
1360 if (r < 0)
1361 log_debug_errno(r, "Failed to read OS release file: %m");
1362
1363 break;
1364 }
1365 }
1366
1367 r = wait_for_terminate(child, &si);
1368 if (r < 0)
1369 goto finish;
1370 child = 0;
1371
1372 if (si.si_code != CLD_EXITED || si.si_status != EXIT_SUCCESS) {
1373 r = -EPROTO;
1374 goto finish;
1375 }
1376
1377 free_and_replace(m->hostname, hostname);
1378 m->machine_id = machine_id;
1379 strv_free_and_replace(m->machine_info, machine_info);
1380 strv_free_and_replace(m->os_release, os_release);
1381
1382 finish:
1383 for (k = 0; k < n_meta_initialized; k++)
1384 safe_close_pair(fds + 2*k);
1385
1386 return r;
1387 }
1388
1389 static const char *const partition_designator_table[] = {
1390 [PARTITION_ROOT] = "root",
1391 [PARTITION_ROOT_SECONDARY] = "root-secondary",
1392 [PARTITION_HOME] = "home",
1393 [PARTITION_SRV] = "srv",
1394 [PARTITION_ESP] = "esp",
1395 [PARTITION_SWAP] = "swap",
1396 [PARTITION_ROOT_VERITY] = "root-verity",
1397 [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
1398 };
1399
1400 DEFINE_STRING_TABLE_LOOKUP(partition_designator, int);