]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/dissect-image.c
Merge pull request #7881 from keszybz/pcre
[thirdparty/systemd.git] / src / shared / dissect-image.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2016 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <sys/mount.h>
22 #include <sys/prctl.h>
23 #include <sys/wait.h>
24
25 #include "sd-id128.h"
26
27 #include "architecture.h"
28 #include "ask-password-api.h"
29 #include "blkid-util.h"
30 #include "blockdev-util.h"
31 #include "copy.h"
32 #include "crypt-util.h"
33 #include "def.h"
34 #include "device-nodes.h"
35 #include "dissect-image.h"
36 #include "fd-util.h"
37 #include "fileio.h"
38 #include "fs-util.h"
39 #include "gpt.h"
40 #include "hexdecoct.h"
41 #include "hostname-util.h"
42 #include "id128-util.h"
43 #include "linux-3.13/dm-ioctl.h"
44 #include "missing.h"
45 #include "mount-util.h"
46 #include "path-util.h"
47 #include "process-util.h"
48 #include "raw-clone.h"
49 #include "signal-util.h"
50 #include "stat-util.h"
51 #include "stdio-util.h"
52 #include "string-table.h"
53 #include "string-util.h"
54 #include "strv.h"
55 #include "udev-util.h"
56 #include "user-util.h"
57 #include "xattr-util.h"
58
int probe_filesystem(const char *node, char **ret_fstype) {
        /* Try to find device content type and return it in *ret_fstype. If nothing is found,
         * 0/NULL will be returned. -EUCLEAN will be returned for ambiguous results, and a
         * different error otherwise.
         *
         * On success with a detected type, 1 is returned and *ret_fstype is set to a newly allocated string
         * which the caller has to free. If compiled without blkid support, -EOPNOTSUPP is returned. */

#if HAVE_BLKID
        _cleanup_blkid_free_probe_ blkid_probe b = NULL;
        const char *fstype = NULL; /* must start out as NULL: the lookup below leaves it untouched on failure */
        int r;

        errno = 0;
        b = blkid_new_probe_from_filename(node);
        if (!b)
                return -errno ?: -ENOMEM;

        blkid_probe_enable_superblocks(b, 1);
        blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);

        errno = 0;
        r = blkid_do_safeprobe(b);
        if (r == 1) {
                log_debug("No type detected on partition %s", node);
                goto not_found;
        }
        if (r == -2) {
                log_debug("Results ambiguous for partition %s", node);
                return -EUCLEAN;
        }
        if (r != 0)
                return -errno ?: -EIO;

        (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);

        if (fstype) {
                char *t;

                t = strdup(fstype);
                if (!t)
                        return -ENOMEM;

                *ret_fstype = t;
                return 1;
        }

not_found:
        *ret_fstype = NULL;
        return 0;
#else
        return -EOPNOTSUPP;
#endif
}
110
111 int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DissectedImage **ret) {
112
113 #if HAVE_BLKID
114 sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL;
115 _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
116 bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
117 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
118 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
119 _cleanup_blkid_free_probe_ blkid_probe b = NULL;
120 _cleanup_udev_unref_ struct udev *udev = NULL;
121 _cleanup_free_ char *generic_node = NULL;
122 sd_id128_t generic_uuid = SD_ID128_NULL;
123 const char *pttype = NULL;
124 struct udev_list_entry *first, *item;
125 blkid_partlist pl;
126 int r, generic_nr;
127 struct stat st;
128 unsigned i;
129
130 assert(fd >= 0);
131 assert(ret);
132 assert(root_hash || root_hash_size == 0);
133
134 /* Probes a disk image, and returns information about what it found in *ret.
135 *
136 * Returns -ENOPKG if no suitable partition table or file system could be found.
137 * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */
138
139 if (root_hash) {
140 /* If a root hash is supplied, then we use the root partition that has a UUID that match the first
141 * 128bit of the root hash. And we use the verity partition that has a UUID that match the final
142 * 128bit. */
143
144 if (root_hash_size < sizeof(sd_id128_t))
145 return -EINVAL;
146
147 memcpy(&root_uuid, root_hash, sizeof(sd_id128_t));
148 memcpy(&verity_uuid, (const uint8_t*) root_hash + root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
149
150 if (sd_id128_is_null(root_uuid))
151 return -EINVAL;
152 if (sd_id128_is_null(verity_uuid))
153 return -EINVAL;
154 }
155
156 if (fstat(fd, &st) < 0)
157 return -errno;
158
159 if (!S_ISBLK(st.st_mode))
160 return -ENOTBLK;
161
162 b = blkid_new_probe();
163 if (!b)
164 return -ENOMEM;
165
166 errno = 0;
167 r = blkid_probe_set_device(b, fd, 0, 0);
168 if (r != 0)
169 return -errno ?: -ENOMEM;
170
171 if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
172 /* Look for file system superblocks, unless we only shall look for GPT partition tables */
173 blkid_probe_enable_superblocks(b, 1);
174 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
175 }
176
177 blkid_probe_enable_partitions(b, 1);
178 blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
179
180 errno = 0;
181 r = blkid_do_safeprobe(b);
182 if (IN_SET(r, -2, 1)) {
183 log_debug("Failed to identify any partition table.");
184 return -ENOPKG;
185 }
186 if (r != 0)
187 return -errno ?: -EIO;
188
189 m = new0(DissectedImage, 1);
190 if (!m)
191 return -ENOMEM;
192
193 if (!(flags & DISSECT_IMAGE_GPT_ONLY) &&
194 (flags & DISSECT_IMAGE_REQUIRE_ROOT)) {
195 const char *usage = NULL;
196
197 (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
198 if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
199 _cleanup_free_ char *t = NULL, *n = NULL;
200 const char *fstype = NULL;
201
202 /* OK, we have found a file system, that's our root partition then. */
203 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
204
205 if (fstype) {
206 t = strdup(fstype);
207 if (!t)
208 return -ENOMEM;
209 }
210
211 if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0)
212 return -ENOMEM;
213
214 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
215 .found = true,
216 .rw = true,
217 .partno = -1,
218 .architecture = _ARCHITECTURE_INVALID,
219 .fstype = t,
220 .node = n,
221 };
222
223 t = n = NULL;
224
225 m->encrypted = streq(fstype, "crypto_LUKS");
226
227 *ret = m;
228 m = NULL;
229
230 return 0;
231 }
232 }
233
234 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
235 if (!pttype)
236 return -ENOPKG;
237
238 is_gpt = streq_ptr(pttype, "gpt");
239 is_mbr = streq_ptr(pttype, "dos");
240
241 if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
242 return -ENOPKG;
243
244 errno = 0;
245 pl = blkid_probe_get_partitions(b);
246 if (!pl)
247 return -errno ?: -ENOMEM;
248
249 udev = udev_new();
250 if (!udev)
251 return -errno;
252
253 d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
254 if (!d)
255 return -ENOMEM;
256
257 for (i = 0;; i++) {
258 int n, z;
259
260 if (i >= 10) {
261 log_debug("Kernel partitions never appeared.");
262 return -ENXIO;
263 }
264
265 e = udev_enumerate_new(udev);
266 if (!e)
267 return -errno;
268
269 r = udev_enumerate_add_match_parent(e, d);
270 if (r < 0)
271 return r;
272
273 r = udev_enumerate_scan_devices(e);
274 if (r < 0)
275 return r;
276
277 /* Count the partitions enumerated by the kernel */
278 n = 0;
279 first = udev_enumerate_get_list_entry(e);
280 udev_list_entry_foreach(item, first)
281 n++;
282
283 /* Count the partitions enumerated by blkid */
284 z = blkid_partlist_numof_partitions(pl);
285 if (n == z + 1)
286 break;
287 if (n > z + 1) {
288 log_debug("blkid and kernel partition list do not match.");
289 return -EIO;
290 }
291 if (n < z + 1) {
292 unsigned j = 0;
293
294 /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running
295 * or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a
296 * synchronous call that waits until probing is complete. */
297
298 for (;;) {
299 if (j++ > 20)
300 return -EBUSY;
301
302 if (ioctl(fd, BLKRRPART, 0) < 0) {
303 r = -errno;
304
305 if (r == -EINVAL) {
306 struct loop_info64 info;
307
308 /* If we are running on a loop device that has partition scanning off,
309 * return an explicit recognizable error about this, so that callers
310 * can generate a proper message explaining the situation. */
311
312 if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
313 log_debug("Device is loop device and partition scanning is off!");
314 return -EPROTONOSUPPORT;
315 }
316 }
317 if (r != -EBUSY)
318 return r;
319 } else
320 break;
321
322 /* If something else has the device open, such as an udev rule, the ioctl will return
323 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a
324 * bit, and try again.
325 *
326 * This is really something they should fix in the kernel! */
327
328 (void) usleep(50 * USEC_PER_MSEC);
329 }
330 }
331
332 e = udev_enumerate_unref(e);
333 }
334
335 first = udev_enumerate_get_list_entry(e);
336 udev_list_entry_foreach(item, first) {
337 _cleanup_udev_device_unref_ struct udev_device *q;
338 unsigned long long pflags;
339 blkid_partition pp;
340 const char *node, *sysname;
341 dev_t qn;
342 int nr;
343
344 q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
345 if (!q)
346 return -errno;
347
348 qn = udev_device_get_devnum(q);
349 if (major(qn) == 0)
350 continue;
351
352 if (st.st_rdev == qn)
353 continue;
354
355 /* Filter out weird MMC RPMB partitions, which cannot reasonably be read, see
356 * https://github.com/systemd/systemd/issues/5806 */
357 sysname = udev_device_get_sysname(q);
358 if (sysname && startswith(sysname, "mmcblk") &&
359 (endswith(sysname, "rpmb") || endswith(sysname, "boot0" ) || endswith(sysname, "boot1")))
360 continue;
361
362 node = udev_device_get_devnode(q);
363 if (!node)
364 continue;
365
366 pp = blkid_partlist_devno_to_partition(pl, qn);
367 if (!pp)
368 continue;
369
370 pflags = blkid_partition_get_flags(pp);
371
372 nr = blkid_partition_get_partno(pp);
373 if (nr < 0)
374 continue;
375
376 if (is_gpt) {
377 int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID;
378 const char *stype, *sid, *fstype = NULL;
379 sd_id128_t type_id, id;
380 bool rw = true;
381
382 sid = blkid_partition_get_uuid(pp);
383 if (!sid)
384 continue;
385 if (sd_id128_from_string(sid, &id) < 0)
386 continue;
387
388 stype = blkid_partition_get_type_string(pp);
389 if (!stype)
390 continue;
391 if (sd_id128_from_string(stype, &type_id) < 0)
392 continue;
393
394 if (sd_id128_equal(type_id, GPT_HOME)) {
395
396 if (pflags & GPT_FLAG_NO_AUTO)
397 continue;
398
399 designator = PARTITION_HOME;
400 rw = !(pflags & GPT_FLAG_READ_ONLY);
401 } else if (sd_id128_equal(type_id, GPT_SRV)) {
402
403 if (pflags & GPT_FLAG_NO_AUTO)
404 continue;
405
406 designator = PARTITION_SRV;
407 rw = !(pflags & GPT_FLAG_READ_ONLY);
408 } else if (sd_id128_equal(type_id, GPT_ESP)) {
409
410 /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is not defined
411 * there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as recommended by the
412 * UEFI spec (See "12.3.3 Number and Location of System Partitions"). */
413
414 if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
415 continue;
416
417 designator = PARTITION_ESP;
418 fstype = "vfat";
419 }
420 #ifdef GPT_ROOT_NATIVE
421 else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
422
423 if (pflags & GPT_FLAG_NO_AUTO)
424 continue;
425
426 /* If a root ID is specified, ignore everything but the root id */
427 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
428 continue;
429
430 designator = PARTITION_ROOT;
431 architecture = native_architecture();
432 rw = !(pflags & GPT_FLAG_READ_ONLY);
433 } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
434
435 if (pflags & GPT_FLAG_NO_AUTO)
436 continue;
437
438 m->can_verity = true;
439
440 /* Ignore verity unless a root hash is specified */
441 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
442 continue;
443
444 designator = PARTITION_ROOT_VERITY;
445 fstype = "DM_verity_hash";
446 architecture = native_architecture();
447 rw = false;
448 }
449 #endif
450 #ifdef GPT_ROOT_SECONDARY
451 else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
452
453 if (pflags & GPT_FLAG_NO_AUTO)
454 continue;
455
456 /* If a root ID is specified, ignore everything but the root id */
457 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
458 continue;
459
460 designator = PARTITION_ROOT_SECONDARY;
461 architecture = SECONDARY_ARCHITECTURE;
462 rw = !(pflags & GPT_FLAG_READ_ONLY);
463 } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
464
465 if (pflags & GPT_FLAG_NO_AUTO)
466 continue;
467
468 m->can_verity = true;
469
470 /* Ignore verity unless root has is specified */
471 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
472 continue;
473
474 designator = PARTITION_ROOT_SECONDARY_VERITY;
475 fstype = "DM_verity_hash";
476 architecture = SECONDARY_ARCHITECTURE;
477 rw = false;
478 }
479 #endif
480 else if (sd_id128_equal(type_id, GPT_SWAP)) {
481
482 if (pflags & GPT_FLAG_NO_AUTO)
483 continue;
484
485 designator = PARTITION_SWAP;
486 fstype = "swap";
487 } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
488
489 if (pflags & GPT_FLAG_NO_AUTO)
490 continue;
491
492 if (generic_node)
493 multiple_generic = true;
494 else {
495 generic_nr = nr;
496 generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
497 generic_uuid = id;
498 generic_node = strdup(node);
499 if (!generic_node)
500 return -ENOMEM;
501 }
502 }
503
504 if (designator != _PARTITION_DESIGNATOR_INVALID) {
505 _cleanup_free_ char *t = NULL, *n = NULL;
506
507 /* First one wins */
508 if (m->partitions[designator].found)
509 continue;
510
511 if (fstype) {
512 t = strdup(fstype);
513 if (!t)
514 return -ENOMEM;
515 }
516
517 n = strdup(node);
518 if (!n)
519 return -ENOMEM;
520
521 m->partitions[designator] = (DissectedPartition) {
522 .found = true,
523 .partno = nr,
524 .rw = rw,
525 .architecture = architecture,
526 .node = n,
527 .fstype = t,
528 .uuid = id,
529 };
530
531 n = t = NULL;
532 }
533
534 } else if (is_mbr) {
535
536 if (pflags != 0x80) /* Bootable flag */
537 continue;
538
539 if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */
540 continue;
541
542 if (generic_node)
543 multiple_generic = true;
544 else {
545 generic_nr = nr;
546 generic_rw = true;
547 generic_node = strdup(node);
548 if (!generic_node)
549 return -ENOMEM;
550 }
551 }
552 }
553
554 if (!m->partitions[PARTITION_ROOT].found) {
555 /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not
556 * either, then check if there's a single generic one, and use that. */
557
558 if (m->partitions[PARTITION_ROOT_VERITY].found)
559 return -EADDRNOTAVAIL;
560
561 if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
562 m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
563 zero(m->partitions[PARTITION_ROOT_SECONDARY]);
564
565 m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
566 zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
567
568 } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) {
569
570 /* If the root has was set, then we won't fallback to a generic node, because the root hash
571 * decides */
572 if (root_hash)
573 return -EADDRNOTAVAIL;
574
575 /* If we didn't find a generic node, then we can't fix this up either */
576 if (!generic_node)
577 return -ENXIO;
578
579 /* If we didn't find a properly marked root partition, but we did find a single suitable
580 * generic Linux partition, then use this as root partition, if the caller asked for it. */
581 if (multiple_generic)
582 return -ENOTUNIQ;
583
584 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
585 .found = true,
586 .rw = generic_rw,
587 .partno = generic_nr,
588 .architecture = _ARCHITECTURE_INVALID,
589 .node = generic_node,
590 .uuid = generic_uuid,
591 };
592
593 generic_node = NULL;
594 }
595 }
596
597 if (root_hash) {
598 if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
599 return -EADDRNOTAVAIL;
600
601 /* If we found the primary root with the hash, then we definitely want to suppress any secondary root
602 * (which would be weird, after all the root hash should only be assigned to one pair of
603 * partitions... */
604 m->partitions[PARTITION_ROOT_SECONDARY].found = false;
605 m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
606
607 /* If we found a verity setup, then the root partition is necessarily read-only. */
608 m->partitions[PARTITION_ROOT].rw = false;
609
610 m->verity = true;
611 }
612
613 blkid_free_probe(b);
614 b = NULL;
615
616 /* Fill in file system types if we don't know them yet. */
617 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
618 DissectedPartition *p = m->partitions + i;
619
620 if (!p->found)
621 continue;
622
623 if (!p->fstype && p->node) {
624 r = probe_filesystem(p->node, &p->fstype);
625 if (r < 0 && r != -EUCLEAN)
626 return r;
627 }
628
629 if (streq_ptr(p->fstype, "crypto_LUKS"))
630 m->encrypted = true;
631
632 if (p->fstype && fstype_is_ro(p->fstype))
633 p->rw = false;
634 }
635
636 *ret = m;
637 m = NULL;
638
639 return 0;
640 #else
641 return -EOPNOTSUPP;
642 #endif
643 }
644
645 DissectedImage* dissected_image_unref(DissectedImage *m) {
646 unsigned i;
647
648 if (!m)
649 return NULL;
650
651 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
652 free(m->partitions[i].fstype);
653 free(m->partitions[i].node);
654 free(m->partitions[i].decrypted_fstype);
655 free(m->partitions[i].decrypted_node);
656 }
657
658 free(m->hostname);
659 strv_free(m->machine_info);
660 strv_free(m->os_release);
661
662 return mfree(m);
663 }
664
static int is_loop_device(const char *path) {
        char s[SYS_BLOCK_PATH_MAX("/../loop/")];
        struct stat st;

        /* Checks whether the block device node at 'path' is a loop device, or a partition of one.
         *
         * Returns > 0 if so, 0 if not, -ENOTBLK if the path does not refer to a block device node, and
         * another negative errno-style error if the check itself failed. */

        assert(path);

        if (stat(path, &st) < 0)
                return -errno;

        if (!S_ISBLK(st.st_mode))
                return -ENOTBLK;

        /* We are interested in the device the node refers to (st_rdev), not in the device of the file system
         * the node happens to be stored on (st_dev). */
        xsprintf_sys_block_path(s, "/loop/", st.st_rdev);
        if (access(s, F_OK) < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
                xsprintf_sys_block_path(s, "/../loop/", st.st_rdev);
                if (access(s, F_OK) < 0)
                        return errno == ENOENT ? false : -errno;
        }

        return true;
}
690
691 static int mount_partition(
692 DissectedPartition *m,
693 const char *where,
694 const char *directory,
695 uid_t uid_shift,
696 DissectImageFlags flags) {
697
698 _cleanup_free_ char *chased = NULL, *options = NULL;
699 const char *p, *node, *fstype;
700 bool rw;
701 int r;
702
703 assert(m);
704 assert(where);
705
706 node = m->decrypted_node ?: m->node;
707 fstype = m->decrypted_fstype ?: m->fstype;
708
709 if (!m->found || !node || !fstype)
710 return 0;
711
712 /* Stacked encryption? Yuck */
713 if (streq_ptr(fstype, "crypto_LUKS"))
714 return -ELOOP;
715
716 rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY);
717
718 if (directory) {
719 r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased);
720 if (r < 0)
721 return r;
722
723 p = chased;
724 } else
725 p = where;
726
727 /* If requested, turn on discard support. */
728 if (fstype_can_discard(fstype) &&
729 ((flags & DISSECT_IMAGE_DISCARD) ||
730 ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node)))) {
731 options = strdup("discard");
732 if (!options)
733 return -ENOMEM;
734 }
735
736 if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) {
737 _cleanup_free_ char *uid_option = NULL;
738
739 if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
740 return -ENOMEM;
741
742 if (!strextend_with_separator(&options, ",", uid_option, NULL))
743 return -ENOMEM;
744 }
745
746 return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
747 }
748
749 int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
750 int r;
751
752 assert(m);
753 assert(where);
754
755 if (!m->partitions[PARTITION_ROOT].found)
756 return -ENXIO;
757
758 if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
759 r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
760 if (r < 0)
761 return r;
762 }
763
764 if ((flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY))
765 return 0;
766
767 r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
768 if (r < 0)
769 return r;
770
771 r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags);
772 if (r < 0)
773 return r;
774
775 if (m->partitions[PARTITION_ESP].found) {
776 const char *mp;
777
778 /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
779
780 FOREACH_STRING(mp, "/efi", "/boot") {
781 _cleanup_free_ char *p = NULL;
782
783 r = chase_symlinks(mp, where, CHASE_PREFIX_ROOT, &p);
784 if (r < 0)
785 continue;
786
787 r = dir_is_empty(p);
788 if (r > 0) {
789 r = mount_partition(m->partitions + PARTITION_ESP, where, mp, uid_shift, flags);
790 if (r < 0)
791 return r;
792 }
793 }
794 }
795
796 return 0;
797 }
798
799 #if HAVE_LIBCRYPTSETUP
/* One device-mapper device set up on behalf of an image partition. */
typedef struct DecryptedPartition {
        struct crypt_device *device; /* libcryptsetup handle of the activated device */
        char *name;                  /* name the device was activated under */
        bool relinquished;           /* if true, the device is no longer deactivated when the object is released */
} DecryptedPartition;

/* The set of device-mapper devices that were set up for one dissected image. */
struct DecryptedImage {
        DecryptedPartition *decrypted; /* array of activated devices */
        size_t n_decrypted;            /* number of entries in use */
        size_t n_allocated;            /* allocation bookkeeping for the array */
};
811 #endif
812
813 DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
814 #if HAVE_LIBCRYPTSETUP
815 size_t i;
816 int r;
817
818 if (!d)
819 return NULL;
820
821 for (i = 0; i < d->n_decrypted; i++) {
822 DecryptedPartition *p = d->decrypted + i;
823
824 if (p->device && p->name && !p->relinquished) {
825 r = crypt_deactivate(p->device, p->name);
826 if (r < 0)
827 log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
828 }
829
830 if (p->device)
831 crypt_free(p->device);
832 free(p->name);
833 }
834
835 free(d);
836 #endif
837 return NULL;
838 }
839
840 #if HAVE_LIBCRYPTSETUP
841
842 static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
843 _cleanup_free_ char *name = NULL, *node = NULL;
844 const char *base;
845
846 assert(original_node);
847 assert(suffix);
848 assert(ret_name);
849 assert(ret_node);
850
851 base = strrchr(original_node, '/');
852 if (!base)
853 return -EINVAL;
854 base++;
855 if (isempty(base))
856 return -EINVAL;
857
858 name = strjoin(base, suffix);
859 if (!name)
860 return -ENOMEM;
861 if (!filename_is_valid(name))
862 return -EINVAL;
863
864 node = strjoin(crypt_get_dir(), "/", name);
865 if (!node)
866 return -ENOMEM;
867
868 *ret_name = name;
869 *ret_node = node;
870
871 name = node = NULL;
872 return 0;
873 }
874
875 static int decrypt_partition(
876 DissectedPartition *m,
877 const char *passphrase,
878 DissectImageFlags flags,
879 DecryptedImage *d) {
880
881 _cleanup_free_ char *node = NULL, *name = NULL;
882 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
883 int r;
884
885 assert(m);
886 assert(d);
887
888 if (!m->found || !m->node || !m->fstype)
889 return 0;
890
891 if (!streq(m->fstype, "crypto_LUKS"))
892 return 0;
893
894 if (!passphrase)
895 return -ENOKEY;
896
897 r = make_dm_name_and_node(m->node, "-decrypted", &name, &node);
898 if (r < 0)
899 return r;
900
901 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
902 return -ENOMEM;
903
904 r = crypt_init(&cd, m->node);
905 if (r < 0)
906 return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
907
908 r = crypt_load(cd, CRYPT_LUKS, NULL);
909 if (r < 0)
910 return log_debug_errno(r, "Failed to load LUKS metadata: %m");
911
912 r = crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
913 ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
914 ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
915 if (r < 0) {
916 log_debug_errno(r, "Failed to activate LUKS device: %m");
917 return r == -EPERM ? -EKEYREJECTED : r;
918 }
919
920 d->decrypted[d->n_decrypted].name = name;
921 name = NULL;
922
923 d->decrypted[d->n_decrypted].device = cd;
924 cd = NULL;
925 d->n_decrypted++;
926
927 m->decrypted_node = node;
928 node = NULL;
929
930 return 0;
931 }
932
933 static int verity_partition(
934 DissectedPartition *m,
935 DissectedPartition *v,
936 const void *root_hash,
937 size_t root_hash_size,
938 DissectImageFlags flags,
939 DecryptedImage *d) {
940
941 _cleanup_free_ char *node = NULL, *name = NULL;
942 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
943 int r;
944
945 assert(m);
946 assert(v);
947
948 if (!root_hash)
949 return 0;
950
951 if (!m->found || !m->node || !m->fstype)
952 return 0;
953 if (!v->found || !v->node || !v->fstype)
954 return 0;
955
956 if (!streq(v->fstype, "DM_verity_hash"))
957 return 0;
958
959 r = make_dm_name_and_node(m->node, "-verity", &name, &node);
960 if (r < 0)
961 return r;
962
963 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
964 return -ENOMEM;
965
966 r = crypt_init(&cd, v->node);
967 if (r < 0)
968 return r;
969
970 r = crypt_load(cd, CRYPT_VERITY, NULL);
971 if (r < 0)
972 return r;
973
974 r = crypt_set_data_device(cd, m->node);
975 if (r < 0)
976 return r;
977
978 r = crypt_activate_by_volume_key(cd, name, root_hash, root_hash_size, CRYPT_ACTIVATE_READONLY);
979 if (r < 0)
980 return r;
981
982 d->decrypted[d->n_decrypted].name = name;
983 name = NULL;
984
985 d->decrypted[d->n_decrypted].device = cd;
986 cd = NULL;
987 d->n_decrypted++;
988
989 m->decrypted_node = node;
990 node = NULL;
991
992 return 0;
993 }
994 #endif
995
996 int dissected_image_decrypt(
997 DissectedImage *m,
998 const char *passphrase,
999 const void *root_hash,
1000 size_t root_hash_size,
1001 DissectImageFlags flags,
1002 DecryptedImage **ret) {
1003
1004 #if HAVE_LIBCRYPTSETUP
1005 _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
1006 unsigned i;
1007 int r;
1008 #endif
1009
1010 assert(m);
1011 assert(root_hash || root_hash_size == 0);
1012
1013 /* Returns:
1014 *
1015 * = 0 → There was nothing to decrypt
1016 * > 0 → Decrypted successfully
1017 * -ENOKEY → There's something to decrypt but no key was supplied
1018 * -EKEYREJECTED → Passed key was not correct
1019 */
1020
1021 if (root_hash && root_hash_size < sizeof(sd_id128_t))
1022 return -EINVAL;
1023
1024 if (!m->encrypted && !m->verity) {
1025 *ret = NULL;
1026 return 0;
1027 }
1028
1029 #if HAVE_LIBCRYPTSETUP
1030 d = new0(DecryptedImage, 1);
1031 if (!d)
1032 return -ENOMEM;
1033
1034 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
1035 DissectedPartition *p = m->partitions + i;
1036 int k;
1037
1038 if (!p->found)
1039 continue;
1040
1041 r = decrypt_partition(p, passphrase, flags, d);
1042 if (r < 0)
1043 return r;
1044
1045 k = PARTITION_VERITY_OF(i);
1046 if (k >= 0) {
1047 r = verity_partition(p, m->partitions + k, root_hash, root_hash_size, flags, d);
1048 if (r < 0)
1049 return r;
1050 }
1051
1052 if (!p->decrypted_fstype && p->decrypted_node) {
1053 r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype);
1054 if (r < 0 && r != -EUCLEAN)
1055 return r;
1056 }
1057 }
1058
1059 *ret = d;
1060 d = NULL;
1061
1062 return 1;
1063 #else
1064 return -EOPNOTSUPP;
1065 #endif
1066 }
1067
1068 int dissected_image_decrypt_interactively(
1069 DissectedImage *m,
1070 const char *passphrase,
1071 const void *root_hash,
1072 size_t root_hash_size,
1073 DissectImageFlags flags,
1074 DecryptedImage **ret) {
1075
1076 _cleanup_strv_free_erase_ char **z = NULL;
1077 int n = 3, r;
1078
1079 if (passphrase)
1080 n--;
1081
1082 for (;;) {
1083 r = dissected_image_decrypt(m, passphrase, root_hash, root_hash_size, flags, ret);
1084 if (r >= 0)
1085 return r;
1086 if (r == -EKEYREJECTED)
1087 log_error_errno(r, "Incorrect passphrase, try again!");
1088 else if (r != -ENOKEY) {
1089 log_error_errno(r, "Failed to decrypt image: %m");
1090 return r;
1091 }
1092
1093 if (--n < 0) {
1094 log_error("Too many retries.");
1095 return -EKEYREJECTED;
1096 }
1097
1098 z = strv_free(z);
1099
1100 r = ask_password_auto("Please enter image passphrase!", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z);
1101 if (r < 0)
1102 return log_error_errno(r, "Failed to query for passphrase: %m");
1103
1104 passphrase = z[0];
1105 }
1106 }
1107
1108 #if HAVE_LIBCRYPTSETUP
1109 static int deferred_remove(DecryptedPartition *p) {
1110
1111 struct dm_ioctl dm = {
1112 .version = {
1113 DM_VERSION_MAJOR,
1114 DM_VERSION_MINOR,
1115 DM_VERSION_PATCHLEVEL
1116 },
1117 .data_size = sizeof(dm),
1118 .flags = DM_DEFERRED_REMOVE,
1119 };
1120
1121 _cleanup_close_ int fd = -1;
1122
1123 assert(p);
1124
1125 /* Unfortunately, libcryptsetup doesn't provide a proper API for this, hence call the ioctl() directly. */
1126
1127 fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
1128 if (fd < 0)
1129 return -errno;
1130
1131 strncpy(dm.name, p->name, sizeof(dm.name));
1132
1133 if (ioctl(fd, DM_DEV_REMOVE, &dm))
1134 return -errno;
1135
1136 return 0;
1137 }
1138 #endif
1139
1140 int decrypted_image_relinquish(DecryptedImage *d) {
1141
1142 #if HAVE_LIBCRYPTSETUP
1143 size_t i;
1144 int r;
1145 #endif
1146
1147 assert(d);
1148
1149 /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so
1150 * that we don't clean it up ourselves either anymore */
1151
1152 #if HAVE_LIBCRYPTSETUP
1153 for (i = 0; i < d->n_decrypted; i++) {
1154 DecryptedPartition *p = d->decrypted + i;
1155
1156 if (p->relinquished)
1157 continue;
1158
1159 r = deferred_remove(p);
1160 if (r < 0)
1161 return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
1162
1163 p->relinquished = true;
1164 }
1165 #endif
1166
1167 return 0;
1168 }
1169
1170 int root_hash_load(const char *image, void **ret, size_t *ret_size) {
1171 _cleanup_free_ char *text = NULL;
1172 _cleanup_free_ void *k = NULL;
1173 size_t l;
1174 int r;
1175
1176 assert(image);
1177 assert(ret);
1178 assert(ret_size);
1179
1180 if (is_device_path(image)) {
1181 /* If we are asked to load the root hash for a device node, exit early */
1182 *ret = NULL;
1183 *ret_size = 0;
1184 return 0;
1185 }
1186
1187 r = getxattr_malloc(image, "user.verity.roothash", &text, true);
1188 if (r < 0) {
1189 char *fn, *e, *n;
1190
1191 if (!IN_SET(r, -ENODATA, -EOPNOTSUPP, -ENOENT))
1192 return r;
1193
1194 fn = newa(char, strlen(image) + STRLEN(".roothash") + 1);
1195 n = stpcpy(fn, image);
1196 e = endswith(fn, ".raw");
1197 if (e)
1198 n = e;
1199
1200 strcpy(n, ".roothash");
1201
1202 r = read_one_line_file(fn, &text);
1203 if (r == -ENOENT) {
1204 *ret = NULL;
1205 *ret_size = 0;
1206 return 0;
1207 }
1208 if (r < 0)
1209 return r;
1210 }
1211
1212 r = unhexmem(text, strlen(text), &k, &l);
1213 if (r < 0)
1214 return r;
1215 if (l < sizeof(sd_id128_t))
1216 return -EINVAL;
1217
1218 *ret = k;
1219 *ret_size = l;
1220
1221 k = NULL;
1222
1223 return 1;
1224 }
1225
1226 int dissected_image_acquire_metadata(DissectedImage *m) {
1227
1228 enum {
1229 META_HOSTNAME,
1230 META_MACHINE_ID,
1231 META_MACHINE_INFO,
1232 META_OS_RELEASE,
1233 _META_MAX,
1234 };
1235
1236 static const char *const paths[_META_MAX] = {
1237 [META_HOSTNAME] = "/etc/hostname\0",
1238 [META_MACHINE_ID] = "/etc/machine-id\0",
1239 [META_MACHINE_INFO] = "/etc/machine-info\0",
1240 [META_OS_RELEASE] = "/etc/os-release\0/usr/lib/os-release\0",
1241 };
1242
1243 _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
1244 _cleanup_(rmdir_and_freep) char *t = NULL;
1245 _cleanup_(sigkill_waitp) pid_t child = 0;
1246 sd_id128_t machine_id = SD_ID128_NULL;
1247 _cleanup_free_ char *hostname = NULL;
1248 unsigned n_meta_initialized = 0, k;
1249 int fds[2 * _META_MAX], r;
1250
1251 BLOCK_SIGNALS(SIGCHLD);
1252
1253 assert(m);
1254
1255 for (; n_meta_initialized < _META_MAX; n_meta_initialized ++)
1256 if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
1257 r = -errno;
1258 goto finish;
1259 }
1260
1261 r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
1262 if (r < 0)
1263 goto finish;
1264
1265 r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS, &child);
1266 if (r < 0)
1267 goto finish;
1268 if (r == 0) {
1269 /* Make sure we never propagate to the host */
1270 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
1271 _exit(EXIT_FAILURE);
1272
1273 r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY);
1274 if (r < 0)
1275 _exit(EXIT_FAILURE);
1276
1277 for (k = 0; k < _META_MAX; k++) {
1278 _cleanup_close_ int fd = -1;
1279 const char *p;
1280
1281 fds[2*k] = safe_close(fds[2*k]);
1282
1283 NULSTR_FOREACH(p, paths[k]) {
1284 _cleanup_free_ char *q = NULL;
1285
1286 r = chase_symlinks(p, t, CHASE_PREFIX_ROOT, &q);
1287 if (r < 0)
1288 continue;
1289
1290 fd = open(q, O_RDONLY|O_CLOEXEC|O_NOCTTY);
1291 if (fd >= 0)
1292 break;
1293 }
1294 if (fd < 0)
1295 continue;
1296
1297 r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0);
1298 if (r < 0)
1299 _exit(EXIT_FAILURE);
1300
1301 fds[2*k+1] = safe_close(fds[2*k+1]);
1302 }
1303
1304 _exit(EXIT_SUCCESS);
1305 }
1306
1307 for (k = 0; k < _META_MAX; k++) {
1308 _cleanup_fclose_ FILE *f = NULL;
1309
1310 fds[2*k+1] = safe_close(fds[2*k+1]);
1311
1312 f = fdopen(fds[2*k], "re");
1313 if (!f) {
1314 r = -errno;
1315 goto finish;
1316 }
1317
1318 fds[2*k] = -1;
1319
1320 switch (k) {
1321
1322 case META_HOSTNAME:
1323 r = read_etc_hostname_stream(f, &hostname);
1324 if (r < 0)
1325 log_debug_errno(r, "Failed to read /etc/hostname: %m");
1326
1327 break;
1328
1329 case META_MACHINE_ID: {
1330 _cleanup_free_ char *line = NULL;
1331
1332 r = read_line(f, LONG_LINE_MAX, &line);
1333 if (r < 0)
1334 log_debug_errno(r, "Failed to read /etc/machine-id: %m");
1335 else if (r == 33) {
1336 r = sd_id128_from_string(line, &machine_id);
1337 if (r < 0)
1338 log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
1339 } else if (r == 0)
1340 log_debug("/etc/machine-id file is empty.");
1341 else
1342 log_debug("/etc/machine-id has unexpected length %i.", r);
1343
1344 break;
1345 }
1346
1347 case META_MACHINE_INFO:
1348 r = load_env_file_pairs(f, "machine-info", NULL, &machine_info);
1349 if (r < 0)
1350 log_debug_errno(r, "Failed to read /etc/machine-info: %m");
1351
1352 break;
1353
1354 case META_OS_RELEASE:
1355 r = load_env_file_pairs(f, "os-release", NULL, &os_release);
1356 if (r < 0)
1357 log_debug_errno(r, "Failed to read OS release file: %m");
1358
1359 break;
1360 }
1361 }
1362
1363 r = wait_for_terminate_and_check("(sd-dissect)", child, 0);
1364 child = 0;
1365 if (r < 0)
1366 goto finish;
1367 if (r != EXIT_SUCCESS)
1368 return -EPROTO;
1369
1370 free_and_replace(m->hostname, hostname);
1371 m->machine_id = machine_id;
1372 strv_free_and_replace(m->machine_info, machine_info);
1373 strv_free_and_replace(m->os_release, os_release);
1374
1375 finish:
1376 for (k = 0; k < n_meta_initialized; k++)
1377 safe_close_pair(fds + 2*k);
1378
1379 return r;
1380 }
1381
1382 static const char *const partition_designator_table[] = {
1383 [PARTITION_ROOT] = "root",
1384 [PARTITION_ROOT_SECONDARY] = "root-secondary",
1385 [PARTITION_HOME] = "home",
1386 [PARTITION_SRV] = "srv",
1387 [PARTITION_ESP] = "esp",
1388 [PARTITION_SWAP] = "swap",
1389 [PARTITION_ROOT_VERITY] = "root-verity",
1390 [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
1391 };
1392
1393 DEFINE_STRING_TABLE_LOOKUP(partition_designator, int);