]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/dissect-image.c
log: minimize includes in log.h
[thirdparty/systemd.git] / src / shared / dissect-image.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2016 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <sys/mount.h>
22 #include <sys/prctl.h>
23 #include <sys/wait.h>
24
25 #include "sd-id128.h"
26
27 #include "architecture.h"
28 #include "ask-password-api.h"
29 #include "blkid-util.h"
30 #include "blockdev-util.h"
31 #include "copy.h"
32 #include "crypt-util.h"
33 #include "def.h"
34 #include "device-nodes.h"
35 #include "dissect-image.h"
36 #include "fd-util.h"
37 #include "fileio.h"
38 #include "fs-util.h"
39 #include "gpt.h"
40 #include "hexdecoct.h"
41 #include "hostname-util.h"
42 #include "id128-util.h"
43 #include "linux-3.13/dm-ioctl.h"
44 #include "missing.h"
45 #include "mount-util.h"
46 #include "path-util.h"
47 #include "process-util.h"
48 #include "raw-clone.h"
49 #include "signal-util.h"
50 #include "stat-util.h"
51 #include "stdio-util.h"
52 #include "string-table.h"
53 #include "string-util.h"
54 #include "strv.h"
55 #include "udev-util.h"
56 #include "user-util.h"
57 #include "xattr-util.h"
58
/* Probes the block device (or file) at 'node' for its content type.
 *
 * Returns 1 and stores a newly allocated type string in *ret_fstype if a type was detected, 0 and
 * *ret_fstype = NULL if nothing was found, -EUCLEAN if the results were ambiguous, and another
 * negative errno-style error otherwise. Returns -EOPNOTSUPP when built without libblkid. */
int probe_filesystem(const char *node, char **ret_fstype) {
        /* Try to find device content type and return it in *ret_fstype. If nothing is found,
         * 0/NULL will be returned. -EUCLEAN will be returned for ambiguous results, and a
         * different error otherwise. */

#if HAVE_BLKID
        _cleanup_blkid_free_probe_ blkid_probe b = NULL;
        /* Must be initialized: the result of blkid_probe_lookup_value() below is ignored, hence the
         * variable needs a well-defined value in case the lookup does not fill it in. */
        const char *fstype = NULL;
        int r;

        errno = 0;
        b = blkid_new_probe_from_filename(node);
        if (!b)
                return -errno ?: -ENOMEM;

        blkid_probe_enable_superblocks(b, 1);
        blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);

        errno = 0;
        r = blkid_do_safeprobe(b);
        if (r == 1) {
                log_debug("No type detected on partition %s", node);
                goto not_found;
        }
        if (r == -2) {
                log_debug("Results ambiguous for partition %s", node);
                return -EUCLEAN;
        }
        if (r != 0)
                return -errno ?: -EIO;

        (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);

        if (fstype) {
                char *t;

                t = strdup(fstype);
                if (!t)
                        return -ENOMEM;

                *ret_fstype = t;
                return 1;
        }

not_found:
        *ret_fstype = NULL;
        return 0;
#else
        return -EOPNOTSUPP;
#endif
}
110
111 int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DissectedImage **ret) {
112
113 #if HAVE_BLKID
114 sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL;
115 _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
116 bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
117 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
118 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
119 _cleanup_blkid_free_probe_ blkid_probe b = NULL;
120 _cleanup_udev_unref_ struct udev *udev = NULL;
121 _cleanup_free_ char *generic_node = NULL;
122 sd_id128_t generic_uuid = SD_ID128_NULL;
123 const char *pttype = NULL;
124 struct udev_list_entry *first, *item;
125 blkid_partlist pl;
126 int r, generic_nr;
127 struct stat st;
128 unsigned i;
129
130 assert(fd >= 0);
131 assert(ret);
132 assert(root_hash || root_hash_size == 0);
133
134 /* Probes a disk image, and returns information about what it found in *ret.
135 *
136 * Returns -ENOPKG if no suitable partition table or file system could be found.
137 * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */
138
139 if (root_hash) {
140 /* If a root hash is supplied, then we use the root partition that has a UUID that match the first
141 * 128bit of the root hash. And we use the verity partition that has a UUID that match the final
142 * 128bit. */
143
144 if (root_hash_size < sizeof(sd_id128_t))
145 return -EINVAL;
146
147 memcpy(&root_uuid, root_hash, sizeof(sd_id128_t));
148 memcpy(&verity_uuid, (const uint8_t*) root_hash + root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
149
150 if (sd_id128_is_null(root_uuid))
151 return -EINVAL;
152 if (sd_id128_is_null(verity_uuid))
153 return -EINVAL;
154 }
155
156 if (fstat(fd, &st) < 0)
157 return -errno;
158
159 if (!S_ISBLK(st.st_mode))
160 return -ENOTBLK;
161
162 b = blkid_new_probe();
163 if (!b)
164 return -ENOMEM;
165
166 errno = 0;
167 r = blkid_probe_set_device(b, fd, 0, 0);
168 if (r != 0)
169 return -errno ?: -ENOMEM;
170
171 if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
172 /* Look for file system superblocks, unless we only shall look for GPT partition tables */
173 blkid_probe_enable_superblocks(b, 1);
174 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
175 }
176
177 blkid_probe_enable_partitions(b, 1);
178 blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
179
180 errno = 0;
181 r = blkid_do_safeprobe(b);
182 if (IN_SET(r, -2, 1)) {
183 log_debug("Failed to identify any partition table.");
184 return -ENOPKG;
185 }
186 if (r != 0)
187 return -errno ?: -EIO;
188
189 m = new0(DissectedImage, 1);
190 if (!m)
191 return -ENOMEM;
192
193 if (!(flags & DISSECT_IMAGE_GPT_ONLY) &&
194 (flags & DISSECT_IMAGE_REQUIRE_ROOT)) {
195 const char *usage = NULL;
196
197 (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
198 if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
199 _cleanup_free_ char *t = NULL, *n = NULL;
200 const char *fstype = NULL;
201
202 /* OK, we have found a file system, that's our root partition then. */
203 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
204
205 if (fstype) {
206 t = strdup(fstype);
207 if (!t)
208 return -ENOMEM;
209 }
210
211 if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0)
212 return -ENOMEM;
213
214 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
215 .found = true,
216 .rw = true,
217 .partno = -1,
218 .architecture = _ARCHITECTURE_INVALID,
219 .fstype = t,
220 .node = n,
221 };
222
223 t = n = NULL;
224
225 m->encrypted = streq(fstype, "crypto_LUKS");
226
227 *ret = m;
228 m = NULL;
229
230 return 0;
231 }
232 }
233
234 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
235 if (!pttype)
236 return -ENOPKG;
237
238 is_gpt = streq_ptr(pttype, "gpt");
239 is_mbr = streq_ptr(pttype, "dos");
240
241 if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
242 return -ENOPKG;
243
244 errno = 0;
245 pl = blkid_probe_get_partitions(b);
246 if (!pl)
247 return -errno ?: -ENOMEM;
248
249 udev = udev_new();
250 if (!udev)
251 return -errno;
252
253 d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
254 if (!d)
255 return -ENOMEM;
256
257 for (i = 0;; i++) {
258 int n, z;
259
260 if (i >= 10) {
261 log_debug("Kernel partitions never appeared.");
262 return -ENXIO;
263 }
264
265 e = udev_enumerate_new(udev);
266 if (!e)
267 return -errno;
268
269 r = udev_enumerate_add_match_parent(e, d);
270 if (r < 0)
271 return r;
272
273 r = udev_enumerate_scan_devices(e);
274 if (r < 0)
275 return r;
276
277 /* Count the partitions enumerated by the kernel */
278 n = 0;
279 first = udev_enumerate_get_list_entry(e);
280 udev_list_entry_foreach(item, first)
281 n++;
282
283 /* Count the partitions enumerated by blkid */
284 z = blkid_partlist_numof_partitions(pl);
285 if (n == z + 1)
286 break;
287 if (n > z + 1) {
288 log_debug("blkid and kernel partition list do not match.");
289 return -EIO;
290 }
291 if (n < z + 1) {
292 unsigned j = 0;
293
294 /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running
295 * or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a
296 * synchronous call that waits until probing is complete. */
297
298 for (;;) {
299 if (j++ > 20)
300 return -EBUSY;
301
302 if (ioctl(fd, BLKRRPART, 0) < 0) {
303 r = -errno;
304
305 if (r == -EINVAL) {
306 struct loop_info64 info;
307
308 /* If we are running on a loop device that has partition scanning off,
309 * return an explicit recognizable error about this, so that callers
310 * can generate a proper message explaining the situation. */
311
312 if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
313 log_debug("Device is loop device and partition scanning is off!");
314 return -EPROTONOSUPPORT;
315 }
316 }
317 if (r != -EBUSY)
318 return r;
319 } else
320 break;
321
322 /* If something else has the device open, such as an udev rule, the ioctl will return
323 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a
324 * bit, and try again.
325 *
326 * This is really something they should fix in the kernel! */
327
328 (void) usleep(50 * USEC_PER_MSEC);
329 }
330 }
331
332 e = udev_enumerate_unref(e);
333 }
334
335 first = udev_enumerate_get_list_entry(e);
336 udev_list_entry_foreach(item, first) {
337 _cleanup_udev_device_unref_ struct udev_device *q;
338 unsigned long long pflags;
339 blkid_partition pp;
340 const char *node, *sysname;
341 dev_t qn;
342 int nr;
343
344 q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
345 if (!q)
346 return -errno;
347
348 qn = udev_device_get_devnum(q);
349 if (major(qn) == 0)
350 continue;
351
352 if (st.st_rdev == qn)
353 continue;
354
355 /* Filter out weird MMC RPMB partitions, which cannot reasonably be read, see
356 * https://github.com/systemd/systemd/issues/5806 */
357 sysname = udev_device_get_sysname(q);
358 if (sysname && startswith(sysname, "mmcblk") && endswith(sysname, "rpmb"))
359 continue;
360
361 node = udev_device_get_devnode(q);
362 if (!node)
363 continue;
364
365 pp = blkid_partlist_devno_to_partition(pl, qn);
366 if (!pp)
367 continue;
368
369 pflags = blkid_partition_get_flags(pp);
370
371 nr = blkid_partition_get_partno(pp);
372 if (nr < 0)
373 continue;
374
375 if (is_gpt) {
376 int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID;
377 const char *stype, *sid, *fstype = NULL;
378 sd_id128_t type_id, id;
379 bool rw = true;
380
381 sid = blkid_partition_get_uuid(pp);
382 if (!sid)
383 continue;
384 if (sd_id128_from_string(sid, &id) < 0)
385 continue;
386
387 stype = blkid_partition_get_type_string(pp);
388 if (!stype)
389 continue;
390 if (sd_id128_from_string(stype, &type_id) < 0)
391 continue;
392
393 if (sd_id128_equal(type_id, GPT_HOME)) {
394
395 if (pflags & GPT_FLAG_NO_AUTO)
396 continue;
397
398 designator = PARTITION_HOME;
399 rw = !(pflags & GPT_FLAG_READ_ONLY);
400 } else if (sd_id128_equal(type_id, GPT_SRV)) {
401
402 if (pflags & GPT_FLAG_NO_AUTO)
403 continue;
404
405 designator = PARTITION_SRV;
406 rw = !(pflags & GPT_FLAG_READ_ONLY);
407 } else if (sd_id128_equal(type_id, GPT_ESP)) {
408
409 /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is not defined
410 * there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as recommended by the
411 * UEFI spec (See "12.3.3 Number and Location of System Partitions"). */
412
413 if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
414 continue;
415
416 designator = PARTITION_ESP;
417 fstype = "vfat";
418 }
419 #ifdef GPT_ROOT_NATIVE
420 else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
421
422 if (pflags & GPT_FLAG_NO_AUTO)
423 continue;
424
425 /* If a root ID is specified, ignore everything but the root id */
426 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
427 continue;
428
429 designator = PARTITION_ROOT;
430 architecture = native_architecture();
431 rw = !(pflags & GPT_FLAG_READ_ONLY);
432 } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
433
434 if (pflags & GPT_FLAG_NO_AUTO)
435 continue;
436
437 m->can_verity = true;
438
439 /* Ignore verity unless a root hash is specified */
440 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
441 continue;
442
443 designator = PARTITION_ROOT_VERITY;
444 fstype = "DM_verity_hash";
445 architecture = native_architecture();
446 rw = false;
447 }
448 #endif
449 #ifdef GPT_ROOT_SECONDARY
450 else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
451
452 if (pflags & GPT_FLAG_NO_AUTO)
453 continue;
454
455 /* If a root ID is specified, ignore everything but the root id */
456 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
457 continue;
458
459 designator = PARTITION_ROOT_SECONDARY;
460 architecture = SECONDARY_ARCHITECTURE;
461 rw = !(pflags & GPT_FLAG_READ_ONLY);
462 } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
463
464 if (pflags & GPT_FLAG_NO_AUTO)
465 continue;
466
467 m->can_verity = true;
468
469 /* Ignore verity unless root has is specified */
470 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
471 continue;
472
473 designator = PARTITION_ROOT_SECONDARY_VERITY;
474 fstype = "DM_verity_hash";
475 architecture = SECONDARY_ARCHITECTURE;
476 rw = false;
477 }
478 #endif
479 else if (sd_id128_equal(type_id, GPT_SWAP)) {
480
481 if (pflags & GPT_FLAG_NO_AUTO)
482 continue;
483
484 designator = PARTITION_SWAP;
485 fstype = "swap";
486 } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
487
488 if (pflags & GPT_FLAG_NO_AUTO)
489 continue;
490
491 if (generic_node)
492 multiple_generic = true;
493 else {
494 generic_nr = nr;
495 generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
496 generic_uuid = id;
497 generic_node = strdup(node);
498 if (!generic_node)
499 return -ENOMEM;
500 }
501 }
502
503 if (designator != _PARTITION_DESIGNATOR_INVALID) {
504 _cleanup_free_ char *t = NULL, *n = NULL;
505
506 /* First one wins */
507 if (m->partitions[designator].found)
508 continue;
509
510 if (fstype) {
511 t = strdup(fstype);
512 if (!t)
513 return -ENOMEM;
514 }
515
516 n = strdup(node);
517 if (!n)
518 return -ENOMEM;
519
520 m->partitions[designator] = (DissectedPartition) {
521 .found = true,
522 .partno = nr,
523 .rw = rw,
524 .architecture = architecture,
525 .node = n,
526 .fstype = t,
527 .uuid = id,
528 };
529
530 n = t = NULL;
531 }
532
533 } else if (is_mbr) {
534
535 if (pflags != 0x80) /* Bootable flag */
536 continue;
537
538 if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */
539 continue;
540
541 if (generic_node)
542 multiple_generic = true;
543 else {
544 generic_nr = nr;
545 generic_rw = true;
546 generic_node = strdup(node);
547 if (!generic_node)
548 return -ENOMEM;
549 }
550 }
551 }
552
553 if (!m->partitions[PARTITION_ROOT].found) {
554 /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not
555 * either, then check if there's a single generic one, and use that. */
556
557 if (m->partitions[PARTITION_ROOT_VERITY].found)
558 return -EADDRNOTAVAIL;
559
560 if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
561 m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
562 zero(m->partitions[PARTITION_ROOT_SECONDARY]);
563
564 m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
565 zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
566
567 } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) {
568
569 /* If the root has was set, then we won't fallback to a generic node, because the root hash
570 * decides */
571 if (root_hash)
572 return -EADDRNOTAVAIL;
573
574 /* If we didn't find a generic node, then we can't fix this up either */
575 if (!generic_node)
576 return -ENXIO;
577
578 /* If we didn't find a properly marked root partition, but we did find a single suitable
579 * generic Linux partition, then use this as root partition, if the caller asked for it. */
580 if (multiple_generic)
581 return -ENOTUNIQ;
582
583 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
584 .found = true,
585 .rw = generic_rw,
586 .partno = generic_nr,
587 .architecture = _ARCHITECTURE_INVALID,
588 .node = generic_node,
589 .uuid = generic_uuid,
590 };
591
592 generic_node = NULL;
593 }
594 }
595
596 if (root_hash) {
597 if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
598 return -EADDRNOTAVAIL;
599
600 /* If we found the primary root with the hash, then we definitely want to suppress any secondary root
601 * (which would be weird, after all the root hash should only be assigned to one pair of
602 * partitions... */
603 m->partitions[PARTITION_ROOT_SECONDARY].found = false;
604 m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
605
606 /* If we found a verity setup, then the root partition is necessarily read-only. */
607 m->partitions[PARTITION_ROOT].rw = false;
608
609 m->verity = true;
610 }
611
612 blkid_free_probe(b);
613 b = NULL;
614
615 /* Fill in file system types if we don't know them yet. */
616 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
617 DissectedPartition *p = m->partitions + i;
618
619 if (!p->found)
620 continue;
621
622 if (!p->fstype && p->node) {
623 r = probe_filesystem(p->node, &p->fstype);
624 if (r < 0 && r != -EUCLEAN)
625 return r;
626 }
627
628 if (streq_ptr(p->fstype, "crypto_LUKS"))
629 m->encrypted = true;
630
631 if (p->fstype && fstype_is_ro(p->fstype))
632 p->rw = false;
633 }
634
635 *ret = m;
636 m = NULL;
637
638 return 0;
639 #else
640 return -EOPNOTSUPP;
641 #endif
642 }
643
644 DissectedImage* dissected_image_unref(DissectedImage *m) {
645 unsigned i;
646
647 if (!m)
648 return NULL;
649
650 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
651 free(m->partitions[i].fstype);
652 free(m->partitions[i].node);
653 free(m->partitions[i].decrypted_fstype);
654 free(m->partitions[i].decrypted_node);
655 }
656
657 free(m->hostname);
658 strv_free(m->machine_info);
659 strv_free(m->os_release);
660
661 return mfree(m);
662 }
663
/* Checks whether the block device node at 'path' is a loop device, or a partition of one.
 *
 * Returns > 0 if so, 0 if not, -ENOTBLK if the path does not refer to a block device node, and another
 * negative errno-style error if stat() or the sysfs lookup fails. */
static int is_loop_device(const char *path) {
        char s[SYS_BLOCK_PATH_MAX("/../loop/")];
        struct stat st;

        assert(path);

        if (stat(path, &st) < 0)
                return -errno;

        if (!S_ISBLK(st.st_mode))
                return -ENOTBLK;

        /* Note that we need st_rdev here, i.e. the device number the node refers to. st_dev would be the
         * device of the file system the device node itself is stored on, which is not what we are
         * interested in. */
        xsprintf_sys_block_path(s, "/loop/", st.st_rdev);
        if (access(s, F_OK) < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
                xsprintf_sys_block_path(s, "/../loop/", st.st_rdev);
                if (access(s, F_OK) < 0)
                        return errno == ENOENT ? false : -errno;
        }

        return true;
}
689
690 static int mount_partition(
691 DissectedPartition *m,
692 const char *where,
693 const char *directory,
694 uid_t uid_shift,
695 DissectImageFlags flags) {
696
697 _cleanup_free_ char *chased = NULL, *options = NULL;
698 const char *p, *node, *fstype;
699 bool rw;
700 int r;
701
702 assert(m);
703 assert(where);
704
705 node = m->decrypted_node ?: m->node;
706 fstype = m->decrypted_fstype ?: m->fstype;
707
708 if (!m->found || !node || !fstype)
709 return 0;
710
711 /* Stacked encryption? Yuck */
712 if (streq_ptr(fstype, "crypto_LUKS"))
713 return -ELOOP;
714
715 rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY);
716
717 if (directory) {
718 r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased);
719 if (r < 0)
720 return r;
721
722 p = chased;
723 } else
724 p = where;
725
726 /* If requested, turn on discard support. */
727 if (fstype_can_discard(fstype) &&
728 ((flags & DISSECT_IMAGE_DISCARD) ||
729 ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node)))) {
730 options = strdup("discard");
731 if (!options)
732 return -ENOMEM;
733 }
734
735 if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) {
736 _cleanup_free_ char *uid_option = NULL;
737
738 if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
739 return -ENOMEM;
740
741 if (!strextend_with_separator(&options, ",", uid_option, NULL))
742 return -ENOMEM;
743 }
744
745 return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
746 }
747
748 int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
749 int r;
750
751 assert(m);
752 assert(where);
753
754 if (!m->partitions[PARTITION_ROOT].found)
755 return -ENXIO;
756
757 if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
758 r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
759 if (r < 0)
760 return r;
761 }
762
763 if ((flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY))
764 return 0;
765
766 r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
767 if (r < 0)
768 return r;
769
770 r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags);
771 if (r < 0)
772 return r;
773
774 if (m->partitions[PARTITION_ESP].found) {
775 const char *mp;
776
777 /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
778
779 FOREACH_STRING(mp, "/efi", "/boot") {
780 _cleanup_free_ char *p = NULL;
781
782 r = chase_symlinks(mp, where, CHASE_PREFIX_ROOT, &p);
783 if (r < 0)
784 continue;
785
786 r = dir_is_empty(p);
787 if (r > 0) {
788 r = mount_partition(m->partitions + PARTITION_ESP, where, mp, uid_shift, flags);
789 if (r < 0)
790 return r;
791 }
792 }
793 }
794
795 return 0;
796 }
797
#if HAVE_LIBCRYPTSETUP
typedef struct DecryptedPartition DecryptedPartition;

/* One device-mapper device that was activated for a partition of the image */
struct DecryptedPartition {
        struct crypt_device *device; /* libcryptsetup handle of the activated device */
        char *name;                  /* device-mapper name the device was activated under */
        bool relinquished;           /* if true, the device is not deactivated when the image is released */
};

/* All device-mapper devices activated for one image */
struct DecryptedImage {
        DecryptedPartition *decrypted; /* dynamically grown array of activated devices */
        size_t n_decrypted;            /* number of entries of the array in use */
        size_t n_allocated;            /* number of entries the array has room for */
};
#endif
811
812 DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
813 #if HAVE_LIBCRYPTSETUP
814 size_t i;
815 int r;
816
817 if (!d)
818 return NULL;
819
820 for (i = 0; i < d->n_decrypted; i++) {
821 DecryptedPartition *p = d->decrypted + i;
822
823 if (p->device && p->name && !p->relinquished) {
824 r = crypt_deactivate(p->device, p->name);
825 if (r < 0)
826 log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
827 }
828
829 if (p->device)
830 crypt_free(p->device);
831 free(p->name);
832 }
833
834 free(d);
835 #endif
836 return NULL;
837 }
838
839 #if HAVE_LIBCRYPTSETUP
840
841 static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
842 _cleanup_free_ char *name = NULL, *node = NULL;
843 const char *base;
844
845 assert(original_node);
846 assert(suffix);
847 assert(ret_name);
848 assert(ret_node);
849
850 base = strrchr(original_node, '/');
851 if (!base)
852 return -EINVAL;
853 base++;
854 if (isempty(base))
855 return -EINVAL;
856
857 name = strjoin(base, suffix);
858 if (!name)
859 return -ENOMEM;
860 if (!filename_is_valid(name))
861 return -EINVAL;
862
863 node = strjoin(crypt_get_dir(), "/", name);
864 if (!node)
865 return -ENOMEM;
866
867 *ret_name = name;
868 *ret_node = node;
869
870 name = node = NULL;
871 return 0;
872 }
873
874 static int decrypt_partition(
875 DissectedPartition *m,
876 const char *passphrase,
877 DissectImageFlags flags,
878 DecryptedImage *d) {
879
880 _cleanup_free_ char *node = NULL, *name = NULL;
881 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
882 int r;
883
884 assert(m);
885 assert(d);
886
887 if (!m->found || !m->node || !m->fstype)
888 return 0;
889
890 if (!streq(m->fstype, "crypto_LUKS"))
891 return 0;
892
893 if (!passphrase)
894 return -ENOKEY;
895
896 r = make_dm_name_and_node(m->node, "-decrypted", &name, &node);
897 if (r < 0)
898 return r;
899
900 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
901 return -ENOMEM;
902
903 r = crypt_init(&cd, m->node);
904 if (r < 0)
905 return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
906
907 r = crypt_load(cd, CRYPT_LUKS, NULL);
908 if (r < 0)
909 return log_debug_errno(r, "Failed to load LUKS metadata: %m");
910
911 r = crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
912 ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
913 ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
914 if (r < 0) {
915 log_debug_errno(r, "Failed to activate LUKS device: %m");
916 return r == -EPERM ? -EKEYREJECTED : r;
917 }
918
919 d->decrypted[d->n_decrypted].name = name;
920 name = NULL;
921
922 d->decrypted[d->n_decrypted].device = cd;
923 cd = NULL;
924 d->n_decrypted++;
925
926 m->decrypted_node = node;
927 node = NULL;
928
929 return 0;
930 }
931
932 static int verity_partition(
933 DissectedPartition *m,
934 DissectedPartition *v,
935 const void *root_hash,
936 size_t root_hash_size,
937 DissectImageFlags flags,
938 DecryptedImage *d) {
939
940 _cleanup_free_ char *node = NULL, *name = NULL;
941 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
942 int r;
943
944 assert(m);
945 assert(v);
946
947 if (!root_hash)
948 return 0;
949
950 if (!m->found || !m->node || !m->fstype)
951 return 0;
952 if (!v->found || !v->node || !v->fstype)
953 return 0;
954
955 if (!streq(v->fstype, "DM_verity_hash"))
956 return 0;
957
958 r = make_dm_name_and_node(m->node, "-verity", &name, &node);
959 if (r < 0)
960 return r;
961
962 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
963 return -ENOMEM;
964
965 r = crypt_init(&cd, v->node);
966 if (r < 0)
967 return r;
968
969 r = crypt_load(cd, CRYPT_VERITY, NULL);
970 if (r < 0)
971 return r;
972
973 r = crypt_set_data_device(cd, m->node);
974 if (r < 0)
975 return r;
976
977 r = crypt_activate_by_volume_key(cd, name, root_hash, root_hash_size, CRYPT_ACTIVATE_READONLY);
978 if (r < 0)
979 return r;
980
981 d->decrypted[d->n_decrypted].name = name;
982 name = NULL;
983
984 d->decrypted[d->n_decrypted].device = cd;
985 cd = NULL;
986 d->n_decrypted++;
987
988 m->decrypted_node = node;
989 node = NULL;
990
991 return 0;
992 }
993 #endif
994
995 int dissected_image_decrypt(
996 DissectedImage *m,
997 const char *passphrase,
998 const void *root_hash,
999 size_t root_hash_size,
1000 DissectImageFlags flags,
1001 DecryptedImage **ret) {
1002
1003 #if HAVE_LIBCRYPTSETUP
1004 _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
1005 unsigned i;
1006 int r;
1007 #endif
1008
1009 assert(m);
1010 assert(root_hash || root_hash_size == 0);
1011
1012 /* Returns:
1013 *
1014 * = 0 → There was nothing to decrypt
1015 * > 0 → Decrypted successfully
1016 * -ENOKEY → There's something to decrypt but no key was supplied
1017 * -EKEYREJECTED → Passed key was not correct
1018 */
1019
1020 if (root_hash && root_hash_size < sizeof(sd_id128_t))
1021 return -EINVAL;
1022
1023 if (!m->encrypted && !m->verity) {
1024 *ret = NULL;
1025 return 0;
1026 }
1027
1028 #if HAVE_LIBCRYPTSETUP
1029 d = new0(DecryptedImage, 1);
1030 if (!d)
1031 return -ENOMEM;
1032
1033 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
1034 DissectedPartition *p = m->partitions + i;
1035 int k;
1036
1037 if (!p->found)
1038 continue;
1039
1040 r = decrypt_partition(p, passphrase, flags, d);
1041 if (r < 0)
1042 return r;
1043
1044 k = PARTITION_VERITY_OF(i);
1045 if (k >= 0) {
1046 r = verity_partition(p, m->partitions + k, root_hash, root_hash_size, flags, d);
1047 if (r < 0)
1048 return r;
1049 }
1050
1051 if (!p->decrypted_fstype && p->decrypted_node) {
1052 r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype);
1053 if (r < 0 && r != -EUCLEAN)
1054 return r;
1055 }
1056 }
1057
1058 *ret = d;
1059 d = NULL;
1060
1061 return 1;
1062 #else
1063 return -EOPNOTSUPP;
1064 #endif
1065 }
1066
1067 int dissected_image_decrypt_interactively(
1068 DissectedImage *m,
1069 const char *passphrase,
1070 const void *root_hash,
1071 size_t root_hash_size,
1072 DissectImageFlags flags,
1073 DecryptedImage **ret) {
1074
1075 _cleanup_strv_free_erase_ char **z = NULL;
1076 int n = 3, r;
1077
1078 if (passphrase)
1079 n--;
1080
1081 for (;;) {
1082 r = dissected_image_decrypt(m, passphrase, root_hash, root_hash_size, flags, ret);
1083 if (r >= 0)
1084 return r;
1085 if (r == -EKEYREJECTED)
1086 log_error_errno(r, "Incorrect passphrase, try again!");
1087 else if (r != -ENOKEY) {
1088 log_error_errno(r, "Failed to decrypt image: %m");
1089 return r;
1090 }
1091
1092 if (--n < 0) {
1093 log_error("Too many retries.");
1094 return -EKEYREJECTED;
1095 }
1096
1097 z = strv_free(z);
1098
1099 r = ask_password_auto("Please enter image passphrase!", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z);
1100 if (r < 0)
1101 return log_error_errno(r, "Failed to query for passphrase: %m");
1102
1103 passphrase = z[0];
1104 }
1105 }
1106
1107 #if HAVE_LIBCRYPTSETUP
1108 static int deferred_remove(DecryptedPartition *p) {
1109
1110 struct dm_ioctl dm = {
1111 .version = {
1112 DM_VERSION_MAJOR,
1113 DM_VERSION_MINOR,
1114 DM_VERSION_PATCHLEVEL
1115 },
1116 .data_size = sizeof(dm),
1117 .flags = DM_DEFERRED_REMOVE,
1118 };
1119
1120 _cleanup_close_ int fd = -1;
1121
1122 assert(p);
1123
1124 /* Unfortunately, libcryptsetup doesn't provide a proper API for this, hence call the ioctl() directly. */
1125
1126 fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
1127 if (fd < 0)
1128 return -errno;
1129
1130 strncpy(dm.name, p->name, sizeof(dm.name));
1131
1132 if (ioctl(fd, DM_DEV_REMOVE, &dm))
1133 return -errno;
1134
1135 return 0;
1136 }
1137 #endif
1138
1139 int decrypted_image_relinquish(DecryptedImage *d) {
1140
1141 #if HAVE_LIBCRYPTSETUP
1142 size_t i;
1143 int r;
1144 #endif
1145
1146 assert(d);
1147
1148 /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so
1149 * that we don't clean it up ourselves either anymore */
1150
1151 #if HAVE_LIBCRYPTSETUP
1152 for (i = 0; i < d->n_decrypted; i++) {
1153 DecryptedPartition *p = d->decrypted + i;
1154
1155 if (p->relinquished)
1156 continue;
1157
1158 r = deferred_remove(p);
1159 if (r < 0)
1160 return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
1161
1162 p->relinquished = true;
1163 }
1164 #endif
1165
1166 return 0;
1167 }
1168
1169 int root_hash_load(const char *image, void **ret, size_t *ret_size) {
1170 _cleanup_free_ char *text = NULL;
1171 _cleanup_free_ void *k = NULL;
1172 size_t l;
1173 int r;
1174
1175 assert(image);
1176 assert(ret);
1177 assert(ret_size);
1178
1179 if (is_device_path(image)) {
1180 /* If we are asked to load the root hash for a device node, exit early */
1181 *ret = NULL;
1182 *ret_size = 0;
1183 return 0;
1184 }
1185
1186 r = getxattr_malloc(image, "user.verity.roothash", &text, true);
1187 if (r < 0) {
1188 char *fn, *e, *n;
1189
1190 if (!IN_SET(r, -ENODATA, -EOPNOTSUPP, -ENOENT))
1191 return r;
1192
1193 fn = newa(char, strlen(image) + STRLEN(".roothash") + 1);
1194 n = stpcpy(fn, image);
1195 e = endswith(fn, ".raw");
1196 if (e)
1197 n = e;
1198
1199 strcpy(n, ".roothash");
1200
1201 r = read_one_line_file(fn, &text);
1202 if (r == -ENOENT) {
1203 *ret = NULL;
1204 *ret_size = 0;
1205 return 0;
1206 }
1207 if (r < 0)
1208 return r;
1209 }
1210
1211 r = unhexmem(text, strlen(text), &k, &l);
1212 if (r < 0)
1213 return r;
1214 if (l < sizeof(sd_id128_t))
1215 return -EINVAL;
1216
1217 *ret = k;
1218 *ret_size = l;
1219
1220 k = NULL;
1221
1222 return 1;
1223 }
1224
1225 int dissected_image_acquire_metadata(DissectedImage *m) {
1226
1227 enum {
1228 META_HOSTNAME,
1229 META_MACHINE_ID,
1230 META_MACHINE_INFO,
1231 META_OS_RELEASE,
1232 _META_MAX,
1233 };
1234
1235 static const char *const paths[_META_MAX] = {
1236 [META_HOSTNAME] = "/etc/hostname\0",
1237 [META_MACHINE_ID] = "/etc/machine-id\0",
1238 [META_MACHINE_INFO] = "/etc/machine-info\0",
1239 [META_OS_RELEASE] = "/etc/os-release\0/usr/lib/os-release\0",
1240 };
1241
1242 _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
1243 _cleanup_(rmdir_and_freep) char *t = NULL;
1244 _cleanup_(sigkill_waitp) pid_t child = 0;
1245 sd_id128_t machine_id = SD_ID128_NULL;
1246 _cleanup_free_ char *hostname = NULL;
1247 unsigned n_meta_initialized = 0, k;
1248 int fds[2 * _META_MAX], r;
1249
1250 BLOCK_SIGNALS(SIGCHLD);
1251
1252 assert(m);
1253
1254 for (; n_meta_initialized < _META_MAX; n_meta_initialized ++)
1255 if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
1256 r = -errno;
1257 goto finish;
1258 }
1259
1260 r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
1261 if (r < 0)
1262 goto finish;
1263
1264 r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS, &child);
1265 if (r < 0)
1266 goto finish;
1267 if (r == 0) {
1268 /* Make sure we never propagate to the host */
1269 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
1270 _exit(EXIT_FAILURE);
1271
1272 r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY);
1273 if (r < 0)
1274 _exit(EXIT_FAILURE);
1275
1276 for (k = 0; k < _META_MAX; k++) {
1277 _cleanup_close_ int fd = -1;
1278 const char *p;
1279
1280 fds[2*k] = safe_close(fds[2*k]);
1281
1282 NULSTR_FOREACH(p, paths[k]) {
1283 _cleanup_free_ char *q = NULL;
1284
1285 r = chase_symlinks(p, t, CHASE_PREFIX_ROOT, &q);
1286 if (r < 0)
1287 continue;
1288
1289 fd = open(q, O_RDONLY|O_CLOEXEC|O_NOCTTY);
1290 if (fd >= 0)
1291 break;
1292 }
1293 if (fd < 0)
1294 continue;
1295
1296 r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0);
1297 if (r < 0)
1298 _exit(EXIT_FAILURE);
1299
1300 fds[2*k+1] = safe_close(fds[2*k+1]);
1301 }
1302
1303 _exit(EXIT_SUCCESS);
1304 }
1305
1306 for (k = 0; k < _META_MAX; k++) {
1307 _cleanup_fclose_ FILE *f = NULL;
1308
1309 fds[2*k+1] = safe_close(fds[2*k+1]);
1310
1311 f = fdopen(fds[2*k], "re");
1312 if (!f) {
1313 r = -errno;
1314 goto finish;
1315 }
1316
1317 fds[2*k] = -1;
1318
1319 switch (k) {
1320
1321 case META_HOSTNAME:
1322 r = read_etc_hostname_stream(f, &hostname);
1323 if (r < 0)
1324 log_debug_errno(r, "Failed to read /etc/hostname: %m");
1325
1326 break;
1327
1328 case META_MACHINE_ID: {
1329 _cleanup_free_ char *line = NULL;
1330
1331 r = read_line(f, LONG_LINE_MAX, &line);
1332 if (r < 0)
1333 log_debug_errno(r, "Failed to read /etc/machine-id: %m");
1334 else if (r == 33) {
1335 r = sd_id128_from_string(line, &machine_id);
1336 if (r < 0)
1337 log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
1338 } else if (r == 0)
1339 log_debug("/etc/machine-id file is empty.");
1340 else
1341 log_debug("/etc/machine-id has unexpected length %i.", r);
1342
1343 break;
1344 }
1345
1346 case META_MACHINE_INFO:
1347 r = load_env_file_pairs(f, "machine-info", NULL, &machine_info);
1348 if (r < 0)
1349 log_debug_errno(r, "Failed to read /etc/machine-info: %m");
1350
1351 break;
1352
1353 case META_OS_RELEASE:
1354 r = load_env_file_pairs(f, "os-release", NULL, &os_release);
1355 if (r < 0)
1356 log_debug_errno(r, "Failed to read OS release file: %m");
1357
1358 break;
1359 }
1360 }
1361
1362 r = wait_for_terminate_and_check("(sd-dissect)", child, 0);
1363 child = 0;
1364 if (r < 0)
1365 goto finish;
1366 if (r != EXIT_SUCCESS)
1367 return -EPROTO;
1368
1369 free_and_replace(m->hostname, hostname);
1370 m->machine_id = machine_id;
1371 strv_free_and_replace(m->machine_info, machine_info);
1372 strv_free_and_replace(m->os_release, os_release);
1373
1374 finish:
1375 for (k = 0; k < n_meta_initialized; k++)
1376 safe_close_pair(fds + 2*k);
1377
1378 return r;
1379 }
1380
1381 static const char *const partition_designator_table[] = {
1382 [PARTITION_ROOT] = "root",
1383 [PARTITION_ROOT_SECONDARY] = "root-secondary",
1384 [PARTITION_HOME] = "home",
1385 [PARTITION_SRV] = "srv",
1386 [PARTITION_ESP] = "esp",
1387 [PARTITION_SWAP] = "swap",
1388 [PARTITION_ROOT_VERITY] = "root-verity",
1389 [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
1390 };
1391
1392 DEFINE_STRING_TABLE_LOOKUP(partition_designator, int);