]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/dissect-image.c
35b2c56555b307d7b13794a92b1ba6939004b9e0
[thirdparty/systemd.git] / src / shared / dissect-image.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2016 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <sys/mount.h>
22 #include <sys/prctl.h>
23 #include <sys/wait.h>
24
25 #include "architecture.h"
26 #include "ask-password-api.h"
27 #include "blkid-util.h"
28 #include "blockdev-util.h"
29 #include "copy.h"
30 #include "crypt-util.h"
31 #include "def.h"
32 #include "device-nodes.h"
33 #include "dissect-image.h"
34 #include "fd-util.h"
35 #include "fileio.h"
36 #include "fs-util.h"
37 #include "gpt.h"
38 #include "hexdecoct.h"
39 #include "hostname-util.h"
40 #include "id128-util.h"
41 #include "linux-3.13/dm-ioctl.h"
42 #include "missing.h"
43 #include "mount-util.h"
44 #include "path-util.h"
45 #include "process-util.h"
46 #include "raw-clone.h"
47 #include "signal-util.h"
48 #include "stat-util.h"
49 #include "stdio-util.h"
50 #include "string-table.h"
51 #include "string-util.h"
52 #include "strv.h"
53 #include "udev-util.h"
54 #include "user-util.h"
55 #include "xattr-util.h"
56
int probe_filesystem(const char *node, char **ret_fstype) {
        /* Try to find device content type and return it in *ret_fstype. If nothing is found,
         * 0/NULL will be returned. -EUCLEAN will be returned for ambiguous results, and a
         * different error otherwise.
         *
         * On success with a detected type, 1 is returned and *ret_fstype points to a newly
         * allocated string owned by the caller. On any error return *ret_fstype is left
         * untouched. Without blkid support -EOPNOTSUPP is returned. */

#if HAVE_BLKID
        _cleanup_blkid_free_probe_ blkid_probe b = NULL;
        /* Must start out as NULL: the lookup below is best-effort and its result is ignored, hence
         * the variable may be left unset by it. */
        const char *fstype = NULL;
        int r;

        errno = 0;
        b = blkid_new_probe_from_filename(node);
        if (!b)
                return -errno ?: -ENOMEM;

        blkid_probe_enable_superblocks(b, 1);
        blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);

        errno = 0;
        r = blkid_do_safeprobe(b);
        if (r == 1) {
                log_debug("No type detected on partition %s", node);
                goto not_found;
        }
        if (r == -2) {
                log_debug("Results ambiguous for partition %s", node);
                return -EUCLEAN;
        }
        if (r != 0)
                return -errno ?: -EIO;

        (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);

        if (fstype) {
                char *t;

                t = strdup(fstype);
                if (!t)
                        return -ENOMEM;

                *ret_fstype = t;
                return 1;
        }

not_found:
        *ret_fstype = NULL;
        return 0;
#else
        return -EOPNOTSUPP;
#endif
}
108
109 int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DissectedImage **ret) {
110
111 #if HAVE_BLKID
112 sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL;
113 _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
114 bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
115 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
116 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
117 _cleanup_blkid_free_probe_ blkid_probe b = NULL;
118 _cleanup_udev_unref_ struct udev *udev = NULL;
119 _cleanup_free_ char *generic_node = NULL;
120 sd_id128_t generic_uuid = SD_ID128_NULL;
121 const char *pttype = NULL;
122 struct udev_list_entry *first, *item;
123 blkid_partlist pl;
124 int r, generic_nr;
125 struct stat st;
126 unsigned i;
127
128 assert(fd >= 0);
129 assert(ret);
130 assert(root_hash || root_hash_size == 0);
131
132 /* Probes a disk image, and returns information about what it found in *ret.
133 *
134 * Returns -ENOPKG if no suitable partition table or file system could be found.
135 * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */
136
137 if (root_hash) {
138 /* If a root hash is supplied, then we use the root partition that has a UUID that match the first
139 * 128bit of the root hash. And we use the verity partition that has a UUID that match the final
140 * 128bit. */
141
142 if (root_hash_size < sizeof(sd_id128_t))
143 return -EINVAL;
144
145 memcpy(&root_uuid, root_hash, sizeof(sd_id128_t));
146 memcpy(&verity_uuid, (const uint8_t*) root_hash + root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
147
148 if (sd_id128_is_null(root_uuid))
149 return -EINVAL;
150 if (sd_id128_is_null(verity_uuid))
151 return -EINVAL;
152 }
153
154 if (fstat(fd, &st) < 0)
155 return -errno;
156
157 if (!S_ISBLK(st.st_mode))
158 return -ENOTBLK;
159
160 b = blkid_new_probe();
161 if (!b)
162 return -ENOMEM;
163
164 errno = 0;
165 r = blkid_probe_set_device(b, fd, 0, 0);
166 if (r != 0)
167 return -errno ?: -ENOMEM;
168
169 if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
170 /* Look for file system superblocks, unless we only shall look for GPT partition tables */
171 blkid_probe_enable_superblocks(b, 1);
172 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
173 }
174
175 blkid_probe_enable_partitions(b, 1);
176 blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
177
178 errno = 0;
179 r = blkid_do_safeprobe(b);
180 if (IN_SET(r, -2, 1)) {
181 log_debug("Failed to identify any partition table.");
182 return -ENOPKG;
183 }
184 if (r != 0)
185 return -errno ?: -EIO;
186
187 m = new0(DissectedImage, 1);
188 if (!m)
189 return -ENOMEM;
190
191 if (!(flags & DISSECT_IMAGE_GPT_ONLY) &&
192 (flags & DISSECT_IMAGE_REQUIRE_ROOT)) {
193 const char *usage = NULL;
194
195 (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
196 if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
197 _cleanup_free_ char *t = NULL, *n = NULL;
198 const char *fstype = NULL;
199
200 /* OK, we have found a file system, that's our root partition then. */
201 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
202
203 if (fstype) {
204 t = strdup(fstype);
205 if (!t)
206 return -ENOMEM;
207 }
208
209 if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0)
210 return -ENOMEM;
211
212 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
213 .found = true,
214 .rw = true,
215 .partno = -1,
216 .architecture = _ARCHITECTURE_INVALID,
217 .fstype = t,
218 .node = n,
219 };
220
221 t = n = NULL;
222
223 m->encrypted = streq(fstype, "crypto_LUKS");
224
225 *ret = m;
226 m = NULL;
227
228 return 0;
229 }
230 }
231
232 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
233 if (!pttype)
234 return -ENOPKG;
235
236 is_gpt = streq_ptr(pttype, "gpt");
237 is_mbr = streq_ptr(pttype, "dos");
238
239 if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
240 return -ENOPKG;
241
242 errno = 0;
243 pl = blkid_probe_get_partitions(b);
244 if (!pl)
245 return -errno ?: -ENOMEM;
246
247 udev = udev_new();
248 if (!udev)
249 return -errno;
250
251 d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
252 if (!d)
253 return -ENOMEM;
254
255 for (i = 0;; i++) {
256 int n, z;
257
258 if (i >= 10) {
259 log_debug("Kernel partitions never appeared.");
260 return -ENXIO;
261 }
262
263 e = udev_enumerate_new(udev);
264 if (!e)
265 return -errno;
266
267 r = udev_enumerate_add_match_parent(e, d);
268 if (r < 0)
269 return r;
270
271 r = udev_enumerate_scan_devices(e);
272 if (r < 0)
273 return r;
274
275 /* Count the partitions enumerated by the kernel */
276 n = 0;
277 first = udev_enumerate_get_list_entry(e);
278 udev_list_entry_foreach(item, first)
279 n++;
280
281 /* Count the partitions enumerated by blkid */
282 z = blkid_partlist_numof_partitions(pl);
283 if (n == z + 1)
284 break;
285 if (n > z + 1) {
286 log_debug("blkid and kernel partition list do not match.");
287 return -EIO;
288 }
289 if (n < z + 1) {
290 unsigned j = 0;
291
292 /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running
293 * or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a
294 * synchronous call that waits until probing is complete. */
295
296 for (;;) {
297 if (j++ > 20)
298 return -EBUSY;
299
300 if (ioctl(fd, BLKRRPART, 0) < 0) {
301 r = -errno;
302
303 if (r == -EINVAL) {
304 struct loop_info64 info;
305
306 /* If we are running on a loop device that has partition scanning off,
307 * return an explicit recognizable error about this, so that callers
308 * can generate a proper message explaining the situation. */
309
310 if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
311 log_debug("Device is loop device and partition scanning is off!");
312 return -EPROTONOSUPPORT;
313 }
314 }
315 if (r != -EBUSY)
316 return r;
317 } else
318 break;
319
320 /* If something else has the device open, such as an udev rule, the ioctl will return
321 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a
322 * bit, and try again.
323 *
324 * This is really something they should fix in the kernel! */
325
326 (void) usleep(50 * USEC_PER_MSEC);
327 }
328 }
329
330 e = udev_enumerate_unref(e);
331 }
332
333 first = udev_enumerate_get_list_entry(e);
334 udev_list_entry_foreach(item, first) {
335 _cleanup_udev_device_unref_ struct udev_device *q;
336 unsigned long long pflags;
337 blkid_partition pp;
338 const char *node, *sysname;
339 dev_t qn;
340 int nr;
341
342 q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
343 if (!q)
344 return -errno;
345
346 qn = udev_device_get_devnum(q);
347 if (major(qn) == 0)
348 continue;
349
350 if (st.st_rdev == qn)
351 continue;
352
353 /* Filter out weird MMC RPMB partitions, which cannot reasonably be read, see
354 * https://github.com/systemd/systemd/issues/5806 */
355 sysname = udev_device_get_sysname(q);
356 if (sysname && startswith(sysname, "mmcblk") && endswith(sysname, "rpmb"))
357 continue;
358
359 node = udev_device_get_devnode(q);
360 if (!node)
361 continue;
362
363 pp = blkid_partlist_devno_to_partition(pl, qn);
364 if (!pp)
365 continue;
366
367 pflags = blkid_partition_get_flags(pp);
368
369 nr = blkid_partition_get_partno(pp);
370 if (nr < 0)
371 continue;
372
373 if (is_gpt) {
374 int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID;
375 const char *stype, *sid, *fstype = NULL;
376 sd_id128_t type_id, id;
377 bool rw = true;
378
379 sid = blkid_partition_get_uuid(pp);
380 if (!sid)
381 continue;
382 if (sd_id128_from_string(sid, &id) < 0)
383 continue;
384
385 stype = blkid_partition_get_type_string(pp);
386 if (!stype)
387 continue;
388 if (sd_id128_from_string(stype, &type_id) < 0)
389 continue;
390
391 if (sd_id128_equal(type_id, GPT_HOME)) {
392
393 if (pflags & GPT_FLAG_NO_AUTO)
394 continue;
395
396 designator = PARTITION_HOME;
397 rw = !(pflags & GPT_FLAG_READ_ONLY);
398 } else if (sd_id128_equal(type_id, GPT_SRV)) {
399
400 if (pflags & GPT_FLAG_NO_AUTO)
401 continue;
402
403 designator = PARTITION_SRV;
404 rw = !(pflags & GPT_FLAG_READ_ONLY);
405 } else if (sd_id128_equal(type_id, GPT_ESP)) {
406
407 /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is not defined
408 * there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as recommended by the
409 * UEFI spec (See "12.3.3 Number and Location of System Partitions"). */
410
411 if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
412 continue;
413
414 designator = PARTITION_ESP;
415 fstype = "vfat";
416 }
417 #ifdef GPT_ROOT_NATIVE
418 else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
419
420 if (pflags & GPT_FLAG_NO_AUTO)
421 continue;
422
423 /* If a root ID is specified, ignore everything but the root id */
424 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
425 continue;
426
427 designator = PARTITION_ROOT;
428 architecture = native_architecture();
429 rw = !(pflags & GPT_FLAG_READ_ONLY);
430 } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
431
432 if (pflags & GPT_FLAG_NO_AUTO)
433 continue;
434
435 m->can_verity = true;
436
437 /* Ignore verity unless a root hash is specified */
438 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
439 continue;
440
441 designator = PARTITION_ROOT_VERITY;
442 fstype = "DM_verity_hash";
443 architecture = native_architecture();
444 rw = false;
445 }
446 #endif
447 #ifdef GPT_ROOT_SECONDARY
448 else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
449
450 if (pflags & GPT_FLAG_NO_AUTO)
451 continue;
452
453 /* If a root ID is specified, ignore everything but the root id */
454 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
455 continue;
456
457 designator = PARTITION_ROOT_SECONDARY;
458 architecture = SECONDARY_ARCHITECTURE;
459 rw = !(pflags & GPT_FLAG_READ_ONLY);
460 } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
461
462 if (pflags & GPT_FLAG_NO_AUTO)
463 continue;
464
465 m->can_verity = true;
466
467 /* Ignore verity unless root has is specified */
468 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
469 continue;
470
471 designator = PARTITION_ROOT_SECONDARY_VERITY;
472 fstype = "DM_verity_hash";
473 architecture = SECONDARY_ARCHITECTURE;
474 rw = false;
475 }
476 #endif
477 else if (sd_id128_equal(type_id, GPT_SWAP)) {
478
479 if (pflags & GPT_FLAG_NO_AUTO)
480 continue;
481
482 designator = PARTITION_SWAP;
483 fstype = "swap";
484 } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
485
486 if (pflags & GPT_FLAG_NO_AUTO)
487 continue;
488
489 if (generic_node)
490 multiple_generic = true;
491 else {
492 generic_nr = nr;
493 generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
494 generic_uuid = id;
495 generic_node = strdup(node);
496 if (!generic_node)
497 return -ENOMEM;
498 }
499 }
500
501 if (designator != _PARTITION_DESIGNATOR_INVALID) {
502 _cleanup_free_ char *t = NULL, *n = NULL;
503
504 /* First one wins */
505 if (m->partitions[designator].found)
506 continue;
507
508 if (fstype) {
509 t = strdup(fstype);
510 if (!t)
511 return -ENOMEM;
512 }
513
514 n = strdup(node);
515 if (!n)
516 return -ENOMEM;
517
518 m->partitions[designator] = (DissectedPartition) {
519 .found = true,
520 .partno = nr,
521 .rw = rw,
522 .architecture = architecture,
523 .node = n,
524 .fstype = t,
525 .uuid = id,
526 };
527
528 n = t = NULL;
529 }
530
531 } else if (is_mbr) {
532
533 if (pflags != 0x80) /* Bootable flag */
534 continue;
535
536 if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */
537 continue;
538
539 if (generic_node)
540 multiple_generic = true;
541 else {
542 generic_nr = nr;
543 generic_rw = true;
544 generic_node = strdup(node);
545 if (!generic_node)
546 return -ENOMEM;
547 }
548 }
549 }
550
551 if (!m->partitions[PARTITION_ROOT].found) {
552 /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not
553 * either, then check if there's a single generic one, and use that. */
554
555 if (m->partitions[PARTITION_ROOT_VERITY].found)
556 return -EADDRNOTAVAIL;
557
558 if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
559 m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
560 zero(m->partitions[PARTITION_ROOT_SECONDARY]);
561
562 m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
563 zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
564
565 } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) {
566
567 /* If the root has was set, then we won't fallback to a generic node, because the root hash
568 * decides */
569 if (root_hash)
570 return -EADDRNOTAVAIL;
571
572 /* If we didn't find a generic node, then we can't fix this up either */
573 if (!generic_node)
574 return -ENXIO;
575
576 /* If we didn't find a properly marked root partition, but we did find a single suitable
577 * generic Linux partition, then use this as root partition, if the caller asked for it. */
578 if (multiple_generic)
579 return -ENOTUNIQ;
580
581 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
582 .found = true,
583 .rw = generic_rw,
584 .partno = generic_nr,
585 .architecture = _ARCHITECTURE_INVALID,
586 .node = generic_node,
587 .uuid = generic_uuid,
588 };
589
590 generic_node = NULL;
591 }
592 }
593
594 if (root_hash) {
595 if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
596 return -EADDRNOTAVAIL;
597
598 /* If we found the primary root with the hash, then we definitely want to suppress any secondary root
599 * (which would be weird, after all the root hash should only be assigned to one pair of
600 * partitions... */
601 m->partitions[PARTITION_ROOT_SECONDARY].found = false;
602 m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
603
604 /* If we found a verity setup, then the root partition is necessarily read-only. */
605 m->partitions[PARTITION_ROOT].rw = false;
606
607 m->verity = true;
608 }
609
610 blkid_free_probe(b);
611 b = NULL;
612
613 /* Fill in file system types if we don't know them yet. */
614 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
615 DissectedPartition *p = m->partitions + i;
616
617 if (!p->found)
618 continue;
619
620 if (!p->fstype && p->node) {
621 r = probe_filesystem(p->node, &p->fstype);
622 if (r < 0 && r != -EUCLEAN)
623 return r;
624 }
625
626 if (streq_ptr(p->fstype, "crypto_LUKS"))
627 m->encrypted = true;
628
629 if (p->fstype && fstype_is_ro(p->fstype))
630 p->rw = false;
631 }
632
633 *ret = m;
634 m = NULL;
635
636 return 0;
637 #else
638 return -EOPNOTSUPP;
639 #endif
640 }
641
642 DissectedImage* dissected_image_unref(DissectedImage *m) {
643 unsigned i;
644
645 if (!m)
646 return NULL;
647
648 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
649 free(m->partitions[i].fstype);
650 free(m->partitions[i].node);
651 free(m->partitions[i].decrypted_fstype);
652 free(m->partitions[i].decrypted_node);
653 }
654
655 free(m->hostname);
656 strv_free(m->machine_info);
657 strv_free(m->os_release);
658
659 return mfree(m);
660 }
661
static int is_loop_device(const char *path) {
        char s[SYS_BLOCK_PATH_MAX("/../loop/")];
        struct stat st;

        /* Checks whether the block device node at 'path' is a loop device, or a partition of one.
         *
         * Returns > 0 (true) if so, 0 (false) if not, -ENOTBLK if 'path' is not a block device
         * node, or another negative errno-style error if the check itself failed. */

        assert(path);

        if (stat(path, &st) < 0)
                return -errno;

        if (!S_ISBLK(st.st_mode))
                return -ENOTBLK;

        /* Note that we need st_rdev here, i.e. the device number the node refers to. st_dev would be
         * the device of the file system the device node is stored on, which is not what we are
         * interested in. */
        xsprintf_sys_block_path(s, "/loop/", st.st_rdev);
        if (access(s, F_OK) < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
                xsprintf_sys_block_path(s, "/../loop/", st.st_rdev);
                if (access(s, F_OK) < 0)
                        return errno == ENOENT ? false : -errno;
        }

        return true;
}
687
688 static int mount_partition(
689 DissectedPartition *m,
690 const char *where,
691 const char *directory,
692 uid_t uid_shift,
693 DissectImageFlags flags) {
694
695 _cleanup_free_ char *chased = NULL, *options = NULL;
696 const char *p, *node, *fstype;
697 bool rw;
698 int r;
699
700 assert(m);
701 assert(where);
702
703 node = m->decrypted_node ?: m->node;
704 fstype = m->decrypted_fstype ?: m->fstype;
705
706 if (!m->found || !node || !fstype)
707 return 0;
708
709 /* Stacked encryption? Yuck */
710 if (streq_ptr(fstype, "crypto_LUKS"))
711 return -ELOOP;
712
713 rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY);
714
715 if (directory) {
716 r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased);
717 if (r < 0)
718 return r;
719
720 p = chased;
721 } else
722 p = where;
723
724 /* If requested, turn on discard support. */
725 if (fstype_can_discard(fstype) &&
726 ((flags & DISSECT_IMAGE_DISCARD) ||
727 ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node)))) {
728 options = strdup("discard");
729 if (!options)
730 return -ENOMEM;
731 }
732
733 if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) {
734 _cleanup_free_ char *uid_option = NULL;
735
736 if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
737 return -ENOMEM;
738
739 if (!strextend_with_separator(&options, ",", uid_option, NULL))
740 return -ENOMEM;
741 }
742
743 return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
744 }
745
746 int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
747 int r;
748
749 assert(m);
750 assert(where);
751
752 if (!m->partitions[PARTITION_ROOT].found)
753 return -ENXIO;
754
755 if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
756 r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
757 if (r < 0)
758 return r;
759 }
760
761 if ((flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY))
762 return 0;
763
764 r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
765 if (r < 0)
766 return r;
767
768 r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags);
769 if (r < 0)
770 return r;
771
772 if (m->partitions[PARTITION_ESP].found) {
773 const char *mp;
774
775 /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
776
777 FOREACH_STRING(mp, "/efi", "/boot") {
778 _cleanup_free_ char *p = NULL;
779
780 r = chase_symlinks(mp, where, CHASE_PREFIX_ROOT, &p);
781 if (r < 0)
782 continue;
783
784 r = dir_is_empty(p);
785 if (r > 0) {
786 r = mount_partition(m->partitions + PARTITION_ESP, where, mp, uid_shift, flags);
787 if (r < 0)
788 return r;
789 }
790 }
791 }
792
793 return 0;
794 }
795
796 #if HAVE_LIBCRYPTSETUP
/* One device-mapper device that was activated through libcryptsetup on behalf of an image. */
typedef struct DecryptedPartition {
        struct crypt_device *device; /* libcryptsetup handle, released with crypt_free() */
        char *name;                  /* name the device was activated under, owned by this structure */
        bool relinquished;           /* set once deferred removal was requested; the device is then not deactivated on unref anymore */
} DecryptedPartition;

/* Collection of all devices that were activated for one image. */
struct DecryptedImage {
        DecryptedPartition *decrypted; /* array of activated devices */
        size_t n_decrypted;            /* number of entries of the array that are in use */
        size_t n_allocated;            /* allocation bookkeeping for GREEDY_REALLOC0() */
};
808 #endif
809
810 DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
811 #if HAVE_LIBCRYPTSETUP
812 size_t i;
813 int r;
814
815 if (!d)
816 return NULL;
817
818 for (i = 0; i < d->n_decrypted; i++) {
819 DecryptedPartition *p = d->decrypted + i;
820
821 if (p->device && p->name && !p->relinquished) {
822 r = crypt_deactivate(p->device, p->name);
823 if (r < 0)
824 log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
825 }
826
827 if (p->device)
828 crypt_free(p->device);
829 free(p->name);
830 }
831
832 free(d);
833 #endif
834 return NULL;
835 }
836
837 #if HAVE_LIBCRYPTSETUP
838
839 static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
840 _cleanup_free_ char *name = NULL, *node = NULL;
841 const char *base;
842
843 assert(original_node);
844 assert(suffix);
845 assert(ret_name);
846 assert(ret_node);
847
848 base = strrchr(original_node, '/');
849 if (!base)
850 return -EINVAL;
851 base++;
852 if (isempty(base))
853 return -EINVAL;
854
855 name = strjoin(base, suffix);
856 if (!name)
857 return -ENOMEM;
858 if (!filename_is_valid(name))
859 return -EINVAL;
860
861 node = strjoin(crypt_get_dir(), "/", name);
862 if (!node)
863 return -ENOMEM;
864
865 *ret_name = name;
866 *ret_node = node;
867
868 name = node = NULL;
869 return 0;
870 }
871
872 static int decrypt_partition(
873 DissectedPartition *m,
874 const char *passphrase,
875 DissectImageFlags flags,
876 DecryptedImage *d) {
877
878 _cleanup_free_ char *node = NULL, *name = NULL;
879 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
880 int r;
881
882 assert(m);
883 assert(d);
884
885 if (!m->found || !m->node || !m->fstype)
886 return 0;
887
888 if (!streq(m->fstype, "crypto_LUKS"))
889 return 0;
890
891 if (!passphrase)
892 return -ENOKEY;
893
894 r = make_dm_name_and_node(m->node, "-decrypted", &name, &node);
895 if (r < 0)
896 return r;
897
898 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
899 return -ENOMEM;
900
901 r = crypt_init(&cd, m->node);
902 if (r < 0)
903 return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
904
905 r = crypt_load(cd, CRYPT_LUKS, NULL);
906 if (r < 0)
907 return log_debug_errno(r, "Failed to load LUKS metadata: %m");
908
909 r = crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
910 ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
911 ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
912 if (r < 0) {
913 log_debug_errno(r, "Failed to activate LUKS device: %m");
914 return r == -EPERM ? -EKEYREJECTED : r;
915 }
916
917 d->decrypted[d->n_decrypted].name = name;
918 name = NULL;
919
920 d->decrypted[d->n_decrypted].device = cd;
921 cd = NULL;
922 d->n_decrypted++;
923
924 m->decrypted_node = node;
925 node = NULL;
926
927 return 0;
928 }
929
930 static int verity_partition(
931 DissectedPartition *m,
932 DissectedPartition *v,
933 const void *root_hash,
934 size_t root_hash_size,
935 DissectImageFlags flags,
936 DecryptedImage *d) {
937
938 _cleanup_free_ char *node = NULL, *name = NULL;
939 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
940 int r;
941
942 assert(m);
943 assert(v);
944
945 if (!root_hash)
946 return 0;
947
948 if (!m->found || !m->node || !m->fstype)
949 return 0;
950 if (!v->found || !v->node || !v->fstype)
951 return 0;
952
953 if (!streq(v->fstype, "DM_verity_hash"))
954 return 0;
955
956 r = make_dm_name_and_node(m->node, "-verity", &name, &node);
957 if (r < 0)
958 return r;
959
960 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
961 return -ENOMEM;
962
963 r = crypt_init(&cd, v->node);
964 if (r < 0)
965 return r;
966
967 r = crypt_load(cd, CRYPT_VERITY, NULL);
968 if (r < 0)
969 return r;
970
971 r = crypt_set_data_device(cd, m->node);
972 if (r < 0)
973 return r;
974
975 r = crypt_activate_by_volume_key(cd, name, root_hash, root_hash_size, CRYPT_ACTIVATE_READONLY);
976 if (r < 0)
977 return r;
978
979 d->decrypted[d->n_decrypted].name = name;
980 name = NULL;
981
982 d->decrypted[d->n_decrypted].device = cd;
983 cd = NULL;
984 d->n_decrypted++;
985
986 m->decrypted_node = node;
987 node = NULL;
988
989 return 0;
990 }
991 #endif
992
993 int dissected_image_decrypt(
994 DissectedImage *m,
995 const char *passphrase,
996 const void *root_hash,
997 size_t root_hash_size,
998 DissectImageFlags flags,
999 DecryptedImage **ret) {
1000
1001 #if HAVE_LIBCRYPTSETUP
1002 _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
1003 unsigned i;
1004 int r;
1005 #endif
1006
1007 assert(m);
1008 assert(root_hash || root_hash_size == 0);
1009
1010 /* Returns:
1011 *
1012 * = 0 → There was nothing to decrypt
1013 * > 0 → Decrypted successfully
1014 * -ENOKEY → There's something to decrypt but no key was supplied
1015 * -EKEYREJECTED → Passed key was not correct
1016 */
1017
1018 if (root_hash && root_hash_size < sizeof(sd_id128_t))
1019 return -EINVAL;
1020
1021 if (!m->encrypted && !m->verity) {
1022 *ret = NULL;
1023 return 0;
1024 }
1025
1026 #if HAVE_LIBCRYPTSETUP
1027 d = new0(DecryptedImage, 1);
1028 if (!d)
1029 return -ENOMEM;
1030
1031 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
1032 DissectedPartition *p = m->partitions + i;
1033 int k;
1034
1035 if (!p->found)
1036 continue;
1037
1038 r = decrypt_partition(p, passphrase, flags, d);
1039 if (r < 0)
1040 return r;
1041
1042 k = PARTITION_VERITY_OF(i);
1043 if (k >= 0) {
1044 r = verity_partition(p, m->partitions + k, root_hash, root_hash_size, flags, d);
1045 if (r < 0)
1046 return r;
1047 }
1048
1049 if (!p->decrypted_fstype && p->decrypted_node) {
1050 r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype);
1051 if (r < 0 && r != -EUCLEAN)
1052 return r;
1053 }
1054 }
1055
1056 *ret = d;
1057 d = NULL;
1058
1059 return 1;
1060 #else
1061 return -EOPNOTSUPP;
1062 #endif
1063 }
1064
1065 int dissected_image_decrypt_interactively(
1066 DissectedImage *m,
1067 const char *passphrase,
1068 const void *root_hash,
1069 size_t root_hash_size,
1070 DissectImageFlags flags,
1071 DecryptedImage **ret) {
1072
1073 _cleanup_strv_free_erase_ char **z = NULL;
1074 int n = 3, r;
1075
1076 if (passphrase)
1077 n--;
1078
1079 for (;;) {
1080 r = dissected_image_decrypt(m, passphrase, root_hash, root_hash_size, flags, ret);
1081 if (r >= 0)
1082 return r;
1083 if (r == -EKEYREJECTED)
1084 log_error_errno(r, "Incorrect passphrase, try again!");
1085 else if (r != -ENOKEY) {
1086 log_error_errno(r, "Failed to decrypt image: %m");
1087 return r;
1088 }
1089
1090 if (--n < 0) {
1091 log_error("Too many retries.");
1092 return -EKEYREJECTED;
1093 }
1094
1095 z = strv_free(z);
1096
1097 r = ask_password_auto("Please enter image passphrase!", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z);
1098 if (r < 0)
1099 return log_error_errno(r, "Failed to query for passphrase: %m");
1100
1101 passphrase = z[0];
1102 }
1103 }
1104
1105 #if HAVE_LIBCRYPTSETUP
1106 static int deferred_remove(DecryptedPartition *p) {
1107
1108 struct dm_ioctl dm = {
1109 .version = {
1110 DM_VERSION_MAJOR,
1111 DM_VERSION_MINOR,
1112 DM_VERSION_PATCHLEVEL
1113 },
1114 .data_size = sizeof(dm),
1115 .flags = DM_DEFERRED_REMOVE,
1116 };
1117
1118 _cleanup_close_ int fd = -1;
1119
1120 assert(p);
1121
1122 /* Unfortunately, libcryptsetup doesn't provide a proper API for this, hence call the ioctl() directly. */
1123
1124 fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
1125 if (fd < 0)
1126 return -errno;
1127
1128 strncpy(dm.name, p->name, sizeof(dm.name));
1129
1130 if (ioctl(fd, DM_DEV_REMOVE, &dm))
1131 return -errno;
1132
1133 return 0;
1134 }
1135 #endif
1136
1137 int decrypted_image_relinquish(DecryptedImage *d) {
1138
1139 #if HAVE_LIBCRYPTSETUP
1140 size_t i;
1141 int r;
1142 #endif
1143
1144 assert(d);
1145
1146 /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so
1147 * that we don't clean it up ourselves either anymore */
1148
1149 #if HAVE_LIBCRYPTSETUP
1150 for (i = 0; i < d->n_decrypted; i++) {
1151 DecryptedPartition *p = d->decrypted + i;
1152
1153 if (p->relinquished)
1154 continue;
1155
1156 r = deferred_remove(p);
1157 if (r < 0)
1158 return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
1159
1160 p->relinquished = true;
1161 }
1162 #endif
1163
1164 return 0;
1165 }
1166
1167 int root_hash_load(const char *image, void **ret, size_t *ret_size) {
1168 _cleanup_free_ char *text = NULL;
1169 _cleanup_free_ void *k = NULL;
1170 size_t l;
1171 int r;
1172
1173 assert(image);
1174 assert(ret);
1175 assert(ret_size);
1176
1177 if (is_device_path(image)) {
1178 /* If we are asked to load the root hash for a device node, exit early */
1179 *ret = NULL;
1180 *ret_size = 0;
1181 return 0;
1182 }
1183
1184 r = getxattr_malloc(image, "user.verity.roothash", &text, true);
1185 if (r < 0) {
1186 char *fn, *e, *n;
1187
1188 if (!IN_SET(r, -ENODATA, -EOPNOTSUPP, -ENOENT))
1189 return r;
1190
1191 fn = newa(char, strlen(image) + STRLEN(".roothash") + 1);
1192 n = stpcpy(fn, image);
1193 e = endswith(fn, ".raw");
1194 if (e)
1195 n = e;
1196
1197 strcpy(n, ".roothash");
1198
1199 r = read_one_line_file(fn, &text);
1200 if (r == -ENOENT) {
1201 *ret = NULL;
1202 *ret_size = 0;
1203 return 0;
1204 }
1205 if (r < 0)
1206 return r;
1207 }
1208
1209 r = unhexmem(text, strlen(text), &k, &l);
1210 if (r < 0)
1211 return r;
1212 if (l < sizeof(sd_id128_t))
1213 return -EINVAL;
1214
1215 *ret = k;
1216 *ret_size = l;
1217
1218 k = NULL;
1219
1220 return 1;
1221 }
1222
1223 int dissected_image_acquire_metadata(DissectedImage *m) {
1224
1225 enum {
1226 META_HOSTNAME,
1227 META_MACHINE_ID,
1228 META_MACHINE_INFO,
1229 META_OS_RELEASE,
1230 _META_MAX,
1231 };
1232
1233 static const char *const paths[_META_MAX] = {
1234 [META_HOSTNAME] = "/etc/hostname\0",
1235 [META_MACHINE_ID] = "/etc/machine-id\0",
1236 [META_MACHINE_INFO] = "/etc/machine-info\0",
1237 [META_OS_RELEASE] = "/etc/os-release\0/usr/lib/os-release\0",
1238 };
1239
1240 _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
1241 _cleanup_(rmdir_and_freep) char *t = NULL;
1242 _cleanup_(sigkill_waitp) pid_t child = 0;
1243 sd_id128_t machine_id = SD_ID128_NULL;
1244 _cleanup_free_ char *hostname = NULL;
1245 unsigned n_meta_initialized = 0, k;
1246 int fds[2 * _META_MAX], r;
1247
1248 BLOCK_SIGNALS(SIGCHLD);
1249
1250 assert(m);
1251
1252 for (; n_meta_initialized < _META_MAX; n_meta_initialized ++)
1253 if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
1254 r = -errno;
1255 goto finish;
1256 }
1257
1258 r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
1259 if (r < 0)
1260 goto finish;
1261
1262 r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS, &child);
1263 if (r < 0)
1264 goto finish;
1265 if (r == 0) {
1266 /* Make sure we never propagate to the host */
1267 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
1268 _exit(EXIT_FAILURE);
1269
1270 r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY);
1271 if (r < 0)
1272 _exit(EXIT_FAILURE);
1273
1274 for (k = 0; k < _META_MAX; k++) {
1275 _cleanup_close_ int fd = -1;
1276 const char *p;
1277
1278 fds[2*k] = safe_close(fds[2*k]);
1279
1280 NULSTR_FOREACH(p, paths[k]) {
1281 _cleanup_free_ char *q = NULL;
1282
1283 r = chase_symlinks(p, t, CHASE_PREFIX_ROOT, &q);
1284 if (r < 0)
1285 continue;
1286
1287 fd = open(q, O_RDONLY|O_CLOEXEC|O_NOCTTY);
1288 if (fd >= 0)
1289 break;
1290 }
1291 if (fd < 0)
1292 continue;
1293
1294 r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0);
1295 if (r < 0)
1296 _exit(EXIT_FAILURE);
1297
1298 fds[2*k+1] = safe_close(fds[2*k+1]);
1299 }
1300
1301 _exit(EXIT_SUCCESS);
1302 }
1303
1304 for (k = 0; k < _META_MAX; k++) {
1305 _cleanup_fclose_ FILE *f = NULL;
1306
1307 fds[2*k+1] = safe_close(fds[2*k+1]);
1308
1309 f = fdopen(fds[2*k], "re");
1310 if (!f) {
1311 r = -errno;
1312 goto finish;
1313 }
1314
1315 fds[2*k] = -1;
1316
1317 switch (k) {
1318
1319 case META_HOSTNAME:
1320 r = read_etc_hostname_stream(f, &hostname);
1321 if (r < 0)
1322 log_debug_errno(r, "Failed to read /etc/hostname: %m");
1323
1324 break;
1325
1326 case META_MACHINE_ID: {
1327 _cleanup_free_ char *line = NULL;
1328
1329 r = read_line(f, LONG_LINE_MAX, &line);
1330 if (r < 0)
1331 log_debug_errno(r, "Failed to read /etc/machine-id: %m");
1332 else if (r == 33) {
1333 r = sd_id128_from_string(line, &machine_id);
1334 if (r < 0)
1335 log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
1336 } else if (r == 0)
1337 log_debug("/etc/machine-id file is empty.");
1338 else
1339 log_debug("/etc/machine-id has unexpected length %i.", r);
1340
1341 break;
1342 }
1343
1344 case META_MACHINE_INFO:
1345 r = load_env_file_pairs(f, "machine-info", NULL, &machine_info);
1346 if (r < 0)
1347 log_debug_errno(r, "Failed to read /etc/machine-info: %m");
1348
1349 break;
1350
1351 case META_OS_RELEASE:
1352 r = load_env_file_pairs(f, "os-release", NULL, &os_release);
1353 if (r < 0)
1354 log_debug_errno(r, "Failed to read OS release file: %m");
1355
1356 break;
1357 }
1358 }
1359
1360 r = wait_for_terminate_and_check("(sd-dissect)", child, 0);
1361 child = 0;
1362 if (r < 0)
1363 goto finish;
1364 if (r != EXIT_SUCCESS)
1365 return -EPROTO;
1366
1367 free_and_replace(m->hostname, hostname);
1368 m->machine_id = machine_id;
1369 strv_free_and_replace(m->machine_info, machine_info);
1370 strv_free_and_replace(m->os_release, os_release);
1371
1372 finish:
1373 for (k = 0; k < n_meta_initialized; k++)
1374 safe_close_pair(fds + 2*k);
1375
1376 return r;
1377 }
1378
1379 static const char *const partition_designator_table[] = {
1380 [PARTITION_ROOT] = "root",
1381 [PARTITION_ROOT_SECONDARY] = "root-secondary",
1382 [PARTITION_HOME] = "home",
1383 [PARTITION_SRV] = "srv",
1384 [PARTITION_ESP] = "esp",
1385 [PARTITION_SWAP] = "swap",
1386 [PARTITION_ROOT_VERITY] = "root-verity",
1387 [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
1388 };
1389
1390 DEFINE_STRING_TABLE_LOOKUP(partition_designator, int);