]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/dissect-image.c
Merge pull request #7381 from poettering/cgroup-unified-delegate-rework
[thirdparty/systemd.git] / src / shared / dissect-image.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2016 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #if HAVE_LIBCRYPTSETUP
22 #include <libcryptsetup.h>
23 #ifndef CRYPT_LUKS
24 #define CRYPT_LUKS NULL
25 #endif
26 #endif
27 #include <sys/mount.h>
28 #include <sys/prctl.h>
29 #include <sys/wait.h>
30
31 #include "architecture.h"
32 #include "ask-password-api.h"
33 #include "blkid-util.h"
34 #include "copy.h"
35 #include "def.h"
36 #include "dissect-image.h"
37 #include "fd-util.h"
38 #include "fileio.h"
39 #include "fs-util.h"
40 #include "gpt.h"
41 #include "hexdecoct.h"
42 #include "hostname-util.h"
43 #include "id128-util.h"
44 #include "linux-3.13/dm-ioctl.h"
45 #include "mount-util.h"
46 #include "path-util.h"
47 #include "process-util.h"
48 #include "raw-clone.h"
49 #include "signal-util.h"
50 #include "stat-util.h"
51 #include "stdio-util.h"
52 #include "string-table.h"
53 #include "string-util.h"
54 #include "strv.h"
55 #include "udev-util.h"
56 #include "xattr-util.h"
57
58 _unused_ static int probe_filesystem(const char *node, char **ret_fstype) {
59 #if HAVE_BLKID
60 _cleanup_blkid_free_probe_ blkid_probe b = NULL;
61 const char *fstype;
62 int r;
63
64 b = blkid_new_probe_from_filename(node);
65 if (!b)
66 return -ENOMEM;
67
68 blkid_probe_enable_superblocks(b, 1);
69 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
70
71 errno = 0;
72 r = blkid_do_safeprobe(b);
73 if (IN_SET(r, -2, 1)) {
74 log_debug("Failed to identify any partition type on partition %s", node);
75 goto not_found;
76 }
77 if (r != 0)
78 return -errno ?: -EIO;
79
80 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
81
82 if (fstype) {
83 char *t;
84
85 t = strdup(fstype);
86 if (!t)
87 return -ENOMEM;
88
89 *ret_fstype = t;
90 return 1;
91 }
92
93 not_found:
94 *ret_fstype = NULL;
95 return 0;
96 #else
97 return -EOPNOTSUPP;
98 #endif
99 }
100
101 int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DissectedImage **ret) {
102
103 #if HAVE_BLKID
104 sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL;
105 _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
106 bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
107 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
108 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
109 _cleanup_blkid_free_probe_ blkid_probe b = NULL;
110 _cleanup_udev_unref_ struct udev *udev = NULL;
111 _cleanup_free_ char *generic_node = NULL;
112 sd_id128_t generic_uuid = SD_ID128_NULL;
113 const char *pttype = NULL;
114 struct udev_list_entry *first, *item;
115 blkid_partlist pl;
116 int r, generic_nr;
117 struct stat st;
118 unsigned i;
119
120 assert(fd >= 0);
121 assert(ret);
122 assert(root_hash || root_hash_size == 0);
123
124 /* Probes a disk image, and returns information about what it found in *ret.
125 *
126 * Returns -ENOPKG if no suitable partition table or file system could be found.
127 * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */
128
129 if (root_hash) {
130 /* If a root hash is supplied, then we use the root partition that has a UUID that match the first
131 * 128bit of the root hash. And we use the verity partition that has a UUID that match the final
132 * 128bit. */
133
134 if (root_hash_size < sizeof(sd_id128_t))
135 return -EINVAL;
136
137 memcpy(&root_uuid, root_hash, sizeof(sd_id128_t));
138 memcpy(&verity_uuid, (const uint8_t*) root_hash + root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
139
140 if (sd_id128_is_null(root_uuid))
141 return -EINVAL;
142 if (sd_id128_is_null(verity_uuid))
143 return -EINVAL;
144 }
145
146 if (fstat(fd, &st) < 0)
147 return -errno;
148
149 if (!S_ISBLK(st.st_mode))
150 return -ENOTBLK;
151
152 b = blkid_new_probe();
153 if (!b)
154 return -ENOMEM;
155
156 errno = 0;
157 r = blkid_probe_set_device(b, fd, 0, 0);
158 if (r != 0)
159 return -errno ?: -ENOMEM;
160
161 if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
162 /* Look for file system superblocks, unless we only shall look for GPT partition tables */
163 blkid_probe_enable_superblocks(b, 1);
164 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
165 }
166
167 blkid_probe_enable_partitions(b, 1);
168 blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
169
170 errno = 0;
171 r = blkid_do_safeprobe(b);
172 if (IN_SET(r, -2, 1)) {
173 log_debug("Failed to identify any partition table.");
174 return -ENOPKG;
175 }
176 if (r != 0)
177 return -errno ?: -EIO;
178
179 m = new0(DissectedImage, 1);
180 if (!m)
181 return -ENOMEM;
182
183 if (!(flags & DISSECT_IMAGE_GPT_ONLY) &&
184 (flags & DISSECT_IMAGE_REQUIRE_ROOT)) {
185 const char *usage = NULL;
186
187 (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
188 if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
189 _cleanup_free_ char *t = NULL, *n = NULL;
190 const char *fstype = NULL;
191
192 /* OK, we have found a file system, that's our root partition then. */
193 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
194
195 if (fstype) {
196 t = strdup(fstype);
197 if (!t)
198 return -ENOMEM;
199 }
200
201 if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0)
202 return -ENOMEM;
203
204 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
205 .found = true,
206 .rw = true,
207 .partno = -1,
208 .architecture = _ARCHITECTURE_INVALID,
209 .fstype = t,
210 .node = n,
211 };
212
213 t = n = NULL;
214
215 m->encrypted = streq(fstype, "crypto_LUKS");
216
217 *ret = m;
218 m = NULL;
219
220 return 0;
221 }
222 }
223
224 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
225 if (!pttype)
226 return -ENOPKG;
227
228 is_gpt = streq_ptr(pttype, "gpt");
229 is_mbr = streq_ptr(pttype, "dos");
230
231 if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
232 return -ENOPKG;
233
234 errno = 0;
235 pl = blkid_probe_get_partitions(b);
236 if (!pl)
237 return -errno ?: -ENOMEM;
238
239 udev = udev_new();
240 if (!udev)
241 return -errno;
242
243 d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
244 if (!d)
245 return -ENOMEM;
246
247 for (i = 0;; i++) {
248 int n, z;
249
250 if (i >= 10) {
251 log_debug("Kernel partitions never appeared.");
252 return -ENXIO;
253 }
254
255 e = udev_enumerate_new(udev);
256 if (!e)
257 return -errno;
258
259 r = udev_enumerate_add_match_parent(e, d);
260 if (r < 0)
261 return r;
262
263 r = udev_enumerate_scan_devices(e);
264 if (r < 0)
265 return r;
266
267 /* Count the partitions enumerated by the kernel */
268 n = 0;
269 first = udev_enumerate_get_list_entry(e);
270 udev_list_entry_foreach(item, first)
271 n++;
272
273 /* Count the partitions enumerated by blkid */
274 z = blkid_partlist_numof_partitions(pl);
275 if (n == z + 1)
276 break;
277 if (n > z + 1) {
278 log_debug("blkid and kernel partition list do not match.");
279 return -EIO;
280 }
281 if (n < z + 1) {
282 unsigned j = 0;
283
284 /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running
285 * or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a
286 * synchronous call that waits until probing is complete. */
287
288 for (;;) {
289 if (j++ > 20)
290 return -EBUSY;
291
292 if (ioctl(fd, BLKRRPART, 0) < 0) {
293 r = -errno;
294
295 if (r == -EINVAL) {
296 struct loop_info64 info;
297
298 /* If we are running on a loop device that has partition scanning off,
299 * return an explicit recognizable error about this, so that callers
300 * can generate a proper message explaining the situation. */
301
302 if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
303 log_debug("Device is loop device and partition scanning is off!");
304 return -EPROTONOSUPPORT;
305 }
306 }
307 if (r != -EBUSY)
308 return r;
309 } else
310 break;
311
312 /* If something else has the device open, such as an udev rule, the ioctl will return
313 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a
314 * bit, and try again.
315 *
316 * This is really something they should fix in the kernel! */
317
318 (void) usleep(50 * USEC_PER_MSEC);
319 }
320 }
321
322 e = udev_enumerate_unref(e);
323 }
324
325 first = udev_enumerate_get_list_entry(e);
326 udev_list_entry_foreach(item, first) {
327 _cleanup_udev_device_unref_ struct udev_device *q;
328 unsigned long long pflags;
329 blkid_partition pp;
330 const char *node, *sysname;
331 dev_t qn;
332 int nr;
333
334 q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
335 if (!q)
336 return -errno;
337
338 qn = udev_device_get_devnum(q);
339 if (major(qn) == 0)
340 continue;
341
342 if (st.st_rdev == qn)
343 continue;
344
345 /* Filter out weird MMC RPMB partitions, which cannot reasonably be read, see
346 * https://github.com/systemd/systemd/issues/5806 */
347 sysname = udev_device_get_sysname(q);
348 if (sysname && startswith(sysname, "mmcblk") && endswith(sysname, "rpmb"))
349 continue;
350
351 node = udev_device_get_devnode(q);
352 if (!node)
353 continue;
354
355 pp = blkid_partlist_devno_to_partition(pl, qn);
356 if (!pp)
357 continue;
358
359 pflags = blkid_partition_get_flags(pp);
360
361 nr = blkid_partition_get_partno(pp);
362 if (nr < 0)
363 continue;
364
365 if (is_gpt) {
366 int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID;
367 const char *stype, *sid, *fstype = NULL;
368 sd_id128_t type_id, id;
369 bool rw = true;
370
371 sid = blkid_partition_get_uuid(pp);
372 if (!sid)
373 continue;
374 if (sd_id128_from_string(sid, &id) < 0)
375 continue;
376
377 stype = blkid_partition_get_type_string(pp);
378 if (!stype)
379 continue;
380 if (sd_id128_from_string(stype, &type_id) < 0)
381 continue;
382
383 if (sd_id128_equal(type_id, GPT_HOME)) {
384
385 if (pflags & GPT_FLAG_NO_AUTO)
386 continue;
387
388 designator = PARTITION_HOME;
389 rw = !(pflags & GPT_FLAG_READ_ONLY);
390 } else if (sd_id128_equal(type_id, GPT_SRV)) {
391
392 if (pflags & GPT_FLAG_NO_AUTO)
393 continue;
394
395 designator = PARTITION_SRV;
396 rw = !(pflags & GPT_FLAG_READ_ONLY);
397 } else if (sd_id128_equal(type_id, GPT_ESP)) {
398
399 /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is not defined
400 * there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as recommended by the
401 * UEFI spec (See "12.3.3 Number and Location of System Partitions"). */
402
403 if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
404 continue;
405
406 designator = PARTITION_ESP;
407 fstype = "vfat";
408 }
409 #ifdef GPT_ROOT_NATIVE
410 else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
411
412 if (pflags & GPT_FLAG_NO_AUTO)
413 continue;
414
415 /* If a root ID is specified, ignore everything but the root id */
416 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
417 continue;
418
419 designator = PARTITION_ROOT;
420 architecture = native_architecture();
421 rw = !(pflags & GPT_FLAG_READ_ONLY);
422 } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
423
424 if (pflags & GPT_FLAG_NO_AUTO)
425 continue;
426
427 m->can_verity = true;
428
429 /* Ignore verity unless a root hash is specified */
430 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
431 continue;
432
433 designator = PARTITION_ROOT_VERITY;
434 fstype = "DM_verity_hash";
435 architecture = native_architecture();
436 rw = false;
437 }
438 #endif
439 #ifdef GPT_ROOT_SECONDARY
440 else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
441
442 if (pflags & GPT_FLAG_NO_AUTO)
443 continue;
444
445 /* If a root ID is specified, ignore everything but the root id */
446 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
447 continue;
448
449 designator = PARTITION_ROOT_SECONDARY;
450 architecture = SECONDARY_ARCHITECTURE;
451 rw = !(pflags & GPT_FLAG_READ_ONLY);
452 } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
453
454 if (pflags & GPT_FLAG_NO_AUTO)
455 continue;
456
457 m->can_verity = true;
458
459 /* Ignore verity unless root has is specified */
460 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
461 continue;
462
463 designator = PARTITION_ROOT_SECONDARY_VERITY;
464 fstype = "DM_verity_hash";
465 architecture = SECONDARY_ARCHITECTURE;
466 rw = false;
467 }
468 #endif
469 else if (sd_id128_equal(type_id, GPT_SWAP)) {
470
471 if (pflags & GPT_FLAG_NO_AUTO)
472 continue;
473
474 designator = PARTITION_SWAP;
475 fstype = "swap";
476 } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
477
478 if (pflags & GPT_FLAG_NO_AUTO)
479 continue;
480
481 if (generic_node)
482 multiple_generic = true;
483 else {
484 generic_nr = nr;
485 generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
486 generic_uuid = id;
487 generic_node = strdup(node);
488 if (!generic_node)
489 return -ENOMEM;
490 }
491 }
492
493 if (designator != _PARTITION_DESIGNATOR_INVALID) {
494 _cleanup_free_ char *t = NULL, *n = NULL;
495
496 /* First one wins */
497 if (m->partitions[designator].found)
498 continue;
499
500 if (fstype) {
501 t = strdup(fstype);
502 if (!t)
503 return -ENOMEM;
504 }
505
506 n = strdup(node);
507 if (!n)
508 return -ENOMEM;
509
510 m->partitions[designator] = (DissectedPartition) {
511 .found = true,
512 .partno = nr,
513 .rw = rw,
514 .architecture = architecture,
515 .node = n,
516 .fstype = t,
517 .uuid = id,
518 };
519
520 n = t = NULL;
521 }
522
523 } else if (is_mbr) {
524
525 if (pflags != 0x80) /* Bootable flag */
526 continue;
527
528 if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */
529 continue;
530
531 if (generic_node)
532 multiple_generic = true;
533 else {
534 generic_nr = nr;
535 generic_rw = true;
536 generic_node = strdup(node);
537 if (!generic_node)
538 return -ENOMEM;
539 }
540 }
541 }
542
543 if (!m->partitions[PARTITION_ROOT].found) {
544 /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not
545 * either, then check if there's a single generic one, and use that. */
546
547 if (m->partitions[PARTITION_ROOT_VERITY].found)
548 return -EADDRNOTAVAIL;
549
550 if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
551 m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
552 zero(m->partitions[PARTITION_ROOT_SECONDARY]);
553
554 m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
555 zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
556
557 } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) {
558
559 /* If the root has was set, then we won't fallback to a generic node, because the root hash
560 * decides */
561 if (root_hash)
562 return -EADDRNOTAVAIL;
563
564 /* If we didn't find a generic node, then we can't fix this up either */
565 if (!generic_node)
566 return -ENXIO;
567
568 /* If we didn't find a properly marked root partition, but we did find a single suitable
569 * generic Linux partition, then use this as root partition, if the caller asked for it. */
570 if (multiple_generic)
571 return -ENOTUNIQ;
572
573 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
574 .found = true,
575 .rw = generic_rw,
576 .partno = generic_nr,
577 .architecture = _ARCHITECTURE_INVALID,
578 .node = generic_node,
579 .uuid = generic_uuid,
580 };
581
582 generic_node = NULL;
583 }
584 }
585
586 if (root_hash) {
587 if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
588 return -EADDRNOTAVAIL;
589
590 /* If we found the primary root with the hash, then we definitely want to suppress any secondary root
591 * (which would be weird, after all the root hash should only be assigned to one pair of
592 * partitions... */
593 m->partitions[PARTITION_ROOT_SECONDARY].found = false;
594 m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
595
596 /* If we found a verity setup, then the root partition is necessarily read-only. */
597 m->partitions[PARTITION_ROOT].rw = false;
598
599 m->verity = true;
600 }
601
602 blkid_free_probe(b);
603 b = NULL;
604
605 /* Fill in file system types if we don't know them yet. */
606 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
607 DissectedPartition *p = m->partitions + i;
608
609 if (!p->found)
610 continue;
611
612 if (!p->fstype && p->node) {
613 r = probe_filesystem(p->node, &p->fstype);
614 if (r < 0)
615 return r;
616 }
617
618 if (streq_ptr(p->fstype, "crypto_LUKS"))
619 m->encrypted = true;
620
621 if (p->fstype && fstype_is_ro(p->fstype))
622 p->rw = false;
623 }
624
625 *ret = m;
626 m = NULL;
627
628 return 0;
629 #else
630 return -EOPNOTSUPP;
631 #endif
632 }
633
634 DissectedImage* dissected_image_unref(DissectedImage *m) {
635 unsigned i;
636
637 if (!m)
638 return NULL;
639
640 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
641 free(m->partitions[i].fstype);
642 free(m->partitions[i].node);
643 free(m->partitions[i].decrypted_fstype);
644 free(m->partitions[i].decrypted_node);
645 }
646
647 free(m->hostname);
648 strv_free(m->machine_info);
649 strv_free(m->os_release);
650
651 free(m);
652 return NULL;
653 }
654
655 static int is_loop_device(const char *path) {
656 char s[strlen("/sys/dev/block/") + DECIMAL_STR_MAX(dev_t) + 1 + DECIMAL_STR_MAX(dev_t) + strlen("/../loop/")];
657 struct stat st;
658
659 assert(path);
660
661 if (stat(path, &st) < 0)
662 return -errno;
663
664 if (!S_ISBLK(st.st_mode))
665 return -ENOTBLK;
666
667 xsprintf(s, "/sys/dev/block/%u:%u/loop/", major(st.st_rdev), minor(st.st_rdev));
668 if (access(s, F_OK) < 0) {
669 if (errno != ENOENT)
670 return -errno;
671
672 /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
673 xsprintf(s, "/sys/dev/block/%u:%u/../loop/", major(st.st_rdev), minor(st.st_rdev));
674 if (access(s, F_OK) < 0)
675 return errno == ENOENT ? false : -errno;
676 }
677
678 return true;
679 }
680
681 static int mount_partition(
682 DissectedPartition *m,
683 const char *where,
684 const char *directory,
685 DissectImageFlags flags) {
686
687 const char *p, *options = NULL, *node, *fstype;
688 _cleanup_free_ char *chased = NULL;
689 bool rw;
690 int r;
691
692 assert(m);
693 assert(where);
694
695 node = m->decrypted_node ?: m->node;
696 fstype = m->decrypted_fstype ?: m->fstype;
697
698 if (!m->found || !node || !fstype)
699 return 0;
700
701 /* Stacked encryption? Yuck */
702 if (streq_ptr(fstype, "crypto_LUKS"))
703 return -ELOOP;
704
705 rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY);
706
707 if (directory) {
708 r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased);
709 if (r < 0)
710 return r;
711
712 p = chased;
713 } else
714 p = where;
715
716 /* If requested, turn on discard support. */
717 if (fstype_can_discard(fstype) &&
718 ((flags & DISSECT_IMAGE_DISCARD) ||
719 ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node))))
720 options = "discard";
721
722 return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
723 }
724
725 int dissected_image_mount(DissectedImage *m, const char *where, DissectImageFlags flags) {
726 int r;
727
728 assert(m);
729 assert(where);
730
731 if (!m->partitions[PARTITION_ROOT].found)
732 return -ENXIO;
733
734 r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, flags);
735 if (r < 0)
736 return r;
737
738 r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", flags);
739 if (r < 0)
740 return r;
741
742 r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", flags);
743 if (r < 0)
744 return r;
745
746 if (m->partitions[PARTITION_ESP].found) {
747 const char *mp;
748
749 /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
750
751 FOREACH_STRING(mp, "/efi", "/boot") {
752 _cleanup_free_ char *p = NULL;
753
754 r = chase_symlinks(mp, where, CHASE_PREFIX_ROOT, &p);
755 if (r < 0)
756 continue;
757
758 r = dir_is_empty(p);
759 if (r > 0) {
760 r = mount_partition(m->partitions + PARTITION_ESP, where, mp, flags);
761 if (r < 0)
762 return r;
763 }
764 }
765 }
766
767 return 0;
768 }
769
770 #if HAVE_LIBCRYPTSETUP
/* Book-keeping for one device-mapper volume that was set up for a partition of an image */
struct DecryptedPartition {
        struct crypt_device *device;  /* libcryptsetup handle, released with crypt_free() */
        char *name;                   /* device-mapper name the volume was activated under */
        bool relinquished;            /* if true, the volume is not deactivated when the image is released */
};

typedef struct DecryptedPartition DecryptedPartition;

/* All device-mapper volumes set up for one image */
struct DecryptedImage {
        DecryptedPartition *decrypted; /* array of volumes, grown with GREEDY_REALLOC0() */
        size_t n_decrypted;            /* number of entries of the array that are in use */
        size_t n_allocated;            /* allocation size tracked for GREEDY_REALLOC0() */
};
782 #endif
783
784 DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
785 #if HAVE_LIBCRYPTSETUP
786 size_t i;
787 int r;
788
789 if (!d)
790 return NULL;
791
792 for (i = 0; i < d->n_decrypted; i++) {
793 DecryptedPartition *p = d->decrypted + i;
794
795 if (p->device && p->name && !p->relinquished) {
796 r = crypt_deactivate(p->device, p->name);
797 if (r < 0)
798 log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
799 }
800
801 if (p->device)
802 crypt_free(p->device);
803 free(p->name);
804 }
805
806 free(d);
807 #endif
808 return NULL;
809 }
810
811 #if HAVE_LIBCRYPTSETUP
812
813 static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
814 _cleanup_free_ char *name = NULL, *node = NULL;
815 const char *base;
816
817 assert(original_node);
818 assert(suffix);
819 assert(ret_name);
820 assert(ret_node);
821
822 base = strrchr(original_node, '/');
823 if (!base)
824 return -EINVAL;
825 base++;
826 if (isempty(base))
827 return -EINVAL;
828
829 name = strjoin(base, suffix);
830 if (!name)
831 return -ENOMEM;
832 if (!filename_is_valid(name))
833 return -EINVAL;
834
835 node = strjoin(crypt_get_dir(), "/", name);
836 if (!node)
837 return -ENOMEM;
838
839 *ret_name = name;
840 *ret_node = node;
841
842 name = node = NULL;
843 return 0;
844 }
845
846 static int decrypt_partition(
847 DissectedPartition *m,
848 const char *passphrase,
849 DissectImageFlags flags,
850 DecryptedImage *d) {
851
852 _cleanup_free_ char *node = NULL, *name = NULL;
853 struct crypt_device *cd;
854 int r;
855
856 assert(m);
857 assert(d);
858
859 if (!m->found || !m->node || !m->fstype)
860 return 0;
861
862 if (!streq(m->fstype, "crypto_LUKS"))
863 return 0;
864
865 r = make_dm_name_and_node(m->node, "-decrypted", &name, &node);
866 if (r < 0)
867 return r;
868
869 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
870 return -ENOMEM;
871
872 r = crypt_init(&cd, m->node);
873 if (r < 0)
874 return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
875
876 r = crypt_load(cd, CRYPT_LUKS, NULL);
877 if (r < 0) {
878 log_debug_errno(r, "Failed to load LUKS metadata: %m");
879 goto fail;
880 }
881
882 r = crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
883 ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
884 ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
885 if (r < 0)
886 log_debug_errno(r, "Failed to activate LUKS device: %m");
887 if (r == -EPERM) {
888 r = -EKEYREJECTED;
889 goto fail;
890 }
891 if (r < 0)
892 goto fail;
893
894 d->decrypted[d->n_decrypted].name = name;
895 name = NULL;
896
897 d->decrypted[d->n_decrypted].device = cd;
898 d->n_decrypted++;
899
900 m->decrypted_node = node;
901 node = NULL;
902
903 return 0;
904
905 fail:
906 crypt_free(cd);
907 return r;
908 }
909
910 static int verity_partition(
911 DissectedPartition *m,
912 DissectedPartition *v,
913 const void *root_hash,
914 size_t root_hash_size,
915 DissectImageFlags flags,
916 DecryptedImage *d) {
917
918 _cleanup_free_ char *node = NULL, *name = NULL;
919 struct crypt_device *cd;
920 int r;
921
922 assert(m);
923 assert(v);
924
925 if (!root_hash)
926 return 0;
927
928 if (!m->found || !m->node || !m->fstype)
929 return 0;
930 if (!v->found || !v->node || !v->fstype)
931 return 0;
932
933 if (!streq(v->fstype, "DM_verity_hash"))
934 return 0;
935
936 r = make_dm_name_and_node(m->node, "-verity", &name, &node);
937 if (r < 0)
938 return r;
939
940 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
941 return -ENOMEM;
942
943 r = crypt_init(&cd, v->node);
944 if (r < 0)
945 return r;
946
947 r = crypt_load(cd, CRYPT_VERITY, NULL);
948 if (r < 0)
949 goto fail;
950
951 r = crypt_set_data_device(cd, m->node);
952 if (r < 0)
953 goto fail;
954
955 r = crypt_activate_by_volume_key(cd, name, root_hash, root_hash_size, CRYPT_ACTIVATE_READONLY);
956 if (r < 0)
957 goto fail;
958
959 d->decrypted[d->n_decrypted].name = name;
960 name = NULL;
961
962 d->decrypted[d->n_decrypted].device = cd;
963 d->n_decrypted++;
964
965 m->decrypted_node = node;
966 node = NULL;
967
968 return 0;
969
970 fail:
971 crypt_free(cd);
972 return r;
973 }
974 #endif
975
976 int dissected_image_decrypt(
977 DissectedImage *m,
978 const char *passphrase,
979 const void *root_hash,
980 size_t root_hash_size,
981 DissectImageFlags flags,
982 DecryptedImage **ret) {
983
984 _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
985 #if HAVE_LIBCRYPTSETUP
986 unsigned i;
987 int r;
988 #endif
989
990 assert(m);
991 assert(root_hash || root_hash_size == 0);
992
993 /* Returns:
994 *
995 * = 0 → There was nothing to decrypt
996 * > 0 → Decrypted successfully
997 * -ENOKEY → There's something to decrypt but no key was supplied
998 * -EKEYREJECTED → Passed key was not correct
999 */
1000
1001 if (root_hash && root_hash_size < sizeof(sd_id128_t))
1002 return -EINVAL;
1003
1004 if (!m->encrypted && !m->verity) {
1005 *ret = NULL;
1006 return 0;
1007 }
1008
1009 #if HAVE_LIBCRYPTSETUP
1010 if (m->encrypted && !passphrase)
1011 return -ENOKEY;
1012
1013 d = new0(DecryptedImage, 1);
1014 if (!d)
1015 return -ENOMEM;
1016
1017 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
1018 DissectedPartition *p = m->partitions + i;
1019 int k;
1020
1021 if (!p->found)
1022 continue;
1023
1024 r = decrypt_partition(p, passphrase, flags, d);
1025 if (r < 0)
1026 return r;
1027
1028 k = PARTITION_VERITY_OF(i);
1029 if (k >= 0) {
1030 r = verity_partition(p, m->partitions + k, root_hash, root_hash_size, flags, d);
1031 if (r < 0)
1032 return r;
1033 }
1034
1035 if (!p->decrypted_fstype && p->decrypted_node) {
1036 r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype);
1037 if (r < 0)
1038 return r;
1039 }
1040 }
1041
1042 *ret = d;
1043 d = NULL;
1044
1045 return 1;
1046 #else
1047 return -EOPNOTSUPP;
1048 #endif
1049 }
1050
1051 int dissected_image_decrypt_interactively(
1052 DissectedImage *m,
1053 const char *passphrase,
1054 const void *root_hash,
1055 size_t root_hash_size,
1056 DissectImageFlags flags,
1057 DecryptedImage **ret) {
1058
1059 _cleanup_strv_free_erase_ char **z = NULL;
1060 int n = 3, r;
1061
1062 if (passphrase)
1063 n--;
1064
1065 for (;;) {
1066 r = dissected_image_decrypt(m, passphrase, root_hash, root_hash_size, flags, ret);
1067 if (r >= 0)
1068 return r;
1069 if (r == -EKEYREJECTED)
1070 log_error_errno(r, "Incorrect passphrase, try again!");
1071 else if (r != -ENOKEY) {
1072 log_error_errno(r, "Failed to decrypt image: %m");
1073 return r;
1074 }
1075
1076 if (--n < 0) {
1077 log_error("Too many retries.");
1078 return -EKEYREJECTED;
1079 }
1080
1081 z = strv_free(z);
1082
1083 r = ask_password_auto("Please enter image passphrase!", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z);
1084 if (r < 0)
1085 return log_error_errno(r, "Failed to query for passphrase: %m");
1086
1087 passphrase = z[0];
1088 }
1089 }
1090
1091 #if HAVE_LIBCRYPTSETUP
1092 static int deferred_remove(DecryptedPartition *p) {
1093
1094 struct dm_ioctl dm = {
1095 .version = {
1096 DM_VERSION_MAJOR,
1097 DM_VERSION_MINOR,
1098 DM_VERSION_PATCHLEVEL
1099 },
1100 .data_size = sizeof(dm),
1101 .flags = DM_DEFERRED_REMOVE,
1102 };
1103
1104 _cleanup_close_ int fd = -1;
1105
1106 assert(p);
1107
1108 /* Unfortunately, libcryptsetup doesn't provide a proper API for this, hence call the ioctl() directly. */
1109
1110 fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
1111 if (fd < 0)
1112 return -errno;
1113
1114 strncpy(dm.name, p->name, sizeof(dm.name));
1115
1116 if (ioctl(fd, DM_DEV_REMOVE, &dm))
1117 return -errno;
1118
1119 return 0;
1120 }
1121 #endif
1122
1123 int decrypted_image_relinquish(DecryptedImage *d) {
1124
1125 #if HAVE_LIBCRYPTSETUP
1126 size_t i;
1127 int r;
1128 #endif
1129
1130 assert(d);
1131
1132 /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so
1133 * that we don't clean it up ourselves either anymore */
1134
1135 #if HAVE_LIBCRYPTSETUP
1136 for (i = 0; i < d->n_decrypted; i++) {
1137 DecryptedPartition *p = d->decrypted + i;
1138
1139 if (p->relinquished)
1140 continue;
1141
1142 r = deferred_remove(p);
1143 if (r < 0)
1144 return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
1145
1146 p->relinquished = true;
1147 }
1148 #endif
1149
1150 return 0;
1151 }
1152
1153 int root_hash_load(const char *image, void **ret, size_t *ret_size) {
1154 _cleanup_free_ char *text = NULL;
1155 _cleanup_free_ void *k = NULL;
1156 size_t l;
1157 int r;
1158
1159 assert(image);
1160 assert(ret);
1161 assert(ret_size);
1162
1163 if (is_device_path(image)) {
1164 /* If we are asked to load the root hash for a device node, exit early */
1165 *ret = NULL;
1166 *ret_size = 0;
1167 return 0;
1168 }
1169
1170 r = getxattr_malloc(image, "user.verity.roothash", &text, true);
1171 if (r < 0) {
1172 char *fn, *e, *n;
1173
1174 if (!IN_SET(r, -ENODATA, -EOPNOTSUPP, -ENOENT))
1175 return r;
1176
1177 fn = newa(char, strlen(image) + strlen(".roothash") + 1);
1178 n = stpcpy(fn, image);
1179 e = endswith(fn, ".raw");
1180 if (e)
1181 n = e;
1182
1183 strcpy(n, ".roothash");
1184
1185 r = read_one_line_file(fn, &text);
1186 if (r == -ENOENT) {
1187 *ret = NULL;
1188 *ret_size = 0;
1189 return 0;
1190 }
1191 if (r < 0)
1192 return r;
1193 }
1194
1195 r = unhexmem(text, strlen(text), &k, &l);
1196 if (r < 0)
1197 return r;
1198 if (l < sizeof(sd_id128_t))
1199 return -EINVAL;
1200
1201 *ret = k;
1202 *ret_size = l;
1203
1204 k = NULL;
1205
1206 return 1;
1207 }
1208
1209 int dissected_image_acquire_metadata(DissectedImage *m) {
1210
1211 enum {
1212 META_HOSTNAME,
1213 META_MACHINE_ID,
1214 META_MACHINE_INFO,
1215 META_OS_RELEASE,
1216 _META_MAX,
1217 };
1218
1219 static const char *const paths[_META_MAX] = {
1220 [META_HOSTNAME] = "/etc/hostname\0",
1221 [META_MACHINE_ID] = "/etc/machine-id\0",
1222 [META_MACHINE_INFO] = "/etc/machine-info\0",
1223 [META_OS_RELEASE] = "/etc/os-release\0/usr/lib/os-release\0",
1224 };
1225
1226 _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
1227 _cleanup_(rmdir_and_freep) char *t = NULL;
1228 _cleanup_(sigkill_waitp) pid_t child = 0;
1229 sd_id128_t machine_id = SD_ID128_NULL;
1230 _cleanup_free_ char *hostname = NULL;
1231 unsigned n_meta_initialized = 0, k;
1232 int fds[2 * _META_MAX], r;
1233 siginfo_t si;
1234
1235 BLOCK_SIGNALS(SIGCHLD);
1236
1237 assert(m);
1238
1239 for (; n_meta_initialized < _META_MAX; n_meta_initialized ++)
1240 if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
1241 r = -errno;
1242 goto finish;
1243 }
1244
1245 r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
1246 if (r < 0)
1247 goto finish;
1248
1249 child = raw_clone(SIGCHLD|CLONE_NEWNS);
1250 if (child < 0) {
1251 r = -errno;
1252 goto finish;
1253 }
1254
1255 if (child == 0) {
1256
1257 (void) reset_all_signal_handlers();
1258 (void) reset_signal_mask();
1259 assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0);
1260
1261 /* Make sure we never propagate to the host */
1262 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
1263 _exit(EXIT_FAILURE);
1264
1265 r = dissected_image_mount(m, t, DISSECT_IMAGE_READ_ONLY);
1266 if (r < 0)
1267 _exit(EXIT_FAILURE);
1268
1269 for (k = 0; k < _META_MAX; k++) {
1270 _cleanup_close_ int fd = -1;
1271 const char *p;
1272
1273 fds[2*k] = safe_close(fds[2*k]);
1274
1275 NULSTR_FOREACH(p, paths[k]) {
1276 _cleanup_free_ char *q = NULL;
1277
1278 r = chase_symlinks(p, t, CHASE_PREFIX_ROOT, &q);
1279 if (r < 0)
1280 continue;
1281
1282 fd = open(q, O_RDONLY|O_CLOEXEC|O_NOCTTY);
1283 if (fd >= 0)
1284 break;
1285 }
1286 if (fd < 0)
1287 continue;
1288
1289 r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0);
1290 if (r < 0)
1291 _exit(EXIT_FAILURE);
1292
1293 fds[2*k+1] = safe_close(fds[2*k+1]);
1294 }
1295
1296 _exit(EXIT_SUCCESS);
1297 }
1298
1299 for (k = 0; k < _META_MAX; k++) {
1300 _cleanup_fclose_ FILE *f = NULL;
1301
1302 fds[2*k+1] = safe_close(fds[2*k+1]);
1303
1304 f = fdopen(fds[2*k], "re");
1305 if (!f) {
1306 r = -errno;
1307 goto finish;
1308 }
1309
1310 fds[2*k] = -1;
1311
1312 switch (k) {
1313
1314 case META_HOSTNAME:
1315 r = read_etc_hostname_stream(f, &hostname);
1316 if (r < 0)
1317 log_debug_errno(r, "Failed to read /etc/hostname: %m");
1318
1319 break;
1320
1321 case META_MACHINE_ID: {
1322 _cleanup_free_ char *line = NULL;
1323
1324 r = read_line(f, LONG_LINE_MAX, &line);
1325 if (r < 0)
1326 log_debug_errno(r, "Failed to read /etc/machine-id: %m");
1327 else if (r == 33) {
1328 r = sd_id128_from_string(line, &machine_id);
1329 if (r < 0)
1330 log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
1331 } else if (r == 0)
1332 log_debug("/etc/machine-id file is empty.");
1333 else
1334 log_debug("/etc/machine-id has unexpected length %i.", r);
1335
1336 break;
1337 }
1338
1339 case META_MACHINE_INFO:
1340 r = load_env_file_pairs(f, "machine-info", NULL, &machine_info);
1341 if (r < 0)
1342 log_debug_errno(r, "Failed to read /etc/machine-info: %m");
1343
1344 break;
1345
1346 case META_OS_RELEASE:
1347 r = load_env_file_pairs(f, "os-release", NULL, &os_release);
1348 if (r < 0)
1349 log_debug_errno(r, "Failed to read OS release file: %m");
1350
1351 break;
1352 }
1353 }
1354
1355 r = wait_for_terminate(child, &si);
1356 if (r < 0)
1357 goto finish;
1358 child = 0;
1359
1360 if (si.si_code != CLD_EXITED || si.si_status != EXIT_SUCCESS) {
1361 r = -EPROTO;
1362 goto finish;
1363 }
1364
1365 free_and_replace(m->hostname, hostname);
1366 m->machine_id = machine_id;
1367 strv_free_and_replace(m->machine_info, machine_info);
1368 strv_free_and_replace(m->os_release, os_release);
1369
1370 finish:
1371 for (k = 0; k < n_meta_initialized; k++)
1372 safe_close_pair(fds + 2*k);
1373
1374 return r;
1375 }
1376
1377 static const char *const partition_designator_table[] = {
1378 [PARTITION_ROOT] = "root",
1379 [PARTITION_ROOT_SECONDARY] = "root-secondary",
1380 [PARTITION_HOME] = "home",
1381 [PARTITION_SRV] = "srv",
1382 [PARTITION_ESP] = "esp",
1383 [PARTITION_SWAP] = "swap",
1384 [PARTITION_ROOT_VERITY] = "root-verity",
1385 [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
1386 };
1387
1388 DEFINE_STRING_TABLE_LOOKUP(partition_designator, int);