]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/dissect-image.c
tree-wide: use returned value from log_*_errno()
[thirdparty/systemd.git] / src / shared / dissect-image.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/mount.h>
4 #include <sys/prctl.h>
5 #include <sys/wait.h>
6
7 #include "sd-id128.h"
8
9 #include "architecture.h"
10 #include "ask-password-api.h"
11 #include "blkid-util.h"
12 #include "blockdev-util.h"
13 #include "copy.h"
14 #include "crypt-util.h"
15 #include "def.h"
16 #include "device-nodes.h"
17 #include "dissect-image.h"
18 #include "fd-util.h"
19 #include "fileio.h"
20 #include "fs-util.h"
21 #include "gpt.h"
22 #include "hexdecoct.h"
23 #include "hostname-util.h"
24 #include "id128-util.h"
25 #include "linux-3.13/dm-ioctl.h"
26 #include "missing.h"
27 #include "mount-util.h"
28 #include "os-util.h"
29 #include "path-util.h"
30 #include "process-util.h"
31 #include "raw-clone.h"
32 #include "signal-util.h"
33 #include "stat-util.h"
34 #include "stdio-util.h"
35 #include "string-table.h"
36 #include "string-util.h"
37 #include "strv.h"
38 #include "udev-util.h"
39 #include "user-util.h"
40 #include "xattr-util.h"
41
/* Probes 'node' with libblkid and reports the content type it finds.
 *
 * Returns 1 and stores a newly allocated string in *ret_fstype if a type was detected.
 * Returns 0 and stores NULL in *ret_fstype if nothing was detected.
 * Returns -EUCLEAN if the probe results are ambiguous, -EOPNOTSUPP if built without libblkid, and a
 * different negative errno-style error otherwise. On error *ret_fstype is left untouched. */
int probe_filesystem(const char *node, char **ret_fstype) {
#if HAVE_BLKID
        _cleanup_(blkid_free_probep) blkid_probe b = NULL;
        /* Must start out as NULL: the result of the "TYPE" lookup below is deliberately ignored, hence
         * the variable has to have a defined value in case the lookup does not fill it in. */
        const char *fstype = NULL;
        int r;

        errno = 0;
        b = blkid_new_probe_from_filename(node);
        if (!b)
                return -errno ?: -ENOMEM;

        blkid_probe_enable_superblocks(b, 1);
        blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);

        errno = 0;
        r = blkid_do_safeprobe(b);
        if (r == 1) {
                log_debug("No type detected on partition %s", node);
                goto not_found;
        }
        if (r == -2) {
                log_debug("Results ambiguous for partition %s", node);
                return -EUCLEAN;
        }
        if (r != 0)
                return -errno ?: -EIO;

        (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);

        if (fstype) {
                char *t;

                t = strdup(fstype);
                if (!t)
                        return -ENOMEM;

                *ret_fstype = t;
                return 1;
        }

not_found:
        *ret_fstype = NULL;
        return 0;
#else
        return -EOPNOTSUPP;
#endif
}
93
94 #if HAVE_BLKID
/* blkid does not list the RPMB and boot partitions of MMC devices, hence recognize them by their kernel
 * name: it starts with "mmcblk" and ends in "rpmb", "boot0" or "boot1".
 * See https://github.com/systemd/systemd/issues/5806. */
static bool device_is_mmc_special_partition(struct udev_device *d) {
        const char *name = udev_device_get_sysname(d);

        if (!name || !startswith(name, "mmcblk"))
                return false;

        if (endswith(name, "rpmb"))
                return true;
        if (endswith(name, "boot0"))
                return true;

        return !!endswith(name, "boot1");
}
104
/* Returns true if the udev device reports "block" as its subsystem, and false if it reports a different
 * subsystem or none at all. */
static bool device_is_block(struct udev_device *d) {
        const char *subsystem = udev_device_get_subsystem(d);

        return subsystem ? streq(subsystem, "block") : false;
}
114 #endif
115
116 int dissect_image(
117 int fd,
118 const void *root_hash,
119 size_t root_hash_size,
120 DissectImageFlags flags,
121 DissectedImage **ret) {
122
123 #if HAVE_BLKID
124 sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL;
125 _cleanup_(udev_enumerate_unrefp) struct udev_enumerate *e = NULL;
126 bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
127 _cleanup_(udev_device_unrefp) struct udev_device *d = NULL;
128 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
129 _cleanup_(blkid_free_probep) blkid_probe b = NULL;
130 _cleanup_(udev_unrefp) struct udev *udev = NULL;
131 _cleanup_free_ char *generic_node = NULL;
132 sd_id128_t generic_uuid = SD_ID128_NULL;
133 const char *pttype = NULL;
134 struct udev_list_entry *first, *item;
135 blkid_partlist pl;
136 int r, generic_nr;
137 struct stat st;
138 unsigned i;
139
140 assert(fd >= 0);
141 assert(ret);
142 assert(root_hash || root_hash_size == 0);
143
144 /* Probes a disk image, and returns information about what it found in *ret.
145 *
146 * Returns -ENOPKG if no suitable partition table or file system could be found.
147 * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */
148
149 if (root_hash) {
150 /* If a root hash is supplied, then we use the root partition that has a UUID that match the first
151 * 128bit of the root hash. And we use the verity partition that has a UUID that match the final
152 * 128bit. */
153
154 if (root_hash_size < sizeof(sd_id128_t))
155 return -EINVAL;
156
157 memcpy(&root_uuid, root_hash, sizeof(sd_id128_t));
158 memcpy(&verity_uuid, (const uint8_t*) root_hash + root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
159
160 if (sd_id128_is_null(root_uuid))
161 return -EINVAL;
162 if (sd_id128_is_null(verity_uuid))
163 return -EINVAL;
164 }
165
166 if (fstat(fd, &st) < 0)
167 return -errno;
168
169 if (!S_ISBLK(st.st_mode))
170 return -ENOTBLK;
171
172 b = blkid_new_probe();
173 if (!b)
174 return -ENOMEM;
175
176 errno = 0;
177 r = blkid_probe_set_device(b, fd, 0, 0);
178 if (r != 0)
179 return -errno ?: -ENOMEM;
180
181 if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
182 /* Look for file system superblocks, unless we only shall look for GPT partition tables */
183 blkid_probe_enable_superblocks(b, 1);
184 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
185 }
186
187 blkid_probe_enable_partitions(b, 1);
188 blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
189
190 errno = 0;
191 r = blkid_do_safeprobe(b);
192 if (IN_SET(r, -2, 1)) {
193 log_debug("Failed to identify any partition table.");
194 return -ENOPKG;
195 }
196 if (r != 0)
197 return -errno ?: -EIO;
198
199 m = new0(DissectedImage, 1);
200 if (!m)
201 return -ENOMEM;
202
203 if (!(flags & DISSECT_IMAGE_GPT_ONLY) &&
204 (flags & DISSECT_IMAGE_REQUIRE_ROOT)) {
205 const char *usage = NULL;
206
207 (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
208 if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
209 _cleanup_free_ char *t = NULL, *n = NULL;
210 const char *fstype = NULL;
211
212 /* OK, we have found a file system, that's our root partition then. */
213 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
214
215 if (fstype) {
216 t = strdup(fstype);
217 if (!t)
218 return -ENOMEM;
219 }
220
221 if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0)
222 return -ENOMEM;
223
224 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
225 .found = true,
226 .rw = true,
227 .partno = -1,
228 .architecture = _ARCHITECTURE_INVALID,
229 .fstype = TAKE_PTR(t),
230 .node = TAKE_PTR(n),
231 };
232
233 m->encrypted = streq(fstype, "crypto_LUKS");
234
235 *ret = TAKE_PTR(m);
236
237 return 0;
238 }
239 }
240
241 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
242 if (!pttype)
243 return -ENOPKG;
244
245 is_gpt = streq_ptr(pttype, "gpt");
246 is_mbr = streq_ptr(pttype, "dos");
247
248 if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
249 return -ENOPKG;
250
251 errno = 0;
252 pl = blkid_probe_get_partitions(b);
253 if (!pl)
254 return -errno ?: -ENOMEM;
255
256 udev = udev_new();
257 if (!udev)
258 return -errno;
259
260 d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
261 if (!d)
262 return -ENOMEM;
263
264 for (i = 0;; i++) {
265 int n, z;
266
267 if (i >= 10) {
268 log_debug("Kernel partitions never appeared.");
269 return -ENXIO;
270 }
271
272 e = udev_enumerate_new(udev);
273 if (!e)
274 return -errno;
275
276 r = udev_enumerate_add_match_parent(e, d);
277 if (r < 0)
278 return r;
279
280 r = udev_enumerate_scan_devices(e);
281 if (r < 0)
282 return r;
283
284 /* Count the partitions enumerated by the kernel */
285 n = 0;
286 first = udev_enumerate_get_list_entry(e);
287 udev_list_entry_foreach(item, first) {
288 _cleanup_(udev_device_unrefp) struct udev_device *q;
289 dev_t qn;
290
291 q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
292 if (!q)
293 return -errno;
294
295 qn = udev_device_get_devnum(q);
296 if (major(qn) == 0)
297 continue;
298
299 if (!device_is_block(q))
300 continue;
301
302 if (device_is_mmc_special_partition(q))
303 continue;
304 n++;
305 }
306
307 /* Count the partitions enumerated by blkid */
308 z = blkid_partlist_numof_partitions(pl);
309 if (n == z + 1)
310 break;
311 if (n > z + 1) {
312 log_debug("blkid and kernel partition list do not match.");
313 return -EIO;
314 }
315 if (n < z + 1) {
316 unsigned j = 0;
317
318 /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running
319 * or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a
320 * synchronous call that waits until probing is complete. */
321
322 for (;;) {
323 if (j++ > 20)
324 return -EBUSY;
325
326 if (ioctl(fd, BLKRRPART, 0) < 0) {
327 r = -errno;
328
329 if (r == -EINVAL) {
330 struct loop_info64 info;
331
332 /* If we are running on a loop device that has partition scanning off,
333 * return an explicit recognizable error about this, so that callers
334 * can generate a proper message explaining the situation. */
335
336 if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
337 log_debug("Device is loop device and partition scanning is off!");
338 return -EPROTONOSUPPORT;
339 }
340 }
341 if (r != -EBUSY)
342 return r;
343 } else
344 break;
345
346 /* If something else has the device open, such as an udev rule, the ioctl will return
347 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a
348 * bit, and try again.
349 *
350 * This is really something they should fix in the kernel! */
351
352 (void) usleep(50 * USEC_PER_MSEC);
353 }
354 }
355
356 e = udev_enumerate_unref(e);
357 }
358
359 first = udev_enumerate_get_list_entry(e);
360 udev_list_entry_foreach(item, first) {
361 _cleanup_(udev_device_unrefp) struct udev_device *q;
362 unsigned long long pflags;
363 blkid_partition pp;
364 const char *node;
365 dev_t qn;
366 int nr;
367
368 q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
369 if (!q)
370 return -errno;
371
372 qn = udev_device_get_devnum(q);
373 if (major(qn) == 0)
374 continue;
375
376 if (st.st_rdev == qn)
377 continue;
378
379 if (!device_is_block(q))
380 continue;
381
382 if (device_is_mmc_special_partition(q))
383 continue;
384
385 node = udev_device_get_devnode(q);
386 if (!node)
387 continue;
388
389 pp = blkid_partlist_devno_to_partition(pl, qn);
390 if (!pp)
391 continue;
392
393 pflags = blkid_partition_get_flags(pp);
394
395 nr = blkid_partition_get_partno(pp);
396 if (nr < 0)
397 continue;
398
399 if (is_gpt) {
400 int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID;
401 const char *stype, *sid, *fstype = NULL;
402 sd_id128_t type_id, id;
403 bool rw = true;
404
405 sid = blkid_partition_get_uuid(pp);
406 if (!sid)
407 continue;
408 if (sd_id128_from_string(sid, &id) < 0)
409 continue;
410
411 stype = blkid_partition_get_type_string(pp);
412 if (!stype)
413 continue;
414 if (sd_id128_from_string(stype, &type_id) < 0)
415 continue;
416
417 if (sd_id128_equal(type_id, GPT_HOME)) {
418
419 if (pflags & GPT_FLAG_NO_AUTO)
420 continue;
421
422 designator = PARTITION_HOME;
423 rw = !(pflags & GPT_FLAG_READ_ONLY);
424 } else if (sd_id128_equal(type_id, GPT_SRV)) {
425
426 if (pflags & GPT_FLAG_NO_AUTO)
427 continue;
428
429 designator = PARTITION_SRV;
430 rw = !(pflags & GPT_FLAG_READ_ONLY);
431 } else if (sd_id128_equal(type_id, GPT_ESP)) {
432
433 /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is not defined
434 * there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as recommended by the
435 * UEFI spec (See "12.3.3 Number and Location of System Partitions"). */
436
437 if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
438 continue;
439
440 designator = PARTITION_ESP;
441 fstype = "vfat";
442 }
443 #ifdef GPT_ROOT_NATIVE
444 else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
445
446 if (pflags & GPT_FLAG_NO_AUTO)
447 continue;
448
449 /* If a root ID is specified, ignore everything but the root id */
450 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
451 continue;
452
453 designator = PARTITION_ROOT;
454 architecture = native_architecture();
455 rw = !(pflags & GPT_FLAG_READ_ONLY);
456 } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
457
458 if (pflags & GPT_FLAG_NO_AUTO)
459 continue;
460
461 m->can_verity = true;
462
463 /* Ignore verity unless a root hash is specified */
464 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
465 continue;
466
467 designator = PARTITION_ROOT_VERITY;
468 fstype = "DM_verity_hash";
469 architecture = native_architecture();
470 rw = false;
471 }
472 #endif
473 #ifdef GPT_ROOT_SECONDARY
474 else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
475
476 if (pflags & GPT_FLAG_NO_AUTO)
477 continue;
478
479 /* If a root ID is specified, ignore everything but the root id */
480 if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
481 continue;
482
483 designator = PARTITION_ROOT_SECONDARY;
484 architecture = SECONDARY_ARCHITECTURE;
485 rw = !(pflags & GPT_FLAG_READ_ONLY);
486 } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
487
488 if (pflags & GPT_FLAG_NO_AUTO)
489 continue;
490
491 m->can_verity = true;
492
493 /* Ignore verity unless root has is specified */
494 if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
495 continue;
496
497 designator = PARTITION_ROOT_SECONDARY_VERITY;
498 fstype = "DM_verity_hash";
499 architecture = SECONDARY_ARCHITECTURE;
500 rw = false;
501 }
502 #endif
503 else if (sd_id128_equal(type_id, GPT_SWAP)) {
504
505 if (pflags & GPT_FLAG_NO_AUTO)
506 continue;
507
508 designator = PARTITION_SWAP;
509 fstype = "swap";
510 } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
511
512 if (pflags & GPT_FLAG_NO_AUTO)
513 continue;
514
515 if (generic_node)
516 multiple_generic = true;
517 else {
518 generic_nr = nr;
519 generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
520 generic_uuid = id;
521 generic_node = strdup(node);
522 if (!generic_node)
523 return -ENOMEM;
524 }
525 }
526
527 if (designator != _PARTITION_DESIGNATOR_INVALID) {
528 _cleanup_free_ char *t = NULL, *n = NULL;
529
530 /* First one wins */
531 if (m->partitions[designator].found)
532 continue;
533
534 if (fstype) {
535 t = strdup(fstype);
536 if (!t)
537 return -ENOMEM;
538 }
539
540 n = strdup(node);
541 if (!n)
542 return -ENOMEM;
543
544 m->partitions[designator] = (DissectedPartition) {
545 .found = true,
546 .partno = nr,
547 .rw = rw,
548 .architecture = architecture,
549 .node = TAKE_PTR(n),
550 .fstype = TAKE_PTR(t),
551 .uuid = id,
552 };
553 }
554
555 } else if (is_mbr) {
556
557 if (pflags != 0x80) /* Bootable flag */
558 continue;
559
560 if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */
561 continue;
562
563 if (generic_node)
564 multiple_generic = true;
565 else {
566 generic_nr = nr;
567 generic_rw = true;
568 generic_node = strdup(node);
569 if (!generic_node)
570 return -ENOMEM;
571 }
572 }
573 }
574
575 if (!m->partitions[PARTITION_ROOT].found) {
576 /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not
577 * either, then check if there's a single generic one, and use that. */
578
579 if (m->partitions[PARTITION_ROOT_VERITY].found)
580 return -EADDRNOTAVAIL;
581
582 if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
583 m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
584 zero(m->partitions[PARTITION_ROOT_SECONDARY]);
585
586 m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
587 zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
588
589 } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) {
590
591 /* If the root has was set, then we won't fallback to a generic node, because the root hash
592 * decides */
593 if (root_hash)
594 return -EADDRNOTAVAIL;
595
596 /* If we didn't find a generic node, then we can't fix this up either */
597 if (!generic_node)
598 return -ENXIO;
599
600 /* If we didn't find a properly marked root partition, but we did find a single suitable
601 * generic Linux partition, then use this as root partition, if the caller asked for it. */
602 if (multiple_generic)
603 return -ENOTUNIQ;
604
605 m->partitions[PARTITION_ROOT] = (DissectedPartition) {
606 .found = true,
607 .rw = generic_rw,
608 .partno = generic_nr,
609 .architecture = _ARCHITECTURE_INVALID,
610 .node = TAKE_PTR(generic_node),
611 .uuid = generic_uuid,
612 };
613 }
614 }
615
616 if (root_hash) {
617 if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
618 return -EADDRNOTAVAIL;
619
620 /* If we found the primary root with the hash, then we definitely want to suppress any secondary root
621 * (which would be weird, after all the root hash should only be assigned to one pair of
622 * partitions... */
623 m->partitions[PARTITION_ROOT_SECONDARY].found = false;
624 m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
625
626 /* If we found a verity setup, then the root partition is necessarily read-only. */
627 m->partitions[PARTITION_ROOT].rw = false;
628
629 m->verity = true;
630 }
631
632 blkid_free_probe(b);
633 b = NULL;
634
635 /* Fill in file system types if we don't know them yet. */
636 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
637 DissectedPartition *p = m->partitions + i;
638
639 if (!p->found)
640 continue;
641
642 if (!p->fstype && p->node) {
643 r = probe_filesystem(p->node, &p->fstype);
644 if (r < 0 && r != -EUCLEAN)
645 return r;
646 }
647
648 if (streq_ptr(p->fstype, "crypto_LUKS"))
649 m->encrypted = true;
650
651 if (p->fstype && fstype_is_ro(p->fstype))
652 p->rw = false;
653 }
654
655 *ret = TAKE_PTR(m);
656
657 return 0;
658 #else
659 return -EOPNOTSUPP;
660 #endif
661 }
662
663 DissectedImage* dissected_image_unref(DissectedImage *m) {
664 unsigned i;
665
666 if (!m)
667 return NULL;
668
669 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
670 free(m->partitions[i].fstype);
671 free(m->partitions[i].node);
672 free(m->partitions[i].decrypted_fstype);
673 free(m->partitions[i].decrypted_node);
674 }
675
676 free(m->hostname);
677 strv_free(m->machine_info);
678 strv_free(m->os_release);
679
680 return mfree(m);
681 }
682
/* Checks whether the block device node at 'path' is a loop device, or a partition of one, by testing for
 * a "loop" entry below the sysfs path that xsprintf_sys_block_path() generates for the device number.
 *
 * Returns > 0 if it is, 0 if it is not, -ENOTBLK if 'path' is not a block device node, and a different
 * negative errno-style error if stat() or access() fail for another reason. Since errors are negative
 * and hence non-zero, callers must not use the return value as a plain boolean. */
static int is_loop_device(const char *path) {
        char s[SYS_BLOCK_PATH_MAX("/../loop/")];
        struct stat st;

        assert(path);

        if (stat(path, &st) < 0)
                return -errno;

        if (!S_ISBLK(st.st_mode))
                return -ENOTBLK;

        /* Use st_rdev, i.e. the device the node refers to. st_dev would be the device of the file system
         * the node itself is stored on, which says nothing about the block device we are interested in. */
        xsprintf_sys_block_path(s, "/loop/", st.st_rdev);
        if (access(s, F_OK) < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
                xsprintf_sys_block_path(s, "/../loop/", st.st_rdev);
                if (access(s, F_OK) < 0)
                        return errno == ENOENT ? false : -errno;
        }

        return true;
}
708
709 static int mount_partition(
710 DissectedPartition *m,
711 const char *where,
712 const char *directory,
713 uid_t uid_shift,
714 DissectImageFlags flags) {
715
716 _cleanup_free_ char *chased = NULL, *options = NULL;
717 const char *p, *node, *fstype;
718 bool rw;
719 int r;
720
721 assert(m);
722 assert(where);
723
724 node = m->decrypted_node ?: m->node;
725 fstype = m->decrypted_fstype ?: m->fstype;
726
727 if (!m->found || !node || !fstype)
728 return 0;
729
730 /* Stacked encryption? Yuck */
731 if (streq_ptr(fstype, "crypto_LUKS"))
732 return -ELOOP;
733
734 rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY);
735
736 if (directory) {
737 r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased);
738 if (r < 0)
739 return r;
740
741 p = chased;
742 } else
743 p = where;
744
745 /* If requested, turn on discard support. */
746 if (fstype_can_discard(fstype) &&
747 ((flags & DISSECT_IMAGE_DISCARD) ||
748 ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node)))) {
749 options = strdup("discard");
750 if (!options)
751 return -ENOMEM;
752 }
753
754 if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) {
755 _cleanup_free_ char *uid_option = NULL;
756
757 if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
758 return -ENOMEM;
759
760 if (!strextend_with_separator(&options, ",", uid_option, NULL))
761 return -ENOMEM;
762 }
763
764 return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
765 }
766
767 int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
768 int r;
769
770 assert(m);
771 assert(where);
772
773 if (!m->partitions[PARTITION_ROOT].found)
774 return -ENXIO;
775
776 if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
777 r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
778 if (r < 0)
779 return r;
780
781 if (flags & DISSECT_IMAGE_VALIDATE_OS) {
782 r = path_is_os_tree(where);
783 if (r < 0)
784 return r;
785 if (r == 0)
786 return -EMEDIUMTYPE;
787 }
788 }
789
790 if ((flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY))
791 return 0;
792
793 r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
794 if (r < 0)
795 return r;
796
797 r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags);
798 if (r < 0)
799 return r;
800
801 if (m->partitions[PARTITION_ESP].found) {
802 const char *mp;
803
804 /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
805
806 FOREACH_STRING(mp, "/efi", "/boot") {
807 _cleanup_free_ char *p = NULL;
808
809 r = chase_symlinks(mp, where, CHASE_PREFIX_ROOT, &p);
810 if (r < 0)
811 continue;
812
813 r = dir_is_empty(p);
814 if (r > 0) {
815 r = mount_partition(m->partitions + PARTITION_ESP, where, mp, uid_shift, flags);
816 if (r < 0)
817 return r;
818 }
819 }
820 }
821
822 return 0;
823 }
824
825 #if HAVE_LIBCRYPTSETUP
/* One device-mapper device that was activated for a partition of a dissected image. */
typedef struct DecryptedPartition {
        struct crypt_device *device; /* libcryptsetup handle; released with crypt_free() on unref */
        char *name;                  /* device-mapper name, as passed to the crypt_activate_*() calls */
        bool relinquished;           /* true once deferred removal was requested; then not deactivated on unref */
} DecryptedPartition;
831
/* The device-mapper devices that were activated for one dissected image. */
struct DecryptedImage {
        DecryptedPartition *decrypted; /* array of activated devices, grown with GREEDY_REALLOC0() */
        size_t n_decrypted;            /* number of array entries in use */
        size_t n_allocated;            /* allocation bookkeeping passed to GREEDY_REALLOC0() */
};
837 #endif
838
839 DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
840 #if HAVE_LIBCRYPTSETUP
841 size_t i;
842 int r;
843
844 if (!d)
845 return NULL;
846
847 for (i = 0; i < d->n_decrypted; i++) {
848 DecryptedPartition *p = d->decrypted + i;
849
850 if (p->device && p->name && !p->relinquished) {
851 r = crypt_deactivate(p->device, p->name);
852 if (r < 0)
853 log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
854 }
855
856 if (p->device)
857 crypt_free(p->device);
858 free(p->name);
859 }
860
861 free(d);
862 #endif
863 return NULL;
864 }
865
866 #if HAVE_LIBCRYPTSETUP
867
868 static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
869 _cleanup_free_ char *name = NULL, *node = NULL;
870 const char *base;
871
872 assert(original_node);
873 assert(suffix);
874 assert(ret_name);
875 assert(ret_node);
876
877 base = strrchr(original_node, '/');
878 if (!base)
879 return -EINVAL;
880 base++;
881 if (isempty(base))
882 return -EINVAL;
883
884 name = strjoin(base, suffix);
885 if (!name)
886 return -ENOMEM;
887 if (!filename_is_valid(name))
888 return -EINVAL;
889
890 node = strjoin(crypt_get_dir(), "/", name);
891 if (!node)
892 return -ENOMEM;
893
894 *ret_name = TAKE_PTR(name);
895 *ret_node = TAKE_PTR(node);
896
897 return 0;
898 }
899
900 static int decrypt_partition(
901 DissectedPartition *m,
902 const char *passphrase,
903 DissectImageFlags flags,
904 DecryptedImage *d) {
905
906 _cleanup_free_ char *node = NULL, *name = NULL;
907 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
908 int r;
909
910 assert(m);
911 assert(d);
912
913 if (!m->found || !m->node || !m->fstype)
914 return 0;
915
916 if (!streq(m->fstype, "crypto_LUKS"))
917 return 0;
918
919 if (!passphrase)
920 return -ENOKEY;
921
922 r = make_dm_name_and_node(m->node, "-decrypted", &name, &node);
923 if (r < 0)
924 return r;
925
926 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
927 return -ENOMEM;
928
929 r = crypt_init(&cd, m->node);
930 if (r < 0)
931 return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
932
933 r = crypt_load(cd, CRYPT_LUKS, NULL);
934 if (r < 0)
935 return log_debug_errno(r, "Failed to load LUKS metadata: %m");
936
937 r = crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
938 ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
939 ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
940 if (r < 0) {
941 log_debug_errno(r, "Failed to activate LUKS device: %m");
942 return r == -EPERM ? -EKEYREJECTED : r;
943 }
944
945 d->decrypted[d->n_decrypted].name = TAKE_PTR(name);
946 d->decrypted[d->n_decrypted].device = TAKE_PTR(cd);
947 d->n_decrypted++;
948
949 m->decrypted_node = TAKE_PTR(node);
950
951 return 0;
952 }
953
954 static int verity_partition(
955 DissectedPartition *m,
956 DissectedPartition *v,
957 const void *root_hash,
958 size_t root_hash_size,
959 DissectImageFlags flags,
960 DecryptedImage *d) {
961
962 _cleanup_free_ char *node = NULL, *name = NULL;
963 _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
964 int r;
965
966 assert(m);
967 assert(v);
968
969 if (!root_hash)
970 return 0;
971
972 if (!m->found || !m->node || !m->fstype)
973 return 0;
974 if (!v->found || !v->node || !v->fstype)
975 return 0;
976
977 if (!streq(v->fstype, "DM_verity_hash"))
978 return 0;
979
980 r = make_dm_name_and_node(m->node, "-verity", &name, &node);
981 if (r < 0)
982 return r;
983
984 if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
985 return -ENOMEM;
986
987 r = crypt_init(&cd, v->node);
988 if (r < 0)
989 return r;
990
991 r = crypt_load(cd, CRYPT_VERITY, NULL);
992 if (r < 0)
993 return r;
994
995 r = crypt_set_data_device(cd, m->node);
996 if (r < 0)
997 return r;
998
999 r = crypt_activate_by_volume_key(cd, name, root_hash, root_hash_size, CRYPT_ACTIVATE_READONLY);
1000 if (r < 0)
1001 return r;
1002
1003 d->decrypted[d->n_decrypted].name = TAKE_PTR(name);
1004 d->decrypted[d->n_decrypted].device = TAKE_PTR(cd);
1005 d->n_decrypted++;
1006
1007 m->decrypted_node = TAKE_PTR(node);
1008
1009 return 0;
1010 }
1011 #endif
1012
1013 int dissected_image_decrypt(
1014 DissectedImage *m,
1015 const char *passphrase,
1016 const void *root_hash,
1017 size_t root_hash_size,
1018 DissectImageFlags flags,
1019 DecryptedImage **ret) {
1020
1021 #if HAVE_LIBCRYPTSETUP
1022 _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
1023 unsigned i;
1024 int r;
1025 #endif
1026
1027 assert(m);
1028 assert(root_hash || root_hash_size == 0);
1029
1030 /* Returns:
1031 *
1032 * = 0 → There was nothing to decrypt
1033 * > 0 → Decrypted successfully
1034 * -ENOKEY → There's something to decrypt but no key was supplied
1035 * -EKEYREJECTED → Passed key was not correct
1036 */
1037
1038 if (root_hash && root_hash_size < sizeof(sd_id128_t))
1039 return -EINVAL;
1040
1041 if (!m->encrypted && !m->verity) {
1042 *ret = NULL;
1043 return 0;
1044 }
1045
1046 #if HAVE_LIBCRYPTSETUP
1047 d = new0(DecryptedImage, 1);
1048 if (!d)
1049 return -ENOMEM;
1050
1051 for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
1052 DissectedPartition *p = m->partitions + i;
1053 int k;
1054
1055 if (!p->found)
1056 continue;
1057
1058 r = decrypt_partition(p, passphrase, flags, d);
1059 if (r < 0)
1060 return r;
1061
1062 k = PARTITION_VERITY_OF(i);
1063 if (k >= 0) {
1064 r = verity_partition(p, m->partitions + k, root_hash, root_hash_size, flags, d);
1065 if (r < 0)
1066 return r;
1067 }
1068
1069 if (!p->decrypted_fstype && p->decrypted_node) {
1070 r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype);
1071 if (r < 0 && r != -EUCLEAN)
1072 return r;
1073 }
1074 }
1075
1076 *ret = TAKE_PTR(d);
1077
1078 return 1;
1079 #else
1080 return -EOPNOTSUPP;
1081 #endif
1082 }
1083
1084 int dissected_image_decrypt_interactively(
1085 DissectedImage *m,
1086 const char *passphrase,
1087 const void *root_hash,
1088 size_t root_hash_size,
1089 DissectImageFlags flags,
1090 DecryptedImage **ret) {
1091
1092 _cleanup_strv_free_erase_ char **z = NULL;
1093 int n = 3, r;
1094
1095 if (passphrase)
1096 n--;
1097
1098 for (;;) {
1099 r = dissected_image_decrypt(m, passphrase, root_hash, root_hash_size, flags, ret);
1100 if (r >= 0)
1101 return r;
1102 if (r == -EKEYREJECTED)
1103 log_error_errno(r, "Incorrect passphrase, try again!");
1104 else if (r != -ENOKEY)
1105 return log_error_errno(r, "Failed to decrypt image: %m");
1106
1107 if (--n < 0) {
1108 log_error("Too many retries.");
1109 return -EKEYREJECTED;
1110 }
1111
1112 z = strv_free(z);
1113
1114 r = ask_password_auto("Please enter image passphrase!", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z);
1115 if (r < 0)
1116 return log_error_errno(r, "Failed to query for passphrase: %m");
1117
1118 passphrase = z[0];
1119 }
1120 }
1121
1122 #if HAVE_LIBCRYPTSETUP
1123 static int deferred_remove(DecryptedPartition *p) {
1124
1125 struct dm_ioctl dm = {
1126 .version = {
1127 DM_VERSION_MAJOR,
1128 DM_VERSION_MINOR,
1129 DM_VERSION_PATCHLEVEL
1130 },
1131 .data_size = sizeof(dm),
1132 .flags = DM_DEFERRED_REMOVE,
1133 };
1134
1135 _cleanup_close_ int fd = -1;
1136
1137 assert(p);
1138
1139 /* Unfortunately, libcryptsetup doesn't provide a proper API for this, hence call the ioctl() directly. */
1140
1141 fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
1142 if (fd < 0)
1143 return -errno;
1144
1145 strncpy(dm.name, p->name, sizeof(dm.name));
1146
1147 if (ioctl(fd, DM_DEV_REMOVE, &dm))
1148 return -errno;
1149
1150 return 0;
1151 }
1152 #endif
1153
1154 int decrypted_image_relinquish(DecryptedImage *d) {
1155
1156 #if HAVE_LIBCRYPTSETUP
1157 size_t i;
1158 int r;
1159 #endif
1160
1161 assert(d);
1162
1163 /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so
1164 * that we don't clean it up ourselves either anymore */
1165
1166 #if HAVE_LIBCRYPTSETUP
1167 for (i = 0; i < d->n_decrypted; i++) {
1168 DecryptedPartition *p = d->decrypted + i;
1169
1170 if (p->relinquished)
1171 continue;
1172
1173 r = deferred_remove(p);
1174 if (r < 0)
1175 return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
1176
1177 p->relinquished = true;
1178 }
1179 #endif
1180
1181 return 0;
1182 }
1183
1184 int root_hash_load(const char *image, void **ret, size_t *ret_size) {
1185 _cleanup_free_ char *text = NULL;
1186 _cleanup_free_ void *k = NULL;
1187 size_t l;
1188 int r;
1189
1190 assert(image);
1191 assert(ret);
1192 assert(ret_size);
1193
1194 if (is_device_path(image)) {
1195 /* If we are asked to load the root hash for a device node, exit early */
1196 *ret = NULL;
1197 *ret_size = 0;
1198 return 0;
1199 }
1200
1201 r = getxattr_malloc(image, "user.verity.roothash", &text, true);
1202 if (r < 0) {
1203 char *fn, *e, *n;
1204
1205 if (!IN_SET(r, -ENODATA, -EOPNOTSUPP, -ENOENT))
1206 return r;
1207
1208 fn = newa(char, strlen(image) + STRLEN(".roothash") + 1);
1209 n = stpcpy(fn, image);
1210 e = endswith(fn, ".raw");
1211 if (e)
1212 n = e;
1213
1214 strcpy(n, ".roothash");
1215
1216 r = read_one_line_file(fn, &text);
1217 if (r == -ENOENT) {
1218 *ret = NULL;
1219 *ret_size = 0;
1220 return 0;
1221 }
1222 if (r < 0)
1223 return r;
1224 }
1225
1226 r = unhexmem(text, strlen(text), &k, &l);
1227 if (r < 0)
1228 return r;
1229 if (l < sizeof(sd_id128_t))
1230 return -EINVAL;
1231
1232 *ret = TAKE_PTR(k);
1233 *ret_size = l;
1234
1235 return 1;
1236 }
1237
1238 int dissected_image_acquire_metadata(DissectedImage *m) {
1239
1240 enum {
1241 META_HOSTNAME,
1242 META_MACHINE_ID,
1243 META_MACHINE_INFO,
1244 META_OS_RELEASE,
1245 _META_MAX,
1246 };
1247
1248 static const char *const paths[_META_MAX] = {
1249 [META_HOSTNAME] = "/etc/hostname\0",
1250 [META_MACHINE_ID] = "/etc/machine-id\0",
1251 [META_MACHINE_INFO] = "/etc/machine-info\0",
1252 [META_OS_RELEASE] = "/etc/os-release\0/usr/lib/os-release\0",
1253 };
1254
1255 _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
1256 _cleanup_(rmdir_and_freep) char *t = NULL;
1257 _cleanup_(sigkill_waitp) pid_t child = 0;
1258 sd_id128_t machine_id = SD_ID128_NULL;
1259 _cleanup_free_ char *hostname = NULL;
1260 unsigned n_meta_initialized = 0, k;
1261 int fds[2 * _META_MAX], r;
1262
1263 BLOCK_SIGNALS(SIGCHLD);
1264
1265 assert(m);
1266
1267 for (; n_meta_initialized < _META_MAX; n_meta_initialized ++)
1268 if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
1269 r = -errno;
1270 goto finish;
1271 }
1272
1273 r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
1274 if (r < 0)
1275 goto finish;
1276
1277 r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, &child);
1278 if (r < 0)
1279 goto finish;
1280 if (r == 0) {
1281 r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_MOUNT_ROOT_ONLY|DISSECT_IMAGE_VALIDATE_OS);
1282 if (r < 0) {
1283 log_debug_errno(r, "Failed to mount dissected image: %m");
1284 _exit(EXIT_FAILURE);
1285 }
1286
1287 for (k = 0; k < _META_MAX; k++) {
1288 _cleanup_close_ int fd = -1;
1289 const char *p;
1290
1291 fds[2*k] = safe_close(fds[2*k]);
1292
1293 NULSTR_FOREACH(p, paths[k]) {
1294 fd = chase_symlinks_and_open(p, t, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
1295 if (fd >= 0)
1296 break;
1297 }
1298 if (fd < 0) {
1299 log_debug_errno(fd, "Failed to read %s file of image, ignoring: %m", paths[k]);
1300 continue;
1301 }
1302
1303 r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0);
1304 if (r < 0)
1305 _exit(EXIT_FAILURE);
1306
1307 fds[2*k+1] = safe_close(fds[2*k+1]);
1308 }
1309
1310 _exit(EXIT_SUCCESS);
1311 }
1312
1313 for (k = 0; k < _META_MAX; k++) {
1314 _cleanup_fclose_ FILE *f = NULL;
1315
1316 fds[2*k+1] = safe_close(fds[2*k+1]);
1317
1318 f = fdopen(fds[2*k], "re");
1319 if (!f) {
1320 r = -errno;
1321 goto finish;
1322 }
1323
1324 fds[2*k] = -1;
1325
1326 switch (k) {
1327
1328 case META_HOSTNAME:
1329 r = read_etc_hostname_stream(f, &hostname);
1330 if (r < 0)
1331 log_debug_errno(r, "Failed to read /etc/hostname: %m");
1332
1333 break;
1334
1335 case META_MACHINE_ID: {
1336 _cleanup_free_ char *line = NULL;
1337
1338 r = read_line(f, LONG_LINE_MAX, &line);
1339 if (r < 0)
1340 log_debug_errno(r, "Failed to read /etc/machine-id: %m");
1341 else if (r == 33) {
1342 r = sd_id128_from_string(line, &machine_id);
1343 if (r < 0)
1344 log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
1345 } else if (r == 0)
1346 log_debug("/etc/machine-id file is empty.");
1347 else
1348 log_debug("/etc/machine-id has unexpected length %i.", r);
1349
1350 break;
1351 }
1352
1353 case META_MACHINE_INFO:
1354 r = load_env_file_pairs(f, "machine-info", NULL, &machine_info);
1355 if (r < 0)
1356 log_debug_errno(r, "Failed to read /etc/machine-info: %m");
1357
1358 break;
1359
1360 case META_OS_RELEASE:
1361 r = load_env_file_pairs(f, "os-release", NULL, &os_release);
1362 if (r < 0)
1363 log_debug_errno(r, "Failed to read OS release file: %m");
1364
1365 break;
1366 }
1367 }
1368
1369 r = wait_for_terminate_and_check("(sd-dissect)", child, 0);
1370 child = 0;
1371 if (r < 0)
1372 goto finish;
1373 if (r != EXIT_SUCCESS)
1374 return -EPROTO;
1375
1376 free_and_replace(m->hostname, hostname);
1377 m->machine_id = machine_id;
1378 strv_free_and_replace(m->machine_info, machine_info);
1379 strv_free_and_replace(m->os_release, os_release);
1380
1381 finish:
1382 for (k = 0; k < n_meta_initialized; k++)
1383 safe_close_pair(fds + 2*k);
1384
1385 return r;
1386 }
1387
1388 int dissect_image_and_warn(
1389 int fd,
1390 const char *name,
1391 const void *root_hash,
1392 size_t root_hash_size,
1393 DissectImageFlags flags,
1394 DissectedImage **ret) {
1395
1396 _cleanup_free_ char *buffer = NULL;
1397 int r;
1398
1399 if (!name) {
1400 r = fd_get_path(fd, &buffer);
1401 if (r < 0)
1402 return r;
1403
1404 name = buffer;
1405 }
1406
1407 r = dissect_image(fd, root_hash, root_hash_size, flags, ret);
1408
1409 switch (r) {
1410
1411 case -EOPNOTSUPP:
1412 return log_error_errno(r, "Dissecting images is not supported, compiled without blkid support.");
1413
1414 case -ENOPKG:
1415 return log_error_errno(r, "Couldn't identify a suitable partition table or file system in '%s'.", name);
1416
1417 case -EADDRNOTAVAIL:
1418 return log_error_errno(r, "No root partition for specified root hash found in '%s'.", name);
1419
1420 case -ENOTUNIQ:
1421 return log_error_errno(r, "Multiple suitable root partitions found in image '%s'.", name);
1422
1423 case -ENXIO:
1424 return log_error_errno(r, "No suitable root partition found in image '%s'.", name);
1425
1426 case -EPROTONOSUPPORT:
1427 return log_error_errno(r, "Device '%s' is loopback block device with partition scanning turned off, please turn it on.", name);
1428
1429 default:
1430 if (r < 0)
1431 return log_error_errno(r, "Failed to dissect image '%s': %m", name);
1432
1433 return r;
1434 }
1435 }
1436
1437 static const char *const partition_designator_table[] = {
1438 [PARTITION_ROOT] = "root",
1439 [PARTITION_ROOT_SECONDARY] = "root-secondary",
1440 [PARTITION_HOME] = "home",
1441 [PARTITION_SRV] = "srv",
1442 [PARTITION_ESP] = "esp",
1443 [PARTITION_SWAP] = "swap",
1444 [PARTITION_ROOT_VERITY] = "root-verity",
1445 [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
1446 };
1447
1448 DEFINE_STRING_TABLE_LOOKUP(partition_designator, int);