]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/partition/repart.c
Merge pull request #29711 from berrange/tests-silverblue
[thirdparty/systemd.git] / src / partition / repart.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #if HAVE_VALGRIND_MEMCHECK_H
4 #include <valgrind/memcheck.h>
5 #endif
6
7 #include <fcntl.h>
8 #include <getopt.h>
9 #include <linux/fs.h>
10 #include <linux/loop.h>
11 #include <sys/file.h>
12 #include <sys/ioctl.h>
13 #include <sys/stat.h>
14
15 #include "sd-device.h"
16 #include "sd-id128.h"
17
18 #include "alloc-util.h"
19 #include "blkid-util.h"
20 #include "blockdev-util.h"
21 #include "btrfs-util.h"
22 #include "build.h"
23 #include "chase.h"
24 #include "conf-files.h"
25 #include "conf-parser.h"
26 #include "constants.h"
27 #include "cryptsetup-util.h"
28 #include "device-util.h"
29 #include "devnum-util.h"
30 #include "dirent-util.h"
31 #include "efivars.h"
32 #include "errno-util.h"
33 #include "fd-util.h"
34 #include "fdisk-util.h"
35 #include "fileio.h"
36 #include "format-table.h"
37 #include "format-util.h"
38 #include "fs-util.h"
39 #include "glyph-util.h"
40 #include "gpt.h"
41 #include "hexdecoct.h"
42 #include "hmac.h"
43 #include "id128-util.h"
44 #include "initrd-util.h"
45 #include "io-util.h"
46 #include "json.h"
47 #include "list.h"
48 #include "loop-util.h"
49 #include "main-func.h"
50 #include "mkdir.h"
51 #include "mkfs-util.h"
52 #include "mount-util.h"
53 #include "mountpoint-util.h"
54 #include "nulstr-util.h"
55 #include "openssl-util.h"
56 #include "parse-argument.h"
57 #include "parse-helpers.h"
58 #include "pretty-print.h"
59 #include "proc-cmdline.h"
60 #include "process-util.h"
61 #include "random-util.h"
62 #include "resize-fs.h"
63 #include "rm-rf.h"
64 #include "sort-util.h"
65 #include "specifier.h"
66 #include "stdio-util.h"
67 #include "string-table.h"
68 #include "string-util.h"
69 #include "strv.h"
70 #include "sync-util.h"
71 #include "terminal-util.h"
72 #include "tmpfile-util.h"
73 #include "tpm2-pcr.h"
74 #include "tpm2-util.h"
75 #include "user-util.h"
76 #include "utf8.h"
77
/* Fallback minimum partition size (10M), used when nothing else is configured. */
#define DEFAULT_MIN_SIZE (10ULL * 1024ULL * 1024ULL)

/* Absolute floor for the size of a newly created partition. */
#define HARD_MIN_SIZE 4096ULL

/* Fixed budget for a verity signature partition: we know up front that we never put more than this
 * into one. */
#define VERITY_SIG_SIZE (HARD_MIN_SIZE * 4ULL)

/* When creating a GPT disk label libfdisk takes slightly more than 1M off the disk size. */
#define GPT_METADATA_SIZE (1044ULL * 1024ULL)

/* With default settings LUKS2 metadata takes 16M off the partition size. */
#define LUKS2_METADATA_SIZE (16ULL * 1024ULL * 1024ULL)

/* Offline LUKS2 encryption requires some extra free space at the end of the partition. */
#define LUKS2_METADATA_KEEP_FREE (LUKS2_METADATA_SIZE * 2ULL)

/* LUKS2 volume key size (512/8). */
#define VOLUME_KEY_SIZE (512ULL / 8ULL)

/* Default filesystem sector size. 4K is used because, as long as partitions are aligned to 4K, the
 * resulting filesystems remain compatible with sector sizes 512, 1024 and 2048 as well. */
#define DEFAULT_FILESYSTEM_SECTOR_SIZE 4096ULL

/* NUL-separated list of directory names, terminated by an empty entry. */
#define APIVFS_TMP_DIRS_NULSTR "proc\0sys\0dev\0tmp\0run\0var/tmp\0"
104
105 /* Note: When growing and placing new partitions we always align to 4K sector size. It's how newer hard disks
106 * are designed, and if everything is aligned to that performance is best. And for older hard disks with 512B
107 * sector size devices were generally assumed to have an even number of sectors, hence at the worst we'll
108 * waste 3K per partition, which is probably fine. */
109
/* Policy for dealing with a disk that has no partition table yet. */
typedef enum EmptyMode {
        /* no choice has been made yet */
        EMPTY_UNSET,
        /* refuse empty disks, never create a partition table */
        EMPTY_REFUSE,
        /* allow empty disks, create partition table if necessary */
        EMPTY_ALLOW,
        /* require an empty disk, create a partition table */
        EMPTY_REQUIRE,
        /* make disk empty, erase everything, create a partition table always */
        EMPTY_FORCE,
        /* create disk as loopback file, create a partition table always */
        EMPTY_CREATE,

        /* number of valid modes; also sizes empty_mode_table[] */
        _EMPTY_MODE_MAX,
        _EMPTY_MODE_INVALID = -EINVAL,
} EmptyMode;
120
/* How the list in arg_filter_partitions is interpreted by partition_type_exclude().
 *
 * Note that the enum tag ("FilterPartitionType") and the typedef name ("FilterPartitionsType") are
 * spelled differently; the rest of the file uses the typedef name. */
typedef enum FilterPartitionType {
        /* no filtering: nothing is excluded */
        FILTER_PARTITIONS_NONE,
        /* partition types on the list are excluded */
        FILTER_PARTITIONS_EXCLUDE,
        /* partition types NOT on the list are excluded */
        FILTER_PARTITIONS_INCLUDE,

        _FILTER_PARTITIONS_MAX,
        _FILTER_PARTITIONS_INVALID = -EINVAL,
} FilterPartitionsType;
128
129 static EmptyMode arg_empty = EMPTY_UNSET;
130 static bool arg_dry_run = true;
131 static const char *arg_node = NULL;
132 static char *arg_root = NULL;
133 static char *arg_image = NULL;
134 static char **arg_definitions = NULL;
135 static bool arg_discard = true;
136 static bool arg_can_factory_reset = false;
137 static int arg_factory_reset = -1;
138 static sd_id128_t arg_seed = SD_ID128_NULL;
139 static bool arg_randomize = false;
140 static int arg_pretty = -1;
141 static uint64_t arg_size = UINT64_MAX;
142 static bool arg_size_auto = false;
143 static JsonFormatFlags arg_json_format_flags = JSON_FORMAT_OFF;
144 static PagerFlags arg_pager_flags = 0;
145 static bool arg_legend = true;
146 static void *arg_key = NULL;
147 static size_t arg_key_size = 0;
148 static EVP_PKEY *arg_private_key = NULL;
149 static X509 *arg_certificate = NULL;
150 static char *arg_tpm2_device = NULL;
151 static Tpm2PCRValue *arg_tpm2_hash_pcr_values = NULL;
152 static size_t arg_tpm2_n_hash_pcr_values = 0;
153 static char *arg_tpm2_public_key = NULL;
154 static uint32_t arg_tpm2_public_key_pcr_mask = 0;
155 static bool arg_split = false;
156 static GptPartitionType *arg_filter_partitions = NULL;
157 static size_t arg_n_filter_partitions = 0;
158 static FilterPartitionsType arg_filter_partitions_type = FILTER_PARTITIONS_NONE;
159 static GptPartitionType *arg_defer_partitions = NULL;
160 static size_t arg_n_defer_partitions = 0;
161 static uint64_t arg_sector_size = 0;
162 static ImagePolicy *arg_image_policy = NULL;
163 static Architecture arg_architecture = _ARCHITECTURE_INVALID;
164 static int arg_offline = -1;
165 static char **arg_copy_from = NULL;
166 static char *arg_copy_source = NULL;
167 static char *arg_make_ddi = NULL;
168
/* Release heap-allocated settings when the program exits. */
STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
STATIC_DESTRUCTOR_REGISTER(arg_definitions, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_key, erase_and_freep);
STATIC_DESTRUCTOR_REGISTER(arg_private_key, EVP_PKEY_freep);
STATIC_DESTRUCTOR_REGISTER(arg_certificate, X509_freep);
STATIC_DESTRUCTOR_REGISTER(arg_tpm2_device, freep);
STATIC_DESTRUCTOR_REGISTER(arg_tpm2_hash_pcr_values, freep);
STATIC_DESTRUCTOR_REGISTER(arg_tpm2_public_key, freep);
STATIC_DESTRUCTOR_REGISTER(arg_filter_partitions, freep);
/* The defer list has the same type and a parallel element counter as the filter list above, so it is
 * released the same way instead of being leaked at exit.
 * NOTE(review): assumes arg_defer_partitions is heap-allocated by the option parser, like
 * arg_filter_partitions — confirm. */
STATIC_DESTRUCTOR_REGISTER(arg_defer_partitions, freep);
STATIC_DESTRUCTOR_REGISTER(arg_image_policy, image_policy_freep);
STATIC_DESTRUCTOR_REGISTER(arg_copy_from, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_copy_source, freep);
STATIC_DESTRUCTOR_REGISTER(arg_make_ddi, freep);
183
/* Forward declaration: Partition refers to FreeArea and vice versa. */
typedef struct FreeArea FreeArea;

/* Encryption setting of a partition; the names used in configuration live in encrypt_mode_table[]. */
typedef enum EncryptMode {
        ENCRYPT_OFF,            /* "off" */
        ENCRYPT_KEY_FILE,       /* "key-file" */
        ENCRYPT_TPM2,           /* "tpm2" */
        ENCRYPT_KEY_FILE_TPM2,  /* "key-file+tpm2" */

        _ENCRYPT_MODE_MAX,
        _ENCRYPT_MODE_INVALID = -EINVAL,
} EncryptMode;
194
/* Verity role of a partition. The values also index Partition.siblings[], and _VERITY_MODE_MAX sizes
 * that array. */
typedef enum VerityMode {
        VERITY_OFF,   /* "off" */
        VERITY_DATA,  /* "data" */
        VERITY_HASH,  /* "hash" */
        VERITY_SIG,   /* "signature" */

        _VERITY_MODE_MAX,
        _VERITY_MODE_INVALID = -EINVAL,
} VerityMode;
203
/* Minimization setting of a partition; the names used in configuration live in
 * minimize_mode_table[]. */
typedef enum MinimizeMode {
        MINIMIZE_OFF,    /* "off" */
        MINIMIZE_BEST,   /* "best" */
        MINIMIZE_GUESS,  /* "guess" */

        _MINIMIZE_MODE_MAX,
        _MINIMIZE_MODE_INVALID = -EINVAL,
} MinimizeMode;
211
212 typedef struct Partition {
213 char *definition_path;
214 char **drop_in_files;
215
216 GptPartitionType type;
217 sd_id128_t current_uuid, new_uuid;
218 bool new_uuid_is_set;
219 char *current_label, *new_label;
220 sd_id128_t fs_uuid, luks_uuid, verity_uuid;
221 uint8_t verity_salt[SHA256_DIGEST_SIZE];
222
223 bool dropped;
224 bool factory_reset;
225 int32_t priority;
226
227 uint32_t weight, padding_weight;
228
229 uint64_t current_size, new_size;
230 uint64_t size_min, size_max;
231
232 uint64_t current_padding, new_padding;
233 uint64_t padding_min, padding_max;
234
235 uint64_t partno;
236 uint64_t offset;
237
238 struct fdisk_partition *current_partition;
239 struct fdisk_partition *new_partition;
240 FreeArea *padding_area;
241 FreeArea *allocated_to_area;
242
243 char *copy_blocks_path;
244 bool copy_blocks_path_is_our_file;
245 bool copy_blocks_auto;
246 const char *copy_blocks_root;
247 int copy_blocks_fd;
248 uint64_t copy_blocks_offset;
249 uint64_t copy_blocks_size;
250
251 char *format;
252 char **copy_files;
253 char **exclude_files_source;
254 char **exclude_files_target;
255 char **make_directories;
256 char **subvolumes;
257 EncryptMode encrypt;
258 VerityMode verity;
259 char *verity_match_key;
260 MinimizeMode minimize;
261 uint64_t verity_data_block_size;
262 uint64_t verity_hash_block_size;
263
264 uint64_t gpt_flags;
265 int no_auto;
266 int read_only;
267 int growfs;
268
269 struct iovec roothash;
270
271 char *split_name_format;
272 char *split_path;
273
274 struct Partition *siblings[_VERITY_MODE_MAX];
275
276 LIST_FIELDS(struct Partition, partitions);
277 } Partition;
278
/* A partition without a definition file is not managed by us. */
#define PARTITION_IS_FOREIGN(p) ((p)->definition_path == NULL)
/* A partition with a current libfdisk object is already present on disk. */
#define PARTITION_EXISTS(p) ((p)->current_partition != NULL)
281
282 struct FreeArea {
283 Partition *after;
284 uint64_t size;
285 uint64_t allocated;
286 };
287
288 typedef struct Context {
289 LIST_HEAD(Partition, partitions);
290 size_t n_partitions;
291
292 FreeArea **free_areas;
293 size_t n_free_areas;
294
295 uint64_t start, end, total;
296
297 struct fdisk_context *fdisk_context;
298 uint64_t sector_size, grain_size, fs_sector_size;
299
300 sd_id128_t seed;
301
302 char *node;
303 bool node_is_our_file;
304 int backing_fd;
305
306 bool from_scratch;
307 } Context;
308
309 static const char *empty_mode_table[_EMPTY_MODE_MAX] = {
310 [EMPTY_UNSET] = "unset",
311 [EMPTY_REFUSE] = "refuse",
312 [EMPTY_ALLOW] = "allow",
313 [EMPTY_REQUIRE] = "require",
314 [EMPTY_FORCE] = "force",
315 [EMPTY_CREATE] = "create",
316 };
317
318 static const char *encrypt_mode_table[_ENCRYPT_MODE_MAX] = {
319 [ENCRYPT_OFF] = "off",
320 [ENCRYPT_KEY_FILE] = "key-file",
321 [ENCRYPT_TPM2] = "tpm2",
322 [ENCRYPT_KEY_FILE_TPM2] = "key-file+tpm2",
323 };
324
325 static const char *verity_mode_table[_VERITY_MODE_MAX] = {
326 [VERITY_OFF] = "off",
327 [VERITY_DATA] = "data",
328 [VERITY_HASH] = "hash",
329 [VERITY_SIG] = "signature",
330 };
331
332 static const char *minimize_mode_table[_MINIMIZE_MODE_MAX] = {
333 [MINIMIZE_OFF] = "off",
334 [MINIMIZE_BEST] = "best",
335 [MINIMIZE_GUESS] = "guess",
336 };
337
338 DEFINE_PRIVATE_STRING_TABLE_LOOKUP(empty_mode, EmptyMode);
339 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(encrypt_mode, EncryptMode, ENCRYPT_KEY_FILE);
340 DEFINE_PRIVATE_STRING_TABLE_LOOKUP(verity_mode, VerityMode);
341 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(minimize_mode, MinimizeMode, MINIMIZE_BEST);
342
/* Returns the largest multiple of p that is not larger than v. p must not be zero. */
static uint64_t round_down_size(uint64_t v, uint64_t p) {
        const uint64_t remainder = v % p;

        return v - remainder;
}
346
/* Returns the smallest multiple of p that is not smaller than v, or UINT64_MAX if that multiple
 * cannot be represented in 64 bits. */
static uint64_t round_up_size(uint64_t v, uint64_t p) {
        const uint64_t n_units = DIV_ROUND_UP(v, p);

        /* n_units * p would wrap around: saturate instead */
        return n_units > UINT64_MAX / p ? UINT64_MAX : n_units * p;
}
356
357 static Partition *partition_new(void) {
358 Partition *p;
359
360 p = new(Partition, 1);
361 if (!p)
362 return NULL;
363
364 *p = (Partition) {
365 .weight = 1000,
366 .padding_weight = 0,
367 .current_size = UINT64_MAX,
368 .new_size = UINT64_MAX,
369 .size_min = UINT64_MAX,
370 .size_max = UINT64_MAX,
371 .current_padding = UINT64_MAX,
372 .new_padding = UINT64_MAX,
373 .padding_min = UINT64_MAX,
374 .padding_max = UINT64_MAX,
375 .partno = UINT64_MAX,
376 .offset = UINT64_MAX,
377 .copy_blocks_fd = -EBADF,
378 .copy_blocks_offset = UINT64_MAX,
379 .copy_blocks_size = UINT64_MAX,
380 .no_auto = -1,
381 .read_only = -1,
382 .growfs = -1,
383 .verity_data_block_size = UINT64_MAX,
384 .verity_hash_block_size = UINT64_MAX,
385 };
386
387 return p;
388 }
389
390 static Partition* partition_free(Partition *p) {
391 if (!p)
392 return NULL;
393
394 free(p->current_label);
395 free(p->new_label);
396 free(p->definition_path);
397 strv_free(p->drop_in_files);
398
399 if (p->current_partition)
400 fdisk_unref_partition(p->current_partition);
401 if (p->new_partition)
402 fdisk_unref_partition(p->new_partition);
403
404 if (p->copy_blocks_path_is_our_file)
405 unlink_and_free(p->copy_blocks_path);
406 else
407 free(p->copy_blocks_path);
408 safe_close(p->copy_blocks_fd);
409
410 free(p->format);
411 strv_free(p->copy_files);
412 strv_free(p->exclude_files_source);
413 strv_free(p->exclude_files_target);
414 strv_free(p->make_directories);
415 strv_free(p->subvolumes);
416 free(p->verity_match_key);
417
418 iovec_done(&p->roothash);
419
420 free(p->split_name_format);
421 unlink_and_free(p->split_path);
422
423 return mfree(p);
424 }
425
426 static void partition_foreignize(Partition *p) {
427 assert(p);
428 assert(PARTITION_EXISTS(p));
429
430 /* Reset several parameters set through definition file to make the partition foreign. */
431
432 p->definition_path = mfree(p->definition_path);
433 p->drop_in_files = strv_free(p->drop_in_files);
434
435 p->copy_blocks_path = mfree(p->copy_blocks_path);
436 p->copy_blocks_fd = safe_close(p->copy_blocks_fd);
437 p->copy_blocks_root = NULL;
438
439 p->format = mfree(p->format);
440 p->copy_files = strv_free(p->copy_files);
441 p->exclude_files_source = strv_free(p->exclude_files_source);
442 p->exclude_files_target = strv_free(p->exclude_files_target);
443 p->make_directories = strv_free(p->make_directories);
444 p->subvolumes = strv_free(p->subvolumes);
445 p->verity_match_key = mfree(p->verity_match_key);
446
447 p->priority = 0;
448 p->weight = 1000;
449 p->padding_weight = 0;
450 p->size_min = UINT64_MAX;
451 p->size_max = UINT64_MAX;
452 p->padding_min = UINT64_MAX;
453 p->padding_max = UINT64_MAX;
454 p->no_auto = -1;
455 p->read_only = -1;
456 p->growfs = -1;
457 p->verity = VERITY_OFF;
458 }
459
460 static bool partition_type_exclude(const GptPartitionType *type) {
461 if (arg_filter_partitions_type == FILTER_PARTITIONS_NONE)
462 return false;
463
464 for (size_t i = 0; i < arg_n_filter_partitions; i++)
465 if (sd_id128_equal(type->uuid, arg_filter_partitions[i].uuid))
466 return arg_filter_partitions_type == FILTER_PARTITIONS_EXCLUDE;
467
468 return arg_filter_partitions_type == FILTER_PARTITIONS_INCLUDE;
469 }
470
471 static bool partition_type_defer(const GptPartitionType *type) {
472 for (size_t i = 0; i < arg_n_defer_partitions; i++)
473 if (sd_id128_equal(type->uuid, arg_defer_partitions[i].uuid))
474 return true;
475
476 return false;
477 }
478
479 static Partition* partition_unlink_and_free(Context *context, Partition *p) {
480 if (!p)
481 return NULL;
482
483 LIST_REMOVE(partitions, context->partitions, p);
484
485 assert(context->n_partitions > 0);
486 context->n_partitions--;
487
488 return partition_free(p);
489 }
490
491 DEFINE_TRIVIAL_CLEANUP_FUNC(Partition*, partition_free);
492
493 static Context *context_new(sd_id128_t seed) {
494 Context *context;
495
496 context = new(Context, 1);
497 if (!context)
498 return NULL;
499
500 *context = (Context) {
501 .start = UINT64_MAX,
502 .end = UINT64_MAX,
503 .total = UINT64_MAX,
504 .seed = seed,
505 };
506
507 return context;
508 }
509
510 static void context_free_free_areas(Context *context) {
511 assert(context);
512
513 for (size_t i = 0; i < context->n_free_areas; i++)
514 free(context->free_areas[i]);
515
516 context->free_areas = mfree(context->free_areas);
517 context->n_free_areas = 0;
518 }
519
520 static Context *context_free(Context *context) {
521 if (!context)
522 return NULL;
523
524 while (context->partitions)
525 partition_unlink_and_free(context, context->partitions);
526 assert(context->n_partitions == 0);
527
528 context_free_free_areas(context);
529
530 if (context->fdisk_context)
531 fdisk_unref_context(context->fdisk_context);
532
533 safe_close(context->backing_fd);
534 if (context->node_is_our_file)
535 unlink_and_free(context->node);
536 else
537 free(context->node);
538
539 return mfree(context);
540 }
541
542 DEFINE_TRIVIAL_CLEANUP_FUNC(Context*, context_free);
543
544 static int context_add_free_area(
545 Context *context,
546 uint64_t size,
547 Partition *after) {
548
549 FreeArea *a;
550
551 assert(context);
552 assert(!after || !after->padding_area);
553
554 if (!GREEDY_REALLOC(context->free_areas, context->n_free_areas + 1))
555 return -ENOMEM;
556
557 a = new(FreeArea, 1);
558 if (!a)
559 return -ENOMEM;
560
561 *a = (FreeArea) {
562 .size = size,
563 .after = after,
564 };
565
566 context->free_areas[context->n_free_areas++] = a;
567
568 if (after)
569 after->padding_area = a;
570
571 return 0;
572 }
573
574 static void partition_drop_or_foreignize(Partition *p) {
575 if (!p || p->dropped || PARTITION_IS_FOREIGN(p))
576 return;
577
578 if (PARTITION_EXISTS(p)) {
579 log_info("Can't grow existing partition %s of priority %" PRIi32 ", ignoring.",
580 strna(p->current_label ?: p->new_label), p->priority);
581
582 /* Handle the partition as foreign. Do not set dropped flag. */
583 partition_foreignize(p);
584 } else {
585 log_info("Can't fit partition %s of priority %" PRIi32 ", dropping.",
586 p->definition_path, p->priority);
587
588 p->dropped = true;
589 p->allocated_to_area = NULL;
590 }
591 }
592
593 static bool context_drop_or_foreignize_one_priority(Context *context) {
594 int32_t priority = 0;
595
596 LIST_FOREACH(partitions, p, context->partitions) {
597 if (p->dropped)
598 continue;
599
600 priority = MAX(priority, p->priority);
601 }
602
603 /* Refuse to drop partitions with 0 or negative priorities or partitions of priorities that have at
604 * least one existing priority */
605 if (priority <= 0)
606 return false;
607
608 LIST_FOREACH(partitions, p, context->partitions) {
609 if (p->priority < priority)
610 continue;
611
612 partition_drop_or_foreignize(p);
613
614 /* We ensure that all verity sibling partitions have the same priority, so it's safe
615 * to drop all siblings here as well. */
616
617 for (VerityMode mode = VERITY_OFF + 1; mode < _VERITY_MODE_MAX; mode++)
618 partition_drop_or_foreignize(p->siblings[mode]);
619 }
620
621 return true;
622 }
623
624 static uint64_t partition_min_size(const Context *context, const Partition *p) {
625 uint64_t sz;
626
627 assert(context);
628 assert(p);
629
630 /* Calculate the disk space we really need at minimum for this partition. If the partition already
631 * exists the current size is what we really need. If it doesn't exist yet refuse to allocate less
632 * than 4K.
633 *
634 * DEFAULT_MIN_SIZE is the default SizeMin= we configure if nothing else is specified. */
635
636 if (PARTITION_IS_FOREIGN(p)) {
637 /* Don't allow changing size of partitions not managed by us */
638 assert(p->current_size != UINT64_MAX);
639 return p->current_size;
640 }
641
642 if (p->verity == VERITY_SIG)
643 return VERITY_SIG_SIZE;
644
645 sz = p->current_size != UINT64_MAX ? p->current_size : HARD_MIN_SIZE;
646
647 if (!PARTITION_EXISTS(p)) {
648 uint64_t d = 0;
649
650 if (p->encrypt != ENCRYPT_OFF)
651 d += round_up_size(LUKS2_METADATA_KEEP_FREE, context->grain_size);
652
653 if (p->copy_blocks_size != UINT64_MAX)
654 d += round_up_size(p->copy_blocks_size, context->grain_size);
655 else if (p->format || p->encrypt != ENCRYPT_OFF) {
656 uint64_t f;
657
658 /* If we shall synthesize a file system, take minimal fs size into account (assumed to be 4K if not known) */
659 f = p->format ? round_up_size(minimal_size_by_fs_name(p->format), context->grain_size) : UINT64_MAX;
660 d += f == UINT64_MAX ? context->grain_size : f;
661 }
662
663 if (d > sz)
664 sz = d;
665 }
666
667 return MAX(round_up_size(p->size_min != UINT64_MAX ? p->size_min : DEFAULT_MIN_SIZE, context->grain_size), sz);
668 }
669
670 static uint64_t partition_max_size(const Context *context, const Partition *p) {
671 uint64_t sm;
672
673 /* Calculate how large the partition may become at max. This is generally the configured maximum
674 * size, except when it already exists and is larger than that. In that case it's the existing size,
675 * since we never want to shrink partitions. */
676
677 assert(context);
678 assert(p);
679
680 if (PARTITION_IS_FOREIGN(p)) {
681 /* Don't allow changing size of partitions not managed by us */
682 assert(p->current_size != UINT64_MAX);
683 return p->current_size;
684 }
685
686 if (p->verity == VERITY_SIG)
687 return VERITY_SIG_SIZE;
688
689 if (p->size_max == UINT64_MAX)
690 return UINT64_MAX;
691
692 sm = round_down_size(p->size_max, context->grain_size);
693
694 if (p->current_size != UINT64_MAX)
695 sm = MAX(p->current_size, sm);
696
697 return MAX(partition_min_size(context, p), sm);
698 }
699
700 static uint64_t partition_min_padding(const Partition *p) {
701 assert(p);
702 return p->padding_min != UINT64_MAX ? p->padding_min : 0;
703 }
704
705 static uint64_t partition_max_padding(const Partition *p) {
706 assert(p);
707 return p->padding_max;
708 }
709
710 static uint64_t partition_min_size_with_padding(Context *context, const Partition *p) {
711 uint64_t sz;
712
713 /* Calculate the disk space we need for this partition plus any free space coming after it. This
714 * takes user configured padding into account as well as any additional whitespace needed to align
715 * the next partition to 4K again. */
716
717 assert(context);
718 assert(p);
719
720 sz = partition_min_size(context, p) + partition_min_padding(p);
721
722 if (PARTITION_EXISTS(p)) {
723 /* If the partition wasn't aligned, add extra space so that any we might add will be aligned */
724 assert(p->offset != UINT64_MAX);
725 return round_up_size(p->offset + sz, context->grain_size) - p->offset;
726 }
727
728 /* If this is a new partition we'll place it aligned, hence we just need to round up the required size here */
729 return round_up_size(sz, context->grain_size);
730 }
731
732 static uint64_t free_area_available(const FreeArea *a) {
733 assert(a);
734
735 /* Determines how much of this free area is not allocated yet */
736
737 assert(a->size >= a->allocated);
738 return a->size - a->allocated;
739 }
740
741 static uint64_t free_area_current_end(Context *context, const FreeArea *a) {
742 assert(context);
743 assert(a);
744
745 if (!a->after)
746 return free_area_available(a);
747
748 assert(a->after->offset != UINT64_MAX);
749 assert(a->after->current_size != UINT64_MAX);
750
751 /* Calculate where the free area ends, based on the offset of the partition preceding it. */
752 return round_up_size(a->after->offset + a->after->current_size, context->grain_size) + free_area_available(a);
753 }
754
755 static uint64_t free_area_min_end(Context *context, const FreeArea *a) {
756 assert(context);
757 assert(a);
758
759 if (!a->after)
760 return 0;
761
762 assert(a->after->offset != UINT64_MAX);
763 assert(a->after->current_size != UINT64_MAX);
764
765 /* Calculate where the partition would end when we give it as much as it needs. */
766 return round_up_size(a->after->offset + partition_min_size_with_padding(context, a->after), context->grain_size);
767 }
768
769 static uint64_t free_area_available_for_new_partitions(Context *context, const FreeArea *a) {
770 assert(context);
771 assert(a);
772
773 /* Similar to free_area_available(), but takes into account that the required size and padding of the
774 * preceding partition is honoured. */
775
776 return LESS_BY(free_area_current_end(context, a), free_area_min_end(context, a));
777 }
778
779 static int free_area_compare(FreeArea *const *a, FreeArea *const*b, Context *context) {
780 assert(context);
781
782 return CMP(free_area_available_for_new_partitions(context, *a),
783 free_area_available_for_new_partitions(context, *b));
784 }
785
786 static uint64_t charge_size(Context *context, uint64_t total, uint64_t amount) {
787 assert(context);
788 /* Subtract the specified amount from total, rounding up to multiple of 4K if there's room */
789 assert(amount <= total);
790 return LESS_BY(total, round_up_size(amount, context->grain_size));
791 }
792
/* Takes 'amount' off the weight budget 'total' and returns what is left. 'amount' must not be larger
 * than 'total'. */
static uint64_t charge_weight(uint64_t total, uint64_t amount) {
        assert(total >= amount);

        total -= amount;
        return total;
}
797
798 static bool context_allocate_partitions(Context *context, uint64_t *ret_largest_free_area) {
799 assert(context);
800
801 /* This may be called multiple times. Reset previous assignments. */
802 for (size_t i = 0; i < context->n_free_areas; i++)
803 context->free_areas[i]->allocated = 0;
804
805 /* Sort free areas by size, putting smallest first */
806 typesafe_qsort_r(context->free_areas, context->n_free_areas, free_area_compare, context);
807
808 /* In any case return size of the largest free area (i.e. not the size of all free areas
809 * combined!) */
810 if (ret_largest_free_area)
811 *ret_largest_free_area =
812 context->n_free_areas == 0 ? 0 :
813 free_area_available_for_new_partitions(context, context->free_areas[context->n_free_areas-1]);
814
815 /* Check that each existing partition can fit its area. */
816 for (size_t i = 0; i < context->n_free_areas; i++)
817 if (free_area_current_end(context, context->free_areas[i]) <
818 free_area_min_end(context, context->free_areas[i]))
819 return false;
820
821 /* A simple first-fit algorithm. We return true if we can fit the partitions in, otherwise false. */
822 LIST_FOREACH(partitions, p, context->partitions) {
823 bool fits = false;
824 uint64_t required;
825 FreeArea *a = NULL;
826
827 /* Skip partitions we already dropped or that already exist */
828 if (p->dropped || PARTITION_EXISTS(p))
829 continue;
830
831 /* How much do we need to fit? */
832 required = partition_min_size_with_padding(context, p);
833 assert(required % context->grain_size == 0);
834
835 for (size_t i = 0; i < context->n_free_areas; i++) {
836 a = context->free_areas[i];
837
838 if (free_area_available_for_new_partitions(context, a) >= required) {
839 fits = true;
840 break;
841 }
842 }
843
844 if (!fits)
845 return false; /* 😢 Oh no! We can't fit this partition into any free area! */
846
847 /* Assign the partition to this free area */
848 p->allocated_to_area = a;
849
850 /* Budget the minimal partition size */
851 a->allocated += required;
852 }
853
854 return true;
855 }
856
857 static int context_sum_weights(Context *context, FreeArea *a, uint64_t *ret) {
858 uint64_t weight_sum = 0;
859
860 assert(context);
861 assert(a);
862 assert(ret);
863
864 /* Determine the sum of the weights of all partitions placed in or before the specified free area */
865
866 LIST_FOREACH(partitions, p, context->partitions) {
867 if (p->padding_area != a && p->allocated_to_area != a)
868 continue;
869
870 if (p->weight > UINT64_MAX - weight_sum)
871 goto overflow_sum;
872 weight_sum += p->weight;
873
874 if (p->padding_weight > UINT64_MAX - weight_sum)
875 goto overflow_sum;
876 weight_sum += p->padding_weight;
877 }
878
879 *ret = weight_sum;
880 return 0;
881
882 overflow_sum:
883 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Combined weight of partition exceeds unsigned 64-bit range, refusing.");
884 }
885
/* Calculates value * weight / weight_sum without overflowing 64 bits. 'weight' must not exceed
 * 'weight_sum'. */
static uint64_t scale_by_weight(uint64_t value, uint64_t weight, uint64_t weight_sum) {
        assert(weight_sum >= weight);

        /* As long as the multiplication below would overflow, halve both weights. 'weight_sum' is
         * rounded up while 'weight' is rounded down, which guarantees that
         * - the results for all weights of a set never add up to more than 'value', and
         * - a result is never larger than the ideal value (i.e. one calculated with 128 bits).
         * The loop stops early once the answer is trivial (weight of zero, or the full share). */
        while (weight != 0 && weight != weight_sum && value > UINT64_MAX / weight) {
                weight_sum = DIV_ROUND_UP(weight_sum, 2);
                weight /= 2;
        }

        if (weight == 0)
                return 0;

        if (weight == weight_sum)
                return value;

        return value * weight / weight_sum;
}
905
/* The passes context_grow_partitions_phase() is run with, in this order. */
typedef enum GrowPartitionPhase {
        /* Phase zero: partitions we do not manage (foreign ones) keep their current size. */
        PHASE_FOREIGN,

        /* Phase one: partitions whose constraints demand more than their weight-based share get
         * charged. */
        PHASE_OVERCHARGE,

        /* Phase two: partitions whose constraints permit less than their weight-based share get
         * charged. */
        PHASE_UNDERCHARGE,

        /* Phase three: whatever remains is distributed among the remaining partitions according to
         * their weights. */
        PHASE_DISTRIBUTE,

        _GROW_PARTITION_PHASE_MAX,
} GrowPartitionPhase;
921
922 static bool context_grow_partitions_phase(
923 Context *context,
924 FreeArea *a,
925 GrowPartitionPhase phase,
926 uint64_t *span,
927 uint64_t *weight_sum) {
928
929 bool try_again = false;
930
931 assert(context);
932 assert(a);
933 assert(span);
934 assert(weight_sum);
935
936 /* Now let's look at the intended weights and adjust them taking the minimum space assignments into
937 * account. i.e. if a partition has a small weight but a high minimum space value set it should not
938 * get any additional room from the left-overs. Similar, if two partitions have the same weight they
939 * should get the same space if possible, even if one has a smaller minimum size than the other. */
940 LIST_FOREACH(partitions, p, context->partitions) {
941
942 /* Look only at partitions associated with this free area, i.e. immediately
943 * preceding it, or allocated into it */
944 if (p->allocated_to_area != a && p->padding_area != a)
945 continue;
946
947 if (p->new_size == UINT64_MAX) {
948 uint64_t share, rsz, xsz;
949 bool charge = false;
950
951 /* Calculate how much this space this partition needs if everyone would get
952 * the weight based share */
953 share = scale_by_weight(*span, p->weight, *weight_sum);
954
955 rsz = partition_min_size(context, p);
956 xsz = partition_max_size(context, p);
957
958 if (phase == PHASE_FOREIGN && PARTITION_IS_FOREIGN(p)) {
959 /* Never change of foreign partitions (i.e. those we don't manage) */
960
961 p->new_size = p->current_size;
962 charge = true;
963
964 } else if (phase == PHASE_OVERCHARGE && rsz > share) {
965 /* This partition needs more than its calculated share. Let's assign
966 * it that, and take this partition out of all calculations and start
967 * again. */
968
969 p->new_size = rsz;
970 charge = try_again = true;
971
972 } else if (phase == PHASE_UNDERCHARGE && xsz < share) {
973 /* This partition accepts less than its calculated
974 * share. Let's assign it that, and take this partition out
975 * of all calculations and start again. */
976
977 p->new_size = xsz;
978 charge = try_again = true;
979
980 } else if (phase == PHASE_DISTRIBUTE) {
981 /* This partition can accept its calculated share. Let's
982 * assign it. There's no need to restart things here since
983 * assigning this shouldn't impact the shares of the other
984 * partitions. */
985
986 assert(share >= rsz);
987 p->new_size = CLAMP(round_down_size(share, context->grain_size), rsz, xsz);
988 charge = true;
989 }
990
991 if (charge) {
992 *span = charge_size(context, *span, p->new_size);
993 *weight_sum = charge_weight(*weight_sum, p->weight);
994 }
995 }
996
997 if (p->new_padding == UINT64_MAX) {
998 uint64_t share, rsz, xsz;
999 bool charge = false;
1000
1001 share = scale_by_weight(*span, p->padding_weight, *weight_sum);
1002
1003 rsz = partition_min_padding(p);
1004 xsz = partition_max_padding(p);
1005
1006 if (phase == PHASE_OVERCHARGE && rsz > share) {
1007 p->new_padding = rsz;
1008 charge = try_again = true;
1009 } else if (phase == PHASE_UNDERCHARGE && xsz < share) {
1010 p->new_padding = xsz;
1011 charge = try_again = true;
1012 } else if (phase == PHASE_DISTRIBUTE) {
1013 assert(share >= rsz);
1014 p->new_padding = CLAMP(round_down_size(share, context->grain_size), rsz, xsz);
1015 charge = true;
1016 }
1017
1018 if (charge) {
1019 *span = charge_size(context, *span, p->new_padding);
1020 *weight_sum = charge_weight(*weight_sum, p->padding_weight);
1021 }
1022 }
1023 }
1024
1025 return !try_again;
1026 }
1027
1028 static void context_grow_partition_one(Context *context, FreeArea *a, Partition *p, uint64_t *span) {
1029 uint64_t m;
1030
1031 assert(context);
1032 assert(a);
1033 assert(p);
1034 assert(span);
1035
1036 if (*span == 0)
1037 return;
1038
1039 if (p->allocated_to_area != a)
1040 return;
1041
1042 if (PARTITION_IS_FOREIGN(p))
1043 return;
1044
1045 assert(p->new_size != UINT64_MAX);
1046
1047 /* Calculate new size and align. */
1048 m = round_down_size(p->new_size + *span, context->grain_size);
1049 /* But ensure this doesn't shrink the size. */
1050 m = MAX(m, p->new_size);
1051 /* And ensure this doesn't exceed the maximum size. */
1052 m = MIN(m, partition_max_size(context, p));
1053
1054 assert(m >= p->new_size);
1055
1056 *span = charge_size(context, *span, m - p->new_size);
1057 p->new_size = m;
1058 }
1059
1060 static int context_grow_partitions_on_free_area(Context *context, FreeArea *a) {
1061 uint64_t weight_sum = 0, span;
1062 int r;
1063
1064 assert(context);
1065 assert(a);
1066
1067 r = context_sum_weights(context, a, &weight_sum);
1068 if (r < 0)
1069 return r;
1070
1071 /* Let's calculate the total area covered by this free area and the partition before it */
1072 span = a->size;
1073 if (a->after) {
1074 assert(a->after->offset != UINT64_MAX);
1075 assert(a->after->current_size != UINT64_MAX);
1076
1077 span += round_up_size(a->after->offset + a->after->current_size, context->grain_size) - a->after->offset;
1078 }
1079
1080 for (GrowPartitionPhase phase = 0; phase < _GROW_PARTITION_PHASE_MAX;)
1081 if (context_grow_partitions_phase(context, a, phase, &span, &weight_sum))
1082 phase++; /* go to the next phase */
1083
1084 /* We still have space left over? Donate to preceding partition if we have one */
1085 if (span > 0 && a->after)
1086 context_grow_partition_one(context, a, a->after, &span);
1087
1088 /* What? Even still some space left (maybe because there was no preceding partition, or it had a
1089 * size limit), then let's donate it to whoever wants it. */
1090 if (span > 0)
1091 LIST_FOREACH(partitions, p, context->partitions) {
1092 context_grow_partition_one(context, a, p, &span);
1093 if (span == 0)
1094 break;
1095 }
1096
1097 /* Yuck, still no one? Then make it padding */
1098 if (span > 0 && a->after) {
1099 assert(a->after->new_padding != UINT64_MAX);
1100 a->after->new_padding += span;
1101 }
1102
1103 return 0;
1104 }
1105
1106 static int context_grow_partitions(Context *context) {
1107 int r;
1108
1109 assert(context);
1110
1111 for (size_t i = 0; i < context->n_free_areas; i++) {
1112 r = context_grow_partitions_on_free_area(context, context->free_areas[i]);
1113 if (r < 0)
1114 return r;
1115 }
1116
1117 /* All existing partitions that have no free space after them can't change size */
1118 LIST_FOREACH(partitions, p, context->partitions) {
1119 if (p->dropped)
1120 continue;
1121
1122 if (!PARTITION_EXISTS(p) || p->padding_area) {
1123 /* The algorithm above must have initialized this already */
1124 assert(p->new_size != UINT64_MAX);
1125 continue;
1126 }
1127
1128 assert(p->new_size == UINT64_MAX);
1129 p->new_size = p->current_size;
1130
1131 assert(p->new_padding == UINT64_MAX);
1132 p->new_padding = p->current_padding;
1133 }
1134
1135 return 0;
1136 }
1137
1138 static uint64_t find_first_unused_partno(Context *context) {
1139 uint64_t partno = 0;
1140
1141 assert(context);
1142
1143 for (partno = 0;; partno++) {
1144 bool found = false;
1145 LIST_FOREACH(partitions, p, context->partitions)
1146 if (p->partno != UINT64_MAX && p->partno == partno)
1147 found = true;
1148 if (!found)
1149 break;
1150 }
1151
1152 return partno;
1153 }
1154
1155 static void context_place_partitions(Context *context) {
1156
1157 assert(context);
1158
1159 for (size_t i = 0; i < context->n_free_areas; i++) {
1160 FreeArea *a = context->free_areas[i];
1161 _unused_ uint64_t left;
1162 uint64_t start;
1163
1164 if (a->after) {
1165 assert(a->after->offset != UINT64_MAX);
1166 assert(a->after->new_size != UINT64_MAX);
1167 assert(a->after->new_padding != UINT64_MAX);
1168
1169 start = a->after->offset + a->after->new_size + a->after->new_padding;
1170 } else
1171 start = context->start;
1172
1173 start = round_up_size(start, context->grain_size);
1174 left = a->size;
1175
1176 LIST_FOREACH(partitions, p, context->partitions) {
1177 if (p->allocated_to_area != a)
1178 continue;
1179
1180 p->offset = start;
1181 p->partno = find_first_unused_partno(context);
1182
1183 assert(left >= p->new_size);
1184 start += p->new_size;
1185 left -= p->new_size;
1186
1187 assert(left >= p->new_padding);
1188 start += p->new_padding;
1189 left -= p->new_padding;
1190 }
1191 }
1192 }
1193
1194 static int config_parse_type(
1195 const char *unit,
1196 const char *filename,
1197 unsigned line,
1198 const char *section,
1199 unsigned section_line,
1200 const char *lvalue,
1201 int ltype,
1202 const char *rvalue,
1203 void *data,
1204 void *userdata) {
1205
1206 GptPartitionType *type = ASSERT_PTR(data);
1207 int r;
1208
1209 assert(rvalue);
1210
1211 r = gpt_partition_type_from_string(rvalue, type);
1212 if (r < 0)
1213 return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse partition type: %s", rvalue);
1214
1215 if (arg_architecture >= 0)
1216 *type = gpt_partition_type_override_architecture(*type, arg_architecture);
1217
1218 return 0;
1219 }
1220
1221 static int config_parse_label(
1222 const char *unit,
1223 const char *filename,
1224 unsigned line,
1225 const char *section,
1226 unsigned section_line,
1227 const char *lvalue,
1228 int ltype,
1229 const char *rvalue,
1230 void *data,
1231 void *userdata) {
1232
1233 _cleanup_free_ char *resolved = NULL;
1234 char **label = ASSERT_PTR(data);
1235 int r;
1236
1237 assert(rvalue);
1238
1239 /* Nota bene: the empty label is a totally valid one. Let's hence not follow our usual rule of
1240 * assigning the empty string to reset to default here, but really accept it as label to set. */
1241
1242 r = specifier_printf(rvalue, GPT_LABEL_MAX, system_and_tmp_specifier_table, arg_root, NULL, &resolved);
1243 if (r < 0) {
1244 log_syntax(unit, LOG_WARNING, filename, line, r,
1245 "Failed to expand specifiers in Label=, ignoring: %s", rvalue);
1246 return 0;
1247 }
1248
1249 if (!utf8_is_valid(resolved)) {
1250 log_syntax(unit, LOG_WARNING, filename, line, 0,
1251 "Partition label not valid UTF-8, ignoring: %s", rvalue);
1252 return 0;
1253 }
1254
1255 r = gpt_partition_label_valid(resolved);
1256 if (r < 0) {
1257 log_syntax(unit, LOG_WARNING, filename, line, r,
1258 "Failed to check if string is valid as GPT partition label, ignoring: \"%s\" (from \"%s\")",
1259 resolved, rvalue);
1260 return 0;
1261 }
1262 if (!r) {
1263 log_syntax(unit, LOG_WARNING, filename, line, 0,
1264 "Partition label too long for GPT table, ignoring: \"%s\" (from \"%s\")",
1265 resolved, rvalue);
1266 return 0;
1267 }
1268
1269 free_and_replace(*label, resolved);
1270 return 0;
1271 }
1272
1273 static int config_parse_weight(
1274 const char *unit,
1275 const char *filename,
1276 unsigned line,
1277 const char *section,
1278 unsigned section_line,
1279 const char *lvalue,
1280 int ltype,
1281 const char *rvalue,
1282 void *data,
1283 void *userdata) {
1284
1285 uint32_t *w = ASSERT_PTR(data), v;
1286 int r;
1287
1288 assert(rvalue);
1289
1290 r = safe_atou32(rvalue, &v);
1291 if (r < 0) {
1292 log_syntax(unit, LOG_WARNING, filename, line, r,
1293 "Failed to parse weight value, ignoring: %s", rvalue);
1294 return 0;
1295 }
1296
1297 if (v > 1000U*1000U) {
1298 log_syntax(unit, LOG_WARNING, filename, line, 0,
1299 "Weight needs to be in range 0…10000000, ignoring: %" PRIu32, v);
1300 return 0;
1301 }
1302
1303 *w = v;
1304 return 0;
1305 }
1306
1307 static int config_parse_size4096(
1308 const char *unit,
1309 const char *filename,
1310 unsigned line,
1311 const char *section,
1312 unsigned section_line,
1313 const char *lvalue,
1314 int ltype,
1315 const char *rvalue,
1316 void *data,
1317 void *userdata) {
1318
1319 uint64_t *sz = data, parsed;
1320 int r;
1321
1322 assert(rvalue);
1323 assert(data);
1324
1325 r = parse_size(rvalue, 1024, &parsed);
1326 if (r < 0)
1327 return log_syntax(unit, LOG_ERR, filename, line, r,
1328 "Failed to parse size value: %s", rvalue);
1329
1330 if (ltype > 0)
1331 *sz = round_up_size(parsed, 4096);
1332 else if (ltype < 0)
1333 *sz = round_down_size(parsed, 4096);
1334 else
1335 *sz = parsed;
1336
1337 if (*sz != parsed)
1338 log_syntax(unit, LOG_NOTICE, filename, line, r, "Rounded %s= size %" PRIu64 " %s %" PRIu64 ", a multiple of 4096.",
1339 lvalue, parsed, special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), *sz);
1340
1341 return 0;
1342 }
1343
1344 static int config_parse_block_size(
1345 const char *unit,
1346 const char *filename,
1347 unsigned line,
1348 const char *section,
1349 unsigned section_line,
1350 const char *lvalue,
1351 int ltype,
1352 const char *rvalue,
1353 void *data,
1354 void *userdata) {
1355
1356 uint64_t *blksz = ASSERT_PTR(data), parsed;
1357 int r;
1358
1359 assert(rvalue);
1360
1361 r = parse_size(rvalue, 1024, &parsed);
1362 if (r < 0)
1363 return log_syntax(unit, LOG_ERR, filename, line, r,
1364 "Failed to parse size value: %s", rvalue);
1365
1366 if (parsed < 512 || parsed > 4096)
1367 return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL),
1368 "Value not between 512 and 4096: %s", rvalue);
1369
1370 if (!ISPOWEROF2(parsed))
1371 return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL),
1372 "Value not a power of 2: %s", rvalue);
1373
1374 *blksz = parsed;
1375 return 0;
1376 }
1377
1378 static int config_parse_fstype(
1379 const char *unit,
1380 const char *filename,
1381 unsigned line,
1382 const char *section,
1383 unsigned section_line,
1384 const char *lvalue,
1385 int ltype,
1386 const char *rvalue,
1387 void *data,
1388 void *userdata) {
1389
1390 char **fstype = ASSERT_PTR(data);
1391 const char *e;
1392
1393 assert(rvalue);
1394
1395 /* Let's provide an easy way to override the chosen fstype for file system partitions */
1396 e = secure_getenv("SYSTEMD_REPART_OVERRIDE_FSTYPE");
1397 if (e && !streq(rvalue, e)) {
1398 log_syntax(unit, LOG_NOTICE, filename, line, 0,
1399 "Overriding defined file system type '%s' with '%s'.", rvalue, e);
1400 rvalue = e;
1401 }
1402
1403 if (!filename_is_valid(rvalue))
1404 return log_syntax(unit, LOG_ERR, filename, line, 0,
1405 "File system type is not valid, refusing: %s", rvalue);
1406
1407 return free_and_strdup_warn(fstype, rvalue);
1408 }
1409
1410 static int config_parse_copy_files(
1411 const char *unit,
1412 const char *filename,
1413 unsigned line,
1414 const char *section,
1415 unsigned section_line,
1416 const char *lvalue,
1417 int ltype,
1418 const char *rvalue,
1419 void *data,
1420 void *userdata) {
1421
1422 _cleanup_free_ char *source = NULL, *buffer = NULL, *resolved_source = NULL, *resolved_target = NULL;
1423 const char *p = rvalue, *target;
1424 char ***copy_files = ASSERT_PTR(data);
1425 int r;
1426
1427 assert(rvalue);
1428
1429 r = extract_first_word(&p, &source, ":", EXTRACT_CUNESCAPE|EXTRACT_DONT_COALESCE_SEPARATORS);
1430 if (r < 0)
1431 return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract source path: %s", rvalue);
1432 if (r == 0) {
1433 log_syntax(unit, LOG_WARNING, filename, line, 0, "No argument specified: %s", rvalue);
1434 return 0;
1435 }
1436
1437 r = extract_first_word(&p, &buffer, ":", EXTRACT_CUNESCAPE|EXTRACT_DONT_COALESCE_SEPARATORS);
1438 if (r < 0)
1439 return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract target path: %s", rvalue);
1440 if (r == 0)
1441 target = source; /* No target, then it's the same as the source */
1442 else
1443 target = buffer;
1444
1445 if (!isempty(p))
1446 return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL), "Too many arguments: %s", rvalue);
1447
1448 r = specifier_printf(source, PATH_MAX-1, system_and_tmp_specifier_table, arg_root, NULL, &resolved_source);
1449 if (r < 0) {
1450 log_syntax(unit, LOG_WARNING, filename, line, r,
1451 "Failed to expand specifiers in CopyFiles= source, ignoring: %s", rvalue);
1452 return 0;
1453 }
1454
1455 r = path_simplify_and_warn(resolved_source, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
1456 if (r < 0)
1457 return 0;
1458
1459 r = specifier_printf(target, PATH_MAX-1, system_and_tmp_specifier_table, arg_root, NULL, &resolved_target);
1460 if (r < 0) {
1461 log_syntax(unit, LOG_WARNING, filename, line, r,
1462 "Failed to expand specifiers in CopyFiles= target, ignoring: %s", resolved_target);
1463 return 0;
1464 }
1465
1466 r = path_simplify_and_warn(resolved_target, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
1467 if (r < 0)
1468 return 0;
1469
1470 r = strv_consume_pair(copy_files, TAKE_PTR(resolved_source), TAKE_PTR(resolved_target));
1471 if (r < 0)
1472 return log_oom();
1473
1474 return 0;
1475 }
1476
1477 static int config_parse_exclude_files(
1478 const char *unit,
1479 const char *filename,
1480 unsigned line,
1481 const char *section,
1482 unsigned section_line,
1483 const char *lvalue,
1484 int ltype,
1485 const char *rvalue,
1486 void *data,
1487 void *userdata) {
1488 _cleanup_free_ char *resolved = NULL;
1489 char ***exclude_files = ASSERT_PTR(data);
1490 int r;
1491
1492 if (isempty(rvalue)) {
1493 *exclude_files = strv_free(*exclude_files);
1494 return 0;
1495 }
1496
1497 r = specifier_printf(rvalue, PATH_MAX-1, system_and_tmp_specifier_table, arg_root, NULL, &resolved);
1498 if (r < 0) {
1499 log_syntax(unit, LOG_WARNING, filename, line, r,
1500 "Failed to expand specifiers in ExcludeFiles= path, ignoring: %s", rvalue);
1501 return 0;
1502 }
1503
1504 r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE|PATH_KEEP_TRAILING_SLASH, unit, filename, line, lvalue);
1505 if (r < 0)
1506 return 0;
1507
1508 if (strv_consume(exclude_files, TAKE_PTR(resolved)) < 0)
1509 return log_oom();
1510
1511 return 0;
1512 }
1513
1514 static int config_parse_copy_blocks(
1515 const char *unit,
1516 const char *filename,
1517 unsigned line,
1518 const char *section,
1519 unsigned section_line,
1520 const char *lvalue,
1521 int ltype,
1522 const char *rvalue,
1523 void *data,
1524 void *userdata) {
1525
1526 _cleanup_free_ char *d = NULL;
1527 Partition *partition = ASSERT_PTR(data);
1528 int r;
1529
1530 assert(rvalue);
1531
1532 if (isempty(rvalue)) {
1533 partition->copy_blocks_path = mfree(partition->copy_blocks_path);
1534 partition->copy_blocks_auto = false;
1535 return 0;
1536 }
1537
1538 if (streq(rvalue, "auto")) {
1539 partition->copy_blocks_path = mfree(partition->copy_blocks_path);
1540 partition->copy_blocks_auto = true;
1541 partition->copy_blocks_root = arg_root;
1542 return 0;
1543 }
1544
1545 r = specifier_printf(rvalue, PATH_MAX-1, system_and_tmp_specifier_table, arg_root, NULL, &d);
1546 if (r < 0) {
1547 log_syntax(unit, LOG_WARNING, filename, line, r,
1548 "Failed to expand specifiers in CopyBlocks= source path, ignoring: %s", rvalue);
1549 return 0;
1550 }
1551
1552 r = path_simplify_and_warn(d, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
1553 if (r < 0)
1554 return 0;
1555
1556 free_and_replace(partition->copy_blocks_path, d);
1557 partition->copy_blocks_auto = false;
1558 partition->copy_blocks_root = arg_root;
1559 return 0;
1560 }
1561
1562 static int config_parse_make_dirs(
1563 const char *unit,
1564 const char *filename,
1565 unsigned line,
1566 const char *section,
1567 unsigned section_line,
1568 const char *lvalue,
1569 int ltype,
1570 const char *rvalue,
1571 void *data,
1572 void *userdata) {
1573
1574 char ***sv = ASSERT_PTR(data);
1575 const char *p = ASSERT_PTR(rvalue);
1576 int r;
1577
1578 for (;;) {
1579 _cleanup_free_ char *word = NULL, *d = NULL;
1580
1581 r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
1582 if (r == -ENOMEM)
1583 return log_oom();
1584 if (r < 0) {
1585 log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
1586 return 0;
1587 }
1588 if (r == 0)
1589 return 0;
1590
1591 r = specifier_printf(word, PATH_MAX-1, system_and_tmp_specifier_table, arg_root, NULL, &d);
1592 if (r < 0) {
1593 log_syntax(unit, LOG_WARNING, filename, line, r,
1594 "Failed to expand specifiers in MakeDirectories= parameter, ignoring: %s", word);
1595 continue;
1596 }
1597
1598 r = path_simplify_and_warn(d, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
1599 if (r < 0)
1600 continue;
1601
1602 r = strv_consume(sv, TAKE_PTR(d));
1603 if (r < 0)
1604 return log_oom();
1605 }
1606 }
1607
/* Instantiates the config_parse_encrypt() callback, which the table in partition_read_definition() uses
 * for Encrypt=. NOTE(review): presumably the generated parser maps the value through the encrypt_mode
 * string table onto EncryptMode, uses ENCRYPT_OFF as default and logs the given message for unknown
 * values — confirm against the definition of DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(). */
1608 static DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_encrypt, encrypt_mode, EncryptMode, ENCRYPT_OFF, "Invalid encryption mode");
1609
1610 static int config_parse_gpt_flags(
1611 const char *unit,
1612 const char *filename,
1613 unsigned line,
1614 const char *section,
1615 unsigned section_line,
1616 const char *lvalue,
1617 int ltype,
1618 const char *rvalue,
1619 void *data,
1620 void *userdata) {
1621
1622 uint64_t *gpt_flags = ASSERT_PTR(data);
1623 int r;
1624
1625 assert(rvalue);
1626
1627 r = safe_atou64(rvalue, gpt_flags);
1628 if (r < 0) {
1629 log_syntax(unit, LOG_WARNING, filename, line, r,
1630 "Failed to parse Flags= value, ignoring: %s", rvalue);
1631 return 0;
1632 }
1633
1634 return 0;
1635 }
1636
1637 static int config_parse_uuid(
1638 const char *unit,
1639 const char *filename,
1640 unsigned line,
1641 const char *section,
1642 unsigned section_line,
1643 const char *lvalue,
1644 int ltype,
1645 const char *rvalue,
1646 void *data,
1647 void *userdata) {
1648
1649 Partition *partition = ASSERT_PTR(data);
1650 int r;
1651
1652 if (isempty(rvalue)) {
1653 partition->new_uuid = SD_ID128_NULL;
1654 partition->new_uuid_is_set = false;
1655 return 0;
1656 }
1657
1658 if (streq(rvalue, "null")) {
1659 partition->new_uuid = SD_ID128_NULL;
1660 partition->new_uuid_is_set = true;
1661 return 0;
1662 }
1663
1664 r = sd_id128_from_string(rvalue, &partition->new_uuid);
1665 if (r < 0) {
1666 log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse 128-bit ID/UUID, ignoring: %s", rvalue);
1667 return 0;
1668 }
1669
1670 partition->new_uuid_is_set = true;
1671
1672 return 0;
1673 }
1674
/* Instantiate the config_parse_verity() and config_parse_minimize() callbacks, which the table in
 * partition_read_definition() uses for Verity= and Minimize=. NOTE(review): presumably the generated
 * parsers map the value through the verity_mode/minimize_mode string tables, use VERITY_OFF/MINIMIZE_OFF
 * as default and log the given message for unknown values — confirm against the definition of
 * DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(). */
1675 static DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_verity, verity_mode, VerityMode, VERITY_OFF, "Invalid verity mode");
1676 static DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_minimize, minimize_mode, MinimizeMode, MINIMIZE_OFF, "Invalid minimize mode");
1677
1678 static int partition_read_definition(Partition *p, const char *path, const char *const *conf_file_dirs) {
1679
1680 ConfigTableItem table[] = {
1681 { "Partition", "Type", config_parse_type, 0, &p->type },
1682 { "Partition", "Label", config_parse_label, 0, &p->new_label },
1683 { "Partition", "UUID", config_parse_uuid, 0, p },
1684 { "Partition", "Priority", config_parse_int32, 0, &p->priority },
1685 { "Partition", "Weight", config_parse_weight, 0, &p->weight },
1686 { "Partition", "PaddingWeight", config_parse_weight, 0, &p->padding_weight },
1687 { "Partition", "SizeMinBytes", config_parse_size4096, 1, &p->size_min },
1688 { "Partition", "SizeMaxBytes", config_parse_size4096, -1, &p->size_max },
1689 { "Partition", "PaddingMinBytes", config_parse_size4096, 1, &p->padding_min },
1690 { "Partition", "PaddingMaxBytes", config_parse_size4096, -1, &p->padding_max },
1691 { "Partition", "FactoryReset", config_parse_bool, 0, &p->factory_reset },
1692 { "Partition", "CopyBlocks", config_parse_copy_blocks, 0, p },
1693 { "Partition", "Format", config_parse_fstype, 0, &p->format },
1694 { "Partition", "CopyFiles", config_parse_copy_files, 0, &p->copy_files },
1695 { "Partition", "ExcludeFiles", config_parse_exclude_files, 0, &p->exclude_files_source },
1696 { "Partition", "ExcludeFilesTarget", config_parse_exclude_files, 0, &p->exclude_files_target },
1697 { "Partition", "MakeDirectories", config_parse_make_dirs, 0, &p->make_directories },
1698 { "Partition", "Encrypt", config_parse_encrypt, 0, &p->encrypt },
1699 { "Partition", "Verity", config_parse_verity, 0, &p->verity },
1700 { "Partition", "VerityMatchKey", config_parse_string, 0, &p->verity_match_key },
1701 { "Partition", "Flags", config_parse_gpt_flags, 0, &p->gpt_flags },
1702 { "Partition", "ReadOnly", config_parse_tristate, 0, &p->read_only },
1703 { "Partition", "NoAuto", config_parse_tristate, 0, &p->no_auto },
1704 { "Partition", "GrowFileSystem", config_parse_tristate, 0, &p->growfs },
1705 { "Partition", "SplitName", config_parse_string, 0, &p->split_name_format },
1706 { "Partition", "Minimize", config_parse_minimize, 0, &p->minimize },
1707 { "Partition", "Subvolumes", config_parse_make_dirs, 0, &p->subvolumes },
1708 { "Partition", "VerityDataBlockSizeBytes", config_parse_block_size, 0, &p->verity_data_block_size },
1709 { "Partition", "VerityHashBlockSizeBytes", config_parse_block_size, 0, &p->verity_hash_block_size },
1710 {}
1711 };
1712 int r;
1713 _cleanup_free_ char *filename = NULL;
1714 const char* dropin_dirname;
1715
1716 r = path_extract_filename(path, &filename);
1717 if (r < 0)
1718 return log_error_errno(r, "Failed to extract filename from path '%s': %m", path);
1719
1720 dropin_dirname = strjoina(filename, ".d");
1721
1722 r = config_parse_many(
1723 STRV_MAKE_CONST(path),
1724 conf_file_dirs,
1725 dropin_dirname,
1726 arg_definitions ? NULL : arg_root,
1727 "Partition\0",
1728 config_item_table_lookup, table,
1729 CONFIG_PARSE_WARN,
1730 p,
1731 NULL,
1732 &p->drop_in_files);
1733 if (r < 0)
1734 return r;
1735
1736 if (partition_type_exclude(&p->type))
1737 return 0;
1738
1739 if (p->size_min != UINT64_MAX && p->size_max != UINT64_MAX && p->size_min > p->size_max)
1740 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1741 "SizeMinBytes= larger than SizeMaxBytes=, refusing.");
1742
1743 if (p->padding_min != UINT64_MAX && p->padding_max != UINT64_MAX && p->padding_min > p->padding_max)
1744 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1745 "PaddingMinBytes= larger than PaddingMaxBytes=, refusing.");
1746
1747 if (sd_id128_is_null(p->type.uuid))
1748 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1749 "Type= not defined, refusing.");
1750
1751 if ((p->copy_blocks_path || p->copy_blocks_auto) &&
1752 (p->format || !strv_isempty(p->copy_files) || !strv_isempty(p->make_directories)))
1753 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1754 "Format=/CopyFiles=/MakeDirectories= and CopyBlocks= cannot be combined, refusing.");
1755
1756 if ((!strv_isempty(p->copy_files) || !strv_isempty(p->make_directories)) && streq_ptr(p->format, "swap"))
1757 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1758 "Format=swap and CopyFiles= cannot be combined, refusing.");
1759
1760 if (!p->format) {
1761 const char *format = NULL;
1762
1763 if (!strv_isempty(p->copy_files) || !strv_isempty(p->make_directories) || (p->encrypt != ENCRYPT_OFF && !(p->copy_blocks_path || p->copy_blocks_auto)))
1764 /* Pick "vfat" as file system for esp and xbootldr partitions, otherwise default to "ext4". */
1765 format = IN_SET(p->type.designator, PARTITION_ESP, PARTITION_XBOOTLDR) ? "vfat" : "ext4";
1766 else if (p->type.designator == PARTITION_SWAP)
1767 format = "swap";
1768
1769 if (format) {
1770 p->format = strdup(format);
1771 if (!p->format)
1772 return log_oom();
1773 }
1774 }
1775
1776 if (p->minimize != MINIMIZE_OFF && !p->format && p->verity != VERITY_HASH)
1777 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1778 "Minimize= can only be enabled if Format= or Verity=hash are set");
1779
1780 if (p->minimize == MINIMIZE_BEST && (p->format && !fstype_is_ro(p->format)) && p->verity != VERITY_HASH)
1781 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1782 "Minimize=best can only be used with read-only filesystems or Verity=hash");
1783
1784 if ((!strv_isempty(p->copy_files) || !strv_isempty(p->make_directories)) && !mkfs_supports_root_option(p->format) && geteuid() != 0)
1785 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EPERM),
1786 "Need to be root to populate %s filesystems with CopyFiles=/MakeDirectories=",
1787 p->format);
1788
1789 if (p->format && fstype_is_ro(p->format) && strv_isempty(p->copy_files) && strv_isempty(p->make_directories))
1790 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1791 "Cannot format %s filesystem without source files, refusing", p->format);
1792
1793 if (p->verity != VERITY_OFF || p->encrypt != ENCRYPT_OFF) {
1794 r = dlopen_cryptsetup();
1795 if (r < 0)
1796 return log_syntax(NULL, LOG_ERR, path, 1, r,
1797 "libcryptsetup not found, Verity=/Encrypt= are not supported: %m");
1798 }
1799
1800 if (p->verity != VERITY_OFF && !p->verity_match_key)
1801 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1802 "VerityMatchKey= must be set if Verity=%s", verity_mode_to_string(p->verity));
1803
1804 if (p->verity == VERITY_OFF && p->verity_match_key)
1805 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1806 "VerityMatchKey= can only be set if Verity= is not \"%s\"",
1807 verity_mode_to_string(p->verity));
1808
1809 if (IN_SET(p->verity, VERITY_HASH, VERITY_SIG) &&
1810 (p->copy_files || p->copy_blocks_path || p->copy_blocks_auto || p->format || p->make_directories))
1811 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1812 "CopyBlocks=/CopyFiles=/Format=/MakeDirectories= cannot be used with Verity=%s",
1813 verity_mode_to_string(p->verity));
1814
1815 if (p->verity != VERITY_OFF && p->encrypt != ENCRYPT_OFF)
1816 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1817 "Encrypting verity hash/data partitions is not supported");
1818
1819 if (p->verity == VERITY_SIG && !arg_private_key)
1820 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1821 "Verity signature partition requested but no private key provided (--private-key=)");
1822
1823 if (p->verity == VERITY_SIG && !arg_certificate)
1824 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1825 "Verity signature partition requested but no PEM certificate provided (--certificate=)");
1826
1827 if (p->verity == VERITY_SIG && (p->size_min != UINT64_MAX || p->size_max != UINT64_MAX))
1828 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1829 "SizeMinBytes=/SizeMaxBytes= cannot be used with Verity=%s",
1830 verity_mode_to_string(p->verity));
1831
1832 if (!strv_isempty(p->subvolumes) && arg_offline > 0)
1833 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EOPNOTSUPP),
1834 "Subvolumes= cannot be used with --offline=yes");
1835
1836 /* Verity partitions are read only, let's imply the RO flag hence, unless explicitly configured otherwise. */
1837 if ((IN_SET(p->type.designator,
1838 PARTITION_ROOT_VERITY,
1839 PARTITION_USR_VERITY) || p->verity == VERITY_DATA) && p->read_only < 0)
1840 p->read_only = true;
1841
1842 /* Default to "growfs" on, unless read-only */
1843 if (gpt_partition_type_knows_growfs(p->type) &&
1844 p->read_only <= 0)
1845 p->growfs = true;
1846
1847 if (!p->split_name_format) {
1848 char *s = strdup("%t");
1849 if (!s)
1850 return log_oom();
1851
1852 p->split_name_format = s;
1853 } else if (streq(p->split_name_format, "-"))
1854 p->split_name_format = mfree(p->split_name_format);
1855
1856 return 1;
1857 }
1858
1859 static int find_verity_sibling(Context *context, Partition *p, VerityMode mode, Partition **ret) {
1860 Partition *s = NULL;
1861
1862 assert(p);
1863 assert(p->verity != VERITY_OFF);
1864 assert(p->verity_match_key);
1865 assert(mode != VERITY_OFF);
1866 assert(p->verity != mode);
1867 assert(ret);
1868
1869 /* Try to find the matching sibling partition of the given type for a verity partition. For a data
1870 * partition, this is the corresponding hash partition with the same verity name (and vice versa for
1871 * the hash partition). */
1872
1873 LIST_FOREACH(partitions, q, context->partitions) {
1874 if (p == q)
1875 continue;
1876
1877 if (q->verity != mode)
1878 continue;
1879
1880 assert(q->verity_match_key);
1881
1882 if (!streq(p->verity_match_key, q->verity_match_key))
1883 continue;
1884
1885 if (s)
1886 return -ENOTUNIQ;
1887
1888 s = q;
1889 }
1890
1891 if (!s)
1892 return -ENXIO;
1893
1894 *ret = s;
1895
1896 return 0;
1897 }
1898
1899 static int context_open_and_lock_backing_fd(const char *node, int operation, int *backing_fd) {
1900 _cleanup_close_ int fd = -EBADF;
1901
1902 assert(node);
1903 assert(backing_fd);
1904
1905 if (*backing_fd >= 0)
1906 return 0;
1907
1908 fd = open(node, O_RDONLY|O_CLOEXEC);
1909 if (fd < 0)
1910 return log_error_errno(errno, "Failed to open device '%s': %m", node);
1911
1912 /* Tell udev not to interfere while we are processing the device */
1913 if (flock(fd, operation) < 0)
1914 return log_error_errno(errno, "Failed to lock device '%s': %m", node);
1915
1916 log_debug("Device %s opened and locked.", node);
1917 *backing_fd = TAKE_FD(fd);
1918 return 1;
1919 }
1920
static int determine_current_padding(
                struct fdisk_context *c,
                struct fdisk_table *t,
                struct fdisk_partition *p,
                uint64_t secsz,
                uint64_t grainsz,
                uint64_t *ret) {

        uint64_t end, successor = UINT64_MAX;
        size_t n_entries;

        assert(c);
        assert(t);
        assert(p);
        assert(ret);

        /* Determines how many bytes lie between the end of partition p and whatever comes next: the
         * closest used partition starting at or behind it, or else the end of the usable area of the
         * disk. Both boundaries are aligned to the grain size before the difference is taken. */

        if (!fdisk_partition_has_end(p))
                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Partition has no end!");

        /* The end is the last sector of the partition, hence advance by one sector to get to what
         * follows, and convert to bytes. */
        end = fdisk_partition_get_end(p);
        assert(end < UINT64_MAX);
        end++;
        assert(end < UINT64_MAX / secsz);
        end *= secsz;

        n_entries = fdisk_table_get_nents(t);
        for (size_t i = 0; i < n_entries; i++) {
                struct fdisk_partition *q = fdisk_table_get_partition(t, i);
                uint64_t start;

                if (!q)
                        return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");

                if (fdisk_partition_is_used(q) <= 0 || !fdisk_partition_has_start(q))
                        continue;

                start = fdisk_partition_get_start(q);
                assert(start < UINT64_MAX / secsz);
                start *= secsz;

                /* Only partitions located behind ours are of interest */
                if (start < end)
                        continue;

                /* Remember the closest one */
                if (successor == UINT64_MAX || start < successor)
                        successor = start;
        }

        if (successor == UINT64_MAX) {
                /* Nothing follows, hence measure against the end of the usable area. The last LBA is
                 * the final usable sector, thus the area ends one sector behind it. */
                successor = fdisk_get_last_lba(c);
                assert(successor < UINT64_MAX);
                successor++;

                assert(successor < UINT64_MAX / secsz);
                successor *= secsz;

                if (end > successor)
                        return log_error_errno(SYNTHETIC_ERRNO(EIO), "Partition end beyond disk end.");
        }

        assert(successor >= end);

        end = round_up_size(end, grainsz);
        successor = round_down_size(successor, grainsz);

        /* Aligning may have moved the boundaries past each other, hence subtract saturating */
        *ret = LESS_BY(successor, end);
        return 0;
}
1989
1990 static int context_copy_from_one(Context *context, const char *src) {
1991 _cleanup_close_ int fd = -EBADF;
1992 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
1993 _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL;
1994 Partition *last = NULL;
1995 unsigned long secsz, grainsz;
1996 size_t n_partitions;
1997 int r;
1998
1999 assert(src);
2000
2001 r = context_open_and_lock_backing_fd(src, LOCK_SH, &fd);
2002 if (r < 0)
2003 return r;
2004
2005 r = fd_verify_regular(fd);
2006 if (r < 0)
2007 return log_error_errno(r, "%s is not a file: %m", src);
2008
2009 r = fdisk_new_context_at(fd, /* path = */ NULL, /* read_only = */ true, /* sector_size = */ UINT32_MAX, &c);
2010 if (r < 0)
2011 return log_error_errno(r, "Failed to create fdisk context: %m");
2012
2013 secsz = fdisk_get_sector_size(c);
2014 grainsz = fdisk_get_grain_size(c);
2015
2016 /* Insist on a power of two, and that it's a multiple of 512, i.e. the traditional sector size. */
2017 if (secsz < 512 || !ISPOWEROF2(secsz))
2018 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Sector size %lu is not a power of two larger than 512? Refusing.", secsz);
2019
2020 if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
2021 return log_error_errno(SYNTHETIC_ERRNO(EHWPOISON), "Cannot copy from disk %s with no GPT disk label.", src);
2022
2023 r = fdisk_get_partitions(c, &t);
2024 if (r < 0)
2025 return log_error_errno(r, "Failed to acquire partition table: %m");
2026
2027 n_partitions = fdisk_table_get_nents(t);
2028 for (size_t i = 0; i < n_partitions; i++) {
2029 _cleanup_(partition_freep) Partition *np = NULL;
2030 _cleanup_free_ char *label_copy = NULL;
2031 struct fdisk_partition *p;
2032 const char *label;
2033 uint64_t sz, start, padding;
2034 sd_id128_t ptid, id;
2035 GptPartitionType type;
2036
2037 p = fdisk_table_get_partition(t, i);
2038 if (!p)
2039 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");
2040
2041 if (fdisk_partition_is_used(p) <= 0)
2042 continue;
2043
2044 if (fdisk_partition_has_start(p) <= 0 ||
2045 fdisk_partition_has_size(p) <= 0 ||
2046 fdisk_partition_has_partno(p) <= 0)
2047 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found a partition without a position, size or number.");
2048
2049 r = fdisk_partition_get_type_as_id128(p, &ptid);
2050 if (r < 0)
2051 return log_error_errno(r, "Failed to query partition type UUID: %m");
2052
2053 type = gpt_partition_type_from_uuid(ptid);
2054
2055 r = fdisk_partition_get_uuid_as_id128(p, &id);
2056 if (r < 0)
2057 return log_error_errno(r, "Failed to query partition UUID: %m");
2058
2059 label = fdisk_partition_get_name(p);
2060 if (!isempty(label)) {
2061 label_copy = strdup(label);
2062 if (!label_copy)
2063 return log_oom();
2064 }
2065
2066 sz = fdisk_partition_get_size(p);
2067 assert(sz <= UINT64_MAX/secsz);
2068 sz *= secsz;
2069
2070 start = fdisk_partition_get_start(p);
2071 assert(start <= UINT64_MAX/secsz);
2072 start *= secsz;
2073
2074 if (partition_type_exclude(&type))
2075 continue;
2076
2077 np = partition_new();
2078 if (!np)
2079 return log_oom();
2080
2081 np->type = type;
2082 np->new_uuid = id;
2083 np->new_uuid_is_set = true;
2084 np->size_min = np->size_max = sz;
2085 np->new_label = TAKE_PTR(label_copy);
2086
2087 np->definition_path = strdup(src);
2088 if (!np->definition_path)
2089 return log_oom();
2090
2091 r = determine_current_padding(c, t, p, secsz, grainsz, &padding);
2092 if (r < 0)
2093 return r;
2094
2095 np->padding_min = np->padding_max = padding;
2096
2097 np->copy_blocks_path = strdup(src);
2098 if (!np->copy_blocks_path)
2099 return log_oom();
2100
2101 np->copy_blocks_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
2102 if (np->copy_blocks_fd < 0)
2103 return log_error_errno(r, "Failed to duplicate file descriptor of %s: %m", src);
2104
2105 np->copy_blocks_offset = start;
2106 np->copy_blocks_size = sz;
2107
2108 r = fdisk_partition_get_attrs_as_uint64(p, &np->gpt_flags);
2109 if (r < 0)
2110 return log_error_errno(r, "Failed to get partition flags: %m");
2111
2112 LIST_INSERT_AFTER(partitions, context->partitions, last, np);
2113 last = TAKE_PTR(np);
2114 context->n_partitions++;
2115 }
2116
2117 return 0;
2118 }
2119
2120 static int context_copy_from(Context *context) {
2121 int r;
2122
2123 assert(context);
2124
2125 STRV_FOREACH(src, arg_copy_from) {
2126 r = context_copy_from_one(context, *src);
2127 if (r < 0)
2128 return r;
2129 }
2130
2131 return 0;
2132 }
2133
2134 static int context_read_definitions(Context *context) {
2135 _cleanup_strv_free_ char **files = NULL;
2136 Partition *last = LIST_FIND_TAIL(partitions, context->partitions);
2137 const char *const *dirs;
2138 int r;
2139
2140 assert(context);
2141
2142 dirs = (const char* const*) (arg_definitions ?: CONF_PATHS_STRV("repart.d"));
2143
2144 r = conf_files_list_strv(&files, ".conf", arg_definitions ? NULL : arg_root, CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED, dirs);
2145 if (r < 0)
2146 return log_error_errno(r, "Failed to enumerate *.conf files: %m");
2147
2148 STRV_FOREACH(f, files) {
2149 _cleanup_(partition_freep) Partition *p = NULL;
2150
2151 p = partition_new();
2152 if (!p)
2153 return log_oom();
2154
2155 p->definition_path = strdup(*f);
2156 if (!p->definition_path)
2157 return log_oom();
2158
2159 r = partition_read_definition(p, *f, dirs);
2160 if (r < 0)
2161 return r;
2162 if (r == 0)
2163 continue;
2164
2165 LIST_INSERT_AFTER(partitions, context->partitions, last, p);
2166 last = TAKE_PTR(p);
2167 context->n_partitions++;
2168 }
2169
2170 /* Check that each configured verity hash/data partition has a matching verity data/hash partition. */
2171
2172 LIST_FOREACH(partitions, p, context->partitions) {
2173 if (p->verity == VERITY_OFF)
2174 continue;
2175
2176 for (VerityMode mode = VERITY_OFF + 1; mode < _VERITY_MODE_MAX; mode++) {
2177 Partition *q = NULL;
2178
2179 if (p->verity == mode)
2180 continue;
2181
2182 if (p->siblings[mode])
2183 continue;
2184
2185 r = find_verity_sibling(context, p, mode, &q);
2186 if (r == -ENXIO) {
2187 if (mode != VERITY_SIG)
2188 return log_syntax(NULL, LOG_ERR, p->definition_path, 1, SYNTHETIC_ERRNO(EINVAL),
2189 "Missing verity %s partition for verity %s partition with VerityMatchKey=%s",
2190 verity_mode_to_string(mode), verity_mode_to_string(p->verity), p->verity_match_key);
2191 } else if (r == -ENOTUNIQ)
2192 return log_syntax(NULL, LOG_ERR, p->definition_path, 1, SYNTHETIC_ERRNO(EINVAL),
2193 "Multiple verity %s partitions found for verity %s partition with VerityMatchKey=%s",
2194 verity_mode_to_string(mode), verity_mode_to_string(p->verity), p->verity_match_key);
2195 else if (r < 0)
2196 return log_syntax(NULL, LOG_ERR, p->definition_path, 1, r,
2197 "Failed to find verity %s partition for verity %s partition with VerityMatchKey=%s",
2198 verity_mode_to_string(mode), verity_mode_to_string(p->verity), p->verity_match_key);
2199
2200 if (q) {
2201 if (q->priority != p->priority)
2202 return log_syntax(NULL, LOG_ERR, p->definition_path, 1, SYNTHETIC_ERRNO(EINVAL),
2203 "Priority mismatch (%i != %i) for verity sibling partitions with VerityMatchKey=%s",
2204 p->priority, q->priority, p->verity_match_key);
2205
2206 p->siblings[mode] = q;
2207 }
2208 }
2209 }
2210
2211 LIST_FOREACH(partitions, p, context->partitions) {
2212 Partition *dp;
2213
2214 if (p->verity != VERITY_HASH)
2215 continue;
2216
2217 if (p->minimize == MINIMIZE_OFF)
2218 continue;
2219
2220 assert_se(dp = p->siblings[VERITY_DATA]);
2221
2222 if (dp->minimize == MINIMIZE_OFF && !(dp->copy_blocks_path || dp->copy_blocks_auto))
2223 return log_syntax(NULL, LOG_ERR, p->definition_path, 1, SYNTHETIC_ERRNO(EINVAL),
2224 "Minimize= set for verity hash partition but data partition does "
2225 "not set CopyBlocks= or Minimize=");
2226
2227 }
2228
2229 return 0;
2230 }
2231
2232 static int fdisk_ask_cb(struct fdisk_context *c, struct fdisk_ask *ask, void *data) {
2233 _cleanup_free_ char *ids = NULL;
2234 int r;
2235
2236 if (fdisk_ask_get_type(ask) != FDISK_ASKTYPE_STRING)
2237 return -EINVAL;
2238
2239 ids = new(char, SD_ID128_UUID_STRING_MAX);
2240 if (!ids)
2241 return -ENOMEM;
2242
2243 r = fdisk_ask_string_set_result(ask, sd_id128_to_uuid_string(*(sd_id128_t*) data, ids));
2244 if (r < 0)
2245 return r;
2246
2247 TAKE_PTR(ids);
2248 return 0;
2249 }
2250
2251 static int fdisk_set_disklabel_id_by_uuid(struct fdisk_context *c, sd_id128_t id) {
2252 int r;
2253
2254 r = fdisk_set_ask(c, fdisk_ask_cb, &id);
2255 if (r < 0)
2256 return r;
2257
2258 r = fdisk_set_disklabel_id(c);
2259 if (r < 0)
2260 return r;
2261
2262 return fdisk_set_ask(c, NULL, NULL);
2263 }
2264
2265 static int derive_uuid(sd_id128_t base, const char *token, sd_id128_t *ret) {
2266 union {
2267 uint8_t md[SHA256_DIGEST_SIZE];
2268 sd_id128_t id;
2269 } result;
2270
2271 assert(token);
2272 assert(ret);
2273
2274 /* Derive a new UUID from the specified UUID in a stable and reasonably safe way. Specifically, we
2275 * calculate the HMAC-SHA256 of the specified token string, keyed by the supplied base (typically the
2276 * machine ID). We use the machine ID as key (and not as cleartext!) of the HMAC operation since it's
2277 * the machine ID we don't want to leak. */
2278
2279 hmac_sha256(base.bytes, sizeof(base.bytes), token, strlen(token), result.md);
2280
2281 /* Take the first half, mark it as v4 UUID */
2282 assert_cc(sizeof(result.md) == sizeof(result.id) * 2);
2283 *ret = id128_make_v4_uuid(result.id);
2284 return 0;
2285 }
2286
2287 static void derive_salt(sd_id128_t base, const char *token, uint8_t ret[static SHA256_DIGEST_SIZE]) {
2288 assert(token);
2289
2290 hmac_sha256(base.bytes, sizeof(base.bytes), token, strlen(token), ret);
2291 }
2292
2293 static int context_load_partition_table(Context *context) {
2294 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
2295 _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL;
2296 uint64_t left_boundary = UINT64_MAX, first_lba, last_lba, nsectors;
2297 _cleanup_free_ char *disk_uuid_string = NULL;
2298 bool from_scratch = false;
2299 sd_id128_t disk_uuid;
2300 size_t n_partitions;
2301 unsigned long secsz;
2302 uint64_t grainsz, fs_secsz = DEFAULT_FILESYSTEM_SECTOR_SIZE;
2303 int r;
2304
2305 assert(context);
2306 assert(!context->fdisk_context);
2307 assert(!context->free_areas);
2308 assert(context->start == UINT64_MAX);
2309 assert(context->end == UINT64_MAX);
2310 assert(context->total == UINT64_MAX);
2311
2312 c = fdisk_new_context();
2313 if (!c)
2314 return log_oom();
2315
2316 if (arg_sector_size > 0) {
2317 fs_secsz = arg_sector_size;
2318 r = fdisk_save_user_sector_size(c, /* phy= */ 0, arg_sector_size);
2319 } else {
2320 uint32_t ssz;
2321 struct stat st;
2322
2323 r = context_open_and_lock_backing_fd(context->node, arg_dry_run ? LOCK_SH : LOCK_EX,
2324 &context->backing_fd);
2325 if (r < 0)
2326 return r;
2327
2328 if (fstat(context->backing_fd, &st) < 0)
2329 return log_error_errno(r, "Failed to stat %s: %m", context->node);
2330
2331 /* Auto-detect sector size if not specified. */
2332 r = probe_sector_size_prefer_ioctl(context->backing_fd, &ssz);
2333 if (r < 0)
2334 return log_error_errno(r, "Failed to probe sector size of '%s': %m", context->node);
2335
2336 /* If we found the sector size and we're operating on a block device, use it as the file
2337 * system sector size as well, as we know its the sector size of the actual block device and
2338 * not just the offset at which we found the GPT header. */
2339 if (r > 0 && S_ISBLK(st.st_mode))
2340 fs_secsz = ssz;
2341
2342 r = fdisk_save_user_sector_size(c, /* phy= */ 0, ssz);
2343 }
2344 if (r < 0)
2345 return log_error_errno(r, "Failed to set sector size: %m");
2346
2347 /* libfdisk doesn't have an API to operate on arbitrary fds, hence reopen the fd going via the
2348 * /proc/self/fd/ magic path if we have an existing fd. Open the original file otherwise. */
2349 r = fdisk_assign_device(
2350 c,
2351 context->backing_fd >= 0 ? FORMAT_PROC_FD_PATH(context->backing_fd) : context->node,
2352 arg_dry_run);
2353 if (r == -EINVAL && arg_size_auto) {
2354 struct stat st;
2355
2356 /* libfdisk returns EINVAL if opening a file of size zero. Let's check for that, and accept
2357 * it if automatic sizing is requested. */
2358
2359 if (context->backing_fd < 0)
2360 r = stat(context->node, &st);
2361 else
2362 r = fstat(context->backing_fd, &st);
2363 if (r < 0)
2364 return log_error_errno(errno, "Failed to stat block device '%s': %m", context->node);
2365
2366 if (S_ISREG(st.st_mode) && st.st_size == 0) {
2367 /* Use the fallback values if we have no better idea */
2368 context->sector_size = fdisk_get_sector_size(c);
2369 context->fs_sector_size = fs_secsz;
2370 context->grain_size = 4096;
2371 return /* from_scratch = */ true;
2372 }
2373
2374 r = -EINVAL;
2375 }
2376 if (r < 0)
2377 return log_error_errno(r, "Failed to open device '%s': %m", context->node);
2378
2379 if (context->backing_fd < 0) {
2380 /* If we have no fd referencing the device yet, make a copy of the fd now, so that we have one */
2381 r = context_open_and_lock_backing_fd(FORMAT_PROC_FD_PATH(fdisk_get_devfd(c)),
2382 arg_dry_run ? LOCK_SH : LOCK_EX,
2383 &context->backing_fd);
2384 if (r < 0)
2385 return r;
2386 }
2387
2388 /* The offsets/sizes libfdisk returns to us will be in multiple of the sector size of the
2389 * device. This is typically 512, and sometimes 4096. Let's query libfdisk once for it, and then use
2390 * it for all our needs. Note that the values we use ourselves always are in bytes though, thus mean
2391 * the same thing universally. Also note that regardless what kind of sector size is in use we'll
2392 * place partitions at multiples of 4K. */
2393 secsz = fdisk_get_sector_size(c);
2394
2395 /* Insist on a power of two, and that it's a multiple of 512, i.e. the traditional sector size. */
2396 if (secsz < 512 || !ISPOWEROF2(secsz))
2397 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Sector size %lu is not a power of two larger than 512? Refusing.", secsz);
2398
2399 /* Use at least 4K, and ensure it's a multiple of the sector size, regardless if that is smaller or
2400 * larger */
2401 grainsz = secsz < 4096 ? 4096 : secsz;
2402
2403 log_debug("Sector size of device is %lu bytes. Using grain size of %" PRIu64 ".", secsz, grainsz);
2404
2405 switch (arg_empty) {
2406
2407 case EMPTY_REFUSE:
2408 /* Refuse empty disks, insist on an existing GPT partition table */
2409 if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
2410 return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s has no GPT disk label, not repartitioning.", context->node);
2411
2412 break;
2413
2414 case EMPTY_REQUIRE:
2415 /* Require an empty disk, refuse any existing partition table */
2416 r = fdisk_has_label(c);
2417 if (r < 0)
2418 return log_error_errno(r, "Failed to determine whether disk %s has a disk label: %m", context->node);
2419 if (r > 0)
2420 return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s already has a disk label, refusing.", context->node);
2421
2422 from_scratch = true;
2423 break;
2424
2425 case EMPTY_ALLOW:
2426 /* Allow both an empty disk and an existing partition table, but only GPT */
2427 r = fdisk_has_label(c);
2428 if (r < 0)
2429 return log_error_errno(r, "Failed to determine whether disk %s has a disk label: %m", context->node);
2430 if (r > 0) {
2431 if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
2432 return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s has non-GPT disk label, not repartitioning.", context->node);
2433 } else
2434 from_scratch = true;
2435
2436 break;
2437
2438 case EMPTY_FORCE:
2439 case EMPTY_CREATE:
2440 /* Always reinitiaize the disk, don't consider what there was on the disk before */
2441 from_scratch = true;
2442 break;
2443
2444 default:
2445 assert_not_reached();
2446 }
2447
2448 if (from_scratch) {
2449 r = fdisk_create_disklabel(c, "gpt");
2450 if (r < 0)
2451 return log_error_errno(r, "Failed to create GPT disk label: %m");
2452
2453 r = derive_uuid(context->seed, "disk-uuid", &disk_uuid);
2454 if (r < 0)
2455 return log_error_errno(r, "Failed to acquire disk GPT uuid: %m");
2456
2457 r = fdisk_set_disklabel_id_by_uuid(c, disk_uuid);
2458 if (r < 0)
2459 return log_error_errno(r, "Failed to set GPT disk label: %m");
2460
2461 goto add_initial_free_area;
2462 }
2463
2464 r = fdisk_get_disklabel_id(c, &disk_uuid_string);
2465 if (r < 0)
2466 return log_error_errno(r, "Failed to get current GPT disk label UUID: %m");
2467
2468 r = id128_from_string_nonzero(disk_uuid_string, &disk_uuid);
2469 if (r == -ENXIO) {
2470 r = derive_uuid(context->seed, "disk-uuid", &disk_uuid);
2471 if (r < 0)
2472 return log_error_errno(r, "Failed to acquire disk GPT uuid: %m");
2473
2474 r = fdisk_set_disklabel_id(c);
2475 if (r < 0)
2476 return log_error_errno(r, "Failed to set GPT disk label: %m");
2477 } else if (r < 0)
2478 return log_error_errno(r, "Failed to parse current GPT disk label UUID: %m");
2479
2480 r = fdisk_get_partitions(c, &t);
2481 if (r < 0)
2482 return log_error_errno(r, "Failed to acquire partition table: %m");
2483
2484 n_partitions = fdisk_table_get_nents(t);
2485 for (size_t i = 0; i < n_partitions; i++) {
2486 _cleanup_free_ char *label_copy = NULL;
2487 Partition *last = NULL;
2488 struct fdisk_partition *p;
2489 const char *label;
2490 uint64_t sz, start;
2491 bool found = false;
2492 sd_id128_t ptid, id;
2493 size_t partno;
2494
2495 p = fdisk_table_get_partition(t, i);
2496 if (!p)
2497 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");
2498
2499 if (fdisk_partition_is_used(p) <= 0)
2500 continue;
2501
2502 if (fdisk_partition_has_start(p) <= 0 ||
2503 fdisk_partition_has_size(p) <= 0 ||
2504 fdisk_partition_has_partno(p) <= 0)
2505 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found a partition without a position, size or number.");
2506
2507 r = fdisk_partition_get_type_as_id128(p, &ptid);
2508 if (r < 0)
2509 return log_error_errno(r, "Failed to query partition type UUID: %m");
2510
2511 r = fdisk_partition_get_uuid_as_id128(p, &id);
2512 if (r < 0)
2513 return log_error_errno(r, "Failed to query partition UUID: %m");
2514
2515 label = fdisk_partition_get_name(p);
2516 if (!isempty(label)) {
2517 label_copy = strdup(label);
2518 if (!label_copy)
2519 return log_oom();
2520 }
2521
2522 sz = fdisk_partition_get_size(p);
2523 assert(sz <= UINT64_MAX/secsz);
2524 sz *= secsz;
2525
2526 start = fdisk_partition_get_start(p);
2527 assert(start <= UINT64_MAX/secsz);
2528 start *= secsz;
2529
2530 partno = fdisk_partition_get_partno(p);
2531
2532 if (left_boundary == UINT64_MAX || left_boundary > start)
2533 left_boundary = start;
2534
2535 /* Assign this existing partition to the first partition of the right type that doesn't have
2536 * an existing one assigned yet. */
2537 LIST_FOREACH(partitions, pp, context->partitions) {
2538 last = pp;
2539
2540 if (!sd_id128_equal(pp->type.uuid, ptid))
2541 continue;
2542
2543 if (!pp->current_partition) {
2544 pp->current_uuid = id;
2545 pp->current_size = sz;
2546 pp->offset = start;
2547 pp->partno = partno;
2548 pp->current_label = TAKE_PTR(label_copy);
2549
2550 pp->current_partition = p;
2551 fdisk_ref_partition(p);
2552
2553 r = determine_current_padding(c, t, p, secsz, grainsz, &pp->current_padding);
2554 if (r < 0)
2555 return r;
2556
2557 if (pp->current_padding > 0) {
2558 r = context_add_free_area(context, pp->current_padding, pp);
2559 if (r < 0)
2560 return r;
2561 }
2562
2563 found = true;
2564 break;
2565 }
2566 }
2567
2568 /* If we have no matching definition, create a new one. */
2569 if (!found) {
2570 _cleanup_(partition_freep) Partition *np = NULL;
2571
2572 np = partition_new();
2573 if (!np)
2574 return log_oom();
2575
2576 np->current_uuid = id;
2577 np->type = gpt_partition_type_from_uuid(ptid);
2578 np->current_size = sz;
2579 np->offset = start;
2580 np->partno = partno;
2581 np->current_label = TAKE_PTR(label_copy);
2582
2583 np->current_partition = p;
2584 fdisk_ref_partition(p);
2585
2586 r = determine_current_padding(c, t, p, secsz, grainsz, &np->current_padding);
2587 if (r < 0)
2588 return r;
2589
2590 if (np->current_padding > 0) {
2591 r = context_add_free_area(context, np->current_padding, np);
2592 if (r < 0)
2593 return r;
2594 }
2595
2596 LIST_INSERT_AFTER(partitions, context->partitions, last, TAKE_PTR(np));
2597 context->n_partitions++;
2598 }
2599 }
2600
2601 add_initial_free_area:
2602 nsectors = fdisk_get_nsectors(c);
2603 assert(nsectors <= UINT64_MAX/secsz);
2604 nsectors *= secsz;
2605
2606 first_lba = fdisk_get_first_lba(c);
2607 assert(first_lba <= UINT64_MAX/secsz);
2608 first_lba *= secsz;
2609
2610 last_lba = fdisk_get_last_lba(c);
2611 assert(last_lba < UINT64_MAX);
2612 last_lba++;
2613 assert(last_lba <= UINT64_MAX/secsz);
2614 last_lba *= secsz;
2615
2616 assert(last_lba >= first_lba);
2617
2618 if (left_boundary == UINT64_MAX) {
2619 /* No partitions at all? Then the whole disk is up for grabs. */
2620
2621 first_lba = round_up_size(first_lba, grainsz);
2622 last_lba = round_down_size(last_lba, grainsz);
2623
2624 if (last_lba > first_lba) {
2625 r = context_add_free_area(context, last_lba - first_lba, NULL);
2626 if (r < 0)
2627 return r;
2628 }
2629 } else {
2630 /* Add space left of first partition */
2631 assert(left_boundary >= first_lba);
2632
2633 first_lba = round_up_size(first_lba, grainsz);
2634 left_boundary = round_down_size(left_boundary, grainsz);
2635 last_lba = round_down_size(last_lba, grainsz);
2636
2637 if (left_boundary > first_lba) {
2638 r = context_add_free_area(context, left_boundary - first_lba, NULL);
2639 if (r < 0)
2640 return r;
2641 }
2642 }
2643
2644 context->start = first_lba;
2645 context->end = last_lba;
2646 context->total = nsectors;
2647 context->sector_size = secsz;
2648 context->fs_sector_size = fs_secsz;
2649 context->grain_size = grainsz;
2650 context->fdisk_context = TAKE_PTR(c);
2651
2652 return from_scratch;
2653 }
2654
2655 static void context_unload_partition_table(Context *context) {
2656 assert(context);
2657
2658 LIST_FOREACH(partitions, p, context->partitions) {
2659
2660 /* Entirely remove partitions that have no configuration */
2661 if (PARTITION_IS_FOREIGN(p)) {
2662 partition_unlink_and_free(context, p);
2663 continue;
2664 }
2665
2666 /* Otherwise drop all data we read off the block device and everything we might have
2667 * calculated based on it */
2668
2669 p->dropped = false;
2670 p->current_size = UINT64_MAX;
2671 p->new_size = UINT64_MAX;
2672 p->current_padding = UINT64_MAX;
2673 p->new_padding = UINT64_MAX;
2674 p->partno = UINT64_MAX;
2675 p->offset = UINT64_MAX;
2676
2677 if (p->current_partition) {
2678 fdisk_unref_partition(p->current_partition);
2679 p->current_partition = NULL;
2680 }
2681
2682 if (p->new_partition) {
2683 fdisk_unref_partition(p->new_partition);
2684 p->new_partition = NULL;
2685 }
2686
2687 p->padding_area = NULL;
2688 p->allocated_to_area = NULL;
2689
2690 p->current_uuid = SD_ID128_NULL;
2691 p->current_label = mfree(p->current_label);
2692 }
2693
2694 context->start = UINT64_MAX;
2695 context->end = UINT64_MAX;
2696 context->total = UINT64_MAX;
2697
2698 if (context->fdisk_context) {
2699 fdisk_unref_context(context->fdisk_context);
2700 context->fdisk_context = NULL;
2701 }
2702
2703 context_free_free_areas(context);
2704 }
2705
2706 static int format_size_change(uint64_t from, uint64_t to, char **ret) {
2707 char *t;
2708
2709 if (from != UINT64_MAX) {
2710 if (from == to || to == UINT64_MAX)
2711 t = strdup(FORMAT_BYTES(from));
2712 else
2713 t = strjoin(FORMAT_BYTES(from), " ", special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), " ", FORMAT_BYTES(to));
2714 } else if (to != UINT64_MAX)
2715 t = strjoin(special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), " ", FORMAT_BYTES(to));
2716 else {
2717 *ret = NULL;
2718 return 0;
2719 }
2720
2721 if (!t)
2722 return log_oom();
2723
2724 *ret = t;
2725 return 1;
2726 }
2727
2728 static const char *partition_label(const Partition *p) {
2729 assert(p);
2730
2731 if (p->new_label)
2732 return p->new_label;
2733
2734 if (p->current_label)
2735 return p->current_label;
2736
2737 return gpt_partition_type_uuid_to_string(p->type.uuid);
2738 }
2739
2740 static int context_dump_partitions(Context *context) {
2741 _cleanup_(table_unrefp) Table *t = NULL;
2742 uint64_t sum_padding = 0, sum_size = 0;
2743 int r;
2744 const size_t roothash_col = 14, dropin_files_col = 15, split_path_col = 16;
2745 bool has_roothash = false, has_dropin_files = false, has_split_path = false;
2746
2747 if ((arg_json_format_flags & JSON_FORMAT_OFF) && context->n_partitions == 0) {
2748 log_info("Empty partition table.");
2749 return 0;
2750 }
2751
2752 t = table_new("type",
2753 "label",
2754 "uuid",
2755 "partno",
2756 "file",
2757 "node",
2758 "offset",
2759 "old size",
2760 "raw size",
2761 "size",
2762 "old padding",
2763 "raw padding",
2764 "padding",
2765 "activity",
2766 "roothash",
2767 "drop-in files",
2768 "split path");
2769 if (!t)
2770 return log_oom();
2771
2772 if (!DEBUG_LOGGING) {
2773 if (arg_json_format_flags & JSON_FORMAT_OFF)
2774 (void) table_set_display(t, (size_t) 0, (size_t) 1, (size_t) 2, (size_t) 3, (size_t) 4,
2775 (size_t) 8, (size_t) 9, (size_t) 12, roothash_col, dropin_files_col,
2776 split_path_col);
2777 else
2778 (void) table_set_display(t, (size_t) 0, (size_t) 1, (size_t) 2, (size_t) 3, (size_t) 4,
2779 (size_t) 5, (size_t) 6, (size_t) 7, (size_t) 8, (size_t) 10,
2780 (size_t) 11, (size_t) 13, roothash_col, dropin_files_col,
2781 split_path_col);
2782 }
2783
2784 (void) table_set_align_percent(t, table_get_cell(t, 0, 5), 100);
2785 (void) table_set_align_percent(t, table_get_cell(t, 0, 6), 100);
2786 (void) table_set_align_percent(t, table_get_cell(t, 0, 7), 100);
2787 (void) table_set_align_percent(t, table_get_cell(t, 0, 8), 100);
2788 (void) table_set_align_percent(t, table_get_cell(t, 0, 9), 100);
2789 (void) table_set_align_percent(t, table_get_cell(t, 0, 10), 100);
2790 (void) table_set_align_percent(t, table_get_cell(t, 0, 11), 100);
2791
2792 LIST_FOREACH(partitions, p, context->partitions) {
2793 _cleanup_free_ char *size_change = NULL, *padding_change = NULL, *partname = NULL, *rh = NULL;
2794 char uuid_buffer[SD_ID128_UUID_STRING_MAX];
2795 const char *label, *activity = NULL;
2796
2797 if (p->dropped)
2798 continue;
2799
2800 if (p->current_size == UINT64_MAX)
2801 activity = "create";
2802 else if (p->current_size != p->new_size)
2803 activity = "resize";
2804
2805 label = partition_label(p);
2806 partname = p->partno != UINT64_MAX ? fdisk_partname(context->node, p->partno+1) : NULL;
2807
2808 r = format_size_change(p->current_size, p->new_size, &size_change);
2809 if (r < 0)
2810 return r;
2811
2812 r = format_size_change(p->current_padding, p->new_padding, &padding_change);
2813 if (r < 0)
2814 return r;
2815
2816 if (p->new_size != UINT64_MAX)
2817 sum_size += p->new_size;
2818 if (p->new_padding != UINT64_MAX)
2819 sum_padding += p->new_padding;
2820
2821 if (p->verity != VERITY_OFF) {
2822 Partition *hp = p->verity == VERITY_HASH ? p : p->siblings[VERITY_HASH];
2823
2824 rh = iovec_is_set(&hp->roothash) ? hexmem(hp->roothash.iov_base, hp->roothash.iov_len) : strdup("TBD");
2825 if (!rh)
2826 return log_oom();
2827 }
2828
2829 r = table_add_many(
2830 t,
2831 TABLE_STRING, gpt_partition_type_uuid_to_string_harder(p->type.uuid, uuid_buffer),
2832 TABLE_STRING, empty_to_null(label) ?: "-", TABLE_SET_COLOR, empty_to_null(label) ? NULL : ansi_grey(),
2833 TABLE_UUID, p->new_uuid_is_set ? p->new_uuid : p->current_uuid,
2834 TABLE_UINT64, p->partno,
2835 TABLE_PATH_BASENAME, p->definition_path, TABLE_SET_COLOR, p->definition_path ? NULL : ansi_grey(),
2836 TABLE_STRING, partname ?: "-", TABLE_SET_COLOR, partname ? NULL : ansi_highlight(),
2837 TABLE_UINT64, p->offset,
2838 TABLE_UINT64, p->current_size == UINT64_MAX ? 0 : p->current_size,
2839 TABLE_UINT64, p->new_size,
2840 TABLE_STRING, size_change, TABLE_SET_COLOR, !p->partitions_next && sum_size > 0 ? ansi_underline() : NULL,
2841 TABLE_UINT64, p->current_padding == UINT64_MAX ? 0 : p->current_padding,
2842 TABLE_UINT64, p->new_padding,
2843 TABLE_STRING, padding_change, TABLE_SET_COLOR, !p->partitions_next && sum_padding > 0 ? ansi_underline() : NULL,
2844 TABLE_STRING, activity ?: "unchanged",
2845 TABLE_STRING, rh,
2846 TABLE_STRV, p->drop_in_files,
2847 TABLE_STRING, empty_to_null(p->split_path) ?: "-");
2848 if (r < 0)
2849 return table_log_add_error(r);
2850
2851 has_roothash = has_roothash || !isempty(rh);
2852 has_dropin_files = has_dropin_files || !strv_isempty(p->drop_in_files);
2853 has_split_path = has_split_path || !isempty(p->split_path);
2854 }
2855
2856 if ((arg_json_format_flags & JSON_FORMAT_OFF) && (sum_padding > 0 || sum_size > 0)) {
2857 const char *a, *b;
2858
2859 a = strjoina(special_glyph(SPECIAL_GLYPH_SIGMA), " = ", FORMAT_BYTES(sum_size));
2860 b = strjoina(special_glyph(SPECIAL_GLYPH_SIGMA), " = ", FORMAT_BYTES(sum_padding));
2861
2862 r = table_add_many(
2863 t,
2864 TABLE_EMPTY,
2865 TABLE_EMPTY,
2866 TABLE_EMPTY,
2867 TABLE_EMPTY,
2868 TABLE_EMPTY,
2869 TABLE_EMPTY,
2870 TABLE_EMPTY,
2871 TABLE_EMPTY,
2872 TABLE_EMPTY,
2873 TABLE_STRING, a,
2874 TABLE_EMPTY,
2875 TABLE_EMPTY,
2876 TABLE_STRING, b,
2877 TABLE_EMPTY,
2878 TABLE_EMPTY,
2879 TABLE_EMPTY,
2880 TABLE_EMPTY);
2881 if (r < 0)
2882 return table_log_add_error(r);
2883 }
2884
2885 if (!has_roothash) {
2886 r = table_hide_column_from_display(t, roothash_col);
2887 if (r < 0)
2888 return log_error_errno(r, "Failed to set columns to display: %m");
2889 }
2890
2891 if (!has_dropin_files) {
2892 r = table_hide_column_from_display(t, dropin_files_col);
2893 if (r < 0)
2894 return log_error_errno(r, "Failed to set columns to display: %m");
2895 }
2896
2897 if (!has_split_path) {
2898 r = table_hide_column_from_display(t, split_path_col);
2899 if (r < 0)
2900 return log_error_errno(r, "Failed to set columns to display: %m");
2901 }
2902
2903 return table_print_with_pager(t, arg_json_format_flags, arg_pager_flags, arg_legend);
2904 }
2905
2906 static void context_bar_char_process_partition(
2907 Context *context,
2908 Partition *bar[],
2909 size_t n,
2910 Partition *p,
2911 size_t *ret_start) {
2912
2913 uint64_t from, to, total;
2914 size_t x, y;
2915
2916 assert(context);
2917 assert(bar);
2918 assert(n > 0);
2919 assert(p);
2920
2921 if (p->dropped)
2922 return;
2923
2924 assert(p->offset != UINT64_MAX);
2925 assert(p->new_size != UINT64_MAX);
2926
2927 from = p->offset;
2928 to = from + p->new_size;
2929
2930 assert(context->total > 0);
2931 total = context->total;
2932
2933 assert(from <= total);
2934 x = from * n / total;
2935
2936 assert(to <= total);
2937 y = to * n / total;
2938
2939 assert(x <= y);
2940 assert(y <= n);
2941
2942 for (size_t i = x; i < y; i++)
2943 bar[i] = p;
2944
2945 *ret_start = x;
2946 }
2947
2948 static int partition_hint(const Partition *p, const char *node, char **ret) {
2949 _cleanup_free_ char *buf = NULL;
2950 const char *label;
2951 sd_id128_t id;
2952
2953 /* Tries really hard to find a suitable description for this partition */
2954
2955 if (p->definition_path)
2956 return path_extract_filename(p->definition_path, ret);
2957
2958 label = partition_label(p);
2959 if (!isempty(label)) {
2960 buf = strdup(label);
2961 goto done;
2962 }
2963
2964 if (p->partno != UINT64_MAX) {
2965 buf = fdisk_partname(node, p->partno+1);
2966 goto done;
2967 }
2968
2969 if (p->new_uuid_is_set)
2970 id = p->new_uuid;
2971 else if (!sd_id128_is_null(p->current_uuid))
2972 id = p->current_uuid;
2973 else
2974 id = p->type.uuid;
2975
2976 buf = strdup(SD_ID128_TO_UUID_STRING(id));
2977
2978 done:
2979 if (!buf)
2980 return -ENOMEM;
2981
2982 *ret = TAKE_PTR(buf);
2983 return 0;
2984 }
2985
2986 static int context_dump_partition_bar(Context *context) {
2987 _cleanup_free_ Partition **bar = NULL;
2988 _cleanup_free_ size_t *start_array = NULL;
2989 Partition *last = NULL;
2990 bool z = false;
2991 size_t c, j = 0;
2992
2993 assert_se((c = columns()) >= 2);
2994 c -= 2; /* We do not use the leftmost and rightmost character cell */
2995
2996 bar = new0(Partition*, c);
2997 if (!bar)
2998 return log_oom();
2999
3000 start_array = new(size_t, context->n_partitions);
3001 if (!start_array)
3002 return log_oom();
3003
3004 LIST_FOREACH(partitions, p, context->partitions)
3005 context_bar_char_process_partition(context, bar, c, p, start_array + j++);
3006
3007 putc(' ', stdout);
3008
3009 for (size_t i = 0; i < c; i++) {
3010 if (bar[i]) {
3011 if (last != bar[i])
3012 z = !z;
3013
3014 fputs(z ? ansi_green() : ansi_yellow(), stdout);
3015 fputs(special_glyph(SPECIAL_GLYPH_DARK_SHADE), stdout);
3016 } else {
3017 fputs(ansi_normal(), stdout);
3018 fputs(special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), stdout);
3019 }
3020
3021 last = bar[i];
3022 }
3023
3024 fputs(ansi_normal(), stdout);
3025 putc('\n', stdout);
3026
3027 for (size_t i = 0; i < context->n_partitions; i++) {
3028 _cleanup_free_ char **line = NULL;
3029
3030 line = new0(char*, c);
3031 if (!line)
3032 return log_oom();
3033
3034 j = 0;
3035 LIST_FOREACH(partitions, p, context->partitions) {
3036 _cleanup_free_ char *d = NULL;
3037 j++;
3038
3039 if (i < context->n_partitions - j) {
3040
3041 if (line[start_array[j-1]]) {
3042 const char *e;
3043
3044 /* Upgrade final corner to the right with a branch to the right */
3045 e = startswith(line[start_array[j-1]], special_glyph(SPECIAL_GLYPH_TREE_RIGHT));
3046 if (e) {
3047 d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_BRANCH), e);
3048 if (!d)
3049 return log_oom();
3050 }
3051 }
3052
3053 if (!d) {
3054 d = strdup(special_glyph(SPECIAL_GLYPH_TREE_VERTICAL));
3055 if (!d)
3056 return log_oom();
3057 }
3058
3059 } else if (i == context->n_partitions - j) {
3060 _cleanup_free_ char *hint = NULL;
3061
3062 (void) partition_hint(p, context->node, &hint);
3063
3064 if (streq_ptr(line[start_array[j-1]], special_glyph(SPECIAL_GLYPH_TREE_VERTICAL)))
3065 d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_BRANCH), " ", strna(hint));
3066 else
3067 d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_RIGHT), " ", strna(hint));
3068
3069 if (!d)
3070 return log_oom();
3071 }
3072
3073 if (d)
3074 free_and_replace(line[start_array[j-1]], d);
3075 }
3076
3077 putc(' ', stdout);
3078
3079 j = 0;
3080 while (j < c) {
3081 if (line[j]) {
3082 fputs(line[j], stdout);
3083 j += utf8_console_width(line[j]);
3084 } else {
3085 putc(' ', stdout);
3086 j++;
3087 }
3088 }
3089
3090 putc('\n', stdout);
3091
3092 for (j = 0; j < c; j++)
3093 free(line[j]);
3094 }
3095
3096 return 0;
3097 }
3098
3099 static bool context_has_roothash(Context *context) {
3100 LIST_FOREACH(partitions, p, context->partitions)
3101 if (iovec_is_set(&p->roothash))
3102 return true;
3103
3104 return false;
3105 }
3106
3107 static int context_dump(Context *context, bool late) {
3108 int r;
3109
3110 assert(context);
3111
3112 if (arg_pretty == 0 && FLAGS_SET(arg_json_format_flags, JSON_FORMAT_OFF))
3113 return 0;
3114
3115 /* If we're outputting JSON, only dump after doing all operations so we can include the roothashes
3116 * in the output. */
3117 if (!late && !FLAGS_SET(arg_json_format_flags, JSON_FORMAT_OFF))
3118 return 0;
3119
3120 /* If we're not outputting JSON, only dump again after doing all operations if there are any
3121 * roothashes that we need to communicate to the user. */
3122 if (late && FLAGS_SET(arg_json_format_flags, JSON_FORMAT_OFF) && !context_has_roothash(context))
3123 return 0;
3124
3125 r = context_dump_partitions(context);
3126 if (r < 0)
3127 return r;
3128
3129 /* Make sure we only write the partition bar once, even if we're writing the partition table twice to
3130 * communicate roothashes. */
3131 if (FLAGS_SET(arg_json_format_flags, JSON_FORMAT_OFF) && !late) {
3132 putc('\n', stdout);
3133
3134 r = context_dump_partition_bar(context);
3135 if (r < 0)
3136 return r;
3137
3138 putc('\n', stdout);
3139 }
3140
3141 fflush(stdout);
3142
3143 return 0;
3144 }
3145
3146
3147 static bool context_changed(const Context *context) {
3148 assert(context);
3149
3150 LIST_FOREACH(partitions, p, context->partitions) {
3151 if (p->dropped)
3152 continue;
3153
3154 if (p->allocated_to_area)
3155 return true;
3156
3157 if (p->new_size != p->current_size)
3158 return true;
3159 }
3160
3161 return false;
3162 }
3163
3164 static int context_wipe_range(Context *context, uint64_t offset, uint64_t size) {
3165 _cleanup_(blkid_free_probep) blkid_probe probe = NULL;
3166 int r;
3167
3168 assert(context);
3169 assert(offset != UINT64_MAX);
3170 assert(size != UINT64_MAX);
3171
3172 probe = blkid_new_probe();
3173 if (!probe)
3174 return log_oom();
3175
3176 errno = 0;
3177 r = blkid_probe_set_device(probe, fdisk_get_devfd(context->fdisk_context), offset, size);
3178 if (r < 0)
3179 return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to allocate device probe for wiping.");
3180
3181 errno = 0;
3182 if (blkid_probe_enable_superblocks(probe, true) < 0 ||
3183 blkid_probe_set_superblocks_flags(probe, BLKID_SUBLKS_MAGIC|BLKID_SUBLKS_BADCSUM) < 0 ||
3184 blkid_probe_enable_partitions(probe, true) < 0 ||
3185 blkid_probe_set_partitions_flags(probe, BLKID_PARTS_MAGIC) < 0)
3186 return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to enable superblock and partition probing.");
3187
3188 for (;;) {
3189 errno = 0;
3190 r = blkid_do_probe(probe);
3191 if (r < 0)
3192 return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe for file systems.");
3193 if (r > 0)
3194 break;
3195
3196 errno = 0;
3197 if (blkid_do_wipe(probe, false) < 0)
3198 return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to wipe file system signature.");
3199 }
3200
3201 return 0;
3202 }
3203
3204 static int context_wipe_partition(Context *context, Partition *p) {
3205 int r;
3206
3207 assert(context);
3208 assert(p);
3209 assert(!PARTITION_EXISTS(p)); /* Safety check: never wipe existing partitions */
3210
3211 assert(p->offset != UINT64_MAX);
3212 assert(p->new_size != UINT64_MAX);
3213
3214 r = context_wipe_range(context, p->offset, p->new_size);
3215 if (r < 0)
3216 return r;
3217
3218 log_info("Successfully wiped file system signatures from future partition %" PRIu64 ".", p->partno);
3219 return 0;
3220 }
3221
3222 static int context_discard_range(
3223 Context *context,
3224 uint64_t offset,
3225 uint64_t size) {
3226
3227 struct stat st;
3228 int fd;
3229
3230 assert(context);
3231 assert(offset != UINT64_MAX);
3232 assert(size != UINT64_MAX);
3233
3234 if (size <= 0)
3235 return 0;
3236
3237 assert_se((fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
3238
3239 if (fstat(fd, &st) < 0)
3240 return -errno;
3241
3242 if (S_ISREG(st.st_mode)) {
3243 if (fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, size) < 0) {
3244 if (ERRNO_IS_NOT_SUPPORTED(errno))
3245 return -EOPNOTSUPP;
3246
3247 return -errno;
3248 }
3249
3250 return 1;
3251 }
3252
3253 if (S_ISBLK(st.st_mode)) {
3254 uint64_t range[2], end;
3255
3256 range[0] = round_up_size(offset, context->sector_size);
3257
3258 if (offset > UINT64_MAX - size)
3259 return -ERANGE;
3260
3261 end = offset + size;
3262 if (end <= range[0])
3263 return 0;
3264
3265 range[1] = round_down_size(end - range[0], context->sector_size);
3266 if (range[1] <= 0)
3267 return 0;
3268
3269 if (ioctl(fd, BLKDISCARD, range) < 0) {
3270 if (ERRNO_IS_NOT_SUPPORTED(errno))
3271 return -EOPNOTSUPP;
3272
3273 return -errno;
3274 }
3275
3276 return 1;
3277 }
3278
3279 return -EOPNOTSUPP;
3280 }
3281
3282 static int context_discard_partition(Context *context, Partition *p) {
3283 int r;
3284
3285 assert(context);
3286 assert(p);
3287
3288 assert(p->offset != UINT64_MAX);
3289 assert(p->new_size != UINT64_MAX);
3290 assert(!PARTITION_EXISTS(p)); /* Safety check: never discard existing partitions */
3291
3292 if (!arg_discard)
3293 return 0;
3294
3295 r = context_discard_range(context, p->offset, p->new_size);
3296 if (r == -EOPNOTSUPP) {
3297 log_info("Storage does not support discard, not discarding data in future partition %" PRIu64 ".", p->partno);
3298 return 0;
3299 }
3300 if (r == -EBUSY) {
3301 /* Let's handle this gracefully: https://bugzilla.kernel.org/show_bug.cgi?id=211167 */
3302 log_info("Block device is busy, not discarding partition %" PRIu64 " because it probably is mounted.", p->partno);
3303 return 0;
3304 }
3305 if (r == 0) {
3306 log_info("Partition %" PRIu64 " too short for discard, skipping.", p->partno);
3307 return 0;
3308 }
3309 if (r < 0)
3310 return log_error_errno(r, "Failed to discard data for future partition %" PRIu64 ".", p->partno);
3311
3312 log_info("Successfully discarded data from future partition %" PRIu64 ".", p->partno);
3313 return 1;
3314 }
3315
3316 static int context_discard_gap_after(Context *context, Partition *p) {
3317 uint64_t gap, next = UINT64_MAX;
3318 int r;
3319
3320 assert(context);
3321 assert(!p || (p->offset != UINT64_MAX && p->new_size != UINT64_MAX));
3322
3323 if (!arg_discard)
3324 return 0;
3325
3326 if (p)
3327 gap = p->offset + p->new_size;
3328 else
3329 /* The context start gets rounded up to grain_size, however
3330 * existing partitions may be before that so ensure the gap
3331 * starts at the first actually usable lba
3332 */
3333 gap = fdisk_get_first_lba(context->fdisk_context) * context->sector_size;
3334
3335 LIST_FOREACH(partitions, q, context->partitions) {
3336 if (q->dropped)
3337 continue;
3338
3339 assert(q->offset != UINT64_MAX);
3340 assert(q->new_size != UINT64_MAX);
3341
3342 if (q->offset < gap)
3343 continue;
3344
3345 if (next == UINT64_MAX || q->offset < next)
3346 next = q->offset;
3347 }
3348
3349 if (next == UINT64_MAX) {
3350 next = (fdisk_get_last_lba(context->fdisk_context) + 1) * context->sector_size;
3351 if (gap > next)
3352 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Partition end beyond disk end.");
3353 }
3354
3355 assert(next >= gap);
3356 r = context_discard_range(context, gap, next - gap);
3357 if (r == -EOPNOTSUPP) {
3358 if (p)
3359 log_info("Storage does not support discard, not discarding gap after partition %" PRIu64 ".", p->partno);
3360 else
3361 log_info("Storage does not support discard, not discarding gap at beginning of disk.");
3362 return 0;
3363 }
3364 if (r == 0) /* Too short */
3365 return 0;
3366 if (r < 0) {
3367 if (p)
3368 return log_error_errno(r, "Failed to discard gap after partition %" PRIu64 ".", p->partno);
3369 else
3370 return log_error_errno(r, "Failed to discard gap at beginning of disk.");
3371 }
3372
3373 if (p)
3374 log_info("Successfully discarded gap after partition %" PRIu64 ".", p->partno);
3375 else
3376 log_info("Successfully discarded gap at beginning of disk.");
3377
3378 return 0;
3379 }
3380
3381 static int context_wipe_and_discard(Context *context) {
3382 int r;
3383
3384 assert(context);
3385
3386 if (arg_empty == EMPTY_CREATE) /* If we just created the image, no need to wipe */
3387 return 0;
3388
3389 /* Wipe and discard the contents of all partitions we are about to create. We skip the discarding if
3390 * we were supposed to start from scratch anyway, as in that case we just discard the whole block
3391 * device in one go early on. */
3392
3393 LIST_FOREACH(partitions, p, context->partitions) {
3394
3395 if (!p->allocated_to_area)
3396 continue;
3397
3398 if (partition_type_defer(&p->type))
3399 continue;
3400
3401 r = context_wipe_partition(context, p);
3402 if (r < 0)
3403 return r;
3404
3405 if (!context->from_scratch) {
3406 r = context_discard_partition(context, p);
3407 if (r < 0)
3408 return r;
3409
3410 r = context_discard_gap_after(context, p);
3411 if (r < 0)
3412 return r;
3413 }
3414 }
3415
3416 if (!context->from_scratch) {
3417 r = context_discard_gap_after(context, NULL);
3418 if (r < 0)
3419 return r;
3420 }
3421
3422 return 0;
3423 }
3424
/* Bookkeeping for a LUKS volume that was activated while populating a partition */
typedef struct DecryptedPartitionTarget {
        int fd;                      /* file descriptor of the opened volume, closed on free */
        char *dm_name;               /* name the volume was activated under, used to deactivate it */
        char *volume;                /* path of the activated volume's device node */
        struct crypt_device *device; /* libcryptsetup context of the volume */
} DecryptedPartitionTarget;

/* Closes the volume's file descriptor, force-deactivates the volume and releases all memory owned by
 * 't'. A failing deactivation is only logged. Accepts NULL and always returns NULL. Without libcryptsetup
 * support this does nothing. */
static DecryptedPartitionTarget* decrypted_partition_target_free(DecryptedPartitionTarget *t) {
#if HAVE_LIBCRYPTSETUP
        if (t) {
                int r;

                safe_close(t->fd);

                /* udev or so might access our block device in the background while we are done. Let's
                 * hence force detach the volume. We sync'ed before, hence this should be safe. */
                r = sym_crypt_deactivate_by_name(t->device, t->dm_name, CRYPT_DEACTIVATE_FORCE);
                if (r < 0)
                        log_warning_errno(r, "Failed to deactivate LUKS device, ignoring: %m");

                sym_crypt_free(t->device);
                free(t->dm_name);
                free(t->volume);
                free(t);
        }
#endif
        return NULL;
}
3454
3455 typedef struct {
3456 LoopDevice *loop;
3457 int fd;
3458 char *path;
3459 int whole_fd;
3460 DecryptedPartitionTarget *decrypted;
3461 } PartitionTarget;
3462
3463 static int partition_target_fd(PartitionTarget *t) {
3464 assert(t);
3465 assert(t->loop || t->fd >= 0 || t->whole_fd >= 0);
3466
3467 if (t->decrypted)
3468 return t->decrypted->fd;
3469
3470 if (t->loop)
3471 return t->loop->fd;
3472
3473 if (t->fd >= 0)
3474 return t->fd;
3475
3476 return t->whole_fd;
3477 }
3478
3479 static const char* partition_target_path(PartitionTarget *t) {
3480 assert(t);
3481 assert(t->loop || t->path);
3482
3483 if (t->decrypted)
3484 return t->decrypted->volume;
3485
3486 if (t->loop)
3487 return t->loop->node;
3488
3489 return t->path;
3490 }
3491
3492 static PartitionTarget *partition_target_free(PartitionTarget *t) {
3493 if (!t)
3494 return NULL;
3495
3496 decrypted_partition_target_free(t->decrypted);
3497 loop_device_unref(t->loop);
3498 safe_close(t->fd);
3499 unlink_and_free(t->path);
3500
3501 return mfree(t);
3502 }
3503
3504 DEFINE_TRIVIAL_CLEANUP_FUNC(PartitionTarget*, partition_target_free);
3505
3506 static int prepare_temporary_file(PartitionTarget *t, uint64_t size) {
3507 _cleanup_(unlink_and_freep) char *temp = NULL;
3508 _cleanup_close_ int fd = -EBADF;
3509 const char *vt;
3510 int r;
3511
3512 assert(t);
3513
3514 r = var_tmp_dir(&vt);
3515 if (r < 0)
3516 return log_error_errno(r, "Could not determine temporary directory: %m");
3517
3518 temp = path_join(vt, "repart-XXXXXX");
3519 if (!temp)
3520 return log_oom();
3521
3522 fd = mkostemp_safe(temp);
3523 if (fd < 0)
3524 return log_error_errno(fd, "Failed to create temporary file: %m");
3525
3526 if (ftruncate(fd, size) < 0)
3527 return log_error_errno(errno, "Failed to truncate temporary file to %s: %m",
3528 FORMAT_BYTES(size));
3529
3530 t->fd = TAKE_FD(fd);
3531 t->path = TAKE_PTR(temp);
3532
3533 return 0;
3534 }
3535
3536 static int partition_target_prepare(
3537 Context *context,
3538 Partition *p,
3539 uint64_t size,
3540 bool need_path,
3541 PartitionTarget **ret) {
3542
3543 _cleanup_(partition_target_freep) PartitionTarget *t = NULL;
3544 _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
3545 int whole_fd, r;
3546
3547 assert(context);
3548 assert(p);
3549 assert(ret);
3550
3551 assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
3552
3553 t = new(PartitionTarget, 1);
3554 if (!t)
3555 return log_oom();
3556 *t = (PartitionTarget) {
3557 .fd = -EBADF,
3558 .whole_fd = -EBADF,
3559 };
3560
3561 if (!need_path) {
3562 if (lseek(whole_fd, p->offset, SEEK_SET) < 0)
3563 return log_error_errno(errno, "Failed to seek to partition offset: %m");
3564
3565 t->whole_fd = whole_fd;
3566 *ret = TAKE_PTR(t);
3567 return 0;
3568 }
3569
3570 /* Loopback block devices are not only useful to turn regular files into block devices, but
3571 * also to cut out sections of block devices into new block devices. */
3572
3573 if (arg_offline <= 0) {
3574 r = loop_device_make(whole_fd, O_RDWR, p->offset, size, context->sector_size, 0, LOCK_EX, &d);
3575 if (r < 0 && (arg_offline == 0 || (r != -ENOENT && !ERRNO_IS_PRIVILEGE(r)) || !strv_isempty(p->subvolumes)))
3576 return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno);
3577 if (r >= 0) {
3578 t->loop = TAKE_PTR(d);
3579 *ret = TAKE_PTR(t);
3580 return 0;
3581 }
3582
3583 log_debug_errno(r, "No access to loop devices, falling back to a regular file");
3584 }
3585
3586 /* If we can't allocate a loop device, let's write to a regular file that we copy into the final
3587 * image so we can run in containers and without needing root privileges. On filesystems with
3588 * reflinking support, we can take advantage of this and just reflink the result into the image.
3589 */
3590
3591 r = prepare_temporary_file(t, size);
3592 if (r < 0)
3593 return r;
3594
3595 *ret = TAKE_PTR(t);
3596
3597 return 0;
3598 }
3599
3600 static int partition_target_grow(PartitionTarget *t, uint64_t size) {
3601 int r;
3602
3603 assert(t);
3604 assert(!t->decrypted);
3605
3606 if (t->loop) {
3607 r = loop_device_refresh_size(t->loop, UINT64_MAX, size);
3608 if (r < 0)
3609 return log_error_errno(r, "Failed to refresh loopback device size: %m");
3610 } else if (t->fd >= 0) {
3611 if (ftruncate(t->fd, size) < 0)
3612 return log_error_errno(errno, "Failed to grow '%s' to %s by truncation: %m",
3613 t->path, FORMAT_BYTES(size));
3614 }
3615
3616 return 0;
3617 }
3618
3619 static int partition_target_sync(Context *context, Partition *p, PartitionTarget *t) {
3620 int whole_fd, r;
3621
3622 assert(context);
3623 assert(p);
3624 assert(t);
3625
3626 assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
3627
3628 if (t->decrypted && fsync(t->decrypted->fd) < 0)
3629 return log_error_errno(errno, "Failed to sync changes to '%s': %m", t->decrypted->volume);
3630
3631 if (t->loop) {
3632 r = loop_device_sync(t->loop);
3633 if (r < 0)
3634 return log_error_errno(r, "Failed to sync loopback device: %m");
3635 } else if (t->fd >= 0) {
3636 struct stat st;
3637
3638 if (lseek(whole_fd, p->offset, SEEK_SET) < 0)
3639 return log_error_errno(errno, "Failed to seek to partition offset: %m");
3640
3641 if (lseek(t->fd, 0, SEEK_SET) < 0)
3642 return log_error_errno(errno, "Failed to seek to start of temporary file: %m");
3643
3644 if (fstat(t->fd, &st) < 0)
3645 return log_error_errno(errno, "Failed to stat temporary file: %m");
3646
3647 if (st.st_size > (off_t) p->new_size)
3648 return log_error_errno(SYNTHETIC_ERRNO(ENOSPC),
3649 "Partition %" PRIu64 "'s contents (%s) don't fit in the partition (%s)",
3650 p->partno, FORMAT_BYTES(st.st_size), FORMAT_BYTES(p->new_size));
3651
3652 r = copy_bytes(t->fd, whole_fd, UINT64_MAX, COPY_REFLINK|COPY_HOLES|COPY_FSYNC);
3653 if (r < 0)
3654 return log_error_errno(r, "Failed to copy bytes to partition: %m");
3655 } else {
3656 if (fsync(t->whole_fd) < 0)
3657 return log_error_errno(errno, "Failed to sync changes: %m");
3658 }
3659
3660 return 0;
3661 }
3662
3663 static int partition_encrypt(Context *context, Partition *p, PartitionTarget *target, bool offline) {
3664 #if HAVE_LIBCRYPTSETUP && HAVE_CRYPT_SET_DATA_OFFSET && HAVE_CRYPT_REENCRYPT_INIT_BY_PASSPHRASE && HAVE_CRYPT_REENCRYPT
3665 const char *node = partition_target_path(target);
3666 struct crypt_params_luks2 luks_params = {
3667 .label = strempty(ASSERT_PTR(p)->new_label),
3668 .sector_size = ASSERT_PTR(context)->fs_sector_size,
3669 .data_device = offline ? node : NULL,
3670 };
3671 struct crypt_params_reencrypt reencrypt_params = {
3672 .mode = CRYPT_REENCRYPT_ENCRYPT,
3673 .direction = CRYPT_REENCRYPT_BACKWARD,
3674 .resilience = "datashift",
3675 .data_shift = LUKS2_METADATA_SIZE / 512,
3676 .luks2 = &luks_params,
3677 .flags = CRYPT_REENCRYPT_INITIALIZE_ONLY|CRYPT_REENCRYPT_MOVE_FIRST_SEGMENT,
3678 };
3679 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
3680 _cleanup_(erase_and_freep) char *base64_encoded = NULL;
3681 _cleanup_fclose_ FILE *h = NULL;
3682 _cleanup_free_ char *hp = NULL, *vol = NULL, *dm_name = NULL;
3683 const char *passphrase = NULL;
3684 size_t passphrase_size = 0;
3685 const char *vt;
3686 int r;
3687
3688 assert(context);
3689 assert(p);
3690 assert(p->encrypt != ENCRYPT_OFF);
3691
3692 r = dlopen_cryptsetup();
3693 if (r < 0)
3694 return log_error_errno(r, "libcryptsetup not found, cannot encrypt: %m");
3695
3696 log_info("Encrypting future partition %" PRIu64 "...", p->partno);
3697
3698 if (offline) {
3699 r = var_tmp_dir(&vt);
3700 if (r < 0)
3701 return log_error_errno(r, "Failed to determine temporary files directory: %m");
3702
3703 r = fopen_temporary_child(vt, &h, &hp);
3704 if (r < 0)
3705 return log_error_errno(r, "Failed to create temporary LUKS header file: %m");
3706
3707 /* Weird cryptsetup requirement which requires the header file to be the size of at least one
3708 * sector. */
3709 r = ftruncate(fileno(h), luks_params.sector_size);
3710 if (r < 0)
3711 return log_error_errno(r, "Failed to grow temporary LUKS header file: %m");
3712 } else {
3713 if (asprintf(&dm_name, "luks-repart-%08" PRIx64, random_u64()) < 0)
3714 return log_oom();
3715
3716 vol = path_join("/dev/mapper/", dm_name);
3717 if (!vol)
3718 return log_oom();
3719 }
3720
3721 r = sym_crypt_init(&cd, offline ? hp : node);
3722 if (r < 0)
3723 return log_error_errno(r, "Failed to allocate libcryptsetup context for %s: %m", hp);
3724
3725 cryptsetup_enable_logging(cd);
3726
3727 if (offline) {
3728 /* Disable kernel keyring usage by libcryptsetup as a workaround for
3729 * https://gitlab.com/cryptsetup/cryptsetup/-/merge_requests/273. This makes sure that we can
3730 * do offline encryption even when repart is running in a container. */
3731 r = sym_crypt_volume_key_keyring(cd, false);
3732 if (r < 0)
3733 return log_error_errno(r, "Failed to disable kernel keyring: %m");
3734
3735 r = sym_crypt_metadata_locking(cd, false);
3736 if (r < 0)
3737 return log_error_errno(r, "Failed to disable metadata locking: %m");
3738
3739 r = sym_crypt_set_data_offset(cd, LUKS2_METADATA_SIZE / 512);
3740 if (r < 0)
3741 return log_error_errno(r, "Failed to set data offset: %m");
3742 }
3743
3744 r = sym_crypt_format(cd,
3745 CRYPT_LUKS2,
3746 "aes",
3747 "xts-plain64",
3748 SD_ID128_TO_UUID_STRING(p->luks_uuid),
3749 NULL,
3750 VOLUME_KEY_SIZE,
3751 &luks_params);
3752 if (r < 0)
3753 return log_error_errno(r, "Failed to LUKS2 format future partition: %m");
3754
3755 if (IN_SET(p->encrypt, ENCRYPT_KEY_FILE, ENCRYPT_KEY_FILE_TPM2)) {
3756 r = sym_crypt_keyslot_add_by_volume_key(
3757 cd,
3758 CRYPT_ANY_SLOT,
3759 NULL,
3760 VOLUME_KEY_SIZE,
3761 strempty(arg_key),
3762 arg_key_size);
3763 if (r < 0)
3764 return log_error_errno(r, "Failed to add LUKS2 key: %m");
3765
3766 passphrase = strempty(arg_key);
3767 passphrase_size = arg_key_size;
3768 }
3769
3770 if (IN_SET(p->encrypt, ENCRYPT_TPM2, ENCRYPT_KEY_FILE_TPM2)) {
3771 #if HAVE_TPM2
3772 _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
3773 _cleanup_(erase_and_freep) void *secret = NULL;
3774 _cleanup_free_ void *pubkey = NULL;
3775 _cleanup_free_ void *blob = NULL, *srk_buf = NULL;
3776 size_t secret_size, blob_size, pubkey_size = 0, srk_buf_size = 0;
3777 ssize_t base64_encoded_size;
3778 int keyslot;
3779
3780 if (arg_tpm2_public_key_pcr_mask != 0) {
3781 r = tpm2_load_pcr_public_key(arg_tpm2_public_key, &pubkey, &pubkey_size);
3782 if (r < 0) {
3783 if (arg_tpm2_public_key || r != -ENOENT)
3784 return log_error_errno(r, "Failed to read TPM PCR public key: %m");
3785
3786 log_debug_errno(r, "Failed to read TPM2 PCR public key, proceeding without: %m");
3787 arg_tpm2_public_key_pcr_mask = 0;
3788 }
3789 }
3790
3791 _cleanup_(tpm2_context_unrefp) Tpm2Context *tpm2_context = NULL;
3792 r = tpm2_context_new(arg_tpm2_device, &tpm2_context);
3793 if (r < 0)
3794 return log_error_errno(r, "Failed to create TPM2 context: %m");
3795
3796 TPM2B_PUBLIC public;
3797 if (pubkey) {
3798 r = tpm2_tpm2b_public_from_pem(pubkey, pubkey_size, &public);
3799 if (r < 0)
3800 return log_error_errno(r, "Could not convert public key to TPM2B_PUBLIC: %m");
3801 }
3802
3803 r = tpm2_pcr_read_missing_values(tpm2_context, arg_tpm2_hash_pcr_values, arg_tpm2_n_hash_pcr_values);
3804 if (r < 0)
3805 return log_error_errno(r, "Could not read pcr values: %m");
3806
3807 uint16_t hash_pcr_bank = 0;
3808 uint32_t hash_pcr_mask = 0;
3809 if (arg_tpm2_n_hash_pcr_values > 0) {
3810 size_t hash_count;
3811 r = tpm2_pcr_values_hash_count(arg_tpm2_hash_pcr_values, arg_tpm2_n_hash_pcr_values, &hash_count);
3812 if (r < 0)
3813 return log_error_errno(r, "Could not get hash count: %m");
3814
3815 if (hash_count > 1)
3816 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Multiple PCR banks selected.");
3817
3818 hash_pcr_bank = arg_tpm2_hash_pcr_values[0].hash;
3819 r = tpm2_pcr_values_to_mask(arg_tpm2_hash_pcr_values, arg_tpm2_n_hash_pcr_values, hash_pcr_bank, &hash_pcr_mask);
3820 if (r < 0)
3821 return log_error_errno(r, "Could not get hash mask: %m");
3822 }
3823
3824 TPM2B_DIGEST policy = TPM2B_DIGEST_MAKE(NULL, TPM2_SHA256_DIGEST_SIZE);
3825 r = tpm2_calculate_sealing_policy(arg_tpm2_hash_pcr_values, arg_tpm2_n_hash_pcr_values, pubkey ? &public : NULL, /* use_pin= */ false, &policy);
3826 if (r < 0)
3827 return log_error_errno(r, "Could not calculate sealing policy digest: %m");
3828
3829 r = tpm2_seal(tpm2_context,
3830 /* seal_key_handle= */ 0,
3831 &policy,
3832 /* pin= */ NULL,
3833 &secret, &secret_size,
3834 &blob, &blob_size,
3835 /* ret_primary_alg= */ NULL,
3836 &srk_buf, &srk_buf_size);
3837 if (r < 0)
3838 return log_error_errno(r, "Failed to seal to TPM2: %m");
3839
3840 base64_encoded_size = base64mem(secret, secret_size, &base64_encoded);
3841 if (base64_encoded_size < 0)
3842 return log_error_errno(base64_encoded_size, "Failed to base64 encode secret key: %m");
3843
3844 r = cryptsetup_set_minimal_pbkdf(cd);
3845 if (r < 0)
3846 return log_error_errno(r, "Failed to set minimal PBKDF: %m");
3847
3848 keyslot = sym_crypt_keyslot_add_by_volume_key(
3849 cd,
3850 CRYPT_ANY_SLOT,
3851 NULL,
3852 VOLUME_KEY_SIZE,
3853 base64_encoded,
3854 base64_encoded_size);
3855 if (keyslot < 0)
3856 return log_error_errno(keyslot, "Failed to add new TPM2 key: %m");
3857
3858 r = tpm2_make_luks2_json(
3859 keyslot,
3860 hash_pcr_mask,
3861 hash_pcr_bank,
3862 pubkey, pubkey_size,
3863 arg_tpm2_public_key_pcr_mask,
3864 /* primary_alg= */ 0,
3865 blob, blob_size,
3866 policy.buffer, policy.size,
3867 NULL, 0, /* no salt because tpm2_seal has no pin */
3868 srk_buf, srk_buf_size,
3869 0,
3870 &v);
3871 if (r < 0)
3872 return log_error_errno(r, "Failed to prepare TPM2 JSON token object: %m");
3873
3874 r = cryptsetup_add_token_json(cd, v);
3875 if (r < 0)
3876 return log_error_errno(r, "Failed to add TPM2 JSON token to LUKS2 header: %m");
3877
3878 passphrase = base64_encoded;
3879 passphrase_size = strlen(base64_encoded);
3880 #else
3881 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
3882 "Support for TPM2 enrollment not enabled.");
3883 #endif
3884 }
3885
3886 if (offline) {
3887 r = sym_crypt_reencrypt_init_by_passphrase(
3888 cd,
3889 NULL,
3890 passphrase,
3891 passphrase_size,
3892 CRYPT_ANY_SLOT,
3893 0,
3894 sym_crypt_get_cipher(cd),
3895 sym_crypt_get_cipher_mode(cd),
3896 &reencrypt_params);
3897 if (r < 0)
3898 return log_error_errno(r, "Failed to prepare for reencryption: %m");
3899
3900 /* crypt_reencrypt_init_by_passphrase() doesn't actually put the LUKS header at the front, we
3901 * have to do that ourselves. */
3902
3903 sym_crypt_free(cd);
3904 cd = NULL;
3905
3906 r = sym_crypt_init(&cd, node);
3907 if (r < 0)
3908 return log_error_errno(r, "Failed to allocate libcryptsetup context for %s: %m", node);
3909
3910 r = sym_crypt_header_restore(cd, CRYPT_LUKS2, hp);
3911 if (r < 0)
3912 return log_error_errno(r, "Failed to place new LUKS header at head of %s: %m", node);
3913
3914 reencrypt_params.flags &= ~CRYPT_REENCRYPT_INITIALIZE_ONLY;
3915
3916 r = sym_crypt_reencrypt_init_by_passphrase(
3917 cd,
3918 NULL,
3919 passphrase,
3920 passphrase_size,
3921 CRYPT_ANY_SLOT,
3922 0,
3923 NULL,
3924 NULL,
3925 &reencrypt_params);
3926 if (r < 0)
3927 return log_error_errno(r, "Failed to load reencryption context: %m");
3928
3929 r = sym_crypt_reencrypt(cd, NULL);
3930 if (r < 0)
3931 return log_error_errno(r, "Failed to encrypt %s: %m", node);
3932 } else {
3933 _cleanup_free_ DecryptedPartitionTarget *t = NULL;
3934 _cleanup_close_ int dev_fd = -1;
3935
3936 r = sym_crypt_activate_by_volume_key(
3937 cd,
3938 dm_name,
3939 NULL,
3940 VOLUME_KEY_SIZE,
3941 arg_discard ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0);
3942 if (r < 0)
3943 return log_error_errno(r, "Failed to activate LUKS superblock: %m");
3944
3945 dev_fd = open(vol, O_RDWR|O_CLOEXEC|O_NOCTTY);
3946 if (dev_fd < 0)
3947 return log_error_errno(errno, "Failed to open LUKS volume '%s': %m", vol);
3948
3949 if (flock(dev_fd, LOCK_EX) < 0)
3950 return log_error_errno(errno, "Failed to lock '%s': %m", vol);
3951
3952 t = new(DecryptedPartitionTarget, 1);
3953 if (!t)
3954 return log_oom();
3955
3956 *t = (DecryptedPartitionTarget) {
3957 .fd = TAKE_FD(dev_fd),
3958 .dm_name = TAKE_PTR(dm_name),
3959 .volume = TAKE_PTR(vol),
3960 .device = TAKE_PTR(cd),
3961 };
3962
3963 target->decrypted = TAKE_PTR(t);
3964 }
3965
3966 log_info("Successfully encrypted future partition %" PRIu64 ".", p->partno);
3967
3968 return 0;
3969 #else
3970 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
3971 "libcryptsetup is not supported or is missing required symbols, cannot encrypt: %m");
3972 #endif
3973 }
3974
3975 static int partition_format_verity_hash(
3976 Context *context,
3977 Partition *p,
3978 const char *node,
3979 const char *data_node) {
3980
3981 #if HAVE_LIBCRYPTSETUP
3982 Partition *dp;
3983 _cleanup_(partition_target_freep) PartitionTarget *t = NULL;
3984 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
3985 _cleanup_free_ char *hint = NULL;
3986 int r;
3987
3988 assert(context);
3989 assert(p);
3990 assert(p->verity == VERITY_HASH);
3991 assert(data_node);
3992
3993 if (p->dropped)
3994 return 0;
3995
3996 if (PARTITION_EXISTS(p)) /* Never format existing partitions */
3997 return 0;
3998
3999 /* Minimized partitions will use the copy blocks logic so let's make sure to skip those here. */
4000 if (p->copy_blocks_fd >= 0)
4001 return 0;
4002
4003 assert_se(dp = p->siblings[VERITY_DATA]);
4004 assert(!dp->dropped);
4005
4006 (void) partition_hint(p, node, &hint);
4007
4008 r = dlopen_cryptsetup();
4009 if (r < 0)
4010 return log_error_errno(r, "libcryptsetup not found, cannot setup verity: %m");
4011
4012 if (!node) {
4013 r = partition_target_prepare(context, p, p->new_size, /*need_path=*/ true, &t);
4014 if (r < 0)
4015 return r;
4016
4017 node = partition_target_path(t);
4018 }
4019
4020 if (p->verity_data_block_size == UINT64_MAX)
4021 p->verity_data_block_size = context->fs_sector_size;
4022 if (p->verity_hash_block_size == UINT64_MAX)
4023 p->verity_hash_block_size = context->fs_sector_size;
4024
4025 r = sym_crypt_init(&cd, node);
4026 if (r < 0)
4027 return log_error_errno(r, "Failed to allocate libcryptsetup context for %s: %m", node);
4028
4029 cryptsetup_enable_logging(cd);
4030
4031 r = sym_crypt_format(
4032 cd, CRYPT_VERITY, NULL, NULL, SD_ID128_TO_UUID_STRING(p->verity_uuid), NULL, 0,
4033 &(struct crypt_params_verity){
4034 .data_device = data_node,
4035 .flags = CRYPT_VERITY_CREATE_HASH,
4036 .hash_name = "sha256",
4037 .hash_type = 1,
4038 .data_block_size = p->verity_data_block_size,
4039 .hash_block_size = p->verity_hash_block_size,
4040 .salt_size = sizeof(p->verity_salt),
4041 .salt = (const char*)p->verity_salt,
4042 });
4043 if (r < 0) {
4044 /* libcryptsetup reports non-descriptive EIO errors for every I/O failure. Luckily, it
4045 * doesn't clobber errno so let's check for ENOSPC so we can report a better error if the
4046 * partition is too small. */
4047 if (r == -EIO && errno == ENOSPC)
4048 return log_error_errno(errno,
4049 "Verity hash data does not fit in partition %s with size %s",
4050 strna(hint), FORMAT_BYTES(p->new_size));
4051
4052 return log_error_errno(r, "Failed to setup verity hash data of partition %s: %m", strna(hint));
4053 }
4054
4055 if (t) {
4056 r = partition_target_sync(context, p, t);
4057 if (r < 0)
4058 return r;
4059 }
4060
4061 r = sym_crypt_get_volume_key_size(cd);
4062 if (r < 0)
4063 return log_error_errno(r, "Failed to determine verity root hash size of partition %s: %m", strna(hint));
4064
4065 _cleanup_(iovec_done) struct iovec rh = {
4066 .iov_base = malloc(r),
4067 .iov_len = r,
4068 };
4069 if (!rh.iov_base)
4070 return log_oom();
4071
4072 r = sym_crypt_volume_key_get(cd, CRYPT_ANY_SLOT, (char *) rh.iov_base, &rh.iov_len, NULL, 0);
4073 if (r < 0)
4074 return log_error_errno(r, "Failed to get verity root hash of partition %s: %m", strna(hint));
4075
4076 assert(rh.iov_len >= sizeof(sd_id128_t) * 2);
4077
4078 if (!dp->new_uuid_is_set) {
4079 memcpy_safe(dp->new_uuid.bytes, rh.iov_base, sizeof(sd_id128_t));
4080 dp->new_uuid_is_set = true;
4081 }
4082
4083 if (!p->new_uuid_is_set) {
4084 memcpy_safe(p->new_uuid.bytes, (uint8_t*) rh.iov_base + (rh.iov_len - sizeof(sd_id128_t)), sizeof(sd_id128_t));
4085 p->new_uuid_is_set = true;
4086 }
4087
4088 p->roothash = TAKE_STRUCT(rh);
4089
4090 return 0;
4091 #else
4092 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "libcryptsetup is not supported, cannot setup verity hashes: %m");
4093 #endif
4094 }
4095
/* Produces a detached, DER-encoded PKCS#7 signature over the hexadecimal string form of the given
 * verity root hash, signing with arg_certificate and arg_private_key. On success the freshly
 * allocated signature buffer is handed over to the caller through ret_signature. Returns 0 on
 * success and a negative errno-style value (already logged) on failure. */
static int sign_verity_roothash(
                const struct iovec *roothash,
                struct iovec *ret_signature) {

#if HAVE_OPENSSL
        assert(roothash);
        assert(iovec_is_set(roothash));
        assert(ret_signature);

        /* What gets signed is the hex encoding of the hash, not the raw hash bytes. */
        _cleanup_free_ char *roothash_hex = hexmem(roothash->iov_base, roothash->iov_len);
        if (!roothash_hex)
                return log_oom();

        /* A length of -1 makes the BIO cover the whole NUL-terminated string. */
        _cleanup_(BIO_freep) BIO *input = BIO_new_mem_buf(roothash_hex, -1);
        if (!input)
                return log_oom();

        _cleanup_(PKCS7_freep) PKCS7 *pkcs7 = PKCS7_sign(
                        arg_certificate,
                        arg_private_key,
                        NULL,
                        input,
                        PKCS7_DETACHED|PKCS7_NOATTR|PKCS7_BINARY);
        if (!pkcs7)
                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to calculate PKCS7 signature: %s",
                                       ERR_error_string(ERR_get_error(), NULL));

        /* With a NULL output pointer i2d_PKCS7() allocates the DER buffer for us. */
        _cleanup_free_ uint8_t *der = NULL;
        int der_size = i2d_PKCS7(pkcs7, &der);
        if (der_size < 0)
                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to convert PKCS7 signature to DER: %s",
                                       ERR_error_string(ERR_get_error(), NULL));

        *ret_signature = (struct iovec) {
                .iov_base = TAKE_PTR(der),
                .iov_len = der_size,
        };

        return 0;
#else
        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "openssl is not supported, cannot setup verity signature: %m");
#endif
}
4137
4138 static int partition_format_verity_sig(Context *context, Partition *p) {
4139 _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
4140 _cleanup_(iovec_done) struct iovec sig = {};
4141 _cleanup_free_ char *text = NULL, *hint = NULL;
4142 Partition *hp;
4143 uint8_t fp[X509_FINGERPRINT_SIZE];
4144 int whole_fd, r;
4145
4146 assert(p->verity == VERITY_SIG);
4147
4148 if (p->dropped)
4149 return 0;
4150
4151 if (PARTITION_EXISTS(p))
4152 return 0;
4153
4154 (void) partition_hint(p, context->node, &hint);
4155
4156 assert_se(hp = p->siblings[VERITY_HASH]);
4157 assert(!hp->dropped);
4158
4159 assert(arg_certificate);
4160
4161 assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
4162
4163 r = sign_verity_roothash(&hp->roothash, &sig);
4164 if (r < 0)
4165 return r;
4166
4167 r = x509_fingerprint(arg_certificate, fp);
4168 if (r < 0)
4169 return log_error_errno(r, "Unable to calculate X509 certificate fingerprint: %m");
4170
4171 r = json_build(&v,
4172 JSON_BUILD_OBJECT(
4173 JSON_BUILD_PAIR("rootHash", JSON_BUILD_HEX(hp->roothash.iov_base, hp->roothash.iov_len)),
4174 JSON_BUILD_PAIR(
4175 "certificateFingerprint",
4176 JSON_BUILD_HEX(fp, sizeof(fp))
4177 ),
4178 JSON_BUILD_PAIR("signature", JSON_BUILD_IOVEC_BASE64(&sig))
4179 )
4180 );
4181 if (r < 0)
4182 return log_error_errno(r, "Failed to build verity signature JSON object: %m");
4183
4184 r = json_variant_format(v, 0, &text);
4185 if (r < 0)
4186 return log_error_errno(r, "Failed to format verity signature JSON object: %m");
4187
4188 if (strlen(text)+1 > p->new_size)
4189 return log_error_errno(SYNTHETIC_ERRNO(E2BIG), "Verity signature too long for partition: %m");
4190
4191 r = strgrowpad0(&text, p->new_size);
4192 if (r < 0)
4193 return log_error_errno(r, "Failed to pad string to %s", FORMAT_BYTES(p->new_size));
4194
4195 if (lseek(whole_fd, p->offset, SEEK_SET) < 0)
4196 return log_error_errno(errno, "Failed to seek to partition %s offset: %m", strna(hint));
4197
4198 r = loop_write(whole_fd, text, p->new_size);
4199 if (r < 0)
4200 return log_error_errno(r, "Failed to write verity signature to partition %s: %m", strna(hint));
4201
4202 if (fsync(whole_fd) < 0)
4203 return log_error_errno(errno, "Failed to synchronize partition %s: %m", strna(hint));
4204
4205 return 0;
4206 }
4207
4208 static int context_copy_blocks(Context *context) {
4209 int r;
4210
4211 assert(context);
4212
4213 /* Copy in file systems on the block level */
4214
4215 LIST_FOREACH(partitions, p, context->partitions) {
4216 _cleanup_(partition_target_freep) PartitionTarget *t = NULL;
4217
4218 if (p->copy_blocks_fd < 0)
4219 continue;
4220
4221 if (p->dropped)
4222 continue;
4223
4224 if (PARTITION_EXISTS(p)) /* Never copy over existing partitions */
4225 continue;
4226
4227 if (partition_type_defer(&p->type))
4228 continue;
4229
4230 assert(p->new_size != UINT64_MAX);
4231 assert(p->copy_blocks_size != UINT64_MAX);
4232 assert(p->new_size >= p->copy_blocks_size + (p->encrypt != ENCRYPT_OFF ? LUKS2_METADATA_KEEP_FREE : 0));
4233
4234 r = partition_target_prepare(context, p, p->new_size,
4235 /*need_path=*/ p->encrypt != ENCRYPT_OFF || p->siblings[VERITY_HASH],
4236 &t);
4237 if (r < 0)
4238 return r;
4239
4240 if (p->encrypt != ENCRYPT_OFF && t->loop) {
4241 r = partition_encrypt(context, p, t, /* offline = */ false);
4242 if (r < 0)
4243 return r;
4244 }
4245
4246 log_info("Copying in '%s' (%s) on block level into future partition %" PRIu64 ".",
4247 p->copy_blocks_path, FORMAT_BYTES(p->copy_blocks_size), p->partno);
4248
4249 if (p->copy_blocks_offset != UINT64_MAX && lseek(p->copy_blocks_fd, p->copy_blocks_offset, SEEK_SET) < 0)
4250 return log_error_errno(errno, "Failed to seek to copy blocks offset in %s: %m", p->copy_blocks_path);
4251
4252 r = copy_bytes(p->copy_blocks_fd, partition_target_fd(t), p->copy_blocks_size, COPY_REFLINK);
4253 if (r < 0)
4254 return log_error_errno(r, "Failed to copy in data from '%s': %m", p->copy_blocks_path);
4255
4256 log_info("Copying in of '%s' on block level completed.", p->copy_blocks_path);
4257
4258 if (p->encrypt != ENCRYPT_OFF && !t->loop) {
4259 r = partition_encrypt(context, p, t, /* offline = */ true);
4260 if (r < 0)
4261 return r;
4262 }
4263
4264 r = partition_target_sync(context, p, t);
4265 if (r < 0)
4266 return r;
4267
4268 if (p->siblings[VERITY_HASH] && !partition_type_defer(&p->siblings[VERITY_HASH]->type)) {
4269 r = partition_format_verity_hash(context, p->siblings[VERITY_HASH],
4270 /* node = */ NULL, partition_target_path(t));
4271 if (r < 0)
4272 return r;
4273 }
4274
4275 if (p->siblings[VERITY_SIG] && !partition_type_defer(&p->siblings[VERITY_SIG]->type)) {
4276 r = partition_format_verity_sig(context, p->siblings[VERITY_SIG]);
4277 if (r < 0)
4278 return r;
4279 }
4280 }
4281
4282 return 0;
4283 }
4284
4285 static int add_exclude_path(const char *path, Hashmap **denylist, DenyType type) {
4286 _cleanup_free_ struct stat *st = NULL;
4287 int r;
4288
4289 assert(path);
4290 assert(denylist);
4291
4292 st = new(struct stat, 1);
4293 if (!st)
4294 return log_oom();
4295
4296 r = chase_and_stat(path, arg_copy_source, CHASE_PREFIX_ROOT, NULL, st);
4297 if (r == -ENOENT)
4298 return 0;
4299 if (r < 0)
4300 return log_error_errno(r, "Failed to stat source file '%s/%s': %m", strempty(arg_copy_source), path);
4301
4302 r = hashmap_ensure_put(denylist, &inode_hash_ops, st, INT_TO_PTR(type));
4303 if (r == -EEXIST)
4304 return 0;
4305 if (r < 0)
4306 return log_oom();
4307 if (r > 0)
4308 TAKE_PTR(st);
4309
4310 return 0;
4311 }
4312
4313 static int make_copy_files_denylist(
4314 Context *context,
4315 const Partition *p,
4316 const char *source,
4317 const char *target,
4318 Hashmap **ret) {
4319
4320 _cleanup_hashmap_free_ Hashmap *denylist = NULL;
4321 int r;
4322
4323 assert(context);
4324 assert(p);
4325 assert(source);
4326 assert(target);
4327 assert(ret);
4328
4329 /* Always exclude the top level APIVFS and temporary directories since the contents of these
4330 * directories are almost certainly not intended to end up in an image. */
4331
4332 NULSTR_FOREACH(s, APIVFS_TMP_DIRS_NULSTR) {
4333 r = add_exclude_path(s, &denylist, DENY_CONTENTS);
4334 if (r < 0)
4335 return r;
4336 }
4337
4338 /* Add the user configured excludes. */
4339
4340 STRV_FOREACH(e, p->exclude_files_source) {
4341 r = add_exclude_path(*e, &denylist, endswith(*e, "/") ? DENY_CONTENTS : DENY_INODE);
4342 if (r < 0)
4343 return r;
4344 }
4345
4346 STRV_FOREACH(e, p->exclude_files_target) {
4347 _cleanup_free_ char *path = NULL;
4348
4349 const char *s = path_startswith(*e, target);
4350 if (!s)
4351 continue;
4352
4353 path = path_join(source, s);
4354 if (!path)
4355 return log_oom();
4356
4357 r = add_exclude_path(path, &denylist, endswith(*e, "/") ? DENY_CONTENTS : DENY_INODE);
4358 if (r < 0)
4359 return r;
4360 }
4361
4362 /* If we're populating a root partition, we don't want any files to end up under the APIVFS mount
4363 * points. While we already exclude <source>/proc, users could still do something such as
4364 * "CopyFiles=/abc:/". Now, if /abc has a proc subdirectory with files in it, those will end up in
4365 * the top level proc directory in the root partition, which we want to avoid. To deal with these
4366 * cases, whenever we're populating a root partition and the target of CopyFiles= is the root
4367 * directory of the root partition, we exclude all directories under the source that are named after
4368 * APIVFS directories or named after mount points of other partitions that are also going to be part
4369 * of the image. */
4370
4371 if (p->type.designator == PARTITION_ROOT && empty_or_root(target)) {
4372 LIST_FOREACH(partitions, q, context->partitions) {
4373 if (q->type.designator == PARTITION_ROOT)
4374 continue;
4375
4376 const char *sources = gpt_partition_type_mountpoint_nulstr(q->type);
4377 if (!sources)
4378 continue;
4379
4380 NULSTR_FOREACH(s, sources) {
4381 _cleanup_free_ char *path = NULL;
4382
4383 /* Exclude only the children of partition mount points so that the nested
4384 * partition mount point itself still ends up in the upper partition. */
4385
4386 path = path_join(source, s);
4387 if (!path)
4388 return -ENOMEM;
4389
4390 r = add_exclude_path(path, &denylist, DENY_CONTENTS);
4391 if (r < 0)
4392 return r;
4393 }
4394 }
4395
4396 NULSTR_FOREACH(s, APIVFS_TMP_DIRS_NULSTR) {
4397 _cleanup_free_ char *path = NULL;
4398
4399 path = path_join(source, s);
4400 if (!path)
4401 return -ENOMEM;
4402
4403 r = add_exclude_path(path, &denylist, DENY_CONTENTS);
4404 if (r < 0)
4405 return r;
4406 }
4407 }
4408
4409 *ret = TAKE_PTR(denylist);
4410 return 0;
4411 }
4412
4413 static int add_subvolume_path(const char *path, Set **subvolumes) {
4414 _cleanup_free_ struct stat *st = NULL;
4415 int r;
4416
4417 assert(path);
4418 assert(subvolumes);
4419
4420 st = new(struct stat, 1);
4421 if (!st)
4422 return log_oom();
4423
4424 r = chase_and_stat(path, arg_copy_source, CHASE_PREFIX_ROOT, NULL, st);
4425 if (r == -ENOENT)
4426 return 0;
4427 if (r < 0)
4428 return log_error_errno(r, "Failed to stat source file '%s/%s': %m", strempty(arg_copy_source), path);
4429
4430 r = set_ensure_consume(subvolumes, &inode_hash_ops, TAKE_PTR(st));
4431 if (r < 0)
4432 return log_oom();
4433
4434 return 0;
4435 }
4436
4437 static int make_subvolumes_set(
4438 Context *context,
4439 const Partition *p,
4440 const char *source,
4441 const char *target,
4442 Set **ret) {
4443 _cleanup_set_free_ Set *subvolumes = NULL;
4444 int r;
4445
4446 assert(context);
4447 assert(p);
4448 assert(target);
4449 assert(ret);
4450
4451 STRV_FOREACH(subvolume, p->subvolumes) {
4452 _cleanup_free_ char *path = NULL;
4453
4454 const char *s = path_startswith(*subvolume, target);
4455 if (!s)
4456 continue;
4457
4458 path = path_join(source, s);
4459 if (!path)
4460 return log_oom();
4461
4462 r = add_subvolume_path(path, &subvolumes);
4463 if (r < 0)
4464 return r;
4465 }
4466
4467 *ret = TAKE_PTR(subvolumes);
4468 return 0;
4469 }
4470
4471 static int do_copy_files(Context *context, Partition *p, const char *root) {
4472 int r;
4473
4474 assert(p);
4475 assert(root);
4476
4477 /* copy_tree_at() automatically copies the permissions of source directories to target directories if
4478 * it created them. However, the root directory is created by us, so we have to manually take care
4479 * that it is initialized. We use the first source directory targeting "/" as the metadata source for
4480 * the root directory. */
4481 STRV_FOREACH_PAIR(source, target, p->copy_files) {
4482 _cleanup_close_ int rfd = -EBADF, sfd = -EBADF;
4483
4484 if (!path_equal(*target, "/"))
4485 continue;
4486
4487 rfd = open(root, O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
4488 if (rfd < 0)
4489 return rfd;
4490
4491 sfd = chase_and_open(*source, arg_copy_source, CHASE_PREFIX_ROOT, O_PATH|O_DIRECTORY|O_CLOEXEC|O_NOCTTY, NULL);
4492 if (sfd < 0)
4493 return log_error_errno(sfd, "Failed to open source file '%s%s': %m", strempty(arg_copy_source), *source);
4494
4495 (void) copy_xattr(sfd, NULL, rfd, NULL, COPY_ALL_XATTRS);
4496 (void) copy_access(sfd, rfd);
4497 (void) copy_times(sfd, rfd, 0);
4498
4499 break;
4500 }
4501
4502 STRV_FOREACH_PAIR(source, target, p->copy_files) {
4503 _cleanup_hashmap_free_ Hashmap *denylist = NULL;
4504 _cleanup_set_free_ Set *subvolumes_by_source_inode = NULL;
4505 _cleanup_close_ int sfd = -EBADF, pfd = -EBADF, tfd = -EBADF;
4506
4507 r = make_copy_files_denylist(context, p, *source, *target, &denylist);
4508 if (r < 0)
4509 return r;
4510
4511 r = make_subvolumes_set(context, p, *source, *target, &subvolumes_by_source_inode);
4512 if (r < 0)
4513 return r;
4514
4515 sfd = chase_and_open(*source, arg_copy_source, CHASE_PREFIX_ROOT, O_CLOEXEC|O_NOCTTY, NULL);
4516 if (sfd == -ENOENT) {
4517 log_notice_errno(sfd, "Failed to open source file '%s%s', skipping: %m", strempty(arg_copy_source), *source);
4518 continue;
4519 }
4520 if (sfd < 0)
4521 return log_error_errno(sfd, "Failed to open source file '%s%s': %m", strempty(arg_copy_source), *source);
4522
4523 r = fd_verify_regular(sfd);
4524 if (r < 0) {
4525 if (r != -EISDIR)
4526 return log_error_errno(r, "Failed to check type of source file '%s': %m", *source);
4527
4528 /* We are looking at a directory */
4529 tfd = chase_and_open(*target, root, CHASE_PREFIX_ROOT, O_RDONLY|O_DIRECTORY|O_CLOEXEC, NULL);
4530 if (tfd < 0) {
4531 _cleanup_free_ char *dn = NULL, *fn = NULL;
4532
4533 if (tfd != -ENOENT)
4534 return log_error_errno(tfd, "Failed to open target directory '%s': %m", *target);
4535
4536 r = path_extract_filename(*target, &fn);
4537 if (r < 0)
4538 return log_error_errno(r, "Failed to extract filename from '%s': %m", *target);
4539
4540 r = path_extract_directory(*target, &dn);
4541 if (r < 0)
4542 return log_error_errno(r, "Failed to extract directory from '%s': %m", *target);
4543
4544 r = mkdir_p_root(root, dn, UID_INVALID, GID_INVALID, 0755, p->subvolumes);
4545 if (r < 0)
4546 return log_error_errno(r, "Failed to create parent directory '%s': %m", dn);
4547
4548 pfd = chase_and_open(dn, root, CHASE_PREFIX_ROOT, O_RDONLY|O_DIRECTORY|O_CLOEXEC, NULL);
4549 if (pfd < 0)
4550 return log_error_errno(pfd, "Failed to open parent directory of target: %m");
4551
4552 r = copy_tree_at(
4553 sfd, ".",
4554 pfd, fn,
4555 UID_INVALID, GID_INVALID,
4556 COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS|COPY_GRACEFUL_WARN|COPY_TRUNCATE,
4557 denylist, subvolumes_by_source_inode);
4558 } else
4559 r = copy_tree_at(
4560 sfd, ".",
4561 tfd, ".",
4562 UID_INVALID, GID_INVALID,
4563 COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS|COPY_GRACEFUL_WARN|COPY_TRUNCATE,
4564 denylist, subvolumes_by_source_inode);
4565 if (r < 0)
4566 return log_error_errno(r, "Failed to copy '%s%s' to '%s%s': %m",
4567 strempty(arg_copy_source), *source, strempty(root), *target);
4568 } else {
4569 _cleanup_free_ char *dn = NULL, *fn = NULL;
4570
4571 /* We are looking at a regular file */
4572
4573 r = path_extract_filename(*target, &fn);
4574 if (r == -EADDRNOTAVAIL || r == O_DIRECTORY)
4575 return log_error_errno(SYNTHETIC_ERRNO(EISDIR),
4576 "Target path '%s' refers to a directory, but source path '%s' refers to regular file, can't copy.", *target, *source);
4577 if (r < 0)
4578 return log_error_errno(r, "Failed to extract filename from '%s': %m", *target);
4579
4580 r = path_extract_directory(*target, &dn);
4581 if (r < 0)
4582 return log_error_errno(r, "Failed to extract directory from '%s': %m", *target);
4583
4584 r = mkdir_p_root(root, dn, UID_INVALID, GID_INVALID, 0755, p->subvolumes);
4585 if (r < 0)
4586 return log_error_errno(r, "Failed to create parent directory: %m");
4587
4588 pfd = chase_and_open(dn, root, CHASE_PREFIX_ROOT, O_RDONLY|O_DIRECTORY|O_CLOEXEC, NULL);
4589 if (pfd < 0)
4590 return log_error_errno(pfd, "Failed to open parent directory of target: %m");
4591
4592 tfd = openat(pfd, fn, O_CREAT|O_EXCL|O_WRONLY|O_CLOEXEC, 0700);
4593 if (tfd < 0)
4594 return log_error_errno(errno, "Failed to create target file '%s': %m", *target);
4595
4596 r = copy_bytes(sfd, tfd, UINT64_MAX, COPY_REFLINK|COPY_HOLES|COPY_SIGINT|COPY_TRUNCATE);
4597 if (r < 0)
4598 return log_error_errno(r, "Failed to copy '%s' to '%s%s': %m", *source, strempty(arg_copy_source), *target);
4599
4600 (void) copy_xattr(sfd, NULL, tfd, NULL, COPY_ALL_XATTRS);
4601 (void) copy_access(sfd, tfd);
4602 (void) copy_times(sfd, tfd, 0);
4603 }
4604 }
4605
4606 return 0;
4607 }
4608
4609 static int do_make_directories(Partition *p, const char *root) {
4610 int r;
4611
4612 assert(p);
4613 assert(root);
4614
4615 STRV_FOREACH(d, p->make_directories) {
4616 r = mkdir_p_root(root, *d, UID_INVALID, GID_INVALID, 0755, p->subvolumes);
4617 if (r < 0)
4618 return log_error_errno(r, "Failed to create directory '%s' in file system: %m", *d);
4619 }
4620
4621 return 0;
4622 }
4623
4624 static bool partition_needs_populate(Partition *p) {
4625 assert(p);
4626 return !strv_isempty(p->copy_files) || !strv_isempty(p->make_directories);
4627 }
4628
4629 static int partition_populate_directory(Context *context, Partition *p, char **ret) {
4630 _cleanup_(rm_rf_physical_and_freep) char *root = NULL;
4631 const char *vt;
4632 int r;
4633
4634 assert(ret);
4635
4636 log_info("Populating %s filesystem.", p->format);
4637
4638 r = var_tmp_dir(&vt);
4639 if (r < 0)
4640 return log_error_errno(r, "Could not determine temporary directory: %m");
4641
4642 r = tempfn_random_child(vt, "repart", &root);
4643 if (r < 0)
4644 return log_error_errno(r, "Failed to generate temporary directory: %m");
4645
4646 r = mkdir(root, 0755);
4647 if (r < 0)
4648 return log_error_errno(errno, "Failed to create temporary directory: %m");
4649
4650 r = do_copy_files(context, p, root);
4651 if (r < 0)
4652 return r;
4653
4654 r = do_make_directories(p, root);
4655 if (r < 0)
4656 return r;
4657
4658 log_info("Successfully populated %s filesystem.", p->format);
4659
4660 *ret = TAKE_PTR(root);
4661 return 0;
4662 }
4663
4664 static int partition_populate_filesystem(Context *context, Partition *p, const char *node) {
4665 int r;
4666
4667 assert(p);
4668 assert(node);
4669
4670 log_info("Populating %s filesystem.", p->format);
4671
4672 /* We copy in a child process, since we have to mount the fs for that, and we don't want that fs to
4673 * appear in the host namespace. Hence we fork a child that has its own file system namespace and
4674 * detached mount propagation. */
4675
4676 r = safe_fork("(sd-copy)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, NULL);
4677 if (r < 0)
4678 return r;
4679 if (r == 0) {
4680 static const char fs[] = "/run/systemd/mount-root";
4681 /* This is a child process with its own mount namespace and propagation to host turned off */
4682
4683 r = mkdir_p(fs, 0700);
4684 if (r < 0) {
4685 log_error_errno(r, "Failed to create mount point: %m");
4686 _exit(EXIT_FAILURE);
4687 }
4688
4689 if (mount_nofollow_verbose(LOG_ERR, node, fs, p->format, MS_NOATIME|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL) < 0)
4690 _exit(EXIT_FAILURE);
4691
4692 if (do_copy_files(context, p, fs) < 0)
4693 _exit(EXIT_FAILURE);
4694
4695 if (do_make_directories(p, fs) < 0)
4696 _exit(EXIT_FAILURE);
4697
4698 r = syncfs_path(AT_FDCWD, fs);
4699 if (r < 0) {
4700 log_error_errno(r, "Failed to synchronize written files: %m");
4701 _exit(EXIT_FAILURE);
4702 }
4703
4704 _exit(EXIT_SUCCESS);
4705 }
4706
4707 log_info("Successfully populated %s filesystem.", p->format);
4708 return 0;
4709 }
4710
4711 static int context_mkfs(Context *context) {
4712 int r;
4713
4714 assert(context);
4715
4716 /* Make a file system */
4717
4718 LIST_FOREACH(partitions, p, context->partitions) {
4719 _cleanup_(rm_rf_physical_and_freep) char *root = NULL;
4720 _cleanup_(partition_target_freep) PartitionTarget *t = NULL;
4721 _cleanup_strv_free_ char **extra_mkfs_options = NULL;
4722
4723 if (p->dropped)
4724 continue;
4725
4726 if (PARTITION_EXISTS(p)) /* Never format existing partitions */
4727 continue;
4728
4729 if (!p->format)
4730 continue;
4731
4732 /* Minimized partitions will use the copy blocks logic so let's make sure to skip those here. */
4733 if (p->copy_blocks_fd >= 0)
4734 continue;
4735
4736 if (partition_type_defer(&p->type))
4737 continue;
4738
4739 assert(p->offset != UINT64_MAX);
4740 assert(p->new_size != UINT64_MAX);
4741 assert(p->new_size >= (p->encrypt != ENCRYPT_OFF ? LUKS2_METADATA_KEEP_FREE : 0));
4742
4743 /* If we're doing encryption, we make sure we keep free space at the end which is required
4744 * for cryptsetup's offline encryption. */
4745 r = partition_target_prepare(context, p,
4746 p->new_size - (p->encrypt != ENCRYPT_OFF ? LUKS2_METADATA_KEEP_FREE : 0),
4747 /*need_path=*/ true,
4748 &t);
4749 if (r < 0)
4750 return r;
4751
4752 if (p->encrypt != ENCRYPT_OFF && t->loop) {
4753 r = partition_target_grow(t, p->new_size);
4754 if (r < 0)
4755 return r;
4756
4757 r = partition_encrypt(context, p, t, /* offline = */ false);
4758 if (r < 0)
4759 return log_error_errno(r, "Failed to encrypt device: %m");
4760 }
4761
4762 log_info("Formatting future partition %" PRIu64 ".", p->partno);
4763
4764 /* If we're not writing to a loop device or if we're populating a read-only filesystem, we
4765 * have to populate using the filesystem's mkfs's --root (or equivalent) option. To do that,
4766 * we need to set up the final directory tree beforehand. */
4767
4768 if (partition_needs_populate(p) && (!t->loop || fstype_is_ro(p->format))) {
4769 if (!mkfs_supports_root_option(p->format))
4770 return log_error_errno(SYNTHETIC_ERRNO(ENODEV),
4771 "Loop device access is required to populate %s filesystems.",
4772 p->format);
4773
4774 r = partition_populate_directory(context, p, &root);
4775 if (r < 0)
4776 return r;
4777 }
4778
4779 r = mkfs_options_from_env("REPART", p->format, &extra_mkfs_options);
4780 if (r < 0)
4781 return log_error_errno(r,
4782 "Failed to determine mkfs command line options for '%s': %m",
4783 p->format);
4784
4785 r = make_filesystem(partition_target_path(t), p->format, strempty(p->new_label), root,
4786 p->fs_uuid, arg_discard, /* quiet = */ false,
4787 context->fs_sector_size, extra_mkfs_options);
4788 if (r < 0)
4789 return r;
4790
4791 log_info("Successfully formatted future partition %" PRIu64 ".", p->partno);
4792
4793 /* If we're writing to a loop device, we can now mount the empty filesystem and populate it. */
4794 if (partition_needs_populate(p) && !root) {
4795 assert(t->loop);
4796
4797 r = partition_populate_filesystem(context, p, partition_target_path(t));
4798 if (r < 0)
4799 return r;
4800 }
4801
4802 if (p->encrypt != ENCRYPT_OFF && !t->loop) {
4803 r = partition_target_grow(t, p->new_size);
4804 if (r < 0)
4805 return r;
4806
4807 r = partition_encrypt(context, p, t, /* offline = */ true);
4808 if (r < 0)
4809 return log_error_errno(r, "Failed to encrypt device: %m");
4810 }
4811
4812 /* Note that we always sync explicitly here, since mkfs.fat doesn't do that on its own, and
4813 * if we don't sync before detaching a block device the in-flight sectors possibly won't hit
4814 * the disk. */
4815
4816 r = partition_target_sync(context, p, t);
4817 if (r < 0)
4818 return r;
4819
4820 if (p->siblings[VERITY_HASH] && !partition_type_defer(&p->siblings[VERITY_HASH]->type)) {
4821 r = partition_format_verity_hash(context, p->siblings[VERITY_HASH],
4822 /* node = */ NULL, partition_target_path(t));
4823 if (r < 0)
4824 return r;
4825 }
4826
4827 if (p->siblings[VERITY_SIG] && !partition_type_defer(&p->siblings[VERITY_SIG]->type)) {
4828 r = partition_format_verity_sig(context, p->siblings[VERITY_SIG]);
4829 if (r < 0)
4830 return r;
4831 }
4832 }
4833
4834 return 0;
4835 }
4836
4837 static int parse_x509_certificate(const char *certificate, size_t certificate_size, X509 **ret) {
4838 #if HAVE_OPENSSL
4839 _cleanup_(X509_freep) X509 *cert = NULL;
4840 _cleanup_(BIO_freep) BIO *cb = NULL;
4841
4842 assert(certificate);
4843 assert(certificate_size > 0);
4844 assert(ret);
4845
4846 cb = BIO_new_mem_buf(certificate, certificate_size);
4847 if (!cb)
4848 return log_oom();
4849
4850 cert = PEM_read_bio_X509(cb, NULL, NULL, NULL);
4851 if (!cert)
4852 return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Failed to parse X.509 certificate: %s",
4853 ERR_error_string(ERR_get_error(), NULL));
4854
4855 if (ret)
4856 *ret = TAKE_PTR(cert);
4857
4858 return 0;
4859 #else
4860 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "openssl is not supported, cannot parse X509 certificate.");
4861 #endif
4862 }
4863
4864 static int parse_private_key(const char *key, size_t key_size, EVP_PKEY **ret) {
4865 #if HAVE_OPENSSL
4866 _cleanup_(BIO_freep) BIO *kb = NULL;
4867 _cleanup_(EVP_PKEY_freep) EVP_PKEY *pk = NULL;
4868
4869 assert(key);
4870 assert(key_size > 0);
4871 assert(ret);
4872
4873 kb = BIO_new_mem_buf(key, key_size);
4874 if (!kb)
4875 return log_oom();
4876
4877 pk = PEM_read_bio_PrivateKey(kb, NULL, NULL, NULL);
4878 if (!pk)
4879 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to parse PEM private key: %s",
4880 ERR_error_string(ERR_get_error(), NULL));
4881
4882 if (ret)
4883 *ret = TAKE_PTR(pk);
4884
4885 return 0;
4886 #else
4887 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "openssl is not supported, cannot parse private key.");
4888 #endif
4889 }
4890
4891 static int partition_acquire_uuid(Context *context, Partition *p, sd_id128_t *ret) {
4892 struct {
4893 sd_id128_t type_uuid;
4894 uint64_t counter;
4895 } _packed_ plaintext = {};
4896 union {
4897 uint8_t md[SHA256_DIGEST_SIZE];
4898 sd_id128_t id;
4899 } result;
4900
4901 uint64_t k = 0;
4902 int r;
4903
4904 assert(context);
4905 assert(p);
4906 assert(ret);
4907
4908 /* Calculate a good UUID for the indicated partition. We want a certain degree of reproducibility,
4909 * hence we won't generate the UUIDs randomly. Instead we use a cryptographic hash (precisely:
4910 * HMAC-SHA256) to derive them from a single seed. The seed is generally the machine ID of the
4911 * installation we are processing, but if random behaviour is desired can be random, too. We use the
4912 * seed value as key for the HMAC (since the machine ID is something we generally don't want to leak)
4913 * and the partition type as plaintext. The partition type is suffixed with a counter (only for the
4914 * second and later partition of the same type) if we have more than one partition of the same
4915 * time. Or in other words:
4916 *
4917 * With:
4918 * SEED := /etc/machine-id
4919 *
4920 * If first partition instance of type TYPE_UUID:
4921 * PARTITION_UUID := HMAC-SHA256(SEED, TYPE_UUID)
4922 *
4923 * For all later partition instances of type TYPE_UUID with INSTANCE being the LE64 encoded instance number:
4924 * PARTITION_UUID := HMAC-SHA256(SEED, TYPE_UUID || INSTANCE)
4925 */
4926
4927 LIST_FOREACH(partitions, q, context->partitions) {
4928 if (p == q)
4929 break;
4930
4931 if (!sd_id128_equal(p->type.uuid, q->type.uuid))
4932 continue;
4933
4934 k++;
4935 }
4936
4937 plaintext.type_uuid = p->type.uuid;
4938 plaintext.counter = htole64(k);
4939
4940 hmac_sha256(context->seed.bytes, sizeof(context->seed.bytes),
4941 &plaintext,
4942 k == 0 ? sizeof(sd_id128_t) : sizeof(plaintext),
4943 result.md);
4944
4945 /* Take the first half, mark it as v4 UUID */
4946 assert_cc(sizeof(result.md) == sizeof(result.id) * 2);
4947 result.id = id128_make_v4_uuid(result.id);
4948
4949 /* Ensure this partition UUID is actually unique, and there's no remaining partition from an earlier run? */
4950 LIST_FOREACH(partitions, q, context->partitions) {
4951 if (p == q)
4952 continue;
4953
4954 if (sd_id128_in_set(result.id, q->current_uuid, q->new_uuid)) {
4955 log_warning("Partition UUID calculated from seed for partition %" PRIu64 " already used, reverting to randomized UUID.", p->partno);
4956
4957 r = sd_id128_randomize(&result.id);
4958 if (r < 0)
4959 return log_error_errno(r, "Failed to generate randomized UUID: %m");
4960
4961 break;
4962 }
4963 }
4964
4965 *ret = result.id;
4966 return 0;
4967 }
4968
4969 static int partition_acquire_label(Context *context, Partition *p, char **ret) {
4970 _cleanup_free_ char *label = NULL;
4971 const char *prefix;
4972 unsigned k = 1;
4973
4974 assert(context);
4975 assert(p);
4976 assert(ret);
4977
4978 prefix = gpt_partition_type_uuid_to_string(p->type.uuid);
4979 if (!prefix)
4980 prefix = "linux";
4981
4982 for (;;) {
4983 const char *ll = label ?: prefix;
4984 bool retry = false;
4985
4986 LIST_FOREACH(partitions, q, context->partitions) {
4987 if (p == q)
4988 break;
4989
4990 if (streq_ptr(ll, q->current_label) ||
4991 streq_ptr(ll, q->new_label)) {
4992 retry = true;
4993 break;
4994 }
4995 }
4996
4997 if (!retry)
4998 break;
4999
5000 label = mfree(label);
5001 if (asprintf(&label, "%s-%u", prefix, ++k) < 0)
5002 return log_oom();
5003 }
5004
5005 if (!label) {
5006 label = strdup(prefix);
5007 if (!label)
5008 return log_oom();
5009 }
5010
5011 *ret = TAKE_PTR(label);
5012 return 0;
5013 }
5014
5015 static int context_acquire_partition_uuids_and_labels(Context *context) {
5016 int r;
5017
5018 assert(context);
5019
5020 LIST_FOREACH(partitions, p, context->partitions) {
5021 sd_id128_t uuid;
5022
5023 /* Never touch foreign partitions */
5024 if (PARTITION_IS_FOREIGN(p)) {
5025 p->new_uuid = p->current_uuid;
5026
5027 if (p->current_label) {
5028 r = free_and_strdup_warn(&p->new_label, strempty(p->current_label));
5029 if (r < 0)
5030 return r;
5031 }
5032
5033 continue;
5034 }
5035
5036 if (!sd_id128_is_null(p->current_uuid))
5037 p->new_uuid = uuid = p->current_uuid; /* Never change initialized UUIDs */
5038 else if (p->new_uuid_is_set)
5039 uuid = p->new_uuid;
5040 else {
5041 /* Not explicitly set by user! */
5042 r = partition_acquire_uuid(context, p, &uuid);
5043 if (r < 0)
5044 return r;
5045
5046 /* The final verity hash/data UUIDs can only be determined after formatting the
5047 * verity hash partition. However, we still want to use the generated partition UUID
5048 * to derive other UUIDs to keep things unique and reproducible, so we always
5049 * generate a UUID if none is set, but we only use it as the actual partition UUID if
5050 * verity is not configured. */
5051 if (!IN_SET(p->verity, VERITY_DATA, VERITY_HASH)) {
5052 p->new_uuid = uuid;
5053 p->new_uuid_is_set = true;
5054 }
5055 }
5056
5057 /* Calculate the UUID for the file system as HMAC-SHA256 of the string "file-system-uuid",
5058 * keyed off the partition UUID. */
5059 r = derive_uuid(uuid, "file-system-uuid", &p->fs_uuid);
5060 if (r < 0)
5061 return r;
5062
5063 if (p->encrypt != ENCRYPT_OFF) {
5064 r = derive_uuid(uuid, "luks-uuid", &p->luks_uuid);
5065 if (r < 0)
5066 return r;
5067 }
5068
5069 /* Derive the verity salt and verity superblock UUID from the seed to keep them reproducible */
5070 if (p->verity == VERITY_HASH) {
5071 derive_salt(context->seed, "verity-salt", p->verity_salt);
5072
5073 r = derive_uuid(context->seed, "verity-uuid", &p->verity_uuid);
5074 if (r < 0)
5075 return log_error_errno(r, "Failed to acquire verity uuid: %m");
5076 }
5077
5078 if (!isempty(p->current_label)) {
5079 /* never change initialized labels */
5080 r = free_and_strdup_warn(&p->new_label, p->current_label);
5081 if (r < 0)
5082 return r;
5083 } else if (!p->new_label) {
5084 /* Not explicitly set by user! */
5085
5086 r = partition_acquire_label(context, p, &p->new_label);
5087 if (r < 0)
5088 return r;
5089 }
5090 }
5091
5092 return 0;
5093 }
5094
5095 static int set_gpt_flags(struct fdisk_partition *q, uint64_t flags) {
5096 _cleanup_free_ char *a = NULL;
5097
5098 for (unsigned i = 0; i < sizeof(flags) * 8; i++) {
5099 uint64_t bit = UINT64_C(1) << i;
5100 char buf[DECIMAL_STR_MAX(unsigned)+1];
5101
5102 if (!FLAGS_SET(flags, bit))
5103 continue;
5104
5105 xsprintf(buf, "%u", i);
5106 if (!strextend_with_separator(&a, ",", buf))
5107 return -ENOMEM;
5108 }
5109
5110 return fdisk_partition_set_attrs(q, a);
5111 }
5112
5113 static uint64_t partition_merge_flags(Partition *p) {
5114 uint64_t f;
5115
5116 assert(p);
5117
5118 f = p->gpt_flags;
5119
5120 if (p->no_auto >= 0) {
5121 if (gpt_partition_type_knows_no_auto(p->type))
5122 SET_FLAG(f, SD_GPT_FLAG_NO_AUTO, p->no_auto);
5123 else {
5124 char buffer[SD_ID128_UUID_STRING_MAX];
5125 log_warning("Configured NoAuto=%s for partition type '%s' that doesn't support it, ignoring.",
5126 yes_no(p->no_auto),
5127 gpt_partition_type_uuid_to_string_harder(p->type.uuid, buffer));
5128 }
5129 }
5130
5131 if (p->read_only >= 0) {
5132 if (gpt_partition_type_knows_read_only(p->type))
5133 SET_FLAG(f, SD_GPT_FLAG_READ_ONLY, p->read_only);
5134 else {
5135 char buffer[SD_ID128_UUID_STRING_MAX];
5136 log_warning("Configured ReadOnly=%s for partition type '%s' that doesn't support it, ignoring.",
5137 yes_no(p->read_only),
5138 gpt_partition_type_uuid_to_string_harder(p->type.uuid, buffer));
5139 }
5140 }
5141
5142 if (p->growfs >= 0) {
5143 if (gpt_partition_type_knows_growfs(p->type))
5144 SET_FLAG(f, SD_GPT_FLAG_GROWFS, p->growfs);
5145 else {
5146 char buffer[SD_ID128_UUID_STRING_MAX];
5147 log_warning("Configured GrowFileSystem=%s for partition type '%s' that doesn't support it, ignoring.",
5148 yes_no(p->growfs),
5149 gpt_partition_type_uuid_to_string_harder(p->type.uuid, buffer));
5150 }
5151 }
5152
5153 return f;
5154 }
5155
5156 static int context_mangle_partitions(Context *context) {
5157 int r;
5158
5159 assert(context);
5160
5161 LIST_FOREACH(partitions, p, context->partitions) {
5162 if (p->dropped)
5163 continue;
5164
5165 if (partition_type_defer(&p->type))
5166 continue;
5167
5168 assert(p->new_size != UINT64_MAX);
5169 assert(p->offset != UINT64_MAX);
5170 assert(p->partno != UINT64_MAX);
5171
5172 if (PARTITION_EXISTS(p)) {
5173 bool changed = false;
5174
5175 assert(p->current_partition);
5176
5177 if (p->new_size != p->current_size) {
5178 assert(p->new_size >= p->current_size);
5179 assert(p->new_size % context->sector_size == 0);
5180
5181 r = fdisk_partition_size_explicit(p->current_partition, true);
5182 if (r < 0)
5183 return log_error_errno(r, "Failed to enable explicit sizing: %m");
5184
5185 r = fdisk_partition_set_size(p->current_partition, p->new_size / context->sector_size);
5186 if (r < 0)
5187 return log_error_errno(r, "Failed to grow partition: %m");
5188
5189 log_info("Growing existing partition %" PRIu64 ".", p->partno);
5190 changed = true;
5191 }
5192
5193 if (!sd_id128_equal(p->new_uuid, p->current_uuid)) {
5194 r = fdisk_partition_set_uuid(p->current_partition, SD_ID128_TO_UUID_STRING(p->new_uuid));
5195 if (r < 0)
5196 return log_error_errno(r, "Failed to set partition UUID: %m");
5197
5198 log_info("Initializing UUID of existing partition %" PRIu64 ".", p->partno);
5199 changed = true;
5200 }
5201
5202 if (!streq_ptr(p->new_label, p->current_label)) {
5203 r = fdisk_partition_set_name(p->current_partition, strempty(p->new_label));
5204 if (r < 0)
5205 return log_error_errno(r, "Failed to set partition label: %m");
5206
5207 log_info("Setting partition label of existing partition %" PRIu64 ".", p->partno);
5208 changed = true;
5209 }
5210
5211 if (changed) {
5212 assert(!PARTITION_IS_FOREIGN(p)); /* never touch foreign partitions */
5213
5214 r = fdisk_set_partition(context->fdisk_context, p->partno, p->current_partition);
5215 if (r < 0)
5216 return log_error_errno(r, "Failed to update partition: %m");
5217 }
5218 } else {
5219 _cleanup_(fdisk_unref_partitionp) struct fdisk_partition *q = NULL;
5220 _cleanup_(fdisk_unref_parttypep) struct fdisk_parttype *t = NULL;
5221
5222 assert(!p->new_partition);
5223 assert(p->offset % context->sector_size == 0);
5224 assert(p->new_size % context->sector_size == 0);
5225 assert(p->new_label);
5226
5227 t = fdisk_new_parttype();
5228 if (!t)
5229 return log_oom();
5230
5231 r = fdisk_parttype_set_typestr(t, SD_ID128_TO_UUID_STRING(p->type.uuid));
5232 if (r < 0)
5233 return log_error_errno(r, "Failed to initialize partition type: %m");
5234
5235 q = fdisk_new_partition();
5236 if (!q)
5237 return log_oom();
5238
5239 r = fdisk_partition_set_type(q, t);
5240 if (r < 0)
5241 return log_error_errno(r, "Failed to set partition type: %m");
5242
5243 r = fdisk_partition_size_explicit(q, true);
5244 if (r < 0)
5245 return log_error_errno(r, "Failed to enable explicit sizing: %m");
5246
5247 r = fdisk_partition_set_start(q, p->offset / context->sector_size);
5248 if (r < 0)
5249 return log_error_errno(r, "Failed to position partition: %m");
5250
5251 r = fdisk_partition_set_size(q, p->new_size / context->sector_size);
5252 if (r < 0)
5253 return log_error_errno(r, "Failed to grow partition: %m");
5254
5255 r = fdisk_partition_set_partno(q, p->partno);
5256 if (r < 0)
5257 return log_error_errno(r, "Failed to set partition number: %m");
5258
5259 r = fdisk_partition_set_uuid(q, SD_ID128_TO_UUID_STRING(p->new_uuid));
5260 if (r < 0)
5261 return log_error_errno(r, "Failed to set partition UUID: %m");
5262
5263 r = fdisk_partition_set_name(q, strempty(p->new_label));
5264 if (r < 0)
5265 return log_error_errno(r, "Failed to set partition label: %m");
5266
5267 /* Merge the no auto + read only + growfs setting with the literal flags, and set them for the partition */
5268 r = set_gpt_flags(q, partition_merge_flags(p));
5269 if (r < 0)
5270 return log_error_errno(r, "Failed to set GPT partition flags: %m");
5271
5272 log_info("Adding new partition %" PRIu64 " to partition table.", p->partno);
5273
5274 r = fdisk_add_partition(context->fdisk_context, q, NULL);
5275 if (r < 0)
5276 return log_error_errno(r, "Failed to add partition: %m");
5277
5278 assert(!p->new_partition);
5279 p->new_partition = TAKE_PTR(q);
5280 }
5281 }
5282
5283 return 0;
5284 }
5285
5286 static int split_name_printf(Partition *p, char **ret) {
5287 assert(p);
5288
5289 const Specifier table[] = {
5290 { 't', specifier_string, GPT_PARTITION_TYPE_UUID_TO_STRING_HARDER(p->type.uuid) },
5291 { 'T', specifier_id128, &p->type.uuid },
5292 { 'U', specifier_id128, &p->new_uuid },
5293 { 'n', specifier_uint64, &p->partno },
5294
5295 COMMON_SYSTEM_SPECIFIERS,
5296 {}
5297 };
5298
5299 return specifier_printf(p->split_name_format, NAME_MAX, table, arg_root, p, ret);
5300 }
5301
5302 static int split_node(const char *node, char **ret_base, char **ret_ext) {
5303 _cleanup_free_ char *base = NULL, *ext = NULL;
5304 char *e;
5305 int r;
5306
5307 assert(node);
5308 assert(ret_base);
5309 assert(ret_ext);
5310
5311 r = path_extract_filename(node, &base);
5312 if (r == O_DIRECTORY || r == -EADDRNOTAVAIL)
5313 return log_error_errno(r, "Device node %s cannot be a directory", node);
5314 if (r < 0)
5315 return log_error_errno(r, "Failed to extract filename from %s: %m", node);
5316
5317 e = endswith(base, ".raw");
5318 if (e) {
5319 ext = strdup(e);
5320 if (!ext)
5321 return log_oom();
5322
5323 *e = 0;
5324 }
5325
5326 *ret_base = TAKE_PTR(base);
5327 *ret_ext = TAKE_PTR(ext);
5328
5329 return 0;
5330 }
5331
5332 static int split_name_resolve(Context *context) {
5333 _cleanup_free_ char *parent = NULL, *base = NULL, *ext = NULL;
5334 int r;
5335
5336 assert(context);
5337
5338 r = path_extract_directory(context->node, &parent);
5339 if (r < 0 && r != -EDESTADDRREQ)
5340 return log_error_errno(r, "Failed to extract directory from %s: %m", context->node);
5341
5342 r = split_node(context->node, &base, &ext);
5343 if (r < 0)
5344 return r;
5345
5346 LIST_FOREACH(partitions, p, context->partitions) {
5347 _cleanup_free_ char *resolved = NULL;
5348
5349 if (p->dropped)
5350 continue;
5351
5352 if (!p->split_name_format)
5353 continue;
5354
5355 r = split_name_printf(p, &resolved);
5356 if (r < 0)
5357 return log_error_errno(r, "Failed to resolve specifiers in %s: %m", p->split_name_format);
5358
5359 if (parent)
5360 p->split_path = strjoin(parent, "/", base, ".", resolved, ext);
5361 else
5362 p->split_path = strjoin(base, ".", resolved, ext);
5363 if (!p->split_path)
5364 return log_oom();
5365 }
5366
5367 LIST_FOREACH(partitions, p, context->partitions) {
5368 if (!p->split_path)
5369 continue;
5370
5371 LIST_FOREACH(partitions, q, context->partitions) {
5372 if (p == q)
5373 continue;
5374
5375 if (!q->split_path)
5376 continue;
5377
5378 if (!streq(p->split_path, q->split_path))
5379 continue;
5380
5381 return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ),
5382 "%s and %s have the same resolved split name \"%s\", refusing",
5383 p->definition_path, q->definition_path, p->split_path);
5384 }
5385 }
5386
5387 return 0;
5388 }
5389
5390 static int context_split(Context *context) {
5391 int fd = -EBADF, r;
5392
5393 if (!arg_split)
5394 return 0;
5395
5396 assert(context);
5397
5398 /* We can't do resolution earlier because the partition UUIDs for verity partitions are only filled
5399 * in after they've been generated. */
5400
5401 r = split_name_resolve(context);
5402 if (r < 0)
5403 return r;
5404
5405 LIST_FOREACH(partitions, p, context->partitions) {
5406 _cleanup_close_ int fdt = -EBADF;
5407
5408 if (p->dropped)
5409 continue;
5410
5411 if (!p->split_path)
5412 continue;
5413
5414 if (partition_type_defer(&p->type))
5415 continue;
5416
5417 fdt = open(p->split_path, O_WRONLY|O_NOCTTY|O_CLOEXEC|O_NOFOLLOW|O_CREAT|O_EXCL, 0666);
5418 if (fdt < 0)
5419 return log_error_errno(fdt, "Failed to open split partition file %s: %m", p->split_path);
5420
5421 if (fd < 0)
5422 assert_se((fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
5423
5424 if (lseek(fd, p->offset, SEEK_SET) < 0)
5425 return log_error_errno(errno, "Failed to seek to partition offset: %m");
5426
5427 r = copy_bytes(fd, fdt, p->new_size, COPY_REFLINK|COPY_HOLES|COPY_TRUNCATE);
5428 if (r < 0)
5429 return log_error_errno(r, "Failed to copy to split partition %s: %m", p->split_path);
5430 }
5431
5432 return 0;
5433 }
5434
5435 static int context_write_partition_table(Context *context) {
5436 _cleanup_(fdisk_unref_tablep) struct fdisk_table *original_table = NULL;
5437 int capable, r;
5438
5439 assert(context);
5440
5441 if (!context->from_scratch && !context_changed(context)) {
5442 log_info("No changes.");
5443 return 0;
5444 }
5445
5446 if (arg_dry_run) {
5447 log_notice("Refusing to repartition, please re-run with --dry-run=no.");
5448 return 0;
5449 }
5450
5451 log_info("Applying changes.");
5452
5453 if (context->from_scratch && arg_empty != EMPTY_CREATE) {
5454 /* Erase everything if we operate from scratch, except if the image was just created anyway, and thus is definitely empty. */
5455 r = context_wipe_range(context, 0, context->total);
5456 if (r < 0)
5457 return r;
5458
5459 log_info("Wiped block device.");
5460
5461 if (arg_discard) {
5462 r = context_discard_range(context, 0, context->total);
5463 if (r == -EOPNOTSUPP)
5464 log_info("Storage does not support discard, not discarding entire block device data.");
5465 else if (r < 0)
5466 return log_error_errno(r, "Failed to discard entire block device: %m");
5467 else if (r > 0)
5468 log_info("Discarded entire block device.");
5469 }
5470 }
5471
5472 r = fdisk_get_partitions(context->fdisk_context, &original_table);
5473 if (r < 0)
5474 return log_error_errno(r, "Failed to acquire partition table: %m");
5475
5476 /* Wipe fs signatures and discard sectors where the new partitions are going to be placed and in the
5477 * gaps between partitions, just to be sure. */
5478 r = context_wipe_and_discard(context);
5479 if (r < 0)
5480 return r;
5481
5482 r = context_copy_blocks(context);
5483 if (r < 0)
5484 return r;
5485
5486 r = context_mkfs(context);
5487 if (r < 0)
5488 return r;
5489
5490 r = context_mangle_partitions(context);
5491 if (r < 0)
5492 return r;
5493
5494 log_info("Writing new partition table.");
5495
5496 r = fdisk_write_disklabel(context->fdisk_context);
5497 if (r < 0)
5498 return log_error_errno(r, "Failed to write partition table: %m");
5499
5500 capable = blockdev_partscan_enabled(fdisk_get_devfd(context->fdisk_context));
5501 if (capable == -ENOTBLK)
5502 log_debug("Not telling kernel to reread partition table, since we are not operating on a block device.");
5503 else if (capable < 0)
5504 return log_error_errno(capable, "Failed to check if block device supports partition scanning: %m");
5505 else if (capable > 0) {
5506 log_info("Telling kernel to reread partition table.");
5507
5508 if (context->from_scratch)
5509 r = fdisk_reread_partition_table(context->fdisk_context);
5510 else
5511 r = fdisk_reread_changes(context->fdisk_context, original_table);
5512 if (r < 0)
5513 return log_error_errno(r, "Failed to reread partition table: %m");
5514 } else
5515 log_notice("Not telling kernel to reread partition table, because selected image does not support kernel partition block devices.");
5516
5517 log_info("All done.");
5518
5519 return 0;
5520 }
5521
5522 static int context_read_seed(Context *context, const char *root) {
5523 int r;
5524
5525 assert(context);
5526
5527 if (!sd_id128_is_null(context->seed))
5528 return 0;
5529
5530 if (!arg_randomize) {
5531 r = id128_get_machine(root, &context->seed);
5532 if (r >= 0)
5533 return 0;
5534
5535 if (!ERRNO_IS_MACHINE_ID_UNSET(r))
5536 return log_error_errno(r, "Failed to parse machine ID of image: %m");
5537
5538 log_info("No machine ID set, using randomized partition UUIDs.");
5539 }
5540
5541 r = sd_id128_randomize(&context->seed);
5542 if (r < 0)
5543 return log_error_errno(r, "Failed to generate randomized seed: %m");
5544
5545 return 0;
5546 }
5547
5548 static int context_factory_reset(Context *context) {
5549 size_t n = 0;
5550 int r;
5551
5552 assert(context);
5553
5554 if (arg_factory_reset <= 0)
5555 return 0;
5556
5557 if (context->from_scratch) /* Nothing to reset if we start from scratch */
5558 return 0;
5559
5560 if (arg_dry_run) {
5561 log_notice("Refusing to factory reset, please re-run with --dry-run=no.");
5562 return 0;
5563 }
5564
5565 log_info("Applying factory reset.");
5566
5567 LIST_FOREACH(partitions, p, context->partitions) {
5568
5569 if (!p->factory_reset || !PARTITION_EXISTS(p))
5570 continue;
5571
5572 assert(p->partno != UINT64_MAX);
5573
5574 log_info("Removing partition %" PRIu64 " for factory reset.", p->partno);
5575
5576 r = fdisk_delete_partition(context->fdisk_context, p->partno);
5577 if (r < 0)
5578 return log_error_errno(r, "Failed to remove partition %" PRIu64 ": %m", p->partno);
5579
5580 n++;
5581 }
5582
5583 if (n == 0) {
5584 log_info("Factory reset requested, but no partitions to delete found.");
5585 return 0;
5586 }
5587
5588 r = fdisk_write_disklabel(context->fdisk_context);
5589 if (r < 0)
5590 return log_error_errno(r, "Failed to write disk label: %m");
5591
5592 log_info("Successfully deleted %zu partitions.", n);
5593 return 1;
5594 }
5595
5596 static int context_can_factory_reset(Context *context) {
5597 assert(context);
5598
5599 LIST_FOREACH(partitions, p, context->partitions)
5600 if (p->factory_reset && PARTITION_EXISTS(p))
5601 return true;
5602
5603 return false;
5604 }
5605
5606 static int resolve_copy_blocks_auto_candidate(
5607 dev_t partition_devno,
5608 GptPartitionType partition_type,
5609 dev_t restrict_devno,
5610 sd_id128_t *ret_uuid) {
5611
5612 _cleanup_(blkid_free_probep) blkid_probe b = NULL;
5613 _cleanup_close_ int fd = -EBADF;
5614 _cleanup_free_ char *p = NULL;
5615 const char *pttype, *t;
5616 sd_id128_t pt_parsed, u;
5617 blkid_partition pp;
5618 dev_t whole_devno;
5619 blkid_partlist pl;
5620 int r;
5621
5622 /* Checks if the specified partition has the specified GPT type UUID, and is located on the specified
5623 * 'restrict_devno' device. The type check is particularly relevant if we have Verity volume which is
5624 * backed by two separate partitions: the data and the hash partitions, and we need to find the right
5625 * one of the two. */
5626
5627 r = block_get_whole_disk(partition_devno, &whole_devno);
5628 if (r < 0)
5629 return log_error_errno(
5630 r,
5631 "Unable to determine containing block device of partition %u:%u: %m",
5632 major(partition_devno), minor(partition_devno));
5633
5634 if (restrict_devno != (dev_t) -1 &&
5635 restrict_devno != whole_devno)
5636 return log_error_errno(
5637 SYNTHETIC_ERRNO(EPERM),
5638 "Partition %u:%u is located outside of block device %u:%u, refusing.",
5639 major(partition_devno), minor(partition_devno),
5640 major(restrict_devno), minor(restrict_devno));
5641
5642 fd = r = device_open_from_devnum(S_IFBLK, whole_devno, O_RDONLY|O_CLOEXEC|O_NONBLOCK, &p);
5643 if (r < 0)
5644 return log_error_errno(r, "Failed to open block device " DEVNUM_FORMAT_STR ": %m",
5645 DEVNUM_FORMAT_VAL(whole_devno));
5646
5647 b = blkid_new_probe();
5648 if (!b)
5649 return log_oom();
5650
5651 errno = 0;
5652 r = blkid_probe_set_device(b, fd, 0, 0);
5653 if (r != 0)
5654 return log_error_errno(errno_or_else(ENOMEM), "Failed to open block device '%s': %m", p);
5655
5656 (void) blkid_probe_enable_partitions(b, 1);
5657 (void) blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
5658
5659 errno = 0;
5660 r = blkid_do_safeprobe(b);
5661 if (r == _BLKID_SAFEPROBE_ERROR)
5662 return log_error_errno(errno_or_else(EIO), "Unable to probe for partition table of '%s': %m", p);
5663 if (IN_SET(r, _BLKID_SAFEPROBE_AMBIGUOUS, _BLKID_SAFEPROBE_NOT_FOUND)) {
5664 log_debug("Didn't find partition table on block device '%s'.", p);
5665 return false;
5666 }
5667
5668 assert(r == _BLKID_SAFEPROBE_FOUND);
5669
5670 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
5671 if (!streq_ptr(pttype, "gpt")) {
5672 log_debug("Didn't find a GPT partition table on '%s'.", p);
5673 return false;
5674 }
5675
5676 errno = 0;
5677 pl = blkid_probe_get_partitions(b);
5678 if (!pl)
5679 return log_error_errno(errno_or_else(EIO), "Unable read partition table of '%s': %m", p);
5680
5681 pp = blkid_partlist_devno_to_partition(pl, partition_devno);
5682 if (!pp) {
5683 log_debug("Partition %u:%u has no matching partition table entry on '%s'.",
5684 major(partition_devno), minor(partition_devno), p);
5685 return false;
5686 }
5687
5688 t = blkid_partition_get_type_string(pp);
5689 if (isempty(t)) {
5690 log_debug("Partition %u:%u has no type on '%s'.",
5691 major(partition_devno), minor(partition_devno), p);
5692 return false;
5693 }
5694
5695 r = sd_id128_from_string(t, &pt_parsed);
5696 if (r < 0) {
5697 log_debug_errno(r, "Failed to parse partition type \"%s\": %m", t);
5698 return false;
5699 }
5700
5701 if (!sd_id128_equal(pt_parsed, partition_type.uuid)) {
5702 log_debug("Partition %u:%u has non-matching partition type " SD_ID128_FORMAT_STR " (needed: " SD_ID128_FORMAT_STR "), ignoring.",
5703 major(partition_devno), minor(partition_devno),
5704 SD_ID128_FORMAT_VAL(pt_parsed), SD_ID128_FORMAT_VAL(partition_type.uuid));
5705 return false;
5706 }
5707
5708 r = blkid_partition_get_uuid_id128(pp, &u);
5709 if (r == -ENXIO) {
5710 log_debug_errno(r, "Partition " DEVNUM_FORMAT_STR " has no UUID.", DEVNUM_FORMAT_VAL(partition_devno));
5711 return false;
5712 }
5713 if (r < 0) {
5714 log_debug_errno(r, "Failed to read partition UUID of " DEVNUM_FORMAT_STR ": %m", DEVNUM_FORMAT_VAL(partition_devno));
5715 return false;
5716 }
5717
5718 log_debug("Automatically found partition " DEVNUM_FORMAT_STR " of right type " SD_ID128_FORMAT_STR ".",
5719 DEVNUM_FORMAT_VAL(partition_devno),
5720 SD_ID128_FORMAT_VAL(pt_parsed));
5721
5722 if (ret_uuid)
5723 *ret_uuid = u;
5724
5725 return true;
5726 }
5727
5728 static int find_backing_devno(
5729 const char *path,
5730 const char *root,
5731 dev_t *ret) {
5732
5733 _cleanup_free_ char *resolved = NULL;
5734 int r;
5735
5736 assert(path);
5737
5738 r = chase(path, root, CHASE_PREFIX_ROOT, &resolved, NULL);
5739 if (r < 0)
5740 return r;
5741
5742 r = path_is_mount_point(resolved, NULL, 0);
5743 if (r < 0)
5744 return r;
5745 if (r == 0) /* Not a mount point, then it's not a partition of its own, let's not automatically use it. */
5746 return -ENOENT;
5747
5748 r = get_block_device(resolved, ret);
5749 if (r < 0)
5750 return r;
5751 if (r == 0) /* Not backed by physical file system, we can't use this */
5752 return -ENOENT;
5753
5754 return 0;
5755 }
5756
5757 static int resolve_copy_blocks_auto(
5758 GptPartitionType type,
5759 const char *root,
5760 dev_t restrict_devno,
5761 dev_t *ret_devno,
5762 sd_id128_t *ret_uuid) {
5763
5764 const char *try1 = NULL, *try2 = NULL;
5765 char p[SYS_BLOCK_PATH_MAX("/slaves")];
5766 _cleanup_closedir_ DIR *d = NULL;
5767 sd_id128_t found_uuid = SD_ID128_NULL;
5768 dev_t devno, found = 0;
5769 int r;
5770
5771 /* Enforce some security restrictions: CopyBlocks=auto should not be an avenue to get outside of the
5772 * --root=/--image= confinement. Specifically, refuse CopyBlocks= in combination with --root= at all,
5773 * and restrict block device references in the --image= case to loopback block device we set up.
5774 *
5775 * restrict_devno contain the dev_t of the loop back device we operate on in case of --image=, and
5776 * thus declares which device (and its partition subdevices) we shall limit access to. If
5777 * restrict_devno is zero no device probing access shall be allowed at all (used for --root=) and if
5778 * it is (dev_t) -1 then free access shall be allowed (if neither switch is used). */
5779
5780 if (restrict_devno == 0)
5781 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
5782 "Automatic discovery of backing block devices not permitted in --root= mode, refusing.");
5783
5784 /* Handles CopyBlocks=auto, and finds the right source partition to copy from. We look for matching
5785 * partitions in the host, using the appropriate directory as key and ensuring that the partition
5786 * type matches. */
5787
5788 if (type.designator == PARTITION_ROOT)
5789 try1 = "/";
5790 else if (type.designator == PARTITION_USR)
5791 try1 = "/usr/";
5792 else if (type.designator == PARTITION_ROOT_VERITY)
5793 try1 = "/";
5794 else if (type.designator == PARTITION_USR_VERITY)
5795 try1 = "/usr/";
5796 else if (type.designator == PARTITION_ESP) {
5797 try1 = "/efi/";
5798 try2 = "/boot/";
5799 } else if (type.designator == PARTITION_XBOOTLDR)
5800 try1 = "/boot/";
5801 else
5802 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
5803 "Partition type " SD_ID128_FORMAT_STR " not supported from automatic source block device discovery.",
5804 SD_ID128_FORMAT_VAL(type.uuid));
5805
5806 r = find_backing_devno(try1, root, &devno);
5807 if (r == -ENOENT && try2)
5808 r = find_backing_devno(try2, root, &devno);
5809 if (r < 0)
5810 return log_error_errno(r, "Failed to resolve automatic CopyBlocks= path for partition type " SD_ID128_FORMAT_STR ", sorry: %m",
5811 SD_ID128_FORMAT_VAL(type.uuid));
5812
5813 xsprintf_sys_block_path(p, "/slaves", devno);
5814 d = opendir(p);
5815 if (d) {
5816 struct dirent *de;
5817
5818 for (;;) {
5819 _cleanup_free_ char *q = NULL, *t = NULL;
5820 sd_id128_t u;
5821 dev_t sl;
5822
5823 errno = 0;
5824 de = readdir_no_dot(d);
5825 if (!de) {
5826 if (errno != 0)
5827 return log_error_errno(errno, "Failed to read directory '%s': %m", p);
5828
5829 break;
5830 }
5831
5832 if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN))
5833 continue;
5834
5835 q = path_join(p, de->d_name, "/dev");
5836 if (!q)
5837 return log_oom();
5838
5839 r = read_one_line_file(q, &t);
5840 if (r < 0)
5841 return log_error_errno(r, "Failed to read %s: %m", q);
5842
5843 r = parse_devnum(t, &sl);
5844 if (r < 0) {
5845 log_debug_errno(r, "Failed to parse %s, ignoring: %m", q);
5846 continue;
5847 }
5848 if (major(sl) == 0) {
5849 log_debug("Device backing %s is special, ignoring.", q);
5850 continue;
5851 }
5852
5853 r = resolve_copy_blocks_auto_candidate(sl, type, restrict_devno, &u);
5854 if (r < 0)
5855 return r;
5856 if (r > 0) {
5857 /* We found a matching one! */
5858 if (found != 0)
5859 return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ),
5860 "Multiple matching partitions found, refusing.");
5861
5862 found = sl;
5863 found_uuid = u;
5864 }
5865 }
5866 } else if (errno != ENOENT)
5867 return log_error_errno(errno, "Failed open %s: %m", p);
5868 else {
5869 r = resolve_copy_blocks_auto_candidate(devno, type, restrict_devno, &found_uuid);
5870 if (r < 0)
5871 return r;
5872 if (r > 0)
5873 found = devno;
5874 }
5875
5876 if (found == 0)
5877 return log_error_errno(SYNTHETIC_ERRNO(ENXIO),
5878 "Unable to automatically discover suitable partition to copy blocks from.");
5879
5880 if (ret_devno)
5881 *ret_devno = found;
5882
5883 if (ret_uuid)
5884 *ret_uuid = found_uuid;
5885
5886 return 0;
5887 }
5888
5889 static int context_open_copy_block_paths(
5890 Context *context,
5891 dev_t restrict_devno) {
5892
5893 int r;
5894
5895 assert(context);
5896
5897 LIST_FOREACH(partitions, p, context->partitions) {
5898 _cleanup_close_ int source_fd = -EBADF;
5899 _cleanup_free_ char *opened = NULL;
5900 sd_id128_t uuid = SD_ID128_NULL;
5901 uint64_t size;
5902 struct stat st;
5903
5904 if (p->copy_blocks_fd >= 0)
5905 continue;
5906
5907 assert(p->copy_blocks_size == UINT64_MAX);
5908
5909 if (PARTITION_EXISTS(p)) /* Never copy over partitions that already exist! */
5910 continue;
5911
5912 if (p->copy_blocks_path) {
5913
5914 source_fd = chase_and_open(p->copy_blocks_path, p->copy_blocks_root, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NONBLOCK, &opened);
5915 if (source_fd < 0)
5916 return log_error_errno(source_fd, "Failed to open '%s': %m", p->copy_blocks_path);
5917
5918 if (fstat(source_fd, &st) < 0)
5919 return log_error_errno(errno, "Failed to stat block copy file '%s': %m", opened);
5920
5921 if (!S_ISREG(st.st_mode) && restrict_devno != (dev_t) -1)
5922 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
5923 "Copying from block device node is not permitted in --image=/--root= mode, refusing.");
5924
5925 } else if (p->copy_blocks_auto) {
5926 dev_t devno = 0; /* Fake initialization to appease gcc. */
5927
5928 r = resolve_copy_blocks_auto(p->type, p->copy_blocks_root, restrict_devno, &devno, &uuid);
5929 if (r < 0)
5930 return r;
5931 assert(devno != 0);
5932
5933 source_fd = r = device_open_from_devnum(S_IFBLK, devno, O_RDONLY|O_CLOEXEC|O_NONBLOCK, &opened);
5934 if (r < 0)
5935 return log_error_errno(r, "Failed to open automatically determined source block copy device " DEVNUM_FORMAT_STR ": %m",
5936 DEVNUM_FORMAT_VAL(devno));
5937
5938 if (fstat(source_fd, &st) < 0)
5939 return log_error_errno(errno, "Failed to stat block copy file '%s': %m", opened);
5940 } else
5941 continue;
5942
5943 if (S_ISDIR(st.st_mode)) {
5944 _cleanup_free_ char *bdev = NULL;
5945 dev_t devt;
5946
5947 /* If the file is a directory, automatically find the backing block device */
5948
5949 if (major(st.st_dev) != 0)
5950 devt = st.st_dev;
5951 else {
5952 /* Special support for btrfs */
5953 r = btrfs_get_block_device_fd(source_fd, &devt);
5954 if (r == -EUCLEAN)
5955 return btrfs_log_dev_root(LOG_ERR, r, opened);
5956 if (r < 0)
5957 return log_error_errno(r, "Unable to determine backing block device of '%s': %m", opened);
5958 }
5959
5960 safe_close(source_fd);
5961
5962 source_fd = r = device_open_from_devnum(S_IFBLK, devt, O_RDONLY|O_CLOEXEC|O_NONBLOCK, &bdev);
5963 if (r < 0)
5964 return log_error_errno(r, "Failed to open block device backing '%s': %m", opened);
5965
5966 if (fstat(source_fd, &st) < 0)
5967 return log_error_errno(errno, "Failed to stat block device '%s': %m", bdev);
5968 }
5969
5970 if (S_ISREG(st.st_mode))
5971 size = st.st_size;
5972 else if (S_ISBLK(st.st_mode)) {
5973 if (ioctl(source_fd, BLKGETSIZE64, &size) != 0)
5974 return log_error_errno(errno, "Failed to determine size of block device to copy from: %m");
5975 } else
5976 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified path to copy blocks from '%s' is not a regular file, block device or directory, refusing: %m", opened);
5977
5978 if (size <= 0)
5979 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "File to copy bytes from '%s' has zero size, refusing.", opened);
5980 if (size % 512 != 0)
5981 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "File to copy bytes from '%s' has size that is not multiple of 512, refusing.", opened);
5982
5983 p->copy_blocks_fd = TAKE_FD(source_fd);
5984 p->copy_blocks_size = size;
5985
5986 free_and_replace(p->copy_blocks_path, opened);
5987
5988 /* When copying from an existing partition copy that partitions UUID if none is configured explicitly */
5989 if (!p->new_uuid_is_set && !sd_id128_is_null(uuid)) {
5990 p->new_uuid = uuid;
5991 p->new_uuid_is_set = true;
5992 }
5993 }
5994
5995 return 0;
5996 }
5997
5998 static int fd_apparent_size(int fd, uint64_t *ret) {
5999 off_t initial = 0;
6000 uint64_t size = 0;
6001
6002 assert(fd >= 0);
6003 assert(ret);
6004
6005 initial = lseek(fd, 0, SEEK_CUR);
6006 if (initial < 0)
6007 return log_error_errno(errno, "Failed to get file offset: %m");
6008
6009 for (off_t off = 0;;) {
6010 off_t r;
6011
6012 r = lseek(fd, off, SEEK_DATA);
6013 if (r < 0 && errno == ENXIO)
6014 /* If errno == ENXIO, that means we've reached the final hole of the file and
6015 * that hole isn't followed by more data. */
6016 break;
6017 if (r < 0)
6018 return log_error_errno(errno, "Failed to seek data in file from offset %"PRIi64": %m", off);
6019
6020 off = r; /* Set the offset to the start of the data segment. */
6021
6022 /* After copying a potential hole, find the end of the data segment by looking for
6023 * the next hole. If we get ENXIO, we're at EOF. */
6024 r = lseek(fd, off, SEEK_HOLE);
6025 if (r < 0) {
6026 if (errno == ENXIO)
6027 break;
6028 return log_error_errno(errno, "Failed to seek hole in file from offset %"PRIi64": %m", off);
6029 }
6030
6031 size += r - off;
6032 off = r;
6033 }
6034
6035 if (lseek(fd, initial, SEEK_SET) < 0)
6036 return log_error_errno(errno, "Failed to reset file offset: %m");
6037
6038 *ret = size;
6039
6040 return 0;
6041 }
6042
6043 static int context_minimize(Context *context) {
6044 const char *vt = NULL;
6045 int r;
6046
6047 assert(context);
6048
6049 LIST_FOREACH(partitions, p, context->partitions) {
6050 _cleanup_(rm_rf_physical_and_freep) char *root = NULL;
6051 _cleanup_(unlink_and_freep) char *temp = NULL;
6052 _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
6053 _cleanup_strv_free_ char **extra_mkfs_options = NULL;
6054 _cleanup_close_ int fd = -EBADF;
6055 _cleanup_free_ char *hint = NULL;
6056 sd_id128_t fs_uuid;
6057 struct stat st;
6058 uint64_t fsz;
6059
6060 if (p->dropped)
6061 continue;
6062
6063 if (PARTITION_EXISTS(p)) /* Never format existing partitions */
6064 continue;
6065
6066 if (!p->format)
6067 continue;
6068
6069 if (p->copy_blocks_fd >= 0)
6070 continue;
6071
6072 if (p->minimize == MINIMIZE_OFF)
6073 continue;
6074
6075 if (!partition_needs_populate(p))
6076 continue;
6077
6078 assert(!p->copy_blocks_path);
6079
6080 (void) partition_hint(p, context->node, &hint);
6081
6082 log_info("Pre-populating %s filesystem of partition %s twice to calculate minimal partition size",
6083 p->format, strna(hint));
6084
6085 if (!vt) {
6086 r = var_tmp_dir(&vt);
6087 if (r < 0)
6088 return log_error_errno(r, "Could not determine temporary directory: %m");
6089 }
6090
6091 r = tempfn_random_child(vt, "repart", &temp);
6092 if (r < 0)
6093 return log_error_errno(r, "Failed to generate temporary file path: %m");
6094
6095 if (fstype_is_ro(p->format))
6096 fs_uuid = p->fs_uuid;
6097 else {
6098 fd = open(temp, O_CREAT|O_EXCL|O_CLOEXEC|O_RDWR|O_NOCTTY, 0600);
6099 if (fd < 0)
6100 return log_error_errno(errno, "Failed to open temporary file %s: %m", temp);
6101
6102 /* This may seem huge but it will be created sparse so it doesn't take up any space
6103 * on disk until written to. */
6104 if (ftruncate(fd, 1024ULL * 1024ULL * 1024ULL * 1024ULL) < 0)
6105 return log_error_errno(errno, "Failed to truncate temporary file to %s: %m",
6106 FORMAT_BYTES(1024ULL * 1024ULL * 1024ULL * 1024ULL));
6107
6108 if (arg_offline <= 0) {
6109 r = loop_device_make(fd, O_RDWR, 0, UINT64_MAX, context->sector_size, 0, LOCK_EX, &d);
6110 if (r < 0 && (arg_offline == 0 || (r != -ENOENT && !ERRNO_IS_PRIVILEGE(r)) || !strv_isempty(p->subvolumes)))
6111 return log_error_errno(r, "Failed to make loopback device of %s: %m", temp);
6112 }
6113
6114 /* We're going to populate this filesystem twice so use a random UUID the first time
6115 * to avoid UUID conflicts. */
6116 r = sd_id128_randomize(&fs_uuid);
6117 if (r < 0)
6118 return r;
6119 }
6120
6121 if (!d || fstype_is_ro(p->format)) {
6122 if (!mkfs_supports_root_option(p->format))
6123 return log_error_errno(SYNTHETIC_ERRNO(ENODEV),
6124 "Loop device access is required to populate %s filesystems",
6125 p->format);
6126
6127 r = partition_populate_directory(context, p, &root);
6128 if (r < 0)
6129 return r;
6130 }
6131
6132 r = mkfs_options_from_env("REPART", p->format, &extra_mkfs_options);
6133 if (r < 0)
6134 return log_error_errno(r,
6135 "Failed to determine mkfs command line options for '%s': %m",
6136 p->format);
6137
6138 r = make_filesystem(d ? d->node : temp,
6139 p->format,
6140 strempty(p->new_label),
6141 root,
6142 fs_uuid,
6143 arg_discard, /* quiet = */ false,
6144 context->fs_sector_size,
6145 extra_mkfs_options);
6146 if (r < 0)
6147 return r;
6148
6149 /* Read-only filesystems are minimal from the first try because they create and size the
6150 * loopback file for us. */
6151 if (fstype_is_ro(p->format)) {
6152 assert(fd < 0);
6153
6154 fd = open(temp, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
6155 if (fd < 0)
6156 return log_error_errno(errno, "Failed to open temporary file %s: %m", temp);
6157
6158 if (fstat(fd, &st) < 0)
6159 return log_error_errno(errno, "Failed to stat temporary file: %m");
6160
6161 log_info("Minimal partition size of %s filesystem of partition %s is %s",
6162 p->format, strna(hint), FORMAT_BYTES(st.st_size));
6163
6164 p->copy_blocks_path = TAKE_PTR(temp);
6165 p->copy_blocks_path_is_our_file = true;
6166 p->copy_blocks_fd = TAKE_FD(fd);
6167 p->copy_blocks_size = st.st_size;
6168 continue;
6169 }
6170
6171 if (!root) {
6172 assert(d);
6173
6174 r = partition_populate_filesystem(context, p, d->node);
6175 if (r < 0)
6176 return r;
6177 }
6178
6179 /* Other filesystems need to be provided with a pre-sized loopback file and will adapt to
6180 * fully occupy it. Because we gave the filesystem a 1T sparse file, we need to shrink the
6181 * filesystem down to a reasonable size again to fit it in the disk image. While there are
6182 * some filesystems that support shrinking, it doesn't always work properly (e.g. shrinking
6183 * btrfs gives us a 2.0G filesystem regardless of what we put in it). Instead, let's populate
6184 * the filesystem again, but this time, instead of providing the filesystem with a 1T sparse
6185 * loopback file, let's size the loopback file based on the actual data used by the
6186 * filesystem in the sparse file after the first attempt. This should be a good guess of the
6187 * minimal amount of space needed in the filesystem to fit all the required data.
6188 */
6189 r = fd_apparent_size(fd, &fsz);
6190 if (r < 0)
6191 return r;
6192
6193 /* Massage the size a bit because just going by actual data used in the sparse file isn't
6194 * fool-proof. */
6195 uint64_t heuristic = streq(p->format, "xfs") ? fsz : fsz / 2;
6196 fsz = round_up_size(fsz + heuristic, context->grain_size);
6197 if (minimal_size_by_fs_name(p->format) != UINT64_MAX)
6198 fsz = MAX(minimal_size_by_fs_name(p->format), fsz);
6199
6200 log_info("Minimal partition size of %s filesystem of partition %s is %s",
6201 p->format, strna(hint), FORMAT_BYTES(fsz));
6202
6203 d = loop_device_unref(d);
6204
6205 /* Erase the previous filesystem first. */
6206 if (ftruncate(fd, 0))
6207 return log_error_errno(errno, "Failed to erase temporary file: %m");
6208
6209 if (ftruncate(fd, fsz))
6210 return log_error_errno(errno, "Failed to truncate temporary file to %s: %m", FORMAT_BYTES(fsz));
6211
6212 if (arg_offline <= 0) {
6213 r = loop_device_make(fd, O_RDWR, 0, UINT64_MAX, context->sector_size, 0, LOCK_EX, &d);
6214 if (r < 0 && (arg_offline == 0 || (r != -ENOENT && !ERRNO_IS_PRIVILEGE(r)) || !strv_isempty(p->subvolumes)))
6215 return log_error_errno(r, "Failed to make loopback device of %s: %m", temp);
6216 }
6217
6218 r = make_filesystem(d ? d->node : temp,
6219 p->format,
6220 strempty(p->new_label),
6221 root,
6222 p->fs_uuid,
6223 arg_discard,
6224 /* quiet = */ false,
6225 context->fs_sector_size,
6226 extra_mkfs_options);
6227 if (r < 0)
6228 return r;
6229
6230 if (!root) {
6231 assert(d);
6232
6233 r = partition_populate_filesystem(context, p, d->node);
6234 if (r < 0)
6235 return r;
6236 }
6237
6238 if (fstat(fd, &st) < 0)
6239 return log_error_errno(errno, "Failed to stat temporary file: %m");
6240
6241 p->copy_blocks_path = TAKE_PTR(temp);
6242 p->copy_blocks_path_is_our_file = true;
6243 p->copy_blocks_fd = TAKE_FD(fd);
6244 p->copy_blocks_size = st.st_size;
6245 }
6246
6247 /* Now that we've done the data partitions, do the verity hash partitions. We do these in a separate
6248 * step because they might depend on data generated in the previous step. */
6249
6250 LIST_FOREACH(partitions, p, context->partitions) {
6251 _cleanup_(unlink_and_freep) char *temp = NULL;
6252 _cleanup_free_ char *hint = NULL;
6253 _cleanup_close_ int fd = -EBADF;
6254 struct stat st;
6255 Partition *dp;
6256
6257 if (p->dropped)
6258 continue;
6259
6260 if (PARTITION_EXISTS(p)) /* Never format existing partitions */
6261 continue;
6262
6263 if (p->minimize == MINIMIZE_OFF)
6264 continue;
6265
6266 if (p->verity != VERITY_HASH)
6267 continue;
6268
6269 assert_se(dp = p->siblings[VERITY_DATA]);
6270 assert(!dp->dropped);
6271 assert(dp->copy_blocks_path);
6272
6273 (void) partition_hint(p, context->node, &hint);
6274
6275 log_info("Pre-populating verity hash data of partition %s to calculate minimal partition size",
6276 strna(hint));
6277
6278 if (!vt) {
6279 r = var_tmp_dir(&vt);
6280 if (r < 0)
6281 return log_error_errno(r, "Could not determine temporary directory: %m");
6282 }
6283
6284 r = tempfn_random_child(vt, "repart", &temp);
6285 if (r < 0)
6286 return log_error_errno(r, "Failed to generate temporary file path: %m");
6287
6288 r = touch(temp);
6289 if (r < 0)
6290 return log_error_errno(r, "Failed to create temporary file: %m");
6291
6292 r = partition_format_verity_hash(context, p, temp, dp->copy_blocks_path);
6293 if (r < 0)
6294 return r;
6295
6296 fd = open(temp, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
6297 if (fd < 0)
6298 return log_error_errno(errno, "Failed to open temporary file %s: %m", temp);
6299
6300 if (fstat(fd, &st) < 0)
6301 return log_error_errno(r, "Failed to stat temporary file: %m");
6302
6303 log_info("Minimal partition size of verity hash partition %s is %s",
6304 strna(hint), FORMAT_BYTES(st.st_size));
6305
6306 p->copy_blocks_path = TAKE_PTR(temp);
6307 p->copy_blocks_path_is_our_file = true;
6308 p->copy_blocks_fd = TAKE_FD(fd);
6309 p->copy_blocks_size = st.st_size;
6310 }
6311
6312 return 0;
6313 }
6314
6315 static int parse_partition_types(const char *p, GptPartitionType **partitions, size_t *n_partitions) {
6316 int r;
6317
6318 assert(partitions);
6319 assert(n_partitions);
6320
6321 for (;;) {
6322 _cleanup_free_ char *name = NULL;
6323 GptPartitionType type;
6324
6325 r = extract_first_word(&p, &name, ",", EXTRACT_CUNESCAPE|EXTRACT_DONT_COALESCE_SEPARATORS);
6326 if (r == 0)
6327 break;
6328 if (r < 0)
6329 return log_error_errno(r, "Failed to extract partition type identifier or GUID: %s", p);
6330
6331 r = gpt_partition_type_from_string(name, &type);
6332 if (r < 0)
6333 return log_error_errno(r, "'%s' is not a valid partition type identifier or GUID", name);
6334
6335 if (!GREEDY_REALLOC(*partitions, *n_partitions + 1))
6336 return log_oom();
6337
6338 (*partitions)[(*n_partitions)++] = type;
6339 }
6340
6341 return 0;
6342 }
6343
/* Prints the usage summary for the tool to stdout.
 *
 * The four "%s"-style placeholders of the format string are filled with program_invocation_short_name,
 * the ansi_highlight()/ansi_normal() sequences that wrap the one-line description, and the man page
 * reference generated by terminal_urlify_man() for systemd-repart(8).
 *
 * Returns 0 after printing. Any failure of terminal_urlify_man() is reported through log_oom(). */
6344 static int help(void) {
6345 _cleanup_free_ char *link = NULL;
6346 int r;
6347
6348 r = terminal_urlify_man("systemd-repart", "8", &link);
6349 if (r < 0)
6350 return log_oom();
6351
/* One string literal per option line; adjacent literals are concatenated into a single format string. */
6352 printf("%s [OPTIONS...] [DEVICE]\n"
6353 "\n%sGrow and add partitions to partition table.%s\n\n"
6354 " -h --help Show this help\n"
6355 " --version Show package version\n"
6356 " --no-pager Do not pipe output into a pager\n"
6357 " --no-legend Do not show the headers and footers\n"
6358 " --dry-run=BOOL Whether to run dry-run operation\n"
6359 " --empty=MODE One of refuse, allow, require, force, create; controls\n"
6360 " how to handle empty disks lacking partition tables\n"
6361 " --discard=BOOL Whether to discard backing blocks for new partitions\n"
6362 " --pretty=BOOL Whether to show pretty summary before doing changes\n"
6363 " --factory-reset=BOOL Whether to remove data partitions before recreating\n"
6364 " them\n"
6365 " --can-factory-reset Test whether factory reset is defined\n"
6366 " --root=PATH Operate relative to root path\n"
6367 " --image=PATH Operate relative to image file\n"
6368 " --image-policy=POLICY\n"
6369 " Specify disk image dissection policy\n"
6370 " --definitions=DIR Find partition definitions in specified directory\n"
6371 " --key-file=PATH Key to use when encrypting partitions\n"
6372 " --private-key=PATH Private key to use when generating verity roothash\n"
6373 " signatures\n"
6374 " --certificate=PATH PEM certificate to use when generating verity\n"
6375 " roothash signatures\n"
6376 " --tpm2-device=PATH Path to TPM2 device node to use\n"
6377 " --tpm2-pcrs=PCR1+PCR2+PCR3+…\n"
6378 " TPM2 PCR indexes to use for TPM2 enrollment\n"
6379 " --tpm2-public-key=PATH\n"
6380 " Enroll signed TPM2 PCR policy against PEM public key\n"
6381 " --tpm2-public-key-pcrs=PCR1+PCR2+PCR3+…\n"
6382 " Enroll signed TPM2 PCR policy for specified TPM2 PCRs\n"
6383 " --seed=UUID 128-bit seed UUID to derive all UUIDs from\n"
6384 " --size=BYTES Grow loopback file to specified size\n"
6385 " --json=pretty|short|off\n"
6386 " Generate JSON output\n"
6387 " --split=BOOL Whether to generate split artifacts\n"
6388 " --include-partitions=PARTITION1,PARTITION2,PARTITION3,…\n"
6389 " Ignore partitions not of the specified types\n"
6390 " --exclude-partitions=PARTITION1,PARTITION2,PARTITION3,…\n"
6391 " Ignore partitions of the specified types\n"
6392 " --defer-partitions=PARTITION1,PARTITION2,PARTITION3,…\n"
6393 " Take partitions of the specified types into account\n"
6394 " but don't populate them yet\n"
6395 " --sector-size=SIZE Set the logical sector size for the image\n"
6396 " --architecture=ARCH Set the generic architecture for the image\n"
6397 " --offline=BOOL Whether to build the image offline\n"
6398 " -s --copy-source=PATH Specify the primary source tree to copy files from\n"
6399 " --copy-from=IMAGE Copy partitions from the given image(s)\n"
6400 " -S --make-ddi=sysext Make a system extension DDI\n"
6401 " -C --make-ddi=confext Make a configuration extension DDI\n"
6402 " -P --make-ddi=portable Make a portable service DDI\n"
6403 "\nSee the %s for details.\n",
6404 program_invocation_short_name,
6405 ansi_highlight(),
6406 ansi_normal(),
6407 link);
6408
6409 return 0;
6410 }
6411
/* Parses the process command line into the file-level arg_* settings and validates combinations of
 * options afterwards.
 *
 * Return value:
 *   - the result of help(), version() or tpm2_list_devices() if -h, --version or --tpm2-device=list is
 *     given (parsing stops right there),
 *   - the result of parse_json_argument() whenever that is <= 0,
 *   - a negative error value for invalid or conflicting options,
 *   - 1 once everything was parsed and validated. */
6412 static int parse_argv(int argc, char *argv[]) {
6413
/* Identifiers for options that are handled via their long name; numbering starts at 0x100.
 * NOTE(review): ARG_SKIP_PARTITIONS is not referenced by options[] or by the switch in this function. */
6414 enum {
6415 ARG_VERSION = 0x100,
6416 ARG_NO_PAGER,
6417 ARG_NO_LEGEND,
6418 ARG_DRY_RUN,
6419 ARG_EMPTY,
6420 ARG_DISCARD,
6421 ARG_FACTORY_RESET,
6422 ARG_CAN_FACTORY_RESET,
6423 ARG_ROOT,
6424 ARG_IMAGE,
6425 ARG_IMAGE_POLICY,
6426 ARG_SEED,
6427 ARG_PRETTY,
6428 ARG_DEFINITIONS,
6429 ARG_SIZE,
6430 ARG_JSON,
6431 ARG_KEY_FILE,
6432 ARG_PRIVATE_KEY,
6433 ARG_CERTIFICATE,
6434 ARG_TPM2_DEVICE,
6435 ARG_TPM2_PCRS,
6436 ARG_TPM2_PUBLIC_KEY,
6437 ARG_TPM2_PUBLIC_KEY_PCRS,
6438 ARG_SPLIT,
6439 ARG_INCLUDE_PARTITIONS,
6440 ARG_EXCLUDE_PARTITIONS,
6441 ARG_DEFER_PARTITIONS,
6442 ARG_SECTOR_SIZE,
6443 ARG_SKIP_PARTITIONS,
6444 ARG_ARCHITECTURE,
6445 ARG_OFFLINE,
6446 ARG_COPY_FROM,
6447 ARG_MAKE_DDI,
6448 };
6449
6450 static const struct option options[] = {
6451 { "help", no_argument, NULL, 'h' },
6452 { "version", no_argument, NULL, ARG_VERSION },
6453 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
6454 { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
6455 { "dry-run", required_argument, NULL, ARG_DRY_RUN },
6456 { "empty", required_argument, NULL, ARG_EMPTY },
6457 { "discard", required_argument, NULL, ARG_DISCARD },
6458 { "factory-reset", required_argument, NULL, ARG_FACTORY_RESET },
6459 { "can-factory-reset", no_argument, NULL, ARG_CAN_FACTORY_RESET },
6460 { "root", required_argument, NULL, ARG_ROOT },
6461 { "image", required_argument, NULL, ARG_IMAGE },
6462 { "image-policy", required_argument, NULL, ARG_IMAGE_POLICY },
6463 { "seed", required_argument, NULL, ARG_SEED },
6464 { "pretty", required_argument, NULL, ARG_PRETTY },
6465 { "definitions", required_argument, NULL, ARG_DEFINITIONS },
6466 { "size", required_argument, NULL, ARG_SIZE },
6467 { "json", required_argument, NULL, ARG_JSON },
6468 { "key-file", required_argument, NULL, ARG_KEY_FILE },
6469 { "private-key", required_argument, NULL, ARG_PRIVATE_KEY },
6470 { "certificate", required_argument, NULL, ARG_CERTIFICATE },
6471 { "tpm2-device", required_argument, NULL, ARG_TPM2_DEVICE },
6472 { "tpm2-pcrs", required_argument, NULL, ARG_TPM2_PCRS },
6473 { "tpm2-public-key", required_argument, NULL, ARG_TPM2_PUBLIC_KEY },
6474 { "tpm2-public-key-pcrs", required_argument, NULL, ARG_TPM2_PUBLIC_KEY_PCRS },
6475 { "split", required_argument, NULL, ARG_SPLIT },
6476 { "include-partitions", required_argument, NULL, ARG_INCLUDE_PARTITIONS },
6477 { "exclude-partitions", required_argument, NULL, ARG_EXCLUDE_PARTITIONS },
6478 { "defer-partitions", required_argument, NULL, ARG_DEFER_PARTITIONS },
6479 { "sector-size", required_argument, NULL, ARG_SECTOR_SIZE },
6480 { "architecture", required_argument, NULL, ARG_ARCHITECTURE },
6481 { "offline", required_argument, NULL, ARG_OFFLINE },
6482 { "copy-from", required_argument, NULL, ARG_COPY_FROM },
6483 { "copy-source", required_argument, NULL, 's' },
6484 { "make-ddi", required_argument, NULL, ARG_MAKE_DDI },
6485 {}
6486 };
6487
/* These two flags are cleared as soon as --tpm2-pcrs= resp. --tpm2-public-key-pcrs= is seen; if they
 * are still set after the option loop, defaults are filled in further down. */
6488 bool auto_hash_pcr_values = true, auto_public_key_pcr_mask = true;
6489 int c, r;
6490
6491 assert(argc >= 0);
6492 assert(argv);
6493
6494 while ((c = getopt_long(argc, argv, "hs:SCP", options, NULL)) >= 0)
6495
6496 switch (c) {
6497
6498 case 'h':
6499 return help();
6500
6501 case ARG_VERSION:
6502 return version();
6503
6504 case ARG_NO_PAGER:
6505 arg_pager_flags |= PAGER_DISABLE;
6506 break;
6507
6508 case ARG_NO_LEGEND:
6509 arg_legend = false;
6510 break;
6511
6512 case ARG_DRY_RUN:
6513 r = parse_boolean_argument("--dry-run=", optarg, &arg_dry_run);
6514 if (r < 0)
6515 return r;
6516 break;
6517
6518 case ARG_EMPTY:
/* An empty argument resets the setting to "unset". */
6519 if (isempty(optarg)) {
6520 arg_empty = EMPTY_UNSET;
6521 break;
6522 }
6523
6524 arg_empty = empty_mode_from_string(optarg);
6525 if (arg_empty < 0)
6526 return log_error_errno(arg_empty, "Failed to parse --empty= parameter: %s", optarg);
6527
6528 break;
6529
6530 case ARG_DISCARD:
6531 r = parse_boolean_argument("--discard=", optarg, &arg_discard);
6532 if (r < 0)
6533 return r;
6534 break;
6535
6536 case ARG_FACTORY_RESET:
6537 r = parse_boolean_argument("--factory-reset=", optarg, NULL);
6538 if (r < 0)
6539 return r;
6540 arg_factory_reset = r;
6541 break;
6542
6543 case ARG_CAN_FACTORY_RESET:
6544 arg_can_factory_reset = true;
6545 break;
6546
6547 case ARG_ROOT:
6548 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_root);
6549 if (r < 0)
6550 return r;
6551 break;
6552
6553 case ARG_IMAGE:
6554 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_image);
6555 if (r < 0)
6556 return r;
6557 break;
6558
6559 case ARG_IMAGE_POLICY:
6560 r = parse_image_policy_argument(optarg, &arg_image_policy);
6561 if (r < 0)
6562 return r;
6563 break;
6564
6565 case ARG_SEED:
/* Three forms: empty (reset seed, no randomization), "random", or an explicit 128-bit ID. */
6566 if (isempty(optarg)) {
6567 arg_seed = SD_ID128_NULL;
6568 arg_randomize = false;
6569 } else if (streq(optarg, "random"))
6570 arg_randomize = true;
6571 else {
6572 r = sd_id128_from_string(optarg, &arg_seed);
6573 if (r < 0)
6574 return log_error_errno(r, "Failed to parse seed: %s", optarg);
6575
6576 arg_randomize = false;
6577 }
6578
6579 break;
6580
6581 case ARG_PRETTY:
6582 r = parse_boolean_argument("--pretty=", optarg, NULL);
6583 if (r < 0)
6584 return r;
6585 arg_pretty = r;
6586 break;
6587
6588 case ARG_DEFINITIONS: {
/* May be specified multiple times; every path is appended to arg_definitions. */
6589 _cleanup_free_ char *path = NULL;
6590 r = parse_path_argument(optarg, false, &path);
6591 if (r < 0)
6592 return r;
6593 if (strv_consume(&arg_definitions, TAKE_PTR(path)) < 0)
6594 return log_oom();
6595 break;
6596 }
6597
6598 case ARG_SIZE: {
6599 uint64_t parsed, rounded;
6600
6601 if (streq(optarg, "auto")) {
6602 arg_size = UINT64_MAX;
6603 arg_size_auto = true;
6604 break;
6605 }
6606
6607 r = parse_size(optarg, 1024, &parsed);
6608 if (r < 0)
6609 return log_error_errno(r, "Failed to parse --size= parameter: %s", optarg);
6610
/* Explicit sizes are rounded up to a multiple of 4096; a result of 0 or UINT64_MAX is refused. */
6611 rounded = round_up_size(parsed, 4096);
6612 if (rounded == 0)
6613 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Specified image size too small, refusing.");
6614 if (rounded == UINT64_MAX)
6615 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Specified image size too large, refusing.");
6616
6617 if (rounded != parsed)
6618 log_warning("Specified size is not a multiple of 4096, rounding up automatically. (%" PRIu64 " %s %" PRIu64 ")",
6619 parsed, special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), rounded);
6620
6621 arg_size = rounded;
6622 arg_size_auto = false;
6623 break;
6624 }
6625
6626 case ARG_JSON:
/* Note that a result of 0 also ends parsing here, not only negative values. */
6627 r = parse_json_argument(optarg, &arg_json_format_flags);
6628 if (r <= 0)
6629 return r;
6630
6631 break;
6632
6633 case ARG_KEY_FILE: {
6634 _cleanup_(erase_and_freep) char *k = NULL;
6635 size_t n = 0;
6636
6637 r = read_full_file_full(
6638 AT_FDCWD, optarg, UINT64_MAX, SIZE_MAX,
6639 READ_FULL_FILE_SECURE|READ_FULL_FILE_WARN_WORLD_READABLE|READ_FULL_FILE_CONNECT_SOCKET,
6640 NULL,
6641 &k, &n);
6642 if (r < 0)
6643 return log_error_errno(r, "Failed to read key file '%s': %m", optarg);
6644
/* A later --key-file= replaces the key read by an earlier one. */
6645 erase_and_free(arg_key);
6646 arg_key = TAKE_PTR(k);
6647 arg_key_size = n;
6648 break;
6649 }
6650
6651 case ARG_PRIVATE_KEY: {
6652 _cleanup_(erase_and_freep) char *k = NULL;
6653 size_t n = 0;
6654
6655 r = read_full_file_full(
6656 AT_FDCWD, optarg, UINT64_MAX, SIZE_MAX,
6657 READ_FULL_FILE_SECURE|READ_FULL_FILE_WARN_WORLD_READABLE|READ_FULL_FILE_CONNECT_SOCKET,
6658 NULL,
6659 &k, &n);
6660 if (r < 0)
6661 return log_error_errno(r, "Failed to read key file '%s': %m", optarg);
6662
/* Drop a previously parsed key before parsing the new one. */
6663 EVP_PKEY_free(arg_private_key);
6664 arg_private_key = NULL;
6665 r = parse_private_key(k, n, &arg_private_key);
6666 if (r < 0)
6667 return r;
6668 break;
6669 }
6670
6671 case ARG_CERTIFICATE: {
6672 _cleanup_free_ char *cert = NULL;
6673 size_t n = 0;
6674
6675 r = read_full_file_full(
6676 AT_FDCWD, optarg, UINT64_MAX, SIZE_MAX,
6677 READ_FULL_FILE_CONNECT_SOCKET,
6678 NULL,
6679 &cert, &n);
6680 if (r < 0)
6681 return log_error_errno(r, "Failed to read certificate file '%s': %m", optarg);
6682
6683 X509_free(arg_certificate);
6684 arg_certificate = NULL;
6685 r = parse_x509_certificate(cert, n, &arg_certificate);
6686 if (r < 0)
6687 return r;
6688 break;
6689 }
6690
6691 case ARG_TPM2_DEVICE: {
6692 _cleanup_free_ char *device = NULL;
6693
6694 if (streq(optarg, "list"))
6695 return tpm2_list_devices();
6696
/* "auto" is stored as NULL, any other value as a copy of the string. */
6697 if (!streq(optarg, "auto")) {
6698 device = strdup(optarg);
6699 if (!device)
6700 return log_oom();
6701 }
6702
6703 free(arg_tpm2_device);
6704 arg_tpm2_device = TAKE_PTR(device);
6705 break;
6706 }
6707
6708 case ARG_TPM2_PCRS:
6709 auto_hash_pcr_values = false;
6710 r = tpm2_parse_pcr_argument_append(optarg, &arg_tpm2_hash_pcr_values, &arg_tpm2_n_hash_pcr_values);
6711 if (r < 0)
6712 return r;
6713
6714 break;
6715
6716 case ARG_TPM2_PUBLIC_KEY:
6717 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_tpm2_public_key);
6718 if (r < 0)
6719 return r;
6720
6721 break;
6722
6723 case ARG_TPM2_PUBLIC_KEY_PCRS:
6724 auto_public_key_pcr_mask = false;
6725 r = tpm2_parse_pcr_argument_to_mask(optarg, &arg_tpm2_public_key_pcr_mask);
6726 if (r < 0)
6727 return r;
6728
6729 break;
6730
6731 case ARG_SPLIT:
6732 r = parse_boolean_argument("--split=", optarg, NULL);
6733 if (r < 0)
6734 return r;
6735
6736 arg_split = r;
6737 break;
6738
/* --include-partitions= and --exclude-partitions= share arg_filter_partitions, hence only one of the
 * two kinds may be used per invocation. */
6739 case ARG_INCLUDE_PARTITIONS:
6740 if (arg_filter_partitions_type == FILTER_PARTITIONS_EXCLUDE)
6741 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6742 "Combination of --include-partitions= and --exclude-partitions= is invalid.");
6743
6744 r = parse_partition_types(optarg, &arg_filter_partitions, &arg_n_filter_partitions);
6745 if (r < 0)
6746 return r;
6747
6748 arg_filter_partitions_type = FILTER_PARTITIONS_INCLUDE;
6749
6750 break;
6751
6752 case ARG_EXCLUDE_PARTITIONS:
6753 if (arg_filter_partitions_type == FILTER_PARTITIONS_INCLUDE)
6754 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6755 "Combination of --include-partitions= and --exclude-partitions= is invalid.");
6756
6757 r = parse_partition_types(optarg, &arg_filter_partitions, &arg_n_filter_partitions);
6758 if (r < 0)
6759 return r;
6760
6761 arg_filter_partitions_type = FILTER_PARTITIONS_EXCLUDE;
6762
6763 break;
6764
6765 case ARG_DEFER_PARTITIONS:
6766 r = parse_partition_types(optarg, &arg_defer_partitions, &arg_n_defer_partitions);
6767 if (r < 0)
6768 return r;
6769
6770 break;
6771
6772 case ARG_SECTOR_SIZE:
6773 r = parse_sector_size(optarg, &arg_sector_size);
6774 if (r < 0)
6775 return r;
6776
6777 break;
6778
6779 case ARG_ARCHITECTURE:
6780 r = architecture_from_string(optarg);
6781 if (r < 0)
6782 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid architecture '%s'", optarg);
6783
6784 arg_architecture = r;
6785 break;
6786
6787 case ARG_OFFLINE:
/* Tri-state: "auto" is stored as -1, otherwise the parsed boolean. */
6788 if (streq(optarg, "auto"))
6789 arg_offline = -1;
6790 else {
6791 r = parse_boolean_argument("--offline=", optarg, NULL);
6792 if (r < 0)
6793 return r;
6794
6795 arg_offline = r;
6796 }
6797
6798 break;
6799
6800 case ARG_COPY_FROM: {
6801 _cleanup_free_ char *p = NULL;
6802
6803 r = parse_path_argument(optarg, /* suppress_root= */ false, &p);
6804 if (r < 0)
6805 return r;
6806
6807 if (strv_consume(&arg_copy_from, TAKE_PTR(p)) < 0)
6808 return log_oom();
6809
6810 break;
6811 }
6812
6813 case 's':
6814 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_copy_source);
6815 if (r < 0)
6816 return r;
6817 break;
6818
6819 case ARG_MAKE_DDI:
6820 if (!filename_is_valid(optarg))
6821 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid DDI type: %s", optarg);
6822
6823 r = free_and_strdup_warn(&arg_make_ddi, optarg);
6824 if (r < 0)
6825 return r;
6826 break;
6827
/* -S, -C and -P are shortcuts that set arg_make_ddi to a fixed type name. */
6828 case 'S':
6829 r = free_and_strdup_warn(&arg_make_ddi, "sysext");
6830 if (r < 0)
6831 return r;
6832 break;
6833
6834 case 'C':
6835 r = free_and_strdup_warn(&arg_make_ddi, "confext");
6836 if (r < 0)
6837 return r;
6838 break;
6839
6840 case 'P':
6841 r = free_and_strdup_warn(&arg_make_ddi, "portable");
6842 if (r < 0)
6843 return r;
6844 break;
6845
6846 case '?':
6847 return -EINVAL;
6848
6849 default:
6850 assert_not_reached();
6851 }
6852
/* From here on: validation of option combinations and filling in of defaults. */
6853 if (argc - optind > 1)
6854 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6855 "Expected at most one argument, the path to the block device or image file.");
6856
6857 if (arg_make_ddi) {
6858 if (arg_definitions)
6859 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Combination of --make-ddi= and --definitions= is not supported.");
6860 if (!IN_SET(arg_empty, EMPTY_UNSET, EMPTY_CREATE))
6861 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Combination of --make-ddi= and --empty=%s is not supported.", empty_mode_to_string(arg_empty));
6862
6863 /* Imply automatic sizing in DDI mode */
6864 if (arg_size == UINT64_MAX)
6865 arg_size_auto = true;
6866
6867 if (!arg_copy_source)
6868 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No --copy-source= specified, refusing.");
6869
6870 r = dir_is_empty(arg_copy_source, /* ignore_hidden_or_backup= */ false);
6871 if (r < 0)
6872 return log_error_errno(r, "Failed to determine if '%s' is empty: %m", arg_copy_source);
6873 if (r > 0)
6874 return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Source directory '%s' is empty, refusing to create empty image.", arg_copy_source);
6875
6876 if (sd_id128_is_null(arg_seed) && !arg_randomize) {
6877 /* We don't want that /etc/machine-id leaks into any image built this way, hence
6878 * let's randomize the seed if not specified explicitly */
6879 log_notice("No seed value specified, randomizing generated UUIDs, resulting image will not be reproducible.");
6880 arg_randomize = true;
6881 }
6882
/* DDI mode always creates the image file anew. */
6883 arg_empty = EMPTY_CREATE;
6884 }
6885
6886 if (arg_empty == EMPTY_UNSET) /* default to refuse mode, if not otherwise specified */
6887 arg_empty = EMPTY_REFUSE;
6888
6889 if (arg_factory_reset > 0 && IN_SET(arg_empty, EMPTY_FORCE, EMPTY_REQUIRE, EMPTY_CREATE))
6890 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6891 "Combination of --factory-reset=yes and --empty=force/--empty=require/--empty=create is invalid.");
6892
6893 if (arg_can_factory_reset)
6894 arg_dry_run = true; /* When --can-factory-reset is specified we don't make changes, hence
6895 * non-dry-run mode makes no sense. Thus, imply dry run mode so that we
6896 * open things strictly read-only. */
6897 else if (arg_empty == EMPTY_CREATE)
6898 arg_dry_run = false; /* Imply --dry-run=no if we create the loopback file anew. After all we
6899 * cannot really break anyone's partition tables that way. */
6900
6901 /* Disable pager once we are not just reviewing, but doing things. */
6902 if (!arg_dry_run)
6903 arg_pager_flags |= PAGER_DISABLE;
6904
6905 if (arg_empty == EMPTY_CREATE && arg_size == UINT64_MAX && !arg_size_auto)
6906 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6907 "If --empty=create is specified, --size= must be specified, too.");
6908
6909 if (arg_image && arg_root)
6910 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Please specify either --root= or --image=, the combination of both is not supported.");
6911 else if (!arg_image && !arg_root && in_initrd()) {
6912
6913 /* By default operate on /sysusr/ or /sysroot/ when invoked in the initrd. We prefer the
6914 * former, if it is mounted, so that we have deterministic behaviour on systems where /usr/
6915 * is vendor-supplied but the root fs formatted on first boot. */
6916 r = path_is_mount_point("/sysusr/usr", NULL, 0);
6917 if (r <= 0) {
6918 if (r < 0 && r != -ENOENT)
6919 log_debug_errno(r, "Unable to determine whether /sysusr/usr is a mount point, assuming it is not: %m");
6920
6921 arg_root = strdup("/sysroot");
6922 } else
6923 arg_root = strdup("/sysusr");
6924 if (!arg_root)
6925 return log_oom();
6926 }
6927
/* The single optional positional argument is the device node or image file to operate on. */
6928 arg_node = argc > optind ? argv[optind] : NULL;
6929
6930 if (IN_SET(arg_empty, EMPTY_FORCE, EMPTY_REQUIRE, EMPTY_CREATE) && !arg_node && !arg_image)
6931 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6932 "A path to a device node or image file must be specified when --make-ddi=, --empty=force, --empty=require or --empty=create are used.");
6933
6934 if (arg_split && !arg_node)
6935 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6936 "A path to an image file must be specified when --split is used.");
6937
/* Apply the TPM2 PCR defaults unless the respective option was given explicitly. */
6938 if (auto_public_key_pcr_mask) {
6939 assert(arg_tpm2_public_key_pcr_mask == 0);
6940 arg_tpm2_public_key_pcr_mask = INDEX_TO_MASK(uint32_t, TPM2_PCR_KERNEL_BOOT);
6941 }
6942
6943 if (auto_hash_pcr_values) {
6944 assert(arg_tpm2_n_hash_pcr_values == 0);
6945
6946 if (!GREEDY_REALLOC_APPEND(
6947 arg_tpm2_hash_pcr_values,
6948 arg_tpm2_n_hash_pcr_values,
6949 &TPM2_PCR_VALUE_MAKE(TPM2_PCR_INDEX_DEFAULT, /* hash= */ 0, /* value= */ {}),
6950 1))
6951 return log_oom();
6952 }
6953
/* If --pretty= was not given (value still negative), enable it when stdout is a terminal. */
6954 if (arg_pretty < 0 && isatty(STDOUT_FILENO))
6955 arg_pretty = true;
6956
/* With an explicit architecture, rewrite all partition types collected for filtering/deferring. */
6957 if (arg_architecture >= 0) {
6958 FOREACH_ARRAY(p, arg_filter_partitions, arg_n_filter_partitions)
6959 *p = gpt_partition_type_override_architecture(*p, arg_architecture);
6960
6961 FOREACH_ARRAY(p, arg_defer_partitions, arg_n_defer_partitions)
6962 *p = gpt_partition_type_override_architecture(*p, arg_architecture);
6963 }
6964
6965 return 1;
6966 }
6967
6968 static int parse_proc_cmdline_factory_reset(void) {
6969 bool b;
6970 int r;
6971
6972 if (arg_factory_reset >= 0) /* Never override what is specified on the process command line */
6973 return 0;
6974
6975 if (!in_initrd()) /* Never honour kernel command line factory reset request outside of the initrd */
6976 return 0;
6977
6978 r = proc_cmdline_get_bool("systemd.factory_reset", /* flags = */ 0, &b);
6979 if (r < 0)
6980 return log_error_errno(r, "Failed to parse systemd.factory_reset kernel command line argument: %m");
6981 if (r > 0) {
6982 arg_factory_reset = b;
6983
6984 if (b)
6985 log_notice("Honouring factory reset requested via kernel command line.");
6986 }
6987
6988 return 0;
6989 }
6990
6991 static int parse_efi_variable_factory_reset(void) {
6992 _cleanup_free_ char *value = NULL;
6993 int r;
6994
6995 if (arg_factory_reset >= 0) /* Never override what is specified on the process command line */
6996 return 0;
6997
6998 if (!in_initrd()) /* Never honour EFI variable factory reset request outside of the initrd */
6999 return 0;
7000
7001 r = efi_get_variable_string(EFI_SYSTEMD_VARIABLE(FactoryReset), &value);
7002 if (r < 0) {
7003 if (r == -ENOENT || ERRNO_IS_NOT_SUPPORTED(r))
7004 return 0;
7005 return log_error_errno(r, "Failed to read EFI variable FactoryReset: %m");
7006 }
7007
7008 r = parse_boolean(value);
7009 if (r < 0)
7010 return log_error_errno(r, "Failed to parse EFI variable FactoryReset: %m");
7011
7012 arg_factory_reset = r;
7013 if (r)
7014 log_notice("Factory reset requested via EFI variable FactoryReset.");
7015
7016 return 0;
7017 }
7018
7019 static int remove_efi_variable_factory_reset(void) {
7020 int r;
7021
7022 r = efi_set_variable(EFI_SYSTEMD_VARIABLE(FactoryReset), NULL, 0);
7023 if (r < 0) {
7024 if (r == -ENOENT || ERRNO_IS_NOT_SUPPORTED(r))
7025 return 0;
7026 return log_error_errno(r, "Failed to remove EFI variable FactoryReset: %m");
7027 }
7028
7029 log_info("Successfully unset EFI variable FactoryReset.");
7030 return 0;
7031 }
7032
7033 static int acquire_root_devno(
7034 const char *p,
7035 const char *root,
7036 int mode,
7037 char **ret,
7038 int *ret_fd) {
7039
7040 _cleanup_free_ char *found_path = NULL, *node = NULL;
7041 dev_t devno, fd_devno = MODE_INVALID;
7042 _cleanup_close_ int fd = -EBADF;
7043 struct stat st;
7044 int r;
7045
7046 assert(p);
7047 assert(ret);
7048 assert(ret_fd);
7049
7050 fd = chase_and_open(p, root, CHASE_PREFIX_ROOT, mode, &found_path);
7051 if (fd < 0)
7052 return fd;
7053
7054 if (fstat(fd, &st) < 0)
7055 return -errno;
7056
7057 if (S_ISREG(st.st_mode)) {
7058 *ret = TAKE_PTR(found_path);
7059 *ret_fd = TAKE_FD(fd);
7060 return 0;
7061 }
7062
7063 if (S_ISBLK(st.st_mode)) {
7064 /* Refuse referencing explicit block devices if a root dir is specified, after all we should
7065 * not be able to leave the image the root path constrains us to. */
7066 if (root)
7067 return -EPERM;
7068
7069 fd_devno = devno = st.st_rdev;
7070 } else if (S_ISDIR(st.st_mode)) {
7071
7072 devno = st.st_dev;
7073 if (major(devno) == 0) {
7074 r = btrfs_get_block_device_fd(fd, &devno);
7075 if (r == -ENOTTY) /* not btrfs */
7076 return -ENODEV;
7077 if (r < 0)
7078 return r;
7079 }
7080 } else
7081 return -ENOTBLK;
7082
7083 /* From dm-crypt to backing partition */
7084 r = block_get_originating(devno, &devno);
7085 if (r == -ENOENT)
7086 log_debug_errno(r, "Device '%s' has no dm-crypt/dm-verity device, no need to look for underlying block device.", p);
7087 else if (r < 0)
7088 log_debug_errno(r, "Failed to find underlying block device for '%s', ignoring: %m", p);
7089
7090 /* From partition to whole disk containing it */
7091 r = block_get_whole_disk(devno, &devno);
7092 if (r < 0)
7093 log_debug_errno(r, "Failed to find whole disk block device for '%s', ignoring: %m", p);
7094
7095 r = devname_from_devnum(S_IFBLK, devno, &node);
7096 if (r < 0)
7097 return log_debug_errno(r, "Failed to determine canonical path for '%s': %m", p);
7098
7099 /* Only if we still look at the same block device we can reuse the fd. Otherwise return an
7100 * invalidated fd. */
7101 if (fd_devno != MODE_INVALID && fd_devno == devno) {
7102 /* Tell udev not to interfere while we are processing the device */
7103 if (flock(fd, arg_dry_run ? LOCK_SH : LOCK_EX) < 0)
7104 return log_error_errno(errno, "Failed to lock device '%s': %m", node);
7105
7106 *ret_fd = TAKE_FD(fd);
7107 } else
7108 *ret_fd = -EBADF;
7109
7110 *ret = TAKE_PTR(node);
7111 return 0;
7112 }
7113
7114 static int find_root(Context *context) {
7115 _cleanup_free_ char *device = NULL;
7116 int r;
7117
7118 assert(context);
7119
7120 if (arg_node) {
7121 if (arg_empty == EMPTY_CREATE) {
7122 _cleanup_close_ int fd = -EBADF;
7123 _cleanup_free_ char *s = NULL;
7124
7125 s = strdup(arg_node);
7126 if (!s)
7127 return log_oom();
7128
7129 fd = open(arg_node, O_RDONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOFOLLOW, 0666);
7130 if (fd < 0)
7131 return log_error_errno(errno, "Failed to create '%s': %m", arg_node);
7132
7133 context->node = TAKE_PTR(s);
7134 context->node_is_our_file = true;
7135 context->backing_fd = TAKE_FD(fd);
7136 return 0;
7137 }
7138
7139 /* Note that we don't specify a root argument here: if the user explicitly configured a node
7140 * we'll take it relative to the host, not the image */
7141 r = acquire_root_devno(arg_node, NULL, O_RDONLY|O_CLOEXEC, &context->node, &context->backing_fd);
7142 if (r == -EUCLEAN)
7143 return btrfs_log_dev_root(LOG_ERR, r, arg_node);
7144 if (r < 0)
7145 return log_error_errno(r, "Failed to open file or determine backing device of %s: %m", arg_node);
7146
7147 return 0;
7148 }
7149
7150 assert(IN_SET(arg_empty, EMPTY_REFUSE, EMPTY_ALLOW));
7151
7152 /* If the root mount has been replaced by some form of volatile file system (overlayfs), the
7153 * original root block device node is symlinked in /run/systemd/volatile-root. Let's read that
7154 * here. */
7155 r = readlink_malloc("/run/systemd/volatile-root", &device);
7156 if (r == -ENOENT) { /* volatile-root not found */
7157 /* Let's search for the root device. We look for two cases here: first in /, and then in /usr. The
7158 * latter we check for cases where / is a tmpfs and only /usr is an actual persistent block device
7159 * (think: volatile setups) */
7160
7161 FOREACH_STRING(p, "/", "/usr") {
7162
7163 r = acquire_root_devno(p, arg_root, O_RDONLY|O_DIRECTORY|O_CLOEXEC, &context->node,
7164 &context->backing_fd);
7165 if (r < 0) {
7166 if (r == -EUCLEAN)
7167 return btrfs_log_dev_root(LOG_ERR, r, p);
7168 if (r != -ENODEV)
7169 return log_error_errno(r, "Failed to determine backing device of %s: %m", p);
7170 } else
7171 return 0;
7172 }
7173 } else if (r < 0)
7174 return log_error_errno(r, "Failed to read symlink /run/systemd/volatile-root: %m");
7175 else {
7176 r = acquire_root_devno(device, NULL, O_RDONLY|O_CLOEXEC, &context->node, &context->backing_fd);
7177 if (r == -EUCLEAN)
7178 return btrfs_log_dev_root(LOG_ERR, r, device);
7179 if (r < 0)
7180 return log_error_errno(r, "Failed to open file or determine backing device of %s: %m", device);
7181
7182 return 0;
7183 }
7184
7185 return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "Failed to discover root block device.");
7186 }
7187
7188 static int resize_pt(int fd, uint64_t sector_size) {
7189 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
7190 int r;
7191
7192 /* After resizing the backing file we need to resize the partition table itself too, so that it takes
7193 * possession of the enlarged backing file. For this it suffices to open the device with libfdisk and
7194 * immediately write it again, with no changes. */
7195
7196 r = fdisk_new_context_at(fd, /* path= */ NULL, /* read_only= */ false, sector_size, &c);
7197 if (r < 0)
7198 return log_error_errno(r, "Failed to open device '%s': %m", FORMAT_PROC_FD_PATH(fd));
7199
7200 r = fdisk_has_label(c);
7201 if (r < 0)
7202 return log_error_errno(r, "Failed to determine whether disk '%s' has a disk label: %m", FORMAT_PROC_FD_PATH(fd));
7203 if (r == 0) {
7204 log_debug("Not resizing partition table, as there currently is none.");
7205 return 0;
7206 }
7207
7208 r = fdisk_write_disklabel(c);
7209 if (r < 0)
7210 return log_error_errno(r, "Failed to write resized partition table: %m");
7211
7212 log_info("Resized partition table.");
7213 return 1;
7214 }
7215
7216 static int resize_backing_fd(
7217 const char *node, /* The primary way we access the disk image to operate on */
7218 int *fd, /* An O_RDONLY fd referring to that inode */
7219 const char *backing_file, /* If the above refers to a loopback device, the backing regular file for that, which we can grow */
7220 LoopDevice *loop_device,
7221 uint64_t sector_size) {
7222
7223 _cleanup_close_ int writable_fd = -EBADF;
7224 uint64_t current_size;
7225 struct stat st;
7226 int r;
7227
7228 assert(node);
7229 assert(fd);
7230
7231 if (arg_size == UINT64_MAX) /* Nothing to do */
7232 return 0;
7233
7234 if (*fd < 0) {
7235 /* Open the file if we haven't opened it yet. Note that we open it read-only here, just to
7236 * keep a reference to the file we can pass around. */
7237 *fd = open(node, O_RDONLY|O_CLOEXEC);
7238 if (*fd < 0)
7239 return log_error_errno(errno, "Failed to open '%s' in order to adjust size: %m", node);
7240 }
7241
7242 if (fstat(*fd, &st) < 0)
7243 return log_error_errno(errno, "Failed to stat '%s': %m", node);
7244
7245 if (S_ISBLK(st.st_mode)) {
7246 if (!backing_file)
7247 return log_error_errno(SYNTHETIC_ERRNO(EBADF), "Cannot resize block device '%s'.", node);
7248
7249 assert(loop_device);
7250
7251 if (ioctl(*fd, BLKGETSIZE64, &current_size) < 0)
7252 return log_error_errno(errno, "Failed to determine size of block device %s: %m", node);
7253 } else {
7254 r = stat_verify_regular(&st);
7255 if (r < 0)
7256 return log_error_errno(r, "Specified path '%s' is not a regular file or loopback block device, cannot resize: %m", node);
7257
7258 assert(!backing_file);
7259 assert(!loop_device);
7260 current_size = st.st_size;
7261 }
7262
7263 if (current_size >= arg_size) {
7264 log_info("File '%s' already is of requested size or larger, not growing. (%s >= %s)",
7265 node, FORMAT_BYTES(current_size), FORMAT_BYTES(arg_size));
7266 return 0;
7267 }
7268
7269 if (S_ISBLK(st.st_mode)) {
7270 assert(backing_file);
7271
7272 /* This is a loopback device. We can't really grow those directly, but we can grow the
7273 * backing file, hence let's do that. */
7274
7275 writable_fd = open(backing_file, O_WRONLY|O_CLOEXEC|O_NONBLOCK);
7276 if (writable_fd < 0)
7277 return log_error_errno(errno, "Failed to open backing file '%s': %m", backing_file);
7278
7279 if (fstat(writable_fd, &st) < 0)
7280 return log_error_errno(errno, "Failed to stat() backing file '%s': %m", backing_file);
7281
7282 r = stat_verify_regular(&st);
7283 if (r < 0)
7284 return log_error_errno(r, "Backing file '%s' of block device is not a regular file: %m", backing_file);
7285
7286 if ((uint64_t) st.st_size != current_size)
7287 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
7288 "Size of backing file '%s' of loopback block device '%s' don't match, refusing.",
7289 node, backing_file);
7290 } else {
7291 assert(S_ISREG(st.st_mode));
7292 assert(!backing_file);
7293
7294 /* The file descriptor is read-only. In order to grow the file we need to have a writable fd. We
7295 * reopen the file for that temporarily. We keep the writable fd only open for this operation though,
7296 * as fdisk can't accept it anyway. */
7297
7298 writable_fd = fd_reopen(*fd, O_WRONLY|O_CLOEXEC);
7299 if (writable_fd < 0)
7300 return log_error_errno(writable_fd, "Failed to reopen backing file '%s' writable: %m", node);
7301 }
7302
7303 if (!arg_discard) {
7304 if (fallocate(writable_fd, 0, 0, arg_size) < 0) {
7305 if (!ERRNO_IS_NOT_SUPPORTED(errno))
7306 return log_error_errno(errno, "Failed to grow '%s' from %s to %s by allocation: %m",
7307 node, FORMAT_BYTES(current_size), FORMAT_BYTES(arg_size));
7308
7309 /* Fallback to truncation, if fallocate() is not supported. */
7310 log_debug("Backing file system does not support fallocate(), falling back to ftruncate().");
7311 } else {
7312 if (current_size == 0) /* Likely regular file just created by us */
7313 log_info("Allocated %s for '%s'.", FORMAT_BYTES(arg_size), node);
7314 else
7315 log_info("File '%s' grown from %s to %s by allocation.",
7316 node, FORMAT_BYTES(current_size), FORMAT_BYTES(arg_size));
7317
7318 goto done;
7319 }
7320 }
7321
7322 if (ftruncate(writable_fd, arg_size) < 0)
7323 return log_error_errno(errno, "Failed to grow '%s' from %s to %s by truncation: %m",
7324 node, FORMAT_BYTES(current_size), FORMAT_BYTES(arg_size));
7325
7326 if (current_size == 0) /* Likely regular file just created by us */
7327 log_info("Sized '%s' to %s.", node, FORMAT_BYTES(arg_size));
7328 else
7329 log_info("File '%s' grown from %s to %s by truncation.",
7330 node, FORMAT_BYTES(current_size), FORMAT_BYTES(arg_size));
7331
7332 done:
7333 r = resize_pt(writable_fd, sector_size);
7334 if (r < 0)
7335 return r;
7336
7337 if (loop_device) {
7338 r = loop_device_refresh_size(loop_device, UINT64_MAX, arg_size);
7339 if (r < 0)
7340 return log_error_errno(r, "Failed to update loop device size: %m");
7341 }
7342
7343 return 1;
7344 }
7345
7346 static int determine_auto_size(Context *c) {
7347 uint64_t sum;
7348
7349 assert(c);
7350
7351 sum = round_up_size(GPT_METADATA_SIZE, 4096);
7352
7353 LIST_FOREACH(partitions, p, c->partitions) {
7354 uint64_t m;
7355
7356 if (p->dropped)
7357 continue;
7358
7359 m = partition_min_size_with_padding(c, p);
7360 if (m > UINT64_MAX - sum)
7361 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Image would grow too large, refusing.");
7362
7363 sum += m;
7364 }
7365
7366 if (c->total != UINT64_MAX)
7367 /* Image already allocated? Then show its size. */
7368 log_info("Automatically determined minimal disk image size as %s, current image size is %s.",
7369 FORMAT_BYTES(sum), FORMAT_BYTES(c->total));
7370 else
7371 /* If the image is being created right now, then it has no previous size, suppress any comment about it hence. */
7372 log_info("Automatically determined minimal disk image size as %s.",
7373 FORMAT_BYTES(sum));
7374
7375 arg_size = sum;
7376 return 0;
7377 }
7378
7379 static int run(int argc, char *argv[]) {
7380 _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
7381 _cleanup_(umount_and_freep) char *mounted_dir = NULL;
7382 _cleanup_(context_freep) Context* context = NULL;
7383 bool node_is_our_loop = false;
7384 int r;
7385
7386 log_show_color(true);
7387 log_parse_environment();
7388 log_open();
7389
7390 r = parse_argv(argc, argv);
7391 if (r <= 0)
7392 return r;
7393
7394 r = parse_proc_cmdline_factory_reset();
7395 if (r < 0)
7396 return r;
7397
7398 r = parse_efi_variable_factory_reset();
7399 if (r < 0)
7400 return r;
7401
7402 #if HAVE_LIBCRYPTSETUP
7403 cryptsetup_enable_logging(NULL);
7404 #endif
7405
7406 if (arg_image) {
7407 assert(!arg_root);
7408
7409 /* Mount this strictly read-only: we shall modify the partition table, not the file
7410 * systems */
7411 r = mount_image_privately_interactively(
7412 arg_image,
7413 arg_image_policy,
7414 DISSECT_IMAGE_MOUNT_READ_ONLY |
7415 (arg_node ? DISSECT_IMAGE_DEVICE_READ_ONLY : 0) | /* If a different node to make changes to is specified let's open the device in read-only mode) */
7416 DISSECT_IMAGE_GPT_ONLY |
7417 DISSECT_IMAGE_RELAX_VAR_CHECK |
7418 DISSECT_IMAGE_USR_NO_ROOT |
7419 DISSECT_IMAGE_REQUIRE_ROOT,
7420 &mounted_dir,
7421 /* ret_dir_fd= */ NULL,
7422 &loop_device);
7423 if (r < 0)
7424 return r;
7425
7426 arg_root = strdup(mounted_dir);
7427 if (!arg_root)
7428 return log_oom();
7429
7430 if (!arg_node) {
7431 arg_node = strdup(loop_device->node);
7432 if (!arg_node)
7433 return log_oom();
7434
7435 /* Remember that the device we are about to manipulate is actually the one we
7436 * allocated here, and thus to increase its backing file we know what to do */
7437 node_is_our_loop = true;
7438 }
7439 }
7440
7441 if (!arg_copy_source && arg_root) {
7442 /* If no explicit copy source is specified, then use --root=/--image= */
7443 arg_copy_source = strdup(arg_root);
7444 if (!arg_copy_source)
7445 return log_oom();
7446 }
7447
7448 context = context_new(arg_seed);
7449 if (!context)
7450 return log_oom();
7451
7452 r = context_copy_from(context);
7453 if (r < 0)
7454 return r;
7455
7456 if (arg_make_ddi) {
7457 _cleanup_free_ char *d = NULL, *dp = NULL;
7458 assert(!arg_definitions);
7459
7460 d = strjoin(arg_make_ddi, ".repart.d/");
7461 if (!d)
7462 return log_oom();
7463
7464 r = search_and_access(d, F_OK, arg_root, CONF_PATHS_USR_STRV("systemd/repart/definitions"), &dp);
7465 if (r < 0)
7466 return log_error_errno(errno, "DDI type '%s' is not defined: %m", arg_make_ddi);
7467
7468 if (strv_consume(&arg_definitions, TAKE_PTR(dp)) < 0)
7469 return log_oom();
7470 } else
7471 strv_uniq(arg_definitions);
7472
7473 r = context_read_definitions(context);
7474 if (r < 0)
7475 return r;
7476
7477 r = find_root(context);
7478 if (r == -ENODEV)
7479 return 76; /* Special return value which means "Root block device not found, so not doing
7480 * anything". This isn't really an error when called at boot. */
7481 if (r < 0)
7482 return r;
7483
7484 if (arg_size != UINT64_MAX) {
7485 r = resize_backing_fd(
7486 context->node,
7487 &context->backing_fd,
7488 node_is_our_loop ? arg_image : NULL,
7489 node_is_our_loop ? loop_device : NULL,
7490 context->sector_size);
7491 if (r < 0)
7492 return r;
7493 }
7494
7495 r = context_load_partition_table(context);
7496 if (r == -EHWPOISON)
7497 return 77; /* Special return value which means "Not GPT, so not doing anything". This isn't
7498 * really an error when called at boot. */
7499 if (r < 0)
7500 return r;
7501 context->from_scratch = r > 0; /* Starting from scratch */
7502
7503 if (arg_can_factory_reset) {
7504 r = context_can_factory_reset(context);
7505 if (r < 0)
7506 return r;
7507 if (r == 0)
7508 return EXIT_FAILURE;
7509
7510 return 0;
7511 }
7512
7513 r = context_factory_reset(context);
7514 if (r < 0)
7515 return r;
7516 if (r > 0) {
7517 /* We actually did a factory reset! */
7518 r = remove_efi_variable_factory_reset();
7519 if (r < 0)
7520 return r;
7521
7522 /* Reload the reduced partition table */
7523 context_unload_partition_table(context);
7524 r = context_load_partition_table(context);
7525 if (r < 0)
7526 return r;
7527 }
7528
7529 r = context_read_seed(context, arg_root);
7530 if (r < 0)
7531 return r;
7532
7533 /* Make sure each partition has a unique UUID and unique label */
7534 r = context_acquire_partition_uuids_and_labels(context);
7535 if (r < 0)
7536 return r;
7537
7538 /* Open all files to copy blocks from now, since we want to take their size into consideration */
7539 r = context_open_copy_block_paths(
7540 context,
7541 loop_device ? loop_device->devno : /* if --image= is specified, only allow partitions on the loopback device */
7542 arg_root && !arg_image ? 0 : /* if --root= is specified, don't accept any block device */
7543 (dev_t) -1); /* if neither is specified, make no restrictions */
7544 if (r < 0)
7545 return r;
7546
7547 r = context_minimize(context);
7548 if (r < 0)
7549 return r;
7550
7551 if (arg_size_auto) {
7552 r = determine_auto_size(context);
7553 if (r < 0)
7554 return r;
7555
7556 /* Flush out everything again, and let's grow the file first, then start fresh */
7557 context_unload_partition_table(context);
7558
7559 assert(arg_size != UINT64_MAX);
7560 r = resize_backing_fd(
7561 context->node,
7562 &context->backing_fd,
7563 node_is_our_loop ? arg_image : NULL,
7564 node_is_our_loop ? loop_device : NULL,
7565 context->sector_size);
7566 if (r < 0)
7567 return r;
7568
7569 r = context_load_partition_table(context);
7570 if (r < 0)
7571 return r;
7572 }
7573
7574 /* First try to fit new partitions in, dropping by priority until it fits */
7575 for (;;) {
7576 uint64_t largest_free_area;
7577
7578 if (context_allocate_partitions(context, &largest_free_area))
7579 break; /* Success! */
7580
7581 if (!context_drop_or_foreignize_one_priority(context)) {
7582 r = log_error_errno(SYNTHETIC_ERRNO(ENOSPC),
7583 "Can't fit requested partitions into available free space (%s), refusing.",
7584 FORMAT_BYTES(largest_free_area));
7585 determine_auto_size(context);
7586 return r;
7587 }
7588 }
7589
7590 /* Now assign free space according to the weight logic */
7591 r = context_grow_partitions(context);
7592 if (r < 0)
7593 return r;
7594
7595 /* Now calculate where each new partition gets placed */
7596 context_place_partitions(context);
7597
7598 (void) context_dump(context, /*late=*/ false);
7599
7600 r = context_write_partition_table(context);
7601 if (r < 0)
7602 return r;
7603
7604 r = context_split(context);
7605 if (r < 0)
7606 return r;
7607
7608 (void) context_dump(context, /*late=*/ true);
7609
7610 context->node = mfree(context->node);
7611
7612 LIST_FOREACH(partitions, p, context->partitions)
7613 p->split_path = mfree(p->split_path);
7614
7615 return 0;
7616 }
7617
7618 DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);