]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/partition/repart.c
Merge pull request #29617 from keszybz/efi-no-xmalloc0
[thirdparty/systemd.git] / src / partition / repart.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #if HAVE_VALGRIND_MEMCHECK_H
4 #include <valgrind/memcheck.h>
5 #endif
6
7 #include <fcntl.h>
8 #include <getopt.h>
9 #include <linux/fs.h>
10 #include <linux/loop.h>
11 #include <sys/file.h>
12 #include <sys/ioctl.h>
13 #include <sys/stat.h>
14
15 #include "sd-device.h"
16 #include "sd-id128.h"
17
18 #include "alloc-util.h"
19 #include "blkid-util.h"
20 #include "blockdev-util.h"
21 #include "btrfs-util.h"
22 #include "build.h"
23 #include "chase.h"
24 #include "conf-files.h"
25 #include "conf-parser.h"
26 #include "constants.h"
27 #include "cryptsetup-util.h"
28 #include "device-util.h"
29 #include "devnum-util.h"
30 #include "dirent-util.h"
31 #include "efivars.h"
32 #include "errno-util.h"
33 #include "fd-util.h"
34 #include "fdisk-util.h"
35 #include "fileio.h"
36 #include "format-table.h"
37 #include "format-util.h"
38 #include "fs-util.h"
39 #include "glyph-util.h"
40 #include "gpt.h"
41 #include "hexdecoct.h"
42 #include "hmac.h"
43 #include "id128-util.h"
44 #include "initrd-util.h"
45 #include "io-util.h"
46 #include "json.h"
47 #include "list.h"
48 #include "loop-util.h"
49 #include "main-func.h"
50 #include "mkdir.h"
51 #include "mkfs-util.h"
52 #include "mount-util.h"
53 #include "mountpoint-util.h"
54 #include "nulstr-util.h"
55 #include "openssl-util.h"
56 #include "parse-argument.h"
57 #include "parse-helpers.h"
58 #include "pretty-print.h"
59 #include "proc-cmdline.h"
60 #include "process-util.h"
61 #include "random-util.h"
62 #include "resize-fs.h"
63 #include "rm-rf.h"
64 #include "sort-util.h"
65 #include "specifier.h"
66 #include "stdio-util.h"
67 #include "string-table.h"
68 #include "string-util.h"
69 #include "strv.h"
70 #include "sync-util.h"
71 #include "terminal-util.h"
72 #include "tmpfile-util.h"
73 #include "tpm2-pcr.h"
74 #include "tpm2-util.h"
75 #include "user-util.h"
76 #include "utf8.h"
77
/* SizeMin= fallback used when a definition does not configure one: 10M */
#define DEFAULT_MIN_SIZE (10ULL * 1024ULL * 1024ULL)

/* Absolute lower bound for the size of any newly created partition */
#define HARD_MIN_SIZE 4096ULL

/* A verity signature partition never holds more than this, hence its size is fixed up front. */
#define VERITY_SIG_SIZE (4ULL * HARD_MIN_SIZE)

/* Creating a GPT disk label with libfdisk costs slightly more than 1M of the disk size */
#define GPT_METADATA_SIZE (1044ULL * 1024ULL)

/* By default LUKS2 metadata eats up 16M of the partition size */
#define LUKS2_METADATA_SIZE (16ULL * 1024ULL * 1024ULL)

/* LUKS2 offline encryption requires some extra free space to be kept at the end of the partition. */
#define LUKS2_METADATA_KEEP_FREE (2ULL * LUKS2_METADATA_SIZE)

/* Size of a LUKS2 volume key: 512 bit, expressed in bytes. */
#define VOLUME_KEY_SIZE (512ULL / 8ULL)

/* The default file system sector size is 4K: as long as partitions are aligned to 4K, file systems
 * created this way remain compatible with sector sizes of 512, 1024 and 2048 too. */
#define DEFAULT_FILESYSTEM_SECTOR_SIZE 4096ULL

/* NUL-separated list of directories, terminated by an empty entry */
#define APIVFS_TMP_DIRS_NULSTR "proc\0sys\0dev\0tmp\0run\0var/tmp\0"
104
105 /* Note: When growing and placing new partitions we always align to 4K sector size. It's how newer hard disks
106 * are designed, and if everything is aligned to that performance is best. And for older hard disks with 512B
107 * sector size devices were generally assumed to have an even number of sectors, hence at the worst we'll
108 * waste 3K per partition, which is probably fine. */
109
/* How to deal with disks that carry no partition table (yet). */
typedef enum EmptyMode {
        EMPTY_UNSET,    /* nothing was selected so far */
        EMPTY_REFUSE,   /* never create a partition table, refuse empty disks */
        EMPTY_ALLOW,    /* empty disks are fine, a partition table is created when needed */
        EMPTY_REQUIRE,  /* the disk must be empty, a partition table is created */
        EMPTY_FORCE,    /* erase everything, always create a fresh partition table */
        EMPTY_CREATE,   /* create the disk as loopback file, always create a partition table */
        _EMPTY_MODE_MAX,
        _EMPTY_MODE_INVALID = -EINVAL,
} EmptyMode;
120
/* Selects how the list of partition types in arg_filter_partitions is interpreted. The enum tag now
 * matches the typedef name, like for all other enums in this file. */
typedef enum FilterPartitionsType {
        FILTER_PARTITIONS_NONE,     /* no filtering */
        FILTER_PARTITIONS_EXCLUDE,  /* listed partition types are excluded */
        FILTER_PARTITIONS_INCLUDE,  /* only listed partition types are included */
        _FILTER_PARTITIONS_MAX,
        _FILTER_PARTITIONS_INVALID = -EINVAL,
} FilterPartitionsType;
128
129 static EmptyMode arg_empty = EMPTY_UNSET;
130 static bool arg_dry_run = true;
131 static const char *arg_node = NULL;
132 static char *arg_root = NULL;
133 static char *arg_image = NULL;
134 static char **arg_definitions = NULL;
135 static bool arg_discard = true;
136 static bool arg_can_factory_reset = false;
137 static int arg_factory_reset = -1;
138 static sd_id128_t arg_seed = SD_ID128_NULL;
139 static bool arg_randomize = false;
140 static int arg_pretty = -1;
141 static uint64_t arg_size = UINT64_MAX;
142 static bool arg_size_auto = false;
143 static JsonFormatFlags arg_json_format_flags = JSON_FORMAT_OFF;
144 static PagerFlags arg_pager_flags = 0;
145 static bool arg_legend = true;
146 static void *arg_key = NULL;
147 static size_t arg_key_size = 0;
148 static EVP_PKEY *arg_private_key = NULL;
149 static X509 *arg_certificate = NULL;
150 static char *arg_tpm2_device = NULL;
151 static Tpm2PCRValue *arg_tpm2_hash_pcr_values = NULL;
152 static size_t arg_tpm2_n_hash_pcr_values = 0;
153 static bool arg_tpm2_hash_pcr_values_use_default = true;
154 static char *arg_tpm2_public_key = NULL;
155 static uint32_t arg_tpm2_public_key_pcr_mask = 0;
156 static bool arg_tpm2_public_key_pcr_mask_use_default = true;
157 static bool arg_split = false;
158 static GptPartitionType *arg_filter_partitions = NULL;
159 static size_t arg_n_filter_partitions = 0;
160 static FilterPartitionsType arg_filter_partitions_type = FILTER_PARTITIONS_NONE;
161 static GptPartitionType *arg_defer_partitions = NULL;
162 static size_t arg_n_defer_partitions = 0;
163 static uint64_t arg_sector_size = 0;
164 static ImagePolicy *arg_image_policy = NULL;
165 static Architecture arg_architecture = _ARCHITECTURE_INVALID;
166 static int arg_offline = -1;
167 static char **arg_copy_from = NULL;
168 static char *arg_copy_source = NULL;
169 static char *arg_make_ddi = NULL;
170
/* Release all heap-allocated option state on exit. arg_key uses the erasing variant of free. */
STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
STATIC_DESTRUCTOR_REGISTER(arg_definitions, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_key, erase_and_freep);
STATIC_DESTRUCTOR_REGISTER(arg_private_key, EVP_PKEY_freep);
STATIC_DESTRUCTOR_REGISTER(arg_certificate, X509_freep);
STATIC_DESTRUCTOR_REGISTER(arg_tpm2_device, freep);
STATIC_DESTRUCTOR_REGISTER(arg_tpm2_hash_pcr_values, freep);
STATIC_DESTRUCTOR_REGISTER(arg_tpm2_public_key, freep);
STATIC_DESTRUCTOR_REGISTER(arg_filter_partitions, freep);
/* Same kind of array as arg_filter_partitions, hence release it the same way (it was missing from
 * this list before). NOTE(review): assumes it is a plain heap array, like its sibling; confirm
 * against the option parser. */
STATIC_DESTRUCTOR_REGISTER(arg_defer_partitions, freep);
STATIC_DESTRUCTOR_REGISTER(arg_image_policy, image_policy_freep);
STATIC_DESTRUCTOR_REGISTER(arg_copy_from, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_copy_source, freep);
STATIC_DESTRUCTOR_REGISTER(arg_make_ddi, freep);
185
186 typedef struct FreeArea FreeArea;
187
/* Encryption mode of a partition; the string form of each value is in encrypt_mode_table[]. */
typedef enum EncryptMode {
        ENCRYPT_OFF,
        ENCRYPT_KEY_FILE,
        ENCRYPT_TPM2,
        ENCRYPT_KEY_FILE_TPM2,
        _ENCRYPT_MODE_MAX,
        _ENCRYPT_MODE_INVALID = -EINVAL,
} EncryptMode;
196
/* Verity role of a partition. Also used as index into Partition.siblings[]. */
typedef enum VerityMode {
        VERITY_OFF,
        VERITY_DATA,
        VERITY_HASH,
        VERITY_SIG,
        _VERITY_MODE_MAX,
        _VERITY_MODE_INVALID = -EINVAL,
} VerityMode;
205
/* Minimization mode of a partition; the string form of each value is in minimize_mode_table[]. */
typedef enum MinimizeMode {
        MINIMIZE_OFF,
        MINIMIZE_BEST,
        MINIMIZE_GUESS,
        _MINIMIZE_MODE_MAX,
        _MINIMIZE_MODE_INVALID = -EINVAL,
} MinimizeMode;
213
214 typedef struct Partition {
215 char *definition_path;
216 char **drop_in_files;
217
218 GptPartitionType type;
219 sd_id128_t current_uuid, new_uuid;
220 bool new_uuid_is_set;
221 char *current_label, *new_label;
222 sd_id128_t fs_uuid, luks_uuid, verity_uuid;
223 uint8_t verity_salt[SHA256_DIGEST_SIZE];
224
225 bool dropped;
226 bool factory_reset;
227 int32_t priority;
228
229 uint32_t weight, padding_weight;
230
231 uint64_t current_size, new_size;
232 uint64_t size_min, size_max;
233
234 uint64_t current_padding, new_padding;
235 uint64_t padding_min, padding_max;
236
237 uint64_t partno;
238 uint64_t offset;
239
240 struct fdisk_partition *current_partition;
241 struct fdisk_partition *new_partition;
242 FreeArea *padding_area;
243 FreeArea *allocated_to_area;
244
245 char *copy_blocks_path;
246 bool copy_blocks_path_is_our_file;
247 bool copy_blocks_auto;
248 const char *copy_blocks_root;
249 int copy_blocks_fd;
250 uint64_t copy_blocks_offset;
251 uint64_t copy_blocks_size;
252
253 char *format;
254 char **copy_files;
255 char **exclude_files_source;
256 char **exclude_files_target;
257 char **make_directories;
258 char **subvolumes;
259 EncryptMode encrypt;
260 VerityMode verity;
261 char *verity_match_key;
262 MinimizeMode minimize;
263 uint64_t verity_data_block_size;
264 uint64_t verity_hash_block_size;
265
266 uint64_t gpt_flags;
267 int no_auto;
268 int read_only;
269 int growfs;
270
271 uint8_t *roothash;
272 size_t roothash_size;
273
274 char *split_name_format;
275 char *split_path;
276
277 struct Partition *siblings[_VERITY_MODE_MAX];
278
279 LIST_FIELDS(struct Partition, partitions);
280 } Partition;
281
/* A partition is foreign if it has no definition file, and it exists if it has a current libfdisk
 * partition object attached. */
#define PARTITION_IS_FOREIGN(p) ((p)->definition_path == NULL)
#define PARTITION_EXISTS(p) ((p)->current_partition != NULL)
284
285 struct FreeArea {
286 Partition *after;
287 uint64_t size;
288 uint64_t allocated;
289 };
290
291 typedef struct Context {
292 LIST_HEAD(Partition, partitions);
293 size_t n_partitions;
294
295 FreeArea **free_areas;
296 size_t n_free_areas;
297
298 uint64_t start, end, total;
299
300 struct fdisk_context *fdisk_context;
301 uint64_t sector_size, grain_size, fs_sector_size;
302
303 sd_id128_t seed;
304
305 char *node;
306 bool node_is_our_file;
307 int backing_fd;
308
309 bool from_scratch;
310 } Context;
311
312 static const char *empty_mode_table[_EMPTY_MODE_MAX] = {
313 [EMPTY_UNSET] = "unset",
314 [EMPTY_REFUSE] = "refuse",
315 [EMPTY_ALLOW] = "allow",
316 [EMPTY_REQUIRE] = "require",
317 [EMPTY_FORCE] = "force",
318 [EMPTY_CREATE] = "create",
319 };
320
321 static const char *encrypt_mode_table[_ENCRYPT_MODE_MAX] = {
322 [ENCRYPT_OFF] = "off",
323 [ENCRYPT_KEY_FILE] = "key-file",
324 [ENCRYPT_TPM2] = "tpm2",
325 [ENCRYPT_KEY_FILE_TPM2] = "key-file+tpm2",
326 };
327
328 static const char *verity_mode_table[_VERITY_MODE_MAX] = {
329 [VERITY_OFF] = "off",
330 [VERITY_DATA] = "data",
331 [VERITY_HASH] = "hash",
332 [VERITY_SIG] = "signature",
333 };
334
335 static const char *minimize_mode_table[_MINIMIZE_MODE_MAX] = {
336 [MINIMIZE_OFF] = "off",
337 [MINIMIZE_BEST] = "best",
338 [MINIMIZE_GUESS] = "guess",
339 };
340
/* Instantiate the lookup helpers for the string tables defined above. The encrypt and minimize modes
 * use the variant that is additionally handed a "yes" value (ENCRYPT_KEY_FILE, MINIMIZE_BEST). */
DEFINE_PRIVATE_STRING_TABLE_LOOKUP(empty_mode, EmptyMode);
DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(encrypt_mode, EncryptMode, ENCRYPT_KEY_FILE);
DEFINE_PRIVATE_STRING_TABLE_LOOKUP(verity_mode, VerityMode);
DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(minimize_mode, MinimizeMode, MINIMIZE_BEST);
345
/* Rounds v down to the closest multiple of p. p must not be zero. */
static uint64_t round_down_size(uint64_t v, uint64_t p) {
        return v - v % p;
}
349
/* Rounds v up to the closest multiple of p. p must not be zero. If the result does not fit into
 * 64 bit, UINT64_MAX is returned instead. */
static uint64_t round_up_size(uint64_t v, uint64_t p) {
        uint64_t n_units;

        /* Number of units of size p needed to cover v, computed without any addition to v that could
         * overflow */
        n_units = v / p + (v % p != 0);

        /* Saturate instead of wrapping around */
        return n_units > UINT64_MAX / p ? UINT64_MAX : n_units * p;
}
359
360 static Partition *partition_new(void) {
361 Partition *p;
362
363 p = new(Partition, 1);
364 if (!p)
365 return NULL;
366
367 *p = (Partition) {
368 .weight = 1000,
369 .padding_weight = 0,
370 .current_size = UINT64_MAX,
371 .new_size = UINT64_MAX,
372 .size_min = UINT64_MAX,
373 .size_max = UINT64_MAX,
374 .current_padding = UINT64_MAX,
375 .new_padding = UINT64_MAX,
376 .padding_min = UINT64_MAX,
377 .padding_max = UINT64_MAX,
378 .partno = UINT64_MAX,
379 .offset = UINT64_MAX,
380 .copy_blocks_fd = -EBADF,
381 .copy_blocks_offset = UINT64_MAX,
382 .copy_blocks_size = UINT64_MAX,
383 .no_auto = -1,
384 .read_only = -1,
385 .growfs = -1,
386 .verity_data_block_size = UINT64_MAX,
387 .verity_hash_block_size = UINT64_MAX,
388 };
389
390 return p;
391 }
392
393 static Partition* partition_free(Partition *p) {
394 if (!p)
395 return NULL;
396
397 free(p->current_label);
398 free(p->new_label);
399 free(p->definition_path);
400 strv_free(p->drop_in_files);
401
402 if (p->current_partition)
403 fdisk_unref_partition(p->current_partition);
404 if (p->new_partition)
405 fdisk_unref_partition(p->new_partition);
406
407 if (p->copy_blocks_path_is_our_file)
408 unlink_and_free(p->copy_blocks_path);
409 else
410 free(p->copy_blocks_path);
411 safe_close(p->copy_blocks_fd);
412
413 free(p->format);
414 strv_free(p->copy_files);
415 strv_free(p->exclude_files_source);
416 strv_free(p->exclude_files_target);
417 strv_free(p->make_directories);
418 strv_free(p->subvolumes);
419 free(p->verity_match_key);
420
421 free(p->roothash);
422
423 free(p->split_name_format);
424 unlink_and_free(p->split_path);
425
426 return mfree(p);
427 }
428
429 static void partition_foreignize(Partition *p) {
430 assert(p);
431 assert(PARTITION_EXISTS(p));
432
433 /* Reset several parameters set through definition file to make the partition foreign. */
434
435 p->definition_path = mfree(p->definition_path);
436 p->drop_in_files = strv_free(p->drop_in_files);
437
438 p->copy_blocks_path = mfree(p->copy_blocks_path);
439 p->copy_blocks_fd = safe_close(p->copy_blocks_fd);
440 p->copy_blocks_root = NULL;
441
442 p->format = mfree(p->format);
443 p->copy_files = strv_free(p->copy_files);
444 p->exclude_files_source = strv_free(p->exclude_files_source);
445 p->exclude_files_target = strv_free(p->exclude_files_target);
446 p->make_directories = strv_free(p->make_directories);
447 p->subvolumes = strv_free(p->subvolumes);
448 p->verity_match_key = mfree(p->verity_match_key);
449
450 p->priority = 0;
451 p->weight = 1000;
452 p->padding_weight = 0;
453 p->size_min = UINT64_MAX;
454 p->size_max = UINT64_MAX;
455 p->padding_min = UINT64_MAX;
456 p->padding_max = UINT64_MAX;
457 p->no_auto = -1;
458 p->read_only = -1;
459 p->growfs = -1;
460 p->verity = VERITY_OFF;
461 }
462
463 static bool partition_type_exclude(const GptPartitionType *type) {
464 if (arg_filter_partitions_type == FILTER_PARTITIONS_NONE)
465 return false;
466
467 for (size_t i = 0; i < arg_n_filter_partitions; i++)
468 if (sd_id128_equal(type->uuid, arg_filter_partitions[i].uuid))
469 return arg_filter_partitions_type == FILTER_PARTITIONS_EXCLUDE;
470
471 return arg_filter_partitions_type == FILTER_PARTITIONS_INCLUDE;
472 }
473
474 static bool partition_type_defer(const GptPartitionType *type) {
475 for (size_t i = 0; i < arg_n_defer_partitions; i++)
476 if (sd_id128_equal(type->uuid, arg_defer_partitions[i].uuid))
477 return true;
478
479 return false;
480 }
481
482 static Partition* partition_unlink_and_free(Context *context, Partition *p) {
483 if (!p)
484 return NULL;
485
486 LIST_REMOVE(partitions, context->partitions, p);
487
488 assert(context->n_partitions > 0);
489 context->n_partitions--;
490
491 return partition_free(p);
492 }
493
494 DEFINE_TRIVIAL_CLEANUP_FUNC(Partition*, partition_free);
495
496 static Context *context_new(sd_id128_t seed) {
497 Context *context;
498
499 context = new(Context, 1);
500 if (!context)
501 return NULL;
502
503 *context = (Context) {
504 .start = UINT64_MAX,
505 .end = UINT64_MAX,
506 .total = UINT64_MAX,
507 .seed = seed,
508 };
509
510 return context;
511 }
512
513 static void context_free_free_areas(Context *context) {
514 assert(context);
515
516 for (size_t i = 0; i < context->n_free_areas; i++)
517 free(context->free_areas[i]);
518
519 context->free_areas = mfree(context->free_areas);
520 context->n_free_areas = 0;
521 }
522
523 static Context *context_free(Context *context) {
524 if (!context)
525 return NULL;
526
527 while (context->partitions)
528 partition_unlink_and_free(context, context->partitions);
529 assert(context->n_partitions == 0);
530
531 context_free_free_areas(context);
532
533 if (context->fdisk_context)
534 fdisk_unref_context(context->fdisk_context);
535
536 safe_close(context->backing_fd);
537 if (context->node_is_our_file)
538 unlink_and_free(context->node);
539 else
540 free(context->node);
541
542 return mfree(context);
543 }
544
545 DEFINE_TRIVIAL_CLEANUP_FUNC(Context*, context_free);
546
547 static int context_add_free_area(
548 Context *context,
549 uint64_t size,
550 Partition *after) {
551
552 FreeArea *a;
553
554 assert(context);
555 assert(!after || !after->padding_area);
556
557 if (!GREEDY_REALLOC(context->free_areas, context->n_free_areas + 1))
558 return -ENOMEM;
559
560 a = new(FreeArea, 1);
561 if (!a)
562 return -ENOMEM;
563
564 *a = (FreeArea) {
565 .size = size,
566 .after = after,
567 };
568
569 context->free_areas[context->n_free_areas++] = a;
570
571 if (after)
572 after->padding_area = a;
573
574 return 0;
575 }
576
577 static void partition_drop_or_foreignize(Partition *p) {
578 if (!p || p->dropped || PARTITION_IS_FOREIGN(p))
579 return;
580
581 if (PARTITION_EXISTS(p)) {
582 log_info("Can't grow existing partition %s of priority %" PRIi32 ", ignoring.",
583 strna(p->current_label ?: p->new_label), p->priority);
584
585 /* Handle the partition as foreign. Do not set dropped flag. */
586 partition_foreignize(p);
587 } else {
588 log_info("Can't fit partition %s of priority %" PRIi32 ", dropping.",
589 p->definition_path, p->priority);
590
591 p->dropped = true;
592 p->allocated_to_area = NULL;
593 }
594 }
595
596 static bool context_drop_or_foreignize_one_priority(Context *context) {
597 int32_t priority = 0;
598
599 LIST_FOREACH(partitions, p, context->partitions) {
600 if (p->dropped)
601 continue;
602
603 priority = MAX(priority, p->priority);
604 }
605
606 /* Refuse to drop partitions with 0 or negative priorities or partitions of priorities that have at
607 * least one existing priority */
608 if (priority <= 0)
609 return false;
610
611 LIST_FOREACH(partitions, p, context->partitions) {
612 if (p->priority < priority)
613 continue;
614
615 partition_drop_or_foreignize(p);
616
617 /* We ensure that all verity sibling partitions have the same priority, so it's safe
618 * to drop all siblings here as well. */
619
620 for (VerityMode mode = VERITY_OFF + 1; mode < _VERITY_MODE_MAX; mode++)
621 partition_drop_or_foreignize(p->siblings[mode]);
622 }
623
624 return true;
625 }
626
627 static uint64_t partition_min_size(const Context *context, const Partition *p) {
628 uint64_t sz;
629
630 assert(context);
631 assert(p);
632
633 /* Calculate the disk space we really need at minimum for this partition. If the partition already
634 * exists the current size is what we really need. If it doesn't exist yet refuse to allocate less
635 * than 4K.
636 *
637 * DEFAULT_MIN_SIZE is the default SizeMin= we configure if nothing else is specified. */
638
639 if (PARTITION_IS_FOREIGN(p)) {
640 /* Don't allow changing size of partitions not managed by us */
641 assert(p->current_size != UINT64_MAX);
642 return p->current_size;
643 }
644
645 if (p->verity == VERITY_SIG)
646 return VERITY_SIG_SIZE;
647
648 sz = p->current_size != UINT64_MAX ? p->current_size : HARD_MIN_SIZE;
649
650 if (!PARTITION_EXISTS(p)) {
651 uint64_t d = 0;
652
653 if (p->encrypt != ENCRYPT_OFF)
654 d += round_up_size(LUKS2_METADATA_KEEP_FREE, context->grain_size);
655
656 if (p->copy_blocks_size != UINT64_MAX)
657 d += round_up_size(p->copy_blocks_size, context->grain_size);
658 else if (p->format || p->encrypt != ENCRYPT_OFF) {
659 uint64_t f;
660
661 /* If we shall synthesize a file system, take minimal fs size into account (assumed to be 4K if not known) */
662 f = p->format ? round_up_size(minimal_size_by_fs_name(p->format), context->grain_size) : UINT64_MAX;
663 d += f == UINT64_MAX ? context->grain_size : f;
664 }
665
666 if (d > sz)
667 sz = d;
668 }
669
670 return MAX(round_up_size(p->size_min != UINT64_MAX ? p->size_min : DEFAULT_MIN_SIZE, context->grain_size), sz);
671 }
672
673 static uint64_t partition_max_size(const Context *context, const Partition *p) {
674 uint64_t sm;
675
676 /* Calculate how large the partition may become at max. This is generally the configured maximum
677 * size, except when it already exists and is larger than that. In that case it's the existing size,
678 * since we never want to shrink partitions. */
679
680 assert(context);
681 assert(p);
682
683 if (PARTITION_IS_FOREIGN(p)) {
684 /* Don't allow changing size of partitions not managed by us */
685 assert(p->current_size != UINT64_MAX);
686 return p->current_size;
687 }
688
689 if (p->verity == VERITY_SIG)
690 return VERITY_SIG_SIZE;
691
692 if (p->size_max == UINT64_MAX)
693 return UINT64_MAX;
694
695 sm = round_down_size(p->size_max, context->grain_size);
696
697 if (p->current_size != UINT64_MAX)
698 sm = MAX(p->current_size, sm);
699
700 return MAX(partition_min_size(context, p), sm);
701 }
702
703 static uint64_t partition_min_padding(const Partition *p) {
704 assert(p);
705 return p->padding_min != UINT64_MAX ? p->padding_min : 0;
706 }
707
708 static uint64_t partition_max_padding(const Partition *p) {
709 assert(p);
710 return p->padding_max;
711 }
712
713 static uint64_t partition_min_size_with_padding(Context *context, const Partition *p) {
714 uint64_t sz;
715
716 /* Calculate the disk space we need for this partition plus any free space coming after it. This
717 * takes user configured padding into account as well as any additional whitespace needed to align
718 * the next partition to 4K again. */
719
720 assert(context);
721 assert(p);
722
723 sz = partition_min_size(context, p) + partition_min_padding(p);
724
725 if (PARTITION_EXISTS(p)) {
726 /* If the partition wasn't aligned, add extra space so that any we might add will be aligned */
727 assert(p->offset != UINT64_MAX);
728 return round_up_size(p->offset + sz, context->grain_size) - p->offset;
729 }
730
731 /* If this is a new partition we'll place it aligned, hence we just need to round up the required size here */
732 return round_up_size(sz, context->grain_size);
733 }
734
735 static uint64_t free_area_available(const FreeArea *a) {
736 assert(a);
737
738 /* Determines how much of this free area is not allocated yet */
739
740 assert(a->size >= a->allocated);
741 return a->size - a->allocated;
742 }
743
744 static uint64_t free_area_current_end(Context *context, const FreeArea *a) {
745 assert(context);
746 assert(a);
747
748 if (!a->after)
749 return free_area_available(a);
750
751 assert(a->after->offset != UINT64_MAX);
752 assert(a->after->current_size != UINT64_MAX);
753
754 /* Calculate where the free area ends, based on the offset of the partition preceding it. */
755 return round_up_size(a->after->offset + a->after->current_size, context->grain_size) + free_area_available(a);
756 }
757
758 static uint64_t free_area_min_end(Context *context, const FreeArea *a) {
759 assert(context);
760 assert(a);
761
762 if (!a->after)
763 return 0;
764
765 assert(a->after->offset != UINT64_MAX);
766 assert(a->after->current_size != UINT64_MAX);
767
768 /* Calculate where the partition would end when we give it as much as it needs. */
769 return round_up_size(a->after->offset + partition_min_size_with_padding(context, a->after), context->grain_size);
770 }
771
772 static uint64_t free_area_available_for_new_partitions(Context *context, const FreeArea *a) {
773 assert(context);
774 assert(a);
775
776 /* Similar to free_area_available(), but takes into account that the required size and padding of the
777 * preceding partition is honoured. */
778
779 return LESS_BY(free_area_current_end(context, a), free_area_min_end(context, a));
780 }
781
782 static int free_area_compare(FreeArea *const *a, FreeArea *const*b, Context *context) {
783 assert(context);
784
785 return CMP(free_area_available_for_new_partitions(context, *a),
786 free_area_available_for_new_partitions(context, *b));
787 }
788
789 static uint64_t charge_size(Context *context, uint64_t total, uint64_t amount) {
790 assert(context);
791 /* Subtract the specified amount from total, rounding up to multiple of 4K if there's room */
792 assert(amount <= total);
793 return LESS_BY(total, round_up_size(amount, context->grain_size));
794 }
795
/* Subtracts 'amount' from the weight 'total'. The amount must not exceed the total. */
static uint64_t charge_weight(uint64_t total, uint64_t amount) {
        uint64_t remaining;

        assert(amount <= total);

        remaining = total - amount;
        return remaining;
}
800
801 static bool context_allocate_partitions(Context *context, uint64_t *ret_largest_free_area) {
802 assert(context);
803
804 /* This may be called multiple times. Reset previous assignments. */
805 for (size_t i = 0; i < context->n_free_areas; i++)
806 context->free_areas[i]->allocated = 0;
807
808 /* Sort free areas by size, putting smallest first */
809 typesafe_qsort_r(context->free_areas, context->n_free_areas, free_area_compare, context);
810
811 /* In any case return size of the largest free area (i.e. not the size of all free areas
812 * combined!) */
813 if (ret_largest_free_area)
814 *ret_largest_free_area =
815 context->n_free_areas == 0 ? 0 :
816 free_area_available_for_new_partitions(context, context->free_areas[context->n_free_areas-1]);
817
818 /* Check that each existing partition can fit its area. */
819 for (size_t i = 0; i < context->n_free_areas; i++)
820 if (free_area_current_end(context, context->free_areas[i]) <
821 free_area_min_end(context, context->free_areas[i]))
822 return false;
823
824 /* A simple first-fit algorithm. We return true if we can fit the partitions in, otherwise false. */
825 LIST_FOREACH(partitions, p, context->partitions) {
826 bool fits = false;
827 uint64_t required;
828 FreeArea *a = NULL;
829
830 /* Skip partitions we already dropped or that already exist */
831 if (p->dropped || PARTITION_EXISTS(p))
832 continue;
833
834 /* How much do we need to fit? */
835 required = partition_min_size_with_padding(context, p);
836 assert(required % context->grain_size == 0);
837
838 for (size_t i = 0; i < context->n_free_areas; i++) {
839 a = context->free_areas[i];
840
841 if (free_area_available_for_new_partitions(context, a) >= required) {
842 fits = true;
843 break;
844 }
845 }
846
847 if (!fits)
848 return false; /* 😢 Oh no! We can't fit this partition into any free area! */
849
850 /* Assign the partition to this free area */
851 p->allocated_to_area = a;
852
853 /* Budget the minimal partition size */
854 a->allocated += required;
855 }
856
857 return true;
858 }
859
860 static int context_sum_weights(Context *context, FreeArea *a, uint64_t *ret) {
861 uint64_t weight_sum = 0;
862
863 assert(context);
864 assert(a);
865 assert(ret);
866
867 /* Determine the sum of the weights of all partitions placed in or before the specified free area */
868
869 LIST_FOREACH(partitions, p, context->partitions) {
870 if (p->padding_area != a && p->allocated_to_area != a)
871 continue;
872
873 if (p->weight > UINT64_MAX - weight_sum)
874 goto overflow_sum;
875 weight_sum += p->weight;
876
877 if (p->padding_weight > UINT64_MAX - weight_sum)
878 goto overflow_sum;
879 weight_sum += p->padding_weight;
880 }
881
882 *ret = weight_sum;
883 return 0;
884
885 overflow_sum:
886 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Combined weight of partition exceeds unsigned 64-bit range, refusing.");
887 }
888
/* Calculates value * weight / weight_sum, avoiding overflows of the intermediate product. */
static uint64_t scale_by_weight(uint64_t value, uint64_t weight, uint64_t weight_sum) {
        assert(weight_sum >= weight);

        /* As long as the product would overflow, halve both weights. 'weight_sum' is rounded up and
         * 'weight' is rounded down, so that:
         * - the sum of scale_by_weight() for all weights never exceeds the input value,
         * - scale_by_weight() never exceeds the ideal value (i.e. what uint128_t would yield). */
        while (weight != 0 && weight != weight_sum && value > UINT64_MAX / weight) {
                weight_sum = weight_sum / 2 + weight_sum % 2;
                weight /= 2;
        }

        if (weight == 0)
                return 0;
        if (weight == weight_sum)
                return value;

        return value * weight / weight_sum;
}
908
/* The phases of distributing the space of a free area among its partitions, run in this order. */
typedef enum GrowPartitionPhase {
        /* Phase zero: foreign partitions (i.e. those we don't manage) keep their current size. */
        PHASE_FOREIGN,

        /* Phase one: charge partitions whose constraints demand more than their weight-based share. */
        PHASE_OVERCHARGE,

        /* Phase two: charge partitions whose constraints allow less than their weight-based share. */
        PHASE_UNDERCHARGE,

        /* Phase three: distribute what remains among the remaining partitions, by weight. */
        PHASE_DISTRIBUTE,

        _GROW_PARTITION_PHASE_MAX,
} GrowPartitionPhase;
924
925 static bool context_grow_partitions_phase(
926 Context *context,
927 FreeArea *a,
928 GrowPartitionPhase phase,
929 uint64_t *span,
930 uint64_t *weight_sum) {
931
932 bool try_again = false;
933
934 assert(context);
935 assert(a);
936 assert(span);
937 assert(weight_sum);
938
939 /* Now let's look at the intended weights and adjust them taking the minimum space assignments into
940 * account. i.e. if a partition has a small weight but a high minimum space value set it should not
941 * get any additional room from the left-overs. Similar, if two partitions have the same weight they
942 * should get the same space if possible, even if one has a smaller minimum size than the other. */
943 LIST_FOREACH(partitions, p, context->partitions) {
944
945 /* Look only at partitions associated with this free area, i.e. immediately
946 * preceding it, or allocated into it */
947 if (p->allocated_to_area != a && p->padding_area != a)
948 continue;
949
950 if (p->new_size == UINT64_MAX) {
951 uint64_t share, rsz, xsz;
952 bool charge = false;
953
954 /* Calculate how much this space this partition needs if everyone would get
955 * the weight based share */
956 share = scale_by_weight(*span, p->weight, *weight_sum);
957
958 rsz = partition_min_size(context, p);
959 xsz = partition_max_size(context, p);
960
961 if (phase == PHASE_FOREIGN && PARTITION_IS_FOREIGN(p)) {
962 /* Never change of foreign partitions (i.e. those we don't manage) */
963
964 p->new_size = p->current_size;
965 charge = true;
966
967 } else if (phase == PHASE_OVERCHARGE && rsz > share) {
968 /* This partition needs more than its calculated share. Let's assign
969 * it that, and take this partition out of all calculations and start
970 * again. */
971
972 p->new_size = rsz;
973 charge = try_again = true;
974
975 } else if (phase == PHASE_UNDERCHARGE && xsz < share) {
976 /* This partition accepts less than its calculated
977 * share. Let's assign it that, and take this partition out
978 * of all calculations and start again. */
979
980 p->new_size = xsz;
981 charge = try_again = true;
982
983 } else if (phase == PHASE_DISTRIBUTE) {
984 /* This partition can accept its calculated share. Let's
985 * assign it. There's no need to restart things here since
986 * assigning this shouldn't impact the shares of the other
987 * partitions. */
988
989 assert(share >= rsz);
990 p->new_size = CLAMP(round_down_size(share, context->grain_size), rsz, xsz);
991 charge = true;
992 }
993
994 if (charge) {
995 *span = charge_size(context, *span, p->new_size);
996 *weight_sum = charge_weight(*weight_sum, p->weight);
997 }
998 }
999
1000 if (p->new_padding == UINT64_MAX) {
1001 uint64_t share, rsz, xsz;
1002 bool charge = false;
1003
1004 share = scale_by_weight(*span, p->padding_weight, *weight_sum);
1005
1006 rsz = partition_min_padding(p);
1007 xsz = partition_max_padding(p);
1008
1009 if (phase == PHASE_OVERCHARGE && rsz > share) {
1010 p->new_padding = rsz;
1011 charge = try_again = true;
1012 } else if (phase == PHASE_UNDERCHARGE && xsz < share) {
1013 p->new_padding = xsz;
1014 charge = try_again = true;
1015 } else if (phase == PHASE_DISTRIBUTE) {
1016 assert(share >= rsz);
1017 p->new_padding = CLAMP(round_down_size(share, context->grain_size), rsz, xsz);
1018 charge = true;
1019 }
1020
1021 if (charge) {
1022 *span = charge_size(context, *span, p->new_padding);
1023 *weight_sum = charge_weight(*weight_sum, p->padding_weight);
1024 }
1025 }
1026 }
1027
1028 return !try_again;
1029 }
1030
1031 static void context_grow_partition_one(Context *context, FreeArea *a, Partition *p, uint64_t *span) {
1032 uint64_t m;
1033
1034 assert(context);
1035 assert(a);
1036 assert(p);
1037 assert(span);
1038
1039 if (*span == 0)
1040 return;
1041
1042 if (p->allocated_to_area != a)
1043 return;
1044
1045 if (PARTITION_IS_FOREIGN(p))
1046 return;
1047
1048 assert(p->new_size != UINT64_MAX);
1049
1050 /* Calculate new size and align. */
1051 m = round_down_size(p->new_size + *span, context->grain_size);
1052 /* But ensure this doesn't shrink the size. */
1053 m = MAX(m, p->new_size);
1054 /* And ensure this doesn't exceed the maximum size. */
1055 m = MIN(m, partition_max_size(context, p));
1056
1057 assert(m >= p->new_size);
1058
1059 *span = charge_size(context, *span, m - p->new_size);
1060 p->new_size = m;
1061 }
1062
1063 static int context_grow_partitions_on_free_area(Context *context, FreeArea *a) {
1064 uint64_t weight_sum = 0, span;
1065 int r;
1066
1067 assert(context);
1068 assert(a);
1069
1070 r = context_sum_weights(context, a, &weight_sum);
1071 if (r < 0)
1072 return r;
1073
1074 /* Let's calculate the total area covered by this free area and the partition before it */
1075 span = a->size;
1076 if (a->after) {
1077 assert(a->after->offset != UINT64_MAX);
1078 assert(a->after->current_size != UINT64_MAX);
1079
1080 span += round_up_size(a->after->offset + a->after->current_size, context->grain_size) - a->after->offset;
1081 }
1082
1083 for (GrowPartitionPhase phase = 0; phase < _GROW_PARTITION_PHASE_MAX;)
1084 if (context_grow_partitions_phase(context, a, phase, &span, &weight_sum))
1085 phase++; /* go to the next phase */
1086
1087 /* We still have space left over? Donate to preceding partition if we have one */
1088 if (span > 0 && a->after)
1089 context_grow_partition_one(context, a, a->after, &span);
1090
1091 /* What? Even still some space left (maybe because there was no preceding partition, or it had a
1092 * size limit), then let's donate it to whoever wants it. */
1093 if (span > 0)
1094 LIST_FOREACH(partitions, p, context->partitions) {
1095 context_grow_partition_one(context, a, p, &span);
1096 if (span == 0)
1097 break;
1098 }
1099
1100 /* Yuck, still no one? Then make it padding */
1101 if (span > 0 && a->after) {
1102 assert(a->after->new_padding != UINT64_MAX);
1103 a->after->new_padding += span;
1104 }
1105
1106 return 0;
1107 }
1108
1109 static int context_grow_partitions(Context *context) {
1110 int r;
1111
1112 assert(context);
1113
1114 for (size_t i = 0; i < context->n_free_areas; i++) {
1115 r = context_grow_partitions_on_free_area(context, context->free_areas[i]);
1116 if (r < 0)
1117 return r;
1118 }
1119
1120 /* All existing partitions that have no free space after them can't change size */
1121 LIST_FOREACH(partitions, p, context->partitions) {
1122 if (p->dropped)
1123 continue;
1124
1125 if (!PARTITION_EXISTS(p) || p->padding_area) {
1126 /* The algorithm above must have initialized this already */
1127 assert(p->new_size != UINT64_MAX);
1128 continue;
1129 }
1130
1131 assert(p->new_size == UINT64_MAX);
1132 p->new_size = p->current_size;
1133
1134 assert(p->new_padding == UINT64_MAX);
1135 p->new_padding = p->current_padding;
1136 }
1137
1138 return 0;
1139 }
1140
1141 static uint64_t find_first_unused_partno(Context *context) {
1142 uint64_t partno = 0;
1143
1144 assert(context);
1145
1146 for (partno = 0;; partno++) {
1147 bool found = false;
1148 LIST_FOREACH(partitions, p, context->partitions)
1149 if (p->partno != UINT64_MAX && p->partno == partno)
1150 found = true;
1151 if (!found)
1152 break;
1153 }
1154
1155 return partno;
1156 }
1157
1158 static void context_place_partitions(Context *context) {
1159
1160 assert(context);
1161
1162 for (size_t i = 0; i < context->n_free_areas; i++) {
1163 FreeArea *a = context->free_areas[i];
1164 _unused_ uint64_t left;
1165 uint64_t start;
1166
1167 if (a->after) {
1168 assert(a->after->offset != UINT64_MAX);
1169 assert(a->after->new_size != UINT64_MAX);
1170 assert(a->after->new_padding != UINT64_MAX);
1171
1172 start = a->after->offset + a->after->new_size + a->after->new_padding;
1173 } else
1174 start = context->start;
1175
1176 start = round_up_size(start, context->grain_size);
1177 left = a->size;
1178
1179 LIST_FOREACH(partitions, p, context->partitions) {
1180 if (p->allocated_to_area != a)
1181 continue;
1182
1183 p->offset = start;
1184 p->partno = find_first_unused_partno(context);
1185
1186 assert(left >= p->new_size);
1187 start += p->new_size;
1188 left -= p->new_size;
1189
1190 assert(left >= p->new_padding);
1191 start += p->new_padding;
1192 left -= p->new_padding;
1193 }
1194 }
1195 }
1196
1197 static int config_parse_type(
1198 const char *unit,
1199 const char *filename,
1200 unsigned line,
1201 const char *section,
1202 unsigned section_line,
1203 const char *lvalue,
1204 int ltype,
1205 const char *rvalue,
1206 void *data,
1207 void *userdata) {
1208
1209 GptPartitionType *type = ASSERT_PTR(data);
1210 int r;
1211
1212 assert(rvalue);
1213
1214 r = gpt_partition_type_from_string(rvalue, type);
1215 if (r < 0)
1216 return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse partition type: %s", rvalue);
1217
1218 if (arg_architecture >= 0)
1219 *type = gpt_partition_type_override_architecture(*type, arg_architecture);
1220
1221 return 0;
1222 }
1223
1224 static int config_parse_label(
1225 const char *unit,
1226 const char *filename,
1227 unsigned line,
1228 const char *section,
1229 unsigned section_line,
1230 const char *lvalue,
1231 int ltype,
1232 const char *rvalue,
1233 void *data,
1234 void *userdata) {
1235
1236 _cleanup_free_ char *resolved = NULL;
1237 char **label = ASSERT_PTR(data);
1238 int r;
1239
1240 assert(rvalue);
1241
1242 /* Nota bene: the empty label is a totally valid one. Let's hence not follow our usual rule of
1243 * assigning the empty string to reset to default here, but really accept it as label to set. */
1244
1245 r = specifier_printf(rvalue, GPT_LABEL_MAX, system_and_tmp_specifier_table, arg_root, NULL, &resolved);
1246 if (r < 0) {
1247 log_syntax(unit, LOG_WARNING, filename, line, r,
1248 "Failed to expand specifiers in Label=, ignoring: %s", rvalue);
1249 return 0;
1250 }
1251
1252 if (!utf8_is_valid(resolved)) {
1253 log_syntax(unit, LOG_WARNING, filename, line, 0,
1254 "Partition label not valid UTF-8, ignoring: %s", rvalue);
1255 return 0;
1256 }
1257
1258 r = gpt_partition_label_valid(resolved);
1259 if (r < 0) {
1260 log_syntax(unit, LOG_WARNING, filename, line, r,
1261 "Failed to check if string is valid as GPT partition label, ignoring: \"%s\" (from \"%s\")",
1262 resolved, rvalue);
1263 return 0;
1264 }
1265 if (!r) {
1266 log_syntax(unit, LOG_WARNING, filename, line, 0,
1267 "Partition label too long for GPT table, ignoring: \"%s\" (from \"%s\")",
1268 resolved, rvalue);
1269 return 0;
1270 }
1271
1272 free_and_replace(*label, resolved);
1273 return 0;
1274 }
1275
1276 static int config_parse_weight(
1277 const char *unit,
1278 const char *filename,
1279 unsigned line,
1280 const char *section,
1281 unsigned section_line,
1282 const char *lvalue,
1283 int ltype,
1284 const char *rvalue,
1285 void *data,
1286 void *userdata) {
1287
1288 uint32_t *w = ASSERT_PTR(data), v;
1289 int r;
1290
1291 assert(rvalue);
1292
1293 r = safe_atou32(rvalue, &v);
1294 if (r < 0) {
1295 log_syntax(unit, LOG_WARNING, filename, line, r,
1296 "Failed to parse weight value, ignoring: %s", rvalue);
1297 return 0;
1298 }
1299
1300 if (v > 1000U*1000U) {
1301 log_syntax(unit, LOG_WARNING, filename, line, 0,
1302 "Weight needs to be in range 0…10000000, ignoring: %" PRIu32, v);
1303 return 0;
1304 }
1305
1306 *w = v;
1307 return 0;
1308 }
1309
1310 static int config_parse_size4096(
1311 const char *unit,
1312 const char *filename,
1313 unsigned line,
1314 const char *section,
1315 unsigned section_line,
1316 const char *lvalue,
1317 int ltype,
1318 const char *rvalue,
1319 void *data,
1320 void *userdata) {
1321
1322 uint64_t *sz = data, parsed;
1323 int r;
1324
1325 assert(rvalue);
1326 assert(data);
1327
1328 r = parse_size(rvalue, 1024, &parsed);
1329 if (r < 0)
1330 return log_syntax(unit, LOG_ERR, filename, line, r,
1331 "Failed to parse size value: %s", rvalue);
1332
1333 if (ltype > 0)
1334 *sz = round_up_size(parsed, 4096);
1335 else if (ltype < 0)
1336 *sz = round_down_size(parsed, 4096);
1337 else
1338 *sz = parsed;
1339
1340 if (*sz != parsed)
1341 log_syntax(unit, LOG_NOTICE, filename, line, r, "Rounded %s= size %" PRIu64 " %s %" PRIu64 ", a multiple of 4096.",
1342 lvalue, parsed, special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), *sz);
1343
1344 return 0;
1345 }
1346
1347 static int config_parse_block_size(
1348 const char *unit,
1349 const char *filename,
1350 unsigned line,
1351 const char *section,
1352 unsigned section_line,
1353 const char *lvalue,
1354 int ltype,
1355 const char *rvalue,
1356 void *data,
1357 void *userdata) {
1358
1359 uint64_t *blksz = ASSERT_PTR(data), parsed;
1360 int r;
1361
1362 assert(rvalue);
1363
1364 r = parse_size(rvalue, 1024, &parsed);
1365 if (r < 0)
1366 return log_syntax(unit, LOG_ERR, filename, line, r,
1367 "Failed to parse size value: %s", rvalue);
1368
1369 if (parsed < 512 || parsed > 4096)
1370 return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL),
1371 "Value not between 512 and 4096: %s", rvalue);
1372
1373 if (!ISPOWEROF2(parsed))
1374 return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL),
1375 "Value not a power of 2: %s", rvalue);
1376
1377 *blksz = parsed;
1378 return 0;
1379 }
1380
1381 static int config_parse_fstype(
1382 const char *unit,
1383 const char *filename,
1384 unsigned line,
1385 const char *section,
1386 unsigned section_line,
1387 const char *lvalue,
1388 int ltype,
1389 const char *rvalue,
1390 void *data,
1391 void *userdata) {
1392
1393 char **fstype = ASSERT_PTR(data);
1394 const char *e;
1395
1396 assert(rvalue);
1397
1398 /* Let's provide an easy way to override the chosen fstype for file system partitions */
1399 e = secure_getenv("SYSTEMD_REPART_OVERRIDE_FSTYPE");
1400 if (e && !streq(rvalue, e)) {
1401 log_syntax(unit, LOG_NOTICE, filename, line, 0,
1402 "Overriding defined file system type '%s' with '%s'.", rvalue, e);
1403 rvalue = e;
1404 }
1405
1406 if (!filename_is_valid(rvalue))
1407 return log_syntax(unit, LOG_ERR, filename, line, 0,
1408 "File system type is not valid, refusing: %s", rvalue);
1409
1410 return free_and_strdup_warn(fstype, rvalue);
1411 }
1412
1413 static int config_parse_copy_files(
1414 const char *unit,
1415 const char *filename,
1416 unsigned line,
1417 const char *section,
1418 unsigned section_line,
1419 const char *lvalue,
1420 int ltype,
1421 const char *rvalue,
1422 void *data,
1423 void *userdata) {
1424
1425 _cleanup_free_ char *source = NULL, *buffer = NULL, *resolved_source = NULL, *resolved_target = NULL;
1426 const char *p = rvalue, *target;
1427 char ***copy_files = ASSERT_PTR(data);
1428 int r;
1429
1430 assert(rvalue);
1431
1432 r = extract_first_word(&p, &source, ":", EXTRACT_CUNESCAPE|EXTRACT_DONT_COALESCE_SEPARATORS);
1433 if (r < 0)
1434 return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract source path: %s", rvalue);
1435 if (r == 0) {
1436 log_syntax(unit, LOG_WARNING, filename, line, 0, "No argument specified: %s", rvalue);
1437 return 0;
1438 }
1439
1440 r = extract_first_word(&p, &buffer, ":", EXTRACT_CUNESCAPE|EXTRACT_DONT_COALESCE_SEPARATORS);
1441 if (r < 0)
1442 return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract target path: %s", rvalue);
1443 if (r == 0)
1444 target = source; /* No target, then it's the same as the source */
1445 else
1446 target = buffer;
1447
1448 if (!isempty(p))
1449 return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL), "Too many arguments: %s", rvalue);
1450
1451 r = specifier_printf(source, PATH_MAX-1, system_and_tmp_specifier_table, arg_root, NULL, &resolved_source);
1452 if (r < 0) {
1453 log_syntax(unit, LOG_WARNING, filename, line, r,
1454 "Failed to expand specifiers in CopyFiles= source, ignoring: %s", rvalue);
1455 return 0;
1456 }
1457
1458 r = path_simplify_and_warn(resolved_source, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
1459 if (r < 0)
1460 return 0;
1461
1462 r = specifier_printf(target, PATH_MAX-1, system_and_tmp_specifier_table, arg_root, NULL, &resolved_target);
1463 if (r < 0) {
1464 log_syntax(unit, LOG_WARNING, filename, line, r,
1465 "Failed to expand specifiers in CopyFiles= target, ignoring: %s", resolved_target);
1466 return 0;
1467 }
1468
1469 r = path_simplify_and_warn(resolved_target, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
1470 if (r < 0)
1471 return 0;
1472
1473 r = strv_consume_pair(copy_files, TAKE_PTR(resolved_source), TAKE_PTR(resolved_target));
1474 if (r < 0)
1475 return log_oom();
1476
1477 return 0;
1478 }
1479
1480 static int config_parse_exclude_files(
1481 const char *unit,
1482 const char *filename,
1483 unsigned line,
1484 const char *section,
1485 unsigned section_line,
1486 const char *lvalue,
1487 int ltype,
1488 const char *rvalue,
1489 void *data,
1490 void *userdata) {
1491 _cleanup_free_ char *resolved = NULL;
1492 char ***exclude_files = ASSERT_PTR(data);
1493 int r;
1494
1495 if (isempty(rvalue)) {
1496 *exclude_files = strv_free(*exclude_files);
1497 return 0;
1498 }
1499
1500 r = specifier_printf(rvalue, PATH_MAX-1, system_and_tmp_specifier_table, arg_root, NULL, &resolved);
1501 if (r < 0) {
1502 log_syntax(unit, LOG_WARNING, filename, line, r,
1503 "Failed to expand specifiers in ExcludeFiles= path, ignoring: %s", rvalue);
1504 return 0;
1505 }
1506
1507 r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE|PATH_KEEP_TRAILING_SLASH, unit, filename, line, lvalue);
1508 if (r < 0)
1509 return 0;
1510
1511 if (strv_consume(exclude_files, TAKE_PTR(resolved)) < 0)
1512 return log_oom();
1513
1514 return 0;
1515 }
1516
1517 static int config_parse_copy_blocks(
1518 const char *unit,
1519 const char *filename,
1520 unsigned line,
1521 const char *section,
1522 unsigned section_line,
1523 const char *lvalue,
1524 int ltype,
1525 const char *rvalue,
1526 void *data,
1527 void *userdata) {
1528
1529 _cleanup_free_ char *d = NULL;
1530 Partition *partition = ASSERT_PTR(data);
1531 int r;
1532
1533 assert(rvalue);
1534
1535 if (isempty(rvalue)) {
1536 partition->copy_blocks_path = mfree(partition->copy_blocks_path);
1537 partition->copy_blocks_auto = false;
1538 return 0;
1539 }
1540
1541 if (streq(rvalue, "auto")) {
1542 partition->copy_blocks_path = mfree(partition->copy_blocks_path);
1543 partition->copy_blocks_auto = true;
1544 partition->copy_blocks_root = arg_root;
1545 return 0;
1546 }
1547
1548 r = specifier_printf(rvalue, PATH_MAX-1, system_and_tmp_specifier_table, arg_root, NULL, &d);
1549 if (r < 0) {
1550 log_syntax(unit, LOG_WARNING, filename, line, r,
1551 "Failed to expand specifiers in CopyBlocks= source path, ignoring: %s", rvalue);
1552 return 0;
1553 }
1554
1555 r = path_simplify_and_warn(d, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
1556 if (r < 0)
1557 return 0;
1558
1559 free_and_replace(partition->copy_blocks_path, d);
1560 partition->copy_blocks_auto = false;
1561 partition->copy_blocks_root = arg_root;
1562 return 0;
1563 }
1564
1565 static int config_parse_make_dirs(
1566 const char *unit,
1567 const char *filename,
1568 unsigned line,
1569 const char *section,
1570 unsigned section_line,
1571 const char *lvalue,
1572 int ltype,
1573 const char *rvalue,
1574 void *data,
1575 void *userdata) {
1576
1577 char ***sv = ASSERT_PTR(data);
1578 const char *p = ASSERT_PTR(rvalue);
1579 int r;
1580
1581 for (;;) {
1582 _cleanup_free_ char *word = NULL, *d = NULL;
1583
1584 r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
1585 if (r == -ENOMEM)
1586 return log_oom();
1587 if (r < 0) {
1588 log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
1589 return 0;
1590 }
1591 if (r == 0)
1592 return 0;
1593
1594 r = specifier_printf(word, PATH_MAX-1, system_and_tmp_specifier_table, arg_root, NULL, &d);
1595 if (r < 0) {
1596 log_syntax(unit, LOG_WARNING, filename, line, r,
1597 "Failed to expand specifiers in MakeDirectories= parameter, ignoring: %s", word);
1598 continue;
1599 }
1600
1601 r = path_simplify_and_warn(d, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
1602 if (r < 0)
1603 continue;
1604
1605 r = strv_consume(sv, TAKE_PTR(d));
1606 if (r < 0)
1607 return log_oom();
1608 }
1609 }
1610
1611 static DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_encrypt, encrypt_mode, EncryptMode, ENCRYPT_OFF, "Invalid encryption mode");
1612
1613 static int config_parse_gpt_flags(
1614 const char *unit,
1615 const char *filename,
1616 unsigned line,
1617 const char *section,
1618 unsigned section_line,
1619 const char *lvalue,
1620 int ltype,
1621 const char *rvalue,
1622 void *data,
1623 void *userdata) {
1624
1625 uint64_t *gpt_flags = ASSERT_PTR(data);
1626 int r;
1627
1628 assert(rvalue);
1629
1630 r = safe_atou64(rvalue, gpt_flags);
1631 if (r < 0) {
1632 log_syntax(unit, LOG_WARNING, filename, line, r,
1633 "Failed to parse Flags= value, ignoring: %s", rvalue);
1634 return 0;
1635 }
1636
1637 return 0;
1638 }
1639
1640 static int config_parse_uuid(
1641 const char *unit,
1642 const char *filename,
1643 unsigned line,
1644 const char *section,
1645 unsigned section_line,
1646 const char *lvalue,
1647 int ltype,
1648 const char *rvalue,
1649 void *data,
1650 void *userdata) {
1651
1652 Partition *partition = ASSERT_PTR(data);
1653 int r;
1654
1655 if (isempty(rvalue)) {
1656 partition->new_uuid = SD_ID128_NULL;
1657 partition->new_uuid_is_set = false;
1658 return 0;
1659 }
1660
1661 if (streq(rvalue, "null")) {
1662 partition->new_uuid = SD_ID128_NULL;
1663 partition->new_uuid_is_set = true;
1664 return 0;
1665 }
1666
1667 r = sd_id128_from_string(rvalue, &partition->new_uuid);
1668 if (r < 0) {
1669 log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse 128-bit ID/UUID, ignoring: %s", rvalue);
1670 return 0;
1671 }
1672
1673 partition->new_uuid_is_set = true;
1674
1675 return 0;
1676 }
1677
1678 static DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_verity, verity_mode, VerityMode, VERITY_OFF, "Invalid verity mode");
1679 static DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_minimize, minimize_mode, MinimizeMode, MINIMIZE_OFF, "Invalid minimize mode");
1680
1681 static int partition_read_definition(Partition *p, const char *path, const char *const *conf_file_dirs) {
1682
1683 ConfigTableItem table[] = {
1684 { "Partition", "Type", config_parse_type, 0, &p->type },
1685 { "Partition", "Label", config_parse_label, 0, &p->new_label },
1686 { "Partition", "UUID", config_parse_uuid, 0, p },
1687 { "Partition", "Priority", config_parse_int32, 0, &p->priority },
1688 { "Partition", "Weight", config_parse_weight, 0, &p->weight },
1689 { "Partition", "PaddingWeight", config_parse_weight, 0, &p->padding_weight },
1690 { "Partition", "SizeMinBytes", config_parse_size4096, 1, &p->size_min },
1691 { "Partition", "SizeMaxBytes", config_parse_size4096, -1, &p->size_max },
1692 { "Partition", "PaddingMinBytes", config_parse_size4096, 1, &p->padding_min },
1693 { "Partition", "PaddingMaxBytes", config_parse_size4096, -1, &p->padding_max },
1694 { "Partition", "FactoryReset", config_parse_bool, 0, &p->factory_reset },
1695 { "Partition", "CopyBlocks", config_parse_copy_blocks, 0, p },
1696 { "Partition", "Format", config_parse_fstype, 0, &p->format },
1697 { "Partition", "CopyFiles", config_parse_copy_files, 0, &p->copy_files },
1698 { "Partition", "ExcludeFiles", config_parse_exclude_files, 0, &p->exclude_files_source },
1699 { "Partition", "ExcludeFilesTarget", config_parse_exclude_files, 0, &p->exclude_files_target },
1700 { "Partition", "MakeDirectories", config_parse_make_dirs, 0, &p->make_directories },
1701 { "Partition", "Encrypt", config_parse_encrypt, 0, &p->encrypt },
1702 { "Partition", "Verity", config_parse_verity, 0, &p->verity },
1703 { "Partition", "VerityMatchKey", config_parse_string, 0, &p->verity_match_key },
1704 { "Partition", "Flags", config_parse_gpt_flags, 0, &p->gpt_flags },
1705 { "Partition", "ReadOnly", config_parse_tristate, 0, &p->read_only },
1706 { "Partition", "NoAuto", config_parse_tristate, 0, &p->no_auto },
1707 { "Partition", "GrowFileSystem", config_parse_tristate, 0, &p->growfs },
1708 { "Partition", "SplitName", config_parse_string, 0, &p->split_name_format },
1709 { "Partition", "Minimize", config_parse_minimize, 0, &p->minimize },
1710 { "Partition", "Subvolumes", config_parse_make_dirs, 0, &p->subvolumes },
1711 { "Partition", "VerityDataBlockSizeBytes", config_parse_block_size, 0, &p->verity_data_block_size },
1712 { "Partition", "VerityHashBlockSizeBytes", config_parse_block_size, 0, &p->verity_hash_block_size },
1713 {}
1714 };
1715 int r;
1716 _cleanup_free_ char *filename = NULL;
1717 const char* dropin_dirname;
1718
1719 r = path_extract_filename(path, &filename);
1720 if (r < 0)
1721 return log_error_errno(r, "Failed to extract filename from path '%s': %m", path);
1722
1723 dropin_dirname = strjoina(filename, ".d");
1724
1725 r = config_parse_many(
1726 STRV_MAKE_CONST(path),
1727 conf_file_dirs,
1728 dropin_dirname,
1729 arg_definitions ? NULL : arg_root,
1730 "Partition\0",
1731 config_item_table_lookup, table,
1732 CONFIG_PARSE_WARN,
1733 p,
1734 NULL,
1735 &p->drop_in_files);
1736 if (r < 0)
1737 return r;
1738
1739 if (partition_type_exclude(&p->type))
1740 return 0;
1741
1742 if (p->size_min != UINT64_MAX && p->size_max != UINT64_MAX && p->size_min > p->size_max)
1743 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1744 "SizeMinBytes= larger than SizeMaxBytes=, refusing.");
1745
1746 if (p->padding_min != UINT64_MAX && p->padding_max != UINT64_MAX && p->padding_min > p->padding_max)
1747 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1748 "PaddingMinBytes= larger than PaddingMaxBytes=, refusing.");
1749
1750 if (sd_id128_is_null(p->type.uuid))
1751 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1752 "Type= not defined, refusing.");
1753
1754 if ((p->copy_blocks_path || p->copy_blocks_auto) &&
1755 (p->format || !strv_isempty(p->copy_files) || !strv_isempty(p->make_directories)))
1756 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1757 "Format=/CopyFiles=/MakeDirectories= and CopyBlocks= cannot be combined, refusing.");
1758
1759 if ((!strv_isempty(p->copy_files) || !strv_isempty(p->make_directories)) && streq_ptr(p->format, "swap"))
1760 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1761 "Format=swap and CopyFiles= cannot be combined, refusing.");
1762
1763 if (!p->format) {
1764 const char *format = NULL;
1765
1766 if (!strv_isempty(p->copy_files) || !strv_isempty(p->make_directories) || (p->encrypt != ENCRYPT_OFF && !(p->copy_blocks_path || p->copy_blocks_auto)))
1767 /* Pick "vfat" as file system for esp and xbootldr partitions, otherwise default to "ext4". */
1768 format = IN_SET(p->type.designator, PARTITION_ESP, PARTITION_XBOOTLDR) ? "vfat" : "ext4";
1769 else if (p->type.designator == PARTITION_SWAP)
1770 format = "swap";
1771
1772 if (format) {
1773 p->format = strdup(format);
1774 if (!p->format)
1775 return log_oom();
1776 }
1777 }
1778
1779 if (p->minimize != MINIMIZE_OFF && !p->format && p->verity != VERITY_HASH)
1780 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1781 "Minimize= can only be enabled if Format= or Verity=hash are set");
1782
1783 if (p->minimize == MINIMIZE_BEST && (p->format && !fstype_is_ro(p->format)) && p->verity != VERITY_HASH)
1784 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1785 "Minimize=best can only be used with read-only filesystems or Verity=hash");
1786
1787 if ((!strv_isempty(p->copy_files) || !strv_isempty(p->make_directories)) && !mkfs_supports_root_option(p->format) && geteuid() != 0)
1788 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EPERM),
1789 "Need to be root to populate %s filesystems with CopyFiles=/MakeDirectories=",
1790 p->format);
1791
1792 if (p->format && fstype_is_ro(p->format) && strv_isempty(p->copy_files) && strv_isempty(p->make_directories))
1793 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1794 "Cannot format %s filesystem without source files, refusing", p->format);
1795
1796 if (p->verity != VERITY_OFF || p->encrypt != ENCRYPT_OFF) {
1797 r = dlopen_cryptsetup();
1798 if (r < 0)
1799 return log_syntax(NULL, LOG_ERR, path, 1, r,
1800 "libcryptsetup not found, Verity=/Encrypt= are not supported: %m");
1801 }
1802
1803 if (p->verity != VERITY_OFF && !p->verity_match_key)
1804 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1805 "VerityMatchKey= must be set if Verity=%s", verity_mode_to_string(p->verity));
1806
1807 if (p->verity == VERITY_OFF && p->verity_match_key)
1808 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1809 "VerityMatchKey= can only be set if Verity= is not \"%s\"",
1810 verity_mode_to_string(p->verity));
1811
1812 if (IN_SET(p->verity, VERITY_HASH, VERITY_SIG) &&
1813 (p->copy_files || p->copy_blocks_path || p->copy_blocks_auto || p->format || p->make_directories))
1814 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1815 "CopyBlocks=/CopyFiles=/Format=/MakeDirectories= cannot be used with Verity=%s",
1816 verity_mode_to_string(p->verity));
1817
1818 if (p->verity != VERITY_OFF && p->encrypt != ENCRYPT_OFF)
1819 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1820 "Encrypting verity hash/data partitions is not supported");
1821
1822 if (p->verity == VERITY_SIG && !arg_private_key)
1823 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1824 "Verity signature partition requested but no private key provided (--private-key=)");
1825
1826 if (p->verity == VERITY_SIG && !arg_certificate)
1827 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1828 "Verity signature partition requested but no PEM certificate provided (--certificate=)");
1829
1830 if (p->verity == VERITY_SIG && (p->size_min != UINT64_MAX || p->size_max != UINT64_MAX))
1831 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1832 "SizeMinBytes=/SizeMaxBytes= cannot be used with Verity=%s",
1833 verity_mode_to_string(p->verity));
1834
1835 if (!strv_isempty(p->subvolumes) && arg_offline > 0)
1836 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EOPNOTSUPP),
1837 "Subvolumes= cannot be used with --offline=yes");
1838
1839 /* Verity partitions are read only, let's imply the RO flag hence, unless explicitly configured otherwise. */
1840 if ((IN_SET(p->type.designator,
1841 PARTITION_ROOT_VERITY,
1842 PARTITION_USR_VERITY) || p->verity == VERITY_DATA) && p->read_only < 0)
1843 p->read_only = true;
1844
1845 /* Default to "growfs" on, unless read-only */
1846 if (gpt_partition_type_knows_growfs(p->type) &&
1847 p->read_only <= 0)
1848 p->growfs = true;
1849
1850 if (!p->split_name_format) {
1851 char *s = strdup("%t");
1852 if (!s)
1853 return log_oom();
1854
1855 p->split_name_format = s;
1856 } else if (streq(p->split_name_format, "-"))
1857 p->split_name_format = mfree(p->split_name_format);
1858
1859 return 1;
1860 }
1861
1862 static int find_verity_sibling(Context *context, Partition *p, VerityMode mode, Partition **ret) {
1863 Partition *s = NULL;
1864
1865 assert(p);
1866 assert(p->verity != VERITY_OFF);
1867 assert(p->verity_match_key);
1868 assert(mode != VERITY_OFF);
1869 assert(p->verity != mode);
1870 assert(ret);
1871
1872 /* Try to find the matching sibling partition of the given type for a verity partition. For a data
1873 * partition, this is the corresponding hash partition with the same verity name (and vice versa for
1874 * the hash partition). */
1875
1876 LIST_FOREACH(partitions, q, context->partitions) {
1877 if (p == q)
1878 continue;
1879
1880 if (q->verity != mode)
1881 continue;
1882
1883 assert(q->verity_match_key);
1884
1885 if (!streq(p->verity_match_key, q->verity_match_key))
1886 continue;
1887
1888 if (s)
1889 return -ENOTUNIQ;
1890
1891 s = q;
1892 }
1893
1894 if (!s)
1895 return -ENXIO;
1896
1897 *ret = s;
1898
1899 return 0;
1900 }
1901
1902 static int context_open_and_lock_backing_fd(const char *node, int operation, int *backing_fd) {
1903 _cleanup_close_ int fd = -EBADF;
1904
1905 assert(node);
1906 assert(backing_fd);
1907
1908 if (*backing_fd >= 0)
1909 return 0;
1910
1911 fd = open(node, O_RDONLY|O_CLOEXEC);
1912 if (fd < 0)
1913 return log_error_errno(errno, "Failed to open device '%s': %m", node);
1914
1915 /* Tell udev not to interfere while we are processing the device */
1916 if (flock(fd, operation) < 0)
1917 return log_error_errno(errno, "Failed to lock device '%s': %m", node);
1918
1919 log_debug("Device %s opened and locked.", node);
1920 *backing_fd = TAKE_FD(fd);
1921 return 1;
1922 }
1923
/* Determines how much unused space currently follows partition 'p' on disk, i.e. the distance in bytes
 * between the grain-aligned end of 'p' and the grain-aligned start of whatever comes next: the closest
 * used partition in table 't' that starts at or after the end of 'p', or, failing that, the end of the
 * usable area of 'c'. 'secsz' is the sector size used to convert sector counts to bytes, 'grainsz' the
 * alignment granularity.
 *
 * Returns 0 and stores the padding in '*ret' on success, -EIO if the partition table is inconsistent. */
static int determine_current_padding(
                struct fdisk_context *c,
                struct fdisk_table *t,
                struct fdisk_partition *p,
                uint64_t secsz,
                uint64_t grainsz,
                uint64_t *ret) {

        uint64_t end, following = UINT64_MAX;
        size_t n;

        assert(c);
        assert(t);
        assert(p);
        assert(ret);

        if (!fdisk_partition_has_end(p))
                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Partition has no end!");

        /* libfdisk reports the last sector of the partition, we want the first byte behind it */
        end = fdisk_partition_get_end(p);
        assert(end < UINT64_MAX);
        end++;
        assert(end < UINT64_MAX / secsz);
        end *= secsz;

        /* Find the used partition with the lowest start offset that is not below our end */
        n = fdisk_table_get_nents(t);
        for (size_t i = 0; i < n; i++) {
                struct fdisk_partition *q;
                uint64_t start;

                q = fdisk_table_get_partition(t, i);
                if (!q)
                        return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");

                if (fdisk_partition_is_used(q) <= 0 || !fdisk_partition_has_start(q))
                        continue;

                start = fdisk_partition_get_start(q);
                assert(start < UINT64_MAX / secsz);
                start *= secsz;

                if (start < end)
                        continue;

                if (following == UINT64_MAX || start < following)
                        following = start;
        }

        if (following == UINT64_MAX) {
                /* Nothing comes after us, hence measure against the end of the usable area. The last
                 * LBA is the final usable sector, so the area ends one sector later. */
                following = fdisk_get_last_lba(c);
                assert(following < UINT64_MAX);
                following++;

                assert(following < UINT64_MAX / secsz);
                following *= secsz;

                if (end > following)
                        return log_error_errno(SYNTHETIC_ERRNO(EIO), "Partition end beyond disk end.");
        }

        assert(following >= end);

        /* Align both ends inwards. This may make them cross, hence subtract with saturation. */
        end = round_up_size(end, grainsz);
        following = round_down_size(following, grainsz);

        *ret = LESS_BY(following, end);
        return 0;
}
1992
1993 static int context_copy_from_one(Context *context, const char *src) {
1994 _cleanup_close_ int fd = -EBADF;
1995 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
1996 _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL;
1997 Partition *last = NULL;
1998 unsigned long secsz, grainsz;
1999 size_t n_partitions;
2000 int r;
2001
2002 assert(src);
2003
2004 r = context_open_and_lock_backing_fd(src, LOCK_SH, &fd);
2005 if (r < 0)
2006 return r;
2007
2008 r = fd_verify_regular(fd);
2009 if (r < 0)
2010 return log_error_errno(r, "%s is not a file: %m", src);
2011
2012 r = fdisk_new_context_at(fd, /* path = */ NULL, /* read_only = */ true, /* sector_size = */ UINT32_MAX, &c);
2013 if (r < 0)
2014 return log_error_errno(r, "Failed to create fdisk context: %m");
2015
2016 secsz = fdisk_get_sector_size(c);
2017 grainsz = fdisk_get_grain_size(c);
2018
2019 /* Insist on a power of two, and that it's a multiple of 512, i.e. the traditional sector size. */
2020 if (secsz < 512 || !ISPOWEROF2(secsz))
2021 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Sector size %lu is not a power of two larger than 512? Refusing.", secsz);
2022
2023 if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
2024 return log_error_errno(SYNTHETIC_ERRNO(EHWPOISON), "Cannot copy from disk %s with no GPT disk label.", src);
2025
2026 r = fdisk_get_partitions(c, &t);
2027 if (r < 0)
2028 return log_error_errno(r, "Failed to acquire partition table: %m");
2029
2030 n_partitions = fdisk_table_get_nents(t);
2031 for (size_t i = 0; i < n_partitions; i++) {
2032 _cleanup_(partition_freep) Partition *np = NULL;
2033 _cleanup_free_ char *label_copy = NULL;
2034 struct fdisk_partition *p;
2035 const char *label;
2036 uint64_t sz, start, padding;
2037 sd_id128_t ptid, id;
2038 GptPartitionType type;
2039
2040 p = fdisk_table_get_partition(t, i);
2041 if (!p)
2042 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");
2043
2044 if (fdisk_partition_is_used(p) <= 0)
2045 continue;
2046
2047 if (fdisk_partition_has_start(p) <= 0 ||
2048 fdisk_partition_has_size(p) <= 0 ||
2049 fdisk_partition_has_partno(p) <= 0)
2050 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found a partition without a position, size or number.");
2051
2052 r = fdisk_partition_get_type_as_id128(p, &ptid);
2053 if (r < 0)
2054 return log_error_errno(r, "Failed to query partition type UUID: %m");
2055
2056 type = gpt_partition_type_from_uuid(ptid);
2057
2058 r = fdisk_partition_get_uuid_as_id128(p, &id);
2059 if (r < 0)
2060 return log_error_errno(r, "Failed to query partition UUID: %m");
2061
2062 label = fdisk_partition_get_name(p);
2063 if (!isempty(label)) {
2064 label_copy = strdup(label);
2065 if (!label_copy)
2066 return log_oom();
2067 }
2068
2069 sz = fdisk_partition_get_size(p);
2070 assert(sz <= UINT64_MAX/secsz);
2071 sz *= secsz;
2072
2073 start = fdisk_partition_get_start(p);
2074 assert(start <= UINT64_MAX/secsz);
2075 start *= secsz;
2076
2077 if (partition_type_exclude(&type))
2078 continue;
2079
2080 np = partition_new();
2081 if (!np)
2082 return log_oom();
2083
2084 np->type = type;
2085 np->new_uuid = id;
2086 np->new_uuid_is_set = true;
2087 np->size_min = np->size_max = sz;
2088 np->new_label = TAKE_PTR(label_copy);
2089
2090 np->definition_path = strdup(src);
2091 if (!np->definition_path)
2092 return log_oom();
2093
2094 r = determine_current_padding(c, t, p, secsz, grainsz, &padding);
2095 if (r < 0)
2096 return r;
2097
2098 np->padding_min = np->padding_max = padding;
2099
2100 np->copy_blocks_path = strdup(src);
2101 if (!np->copy_blocks_path)
2102 return log_oom();
2103
2104 np->copy_blocks_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
2105 if (np->copy_blocks_fd < 0)
2106 return log_error_errno(r, "Failed to duplicate file descriptor of %s: %m", src);
2107
2108 np->copy_blocks_offset = start;
2109 np->copy_blocks_size = sz;
2110
2111 r = fdisk_partition_get_attrs_as_uint64(p, &np->gpt_flags);
2112 if (r < 0)
2113 return log_error_errno(r, "Failed to get partition flags: %m");
2114
2115 LIST_INSERT_AFTER(partitions, context->partitions, last, np);
2116 last = TAKE_PTR(np);
2117 context->n_partitions++;
2118 }
2119
2120 return 0;
2121 }
2122
2123 static int context_copy_from(Context *context) {
2124 int r;
2125
2126 assert(context);
2127
2128 STRV_FOREACH(src, arg_copy_from) {
2129 r = context_copy_from_one(context, *src);
2130 if (r < 0)
2131 return r;
2132 }
2133
2134 return 0;
2135 }
2136
2137 static int context_read_definitions(Context *context) {
2138 _cleanup_strv_free_ char **files = NULL;
2139 Partition *last = LIST_FIND_TAIL(partitions, context->partitions);
2140 const char *const *dirs;
2141 int r;
2142
2143 assert(context);
2144
2145 dirs = (const char* const*) (arg_definitions ?: CONF_PATHS_STRV("repart.d"));
2146
2147 r = conf_files_list_strv(&files, ".conf", arg_definitions ? NULL : arg_root, CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED, dirs);
2148 if (r < 0)
2149 return log_error_errno(r, "Failed to enumerate *.conf files: %m");
2150
2151 STRV_FOREACH(f, files) {
2152 _cleanup_(partition_freep) Partition *p = NULL;
2153
2154 p = partition_new();
2155 if (!p)
2156 return log_oom();
2157
2158 p->definition_path = strdup(*f);
2159 if (!p->definition_path)
2160 return log_oom();
2161
2162 r = partition_read_definition(p, *f, dirs);
2163 if (r < 0)
2164 return r;
2165 if (r == 0)
2166 continue;
2167
2168 LIST_INSERT_AFTER(partitions, context->partitions, last, p);
2169 last = TAKE_PTR(p);
2170 context->n_partitions++;
2171 }
2172
2173 /* Check that each configured verity hash/data partition has a matching verity data/hash partition. */
2174
2175 LIST_FOREACH(partitions, p, context->partitions) {
2176 if (p->verity == VERITY_OFF)
2177 continue;
2178
2179 for (VerityMode mode = VERITY_OFF + 1; mode < _VERITY_MODE_MAX; mode++) {
2180 Partition *q = NULL;
2181
2182 if (p->verity == mode)
2183 continue;
2184
2185 if (p->siblings[mode])
2186 continue;
2187
2188 r = find_verity_sibling(context, p, mode, &q);
2189 if (r == -ENXIO) {
2190 if (mode != VERITY_SIG)
2191 return log_syntax(NULL, LOG_ERR, p->definition_path, 1, SYNTHETIC_ERRNO(EINVAL),
2192 "Missing verity %s partition for verity %s partition with VerityMatchKey=%s",
2193 verity_mode_to_string(mode), verity_mode_to_string(p->verity), p->verity_match_key);
2194 } else if (r == -ENOTUNIQ)
2195 return log_syntax(NULL, LOG_ERR, p->definition_path, 1, SYNTHETIC_ERRNO(EINVAL),
2196 "Multiple verity %s partitions found for verity %s partition with VerityMatchKey=%s",
2197 verity_mode_to_string(mode), verity_mode_to_string(p->verity), p->verity_match_key);
2198 else if (r < 0)
2199 return log_syntax(NULL, LOG_ERR, p->definition_path, 1, r,
2200 "Failed to find verity %s partition for verity %s partition with VerityMatchKey=%s",
2201 verity_mode_to_string(mode), verity_mode_to_string(p->verity), p->verity_match_key);
2202
2203 if (q) {
2204 if (q->priority != p->priority)
2205 return log_syntax(NULL, LOG_ERR, p->definition_path, 1, SYNTHETIC_ERRNO(EINVAL),
2206 "Priority mismatch (%i != %i) for verity sibling partitions with VerityMatchKey=%s",
2207 p->priority, q->priority, p->verity_match_key);
2208
2209 p->siblings[mode] = q;
2210 }
2211 }
2212 }
2213
2214 LIST_FOREACH(partitions, p, context->partitions) {
2215 Partition *dp;
2216
2217 if (p->verity != VERITY_HASH)
2218 continue;
2219
2220 if (p->minimize == MINIMIZE_OFF)
2221 continue;
2222
2223 assert_se(dp = p->siblings[VERITY_DATA]);
2224
2225 if (dp->minimize == MINIMIZE_OFF && !(dp->copy_blocks_path || dp->copy_blocks_auto))
2226 return log_syntax(NULL, LOG_ERR, p->definition_path, 1, SYNTHETIC_ERRNO(EINVAL),
2227 "Minimize= set for verity hash partition but data partition does "
2228 "not set CopyBlocks= or Minimize=");
2229
2230 }
2231
2232 return 0;
2233 }
2234
2235 static int fdisk_ask_cb(struct fdisk_context *c, struct fdisk_ask *ask, void *data) {
2236 _cleanup_free_ char *ids = NULL;
2237 int r;
2238
2239 if (fdisk_ask_get_type(ask) != FDISK_ASKTYPE_STRING)
2240 return -EINVAL;
2241
2242 ids = new(char, SD_ID128_UUID_STRING_MAX);
2243 if (!ids)
2244 return -ENOMEM;
2245
2246 r = fdisk_ask_string_set_result(ask, sd_id128_to_uuid_string(*(sd_id128_t*) data, ids));
2247 if (r < 0)
2248 return r;
2249
2250 TAKE_PTR(ids);
2251 return 0;
2252 }
2253
2254 static int fdisk_set_disklabel_id_by_uuid(struct fdisk_context *c, sd_id128_t id) {
2255 int r;
2256
2257 r = fdisk_set_ask(c, fdisk_ask_cb, &id);
2258 if (r < 0)
2259 return r;
2260
2261 r = fdisk_set_disklabel_id(c);
2262 if (r < 0)
2263 return r;
2264
2265 return fdisk_set_ask(c, NULL, NULL);
2266 }
2267
2268 static int derive_uuid(sd_id128_t base, const char *token, sd_id128_t *ret) {
2269 union {
2270 uint8_t md[SHA256_DIGEST_SIZE];
2271 sd_id128_t id;
2272 } result;
2273
2274 assert(token);
2275 assert(ret);
2276
2277 /* Derive a new UUID from the specified UUID in a stable and reasonably safe way. Specifically, we
2278 * calculate the HMAC-SHA256 of the specified token string, keyed by the supplied base (typically the
2279 * machine ID). We use the machine ID as key (and not as cleartext!) of the HMAC operation since it's
2280 * the machine ID we don't want to leak. */
2281
2282 hmac_sha256(base.bytes, sizeof(base.bytes), token, strlen(token), result.md);
2283
2284 /* Take the first half, mark it as v4 UUID */
2285 assert_cc(sizeof(result.md) == sizeof(result.id) * 2);
2286 *ret = id128_make_v4_uuid(result.id);
2287 return 0;
2288 }
2289
2290 static void derive_salt(sd_id128_t base, const char *token, uint8_t ret[static SHA256_DIGEST_SIZE]) {
2291 assert(token);
2292
2293 hmac_sha256(base.bytes, sizeof(base.bytes), token, strlen(token), ret);
2294 }
2295
2296 static int context_load_partition_table(Context *context) {
2297 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
2298 _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL;
2299 uint64_t left_boundary = UINT64_MAX, first_lba, last_lba, nsectors;
2300 _cleanup_free_ char *disk_uuid_string = NULL;
2301 bool from_scratch = false;
2302 sd_id128_t disk_uuid;
2303 size_t n_partitions;
2304 unsigned long secsz;
2305 uint64_t grainsz, fs_secsz = DEFAULT_FILESYSTEM_SECTOR_SIZE;
2306 int r;
2307
2308 assert(context);
2309 assert(!context->fdisk_context);
2310 assert(!context->free_areas);
2311 assert(context->start == UINT64_MAX);
2312 assert(context->end == UINT64_MAX);
2313 assert(context->total == UINT64_MAX);
2314
2315 c = fdisk_new_context();
2316 if (!c)
2317 return log_oom();
2318
2319 if (arg_sector_size > 0) {
2320 fs_secsz = arg_sector_size;
2321 r = fdisk_save_user_sector_size(c, /* phy= */ 0, arg_sector_size);
2322 } else {
2323 uint32_t ssz;
2324 struct stat st;
2325
2326 r = context_open_and_lock_backing_fd(context->node, arg_dry_run ? LOCK_SH : LOCK_EX,
2327 &context->backing_fd);
2328 if (r < 0)
2329 return r;
2330
2331 if (fstat(context->backing_fd, &st) < 0)
2332 return log_error_errno(r, "Failed to stat %s: %m", context->node);
2333
2334 /* Auto-detect sector size if not specified. */
2335 r = probe_sector_size_prefer_ioctl(context->backing_fd, &ssz);
2336 if (r < 0)
2337 return log_error_errno(r, "Failed to probe sector size of '%s': %m", context->node);
2338
2339 /* If we found the sector size and we're operating on a block device, use it as the file
2340 * system sector size as well, as we know its the sector size of the actual block device and
2341 * not just the offset at which we found the GPT header. */
2342 if (r > 0 && S_ISBLK(st.st_mode))
2343 fs_secsz = ssz;
2344
2345 r = fdisk_save_user_sector_size(c, /* phy= */ 0, ssz);
2346 }
2347 if (r < 0)
2348 return log_error_errno(r, "Failed to set sector size: %m");
2349
2350 /* libfdisk doesn't have an API to operate on arbitrary fds, hence reopen the fd going via the
2351 * /proc/self/fd/ magic path if we have an existing fd. Open the original file otherwise. */
2352 r = fdisk_assign_device(
2353 c,
2354 context->backing_fd >= 0 ? FORMAT_PROC_FD_PATH(context->backing_fd) : context->node,
2355 arg_dry_run);
2356 if (r == -EINVAL && arg_size_auto) {
2357 struct stat st;
2358
2359 /* libfdisk returns EINVAL if opening a file of size zero. Let's check for that, and accept
2360 * it if automatic sizing is requested. */
2361
2362 if (context->backing_fd < 0)
2363 r = stat(context->node, &st);
2364 else
2365 r = fstat(context->backing_fd, &st);
2366 if (r < 0)
2367 return log_error_errno(errno, "Failed to stat block device '%s': %m", context->node);
2368
2369 if (S_ISREG(st.st_mode) && st.st_size == 0) {
2370 /* Use the fallback values if we have no better idea */
2371 context->sector_size = fdisk_get_sector_size(c);
2372 context->fs_sector_size = fs_secsz;
2373 context->grain_size = 4096;
2374 return /* from_scratch = */ true;
2375 }
2376
2377 r = -EINVAL;
2378 }
2379 if (r < 0)
2380 return log_error_errno(r, "Failed to open device '%s': %m", context->node);
2381
2382 if (context->backing_fd < 0) {
2383 /* If we have no fd referencing the device yet, make a copy of the fd now, so that we have one */
2384 r = context_open_and_lock_backing_fd(FORMAT_PROC_FD_PATH(fdisk_get_devfd(c)),
2385 arg_dry_run ? LOCK_SH : LOCK_EX,
2386 &context->backing_fd);
2387 if (r < 0)
2388 return r;
2389 }
2390
2391 /* The offsets/sizes libfdisk returns to us will be in multiple of the sector size of the
2392 * device. This is typically 512, and sometimes 4096. Let's query libfdisk once for it, and then use
2393 * it for all our needs. Note that the values we use ourselves always are in bytes though, thus mean
2394 * the same thing universally. Also note that regardless what kind of sector size is in use we'll
2395 * place partitions at multiples of 4K. */
2396 secsz = fdisk_get_sector_size(c);
2397
2398 /* Insist on a power of two, and that it's a multiple of 512, i.e. the traditional sector size. */
2399 if (secsz < 512 || !ISPOWEROF2(secsz))
2400 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Sector size %lu is not a power of two larger than 512? Refusing.", secsz);
2401
2402 /* Use at least 4K, and ensure it's a multiple of the sector size, regardless if that is smaller or
2403 * larger */
2404 grainsz = secsz < 4096 ? 4096 : secsz;
2405
2406 log_debug("Sector size of device is %lu bytes. Using grain size of %" PRIu64 ".", secsz, grainsz);
2407
2408 switch (arg_empty) {
2409
2410 case EMPTY_REFUSE:
2411 /* Refuse empty disks, insist on an existing GPT partition table */
2412 if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
2413 return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s has no GPT disk label, not repartitioning.", context->node);
2414
2415 break;
2416
2417 case EMPTY_REQUIRE:
2418 /* Require an empty disk, refuse any existing partition table */
2419 r = fdisk_has_label(c);
2420 if (r < 0)
2421 return log_error_errno(r, "Failed to determine whether disk %s has a disk label: %m", context->node);
2422 if (r > 0)
2423 return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s already has a disk label, refusing.", context->node);
2424
2425 from_scratch = true;
2426 break;
2427
2428 case EMPTY_ALLOW:
2429 /* Allow both an empty disk and an existing partition table, but only GPT */
2430 r = fdisk_has_label(c);
2431 if (r < 0)
2432 return log_error_errno(r, "Failed to determine whether disk %s has a disk label: %m", context->node);
2433 if (r > 0) {
2434 if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
2435 return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s has non-GPT disk label, not repartitioning.", context->node);
2436 } else
2437 from_scratch = true;
2438
2439 break;
2440
2441 case EMPTY_FORCE:
2442 case EMPTY_CREATE:
2443 /* Always reinitiaize the disk, don't consider what there was on the disk before */
2444 from_scratch = true;
2445 break;
2446
2447 default:
2448 assert_not_reached();
2449 }
2450
2451 if (from_scratch) {
2452 r = fdisk_create_disklabel(c, "gpt");
2453 if (r < 0)
2454 return log_error_errno(r, "Failed to create GPT disk label: %m");
2455
2456 r = derive_uuid(context->seed, "disk-uuid", &disk_uuid);
2457 if (r < 0)
2458 return log_error_errno(r, "Failed to acquire disk GPT uuid: %m");
2459
2460 r = fdisk_set_disklabel_id_by_uuid(c, disk_uuid);
2461 if (r < 0)
2462 return log_error_errno(r, "Failed to set GPT disk label: %m");
2463
2464 goto add_initial_free_area;
2465 }
2466
2467 r = fdisk_get_disklabel_id(c, &disk_uuid_string);
2468 if (r < 0)
2469 return log_error_errno(r, "Failed to get current GPT disk label UUID: %m");
2470
2471 r = id128_from_string_nonzero(disk_uuid_string, &disk_uuid);
2472 if (r == -ENXIO) {
2473 r = derive_uuid(context->seed, "disk-uuid", &disk_uuid);
2474 if (r < 0)
2475 return log_error_errno(r, "Failed to acquire disk GPT uuid: %m");
2476
2477 r = fdisk_set_disklabel_id(c);
2478 if (r < 0)
2479 return log_error_errno(r, "Failed to set GPT disk label: %m");
2480 } else if (r < 0)
2481 return log_error_errno(r, "Failed to parse current GPT disk label UUID: %m");
2482
2483 r = fdisk_get_partitions(c, &t);
2484 if (r < 0)
2485 return log_error_errno(r, "Failed to acquire partition table: %m");
2486
2487 n_partitions = fdisk_table_get_nents(t);
2488 for (size_t i = 0; i < n_partitions; i++) {
2489 _cleanup_free_ char *label_copy = NULL;
2490 Partition *last = NULL;
2491 struct fdisk_partition *p;
2492 const char *label;
2493 uint64_t sz, start;
2494 bool found = false;
2495 sd_id128_t ptid, id;
2496 size_t partno;
2497
2498 p = fdisk_table_get_partition(t, i);
2499 if (!p)
2500 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");
2501
2502 if (fdisk_partition_is_used(p) <= 0)
2503 continue;
2504
2505 if (fdisk_partition_has_start(p) <= 0 ||
2506 fdisk_partition_has_size(p) <= 0 ||
2507 fdisk_partition_has_partno(p) <= 0)
2508 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found a partition without a position, size or number.");
2509
2510 r = fdisk_partition_get_type_as_id128(p, &ptid);
2511 if (r < 0)
2512 return log_error_errno(r, "Failed to query partition type UUID: %m");
2513
2514 r = fdisk_partition_get_uuid_as_id128(p, &id);
2515 if (r < 0)
2516 return log_error_errno(r, "Failed to query partition UUID: %m");
2517
2518 label = fdisk_partition_get_name(p);
2519 if (!isempty(label)) {
2520 label_copy = strdup(label);
2521 if (!label_copy)
2522 return log_oom();
2523 }
2524
2525 sz = fdisk_partition_get_size(p);
2526 assert(sz <= UINT64_MAX/secsz);
2527 sz *= secsz;
2528
2529 start = fdisk_partition_get_start(p);
2530 assert(start <= UINT64_MAX/secsz);
2531 start *= secsz;
2532
2533 partno = fdisk_partition_get_partno(p);
2534
2535 if (left_boundary == UINT64_MAX || left_boundary > start)
2536 left_boundary = start;
2537
2538 /* Assign this existing partition to the first partition of the right type that doesn't have
2539 * an existing one assigned yet. */
2540 LIST_FOREACH(partitions, pp, context->partitions) {
2541 last = pp;
2542
2543 if (!sd_id128_equal(pp->type.uuid, ptid))
2544 continue;
2545
2546 if (!pp->current_partition) {
2547 pp->current_uuid = id;
2548 pp->current_size = sz;
2549 pp->offset = start;
2550 pp->partno = partno;
2551 pp->current_label = TAKE_PTR(label_copy);
2552
2553 pp->current_partition = p;
2554 fdisk_ref_partition(p);
2555
2556 r = determine_current_padding(c, t, p, secsz, grainsz, &pp->current_padding);
2557 if (r < 0)
2558 return r;
2559
2560 if (pp->current_padding > 0) {
2561 r = context_add_free_area(context, pp->current_padding, pp);
2562 if (r < 0)
2563 return r;
2564 }
2565
2566 found = true;
2567 break;
2568 }
2569 }
2570
2571 /* If we have no matching definition, create a new one. */
2572 if (!found) {
2573 _cleanup_(partition_freep) Partition *np = NULL;
2574
2575 np = partition_new();
2576 if (!np)
2577 return log_oom();
2578
2579 np->current_uuid = id;
2580 np->type = gpt_partition_type_from_uuid(ptid);
2581 np->current_size = sz;
2582 np->offset = start;
2583 np->partno = partno;
2584 np->current_label = TAKE_PTR(label_copy);
2585
2586 np->current_partition = p;
2587 fdisk_ref_partition(p);
2588
2589 r = determine_current_padding(c, t, p, secsz, grainsz, &np->current_padding);
2590 if (r < 0)
2591 return r;
2592
2593 if (np->current_padding > 0) {
2594 r = context_add_free_area(context, np->current_padding, np);
2595 if (r < 0)
2596 return r;
2597 }
2598
2599 LIST_INSERT_AFTER(partitions, context->partitions, last, TAKE_PTR(np));
2600 context->n_partitions++;
2601 }
2602 }
2603
2604 add_initial_free_area:
2605 nsectors = fdisk_get_nsectors(c);
2606 assert(nsectors <= UINT64_MAX/secsz);
2607 nsectors *= secsz;
2608
2609 first_lba = fdisk_get_first_lba(c);
2610 assert(first_lba <= UINT64_MAX/secsz);
2611 first_lba *= secsz;
2612
2613 last_lba = fdisk_get_last_lba(c);
2614 assert(last_lba < UINT64_MAX);
2615 last_lba++;
2616 assert(last_lba <= UINT64_MAX/secsz);
2617 last_lba *= secsz;
2618
2619 assert(last_lba >= first_lba);
2620
2621 if (left_boundary == UINT64_MAX) {
2622 /* No partitions at all? Then the whole disk is up for grabs. */
2623
2624 first_lba = round_up_size(first_lba, grainsz);
2625 last_lba = round_down_size(last_lba, grainsz);
2626
2627 if (last_lba > first_lba) {
2628 r = context_add_free_area(context, last_lba - first_lba, NULL);
2629 if (r < 0)
2630 return r;
2631 }
2632 } else {
2633 /* Add space left of first partition */
2634 assert(left_boundary >= first_lba);
2635
2636 first_lba = round_up_size(first_lba, grainsz);
2637 left_boundary = round_down_size(left_boundary, grainsz);
2638 last_lba = round_down_size(last_lba, grainsz);
2639
2640 if (left_boundary > first_lba) {
2641 r = context_add_free_area(context, left_boundary - first_lba, NULL);
2642 if (r < 0)
2643 return r;
2644 }
2645 }
2646
2647 context->start = first_lba;
2648 context->end = last_lba;
2649 context->total = nsectors;
2650 context->sector_size = secsz;
2651 context->fs_sector_size = fs_secsz;
2652 context->grain_size = grainsz;
2653 context->fdisk_context = TAKE_PTR(c);
2654
2655 return from_scratch;
2656 }
2657
2658 static void context_unload_partition_table(Context *context) {
2659 assert(context);
2660
2661 LIST_FOREACH(partitions, p, context->partitions) {
2662
2663 /* Entirely remove partitions that have no configuration */
2664 if (PARTITION_IS_FOREIGN(p)) {
2665 partition_unlink_and_free(context, p);
2666 continue;
2667 }
2668
2669 /* Otherwise drop all data we read off the block device and everything we might have
2670 * calculated based on it */
2671
2672 p->dropped = false;
2673 p->current_size = UINT64_MAX;
2674 p->new_size = UINT64_MAX;
2675 p->current_padding = UINT64_MAX;
2676 p->new_padding = UINT64_MAX;
2677 p->partno = UINT64_MAX;
2678 p->offset = UINT64_MAX;
2679
2680 if (p->current_partition) {
2681 fdisk_unref_partition(p->current_partition);
2682 p->current_partition = NULL;
2683 }
2684
2685 if (p->new_partition) {
2686 fdisk_unref_partition(p->new_partition);
2687 p->new_partition = NULL;
2688 }
2689
2690 p->padding_area = NULL;
2691 p->allocated_to_area = NULL;
2692
2693 p->current_uuid = SD_ID128_NULL;
2694 p->current_label = mfree(p->current_label);
2695 }
2696
2697 context->start = UINT64_MAX;
2698 context->end = UINT64_MAX;
2699 context->total = UINT64_MAX;
2700
2701 if (context->fdisk_context) {
2702 fdisk_unref_context(context->fdisk_context);
2703 context->fdisk_context = NULL;
2704 }
2705
2706 context_free_free_areas(context);
2707 }
2708
2709 static int format_size_change(uint64_t from, uint64_t to, char **ret) {
2710 char *t;
2711
2712 if (from != UINT64_MAX) {
2713 if (from == to || to == UINT64_MAX)
2714 t = strdup(FORMAT_BYTES(from));
2715 else
2716 t = strjoin(FORMAT_BYTES(from), " ", special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), " ", FORMAT_BYTES(to));
2717 } else if (to != UINT64_MAX)
2718 t = strjoin(special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), " ", FORMAT_BYTES(to));
2719 else {
2720 *ret = NULL;
2721 return 0;
2722 }
2723
2724 if (!t)
2725 return log_oom();
2726
2727 *ret = t;
2728 return 1;
2729 }
2730
2731 static const char *partition_label(const Partition *p) {
2732 assert(p);
2733
2734 if (p->new_label)
2735 return p->new_label;
2736
2737 if (p->current_label)
2738 return p->current_label;
2739
2740 return gpt_partition_type_uuid_to_string(p->type.uuid);
2741 }
2742
2743 static int context_dump_partitions(Context *context) {
2744 _cleanup_(table_unrefp) Table *t = NULL;
2745 uint64_t sum_padding = 0, sum_size = 0;
2746 int r;
2747 const size_t roothash_col = 14, dropin_files_col = 15, split_path_col = 16;
2748 bool has_roothash = false, has_dropin_files = false, has_split_path = false;
2749
2750 if ((arg_json_format_flags & JSON_FORMAT_OFF) && context->n_partitions == 0) {
2751 log_info("Empty partition table.");
2752 return 0;
2753 }
2754
2755 t = table_new("type",
2756 "label",
2757 "uuid",
2758 "partno",
2759 "file",
2760 "node",
2761 "offset",
2762 "old size",
2763 "raw size",
2764 "size",
2765 "old padding",
2766 "raw padding",
2767 "padding",
2768 "activity",
2769 "roothash",
2770 "drop-in files",
2771 "split path");
2772 if (!t)
2773 return log_oom();
2774
2775 if (!DEBUG_LOGGING) {
2776 if (arg_json_format_flags & JSON_FORMAT_OFF)
2777 (void) table_set_display(t, (size_t) 0, (size_t) 1, (size_t) 2, (size_t) 3, (size_t) 4,
2778 (size_t) 8, (size_t) 9, (size_t) 12, roothash_col, dropin_files_col,
2779 split_path_col);
2780 else
2781 (void) table_set_display(t, (size_t) 0, (size_t) 1, (size_t) 2, (size_t) 3, (size_t) 4,
2782 (size_t) 5, (size_t) 6, (size_t) 7, (size_t) 8, (size_t) 10,
2783 (size_t) 11, (size_t) 13, roothash_col, dropin_files_col,
2784 split_path_col);
2785 }
2786
2787 (void) table_set_align_percent(t, table_get_cell(t, 0, 5), 100);
2788 (void) table_set_align_percent(t, table_get_cell(t, 0, 6), 100);
2789 (void) table_set_align_percent(t, table_get_cell(t, 0, 7), 100);
2790 (void) table_set_align_percent(t, table_get_cell(t, 0, 8), 100);
2791 (void) table_set_align_percent(t, table_get_cell(t, 0, 9), 100);
2792 (void) table_set_align_percent(t, table_get_cell(t, 0, 10), 100);
2793 (void) table_set_align_percent(t, table_get_cell(t, 0, 11), 100);
2794
2795 LIST_FOREACH(partitions, p, context->partitions) {
2796 _cleanup_free_ char *size_change = NULL, *padding_change = NULL, *partname = NULL, *rh = NULL;
2797 char uuid_buffer[SD_ID128_UUID_STRING_MAX];
2798 const char *label, *activity = NULL;
2799
2800 if (p->dropped)
2801 continue;
2802
2803 if (p->current_size == UINT64_MAX)
2804 activity = "create";
2805 else if (p->current_size != p->new_size)
2806 activity = "resize";
2807
2808 label = partition_label(p);
2809 partname = p->partno != UINT64_MAX ? fdisk_partname(context->node, p->partno+1) : NULL;
2810
2811 r = format_size_change(p->current_size, p->new_size, &size_change);
2812 if (r < 0)
2813 return r;
2814
2815 r = format_size_change(p->current_padding, p->new_padding, &padding_change);
2816 if (r < 0)
2817 return r;
2818
2819 if (p->new_size != UINT64_MAX)
2820 sum_size += p->new_size;
2821 if (p->new_padding != UINT64_MAX)
2822 sum_padding += p->new_padding;
2823
2824 if (p->verity != VERITY_OFF) {
2825 Partition *hp = p->verity == VERITY_HASH ? p : p->siblings[VERITY_HASH];
2826
2827 rh = hp->roothash ? hexmem(hp->roothash, hp->roothash_size) : strdup("TBD");
2828 if (!rh)
2829 return log_oom();
2830 }
2831
2832 r = table_add_many(
2833 t,
2834 TABLE_STRING, gpt_partition_type_uuid_to_string_harder(p->type.uuid, uuid_buffer),
2835 TABLE_STRING, empty_to_null(label) ?: "-", TABLE_SET_COLOR, empty_to_null(label) ? NULL : ansi_grey(),
2836 TABLE_UUID, p->new_uuid_is_set ? p->new_uuid : p->current_uuid,
2837 TABLE_UINT64, p->partno,
2838 TABLE_PATH_BASENAME, p->definition_path, TABLE_SET_COLOR, p->definition_path ? NULL : ansi_grey(),
2839 TABLE_STRING, partname ?: "-", TABLE_SET_COLOR, partname ? NULL : ansi_highlight(),
2840 TABLE_UINT64, p->offset,
2841 TABLE_UINT64, p->current_size == UINT64_MAX ? 0 : p->current_size,
2842 TABLE_UINT64, p->new_size,
2843 TABLE_STRING, size_change, TABLE_SET_COLOR, !p->partitions_next && sum_size > 0 ? ansi_underline() : NULL,
2844 TABLE_UINT64, p->current_padding == UINT64_MAX ? 0 : p->current_padding,
2845 TABLE_UINT64, p->new_padding,
2846 TABLE_STRING, padding_change, TABLE_SET_COLOR, !p->partitions_next && sum_padding > 0 ? ansi_underline() : NULL,
2847 TABLE_STRING, activity ?: "unchanged",
2848 TABLE_STRING, rh,
2849 TABLE_STRV, p->drop_in_files,
2850 TABLE_STRING, empty_to_null(p->split_path) ?: "-");
2851 if (r < 0)
2852 return table_log_add_error(r);
2853
2854 has_roothash = has_roothash || !isempty(rh);
2855 has_dropin_files = has_dropin_files || !strv_isempty(p->drop_in_files);
2856 has_split_path = has_split_path || !isempty(p->split_path);
2857 }
2858
2859 if ((arg_json_format_flags & JSON_FORMAT_OFF) && (sum_padding > 0 || sum_size > 0)) {
2860 const char *a, *b;
2861
2862 a = strjoina(special_glyph(SPECIAL_GLYPH_SIGMA), " = ", FORMAT_BYTES(sum_size));
2863 b = strjoina(special_glyph(SPECIAL_GLYPH_SIGMA), " = ", FORMAT_BYTES(sum_padding));
2864
2865 r = table_add_many(
2866 t,
2867 TABLE_EMPTY,
2868 TABLE_EMPTY,
2869 TABLE_EMPTY,
2870 TABLE_EMPTY,
2871 TABLE_EMPTY,
2872 TABLE_EMPTY,
2873 TABLE_EMPTY,
2874 TABLE_EMPTY,
2875 TABLE_EMPTY,
2876 TABLE_STRING, a,
2877 TABLE_EMPTY,
2878 TABLE_EMPTY,
2879 TABLE_STRING, b,
2880 TABLE_EMPTY,
2881 TABLE_EMPTY,
2882 TABLE_EMPTY,
2883 TABLE_EMPTY);
2884 if (r < 0)
2885 return table_log_add_error(r);
2886 }
2887
2888 if (!has_roothash) {
2889 r = table_hide_column_from_display(t, roothash_col);
2890 if (r < 0)
2891 return log_error_errno(r, "Failed to set columns to display: %m");
2892 }
2893
2894 if (!has_dropin_files) {
2895 r = table_hide_column_from_display(t, dropin_files_col);
2896 if (r < 0)
2897 return log_error_errno(r, "Failed to set columns to display: %m");
2898 }
2899
2900 if (!has_split_path) {
2901 r = table_hide_column_from_display(t, split_path_col);
2902 if (r < 0)
2903 return log_error_errno(r, "Failed to set columns to display: %m");
2904 }
2905
2906 return table_print_with_pager(t, arg_json_format_flags, arg_pager_flags, arg_legend);
2907 }
2908
2909 static void context_bar_char_process_partition(
2910 Context *context,
2911 Partition *bar[],
2912 size_t n,
2913 Partition *p,
2914 size_t *ret_start) {
2915
2916 uint64_t from, to, total;
2917 size_t x, y;
2918
2919 assert(context);
2920 assert(bar);
2921 assert(n > 0);
2922 assert(p);
2923
2924 if (p->dropped)
2925 return;
2926
2927 assert(p->offset != UINT64_MAX);
2928 assert(p->new_size != UINT64_MAX);
2929
2930 from = p->offset;
2931 to = from + p->new_size;
2932
2933 assert(context->total > 0);
2934 total = context->total;
2935
2936 assert(from <= total);
2937 x = from * n / total;
2938
2939 assert(to <= total);
2940 y = to * n / total;
2941
2942 assert(x <= y);
2943 assert(y <= n);
2944
2945 for (size_t i = x; i < y; i++)
2946 bar[i] = p;
2947
2948 *ret_start = x;
2949 }
2950
2951 static int partition_hint(const Partition *p, const char *node, char **ret) {
2952 _cleanup_free_ char *buf = NULL;
2953 const char *label;
2954 sd_id128_t id;
2955
2956 /* Tries really hard to find a suitable description for this partition */
2957
2958 if (p->definition_path)
2959 return path_extract_filename(p->definition_path, ret);
2960
2961 label = partition_label(p);
2962 if (!isempty(label)) {
2963 buf = strdup(label);
2964 goto done;
2965 }
2966
2967 if (p->partno != UINT64_MAX) {
2968 buf = fdisk_partname(node, p->partno+1);
2969 goto done;
2970 }
2971
2972 if (p->new_uuid_is_set)
2973 id = p->new_uuid;
2974 else if (!sd_id128_is_null(p->current_uuid))
2975 id = p->current_uuid;
2976 else
2977 id = p->type.uuid;
2978
2979 buf = strdup(SD_ID128_TO_UUID_STRING(id));
2980
2981 done:
2982 if (!buf)
2983 return -ENOMEM;
2984
2985 *ret = TAKE_PTR(buf);
2986 return 0;
2987 }
2988
2989 static int context_dump_partition_bar(Context *context) {
2990 _cleanup_free_ Partition **bar = NULL;
2991 _cleanup_free_ size_t *start_array = NULL;
2992 Partition *last = NULL;
2993 bool z = false;
2994 size_t c, j = 0;
2995
2996 assert_se((c = columns()) >= 2);
2997 c -= 2; /* We do not use the leftmost and rightmost character cell */
2998
2999 bar = new0(Partition*, c);
3000 if (!bar)
3001 return log_oom();
3002
3003 start_array = new(size_t, context->n_partitions);
3004 if (!start_array)
3005 return log_oom();
3006
3007 LIST_FOREACH(partitions, p, context->partitions)
3008 context_bar_char_process_partition(context, bar, c, p, start_array + j++);
3009
3010 putc(' ', stdout);
3011
3012 for (size_t i = 0; i < c; i++) {
3013 if (bar[i]) {
3014 if (last != bar[i])
3015 z = !z;
3016
3017 fputs(z ? ansi_green() : ansi_yellow(), stdout);
3018 fputs(special_glyph(SPECIAL_GLYPH_DARK_SHADE), stdout);
3019 } else {
3020 fputs(ansi_normal(), stdout);
3021 fputs(special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), stdout);
3022 }
3023
3024 last = bar[i];
3025 }
3026
3027 fputs(ansi_normal(), stdout);
3028 putc('\n', stdout);
3029
3030 for (size_t i = 0; i < context->n_partitions; i++) {
3031 _cleanup_free_ char **line = NULL;
3032
3033 line = new0(char*, c);
3034 if (!line)
3035 return log_oom();
3036
3037 j = 0;
3038 LIST_FOREACH(partitions, p, context->partitions) {
3039 _cleanup_free_ char *d = NULL;
3040 j++;
3041
3042 if (i < context->n_partitions - j) {
3043
3044 if (line[start_array[j-1]]) {
3045 const char *e;
3046
3047 /* Upgrade final corner to the right with a branch to the right */
3048 e = startswith(line[start_array[j-1]], special_glyph(SPECIAL_GLYPH_TREE_RIGHT));
3049 if (e) {
3050 d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_BRANCH), e);
3051 if (!d)
3052 return log_oom();
3053 }
3054 }
3055
3056 if (!d) {
3057 d = strdup(special_glyph(SPECIAL_GLYPH_TREE_VERTICAL));
3058 if (!d)
3059 return log_oom();
3060 }
3061
3062 } else if (i == context->n_partitions - j) {
3063 _cleanup_free_ char *hint = NULL;
3064
3065 (void) partition_hint(p, context->node, &hint);
3066
3067 if (streq_ptr(line[start_array[j-1]], special_glyph(SPECIAL_GLYPH_TREE_VERTICAL)))
3068 d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_BRANCH), " ", strna(hint));
3069 else
3070 d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_RIGHT), " ", strna(hint));
3071
3072 if (!d)
3073 return log_oom();
3074 }
3075
3076 if (d)
3077 free_and_replace(line[start_array[j-1]], d);
3078 }
3079
3080 putc(' ', stdout);
3081
3082 j = 0;
3083 while (j < c) {
3084 if (line[j]) {
3085 fputs(line[j], stdout);
3086 j += utf8_console_width(line[j]);
3087 } else {
3088 putc(' ', stdout);
3089 j++;
3090 }
3091 }
3092
3093 putc('\n', stdout);
3094
3095 for (j = 0; j < c; j++)
3096 free(line[j]);
3097 }
3098
3099 return 0;
3100 }
3101
3102 static bool context_has_roothash(Context *context) {
3103 LIST_FOREACH(partitions, p, context->partitions)
3104 if (p->roothash)
3105 return true;
3106
3107 return false;
3108 }
3109
3110 static int context_dump(Context *context, bool late) {
3111 int r;
3112
3113 assert(context);
3114
3115 if (arg_pretty == 0 && FLAGS_SET(arg_json_format_flags, JSON_FORMAT_OFF))
3116 return 0;
3117
3118 /* If we're outputting JSON, only dump after doing all operations so we can include the roothashes
3119 * in the output. */
3120 if (!late && !FLAGS_SET(arg_json_format_flags, JSON_FORMAT_OFF))
3121 return 0;
3122
3123 /* If we're not outputting JSON, only dump again after doing all operations if there are any
3124 * roothashes that we need to communicate to the user. */
3125 if (late && FLAGS_SET(arg_json_format_flags, JSON_FORMAT_OFF) && !context_has_roothash(context))
3126 return 0;
3127
3128 r = context_dump_partitions(context);
3129 if (r < 0)
3130 return r;
3131
3132 /* Make sure we only write the partition bar once, even if we're writing the partition table twice to
3133 * communicate roothashes. */
3134 if (FLAGS_SET(arg_json_format_flags, JSON_FORMAT_OFF) && !late) {
3135 putc('\n', stdout);
3136
3137 r = context_dump_partition_bar(context);
3138 if (r < 0)
3139 return r;
3140
3141 putc('\n', stdout);
3142 }
3143
3144 fflush(stdout);
3145
3146 return 0;
3147 }
3148
3149
3150 static bool context_changed(const Context *context) {
3151 assert(context);
3152
3153 LIST_FOREACH(partitions, p, context->partitions) {
3154 if (p->dropped)
3155 continue;
3156
3157 if (p->allocated_to_area)
3158 return true;
3159
3160 if (p->new_size != p->current_size)
3161 return true;
3162 }
3163
3164 return false;
3165 }
3166
3167 static int context_wipe_range(Context *context, uint64_t offset, uint64_t size) {
3168 _cleanup_(blkid_free_probep) blkid_probe probe = NULL;
3169 int r;
3170
3171 assert(context);
3172 assert(offset != UINT64_MAX);
3173 assert(size != UINT64_MAX);
3174
3175 probe = blkid_new_probe();
3176 if (!probe)
3177 return log_oom();
3178
3179 errno = 0;
3180 r = blkid_probe_set_device(probe, fdisk_get_devfd(context->fdisk_context), offset, size);
3181 if (r < 0)
3182 return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to allocate device probe for wiping.");
3183
3184 errno = 0;
3185 if (blkid_probe_enable_superblocks(probe, true) < 0 ||
3186 blkid_probe_set_superblocks_flags(probe, BLKID_SUBLKS_MAGIC|BLKID_SUBLKS_BADCSUM) < 0 ||
3187 blkid_probe_enable_partitions(probe, true) < 0 ||
3188 blkid_probe_set_partitions_flags(probe, BLKID_PARTS_MAGIC) < 0)
3189 return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to enable superblock and partition probing.");
3190
3191 for (;;) {
3192 errno = 0;
3193 r = blkid_do_probe(probe);
3194 if (r < 0)
3195 return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe for file systems.");
3196 if (r > 0)
3197 break;
3198
3199 errno = 0;
3200 if (blkid_do_wipe(probe, false) < 0)
3201 return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to wipe file system signature.");
3202 }
3203
3204 return 0;
3205 }
3206
3207 static int context_wipe_partition(Context *context, Partition *p) {
3208 int r;
3209
3210 assert(context);
3211 assert(p);
3212 assert(!PARTITION_EXISTS(p)); /* Safety check: never wipe existing partitions */
3213
3214 assert(p->offset != UINT64_MAX);
3215 assert(p->new_size != UINT64_MAX);
3216
3217 r = context_wipe_range(context, p->offset, p->new_size);
3218 if (r < 0)
3219 return r;
3220
3221 log_info("Successfully wiped file system signatures from future partition %" PRIu64 ".", p->partno);
3222 return 0;
3223 }
3224
3225 static int context_discard_range(
3226 Context *context,
3227 uint64_t offset,
3228 uint64_t size) {
3229
3230 struct stat st;
3231 int fd;
3232
3233 assert(context);
3234 assert(offset != UINT64_MAX);
3235 assert(size != UINT64_MAX);
3236
3237 if (size <= 0)
3238 return 0;
3239
3240 assert_se((fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
3241
3242 if (fstat(fd, &st) < 0)
3243 return -errno;
3244
3245 if (S_ISREG(st.st_mode)) {
3246 if (fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, size) < 0) {
3247 if (ERRNO_IS_NOT_SUPPORTED(errno))
3248 return -EOPNOTSUPP;
3249
3250 return -errno;
3251 }
3252
3253 return 1;
3254 }
3255
3256 if (S_ISBLK(st.st_mode)) {
3257 uint64_t range[2], end;
3258
3259 range[0] = round_up_size(offset, context->sector_size);
3260
3261 if (offset > UINT64_MAX - size)
3262 return -ERANGE;
3263
3264 end = offset + size;
3265 if (end <= range[0])
3266 return 0;
3267
3268 range[1] = round_down_size(end - range[0], context->sector_size);
3269 if (range[1] <= 0)
3270 return 0;
3271
3272 if (ioctl(fd, BLKDISCARD, range) < 0) {
3273 if (ERRNO_IS_NOT_SUPPORTED(errno))
3274 return -EOPNOTSUPP;
3275
3276 return -errno;
3277 }
3278
3279 return 1;
3280 }
3281
3282 return -EOPNOTSUPP;
3283 }
3284
3285 static int context_discard_partition(Context *context, Partition *p) {
3286 int r;
3287
3288 assert(context);
3289 assert(p);
3290
3291 assert(p->offset != UINT64_MAX);
3292 assert(p->new_size != UINT64_MAX);
3293 assert(!PARTITION_EXISTS(p)); /* Safety check: never discard existing partitions */
3294
3295 if (!arg_discard)
3296 return 0;
3297
3298 r = context_discard_range(context, p->offset, p->new_size);
3299 if (r == -EOPNOTSUPP) {
3300 log_info("Storage does not support discard, not discarding data in future partition %" PRIu64 ".", p->partno);
3301 return 0;
3302 }
3303 if (r == -EBUSY) {
3304 /* Let's handle this gracefully: https://bugzilla.kernel.org/show_bug.cgi?id=211167 */
3305 log_info("Block device is busy, not discarding partition %" PRIu64 " because it probably is mounted.", p->partno);
3306 return 0;
3307 }
3308 if (r == 0) {
3309 log_info("Partition %" PRIu64 " too short for discard, skipping.", p->partno);
3310 return 0;
3311 }
3312 if (r < 0)
3313 return log_error_errno(r, "Failed to discard data for future partition %" PRIu64 ".", p->partno);
3314
3315 log_info("Successfully discarded data from future partition %" PRIu64 ".", p->partno);
3316 return 1;
3317 }
3318
3319 static int context_discard_gap_after(Context *context, Partition *p) {
3320 uint64_t gap, next = UINT64_MAX;
3321 int r;
3322
3323 assert(context);
3324 assert(!p || (p->offset != UINT64_MAX && p->new_size != UINT64_MAX));
3325
3326 if (!arg_discard)
3327 return 0;
3328
3329 if (p)
3330 gap = p->offset + p->new_size;
3331 else
3332 /* The context start gets rounded up to grain_size, however
3333 * existing partitions may be before that so ensure the gap
3334 * starts at the first actually usable lba
3335 */
3336 gap = fdisk_get_first_lba(context->fdisk_context) * context->sector_size;
3337
3338 LIST_FOREACH(partitions, q, context->partitions) {
3339 if (q->dropped)
3340 continue;
3341
3342 assert(q->offset != UINT64_MAX);
3343 assert(q->new_size != UINT64_MAX);
3344
3345 if (q->offset < gap)
3346 continue;
3347
3348 if (next == UINT64_MAX || q->offset < next)
3349 next = q->offset;
3350 }
3351
3352 if (next == UINT64_MAX) {
3353 next = (fdisk_get_last_lba(context->fdisk_context) + 1) * context->sector_size;
3354 if (gap > next)
3355 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Partition end beyond disk end.");
3356 }
3357
3358 assert(next >= gap);
3359 r = context_discard_range(context, gap, next - gap);
3360 if (r == -EOPNOTSUPP) {
3361 if (p)
3362 log_info("Storage does not support discard, not discarding gap after partition %" PRIu64 ".", p->partno);
3363 else
3364 log_info("Storage does not support discard, not discarding gap at beginning of disk.");
3365 return 0;
3366 }
3367 if (r == 0) /* Too short */
3368 return 0;
3369 if (r < 0) {
3370 if (p)
3371 return log_error_errno(r, "Failed to discard gap after partition %" PRIu64 ".", p->partno);
3372 else
3373 return log_error_errno(r, "Failed to discard gap at beginning of disk.");
3374 }
3375
3376 if (p)
3377 log_info("Successfully discarded gap after partition %" PRIu64 ".", p->partno);
3378 else
3379 log_info("Successfully discarded gap at beginning of disk.");
3380
3381 return 0;
3382 }
3383
3384 static int context_wipe_and_discard(Context *context) {
3385 int r;
3386
3387 assert(context);
3388
3389 if (arg_empty == EMPTY_CREATE) /* If we just created the image, no need to wipe */
3390 return 0;
3391
3392 /* Wipe and discard the contents of all partitions we are about to create. We skip the discarding if
3393 * we were supposed to start from scratch anyway, as in that case we just discard the whole block
3394 * device in one go early on. */
3395
3396 LIST_FOREACH(partitions, p, context->partitions) {
3397
3398 if (!p->allocated_to_area)
3399 continue;
3400
3401 if (partition_type_defer(&p->type))
3402 continue;
3403
3404 r = context_wipe_partition(context, p);
3405 if (r < 0)
3406 return r;
3407
3408 if (!context->from_scratch) {
3409 r = context_discard_partition(context, p);
3410 if (r < 0)
3411 return r;
3412
3413 r = context_discard_gap_after(context, p);
3414 if (r < 0)
3415 return r;
3416 }
3417 }
3418
3419 if (!context->from_scratch) {
3420 r = context_discard_gap_after(context, NULL);
3421 if (r < 0)
3422 return r;
3423 }
3424
3425 return 0;
3426 }
3427
/* State of a LUKS volume that was activated so that a partition's contents can be written through
 * the decrypted device-mapper node. */
typedef struct DecryptedPartitionTarget {
        int fd;                      /* open file descriptor of the decrypted volume node */
        char *dm_name;               /* device-mapper name the volume was activated under */
        char *volume;                /* path of the decrypted volume node */
        struct crypt_device *device; /* libcryptsetup context of the volume */
} DecryptedPartitionTarget;
3434
3435 static DecryptedPartitionTarget* decrypted_partition_target_free(DecryptedPartitionTarget *t) {
3436 #if HAVE_LIBCRYPTSETUP
3437 int r;
3438
3439 if (!t)
3440 return NULL;
3441
3442 safe_close(t->fd);
3443
3444 /* udev or so might access out block device in the background while we are done. Let's hence
3445 * force detach the volume. We sync'ed before, hence this should be safe. */
3446 r = sym_crypt_deactivate_by_name(t->device, t->dm_name, CRYPT_DEACTIVATE_FORCE);
3447 if (r < 0)
3448 log_warning_errno(r, "Failed to deactivate LUKS device, ignoring: %m");
3449
3450 sym_crypt_free(t->device);
3451 free(t->dm_name);
3452 free(t->volume);
3453 free(t);
3454 #endif
3455 return NULL;
3456 }
3457
3458 typedef struct {
3459 LoopDevice *loop;
3460 int fd;
3461 char *path;
3462 int whole_fd;
3463 DecryptedPartitionTarget *decrypted;
3464 } PartitionTarget;
3465
3466 static int partition_target_fd(PartitionTarget *t) {
3467 assert(t);
3468 assert(t->loop || t->fd >= 0 || t->whole_fd >= 0);
3469
3470 if (t->decrypted)
3471 return t->decrypted->fd;
3472
3473 if (t->loop)
3474 return t->loop->fd;
3475
3476 if (t->fd >= 0)
3477 return t->fd;
3478
3479 return t->whole_fd;
3480 }
3481
3482 static const char* partition_target_path(PartitionTarget *t) {
3483 assert(t);
3484 assert(t->loop || t->path);
3485
3486 if (t->decrypted)
3487 return t->decrypted->volume;
3488
3489 if (t->loop)
3490 return t->loop->node;
3491
3492 return t->path;
3493 }
3494
3495 static PartitionTarget *partition_target_free(PartitionTarget *t) {
3496 if (!t)
3497 return NULL;
3498
3499 decrypted_partition_target_free(t->decrypted);
3500 loop_device_unref(t->loop);
3501 safe_close(t->fd);
3502 unlink_and_free(t->path);
3503
3504 return mfree(t);
3505 }
3506
3507 DEFINE_TRIVIAL_CLEANUP_FUNC(PartitionTarget*, partition_target_free);
3508
3509 static int prepare_temporary_file(PartitionTarget *t, uint64_t size) {
3510 _cleanup_(unlink_and_freep) char *temp = NULL;
3511 _cleanup_close_ int fd = -EBADF;
3512 const char *vt;
3513 int r;
3514
3515 assert(t);
3516
3517 r = var_tmp_dir(&vt);
3518 if (r < 0)
3519 return log_error_errno(r, "Could not determine temporary directory: %m");
3520
3521 temp = path_join(vt, "repart-XXXXXX");
3522 if (!temp)
3523 return log_oom();
3524
3525 fd = mkostemp_safe(temp);
3526 if (fd < 0)
3527 return log_error_errno(fd, "Failed to create temporary file: %m");
3528
3529 if (ftruncate(fd, size) < 0)
3530 return log_error_errno(errno, "Failed to truncate temporary file to %s: %m",
3531 FORMAT_BYTES(size));
3532
3533 t->fd = TAKE_FD(fd);
3534 t->path = TAKE_PTR(temp);
3535
3536 return 0;
3537 }
3538
3539 static int partition_target_prepare(
3540 Context *context,
3541 Partition *p,
3542 uint64_t size,
3543 bool need_path,
3544 PartitionTarget **ret) {
3545
3546 _cleanup_(partition_target_freep) PartitionTarget *t = NULL;
3547 _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
3548 int whole_fd, r;
3549
3550 assert(context);
3551 assert(p);
3552 assert(ret);
3553
3554 assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
3555
3556 t = new(PartitionTarget, 1);
3557 if (!t)
3558 return log_oom();
3559 *t = (PartitionTarget) {
3560 .fd = -EBADF,
3561 .whole_fd = -EBADF,
3562 };
3563
3564 if (!need_path) {
3565 if (lseek(whole_fd, p->offset, SEEK_SET) == (off_t) -1)
3566 return log_error_errno(errno, "Failed to seek to partition offset: %m");
3567
3568 t->whole_fd = whole_fd;
3569 *ret = TAKE_PTR(t);
3570 return 0;
3571 }
3572
3573 /* Loopback block devices are not only useful to turn regular files into block devices, but
3574 * also to cut out sections of block devices into new block devices. */
3575
3576 if (arg_offline <= 0) {
3577 r = loop_device_make(whole_fd, O_RDWR, p->offset, size, context->sector_size, 0, LOCK_EX, &d);
3578 if (r < 0 && (arg_offline == 0 || (r != -ENOENT && !ERRNO_IS_PRIVILEGE(r)) || !strv_isempty(p->subvolumes)))
3579 return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno);
3580 if (r >= 0) {
3581 t->loop = TAKE_PTR(d);
3582 *ret = TAKE_PTR(t);
3583 return 0;
3584 }
3585
3586 log_debug_errno(r, "No access to loop devices, falling back to a regular file");
3587 }
3588
3589 /* If we can't allocate a loop device, let's write to a regular file that we copy into the final
3590 * image so we can run in containers and without needing root privileges. On filesystems with
3591 * reflinking support, we can take advantage of this and just reflink the result into the image.
3592 */
3593
3594 r = prepare_temporary_file(t, size);
3595 if (r < 0)
3596 return r;
3597
3598 *ret = TAKE_PTR(t);
3599
3600 return 0;
3601 }
3602
3603 static int partition_target_grow(PartitionTarget *t, uint64_t size) {
3604 int r;
3605
3606 assert(t);
3607 assert(!t->decrypted);
3608
3609 if (t->loop) {
3610 r = loop_device_refresh_size(t->loop, UINT64_MAX, size);
3611 if (r < 0)
3612 return log_error_errno(r, "Failed to refresh loopback device size: %m");
3613 } else if (t->fd >= 0) {
3614 if (ftruncate(t->fd, size) < 0)
3615 return log_error_errno(errno, "Failed to grow '%s' to %s by truncation: %m",
3616 t->path, FORMAT_BYTES(size));
3617 }
3618
3619 return 0;
3620 }
3621
3622 static int partition_target_sync(Context *context, Partition *p, PartitionTarget *t) {
3623 int whole_fd, r;
3624
3625 assert(context);
3626 assert(p);
3627 assert(t);
3628
3629 assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
3630
3631 if (t->decrypted && fsync(t->decrypted->fd) < 0)
3632 return log_error_errno(errno, "Failed to sync changes to '%s': %m", t->decrypted->volume);
3633
3634 if (t->loop) {
3635 r = loop_device_sync(t->loop);
3636 if (r < 0)
3637 return log_error_errno(r, "Failed to sync loopback device: %m");
3638 } else if (t->fd >= 0) {
3639 struct stat st;
3640
3641 if (lseek(whole_fd, p->offset, SEEK_SET) == (off_t) -1)
3642 return log_error_errno(errno, "Failed to seek to partition offset: %m");
3643
3644 if (lseek(t->fd, 0, SEEK_SET) == (off_t) -1)
3645 return log_error_errno(errno, "Failed to seek to start of temporary file: %m");
3646
3647 if (fstat(t->fd, &st) < 0)
3648 return log_error_errno(errno, "Failed to stat temporary file: %m");
3649
3650 if (st.st_size > (off_t) p->new_size)
3651 return log_error_errno(SYNTHETIC_ERRNO(ENOSPC),
3652 "Partition %" PRIu64 "'s contents (%s) don't fit in the partition (%s)",
3653 p->partno, FORMAT_BYTES(st.st_size), FORMAT_BYTES(p->new_size));
3654
3655 r = copy_bytes(t->fd, whole_fd, UINT64_MAX, COPY_REFLINK|COPY_HOLES|COPY_FSYNC);
3656 if (r < 0)
3657 return log_error_errno(r, "Failed to copy bytes to partition: %m");
3658 } else {
3659 if (fsync(t->whole_fd) < 0)
3660 return log_error_errno(errno, "Failed to sync changes: %m");
3661 }
3662
3663 return 0;
3664 }
3665
3666 static int partition_encrypt(Context *context, Partition *p, PartitionTarget *target, bool offline) {
3667 #if HAVE_LIBCRYPTSETUP && HAVE_CRYPT_SET_DATA_OFFSET && HAVE_CRYPT_REENCRYPT_INIT_BY_PASSPHRASE && HAVE_CRYPT_REENCRYPT
3668 const char *node = partition_target_path(target);
3669 struct crypt_params_luks2 luks_params = {
3670 .label = strempty(ASSERT_PTR(p)->new_label),
3671 .sector_size = ASSERT_PTR(context)->fs_sector_size,
3672 .data_device = offline ? node : NULL,
3673 };
3674 struct crypt_params_reencrypt reencrypt_params = {
3675 .mode = CRYPT_REENCRYPT_ENCRYPT,
3676 .direction = CRYPT_REENCRYPT_BACKWARD,
3677 .resilience = "datashift",
3678 .data_shift = LUKS2_METADATA_SIZE / 512,
3679 .luks2 = &luks_params,
3680 .flags = CRYPT_REENCRYPT_INITIALIZE_ONLY|CRYPT_REENCRYPT_MOVE_FIRST_SEGMENT,
3681 };
3682 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
3683 _cleanup_(erase_and_freep) char *base64_encoded = NULL;
3684 _cleanup_fclose_ FILE *h = NULL;
3685 _cleanup_free_ char *hp = NULL, *vol = NULL, *dm_name = NULL;
3686 const char *passphrase = NULL;
3687 size_t passphrase_size = 0;
3688 const char *vt;
3689 int r;
3690
3691 assert(context);
3692 assert(p);
3693 assert(p->encrypt != ENCRYPT_OFF);
3694
3695 r = dlopen_cryptsetup();
3696 if (r < 0)
3697 return log_error_errno(r, "libcryptsetup not found, cannot encrypt: %m");
3698
3699 log_info("Encrypting future partition %" PRIu64 "...", p->partno);
3700
3701 if (offline) {
3702 r = var_tmp_dir(&vt);
3703 if (r < 0)
3704 return log_error_errno(r, "Failed to determine temporary files directory: %m");
3705
3706 r = fopen_temporary_child(vt, &h, &hp);
3707 if (r < 0)
3708 return log_error_errno(r, "Failed to create temporary LUKS header file: %m");
3709
3710 /* Weird cryptsetup requirement which requires the header file to be the size of at least one
3711 * sector. */
3712 r = ftruncate(fileno(h), luks_params.sector_size);
3713 if (r < 0)
3714 return log_error_errno(r, "Failed to grow temporary LUKS header file: %m");
3715 } else {
3716 if (asprintf(&dm_name, "luks-repart-%08" PRIx64, random_u64()) < 0)
3717 return log_oom();
3718
3719 vol = path_join("/dev/mapper/", dm_name);
3720 if (!vol)
3721 return log_oom();
3722 }
3723
3724 r = sym_crypt_init(&cd, offline ? hp : node);
3725 if (r < 0)
3726 return log_error_errno(r, "Failed to allocate libcryptsetup context for %s: %m", hp);
3727
3728 cryptsetup_enable_logging(cd);
3729
3730 if (offline) {
3731 /* Disable kernel keyring usage by libcryptsetup as a workaround for
3732 * https://gitlab.com/cryptsetup/cryptsetup/-/merge_requests/273. This makes sure that we can
3733 * do offline encryption even when repart is running in a container. */
3734 r = sym_crypt_volume_key_keyring(cd, false);
3735 if (r < 0)
3736 return log_error_errno(r, "Failed to disable kernel keyring: %m");
3737
3738 r = sym_crypt_metadata_locking(cd, false);
3739 if (r < 0)
3740 return log_error_errno(r, "Failed to disable metadata locking: %m");
3741
3742 r = sym_crypt_set_data_offset(cd, LUKS2_METADATA_SIZE / 512);
3743 if (r < 0)
3744 return log_error_errno(r, "Failed to set data offset: %m");
3745 }
3746
3747 r = sym_crypt_format(cd,
3748 CRYPT_LUKS2,
3749 "aes",
3750 "xts-plain64",
3751 SD_ID128_TO_UUID_STRING(p->luks_uuid),
3752 NULL,
3753 VOLUME_KEY_SIZE,
3754 &luks_params);
3755 if (r < 0)
3756 return log_error_errno(r, "Failed to LUKS2 format future partition: %m");
3757
3758 if (IN_SET(p->encrypt, ENCRYPT_KEY_FILE, ENCRYPT_KEY_FILE_TPM2)) {
3759 r = sym_crypt_keyslot_add_by_volume_key(
3760 cd,
3761 CRYPT_ANY_SLOT,
3762 NULL,
3763 VOLUME_KEY_SIZE,
3764 strempty(arg_key),
3765 arg_key_size);
3766 if (r < 0)
3767 return log_error_errno(r, "Failed to add LUKS2 key: %m");
3768
3769 passphrase = strempty(arg_key);
3770 passphrase_size = arg_key_size;
3771 }
3772
3773 if (IN_SET(p->encrypt, ENCRYPT_TPM2, ENCRYPT_KEY_FILE_TPM2)) {
3774 #if HAVE_TPM2
3775 _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
3776 _cleanup_(erase_and_freep) void *secret = NULL;
3777 _cleanup_free_ void *pubkey = NULL;
3778 _cleanup_free_ void *blob = NULL, *srk_buf = NULL;
3779 size_t secret_size, blob_size, pubkey_size = 0, srk_buf_size = 0;
3780 ssize_t base64_encoded_size;
3781 int keyslot;
3782
3783 if (arg_tpm2_public_key_pcr_mask != 0) {
3784 r = tpm2_load_pcr_public_key(arg_tpm2_public_key, &pubkey, &pubkey_size);
3785 if (r < 0) {
3786 if (arg_tpm2_public_key || r != -ENOENT)
3787 return log_error_errno(r, "Failed read TPM PCR public key: %m");
3788
3789 log_debug_errno(r, "Failed to read TPM2 PCR public key, proceeding without: %m");
3790 arg_tpm2_public_key_pcr_mask = 0;
3791 }
3792 }
3793
3794 _cleanup_(tpm2_context_unrefp) Tpm2Context *tpm2_context = NULL;
3795 r = tpm2_context_new(arg_tpm2_device, &tpm2_context);
3796 if (r < 0)
3797 return log_error_errno(r, "Failed to create TPM2 context: %m");
3798
3799 TPM2B_PUBLIC public;
3800 if (pubkey) {
3801 r = tpm2_tpm2b_public_from_pem(pubkey, pubkey_size, &public);
3802 if (r < 0)
3803 return log_error_errno(r, "Could not convert public key to TPM2B_PUBLIC: %m");
3804 }
3805
3806 r = tpm2_pcr_read_missing_values(tpm2_context, arg_tpm2_hash_pcr_values, arg_tpm2_n_hash_pcr_values);
3807 if (r < 0)
3808 return log_error_errno(r, "Could not read pcr values: %m");
3809
3810 uint16_t hash_pcr_bank = 0;
3811 uint32_t hash_pcr_mask = 0;
3812 if (arg_tpm2_n_hash_pcr_values > 0) {
3813 size_t hash_count;
3814 r = tpm2_pcr_values_hash_count(arg_tpm2_hash_pcr_values, arg_tpm2_n_hash_pcr_values, &hash_count);
3815 if (r < 0)
3816 return log_error_errno(r, "Could not get hash count: %m");
3817
3818 if (hash_count > 1)
3819 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Multiple PCR banks selected.");
3820
3821 hash_pcr_bank = arg_tpm2_hash_pcr_values[0].hash;
3822 r = tpm2_pcr_values_to_mask(arg_tpm2_hash_pcr_values, arg_tpm2_n_hash_pcr_values, hash_pcr_bank, &hash_pcr_mask);
3823 if (r < 0)
3824 return log_error_errno(r, "Could not get hash mask: %m");
3825 }
3826
3827 TPM2B_DIGEST policy = TPM2B_DIGEST_MAKE(NULL, TPM2_SHA256_DIGEST_SIZE);
3828 r = tpm2_calculate_sealing_policy(arg_tpm2_hash_pcr_values, arg_tpm2_n_hash_pcr_values, pubkey ? &public : NULL, /* use_pin= */ false, &policy);
3829 if (r < 0)
3830 return log_error_errno(r, "Could not calculate sealing policy digest: %m");
3831
3832 r = tpm2_seal(tpm2_context,
3833 /* seal_key_handle= */ 0,
3834 &policy,
3835 /* pin= */ NULL,
3836 &secret, &secret_size,
3837 &blob, &blob_size,
3838 /* ret_primary_alg= */ NULL,
3839 &srk_buf, &srk_buf_size);
3840 if (r < 0)
3841 return log_error_errno(r, "Failed to seal to TPM2: %m");
3842
3843 base64_encoded_size = base64mem(secret, secret_size, &base64_encoded);
3844 if (base64_encoded_size < 0)
3845 return log_error_errno(base64_encoded_size, "Failed to base64 encode secret key: %m");
3846
3847 r = cryptsetup_set_minimal_pbkdf(cd);
3848 if (r < 0)
3849 return log_error_errno(r, "Failed to set minimal PBKDF: %m");
3850
3851 keyslot = sym_crypt_keyslot_add_by_volume_key(
3852 cd,
3853 CRYPT_ANY_SLOT,
3854 NULL,
3855 VOLUME_KEY_SIZE,
3856 base64_encoded,
3857 base64_encoded_size);
3858 if (keyslot < 0)
3859 return log_error_errno(keyslot, "Failed to add new TPM2 key: %m");
3860
3861 r = tpm2_make_luks2_json(
3862 keyslot,
3863 hash_pcr_mask,
3864 hash_pcr_bank,
3865 pubkey, pubkey_size,
3866 arg_tpm2_public_key_pcr_mask,
3867 /* primary_alg= */ 0,
3868 blob, blob_size,
3869 policy.buffer, policy.size,
3870 NULL, 0, /* no salt because tpm2_seal has no pin */
3871 srk_buf, srk_buf_size,
3872 0,
3873 &v);
3874 if (r < 0)
3875 return log_error_errno(r, "Failed to prepare TPM2 JSON token object: %m");
3876
3877 r = cryptsetup_add_token_json(cd, v);
3878 if (r < 0)
3879 return log_error_errno(r, "Failed to add TPM2 JSON token to LUKS2 header: %m");
3880
3881 passphrase = base64_encoded;
3882 passphrase_size = strlen(base64_encoded);
3883 #else
3884 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
3885 "Support for TPM2 enrollment not enabled.");
3886 #endif
3887 }
3888
3889 if (offline) {
3890 r = sym_crypt_reencrypt_init_by_passphrase(
3891 cd,
3892 NULL,
3893 passphrase,
3894 passphrase_size,
3895 CRYPT_ANY_SLOT,
3896 0,
3897 sym_crypt_get_cipher(cd),
3898 sym_crypt_get_cipher_mode(cd),
3899 &reencrypt_params);
3900 if (r < 0)
3901 return log_error_errno(r, "Failed to prepare for reencryption: %m");
3902
3903 /* crypt_reencrypt_init_by_passphrase() doesn't actually put the LUKS header at the front, we
3904 * have to do that ourselves. */
3905
3906 sym_crypt_free(cd);
3907 cd = NULL;
3908
3909 r = sym_crypt_init(&cd, node);
3910 if (r < 0)
3911 return log_error_errno(r, "Failed to allocate libcryptsetup context for %s: %m", node);
3912
3913 r = sym_crypt_header_restore(cd, CRYPT_LUKS2, hp);
3914 if (r < 0)
3915 return log_error_errno(r, "Failed to place new LUKS header at head of %s: %m", node);
3916
3917 reencrypt_params.flags &= ~CRYPT_REENCRYPT_INITIALIZE_ONLY;
3918
3919 r = sym_crypt_reencrypt_init_by_passphrase(
3920 cd,
3921 NULL,
3922 passphrase,
3923 passphrase_size,
3924 CRYPT_ANY_SLOT,
3925 0,
3926 NULL,
3927 NULL,
3928 &reencrypt_params);
3929 if (r < 0)
3930 return log_error_errno(r, "Failed to load reencryption context: %m");
3931
3932 r = sym_crypt_reencrypt(cd, NULL);
3933 if (r < 0)
3934 return log_error_errno(r, "Failed to encrypt %s: %m", node);
3935 } else {
3936 _cleanup_free_ DecryptedPartitionTarget *t = NULL;
3937 _cleanup_close_ int dev_fd = -1;
3938
3939 r = sym_crypt_activate_by_volume_key(
3940 cd,
3941 dm_name,
3942 NULL,
3943 VOLUME_KEY_SIZE,
3944 arg_discard ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0);
3945 if (r < 0)
3946 return log_error_errno(r, "Failed to activate LUKS superblock: %m");
3947
3948 dev_fd = open(vol, O_RDWR|O_CLOEXEC|O_NOCTTY);
3949 if (dev_fd < 0)
3950 return log_error_errno(errno, "Failed to open LUKS volume '%s': %m", vol);
3951
3952 if (flock(dev_fd, LOCK_EX) < 0)
3953 return log_error_errno(errno, "Failed to lock '%s': %m", vol);
3954
3955 t = new(DecryptedPartitionTarget, 1);
3956 if (!t)
3957 return log_oom();
3958
3959 *t = (DecryptedPartitionTarget) {
3960 .fd = TAKE_FD(dev_fd),
3961 .dm_name = TAKE_PTR(dm_name),
3962 .volume = TAKE_PTR(vol),
3963 .device = TAKE_PTR(cd),
3964 };
3965
3966 target->decrypted = TAKE_PTR(t);
3967 }
3968
3969 log_info("Successfully encrypted future partition %" PRIu64 ".", p->partno);
3970
3971 return 0;
3972 #else
3973 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
3974 "libcryptsetup is not supported or is missing required symbols, cannot encrypt: %m");
3975 #endif
3976 }
3977
3978 static int partition_format_verity_hash(
3979 Context *context,
3980 Partition *p,
3981 const char *node,
3982 const char *data_node) {
3983
3984 #if HAVE_LIBCRYPTSETUP
3985 Partition *dp;
3986 _cleanup_(partition_target_freep) PartitionTarget *t = NULL;
3987 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
3988 _cleanup_free_ char *hint = NULL;
3989 _cleanup_free_ uint8_t *rh = NULL;
3990 size_t rhs;
3991 int r;
3992
3993 assert(context);
3994 assert(p);
3995 assert(p->verity == VERITY_HASH);
3996 assert(data_node);
3997
3998 if (p->dropped)
3999 return 0;
4000
4001 if (PARTITION_EXISTS(p)) /* Never format existing partitions */
4002 return 0;
4003
4004 /* Minimized partitions will use the copy blocks logic so let's make sure to skip those here. */
4005 if (p->copy_blocks_fd >= 0)
4006 return 0;
4007
4008 assert_se(dp = p->siblings[VERITY_DATA]);
4009 assert(!dp->dropped);
4010
4011 (void) partition_hint(p, node, &hint);
4012
4013 r = dlopen_cryptsetup();
4014 if (r < 0)
4015 return log_error_errno(r, "libcryptsetup not found, cannot setup verity: %m");
4016
4017 if (!node) {
4018 r = partition_target_prepare(context, p, p->new_size, /*need_path=*/ true, &t);
4019 if (r < 0)
4020 return r;
4021
4022 node = partition_target_path(t);
4023 }
4024
4025 if (p->verity_data_block_size == UINT64_MAX)
4026 p->verity_data_block_size = context->fs_sector_size;
4027 if (p->verity_hash_block_size == UINT64_MAX)
4028 p->verity_hash_block_size = context->fs_sector_size;
4029
4030 r = sym_crypt_init(&cd, node);
4031 if (r < 0)
4032 return log_error_errno(r, "Failed to allocate libcryptsetup context for %s: %m", node);
4033
4034 cryptsetup_enable_logging(cd);
4035
4036 r = sym_crypt_format(
4037 cd, CRYPT_VERITY, NULL, NULL, SD_ID128_TO_UUID_STRING(p->verity_uuid), NULL, 0,
4038 &(struct crypt_params_verity){
4039 .data_device = data_node,
4040 .flags = CRYPT_VERITY_CREATE_HASH,
4041 .hash_name = "sha256",
4042 .hash_type = 1,
4043 .data_block_size = p->verity_data_block_size,
4044 .hash_block_size = p->verity_hash_block_size,
4045 .salt_size = sizeof(p->verity_salt),
4046 .salt = (const char*)p->verity_salt,
4047 });
4048 if (r < 0) {
4049 /* libcryptsetup reports non-descriptive EIO errors for every I/O failure. Luckily, it
4050 * doesn't clobber errno so let's check for ENOSPC so we can report a better error if the
4051 * partition is too small. */
4052 if (r == -EIO && errno == ENOSPC)
4053 return log_error_errno(errno,
4054 "Verity hash data does not fit in partition %s with size %s",
4055 strna(hint), FORMAT_BYTES(p->new_size));
4056
4057 return log_error_errno(r, "Failed to setup verity hash data of partition %s: %m", strna(hint));
4058 }
4059
4060 if (t) {
4061 r = partition_target_sync(context, p, t);
4062 if (r < 0)
4063 return r;
4064 }
4065
4066 r = sym_crypt_get_volume_key_size(cd);
4067 if (r < 0)
4068 return log_error_errno(r, "Failed to determine verity root hash size of partition %s: %m", strna(hint));
4069 rhs = (size_t) r;
4070
4071 rh = malloc(rhs);
4072 if (!rh)
4073 return log_oom();
4074
4075 r = sym_crypt_volume_key_get(cd, CRYPT_ANY_SLOT, (char *) rh, &rhs, NULL, 0);
4076 if (r < 0)
4077 return log_error_errno(r, "Failed to get verity root hash of partition %s: %m", strna(hint));
4078
4079 assert(rhs >= sizeof(sd_id128_t) * 2);
4080
4081 if (!dp->new_uuid_is_set) {
4082 memcpy_safe(dp->new_uuid.bytes, rh, sizeof(sd_id128_t));
4083 dp->new_uuid_is_set = true;
4084 }
4085
4086 if (!p->new_uuid_is_set) {
4087 memcpy_safe(p->new_uuid.bytes, rh + rhs - sizeof(sd_id128_t), sizeof(sd_id128_t));
4088 p->new_uuid_is_set = true;
4089 }
4090
4091 p->roothash = TAKE_PTR(rh);
4092 p->roothash_size = rhs;
4093
4094 return 0;
4095 #else
4096 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "libcryptsetup is not supported, cannot setup verity hashes: %m");
4097 #endif
4098 }
4099
4100 static int sign_verity_roothash(
4101 const uint8_t *roothash,
4102 size_t roothash_size,
4103 uint8_t **ret_signature,
4104 size_t *ret_signature_size) {
4105
4106 #if HAVE_OPENSSL
4107 _cleanup_(BIO_freep) BIO *rb = NULL;
4108 _cleanup_(PKCS7_freep) PKCS7 *p7 = NULL;
4109 _cleanup_free_ char *hex = NULL;
4110 _cleanup_free_ uint8_t *sig = NULL;
4111 int sigsz;
4112
4113 assert(roothash);
4114 assert(roothash_size > 0);
4115 assert(ret_signature);
4116 assert(ret_signature_size);
4117
4118 hex = hexmem(roothash, roothash_size);
4119 if (!hex)
4120 return log_oom();
4121
4122 rb = BIO_new_mem_buf(hex, -1);
4123 if (!rb)
4124 return log_oom();
4125
4126 p7 = PKCS7_sign(arg_certificate, arg_private_key, NULL, rb, PKCS7_DETACHED|PKCS7_NOATTR|PKCS7_BINARY);
4127 if (!p7)
4128 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to calculate PKCS7 signature: %s",
4129 ERR_error_string(ERR_get_error(), NULL));
4130
4131 sigsz = i2d_PKCS7(p7, &sig);
4132 if (sigsz < 0)
4133 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to convert PKCS7 signature to DER: %s",
4134 ERR_error_string(ERR_get_error(), NULL));
4135
4136 *ret_signature = TAKE_PTR(sig);
4137 *ret_signature_size = sigsz;
4138
4139 return 0;
4140 #else
4141 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "openssl is not supported, cannot setup verity signature: %m");
4142 #endif
4143 }
4144
4145 static int partition_format_verity_sig(Context *context, Partition *p) {
4146 _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
4147 _cleanup_free_ uint8_t *sig = NULL;
4148 _cleanup_free_ char *text = NULL, *hint = NULL;
4149 Partition *hp;
4150 uint8_t fp[X509_FINGERPRINT_SIZE];
4151 size_t sigsz = 0; /* avoid false maybe-uninitialized warning */
4152 int whole_fd, r;
4153
4154 assert(p->verity == VERITY_SIG);
4155
4156 if (p->dropped)
4157 return 0;
4158
4159 if (PARTITION_EXISTS(p))
4160 return 0;
4161
4162 (void) partition_hint(p, context->node, &hint);
4163
4164 assert_se(hp = p->siblings[VERITY_HASH]);
4165 assert(!hp->dropped);
4166
4167 assert(arg_certificate);
4168
4169 assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
4170
4171 r = sign_verity_roothash(hp->roothash, hp->roothash_size, &sig, &sigsz);
4172 if (r < 0)
4173 return r;
4174
4175 r = x509_fingerprint(arg_certificate, fp);
4176 if (r < 0)
4177 return log_error_errno(r, "Unable to calculate X509 certificate fingerprint: %m");
4178
4179 r = json_build(&v,
4180 JSON_BUILD_OBJECT(
4181 JSON_BUILD_PAIR("rootHash", JSON_BUILD_HEX(hp->roothash, hp->roothash_size)),
4182 JSON_BUILD_PAIR(
4183 "certificateFingerprint",
4184 JSON_BUILD_HEX(fp, sizeof(fp))
4185 ),
4186 JSON_BUILD_PAIR("signature", JSON_BUILD_BASE64(sig, sigsz))
4187 )
4188 );
4189 if (r < 0)
4190 return log_error_errno(r, "Failed to build verity signature JSON object: %m");
4191
4192 r = json_variant_format(v, 0, &text);
4193 if (r < 0)
4194 return log_error_errno(r, "Failed to format verity signature JSON object: %m");
4195
4196 if (strlen(text)+1 > p->new_size)
4197 return log_error_errno(SYNTHETIC_ERRNO(E2BIG), "Verity signature too long for partition: %m");
4198
4199 r = strgrowpad0(&text, p->new_size);
4200 if (r < 0)
4201 return log_error_errno(r, "Failed to pad string to %s", FORMAT_BYTES(p->new_size));
4202
4203 if (lseek(whole_fd, p->offset, SEEK_SET) == (off_t) -1)
4204 return log_error_errno(errno, "Failed to seek to partition %s offset: %m", strna(hint));
4205
4206 r = loop_write(whole_fd, text, p->new_size);
4207 if (r < 0)
4208 return log_error_errno(r, "Failed to write verity signature to partition %s: %m", strna(hint));
4209
4210 if (fsync(whole_fd) < 0)
4211 return log_error_errno(errno, "Failed to synchronize partition %s: %m", strna(hint));
4212
4213 return 0;
4214 }
4215
4216 static int context_copy_blocks(Context *context) {
4217 int r;
4218
4219 assert(context);
4220
4221 /* Copy in file systems on the block level */
4222
4223 LIST_FOREACH(partitions, p, context->partitions) {
4224 _cleanup_(partition_target_freep) PartitionTarget *t = NULL;
4225
4226 if (p->copy_blocks_fd < 0)
4227 continue;
4228
4229 if (p->dropped)
4230 continue;
4231
4232 if (PARTITION_EXISTS(p)) /* Never copy over existing partitions */
4233 continue;
4234
4235 if (partition_type_defer(&p->type))
4236 continue;
4237
4238 assert(p->new_size != UINT64_MAX);
4239 assert(p->copy_blocks_size != UINT64_MAX);
4240 assert(p->new_size >= p->copy_blocks_size + (p->encrypt != ENCRYPT_OFF ? LUKS2_METADATA_KEEP_FREE : 0));
4241
4242 r = partition_target_prepare(context, p, p->new_size,
4243 /*need_path=*/ p->encrypt != ENCRYPT_OFF || p->siblings[VERITY_HASH],
4244 &t);
4245 if (r < 0)
4246 return r;
4247
4248 if (p->encrypt != ENCRYPT_OFF && t->loop) {
4249 r = partition_encrypt(context, p, t, /* offline = */ false);
4250 if (r < 0)
4251 return r;
4252 }
4253
4254 log_info("Copying in '%s' (%s) on block level into future partition %" PRIu64 ".",
4255 p->copy_blocks_path, FORMAT_BYTES(p->copy_blocks_size), p->partno);
4256
4257 if (p->copy_blocks_offset != UINT64_MAX && lseek(p->copy_blocks_fd, p->copy_blocks_offset, SEEK_SET) < 0)
4258 return log_error_errno(errno, "Failed to seek to copy blocks offset in %s: %m", p->copy_blocks_path);
4259
4260 r = copy_bytes(p->copy_blocks_fd, partition_target_fd(t), p->copy_blocks_size, COPY_REFLINK);
4261 if (r < 0)
4262 return log_error_errno(r, "Failed to copy in data from '%s': %m", p->copy_blocks_path);
4263
4264 log_info("Copying in of '%s' on block level completed.", p->copy_blocks_path);
4265
4266 if (p->encrypt != ENCRYPT_OFF && !t->loop) {
4267 r = partition_encrypt(context, p, t, /* offline = */ true);
4268 if (r < 0)
4269 return r;
4270 }
4271
4272 r = partition_target_sync(context, p, t);
4273 if (r < 0)
4274 return r;
4275
4276 if (p->siblings[VERITY_HASH] && !partition_type_defer(&p->siblings[VERITY_HASH]->type)) {
4277 r = partition_format_verity_hash(context, p->siblings[VERITY_HASH],
4278 /* node = */ NULL, partition_target_path(t));
4279 if (r < 0)
4280 return r;
4281 }
4282
4283 if (p->siblings[VERITY_SIG] && !partition_type_defer(&p->siblings[VERITY_SIG]->type)) {
4284 r = partition_format_verity_sig(context, p->siblings[VERITY_SIG]);
4285 if (r < 0)
4286 return r;
4287 }
4288 }
4289
4290 return 0;
4291 }
4292
4293 static int add_exclude_path(const char *path, Hashmap **denylist, DenyType type) {
4294 _cleanup_free_ struct stat *st = NULL;
4295 int r;
4296
4297 assert(path);
4298 assert(denylist);
4299
4300 st = new(struct stat, 1);
4301 if (!st)
4302 return log_oom();
4303
4304 r = chase_and_stat(path, arg_copy_source, CHASE_PREFIX_ROOT, NULL, st);
4305 if (r == -ENOENT)
4306 return 0;
4307 if (r < 0)
4308 return log_error_errno(r, "Failed to stat source file '%s/%s': %m", strempty(arg_copy_source), path);
4309
4310 r = hashmap_ensure_put(denylist, &inode_hash_ops, st, INT_TO_PTR(type));
4311 if (r == -EEXIST)
4312 return 0;
4313 if (r < 0)
4314 return log_oom();
4315 if (r > 0)
4316 TAKE_PTR(st);
4317
4318 return 0;
4319 }
4320
4321 static int make_copy_files_denylist(
4322 Context *context,
4323 const Partition *p,
4324 const char *source,
4325 const char *target,
4326 Hashmap **ret) {
4327
4328 _cleanup_hashmap_free_ Hashmap *denylist = NULL;
4329 int r;
4330
4331 assert(context);
4332 assert(p);
4333 assert(source);
4334 assert(target);
4335 assert(ret);
4336
4337 /* Always exclude the top level APIVFS and temporary directories since the contents of these
4338 * directories are almost certainly not intended to end up in an image. */
4339
4340 NULSTR_FOREACH(s, APIVFS_TMP_DIRS_NULSTR) {
4341 r = add_exclude_path(s, &denylist, DENY_CONTENTS);
4342 if (r < 0)
4343 return r;
4344 }
4345
4346 /* Add the user configured excludes. */
4347
4348 STRV_FOREACH(e, p->exclude_files_source) {
4349 r = add_exclude_path(*e, &denylist, endswith(*e, "/") ? DENY_CONTENTS : DENY_INODE);
4350 if (r < 0)
4351 return r;
4352 }
4353
4354 STRV_FOREACH(e, p->exclude_files_target) {
4355 _cleanup_free_ char *path = NULL;
4356
4357 const char *s = path_startswith(*e, target);
4358 if (!s)
4359 continue;
4360
4361 path = path_join(source, s);
4362 if (!path)
4363 return log_oom();
4364
4365 r = add_exclude_path(path, &denylist, endswith(*e, "/") ? DENY_CONTENTS : DENY_INODE);
4366 if (r < 0)
4367 return r;
4368 }
4369
4370 /* If we're populating a root partition, we don't want any files to end up under the APIVFS mount
4371 * points. While we already exclude <source>/proc, users could still do something such as
4372 * "CopyFiles=/abc:/". Now, if /abc has a proc subdirectory with files in it, those will end up in
4373 * the top level proc directory in the root partition, which we want to avoid. To deal with these
4374 * cases, whenever we're populating a root partition and the target of CopyFiles= is the root
4375 * directory of the root partition, we exclude all directories under the source that are named after
4376 * APIVFS directories or named after mount points of other partitions that are also going to be part
4377 * of the image. */
4378
4379 if (p->type.designator == PARTITION_ROOT && empty_or_root(target)) {
4380 LIST_FOREACH(partitions, q, context->partitions) {
4381 if (q->type.designator == PARTITION_ROOT)
4382 continue;
4383
4384 const char *sources = gpt_partition_type_mountpoint_nulstr(q->type);
4385 if (!sources)
4386 continue;
4387
4388 NULSTR_FOREACH(s, sources) {
4389 _cleanup_free_ char *path = NULL;
4390
4391 /* Exclude only the children of partition mount points so that the nested
4392 * partition mount point itself still ends up in the upper partition. */
4393
4394 path = path_join(source, s);
4395 if (!path)
4396 return -ENOMEM;
4397
4398 r = add_exclude_path(path, &denylist, DENY_CONTENTS);
4399 if (r < 0)
4400 return r;
4401 }
4402 }
4403
4404 NULSTR_FOREACH(s, APIVFS_TMP_DIRS_NULSTR) {
4405 _cleanup_free_ char *path = NULL;
4406
4407 path = path_join(source, s);
4408 if (!path)
4409 return -ENOMEM;
4410
4411 r = add_exclude_path(path, &denylist, DENY_CONTENTS);
4412 if (r < 0)
4413 return r;
4414 }
4415 }
4416
4417 *ret = TAKE_PTR(denylist);
4418 return 0;
4419 }
4420
4421 static int add_subvolume_path(const char *path, Set **subvolumes) {
4422 _cleanup_free_ struct stat *st = NULL;
4423 int r;
4424
4425 assert(path);
4426 assert(subvolumes);
4427
4428 st = new(struct stat, 1);
4429 if (!st)
4430 return log_oom();
4431
4432 r = chase_and_stat(path, arg_copy_source, CHASE_PREFIX_ROOT, NULL, st);
4433 if (r == -ENOENT)
4434 return 0;
4435 if (r < 0)
4436 return log_error_errno(r, "Failed to stat source file '%s/%s': %m", strempty(arg_copy_source), path);
4437
4438 r = set_ensure_consume(subvolumes, &inode_hash_ops, TAKE_PTR(st));
4439 if (r < 0)
4440 return log_oom();
4441
4442 return 0;
4443 }
4444
4445 static int make_subvolumes_set(
4446 Context *context,
4447 const Partition *p,
4448 const char *source,
4449 const char *target,
4450 Set **ret) {
4451 _cleanup_set_free_ Set *subvolumes = NULL;
4452 int r;
4453
4454 assert(context);
4455 assert(p);
4456 assert(target);
4457 assert(ret);
4458
4459 STRV_FOREACH(subvolume, p->subvolumes) {
4460 _cleanup_free_ char *path = NULL;
4461
4462 const char *s = path_startswith(*subvolume, target);
4463 if (!s)
4464 continue;
4465
4466 path = path_join(source, s);
4467 if (!path)
4468 return log_oom();
4469
4470 r = add_subvolume_path(path, &subvolumes);
4471 if (r < 0)
4472 return r;
4473 }
4474
4475 *ret = TAKE_PTR(subvolumes);
4476 return 0;
4477 }
4478
4479 static int do_copy_files(Context *context, Partition *p, const char *root) {
4480 int r;
4481
4482 assert(p);
4483 assert(root);
4484
4485 /* copy_tree_at() automatically copies the permissions of source directories to target directories if
4486 * it created them. However, the root directory is created by us, so we have to manually take care
4487 * that it is initialized. We use the first source directory targeting "/" as the metadata source for
4488 * the root directory. */
4489 STRV_FOREACH_PAIR(source, target, p->copy_files) {
4490 _cleanup_close_ int rfd = -EBADF, sfd = -EBADF;
4491
4492 if (!path_equal(*target, "/"))
4493 continue;
4494
4495 rfd = open(root, O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
4496 if (rfd < 0)
4497 return rfd;
4498
4499 sfd = chase_and_open(*source, arg_copy_source, CHASE_PREFIX_ROOT, O_PATH|O_DIRECTORY|O_CLOEXEC|O_NOCTTY, NULL);
4500 if (sfd < 0)
4501 return log_error_errno(sfd, "Failed to open source file '%s%s': %m", strempty(arg_copy_source), *source);
4502
4503 (void) copy_xattr(sfd, NULL, rfd, NULL, COPY_ALL_XATTRS);
4504 (void) copy_access(sfd, rfd);
4505 (void) copy_times(sfd, rfd, 0);
4506
4507 break;
4508 }
4509
4510 STRV_FOREACH_PAIR(source, target, p->copy_files) {
4511 _cleanup_hashmap_free_ Hashmap *denylist = NULL;
4512 _cleanup_set_free_ Set *subvolumes_by_source_inode = NULL;
4513 _cleanup_close_ int sfd = -EBADF, pfd = -EBADF, tfd = -EBADF;
4514
4515 r = make_copy_files_denylist(context, p, *source, *target, &denylist);
4516 if (r < 0)
4517 return r;
4518
4519 r = make_subvolumes_set(context, p, *source, *target, &subvolumes_by_source_inode);
4520 if (r < 0)
4521 return r;
4522
4523 sfd = chase_and_open(*source, arg_copy_source, CHASE_PREFIX_ROOT, O_CLOEXEC|O_NOCTTY, NULL);
4524 if (sfd == -ENOENT) {
4525 log_notice_errno(sfd, "Failed to open source file '%s%s', skipping: %m", strempty(arg_copy_source), *source);
4526 continue;
4527 }
4528 if (sfd < 0)
4529 return log_error_errno(sfd, "Failed to open source file '%s%s': %m", strempty(arg_copy_source), *source);
4530
4531 r = fd_verify_regular(sfd);
4532 if (r < 0) {
4533 if (r != -EISDIR)
4534 return log_error_errno(r, "Failed to check type of source file '%s': %m", *source);
4535
4536 /* We are looking at a directory */
4537 tfd = chase_and_open(*target, root, CHASE_PREFIX_ROOT, O_RDONLY|O_DIRECTORY|O_CLOEXEC, NULL);
4538 if (tfd < 0) {
4539 _cleanup_free_ char *dn = NULL, *fn = NULL;
4540
4541 if (tfd != -ENOENT)
4542 return log_error_errno(tfd, "Failed to open target directory '%s': %m", *target);
4543
4544 r = path_extract_filename(*target, &fn);
4545 if (r < 0)
4546 return log_error_errno(r, "Failed to extract filename from '%s': %m", *target);
4547
4548 r = path_extract_directory(*target, &dn);
4549 if (r < 0)
4550 return log_error_errno(r, "Failed to extract directory from '%s': %m", *target);
4551
4552 r = mkdir_p_root(root, dn, UID_INVALID, GID_INVALID, 0755, p->subvolumes);
4553 if (r < 0)
4554 return log_error_errno(r, "Failed to create parent directory '%s': %m", dn);
4555
4556 pfd = chase_and_open(dn, root, CHASE_PREFIX_ROOT, O_RDONLY|O_DIRECTORY|O_CLOEXEC, NULL);
4557 if (pfd < 0)
4558 return log_error_errno(pfd, "Failed to open parent directory of target: %m");
4559
4560 r = copy_tree_at(
4561 sfd, ".",
4562 pfd, fn,
4563 UID_INVALID, GID_INVALID,
4564 COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS|COPY_GRACEFUL_WARN|COPY_TRUNCATE,
4565 denylist, subvolumes_by_source_inode);
4566 } else
4567 r = copy_tree_at(
4568 sfd, ".",
4569 tfd, ".",
4570 UID_INVALID, GID_INVALID,
4571 COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS|COPY_GRACEFUL_WARN|COPY_TRUNCATE,
4572 denylist, subvolumes_by_source_inode);
4573 if (r < 0)
4574 return log_error_errno(r, "Failed to copy '%s%s' to '%s%s': %m",
4575 strempty(arg_copy_source), *source, strempty(root), *target);
4576 } else {
4577 _cleanup_free_ char *dn = NULL, *fn = NULL;
4578
4579 /* We are looking at a regular file */
4580
4581 r = path_extract_filename(*target, &fn);
4582 if (r == -EADDRNOTAVAIL || r == O_DIRECTORY)
4583 return log_error_errno(SYNTHETIC_ERRNO(EISDIR),
4584 "Target path '%s' refers to a directory, but source path '%s' refers to regular file, can't copy.", *target, *source);
4585 if (r < 0)
4586 return log_error_errno(r, "Failed to extract filename from '%s': %m", *target);
4587
4588 r = path_extract_directory(*target, &dn);
4589 if (r < 0)
4590 return log_error_errno(r, "Failed to extract directory from '%s': %m", *target);
4591
4592 r = mkdir_p_root(root, dn, UID_INVALID, GID_INVALID, 0755, p->subvolumes);
4593 if (r < 0)
4594 return log_error_errno(r, "Failed to create parent directory: %m");
4595
4596 pfd = chase_and_open(dn, root, CHASE_PREFIX_ROOT, O_RDONLY|O_DIRECTORY|O_CLOEXEC, NULL);
4597 if (pfd < 0)
4598 return log_error_errno(pfd, "Failed to open parent directory of target: %m");
4599
4600 tfd = openat(pfd, fn, O_CREAT|O_EXCL|O_WRONLY|O_CLOEXEC, 0700);
4601 if (tfd < 0)
4602 return log_error_errno(errno, "Failed to create target file '%s': %m", *target);
4603
4604 r = copy_bytes(sfd, tfd, UINT64_MAX, COPY_REFLINK|COPY_HOLES|COPY_SIGINT|COPY_TRUNCATE);
4605 if (r < 0)
4606 return log_error_errno(r, "Failed to copy '%s' to '%s%s': %m", *source, strempty(arg_copy_source), *target);
4607
4608 (void) copy_xattr(sfd, NULL, tfd, NULL, COPY_ALL_XATTRS);
4609 (void) copy_access(sfd, tfd);
4610 (void) copy_times(sfd, tfd, 0);
4611 }
4612 }
4613
4614 return 0;
4615 }
4616
4617 static int do_make_directories(Partition *p, const char *root) {
4618 int r;
4619
4620 assert(p);
4621 assert(root);
4622
4623 STRV_FOREACH(d, p->make_directories) {
4624 r = mkdir_p_root(root, *d, UID_INVALID, GID_INVALID, 0755, p->subvolumes);
4625 if (r < 0)
4626 return log_error_errno(r, "Failed to create directory '%s' in file system: %m", *d);
4627 }
4628
4629 return 0;
4630 }
4631
4632 static bool partition_needs_populate(Partition *p) {
4633 assert(p);
4634 return !strv_isempty(p->copy_files) || !strv_isempty(p->make_directories);
4635 }
4636
4637 static int partition_populate_directory(Context *context, Partition *p, char **ret) {
4638 _cleanup_(rm_rf_physical_and_freep) char *root = NULL;
4639 const char *vt;
4640 int r;
4641
4642 assert(ret);
4643
4644 log_info("Populating %s filesystem.", p->format);
4645
4646 r = var_tmp_dir(&vt);
4647 if (r < 0)
4648 return log_error_errno(r, "Could not determine temporary directory: %m");
4649
4650 r = tempfn_random_child(vt, "repart", &root);
4651 if (r < 0)
4652 return log_error_errno(r, "Failed to generate temporary directory: %m");
4653
4654 r = mkdir(root, 0755);
4655 if (r < 0)
4656 return log_error_errno(errno, "Failed to create temporary directory: %m");
4657
4658 r = do_copy_files(context, p, root);
4659 if (r < 0)
4660 return r;
4661
4662 r = do_make_directories(p, root);
4663 if (r < 0)
4664 return r;
4665
4666 log_info("Successfully populated %s filesystem.", p->format);
4667
4668 *ret = TAKE_PTR(root);
4669 return 0;
4670 }
4671
4672 static int partition_populate_filesystem(Context *context, Partition *p, const char *node) {
4673 int r;
4674
4675 assert(p);
4676 assert(node);
4677
4678 log_info("Populating %s filesystem.", p->format);
4679
4680 /* We copy in a child process, since we have to mount the fs for that, and we don't want that fs to
4681 * appear in the host namespace. Hence we fork a child that has its own file system namespace and
4682 * detached mount propagation. */
4683
4684 r = safe_fork("(sd-copy)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, NULL);
4685 if (r < 0)
4686 return r;
4687 if (r == 0) {
4688 static const char fs[] = "/run/systemd/mount-root";
4689 /* This is a child process with its own mount namespace and propagation to host turned off */
4690
4691 r = mkdir_p(fs, 0700);
4692 if (r < 0) {
4693 log_error_errno(r, "Failed to create mount point: %m");
4694 _exit(EXIT_FAILURE);
4695 }
4696
4697 if (mount_nofollow_verbose(LOG_ERR, node, fs, p->format, MS_NOATIME|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL) < 0)
4698 _exit(EXIT_FAILURE);
4699
4700 if (do_copy_files(context, p, fs) < 0)
4701 _exit(EXIT_FAILURE);
4702
4703 if (do_make_directories(p, fs) < 0)
4704 _exit(EXIT_FAILURE);
4705
4706 r = syncfs_path(AT_FDCWD, fs);
4707 if (r < 0) {
4708 log_error_errno(r, "Failed to synchronize written files: %m");
4709 _exit(EXIT_FAILURE);
4710 }
4711
4712 _exit(EXIT_SUCCESS);
4713 }
4714
4715 log_info("Successfully populated %s filesystem.", p->format);
4716 return 0;
4717 }
4718
4719 static int context_mkfs(Context *context) {
4720 int r;
4721
4722 assert(context);
4723
4724 /* Make a file system */
4725
4726 LIST_FOREACH(partitions, p, context->partitions) {
4727 _cleanup_(rm_rf_physical_and_freep) char *root = NULL;
4728 _cleanup_(partition_target_freep) PartitionTarget *t = NULL;
4729 _cleanup_strv_free_ char **extra_mkfs_options = NULL;
4730
4731 if (p->dropped)
4732 continue;
4733
4734 if (PARTITION_EXISTS(p)) /* Never format existing partitions */
4735 continue;
4736
4737 if (!p->format)
4738 continue;
4739
4740 /* Minimized partitions will use the copy blocks logic so let's make sure to skip those here. */
4741 if (p->copy_blocks_fd >= 0)
4742 continue;
4743
4744 if (partition_type_defer(&p->type))
4745 continue;
4746
4747 assert(p->offset != UINT64_MAX);
4748 assert(p->new_size != UINT64_MAX);
4749 assert(p->new_size >= (p->encrypt != ENCRYPT_OFF ? LUKS2_METADATA_KEEP_FREE : 0));
4750
4751 /* If we're doing encryption, we make sure we keep free space at the end which is required
4752 * for cryptsetup's offline encryption. */
4753 r = partition_target_prepare(context, p,
4754 p->new_size - (p->encrypt != ENCRYPT_OFF ? LUKS2_METADATA_KEEP_FREE : 0),
4755 /*need_path=*/ true,
4756 &t);
4757 if (r < 0)
4758 return r;
4759
4760 if (p->encrypt != ENCRYPT_OFF && t->loop) {
4761 r = partition_target_grow(t, p->new_size);
4762 if (r < 0)
4763 return r;
4764
4765 r = partition_encrypt(context, p, t, /* offline = */ false);
4766 if (r < 0)
4767 return log_error_errno(r, "Failed to encrypt device: %m");
4768 }
4769
4770 log_info("Formatting future partition %" PRIu64 ".", p->partno);
4771
4772 /* If we're not writing to a loop device or if we're populating a read-only filesystem, we
4773 * have to populate using the filesystem's mkfs's --root (or equivalent) option. To do that,
4774 * we need to set up the final directory tree beforehand. */
4775
4776 if (partition_needs_populate(p) && (!t->loop || fstype_is_ro(p->format))) {
4777 if (!mkfs_supports_root_option(p->format))
4778 return log_error_errno(SYNTHETIC_ERRNO(ENODEV),
4779 "Loop device access is required to populate %s filesystems.",
4780 p->format);
4781
4782 r = partition_populate_directory(context, p, &root);
4783 if (r < 0)
4784 return r;
4785 }
4786
4787 r = mkfs_options_from_env("REPART", p->format, &extra_mkfs_options);
4788 if (r < 0)
4789 return log_error_errno(r,
4790 "Failed to determine mkfs command line options for '%s': %m",
4791 p->format);
4792
4793 r = make_filesystem(partition_target_path(t), p->format, strempty(p->new_label), root,
4794 p->fs_uuid, arg_discard, /* quiet = */ false,
4795 context->fs_sector_size, extra_mkfs_options);
4796 if (r < 0)
4797 return r;
4798
4799 log_info("Successfully formatted future partition %" PRIu64 ".", p->partno);
4800
4801 /* If we're writing to a loop device, we can now mount the empty filesystem and populate it. */
4802 if (partition_needs_populate(p) && !root) {
4803 assert(t->loop);
4804
4805 r = partition_populate_filesystem(context, p, partition_target_path(t));
4806 if (r < 0)
4807 return r;
4808 }
4809
4810 if (p->encrypt != ENCRYPT_OFF && !t->loop) {
4811 r = partition_target_grow(t, p->new_size);
4812 if (r < 0)
4813 return r;
4814
4815 r = partition_encrypt(context, p, t, /* offline = */ true);
4816 if (r < 0)
4817 return log_error_errno(r, "Failed to encrypt device: %m");
4818 }
4819
4820 /* Note that we always sync explicitly here, since mkfs.fat doesn't do that on its own, and
4821 * if we don't sync before detaching a block device the in-flight sectors possibly won't hit
4822 * the disk. */
4823
4824 r = partition_target_sync(context, p, t);
4825 if (r < 0)
4826 return r;
4827
4828 if (p->siblings[VERITY_HASH] && !partition_type_defer(&p->siblings[VERITY_HASH]->type)) {
4829 r = partition_format_verity_hash(context, p->siblings[VERITY_HASH],
4830 /* node = */ NULL, partition_target_path(t));
4831 if (r < 0)
4832 return r;
4833 }
4834
4835 if (p->siblings[VERITY_SIG] && !partition_type_defer(&p->siblings[VERITY_SIG]->type)) {
4836 r = partition_format_verity_sig(context, p->siblings[VERITY_SIG]);
4837 if (r < 0)
4838 return r;
4839 }
4840 }
4841
4842 return 0;
4843 }
4844
4845 static int parse_x509_certificate(const char *certificate, size_t certificate_size, X509 **ret) {
4846 #if HAVE_OPENSSL
4847 _cleanup_(X509_freep) X509 *cert = NULL;
4848 _cleanup_(BIO_freep) BIO *cb = NULL;
4849
4850 assert(certificate);
4851 assert(certificate_size > 0);
4852 assert(ret);
4853
4854 cb = BIO_new_mem_buf(certificate, certificate_size);
4855 if (!cb)
4856 return log_oom();
4857
4858 cert = PEM_read_bio_X509(cb, NULL, NULL, NULL);
4859 if (!cert)
4860 return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Failed to parse X.509 certificate: %s",
4861 ERR_error_string(ERR_get_error(), NULL));
4862
4863 if (ret)
4864 *ret = TAKE_PTR(cert);
4865
4866 return 0;
4867 #else
4868 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "openssl is not supported, cannot parse X509 certificate.");
4869 #endif
4870 }
4871
4872 static int parse_private_key(const char *key, size_t key_size, EVP_PKEY **ret) {
4873 #if HAVE_OPENSSL
4874 _cleanup_(BIO_freep) BIO *kb = NULL;
4875 _cleanup_(EVP_PKEY_freep) EVP_PKEY *pk = NULL;
4876
4877 assert(key);
4878 assert(key_size > 0);
4879 assert(ret);
4880
4881 kb = BIO_new_mem_buf(key, key_size);
4882 if (!kb)
4883 return log_oom();
4884
4885 pk = PEM_read_bio_PrivateKey(kb, NULL, NULL, NULL);
4886 if (!pk)
4887 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to parse PEM private key: %s",
4888 ERR_error_string(ERR_get_error(), NULL));
4889
4890 if (ret)
4891 *ret = TAKE_PTR(pk);
4892
4893 return 0;
4894 #else
4895 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "openssl is not supported, cannot parse private key.");
4896 #endif
4897 }
4898
4899 static int partition_acquire_uuid(Context *context, Partition *p, sd_id128_t *ret) {
4900 struct {
4901 sd_id128_t type_uuid;
4902 uint64_t counter;
4903 } _packed_ plaintext = {};
4904 union {
4905 uint8_t md[SHA256_DIGEST_SIZE];
4906 sd_id128_t id;
4907 } result;
4908
4909 uint64_t k = 0;
4910 int r;
4911
4912 assert(context);
4913 assert(p);
4914 assert(ret);
4915
4916 /* Calculate a good UUID for the indicated partition. We want a certain degree of reproducibility,
4917 * hence we won't generate the UUIDs randomly. Instead we use a cryptographic hash (precisely:
4918 * HMAC-SHA256) to derive them from a single seed. The seed is generally the machine ID of the
4919 * installation we are processing, but if random behaviour is desired can be random, too. We use the
4920 * seed value as key for the HMAC (since the machine ID is something we generally don't want to leak)
4921 * and the partition type as plaintext. The partition type is suffixed with a counter (only for the
4922 * second and later partition of the same type) if we have more than one partition of the same
4923 * time. Or in other words:
4924 *
4925 * With:
4926 * SEED := /etc/machine-id
4927 *
4928 * If first partition instance of type TYPE_UUID:
4929 * PARTITION_UUID := HMAC-SHA256(SEED, TYPE_UUID)
4930 *
4931 * For all later partition instances of type TYPE_UUID with INSTANCE being the LE64 encoded instance number:
4932 * PARTITION_UUID := HMAC-SHA256(SEED, TYPE_UUID || INSTANCE)
4933 */
4934
4935 LIST_FOREACH(partitions, q, context->partitions) {
4936 if (p == q)
4937 break;
4938
4939 if (!sd_id128_equal(p->type.uuid, q->type.uuid))
4940 continue;
4941
4942 k++;
4943 }
4944
4945 plaintext.type_uuid = p->type.uuid;
4946 plaintext.counter = htole64(k);
4947
4948 hmac_sha256(context->seed.bytes, sizeof(context->seed.bytes),
4949 &plaintext,
4950 k == 0 ? sizeof(sd_id128_t) : sizeof(plaintext),
4951 result.md);
4952
4953 /* Take the first half, mark it as v4 UUID */
4954 assert_cc(sizeof(result.md) == sizeof(result.id) * 2);
4955 result.id = id128_make_v4_uuid(result.id);
4956
4957 /* Ensure this partition UUID is actually unique, and there's no remaining partition from an earlier run? */
4958 LIST_FOREACH(partitions, q, context->partitions) {
4959 if (p == q)
4960 continue;
4961
4962 if (sd_id128_in_set(result.id, q->current_uuid, q->new_uuid)) {
4963 log_warning("Partition UUID calculated from seed for partition %" PRIu64 " already used, reverting to randomized UUID.", p->partno);
4964
4965 r = sd_id128_randomize(&result.id);
4966 if (r < 0)
4967 return log_error_errno(r, "Failed to generate randomized UUID: %m");
4968
4969 break;
4970 }
4971 }
4972
4973 *ret = result.id;
4974 return 0;
4975 }
4976
4977 static int partition_acquire_label(Context *context, Partition *p, char **ret) {
4978 _cleanup_free_ char *label = NULL;
4979 const char *prefix;
4980 unsigned k = 1;
4981
4982 assert(context);
4983 assert(p);
4984 assert(ret);
4985
4986 prefix = gpt_partition_type_uuid_to_string(p->type.uuid);
4987 if (!prefix)
4988 prefix = "linux";
4989
4990 for (;;) {
4991 const char *ll = label ?: prefix;
4992 bool retry = false;
4993
4994 LIST_FOREACH(partitions, q, context->partitions) {
4995 if (p == q)
4996 break;
4997
4998 if (streq_ptr(ll, q->current_label) ||
4999 streq_ptr(ll, q->new_label)) {
5000 retry = true;
5001 break;
5002 }
5003 }
5004
5005 if (!retry)
5006 break;
5007
5008 label = mfree(label);
5009 if (asprintf(&label, "%s-%u", prefix, ++k) < 0)
5010 return log_oom();
5011 }
5012
5013 if (!label) {
5014 label = strdup(prefix);
5015 if (!label)
5016 return log_oom();
5017 }
5018
5019 *ret = TAKE_PTR(label);
5020 return 0;
5021 }
5022
5023 static int context_acquire_partition_uuids_and_labels(Context *context) {
5024 int r;
5025
5026 assert(context);
5027
5028 LIST_FOREACH(partitions, p, context->partitions) {
5029 sd_id128_t uuid;
5030
5031 /* Never touch foreign partitions */
5032 if (PARTITION_IS_FOREIGN(p)) {
5033 p->new_uuid = p->current_uuid;
5034
5035 if (p->current_label) {
5036 r = free_and_strdup_warn(&p->new_label, strempty(p->current_label));
5037 if (r < 0)
5038 return r;
5039 }
5040
5041 continue;
5042 }
5043
5044 if (!sd_id128_is_null(p->current_uuid))
5045 p->new_uuid = uuid = p->current_uuid; /* Never change initialized UUIDs */
5046 else if (p->new_uuid_is_set)
5047 uuid = p->new_uuid;
5048 else {
5049 /* Not explicitly set by user! */
5050 r = partition_acquire_uuid(context, p, &uuid);
5051 if (r < 0)
5052 return r;
5053
5054 /* The final verity hash/data UUIDs can only be determined after formatting the
5055 * verity hash partition. However, we still want to use the generated partition UUID
5056 * to derive other UUIDs to keep things unique and reproducible, so we always
5057 * generate a UUID if none is set, but we only use it as the actual partition UUID if
5058 * verity is not configured. */
5059 if (!IN_SET(p->verity, VERITY_DATA, VERITY_HASH)) {
5060 p->new_uuid = uuid;
5061 p->new_uuid_is_set = true;
5062 }
5063 }
5064
5065 /* Calculate the UUID for the file system as HMAC-SHA256 of the string "file-system-uuid",
5066 * keyed off the partition UUID. */
5067 r = derive_uuid(uuid, "file-system-uuid", &p->fs_uuid);
5068 if (r < 0)
5069 return r;
5070
5071 if (p->encrypt != ENCRYPT_OFF) {
5072 r = derive_uuid(uuid, "luks-uuid", &p->luks_uuid);
5073 if (r < 0)
5074 return r;
5075 }
5076
5077 /* Derive the verity salt and verity superblock UUID from the seed to keep them reproducible */
5078 if (p->verity == VERITY_HASH) {
5079 derive_salt(context->seed, "verity-salt", p->verity_salt);
5080
5081 r = derive_uuid(context->seed, "verity-uuid", &p->verity_uuid);
5082 if (r < 0)
5083 return log_error_errno(r, "Failed to acquire verity uuid: %m");
5084 }
5085
5086 if (!isempty(p->current_label)) {
5087 /* never change initialized labels */
5088 r = free_and_strdup_warn(&p->new_label, p->current_label);
5089 if (r < 0)
5090 return r;
5091 } else if (!p->new_label) {
5092 /* Not explicitly set by user! */
5093
5094 r = partition_acquire_label(context, p, &p->new_label);
5095 if (r < 0)
5096 return r;
5097 }
5098 }
5099
5100 return 0;
5101 }
5102
5103 static int set_gpt_flags(struct fdisk_partition *q, uint64_t flags) {
5104 _cleanup_free_ char *a = NULL;
5105
5106 for (unsigned i = 0; i < sizeof(flags) * 8; i++) {
5107 uint64_t bit = UINT64_C(1) << i;
5108 char buf[DECIMAL_STR_MAX(unsigned)+1];
5109
5110 if (!FLAGS_SET(flags, bit))
5111 continue;
5112
5113 xsprintf(buf, "%u", i);
5114 if (!strextend_with_separator(&a, ",", buf))
5115 return -ENOMEM;
5116 }
5117
5118 return fdisk_partition_set_attrs(q, a);
5119 }
5120
5121 static uint64_t partition_merge_flags(Partition *p) {
5122 uint64_t f;
5123
5124 assert(p);
5125
5126 f = p->gpt_flags;
5127
5128 if (p->no_auto >= 0) {
5129 if (gpt_partition_type_knows_no_auto(p->type))
5130 SET_FLAG(f, SD_GPT_FLAG_NO_AUTO, p->no_auto);
5131 else {
5132 char buffer[SD_ID128_UUID_STRING_MAX];
5133 log_warning("Configured NoAuto=%s for partition type '%s' that doesn't support it, ignoring.",
5134 yes_no(p->no_auto),
5135 gpt_partition_type_uuid_to_string_harder(p->type.uuid, buffer));
5136 }
5137 }
5138
5139 if (p->read_only >= 0) {
5140 if (gpt_partition_type_knows_read_only(p->type))
5141 SET_FLAG(f, SD_GPT_FLAG_READ_ONLY, p->read_only);
5142 else {
5143 char buffer[SD_ID128_UUID_STRING_MAX];
5144 log_warning("Configured ReadOnly=%s for partition type '%s' that doesn't support it, ignoring.",
5145 yes_no(p->read_only),
5146 gpt_partition_type_uuid_to_string_harder(p->type.uuid, buffer));
5147 }
5148 }
5149
5150 if (p->growfs >= 0) {
5151 if (gpt_partition_type_knows_growfs(p->type))
5152 SET_FLAG(f, SD_GPT_FLAG_GROWFS, p->growfs);
5153 else {
5154 char buffer[SD_ID128_UUID_STRING_MAX];
5155 log_warning("Configured GrowFileSystem=%s for partition type '%s' that doesn't support it, ignoring.",
5156 yes_no(p->growfs),
5157 gpt_partition_type_uuid_to_string_harder(p->type.uuid, buffer));
5158 }
5159 }
5160
5161 return f;
5162 }
5163
5164 static int context_mangle_partitions(Context *context) {
5165 int r;
5166
5167 assert(context);
5168
5169 LIST_FOREACH(partitions, p, context->partitions) {
5170 if (p->dropped)
5171 continue;
5172
5173 if (partition_type_defer(&p->type))
5174 continue;
5175
5176 assert(p->new_size != UINT64_MAX);
5177 assert(p->offset != UINT64_MAX);
5178 assert(p->partno != UINT64_MAX);
5179
5180 if (PARTITION_EXISTS(p)) {
5181 bool changed = false;
5182
5183 assert(p->current_partition);
5184
5185 if (p->new_size != p->current_size) {
5186 assert(p->new_size >= p->current_size);
5187 assert(p->new_size % context->sector_size == 0);
5188
5189 r = fdisk_partition_size_explicit(p->current_partition, true);
5190 if (r < 0)
5191 return log_error_errno(r, "Failed to enable explicit sizing: %m");
5192
5193 r = fdisk_partition_set_size(p->current_partition, p->new_size / context->sector_size);
5194 if (r < 0)
5195 return log_error_errno(r, "Failed to grow partition: %m");
5196
5197 log_info("Growing existing partition %" PRIu64 ".", p->partno);
5198 changed = true;
5199 }
5200
5201 if (!sd_id128_equal(p->new_uuid, p->current_uuid)) {
5202 r = fdisk_partition_set_uuid(p->current_partition, SD_ID128_TO_UUID_STRING(p->new_uuid));
5203 if (r < 0)
5204 return log_error_errno(r, "Failed to set partition UUID: %m");
5205
5206 log_info("Initializing UUID of existing partition %" PRIu64 ".", p->partno);
5207 changed = true;
5208 }
5209
5210 if (!streq_ptr(p->new_label, p->current_label)) {
5211 r = fdisk_partition_set_name(p->current_partition, strempty(p->new_label));
5212 if (r < 0)
5213 return log_error_errno(r, "Failed to set partition label: %m");
5214
5215 log_info("Setting partition label of existing partition %" PRIu64 ".", p->partno);
5216 changed = true;
5217 }
5218
5219 if (changed) {
5220 assert(!PARTITION_IS_FOREIGN(p)); /* never touch foreign partitions */
5221
5222 r = fdisk_set_partition(context->fdisk_context, p->partno, p->current_partition);
5223 if (r < 0)
5224 return log_error_errno(r, "Failed to update partition: %m");
5225 }
5226 } else {
5227 _cleanup_(fdisk_unref_partitionp) struct fdisk_partition *q = NULL;
5228 _cleanup_(fdisk_unref_parttypep) struct fdisk_parttype *t = NULL;
5229
5230 assert(!p->new_partition);
5231 assert(p->offset % context->sector_size == 0);
5232 assert(p->new_size % context->sector_size == 0);
5233 assert(p->new_label);
5234
5235 t = fdisk_new_parttype();
5236 if (!t)
5237 return log_oom();
5238
5239 r = fdisk_parttype_set_typestr(t, SD_ID128_TO_UUID_STRING(p->type.uuid));
5240 if (r < 0)
5241 return log_error_errno(r, "Failed to initialize partition type: %m");
5242
5243 q = fdisk_new_partition();
5244 if (!q)
5245 return log_oom();
5246
5247 r = fdisk_partition_set_type(q, t);
5248 if (r < 0)
5249 return log_error_errno(r, "Failed to set partition type: %m");
5250
5251 r = fdisk_partition_size_explicit(q, true);
5252 if (r < 0)
5253 return log_error_errno(r, "Failed to enable explicit sizing: %m");
5254
5255 r = fdisk_partition_set_start(q, p->offset / context->sector_size);
5256 if (r < 0)
5257 return log_error_errno(r, "Failed to position partition: %m");
5258
5259 r = fdisk_partition_set_size(q, p->new_size / context->sector_size);
5260 if (r < 0)
5261 return log_error_errno(r, "Failed to grow partition: %m");
5262
5263 r = fdisk_partition_set_partno(q, p->partno);
5264 if (r < 0)
5265 return log_error_errno(r, "Failed to set partition number: %m");
5266
5267 r = fdisk_partition_set_uuid(q, SD_ID128_TO_UUID_STRING(p->new_uuid));
5268 if (r < 0)
5269 return log_error_errno(r, "Failed to set partition UUID: %m");
5270
5271 r = fdisk_partition_set_name(q, strempty(p->new_label));
5272 if (r < 0)
5273 return log_error_errno(r, "Failed to set partition label: %m");
5274
5275 /* Merge the no auto + read only + growfs setting with the literal flags, and set them for the partition */
5276 r = set_gpt_flags(q, partition_merge_flags(p));
5277 if (r < 0)
5278 return log_error_errno(r, "Failed to set GPT partition flags: %m");
5279
5280 log_info("Adding new partition %" PRIu64 " to partition table.", p->partno);
5281
5282 r = fdisk_add_partition(context->fdisk_context, q, NULL);
5283 if (r < 0)
5284 return log_error_errno(r, "Failed to add partition: %m");
5285
5286 assert(!p->new_partition);
5287 p->new_partition = TAKE_PTR(q);
5288 }
5289 }
5290
5291 return 0;
5292 }
5293
5294 static int split_name_printf(Partition *p, char **ret) {
5295 assert(p);
5296
5297 const Specifier table[] = {
5298 { 't', specifier_string, GPT_PARTITION_TYPE_UUID_TO_STRING_HARDER(p->type.uuid) },
5299 { 'T', specifier_id128, &p->type.uuid },
5300 { 'U', specifier_id128, &p->new_uuid },
5301 { 'n', specifier_uint64, &p->partno },
5302
5303 COMMON_SYSTEM_SPECIFIERS,
5304 {}
5305 };
5306
5307 return specifier_printf(p->split_name_format, NAME_MAX, table, arg_root, p, ret);
5308 }
5309
5310 static int split_node(const char *node, char **ret_base, char **ret_ext) {
5311 _cleanup_free_ char *base = NULL, *ext = NULL;
5312 char *e;
5313 int r;
5314
5315 assert(node);
5316 assert(ret_base);
5317 assert(ret_ext);
5318
5319 r = path_extract_filename(node, &base);
5320 if (r == O_DIRECTORY || r == -EADDRNOTAVAIL)
5321 return log_error_errno(r, "Device node %s cannot be a directory", node);
5322 if (r < 0)
5323 return log_error_errno(r, "Failed to extract filename from %s: %m", node);
5324
5325 e = endswith(base, ".raw");
5326 if (e) {
5327 ext = strdup(e);
5328 if (!ext)
5329 return log_oom();
5330
5331 *e = 0;
5332 }
5333
5334 *ret_base = TAKE_PTR(base);
5335 *ret_ext = TAKE_PTR(ext);
5336
5337 return 0;
5338 }
5339
5340 static int split_name_resolve(Context *context) {
5341 _cleanup_free_ char *parent = NULL, *base = NULL, *ext = NULL;
5342 int r;
5343
5344 assert(context);
5345
5346 r = path_extract_directory(context->node, &parent);
5347 if (r < 0 && r != -EDESTADDRREQ)
5348 return log_error_errno(r, "Failed to extract directory from %s: %m", context->node);
5349
5350 r = split_node(context->node, &base, &ext);
5351 if (r < 0)
5352 return r;
5353
5354 LIST_FOREACH(partitions, p, context->partitions) {
5355 _cleanup_free_ char *resolved = NULL;
5356
5357 if (p->dropped)
5358 continue;
5359
5360 if (!p->split_name_format)
5361 continue;
5362
5363 r = split_name_printf(p, &resolved);
5364 if (r < 0)
5365 return log_error_errno(r, "Failed to resolve specifiers in %s: %m", p->split_name_format);
5366
5367 if (parent)
5368 p->split_path = strjoin(parent, "/", base, ".", resolved, ext);
5369 else
5370 p->split_path = strjoin(base, ".", resolved, ext);
5371 if (!p->split_path)
5372 return log_oom();
5373 }
5374
5375 LIST_FOREACH(partitions, p, context->partitions) {
5376 if (!p->split_path)
5377 continue;
5378
5379 LIST_FOREACH(partitions, q, context->partitions) {
5380 if (p == q)
5381 continue;
5382
5383 if (!q->split_path)
5384 continue;
5385
5386 if (!streq(p->split_path, q->split_path))
5387 continue;
5388
5389 return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ),
5390 "%s and %s have the same resolved split name \"%s\", refusing",
5391 p->definition_path, q->definition_path, p->split_path);
5392 }
5393 }
5394
5395 return 0;
5396 }
5397
5398 static int context_split(Context *context) {
5399 int fd = -EBADF, r;
5400
5401 if (!arg_split)
5402 return 0;
5403
5404 assert(context);
5405
5406 /* We can't do resolution earlier because the partition UUIDs for verity partitions are only filled
5407 * in after they've been generated. */
5408
5409 r = split_name_resolve(context);
5410 if (r < 0)
5411 return r;
5412
5413 LIST_FOREACH(partitions, p, context->partitions) {
5414 _cleanup_close_ int fdt = -EBADF;
5415
5416 if (p->dropped)
5417 continue;
5418
5419 if (!p->split_path)
5420 continue;
5421
5422 if (partition_type_defer(&p->type))
5423 continue;
5424
5425 fdt = open(p->split_path, O_WRONLY|O_NOCTTY|O_CLOEXEC|O_NOFOLLOW|O_CREAT|O_EXCL, 0666);
5426 if (fdt < 0)
5427 return log_error_errno(fdt, "Failed to open split partition file %s: %m", p->split_path);
5428
5429 if (fd < 0)
5430 assert_se((fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
5431
5432 if (lseek(fd, p->offset, SEEK_SET) < 0)
5433 return log_error_errno(errno, "Failed to seek to partition offset: %m");
5434
5435 r = copy_bytes(fd, fdt, p->new_size, COPY_REFLINK|COPY_HOLES|COPY_TRUNCATE);
5436 if (r < 0)
5437 return log_error_errno(r, "Failed to copy to split partition %s: %m", p->split_path);
5438 }
5439
5440 return 0;
5441 }
5442
5443 static int context_write_partition_table(Context *context) {
5444 _cleanup_(fdisk_unref_tablep) struct fdisk_table *original_table = NULL;
5445 int capable, r;
5446
5447 assert(context);
5448
5449 if (!context->from_scratch && !context_changed(context)) {
5450 log_info("No changes.");
5451 return 0;
5452 }
5453
5454 if (arg_dry_run) {
5455 log_notice("Refusing to repartition, please re-run with --dry-run=no.");
5456 return 0;
5457 }
5458
5459 log_info("Applying changes.");
5460
5461 if (context->from_scratch && arg_empty != EMPTY_CREATE) {
5462 /* Erase everything if we operate from scratch, except if the image was just created anyway, and thus is definitely empty. */
5463 r = context_wipe_range(context, 0, context->total);
5464 if (r < 0)
5465 return r;
5466
5467 log_info("Wiped block device.");
5468
5469 if (arg_discard) {
5470 r = context_discard_range(context, 0, context->total);
5471 if (r == -EOPNOTSUPP)
5472 log_info("Storage does not support discard, not discarding entire block device data.");
5473 else if (r < 0)
5474 return log_error_errno(r, "Failed to discard entire block device: %m");
5475 else if (r > 0)
5476 log_info("Discarded entire block device.");
5477 }
5478 }
5479
5480 r = fdisk_get_partitions(context->fdisk_context, &original_table);
5481 if (r < 0)
5482 return log_error_errno(r, "Failed to acquire partition table: %m");
5483
5484 /* Wipe fs signatures and discard sectors where the new partitions are going to be placed and in the
5485 * gaps between partitions, just to be sure. */
5486 r = context_wipe_and_discard(context);
5487 if (r < 0)
5488 return r;
5489
5490 r = context_copy_blocks(context);
5491 if (r < 0)
5492 return r;
5493
5494 r = context_mkfs(context);
5495 if (r < 0)
5496 return r;
5497
5498 r = context_mangle_partitions(context);
5499 if (r < 0)
5500 return r;
5501
5502 log_info("Writing new partition table.");
5503
5504 r = fdisk_write_disklabel(context->fdisk_context);
5505 if (r < 0)
5506 return log_error_errno(r, "Failed to write partition table: %m");
5507
5508 capable = blockdev_partscan_enabled(fdisk_get_devfd(context->fdisk_context));
5509 if (capable == -ENOTBLK)
5510 log_debug("Not telling kernel to reread partition table, since we are not operating on a block device.");
5511 else if (capable < 0)
5512 return log_error_errno(capable, "Failed to check if block device supports partition scanning: %m");
5513 else if (capable > 0) {
5514 log_info("Telling kernel to reread partition table.");
5515
5516 if (context->from_scratch)
5517 r = fdisk_reread_partition_table(context->fdisk_context);
5518 else
5519 r = fdisk_reread_changes(context->fdisk_context, original_table);
5520 if (r < 0)
5521 return log_error_errno(r, "Failed to reread partition table: %m");
5522 } else
5523 log_notice("Not telling kernel to reread partition table, because selected image does not support kernel partition block devices.");
5524
5525 log_info("All done.");
5526
5527 return 0;
5528 }
5529
5530 static int context_read_seed(Context *context, const char *root) {
5531 int r;
5532
5533 assert(context);
5534
5535 if (!sd_id128_is_null(context->seed))
5536 return 0;
5537
5538 if (!arg_randomize) {
5539 r = id128_get_machine(root, &context->seed);
5540 if (r >= 0)
5541 return 0;
5542
5543 if (!ERRNO_IS_MACHINE_ID_UNSET(r))
5544 return log_error_errno(r, "Failed to parse machine ID of image: %m");
5545
5546 log_info("No machine ID set, using randomized partition UUIDs.");
5547 }
5548
5549 r = sd_id128_randomize(&context->seed);
5550 if (r < 0)
5551 return log_error_errno(r, "Failed to generate randomized seed: %m");
5552
5553 return 0;
5554 }
5555
5556 static int context_factory_reset(Context *context) {
5557 size_t n = 0;
5558 int r;
5559
5560 assert(context);
5561
5562 if (arg_factory_reset <= 0)
5563 return 0;
5564
5565 if (context->from_scratch) /* Nothing to reset if we start from scratch */
5566 return 0;
5567
5568 if (arg_dry_run) {
5569 log_notice("Refusing to factory reset, please re-run with --dry-run=no.");
5570 return 0;
5571 }
5572
5573 log_info("Applying factory reset.");
5574
5575 LIST_FOREACH(partitions, p, context->partitions) {
5576
5577 if (!p->factory_reset || !PARTITION_EXISTS(p))
5578 continue;
5579
5580 assert(p->partno != UINT64_MAX);
5581
5582 log_info("Removing partition %" PRIu64 " for factory reset.", p->partno);
5583
5584 r = fdisk_delete_partition(context->fdisk_context, p->partno);
5585 if (r < 0)
5586 return log_error_errno(r, "Failed to remove partition %" PRIu64 ": %m", p->partno);
5587
5588 n++;
5589 }
5590
5591 if (n == 0) {
5592 log_info("Factory reset requested, but no partitions to delete found.");
5593 return 0;
5594 }
5595
5596 r = fdisk_write_disklabel(context->fdisk_context);
5597 if (r < 0)
5598 return log_error_errno(r, "Failed to write disk label: %m");
5599
5600 log_info("Successfully deleted %zu partitions.", n);
5601 return 1;
5602 }
5603
5604 static int context_can_factory_reset(Context *context) {
5605 assert(context);
5606
5607 LIST_FOREACH(partitions, p, context->partitions)
5608 if (p->factory_reset && PARTITION_EXISTS(p))
5609 return true;
5610
5611 return false;
5612 }
5613
5614 static int resolve_copy_blocks_auto_candidate(
5615 dev_t partition_devno,
5616 GptPartitionType partition_type,
5617 dev_t restrict_devno,
5618 sd_id128_t *ret_uuid) {
5619
5620 _cleanup_(blkid_free_probep) blkid_probe b = NULL;
5621 _cleanup_close_ int fd = -EBADF;
5622 _cleanup_free_ char *p = NULL;
5623 const char *pttype, *t;
5624 sd_id128_t pt_parsed, u;
5625 blkid_partition pp;
5626 dev_t whole_devno;
5627 blkid_partlist pl;
5628 int r;
5629
5630 /* Checks if the specified partition has the specified GPT type UUID, and is located on the specified
5631 * 'restrict_devno' device. The type check is particularly relevant if we have Verity volume which is
5632 * backed by two separate partitions: the data and the hash partitions, and we need to find the right
5633 * one of the two. */
5634
5635 r = block_get_whole_disk(partition_devno, &whole_devno);
5636 if (r < 0)
5637 return log_error_errno(
5638 r,
5639 "Unable to determine containing block device of partition %u:%u: %m",
5640 major(partition_devno), minor(partition_devno));
5641
5642 if (restrict_devno != (dev_t) -1 &&
5643 restrict_devno != whole_devno)
5644 return log_error_errno(
5645 SYNTHETIC_ERRNO(EPERM),
5646 "Partition %u:%u is located outside of block device %u:%u, refusing.",
5647 major(partition_devno), minor(partition_devno),
5648 major(restrict_devno), minor(restrict_devno));
5649
5650 fd = r = device_open_from_devnum(S_IFBLK, whole_devno, O_RDONLY|O_CLOEXEC|O_NONBLOCK, &p);
5651 if (r < 0)
5652 return log_error_errno(r, "Failed to open block device " DEVNUM_FORMAT_STR ": %m",
5653 DEVNUM_FORMAT_VAL(whole_devno));
5654
5655 b = blkid_new_probe();
5656 if (!b)
5657 return log_oom();
5658
5659 errno = 0;
5660 r = blkid_probe_set_device(b, fd, 0, 0);
5661 if (r != 0)
5662 return log_error_errno(errno_or_else(ENOMEM), "Failed to open block device '%s': %m", p);
5663
5664 (void) blkid_probe_enable_partitions(b, 1);
5665 (void) blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
5666
5667 errno = 0;
5668 r = blkid_do_safeprobe(b);
5669 if (r == _BLKID_SAFEPROBE_ERROR)
5670 return log_error_errno(errno_or_else(EIO), "Unable to probe for partition table of '%s': %m", p);
5671 if (IN_SET(r, _BLKID_SAFEPROBE_AMBIGUOUS, _BLKID_SAFEPROBE_NOT_FOUND)) {
5672 log_debug("Didn't find partition table on block device '%s'.", p);
5673 return false;
5674 }
5675
5676 assert(r == _BLKID_SAFEPROBE_FOUND);
5677
5678 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
5679 if (!streq_ptr(pttype, "gpt")) {
5680 log_debug("Didn't find a GPT partition table on '%s'.", p);
5681 return false;
5682 }
5683
5684 errno = 0;
5685 pl = blkid_probe_get_partitions(b);
5686 if (!pl)
5687 return log_error_errno(errno_or_else(EIO), "Unable read partition table of '%s': %m", p);
5688
5689 pp = blkid_partlist_devno_to_partition(pl, partition_devno);
5690 if (!pp) {
5691 log_debug("Partition %u:%u has no matching partition table entry on '%s'.",
5692 major(partition_devno), minor(partition_devno), p);
5693 return false;
5694 }
5695
5696 t = blkid_partition_get_type_string(pp);
5697 if (isempty(t)) {
5698 log_debug("Partition %u:%u has no type on '%s'.",
5699 major(partition_devno), minor(partition_devno), p);
5700 return false;
5701 }
5702
5703 r = sd_id128_from_string(t, &pt_parsed);
5704 if (r < 0) {
5705 log_debug_errno(r, "Failed to parse partition type \"%s\": %m", t);
5706 return false;
5707 }
5708
5709 if (!sd_id128_equal(pt_parsed, partition_type.uuid)) {
5710 log_debug("Partition %u:%u has non-matching partition type " SD_ID128_FORMAT_STR " (needed: " SD_ID128_FORMAT_STR "), ignoring.",
5711 major(partition_devno), minor(partition_devno),
5712 SD_ID128_FORMAT_VAL(pt_parsed), SD_ID128_FORMAT_VAL(partition_type.uuid));
5713 return false;
5714 }
5715
5716 r = blkid_partition_get_uuid_id128(pp, &u);
5717 if (r == -ENXIO) {
5718 log_debug_errno(r, "Partition " DEVNUM_FORMAT_STR " has no UUID.", DEVNUM_FORMAT_VAL(partition_devno));
5719 return false;
5720 }
5721 if (r < 0) {
5722 log_debug_errno(r, "Failed to read partition UUID of " DEVNUM_FORMAT_STR ": %m", DEVNUM_FORMAT_VAL(partition_devno));
5723 return false;
5724 }
5725
5726 log_debug("Automatically found partition " DEVNUM_FORMAT_STR " of right type " SD_ID128_FORMAT_STR ".",
5727 DEVNUM_FORMAT_VAL(partition_devno),
5728 SD_ID128_FORMAT_VAL(pt_parsed));
5729
5730 if (ret_uuid)
5731 *ret_uuid = u;
5732
5733 return true;
5734 }
5735
5736 static int find_backing_devno(
5737 const char *path,
5738 const char *root,
5739 dev_t *ret) {
5740
5741 _cleanup_free_ char *resolved = NULL;
5742 int r;
5743
5744 assert(path);
5745
5746 r = chase(path, root, CHASE_PREFIX_ROOT, &resolved, NULL);
5747 if (r < 0)
5748 return r;
5749
5750 r = path_is_mount_point(resolved, NULL, 0);
5751 if (r < 0)
5752 return r;
5753 if (r == 0) /* Not a mount point, then it's not a partition of its own, let's not automatically use it. */
5754 return -ENOENT;
5755
5756 r = get_block_device(resolved, ret);
5757 if (r < 0)
5758 return r;
5759 if (r == 0) /* Not backed by physical file system, we can't use this */
5760 return -ENOENT;
5761
5762 return 0;
5763 }
5764
5765 static int resolve_copy_blocks_auto(
5766 GptPartitionType type,
5767 const char *root,
5768 dev_t restrict_devno,
5769 dev_t *ret_devno,
5770 sd_id128_t *ret_uuid) {
5771
5772 const char *try1 = NULL, *try2 = NULL;
5773 char p[SYS_BLOCK_PATH_MAX("/slaves")];
5774 _cleanup_closedir_ DIR *d = NULL;
5775 sd_id128_t found_uuid = SD_ID128_NULL;
5776 dev_t devno, found = 0;
5777 int r;
5778
5779 /* Enforce some security restrictions: CopyBlocks=auto should not be an avenue to get outside of the
5780 * --root=/--image= confinement. Specifically, refuse CopyBlocks= in combination with --root= at all,
5781 * and restrict block device references in the --image= case to loopback block device we set up.
5782 *
5783 * restrict_devno contain the dev_t of the loop back device we operate on in case of --image=, and
5784 * thus declares which device (and its partition subdevices) we shall limit access to. If
5785 * restrict_devno is zero no device probing access shall be allowed at all (used for --root=) and if
5786 * it is (dev_t) -1 then free access shall be allowed (if neither switch is used). */
5787
5788 if (restrict_devno == 0)
5789 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
5790 "Automatic discovery of backing block devices not permitted in --root= mode, refusing.");
5791
5792 /* Handles CopyBlocks=auto, and finds the right source partition to copy from. We look for matching
5793 * partitions in the host, using the appropriate directory as key and ensuring that the partition
5794 * type matches. */
5795
5796 if (type.designator == PARTITION_ROOT)
5797 try1 = "/";
5798 else if (type.designator == PARTITION_USR)
5799 try1 = "/usr/";
5800 else if (type.designator == PARTITION_ROOT_VERITY)
5801 try1 = "/";
5802 else if (type.designator == PARTITION_USR_VERITY)
5803 try1 = "/usr/";
5804 else if (type.designator == PARTITION_ESP) {
5805 try1 = "/efi/";
5806 try2 = "/boot/";
5807 } else if (type.designator == PARTITION_XBOOTLDR)
5808 try1 = "/boot/";
5809 else
5810 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
5811 "Partition type " SD_ID128_FORMAT_STR " not supported from automatic source block device discovery.",
5812 SD_ID128_FORMAT_VAL(type.uuid));
5813
5814 r = find_backing_devno(try1, root, &devno);
5815 if (r == -ENOENT && try2)
5816 r = find_backing_devno(try2, root, &devno);
5817 if (r < 0)
5818 return log_error_errno(r, "Failed to resolve automatic CopyBlocks= path for partition type " SD_ID128_FORMAT_STR ", sorry: %m",
5819 SD_ID128_FORMAT_VAL(type.uuid));
5820
5821 xsprintf_sys_block_path(p, "/slaves", devno);
5822 d = opendir(p);
5823 if (d) {
5824 struct dirent *de;
5825
5826 for (;;) {
5827 _cleanup_free_ char *q = NULL, *t = NULL;
5828 sd_id128_t u;
5829 dev_t sl;
5830
5831 errno = 0;
5832 de = readdir_no_dot(d);
5833 if (!de) {
5834 if (errno != 0)
5835 return log_error_errno(errno, "Failed to read directory '%s': %m", p);
5836
5837 break;
5838 }
5839
5840 if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN))
5841 continue;
5842
5843 q = path_join(p, de->d_name, "/dev");
5844 if (!q)
5845 return log_oom();
5846
5847 r = read_one_line_file(q, &t);
5848 if (r < 0)
5849 return log_error_errno(r, "Failed to read %s: %m", q);
5850
5851 r = parse_devnum(t, &sl);
5852 if (r < 0) {
5853 log_debug_errno(r, "Failed to parse %s, ignoring: %m", q);
5854 continue;
5855 }
5856 if (major(sl) == 0) {
5857 log_debug("Device backing %s is special, ignoring.", q);
5858 continue;
5859 }
5860
5861 r = resolve_copy_blocks_auto_candidate(sl, type, restrict_devno, &u);
5862 if (r < 0)
5863 return r;
5864 if (r > 0) {
5865 /* We found a matching one! */
5866 if (found != 0)
5867 return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ),
5868 "Multiple matching partitions found, refusing.");
5869
5870 found = sl;
5871 found_uuid = u;
5872 }
5873 }
5874 } else if (errno != ENOENT)
5875 return log_error_errno(errno, "Failed open %s: %m", p);
5876 else {
5877 r = resolve_copy_blocks_auto_candidate(devno, type, restrict_devno, &found_uuid);
5878 if (r < 0)
5879 return r;
5880 if (r > 0)
5881 found = devno;
5882 }
5883
5884 if (found == 0)
5885 return log_error_errno(SYNTHETIC_ERRNO(ENXIO),
5886 "Unable to automatically discover suitable partition to copy blocks from.");
5887
5888 if (ret_devno)
5889 *ret_devno = found;
5890
5891 if (ret_uuid)
5892 *ret_uuid = found_uuid;
5893
5894 return 0;
5895 }
5896
5897 static int context_open_copy_block_paths(
5898 Context *context,
5899 dev_t restrict_devno) {
5900
5901 int r;
5902
5903 assert(context);
5904
5905 LIST_FOREACH(partitions, p, context->partitions) {
5906 _cleanup_close_ int source_fd = -EBADF;
5907 _cleanup_free_ char *opened = NULL;
5908 sd_id128_t uuid = SD_ID128_NULL;
5909 uint64_t size;
5910 struct stat st;
5911
5912 if (p->copy_blocks_fd >= 0)
5913 continue;
5914
5915 assert(p->copy_blocks_size == UINT64_MAX);
5916
5917 if (PARTITION_EXISTS(p)) /* Never copy over partitions that already exist! */
5918 continue;
5919
5920 if (p->copy_blocks_path) {
5921
5922 source_fd = chase_and_open(p->copy_blocks_path, p->copy_blocks_root, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NONBLOCK, &opened);
5923 if (source_fd < 0)
5924 return log_error_errno(source_fd, "Failed to open '%s': %m", p->copy_blocks_path);
5925
5926 if (fstat(source_fd, &st) < 0)
5927 return log_error_errno(errno, "Failed to stat block copy file '%s': %m", opened);
5928
5929 if (!S_ISREG(st.st_mode) && restrict_devno != (dev_t) -1)
5930 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
5931 "Copying from block device node is not permitted in --image=/--root= mode, refusing.");
5932
5933 } else if (p->copy_blocks_auto) {
5934 dev_t devno = 0; /* Fake initialization to appease gcc. */
5935
5936 r = resolve_copy_blocks_auto(p->type, p->copy_blocks_root, restrict_devno, &devno, &uuid);
5937 if (r < 0)
5938 return r;
5939 assert(devno != 0);
5940
5941 source_fd = r = device_open_from_devnum(S_IFBLK, devno, O_RDONLY|O_CLOEXEC|O_NONBLOCK, &opened);
5942 if (r < 0)
5943 return log_error_errno(r, "Failed to open automatically determined source block copy device " DEVNUM_FORMAT_STR ": %m",
5944 DEVNUM_FORMAT_VAL(devno));
5945
5946 if (fstat(source_fd, &st) < 0)
5947 return log_error_errno(errno, "Failed to stat block copy file '%s': %m", opened);
5948 } else
5949 continue;
5950
5951 if (S_ISDIR(st.st_mode)) {
5952 _cleanup_free_ char *bdev = NULL;
5953 dev_t devt;
5954
5955 /* If the file is a directory, automatically find the backing block device */
5956
5957 if (major(st.st_dev) != 0)
5958 devt = st.st_dev;
5959 else {
5960 /* Special support for btrfs */
5961 r = btrfs_get_block_device_fd(source_fd, &devt);
5962 if (r == -EUCLEAN)
5963 return btrfs_log_dev_root(LOG_ERR, r, opened);
5964 if (r < 0)
5965 return log_error_errno(r, "Unable to determine backing block device of '%s': %m", opened);
5966 }
5967
5968 safe_close(source_fd);
5969
5970 source_fd = r = device_open_from_devnum(S_IFBLK, devt, O_RDONLY|O_CLOEXEC|O_NONBLOCK, &bdev);
5971 if (r < 0)
5972 return log_error_errno(r, "Failed to open block device backing '%s': %m", opened);
5973
5974 if (fstat(source_fd, &st) < 0)
5975 return log_error_errno(errno, "Failed to stat block device '%s': %m", bdev);
5976 }
5977
5978 if (S_ISREG(st.st_mode))
5979 size = st.st_size;
5980 else if (S_ISBLK(st.st_mode)) {
5981 if (ioctl(source_fd, BLKGETSIZE64, &size) != 0)
5982 return log_error_errno(errno, "Failed to determine size of block device to copy from: %m");
5983 } else
5984 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified path to copy blocks from '%s' is not a regular file, block device or directory, refusing: %m", opened);
5985
5986 if (size <= 0)
5987 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "File to copy bytes from '%s' has zero size, refusing.", opened);
5988 if (size % 512 != 0)
5989 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "File to copy bytes from '%s' has size that is not multiple of 512, refusing.", opened);
5990
5991 p->copy_blocks_fd = TAKE_FD(source_fd);
5992 p->copy_blocks_size = size;
5993
5994 free_and_replace(p->copy_blocks_path, opened);
5995
5996 /* When copying from an existing partition copy that partitions UUID if none is configured explicitly */
5997 if (!p->new_uuid_is_set && !sd_id128_is_null(uuid)) {
5998 p->new_uuid = uuid;
5999 p->new_uuid_is_set = true;
6000 }
6001 }
6002
6003 return 0;
6004 }
6005
6006 static int fd_apparent_size(int fd, uint64_t *ret) {
6007 off_t initial = 0;
6008 uint64_t size = 0;
6009
6010 assert(fd >= 0);
6011 assert(ret);
6012
6013 initial = lseek(fd, 0, SEEK_CUR);
6014 if (initial < 0)
6015 return log_error_errno(errno, "Failed to get file offset: %m");
6016
6017 for (off_t off = 0;;) {
6018 off_t r;
6019
6020 r = lseek(fd, off, SEEK_DATA);
6021 if (r < 0 && errno == ENXIO)
6022 /* If errno == ENXIO, that means we've reached the final hole of the file and
6023 * that hole isn't followed by more data. */
6024 break;
6025 if (r < 0)
6026 return log_error_errno(errno, "Failed to seek data in file from offset %"PRIi64": %m", off);
6027
6028 off = r; /* Set the offset to the start of the data segment. */
6029
6030 /* After copying a potential hole, find the end of the data segment by looking for
6031 * the next hole. If we get ENXIO, we're at EOF. */
6032 r = lseek(fd, off, SEEK_HOLE);
6033 if (r < 0) {
6034 if (errno == ENXIO)
6035 break;
6036 return log_error_errno(errno, "Failed to seek hole in file from offset %"PRIi64": %m", off);
6037 }
6038
6039 size += r - off;
6040 off = r;
6041 }
6042
6043 if (lseek(fd, initial, SEEK_SET) < 0)
6044 return log_error_errno(errno, "Failed to reset file offset: %m");
6045
6046 *ret = size;
6047
6048 return 0;
6049 }
6050
6051 static int context_minimize(Context *context) {
6052 const char *vt = NULL;
6053 int r;
6054
6055 assert(context);
6056
6057 LIST_FOREACH(partitions, p, context->partitions) {
6058 _cleanup_(rm_rf_physical_and_freep) char *root = NULL;
6059 _cleanup_(unlink_and_freep) char *temp = NULL;
6060 _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
6061 _cleanup_strv_free_ char **extra_mkfs_options = NULL;
6062 _cleanup_close_ int fd = -EBADF;
6063 _cleanup_free_ char *hint = NULL;
6064 sd_id128_t fs_uuid;
6065 struct stat st;
6066 uint64_t fsz;
6067
6068 if (p->dropped)
6069 continue;
6070
6071 if (PARTITION_EXISTS(p)) /* Never format existing partitions */
6072 continue;
6073
6074 if (!p->format)
6075 continue;
6076
6077 if (p->copy_blocks_fd >= 0)
6078 continue;
6079
6080 if (p->minimize == MINIMIZE_OFF)
6081 continue;
6082
6083 if (!partition_needs_populate(p))
6084 continue;
6085
6086 assert(!p->copy_blocks_path);
6087
6088 (void) partition_hint(p, context->node, &hint);
6089
6090 log_info("Pre-populating %s filesystem of partition %s twice to calculate minimal partition size",
6091 p->format, strna(hint));
6092
6093 if (!vt) {
6094 r = var_tmp_dir(&vt);
6095 if (r < 0)
6096 return log_error_errno(r, "Could not determine temporary directory: %m");
6097 }
6098
6099 r = tempfn_random_child(vt, "repart", &temp);
6100 if (r < 0)
6101 return log_error_errno(r, "Failed to generate temporary file path: %m");
6102
6103 if (fstype_is_ro(p->format))
6104 fs_uuid = p->fs_uuid;
6105 else {
6106 fd = open(temp, O_CREAT|O_EXCL|O_CLOEXEC|O_RDWR|O_NOCTTY, 0600);
6107 if (fd < 0)
6108 return log_error_errno(errno, "Failed to open temporary file %s: %m", temp);
6109
6110 /* This may seem huge but it will be created sparse so it doesn't take up any space
6111 * on disk until written to. */
6112 if (ftruncate(fd, 1024ULL * 1024ULL * 1024ULL * 1024ULL) < 0)
6113 return log_error_errno(errno, "Failed to truncate temporary file to %s: %m",
6114 FORMAT_BYTES(1024ULL * 1024ULL * 1024ULL * 1024ULL));
6115
6116 if (arg_offline <= 0) {
6117 r = loop_device_make(fd, O_RDWR, 0, UINT64_MAX, context->sector_size, 0, LOCK_EX, &d);
6118 if (r < 0 && (arg_offline == 0 || (r != -ENOENT && !ERRNO_IS_PRIVILEGE(r)) || !strv_isempty(p->subvolumes)))
6119 return log_error_errno(r, "Failed to make loopback device of %s: %m", temp);
6120 }
6121
6122 /* We're going to populate this filesystem twice so use a random UUID the first time
6123 * to avoid UUID conflicts. */
6124 r = sd_id128_randomize(&fs_uuid);
6125 if (r < 0)
6126 return r;
6127 }
6128
6129 if (!d || fstype_is_ro(p->format)) {
6130 if (!mkfs_supports_root_option(p->format))
6131 return log_error_errno(SYNTHETIC_ERRNO(ENODEV),
6132 "Loop device access is required to populate %s filesystems",
6133 p->format);
6134
6135 r = partition_populate_directory(context, p, &root);
6136 if (r < 0)
6137 return r;
6138 }
6139
6140 r = mkfs_options_from_env("REPART", p->format, &extra_mkfs_options);
6141 if (r < 0)
6142 return log_error_errno(r,
6143 "Failed to determine mkfs command line options for '%s': %m",
6144 p->format);
6145
6146 r = make_filesystem(d ? d->node : temp,
6147 p->format,
6148 strempty(p->new_label),
6149 root,
6150 fs_uuid,
6151 arg_discard, /* quiet = */ false,
6152 context->fs_sector_size,
6153 extra_mkfs_options);
6154 if (r < 0)
6155 return r;
6156
6157 /* Read-only filesystems are minimal from the first try because they create and size the
6158 * loopback file for us. */
6159 if (fstype_is_ro(p->format)) {
6160 assert(fd < 0);
6161
6162 fd = open(temp, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
6163 if (fd < 0)
6164 return log_error_errno(errno, "Failed to open temporary file %s: %m", temp);
6165
6166 if (fstat(fd, &st) < 0)
6167 return log_error_errno(errno, "Failed to stat temporary file: %m");
6168
6169 log_info("Minimal partition size of %s filesystem of partition %s is %s",
6170 p->format, strna(hint), FORMAT_BYTES(st.st_size));
6171
6172 p->copy_blocks_path = TAKE_PTR(temp);
6173 p->copy_blocks_path_is_our_file = true;
6174 p->copy_blocks_fd = TAKE_FD(fd);
6175 p->copy_blocks_size = st.st_size;
6176 continue;
6177 }
6178
6179 if (!root) {
6180 assert(d);
6181
6182 r = partition_populate_filesystem(context, p, d->node);
6183 if (r < 0)
6184 return r;
6185 }
6186
6187 /* Other filesystems need to be provided with a pre-sized loopback file and will adapt to
6188 * fully occupy it. Because we gave the filesystem a 1T sparse file, we need to shrink the
6189 * filesystem down to a reasonable size again to fit it in the disk image. While there are
6190 * some filesystems that support shrinking, it doesn't always work properly (e.g. shrinking
6191 * btrfs gives us a 2.0G filesystem regardless of what we put in it). Instead, let's populate
6192 * the filesystem again, but this time, instead of providing the filesystem with a 1T sparse
6193 * loopback file, let's size the loopback file based on the actual data used by the
6194 * filesystem in the sparse file after the first attempt. This should be a good guess of the
6195 * minimal amount of space needed in the filesystem to fit all the required data.
6196 */
6197 r = fd_apparent_size(fd, &fsz);
6198 if (r < 0)
6199 return r;
6200
6201 /* Massage the size a bit because just going by actual data used in the sparse file isn't
6202 * fool-proof. */
6203 uint64_t heuristic = streq(p->format, "xfs") ? fsz : fsz / 2;
6204 fsz = round_up_size(fsz + heuristic, context->grain_size);
6205 if (minimal_size_by_fs_name(p->format) != UINT64_MAX)
6206 fsz = MAX(minimal_size_by_fs_name(p->format), fsz);
6207
6208 log_info("Minimal partition size of %s filesystem of partition %s is %s",
6209 p->format, strna(hint), FORMAT_BYTES(fsz));
6210
6211 d = loop_device_unref(d);
6212
6213 /* Erase the previous filesystem first. */
6214 if (ftruncate(fd, 0))
6215 return log_error_errno(errno, "Failed to erase temporary file: %m");
6216
6217 if (ftruncate(fd, fsz))
6218 return log_error_errno(errno, "Failed to truncate temporary file to %s: %m", FORMAT_BYTES(fsz));
6219
6220 if (arg_offline <= 0) {
6221 r = loop_device_make(fd, O_RDWR, 0, UINT64_MAX, context->sector_size, 0, LOCK_EX, &d);
6222 if (r < 0 && (arg_offline == 0 || (r != -ENOENT && !ERRNO_IS_PRIVILEGE(r)) || !strv_isempty(p->subvolumes)))
6223 return log_error_errno(r, "Failed to make loopback device of %s: %m", temp);
6224 }
6225
6226 r = make_filesystem(d ? d->node : temp,
6227 p->format,
6228 strempty(p->new_label),
6229 root,
6230 p->fs_uuid,
6231 arg_discard,
6232 /* quiet = */ false,
6233 context->fs_sector_size,
6234 extra_mkfs_options);
6235 if (r < 0)
6236 return r;
6237
6238 if (!root) {
6239 assert(d);
6240
6241 r = partition_populate_filesystem(context, p, d->node);
6242 if (r < 0)
6243 return r;
6244 }
6245
6246 if (fstat(fd, &st) < 0)
6247 return log_error_errno(errno, "Failed to stat temporary file: %m");
6248
6249 p->copy_blocks_path = TAKE_PTR(temp);
6250 p->copy_blocks_path_is_our_file = true;
6251 p->copy_blocks_fd = TAKE_FD(fd);
6252 p->copy_blocks_size = st.st_size;
6253 }
6254
6255 /* Now that we've done the data partitions, do the verity hash partitions. We do these in a separate
6256 * step because they might depend on data generated in the previous step. */
6257
6258 LIST_FOREACH(partitions, p, context->partitions) {
6259 _cleanup_(unlink_and_freep) char *temp = NULL;
6260 _cleanup_free_ char *hint = NULL;
6261 _cleanup_close_ int fd = -EBADF;
6262 struct stat st;
6263 Partition *dp;
6264
6265 if (p->dropped)
6266 continue;
6267
6268 if (PARTITION_EXISTS(p)) /* Never format existing partitions */
6269 continue;
6270
6271 if (p->minimize == MINIMIZE_OFF)
6272 continue;
6273
6274 if (p->verity != VERITY_HASH)
6275 continue;
6276
6277 assert_se(dp = p->siblings[VERITY_DATA]);
6278 assert(!dp->dropped);
6279 assert(dp->copy_blocks_path);
6280
6281 (void) partition_hint(p, context->node, &hint);
6282
6283 log_info("Pre-populating verity hash data of partition %s to calculate minimal partition size",
6284 strna(hint));
6285
6286 if (!vt) {
6287 r = var_tmp_dir(&vt);
6288 if (r < 0)
6289 return log_error_errno(r, "Could not determine temporary directory: %m");
6290 }
6291
6292 r = tempfn_random_child(vt, "repart", &temp);
6293 if (r < 0)
6294 return log_error_errno(r, "Failed to generate temporary file path: %m");
6295
6296 r = touch(temp);
6297 if (r < 0)
6298 return log_error_errno(r, "Failed to create temporary file: %m");
6299
6300 r = partition_format_verity_hash(context, p, temp, dp->copy_blocks_path);
6301 if (r < 0)
6302 return r;
6303
6304 fd = open(temp, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
6305 if (fd < 0)
6306 return log_error_errno(errno, "Failed to open temporary file %s: %m", temp);
6307
6308 if (fstat(fd, &st) < 0)
6309 return log_error_errno(r, "Failed to stat temporary file: %m");
6310
6311 log_info("Minimal partition size of verity hash partition %s is %s",
6312 strna(hint), FORMAT_BYTES(st.st_size));
6313
6314 p->copy_blocks_path = TAKE_PTR(temp);
6315 p->copy_blocks_path_is_our_file = true;
6316 p->copy_blocks_fd = TAKE_FD(fd);
6317 p->copy_blocks_size = st.st_size;
6318 }
6319
6320 return 0;
6321 }
6322
6323 static int parse_partition_types(const char *p, GptPartitionType **partitions, size_t *n_partitions) {
6324 int r;
6325
6326 assert(partitions);
6327 assert(n_partitions);
6328
6329 for (;;) {
6330 _cleanup_free_ char *name = NULL;
6331 GptPartitionType type;
6332
6333 r = extract_first_word(&p, &name, ",", EXTRACT_CUNESCAPE|EXTRACT_DONT_COALESCE_SEPARATORS);
6334 if (r == 0)
6335 break;
6336 if (r < 0)
6337 return log_error_errno(r, "Failed to extract partition type identifier or GUID: %s", p);
6338
6339 r = gpt_partition_type_from_string(name, &type);
6340 if (r < 0)
6341 return log_error_errno(r, "'%s' is not a valid partition type identifier or GUID", name);
6342
6343 if (!GREEDY_REALLOC(*partitions, *n_partitions + 1))
6344 return log_oom();
6345
6346 (*partitions)[(*n_partitions)++] = type;
6347 }
6348
6349 return 0;
6350 }
6351
6352 static int help(void) {
6353 _cleanup_free_ char *link = NULL;
6354 int r;
6355
6356 r = terminal_urlify_man("systemd-repart", "8", &link);
6357 if (r < 0)
6358 return log_oom();
6359
6360 printf("%s [OPTIONS...] [DEVICE]\n"
6361 "\n%sGrow and add partitions to partition table.%s\n\n"
6362 " -h --help Show this help\n"
6363 " --version Show package version\n"
6364 " --no-pager Do not pipe output into a pager\n"
6365 " --no-legend Do not show the headers and footers\n"
6366 " --dry-run=BOOL Whether to run dry-run operation\n"
6367 " --empty=MODE One of refuse, allow, require, force, create; controls\n"
6368 " how to handle empty disks lacking partition tables\n"
6369 " --discard=BOOL Whether to discard backing blocks for new partitions\n"
6370 " --pretty=BOOL Whether to show pretty summary before doing changes\n"
6371 " --factory-reset=BOOL Whether to remove data partitions before recreating\n"
6372 " them\n"
6373 " --can-factory-reset Test whether factory reset is defined\n"
6374 " --root=PATH Operate relative to root path\n"
6375 " --image=PATH Operate relative to image file\n"
6376 " --image-policy=POLICY\n"
6377 " Specify disk image dissection policy\n"
6378 " --definitions=DIR Find partition definitions in specified directory\n"
6379 " --key-file=PATH Key to use when encrypting partitions\n"
6380 " --private-key=PATH Private key to use when generating verity roothash\n"
6381 " signatures\n"
6382 " --certificate=PATH PEM certificate to use when generating verity\n"
6383 " roothash signatures\n"
6384 " --tpm2-device=PATH Path to TPM2 device node to use\n"
6385 " --tpm2-pcrs=PCR1+PCR2+PCR3+…\n"
6386 " TPM2 PCR indexes to use for TPM2 enrollment\n"
6387 " --tpm2-public-key=PATH\n"
6388 " Enroll signed TPM2 PCR policy against PEM public key\n"
6389 " --tpm2-public-key-pcrs=PCR1+PCR2+PCR3+…\n"
6390 " Enroll signed TPM2 PCR policy for specified TPM2 PCRs\n"
6391 " --seed=UUID 128-bit seed UUID to derive all UUIDs from\n"
6392 " --size=BYTES Grow loopback file to specified size\n"
6393 " --json=pretty|short|off\n"
6394 " Generate JSON output\n"
6395 " --split=BOOL Whether to generate split artifacts\n"
6396 " --include-partitions=PARTITION1,PARTITION2,PARTITION3,…\n"
6397 " Ignore partitions not of the specified types\n"
6398 " --exclude-partitions=PARTITION1,PARTITION2,PARTITION3,…\n"
6399 " Ignore partitions of the specified types\n"
6400 " --defer-partitions=PARTITION1,PARTITION2,PARTITION3,…\n"
6401 " Take partitions of the specified types into account\n"
6402 " but don't populate them yet\n"
6403 " --sector-size=SIZE Set the logical sector size for the image\n"
6404 " --architecture=ARCH Set the generic architecture for the image\n"
6405 " --offline=BOOL Whether to build the image offline\n"
6406 " -s --copy-source=PATH Specify the primary source tree to copy files from\n"
6407 " --copy-from=IMAGE Copy partitions from the given image(s)\n"
6408 " -S --make-ddi=sysext Make a system extension DDI\n"
6409 " -C --make-ddi=confext Make a configuration extension DDI\n"
6410 " -P --make-ddi=portable Make a portable service DDI\n"
6411 "\nSee the %s for details.\n",
6412 program_invocation_short_name,
6413 ansi_highlight(),
6414 ansi_normal(),
6415 link);
6416
6417 return 0;
6418 }
6419
6420 static int parse_argv(int argc, char *argv[]) {
6421
6422 enum {
6423 ARG_VERSION = 0x100,
6424 ARG_NO_PAGER,
6425 ARG_NO_LEGEND,
6426 ARG_DRY_RUN,
6427 ARG_EMPTY,
6428 ARG_DISCARD,
6429 ARG_FACTORY_RESET,
6430 ARG_CAN_FACTORY_RESET,
6431 ARG_ROOT,
6432 ARG_IMAGE,
6433 ARG_IMAGE_POLICY,
6434 ARG_SEED,
6435 ARG_PRETTY,
6436 ARG_DEFINITIONS,
6437 ARG_SIZE,
6438 ARG_JSON,
6439 ARG_KEY_FILE,
6440 ARG_PRIVATE_KEY,
6441 ARG_CERTIFICATE,
6442 ARG_TPM2_DEVICE,
6443 ARG_TPM2_PCRS,
6444 ARG_TPM2_PUBLIC_KEY,
6445 ARG_TPM2_PUBLIC_KEY_PCRS,
6446 ARG_SPLIT,
6447 ARG_INCLUDE_PARTITIONS,
6448 ARG_EXCLUDE_PARTITIONS,
6449 ARG_DEFER_PARTITIONS,
6450 ARG_SECTOR_SIZE,
6451 ARG_SKIP_PARTITIONS,
6452 ARG_ARCHITECTURE,
6453 ARG_OFFLINE,
6454 ARG_COPY_FROM,
6455 ARG_MAKE_DDI,
6456 };
6457
6458 static const struct option options[] = {
6459 { "help", no_argument, NULL, 'h' },
6460 { "version", no_argument, NULL, ARG_VERSION },
6461 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
6462 { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
6463 { "dry-run", required_argument, NULL, ARG_DRY_RUN },
6464 { "empty", required_argument, NULL, ARG_EMPTY },
6465 { "discard", required_argument, NULL, ARG_DISCARD },
6466 { "factory-reset", required_argument, NULL, ARG_FACTORY_RESET },
6467 { "can-factory-reset", no_argument, NULL, ARG_CAN_FACTORY_RESET },
6468 { "root", required_argument, NULL, ARG_ROOT },
6469 { "image", required_argument, NULL, ARG_IMAGE },
6470 { "image-policy", required_argument, NULL, ARG_IMAGE_POLICY },
6471 { "seed", required_argument, NULL, ARG_SEED },
6472 { "pretty", required_argument, NULL, ARG_PRETTY },
6473 { "definitions", required_argument, NULL, ARG_DEFINITIONS },
6474 { "size", required_argument, NULL, ARG_SIZE },
6475 { "json", required_argument, NULL, ARG_JSON },
6476 { "key-file", required_argument, NULL, ARG_KEY_FILE },
6477 { "private-key", required_argument, NULL, ARG_PRIVATE_KEY },
6478 { "certificate", required_argument, NULL, ARG_CERTIFICATE },
6479 { "tpm2-device", required_argument, NULL, ARG_TPM2_DEVICE },
6480 { "tpm2-pcrs", required_argument, NULL, ARG_TPM2_PCRS },
6481 { "tpm2-public-key", required_argument, NULL, ARG_TPM2_PUBLIC_KEY },
6482 { "tpm2-public-key-pcrs", required_argument, NULL, ARG_TPM2_PUBLIC_KEY_PCRS },
6483 { "split", required_argument, NULL, ARG_SPLIT },
6484 { "include-partitions", required_argument, NULL, ARG_INCLUDE_PARTITIONS },
6485 { "exclude-partitions", required_argument, NULL, ARG_EXCLUDE_PARTITIONS },
6486 { "defer-partitions", required_argument, NULL, ARG_DEFER_PARTITIONS },
6487 { "sector-size", required_argument, NULL, ARG_SECTOR_SIZE },
6488 { "architecture", required_argument, NULL, ARG_ARCHITECTURE },
6489 { "offline", required_argument, NULL, ARG_OFFLINE },
6490 { "copy-from", required_argument, NULL, ARG_COPY_FROM },
6491 { "copy-source", required_argument, NULL, 's' },
6492 { "make-ddi", required_argument, NULL, ARG_MAKE_DDI },
6493 {}
6494 };
6495
6496 int c, r;
6497
6498 assert(argc >= 0);
6499 assert(argv);
6500
6501 while ((c = getopt_long(argc, argv, "hs:SCP", options, NULL)) >= 0)
6502
6503 switch (c) {
6504
6505 case 'h':
6506 return help();
6507
6508 case ARG_VERSION:
6509 return version();
6510
6511 case ARG_NO_PAGER:
6512 arg_pager_flags |= PAGER_DISABLE;
6513 break;
6514
6515 case ARG_NO_LEGEND:
6516 arg_legend = false;
6517 break;
6518
6519 case ARG_DRY_RUN:
6520 r = parse_boolean_argument("--dry-run=", optarg, &arg_dry_run);
6521 if (r < 0)
6522 return r;
6523 break;
6524
6525 case ARG_EMPTY:
6526 if (isempty(optarg)) {
6527 arg_empty = EMPTY_UNSET;
6528 break;
6529 }
6530
6531 arg_empty = empty_mode_from_string(optarg);
6532 if (arg_empty < 0)
6533 return log_error_errno(arg_empty, "Failed to parse --empty= parameter: %s", optarg);
6534
6535 break;
6536
6537 case ARG_DISCARD:
6538 r = parse_boolean_argument("--discard=", optarg, &arg_discard);
6539 if (r < 0)
6540 return r;
6541 break;
6542
6543 case ARG_FACTORY_RESET:
6544 r = parse_boolean_argument("--factory-reset=", optarg, NULL);
6545 if (r < 0)
6546 return r;
6547 arg_factory_reset = r;
6548 break;
6549
6550 case ARG_CAN_FACTORY_RESET:
6551 arg_can_factory_reset = true;
6552 break;
6553
6554 case ARG_ROOT:
6555 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_root);
6556 if (r < 0)
6557 return r;
6558 break;
6559
6560 case ARG_IMAGE:
6561 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_image);
6562 if (r < 0)
6563 return r;
6564 break;
6565
6566 case ARG_IMAGE_POLICY:
6567 r = parse_image_policy_argument(optarg, &arg_image_policy);
6568 if (r < 0)
6569 return r;
6570 break;
6571
6572 case ARG_SEED:
6573 if (isempty(optarg)) {
6574 arg_seed = SD_ID128_NULL;
6575 arg_randomize = false;
6576 } else if (streq(optarg, "random"))
6577 arg_randomize = true;
6578 else {
6579 r = sd_id128_from_string(optarg, &arg_seed);
6580 if (r < 0)
6581 return log_error_errno(r, "Failed to parse seed: %s", optarg);
6582
6583 arg_randomize = false;
6584 }
6585
6586 break;
6587
6588 case ARG_PRETTY:
6589 r = parse_boolean_argument("--pretty=", optarg, NULL);
6590 if (r < 0)
6591 return r;
6592 arg_pretty = r;
6593 break;
6594
6595 case ARG_DEFINITIONS: {
6596 _cleanup_free_ char *path = NULL;
6597 r = parse_path_argument(optarg, false, &path);
6598 if (r < 0)
6599 return r;
6600 if (strv_consume(&arg_definitions, TAKE_PTR(path)) < 0)
6601 return log_oom();
6602 break;
6603 }
6604
6605 case ARG_SIZE: {
6606 uint64_t parsed, rounded;
6607
6608 if (streq(optarg, "auto")) {
6609 arg_size = UINT64_MAX;
6610 arg_size_auto = true;
6611 break;
6612 }
6613
6614 r = parse_size(optarg, 1024, &parsed);
6615 if (r < 0)
6616 return log_error_errno(r, "Failed to parse --size= parameter: %s", optarg);
6617
6618 rounded = round_up_size(parsed, 4096);
6619 if (rounded == 0)
6620 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Specified image size too small, refusing.");
6621 if (rounded == UINT64_MAX)
6622 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Specified image size too large, refusing.");
6623
6624 if (rounded != parsed)
6625 log_warning("Specified size is not a multiple of 4096, rounding up automatically. (%" PRIu64 " %s %" PRIu64 ")",
6626 parsed, special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), rounded);
6627
6628 arg_size = rounded;
6629 arg_size_auto = false;
6630 break;
6631 }
6632
6633 case ARG_JSON:
6634 r = parse_json_argument(optarg, &arg_json_format_flags);
6635 if (r <= 0)
6636 return r;
6637
6638 break;
6639
6640 case ARG_KEY_FILE: {
6641 _cleanup_(erase_and_freep) char *k = NULL;
6642 size_t n = 0;
6643
6644 r = read_full_file_full(
6645 AT_FDCWD, optarg, UINT64_MAX, SIZE_MAX,
6646 READ_FULL_FILE_SECURE|READ_FULL_FILE_WARN_WORLD_READABLE|READ_FULL_FILE_CONNECT_SOCKET,
6647 NULL,
6648 &k, &n);
6649 if (r < 0)
6650 return log_error_errno(r, "Failed to read key file '%s': %m", optarg);
6651
6652 erase_and_free(arg_key);
6653 arg_key = TAKE_PTR(k);
6654 arg_key_size = n;
6655 break;
6656 }
6657
6658 case ARG_PRIVATE_KEY: {
6659 _cleanup_(erase_and_freep) char *k = NULL;
6660 size_t n = 0;
6661
6662 r = read_full_file_full(
6663 AT_FDCWD, optarg, UINT64_MAX, SIZE_MAX,
6664 READ_FULL_FILE_SECURE|READ_FULL_FILE_WARN_WORLD_READABLE|READ_FULL_FILE_CONNECT_SOCKET,
6665 NULL,
6666 &k, &n);
6667 if (r < 0)
6668 return log_error_errno(r, "Failed to read key file '%s': %m", optarg);
6669
6670 EVP_PKEY_free(arg_private_key);
6671 arg_private_key = NULL;
6672 r = parse_private_key(k, n, &arg_private_key);
6673 if (r < 0)
6674 return r;
6675 break;
6676 }
6677
6678 case ARG_CERTIFICATE: {
6679 _cleanup_free_ char *cert = NULL;
6680 size_t n = 0;
6681
6682 r = read_full_file_full(
6683 AT_FDCWD, optarg, UINT64_MAX, SIZE_MAX,
6684 READ_FULL_FILE_CONNECT_SOCKET,
6685 NULL,
6686 &cert, &n);
6687 if (r < 0)
6688 return log_error_errno(r, "Failed to read certificate file '%s': %m", optarg);
6689
6690 X509_free(arg_certificate);
6691 arg_certificate = NULL;
6692 r = parse_x509_certificate(cert, n, &arg_certificate);
6693 if (r < 0)
6694 return r;
6695 break;
6696 }
6697
6698 case ARG_TPM2_DEVICE: {
6699 _cleanup_free_ char *device = NULL;
6700
6701 if (streq(optarg, "list"))
6702 return tpm2_list_devices();
6703
6704 if (!streq(optarg, "auto")) {
6705 device = strdup(optarg);
6706 if (!device)
6707 return log_oom();
6708 }
6709
6710 free(arg_tpm2_device);
6711 arg_tpm2_device = TAKE_PTR(device);
6712 break;
6713 }
6714
6715 case ARG_TPM2_PCRS:
6716 arg_tpm2_hash_pcr_values_use_default = false;
6717 r = tpm2_parse_pcr_argument_append(optarg, &arg_tpm2_hash_pcr_values, &arg_tpm2_n_hash_pcr_values);
6718 if (r < 0)
6719 return r;
6720
6721 break;
6722
6723 case ARG_TPM2_PUBLIC_KEY:
6724 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_tpm2_public_key);
6725 if (r < 0)
6726 return r;
6727
6728 break;
6729
6730 case ARG_TPM2_PUBLIC_KEY_PCRS:
6731 arg_tpm2_public_key_pcr_mask_use_default = false;
6732 r = tpm2_parse_pcr_argument_to_mask(optarg, &arg_tpm2_public_key_pcr_mask);
6733 if (r < 0)
6734 return r;
6735
6736 break;
6737
6738 case ARG_SPLIT:
6739 r = parse_boolean_argument("--split=", optarg, NULL);
6740 if (r < 0)
6741 return r;
6742
6743 arg_split = r;
6744 break;
6745
6746 case ARG_INCLUDE_PARTITIONS:
6747 if (arg_filter_partitions_type == FILTER_PARTITIONS_EXCLUDE)
6748 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6749 "Combination of --include-partitions= and --exclude-partitions= is invalid.");
6750
6751 r = parse_partition_types(optarg, &arg_filter_partitions, &arg_n_filter_partitions);
6752 if (r < 0)
6753 return r;
6754
6755 arg_filter_partitions_type = FILTER_PARTITIONS_INCLUDE;
6756
6757 break;
6758
6759 case ARG_EXCLUDE_PARTITIONS:
6760 if (arg_filter_partitions_type == FILTER_PARTITIONS_INCLUDE)
6761 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6762 "Combination of --include-partitions= and --exclude-partitions= is invalid.");
6763
6764 r = parse_partition_types(optarg, &arg_filter_partitions, &arg_n_filter_partitions);
6765 if (r < 0)
6766 return r;
6767
6768 arg_filter_partitions_type = FILTER_PARTITIONS_EXCLUDE;
6769
6770 break;
6771
6772 case ARG_DEFER_PARTITIONS:
6773 r = parse_partition_types(optarg, &arg_defer_partitions, &arg_n_defer_partitions);
6774 if (r < 0)
6775 return r;
6776
6777 break;
6778
6779 case ARG_SECTOR_SIZE:
6780 r = parse_sector_size(optarg, &arg_sector_size);
6781 if (r < 0)
6782 return r;
6783
6784 break;
6785
6786 case ARG_ARCHITECTURE:
6787 r = architecture_from_string(optarg);
6788 if (r < 0)
6789 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid architecture '%s'", optarg);
6790
6791 arg_architecture = r;
6792 break;
6793
6794 case ARG_OFFLINE:
6795 if (streq(optarg, "auto"))
6796 arg_offline = -1;
6797 else {
6798 r = parse_boolean_argument("--offline=", optarg, NULL);
6799 if (r < 0)
6800 return r;
6801
6802 arg_offline = r;
6803 }
6804
6805 break;
6806
6807 case ARG_COPY_FROM: {
6808 _cleanup_free_ char *p = NULL;
6809
6810 r = parse_path_argument(optarg, /* suppress_root= */ false, &p);
6811 if (r < 0)
6812 return r;
6813
6814 if (strv_consume(&arg_copy_from, TAKE_PTR(p)) < 0)
6815 return log_oom();
6816
6817 break;
6818 }
6819
6820 case 's':
6821 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_copy_source);
6822 if (r < 0)
6823 return r;
6824 break;
6825
6826 case ARG_MAKE_DDI:
6827 if (!filename_is_valid(optarg))
6828 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid DDI type: %s", optarg);
6829
6830 r = free_and_strdup_warn(&arg_make_ddi, optarg);
6831 if (r < 0)
6832 return r;
6833 break;
6834
6835 case 'S':
6836 r = free_and_strdup_warn(&arg_make_ddi, "sysext");
6837 if (r < 0)
6838 return r;
6839 break;
6840
6841 case 'C':
6842 r = free_and_strdup_warn(&arg_make_ddi, "confext");
6843 if (r < 0)
6844 return r;
6845 break;
6846
6847 case 'P':
6848 r = free_and_strdup_warn(&arg_make_ddi, "portable");
6849 if (r < 0)
6850 return r;
6851 break;
6852
6853 case '?':
6854 return -EINVAL;
6855
6856 default:
6857 assert_not_reached();
6858 }
6859
6860 if (argc - optind > 1)
6861 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6862 "Expected at most one argument, the path to the block device or image file.");
6863
6864 if (arg_make_ddi) {
6865 if (arg_definitions)
6866 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Combination of --make-ddi= and --definitions= is not supported.");
6867 if (!IN_SET(arg_empty, EMPTY_UNSET, EMPTY_CREATE))
6868 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Combination of --make-ddi= and --empty=%s is not supported.", empty_mode_to_string(arg_empty));
6869
6870 /* Imply automatic sizing in DDI mode */
6871 if (arg_size == UINT64_MAX)
6872 arg_size_auto = true;
6873
6874 if (!arg_copy_source)
6875 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No --copy-source= specified, refusing.");
6876
6877 r = dir_is_empty(arg_copy_source, /* ignore_hidden_or_backup= */ false);
6878 if (r < 0)
6879 return log_error_errno(r, "Failed to determine if '%s' is empty: %m", arg_copy_source);
6880 if (r > 0)
6881 return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Source directory '%s' is empty, refusing to create empty image.", arg_copy_source);
6882
6883 if (sd_id128_is_null(arg_seed) && !arg_randomize) {
6884 /* We don't want that /etc/machine-id leaks into any image built this way, hence
6885 * let's randomize the seed if not specified explicitly */
6886 log_notice("No seed value specified, randomizing generated UUIDs, resulting image will not be reproducible.");
6887 arg_randomize = true;
6888 }
6889
6890 arg_empty = EMPTY_CREATE;
6891 }
6892
6893 if (arg_empty == EMPTY_UNSET) /* default to refuse mode, if not otherwise specified */
6894 arg_empty = EMPTY_REFUSE;
6895
6896 if (arg_factory_reset > 0 && IN_SET(arg_empty, EMPTY_FORCE, EMPTY_REQUIRE, EMPTY_CREATE))
6897 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6898 "Combination of --factory-reset=yes and --empty=force/--empty=require/--empty=create is invalid.");
6899
6900 if (arg_can_factory_reset)
6901 arg_dry_run = true; /* When --can-factory-reset is specified we don't make changes, hence
6902 * non-dry-run mode makes no sense. Thus, imply dry run mode so that we
6903 * open things strictly read-only. */
6904 else if (arg_empty == EMPTY_CREATE)
6905 arg_dry_run = false; /* Imply --dry-run=no if we create the loopback file anew. After all we
6906 * cannot really break anyone's partition tables that way. */
6907
6908 /* Disable pager once we are not just reviewing, but doing things. */
6909 if (!arg_dry_run)
6910 arg_pager_flags |= PAGER_DISABLE;
6911
6912 if (arg_empty == EMPTY_CREATE && arg_size == UINT64_MAX && !arg_size_auto)
6913 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6914 "If --empty=create is specified, --size= must be specified, too.");
6915
6916 if (arg_image && arg_root)
6917 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Please specify either --root= or --image=, the combination of both is not supported.");
6918 else if (!arg_image && !arg_root && in_initrd()) {
6919
6920 /* By default operate on /sysusr/ or /sysroot/ when invoked in the initrd. We prefer the
6921 * former, if it is mounted, so that we have deterministic behaviour on systems where /usr/
6922 * is vendor-supplied but the root fs formatted on first boot. */
6923 r = path_is_mount_point("/sysusr/usr", NULL, 0);
6924 if (r <= 0) {
6925 if (r < 0 && r != -ENOENT)
6926 log_debug_errno(r, "Unable to determine whether /sysusr/usr is a mount point, assuming it is not: %m");
6927
6928 arg_root = strdup("/sysroot");
6929 } else
6930 arg_root = strdup("/sysusr");
6931 if (!arg_root)
6932 return log_oom();
6933 }
6934
6935 arg_node = argc > optind ? argv[optind] : NULL;
6936
6937 if (IN_SET(arg_empty, EMPTY_FORCE, EMPTY_REQUIRE, EMPTY_CREATE) && !arg_node && !arg_image)
6938 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6939 "A path to a device node or image file must be specified when --make-ddi=, --empty=force, --empty=require or --empty=create are used.");
6940
6941 if (arg_split && !arg_node)
6942 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6943 "A path to an image file must be specified when --split is used.");
6944
6945 if (arg_tpm2_public_key_pcr_mask_use_default && arg_tpm2_public_key)
6946 arg_tpm2_public_key_pcr_mask = INDEX_TO_MASK(uint32_t, TPM2_PCR_KERNEL_BOOT);
6947
6948 if (arg_tpm2_hash_pcr_values_use_default && !GREEDY_REALLOC_APPEND(
6949 arg_tpm2_hash_pcr_values,
6950 arg_tpm2_n_hash_pcr_values,
6951 &TPM2_PCR_VALUE_MAKE(TPM2_PCR_INDEX_DEFAULT, /* hash= */ 0, /* value= */ {}),
6952 1))
6953 return log_oom();
6954
6955 if (arg_pretty < 0 && isatty(STDOUT_FILENO))
6956 arg_pretty = true;
6957
6958 if (arg_architecture >= 0) {
6959 FOREACH_ARRAY(p, arg_filter_partitions, arg_n_filter_partitions)
6960 *p = gpt_partition_type_override_architecture(*p, arg_architecture);
6961
6962 FOREACH_ARRAY(p, arg_defer_partitions, arg_n_defer_partitions)
6963 *p = gpt_partition_type_override_architecture(*p, arg_architecture);
6964 }
6965
6966 return 1;
6967 }
6968
6969 static int parse_proc_cmdline_factory_reset(void) {
6970 bool b;
6971 int r;
6972
6973 if (arg_factory_reset >= 0) /* Never override what is specified on the process command line */
6974 return 0;
6975
6976 if (!in_initrd()) /* Never honour kernel command line factory reset request outside of the initrd */
6977 return 0;
6978
6979 r = proc_cmdline_get_bool("systemd.factory_reset", /* flags = */ 0, &b);
6980 if (r < 0)
6981 return log_error_errno(r, "Failed to parse systemd.factory_reset kernel command line argument: %m");
6982 if (r > 0) {
6983 arg_factory_reset = b;
6984
6985 if (b)
6986 log_notice("Honouring factory reset requested via kernel command line.");
6987 }
6988
6989 return 0;
6990 }
6991
6992 static int parse_efi_variable_factory_reset(void) {
6993 _cleanup_free_ char *value = NULL;
6994 int r;
6995
6996 if (arg_factory_reset >= 0) /* Never override what is specified on the process command line */
6997 return 0;
6998
6999 if (!in_initrd()) /* Never honour EFI variable factory reset request outside of the initrd */
7000 return 0;
7001
7002 r = efi_get_variable_string(EFI_SYSTEMD_VARIABLE(FactoryReset), &value);
7003 if (r < 0) {
7004 if (r == -ENOENT || ERRNO_IS_NOT_SUPPORTED(r))
7005 return 0;
7006 return log_error_errno(r, "Failed to read EFI variable FactoryReset: %m");
7007 }
7008
7009 r = parse_boolean(value);
7010 if (r < 0)
7011 return log_error_errno(r, "Failed to parse EFI variable FactoryReset: %m");
7012
7013 arg_factory_reset = r;
7014 if (r)
7015 log_notice("Factory reset requested via EFI variable FactoryReset.");
7016
7017 return 0;
7018 }
7019
7020 static int remove_efi_variable_factory_reset(void) {
7021 int r;
7022
7023 r = efi_set_variable(EFI_SYSTEMD_VARIABLE(FactoryReset), NULL, 0);
7024 if (r < 0) {
7025 if (r == -ENOENT || ERRNO_IS_NOT_SUPPORTED(r))
7026 return 0;
7027 return log_error_errno(r, "Failed to remove EFI variable FactoryReset: %m");
7028 }
7029
7030 log_info("Successfully unset EFI variable FactoryReset.");
7031 return 0;
7032 }
7033
7034 static int acquire_root_devno(
7035 const char *p,
7036 const char *root,
7037 int mode,
7038 char **ret,
7039 int *ret_fd) {
7040
7041 _cleanup_free_ char *found_path = NULL, *node = NULL;
7042 dev_t devno, fd_devno = MODE_INVALID;
7043 _cleanup_close_ int fd = -EBADF;
7044 struct stat st;
7045 int r;
7046
7047 assert(p);
7048 assert(ret);
7049 assert(ret_fd);
7050
7051 fd = chase_and_open(p, root, CHASE_PREFIX_ROOT, mode, &found_path);
7052 if (fd < 0)
7053 return fd;
7054
7055 if (fstat(fd, &st) < 0)
7056 return -errno;
7057
7058 if (S_ISREG(st.st_mode)) {
7059 *ret = TAKE_PTR(found_path);
7060 *ret_fd = TAKE_FD(fd);
7061 return 0;
7062 }
7063
7064 if (S_ISBLK(st.st_mode)) {
7065 /* Refuse referencing explicit block devices if a root dir is specified, after all we should
7066 * not be able to leave the image the root path constrains us to. */
7067 if (root)
7068 return -EPERM;
7069
7070 fd_devno = devno = st.st_rdev;
7071 } else if (S_ISDIR(st.st_mode)) {
7072
7073 devno = st.st_dev;
7074 if (major(devno) == 0) {
7075 r = btrfs_get_block_device_fd(fd, &devno);
7076 if (r == -ENOTTY) /* not btrfs */
7077 return -ENODEV;
7078 if (r < 0)
7079 return r;
7080 }
7081 } else
7082 return -ENOTBLK;
7083
7084 /* From dm-crypt to backing partition */
7085 r = block_get_originating(devno, &devno);
7086 if (r == -ENOENT)
7087 log_debug_errno(r, "Device '%s' has no dm-crypt/dm-verity device, no need to look for underlying block device.", p);
7088 else if (r < 0)
7089 log_debug_errno(r, "Failed to find underlying block device for '%s', ignoring: %m", p);
7090
7091 /* From partition to whole disk containing it */
7092 r = block_get_whole_disk(devno, &devno);
7093 if (r < 0)
7094 log_debug_errno(r, "Failed to find whole disk block device for '%s', ignoring: %m", p);
7095
7096 r = devname_from_devnum(S_IFBLK, devno, &node);
7097 if (r < 0)
7098 return log_debug_errno(r, "Failed to determine canonical path for '%s': %m", p);
7099
7100 /* Only if we still look at the same block device we can reuse the fd. Otherwise return an
7101 * invalidated fd. */
7102 if (fd_devno != MODE_INVALID && fd_devno == devno) {
7103 /* Tell udev not to interfere while we are processing the device */
7104 if (flock(fd, arg_dry_run ? LOCK_SH : LOCK_EX) < 0)
7105 return log_error_errno(errno, "Failed to lock device '%s': %m", node);
7106
7107 *ret_fd = TAKE_FD(fd);
7108 } else
7109 *ret_fd = -EBADF;
7110
7111 *ret = TAKE_PTR(node);
7112 return 0;
7113 }
7114
7115 static int find_root(Context *context) {
7116 _cleanup_free_ char *device = NULL;
7117 int r;
7118
7119 assert(context);
7120
7121 if (arg_node) {
7122 if (arg_empty == EMPTY_CREATE) {
7123 _cleanup_close_ int fd = -EBADF;
7124 _cleanup_free_ char *s = NULL;
7125
7126 s = strdup(arg_node);
7127 if (!s)
7128 return log_oom();
7129
7130 fd = open(arg_node, O_RDONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOFOLLOW, 0666);
7131 if (fd < 0)
7132 return log_error_errno(errno, "Failed to create '%s': %m", arg_node);
7133
7134 context->node = TAKE_PTR(s);
7135 context->node_is_our_file = true;
7136 context->backing_fd = TAKE_FD(fd);
7137 return 0;
7138 }
7139
7140 /* Note that we don't specify a root argument here: if the user explicitly configured a node
7141 * we'll take it relative to the host, not the image */
7142 r = acquire_root_devno(arg_node, NULL, O_RDONLY|O_CLOEXEC, &context->node, &context->backing_fd);
7143 if (r == -EUCLEAN)
7144 return btrfs_log_dev_root(LOG_ERR, r, arg_node);
7145 if (r < 0)
7146 return log_error_errno(r, "Failed to open file or determine backing device of %s: %m", arg_node);
7147
7148 return 0;
7149 }
7150
7151 assert(IN_SET(arg_empty, EMPTY_REFUSE, EMPTY_ALLOW));
7152
7153 /* If the root mount has been replaced by some form of volatile file system (overlayfs), the
7154 * original root block device node is symlinked in /run/systemd/volatile-root. Let's read that
7155 * here. */
7156 r = readlink_malloc("/run/systemd/volatile-root", &device);
7157 if (r == -ENOENT) { /* volatile-root not found */
7158 /* Let's search for the root device. We look for two cases here: first in /, and then in /usr. The
7159 * latter we check for cases where / is a tmpfs and only /usr is an actual persistent block device
7160 * (think: volatile setups) */
7161
7162 FOREACH_STRING(p, "/", "/usr") {
7163
7164 r = acquire_root_devno(p, arg_root, O_RDONLY|O_DIRECTORY|O_CLOEXEC, &context->node,
7165 &context->backing_fd);
7166 if (r < 0) {
7167 if (r == -EUCLEAN)
7168 return btrfs_log_dev_root(LOG_ERR, r, p);
7169 if (r != -ENODEV)
7170 return log_error_errno(r, "Failed to determine backing device of %s: %m", p);
7171 } else
7172 return 0;
7173 }
7174 } else if (r < 0)
7175 return log_error_errno(r, "Failed to read symlink /run/systemd/volatile-root: %m");
7176 else {
7177 r = acquire_root_devno(device, NULL, O_RDONLY|O_CLOEXEC, &context->node, &context->backing_fd);
7178 if (r == -EUCLEAN)
7179 return btrfs_log_dev_root(LOG_ERR, r, device);
7180 if (r < 0)
7181 return log_error_errno(r, "Failed to open file or determine backing device of %s: %m", device);
7182
7183 return 0;
7184 }
7185
7186 return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "Failed to discover root block device.");
7187 }
7188
7189 static int resize_pt(int fd, uint64_t sector_size) {
7190 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
7191 int r;
7192
7193 /* After resizing the backing file we need to resize the partition table itself too, so that it takes
7194 * possession of the enlarged backing file. For this it suffices to open the device with libfdisk and
7195 * immediately write it again, with no changes. */
7196
7197 r = fdisk_new_context_at(fd, /* path= */ NULL, /* read_only= */ false, sector_size, &c);
7198 if (r < 0)
7199 return log_error_errno(r, "Failed to open device '%s': %m", FORMAT_PROC_FD_PATH(fd));
7200
7201 r = fdisk_has_label(c);
7202 if (r < 0)
7203 return log_error_errno(r, "Failed to determine whether disk '%s' has a disk label: %m", FORMAT_PROC_FD_PATH(fd));
7204 if (r == 0) {
7205 log_debug("Not resizing partition table, as there currently is none.");
7206 return 0;
7207 }
7208
7209 r = fdisk_write_disklabel(c);
7210 if (r < 0)
7211 return log_error_errno(r, "Failed to write resized partition table: %m");
7212
7213 log_info("Resized partition table.");
7214 return 1;
7215 }
7216
7217 static int resize_backing_fd(
7218 const char *node, /* The primary way we access the disk image to operate on */
7219 int *fd, /* An O_RDONLY fd referring to that inode */
7220 const char *backing_file, /* If the above refers to a loopback device, the backing regular file for that, which we can grow */
7221 LoopDevice *loop_device,
7222 uint64_t sector_size) {
7223
7224 _cleanup_close_ int writable_fd = -EBADF;
7225 uint64_t current_size;
7226 struct stat st;
7227 int r;
7228
7229 assert(node);
7230 assert(fd);
7231
7232 if (arg_size == UINT64_MAX) /* Nothing to do */
7233 return 0;
7234
7235 if (*fd < 0) {
7236 /* Open the file if we haven't opened it yet. Note that we open it read-only here, just to
7237 * keep a reference to the file we can pass around. */
7238 *fd = open(node, O_RDONLY|O_CLOEXEC);
7239 if (*fd < 0)
7240 return log_error_errno(errno, "Failed to open '%s' in order to adjust size: %m", node);
7241 }
7242
7243 if (fstat(*fd, &st) < 0)
7244 return log_error_errno(errno, "Failed to stat '%s': %m", node);
7245
7246 if (S_ISBLK(st.st_mode)) {
7247 if (!backing_file)
7248 return log_error_errno(SYNTHETIC_ERRNO(EBADF), "Cannot resize block device '%s'.", node);
7249
7250 assert(loop_device);
7251
7252 if (ioctl(*fd, BLKGETSIZE64, &current_size) < 0)
7253 return log_error_errno(errno, "Failed to determine size of block device %s: %m", node);
7254 } else {
7255 r = stat_verify_regular(&st);
7256 if (r < 0)
7257 return log_error_errno(r, "Specified path '%s' is not a regular file or loopback block device, cannot resize: %m", node);
7258
7259 assert(!backing_file);
7260 assert(!loop_device);
7261 current_size = st.st_size;
7262 }
7263
7264 if (current_size >= arg_size) {
7265 log_info("File '%s' already is of requested size or larger, not growing. (%s >= %s)",
7266 node, FORMAT_BYTES(current_size), FORMAT_BYTES(arg_size));
7267 return 0;
7268 }
7269
7270 if (S_ISBLK(st.st_mode)) {
7271 assert(backing_file);
7272
7273 /* This is a loopback device. We can't really grow those directly, but we can grow the
7274 * backing file, hence let's do that. */
7275
7276 writable_fd = open(backing_file, O_WRONLY|O_CLOEXEC|O_NONBLOCK);
7277 if (writable_fd < 0)
7278 return log_error_errno(errno, "Failed to open backing file '%s': %m", backing_file);
7279
7280 if (fstat(writable_fd, &st) < 0)
7281 return log_error_errno(errno, "Failed to stat() backing file '%s': %m", backing_file);
7282
7283 r = stat_verify_regular(&st);
7284 if (r < 0)
7285 return log_error_errno(r, "Backing file '%s' of block device is not a regular file: %m", backing_file);
7286
7287 if ((uint64_t) st.st_size != current_size)
7288 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
7289 "Size of backing file '%s' of loopback block device '%s' don't match, refusing.",
7290 node, backing_file);
7291 } else {
7292 assert(S_ISREG(st.st_mode));
7293 assert(!backing_file);
7294
7295 /* The file descriptor is read-only. In order to grow the file we need to have a writable fd. We
7296 * reopen the file for that temporarily. We keep the writable fd only open for this operation though,
7297 * as fdisk can't accept it anyway. */
7298
7299 writable_fd = fd_reopen(*fd, O_WRONLY|O_CLOEXEC);
7300 if (writable_fd < 0)
7301 return log_error_errno(writable_fd, "Failed to reopen backing file '%s' writable: %m", node);
7302 }
7303
7304 if (!arg_discard) {
7305 if (fallocate(writable_fd, 0, 0, arg_size) < 0) {
7306 if (!ERRNO_IS_NOT_SUPPORTED(errno))
7307 return log_error_errno(errno, "Failed to grow '%s' from %s to %s by allocation: %m",
7308 node, FORMAT_BYTES(current_size), FORMAT_BYTES(arg_size));
7309
7310 /* Fallback to truncation, if fallocate() is not supported. */
7311 log_debug("Backing file system does not support fallocate(), falling back to ftruncate().");
7312 } else {
7313 if (current_size == 0) /* Likely regular file just created by us */
7314 log_info("Allocated %s for '%s'.", FORMAT_BYTES(arg_size), node);
7315 else
7316 log_info("File '%s' grown from %s to %s by allocation.",
7317 node, FORMAT_BYTES(current_size), FORMAT_BYTES(arg_size));
7318
7319 goto done;
7320 }
7321 }
7322
7323 if (ftruncate(writable_fd, arg_size) < 0)
7324 return log_error_errno(errno, "Failed to grow '%s' from %s to %s by truncation: %m",
7325 node, FORMAT_BYTES(current_size), FORMAT_BYTES(arg_size));
7326
7327 if (current_size == 0) /* Likely regular file just created by us */
7328 log_info("Sized '%s' to %s.", node, FORMAT_BYTES(arg_size));
7329 else
7330 log_info("File '%s' grown from %s to %s by truncation.",
7331 node, FORMAT_BYTES(current_size), FORMAT_BYTES(arg_size));
7332
7333 done:
7334 r = resize_pt(writable_fd, sector_size);
7335 if (r < 0)
7336 return r;
7337
7338 if (loop_device) {
7339 r = loop_device_refresh_size(loop_device, UINT64_MAX, arg_size);
7340 if (r < 0)
7341 return log_error_errno(r, "Failed to update loop device size: %m");
7342 }
7343
7344 return 1;
7345 }
7346
7347 static int determine_auto_size(Context *c) {
7348 uint64_t sum;
7349
7350 assert(c);
7351
7352 sum = round_up_size(GPT_METADATA_SIZE, 4096);
7353
7354 LIST_FOREACH(partitions, p, c->partitions) {
7355 uint64_t m;
7356
7357 if (p->dropped)
7358 continue;
7359
7360 m = partition_min_size_with_padding(c, p);
7361 if (m > UINT64_MAX - sum)
7362 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Image would grow too large, refusing.");
7363
7364 sum += m;
7365 }
7366
7367 if (c->total != UINT64_MAX)
7368 /* Image already allocated? Then show its size. */
7369 log_info("Automatically determined minimal disk image size as %s, current image size is %s.",
7370 FORMAT_BYTES(sum), FORMAT_BYTES(c->total));
7371 else
7372 /* If the image is being created right now, then it has no previous size, suppress any comment about it hence. */
7373 log_info("Automatically determined minimal disk image size as %s.",
7374 FORMAT_BYTES(sum));
7375
7376 arg_size = sum;
7377 return 0;
7378 }
7379
7380 static int run(int argc, char *argv[]) {
7381 _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
7382 _cleanup_(umount_and_freep) char *mounted_dir = NULL;
7383 _cleanup_(context_freep) Context* context = NULL;
7384 bool node_is_our_loop = false;
7385 int r;
7386
7387 log_show_color(true);
7388 log_parse_environment();
7389 log_open();
7390
7391 r = parse_argv(argc, argv);
7392 if (r <= 0)
7393 return r;
7394
7395 r = parse_proc_cmdline_factory_reset();
7396 if (r < 0)
7397 return r;
7398
7399 r = parse_efi_variable_factory_reset();
7400 if (r < 0)
7401 return r;
7402
7403 #if HAVE_LIBCRYPTSETUP
7404 cryptsetup_enable_logging(NULL);
7405 #endif
7406
7407 if (arg_image) {
7408 assert(!arg_root);
7409
7410 /* Mount this strictly read-only: we shall modify the partition table, not the file
7411 * systems */
7412 r = mount_image_privately_interactively(
7413 arg_image,
7414 arg_image_policy,
7415 DISSECT_IMAGE_MOUNT_READ_ONLY |
7416 (arg_node ? DISSECT_IMAGE_DEVICE_READ_ONLY : 0) | /* If a different node to make changes to is specified let's open the device in read-only mode) */
7417 DISSECT_IMAGE_GPT_ONLY |
7418 DISSECT_IMAGE_RELAX_VAR_CHECK |
7419 DISSECT_IMAGE_USR_NO_ROOT |
7420 DISSECT_IMAGE_REQUIRE_ROOT,
7421 &mounted_dir,
7422 /* ret_dir_fd= */ NULL,
7423 &loop_device);
7424 if (r < 0)
7425 return r;
7426
7427 arg_root = strdup(mounted_dir);
7428 if (!arg_root)
7429 return log_oom();
7430
7431 if (!arg_node) {
7432 arg_node = strdup(loop_device->node);
7433 if (!arg_node)
7434 return log_oom();
7435
7436 /* Remember that the device we are about to manipulate is actually the one we
7437 * allocated here, and thus to increase its backing file we know what to do */
7438 node_is_our_loop = true;
7439 }
7440 }
7441
7442 if (!arg_copy_source && arg_root) {
7443 /* If no explicit copy source is specified, then use --root=/--image= */
7444 arg_copy_source = strdup(arg_root);
7445 if (!arg_copy_source)
7446 return log_oom();
7447 }
7448
7449 context = context_new(arg_seed);
7450 if (!context)
7451 return log_oom();
7452
7453 r = context_copy_from(context);
7454 if (r < 0)
7455 return r;
7456
7457 if (arg_make_ddi) {
7458 _cleanup_free_ char *d = NULL, *dp = NULL;
7459 assert(!arg_definitions);
7460
7461 d = strjoin(arg_make_ddi, ".repart.d/");
7462 if (!d)
7463 return log_oom();
7464
7465 r = search_and_access(d, F_OK, arg_root, CONF_PATHS_USR_STRV("systemd/repart/definitions"), &dp);
7466 if (r < 0)
7467 return log_error_errno(errno, "DDI type '%s' is not defined: %m", arg_make_ddi);
7468
7469 if (strv_consume(&arg_definitions, TAKE_PTR(dp)) < 0)
7470 return log_oom();
7471 } else
7472 strv_uniq(arg_definitions);
7473
7474 r = context_read_definitions(context);
7475 if (r < 0)
7476 return r;
7477
7478 r = find_root(context);
7479 if (r == -ENODEV)
7480 return 76; /* Special return value which means "Root block device not found, so not doing
7481 * anything". This isn't really an error when called at boot. */
7482 if (r < 0)
7483 return r;
7484
7485 if (arg_size != UINT64_MAX) {
7486 r = resize_backing_fd(
7487 context->node,
7488 &context->backing_fd,
7489 node_is_our_loop ? arg_image : NULL,
7490 node_is_our_loop ? loop_device : NULL,
7491 context->sector_size);
7492 if (r < 0)
7493 return r;
7494 }
7495
7496 r = context_load_partition_table(context);
7497 if (r == -EHWPOISON)
7498 return 77; /* Special return value which means "Not GPT, so not doing anything". This isn't
7499 * really an error when called at boot. */
7500 if (r < 0)
7501 return r;
7502 context->from_scratch = r > 0; /* Starting from scratch */
7503
7504 if (arg_can_factory_reset) {
7505 r = context_can_factory_reset(context);
7506 if (r < 0)
7507 return r;
7508 if (r == 0)
7509 return EXIT_FAILURE;
7510
7511 return 0;
7512 }
7513
7514 r = context_factory_reset(context);
7515 if (r < 0)
7516 return r;
7517 if (r > 0) {
7518 /* We actually did a factory reset! */
7519 r = remove_efi_variable_factory_reset();
7520 if (r < 0)
7521 return r;
7522
7523 /* Reload the reduced partition table */
7524 context_unload_partition_table(context);
7525 r = context_load_partition_table(context);
7526 if (r < 0)
7527 return r;
7528 }
7529
7530 r = context_read_seed(context, arg_root);
7531 if (r < 0)
7532 return r;
7533
7534 /* Make sure each partition has a unique UUID and unique label */
7535 r = context_acquire_partition_uuids_and_labels(context);
7536 if (r < 0)
7537 return r;
7538
7539 /* Open all files to copy blocks from now, since we want to take their size into consideration */
7540 r = context_open_copy_block_paths(
7541 context,
7542 loop_device ? loop_device->devno : /* if --image= is specified, only allow partitions on the loopback device */
7543 arg_root && !arg_image ? 0 : /* if --root= is specified, don't accept any block device */
7544 (dev_t) -1); /* if neither is specified, make no restrictions */
7545 if (r < 0)
7546 return r;
7547
7548 r = context_minimize(context);
7549 if (r < 0)
7550 return r;
7551
7552 if (arg_size_auto) {
7553 r = determine_auto_size(context);
7554 if (r < 0)
7555 return r;
7556
7557 /* Flush out everything again, and let's grow the file first, then start fresh */
7558 context_unload_partition_table(context);
7559
7560 assert(arg_size != UINT64_MAX);
7561 r = resize_backing_fd(
7562 context->node,
7563 &context->backing_fd,
7564 node_is_our_loop ? arg_image : NULL,
7565 node_is_our_loop ? loop_device : NULL,
7566 context->sector_size);
7567 if (r < 0)
7568 return r;
7569
7570 r = context_load_partition_table(context);
7571 if (r < 0)
7572 return r;
7573 }
7574
7575 /* First try to fit new partitions in, dropping by priority until it fits */
7576 for (;;) {
7577 uint64_t largest_free_area;
7578
7579 if (context_allocate_partitions(context, &largest_free_area))
7580 break; /* Success! */
7581
7582 if (!context_drop_or_foreignize_one_priority(context)) {
7583 r = log_error_errno(SYNTHETIC_ERRNO(ENOSPC),
7584 "Can't fit requested partitions into available free space (%s), refusing.",
7585 FORMAT_BYTES(largest_free_area));
7586 determine_auto_size(context);
7587 return r;
7588 }
7589 }
7590
7591 /* Now assign free space according to the weight logic */
7592 r = context_grow_partitions(context);
7593 if (r < 0)
7594 return r;
7595
7596 /* Now calculate where each new partition gets placed */
7597 context_place_partitions(context);
7598
7599 (void) context_dump(context, /*late=*/ false);
7600
7601 r = context_write_partition_table(context);
7602 if (r < 0)
7603 return r;
7604
7605 r = context_split(context);
7606 if (r < 0)
7607 return r;
7608
7609 (void) context_dump(context, /*late=*/ true);
7610
7611 context->node = mfree(context->node);
7612
7613 LIST_FOREACH(partitions, p, context->partitions)
7614 p->split_path = mfree(p->split_path);
7615
7616 return 0;
7617 }
7618
7619 DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);