]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/partition/repart.c
Merge pull request #29679 from keszybz/drop-iovec-null
[thirdparty/systemd.git] / src / partition / repart.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #if HAVE_VALGRIND_MEMCHECK_H
4 #include <valgrind/memcheck.h>
5 #endif
6
7 #include <fcntl.h>
8 #include <getopt.h>
9 #include <linux/fs.h>
10 #include <linux/loop.h>
11 #include <sys/file.h>
12 #include <sys/ioctl.h>
13 #include <sys/stat.h>
14
15 #include "sd-device.h"
16 #include "sd-id128.h"
17
18 #include "alloc-util.h"
19 #include "blkid-util.h"
20 #include "blockdev-util.h"
21 #include "btrfs-util.h"
22 #include "build.h"
23 #include "chase.h"
24 #include "conf-files.h"
25 #include "conf-parser.h"
26 #include "constants.h"
27 #include "cryptsetup-util.h"
28 #include "device-util.h"
29 #include "devnum-util.h"
30 #include "dirent-util.h"
31 #include "efivars.h"
32 #include "errno-util.h"
33 #include "fd-util.h"
34 #include "fdisk-util.h"
35 #include "fileio.h"
36 #include "format-table.h"
37 #include "format-util.h"
38 #include "fs-util.h"
39 #include "glyph-util.h"
40 #include "gpt.h"
41 #include "hexdecoct.h"
42 #include "hmac.h"
43 #include "id128-util.h"
44 #include "initrd-util.h"
45 #include "io-util.h"
46 #include "json.h"
47 #include "list.h"
48 #include "loop-util.h"
49 #include "main-func.h"
50 #include "mkdir.h"
51 #include "mkfs-util.h"
52 #include "mount-util.h"
53 #include "mountpoint-util.h"
54 #include "nulstr-util.h"
55 #include "openssl-util.h"
56 #include "parse-argument.h"
57 #include "parse-helpers.h"
58 #include "pretty-print.h"
59 #include "proc-cmdline.h"
60 #include "process-util.h"
61 #include "random-util.h"
62 #include "resize-fs.h"
63 #include "rm-rf.h"
64 #include "sort-util.h"
65 #include "specifier.h"
66 #include "stdio-util.h"
67 #include "string-table.h"
68 #include "string-util.h"
69 #include "strv.h"
70 #include "sync-util.h"
71 #include "terminal-util.h"
72 #include "tmpfile-util.h"
73 #include "tpm2-pcr.h"
74 #include "tpm2-util.h"
75 #include "user-util.h"
76 #include "utf8.h"
77
/* Fallback minimal partition size (10M), applied when nothing else is configured. */
#define DEFAULT_MIN_SIZE (10ULL * 1024ULL * 1024ULL)

/* Absolute lower bound for the size of newly created partitions. */
#define HARD_MIN_SIZE 4096ULL

/* Known up front: we never put more than this into a verity signature partition. */
#define VERITY_SIG_SIZE (HARD_MIN_SIZE * 4ULL)

/* When creating a GPT disk label libfdisk takes slightly more than 1M off the disk size. */
#define GPT_METADATA_SIZE (1044ULL * 1024ULL)

/* By default the LUKS2 metadata takes 16M off the partition size. */
#define LUKS2_METADATA_SIZE (16ULL * 1024ULL * 1024ULL)

/* Extra free space to keep at the end of the partition so that LUKS2 offline encryption can be done. */
#define LUKS2_METADATA_KEEP_FREE (LUKS2_METADATA_SIZE * 2ULL)

/* Size of the LUKS2 volume key (512/8 = 64). */
#define VOLUME_KEY_SIZE (512ULL / 8ULL)

/* Default filesystem sector size. 4K is chosen because, as long as partitions are aligned to 4K, the
 * resulting filesystems remain compatible with sector sizes of 512, 1024 and 2048 as well. */
#define DEFAULT_FILESYSTEM_SECTOR_SIZE 4096ULL

/* NUL-separated list of directory names (each entry is terminated by an embedded NUL byte). */
#define APIVFS_TMP_DIRS_NULSTR "proc\0sys\0dev\0tmp\0run\0var/tmp\0"
104
105 /* Note: When growing and placing new partitions we always align to 4K sector size. It's how newer hard disks
106 * are designed, and if everything is aligned to that performance is best. And for older hard disks with 512B
107 * sector size devices were generally assumed to have an even number of sectors, hence at the worst we'll
108 * waste 3K per partition, which is probably fine. */
109
/* Policy for dealing with disks that do not carry a partition table yet. */
typedef enum EmptyMode {
        EMPTY_UNSET,    /* nothing has been chosen so far */
        EMPTY_REFUSE,   /* empty disks are refused, a partition table is never created */
        EMPTY_ALLOW,    /* empty disks are accepted, a partition table is created if necessary */
        EMPTY_REQUIRE,  /* the disk must be empty, a partition table is created */
        EMPTY_FORCE,    /* the disk is made empty (everything is erased), a partition table is always created */
        EMPTY_CREATE,   /* the disk is created as loopback file, a partition table is always created */
        _EMPTY_MODE_MAX,
        _EMPTY_MODE_INVALID = -EINVAL,
} EmptyMode;
120
/* How the list of partition types in arg_filter_partitions is interpreted, see partition_type_exclude().
 * Note that the enum tag ("FilterPartitionType") and the typedef name ("FilterPartitionsType") are spelled
 * slightly differently. */
typedef enum FilterPartitionType {
        FILTER_PARTITIONS_NONE,     /* no filtering, nothing is excluded */
        FILTER_PARTITIONS_EXCLUDE,  /* the listed partition types are excluded */
        FILTER_PARTITIONS_INCLUDE,  /* everything but the listed partition types is excluded */
        _FILTER_PARTITIONS_MAX,
        _FILTER_PARTITIONS_INVALID = -EINVAL,
} FilterPartitionsType;
128
129 static EmptyMode arg_empty = EMPTY_UNSET;
130 static bool arg_dry_run = true;
131 static const char *arg_node = NULL;
132 static char *arg_root = NULL;
133 static char *arg_image = NULL;
134 static char **arg_definitions = NULL;
135 static bool arg_discard = true;
136 static bool arg_can_factory_reset = false;
137 static int arg_factory_reset = -1;
138 static sd_id128_t arg_seed = SD_ID128_NULL;
139 static bool arg_randomize = false;
140 static int arg_pretty = -1;
141 static uint64_t arg_size = UINT64_MAX;
142 static bool arg_size_auto = false;
143 static JsonFormatFlags arg_json_format_flags = JSON_FORMAT_OFF;
144 static PagerFlags arg_pager_flags = 0;
145 static bool arg_legend = true;
146 static void *arg_key = NULL;
147 static size_t arg_key_size = 0;
148 static EVP_PKEY *arg_private_key = NULL;
149 static X509 *arg_certificate = NULL;
150 static char *arg_tpm2_device = NULL;
151 static Tpm2PCRValue *arg_tpm2_hash_pcr_values = NULL;
152 static size_t arg_tpm2_n_hash_pcr_values = 0;
153 static bool arg_tpm2_hash_pcr_values_use_default = true;
154 static char *arg_tpm2_public_key = NULL;
155 static uint32_t arg_tpm2_public_key_pcr_mask = 0;
156 static bool arg_tpm2_public_key_pcr_mask_use_default = true;
157 static bool arg_split = false;
158 static GptPartitionType *arg_filter_partitions = NULL;
159 static size_t arg_n_filter_partitions = 0;
160 static FilterPartitionsType arg_filter_partitions_type = FILTER_PARTITIONS_NONE;
161 static GptPartitionType *arg_defer_partitions = NULL;
162 static size_t arg_n_defer_partitions = 0;
163 static uint64_t arg_sector_size = 0;
164 static ImagePolicy *arg_image_policy = NULL;
165 static Architecture arg_architecture = _ARCHITECTURE_INVALID;
166 static int arg_offline = -1;
167 static char **arg_copy_from = NULL;
168 static char *arg_copy_source = NULL;
169 static char *arg_make_ddi = NULL;
170
/* Release the heap-allocated global settings when the program exits. arg_defer_partitions is an allocated
 * array just like arg_filter_partitions, hence it is registered here as well so that it does not leak. */
STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
STATIC_DESTRUCTOR_REGISTER(arg_definitions, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_key, erase_and_freep);
STATIC_DESTRUCTOR_REGISTER(arg_private_key, EVP_PKEY_freep);
STATIC_DESTRUCTOR_REGISTER(arg_certificate, X509_freep);
STATIC_DESTRUCTOR_REGISTER(arg_tpm2_device, freep);
STATIC_DESTRUCTOR_REGISTER(arg_tpm2_hash_pcr_values, freep);
STATIC_DESTRUCTOR_REGISTER(arg_tpm2_public_key, freep);
STATIC_DESTRUCTOR_REGISTER(arg_filter_partitions, freep);
STATIC_DESTRUCTOR_REGISTER(arg_defer_partitions, freep);
STATIC_DESTRUCTOR_REGISTER(arg_image_policy, image_policy_freep);
STATIC_DESTRUCTOR_REGISTER(arg_copy_from, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_copy_source, freep);
STATIC_DESTRUCTOR_REGISTER(arg_make_ddi, freep);
185
/* Forward declaration, since Partition and FreeArea refer to each other. */
typedef struct FreeArea FreeArea;

/* How a partition shall be encrypted. The names in the comments are the strings used by encrypt_mode_table[]. */
typedef enum EncryptMode {
        ENCRYPT_OFF,            /* "off" */
        ENCRYPT_KEY_FILE,       /* "key-file" */
        ENCRYPT_TPM2,           /* "tpm2" */
        ENCRYPT_KEY_FILE_TPM2,  /* "key-file+tpm2" */
        _ENCRYPT_MODE_MAX,
        _ENCRYPT_MODE_INVALID = -EINVAL,
} EncryptMode;
196
/* Role of a partition in a verity setup. The values (except VERITY_OFF) also serve as index into
 * Partition.siblings[]. The names in the comments are the strings used by verity_mode_table[]. */
typedef enum VerityMode {
        VERITY_OFF,   /* "off" */
        VERITY_DATA,  /* "data" */
        VERITY_HASH,  /* "hash" */
        VERITY_SIG,   /* "signature" */
        _VERITY_MODE_MAX,
        _VERITY_MODE_INVALID = -EINVAL,
} VerityMode;
205
/* Minimization setting of a partition. The names in the comments are the strings used by
 * minimize_mode_table[]. */
typedef enum MinimizeMode {
        MINIMIZE_OFF,    /* "off" */
        MINIMIZE_BEST,   /* "best" */
        MINIMIZE_GUESS,  /* "guess" */
        _MINIMIZE_MODE_MAX,
        _MINIMIZE_MODE_INVALID = -EINVAL,
} MinimizeMode;
213
214 typedef struct Partition {
215 char *definition_path;
216 char **drop_in_files;
217
218 GptPartitionType type;
219 sd_id128_t current_uuid, new_uuid;
220 bool new_uuid_is_set;
221 char *current_label, *new_label;
222 sd_id128_t fs_uuid, luks_uuid, verity_uuid;
223 uint8_t verity_salt[SHA256_DIGEST_SIZE];
224
225 bool dropped;
226 bool factory_reset;
227 int32_t priority;
228
229 uint32_t weight, padding_weight;
230
231 uint64_t current_size, new_size;
232 uint64_t size_min, size_max;
233
234 uint64_t current_padding, new_padding;
235 uint64_t padding_min, padding_max;
236
237 uint64_t partno;
238 uint64_t offset;
239
240 struct fdisk_partition *current_partition;
241 struct fdisk_partition *new_partition;
242 FreeArea *padding_area;
243 FreeArea *allocated_to_area;
244
245 char *copy_blocks_path;
246 bool copy_blocks_path_is_our_file;
247 bool copy_blocks_auto;
248 const char *copy_blocks_root;
249 int copy_blocks_fd;
250 uint64_t copy_blocks_offset;
251 uint64_t copy_blocks_size;
252
253 char *format;
254 char **copy_files;
255 char **exclude_files_source;
256 char **exclude_files_target;
257 char **make_directories;
258 char **subvolumes;
259 EncryptMode encrypt;
260 VerityMode verity;
261 char *verity_match_key;
262 MinimizeMode minimize;
263 uint64_t verity_data_block_size;
264 uint64_t verity_hash_block_size;
265
266 uint64_t gpt_flags;
267 int no_auto;
268 int read_only;
269 int growfs;
270
271 struct iovec roothash;
272
273 char *split_name_format;
274 char *split_path;
275
276 struct Partition *siblings[_VERITY_MODE_MAX];
277
278 LIST_FIELDS(struct Partition, partitions);
279 } Partition;
280
/* A partition is "foreign" if it carries no definition file path, i.e. is not managed by us. */
#define PARTITION_IS_FOREIGN(p) ((p)->definition_path == NULL)
/* A partition "exists" if we hold a libfdisk object for its current state. */
#define PARTITION_EXISTS(p) ((p)->current_partition != NULL)
283
284 struct FreeArea {
285 Partition *after;
286 uint64_t size;
287 uint64_t allocated;
288 };
289
290 typedef struct Context {
291 LIST_HEAD(Partition, partitions);
292 size_t n_partitions;
293
294 FreeArea **free_areas;
295 size_t n_free_areas;
296
297 uint64_t start, end, total;
298
299 struct fdisk_context *fdisk_context;
300 uint64_t sector_size, grain_size, fs_sector_size;
301
302 sd_id128_t seed;
303
304 char *node;
305 bool node_is_our_file;
306 int backing_fd;
307
308 bool from_scratch;
309 } Context;
310
311 static const char *empty_mode_table[_EMPTY_MODE_MAX] = {
312 [EMPTY_UNSET] = "unset",
313 [EMPTY_REFUSE] = "refuse",
314 [EMPTY_ALLOW] = "allow",
315 [EMPTY_REQUIRE] = "require",
316 [EMPTY_FORCE] = "force",
317 [EMPTY_CREATE] = "create",
318 };
319
320 static const char *encrypt_mode_table[_ENCRYPT_MODE_MAX] = {
321 [ENCRYPT_OFF] = "off",
322 [ENCRYPT_KEY_FILE] = "key-file",
323 [ENCRYPT_TPM2] = "tpm2",
324 [ENCRYPT_KEY_FILE_TPM2] = "key-file+tpm2",
325 };
326
327 static const char *verity_mode_table[_VERITY_MODE_MAX] = {
328 [VERITY_OFF] = "off",
329 [VERITY_DATA] = "data",
330 [VERITY_HASH] = "hash",
331 [VERITY_SIG] = "signature",
332 };
333
334 static const char *minimize_mode_table[_MINIMIZE_MODE_MAX] = {
335 [MINIMIZE_OFF] = "off",
336 [MINIMIZE_BEST] = "best",
337 [MINIMIZE_GUESS] = "guess",
338 };
339
340 DEFINE_PRIVATE_STRING_TABLE_LOOKUP(empty_mode, EmptyMode);
341 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(encrypt_mode, EncryptMode, ENCRYPT_KEY_FILE);
342 DEFINE_PRIVATE_STRING_TABLE_LOOKUP(verity_mode, VerityMode);
343 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(minimize_mode, MinimizeMode, MINIMIZE_BEST);
344
/* Rounds 'v' down to the closest multiple of 'p'. 'p' must not be zero. */
static uint64_t round_down_size(uint64_t v, uint64_t p) {
        return v - (v % p);
}
348
/* Rounds 'v' up to the closest multiple of 'p'. 'p' must not be zero. If the result does not fit into
 * 64 bits UINT64_MAX is returned instead. */
static uint64_t round_up_size(uint64_t v, uint64_t p) {
        const uint64_t n_units = DIV_ROUND_UP(v, p);

        /* Would multiplying back overflow? */
        return n_units > UINT64_MAX / p ? UINT64_MAX : n_units * p;
}
358
359 static Partition *partition_new(void) {
360 Partition *p;
361
362 p = new(Partition, 1);
363 if (!p)
364 return NULL;
365
366 *p = (Partition) {
367 .weight = 1000,
368 .padding_weight = 0,
369 .current_size = UINT64_MAX,
370 .new_size = UINT64_MAX,
371 .size_min = UINT64_MAX,
372 .size_max = UINT64_MAX,
373 .current_padding = UINT64_MAX,
374 .new_padding = UINT64_MAX,
375 .padding_min = UINT64_MAX,
376 .padding_max = UINT64_MAX,
377 .partno = UINT64_MAX,
378 .offset = UINT64_MAX,
379 .copy_blocks_fd = -EBADF,
380 .copy_blocks_offset = UINT64_MAX,
381 .copy_blocks_size = UINT64_MAX,
382 .no_auto = -1,
383 .read_only = -1,
384 .growfs = -1,
385 .verity_data_block_size = UINT64_MAX,
386 .verity_hash_block_size = UINT64_MAX,
387 };
388
389 return p;
390 }
391
392 static Partition* partition_free(Partition *p) {
393 if (!p)
394 return NULL;
395
396 free(p->current_label);
397 free(p->new_label);
398 free(p->definition_path);
399 strv_free(p->drop_in_files);
400
401 if (p->current_partition)
402 fdisk_unref_partition(p->current_partition);
403 if (p->new_partition)
404 fdisk_unref_partition(p->new_partition);
405
406 if (p->copy_blocks_path_is_our_file)
407 unlink_and_free(p->copy_blocks_path);
408 else
409 free(p->copy_blocks_path);
410 safe_close(p->copy_blocks_fd);
411
412 free(p->format);
413 strv_free(p->copy_files);
414 strv_free(p->exclude_files_source);
415 strv_free(p->exclude_files_target);
416 strv_free(p->make_directories);
417 strv_free(p->subvolumes);
418 free(p->verity_match_key);
419
420 iovec_done(&p->roothash);
421
422 free(p->split_name_format);
423 unlink_and_free(p->split_path);
424
425 return mfree(p);
426 }
427
428 static void partition_foreignize(Partition *p) {
429 assert(p);
430 assert(PARTITION_EXISTS(p));
431
432 /* Reset several parameters set through definition file to make the partition foreign. */
433
434 p->definition_path = mfree(p->definition_path);
435 p->drop_in_files = strv_free(p->drop_in_files);
436
437 p->copy_blocks_path = mfree(p->copy_blocks_path);
438 p->copy_blocks_fd = safe_close(p->copy_blocks_fd);
439 p->copy_blocks_root = NULL;
440
441 p->format = mfree(p->format);
442 p->copy_files = strv_free(p->copy_files);
443 p->exclude_files_source = strv_free(p->exclude_files_source);
444 p->exclude_files_target = strv_free(p->exclude_files_target);
445 p->make_directories = strv_free(p->make_directories);
446 p->subvolumes = strv_free(p->subvolumes);
447 p->verity_match_key = mfree(p->verity_match_key);
448
449 p->priority = 0;
450 p->weight = 1000;
451 p->padding_weight = 0;
452 p->size_min = UINT64_MAX;
453 p->size_max = UINT64_MAX;
454 p->padding_min = UINT64_MAX;
455 p->padding_max = UINT64_MAX;
456 p->no_auto = -1;
457 p->read_only = -1;
458 p->growfs = -1;
459 p->verity = VERITY_OFF;
460 }
461
462 static bool partition_type_exclude(const GptPartitionType *type) {
463 if (arg_filter_partitions_type == FILTER_PARTITIONS_NONE)
464 return false;
465
466 for (size_t i = 0; i < arg_n_filter_partitions; i++)
467 if (sd_id128_equal(type->uuid, arg_filter_partitions[i].uuid))
468 return arg_filter_partitions_type == FILTER_PARTITIONS_EXCLUDE;
469
470 return arg_filter_partitions_type == FILTER_PARTITIONS_INCLUDE;
471 }
472
473 static bool partition_type_defer(const GptPartitionType *type) {
474 for (size_t i = 0; i < arg_n_defer_partitions; i++)
475 if (sd_id128_equal(type->uuid, arg_defer_partitions[i].uuid))
476 return true;
477
478 return false;
479 }
480
481 static Partition* partition_unlink_and_free(Context *context, Partition *p) {
482 if (!p)
483 return NULL;
484
485 LIST_REMOVE(partitions, context->partitions, p);
486
487 assert(context->n_partitions > 0);
488 context->n_partitions--;
489
490 return partition_free(p);
491 }
492
493 DEFINE_TRIVIAL_CLEANUP_FUNC(Partition*, partition_free);
494
495 static Context *context_new(sd_id128_t seed) {
496 Context *context;
497
498 context = new(Context, 1);
499 if (!context)
500 return NULL;
501
502 *context = (Context) {
503 .start = UINT64_MAX,
504 .end = UINT64_MAX,
505 .total = UINT64_MAX,
506 .seed = seed,
507 };
508
509 return context;
510 }
511
512 static void context_free_free_areas(Context *context) {
513 assert(context);
514
515 for (size_t i = 0; i < context->n_free_areas; i++)
516 free(context->free_areas[i]);
517
518 context->free_areas = mfree(context->free_areas);
519 context->n_free_areas = 0;
520 }
521
522 static Context *context_free(Context *context) {
523 if (!context)
524 return NULL;
525
526 while (context->partitions)
527 partition_unlink_and_free(context, context->partitions);
528 assert(context->n_partitions == 0);
529
530 context_free_free_areas(context);
531
532 if (context->fdisk_context)
533 fdisk_unref_context(context->fdisk_context);
534
535 safe_close(context->backing_fd);
536 if (context->node_is_our_file)
537 unlink_and_free(context->node);
538 else
539 free(context->node);
540
541 return mfree(context);
542 }
543
544 DEFINE_TRIVIAL_CLEANUP_FUNC(Context*, context_free);
545
546 static int context_add_free_area(
547 Context *context,
548 uint64_t size,
549 Partition *after) {
550
551 FreeArea *a;
552
553 assert(context);
554 assert(!after || !after->padding_area);
555
556 if (!GREEDY_REALLOC(context->free_areas, context->n_free_areas + 1))
557 return -ENOMEM;
558
559 a = new(FreeArea, 1);
560 if (!a)
561 return -ENOMEM;
562
563 *a = (FreeArea) {
564 .size = size,
565 .after = after,
566 };
567
568 context->free_areas[context->n_free_areas++] = a;
569
570 if (after)
571 after->padding_area = a;
572
573 return 0;
574 }
575
576 static void partition_drop_or_foreignize(Partition *p) {
577 if (!p || p->dropped || PARTITION_IS_FOREIGN(p))
578 return;
579
580 if (PARTITION_EXISTS(p)) {
581 log_info("Can't grow existing partition %s of priority %" PRIi32 ", ignoring.",
582 strna(p->current_label ?: p->new_label), p->priority);
583
584 /* Handle the partition as foreign. Do not set dropped flag. */
585 partition_foreignize(p);
586 } else {
587 log_info("Can't fit partition %s of priority %" PRIi32 ", dropping.",
588 p->definition_path, p->priority);
589
590 p->dropped = true;
591 p->allocated_to_area = NULL;
592 }
593 }
594
595 static bool context_drop_or_foreignize_one_priority(Context *context) {
596 int32_t priority = 0;
597
598 LIST_FOREACH(partitions, p, context->partitions) {
599 if (p->dropped)
600 continue;
601
602 priority = MAX(priority, p->priority);
603 }
604
605 /* Refuse to drop partitions with 0 or negative priorities or partitions of priorities that have at
606 * least one existing priority */
607 if (priority <= 0)
608 return false;
609
610 LIST_FOREACH(partitions, p, context->partitions) {
611 if (p->priority < priority)
612 continue;
613
614 partition_drop_or_foreignize(p);
615
616 /* We ensure that all verity sibling partitions have the same priority, so it's safe
617 * to drop all siblings here as well. */
618
619 for (VerityMode mode = VERITY_OFF + 1; mode < _VERITY_MODE_MAX; mode++)
620 partition_drop_or_foreignize(p->siblings[mode]);
621 }
622
623 return true;
624 }
625
626 static uint64_t partition_min_size(const Context *context, const Partition *p) {
627 uint64_t sz;
628
629 assert(context);
630 assert(p);
631
632 /* Calculate the disk space we really need at minimum for this partition. If the partition already
633 * exists the current size is what we really need. If it doesn't exist yet refuse to allocate less
634 * than 4K.
635 *
636 * DEFAULT_MIN_SIZE is the default SizeMin= we configure if nothing else is specified. */
637
638 if (PARTITION_IS_FOREIGN(p)) {
639 /* Don't allow changing size of partitions not managed by us */
640 assert(p->current_size != UINT64_MAX);
641 return p->current_size;
642 }
643
644 if (p->verity == VERITY_SIG)
645 return VERITY_SIG_SIZE;
646
647 sz = p->current_size != UINT64_MAX ? p->current_size : HARD_MIN_SIZE;
648
649 if (!PARTITION_EXISTS(p)) {
650 uint64_t d = 0;
651
652 if (p->encrypt != ENCRYPT_OFF)
653 d += round_up_size(LUKS2_METADATA_KEEP_FREE, context->grain_size);
654
655 if (p->copy_blocks_size != UINT64_MAX)
656 d += round_up_size(p->copy_blocks_size, context->grain_size);
657 else if (p->format || p->encrypt != ENCRYPT_OFF) {
658 uint64_t f;
659
660 /* If we shall synthesize a file system, take minimal fs size into account (assumed to be 4K if not known) */
661 f = p->format ? round_up_size(minimal_size_by_fs_name(p->format), context->grain_size) : UINT64_MAX;
662 d += f == UINT64_MAX ? context->grain_size : f;
663 }
664
665 if (d > sz)
666 sz = d;
667 }
668
669 return MAX(round_up_size(p->size_min != UINT64_MAX ? p->size_min : DEFAULT_MIN_SIZE, context->grain_size), sz);
670 }
671
672 static uint64_t partition_max_size(const Context *context, const Partition *p) {
673 uint64_t sm;
674
675 /* Calculate how large the partition may become at max. This is generally the configured maximum
676 * size, except when it already exists and is larger than that. In that case it's the existing size,
677 * since we never want to shrink partitions. */
678
679 assert(context);
680 assert(p);
681
682 if (PARTITION_IS_FOREIGN(p)) {
683 /* Don't allow changing size of partitions not managed by us */
684 assert(p->current_size != UINT64_MAX);
685 return p->current_size;
686 }
687
688 if (p->verity == VERITY_SIG)
689 return VERITY_SIG_SIZE;
690
691 if (p->size_max == UINT64_MAX)
692 return UINT64_MAX;
693
694 sm = round_down_size(p->size_max, context->grain_size);
695
696 if (p->current_size != UINT64_MAX)
697 sm = MAX(p->current_size, sm);
698
699 return MAX(partition_min_size(context, p), sm);
700 }
701
702 static uint64_t partition_min_padding(const Partition *p) {
703 assert(p);
704 return p->padding_min != UINT64_MAX ? p->padding_min : 0;
705 }
706
707 static uint64_t partition_max_padding(const Partition *p) {
708 assert(p);
709 return p->padding_max;
710 }
711
712 static uint64_t partition_min_size_with_padding(Context *context, const Partition *p) {
713 uint64_t sz;
714
715 /* Calculate the disk space we need for this partition plus any free space coming after it. This
716 * takes user configured padding into account as well as any additional whitespace needed to align
717 * the next partition to 4K again. */
718
719 assert(context);
720 assert(p);
721
722 sz = partition_min_size(context, p) + partition_min_padding(p);
723
724 if (PARTITION_EXISTS(p)) {
725 /* If the partition wasn't aligned, add extra space so that any we might add will be aligned */
726 assert(p->offset != UINT64_MAX);
727 return round_up_size(p->offset + sz, context->grain_size) - p->offset;
728 }
729
730 /* If this is a new partition we'll place it aligned, hence we just need to round up the required size here */
731 return round_up_size(sz, context->grain_size);
732 }
733
734 static uint64_t free_area_available(const FreeArea *a) {
735 assert(a);
736
737 /* Determines how much of this free area is not allocated yet */
738
739 assert(a->size >= a->allocated);
740 return a->size - a->allocated;
741 }
742
743 static uint64_t free_area_current_end(Context *context, const FreeArea *a) {
744 assert(context);
745 assert(a);
746
747 if (!a->after)
748 return free_area_available(a);
749
750 assert(a->after->offset != UINT64_MAX);
751 assert(a->after->current_size != UINT64_MAX);
752
753 /* Calculate where the free area ends, based on the offset of the partition preceding it. */
754 return round_up_size(a->after->offset + a->after->current_size, context->grain_size) + free_area_available(a);
755 }
756
757 static uint64_t free_area_min_end(Context *context, const FreeArea *a) {
758 assert(context);
759 assert(a);
760
761 if (!a->after)
762 return 0;
763
764 assert(a->after->offset != UINT64_MAX);
765 assert(a->after->current_size != UINT64_MAX);
766
767 /* Calculate where the partition would end when we give it as much as it needs. */
768 return round_up_size(a->after->offset + partition_min_size_with_padding(context, a->after), context->grain_size);
769 }
770
771 static uint64_t free_area_available_for_new_partitions(Context *context, const FreeArea *a) {
772 assert(context);
773 assert(a);
774
775 /* Similar to free_area_available(), but takes into account that the required size and padding of the
776 * preceding partition is honoured. */
777
778 return LESS_BY(free_area_current_end(context, a), free_area_min_end(context, a));
779 }
780
781 static int free_area_compare(FreeArea *const *a, FreeArea *const*b, Context *context) {
782 assert(context);
783
784 return CMP(free_area_available_for_new_partitions(context, *a),
785 free_area_available_for_new_partitions(context, *b));
786 }
787
788 static uint64_t charge_size(Context *context, uint64_t total, uint64_t amount) {
789 assert(context);
790 /* Subtract the specified amount from total, rounding up to multiple of 4K if there's room */
791 assert(amount <= total);
792 return LESS_BY(total, round_up_size(amount, context->grain_size));
793 }
794
/* Subtracts 'amount' from the weight sum 'total' and returns what is left. 'amount' must not exceed
 * 'total'. */
static uint64_t charge_weight(uint64_t total, uint64_t amount) {
        uint64_t remaining;

        assert(amount <= total);

        remaining = total - amount;
        return remaining;
}
799
800 static bool context_allocate_partitions(Context *context, uint64_t *ret_largest_free_area) {
801 assert(context);
802
803 /* This may be called multiple times. Reset previous assignments. */
804 for (size_t i = 0; i < context->n_free_areas; i++)
805 context->free_areas[i]->allocated = 0;
806
807 /* Sort free areas by size, putting smallest first */
808 typesafe_qsort_r(context->free_areas, context->n_free_areas, free_area_compare, context);
809
810 /* In any case return size of the largest free area (i.e. not the size of all free areas
811 * combined!) */
812 if (ret_largest_free_area)
813 *ret_largest_free_area =
814 context->n_free_areas == 0 ? 0 :
815 free_area_available_for_new_partitions(context, context->free_areas[context->n_free_areas-1]);
816
817 /* Check that each existing partition can fit its area. */
818 for (size_t i = 0; i < context->n_free_areas; i++)
819 if (free_area_current_end(context, context->free_areas[i]) <
820 free_area_min_end(context, context->free_areas[i]))
821 return false;
822
823 /* A simple first-fit algorithm. We return true if we can fit the partitions in, otherwise false. */
824 LIST_FOREACH(partitions, p, context->partitions) {
825 bool fits = false;
826 uint64_t required;
827 FreeArea *a = NULL;
828
829 /* Skip partitions we already dropped or that already exist */
830 if (p->dropped || PARTITION_EXISTS(p))
831 continue;
832
833 /* How much do we need to fit? */
834 required = partition_min_size_with_padding(context, p);
835 assert(required % context->grain_size == 0);
836
837 for (size_t i = 0; i < context->n_free_areas; i++) {
838 a = context->free_areas[i];
839
840 if (free_area_available_for_new_partitions(context, a) >= required) {
841 fits = true;
842 break;
843 }
844 }
845
846 if (!fits)
847 return false; /* 😢 Oh no! We can't fit this partition into any free area! */
848
849 /* Assign the partition to this free area */
850 p->allocated_to_area = a;
851
852 /* Budget the minimal partition size */
853 a->allocated += required;
854 }
855
856 return true;
857 }
858
859 static int context_sum_weights(Context *context, FreeArea *a, uint64_t *ret) {
860 uint64_t weight_sum = 0;
861
862 assert(context);
863 assert(a);
864 assert(ret);
865
866 /* Determine the sum of the weights of all partitions placed in or before the specified free area */
867
868 LIST_FOREACH(partitions, p, context->partitions) {
869 if (p->padding_area != a && p->allocated_to_area != a)
870 continue;
871
872 if (p->weight > UINT64_MAX - weight_sum)
873 goto overflow_sum;
874 weight_sum += p->weight;
875
876 if (p->padding_weight > UINT64_MAX - weight_sum)
877 goto overflow_sum;
878 weight_sum += p->padding_weight;
879 }
880
881 *ret = weight_sum;
882 return 0;
883
884 overflow_sum:
885 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Combined weight of partition exceeds unsigned 64-bit range, refusing.");
886 }
887
/* Returns the share of 'value' that corresponds to 'weight' out of 'weight_sum', i.e.
 * value * weight / weight_sum, computed without overflowing. 'weight' must not exceed 'weight_sum'. */
static uint64_t scale_by_weight(uint64_t value, uint64_t weight, uint64_t weight_sum) {
        assert(weight_sum >= weight);

        /* As long as the multiplication below would overflow, rescale weight and weight_sum. To satisfy
         * the following conditions, 'weight_sum' is rounded up but 'weight' is rounded down:
         * - the sum of scale_by_weight() for all weights must not be larger than the input value,
         * - scale_by_weight() must not be larger than the ideal value (i.e. calculated with uint128_t). */
        while (weight != 0 &&
               weight != weight_sum &&
               value > UINT64_MAX / weight) {
                weight_sum = DIV_ROUND_UP(weight_sum, 2);
                weight /= 2;
        }

        if (weight == 0)
                return 0;
        if (weight == weight_sum)
                return value;

        return value * weight / weight_sum;
}
907
/* The phases of distributing the space of a free area among the partitions associated with it, in the
 * order they are run. */
typedef enum GrowPartitionPhase {
        /* Phase 0: foreign partitions (i.e. those we don't manage) are left untouched. */
        PHASE_FOREIGN,

        /* Phase 1: charge the partitions whose constraints demand more than their weight-based share. */
        PHASE_OVERCHARGE,

        /* Phase 2: charge the partitions whose constraints permit less than their weight-based share. */
        PHASE_UNDERCHARGE,

        /* Phase 3: whatever remains is distributed among the remaining partitions, by weight. */
        PHASE_DISTRIBUTE,

        _GROW_PARTITION_PHASE_MAX,
} GrowPartitionPhase;
923
924 static bool context_grow_partitions_phase(
925 Context *context,
926 FreeArea *a,
927 GrowPartitionPhase phase,
928 uint64_t *span,
929 uint64_t *weight_sum) {
930
931 bool try_again = false;
932
933 assert(context);
934 assert(a);
935 assert(span);
936 assert(weight_sum);
937
938 /* Now let's look at the intended weights and adjust them taking the minimum space assignments into
939 * account. i.e. if a partition has a small weight but a high minimum space value set it should not
940 * get any additional room from the left-overs. Similar, if two partitions have the same weight they
941 * should get the same space if possible, even if one has a smaller minimum size than the other. */
942 LIST_FOREACH(partitions, p, context->partitions) {
943
944 /* Look only at partitions associated with this free area, i.e. immediately
945 * preceding it, or allocated into it */
946 if (p->allocated_to_area != a && p->padding_area != a)
947 continue;
948
949 if (p->new_size == UINT64_MAX) {
950 uint64_t share, rsz, xsz;
951 bool charge = false;
952
953 /* Calculate how much this space this partition needs if everyone would get
954 * the weight based share */
955 share = scale_by_weight(*span, p->weight, *weight_sum);
956
957 rsz = partition_min_size(context, p);
958 xsz = partition_max_size(context, p);
959
960 if (phase == PHASE_FOREIGN && PARTITION_IS_FOREIGN(p)) {
961 /* Never change of foreign partitions (i.e. those we don't manage) */
962
963 p->new_size = p->current_size;
964 charge = true;
965
966 } else if (phase == PHASE_OVERCHARGE && rsz > share) {
967 /* This partition needs more than its calculated share. Let's assign
968 * it that, and take this partition out of all calculations and start
969 * again. */
970
971 p->new_size = rsz;
972 charge = try_again = true;
973
974 } else if (phase == PHASE_UNDERCHARGE && xsz < share) {
975 /* This partition accepts less than its calculated
976 * share. Let's assign it that, and take this partition out
977 * of all calculations and start again. */
978
979 p->new_size = xsz;
980 charge = try_again = true;
981
982 } else if (phase == PHASE_DISTRIBUTE) {
983 /* This partition can accept its calculated share. Let's
984 * assign it. There's no need to restart things here since
985 * assigning this shouldn't impact the shares of the other
986 * partitions. */
987
988 assert(share >= rsz);
989 p->new_size = CLAMP(round_down_size(share, context->grain_size), rsz, xsz);
990 charge = true;
991 }
992
993 if (charge) {
994 *span = charge_size(context, *span, p->new_size);
995 *weight_sum = charge_weight(*weight_sum, p->weight);
996 }
997 }
998
999 if (p->new_padding == UINT64_MAX) {
1000 uint64_t share, rsz, xsz;
1001 bool charge = false;
1002
1003 share = scale_by_weight(*span, p->padding_weight, *weight_sum);
1004
1005 rsz = partition_min_padding(p);
1006 xsz = partition_max_padding(p);
1007
1008 if (phase == PHASE_OVERCHARGE && rsz > share) {
1009 p->new_padding = rsz;
1010 charge = try_again = true;
1011 } else if (phase == PHASE_UNDERCHARGE && xsz < share) {
1012 p->new_padding = xsz;
1013 charge = try_again = true;
1014 } else if (phase == PHASE_DISTRIBUTE) {
1015 assert(share >= rsz);
1016 p->new_padding = CLAMP(round_down_size(share, context->grain_size), rsz, xsz);
1017 charge = true;
1018 }
1019
1020 if (charge) {
1021 *span = charge_size(context, *span, p->new_padding);
1022 *weight_sum = charge_weight(*weight_sum, p->padding_weight);
1023 }
1024 }
1025 }
1026
1027 return !try_again;
1028 }
1029
1030 static void context_grow_partition_one(Context *context, FreeArea *a, Partition *p, uint64_t *span) {
1031 uint64_t m;
1032
1033 assert(context);
1034 assert(a);
1035 assert(p);
1036 assert(span);
1037
1038 if (*span == 0)
1039 return;
1040
1041 if (p->allocated_to_area != a)
1042 return;
1043
1044 if (PARTITION_IS_FOREIGN(p))
1045 return;
1046
1047 assert(p->new_size != UINT64_MAX);
1048
1049 /* Calculate new size and align. */
1050 m = round_down_size(p->new_size + *span, context->grain_size);
1051 /* But ensure this doesn't shrink the size. */
1052 m = MAX(m, p->new_size);
1053 /* And ensure this doesn't exceed the maximum size. */
1054 m = MIN(m, partition_max_size(context, p));
1055
1056 assert(m >= p->new_size);
1057
1058 *span = charge_size(context, *span, m - p->new_size);
1059 p->new_size = m;
1060 }
1061
1062 static int context_grow_partitions_on_free_area(Context *context, FreeArea *a) {
1063 uint64_t weight_sum = 0, span;
1064 int r;
1065
1066 assert(context);
1067 assert(a);
1068
1069 r = context_sum_weights(context, a, &weight_sum);
1070 if (r < 0)
1071 return r;
1072
1073 /* Let's calculate the total area covered by this free area and the partition before it */
1074 span = a->size;
1075 if (a->after) {
1076 assert(a->after->offset != UINT64_MAX);
1077 assert(a->after->current_size != UINT64_MAX);
1078
1079 span += round_up_size(a->after->offset + a->after->current_size, context->grain_size) - a->after->offset;
1080 }
1081
1082 for (GrowPartitionPhase phase = 0; phase < _GROW_PARTITION_PHASE_MAX;)
1083 if (context_grow_partitions_phase(context, a, phase, &span, &weight_sum))
1084 phase++; /* go to the next phase */
1085
1086 /* We still have space left over? Donate to preceding partition if we have one */
1087 if (span > 0 && a->after)
1088 context_grow_partition_one(context, a, a->after, &span);
1089
1090 /* What? Even still some space left (maybe because there was no preceding partition, or it had a
1091 * size limit), then let's donate it to whoever wants it. */
1092 if (span > 0)
1093 LIST_FOREACH(partitions, p, context->partitions) {
1094 context_grow_partition_one(context, a, p, &span);
1095 if (span == 0)
1096 break;
1097 }
1098
1099 /* Yuck, still no one? Then make it padding */
1100 if (span > 0 && a->after) {
1101 assert(a->after->new_padding != UINT64_MAX);
1102 a->after->new_padding += span;
1103 }
1104
1105 return 0;
1106 }
1107
1108 static int context_grow_partitions(Context *context) {
1109 int r;
1110
1111 assert(context);
1112
1113 for (size_t i = 0; i < context->n_free_areas; i++) {
1114 r = context_grow_partitions_on_free_area(context, context->free_areas[i]);
1115 if (r < 0)
1116 return r;
1117 }
1118
1119 /* All existing partitions that have no free space after them can't change size */
1120 LIST_FOREACH(partitions, p, context->partitions) {
1121 if (p->dropped)
1122 continue;
1123
1124 if (!PARTITION_EXISTS(p) || p->padding_area) {
1125 /* The algorithm above must have initialized this already */
1126 assert(p->new_size != UINT64_MAX);
1127 continue;
1128 }
1129
1130 assert(p->new_size == UINT64_MAX);
1131 p->new_size = p->current_size;
1132
1133 assert(p->new_padding == UINT64_MAX);
1134 p->new_padding = p->current_padding;
1135 }
1136
1137 return 0;
1138 }
1139
1140 static uint64_t find_first_unused_partno(Context *context) {
1141 uint64_t partno = 0;
1142
1143 assert(context);
1144
1145 for (partno = 0;; partno++) {
1146 bool found = false;
1147 LIST_FOREACH(partitions, p, context->partitions)
1148 if (p->partno != UINT64_MAX && p->partno == partno)
1149 found = true;
1150 if (!found)
1151 break;
1152 }
1153
1154 return partno;
1155 }
1156
1157 static void context_place_partitions(Context *context) {
1158
1159 assert(context);
1160
1161 for (size_t i = 0; i < context->n_free_areas; i++) {
1162 FreeArea *a = context->free_areas[i];
1163 _unused_ uint64_t left;
1164 uint64_t start;
1165
1166 if (a->after) {
1167 assert(a->after->offset != UINT64_MAX);
1168 assert(a->after->new_size != UINT64_MAX);
1169 assert(a->after->new_padding != UINT64_MAX);
1170
1171 start = a->after->offset + a->after->new_size + a->after->new_padding;
1172 } else
1173 start = context->start;
1174
1175 start = round_up_size(start, context->grain_size);
1176 left = a->size;
1177
1178 LIST_FOREACH(partitions, p, context->partitions) {
1179 if (p->allocated_to_area != a)
1180 continue;
1181
1182 p->offset = start;
1183 p->partno = find_first_unused_partno(context);
1184
1185 assert(left >= p->new_size);
1186 start += p->new_size;
1187 left -= p->new_size;
1188
1189 assert(left >= p->new_padding);
1190 start += p->new_padding;
1191 left -= p->new_padding;
1192 }
1193 }
1194 }
1195
1196 static int config_parse_type(
1197 const char *unit,
1198 const char *filename,
1199 unsigned line,
1200 const char *section,
1201 unsigned section_line,
1202 const char *lvalue,
1203 int ltype,
1204 const char *rvalue,
1205 void *data,
1206 void *userdata) {
1207
1208 GptPartitionType *type = ASSERT_PTR(data);
1209 int r;
1210
1211 assert(rvalue);
1212
1213 r = gpt_partition_type_from_string(rvalue, type);
1214 if (r < 0)
1215 return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse partition type: %s", rvalue);
1216
1217 if (arg_architecture >= 0)
1218 *type = gpt_partition_type_override_architecture(*type, arg_architecture);
1219
1220 return 0;
1221 }
1222
1223 static int config_parse_label(
1224 const char *unit,
1225 const char *filename,
1226 unsigned line,
1227 const char *section,
1228 unsigned section_line,
1229 const char *lvalue,
1230 int ltype,
1231 const char *rvalue,
1232 void *data,
1233 void *userdata) {
1234
1235 _cleanup_free_ char *resolved = NULL;
1236 char **label = ASSERT_PTR(data);
1237 int r;
1238
1239 assert(rvalue);
1240
1241 /* Nota bene: the empty label is a totally valid one. Let's hence not follow our usual rule of
1242 * assigning the empty string to reset to default here, but really accept it as label to set. */
1243
1244 r = specifier_printf(rvalue, GPT_LABEL_MAX, system_and_tmp_specifier_table, arg_root, NULL, &resolved);
1245 if (r < 0) {
1246 log_syntax(unit, LOG_WARNING, filename, line, r,
1247 "Failed to expand specifiers in Label=, ignoring: %s", rvalue);
1248 return 0;
1249 }
1250
1251 if (!utf8_is_valid(resolved)) {
1252 log_syntax(unit, LOG_WARNING, filename, line, 0,
1253 "Partition label not valid UTF-8, ignoring: %s", rvalue);
1254 return 0;
1255 }
1256
1257 r = gpt_partition_label_valid(resolved);
1258 if (r < 0) {
1259 log_syntax(unit, LOG_WARNING, filename, line, r,
1260 "Failed to check if string is valid as GPT partition label, ignoring: \"%s\" (from \"%s\")",
1261 resolved, rvalue);
1262 return 0;
1263 }
1264 if (!r) {
1265 log_syntax(unit, LOG_WARNING, filename, line, 0,
1266 "Partition label too long for GPT table, ignoring: \"%s\" (from \"%s\")",
1267 resolved, rvalue);
1268 return 0;
1269 }
1270
1271 free_and_replace(*label, resolved);
1272 return 0;
1273 }
1274
1275 static int config_parse_weight(
1276 const char *unit,
1277 const char *filename,
1278 unsigned line,
1279 const char *section,
1280 unsigned section_line,
1281 const char *lvalue,
1282 int ltype,
1283 const char *rvalue,
1284 void *data,
1285 void *userdata) {
1286
1287 uint32_t *w = ASSERT_PTR(data), v;
1288 int r;
1289
1290 assert(rvalue);
1291
1292 r = safe_atou32(rvalue, &v);
1293 if (r < 0) {
1294 log_syntax(unit, LOG_WARNING, filename, line, r,
1295 "Failed to parse weight value, ignoring: %s", rvalue);
1296 return 0;
1297 }
1298
1299 if (v > 1000U*1000U) {
1300 log_syntax(unit, LOG_WARNING, filename, line, 0,
1301 "Weight needs to be in range 0…10000000, ignoring: %" PRIu32, v);
1302 return 0;
1303 }
1304
1305 *w = v;
1306 return 0;
1307 }
1308
1309 static int config_parse_size4096(
1310 const char *unit,
1311 const char *filename,
1312 unsigned line,
1313 const char *section,
1314 unsigned section_line,
1315 const char *lvalue,
1316 int ltype,
1317 const char *rvalue,
1318 void *data,
1319 void *userdata) {
1320
1321 uint64_t *sz = data, parsed;
1322 int r;
1323
1324 assert(rvalue);
1325 assert(data);
1326
1327 r = parse_size(rvalue, 1024, &parsed);
1328 if (r < 0)
1329 return log_syntax(unit, LOG_ERR, filename, line, r,
1330 "Failed to parse size value: %s", rvalue);
1331
1332 if (ltype > 0)
1333 *sz = round_up_size(parsed, 4096);
1334 else if (ltype < 0)
1335 *sz = round_down_size(parsed, 4096);
1336 else
1337 *sz = parsed;
1338
1339 if (*sz != parsed)
1340 log_syntax(unit, LOG_NOTICE, filename, line, r, "Rounded %s= size %" PRIu64 " %s %" PRIu64 ", a multiple of 4096.",
1341 lvalue, parsed, special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), *sz);
1342
1343 return 0;
1344 }
1345
1346 static int config_parse_block_size(
1347 const char *unit,
1348 const char *filename,
1349 unsigned line,
1350 const char *section,
1351 unsigned section_line,
1352 const char *lvalue,
1353 int ltype,
1354 const char *rvalue,
1355 void *data,
1356 void *userdata) {
1357
1358 uint64_t *blksz = ASSERT_PTR(data), parsed;
1359 int r;
1360
1361 assert(rvalue);
1362
1363 r = parse_size(rvalue, 1024, &parsed);
1364 if (r < 0)
1365 return log_syntax(unit, LOG_ERR, filename, line, r,
1366 "Failed to parse size value: %s", rvalue);
1367
1368 if (parsed < 512 || parsed > 4096)
1369 return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL),
1370 "Value not between 512 and 4096: %s", rvalue);
1371
1372 if (!ISPOWEROF2(parsed))
1373 return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL),
1374 "Value not a power of 2: %s", rvalue);
1375
1376 *blksz = parsed;
1377 return 0;
1378 }
1379
1380 static int config_parse_fstype(
1381 const char *unit,
1382 const char *filename,
1383 unsigned line,
1384 const char *section,
1385 unsigned section_line,
1386 const char *lvalue,
1387 int ltype,
1388 const char *rvalue,
1389 void *data,
1390 void *userdata) {
1391
1392 char **fstype = ASSERT_PTR(data);
1393 const char *e;
1394
1395 assert(rvalue);
1396
1397 /* Let's provide an easy way to override the chosen fstype for file system partitions */
1398 e = secure_getenv("SYSTEMD_REPART_OVERRIDE_FSTYPE");
1399 if (e && !streq(rvalue, e)) {
1400 log_syntax(unit, LOG_NOTICE, filename, line, 0,
1401 "Overriding defined file system type '%s' with '%s'.", rvalue, e);
1402 rvalue = e;
1403 }
1404
1405 if (!filename_is_valid(rvalue))
1406 return log_syntax(unit, LOG_ERR, filename, line, 0,
1407 "File system type is not valid, refusing: %s", rvalue);
1408
1409 return free_and_strdup_warn(fstype, rvalue);
1410 }
1411
1412 static int config_parse_copy_files(
1413 const char *unit,
1414 const char *filename,
1415 unsigned line,
1416 const char *section,
1417 unsigned section_line,
1418 const char *lvalue,
1419 int ltype,
1420 const char *rvalue,
1421 void *data,
1422 void *userdata) {
1423
1424 _cleanup_free_ char *source = NULL, *buffer = NULL, *resolved_source = NULL, *resolved_target = NULL;
1425 const char *p = rvalue, *target;
1426 char ***copy_files = ASSERT_PTR(data);
1427 int r;
1428
1429 assert(rvalue);
1430
1431 r = extract_first_word(&p, &source, ":", EXTRACT_CUNESCAPE|EXTRACT_DONT_COALESCE_SEPARATORS);
1432 if (r < 0)
1433 return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract source path: %s", rvalue);
1434 if (r == 0) {
1435 log_syntax(unit, LOG_WARNING, filename, line, 0, "No argument specified: %s", rvalue);
1436 return 0;
1437 }
1438
1439 r = extract_first_word(&p, &buffer, ":", EXTRACT_CUNESCAPE|EXTRACT_DONT_COALESCE_SEPARATORS);
1440 if (r < 0)
1441 return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract target path: %s", rvalue);
1442 if (r == 0)
1443 target = source; /* No target, then it's the same as the source */
1444 else
1445 target = buffer;
1446
1447 if (!isempty(p))
1448 return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL), "Too many arguments: %s", rvalue);
1449
1450 r = specifier_printf(source, PATH_MAX-1, system_and_tmp_specifier_table, arg_root, NULL, &resolved_source);
1451 if (r < 0) {
1452 log_syntax(unit, LOG_WARNING, filename, line, r,
1453 "Failed to expand specifiers in CopyFiles= source, ignoring: %s", rvalue);
1454 return 0;
1455 }
1456
1457 r = path_simplify_and_warn(resolved_source, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
1458 if (r < 0)
1459 return 0;
1460
1461 r = specifier_printf(target, PATH_MAX-1, system_and_tmp_specifier_table, arg_root, NULL, &resolved_target);
1462 if (r < 0) {
1463 log_syntax(unit, LOG_WARNING, filename, line, r,
1464 "Failed to expand specifiers in CopyFiles= target, ignoring: %s", resolved_target);
1465 return 0;
1466 }
1467
1468 r = path_simplify_and_warn(resolved_target, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
1469 if (r < 0)
1470 return 0;
1471
1472 r = strv_consume_pair(copy_files, TAKE_PTR(resolved_source), TAKE_PTR(resolved_target));
1473 if (r < 0)
1474 return log_oom();
1475
1476 return 0;
1477 }
1478
1479 static int config_parse_exclude_files(
1480 const char *unit,
1481 const char *filename,
1482 unsigned line,
1483 const char *section,
1484 unsigned section_line,
1485 const char *lvalue,
1486 int ltype,
1487 const char *rvalue,
1488 void *data,
1489 void *userdata) {
1490 _cleanup_free_ char *resolved = NULL;
1491 char ***exclude_files = ASSERT_PTR(data);
1492 int r;
1493
1494 if (isempty(rvalue)) {
1495 *exclude_files = strv_free(*exclude_files);
1496 return 0;
1497 }
1498
1499 r = specifier_printf(rvalue, PATH_MAX-1, system_and_tmp_specifier_table, arg_root, NULL, &resolved);
1500 if (r < 0) {
1501 log_syntax(unit, LOG_WARNING, filename, line, r,
1502 "Failed to expand specifiers in ExcludeFiles= path, ignoring: %s", rvalue);
1503 return 0;
1504 }
1505
1506 r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE|PATH_KEEP_TRAILING_SLASH, unit, filename, line, lvalue);
1507 if (r < 0)
1508 return 0;
1509
1510 if (strv_consume(exclude_files, TAKE_PTR(resolved)) < 0)
1511 return log_oom();
1512
1513 return 0;
1514 }
1515
1516 static int config_parse_copy_blocks(
1517 const char *unit,
1518 const char *filename,
1519 unsigned line,
1520 const char *section,
1521 unsigned section_line,
1522 const char *lvalue,
1523 int ltype,
1524 const char *rvalue,
1525 void *data,
1526 void *userdata) {
1527
1528 _cleanup_free_ char *d = NULL;
1529 Partition *partition = ASSERT_PTR(data);
1530 int r;
1531
1532 assert(rvalue);
1533
1534 if (isempty(rvalue)) {
1535 partition->copy_blocks_path = mfree(partition->copy_blocks_path);
1536 partition->copy_blocks_auto = false;
1537 return 0;
1538 }
1539
1540 if (streq(rvalue, "auto")) {
1541 partition->copy_blocks_path = mfree(partition->copy_blocks_path);
1542 partition->copy_blocks_auto = true;
1543 partition->copy_blocks_root = arg_root;
1544 return 0;
1545 }
1546
1547 r = specifier_printf(rvalue, PATH_MAX-1, system_and_tmp_specifier_table, arg_root, NULL, &d);
1548 if (r < 0) {
1549 log_syntax(unit, LOG_WARNING, filename, line, r,
1550 "Failed to expand specifiers in CopyBlocks= source path, ignoring: %s", rvalue);
1551 return 0;
1552 }
1553
1554 r = path_simplify_and_warn(d, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
1555 if (r < 0)
1556 return 0;
1557
1558 free_and_replace(partition->copy_blocks_path, d);
1559 partition->copy_blocks_auto = false;
1560 partition->copy_blocks_root = arg_root;
1561 return 0;
1562 }
1563
1564 static int config_parse_make_dirs(
1565 const char *unit,
1566 const char *filename,
1567 unsigned line,
1568 const char *section,
1569 unsigned section_line,
1570 const char *lvalue,
1571 int ltype,
1572 const char *rvalue,
1573 void *data,
1574 void *userdata) {
1575
1576 char ***sv = ASSERT_PTR(data);
1577 const char *p = ASSERT_PTR(rvalue);
1578 int r;
1579
1580 for (;;) {
1581 _cleanup_free_ char *word = NULL, *d = NULL;
1582
1583 r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
1584 if (r == -ENOMEM)
1585 return log_oom();
1586 if (r < 0) {
1587 log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
1588 return 0;
1589 }
1590 if (r == 0)
1591 return 0;
1592
1593 r = specifier_printf(word, PATH_MAX-1, system_and_tmp_specifier_table, arg_root, NULL, &d);
1594 if (r < 0) {
1595 log_syntax(unit, LOG_WARNING, filename, line, r,
1596 "Failed to expand specifiers in MakeDirectories= parameter, ignoring: %s", word);
1597 continue;
1598 }
1599
1600 r = path_simplify_and_warn(d, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
1601 if (r < 0)
1602 continue;
1603
1604 r = strv_consume(sv, TAKE_PTR(d));
1605 if (r < 0)
1606 return log_oom();
1607 }
1608 }
1609
/* Instantiates config_parse_encrypt(), the handler used for the "Encrypt" entry of the [Partition] table.
 * NOTE(review): presumably maps the value to an EncryptMode via the encrypt_mode string table, with
 * ENCRYPT_OFF as default — confirm against the definition of DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT. */
static DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_encrypt, encrypt_mode, EncryptMode, ENCRYPT_OFF, "Invalid encryption mode");
1611
1612 static int config_parse_gpt_flags(
1613 const char *unit,
1614 const char *filename,
1615 unsigned line,
1616 const char *section,
1617 unsigned section_line,
1618 const char *lvalue,
1619 int ltype,
1620 const char *rvalue,
1621 void *data,
1622 void *userdata) {
1623
1624 uint64_t *gpt_flags = ASSERT_PTR(data);
1625 int r;
1626
1627 assert(rvalue);
1628
1629 r = safe_atou64(rvalue, gpt_flags);
1630 if (r < 0) {
1631 log_syntax(unit, LOG_WARNING, filename, line, r,
1632 "Failed to parse Flags= value, ignoring: %s", rvalue);
1633 return 0;
1634 }
1635
1636 return 0;
1637 }
1638
1639 static int config_parse_uuid(
1640 const char *unit,
1641 const char *filename,
1642 unsigned line,
1643 const char *section,
1644 unsigned section_line,
1645 const char *lvalue,
1646 int ltype,
1647 const char *rvalue,
1648 void *data,
1649 void *userdata) {
1650
1651 Partition *partition = ASSERT_PTR(data);
1652 int r;
1653
1654 if (isempty(rvalue)) {
1655 partition->new_uuid = SD_ID128_NULL;
1656 partition->new_uuid_is_set = false;
1657 return 0;
1658 }
1659
1660 if (streq(rvalue, "null")) {
1661 partition->new_uuid = SD_ID128_NULL;
1662 partition->new_uuid_is_set = true;
1663 return 0;
1664 }
1665
1666 r = sd_id128_from_string(rvalue, &partition->new_uuid);
1667 if (r < 0) {
1668 log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse 128-bit ID/UUID, ignoring: %s", rvalue);
1669 return 0;
1670 }
1671
1672 partition->new_uuid_is_set = true;
1673
1674 return 0;
1675 }
1676
/* Instantiate config_parse_verity() and config_parse_minimize(), the handlers used for the "Verity" and
 * "Minimize" entries of the [Partition] table. NOTE(review): presumably these map the value to a
 * VerityMode/MinimizeMode via the respective string table, defaulting to VERITY_OFF/MINIMIZE_OFF —
 * confirm against the definition of DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT. */
static DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_verity, verity_mode, VerityMode, VERITY_OFF, "Invalid verity mode");
static DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_minimize, minimize_mode, MinimizeMode, MINIMIZE_OFF, "Invalid minimize mode");
1679
1680 static int partition_read_definition(Partition *p, const char *path, const char *const *conf_file_dirs) {
1681
1682 ConfigTableItem table[] = {
1683 { "Partition", "Type", config_parse_type, 0, &p->type },
1684 { "Partition", "Label", config_parse_label, 0, &p->new_label },
1685 { "Partition", "UUID", config_parse_uuid, 0, p },
1686 { "Partition", "Priority", config_parse_int32, 0, &p->priority },
1687 { "Partition", "Weight", config_parse_weight, 0, &p->weight },
1688 { "Partition", "PaddingWeight", config_parse_weight, 0, &p->padding_weight },
1689 { "Partition", "SizeMinBytes", config_parse_size4096, 1, &p->size_min },
1690 { "Partition", "SizeMaxBytes", config_parse_size4096, -1, &p->size_max },
1691 { "Partition", "PaddingMinBytes", config_parse_size4096, 1, &p->padding_min },
1692 { "Partition", "PaddingMaxBytes", config_parse_size4096, -1, &p->padding_max },
1693 { "Partition", "FactoryReset", config_parse_bool, 0, &p->factory_reset },
1694 { "Partition", "CopyBlocks", config_parse_copy_blocks, 0, p },
1695 { "Partition", "Format", config_parse_fstype, 0, &p->format },
1696 { "Partition", "CopyFiles", config_parse_copy_files, 0, &p->copy_files },
1697 { "Partition", "ExcludeFiles", config_parse_exclude_files, 0, &p->exclude_files_source },
1698 { "Partition", "ExcludeFilesTarget", config_parse_exclude_files, 0, &p->exclude_files_target },
1699 { "Partition", "MakeDirectories", config_parse_make_dirs, 0, &p->make_directories },
1700 { "Partition", "Encrypt", config_parse_encrypt, 0, &p->encrypt },
1701 { "Partition", "Verity", config_parse_verity, 0, &p->verity },
1702 { "Partition", "VerityMatchKey", config_parse_string, 0, &p->verity_match_key },
1703 { "Partition", "Flags", config_parse_gpt_flags, 0, &p->gpt_flags },
1704 { "Partition", "ReadOnly", config_parse_tristate, 0, &p->read_only },
1705 { "Partition", "NoAuto", config_parse_tristate, 0, &p->no_auto },
1706 { "Partition", "GrowFileSystem", config_parse_tristate, 0, &p->growfs },
1707 { "Partition", "SplitName", config_parse_string, 0, &p->split_name_format },
1708 { "Partition", "Minimize", config_parse_minimize, 0, &p->minimize },
1709 { "Partition", "Subvolumes", config_parse_make_dirs, 0, &p->subvolumes },
1710 { "Partition", "VerityDataBlockSizeBytes", config_parse_block_size, 0, &p->verity_data_block_size },
1711 { "Partition", "VerityHashBlockSizeBytes", config_parse_block_size, 0, &p->verity_hash_block_size },
1712 {}
1713 };
1714 int r;
1715 _cleanup_free_ char *filename = NULL;
1716 const char* dropin_dirname;
1717
1718 r = path_extract_filename(path, &filename);
1719 if (r < 0)
1720 return log_error_errno(r, "Failed to extract filename from path '%s': %m", path);
1721
1722 dropin_dirname = strjoina(filename, ".d");
1723
1724 r = config_parse_many(
1725 STRV_MAKE_CONST(path),
1726 conf_file_dirs,
1727 dropin_dirname,
1728 arg_definitions ? NULL : arg_root,
1729 "Partition\0",
1730 config_item_table_lookup, table,
1731 CONFIG_PARSE_WARN,
1732 p,
1733 NULL,
1734 &p->drop_in_files);
1735 if (r < 0)
1736 return r;
1737
1738 if (partition_type_exclude(&p->type))
1739 return 0;
1740
1741 if (p->size_min != UINT64_MAX && p->size_max != UINT64_MAX && p->size_min > p->size_max)
1742 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1743 "SizeMinBytes= larger than SizeMaxBytes=, refusing.");
1744
1745 if (p->padding_min != UINT64_MAX && p->padding_max != UINT64_MAX && p->padding_min > p->padding_max)
1746 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1747 "PaddingMinBytes= larger than PaddingMaxBytes=, refusing.");
1748
1749 if (sd_id128_is_null(p->type.uuid))
1750 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1751 "Type= not defined, refusing.");
1752
1753 if ((p->copy_blocks_path || p->copy_blocks_auto) &&
1754 (p->format || !strv_isempty(p->copy_files) || !strv_isempty(p->make_directories)))
1755 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1756 "Format=/CopyFiles=/MakeDirectories= and CopyBlocks= cannot be combined, refusing.");
1757
1758 if ((!strv_isempty(p->copy_files) || !strv_isempty(p->make_directories)) && streq_ptr(p->format, "swap"))
1759 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1760 "Format=swap and CopyFiles= cannot be combined, refusing.");
1761
1762 if (!p->format) {
1763 const char *format = NULL;
1764
1765 if (!strv_isempty(p->copy_files) || !strv_isempty(p->make_directories) || (p->encrypt != ENCRYPT_OFF && !(p->copy_blocks_path || p->copy_blocks_auto)))
1766 /* Pick "vfat" as file system for esp and xbootldr partitions, otherwise default to "ext4". */
1767 format = IN_SET(p->type.designator, PARTITION_ESP, PARTITION_XBOOTLDR) ? "vfat" : "ext4";
1768 else if (p->type.designator == PARTITION_SWAP)
1769 format = "swap";
1770
1771 if (format) {
1772 p->format = strdup(format);
1773 if (!p->format)
1774 return log_oom();
1775 }
1776 }
1777
1778 if (p->minimize != MINIMIZE_OFF && !p->format && p->verity != VERITY_HASH)
1779 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1780 "Minimize= can only be enabled if Format= or Verity=hash are set");
1781
1782 if (p->minimize == MINIMIZE_BEST && (p->format && !fstype_is_ro(p->format)) && p->verity != VERITY_HASH)
1783 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1784 "Minimize=best can only be used with read-only filesystems or Verity=hash");
1785
1786 if ((!strv_isempty(p->copy_files) || !strv_isempty(p->make_directories)) && !mkfs_supports_root_option(p->format) && geteuid() != 0)
1787 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EPERM),
1788 "Need to be root to populate %s filesystems with CopyFiles=/MakeDirectories=",
1789 p->format);
1790
1791 if (p->format && fstype_is_ro(p->format) && strv_isempty(p->copy_files) && strv_isempty(p->make_directories))
1792 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1793 "Cannot format %s filesystem without source files, refusing", p->format);
1794
1795 if (p->verity != VERITY_OFF || p->encrypt != ENCRYPT_OFF) {
1796 r = dlopen_cryptsetup();
1797 if (r < 0)
1798 return log_syntax(NULL, LOG_ERR, path, 1, r,
1799 "libcryptsetup not found, Verity=/Encrypt= are not supported: %m");
1800 }
1801
1802 if (p->verity != VERITY_OFF && !p->verity_match_key)
1803 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1804 "VerityMatchKey= must be set if Verity=%s", verity_mode_to_string(p->verity));
1805
1806 if (p->verity == VERITY_OFF && p->verity_match_key)
1807 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1808 "VerityMatchKey= can only be set if Verity= is not \"%s\"",
1809 verity_mode_to_string(p->verity));
1810
1811 if (IN_SET(p->verity, VERITY_HASH, VERITY_SIG) &&
1812 (p->copy_files || p->copy_blocks_path || p->copy_blocks_auto || p->format || p->make_directories))
1813 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1814 "CopyBlocks=/CopyFiles=/Format=/MakeDirectories= cannot be used with Verity=%s",
1815 verity_mode_to_string(p->verity));
1816
1817 if (p->verity != VERITY_OFF && p->encrypt != ENCRYPT_OFF)
1818 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1819 "Encrypting verity hash/data partitions is not supported");
1820
1821 if (p->verity == VERITY_SIG && !arg_private_key)
1822 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1823 "Verity signature partition requested but no private key provided (--private-key=)");
1824
1825 if (p->verity == VERITY_SIG && !arg_certificate)
1826 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1827 "Verity signature partition requested but no PEM certificate provided (--certificate=)");
1828
1829 if (p->verity == VERITY_SIG && (p->size_min != UINT64_MAX || p->size_max != UINT64_MAX))
1830 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
1831 "SizeMinBytes=/SizeMaxBytes= cannot be used with Verity=%s",
1832 verity_mode_to_string(p->verity));
1833
1834 if (!strv_isempty(p->subvolumes) && arg_offline > 0)
1835 return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EOPNOTSUPP),
1836 "Subvolumes= cannot be used with --offline=yes");
1837
1838 /* Verity partitions are read only, let's imply the RO flag hence, unless explicitly configured otherwise. */
1839 if ((IN_SET(p->type.designator,
1840 PARTITION_ROOT_VERITY,
1841 PARTITION_USR_VERITY) || p->verity == VERITY_DATA) && p->read_only < 0)
1842 p->read_only = true;
1843
1844 /* Default to "growfs" on, unless read-only */
1845 if (gpt_partition_type_knows_growfs(p->type) &&
1846 p->read_only <= 0)
1847 p->growfs = true;
1848
1849 if (!p->split_name_format) {
1850 char *s = strdup("%t");
1851 if (!s)
1852 return log_oom();
1853
1854 p->split_name_format = s;
1855 } else if (streq(p->split_name_format, "-"))
1856 p->split_name_format = mfree(p->split_name_format);
1857
1858 return 1;
1859 }
1860
1861 static int find_verity_sibling(Context *context, Partition *p, VerityMode mode, Partition **ret) {
1862 Partition *s = NULL;
1863
1864 assert(p);
1865 assert(p->verity != VERITY_OFF);
1866 assert(p->verity_match_key);
1867 assert(mode != VERITY_OFF);
1868 assert(p->verity != mode);
1869 assert(ret);
1870
1871 /* Try to find the matching sibling partition of the given type for a verity partition. For a data
1872 * partition, this is the corresponding hash partition with the same verity name (and vice versa for
1873 * the hash partition). */
1874
1875 LIST_FOREACH(partitions, q, context->partitions) {
1876 if (p == q)
1877 continue;
1878
1879 if (q->verity != mode)
1880 continue;
1881
1882 assert(q->verity_match_key);
1883
1884 if (!streq(p->verity_match_key, q->verity_match_key))
1885 continue;
1886
1887 if (s)
1888 return -ENOTUNIQ;
1889
1890 s = q;
1891 }
1892
1893 if (!s)
1894 return -ENXIO;
1895
1896 *ret = s;
1897
1898 return 0;
1899 }
1900
1901 static int context_open_and_lock_backing_fd(const char *node, int operation, int *backing_fd) {
1902 _cleanup_close_ int fd = -EBADF;
1903
1904 assert(node);
1905 assert(backing_fd);
1906
1907 if (*backing_fd >= 0)
1908 return 0;
1909
1910 fd = open(node, O_RDONLY|O_CLOEXEC);
1911 if (fd < 0)
1912 return log_error_errno(errno, "Failed to open device '%s': %m", node);
1913
1914 /* Tell udev not to interfere while we are processing the device */
1915 if (flock(fd, operation) < 0)
1916 return log_error_errno(errno, "Failed to lock device '%s': %m", node);
1917
1918 log_debug("Device %s opened and locked.", node);
1919 *backing_fd = TAKE_FD(fd);
1920 return 1;
1921 }
1922
/* Determines how many bytes of unused space follow partition p in partition table t.
 *
 * The space extends from the end of p to the start of the closest used partition that begins at or after
 * that end, or, if there is none, to the sector following the last LBA reported by fdisk_get_last_lba().
 * Both boundaries are converted to bytes using secsz and aligned inwards to grainsz before subtracting.
 *
 * Stores the result in *ret and returns 0, or returns a negative errno-style value (-EIO) if the
 * partition metadata is incomplete or inconsistent. */
static int determine_current_padding(
                struct fdisk_context *c,
                struct fdisk_table *t,
                struct fdisk_partition *p,
                uint64_t secsz,
                uint64_t grainsz,
                uint64_t *ret) {

        uint64_t end, following = UINT64_MAX;
        size_t n;

        assert(c);
        assert(t);
        assert(p);
        assert(ret);

        if (!fdisk_partition_has_end(p))
                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Partition has no end!");

        end = fdisk_partition_get_end(p);
        assert(end < UINT64_MAX);
        end++; /* The end is one sector before the next partition or padding. */
        assert(end < UINT64_MAX / secsz);
        end *= secsz;

        n = fdisk_table_get_nents(t);
        for (size_t i = 0; i < n; i++) {
                struct fdisk_partition *q = fdisk_table_get_partition(t, i);
                uint64_t begin;

                if (!q)
                        return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");

                if (fdisk_partition_is_used(q) <= 0 || !fdisk_partition_has_start(q))
                        continue;

                begin = fdisk_partition_get_start(q);
                assert(begin < UINT64_MAX / secsz);
                begin *= secsz;

                /* Only partitions located behind ours are of interest, and among those the closest one. */
                if (begin < end)
                        continue;

                if (following == UINT64_MAX || begin < following)
                        following = begin;
        }

        if (following == UINT64_MAX) {
                /* No later partition? In that case check the end of the usable area */
                following = fdisk_get_last_lba(c);
                assert(following < UINT64_MAX);
                following++; /* The last LBA is one sector before the end */

                assert(following < UINT64_MAX / secsz);
                following *= secsz;

                if (end > following)
                        return log_error_errno(SYNTHETIC_ERRNO(EIO), "Partition end beyond disk end.");
        }

        assert(following >= end);

        end = round_up_size(end, grainsz);
        following = round_down_size(following, grainsz);

        /* Saturated subtraction, since the rounding above may have moved the boundaries past each other */
        *ret = LESS_BY(following, end);
        return 0;
}
1991
1992 static int context_copy_from_one(Context *context, const char *src) {
1993 _cleanup_close_ int fd = -EBADF;
1994 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
1995 _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL;
1996 Partition *last = NULL;
1997 unsigned long secsz, grainsz;
1998 size_t n_partitions;
1999 int r;
2000
2001 assert(src);
2002
2003 r = context_open_and_lock_backing_fd(src, LOCK_SH, &fd);
2004 if (r < 0)
2005 return r;
2006
2007 r = fd_verify_regular(fd);
2008 if (r < 0)
2009 return log_error_errno(r, "%s is not a file: %m", src);
2010
2011 r = fdisk_new_context_at(fd, /* path = */ NULL, /* read_only = */ true, /* sector_size = */ UINT32_MAX, &c);
2012 if (r < 0)
2013 return log_error_errno(r, "Failed to create fdisk context: %m");
2014
2015 secsz = fdisk_get_sector_size(c);
2016 grainsz = fdisk_get_grain_size(c);
2017
2018 /* Insist on a power of two, and that it's a multiple of 512, i.e. the traditional sector size. */
2019 if (secsz < 512 || !ISPOWEROF2(secsz))
2020 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Sector size %lu is not a power of two larger than 512? Refusing.", secsz);
2021
2022 if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
2023 return log_error_errno(SYNTHETIC_ERRNO(EHWPOISON), "Cannot copy from disk %s with no GPT disk label.", src);
2024
2025 r = fdisk_get_partitions(c, &t);
2026 if (r < 0)
2027 return log_error_errno(r, "Failed to acquire partition table: %m");
2028
2029 n_partitions = fdisk_table_get_nents(t);
2030 for (size_t i = 0; i < n_partitions; i++) {
2031 _cleanup_(partition_freep) Partition *np = NULL;
2032 _cleanup_free_ char *label_copy = NULL;
2033 struct fdisk_partition *p;
2034 const char *label;
2035 uint64_t sz, start, padding;
2036 sd_id128_t ptid, id;
2037 GptPartitionType type;
2038
2039 p = fdisk_table_get_partition(t, i);
2040 if (!p)
2041 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");
2042
2043 if (fdisk_partition_is_used(p) <= 0)
2044 continue;
2045
2046 if (fdisk_partition_has_start(p) <= 0 ||
2047 fdisk_partition_has_size(p) <= 0 ||
2048 fdisk_partition_has_partno(p) <= 0)
2049 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found a partition without a position, size or number.");
2050
2051 r = fdisk_partition_get_type_as_id128(p, &ptid);
2052 if (r < 0)
2053 return log_error_errno(r, "Failed to query partition type UUID: %m");
2054
2055 type = gpt_partition_type_from_uuid(ptid);
2056
2057 r = fdisk_partition_get_uuid_as_id128(p, &id);
2058 if (r < 0)
2059 return log_error_errno(r, "Failed to query partition UUID: %m");
2060
2061 label = fdisk_partition_get_name(p);
2062 if (!isempty(label)) {
2063 label_copy = strdup(label);
2064 if (!label_copy)
2065 return log_oom();
2066 }
2067
2068 sz = fdisk_partition_get_size(p);
2069 assert(sz <= UINT64_MAX/secsz);
2070 sz *= secsz;
2071
2072 start = fdisk_partition_get_start(p);
2073 assert(start <= UINT64_MAX/secsz);
2074 start *= secsz;
2075
2076 if (partition_type_exclude(&type))
2077 continue;
2078
2079 np = partition_new();
2080 if (!np)
2081 return log_oom();
2082
2083 np->type = type;
2084 np->new_uuid = id;
2085 np->new_uuid_is_set = true;
2086 np->size_min = np->size_max = sz;
2087 np->new_label = TAKE_PTR(label_copy);
2088
2089 np->definition_path = strdup(src);
2090 if (!np->definition_path)
2091 return log_oom();
2092
2093 r = determine_current_padding(c, t, p, secsz, grainsz, &padding);
2094 if (r < 0)
2095 return r;
2096
2097 np->padding_min = np->padding_max = padding;
2098
2099 np->copy_blocks_path = strdup(src);
2100 if (!np->copy_blocks_path)
2101 return log_oom();
2102
2103 np->copy_blocks_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
2104 if (np->copy_blocks_fd < 0)
2105 return log_error_errno(r, "Failed to duplicate file descriptor of %s: %m", src);
2106
2107 np->copy_blocks_offset = start;
2108 np->copy_blocks_size = sz;
2109
2110 r = fdisk_partition_get_attrs_as_uint64(p, &np->gpt_flags);
2111 if (r < 0)
2112 return log_error_errno(r, "Failed to get partition flags: %m");
2113
2114 LIST_INSERT_AFTER(partitions, context->partitions, last, np);
2115 last = TAKE_PTR(np);
2116 context->n_partitions++;
2117 }
2118
2119 return 0;
2120 }
2121
2122 static int context_copy_from(Context *context) {
2123 int r;
2124
2125 assert(context);
2126
2127 STRV_FOREACH(src, arg_copy_from) {
2128 r = context_copy_from_one(context, *src);
2129 if (r < 0)
2130 return r;
2131 }
2132
2133 return 0;
2134 }
2135
2136 static int context_read_definitions(Context *context) {
2137 _cleanup_strv_free_ char **files = NULL;
2138 Partition *last = LIST_FIND_TAIL(partitions, context->partitions);
2139 const char *const *dirs;
2140 int r;
2141
2142 assert(context);
2143
2144 dirs = (const char* const*) (arg_definitions ?: CONF_PATHS_STRV("repart.d"));
2145
2146 r = conf_files_list_strv(&files, ".conf", arg_definitions ? NULL : arg_root, CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED, dirs);
2147 if (r < 0)
2148 return log_error_errno(r, "Failed to enumerate *.conf files: %m");
2149
2150 STRV_FOREACH(f, files) {
2151 _cleanup_(partition_freep) Partition *p = NULL;
2152
2153 p = partition_new();
2154 if (!p)
2155 return log_oom();
2156
2157 p->definition_path = strdup(*f);
2158 if (!p->definition_path)
2159 return log_oom();
2160
2161 r = partition_read_definition(p, *f, dirs);
2162 if (r < 0)
2163 return r;
2164 if (r == 0)
2165 continue;
2166
2167 LIST_INSERT_AFTER(partitions, context->partitions, last, p);
2168 last = TAKE_PTR(p);
2169 context->n_partitions++;
2170 }
2171
2172 /* Check that each configured verity hash/data partition has a matching verity data/hash partition. */
2173
2174 LIST_FOREACH(partitions, p, context->partitions) {
2175 if (p->verity == VERITY_OFF)
2176 continue;
2177
2178 for (VerityMode mode = VERITY_OFF + 1; mode < _VERITY_MODE_MAX; mode++) {
2179 Partition *q = NULL;
2180
2181 if (p->verity == mode)
2182 continue;
2183
2184 if (p->siblings[mode])
2185 continue;
2186
2187 r = find_verity_sibling(context, p, mode, &q);
2188 if (r == -ENXIO) {
2189 if (mode != VERITY_SIG)
2190 return log_syntax(NULL, LOG_ERR, p->definition_path, 1, SYNTHETIC_ERRNO(EINVAL),
2191 "Missing verity %s partition for verity %s partition with VerityMatchKey=%s",
2192 verity_mode_to_string(mode), verity_mode_to_string(p->verity), p->verity_match_key);
2193 } else if (r == -ENOTUNIQ)
2194 return log_syntax(NULL, LOG_ERR, p->definition_path, 1, SYNTHETIC_ERRNO(EINVAL),
2195 "Multiple verity %s partitions found for verity %s partition with VerityMatchKey=%s",
2196 verity_mode_to_string(mode), verity_mode_to_string(p->verity), p->verity_match_key);
2197 else if (r < 0)
2198 return log_syntax(NULL, LOG_ERR, p->definition_path, 1, r,
2199 "Failed to find verity %s partition for verity %s partition with VerityMatchKey=%s",
2200 verity_mode_to_string(mode), verity_mode_to_string(p->verity), p->verity_match_key);
2201
2202 if (q) {
2203 if (q->priority != p->priority)
2204 return log_syntax(NULL, LOG_ERR, p->definition_path, 1, SYNTHETIC_ERRNO(EINVAL),
2205 "Priority mismatch (%i != %i) for verity sibling partitions with VerityMatchKey=%s",
2206 p->priority, q->priority, p->verity_match_key);
2207
2208 p->siblings[mode] = q;
2209 }
2210 }
2211 }
2212
2213 LIST_FOREACH(partitions, p, context->partitions) {
2214 Partition *dp;
2215
2216 if (p->verity != VERITY_HASH)
2217 continue;
2218
2219 if (p->minimize == MINIMIZE_OFF)
2220 continue;
2221
2222 assert_se(dp = p->siblings[VERITY_DATA]);
2223
2224 if (dp->minimize == MINIMIZE_OFF && !(dp->copy_blocks_path || dp->copy_blocks_auto))
2225 return log_syntax(NULL, LOG_ERR, p->definition_path, 1, SYNTHETIC_ERRNO(EINVAL),
2226 "Minimize= set for verity hash partition but data partition does "
2227 "not set CopyBlocks= or Minimize=");
2228
2229 }
2230
2231 return 0;
2232 }
2233
2234 static int fdisk_ask_cb(struct fdisk_context *c, struct fdisk_ask *ask, void *data) {
2235 _cleanup_free_ char *ids = NULL;
2236 int r;
2237
2238 if (fdisk_ask_get_type(ask) != FDISK_ASKTYPE_STRING)
2239 return -EINVAL;
2240
2241 ids = new(char, SD_ID128_UUID_STRING_MAX);
2242 if (!ids)
2243 return -ENOMEM;
2244
2245 r = fdisk_ask_string_set_result(ask, sd_id128_to_uuid_string(*(sd_id128_t*) data, ids));
2246 if (r < 0)
2247 return r;
2248
2249 TAKE_PTR(ids);
2250 return 0;
2251 }
2252
2253 static int fdisk_set_disklabel_id_by_uuid(struct fdisk_context *c, sd_id128_t id) {
2254 int r;
2255
2256 r = fdisk_set_ask(c, fdisk_ask_cb, &id);
2257 if (r < 0)
2258 return r;
2259
2260 r = fdisk_set_disklabel_id(c);
2261 if (r < 0)
2262 return r;
2263
2264 return fdisk_set_ask(c, NULL, NULL);
2265 }
2266
2267 static int derive_uuid(sd_id128_t base, const char *token, sd_id128_t *ret) {
2268 union {
2269 uint8_t md[SHA256_DIGEST_SIZE];
2270 sd_id128_t id;
2271 } result;
2272
2273 assert(token);
2274 assert(ret);
2275
2276 /* Derive a new UUID from the specified UUID in a stable and reasonably safe way. Specifically, we
2277 * calculate the HMAC-SHA256 of the specified token string, keyed by the supplied base (typically the
2278 * machine ID). We use the machine ID as key (and not as cleartext!) of the HMAC operation since it's
2279 * the machine ID we don't want to leak. */
2280
2281 hmac_sha256(base.bytes, sizeof(base.bytes), token, strlen(token), result.md);
2282
2283 /* Take the first half, mark it as v4 UUID */
2284 assert_cc(sizeof(result.md) == sizeof(result.id) * 2);
2285 *ret = id128_make_v4_uuid(result.id);
2286 return 0;
2287 }
2288
2289 static void derive_salt(sd_id128_t base, const char *token, uint8_t ret[static SHA256_DIGEST_SIZE]) {
2290 assert(token);
2291
2292 hmac_sha256(base.bytes, sizeof(base.bytes), token, strlen(token), ret);
2293 }
2294
2295 static int context_load_partition_table(Context *context) {
2296 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
2297 _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL;
2298 uint64_t left_boundary = UINT64_MAX, first_lba, last_lba, nsectors;
2299 _cleanup_free_ char *disk_uuid_string = NULL;
2300 bool from_scratch = false;
2301 sd_id128_t disk_uuid;
2302 size_t n_partitions;
2303 unsigned long secsz;
2304 uint64_t grainsz, fs_secsz = DEFAULT_FILESYSTEM_SECTOR_SIZE;
2305 int r;
2306
2307 assert(context);
2308 assert(!context->fdisk_context);
2309 assert(!context->free_areas);
2310 assert(context->start == UINT64_MAX);
2311 assert(context->end == UINT64_MAX);
2312 assert(context->total == UINT64_MAX);
2313
2314 c = fdisk_new_context();
2315 if (!c)
2316 return log_oom();
2317
2318 if (arg_sector_size > 0) {
2319 fs_secsz = arg_sector_size;
2320 r = fdisk_save_user_sector_size(c, /* phy= */ 0, arg_sector_size);
2321 } else {
2322 uint32_t ssz;
2323 struct stat st;
2324
2325 r = context_open_and_lock_backing_fd(context->node, arg_dry_run ? LOCK_SH : LOCK_EX,
2326 &context->backing_fd);
2327 if (r < 0)
2328 return r;
2329
2330 if (fstat(context->backing_fd, &st) < 0)
2331 return log_error_errno(r, "Failed to stat %s: %m", context->node);
2332
2333 /* Auto-detect sector size if not specified. */
2334 r = probe_sector_size_prefer_ioctl(context->backing_fd, &ssz);
2335 if (r < 0)
2336 return log_error_errno(r, "Failed to probe sector size of '%s': %m", context->node);
2337
2338 /* If we found the sector size and we're operating on a block device, use it as the file
2339 * system sector size as well, as we know its the sector size of the actual block device and
2340 * not just the offset at which we found the GPT header. */
2341 if (r > 0 && S_ISBLK(st.st_mode))
2342 fs_secsz = ssz;
2343
2344 r = fdisk_save_user_sector_size(c, /* phy= */ 0, ssz);
2345 }
2346 if (r < 0)
2347 return log_error_errno(r, "Failed to set sector size: %m");
2348
2349 /* libfdisk doesn't have an API to operate on arbitrary fds, hence reopen the fd going via the
2350 * /proc/self/fd/ magic path if we have an existing fd. Open the original file otherwise. */
2351 r = fdisk_assign_device(
2352 c,
2353 context->backing_fd >= 0 ? FORMAT_PROC_FD_PATH(context->backing_fd) : context->node,
2354 arg_dry_run);
2355 if (r == -EINVAL && arg_size_auto) {
2356 struct stat st;
2357
2358 /* libfdisk returns EINVAL if opening a file of size zero. Let's check for that, and accept
2359 * it if automatic sizing is requested. */
2360
2361 if (context->backing_fd < 0)
2362 r = stat(context->node, &st);
2363 else
2364 r = fstat(context->backing_fd, &st);
2365 if (r < 0)
2366 return log_error_errno(errno, "Failed to stat block device '%s': %m", context->node);
2367
2368 if (S_ISREG(st.st_mode) && st.st_size == 0) {
2369 /* Use the fallback values if we have no better idea */
2370 context->sector_size = fdisk_get_sector_size(c);
2371 context->fs_sector_size = fs_secsz;
2372 context->grain_size = 4096;
2373 return /* from_scratch = */ true;
2374 }
2375
2376 r = -EINVAL;
2377 }
2378 if (r < 0)
2379 return log_error_errno(r, "Failed to open device '%s': %m", context->node);
2380
2381 if (context->backing_fd < 0) {
2382 /* If we have no fd referencing the device yet, make a copy of the fd now, so that we have one */
2383 r = context_open_and_lock_backing_fd(FORMAT_PROC_FD_PATH(fdisk_get_devfd(c)),
2384 arg_dry_run ? LOCK_SH : LOCK_EX,
2385 &context->backing_fd);
2386 if (r < 0)
2387 return r;
2388 }
2389
2390 /* The offsets/sizes libfdisk returns to us will be in multiple of the sector size of the
2391 * device. This is typically 512, and sometimes 4096. Let's query libfdisk once for it, and then use
2392 * it for all our needs. Note that the values we use ourselves always are in bytes though, thus mean
2393 * the same thing universally. Also note that regardless what kind of sector size is in use we'll
2394 * place partitions at multiples of 4K. */
2395 secsz = fdisk_get_sector_size(c);
2396
2397 /* Insist on a power of two, and that it's a multiple of 512, i.e. the traditional sector size. */
2398 if (secsz < 512 || !ISPOWEROF2(secsz))
2399 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Sector size %lu is not a power of two larger than 512? Refusing.", secsz);
2400
2401 /* Use at least 4K, and ensure it's a multiple of the sector size, regardless if that is smaller or
2402 * larger */
2403 grainsz = secsz < 4096 ? 4096 : secsz;
2404
2405 log_debug("Sector size of device is %lu bytes. Using grain size of %" PRIu64 ".", secsz, grainsz);
2406
2407 switch (arg_empty) {
2408
2409 case EMPTY_REFUSE:
2410 /* Refuse empty disks, insist on an existing GPT partition table */
2411 if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
2412 return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s has no GPT disk label, not repartitioning.", context->node);
2413
2414 break;
2415
2416 case EMPTY_REQUIRE:
2417 /* Require an empty disk, refuse any existing partition table */
2418 r = fdisk_has_label(c);
2419 if (r < 0)
2420 return log_error_errno(r, "Failed to determine whether disk %s has a disk label: %m", context->node);
2421 if (r > 0)
2422 return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s already has a disk label, refusing.", context->node);
2423
2424 from_scratch = true;
2425 break;
2426
2427 case EMPTY_ALLOW:
2428 /* Allow both an empty disk and an existing partition table, but only GPT */
2429 r = fdisk_has_label(c);
2430 if (r < 0)
2431 return log_error_errno(r, "Failed to determine whether disk %s has a disk label: %m", context->node);
2432 if (r > 0) {
2433 if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
2434 return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s has non-GPT disk label, not repartitioning.", context->node);
2435 } else
2436 from_scratch = true;
2437
2438 break;
2439
2440 case EMPTY_FORCE:
2441 case EMPTY_CREATE:
2442 /* Always reinitiaize the disk, don't consider what there was on the disk before */
2443 from_scratch = true;
2444 break;
2445
2446 default:
2447 assert_not_reached();
2448 }
2449
2450 if (from_scratch) {
2451 r = fdisk_create_disklabel(c, "gpt");
2452 if (r < 0)
2453 return log_error_errno(r, "Failed to create GPT disk label: %m");
2454
2455 r = derive_uuid(context->seed, "disk-uuid", &disk_uuid);
2456 if (r < 0)
2457 return log_error_errno(r, "Failed to acquire disk GPT uuid: %m");
2458
2459 r = fdisk_set_disklabel_id_by_uuid(c, disk_uuid);
2460 if (r < 0)
2461 return log_error_errno(r, "Failed to set GPT disk label: %m");
2462
2463 goto add_initial_free_area;
2464 }
2465
2466 r = fdisk_get_disklabel_id(c, &disk_uuid_string);
2467 if (r < 0)
2468 return log_error_errno(r, "Failed to get current GPT disk label UUID: %m");
2469
2470 r = id128_from_string_nonzero(disk_uuid_string, &disk_uuid);
2471 if (r == -ENXIO) {
2472 r = derive_uuid(context->seed, "disk-uuid", &disk_uuid);
2473 if (r < 0)
2474 return log_error_errno(r, "Failed to acquire disk GPT uuid: %m");
2475
2476 r = fdisk_set_disklabel_id(c);
2477 if (r < 0)
2478 return log_error_errno(r, "Failed to set GPT disk label: %m");
2479 } else if (r < 0)
2480 return log_error_errno(r, "Failed to parse current GPT disk label UUID: %m");
2481
2482 r = fdisk_get_partitions(c, &t);
2483 if (r < 0)
2484 return log_error_errno(r, "Failed to acquire partition table: %m");
2485
2486 n_partitions = fdisk_table_get_nents(t);
2487 for (size_t i = 0; i < n_partitions; i++) {
2488 _cleanup_free_ char *label_copy = NULL;
2489 Partition *last = NULL;
2490 struct fdisk_partition *p;
2491 const char *label;
2492 uint64_t sz, start;
2493 bool found = false;
2494 sd_id128_t ptid, id;
2495 size_t partno;
2496
2497 p = fdisk_table_get_partition(t, i);
2498 if (!p)
2499 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");
2500
2501 if (fdisk_partition_is_used(p) <= 0)
2502 continue;
2503
2504 if (fdisk_partition_has_start(p) <= 0 ||
2505 fdisk_partition_has_size(p) <= 0 ||
2506 fdisk_partition_has_partno(p) <= 0)
2507 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found a partition without a position, size or number.");
2508
2509 r = fdisk_partition_get_type_as_id128(p, &ptid);
2510 if (r < 0)
2511 return log_error_errno(r, "Failed to query partition type UUID: %m");
2512
2513 r = fdisk_partition_get_uuid_as_id128(p, &id);
2514 if (r < 0)
2515 return log_error_errno(r, "Failed to query partition UUID: %m");
2516
2517 label = fdisk_partition_get_name(p);
2518 if (!isempty(label)) {
2519 label_copy = strdup(label);
2520 if (!label_copy)
2521 return log_oom();
2522 }
2523
2524 sz = fdisk_partition_get_size(p);
2525 assert(sz <= UINT64_MAX/secsz);
2526 sz *= secsz;
2527
2528 start = fdisk_partition_get_start(p);
2529 assert(start <= UINT64_MAX/secsz);
2530 start *= secsz;
2531
2532 partno = fdisk_partition_get_partno(p);
2533
2534 if (left_boundary == UINT64_MAX || left_boundary > start)
2535 left_boundary = start;
2536
2537 /* Assign this existing partition to the first partition of the right type that doesn't have
2538 * an existing one assigned yet. */
2539 LIST_FOREACH(partitions, pp, context->partitions) {
2540 last = pp;
2541
2542 if (!sd_id128_equal(pp->type.uuid, ptid))
2543 continue;
2544
2545 if (!pp->current_partition) {
2546 pp->current_uuid = id;
2547 pp->current_size = sz;
2548 pp->offset = start;
2549 pp->partno = partno;
2550 pp->current_label = TAKE_PTR(label_copy);
2551
2552 pp->current_partition = p;
2553 fdisk_ref_partition(p);
2554
2555 r = determine_current_padding(c, t, p, secsz, grainsz, &pp->current_padding);
2556 if (r < 0)
2557 return r;
2558
2559 if (pp->current_padding > 0) {
2560 r = context_add_free_area(context, pp->current_padding, pp);
2561 if (r < 0)
2562 return r;
2563 }
2564
2565 found = true;
2566 break;
2567 }
2568 }
2569
2570 /* If we have no matching definition, create a new one. */
2571 if (!found) {
2572 _cleanup_(partition_freep) Partition *np = NULL;
2573
2574 np = partition_new();
2575 if (!np)
2576 return log_oom();
2577
2578 np->current_uuid = id;
2579 np->type = gpt_partition_type_from_uuid(ptid);
2580 np->current_size = sz;
2581 np->offset = start;
2582 np->partno = partno;
2583 np->current_label = TAKE_PTR(label_copy);
2584
2585 np->current_partition = p;
2586 fdisk_ref_partition(p);
2587
2588 r = determine_current_padding(c, t, p, secsz, grainsz, &np->current_padding);
2589 if (r < 0)
2590 return r;
2591
2592 if (np->current_padding > 0) {
2593 r = context_add_free_area(context, np->current_padding, np);
2594 if (r < 0)
2595 return r;
2596 }
2597
2598 LIST_INSERT_AFTER(partitions, context->partitions, last, TAKE_PTR(np));
2599 context->n_partitions++;
2600 }
2601 }
2602
2603 add_initial_free_area:
2604 nsectors = fdisk_get_nsectors(c);
2605 assert(nsectors <= UINT64_MAX/secsz);
2606 nsectors *= secsz;
2607
2608 first_lba = fdisk_get_first_lba(c);
2609 assert(first_lba <= UINT64_MAX/secsz);
2610 first_lba *= secsz;
2611
2612 last_lba = fdisk_get_last_lba(c);
2613 assert(last_lba < UINT64_MAX);
2614 last_lba++;
2615 assert(last_lba <= UINT64_MAX/secsz);
2616 last_lba *= secsz;
2617
2618 assert(last_lba >= first_lba);
2619
2620 if (left_boundary == UINT64_MAX) {
2621 /* No partitions at all? Then the whole disk is up for grabs. */
2622
2623 first_lba = round_up_size(first_lba, grainsz);
2624 last_lba = round_down_size(last_lba, grainsz);
2625
2626 if (last_lba > first_lba) {
2627 r = context_add_free_area(context, last_lba - first_lba, NULL);
2628 if (r < 0)
2629 return r;
2630 }
2631 } else {
2632 /* Add space left of first partition */
2633 assert(left_boundary >= first_lba);
2634
2635 first_lba = round_up_size(first_lba, grainsz);
2636 left_boundary = round_down_size(left_boundary, grainsz);
2637 last_lba = round_down_size(last_lba, grainsz);
2638
2639 if (left_boundary > first_lba) {
2640 r = context_add_free_area(context, left_boundary - first_lba, NULL);
2641 if (r < 0)
2642 return r;
2643 }
2644 }
2645
2646 context->start = first_lba;
2647 context->end = last_lba;
2648 context->total = nsectors;
2649 context->sector_size = secsz;
2650 context->fs_sector_size = fs_secsz;
2651 context->grain_size = grainsz;
2652 context->fdisk_context = TAKE_PTR(c);
2653
2654 return from_scratch;
2655 }
2656
2657 static void context_unload_partition_table(Context *context) {
2658 assert(context);
2659
2660 LIST_FOREACH(partitions, p, context->partitions) {
2661
2662 /* Entirely remove partitions that have no configuration */
2663 if (PARTITION_IS_FOREIGN(p)) {
2664 partition_unlink_and_free(context, p);
2665 continue;
2666 }
2667
2668 /* Otherwise drop all data we read off the block device and everything we might have
2669 * calculated based on it */
2670
2671 p->dropped = false;
2672 p->current_size = UINT64_MAX;
2673 p->new_size = UINT64_MAX;
2674 p->current_padding = UINT64_MAX;
2675 p->new_padding = UINT64_MAX;
2676 p->partno = UINT64_MAX;
2677 p->offset = UINT64_MAX;
2678
2679 if (p->current_partition) {
2680 fdisk_unref_partition(p->current_partition);
2681 p->current_partition = NULL;
2682 }
2683
2684 if (p->new_partition) {
2685 fdisk_unref_partition(p->new_partition);
2686 p->new_partition = NULL;
2687 }
2688
2689 p->padding_area = NULL;
2690 p->allocated_to_area = NULL;
2691
2692 p->current_uuid = SD_ID128_NULL;
2693 p->current_label = mfree(p->current_label);
2694 }
2695
2696 context->start = UINT64_MAX;
2697 context->end = UINT64_MAX;
2698 context->total = UINT64_MAX;
2699
2700 if (context->fdisk_context) {
2701 fdisk_unref_context(context->fdisk_context);
2702 context->fdisk_context = NULL;
2703 }
2704
2705 context_free_free_areas(context);
2706 }
2707
2708 static int format_size_change(uint64_t from, uint64_t to, char **ret) {
2709 char *t;
2710
2711 if (from != UINT64_MAX) {
2712 if (from == to || to == UINT64_MAX)
2713 t = strdup(FORMAT_BYTES(from));
2714 else
2715 t = strjoin(FORMAT_BYTES(from), " ", special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), " ", FORMAT_BYTES(to));
2716 } else if (to != UINT64_MAX)
2717 t = strjoin(special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), " ", FORMAT_BYTES(to));
2718 else {
2719 *ret = NULL;
2720 return 0;
2721 }
2722
2723 if (!t)
2724 return log_oom();
2725
2726 *ret = t;
2727 return 1;
2728 }
2729
2730 static const char *partition_label(const Partition *p) {
2731 assert(p);
2732
2733 if (p->new_label)
2734 return p->new_label;
2735
2736 if (p->current_label)
2737 return p->current_label;
2738
2739 return gpt_partition_type_uuid_to_string(p->type.uuid);
2740 }
2741
2742 static int context_dump_partitions(Context *context) {
2743 _cleanup_(table_unrefp) Table *t = NULL;
2744 uint64_t sum_padding = 0, sum_size = 0;
2745 int r;
2746 const size_t roothash_col = 14, dropin_files_col = 15, split_path_col = 16;
2747 bool has_roothash = false, has_dropin_files = false, has_split_path = false;
2748
2749 if ((arg_json_format_flags & JSON_FORMAT_OFF) && context->n_partitions == 0) {
2750 log_info("Empty partition table.");
2751 return 0;
2752 }
2753
2754 t = table_new("type",
2755 "label",
2756 "uuid",
2757 "partno",
2758 "file",
2759 "node",
2760 "offset",
2761 "old size",
2762 "raw size",
2763 "size",
2764 "old padding",
2765 "raw padding",
2766 "padding",
2767 "activity",
2768 "roothash",
2769 "drop-in files",
2770 "split path");
2771 if (!t)
2772 return log_oom();
2773
2774 if (!DEBUG_LOGGING) {
2775 if (arg_json_format_flags & JSON_FORMAT_OFF)
2776 (void) table_set_display(t, (size_t) 0, (size_t) 1, (size_t) 2, (size_t) 3, (size_t) 4,
2777 (size_t) 8, (size_t) 9, (size_t) 12, roothash_col, dropin_files_col,
2778 split_path_col);
2779 else
2780 (void) table_set_display(t, (size_t) 0, (size_t) 1, (size_t) 2, (size_t) 3, (size_t) 4,
2781 (size_t) 5, (size_t) 6, (size_t) 7, (size_t) 8, (size_t) 10,
2782 (size_t) 11, (size_t) 13, roothash_col, dropin_files_col,
2783 split_path_col);
2784 }
2785
2786 (void) table_set_align_percent(t, table_get_cell(t, 0, 5), 100);
2787 (void) table_set_align_percent(t, table_get_cell(t, 0, 6), 100);
2788 (void) table_set_align_percent(t, table_get_cell(t, 0, 7), 100);
2789 (void) table_set_align_percent(t, table_get_cell(t, 0, 8), 100);
2790 (void) table_set_align_percent(t, table_get_cell(t, 0, 9), 100);
2791 (void) table_set_align_percent(t, table_get_cell(t, 0, 10), 100);
2792 (void) table_set_align_percent(t, table_get_cell(t, 0, 11), 100);
2793
2794 LIST_FOREACH(partitions, p, context->partitions) {
2795 _cleanup_free_ char *size_change = NULL, *padding_change = NULL, *partname = NULL, *rh = NULL;
2796 char uuid_buffer[SD_ID128_UUID_STRING_MAX];
2797 const char *label, *activity = NULL;
2798
2799 if (p->dropped)
2800 continue;
2801
2802 if (p->current_size == UINT64_MAX)
2803 activity = "create";
2804 else if (p->current_size != p->new_size)
2805 activity = "resize";
2806
2807 label = partition_label(p);
2808 partname = p->partno != UINT64_MAX ? fdisk_partname(context->node, p->partno+1) : NULL;
2809
2810 r = format_size_change(p->current_size, p->new_size, &size_change);
2811 if (r < 0)
2812 return r;
2813
2814 r = format_size_change(p->current_padding, p->new_padding, &padding_change);
2815 if (r < 0)
2816 return r;
2817
2818 if (p->new_size != UINT64_MAX)
2819 sum_size += p->new_size;
2820 if (p->new_padding != UINT64_MAX)
2821 sum_padding += p->new_padding;
2822
2823 if (p->verity != VERITY_OFF) {
2824 Partition *hp = p->verity == VERITY_HASH ? p : p->siblings[VERITY_HASH];
2825
2826 rh = iovec_is_set(&hp->roothash) ? hexmem(hp->roothash.iov_base, hp->roothash.iov_len) : strdup("TBD");
2827 if (!rh)
2828 return log_oom();
2829 }
2830
2831 r = table_add_many(
2832 t,
2833 TABLE_STRING, gpt_partition_type_uuid_to_string_harder(p->type.uuid, uuid_buffer),
2834 TABLE_STRING, empty_to_null(label) ?: "-", TABLE_SET_COLOR, empty_to_null(label) ? NULL : ansi_grey(),
2835 TABLE_UUID, p->new_uuid_is_set ? p->new_uuid : p->current_uuid,
2836 TABLE_UINT64, p->partno,
2837 TABLE_PATH_BASENAME, p->definition_path, TABLE_SET_COLOR, p->definition_path ? NULL : ansi_grey(),
2838 TABLE_STRING, partname ?: "-", TABLE_SET_COLOR, partname ? NULL : ansi_highlight(),
2839 TABLE_UINT64, p->offset,
2840 TABLE_UINT64, p->current_size == UINT64_MAX ? 0 : p->current_size,
2841 TABLE_UINT64, p->new_size,
2842 TABLE_STRING, size_change, TABLE_SET_COLOR, !p->partitions_next && sum_size > 0 ? ansi_underline() : NULL,
2843 TABLE_UINT64, p->current_padding == UINT64_MAX ? 0 : p->current_padding,
2844 TABLE_UINT64, p->new_padding,
2845 TABLE_STRING, padding_change, TABLE_SET_COLOR, !p->partitions_next && sum_padding > 0 ? ansi_underline() : NULL,
2846 TABLE_STRING, activity ?: "unchanged",
2847 TABLE_STRING, rh,
2848 TABLE_STRV, p->drop_in_files,
2849 TABLE_STRING, empty_to_null(p->split_path) ?: "-");
2850 if (r < 0)
2851 return table_log_add_error(r);
2852
2853 has_roothash = has_roothash || !isempty(rh);
2854 has_dropin_files = has_dropin_files || !strv_isempty(p->drop_in_files);
2855 has_split_path = has_split_path || !isempty(p->split_path);
2856 }
2857
2858 if ((arg_json_format_flags & JSON_FORMAT_OFF) && (sum_padding > 0 || sum_size > 0)) {
2859 const char *a, *b;
2860
2861 a = strjoina(special_glyph(SPECIAL_GLYPH_SIGMA), " = ", FORMAT_BYTES(sum_size));
2862 b = strjoina(special_glyph(SPECIAL_GLYPH_SIGMA), " = ", FORMAT_BYTES(sum_padding));
2863
2864 r = table_add_many(
2865 t,
2866 TABLE_EMPTY,
2867 TABLE_EMPTY,
2868 TABLE_EMPTY,
2869 TABLE_EMPTY,
2870 TABLE_EMPTY,
2871 TABLE_EMPTY,
2872 TABLE_EMPTY,
2873 TABLE_EMPTY,
2874 TABLE_EMPTY,
2875 TABLE_STRING, a,
2876 TABLE_EMPTY,
2877 TABLE_EMPTY,
2878 TABLE_STRING, b,
2879 TABLE_EMPTY,
2880 TABLE_EMPTY,
2881 TABLE_EMPTY,
2882 TABLE_EMPTY);
2883 if (r < 0)
2884 return table_log_add_error(r);
2885 }
2886
2887 if (!has_roothash) {
2888 r = table_hide_column_from_display(t, roothash_col);
2889 if (r < 0)
2890 return log_error_errno(r, "Failed to set columns to display: %m");
2891 }
2892
2893 if (!has_dropin_files) {
2894 r = table_hide_column_from_display(t, dropin_files_col);
2895 if (r < 0)
2896 return log_error_errno(r, "Failed to set columns to display: %m");
2897 }
2898
2899 if (!has_split_path) {
2900 r = table_hide_column_from_display(t, split_path_col);
2901 if (r < 0)
2902 return log_error_errno(r, "Failed to set columns to display: %m");
2903 }
2904
2905 return table_print_with_pager(t, arg_json_format_flags, arg_pager_flags, arg_legend);
2906 }
2907
2908 static void context_bar_char_process_partition(
2909 Context *context,
2910 Partition *bar[],
2911 size_t n,
2912 Partition *p,
2913 size_t *ret_start) {
2914
2915 uint64_t from, to, total;
2916 size_t x, y;
2917
2918 assert(context);
2919 assert(bar);
2920 assert(n > 0);
2921 assert(p);
2922
2923 if (p->dropped)
2924 return;
2925
2926 assert(p->offset != UINT64_MAX);
2927 assert(p->new_size != UINT64_MAX);
2928
2929 from = p->offset;
2930 to = from + p->new_size;
2931
2932 assert(context->total > 0);
2933 total = context->total;
2934
2935 assert(from <= total);
2936 x = from * n / total;
2937
2938 assert(to <= total);
2939 y = to * n / total;
2940
2941 assert(x <= y);
2942 assert(y <= n);
2943
2944 for (size_t i = x; i < y; i++)
2945 bar[i] = p;
2946
2947 *ret_start = x;
2948 }
2949
2950 static int partition_hint(const Partition *p, const char *node, char **ret) {
2951 _cleanup_free_ char *buf = NULL;
2952 const char *label;
2953 sd_id128_t id;
2954
2955 /* Tries really hard to find a suitable description for this partition */
2956
2957 if (p->definition_path)
2958 return path_extract_filename(p->definition_path, ret);
2959
2960 label = partition_label(p);
2961 if (!isempty(label)) {
2962 buf = strdup(label);
2963 goto done;
2964 }
2965
2966 if (p->partno != UINT64_MAX) {
2967 buf = fdisk_partname(node, p->partno+1);
2968 goto done;
2969 }
2970
2971 if (p->new_uuid_is_set)
2972 id = p->new_uuid;
2973 else if (!sd_id128_is_null(p->current_uuid))
2974 id = p->current_uuid;
2975 else
2976 id = p->type.uuid;
2977
2978 buf = strdup(SD_ID128_TO_UUID_STRING(id));
2979
2980 done:
2981 if (!buf)
2982 return -ENOMEM;
2983
2984 *ret = TAKE_PTR(buf);
2985 return 0;
2986 }
2987
2988 static int context_dump_partition_bar(Context *context) {
2989 _cleanup_free_ Partition **bar = NULL;
2990 _cleanup_free_ size_t *start_array = NULL;
2991 Partition *last = NULL;
2992 bool z = false;
2993 size_t c, j = 0;
2994
2995 assert_se((c = columns()) >= 2);
2996 c -= 2; /* We do not use the leftmost and rightmost character cell */
2997
2998 bar = new0(Partition*, c);
2999 if (!bar)
3000 return log_oom();
3001
3002 start_array = new(size_t, context->n_partitions);
3003 if (!start_array)
3004 return log_oom();
3005
3006 LIST_FOREACH(partitions, p, context->partitions)
3007 context_bar_char_process_partition(context, bar, c, p, start_array + j++);
3008
3009 putc(' ', stdout);
3010
3011 for (size_t i = 0; i < c; i++) {
3012 if (bar[i]) {
3013 if (last != bar[i])
3014 z = !z;
3015
3016 fputs(z ? ansi_green() : ansi_yellow(), stdout);
3017 fputs(special_glyph(SPECIAL_GLYPH_DARK_SHADE), stdout);
3018 } else {
3019 fputs(ansi_normal(), stdout);
3020 fputs(special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), stdout);
3021 }
3022
3023 last = bar[i];
3024 }
3025
3026 fputs(ansi_normal(), stdout);
3027 putc('\n', stdout);
3028
3029 for (size_t i = 0; i < context->n_partitions; i++) {
3030 _cleanup_free_ char **line = NULL;
3031
3032 line = new0(char*, c);
3033 if (!line)
3034 return log_oom();
3035
3036 j = 0;
3037 LIST_FOREACH(partitions, p, context->partitions) {
3038 _cleanup_free_ char *d = NULL;
3039 j++;
3040
3041 if (i < context->n_partitions - j) {
3042
3043 if (line[start_array[j-1]]) {
3044 const char *e;
3045
3046 /* Upgrade final corner to the right with a branch to the right */
3047 e = startswith(line[start_array[j-1]], special_glyph(SPECIAL_GLYPH_TREE_RIGHT));
3048 if (e) {
3049 d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_BRANCH), e);
3050 if (!d)
3051 return log_oom();
3052 }
3053 }
3054
3055 if (!d) {
3056 d = strdup(special_glyph(SPECIAL_GLYPH_TREE_VERTICAL));
3057 if (!d)
3058 return log_oom();
3059 }
3060
3061 } else if (i == context->n_partitions - j) {
3062 _cleanup_free_ char *hint = NULL;
3063
3064 (void) partition_hint(p, context->node, &hint);
3065
3066 if (streq_ptr(line[start_array[j-1]], special_glyph(SPECIAL_GLYPH_TREE_VERTICAL)))
3067 d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_BRANCH), " ", strna(hint));
3068 else
3069 d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_RIGHT), " ", strna(hint));
3070
3071 if (!d)
3072 return log_oom();
3073 }
3074
3075 if (d)
3076 free_and_replace(line[start_array[j-1]], d);
3077 }
3078
3079 putc(' ', stdout);
3080
3081 j = 0;
3082 while (j < c) {
3083 if (line[j]) {
3084 fputs(line[j], stdout);
3085 j += utf8_console_width(line[j]);
3086 } else {
3087 putc(' ', stdout);
3088 j++;
3089 }
3090 }
3091
3092 putc('\n', stdout);
3093
3094 for (j = 0; j < c; j++)
3095 free(line[j]);
3096 }
3097
3098 return 0;
3099 }
3100
3101 static bool context_has_roothash(Context *context) {
3102 LIST_FOREACH(partitions, p, context->partitions)
3103 if (iovec_is_set(&p->roothash))
3104 return true;
3105
3106 return false;
3107 }
3108
3109 static int context_dump(Context *context, bool late) {
3110 int r;
3111
3112 assert(context);
3113
3114 if (arg_pretty == 0 && FLAGS_SET(arg_json_format_flags, JSON_FORMAT_OFF))
3115 return 0;
3116
3117 /* If we're outputting JSON, only dump after doing all operations so we can include the roothashes
3118 * in the output. */
3119 if (!late && !FLAGS_SET(arg_json_format_flags, JSON_FORMAT_OFF))
3120 return 0;
3121
3122 /* If we're not outputting JSON, only dump again after doing all operations if there are any
3123 * roothashes that we need to communicate to the user. */
3124 if (late && FLAGS_SET(arg_json_format_flags, JSON_FORMAT_OFF) && !context_has_roothash(context))
3125 return 0;
3126
3127 r = context_dump_partitions(context);
3128 if (r < 0)
3129 return r;
3130
3131 /* Make sure we only write the partition bar once, even if we're writing the partition table twice to
3132 * communicate roothashes. */
3133 if (FLAGS_SET(arg_json_format_flags, JSON_FORMAT_OFF) && !late) {
3134 putc('\n', stdout);
3135
3136 r = context_dump_partition_bar(context);
3137 if (r < 0)
3138 return r;
3139
3140 putc('\n', stdout);
3141 }
3142
3143 fflush(stdout);
3144
3145 return 0;
3146 }
3147
3148
3149 static bool context_changed(const Context *context) {
3150 assert(context);
3151
3152 LIST_FOREACH(partitions, p, context->partitions) {
3153 if (p->dropped)
3154 continue;
3155
3156 if (p->allocated_to_area)
3157 return true;
3158
3159 if (p->new_size != p->current_size)
3160 return true;
3161 }
3162
3163 return false;
3164 }
3165
3166 static int context_wipe_range(Context *context, uint64_t offset, uint64_t size) {
3167 _cleanup_(blkid_free_probep) blkid_probe probe = NULL;
3168 int r;
3169
3170 assert(context);
3171 assert(offset != UINT64_MAX);
3172 assert(size != UINT64_MAX);
3173
3174 probe = blkid_new_probe();
3175 if (!probe)
3176 return log_oom();
3177
3178 errno = 0;
3179 r = blkid_probe_set_device(probe, fdisk_get_devfd(context->fdisk_context), offset, size);
3180 if (r < 0)
3181 return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to allocate device probe for wiping.");
3182
3183 errno = 0;
3184 if (blkid_probe_enable_superblocks(probe, true) < 0 ||
3185 blkid_probe_set_superblocks_flags(probe, BLKID_SUBLKS_MAGIC|BLKID_SUBLKS_BADCSUM) < 0 ||
3186 blkid_probe_enable_partitions(probe, true) < 0 ||
3187 blkid_probe_set_partitions_flags(probe, BLKID_PARTS_MAGIC) < 0)
3188 return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to enable superblock and partition probing.");
3189
3190 for (;;) {
3191 errno = 0;
3192 r = blkid_do_probe(probe);
3193 if (r < 0)
3194 return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe for file systems.");
3195 if (r > 0)
3196 break;
3197
3198 errno = 0;
3199 if (blkid_do_wipe(probe, false) < 0)
3200 return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to wipe file system signature.");
3201 }
3202
3203 return 0;
3204 }
3205
3206 static int context_wipe_partition(Context *context, Partition *p) {
3207 int r;
3208
3209 assert(context);
3210 assert(p);
3211 assert(!PARTITION_EXISTS(p)); /* Safety check: never wipe existing partitions */
3212
3213 assert(p->offset != UINT64_MAX);
3214 assert(p->new_size != UINT64_MAX);
3215
3216 r = context_wipe_range(context, p->offset, p->new_size);
3217 if (r < 0)
3218 return r;
3219
3220 log_info("Successfully wiped file system signatures from future partition %" PRIu64 ".", p->partno);
3221 return 0;
3222 }
3223
3224 static int context_discard_range(
3225 Context *context,
3226 uint64_t offset,
3227 uint64_t size) {
3228
3229 struct stat st;
3230 int fd;
3231
3232 assert(context);
3233 assert(offset != UINT64_MAX);
3234 assert(size != UINT64_MAX);
3235
3236 if (size <= 0)
3237 return 0;
3238
3239 assert_se((fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
3240
3241 if (fstat(fd, &st) < 0)
3242 return -errno;
3243
3244 if (S_ISREG(st.st_mode)) {
3245 if (fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, size) < 0) {
3246 if (ERRNO_IS_NOT_SUPPORTED(errno))
3247 return -EOPNOTSUPP;
3248
3249 return -errno;
3250 }
3251
3252 return 1;
3253 }
3254
3255 if (S_ISBLK(st.st_mode)) {
3256 uint64_t range[2], end;
3257
3258 range[0] = round_up_size(offset, context->sector_size);
3259
3260 if (offset > UINT64_MAX - size)
3261 return -ERANGE;
3262
3263 end = offset + size;
3264 if (end <= range[0])
3265 return 0;
3266
3267 range[1] = round_down_size(end - range[0], context->sector_size);
3268 if (range[1] <= 0)
3269 return 0;
3270
3271 if (ioctl(fd, BLKDISCARD, range) < 0) {
3272 if (ERRNO_IS_NOT_SUPPORTED(errno))
3273 return -EOPNOTSUPP;
3274
3275 return -errno;
3276 }
3277
3278 return 1;
3279 }
3280
3281 return -EOPNOTSUPP;
3282 }
3283
3284 static int context_discard_partition(Context *context, Partition *p) {
3285 int r;
3286
3287 assert(context);
3288 assert(p);
3289
3290 assert(p->offset != UINT64_MAX);
3291 assert(p->new_size != UINT64_MAX);
3292 assert(!PARTITION_EXISTS(p)); /* Safety check: never discard existing partitions */
3293
3294 if (!arg_discard)
3295 return 0;
3296
3297 r = context_discard_range(context, p->offset, p->new_size);
3298 if (r == -EOPNOTSUPP) {
3299 log_info("Storage does not support discard, not discarding data in future partition %" PRIu64 ".", p->partno);
3300 return 0;
3301 }
3302 if (r == -EBUSY) {
3303 /* Let's handle this gracefully: https://bugzilla.kernel.org/show_bug.cgi?id=211167 */
3304 log_info("Block device is busy, not discarding partition %" PRIu64 " because it probably is mounted.", p->partno);
3305 return 0;
3306 }
3307 if (r == 0) {
3308 log_info("Partition %" PRIu64 " too short for discard, skipping.", p->partno);
3309 return 0;
3310 }
3311 if (r < 0)
3312 return log_error_errno(r, "Failed to discard data for future partition %" PRIu64 ".", p->partno);
3313
3314 log_info("Successfully discarded data from future partition %" PRIu64 ".", p->partno);
3315 return 1;
3316 }
3317
3318 static int context_discard_gap_after(Context *context, Partition *p) {
3319 uint64_t gap, next = UINT64_MAX;
3320 int r;
3321
3322 assert(context);
3323 assert(!p || (p->offset != UINT64_MAX && p->new_size != UINT64_MAX));
3324
3325 if (!arg_discard)
3326 return 0;
3327
3328 if (p)
3329 gap = p->offset + p->new_size;
3330 else
3331 /* The context start gets rounded up to grain_size, however
3332 * existing partitions may be before that so ensure the gap
3333 * starts at the first actually usable lba
3334 */
3335 gap = fdisk_get_first_lba(context->fdisk_context) * context->sector_size;
3336
3337 LIST_FOREACH(partitions, q, context->partitions) {
3338 if (q->dropped)
3339 continue;
3340
3341 assert(q->offset != UINT64_MAX);
3342 assert(q->new_size != UINT64_MAX);
3343
3344 if (q->offset < gap)
3345 continue;
3346
3347 if (next == UINT64_MAX || q->offset < next)
3348 next = q->offset;
3349 }
3350
3351 if (next == UINT64_MAX) {
3352 next = (fdisk_get_last_lba(context->fdisk_context) + 1) * context->sector_size;
3353 if (gap > next)
3354 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Partition end beyond disk end.");
3355 }
3356
3357 assert(next >= gap);
3358 r = context_discard_range(context, gap, next - gap);
3359 if (r == -EOPNOTSUPP) {
3360 if (p)
3361 log_info("Storage does not support discard, not discarding gap after partition %" PRIu64 ".", p->partno);
3362 else
3363 log_info("Storage does not support discard, not discarding gap at beginning of disk.");
3364 return 0;
3365 }
3366 if (r == 0) /* Too short */
3367 return 0;
3368 if (r < 0) {
3369 if (p)
3370 return log_error_errno(r, "Failed to discard gap after partition %" PRIu64 ".", p->partno);
3371 else
3372 return log_error_errno(r, "Failed to discard gap at beginning of disk.");
3373 }
3374
3375 if (p)
3376 log_info("Successfully discarded gap after partition %" PRIu64 ".", p->partno);
3377 else
3378 log_info("Successfully discarded gap at beginning of disk.");
3379
3380 return 0;
3381 }
3382
3383 static int context_wipe_and_discard(Context *context) {
3384 int r;
3385
3386 assert(context);
3387
3388 if (arg_empty == EMPTY_CREATE) /* If we just created the image, no need to wipe */
3389 return 0;
3390
3391 /* Wipe and discard the contents of all partitions we are about to create. We skip the discarding if
3392 * we were supposed to start from scratch anyway, as in that case we just discard the whole block
3393 * device in one go early on. */
3394
3395 LIST_FOREACH(partitions, p, context->partitions) {
3396
3397 if (!p->allocated_to_area)
3398 continue;
3399
3400 if (partition_type_defer(&p->type))
3401 continue;
3402
3403 r = context_wipe_partition(context, p);
3404 if (r < 0)
3405 return r;
3406
3407 if (!context->from_scratch) {
3408 r = context_discard_partition(context, p);
3409 if (r < 0)
3410 return r;
3411
3412 r = context_discard_gap_after(context, p);
3413 if (r < 0)
3414 return r;
3415 }
3416 }
3417
3418 if (!context->from_scratch) {
3419 r = context_discard_gap_after(context, NULL);
3420 if (r < 0)
3421 return r;
3422 }
3423
3424 return 0;
3425 }
3426
/* State of an activated (unlocked) LUKS volume layered on top of a partition target. Released with
 * decrypted_partition_target_free(). */
typedef struct DecryptedPartitionTarget {
        int fd;                       /* open file descriptor of the unlocked volume */
        char *dm_name;                /* name the volume was activated under; used to deactivate it again */
        char *volume;                 /* path of the unlocked volume */
        struct crypt_device *device;  /* libcryptsetup context the volume belongs to */
} DecryptedPartitionTarget;
3433
3434 static DecryptedPartitionTarget* decrypted_partition_target_free(DecryptedPartitionTarget *t) {
3435 #if HAVE_LIBCRYPTSETUP
3436 int r;
3437
3438 if (!t)
3439 return NULL;
3440
3441 safe_close(t->fd);
3442
3443 /* udev or so might access out block device in the background while we are done. Let's hence
3444 * force detach the volume. We sync'ed before, hence this should be safe. */
3445 r = sym_crypt_deactivate_by_name(t->device, t->dm_name, CRYPT_DEACTIVATE_FORCE);
3446 if (r < 0)
3447 log_warning_errno(r, "Failed to deactivate LUKS device, ignoring: %m");
3448
3449 sym_crypt_free(t->device);
3450 free(t->dm_name);
3451 free(t->volume);
3452 free(t);
3453 #endif
3454 return NULL;
3455 }
3456
3457 typedef struct {
3458 LoopDevice *loop;
3459 int fd;
3460 char *path;
3461 int whole_fd;
3462 DecryptedPartitionTarget *decrypted;
3463 } PartitionTarget;
3464
3465 static int partition_target_fd(PartitionTarget *t) {
3466 assert(t);
3467 assert(t->loop || t->fd >= 0 || t->whole_fd >= 0);
3468
3469 if (t->decrypted)
3470 return t->decrypted->fd;
3471
3472 if (t->loop)
3473 return t->loop->fd;
3474
3475 if (t->fd >= 0)
3476 return t->fd;
3477
3478 return t->whole_fd;
3479 }
3480
3481 static const char* partition_target_path(PartitionTarget *t) {
3482 assert(t);
3483 assert(t->loop || t->path);
3484
3485 if (t->decrypted)
3486 return t->decrypted->volume;
3487
3488 if (t->loop)
3489 return t->loop->node;
3490
3491 return t->path;
3492 }
3493
3494 static PartitionTarget *partition_target_free(PartitionTarget *t) {
3495 if (!t)
3496 return NULL;
3497
3498 decrypted_partition_target_free(t->decrypted);
3499 loop_device_unref(t->loop);
3500 safe_close(t->fd);
3501 unlink_and_free(t->path);
3502
3503 return mfree(t);
3504 }
3505
3506 DEFINE_TRIVIAL_CLEANUP_FUNC(PartitionTarget*, partition_target_free);
3507
3508 static int prepare_temporary_file(PartitionTarget *t, uint64_t size) {
3509 _cleanup_(unlink_and_freep) char *temp = NULL;
3510 _cleanup_close_ int fd = -EBADF;
3511 const char *vt;
3512 int r;
3513
3514 assert(t);
3515
3516 r = var_tmp_dir(&vt);
3517 if (r < 0)
3518 return log_error_errno(r, "Could not determine temporary directory: %m");
3519
3520 temp = path_join(vt, "repart-XXXXXX");
3521 if (!temp)
3522 return log_oom();
3523
3524 fd = mkostemp_safe(temp);
3525 if (fd < 0)
3526 return log_error_errno(fd, "Failed to create temporary file: %m");
3527
3528 if (ftruncate(fd, size) < 0)
3529 return log_error_errno(errno, "Failed to truncate temporary file to %s: %m",
3530 FORMAT_BYTES(size));
3531
3532 t->fd = TAKE_FD(fd);
3533 t->path = TAKE_PTR(temp);
3534
3535 return 0;
3536 }
3537
3538 static int partition_target_prepare(
3539 Context *context,
3540 Partition *p,
3541 uint64_t size,
3542 bool need_path,
3543 PartitionTarget **ret) {
3544
3545 _cleanup_(partition_target_freep) PartitionTarget *t = NULL;
3546 _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
3547 int whole_fd, r;
3548
3549 assert(context);
3550 assert(p);
3551 assert(ret);
3552
3553 assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
3554
3555 t = new(PartitionTarget, 1);
3556 if (!t)
3557 return log_oom();
3558 *t = (PartitionTarget) {
3559 .fd = -EBADF,
3560 .whole_fd = -EBADF,
3561 };
3562
3563 if (!need_path) {
3564 if (lseek(whole_fd, p->offset, SEEK_SET) == (off_t) -1)
3565 return log_error_errno(errno, "Failed to seek to partition offset: %m");
3566
3567 t->whole_fd = whole_fd;
3568 *ret = TAKE_PTR(t);
3569 return 0;
3570 }
3571
3572 /* Loopback block devices are not only useful to turn regular files into block devices, but
3573 * also to cut out sections of block devices into new block devices. */
3574
3575 if (arg_offline <= 0) {
3576 r = loop_device_make(whole_fd, O_RDWR, p->offset, size, context->sector_size, 0, LOCK_EX, &d);
3577 if (r < 0 && (arg_offline == 0 || (r != -ENOENT && !ERRNO_IS_PRIVILEGE(r)) || !strv_isempty(p->subvolumes)))
3578 return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno);
3579 if (r >= 0) {
3580 t->loop = TAKE_PTR(d);
3581 *ret = TAKE_PTR(t);
3582 return 0;
3583 }
3584
3585 log_debug_errno(r, "No access to loop devices, falling back to a regular file");
3586 }
3587
3588 /* If we can't allocate a loop device, let's write to a regular file that we copy into the final
3589 * image so we can run in containers and without needing root privileges. On filesystems with
3590 * reflinking support, we can take advantage of this and just reflink the result into the image.
3591 */
3592
3593 r = prepare_temporary_file(t, size);
3594 if (r < 0)
3595 return r;
3596
3597 *ret = TAKE_PTR(t);
3598
3599 return 0;
3600 }
3601
3602 static int partition_target_grow(PartitionTarget *t, uint64_t size) {
3603 int r;
3604
3605 assert(t);
3606 assert(!t->decrypted);
3607
3608 if (t->loop) {
3609 r = loop_device_refresh_size(t->loop, UINT64_MAX, size);
3610 if (r < 0)
3611 return log_error_errno(r, "Failed to refresh loopback device size: %m");
3612 } else if (t->fd >= 0) {
3613 if (ftruncate(t->fd, size) < 0)
3614 return log_error_errno(errno, "Failed to grow '%s' to %s by truncation: %m",
3615 t->path, FORMAT_BYTES(size));
3616 }
3617
3618 return 0;
3619 }
3620
3621 static int partition_target_sync(Context *context, Partition *p, PartitionTarget *t) {
3622 int whole_fd, r;
3623
3624 assert(context);
3625 assert(p);
3626 assert(t);
3627
3628 assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
3629
3630 if (t->decrypted && fsync(t->decrypted->fd) < 0)
3631 return log_error_errno(errno, "Failed to sync changes to '%s': %m", t->decrypted->volume);
3632
3633 if (t->loop) {
3634 r = loop_device_sync(t->loop);
3635 if (r < 0)
3636 return log_error_errno(r, "Failed to sync loopback device: %m");
3637 } else if (t->fd >= 0) {
3638 struct stat st;
3639
3640 if (lseek(whole_fd, p->offset, SEEK_SET) == (off_t) -1)
3641 return log_error_errno(errno, "Failed to seek to partition offset: %m");
3642
3643 if (lseek(t->fd, 0, SEEK_SET) == (off_t) -1)
3644 return log_error_errno(errno, "Failed to seek to start of temporary file: %m");
3645
3646 if (fstat(t->fd, &st) < 0)
3647 return log_error_errno(errno, "Failed to stat temporary file: %m");
3648
3649 if (st.st_size > (off_t) p->new_size)
3650 return log_error_errno(SYNTHETIC_ERRNO(ENOSPC),
3651 "Partition %" PRIu64 "'s contents (%s) don't fit in the partition (%s)",
3652 p->partno, FORMAT_BYTES(st.st_size), FORMAT_BYTES(p->new_size));
3653
3654 r = copy_bytes(t->fd, whole_fd, UINT64_MAX, COPY_REFLINK|COPY_HOLES|COPY_FSYNC);
3655 if (r < 0)
3656 return log_error_errno(r, "Failed to copy bytes to partition: %m");
3657 } else {
3658 if (fsync(t->whole_fd) < 0)
3659 return log_error_errno(errno, "Failed to sync changes: %m");
3660 }
3661
3662 return 0;
3663 }
3664
3665 static int partition_encrypt(Context *context, Partition *p, PartitionTarget *target, bool offline) {
3666 #if HAVE_LIBCRYPTSETUP && HAVE_CRYPT_SET_DATA_OFFSET && HAVE_CRYPT_REENCRYPT_INIT_BY_PASSPHRASE && HAVE_CRYPT_REENCRYPT
3667 const char *node = partition_target_path(target);
3668 struct crypt_params_luks2 luks_params = {
3669 .label = strempty(ASSERT_PTR(p)->new_label),
3670 .sector_size = ASSERT_PTR(context)->fs_sector_size,
3671 .data_device = offline ? node : NULL,
3672 };
3673 struct crypt_params_reencrypt reencrypt_params = {
3674 .mode = CRYPT_REENCRYPT_ENCRYPT,
3675 .direction = CRYPT_REENCRYPT_BACKWARD,
3676 .resilience = "datashift",
3677 .data_shift = LUKS2_METADATA_SIZE / 512,
3678 .luks2 = &luks_params,
3679 .flags = CRYPT_REENCRYPT_INITIALIZE_ONLY|CRYPT_REENCRYPT_MOVE_FIRST_SEGMENT,
3680 };
3681 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
3682 _cleanup_(erase_and_freep) char *base64_encoded = NULL;
3683 _cleanup_fclose_ FILE *h = NULL;
3684 _cleanup_free_ char *hp = NULL, *vol = NULL, *dm_name = NULL;
3685 const char *passphrase = NULL;
3686 size_t passphrase_size = 0;
3687 const char *vt;
3688 int r;
3689
3690 assert(context);
3691 assert(p);
3692 assert(p->encrypt != ENCRYPT_OFF);
3693
3694 r = dlopen_cryptsetup();
3695 if (r < 0)
3696 return log_error_errno(r, "libcryptsetup not found, cannot encrypt: %m");
3697
3698 log_info("Encrypting future partition %" PRIu64 "...", p->partno);
3699
3700 if (offline) {
3701 r = var_tmp_dir(&vt);
3702 if (r < 0)
3703 return log_error_errno(r, "Failed to determine temporary files directory: %m");
3704
3705 r = fopen_temporary_child(vt, &h, &hp);
3706 if (r < 0)
3707 return log_error_errno(r, "Failed to create temporary LUKS header file: %m");
3708
3709 /* Weird cryptsetup requirement which requires the header file to be the size of at least one
3710 * sector. */
3711 r = ftruncate(fileno(h), luks_params.sector_size);
3712 if (r < 0)
3713 return log_error_errno(r, "Failed to grow temporary LUKS header file: %m");
3714 } else {
3715 if (asprintf(&dm_name, "luks-repart-%08" PRIx64, random_u64()) < 0)
3716 return log_oom();
3717
3718 vol = path_join("/dev/mapper/", dm_name);
3719 if (!vol)
3720 return log_oom();
3721 }
3722
3723 r = sym_crypt_init(&cd, offline ? hp : node);
3724 if (r < 0)
3725 return log_error_errno(r, "Failed to allocate libcryptsetup context for %s: %m", hp);
3726
3727 cryptsetup_enable_logging(cd);
3728
3729 if (offline) {
3730 /* Disable kernel keyring usage by libcryptsetup as a workaround for
3731 * https://gitlab.com/cryptsetup/cryptsetup/-/merge_requests/273. This makes sure that we can
3732 * do offline encryption even when repart is running in a container. */
3733 r = sym_crypt_volume_key_keyring(cd, false);
3734 if (r < 0)
3735 return log_error_errno(r, "Failed to disable kernel keyring: %m");
3736
3737 r = sym_crypt_metadata_locking(cd, false);
3738 if (r < 0)
3739 return log_error_errno(r, "Failed to disable metadata locking: %m");
3740
3741 r = sym_crypt_set_data_offset(cd, LUKS2_METADATA_SIZE / 512);
3742 if (r < 0)
3743 return log_error_errno(r, "Failed to set data offset: %m");
3744 }
3745
3746 r = sym_crypt_format(cd,
3747 CRYPT_LUKS2,
3748 "aes",
3749 "xts-plain64",
3750 SD_ID128_TO_UUID_STRING(p->luks_uuid),
3751 NULL,
3752 VOLUME_KEY_SIZE,
3753 &luks_params);
3754 if (r < 0)
3755 return log_error_errno(r, "Failed to LUKS2 format future partition: %m");
3756
3757 if (IN_SET(p->encrypt, ENCRYPT_KEY_FILE, ENCRYPT_KEY_FILE_TPM2)) {
3758 r = sym_crypt_keyslot_add_by_volume_key(
3759 cd,
3760 CRYPT_ANY_SLOT,
3761 NULL,
3762 VOLUME_KEY_SIZE,
3763 strempty(arg_key),
3764 arg_key_size);
3765 if (r < 0)
3766 return log_error_errno(r, "Failed to add LUKS2 key: %m");
3767
3768 passphrase = strempty(arg_key);
3769 passphrase_size = arg_key_size;
3770 }
3771
3772 if (IN_SET(p->encrypt, ENCRYPT_TPM2, ENCRYPT_KEY_FILE_TPM2)) {
3773 #if HAVE_TPM2
3774 _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
3775 _cleanup_(erase_and_freep) void *secret = NULL;
3776 _cleanup_free_ void *pubkey = NULL;
3777 _cleanup_free_ void *blob = NULL, *srk_buf = NULL;
3778 size_t secret_size, blob_size, pubkey_size = 0, srk_buf_size = 0;
3779 ssize_t base64_encoded_size;
3780 int keyslot;
3781
3782 if (arg_tpm2_public_key_pcr_mask != 0) {
3783 r = tpm2_load_pcr_public_key(arg_tpm2_public_key, &pubkey, &pubkey_size);
3784 if (r < 0) {
3785 if (arg_tpm2_public_key || r != -ENOENT)
3786 return log_error_errno(r, "Failed read TPM PCR public key: %m");
3787
3788 log_debug_errno(r, "Failed to read TPM2 PCR public key, proceeding without: %m");
3789 arg_tpm2_public_key_pcr_mask = 0;
3790 }
3791 }
3792
3793 _cleanup_(tpm2_context_unrefp) Tpm2Context *tpm2_context = NULL;
3794 r = tpm2_context_new(arg_tpm2_device, &tpm2_context);
3795 if (r < 0)
3796 return log_error_errno(r, "Failed to create TPM2 context: %m");
3797
3798 TPM2B_PUBLIC public;
3799 if (pubkey) {
3800 r = tpm2_tpm2b_public_from_pem(pubkey, pubkey_size, &public);
3801 if (r < 0)
3802 return log_error_errno(r, "Could not convert public key to TPM2B_PUBLIC: %m");
3803 }
3804
3805 r = tpm2_pcr_read_missing_values(tpm2_context, arg_tpm2_hash_pcr_values, arg_tpm2_n_hash_pcr_values);
3806 if (r < 0)
3807 return log_error_errno(r, "Could not read pcr values: %m");
3808
3809 uint16_t hash_pcr_bank = 0;
3810 uint32_t hash_pcr_mask = 0;
3811 if (arg_tpm2_n_hash_pcr_values > 0) {
3812 size_t hash_count;
3813 r = tpm2_pcr_values_hash_count(arg_tpm2_hash_pcr_values, arg_tpm2_n_hash_pcr_values, &hash_count);
3814 if (r < 0)
3815 return log_error_errno(r, "Could not get hash count: %m");
3816
3817 if (hash_count > 1)
3818 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Multiple PCR banks selected.");
3819
3820 hash_pcr_bank = arg_tpm2_hash_pcr_values[0].hash;
3821 r = tpm2_pcr_values_to_mask(arg_tpm2_hash_pcr_values, arg_tpm2_n_hash_pcr_values, hash_pcr_bank, &hash_pcr_mask);
3822 if (r < 0)
3823 return log_error_errno(r, "Could not get hash mask: %m");
3824 }
3825
3826 TPM2B_DIGEST policy = TPM2B_DIGEST_MAKE(NULL, TPM2_SHA256_DIGEST_SIZE);
3827 r = tpm2_calculate_sealing_policy(arg_tpm2_hash_pcr_values, arg_tpm2_n_hash_pcr_values, pubkey ? &public : NULL, /* use_pin= */ false, &policy);
3828 if (r < 0)
3829 return log_error_errno(r, "Could not calculate sealing policy digest: %m");
3830
3831 r = tpm2_seal(tpm2_context,
3832 /* seal_key_handle= */ 0,
3833 &policy,
3834 /* pin= */ NULL,
3835 &secret, &secret_size,
3836 &blob, &blob_size,
3837 /* ret_primary_alg= */ NULL,
3838 &srk_buf, &srk_buf_size);
3839 if (r < 0)
3840 return log_error_errno(r, "Failed to seal to TPM2: %m");
3841
3842 base64_encoded_size = base64mem(secret, secret_size, &base64_encoded);
3843 if (base64_encoded_size < 0)
3844 return log_error_errno(base64_encoded_size, "Failed to base64 encode secret key: %m");
3845
3846 r = cryptsetup_set_minimal_pbkdf(cd);
3847 if (r < 0)
3848 return log_error_errno(r, "Failed to set minimal PBKDF: %m");
3849
3850 keyslot = sym_crypt_keyslot_add_by_volume_key(
3851 cd,
3852 CRYPT_ANY_SLOT,
3853 NULL,
3854 VOLUME_KEY_SIZE,
3855 base64_encoded,
3856 base64_encoded_size);
3857 if (keyslot < 0)
3858 return log_error_errno(keyslot, "Failed to add new TPM2 key: %m");
3859
3860 r = tpm2_make_luks2_json(
3861 keyslot,
3862 hash_pcr_mask,
3863 hash_pcr_bank,
3864 pubkey, pubkey_size,
3865 arg_tpm2_public_key_pcr_mask,
3866 /* primary_alg= */ 0,
3867 blob, blob_size,
3868 policy.buffer, policy.size,
3869 NULL, 0, /* no salt because tpm2_seal has no pin */
3870 srk_buf, srk_buf_size,
3871 0,
3872 &v);
3873 if (r < 0)
3874 return log_error_errno(r, "Failed to prepare TPM2 JSON token object: %m");
3875
3876 r = cryptsetup_add_token_json(cd, v);
3877 if (r < 0)
3878 return log_error_errno(r, "Failed to add TPM2 JSON token to LUKS2 header: %m");
3879
3880 passphrase = base64_encoded;
3881 passphrase_size = strlen(base64_encoded);
3882 #else
3883 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
3884 "Support for TPM2 enrollment not enabled.");
3885 #endif
3886 }
3887
3888 if (offline) {
3889 r = sym_crypt_reencrypt_init_by_passphrase(
3890 cd,
3891 NULL,
3892 passphrase,
3893 passphrase_size,
3894 CRYPT_ANY_SLOT,
3895 0,
3896 sym_crypt_get_cipher(cd),
3897 sym_crypt_get_cipher_mode(cd),
3898 &reencrypt_params);
3899 if (r < 0)
3900 return log_error_errno(r, "Failed to prepare for reencryption: %m");
3901
3902 /* crypt_reencrypt_init_by_passphrase() doesn't actually put the LUKS header at the front, we
3903 * have to do that ourselves. */
3904
3905 sym_crypt_free(cd);
3906 cd = NULL;
3907
3908 r = sym_crypt_init(&cd, node);
3909 if (r < 0)
3910 return log_error_errno(r, "Failed to allocate libcryptsetup context for %s: %m", node);
3911
3912 r = sym_crypt_header_restore(cd, CRYPT_LUKS2, hp);
3913 if (r < 0)
3914 return log_error_errno(r, "Failed to place new LUKS header at head of %s: %m", node);
3915
3916 reencrypt_params.flags &= ~CRYPT_REENCRYPT_INITIALIZE_ONLY;
3917
3918 r = sym_crypt_reencrypt_init_by_passphrase(
3919 cd,
3920 NULL,
3921 passphrase,
3922 passphrase_size,
3923 CRYPT_ANY_SLOT,
3924 0,
3925 NULL,
3926 NULL,
3927 &reencrypt_params);
3928 if (r < 0)
3929 return log_error_errno(r, "Failed to load reencryption context: %m");
3930
3931 r = sym_crypt_reencrypt(cd, NULL);
3932 if (r < 0)
3933 return log_error_errno(r, "Failed to encrypt %s: %m", node);
3934 } else {
3935 _cleanup_free_ DecryptedPartitionTarget *t = NULL;
3936 _cleanup_close_ int dev_fd = -1;
3937
3938 r = sym_crypt_activate_by_volume_key(
3939 cd,
3940 dm_name,
3941 NULL,
3942 VOLUME_KEY_SIZE,
3943 arg_discard ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0);
3944 if (r < 0)
3945 return log_error_errno(r, "Failed to activate LUKS superblock: %m");
3946
3947 dev_fd = open(vol, O_RDWR|O_CLOEXEC|O_NOCTTY);
3948 if (dev_fd < 0)
3949 return log_error_errno(errno, "Failed to open LUKS volume '%s': %m", vol);
3950
3951 if (flock(dev_fd, LOCK_EX) < 0)
3952 return log_error_errno(errno, "Failed to lock '%s': %m", vol);
3953
3954 t = new(DecryptedPartitionTarget, 1);
3955 if (!t)
3956 return log_oom();
3957
3958 *t = (DecryptedPartitionTarget) {
3959 .fd = TAKE_FD(dev_fd),
3960 .dm_name = TAKE_PTR(dm_name),
3961 .volume = TAKE_PTR(vol),
3962 .device = TAKE_PTR(cd),
3963 };
3964
3965 target->decrypted = TAKE_PTR(t);
3966 }
3967
3968 log_info("Successfully encrypted future partition %" PRIu64 ".", p->partno);
3969
3970 return 0;
3971 #else
3972 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
3973 "libcryptsetup is not supported or is missing required symbols, cannot encrypt: %m");
3974 #endif
3975 }
3976
3977 static int partition_format_verity_hash(
3978 Context *context,
3979 Partition *p,
3980 const char *node,
3981 const char *data_node) {
3982
3983 #if HAVE_LIBCRYPTSETUP
3984 Partition *dp;
3985 _cleanup_(partition_target_freep) PartitionTarget *t = NULL;
3986 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
3987 _cleanup_free_ char *hint = NULL;
3988 int r;
3989
3990 assert(context);
3991 assert(p);
3992 assert(p->verity == VERITY_HASH);
3993 assert(data_node);
3994
3995 if (p->dropped)
3996 return 0;
3997
3998 if (PARTITION_EXISTS(p)) /* Never format existing partitions */
3999 return 0;
4000
4001 /* Minimized partitions will use the copy blocks logic so let's make sure to skip those here. */
4002 if (p->copy_blocks_fd >= 0)
4003 return 0;
4004
4005 assert_se(dp = p->siblings[VERITY_DATA]);
4006 assert(!dp->dropped);
4007
4008 (void) partition_hint(p, node, &hint);
4009
4010 r = dlopen_cryptsetup();
4011 if (r < 0)
4012 return log_error_errno(r, "libcryptsetup not found, cannot setup verity: %m");
4013
4014 if (!node) {
4015 r = partition_target_prepare(context, p, p->new_size, /*need_path=*/ true, &t);
4016 if (r < 0)
4017 return r;
4018
4019 node = partition_target_path(t);
4020 }
4021
4022 if (p->verity_data_block_size == UINT64_MAX)
4023 p->verity_data_block_size = context->fs_sector_size;
4024 if (p->verity_hash_block_size == UINT64_MAX)
4025 p->verity_hash_block_size = context->fs_sector_size;
4026
4027 r = sym_crypt_init(&cd, node);
4028 if (r < 0)
4029 return log_error_errno(r, "Failed to allocate libcryptsetup context for %s: %m", node);
4030
4031 cryptsetup_enable_logging(cd);
4032
4033 r = sym_crypt_format(
4034 cd, CRYPT_VERITY, NULL, NULL, SD_ID128_TO_UUID_STRING(p->verity_uuid), NULL, 0,
4035 &(struct crypt_params_verity){
4036 .data_device = data_node,
4037 .flags = CRYPT_VERITY_CREATE_HASH,
4038 .hash_name = "sha256",
4039 .hash_type = 1,
4040 .data_block_size = p->verity_data_block_size,
4041 .hash_block_size = p->verity_hash_block_size,
4042 .salt_size = sizeof(p->verity_salt),
4043 .salt = (const char*)p->verity_salt,
4044 });
4045 if (r < 0) {
4046 /* libcryptsetup reports non-descriptive EIO errors for every I/O failure. Luckily, it
4047 * doesn't clobber errno so let's check for ENOSPC so we can report a better error if the
4048 * partition is too small. */
4049 if (r == -EIO && errno == ENOSPC)
4050 return log_error_errno(errno,
4051 "Verity hash data does not fit in partition %s with size %s",
4052 strna(hint), FORMAT_BYTES(p->new_size));
4053
4054 return log_error_errno(r, "Failed to setup verity hash data of partition %s: %m", strna(hint));
4055 }
4056
4057 if (t) {
4058 r = partition_target_sync(context, p, t);
4059 if (r < 0)
4060 return r;
4061 }
4062
4063 r = sym_crypt_get_volume_key_size(cd);
4064 if (r < 0)
4065 return log_error_errno(r, "Failed to determine verity root hash size of partition %s: %m", strna(hint));
4066
4067 _cleanup_(iovec_done) struct iovec rh = {
4068 .iov_base = malloc(r),
4069 .iov_len = r,
4070 };
4071 if (!rh.iov_base)
4072 return log_oom();
4073
4074 r = sym_crypt_volume_key_get(cd, CRYPT_ANY_SLOT, (char *) rh.iov_base, &rh.iov_len, NULL, 0);
4075 if (r < 0)
4076 return log_error_errno(r, "Failed to get verity root hash of partition %s: %m", strna(hint));
4077
4078 assert(rh.iov_len >= sizeof(sd_id128_t) * 2);
4079
4080 if (!dp->new_uuid_is_set) {
4081 memcpy_safe(dp->new_uuid.bytes, rh.iov_base, sizeof(sd_id128_t));
4082 dp->new_uuid_is_set = true;
4083 }
4084
4085 if (!p->new_uuid_is_set) {
4086 memcpy_safe(p->new_uuid.bytes, (uint8_t*) rh.iov_base + (rh.iov_len - sizeof(sd_id128_t)), sizeof(sd_id128_t));
4087 p->new_uuid_is_set = true;
4088 }
4089
4090 p->roothash = TAKE_STRUCT(rh);
4091
4092 return 0;
4093 #else
4094 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "libcryptsetup is not supported, cannot setup verity hashes: %m");
4095 #endif
4096 }
4097
/* Produces a detached PKCS#7 signature over the verity root hash.
 *
 * roothash:      the root hash to sign; must be set (non-empty).
 * ret_signature: on success receives the DER-encoded signature; the buffer is handed over to the
 *                caller.
 *
 * Signing uses the global arg_certificate/arg_private_key. Returns 0 on success, a negative
 * errno-style value on failure (already logged). Without OpenSSL support this always fails with
 * -EOPNOTSUPP. */
static int sign_verity_roothash(
                const struct iovec *roothash,
                struct iovec *ret_signature) {

#if HAVE_OPENSSL
        _cleanup_(BIO_freep) BIO *rb = NULL;
        _cleanup_(PKCS7_freep) PKCS7 *p7 = NULL;
        _cleanup_free_ char *hex = NULL;
        _cleanup_free_ uint8_t *sig = NULL;
        int sigsz;

        assert(roothash);
        assert(iovec_is_set(roothash));
        assert(ret_signature);

        /* The data fed to PKCS7_sign() is the hex string produced by hexmem(), not the raw hash
         * bytes. */
        hex = hexmem(roothash->iov_base, roothash->iov_len);
        if (!hex)
                return log_oom();

        /* A length of -1 makes OpenSSL determine the size via strlen(). */
        rb = BIO_new_mem_buf(hex, -1);
        if (!rb)
                return log_oom();

        p7 = PKCS7_sign(arg_certificate, arg_private_key, NULL, rb, PKCS7_DETACHED|PKCS7_NOATTR|PKCS7_BINARY);
        if (!p7)
                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to calculate PKCS7 signature: %s",
                                       ERR_error_string(ERR_get_error(), NULL));

        /* Since 'sig' is NULL on entry, i2d_PKCS7() allocates the output buffer for us. */
        sigsz = i2d_PKCS7(p7, &sig);
        if (sigsz < 0)
                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to convert PKCS7 signature to DER: %s",
                                       ERR_error_string(ERR_get_error(), NULL));

        ret_signature->iov_base = TAKE_PTR(sig);
        ret_signature->iov_len = sigsz;

        return 0;
#else
        /* No "%m" here: the error number is synthetic, so its strerror() text would only repeat
         * what the message already says. */
        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "openssl is not supported, cannot setup verity signature.");
#endif
}
4139
4140 static int partition_format_verity_sig(Context *context, Partition *p) {
4141 _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
4142 _cleanup_(iovec_done) struct iovec sig = {};
4143 _cleanup_free_ char *text = NULL, *hint = NULL;
4144 Partition *hp;
4145 uint8_t fp[X509_FINGERPRINT_SIZE];
4146 int whole_fd, r;
4147
4148 assert(p->verity == VERITY_SIG);
4149
4150 if (p->dropped)
4151 return 0;
4152
4153 if (PARTITION_EXISTS(p))
4154 return 0;
4155
4156 (void) partition_hint(p, context->node, &hint);
4157
4158 assert_se(hp = p->siblings[VERITY_HASH]);
4159 assert(!hp->dropped);
4160
4161 assert(arg_certificate);
4162
4163 assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
4164
4165 r = sign_verity_roothash(&hp->roothash, &sig);
4166 if (r < 0)
4167 return r;
4168
4169 r = x509_fingerprint(arg_certificate, fp);
4170 if (r < 0)
4171 return log_error_errno(r, "Unable to calculate X509 certificate fingerprint: %m");
4172
4173 r = json_build(&v,
4174 JSON_BUILD_OBJECT(
4175 JSON_BUILD_PAIR("rootHash", JSON_BUILD_HEX(hp->roothash.iov_base, hp->roothash.iov_len)),
4176 JSON_BUILD_PAIR(
4177 "certificateFingerprint",
4178 JSON_BUILD_HEX(fp, sizeof(fp))
4179 ),
4180 JSON_BUILD_PAIR("signature", JSON_BUILD_IOVEC_BASE64(&sig))
4181 )
4182 );
4183 if (r < 0)
4184 return log_error_errno(r, "Failed to build verity signature JSON object: %m");
4185
4186 r = json_variant_format(v, 0, &text);
4187 if (r < 0)
4188 return log_error_errno(r, "Failed to format verity signature JSON object: %m");
4189
4190 if (strlen(text)+1 > p->new_size)
4191 return log_error_errno(SYNTHETIC_ERRNO(E2BIG), "Verity signature too long for partition: %m");
4192
4193 r = strgrowpad0(&text, p->new_size);
4194 if (r < 0)
4195 return log_error_errno(r, "Failed to pad string to %s", FORMAT_BYTES(p->new_size));
4196
4197 if (lseek(whole_fd, p->offset, SEEK_SET) == (off_t) -1)
4198 return log_error_errno(errno, "Failed to seek to partition %s offset: %m", strna(hint));
4199
4200 r = loop_write(whole_fd, text, p->new_size);
4201 if (r < 0)
4202 return log_error_errno(r, "Failed to write verity signature to partition %s: %m", strna(hint));
4203
4204 if (fsync(whole_fd) < 0)
4205 return log_error_errno(errno, "Failed to synchronize partition %s: %m", strna(hint));
4206
4207 return 0;
4208 }
4209
4210 static int context_copy_blocks(Context *context) {
4211 int r;
4212
4213 assert(context);
4214
4215 /* Copy in file systems on the block level */
4216
4217 LIST_FOREACH(partitions, p, context->partitions) {
4218 _cleanup_(partition_target_freep) PartitionTarget *t = NULL;
4219
4220 if (p->copy_blocks_fd < 0)
4221 continue;
4222
4223 if (p->dropped)
4224 continue;
4225
4226 if (PARTITION_EXISTS(p)) /* Never copy over existing partitions */
4227 continue;
4228
4229 if (partition_type_defer(&p->type))
4230 continue;
4231
4232 assert(p->new_size != UINT64_MAX);
4233 assert(p->copy_blocks_size != UINT64_MAX);
4234 assert(p->new_size >= p->copy_blocks_size + (p->encrypt != ENCRYPT_OFF ? LUKS2_METADATA_KEEP_FREE : 0));
4235
4236 r = partition_target_prepare(context, p, p->new_size,
4237 /*need_path=*/ p->encrypt != ENCRYPT_OFF || p->siblings[VERITY_HASH],
4238 &t);
4239 if (r < 0)
4240 return r;
4241
4242 if (p->encrypt != ENCRYPT_OFF && t->loop) {
4243 r = partition_encrypt(context, p, t, /* offline = */ false);
4244 if (r < 0)
4245 return r;
4246 }
4247
4248 log_info("Copying in '%s' (%s) on block level into future partition %" PRIu64 ".",
4249 p->copy_blocks_path, FORMAT_BYTES(p->copy_blocks_size), p->partno);
4250
4251 if (p->copy_blocks_offset != UINT64_MAX && lseek(p->copy_blocks_fd, p->copy_blocks_offset, SEEK_SET) < 0)
4252 return log_error_errno(errno, "Failed to seek to copy blocks offset in %s: %m", p->copy_blocks_path);
4253
4254 r = copy_bytes(p->copy_blocks_fd, partition_target_fd(t), p->copy_blocks_size, COPY_REFLINK);
4255 if (r < 0)
4256 return log_error_errno(r, "Failed to copy in data from '%s': %m", p->copy_blocks_path);
4257
4258 log_info("Copying in of '%s' on block level completed.", p->copy_blocks_path);
4259
4260 if (p->encrypt != ENCRYPT_OFF && !t->loop) {
4261 r = partition_encrypt(context, p, t, /* offline = */ true);
4262 if (r < 0)
4263 return r;
4264 }
4265
4266 r = partition_target_sync(context, p, t);
4267 if (r < 0)
4268 return r;
4269
4270 if (p->siblings[VERITY_HASH] && !partition_type_defer(&p->siblings[VERITY_HASH]->type)) {
4271 r = partition_format_verity_hash(context, p->siblings[VERITY_HASH],
4272 /* node = */ NULL, partition_target_path(t));
4273 if (r < 0)
4274 return r;
4275 }
4276
4277 if (p->siblings[VERITY_SIG] && !partition_type_defer(&p->siblings[VERITY_SIG]->type)) {
4278 r = partition_format_verity_sig(context, p->siblings[VERITY_SIG]);
4279 if (r < 0)
4280 return r;
4281 }
4282 }
4283
4284 return 0;
4285 }
4286
4287 static int add_exclude_path(const char *path, Hashmap **denylist, DenyType type) {
4288 _cleanup_free_ struct stat *st = NULL;
4289 int r;
4290
4291 assert(path);
4292 assert(denylist);
4293
4294 st = new(struct stat, 1);
4295 if (!st)
4296 return log_oom();
4297
4298 r = chase_and_stat(path, arg_copy_source, CHASE_PREFIX_ROOT, NULL, st);
4299 if (r == -ENOENT)
4300 return 0;
4301 if (r < 0)
4302 return log_error_errno(r, "Failed to stat source file '%s/%s': %m", strempty(arg_copy_source), path);
4303
4304 r = hashmap_ensure_put(denylist, &inode_hash_ops, st, INT_TO_PTR(type));
4305 if (r == -EEXIST)
4306 return 0;
4307 if (r < 0)
4308 return log_oom();
4309 if (r > 0)
4310 TAKE_PTR(st);
4311
4312 return 0;
4313 }
4314
4315 static int make_copy_files_denylist(
4316 Context *context,
4317 const Partition *p,
4318 const char *source,
4319 const char *target,
4320 Hashmap **ret) {
4321
4322 _cleanup_hashmap_free_ Hashmap *denylist = NULL;
4323 int r;
4324
4325 assert(context);
4326 assert(p);
4327 assert(source);
4328 assert(target);
4329 assert(ret);
4330
4331 /* Always exclude the top level APIVFS and temporary directories since the contents of these
4332 * directories are almost certainly not intended to end up in an image. */
4333
4334 NULSTR_FOREACH(s, APIVFS_TMP_DIRS_NULSTR) {
4335 r = add_exclude_path(s, &denylist, DENY_CONTENTS);
4336 if (r < 0)
4337 return r;
4338 }
4339
4340 /* Add the user configured excludes. */
4341
4342 STRV_FOREACH(e, p->exclude_files_source) {
4343 r = add_exclude_path(*e, &denylist, endswith(*e, "/") ? DENY_CONTENTS : DENY_INODE);
4344 if (r < 0)
4345 return r;
4346 }
4347
4348 STRV_FOREACH(e, p->exclude_files_target) {
4349 _cleanup_free_ char *path = NULL;
4350
4351 const char *s = path_startswith(*e, target);
4352 if (!s)
4353 continue;
4354
4355 path = path_join(source, s);
4356 if (!path)
4357 return log_oom();
4358
4359 r = add_exclude_path(path, &denylist, endswith(*e, "/") ? DENY_CONTENTS : DENY_INODE);
4360 if (r < 0)
4361 return r;
4362 }
4363
4364 /* If we're populating a root partition, we don't want any files to end up under the APIVFS mount
4365 * points. While we already exclude <source>/proc, users could still do something such as
4366 * "CopyFiles=/abc:/". Now, if /abc has a proc subdirectory with files in it, those will end up in
4367 * the top level proc directory in the root partition, which we want to avoid. To deal with these
4368 * cases, whenever we're populating a root partition and the target of CopyFiles= is the root
4369 * directory of the root partition, we exclude all directories under the source that are named after
4370 * APIVFS directories or named after mount points of other partitions that are also going to be part
4371 * of the image. */
4372
4373 if (p->type.designator == PARTITION_ROOT && empty_or_root(target)) {
4374 LIST_FOREACH(partitions, q, context->partitions) {
4375 if (q->type.designator == PARTITION_ROOT)
4376 continue;
4377
4378 const char *sources = gpt_partition_type_mountpoint_nulstr(q->type);
4379 if (!sources)
4380 continue;
4381
4382 NULSTR_FOREACH(s, sources) {
4383 _cleanup_free_ char *path = NULL;
4384
4385 /* Exclude only the children of partition mount points so that the nested
4386 * partition mount point itself still ends up in the upper partition. */
4387
4388 path = path_join(source, s);
4389 if (!path)
4390 return -ENOMEM;
4391
4392 r = add_exclude_path(path, &denylist, DENY_CONTENTS);
4393 if (r < 0)
4394 return r;
4395 }
4396 }
4397
4398 NULSTR_FOREACH(s, APIVFS_TMP_DIRS_NULSTR) {
4399 _cleanup_free_ char *path = NULL;
4400
4401 path = path_join(source, s);
4402 if (!path)
4403 return -ENOMEM;
4404
4405 r = add_exclude_path(path, &denylist, DENY_CONTENTS);
4406 if (r < 0)
4407 return r;
4408 }
4409 }
4410
4411 *ret = TAKE_PTR(denylist);
4412 return 0;
4413 }
4414
4415 static int add_subvolume_path(const char *path, Set **subvolumes) {
4416 _cleanup_free_ struct stat *st = NULL;
4417 int r;
4418
4419 assert(path);
4420 assert(subvolumes);
4421
4422 st = new(struct stat, 1);
4423 if (!st)
4424 return log_oom();
4425
4426 r = chase_and_stat(path, arg_copy_source, CHASE_PREFIX_ROOT, NULL, st);
4427 if (r == -ENOENT)
4428 return 0;
4429 if (r < 0)
4430 return log_error_errno(r, "Failed to stat source file '%s/%s': %m", strempty(arg_copy_source), path);
4431
4432 r = set_ensure_consume(subvolumes, &inode_hash_ops, TAKE_PTR(st));
4433 if (r < 0)
4434 return log_oom();
4435
4436 return 0;
4437 }
4438
4439 static int make_subvolumes_set(
4440 Context *context,
4441 const Partition *p,
4442 const char *source,
4443 const char *target,
4444 Set **ret) {
4445 _cleanup_set_free_ Set *subvolumes = NULL;
4446 int r;
4447
4448 assert(context);
4449 assert(p);
4450 assert(target);
4451 assert(ret);
4452
4453 STRV_FOREACH(subvolume, p->subvolumes) {
4454 _cleanup_free_ char *path = NULL;
4455
4456 const char *s = path_startswith(*subvolume, target);
4457 if (!s)
4458 continue;
4459
4460 path = path_join(source, s);
4461 if (!path)
4462 return log_oom();
4463
4464 r = add_subvolume_path(path, &subvolumes);
4465 if (r < 0)
4466 return r;
4467 }
4468
4469 *ret = TAKE_PTR(subvolumes);
4470 return 0;
4471 }
4472
4473 static int do_copy_files(Context *context, Partition *p, const char *root) {
4474 int r;
4475
4476 assert(p);
4477 assert(root);
4478
4479 /* copy_tree_at() automatically copies the permissions of source directories to target directories if
4480 * it created them. However, the root directory is created by us, so we have to manually take care
4481 * that it is initialized. We use the first source directory targeting "/" as the metadata source for
4482 * the root directory. */
4483 STRV_FOREACH_PAIR(source, target, p->copy_files) {
4484 _cleanup_close_ int rfd = -EBADF, sfd = -EBADF;
4485
4486 if (!path_equal(*target, "/"))
4487 continue;
4488
4489 rfd = open(root, O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
4490 if (rfd < 0)
4491 return rfd;
4492
4493 sfd = chase_and_open(*source, arg_copy_source, CHASE_PREFIX_ROOT, O_PATH|O_DIRECTORY|O_CLOEXEC|O_NOCTTY, NULL);
4494 if (sfd < 0)
4495 return log_error_errno(sfd, "Failed to open source file '%s%s': %m", strempty(arg_copy_source), *source);
4496
4497 (void) copy_xattr(sfd, NULL, rfd, NULL, COPY_ALL_XATTRS);
4498 (void) copy_access(sfd, rfd);
4499 (void) copy_times(sfd, rfd, 0);
4500
4501 break;
4502 }
4503
4504 STRV_FOREACH_PAIR(source, target, p->copy_files) {
4505 _cleanup_hashmap_free_ Hashmap *denylist = NULL;
4506 _cleanup_set_free_ Set *subvolumes_by_source_inode = NULL;
4507 _cleanup_close_ int sfd = -EBADF, pfd = -EBADF, tfd = -EBADF;
4508
4509 r = make_copy_files_denylist(context, p, *source, *target, &denylist);
4510 if (r < 0)
4511 return r;
4512
4513 r = make_subvolumes_set(context, p, *source, *target, &subvolumes_by_source_inode);
4514 if (r < 0)
4515 return r;
4516
4517 sfd = chase_and_open(*source, arg_copy_source, CHASE_PREFIX_ROOT, O_CLOEXEC|O_NOCTTY, NULL);
4518 if (sfd == -ENOENT) {
4519 log_notice_errno(sfd, "Failed to open source file '%s%s', skipping: %m", strempty(arg_copy_source), *source);
4520 continue;
4521 }
4522 if (sfd < 0)
4523 return log_error_errno(sfd, "Failed to open source file '%s%s': %m", strempty(arg_copy_source), *source);
4524
4525 r = fd_verify_regular(sfd);
4526 if (r < 0) {
4527 if (r != -EISDIR)
4528 return log_error_errno(r, "Failed to check type of source file '%s': %m", *source);
4529
4530 /* We are looking at a directory */
4531 tfd = chase_and_open(*target, root, CHASE_PREFIX_ROOT, O_RDONLY|O_DIRECTORY|O_CLOEXEC, NULL);
4532 if (tfd < 0) {
4533 _cleanup_free_ char *dn = NULL, *fn = NULL;
4534
4535 if (tfd != -ENOENT)
4536 return log_error_errno(tfd, "Failed to open target directory '%s': %m", *target);
4537
4538 r = path_extract_filename(*target, &fn);
4539 if (r < 0)
4540 return log_error_errno(r, "Failed to extract filename from '%s': %m", *target);
4541
4542 r = path_extract_directory(*target, &dn);
4543 if (r < 0)
4544 return log_error_errno(r, "Failed to extract directory from '%s': %m", *target);
4545
4546 r = mkdir_p_root(root, dn, UID_INVALID, GID_INVALID, 0755, p->subvolumes);
4547 if (r < 0)
4548 return log_error_errno(r, "Failed to create parent directory '%s': %m", dn);
4549
4550 pfd = chase_and_open(dn, root, CHASE_PREFIX_ROOT, O_RDONLY|O_DIRECTORY|O_CLOEXEC, NULL);
4551 if (pfd < 0)
4552 return log_error_errno(pfd, "Failed to open parent directory of target: %m");
4553
4554 r = copy_tree_at(
4555 sfd, ".",
4556 pfd, fn,
4557 UID_INVALID, GID_INVALID,
4558 COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS|COPY_GRACEFUL_WARN|COPY_TRUNCATE,
4559 denylist, subvolumes_by_source_inode);
4560 } else
4561 r = copy_tree_at(
4562 sfd, ".",
4563 tfd, ".",
4564 UID_INVALID, GID_INVALID,
4565 COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS|COPY_GRACEFUL_WARN|COPY_TRUNCATE,
4566 denylist, subvolumes_by_source_inode);
4567 if (r < 0)
4568 return log_error_errno(r, "Failed to copy '%s%s' to '%s%s': %m",
4569 strempty(arg_copy_source), *source, strempty(root), *target);
4570 } else {
4571 _cleanup_free_ char *dn = NULL, *fn = NULL;
4572
4573 /* We are looking at a regular file */
4574
4575 r = path_extract_filename(*target, &fn);
4576 if (r == -EADDRNOTAVAIL || r == O_DIRECTORY)
4577 return log_error_errno(SYNTHETIC_ERRNO(EISDIR),
4578 "Target path '%s' refers to a directory, but source path '%s' refers to regular file, can't copy.", *target, *source);
4579 if (r < 0)
4580 return log_error_errno(r, "Failed to extract filename from '%s': %m", *target);
4581
4582 r = path_extract_directory(*target, &dn);
4583 if (r < 0)
4584 return log_error_errno(r, "Failed to extract directory from '%s': %m", *target);
4585
4586 r = mkdir_p_root(root, dn, UID_INVALID, GID_INVALID, 0755, p->subvolumes);
4587 if (r < 0)
4588 return log_error_errno(r, "Failed to create parent directory: %m");
4589
4590 pfd = chase_and_open(dn, root, CHASE_PREFIX_ROOT, O_RDONLY|O_DIRECTORY|O_CLOEXEC, NULL);
4591 if (pfd < 0)
4592 return log_error_errno(pfd, "Failed to open parent directory of target: %m");
4593
4594 tfd = openat(pfd, fn, O_CREAT|O_EXCL|O_WRONLY|O_CLOEXEC, 0700);
4595 if (tfd < 0)
4596 return log_error_errno(errno, "Failed to create target file '%s': %m", *target);
4597
4598 r = copy_bytes(sfd, tfd, UINT64_MAX, COPY_REFLINK|COPY_HOLES|COPY_SIGINT|COPY_TRUNCATE);
4599 if (r < 0)
4600 return log_error_errno(r, "Failed to copy '%s' to '%s%s': %m", *source, strempty(arg_copy_source), *target);
4601
4602 (void) copy_xattr(sfd, NULL, tfd, NULL, COPY_ALL_XATTRS);
4603 (void) copy_access(sfd, tfd);
4604 (void) copy_times(sfd, tfd, 0);
4605 }
4606 }
4607
4608 return 0;
4609 }
4610
4611 static int do_make_directories(Partition *p, const char *root) {
4612 int r;
4613
4614 assert(p);
4615 assert(root);
4616
4617 STRV_FOREACH(d, p->make_directories) {
4618 r = mkdir_p_root(root, *d, UID_INVALID, GID_INVALID, 0755, p->subvolumes);
4619 if (r < 0)
4620 return log_error_errno(r, "Failed to create directory '%s' in file system: %m", *d);
4621 }
4622
4623 return 0;
4624 }
4625
4626 static bool partition_needs_populate(Partition *p) {
4627 assert(p);
4628 return !strv_isempty(p->copy_files) || !strv_isempty(p->make_directories);
4629 }
4630
4631 static int partition_populate_directory(Context *context, Partition *p, char **ret) {
4632 _cleanup_(rm_rf_physical_and_freep) char *root = NULL;
4633 const char *vt;
4634 int r;
4635
4636 assert(ret);
4637
4638 log_info("Populating %s filesystem.", p->format);
4639
4640 r = var_tmp_dir(&vt);
4641 if (r < 0)
4642 return log_error_errno(r, "Could not determine temporary directory: %m");
4643
4644 r = tempfn_random_child(vt, "repart", &root);
4645 if (r < 0)
4646 return log_error_errno(r, "Failed to generate temporary directory: %m");
4647
4648 r = mkdir(root, 0755);
4649 if (r < 0)
4650 return log_error_errno(errno, "Failed to create temporary directory: %m");
4651
4652 r = do_copy_files(context, p, root);
4653 if (r < 0)
4654 return r;
4655
4656 r = do_make_directories(p, root);
4657 if (r < 0)
4658 return r;
4659
4660 log_info("Successfully populated %s filesystem.", p->format);
4661
4662 *ret = TAKE_PTR(root);
4663 return 0;
4664 }
4665
4666 static int partition_populate_filesystem(Context *context, Partition *p, const char *node) {
4667 int r;
4668
4669 assert(p);
4670 assert(node);
4671
4672 log_info("Populating %s filesystem.", p->format);
4673
4674 /* We copy in a child process, since we have to mount the fs for that, and we don't want that fs to
4675 * appear in the host namespace. Hence we fork a child that has its own file system namespace and
4676 * detached mount propagation. */
4677
4678 r = safe_fork("(sd-copy)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, NULL);
4679 if (r < 0)
4680 return r;
4681 if (r == 0) {
4682 static const char fs[] = "/run/systemd/mount-root";
4683 /* This is a child process with its own mount namespace and propagation to host turned off */
4684
4685 r = mkdir_p(fs, 0700);
4686 if (r < 0) {
4687 log_error_errno(r, "Failed to create mount point: %m");
4688 _exit(EXIT_FAILURE);
4689 }
4690
4691 if (mount_nofollow_verbose(LOG_ERR, node, fs, p->format, MS_NOATIME|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL) < 0)
4692 _exit(EXIT_FAILURE);
4693
4694 if (do_copy_files(context, p, fs) < 0)
4695 _exit(EXIT_FAILURE);
4696
4697 if (do_make_directories(p, fs) < 0)
4698 _exit(EXIT_FAILURE);
4699
4700 r = syncfs_path(AT_FDCWD, fs);
4701 if (r < 0) {
4702 log_error_errno(r, "Failed to synchronize written files: %m");
4703 _exit(EXIT_FAILURE);
4704 }
4705
4706 _exit(EXIT_SUCCESS);
4707 }
4708
4709 log_info("Successfully populated %s filesystem.", p->format);
4710 return 0;
4711 }
4712
4713 static int context_mkfs(Context *context) {
4714 int r;
4715
4716 assert(context);
4717
4718 /* Make a file system */
4719
4720 LIST_FOREACH(partitions, p, context->partitions) {
4721 _cleanup_(rm_rf_physical_and_freep) char *root = NULL;
4722 _cleanup_(partition_target_freep) PartitionTarget *t = NULL;
4723 _cleanup_strv_free_ char **extra_mkfs_options = NULL;
4724
4725 if (p->dropped)
4726 continue;
4727
4728 if (PARTITION_EXISTS(p)) /* Never format existing partitions */
4729 continue;
4730
4731 if (!p->format)
4732 continue;
4733
4734 /* Minimized partitions will use the copy blocks logic so let's make sure to skip those here. */
4735 if (p->copy_blocks_fd >= 0)
4736 continue;
4737
4738 if (partition_type_defer(&p->type))
4739 continue;
4740
4741 assert(p->offset != UINT64_MAX);
4742 assert(p->new_size != UINT64_MAX);
4743 assert(p->new_size >= (p->encrypt != ENCRYPT_OFF ? LUKS2_METADATA_KEEP_FREE : 0));
4744
4745 /* If we're doing encryption, we make sure we keep free space at the end which is required
4746 * for cryptsetup's offline encryption. */
4747 r = partition_target_prepare(context, p,
4748 p->new_size - (p->encrypt != ENCRYPT_OFF ? LUKS2_METADATA_KEEP_FREE : 0),
4749 /*need_path=*/ true,
4750 &t);
4751 if (r < 0)
4752 return r;
4753
4754 if (p->encrypt != ENCRYPT_OFF && t->loop) {
4755 r = partition_target_grow(t, p->new_size);
4756 if (r < 0)
4757 return r;
4758
4759 r = partition_encrypt(context, p, t, /* offline = */ false);
4760 if (r < 0)
4761 return log_error_errno(r, "Failed to encrypt device: %m");
4762 }
4763
4764 log_info("Formatting future partition %" PRIu64 ".", p->partno);
4765
4766 /* If we're not writing to a loop device or if we're populating a read-only filesystem, we
4767 * have to populate using the filesystem's mkfs's --root (or equivalent) option. To do that,
4768 * we need to set up the final directory tree beforehand. */
4769
4770 if (partition_needs_populate(p) && (!t->loop || fstype_is_ro(p->format))) {
4771 if (!mkfs_supports_root_option(p->format))
4772 return log_error_errno(SYNTHETIC_ERRNO(ENODEV),
4773 "Loop device access is required to populate %s filesystems.",
4774 p->format);
4775
4776 r = partition_populate_directory(context, p, &root);
4777 if (r < 0)
4778 return r;
4779 }
4780
4781 r = mkfs_options_from_env("REPART", p->format, &extra_mkfs_options);
4782 if (r < 0)
4783 return log_error_errno(r,
4784 "Failed to determine mkfs command line options for '%s': %m",
4785 p->format);
4786
4787 r = make_filesystem(partition_target_path(t), p->format, strempty(p->new_label), root,
4788 p->fs_uuid, arg_discard, /* quiet = */ false,
4789 context->fs_sector_size, extra_mkfs_options);
4790 if (r < 0)
4791 return r;
4792
4793 log_info("Successfully formatted future partition %" PRIu64 ".", p->partno);
4794
4795 /* If we're writing to a loop device, we can now mount the empty filesystem and populate it. */
4796 if (partition_needs_populate(p) && !root) {
4797 assert(t->loop);
4798
4799 r = partition_populate_filesystem(context, p, partition_target_path(t));
4800 if (r < 0)
4801 return r;
4802 }
4803
4804 if (p->encrypt != ENCRYPT_OFF && !t->loop) {
4805 r = partition_target_grow(t, p->new_size);
4806 if (r < 0)
4807 return r;
4808
4809 r = partition_encrypt(context, p, t, /* offline = */ true);
4810 if (r < 0)
4811 return log_error_errno(r, "Failed to encrypt device: %m");
4812 }
4813
4814 /* Note that we always sync explicitly here, since mkfs.fat doesn't do that on its own, and
4815 * if we don't sync before detaching a block device the in-flight sectors possibly won't hit
4816 * the disk. */
4817
4818 r = partition_target_sync(context, p, t);
4819 if (r < 0)
4820 return r;
4821
4822 if (p->siblings[VERITY_HASH] && !partition_type_defer(&p->siblings[VERITY_HASH]->type)) {
4823 r = partition_format_verity_hash(context, p->siblings[VERITY_HASH],
4824 /* node = */ NULL, partition_target_path(t));
4825 if (r < 0)
4826 return r;
4827 }
4828
4829 if (p->siblings[VERITY_SIG] && !partition_type_defer(&p->siblings[VERITY_SIG]->type)) {
4830 r = partition_format_verity_sig(context, p->siblings[VERITY_SIG]);
4831 if (r < 0)
4832 return r;
4833 }
4834 }
4835
4836 return 0;
4837 }
4838
4839 static int parse_x509_certificate(const char *certificate, size_t certificate_size, X509 **ret) {
4840 #if HAVE_OPENSSL
4841 _cleanup_(X509_freep) X509 *cert = NULL;
4842 _cleanup_(BIO_freep) BIO *cb = NULL;
4843
4844 assert(certificate);
4845 assert(certificate_size > 0);
4846 assert(ret);
4847
4848 cb = BIO_new_mem_buf(certificate, certificate_size);
4849 if (!cb)
4850 return log_oom();
4851
4852 cert = PEM_read_bio_X509(cb, NULL, NULL, NULL);
4853 if (!cert)
4854 return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Failed to parse X.509 certificate: %s",
4855 ERR_error_string(ERR_get_error(), NULL));
4856
4857 if (ret)
4858 *ret = TAKE_PTR(cert);
4859
4860 return 0;
4861 #else
4862 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "openssl is not supported, cannot parse X509 certificate.");
4863 #endif
4864 }
4865
4866 static int parse_private_key(const char *key, size_t key_size, EVP_PKEY **ret) {
4867 #if HAVE_OPENSSL
4868 _cleanup_(BIO_freep) BIO *kb = NULL;
4869 _cleanup_(EVP_PKEY_freep) EVP_PKEY *pk = NULL;
4870
4871 assert(key);
4872 assert(key_size > 0);
4873 assert(ret);
4874
4875 kb = BIO_new_mem_buf(key, key_size);
4876 if (!kb)
4877 return log_oom();
4878
4879 pk = PEM_read_bio_PrivateKey(kb, NULL, NULL, NULL);
4880 if (!pk)
4881 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to parse PEM private key: %s",
4882 ERR_error_string(ERR_get_error(), NULL));
4883
4884 if (ret)
4885 *ret = TAKE_PTR(pk);
4886
4887 return 0;
4888 #else
4889 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "openssl is not supported, cannot parse private key.");
4890 #endif
4891 }
4892
4893 static int partition_acquire_uuid(Context *context, Partition *p, sd_id128_t *ret) {
4894 struct {
4895 sd_id128_t type_uuid;
4896 uint64_t counter;
4897 } _packed_ plaintext = {};
4898 union {
4899 uint8_t md[SHA256_DIGEST_SIZE];
4900 sd_id128_t id;
4901 } result;
4902
4903 uint64_t k = 0;
4904 int r;
4905
4906 assert(context);
4907 assert(p);
4908 assert(ret);
4909
4910 /* Calculate a good UUID for the indicated partition. We want a certain degree of reproducibility,
4911 * hence we won't generate the UUIDs randomly. Instead we use a cryptographic hash (precisely:
4912 * HMAC-SHA256) to derive them from a single seed. The seed is generally the machine ID of the
4913 * installation we are processing, but if random behaviour is desired can be random, too. We use the
4914 * seed value as key for the HMAC (since the machine ID is something we generally don't want to leak)
4915 * and the partition type as plaintext. The partition type is suffixed with a counter (only for the
4916 * second and later partition of the same type) if we have more than one partition of the same
4917 * time. Or in other words:
4918 *
4919 * With:
4920 * SEED := /etc/machine-id
4921 *
4922 * If first partition instance of type TYPE_UUID:
4923 * PARTITION_UUID := HMAC-SHA256(SEED, TYPE_UUID)
4924 *
4925 * For all later partition instances of type TYPE_UUID with INSTANCE being the LE64 encoded instance number:
4926 * PARTITION_UUID := HMAC-SHA256(SEED, TYPE_UUID || INSTANCE)
4927 */
4928
4929 LIST_FOREACH(partitions, q, context->partitions) {
4930 if (p == q)
4931 break;
4932
4933 if (!sd_id128_equal(p->type.uuid, q->type.uuid))
4934 continue;
4935
4936 k++;
4937 }
4938
4939 plaintext.type_uuid = p->type.uuid;
4940 plaintext.counter = htole64(k);
4941
4942 hmac_sha256(context->seed.bytes, sizeof(context->seed.bytes),
4943 &plaintext,
4944 k == 0 ? sizeof(sd_id128_t) : sizeof(plaintext),
4945 result.md);
4946
4947 /* Take the first half, mark it as v4 UUID */
4948 assert_cc(sizeof(result.md) == sizeof(result.id) * 2);
4949 result.id = id128_make_v4_uuid(result.id);
4950
4951 /* Ensure this partition UUID is actually unique, and there's no remaining partition from an earlier run? */
4952 LIST_FOREACH(partitions, q, context->partitions) {
4953 if (p == q)
4954 continue;
4955
4956 if (sd_id128_in_set(result.id, q->current_uuid, q->new_uuid)) {
4957 log_warning("Partition UUID calculated from seed for partition %" PRIu64 " already used, reverting to randomized UUID.", p->partno);
4958
4959 r = sd_id128_randomize(&result.id);
4960 if (r < 0)
4961 return log_error_errno(r, "Failed to generate randomized UUID: %m");
4962
4963 break;
4964 }
4965 }
4966
4967 *ret = result.id;
4968 return 0;
4969 }
4970
4971 static int partition_acquire_label(Context *context, Partition *p, char **ret) {
4972 _cleanup_free_ char *label = NULL;
4973 const char *prefix;
4974 unsigned k = 1;
4975
4976 assert(context);
4977 assert(p);
4978 assert(ret);
4979
4980 prefix = gpt_partition_type_uuid_to_string(p->type.uuid);
4981 if (!prefix)
4982 prefix = "linux";
4983
4984 for (;;) {
4985 const char *ll = label ?: prefix;
4986 bool retry = false;
4987
4988 LIST_FOREACH(partitions, q, context->partitions) {
4989 if (p == q)
4990 break;
4991
4992 if (streq_ptr(ll, q->current_label) ||
4993 streq_ptr(ll, q->new_label)) {
4994 retry = true;
4995 break;
4996 }
4997 }
4998
4999 if (!retry)
5000 break;
5001
5002 label = mfree(label);
5003 if (asprintf(&label, "%s-%u", prefix, ++k) < 0)
5004 return log_oom();
5005 }
5006
5007 if (!label) {
5008 label = strdup(prefix);
5009 if (!label)
5010 return log_oom();
5011 }
5012
5013 *ret = TAKE_PTR(label);
5014 return 0;
5015 }
5016
5017 static int context_acquire_partition_uuids_and_labels(Context *context) {
5018 int r;
5019
5020 assert(context);
5021
5022 LIST_FOREACH(partitions, p, context->partitions) {
5023 sd_id128_t uuid;
5024
5025 /* Never touch foreign partitions */
5026 if (PARTITION_IS_FOREIGN(p)) {
5027 p->new_uuid = p->current_uuid;
5028
5029 if (p->current_label) {
5030 r = free_and_strdup_warn(&p->new_label, strempty(p->current_label));
5031 if (r < 0)
5032 return r;
5033 }
5034
5035 continue;
5036 }
5037
5038 if (!sd_id128_is_null(p->current_uuid))
5039 p->new_uuid = uuid = p->current_uuid; /* Never change initialized UUIDs */
5040 else if (p->new_uuid_is_set)
5041 uuid = p->new_uuid;
5042 else {
5043 /* Not explicitly set by user! */
5044 r = partition_acquire_uuid(context, p, &uuid);
5045 if (r < 0)
5046 return r;
5047
5048 /* The final verity hash/data UUIDs can only be determined after formatting the
5049 * verity hash partition. However, we still want to use the generated partition UUID
5050 * to derive other UUIDs to keep things unique and reproducible, so we always
5051 * generate a UUID if none is set, but we only use it as the actual partition UUID if
5052 * verity is not configured. */
5053 if (!IN_SET(p->verity, VERITY_DATA, VERITY_HASH)) {
5054 p->new_uuid = uuid;
5055 p->new_uuid_is_set = true;
5056 }
5057 }
5058
5059 /* Calculate the UUID for the file system as HMAC-SHA256 of the string "file-system-uuid",
5060 * keyed off the partition UUID. */
5061 r = derive_uuid(uuid, "file-system-uuid", &p->fs_uuid);
5062 if (r < 0)
5063 return r;
5064
5065 if (p->encrypt != ENCRYPT_OFF) {
5066 r = derive_uuid(uuid, "luks-uuid", &p->luks_uuid);
5067 if (r < 0)
5068 return r;
5069 }
5070
5071 /* Derive the verity salt and verity superblock UUID from the seed to keep them reproducible */
5072 if (p->verity == VERITY_HASH) {
5073 derive_salt(context->seed, "verity-salt", p->verity_salt);
5074
5075 r = derive_uuid(context->seed, "verity-uuid", &p->verity_uuid);
5076 if (r < 0)
5077 return log_error_errno(r, "Failed to acquire verity uuid: %m");
5078 }
5079
5080 if (!isempty(p->current_label)) {
5081 /* never change initialized labels */
5082 r = free_and_strdup_warn(&p->new_label, p->current_label);
5083 if (r < 0)
5084 return r;
5085 } else if (!p->new_label) {
5086 /* Not explicitly set by user! */
5087
5088 r = partition_acquire_label(context, p, &p->new_label);
5089 if (r < 0)
5090 return r;
5091 }
5092 }
5093
5094 return 0;
5095 }
5096
5097 static int set_gpt_flags(struct fdisk_partition *q, uint64_t flags) {
5098 _cleanup_free_ char *a = NULL;
5099
5100 for (unsigned i = 0; i < sizeof(flags) * 8; i++) {
5101 uint64_t bit = UINT64_C(1) << i;
5102 char buf[DECIMAL_STR_MAX(unsigned)+1];
5103
5104 if (!FLAGS_SET(flags, bit))
5105 continue;
5106
5107 xsprintf(buf, "%u", i);
5108 if (!strextend_with_separator(&a, ",", buf))
5109 return -ENOMEM;
5110 }
5111
5112 return fdisk_partition_set_attrs(q, a);
5113 }
5114
5115 static uint64_t partition_merge_flags(Partition *p) {
5116 uint64_t f;
5117
5118 assert(p);
5119
5120 f = p->gpt_flags;
5121
5122 if (p->no_auto >= 0) {
5123 if (gpt_partition_type_knows_no_auto(p->type))
5124 SET_FLAG(f, SD_GPT_FLAG_NO_AUTO, p->no_auto);
5125 else {
5126 char buffer[SD_ID128_UUID_STRING_MAX];
5127 log_warning("Configured NoAuto=%s for partition type '%s' that doesn't support it, ignoring.",
5128 yes_no(p->no_auto),
5129 gpt_partition_type_uuid_to_string_harder(p->type.uuid, buffer));
5130 }
5131 }
5132
5133 if (p->read_only >= 0) {
5134 if (gpt_partition_type_knows_read_only(p->type))
5135 SET_FLAG(f, SD_GPT_FLAG_READ_ONLY, p->read_only);
5136 else {
5137 char buffer[SD_ID128_UUID_STRING_MAX];
5138 log_warning("Configured ReadOnly=%s for partition type '%s' that doesn't support it, ignoring.",
5139 yes_no(p->read_only),
5140 gpt_partition_type_uuid_to_string_harder(p->type.uuid, buffer));
5141 }
5142 }
5143
5144 if (p->growfs >= 0) {
5145 if (gpt_partition_type_knows_growfs(p->type))
5146 SET_FLAG(f, SD_GPT_FLAG_GROWFS, p->growfs);
5147 else {
5148 char buffer[SD_ID128_UUID_STRING_MAX];
5149 log_warning("Configured GrowFileSystem=%s for partition type '%s' that doesn't support it, ignoring.",
5150 yes_no(p->growfs),
5151 gpt_partition_type_uuid_to_string_harder(p->type.uuid, buffer));
5152 }
5153 }
5154
5155 return f;
5156 }
5157
5158 static int context_mangle_partitions(Context *context) {
5159 int r;
5160
5161 assert(context);
5162
5163 LIST_FOREACH(partitions, p, context->partitions) {
5164 if (p->dropped)
5165 continue;
5166
5167 if (partition_type_defer(&p->type))
5168 continue;
5169
5170 assert(p->new_size != UINT64_MAX);
5171 assert(p->offset != UINT64_MAX);
5172 assert(p->partno != UINT64_MAX);
5173
5174 if (PARTITION_EXISTS(p)) {
5175 bool changed = false;
5176
5177 assert(p->current_partition);
5178
5179 if (p->new_size != p->current_size) {
5180 assert(p->new_size >= p->current_size);
5181 assert(p->new_size % context->sector_size == 0);
5182
5183 r = fdisk_partition_size_explicit(p->current_partition, true);
5184 if (r < 0)
5185 return log_error_errno(r, "Failed to enable explicit sizing: %m");
5186
5187 r = fdisk_partition_set_size(p->current_partition, p->new_size / context->sector_size);
5188 if (r < 0)
5189 return log_error_errno(r, "Failed to grow partition: %m");
5190
5191 log_info("Growing existing partition %" PRIu64 ".", p->partno);
5192 changed = true;
5193 }
5194
5195 if (!sd_id128_equal(p->new_uuid, p->current_uuid)) {
5196 r = fdisk_partition_set_uuid(p->current_partition, SD_ID128_TO_UUID_STRING(p->new_uuid));
5197 if (r < 0)
5198 return log_error_errno(r, "Failed to set partition UUID: %m");
5199
5200 log_info("Initializing UUID of existing partition %" PRIu64 ".", p->partno);
5201 changed = true;
5202 }
5203
5204 if (!streq_ptr(p->new_label, p->current_label)) {
5205 r = fdisk_partition_set_name(p->current_partition, strempty(p->new_label));
5206 if (r < 0)
5207 return log_error_errno(r, "Failed to set partition label: %m");
5208
5209 log_info("Setting partition label of existing partition %" PRIu64 ".", p->partno);
5210 changed = true;
5211 }
5212
5213 if (changed) {
5214 assert(!PARTITION_IS_FOREIGN(p)); /* never touch foreign partitions */
5215
5216 r = fdisk_set_partition(context->fdisk_context, p->partno, p->current_partition);
5217 if (r < 0)
5218 return log_error_errno(r, "Failed to update partition: %m");
5219 }
5220 } else {
5221 _cleanup_(fdisk_unref_partitionp) struct fdisk_partition *q = NULL;
5222 _cleanup_(fdisk_unref_parttypep) struct fdisk_parttype *t = NULL;
5223
5224 assert(!p->new_partition);
5225 assert(p->offset % context->sector_size == 0);
5226 assert(p->new_size % context->sector_size == 0);
5227 assert(p->new_label);
5228
5229 t = fdisk_new_parttype();
5230 if (!t)
5231 return log_oom();
5232
5233 r = fdisk_parttype_set_typestr(t, SD_ID128_TO_UUID_STRING(p->type.uuid));
5234 if (r < 0)
5235 return log_error_errno(r, "Failed to initialize partition type: %m");
5236
5237 q = fdisk_new_partition();
5238 if (!q)
5239 return log_oom();
5240
5241 r = fdisk_partition_set_type(q, t);
5242 if (r < 0)
5243 return log_error_errno(r, "Failed to set partition type: %m");
5244
5245 r = fdisk_partition_size_explicit(q, true);
5246 if (r < 0)
5247 return log_error_errno(r, "Failed to enable explicit sizing: %m");
5248
5249 r = fdisk_partition_set_start(q, p->offset / context->sector_size);
5250 if (r < 0)
5251 return log_error_errno(r, "Failed to position partition: %m");
5252
5253 r = fdisk_partition_set_size(q, p->new_size / context->sector_size);
5254 if (r < 0)
5255 return log_error_errno(r, "Failed to grow partition: %m");
5256
5257 r = fdisk_partition_set_partno(q, p->partno);
5258 if (r < 0)
5259 return log_error_errno(r, "Failed to set partition number: %m");
5260
5261 r = fdisk_partition_set_uuid(q, SD_ID128_TO_UUID_STRING(p->new_uuid));
5262 if (r < 0)
5263 return log_error_errno(r, "Failed to set partition UUID: %m");
5264
5265 r = fdisk_partition_set_name(q, strempty(p->new_label));
5266 if (r < 0)
5267 return log_error_errno(r, "Failed to set partition label: %m");
5268
5269 /* Merge the no auto + read only + growfs setting with the literal flags, and set them for the partition */
5270 r = set_gpt_flags(q, partition_merge_flags(p));
5271 if (r < 0)
5272 return log_error_errno(r, "Failed to set GPT partition flags: %m");
5273
5274 log_info("Adding new partition %" PRIu64 " to partition table.", p->partno);
5275
5276 r = fdisk_add_partition(context->fdisk_context, q, NULL);
5277 if (r < 0)
5278 return log_error_errno(r, "Failed to add partition: %m");
5279
5280 assert(!p->new_partition);
5281 p->new_partition = TAKE_PTR(q);
5282 }
5283 }
5284
5285 return 0;
5286 }
5287
5288 static int split_name_printf(Partition *p, char **ret) {
5289 assert(p);
5290
5291 const Specifier table[] = {
5292 { 't', specifier_string, GPT_PARTITION_TYPE_UUID_TO_STRING_HARDER(p->type.uuid) },
5293 { 'T', specifier_id128, &p->type.uuid },
5294 { 'U', specifier_id128, &p->new_uuid },
5295 { 'n', specifier_uint64, &p->partno },
5296
5297 COMMON_SYSTEM_SPECIFIERS,
5298 {}
5299 };
5300
5301 return specifier_printf(p->split_name_format, NAME_MAX, table, arg_root, p, ret);
5302 }
5303
5304 static int split_node(const char *node, char **ret_base, char **ret_ext) {
5305 _cleanup_free_ char *base = NULL, *ext = NULL;
5306 char *e;
5307 int r;
5308
5309 assert(node);
5310 assert(ret_base);
5311 assert(ret_ext);
5312
5313 r = path_extract_filename(node, &base);
5314 if (r == O_DIRECTORY || r == -EADDRNOTAVAIL)
5315 return log_error_errno(r, "Device node %s cannot be a directory", node);
5316 if (r < 0)
5317 return log_error_errno(r, "Failed to extract filename from %s: %m", node);
5318
5319 e = endswith(base, ".raw");
5320 if (e) {
5321 ext = strdup(e);
5322 if (!ext)
5323 return log_oom();
5324
5325 *e = 0;
5326 }
5327
5328 *ret_base = TAKE_PTR(base);
5329 *ret_ext = TAKE_PTR(ext);
5330
5331 return 0;
5332 }
5333
5334 static int split_name_resolve(Context *context) {
5335 _cleanup_free_ char *parent = NULL, *base = NULL, *ext = NULL;
5336 int r;
5337
5338 assert(context);
5339
5340 r = path_extract_directory(context->node, &parent);
5341 if (r < 0 && r != -EDESTADDRREQ)
5342 return log_error_errno(r, "Failed to extract directory from %s: %m", context->node);
5343
5344 r = split_node(context->node, &base, &ext);
5345 if (r < 0)
5346 return r;
5347
5348 LIST_FOREACH(partitions, p, context->partitions) {
5349 _cleanup_free_ char *resolved = NULL;
5350
5351 if (p->dropped)
5352 continue;
5353
5354 if (!p->split_name_format)
5355 continue;
5356
5357 r = split_name_printf(p, &resolved);
5358 if (r < 0)
5359 return log_error_errno(r, "Failed to resolve specifiers in %s: %m", p->split_name_format);
5360
5361 if (parent)
5362 p->split_path = strjoin(parent, "/", base, ".", resolved, ext);
5363 else
5364 p->split_path = strjoin(base, ".", resolved, ext);
5365 if (!p->split_path)
5366 return log_oom();
5367 }
5368
5369 LIST_FOREACH(partitions, p, context->partitions) {
5370 if (!p->split_path)
5371 continue;
5372
5373 LIST_FOREACH(partitions, q, context->partitions) {
5374 if (p == q)
5375 continue;
5376
5377 if (!q->split_path)
5378 continue;
5379
5380 if (!streq(p->split_path, q->split_path))
5381 continue;
5382
5383 return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ),
5384 "%s and %s have the same resolved split name \"%s\", refusing",
5385 p->definition_path, q->definition_path, p->split_path);
5386 }
5387 }
5388
5389 return 0;
5390 }
5391
5392 static int context_split(Context *context) {
5393 int fd = -EBADF, r;
5394
5395 if (!arg_split)
5396 return 0;
5397
5398 assert(context);
5399
5400 /* We can't do resolution earlier because the partition UUIDs for verity partitions are only filled
5401 * in after they've been generated. */
5402
5403 r = split_name_resolve(context);
5404 if (r < 0)
5405 return r;
5406
5407 LIST_FOREACH(partitions, p, context->partitions) {
5408 _cleanup_close_ int fdt = -EBADF;
5409
5410 if (p->dropped)
5411 continue;
5412
5413 if (!p->split_path)
5414 continue;
5415
5416 if (partition_type_defer(&p->type))
5417 continue;
5418
5419 fdt = open(p->split_path, O_WRONLY|O_NOCTTY|O_CLOEXEC|O_NOFOLLOW|O_CREAT|O_EXCL, 0666);
5420 if (fdt < 0)
5421 return log_error_errno(fdt, "Failed to open split partition file %s: %m", p->split_path);
5422
5423 if (fd < 0)
5424 assert_se((fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
5425
5426 if (lseek(fd, p->offset, SEEK_SET) < 0)
5427 return log_error_errno(errno, "Failed to seek to partition offset: %m");
5428
5429 r = copy_bytes(fd, fdt, p->new_size, COPY_REFLINK|COPY_HOLES|COPY_TRUNCATE);
5430 if (r < 0)
5431 return log_error_errno(r, "Failed to copy to split partition %s: %m", p->split_path);
5432 }
5433
5434 return 0;
5435 }
5436
5437 static int context_write_partition_table(Context *context) {
5438 _cleanup_(fdisk_unref_tablep) struct fdisk_table *original_table = NULL;
5439 int capable, r;
5440
5441 assert(context);
5442
5443 if (!context->from_scratch && !context_changed(context)) {
5444 log_info("No changes.");
5445 return 0;
5446 }
5447
5448 if (arg_dry_run) {
5449 log_notice("Refusing to repartition, please re-run with --dry-run=no.");
5450 return 0;
5451 }
5452
5453 log_info("Applying changes.");
5454
5455 if (context->from_scratch && arg_empty != EMPTY_CREATE) {
5456 /* Erase everything if we operate from scratch, except if the image was just created anyway, and thus is definitely empty. */
5457 r = context_wipe_range(context, 0, context->total);
5458 if (r < 0)
5459 return r;
5460
5461 log_info("Wiped block device.");
5462
5463 if (arg_discard) {
5464 r = context_discard_range(context, 0, context->total);
5465 if (r == -EOPNOTSUPP)
5466 log_info("Storage does not support discard, not discarding entire block device data.");
5467 else if (r < 0)
5468 return log_error_errno(r, "Failed to discard entire block device: %m");
5469 else if (r > 0)
5470 log_info("Discarded entire block device.");
5471 }
5472 }
5473
5474 r = fdisk_get_partitions(context->fdisk_context, &original_table);
5475 if (r < 0)
5476 return log_error_errno(r, "Failed to acquire partition table: %m");
5477
5478 /* Wipe fs signatures and discard sectors where the new partitions are going to be placed and in the
5479 * gaps between partitions, just to be sure. */
5480 r = context_wipe_and_discard(context);
5481 if (r < 0)
5482 return r;
5483
5484 r = context_copy_blocks(context);
5485 if (r < 0)
5486 return r;
5487
5488 r = context_mkfs(context);
5489 if (r < 0)
5490 return r;
5491
5492 r = context_mangle_partitions(context);
5493 if (r < 0)
5494 return r;
5495
5496 log_info("Writing new partition table.");
5497
5498 r = fdisk_write_disklabel(context->fdisk_context);
5499 if (r < 0)
5500 return log_error_errno(r, "Failed to write partition table: %m");
5501
5502 capable = blockdev_partscan_enabled(fdisk_get_devfd(context->fdisk_context));
5503 if (capable == -ENOTBLK)
5504 log_debug("Not telling kernel to reread partition table, since we are not operating on a block device.");
5505 else if (capable < 0)
5506 return log_error_errno(capable, "Failed to check if block device supports partition scanning: %m");
5507 else if (capable > 0) {
5508 log_info("Telling kernel to reread partition table.");
5509
5510 if (context->from_scratch)
5511 r = fdisk_reread_partition_table(context->fdisk_context);
5512 else
5513 r = fdisk_reread_changes(context->fdisk_context, original_table);
5514 if (r < 0)
5515 return log_error_errno(r, "Failed to reread partition table: %m");
5516 } else
5517 log_notice("Not telling kernel to reread partition table, because selected image does not support kernel partition block devices.");
5518
5519 log_info("All done.");
5520
5521 return 0;
5522 }
5523
5524 static int context_read_seed(Context *context, const char *root) {
5525 int r;
5526
5527 assert(context);
5528
5529 if (!sd_id128_is_null(context->seed))
5530 return 0;
5531
5532 if (!arg_randomize) {
5533 r = id128_get_machine(root, &context->seed);
5534 if (r >= 0)
5535 return 0;
5536
5537 if (!ERRNO_IS_MACHINE_ID_UNSET(r))
5538 return log_error_errno(r, "Failed to parse machine ID of image: %m");
5539
5540 log_info("No machine ID set, using randomized partition UUIDs.");
5541 }
5542
5543 r = sd_id128_randomize(&context->seed);
5544 if (r < 0)
5545 return log_error_errno(r, "Failed to generate randomized seed: %m");
5546
5547 return 0;
5548 }
5549
5550 static int context_factory_reset(Context *context) {
5551 size_t n = 0;
5552 int r;
5553
5554 assert(context);
5555
5556 if (arg_factory_reset <= 0)
5557 return 0;
5558
5559 if (context->from_scratch) /* Nothing to reset if we start from scratch */
5560 return 0;
5561
5562 if (arg_dry_run) {
5563 log_notice("Refusing to factory reset, please re-run with --dry-run=no.");
5564 return 0;
5565 }
5566
5567 log_info("Applying factory reset.");
5568
5569 LIST_FOREACH(partitions, p, context->partitions) {
5570
5571 if (!p->factory_reset || !PARTITION_EXISTS(p))
5572 continue;
5573
5574 assert(p->partno != UINT64_MAX);
5575
5576 log_info("Removing partition %" PRIu64 " for factory reset.", p->partno);
5577
5578 r = fdisk_delete_partition(context->fdisk_context, p->partno);
5579 if (r < 0)
5580 return log_error_errno(r, "Failed to remove partition %" PRIu64 ": %m", p->partno);
5581
5582 n++;
5583 }
5584
5585 if (n == 0) {
5586 log_info("Factory reset requested, but no partitions to delete found.");
5587 return 0;
5588 }
5589
5590 r = fdisk_write_disklabel(context->fdisk_context);
5591 if (r < 0)
5592 return log_error_errno(r, "Failed to write disk label: %m");
5593
5594 log_info("Successfully deleted %zu partitions.", n);
5595 return 1;
5596 }
5597
5598 static int context_can_factory_reset(Context *context) {
5599 assert(context);
5600
5601 LIST_FOREACH(partitions, p, context->partitions)
5602 if (p->factory_reset && PARTITION_EXISTS(p))
5603 return true;
5604
5605 return false;
5606 }
5607
5608 static int resolve_copy_blocks_auto_candidate(
5609 dev_t partition_devno,
5610 GptPartitionType partition_type,
5611 dev_t restrict_devno,
5612 sd_id128_t *ret_uuid) {
5613
5614 _cleanup_(blkid_free_probep) blkid_probe b = NULL;
5615 _cleanup_close_ int fd = -EBADF;
5616 _cleanup_free_ char *p = NULL;
5617 const char *pttype, *t;
5618 sd_id128_t pt_parsed, u;
5619 blkid_partition pp;
5620 dev_t whole_devno;
5621 blkid_partlist pl;
5622 int r;
5623
5624 /* Checks if the specified partition has the specified GPT type UUID, and is located on the specified
5625 * 'restrict_devno' device. The type check is particularly relevant if we have Verity volume which is
5626 * backed by two separate partitions: the data and the hash partitions, and we need to find the right
5627 * one of the two. */
5628
5629 r = block_get_whole_disk(partition_devno, &whole_devno);
5630 if (r < 0)
5631 return log_error_errno(
5632 r,
5633 "Unable to determine containing block device of partition %u:%u: %m",
5634 major(partition_devno), minor(partition_devno));
5635
5636 if (restrict_devno != (dev_t) -1 &&
5637 restrict_devno != whole_devno)
5638 return log_error_errno(
5639 SYNTHETIC_ERRNO(EPERM),
5640 "Partition %u:%u is located outside of block device %u:%u, refusing.",
5641 major(partition_devno), minor(partition_devno),
5642 major(restrict_devno), minor(restrict_devno));
5643
5644 fd = r = device_open_from_devnum(S_IFBLK, whole_devno, O_RDONLY|O_CLOEXEC|O_NONBLOCK, &p);
5645 if (r < 0)
5646 return log_error_errno(r, "Failed to open block device " DEVNUM_FORMAT_STR ": %m",
5647 DEVNUM_FORMAT_VAL(whole_devno));
5648
5649 b = blkid_new_probe();
5650 if (!b)
5651 return log_oom();
5652
5653 errno = 0;
5654 r = blkid_probe_set_device(b, fd, 0, 0);
5655 if (r != 0)
5656 return log_error_errno(errno_or_else(ENOMEM), "Failed to open block device '%s': %m", p);
5657
5658 (void) blkid_probe_enable_partitions(b, 1);
5659 (void) blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
5660
5661 errno = 0;
5662 r = blkid_do_safeprobe(b);
5663 if (r == _BLKID_SAFEPROBE_ERROR)
5664 return log_error_errno(errno_or_else(EIO), "Unable to probe for partition table of '%s': %m", p);
5665 if (IN_SET(r, _BLKID_SAFEPROBE_AMBIGUOUS, _BLKID_SAFEPROBE_NOT_FOUND)) {
5666 log_debug("Didn't find partition table on block device '%s'.", p);
5667 return false;
5668 }
5669
5670 assert(r == _BLKID_SAFEPROBE_FOUND);
5671
5672 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
5673 if (!streq_ptr(pttype, "gpt")) {
5674 log_debug("Didn't find a GPT partition table on '%s'.", p);
5675 return false;
5676 }
5677
5678 errno = 0;
5679 pl = blkid_probe_get_partitions(b);
5680 if (!pl)
5681 return log_error_errno(errno_or_else(EIO), "Unable read partition table of '%s': %m", p);
5682
5683 pp = blkid_partlist_devno_to_partition(pl, partition_devno);
5684 if (!pp) {
5685 log_debug("Partition %u:%u has no matching partition table entry on '%s'.",
5686 major(partition_devno), minor(partition_devno), p);
5687 return false;
5688 }
5689
5690 t = blkid_partition_get_type_string(pp);
5691 if (isempty(t)) {
5692 log_debug("Partition %u:%u has no type on '%s'.",
5693 major(partition_devno), minor(partition_devno), p);
5694 return false;
5695 }
5696
5697 r = sd_id128_from_string(t, &pt_parsed);
5698 if (r < 0) {
5699 log_debug_errno(r, "Failed to parse partition type \"%s\": %m", t);
5700 return false;
5701 }
5702
5703 if (!sd_id128_equal(pt_parsed, partition_type.uuid)) {
5704 log_debug("Partition %u:%u has non-matching partition type " SD_ID128_FORMAT_STR " (needed: " SD_ID128_FORMAT_STR "), ignoring.",
5705 major(partition_devno), minor(partition_devno),
5706 SD_ID128_FORMAT_VAL(pt_parsed), SD_ID128_FORMAT_VAL(partition_type.uuid));
5707 return false;
5708 }
5709
5710 r = blkid_partition_get_uuid_id128(pp, &u);
5711 if (r == -ENXIO) {
5712 log_debug_errno(r, "Partition " DEVNUM_FORMAT_STR " has no UUID.", DEVNUM_FORMAT_VAL(partition_devno));
5713 return false;
5714 }
5715 if (r < 0) {
5716 log_debug_errno(r, "Failed to read partition UUID of " DEVNUM_FORMAT_STR ": %m", DEVNUM_FORMAT_VAL(partition_devno));
5717 return false;
5718 }
5719
5720 log_debug("Automatically found partition " DEVNUM_FORMAT_STR " of right type " SD_ID128_FORMAT_STR ".",
5721 DEVNUM_FORMAT_VAL(partition_devno),
5722 SD_ID128_FORMAT_VAL(pt_parsed));
5723
5724 if (ret_uuid)
5725 *ret_uuid = u;
5726
5727 return true;
5728 }
5729
5730 static int find_backing_devno(
5731 const char *path,
5732 const char *root,
5733 dev_t *ret) {
5734
5735 _cleanup_free_ char *resolved = NULL;
5736 int r;
5737
5738 assert(path);
5739
5740 r = chase(path, root, CHASE_PREFIX_ROOT, &resolved, NULL);
5741 if (r < 0)
5742 return r;
5743
5744 r = path_is_mount_point(resolved, NULL, 0);
5745 if (r < 0)
5746 return r;
5747 if (r == 0) /* Not a mount point, then it's not a partition of its own, let's not automatically use it. */
5748 return -ENOENT;
5749
5750 r = get_block_device(resolved, ret);
5751 if (r < 0)
5752 return r;
5753 if (r == 0) /* Not backed by physical file system, we can't use this */
5754 return -ENOENT;
5755
5756 return 0;
5757 }
5758
5759 static int resolve_copy_blocks_auto(
5760 GptPartitionType type,
5761 const char *root,
5762 dev_t restrict_devno,
5763 dev_t *ret_devno,
5764 sd_id128_t *ret_uuid) {
5765
5766 const char *try1 = NULL, *try2 = NULL;
5767 char p[SYS_BLOCK_PATH_MAX("/slaves")];
5768 _cleanup_closedir_ DIR *d = NULL;
5769 sd_id128_t found_uuid = SD_ID128_NULL;
5770 dev_t devno, found = 0;
5771 int r;
5772
5773 /* Enforce some security restrictions: CopyBlocks=auto should not be an avenue to get outside of the
5774 * --root=/--image= confinement. Specifically, refuse CopyBlocks= in combination with --root= at all,
5775 * and restrict block device references in the --image= case to loopback block device we set up.
5776 *
5777 * restrict_devno contain the dev_t of the loop back device we operate on in case of --image=, and
5778 * thus declares which device (and its partition subdevices) we shall limit access to. If
5779 * restrict_devno is zero no device probing access shall be allowed at all (used for --root=) and if
5780 * it is (dev_t) -1 then free access shall be allowed (if neither switch is used). */
5781
5782 if (restrict_devno == 0)
5783 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
5784 "Automatic discovery of backing block devices not permitted in --root= mode, refusing.");
5785
5786 /* Handles CopyBlocks=auto, and finds the right source partition to copy from. We look for matching
5787 * partitions in the host, using the appropriate directory as key and ensuring that the partition
5788 * type matches. */
5789
5790 if (type.designator == PARTITION_ROOT)
5791 try1 = "/";
5792 else if (type.designator == PARTITION_USR)
5793 try1 = "/usr/";
5794 else if (type.designator == PARTITION_ROOT_VERITY)
5795 try1 = "/";
5796 else if (type.designator == PARTITION_USR_VERITY)
5797 try1 = "/usr/";
5798 else if (type.designator == PARTITION_ESP) {
5799 try1 = "/efi/";
5800 try2 = "/boot/";
5801 } else if (type.designator == PARTITION_XBOOTLDR)
5802 try1 = "/boot/";
5803 else
5804 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
5805 "Partition type " SD_ID128_FORMAT_STR " not supported from automatic source block device discovery.",
5806 SD_ID128_FORMAT_VAL(type.uuid));
5807
5808 r = find_backing_devno(try1, root, &devno);
5809 if (r == -ENOENT && try2)
5810 r = find_backing_devno(try2, root, &devno);
5811 if (r < 0)
5812 return log_error_errno(r, "Failed to resolve automatic CopyBlocks= path for partition type " SD_ID128_FORMAT_STR ", sorry: %m",
5813 SD_ID128_FORMAT_VAL(type.uuid));
5814
5815 xsprintf_sys_block_path(p, "/slaves", devno);
5816 d = opendir(p);
5817 if (d) {
5818 struct dirent *de;
5819
5820 for (;;) {
5821 _cleanup_free_ char *q = NULL, *t = NULL;
5822 sd_id128_t u;
5823 dev_t sl;
5824
5825 errno = 0;
5826 de = readdir_no_dot(d);
5827 if (!de) {
5828 if (errno != 0)
5829 return log_error_errno(errno, "Failed to read directory '%s': %m", p);
5830
5831 break;
5832 }
5833
5834 if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN))
5835 continue;
5836
5837 q = path_join(p, de->d_name, "/dev");
5838 if (!q)
5839 return log_oom();
5840
5841 r = read_one_line_file(q, &t);
5842 if (r < 0)
5843 return log_error_errno(r, "Failed to read %s: %m", q);
5844
5845 r = parse_devnum(t, &sl);
5846 if (r < 0) {
5847 log_debug_errno(r, "Failed to parse %s, ignoring: %m", q);
5848 continue;
5849 }
5850 if (major(sl) == 0) {
5851 log_debug("Device backing %s is special, ignoring.", q);
5852 continue;
5853 }
5854
5855 r = resolve_copy_blocks_auto_candidate(sl, type, restrict_devno, &u);
5856 if (r < 0)
5857 return r;
5858 if (r > 0) {
5859 /* We found a matching one! */
5860 if (found != 0)
5861 return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ),
5862 "Multiple matching partitions found, refusing.");
5863
5864 found = sl;
5865 found_uuid = u;
5866 }
5867 }
5868 } else if (errno != ENOENT)
5869 return log_error_errno(errno, "Failed open %s: %m", p);
5870 else {
5871 r = resolve_copy_blocks_auto_candidate(devno, type, restrict_devno, &found_uuid);
5872 if (r < 0)
5873 return r;
5874 if (r > 0)
5875 found = devno;
5876 }
5877
5878 if (found == 0)
5879 return log_error_errno(SYNTHETIC_ERRNO(ENXIO),
5880 "Unable to automatically discover suitable partition to copy blocks from.");
5881
5882 if (ret_devno)
5883 *ret_devno = found;
5884
5885 if (ret_uuid)
5886 *ret_uuid = found_uuid;
5887
5888 return 0;
5889 }
5890
5891 static int context_open_copy_block_paths(
5892 Context *context,
5893 dev_t restrict_devno) {
5894
5895 int r;
5896
5897 assert(context);
5898
5899 LIST_FOREACH(partitions, p, context->partitions) {
5900 _cleanup_close_ int source_fd = -EBADF;
5901 _cleanup_free_ char *opened = NULL;
5902 sd_id128_t uuid = SD_ID128_NULL;
5903 uint64_t size;
5904 struct stat st;
5905
5906 if (p->copy_blocks_fd >= 0)
5907 continue;
5908
5909 assert(p->copy_blocks_size == UINT64_MAX);
5910
5911 if (PARTITION_EXISTS(p)) /* Never copy over partitions that already exist! */
5912 continue;
5913
5914 if (p->copy_blocks_path) {
5915
5916 source_fd = chase_and_open(p->copy_blocks_path, p->copy_blocks_root, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NONBLOCK, &opened);
5917 if (source_fd < 0)
5918 return log_error_errno(source_fd, "Failed to open '%s': %m", p->copy_blocks_path);
5919
5920 if (fstat(source_fd, &st) < 0)
5921 return log_error_errno(errno, "Failed to stat block copy file '%s': %m", opened);
5922
5923 if (!S_ISREG(st.st_mode) && restrict_devno != (dev_t) -1)
5924 return log_error_errno(SYNTHETIC_ERRNO(EPERM),
5925 "Copying from block device node is not permitted in --image=/--root= mode, refusing.");
5926
5927 } else if (p->copy_blocks_auto) {
5928 dev_t devno = 0; /* Fake initialization to appease gcc. */
5929
5930 r = resolve_copy_blocks_auto(p->type, p->copy_blocks_root, restrict_devno, &devno, &uuid);
5931 if (r < 0)
5932 return r;
5933 assert(devno != 0);
5934
5935 source_fd = r = device_open_from_devnum(S_IFBLK, devno, O_RDONLY|O_CLOEXEC|O_NONBLOCK, &opened);
5936 if (r < 0)
5937 return log_error_errno(r, "Failed to open automatically determined source block copy device " DEVNUM_FORMAT_STR ": %m",
5938 DEVNUM_FORMAT_VAL(devno));
5939
5940 if (fstat(source_fd, &st) < 0)
5941 return log_error_errno(errno, "Failed to stat block copy file '%s': %m", opened);
5942 } else
5943 continue;
5944
5945 if (S_ISDIR(st.st_mode)) {
5946 _cleanup_free_ char *bdev = NULL;
5947 dev_t devt;
5948
5949 /* If the file is a directory, automatically find the backing block device */
5950
5951 if (major(st.st_dev) != 0)
5952 devt = st.st_dev;
5953 else {
5954 /* Special support for btrfs */
5955 r = btrfs_get_block_device_fd(source_fd, &devt);
5956 if (r == -EUCLEAN)
5957 return btrfs_log_dev_root(LOG_ERR, r, opened);
5958 if (r < 0)
5959 return log_error_errno(r, "Unable to determine backing block device of '%s': %m", opened);
5960 }
5961
5962 safe_close(source_fd);
5963
5964 source_fd = r = device_open_from_devnum(S_IFBLK, devt, O_RDONLY|O_CLOEXEC|O_NONBLOCK, &bdev);
5965 if (r < 0)
5966 return log_error_errno(r, "Failed to open block device backing '%s': %m", opened);
5967
5968 if (fstat(source_fd, &st) < 0)
5969 return log_error_errno(errno, "Failed to stat block device '%s': %m", bdev);
5970 }
5971
5972 if (S_ISREG(st.st_mode))
5973 size = st.st_size;
5974 else if (S_ISBLK(st.st_mode)) {
5975 if (ioctl(source_fd, BLKGETSIZE64, &size) != 0)
5976 return log_error_errno(errno, "Failed to determine size of block device to copy from: %m");
5977 } else
5978 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified path to copy blocks from '%s' is not a regular file, block device or directory, refusing: %m", opened);
5979
5980 if (size <= 0)
5981 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "File to copy bytes from '%s' has zero size, refusing.", opened);
5982 if (size % 512 != 0)
5983 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "File to copy bytes from '%s' has size that is not multiple of 512, refusing.", opened);
5984
5985 p->copy_blocks_fd = TAKE_FD(source_fd);
5986 p->copy_blocks_size = size;
5987
5988 free_and_replace(p->copy_blocks_path, opened);
5989
5990 /* When copying from an existing partition copy that partitions UUID if none is configured explicitly */
5991 if (!p->new_uuid_is_set && !sd_id128_is_null(uuid)) {
5992 p->new_uuid = uuid;
5993 p->new_uuid_is_set = true;
5994 }
5995 }
5996
5997 return 0;
5998 }
5999
6000 static int fd_apparent_size(int fd, uint64_t *ret) {
6001 off_t initial = 0;
6002 uint64_t size = 0;
6003
6004 assert(fd >= 0);
6005 assert(ret);
6006
6007 initial = lseek(fd, 0, SEEK_CUR);
6008 if (initial < 0)
6009 return log_error_errno(errno, "Failed to get file offset: %m");
6010
6011 for (off_t off = 0;;) {
6012 off_t r;
6013
6014 r = lseek(fd, off, SEEK_DATA);
6015 if (r < 0 && errno == ENXIO)
6016 /* If errno == ENXIO, that means we've reached the final hole of the file and
6017 * that hole isn't followed by more data. */
6018 break;
6019 if (r < 0)
6020 return log_error_errno(errno, "Failed to seek data in file from offset %"PRIi64": %m", off);
6021
6022 off = r; /* Set the offset to the start of the data segment. */
6023
6024 /* After copying a potential hole, find the end of the data segment by looking for
6025 * the next hole. If we get ENXIO, we're at EOF. */
6026 r = lseek(fd, off, SEEK_HOLE);
6027 if (r < 0) {
6028 if (errno == ENXIO)
6029 break;
6030 return log_error_errno(errno, "Failed to seek hole in file from offset %"PRIi64": %m", off);
6031 }
6032
6033 size += r - off;
6034 off = r;
6035 }
6036
6037 if (lseek(fd, initial, SEEK_SET) < 0)
6038 return log_error_errno(errno, "Failed to reset file offset: %m");
6039
6040 *ret = size;
6041
6042 return 0;
6043 }
6044
6045 static int context_minimize(Context *context) {
6046 const char *vt = NULL;
6047 int r;
6048
6049 assert(context);
6050
6051 LIST_FOREACH(partitions, p, context->partitions) {
6052 _cleanup_(rm_rf_physical_and_freep) char *root = NULL;
6053 _cleanup_(unlink_and_freep) char *temp = NULL;
6054 _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
6055 _cleanup_strv_free_ char **extra_mkfs_options = NULL;
6056 _cleanup_close_ int fd = -EBADF;
6057 _cleanup_free_ char *hint = NULL;
6058 sd_id128_t fs_uuid;
6059 struct stat st;
6060 uint64_t fsz;
6061
6062 if (p->dropped)
6063 continue;
6064
6065 if (PARTITION_EXISTS(p)) /* Never format existing partitions */
6066 continue;
6067
6068 if (!p->format)
6069 continue;
6070
6071 if (p->copy_blocks_fd >= 0)
6072 continue;
6073
6074 if (p->minimize == MINIMIZE_OFF)
6075 continue;
6076
6077 if (!partition_needs_populate(p))
6078 continue;
6079
6080 assert(!p->copy_blocks_path);
6081
6082 (void) partition_hint(p, context->node, &hint);
6083
6084 log_info("Pre-populating %s filesystem of partition %s twice to calculate minimal partition size",
6085 p->format, strna(hint));
6086
6087 if (!vt) {
6088 r = var_tmp_dir(&vt);
6089 if (r < 0)
6090 return log_error_errno(r, "Could not determine temporary directory: %m");
6091 }
6092
6093 r = tempfn_random_child(vt, "repart", &temp);
6094 if (r < 0)
6095 return log_error_errno(r, "Failed to generate temporary file path: %m");
6096
6097 if (fstype_is_ro(p->format))
6098 fs_uuid = p->fs_uuid;
6099 else {
6100 fd = open(temp, O_CREAT|O_EXCL|O_CLOEXEC|O_RDWR|O_NOCTTY, 0600);
6101 if (fd < 0)
6102 return log_error_errno(errno, "Failed to open temporary file %s: %m", temp);
6103
6104 /* This may seem huge but it will be created sparse so it doesn't take up any space
6105 * on disk until written to. */
6106 if (ftruncate(fd, 1024ULL * 1024ULL * 1024ULL * 1024ULL) < 0)
6107 return log_error_errno(errno, "Failed to truncate temporary file to %s: %m",
6108 FORMAT_BYTES(1024ULL * 1024ULL * 1024ULL * 1024ULL));
6109
6110 if (arg_offline <= 0) {
6111 r = loop_device_make(fd, O_RDWR, 0, UINT64_MAX, context->sector_size, 0, LOCK_EX, &d);
6112 if (r < 0 && (arg_offline == 0 || (r != -ENOENT && !ERRNO_IS_PRIVILEGE(r)) || !strv_isempty(p->subvolumes)))
6113 return log_error_errno(r, "Failed to make loopback device of %s: %m", temp);
6114 }
6115
6116 /* We're going to populate this filesystem twice so use a random UUID the first time
6117 * to avoid UUID conflicts. */
6118 r = sd_id128_randomize(&fs_uuid);
6119 if (r < 0)
6120 return r;
6121 }
6122
6123 if (!d || fstype_is_ro(p->format)) {
6124 if (!mkfs_supports_root_option(p->format))
6125 return log_error_errno(SYNTHETIC_ERRNO(ENODEV),
6126 "Loop device access is required to populate %s filesystems",
6127 p->format);
6128
6129 r = partition_populate_directory(context, p, &root);
6130 if (r < 0)
6131 return r;
6132 }
6133
6134 r = mkfs_options_from_env("REPART", p->format, &extra_mkfs_options);
6135 if (r < 0)
6136 return log_error_errno(r,
6137 "Failed to determine mkfs command line options for '%s': %m",
6138 p->format);
6139
6140 r = make_filesystem(d ? d->node : temp,
6141 p->format,
6142 strempty(p->new_label),
6143 root,
6144 fs_uuid,
6145 arg_discard, /* quiet = */ false,
6146 context->fs_sector_size,
6147 extra_mkfs_options);
6148 if (r < 0)
6149 return r;
6150
6151 /* Read-only filesystems are minimal from the first try because they create and size the
6152 * loopback file for us. */
6153 if (fstype_is_ro(p->format)) {
6154 assert(fd < 0);
6155
6156 fd = open(temp, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
6157 if (fd < 0)
6158 return log_error_errno(errno, "Failed to open temporary file %s: %m", temp);
6159
6160 if (fstat(fd, &st) < 0)
6161 return log_error_errno(errno, "Failed to stat temporary file: %m");
6162
6163 log_info("Minimal partition size of %s filesystem of partition %s is %s",
6164 p->format, strna(hint), FORMAT_BYTES(st.st_size));
6165
6166 p->copy_blocks_path = TAKE_PTR(temp);
6167 p->copy_blocks_path_is_our_file = true;
6168 p->copy_blocks_fd = TAKE_FD(fd);
6169 p->copy_blocks_size = st.st_size;
6170 continue;
6171 }
6172
6173 if (!root) {
6174 assert(d);
6175
6176 r = partition_populate_filesystem(context, p, d->node);
6177 if (r < 0)
6178 return r;
6179 }
6180
6181 /* Other filesystems need to be provided with a pre-sized loopback file and will adapt to
6182 * fully occupy it. Because we gave the filesystem a 1T sparse file, we need to shrink the
6183 * filesystem down to a reasonable size again to fit it in the disk image. While there are
6184 * some filesystems that support shrinking, it doesn't always work properly (e.g. shrinking
6185 * btrfs gives us a 2.0G filesystem regardless of what we put in it). Instead, let's populate
6186 * the filesystem again, but this time, instead of providing the filesystem with a 1T sparse
6187 * loopback file, let's size the loopback file based on the actual data used by the
6188 * filesystem in the sparse file after the first attempt. This should be a good guess of the
6189 * minimal amount of space needed in the filesystem to fit all the required data.
6190 */
6191 r = fd_apparent_size(fd, &fsz);
6192 if (r < 0)
6193 return r;
6194
6195 /* Massage the size a bit because just going by actual data used in the sparse file isn't
6196 * fool-proof. */
6197 uint64_t heuristic = streq(p->format, "xfs") ? fsz : fsz / 2;
6198 fsz = round_up_size(fsz + heuristic, context->grain_size);
6199 if (minimal_size_by_fs_name(p->format) != UINT64_MAX)
6200 fsz = MAX(minimal_size_by_fs_name(p->format), fsz);
6201
6202 log_info("Minimal partition size of %s filesystem of partition %s is %s",
6203 p->format, strna(hint), FORMAT_BYTES(fsz));
6204
6205 d = loop_device_unref(d);
6206
6207 /* Erase the previous filesystem first. */
6208 if (ftruncate(fd, 0))
6209 return log_error_errno(errno, "Failed to erase temporary file: %m");
6210
6211 if (ftruncate(fd, fsz))
6212 return log_error_errno(errno, "Failed to truncate temporary file to %s: %m", FORMAT_BYTES(fsz));
6213
6214 if (arg_offline <= 0) {
6215 r = loop_device_make(fd, O_RDWR, 0, UINT64_MAX, context->sector_size, 0, LOCK_EX, &d);
6216 if (r < 0 && (arg_offline == 0 || (r != -ENOENT && !ERRNO_IS_PRIVILEGE(r)) || !strv_isempty(p->subvolumes)))
6217 return log_error_errno(r, "Failed to make loopback device of %s: %m", temp);
6218 }
6219
6220 r = make_filesystem(d ? d->node : temp,
6221 p->format,
6222 strempty(p->new_label),
6223 root,
6224 p->fs_uuid,
6225 arg_discard,
6226 /* quiet = */ false,
6227 context->fs_sector_size,
6228 extra_mkfs_options);
6229 if (r < 0)
6230 return r;
6231
6232 if (!root) {
6233 assert(d);
6234
6235 r = partition_populate_filesystem(context, p, d->node);
6236 if (r < 0)
6237 return r;
6238 }
6239
6240 if (fstat(fd, &st) < 0)
6241 return log_error_errno(errno, "Failed to stat temporary file: %m");
6242
6243 p->copy_blocks_path = TAKE_PTR(temp);
6244 p->copy_blocks_path_is_our_file = true;
6245 p->copy_blocks_fd = TAKE_FD(fd);
6246 p->copy_blocks_size = st.st_size;
6247 }
6248
6249 /* Now that we've done the data partitions, do the verity hash partitions. We do these in a separate
6250 * step because they might depend on data generated in the previous step. */
6251
6252 LIST_FOREACH(partitions, p, context->partitions) {
6253 _cleanup_(unlink_and_freep) char *temp = NULL;
6254 _cleanup_free_ char *hint = NULL;
6255 _cleanup_close_ int fd = -EBADF;
6256 struct stat st;
6257 Partition *dp;
6258
6259 if (p->dropped)
6260 continue;
6261
6262 if (PARTITION_EXISTS(p)) /* Never format existing partitions */
6263 continue;
6264
6265 if (p->minimize == MINIMIZE_OFF)
6266 continue;
6267
6268 if (p->verity != VERITY_HASH)
6269 continue;
6270
6271 assert_se(dp = p->siblings[VERITY_DATA]);
6272 assert(!dp->dropped);
6273 assert(dp->copy_blocks_path);
6274
6275 (void) partition_hint(p, context->node, &hint);
6276
6277 log_info("Pre-populating verity hash data of partition %s to calculate minimal partition size",
6278 strna(hint));
6279
6280 if (!vt) {
6281 r = var_tmp_dir(&vt);
6282 if (r < 0)
6283 return log_error_errno(r, "Could not determine temporary directory: %m");
6284 }
6285
6286 r = tempfn_random_child(vt, "repart", &temp);
6287 if (r < 0)
6288 return log_error_errno(r, "Failed to generate temporary file path: %m");
6289
6290 r = touch(temp);
6291 if (r < 0)
6292 return log_error_errno(r, "Failed to create temporary file: %m");
6293
6294 r = partition_format_verity_hash(context, p, temp, dp->copy_blocks_path);
6295 if (r < 0)
6296 return r;
6297
6298 fd = open(temp, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
6299 if (fd < 0)
6300 return log_error_errno(errno, "Failed to open temporary file %s: %m", temp);
6301
6302 if (fstat(fd, &st) < 0)
6303 return log_error_errno(r, "Failed to stat temporary file: %m");
6304
6305 log_info("Minimal partition size of verity hash partition %s is %s",
6306 strna(hint), FORMAT_BYTES(st.st_size));
6307
6308 p->copy_blocks_path = TAKE_PTR(temp);
6309 p->copy_blocks_path_is_our_file = true;
6310 p->copy_blocks_fd = TAKE_FD(fd);
6311 p->copy_blocks_size = st.st_size;
6312 }
6313
6314 return 0;
6315 }
6316
6317 static int parse_partition_types(const char *p, GptPartitionType **partitions, size_t *n_partitions) {
6318 int r;
6319
6320 assert(partitions);
6321 assert(n_partitions);
6322
6323 for (;;) {
6324 _cleanup_free_ char *name = NULL;
6325 GptPartitionType type;
6326
6327 r = extract_first_word(&p, &name, ",", EXTRACT_CUNESCAPE|EXTRACT_DONT_COALESCE_SEPARATORS);
6328 if (r == 0)
6329 break;
6330 if (r < 0)
6331 return log_error_errno(r, "Failed to extract partition type identifier or GUID: %s", p);
6332
6333 r = gpt_partition_type_from_string(name, &type);
6334 if (r < 0)
6335 return log_error_errno(r, "'%s' is not a valid partition type identifier or GUID", name);
6336
6337 if (!GREEDY_REALLOC(*partitions, *n_partitions + 1))
6338 return log_oom();
6339
6340 (*partitions)[(*n_partitions)++] = type;
6341 }
6342
6343 return 0;
6344 }
6345
6346 static int help(void) {
6347 _cleanup_free_ char *link = NULL;
6348 int r;
6349
6350 r = terminal_urlify_man("systemd-repart", "8", &link);
6351 if (r < 0)
6352 return log_oom();
6353
6354 printf("%s [OPTIONS...] [DEVICE]\n"
6355 "\n%sGrow and add partitions to partition table.%s\n\n"
6356 " -h --help Show this help\n"
6357 " --version Show package version\n"
6358 " --no-pager Do not pipe output into a pager\n"
6359 " --no-legend Do not show the headers and footers\n"
6360 " --dry-run=BOOL Whether to run dry-run operation\n"
6361 " --empty=MODE One of refuse, allow, require, force, create; controls\n"
6362 " how to handle empty disks lacking partition tables\n"
6363 " --discard=BOOL Whether to discard backing blocks for new partitions\n"
6364 " --pretty=BOOL Whether to show pretty summary before doing changes\n"
6365 " --factory-reset=BOOL Whether to remove data partitions before recreating\n"
6366 " them\n"
6367 " --can-factory-reset Test whether factory reset is defined\n"
6368 " --root=PATH Operate relative to root path\n"
6369 " --image=PATH Operate relative to image file\n"
6370 " --image-policy=POLICY\n"
6371 " Specify disk image dissection policy\n"
6372 " --definitions=DIR Find partition definitions in specified directory\n"
6373 " --key-file=PATH Key to use when encrypting partitions\n"
6374 " --private-key=PATH Private key to use when generating verity roothash\n"
6375 " signatures\n"
6376 " --certificate=PATH PEM certificate to use when generating verity\n"
6377 " roothash signatures\n"
6378 " --tpm2-device=PATH Path to TPM2 device node to use\n"
6379 " --tpm2-pcrs=PCR1+PCR2+PCR3+…\n"
6380 " TPM2 PCR indexes to use for TPM2 enrollment\n"
6381 " --tpm2-public-key=PATH\n"
6382 " Enroll signed TPM2 PCR policy against PEM public key\n"
6383 " --tpm2-public-key-pcrs=PCR1+PCR2+PCR3+…\n"
6384 " Enroll signed TPM2 PCR policy for specified TPM2 PCRs\n"
6385 " --seed=UUID 128-bit seed UUID to derive all UUIDs from\n"
6386 " --size=BYTES Grow loopback file to specified size\n"
6387 " --json=pretty|short|off\n"
6388 " Generate JSON output\n"
6389 " --split=BOOL Whether to generate split artifacts\n"
6390 " --include-partitions=PARTITION1,PARTITION2,PARTITION3,…\n"
6391 " Ignore partitions not of the specified types\n"
6392 " --exclude-partitions=PARTITION1,PARTITION2,PARTITION3,…\n"
6393 " Ignore partitions of the specified types\n"
6394 " --defer-partitions=PARTITION1,PARTITION2,PARTITION3,…\n"
6395 " Take partitions of the specified types into account\n"
6396 " but don't populate them yet\n"
6397 " --sector-size=SIZE Set the logical sector size for the image\n"
6398 " --architecture=ARCH Set the generic architecture for the image\n"
6399 " --offline=BOOL Whether to build the image offline\n"
6400 " -s --copy-source=PATH Specify the primary source tree to copy files from\n"
6401 " --copy-from=IMAGE Copy partitions from the given image(s)\n"
6402 " -S --make-ddi=sysext Make a system extension DDI\n"
6403 " -C --make-ddi=confext Make a configuration extension DDI\n"
6404 " -P --make-ddi=portable Make a portable service DDI\n"
6405 "\nSee the %s for details.\n",
6406 program_invocation_short_name,
6407 ansi_highlight(),
6408 ansi_normal(),
6409 link);
6410
6411 return 0;
6412 }
6413
6414 static int parse_argv(int argc, char *argv[]) {
6415
6416 enum {
6417 ARG_VERSION = 0x100,
6418 ARG_NO_PAGER,
6419 ARG_NO_LEGEND,
6420 ARG_DRY_RUN,
6421 ARG_EMPTY,
6422 ARG_DISCARD,
6423 ARG_FACTORY_RESET,
6424 ARG_CAN_FACTORY_RESET,
6425 ARG_ROOT,
6426 ARG_IMAGE,
6427 ARG_IMAGE_POLICY,
6428 ARG_SEED,
6429 ARG_PRETTY,
6430 ARG_DEFINITIONS,
6431 ARG_SIZE,
6432 ARG_JSON,
6433 ARG_KEY_FILE,
6434 ARG_PRIVATE_KEY,
6435 ARG_CERTIFICATE,
6436 ARG_TPM2_DEVICE,
6437 ARG_TPM2_PCRS,
6438 ARG_TPM2_PUBLIC_KEY,
6439 ARG_TPM2_PUBLIC_KEY_PCRS,
6440 ARG_SPLIT,
6441 ARG_INCLUDE_PARTITIONS,
6442 ARG_EXCLUDE_PARTITIONS,
6443 ARG_DEFER_PARTITIONS,
6444 ARG_SECTOR_SIZE,
6445 ARG_SKIP_PARTITIONS,
6446 ARG_ARCHITECTURE,
6447 ARG_OFFLINE,
6448 ARG_COPY_FROM,
6449 ARG_MAKE_DDI,
6450 };
6451
6452 static const struct option options[] = {
6453 { "help", no_argument, NULL, 'h' },
6454 { "version", no_argument, NULL, ARG_VERSION },
6455 { "no-pager", no_argument, NULL, ARG_NO_PAGER },
6456 { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
6457 { "dry-run", required_argument, NULL, ARG_DRY_RUN },
6458 { "empty", required_argument, NULL, ARG_EMPTY },
6459 { "discard", required_argument, NULL, ARG_DISCARD },
6460 { "factory-reset", required_argument, NULL, ARG_FACTORY_RESET },
6461 { "can-factory-reset", no_argument, NULL, ARG_CAN_FACTORY_RESET },
6462 { "root", required_argument, NULL, ARG_ROOT },
6463 { "image", required_argument, NULL, ARG_IMAGE },
6464 { "image-policy", required_argument, NULL, ARG_IMAGE_POLICY },
6465 { "seed", required_argument, NULL, ARG_SEED },
6466 { "pretty", required_argument, NULL, ARG_PRETTY },
6467 { "definitions", required_argument, NULL, ARG_DEFINITIONS },
6468 { "size", required_argument, NULL, ARG_SIZE },
6469 { "json", required_argument, NULL, ARG_JSON },
6470 { "key-file", required_argument, NULL, ARG_KEY_FILE },
6471 { "private-key", required_argument, NULL, ARG_PRIVATE_KEY },
6472 { "certificate", required_argument, NULL, ARG_CERTIFICATE },
6473 { "tpm2-device", required_argument, NULL, ARG_TPM2_DEVICE },
6474 { "tpm2-pcrs", required_argument, NULL, ARG_TPM2_PCRS },
6475 { "tpm2-public-key", required_argument, NULL, ARG_TPM2_PUBLIC_KEY },
6476 { "tpm2-public-key-pcrs", required_argument, NULL, ARG_TPM2_PUBLIC_KEY_PCRS },
6477 { "split", required_argument, NULL, ARG_SPLIT },
6478 { "include-partitions", required_argument, NULL, ARG_INCLUDE_PARTITIONS },
6479 { "exclude-partitions", required_argument, NULL, ARG_EXCLUDE_PARTITIONS },
6480 { "defer-partitions", required_argument, NULL, ARG_DEFER_PARTITIONS },
6481 { "sector-size", required_argument, NULL, ARG_SECTOR_SIZE },
6482 { "architecture", required_argument, NULL, ARG_ARCHITECTURE },
6483 { "offline", required_argument, NULL, ARG_OFFLINE },
6484 { "copy-from", required_argument, NULL, ARG_COPY_FROM },
6485 { "copy-source", required_argument, NULL, 's' },
6486 { "make-ddi", required_argument, NULL, ARG_MAKE_DDI },
6487 {}
6488 };
6489
6490 int c, r;
6491
6492 assert(argc >= 0);
6493 assert(argv);
6494
6495 while ((c = getopt_long(argc, argv, "hs:SCP", options, NULL)) >= 0)
6496
6497 switch (c) {
6498
6499 case 'h':
6500 return help();
6501
6502 case ARG_VERSION:
6503 return version();
6504
6505 case ARG_NO_PAGER:
6506 arg_pager_flags |= PAGER_DISABLE;
6507 break;
6508
6509 case ARG_NO_LEGEND:
6510 arg_legend = false;
6511 break;
6512
6513 case ARG_DRY_RUN:
6514 r = parse_boolean_argument("--dry-run=", optarg, &arg_dry_run);
6515 if (r < 0)
6516 return r;
6517 break;
6518
6519 case ARG_EMPTY:
6520 if (isempty(optarg)) {
6521 arg_empty = EMPTY_UNSET;
6522 break;
6523 }
6524
6525 arg_empty = empty_mode_from_string(optarg);
6526 if (arg_empty < 0)
6527 return log_error_errno(arg_empty, "Failed to parse --empty= parameter: %s", optarg);
6528
6529 break;
6530
6531 case ARG_DISCARD:
6532 r = parse_boolean_argument("--discard=", optarg, &arg_discard);
6533 if (r < 0)
6534 return r;
6535 break;
6536
6537 case ARG_FACTORY_RESET:
6538 r = parse_boolean_argument("--factory-reset=", optarg, NULL);
6539 if (r < 0)
6540 return r;
6541 arg_factory_reset = r;
6542 break;
6543
6544 case ARG_CAN_FACTORY_RESET:
6545 arg_can_factory_reset = true;
6546 break;
6547
6548 case ARG_ROOT:
6549 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_root);
6550 if (r < 0)
6551 return r;
6552 break;
6553
6554 case ARG_IMAGE:
6555 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_image);
6556 if (r < 0)
6557 return r;
6558 break;
6559
6560 case ARG_IMAGE_POLICY:
6561 r = parse_image_policy_argument(optarg, &arg_image_policy);
6562 if (r < 0)
6563 return r;
6564 break;
6565
6566 case ARG_SEED:
6567 if (isempty(optarg)) {
6568 arg_seed = SD_ID128_NULL;
6569 arg_randomize = false;
6570 } else if (streq(optarg, "random"))
6571 arg_randomize = true;
6572 else {
6573 r = sd_id128_from_string(optarg, &arg_seed);
6574 if (r < 0)
6575 return log_error_errno(r, "Failed to parse seed: %s", optarg);
6576
6577 arg_randomize = false;
6578 }
6579
6580 break;
6581
6582 case ARG_PRETTY:
6583 r = parse_boolean_argument("--pretty=", optarg, NULL);
6584 if (r < 0)
6585 return r;
6586 arg_pretty = r;
6587 break;
6588
6589 case ARG_DEFINITIONS: {
6590 _cleanup_free_ char *path = NULL;
6591 r = parse_path_argument(optarg, false, &path);
6592 if (r < 0)
6593 return r;
6594 if (strv_consume(&arg_definitions, TAKE_PTR(path)) < 0)
6595 return log_oom();
6596 break;
6597 }
6598
6599 case ARG_SIZE: {
6600 uint64_t parsed, rounded;
6601
6602 if (streq(optarg, "auto")) {
6603 arg_size = UINT64_MAX;
6604 arg_size_auto = true;
6605 break;
6606 }
6607
6608 r = parse_size(optarg, 1024, &parsed);
6609 if (r < 0)
6610 return log_error_errno(r, "Failed to parse --size= parameter: %s", optarg);
6611
6612 rounded = round_up_size(parsed, 4096);
6613 if (rounded == 0)
6614 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Specified image size too small, refusing.");
6615 if (rounded == UINT64_MAX)
6616 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Specified image size too large, refusing.");
6617
6618 if (rounded != parsed)
6619 log_warning("Specified size is not a multiple of 4096, rounding up automatically. (%" PRIu64 " %s %" PRIu64 ")",
6620 parsed, special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), rounded);
6621
6622 arg_size = rounded;
6623 arg_size_auto = false;
6624 break;
6625 }
6626
6627 case ARG_JSON:
6628 r = parse_json_argument(optarg, &arg_json_format_flags);
6629 if (r <= 0)
6630 return r;
6631
6632 break;
6633
6634 case ARG_KEY_FILE: {
6635 _cleanup_(erase_and_freep) char *k = NULL;
6636 size_t n = 0;
6637
6638 r = read_full_file_full(
6639 AT_FDCWD, optarg, UINT64_MAX, SIZE_MAX,
6640 READ_FULL_FILE_SECURE|READ_FULL_FILE_WARN_WORLD_READABLE|READ_FULL_FILE_CONNECT_SOCKET,
6641 NULL,
6642 &k, &n);
6643 if (r < 0)
6644 return log_error_errno(r, "Failed to read key file '%s': %m", optarg);
6645
6646 erase_and_free(arg_key);
6647 arg_key = TAKE_PTR(k);
6648 arg_key_size = n;
6649 break;
6650 }
6651
6652 case ARG_PRIVATE_KEY: {
6653 _cleanup_(erase_and_freep) char *k = NULL;
6654 size_t n = 0;
6655
6656 r = read_full_file_full(
6657 AT_FDCWD, optarg, UINT64_MAX, SIZE_MAX,
6658 READ_FULL_FILE_SECURE|READ_FULL_FILE_WARN_WORLD_READABLE|READ_FULL_FILE_CONNECT_SOCKET,
6659 NULL,
6660 &k, &n);
6661 if (r < 0)
6662 return log_error_errno(r, "Failed to read key file '%s': %m", optarg);
6663
6664 EVP_PKEY_free(arg_private_key);
6665 arg_private_key = NULL;
6666 r = parse_private_key(k, n, &arg_private_key);
6667 if (r < 0)
6668 return r;
6669 break;
6670 }
6671
6672 case ARG_CERTIFICATE: {
6673 _cleanup_free_ char *cert = NULL;
6674 size_t n = 0;
6675
6676 r = read_full_file_full(
6677 AT_FDCWD, optarg, UINT64_MAX, SIZE_MAX,
6678 READ_FULL_FILE_CONNECT_SOCKET,
6679 NULL,
6680 &cert, &n);
6681 if (r < 0)
6682 return log_error_errno(r, "Failed to read certificate file '%s': %m", optarg);
6683
6684 X509_free(arg_certificate);
6685 arg_certificate = NULL;
6686 r = parse_x509_certificate(cert, n, &arg_certificate);
6687 if (r < 0)
6688 return r;
6689 break;
6690 }
6691
6692 case ARG_TPM2_DEVICE: {
6693 _cleanup_free_ char *device = NULL;
6694
6695 if (streq(optarg, "list"))
6696 return tpm2_list_devices();
6697
6698 if (!streq(optarg, "auto")) {
6699 device = strdup(optarg);
6700 if (!device)
6701 return log_oom();
6702 }
6703
6704 free(arg_tpm2_device);
6705 arg_tpm2_device = TAKE_PTR(device);
6706 break;
6707 }
6708
6709 case ARG_TPM2_PCRS:
6710 arg_tpm2_hash_pcr_values_use_default = false;
6711 r = tpm2_parse_pcr_argument_append(optarg, &arg_tpm2_hash_pcr_values, &arg_tpm2_n_hash_pcr_values);
6712 if (r < 0)
6713 return r;
6714
6715 break;
6716
6717 case ARG_TPM2_PUBLIC_KEY:
6718 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_tpm2_public_key);
6719 if (r < 0)
6720 return r;
6721
6722 break;
6723
6724 case ARG_TPM2_PUBLIC_KEY_PCRS:
6725 arg_tpm2_public_key_pcr_mask_use_default = false;
6726 r = tpm2_parse_pcr_argument_to_mask(optarg, &arg_tpm2_public_key_pcr_mask);
6727 if (r < 0)
6728 return r;
6729
6730 break;
6731
6732 case ARG_SPLIT:
6733 r = parse_boolean_argument("--split=", optarg, NULL);
6734 if (r < 0)
6735 return r;
6736
6737 arg_split = r;
6738 break;
6739
6740 case ARG_INCLUDE_PARTITIONS:
6741 if (arg_filter_partitions_type == FILTER_PARTITIONS_EXCLUDE)
6742 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6743 "Combination of --include-partitions= and --exclude-partitions= is invalid.");
6744
6745 r = parse_partition_types(optarg, &arg_filter_partitions, &arg_n_filter_partitions);
6746 if (r < 0)
6747 return r;
6748
6749 arg_filter_partitions_type = FILTER_PARTITIONS_INCLUDE;
6750
6751 break;
6752
6753 case ARG_EXCLUDE_PARTITIONS:
6754 if (arg_filter_partitions_type == FILTER_PARTITIONS_INCLUDE)
6755 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6756 "Combination of --include-partitions= and --exclude-partitions= is invalid.");
6757
6758 r = parse_partition_types(optarg, &arg_filter_partitions, &arg_n_filter_partitions);
6759 if (r < 0)
6760 return r;
6761
6762 arg_filter_partitions_type = FILTER_PARTITIONS_EXCLUDE;
6763
6764 break;
6765
6766 case ARG_DEFER_PARTITIONS:
6767 r = parse_partition_types(optarg, &arg_defer_partitions, &arg_n_defer_partitions);
6768 if (r < 0)
6769 return r;
6770
6771 break;
6772
6773 case ARG_SECTOR_SIZE:
6774 r = parse_sector_size(optarg, &arg_sector_size);
6775 if (r < 0)
6776 return r;
6777
6778 break;
6779
6780 case ARG_ARCHITECTURE:
6781 r = architecture_from_string(optarg);
6782 if (r < 0)
6783 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid architecture '%s'", optarg);
6784
6785 arg_architecture = r;
6786 break;
6787
6788 case ARG_OFFLINE:
6789 if (streq(optarg, "auto"))
6790 arg_offline = -1;
6791 else {
6792 r = parse_boolean_argument("--offline=", optarg, NULL);
6793 if (r < 0)
6794 return r;
6795
6796 arg_offline = r;
6797 }
6798
6799 break;
6800
6801 case ARG_COPY_FROM: {
6802 _cleanup_free_ char *p = NULL;
6803
6804 r = parse_path_argument(optarg, /* suppress_root= */ false, &p);
6805 if (r < 0)
6806 return r;
6807
6808 if (strv_consume(&arg_copy_from, TAKE_PTR(p)) < 0)
6809 return log_oom();
6810
6811 break;
6812 }
6813
6814 case 's':
6815 r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_copy_source);
6816 if (r < 0)
6817 return r;
6818 break;
6819
6820 case ARG_MAKE_DDI:
6821 if (!filename_is_valid(optarg))
6822 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid DDI type: %s", optarg);
6823
6824 r = free_and_strdup_warn(&arg_make_ddi, optarg);
6825 if (r < 0)
6826 return r;
6827 break;
6828
6829 case 'S':
6830 r = free_and_strdup_warn(&arg_make_ddi, "sysext");
6831 if (r < 0)
6832 return r;
6833 break;
6834
6835 case 'C':
6836 r = free_and_strdup_warn(&arg_make_ddi, "confext");
6837 if (r < 0)
6838 return r;
6839 break;
6840
6841 case 'P':
6842 r = free_and_strdup_warn(&arg_make_ddi, "portable");
6843 if (r < 0)
6844 return r;
6845 break;
6846
6847 case '?':
6848 return -EINVAL;
6849
6850 default:
6851 assert_not_reached();
6852 }
6853
6854 if (argc - optind > 1)
6855 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6856 "Expected at most one argument, the path to the block device or image file.");
6857
6858 if (arg_make_ddi) {
6859 if (arg_definitions)
6860 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Combination of --make-ddi= and --definitions= is not supported.");
6861 if (!IN_SET(arg_empty, EMPTY_UNSET, EMPTY_CREATE))
6862 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Combination of --make-ddi= and --empty=%s is not supported.", empty_mode_to_string(arg_empty));
6863
6864 /* Imply automatic sizing in DDI mode */
6865 if (arg_size == UINT64_MAX)
6866 arg_size_auto = true;
6867
6868 if (!arg_copy_source)
6869 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No --copy-source= specified, refusing.");
6870
6871 r = dir_is_empty(arg_copy_source, /* ignore_hidden_or_backup= */ false);
6872 if (r < 0)
6873 return log_error_errno(r, "Failed to determine if '%s' is empty: %m", arg_copy_source);
6874 if (r > 0)
6875 return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Source directory '%s' is empty, refusing to create empty image.", arg_copy_source);
6876
6877 if (sd_id128_is_null(arg_seed) && !arg_randomize) {
6878 /* We don't want that /etc/machine-id leaks into any image built this way, hence
6879 * let's randomize the seed if not specified explicitly */
6880 log_notice("No seed value specified, randomizing generated UUIDs, resulting image will not be reproducible.");
6881 arg_randomize = true;
6882 }
6883
6884 arg_empty = EMPTY_CREATE;
6885 }
6886
6887 if (arg_empty == EMPTY_UNSET) /* default to refuse mode, if not otherwise specified */
6888 arg_empty = EMPTY_REFUSE;
6889
6890 if (arg_factory_reset > 0 && IN_SET(arg_empty, EMPTY_FORCE, EMPTY_REQUIRE, EMPTY_CREATE))
6891 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6892 "Combination of --factory-reset=yes and --empty=force/--empty=require/--empty=create is invalid.");
6893
6894 if (arg_can_factory_reset)
6895 arg_dry_run = true; /* When --can-factory-reset is specified we don't make changes, hence
6896 * non-dry-run mode makes no sense. Thus, imply dry run mode so that we
6897 * open things strictly read-only. */
6898 else if (arg_empty == EMPTY_CREATE)
6899 arg_dry_run = false; /* Imply --dry-run=no if we create the loopback file anew. After all we
6900 * cannot really break anyone's partition tables that way. */
6901
6902 /* Disable pager once we are not just reviewing, but doing things. */
6903 if (!arg_dry_run)
6904 arg_pager_flags |= PAGER_DISABLE;
6905
6906 if (arg_empty == EMPTY_CREATE && arg_size == UINT64_MAX && !arg_size_auto)
6907 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6908 "If --empty=create is specified, --size= must be specified, too.");
6909
6910 if (arg_image && arg_root)
6911 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Please specify either --root= or --image=, the combination of both is not supported.");
6912 else if (!arg_image && !arg_root && in_initrd()) {
6913
6914 /* By default operate on /sysusr/ or /sysroot/ when invoked in the initrd. We prefer the
6915 * former, if it is mounted, so that we have deterministic behaviour on systems where /usr/
6916 * is vendor-supplied but the root fs formatted on first boot. */
6917 r = path_is_mount_point("/sysusr/usr", NULL, 0);
6918 if (r <= 0) {
6919 if (r < 0 && r != -ENOENT)
6920 log_debug_errno(r, "Unable to determine whether /sysusr/usr is a mount point, assuming it is not: %m");
6921
6922 arg_root = strdup("/sysroot");
6923 } else
6924 arg_root = strdup("/sysusr");
6925 if (!arg_root)
6926 return log_oom();
6927 }
6928
6929 arg_node = argc > optind ? argv[optind] : NULL;
6930
6931 if (IN_SET(arg_empty, EMPTY_FORCE, EMPTY_REQUIRE, EMPTY_CREATE) && !arg_node && !arg_image)
6932 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6933 "A path to a device node or image file must be specified when --make-ddi=, --empty=force, --empty=require or --empty=create are used.");
6934
6935 if (arg_split && !arg_node)
6936 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
6937 "A path to an image file must be specified when --split is used.");
6938
6939 if (arg_tpm2_public_key_pcr_mask_use_default && arg_tpm2_public_key)
6940 arg_tpm2_public_key_pcr_mask = INDEX_TO_MASK(uint32_t, TPM2_PCR_KERNEL_BOOT);
6941
6942 if (arg_tpm2_hash_pcr_values_use_default && !GREEDY_REALLOC_APPEND(
6943 arg_tpm2_hash_pcr_values,
6944 arg_tpm2_n_hash_pcr_values,
6945 &TPM2_PCR_VALUE_MAKE(TPM2_PCR_INDEX_DEFAULT, /* hash= */ 0, /* value= */ {}),
6946 1))
6947 return log_oom();
6948
6949 if (arg_pretty < 0 && isatty(STDOUT_FILENO))
6950 arg_pretty = true;
6951
6952 if (arg_architecture >= 0) {
6953 FOREACH_ARRAY(p, arg_filter_partitions, arg_n_filter_partitions)
6954 *p = gpt_partition_type_override_architecture(*p, arg_architecture);
6955
6956 FOREACH_ARRAY(p, arg_defer_partitions, arg_n_defer_partitions)
6957 *p = gpt_partition_type_override_architecture(*p, arg_architecture);
6958 }
6959
6960 return 1;
6961 }
6962
6963 static int parse_proc_cmdline_factory_reset(void) {
6964 bool b;
6965 int r;
6966
6967 if (arg_factory_reset >= 0) /* Never override what is specified on the process command line */
6968 return 0;
6969
6970 if (!in_initrd()) /* Never honour kernel command line factory reset request outside of the initrd */
6971 return 0;
6972
6973 r = proc_cmdline_get_bool("systemd.factory_reset", /* flags = */ 0, &b);
6974 if (r < 0)
6975 return log_error_errno(r, "Failed to parse systemd.factory_reset kernel command line argument: %m");
6976 if (r > 0) {
6977 arg_factory_reset = b;
6978
6979 if (b)
6980 log_notice("Honouring factory reset requested via kernel command line.");
6981 }
6982
6983 return 0;
6984 }
6985
6986 static int parse_efi_variable_factory_reset(void) {
6987 _cleanup_free_ char *value = NULL;
6988 int r;
6989
6990 if (arg_factory_reset >= 0) /* Never override what is specified on the process command line */
6991 return 0;
6992
6993 if (!in_initrd()) /* Never honour EFI variable factory reset request outside of the initrd */
6994 return 0;
6995
6996 r = efi_get_variable_string(EFI_SYSTEMD_VARIABLE(FactoryReset), &value);
6997 if (r < 0) {
6998 if (r == -ENOENT || ERRNO_IS_NOT_SUPPORTED(r))
6999 return 0;
7000 return log_error_errno(r, "Failed to read EFI variable FactoryReset: %m");
7001 }
7002
7003 r = parse_boolean(value);
7004 if (r < 0)
7005 return log_error_errno(r, "Failed to parse EFI variable FactoryReset: %m");
7006
7007 arg_factory_reset = r;
7008 if (r)
7009 log_notice("Factory reset requested via EFI variable FactoryReset.");
7010
7011 return 0;
7012 }
7013
7014 static int remove_efi_variable_factory_reset(void) {
7015 int r;
7016
7017 r = efi_set_variable(EFI_SYSTEMD_VARIABLE(FactoryReset), NULL, 0);
7018 if (r < 0) {
7019 if (r == -ENOENT || ERRNO_IS_NOT_SUPPORTED(r))
7020 return 0;
7021 return log_error_errno(r, "Failed to remove EFI variable FactoryReset: %m");
7022 }
7023
7024 log_info("Successfully unset EFI variable FactoryReset.");
7025 return 0;
7026 }
7027
7028 static int acquire_root_devno(
7029 const char *p,
7030 const char *root,
7031 int mode,
7032 char **ret,
7033 int *ret_fd) {
7034
7035 _cleanup_free_ char *found_path = NULL, *node = NULL;
7036 dev_t devno, fd_devno = MODE_INVALID;
7037 _cleanup_close_ int fd = -EBADF;
7038 struct stat st;
7039 int r;
7040
7041 assert(p);
7042 assert(ret);
7043 assert(ret_fd);
7044
7045 fd = chase_and_open(p, root, CHASE_PREFIX_ROOT, mode, &found_path);
7046 if (fd < 0)
7047 return fd;
7048
7049 if (fstat(fd, &st) < 0)
7050 return -errno;
7051
7052 if (S_ISREG(st.st_mode)) {
7053 *ret = TAKE_PTR(found_path);
7054 *ret_fd = TAKE_FD(fd);
7055 return 0;
7056 }
7057
7058 if (S_ISBLK(st.st_mode)) {
7059 /* Refuse referencing explicit block devices if a root dir is specified, after all we should
7060 * not be able to leave the image the root path constrains us to. */
7061 if (root)
7062 return -EPERM;
7063
7064 fd_devno = devno = st.st_rdev;
7065 } else if (S_ISDIR(st.st_mode)) {
7066
7067 devno = st.st_dev;
7068 if (major(devno) == 0) {
7069 r = btrfs_get_block_device_fd(fd, &devno);
7070 if (r == -ENOTTY) /* not btrfs */
7071 return -ENODEV;
7072 if (r < 0)
7073 return r;
7074 }
7075 } else
7076 return -ENOTBLK;
7077
7078 /* From dm-crypt to backing partition */
7079 r = block_get_originating(devno, &devno);
7080 if (r == -ENOENT)
7081 log_debug_errno(r, "Device '%s' has no dm-crypt/dm-verity device, no need to look for underlying block device.", p);
7082 else if (r < 0)
7083 log_debug_errno(r, "Failed to find underlying block device for '%s', ignoring: %m", p);
7084
7085 /* From partition to whole disk containing it */
7086 r = block_get_whole_disk(devno, &devno);
7087 if (r < 0)
7088 log_debug_errno(r, "Failed to find whole disk block device for '%s', ignoring: %m", p);
7089
7090 r = devname_from_devnum(S_IFBLK, devno, &node);
7091 if (r < 0)
7092 return log_debug_errno(r, "Failed to determine canonical path for '%s': %m", p);
7093
7094 /* Only if we still look at the same block device we can reuse the fd. Otherwise return an
7095 * invalidated fd. */
7096 if (fd_devno != MODE_INVALID && fd_devno == devno) {
7097 /* Tell udev not to interfere while we are processing the device */
7098 if (flock(fd, arg_dry_run ? LOCK_SH : LOCK_EX) < 0)
7099 return log_error_errno(errno, "Failed to lock device '%s': %m", node);
7100
7101 *ret_fd = TAKE_FD(fd);
7102 } else
7103 *ret_fd = -EBADF;
7104
7105 *ret = TAKE_PTR(node);
7106 return 0;
7107 }
7108
7109 static int find_root(Context *context) {
7110 _cleanup_free_ char *device = NULL;
7111 int r;
7112
7113 assert(context);
7114
7115 if (arg_node) {
7116 if (arg_empty == EMPTY_CREATE) {
7117 _cleanup_close_ int fd = -EBADF;
7118 _cleanup_free_ char *s = NULL;
7119
7120 s = strdup(arg_node);
7121 if (!s)
7122 return log_oom();
7123
7124 fd = open(arg_node, O_RDONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOFOLLOW, 0666);
7125 if (fd < 0)
7126 return log_error_errno(errno, "Failed to create '%s': %m", arg_node);
7127
7128 context->node = TAKE_PTR(s);
7129 context->node_is_our_file = true;
7130 context->backing_fd = TAKE_FD(fd);
7131 return 0;
7132 }
7133
7134 /* Note that we don't specify a root argument here: if the user explicitly configured a node
7135 * we'll take it relative to the host, not the image */
7136 r = acquire_root_devno(arg_node, NULL, O_RDONLY|O_CLOEXEC, &context->node, &context->backing_fd);
7137 if (r == -EUCLEAN)
7138 return btrfs_log_dev_root(LOG_ERR, r, arg_node);
7139 if (r < 0)
7140 return log_error_errno(r, "Failed to open file or determine backing device of %s: %m", arg_node);
7141
7142 return 0;
7143 }
7144
7145 assert(IN_SET(arg_empty, EMPTY_REFUSE, EMPTY_ALLOW));
7146
7147 /* If the root mount has been replaced by some form of volatile file system (overlayfs), the
7148 * original root block device node is symlinked in /run/systemd/volatile-root. Let's read that
7149 * here. */
7150 r = readlink_malloc("/run/systemd/volatile-root", &device);
7151 if (r == -ENOENT) { /* volatile-root not found */
7152 /* Let's search for the root device. We look for two cases here: first in /, and then in /usr. The
7153 * latter we check for cases where / is a tmpfs and only /usr is an actual persistent block device
7154 * (think: volatile setups) */
7155
7156 FOREACH_STRING(p, "/", "/usr") {
7157
7158 r = acquire_root_devno(p, arg_root, O_RDONLY|O_DIRECTORY|O_CLOEXEC, &context->node,
7159 &context->backing_fd);
7160 if (r < 0) {
7161 if (r == -EUCLEAN)
7162 return btrfs_log_dev_root(LOG_ERR, r, p);
7163 if (r != -ENODEV)
7164 return log_error_errno(r, "Failed to determine backing device of %s: %m", p);
7165 } else
7166 return 0;
7167 }
7168 } else if (r < 0)
7169 return log_error_errno(r, "Failed to read symlink /run/systemd/volatile-root: %m");
7170 else {
7171 r = acquire_root_devno(device, NULL, O_RDONLY|O_CLOEXEC, &context->node, &context->backing_fd);
7172 if (r == -EUCLEAN)
7173 return btrfs_log_dev_root(LOG_ERR, r, device);
7174 if (r < 0)
7175 return log_error_errno(r, "Failed to open file or determine backing device of %s: %m", device);
7176
7177 return 0;
7178 }
7179
7180 return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "Failed to discover root block device.");
7181 }
7182
7183 static int resize_pt(int fd, uint64_t sector_size) {
7184 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
7185 int r;
7186
7187 /* After resizing the backing file we need to resize the partition table itself too, so that it takes
7188 * possession of the enlarged backing file. For this it suffices to open the device with libfdisk and
7189 * immediately write it again, with no changes. */
7190
7191 r = fdisk_new_context_at(fd, /* path= */ NULL, /* read_only= */ false, sector_size, &c);
7192 if (r < 0)
7193 return log_error_errno(r, "Failed to open device '%s': %m", FORMAT_PROC_FD_PATH(fd));
7194
7195 r = fdisk_has_label(c);
7196 if (r < 0)
7197 return log_error_errno(r, "Failed to determine whether disk '%s' has a disk label: %m", FORMAT_PROC_FD_PATH(fd));
7198 if (r == 0) {
7199 log_debug("Not resizing partition table, as there currently is none.");
7200 return 0;
7201 }
7202
7203 r = fdisk_write_disklabel(c);
7204 if (r < 0)
7205 return log_error_errno(r, "Failed to write resized partition table: %m");
7206
7207 log_info("Resized partition table.");
7208 return 1;
7209 }
7210
7211 static int resize_backing_fd(
7212 const char *node, /* The primary way we access the disk image to operate on */
7213 int *fd, /* An O_RDONLY fd referring to that inode */
7214 const char *backing_file, /* If the above refers to a loopback device, the backing regular file for that, which we can grow */
7215 LoopDevice *loop_device,
7216 uint64_t sector_size) {
7217
7218 _cleanup_close_ int writable_fd = -EBADF;
7219 uint64_t current_size;
7220 struct stat st;
7221 int r;
7222
7223 assert(node);
7224 assert(fd);
7225
7226 if (arg_size == UINT64_MAX) /* Nothing to do */
7227 return 0;
7228
7229 if (*fd < 0) {
7230 /* Open the file if we haven't opened it yet. Note that we open it read-only here, just to
7231 * keep a reference to the file we can pass around. */
7232 *fd = open(node, O_RDONLY|O_CLOEXEC);
7233 if (*fd < 0)
7234 return log_error_errno(errno, "Failed to open '%s' in order to adjust size: %m", node);
7235 }
7236
7237 if (fstat(*fd, &st) < 0)
7238 return log_error_errno(errno, "Failed to stat '%s': %m", node);
7239
7240 if (S_ISBLK(st.st_mode)) {
7241 if (!backing_file)
7242 return log_error_errno(SYNTHETIC_ERRNO(EBADF), "Cannot resize block device '%s'.", node);
7243
7244 assert(loop_device);
7245
7246 if (ioctl(*fd, BLKGETSIZE64, &current_size) < 0)
7247 return log_error_errno(errno, "Failed to determine size of block device %s: %m", node);
7248 } else {
7249 r = stat_verify_regular(&st);
7250 if (r < 0)
7251 return log_error_errno(r, "Specified path '%s' is not a regular file or loopback block device, cannot resize: %m", node);
7252
7253 assert(!backing_file);
7254 assert(!loop_device);
7255 current_size = st.st_size;
7256 }
7257
7258 if (current_size >= arg_size) {
7259 log_info("File '%s' already is of requested size or larger, not growing. (%s >= %s)",
7260 node, FORMAT_BYTES(current_size), FORMAT_BYTES(arg_size));
7261 return 0;
7262 }
7263
7264 if (S_ISBLK(st.st_mode)) {
7265 assert(backing_file);
7266
7267 /* This is a loopback device. We can't really grow those directly, but we can grow the
7268 * backing file, hence let's do that. */
7269
7270 writable_fd = open(backing_file, O_WRONLY|O_CLOEXEC|O_NONBLOCK);
7271 if (writable_fd < 0)
7272 return log_error_errno(errno, "Failed to open backing file '%s': %m", backing_file);
7273
7274 if (fstat(writable_fd, &st) < 0)
7275 return log_error_errno(errno, "Failed to stat() backing file '%s': %m", backing_file);
7276
7277 r = stat_verify_regular(&st);
7278 if (r < 0)
7279 return log_error_errno(r, "Backing file '%s' of block device is not a regular file: %m", backing_file);
7280
7281 if ((uint64_t) st.st_size != current_size)
7282 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
7283 "Size of backing file '%s' of loopback block device '%s' don't match, refusing.",
7284 node, backing_file);
7285 } else {
7286 assert(S_ISREG(st.st_mode));
7287 assert(!backing_file);
7288
7289 /* The file descriptor is read-only. In order to grow the file we need to have a writable fd. We
7290 * reopen the file for that temporarily. We keep the writable fd only open for this operation though,
7291 * as fdisk can't accept it anyway. */
7292
7293 writable_fd = fd_reopen(*fd, O_WRONLY|O_CLOEXEC);
7294 if (writable_fd < 0)
7295 return log_error_errno(writable_fd, "Failed to reopen backing file '%s' writable: %m", node);
7296 }
7297
7298 if (!arg_discard) {
7299 if (fallocate(writable_fd, 0, 0, arg_size) < 0) {
7300 if (!ERRNO_IS_NOT_SUPPORTED(errno))
7301 return log_error_errno(errno, "Failed to grow '%s' from %s to %s by allocation: %m",
7302 node, FORMAT_BYTES(current_size), FORMAT_BYTES(arg_size));
7303
7304 /* Fallback to truncation, if fallocate() is not supported. */
7305 log_debug("Backing file system does not support fallocate(), falling back to ftruncate().");
7306 } else {
7307 if (current_size == 0) /* Likely regular file just created by us */
7308 log_info("Allocated %s for '%s'.", FORMAT_BYTES(arg_size), node);
7309 else
7310 log_info("File '%s' grown from %s to %s by allocation.",
7311 node, FORMAT_BYTES(current_size), FORMAT_BYTES(arg_size));
7312
7313 goto done;
7314 }
7315 }
7316
7317 if (ftruncate(writable_fd, arg_size) < 0)
7318 return log_error_errno(errno, "Failed to grow '%s' from %s to %s by truncation: %m",
7319 node, FORMAT_BYTES(current_size), FORMAT_BYTES(arg_size));
7320
7321 if (current_size == 0) /* Likely regular file just created by us */
7322 log_info("Sized '%s' to %s.", node, FORMAT_BYTES(arg_size));
7323 else
7324 log_info("File '%s' grown from %s to %s by truncation.",
7325 node, FORMAT_BYTES(current_size), FORMAT_BYTES(arg_size));
7326
7327 done:
7328 r = resize_pt(writable_fd, sector_size);
7329 if (r < 0)
7330 return r;
7331
7332 if (loop_device) {
7333 r = loop_device_refresh_size(loop_device, UINT64_MAX, arg_size);
7334 if (r < 0)
7335 return log_error_errno(r, "Failed to update loop device size: %m");
7336 }
7337
7338 return 1;
7339 }
7340
7341 static int determine_auto_size(Context *c) {
7342 uint64_t sum;
7343
7344 assert(c);
7345
7346 sum = round_up_size(GPT_METADATA_SIZE, 4096);
7347
7348 LIST_FOREACH(partitions, p, c->partitions) {
7349 uint64_t m;
7350
7351 if (p->dropped)
7352 continue;
7353
7354 m = partition_min_size_with_padding(c, p);
7355 if (m > UINT64_MAX - sum)
7356 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Image would grow too large, refusing.");
7357
7358 sum += m;
7359 }
7360
7361 if (c->total != UINT64_MAX)
7362 /* Image already allocated? Then show its size. */
7363 log_info("Automatically determined minimal disk image size as %s, current image size is %s.",
7364 FORMAT_BYTES(sum), FORMAT_BYTES(c->total));
7365 else
7366 /* If the image is being created right now, then it has no previous size, suppress any comment about it hence. */
7367 log_info("Automatically determined minimal disk image size as %s.",
7368 FORMAT_BYTES(sum));
7369
7370 arg_size = sum;
7371 return 0;
7372 }
7373
7374 static int run(int argc, char *argv[]) {
7375 _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
7376 _cleanup_(umount_and_freep) char *mounted_dir = NULL;
7377 _cleanup_(context_freep) Context* context = NULL;
7378 bool node_is_our_loop = false;
7379 int r;
7380
7381 log_show_color(true);
7382 log_parse_environment();
7383 log_open();
7384
7385 r = parse_argv(argc, argv);
7386 if (r <= 0)
7387 return r;
7388
7389 r = parse_proc_cmdline_factory_reset();
7390 if (r < 0)
7391 return r;
7392
7393 r = parse_efi_variable_factory_reset();
7394 if (r < 0)
7395 return r;
7396
7397 #if HAVE_LIBCRYPTSETUP
7398 cryptsetup_enable_logging(NULL);
7399 #endif
7400
7401 if (arg_image) {
7402 assert(!arg_root);
7403
7404 /* Mount this strictly read-only: we shall modify the partition table, not the file
7405 * systems */
7406 r = mount_image_privately_interactively(
7407 arg_image,
7408 arg_image_policy,
7409 DISSECT_IMAGE_MOUNT_READ_ONLY |
7410 (arg_node ? DISSECT_IMAGE_DEVICE_READ_ONLY : 0) | /* If a different node to make changes to is specified let's open the device in read-only mode) */
7411 DISSECT_IMAGE_GPT_ONLY |
7412 DISSECT_IMAGE_RELAX_VAR_CHECK |
7413 DISSECT_IMAGE_USR_NO_ROOT |
7414 DISSECT_IMAGE_REQUIRE_ROOT,
7415 &mounted_dir,
7416 /* ret_dir_fd= */ NULL,
7417 &loop_device);
7418 if (r < 0)
7419 return r;
7420
7421 arg_root = strdup(mounted_dir);
7422 if (!arg_root)
7423 return log_oom();
7424
7425 if (!arg_node) {
7426 arg_node = strdup(loop_device->node);
7427 if (!arg_node)
7428 return log_oom();
7429
7430 /* Remember that the device we are about to manipulate is actually the one we
7431 * allocated here, and thus to increase its backing file we know what to do */
7432 node_is_our_loop = true;
7433 }
7434 }
7435
7436 if (!arg_copy_source && arg_root) {
7437 /* If no explicit copy source is specified, then use --root=/--image= */
7438 arg_copy_source = strdup(arg_root);
7439 if (!arg_copy_source)
7440 return log_oom();
7441 }
7442
7443 context = context_new(arg_seed);
7444 if (!context)
7445 return log_oom();
7446
7447 r = context_copy_from(context);
7448 if (r < 0)
7449 return r;
7450
7451 if (arg_make_ddi) {
7452 _cleanup_free_ char *d = NULL, *dp = NULL;
7453 assert(!arg_definitions);
7454
7455 d = strjoin(arg_make_ddi, ".repart.d/");
7456 if (!d)
7457 return log_oom();
7458
7459 r = search_and_access(d, F_OK, arg_root, CONF_PATHS_USR_STRV("systemd/repart/definitions"), &dp);
7460 if (r < 0)
7461 return log_error_errno(errno, "DDI type '%s' is not defined: %m", arg_make_ddi);
7462
7463 if (strv_consume(&arg_definitions, TAKE_PTR(dp)) < 0)
7464 return log_oom();
7465 } else
7466 strv_uniq(arg_definitions);
7467
7468 r = context_read_definitions(context);
7469 if (r < 0)
7470 return r;
7471
7472 r = find_root(context);
7473 if (r == -ENODEV)
7474 return 76; /* Special return value which means "Root block device not found, so not doing
7475 * anything". This isn't really an error when called at boot. */
7476 if (r < 0)
7477 return r;
7478
7479 if (arg_size != UINT64_MAX) {
7480 r = resize_backing_fd(
7481 context->node,
7482 &context->backing_fd,
7483 node_is_our_loop ? arg_image : NULL,
7484 node_is_our_loop ? loop_device : NULL,
7485 context->sector_size);
7486 if (r < 0)
7487 return r;
7488 }
7489
7490 r = context_load_partition_table(context);
7491 if (r == -EHWPOISON)
7492 return 77; /* Special return value which means "Not GPT, so not doing anything". This isn't
7493 * really an error when called at boot. */
7494 if (r < 0)
7495 return r;
7496 context->from_scratch = r > 0; /* Starting from scratch */
7497
7498 if (arg_can_factory_reset) {
7499 r = context_can_factory_reset(context);
7500 if (r < 0)
7501 return r;
7502 if (r == 0)
7503 return EXIT_FAILURE;
7504
7505 return 0;
7506 }
7507
7508 r = context_factory_reset(context);
7509 if (r < 0)
7510 return r;
7511 if (r > 0) {
7512 /* We actually did a factory reset! */
7513 r = remove_efi_variable_factory_reset();
7514 if (r < 0)
7515 return r;
7516
7517 /* Reload the reduced partition table */
7518 context_unload_partition_table(context);
7519 r = context_load_partition_table(context);
7520 if (r < 0)
7521 return r;
7522 }
7523
7524 r = context_read_seed(context, arg_root);
7525 if (r < 0)
7526 return r;
7527
7528 /* Make sure each partition has a unique UUID and unique label */
7529 r = context_acquire_partition_uuids_and_labels(context);
7530 if (r < 0)
7531 return r;
7532
7533 /* Open all files to copy blocks from now, since we want to take their size into consideration */
7534 r = context_open_copy_block_paths(
7535 context,
7536 loop_device ? loop_device->devno : /* if --image= is specified, only allow partitions on the loopback device */
7537 arg_root && !arg_image ? 0 : /* if --root= is specified, don't accept any block device */
7538 (dev_t) -1); /* if neither is specified, make no restrictions */
7539 if (r < 0)
7540 return r;
7541
7542 r = context_minimize(context);
7543 if (r < 0)
7544 return r;
7545
7546 if (arg_size_auto) {
7547 r = determine_auto_size(context);
7548 if (r < 0)
7549 return r;
7550
7551 /* Flush out everything again, and let's grow the file first, then start fresh */
7552 context_unload_partition_table(context);
7553
7554 assert(arg_size != UINT64_MAX);
7555 r = resize_backing_fd(
7556 context->node,
7557 &context->backing_fd,
7558 node_is_our_loop ? arg_image : NULL,
7559 node_is_our_loop ? loop_device : NULL,
7560 context->sector_size);
7561 if (r < 0)
7562 return r;
7563
7564 r = context_load_partition_table(context);
7565 if (r < 0)
7566 return r;
7567 }
7568
7569 /* First try to fit new partitions in, dropping by priority until it fits */
7570 for (;;) {
7571 uint64_t largest_free_area;
7572
7573 if (context_allocate_partitions(context, &largest_free_area))
7574 break; /* Success! */
7575
7576 if (!context_drop_or_foreignize_one_priority(context)) {
7577 r = log_error_errno(SYNTHETIC_ERRNO(ENOSPC),
7578 "Can't fit requested partitions into available free space (%s), refusing.",
7579 FORMAT_BYTES(largest_free_area));
7580 determine_auto_size(context);
7581 return r;
7582 }
7583 }
7584
7585 /* Now assign free space according to the weight logic */
7586 r = context_grow_partitions(context);
7587 if (r < 0)
7588 return r;
7589
7590 /* Now calculate where each new partition gets placed */
7591 context_place_partitions(context);
7592
7593 (void) context_dump(context, /*late=*/ false);
7594
7595 r = context_write_partition_table(context);
7596 if (r < 0)
7597 return r;
7598
7599 r = context_split(context);
7600 if (r < 0)
7601 return r;
7602
7603 (void) context_dump(context, /*late=*/ true);
7604
7605 context->node = mfree(context->node);
7606
7607 LIST_FOREACH(partitions, p, context->partitions)
7608 p->split_path = mfree(p->split_path);
7609
7610 return 0;
7611 }
7612
7613 DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);