]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/home/homework-luks.c
Merge pull request #31357 from keszybz/cleanups-vmspawn
[thirdparty/systemd.git] / src / home / homework-luks.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <linux/loop.h>
4 #include <poll.h>
5 #include <sys/file.h>
6 #include <sys/ioctl.h>
7 #include <sys/xattr.h>
8
9 #if HAVE_VALGRIND_MEMCHECK_H
10 #include <valgrind/memcheck.h>
11 #endif
12
13 #include "sd-daemon.h"
14 #include "sd-device.h"
15 #include "sd-event.h"
16 #include "sd-id128.h"
17
18 #include "blkid-util.h"
19 #include "blockdev-util.h"
20 #include "btrfs-util.h"
21 #include "chattr-util.h"
22 #include "device-util.h"
23 #include "devnum-util.h"
24 #include "dm-util.h"
25 #include "env-util.h"
26 #include "errno-util.h"
27 #include "fd-util.h"
28 #include "fdisk-util.h"
29 #include "fileio.h"
30 #include "filesystems.h"
31 #include "fs-util.h"
32 #include "fsck-util.h"
33 #include "glyph-util.h"
34 #include "gpt.h"
35 #include "home-util.h"
36 #include "homework-blob.h"
37 #include "homework-luks.h"
38 #include "homework-mount.h"
39 #include "io-util.h"
40 #include "keyring-util.h"
41 #include "memory-util.h"
42 #include "missing_magic.h"
43 #include "mkdir.h"
44 #include "mkfs-util.h"
45 #include "mount-util.h"
46 #include "openssl-util.h"
47 #include "parse-util.h"
48 #include "path-util.h"
49 #include "process-util.h"
50 #include "random-util.h"
51 #include "resize-fs.h"
52 #include "strv.h"
53 #include "sync-util.h"
54 #include "tmpfile-util.h"
55 #include "udev-util.h"
56 #include "user-util.h"
57
/* Truncates a size to a multiple of 4K. Newer hardware generally prefers 4K sectors, hence we align our
 * partitions to that as well. In the worst case this wastes 3.5K per partition, which is acceptable. */
#define DISK_SIZE_ROUND_DOWN(x) ((x) & ~(UINT64_C(4096) - 1))
62
/* Extends a size to the next multiple of 4K. Evaluates to UINT64_MAX if rounding up would overflow. */
#define DISK_SIZE_ROUND_UP(x)                                                                           \
        ({                                                                                              \
                const uint64_t _v = (x);                                                                \
                UINT64_MAX - _v < UINT64_C(4095) ? UINT64_MAX : (_v + UINT64_C(4095)) & ~UINT64_C(4095); \
        })
69
/* The difference between the size of the image on disk and the size of the file system inside of it, i.e.
 * what the GPT partition table and the LUKS2 envelope cost us: 18 MiB. (As measured on cryptsetup 2.4.1) */
#define GPT_LUKS2_OVERHEAD (UINT64_C(18) * UINT64_C(1024) * UINT64_C(1024))
73
74 static int resize_image_loop(UserRecord *h, HomeSetup *setup, uint64_t old_image_size, uint64_t new_image_size, uint64_t *ret_image_size);
75
int run_mark_dirty(int fd, bool b) {
        const char marker = '1';
        int r, changed;

        /* Adds (b == true) or drops (b == false) the 'user.home-dirty' xattr on the specified regular file.
         * The xattr is how we detect that a home directory was not properly unmounted.
         *
         * Returns 1 if the xattr was actually added/removed, 0 if the file already was in the requested
         * state, and a negative error if verification, syncing or the xattr operation failed. */

        assert(fd >= 0);

        r = fd_verify_regular(fd);
        if (r < 0)
                return r;

        if (!b) {
                /* Flush the image to disk first, so that the clean mark never precedes the data. */
                r = fsync_full(fd);
                if (r < 0)
                        return log_debug_errno(r, "Failed to synchronize image before marking it clean: %m");

                changed = fremovexattr(fd, "user.home-dirty");
                if (changed < 0 && !ERRNO_IS_XATTR_ABSENT(errno))
                        return log_debug_errno(errno, "Could not mark home directory as clean: %m");
        } else {
                /* XATTR_CREATE makes this fail with EEXIST if the mark is already there, which is fine. */
                changed = fsetxattr(fd, "user.home-dirty", &marker, sizeof(marker), XATTR_CREATE);
                if (changed < 0 && errno != EEXIST)
                        return log_debug_errno(errno, "Could not mark home directory as dirty: %m");
        }

        /* Make sure the new state of the flag itself is persisted, too. */
        r = fsync_full(fd);
        if (r < 0)
                return log_debug_errno(r, "Failed to synchronize dirty flag to disk: %m");

        return changed >= 0;
}
110
111 int run_mark_dirty_by_path(const char *path, bool b) {
112 _cleanup_close_ int fd = -EBADF;
113
114 assert(path);
115
116 fd = open(path, O_RDWR|O_CLOEXEC|O_NOCTTY);
117 if (fd < 0)
118 return log_debug_errno(errno, "Failed to open %s to mark dirty or clean: %m", path);
119
120 return run_mark_dirty(fd, b);
121 }
122
123 static int probe_file_system_by_fd(
124 int fd,
125 char **ret_fstype,
126 sd_id128_t *ret_uuid) {
127
128 _cleanup_(blkid_free_probep) blkid_probe b = NULL;
129 _cleanup_free_ char *s = NULL;
130 const char *fstype = NULL, *uuid = NULL;
131 sd_id128_t id;
132 int r;
133
134 assert(fd >= 0);
135 assert(ret_fstype);
136 assert(ret_uuid);
137
138 b = blkid_new_probe();
139 if (!b)
140 return -ENOMEM;
141
142 errno = 0;
143 r = blkid_probe_set_device(b, fd, 0, 0);
144 if (r != 0)
145 return errno_or_else(ENOMEM);
146
147 (void) blkid_probe_enable_superblocks(b, 1);
148 (void) blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_UUID);
149
150 errno = 0;
151 r = blkid_do_safeprobe(b);
152 if (r == _BLKID_SAFEPROBE_ERROR)
153 return errno_or_else(EIO);
154 if (IN_SET(r, _BLKID_SAFEPROBE_AMBIGUOUS, _BLKID_SAFEPROBE_NOT_FOUND))
155 return -ENOPKG;
156
157 assert(r == _BLKID_SAFEPROBE_FOUND);
158
159 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
160 if (!fstype)
161 return -ENOPKG;
162
163 (void) blkid_probe_lookup_value(b, "UUID", &uuid, NULL);
164 if (!uuid)
165 return -ENOPKG;
166
167 r = sd_id128_from_string(uuid, &id);
168 if (r < 0)
169 return r;
170
171 s = strdup(fstype);
172 if (!s)
173 return -ENOMEM;
174
175 *ret_fstype = TAKE_PTR(s);
176 *ret_uuid = id;
177
178 return 0;
179 }
180
181 static int probe_file_system_by_path(const char *path, char **ret_fstype, sd_id128_t *ret_uuid) {
182 _cleanup_close_ int fd = -EBADF;
183
184 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
185 if (fd < 0)
186 return negative_errno();
187
188 return probe_file_system_by_fd(fd, ret_fstype, ret_uuid);
189 }
190
static int block_get_size_by_fd(int fd, uint64_t *ret) {
        struct stat info;

        /* Queries the size of the block device the fd refers to. Refuses with -ENOTBLK if the fd does not
         * refer to a block device. */

        assert(fd >= 0);
        assert(ret);

        if (fstat(fd, &info) < 0)
                return -errno;

        return S_ISBLK(info.st_mode) ? blockdev_get_device_size(fd, ret) : -ENOTBLK;
}
205
206 static int block_get_size_by_path(const char *path, uint64_t *ret) {
207 _cleanup_close_ int fd = -EBADF;
208
209 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
210 if (fd < 0)
211 return -errno;
212
213 return block_get_size_by_fd(fd, ret);
214 }
215
216 static int run_fsck(const char *node, const char *fstype) {
217 int r, exit_status;
218 pid_t fsck_pid;
219
220 assert(node);
221 assert(fstype);
222
223 r = fsck_exists_for_fstype(fstype);
224 if (r < 0)
225 return log_error_errno(r, "Failed to check if fsck for file system %s exists: %m", fstype);
226 if (r == 0) {
227 log_warning("No fsck for file system %s installed, ignoring.", fstype);
228 return 0;
229 }
230
231 r = safe_fork("(fsck)",
232 FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_STDOUT_TO_STDERR|FORK_CLOSE_ALL_FDS,
233 &fsck_pid);
234 if (r < 0)
235 return r;
236 if (r == 0) {
237 /* Child */
238 execlp("fsck", "fsck", "-aTl", node, NULL);
239 log_open();
240 log_error_errno(errno, "Failed to execute fsck: %m");
241 _exit(FSCK_OPERATIONAL_ERROR);
242 }
243
244 exit_status = wait_for_terminate_and_check("fsck", fsck_pid, WAIT_LOG_ABNORMAL);
245 if (exit_status < 0)
246 return exit_status;
247 if ((exit_status & ~FSCK_ERROR_CORRECTED) != 0) {
248 log_warning("fsck failed with exit status %i.", exit_status);
249
250 if ((exit_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0)
251 return log_error_errno(SYNTHETIC_ERRNO(EIO), "File system is corrupted, refusing.");
252
253 log_warning("Ignoring fsck error.");
254 }
255
256 log_info("File system check completed.");
257
258 return 1;
259 }
260
261 DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(key_serial_t, keyring_unlink, -1);
262
263 static int upload_to_keyring(
264 UserRecord *h,
265 const char *password,
266 key_serial_t *ret_key_serial) {
267
268 _cleanup_free_ char *name = NULL;
269 key_serial_t serial;
270
271 assert(h);
272 assert(password);
273
274 /* If auto-shrink-on-logout is turned on, we need to keep the key we used to unlock the LUKS volume
275 * around, since we'll need it when automatically resizing (since we can't ask the user there
276 * again). We do this by uploading it into the kernel keyring, specifically the "session" one. This
277 * is done under the assumption systemd-homed gets its private per-session keyring (i.e. default
278 * service behaviour, given that KeyringMode=private is the default). It will survive between our
279 * systemd-homework invocations that way.
280 *
281 * If auto-shrink-on-logout is disabled we'll skip this step, to be frugal with sensitive data. */
282
283 if (user_record_auto_resize_mode(h) != AUTO_RESIZE_SHRINK_AND_GROW) { /* Won't need it */
284 if (ret_key_serial)
285 *ret_key_serial = -1;
286 return 0;
287 }
288
289 name = strjoin("homework-user-", h->user_name);
290 if (!name)
291 return -ENOMEM;
292
293 serial = add_key("user", name, password, strlen(password), KEY_SPEC_SESSION_KEYRING);
294 if (serial == -1)
295 return -errno;
296
297 if (ret_key_serial)
298 *ret_key_serial = serial;
299
300 return 1;
301 }
302
303 static int luks_try_passwords(
304 UserRecord *h,
305 struct crypt_device *cd,
306 char **passwords,
307 void *volume_key,
308 size_t *volume_key_size,
309 key_serial_t *ret_key_serial) {
310
311 int r;
312
313 assert(h);
314 assert(cd);
315
316 STRV_FOREACH(pp, passwords) {
317 size_t vks = *volume_key_size;
318
319 r = sym_crypt_volume_key_get(
320 cd,
321 CRYPT_ANY_SLOT,
322 volume_key,
323 &vks,
324 *pp,
325 strlen(*pp));
326 if (r >= 0) {
327 if (ret_key_serial) {
328 /* If ret_key_serial is non-NULL, let's try to upload the password that
329 * worked, and return its serial. */
330 r = upload_to_keyring(h, *pp, ret_key_serial);
331 if (r < 0) {
332 log_debug_errno(r, "Failed to upload LUKS password to kernel keyring, ignoring: %m");
333 *ret_key_serial = -1;
334 }
335 }
336
337 *volume_key_size = vks;
338 return 0;
339 }
340
341 log_debug_errno(r, "Password %zu didn't work for unlocking LUKS superblock: %m", (size_t) (pp - passwords));
342 }
343
344 return -ENOKEY;
345 }
346
347 static int luks_setup(
348 UserRecord *h,
349 const char *node,
350 const char *dm_name,
351 sd_id128_t uuid,
352 const char *cipher,
353 const char *cipher_mode,
354 uint64_t volume_key_size,
355 char **passwords,
356 const PasswordCache *cache,
357 bool discard,
358 struct crypt_device **ret,
359 sd_id128_t *ret_found_uuid,
360 void **ret_volume_key,
361 size_t *ret_volume_key_size,
362 key_serial_t *ret_key_serial) {
363
364 _cleanup_(keyring_unlinkp) key_serial_t key_serial = -1;
365 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
366 _cleanup_(erase_and_freep) void *vk = NULL;
367 sd_id128_t p;
368 size_t vks;
369 int r;
370
371 assert(h);
372 assert(node);
373 assert(dm_name);
374 assert(ret);
375
376 r = sym_crypt_init(&cd, node);
377 if (r < 0)
378 return log_error_errno(r, "Failed to allocate libcryptsetup context: %m");
379
380 cryptsetup_enable_logging(cd);
381
382 r = sym_crypt_load(cd, CRYPT_LUKS2, NULL);
383 if (r < 0)
384 return log_error_errno(r, "Failed to load LUKS superblock: %m");
385
386 r = sym_crypt_get_volume_key_size(cd);
387 if (r <= 0)
388 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine LUKS volume key size");
389 vks = (size_t) r;
390
391 if (!sd_id128_is_null(uuid) || ret_found_uuid) {
392 const char *s;
393
394 s = sym_crypt_get_uuid(cd);
395 if (!s)
396 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has no UUID.");
397
398 r = sd_id128_from_string(s, &p);
399 if (r < 0)
400 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has invalid UUID.");
401
402 /* Check that the UUID matches, if specified */
403 if (!sd_id128_is_null(uuid) &&
404 !sd_id128_equal(uuid, p))
405 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has wrong UUID.");
406 }
407
408 if (cipher && !streq_ptr(cipher, sym_crypt_get_cipher(cd)))
409 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock declares wrong cipher.");
410
411 if (cipher_mode && !streq_ptr(cipher_mode, sym_crypt_get_cipher_mode(cd)))
412 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock declares wrong cipher mode.");
413
414 if (volume_key_size != UINT64_MAX && vks != volume_key_size)
415 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock declares wrong volume key size.");
416
417 vk = malloc(vks);
418 if (!vk)
419 return log_oom();
420
421 r = -ENOKEY;
422 char **list;
423 FOREACH_ARGUMENT(list,
424 cache ? cache->keyring_passswords : NULL,
425 cache ? cache->pkcs11_passwords : NULL,
426 cache ? cache->fido2_passwords : NULL,
427 passwords) {
428
429 r = luks_try_passwords(h, cd, list, vk, &vks, ret_key_serial ? &key_serial : NULL);
430 if (r != -ENOKEY)
431 break;
432 }
433 if (r == -ENOKEY)
434 return log_error_errno(r, "No valid password for LUKS superblock.");
435 if (r < 0)
436 return log_error_errno(r, "Failed to unlock LUKS superblock: %m");
437
438 r = sym_crypt_activate_by_volume_key(
439 cd,
440 dm_name,
441 vk, vks,
442 discard ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0);
443 if (r < 0)
444 return log_error_errno(r, "Failed to unlock LUKS superblock: %m");
445
446 log_info("Setting up LUKS device /dev/mapper/%s completed.", dm_name);
447
448 *ret = TAKE_PTR(cd);
449
450 if (ret_found_uuid) /* Return the UUID actually found if the caller wants to know */
451 *ret_found_uuid = p;
452 if (ret_volume_key)
453 *ret_volume_key = TAKE_PTR(vk);
454 if (ret_volume_key_size)
455 *ret_volume_key_size = vks;
456 if (ret_key_serial)
457 *ret_key_serial = TAKE_KEY_SERIAL(key_serial);
458
459 return 0;
460 }
461
462 static int make_dm_names(UserRecord *h, HomeSetup *setup) {
463 assert(h);
464 assert(h->user_name);
465 assert(setup);
466
467 if (!setup->dm_name) {
468 setup->dm_name = strjoin("home-", h->user_name);
469 if (!setup->dm_name)
470 return log_oom();
471 }
472
473 if (!setup->dm_node) {
474 setup->dm_node = path_join("/dev/mapper/", setup->dm_name);
475 if (!setup->dm_node)
476 return log_oom();
477 }
478
479 return 0;
480 }
481
482 static int acquire_open_luks_device(
483 UserRecord *h,
484 HomeSetup *setup,
485 bool graceful) {
486
487 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
488 int r;
489
490 assert(h);
491 assert(setup);
492 assert(!setup->crypt_device);
493
494 r = dlopen_cryptsetup();
495 if (r < 0)
496 return r;
497
498 r = make_dm_names(h, setup);
499 if (r < 0)
500 return r;
501
502 r = sym_crypt_init_by_name(&cd, setup->dm_name);
503 if ((ERRNO_IS_NEG_DEVICE_ABSENT(r) || r == -EINVAL) && graceful)
504 return 0;
505 if (r < 0)
506 return log_error_errno(r, "Failed to initialize cryptsetup context for %s: %m", setup->dm_name);
507
508 cryptsetup_enable_logging(cd);
509
510 setup->crypt_device = TAKE_PTR(cd);
511 return 1;
512 }
513
514 static int luks_open(
515 UserRecord *h,
516 HomeSetup *setup,
517 const PasswordCache *cache,
518 sd_id128_t *ret_found_uuid,
519 void **ret_volume_key,
520 size_t *ret_volume_key_size) {
521
522 _cleanup_(erase_and_freep) void *vk = NULL;
523 sd_id128_t p;
524 size_t vks;
525 int r;
526
527 assert(h);
528 assert(setup);
529 assert(!setup->crypt_device);
530
531 /* Opens a LUKS device that is already set up. Re-validates the password while doing so (which also
532 * provides us with the volume key, which we want). */
533
534 r = acquire_open_luks_device(h, setup, /* graceful= */ false);
535 if (r < 0)
536 return r;
537
538 r = sym_crypt_load(setup->crypt_device, CRYPT_LUKS2, NULL);
539 if (r < 0)
540 return log_error_errno(r, "Failed to load LUKS superblock: %m");
541
542 r = sym_crypt_get_volume_key_size(setup->crypt_device);
543 if (r <= 0)
544 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine LUKS volume key size");
545 vks = (size_t) r;
546
547 if (ret_found_uuid) {
548 const char *s;
549
550 s = sym_crypt_get_uuid(setup->crypt_device);
551 if (!s)
552 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has no UUID.");
553
554 r = sd_id128_from_string(s, &p);
555 if (r < 0)
556 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has invalid UUID.");
557 }
558
559 vk = malloc(vks);
560 if (!vk)
561 return log_oom();
562
563 r = -ENOKEY;
564 char **list;
565 FOREACH_ARGUMENT(list,
566 cache ? cache->keyring_passswords : NULL,
567 cache ? cache->pkcs11_passwords : NULL,
568 cache ? cache->fido2_passwords : NULL,
569 h->password) {
570
571 r = luks_try_passwords(h, setup->crypt_device, list, vk, &vks, NULL);
572 if (r != -ENOKEY)
573 break;
574 }
575 if (r == -ENOKEY)
576 return log_error_errno(r, "No valid password for LUKS superblock.");
577 if (r < 0)
578 return log_error_errno(r, "Failed to unlock LUKS superblock: %m");
579
580 log_info("Discovered used LUKS device /dev/mapper/%s, and validated password.", setup->dm_name);
581
582 /* This is needed so that crypt_resize() can operate correctly for pre-existing LUKS devices. We need
583 * to tell libcryptsetup the volume key explicitly, so that it is in the kernel keyring. */
584 r = sym_crypt_activate_by_volume_key(setup->crypt_device, NULL, vk, vks, CRYPT_ACTIVATE_KEYRING_KEY);
585 if (r < 0)
586 return log_error_errno(r, "Failed to upload volume key again: %m");
587
588 log_info("Successfully re-activated LUKS device.");
589
590 if (ret_found_uuid)
591 *ret_found_uuid = p;
592 if (ret_volume_key)
593 *ret_volume_key = TAKE_PTR(vk);
594 if (ret_volume_key_size)
595 *ret_volume_key_size = vks;
596
597 return 0;
598 }
599
600 static int fs_validate(
601 const char *dm_node,
602 sd_id128_t uuid,
603 char **ret_fstype,
604 sd_id128_t *ret_found_uuid) {
605
606 _cleanup_free_ char *fstype = NULL;
607 sd_id128_t u = SD_ID128_NULL; /* avoid false maybe-unitialized warning */
608 int r;
609
610 assert(dm_node);
611 assert(ret_fstype);
612
613 r = probe_file_system_by_path(dm_node, &fstype, &u);
614 if (r < 0)
615 return log_error_errno(r, "Failed to probe file system: %m");
616
617 /* Limit the set of supported file systems a bit, as protection against little tested kernel file
618 * systems. Also, we only support the resize ioctls for these file systems. */
619 if (!supported_fstype(fstype))
620 return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "Image contains unsupported file system: %s", strna(fstype));
621
622 if (!sd_id128_is_null(uuid) &&
623 !sd_id128_equal(uuid, u))
624 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "File system has wrong UUID.");
625
626 log_info("Probing file system completed (found %s).", fstype);
627
628 *ret_fstype = TAKE_PTR(fstype);
629
630 if (ret_found_uuid) /* Return the UUID actually found if the caller wants to know */
631 *ret_found_uuid = u;
632
633 return 0;
634 }
635
636 static int luks_validate(
637 int fd,
638 const char *label,
639 sd_id128_t partition_uuid,
640 sd_id128_t *ret_partition_uuid,
641 uint64_t *ret_offset,
642 uint64_t *ret_size) {
643
644 _cleanup_(blkid_free_probep) blkid_probe b = NULL;
645 sd_id128_t found_partition_uuid = SD_ID128_NULL;
646 const char *fstype = NULL, *pttype = NULL;
647 blkid_loff_t offset = 0, size = 0;
648 blkid_partlist pl;
649 bool found = false;
650 int r, n;
651
652 assert(fd >= 0);
653 assert(label);
654 assert(ret_offset);
655 assert(ret_size);
656
657 b = blkid_new_probe();
658 if (!b)
659 return -ENOMEM;
660
661 errno = 0;
662 r = blkid_probe_set_device(b, fd, 0, 0);
663 if (r != 0)
664 return errno_or_else(ENOMEM);
665
666 (void) blkid_probe_enable_superblocks(b, 1);
667 (void) blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
668 (void) blkid_probe_enable_partitions(b, 1);
669 (void) blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
670
671 errno = 0;
672 r = blkid_do_safeprobe(b);
673 if (r == _BLKID_SAFEPROBE_ERROR)
674 return errno_or_else(EIO);
675 if (IN_SET(r, _BLKID_SAFEPROBE_AMBIGUOUS, _BLKID_SAFEPROBE_NOT_FOUND))
676 return -ENOPKG;
677
678 assert(r == _BLKID_SAFEPROBE_FOUND);
679
680 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
681 if (streq_ptr(fstype, "crypto_LUKS")) {
682 /* Directly a LUKS image */
683 *ret_offset = 0;
684 *ret_size = UINT64_MAX; /* full disk */
685 *ret_partition_uuid = SD_ID128_NULL;
686 return 0;
687 } else if (fstype)
688 return -ENOPKG;
689
690 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
691 if (!streq_ptr(pttype, "gpt"))
692 return -ENOPKG;
693
694 errno = 0;
695 pl = blkid_probe_get_partitions(b);
696 if (!pl)
697 return errno_or_else(ENOMEM);
698
699 errno = 0;
700 n = blkid_partlist_numof_partitions(pl);
701 if (n < 0)
702 return errno_or_else(EIO);
703
704 for (int i = 0; i < n; i++) {
705 sd_id128_t id = SD_ID128_NULL;
706 blkid_partition pp;
707
708 errno = 0;
709 pp = blkid_partlist_get_partition(pl, i);
710 if (!pp)
711 return errno_or_else(EIO);
712
713 if (sd_id128_string_equal(blkid_partition_get_type_string(pp), SD_GPT_USER_HOME) <= 0)
714 continue;
715
716 if (!streq_ptr(blkid_partition_get_name(pp), label))
717 continue;
718
719
720 r = blkid_partition_get_uuid_id128(pp, &id);
721 if (r < 0)
722 log_debug_errno(r, "Failed to read partition UUID, ignoring: %m");
723 else if (!sd_id128_is_null(partition_uuid) && !sd_id128_equal(id, partition_uuid))
724 continue;
725
726 if (found)
727 return -ENOPKG;
728
729 offset = blkid_partition_get_start(pp);
730 size = blkid_partition_get_size(pp);
731 found_partition_uuid = id;
732
733 found = true;
734 }
735
736 if (!found)
737 return -ENOPKG;
738
739 if (offset < 0)
740 return -EINVAL;
741 if ((uint64_t) offset > UINT64_MAX / 512U)
742 return -EINVAL;
743 if (size <= 0)
744 return -EINVAL;
745 if ((uint64_t) size > UINT64_MAX / 512U)
746 return -EINVAL;
747
748 *ret_offset = offset * 512U;
749 *ret_size = size * 512U;
750 *ret_partition_uuid = found_partition_uuid;
751
752 return 0;
753 }
754
755 static int crypt_device_to_evp_cipher(struct crypt_device *cd, const EVP_CIPHER **ret) {
756 _cleanup_free_ char *cipher_name = NULL;
757 const char *cipher, *cipher_mode, *e;
758 size_t key_size, key_bits;
759 const EVP_CIPHER *cc;
760 int r;
761
762 assert(cd);
763
764 /* Let's find the right OpenSSL EVP_CIPHER object that matches the encryption settings of the LUKS
765 * device */
766
767 cipher = sym_crypt_get_cipher(cd);
768 if (!cipher)
769 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot get cipher from LUKS device.");
770
771 cipher_mode = sym_crypt_get_cipher_mode(cd);
772 if (!cipher_mode)
773 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot get cipher mode from LUKS device.");
774
775 e = strchr(cipher_mode, '-');
776 if (e)
777 cipher_mode = strndupa_safe(cipher_mode, e - cipher_mode);
778
779 r = sym_crypt_get_volume_key_size(cd);
780 if (r <= 0)
781 return log_error_errno(r < 0 ? r : SYNTHETIC_ERRNO(EINVAL), "Cannot get volume key size from LUKS device.");
782
783 key_size = r;
784 key_bits = key_size * 8;
785 if (streq(cipher_mode, "xts"))
786 key_bits /= 2;
787
788 if (asprintf(&cipher_name, "%s-%zu-%s", cipher, key_bits, cipher_mode) < 0)
789 return log_oom();
790
791 cc = EVP_get_cipherbyname(cipher_name);
792 if (!cc)
793 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Selected cipher mode '%s' not supported, can't encrypt JSON record.", cipher_name);
794
795 /* Verify that our key length calculations match what OpenSSL thinks */
796 r = EVP_CIPHER_key_length(cc);
797 if (r < 0 || (uint64_t) r != key_size)
798 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Key size of selected cipher doesn't meet our expectations.");
799
800 *ret = cc;
801 return 0;
802 }
803
804 static int luks_validate_home_record(
805 struct crypt_device *cd,
806 UserRecord *h,
807 const void *volume_key,
808 PasswordCache *cache,
809 UserRecord **ret_luks_home_record) {
810
811 int r;
812
813 assert(cd);
814 assert(h);
815
816 for (int token = 0; token < sym_crypt_token_max(CRYPT_LUKS2); token++) {
817 _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *rr = NULL;
818 _cleanup_(EVP_CIPHER_CTX_freep) EVP_CIPHER_CTX *context = NULL;
819 _cleanup_(user_record_unrefp) UserRecord *lhr = NULL;
820 _cleanup_free_ void *encrypted = NULL, *iv = NULL;
821 size_t decrypted_size, encrypted_size, iv_size;
822 int decrypted_size_out1, decrypted_size_out2;
823 _cleanup_free_ char *decrypted = NULL;
824 const char *text, *type;
825 crypt_token_info state;
826 JsonVariant *jr, *jiv;
827 unsigned line, column;
828 const EVP_CIPHER *cc;
829
830 state = sym_crypt_token_status(cd, token, &type);
831 if (state == CRYPT_TOKEN_INACTIVE) /* First unconfigured token, give up */
832 break;
833 if (IN_SET(state, CRYPT_TOKEN_INTERNAL, CRYPT_TOKEN_INTERNAL_UNKNOWN, CRYPT_TOKEN_EXTERNAL))
834 continue;
835 if (state != CRYPT_TOKEN_EXTERNAL_UNKNOWN)
836 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected token state of token %i: %i", token, (int) state);
837
838 if (!streq(type, "systemd-homed"))
839 continue;
840
841 r = sym_crypt_token_json_get(cd, token, &text);
842 if (r < 0)
843 return log_error_errno(r, "Failed to read LUKS token %i: %m", token);
844
845 r = json_parse(text, JSON_PARSE_SENSITIVE, &v, &line, &column);
846 if (r < 0)
847 return log_error_errno(r, "Failed to parse LUKS token JSON data %u:%u: %m", line, column);
848
849 jr = json_variant_by_key(v, "record");
850 if (!jr)
851 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS token lacks 'record' field.");
852 jiv = json_variant_by_key(v, "iv");
853 if (!jiv)
854 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS token lacks 'iv' field.");
855
856 r = json_variant_unbase64(jr, &encrypted, &encrypted_size);
857 if (r < 0)
858 return log_error_errno(r, "Failed to base64 decode record: %m");
859
860 r = json_variant_unbase64(jiv, &iv, &iv_size);
861 if (r < 0)
862 return log_error_errno(r, "Failed to base64 decode IV: %m");
863
864 r = crypt_device_to_evp_cipher(cd, &cc);
865 if (r < 0)
866 return r;
867 if (iv_size > INT_MAX || EVP_CIPHER_iv_length(cc) != (int) iv_size)
868 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "IV size doesn't match.");
869
870 context = EVP_CIPHER_CTX_new();
871 if (!context)
872 return log_oom();
873
874 if (EVP_DecryptInit_ex(context, cc, NULL, volume_key, iv) != 1)
875 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to initialize decryption context.");
876
877 decrypted_size = encrypted_size + EVP_CIPHER_key_length(cc) * 2;
878 decrypted = new(char, decrypted_size);
879 if (!decrypted)
880 return log_oom();
881
882 if (EVP_DecryptUpdate(context, (uint8_t*) decrypted, &decrypted_size_out1, encrypted, encrypted_size) != 1)
883 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to decrypt JSON record.");
884
885 assert((size_t) decrypted_size_out1 <= decrypted_size);
886
887 if (EVP_DecryptFinal_ex(context, (uint8_t*) decrypted + decrypted_size_out1, &decrypted_size_out2) != 1)
888 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to finish decryption of JSON record.");
889
890 assert((size_t) decrypted_size_out1 + (size_t) decrypted_size_out2 < decrypted_size);
891 decrypted_size = (size_t) decrypted_size_out1 + (size_t) decrypted_size_out2;
892
893 if (memchr(decrypted, 0, decrypted_size))
894 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Inner NUL byte in JSON record, refusing.");
895
896 decrypted[decrypted_size] = 0;
897
898 r = json_parse(decrypted, JSON_PARSE_SENSITIVE, &rr, NULL, NULL);
899 if (r < 0)
900 return log_error_errno(r, "Failed to parse decrypted JSON record, refusing.");
901
902 lhr = user_record_new();
903 if (!lhr)
904 return log_oom();
905
906 r = user_record_load(lhr, rr, USER_RECORD_LOAD_EMBEDDED|USER_RECORD_PERMISSIVE);
907 if (r < 0)
908 return log_error_errno(r, "Failed to parse user record: %m");
909
910 if (!user_record_compatible(h, lhr))
911 return log_error_errno(SYNTHETIC_ERRNO(EREMCHG), "LUKS home record not compatible with host record, refusing.");
912
913 r = user_record_authenticate(lhr, h, cache, /* strict_verify= */ true);
914 if (r < 0)
915 return r;
916 assert(r > 0); /* Insist that a password was verified */
917
918 *ret_luks_home_record = TAKE_PTR(lhr);
919 return 0;
920 }
921
922 return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Couldn't find home record in LUKS2 header, refusing.");
923 }
924
925 static int format_luks_token_text(
926 struct crypt_device *cd,
927 UserRecord *hr,
928 const void *volume_key,
929 char **ret) {
930
931 int r, encrypted_size_out1 = 0, encrypted_size_out2 = 0, iv_size, key_size;
932 _cleanup_(EVP_CIPHER_CTX_freep) EVP_CIPHER_CTX *context = NULL;
933 _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
934 _cleanup_free_ void *iv = NULL, *encrypted = NULL;
935 size_t text_length, encrypted_size;
936 _cleanup_free_ char *text = NULL;
937 const EVP_CIPHER *cc;
938
939 assert(cd);
940 assert(hr);
941 assert(volume_key);
942 assert(ret);
943
944 r = crypt_device_to_evp_cipher(cd, &cc);
945 if (r < 0)
946 return r;
947
948 key_size = EVP_CIPHER_key_length(cc);
949 iv_size = EVP_CIPHER_iv_length(cc);
950
951 if (iv_size > 0) {
952 iv = malloc(iv_size);
953 if (!iv)
954 return log_oom();
955
956 r = crypto_random_bytes(iv, iv_size);
957 if (r < 0)
958 return log_error_errno(r, "Failed to generate IV: %m");
959 }
960
961 context = EVP_CIPHER_CTX_new();
962 if (!context)
963 return log_oom();
964
965 if (EVP_EncryptInit_ex(context, cc, NULL, volume_key, iv) != 1)
966 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to initialize encryption context.");
967
968 r = json_variant_format(hr->json, 0, &text);
969 if (r < 0)
970 return log_error_errno(r, "Failed to format user record for LUKS: %m");
971
972 text_length = strlen(text);
973 encrypted_size = text_length + 2*key_size - 1;
974
975 encrypted = malloc(encrypted_size);
976 if (!encrypted)
977 return log_oom();
978
979 if (EVP_EncryptUpdate(context, encrypted, &encrypted_size_out1, (uint8_t*) text, text_length) != 1)
980 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to encrypt JSON record.");
981
982 assert((size_t) encrypted_size_out1 <= encrypted_size);
983
984 if (EVP_EncryptFinal_ex(context, (uint8_t*) encrypted + encrypted_size_out1, &encrypted_size_out2) != 1)
985 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to finish encryption of JSON record. ");
986
987 assert((size_t) encrypted_size_out1 + (size_t) encrypted_size_out2 <= encrypted_size);
988
989 r = json_build(&v,
990 JSON_BUILD_OBJECT(
991 JSON_BUILD_PAIR("type", JSON_BUILD_CONST_STRING("systemd-homed")),
992 JSON_BUILD_PAIR("keyslots", JSON_BUILD_EMPTY_ARRAY),
993 JSON_BUILD_PAIR("record", JSON_BUILD_BASE64(encrypted, encrypted_size_out1 + encrypted_size_out2)),
994 JSON_BUILD_PAIR("iv", JSON_BUILD_BASE64(iv, iv_size))));
995 if (r < 0)
996 return log_error_errno(r, "Failed to prepare LUKS JSON token object: %m");
997
998 r = json_variant_format(v, 0, ret);
999 if (r < 0)
1000 return log_error_errno(r, "Failed to format encrypted user record for LUKS: %m");
1001
1002 return 0;
1003 }
1004
1005 int home_store_header_identity_luks(
1006 UserRecord *h,
1007 HomeSetup *setup,
1008 UserRecord *old_home) {
1009
1010 _cleanup_(user_record_unrefp) UserRecord *header_home = NULL;
1011 _cleanup_free_ char *text = NULL;
1012 int r;
1013
1014 assert(h);
1015
1016 if (!setup->crypt_device)
1017 return 0;
1018
1019 assert(setup->volume_key);
1020
1021 /* Let's store the user's identity record in the LUKS2 "token" header data fields, in an encrypted
1022 * fashion. Why that? If we'd rely on the record being embedded in the payload file system itself we
1023 * would have to mount the file system before we can validate the JSON record, its signatures and
1024 * whether it matches what we are looking for. However, kernel file system implementations are
1025 * generally not ready to be used on untrusted media. Hence let's store the record independently of
1026 * the file system, so that we can validate it first, and only then mount the file system. To keep
1027 * things simple we use the same encryption settings for this record as for the file system itself. */
1028
1029 r = user_record_clone(h, USER_RECORD_EXTRACT_EMBEDDED|USER_RECORD_PERMISSIVE, &header_home);
1030 if (r < 0)
1031 return log_error_errno(r, "Failed to determine new header record: %m");
1032
1033 if (old_home && user_record_equal(old_home, header_home)) {
1034 log_debug("Not updating header home record.");
1035 return 0;
1036 }
1037
1038 r = format_luks_token_text(setup->crypt_device, header_home, setup->volume_key, &text);
1039 if (r < 0)
1040 return r;
1041
1042 for (int token = 0; token < sym_crypt_token_max(CRYPT_LUKS2); token++) {
1043 crypt_token_info state;
1044 const char *type;
1045
1046 state = sym_crypt_token_status(setup->crypt_device, token, &type);
1047 if (state == CRYPT_TOKEN_INACTIVE) /* First unconfigured token, we are done */
1048 break;
1049 if (IN_SET(state, CRYPT_TOKEN_INTERNAL, CRYPT_TOKEN_INTERNAL_UNKNOWN, CRYPT_TOKEN_EXTERNAL))
1050 continue; /* Not ours */
1051 if (state != CRYPT_TOKEN_EXTERNAL_UNKNOWN)
1052 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected token state of token %i: %i", token, (int) state);
1053
1054 if (!streq(type, "systemd-homed"))
1055 continue;
1056
1057 r = sym_crypt_token_json_set(setup->crypt_device, token, text);
1058 if (r < 0)
1059 return log_error_errno(r, "Failed to set JSON token for slot %i: %m", token);
1060
1061 /* Now, let's free the text so that for all further matching tokens we all crypt_json_token_set()
1062 * with a NULL text in order to invalidate the tokens. */
1063 text = mfree(text);
1064 }
1065
1066 if (text)
1067 return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Didn't find any record token to update.");
1068
1069 log_info("Wrote LUKS header user record.");
1070
1071 return 1;
1072 }
1073
1074 int run_fitrim(int root_fd) {
1075 struct fstrim_range range = {
1076 .len = UINT64_MAX,
1077 };
1078
1079 /* If discarding is on, discard everything right after mounting, so that the discard setting takes
1080 * effect on activation. (Also, optionally, trim on logout) */
1081
1082 assert(root_fd >= 0);
1083
1084 if (ioctl(root_fd, FITRIM, &range) < 0) {
1085 if (ERRNO_IS_NOT_SUPPORTED(errno) || errno == EBADF) {
1086 log_debug_errno(errno, "File system does not support FITRIM, not trimming.");
1087 return 0;
1088 }
1089
1090 return log_warning_errno(errno, "Failed to invoke FITRIM, ignoring: %m");
1091 }
1092
1093 log_info("Discarded unused %s.", FORMAT_BYTES(range.len));
1094 return 1;
1095 }
1096
1097 int run_fallocate(int backing_fd, const struct stat *st) {
1098 struct stat stbuf;
1099
1100 assert(backing_fd >= 0);
1101
1102 /* If discarding is off, let's allocate the whole image before mounting, so that the setting takes
1103 * effect on activation */
1104
1105 if (!st) {
1106 if (fstat(backing_fd, &stbuf) < 0)
1107 return log_error_errno(errno, "Failed to fstat(): %m");
1108
1109 st = &stbuf;
1110 }
1111
1112 if (!S_ISREG(st->st_mode))
1113 return 0;
1114
1115 if (st->st_blocks >= DIV_ROUND_UP(st->st_size, 512)) {
1116 log_info("Backing file is fully allocated already.");
1117 return 0;
1118 }
1119
1120 if (fallocate(backing_fd, FALLOC_FL_KEEP_SIZE, 0, st->st_size) < 0) {
1121
1122 if (ERRNO_IS_NOT_SUPPORTED(errno)) {
1123 log_debug_errno(errno, "fallocate() not supported on file system, ignoring.");
1124 return 0;
1125 }
1126
1127 if (ERRNO_IS_DISK_SPACE(errno)) {
1128 log_debug_errno(errno, "Not enough disk space to fully allocate home.");
1129 return -ENOSPC; /* make recognizable */
1130 }
1131
1132 return log_error_errno(errno, "Failed to allocate backing file blocks: %m");
1133 }
1134
1135 log_info("Allocated additional %s.",
1136 FORMAT_BYTES((DIV_ROUND_UP(st->st_size, 512) - st->st_blocks) * 512));
1137 return 1;
1138 }
1139
1140 int run_fallocate_by_path(const char *backing_path) {
1141 _cleanup_close_ int backing_fd = -EBADF;
1142
1143 backing_fd = open(backing_path, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
1144 if (backing_fd < 0)
1145 return log_error_errno(errno, "Failed to open '%s' for fallocate(): %m", backing_path);
1146
1147 return run_fallocate(backing_fd, NULL);
1148 }
1149
1150 static int lock_image_fd(int image_fd, const char *ip) {
1151 int r;
1152
1153 /* If the $SYSTEMD_LUKS_LOCK environment variable is set we'll take an exclusive BSD lock on the
1154 * image file, and send it to our parent. homed will keep it open to ensure no other instance of
1155 * homed (across the network or such) will also mount the file. */
1156
1157 assert(image_fd >= 0);
1158 assert(ip);
1159
1160 r = getenv_bool("SYSTEMD_LUKS_LOCK");
1161 if (r == -ENXIO)
1162 return 0;
1163 if (r < 0)
1164 return log_error_errno(r, "Failed to parse $SYSTEMD_LUKS_LOCK environment variable: %m");
1165 if (r == 0)
1166 return 0;
1167
1168 if (flock(image_fd, LOCK_EX|LOCK_NB) < 0) {
1169
1170 if (errno == EAGAIN)
1171 log_error_errno(errno, "Image file '%s' already locked, can't use.", ip);
1172 else
1173 log_error_errno(errno, "Failed to lock image file '%s': %m", ip);
1174
1175 return errno != EAGAIN ? -errno : -EADDRINUSE; /* Make error recognizable */
1176 }
1177
1178 log_info("Successfully locked image file '%s'.", ip);
1179
1180 /* Now send it to our parent to keep safe while the home dir is active */
1181 r = sd_pid_notify_with_fds(0, false, "SYSTEMD_LUKS_LOCK_FD=1", &image_fd, 1);
1182 if (r < 0)
1183 log_warning_errno(r, "Failed to send LUKS lock fd to parent, ignoring: %m");
1184
1185 return 0;
1186 }
1187
1188 static int open_image_file(
1189 UserRecord *h,
1190 const char *force_image_path,
1191 struct stat *ret_stat) {
1192
1193 _cleanup_close_ int image_fd = -EBADF;
1194 struct stat st;
1195 const char *ip;
1196 int r;
1197
1198 assert(h || force_image_path);
1199
1200 ip = force_image_path ?: user_record_image_path(h);
1201
1202 image_fd = open(ip, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
1203 if (image_fd < 0)
1204 return log_error_errno(errno, "Failed to open image file %s: %m", ip);
1205
1206 if (fstat(image_fd, &st) < 0)
1207 return log_error_errno(errno, "Failed to fstat() image file: %m");
1208 if (!S_ISREG(st.st_mode) && !S_ISBLK(st.st_mode))
1209 return log_error_errno(
1210 S_ISDIR(st.st_mode) ? SYNTHETIC_ERRNO(EISDIR) : SYNTHETIC_ERRNO(EBADFD),
1211 "Image file %s is not a regular file or block device: %m", ip);
1212
1213 /* Locking block devices doesn't really make sense, as this might interfere with
1214 * udev's workings, and these locks aren't network propagated anyway, hence not what
1215 * we are after here. */
1216 if (S_ISREG(st.st_mode)) {
1217 r = lock_image_fd(image_fd, ip);
1218 if (r < 0)
1219 return r;
1220 }
1221
1222 if (ret_stat)
1223 *ret_stat = st;
1224
1225 return TAKE_FD(image_fd);
1226 }
1227
1228 int home_setup_luks(
1229 UserRecord *h,
1230 HomeSetupFlags flags,
1231 const char *force_image_path,
1232 HomeSetup *setup,
1233 PasswordCache *cache,
1234 UserRecord **ret_luks_home) {
1235
1236 sd_id128_t found_partition_uuid, found_fs_uuid = SD_ID128_NULL, found_luks_uuid = SD_ID128_NULL;
1237 _cleanup_(user_record_unrefp) UserRecord *luks_home = NULL;
1238 _cleanup_(erase_and_freep) void *volume_key = NULL;
1239 size_t volume_key_size = 0;
1240 uint64_t offset, size;
1241 struct stat st;
1242 int r;
1243
1244 assert(h);
1245 assert(setup);
1246 assert(user_record_storage(h) == USER_LUKS);
1247
1248 r = dlopen_cryptsetup();
1249 if (r < 0)
1250 return r;
1251
1252 r = make_dm_names(h, setup);
1253 if (r < 0)
1254 return r;
1255
1256 /* Reuse the image fd if it has already been opened by an earlier step */
1257 if (setup->image_fd < 0) {
1258 setup->image_fd = open_image_file(h, force_image_path, &st);
1259 if (setup->image_fd < 0)
1260 return setup->image_fd;
1261 } else if (fstat(setup->image_fd, &st) < 0)
1262 return log_error_errno(errno, "Failed to stat image: %m");
1263
1264 if (FLAGS_SET(flags, HOME_SETUP_ALREADY_ACTIVATED)) {
1265 struct loop_info64 info;
1266 const char *n;
1267
1268 if (!setup->crypt_device) {
1269 r = luks_open(h,
1270 setup,
1271 cache,
1272 &found_luks_uuid,
1273 &volume_key,
1274 &volume_key_size);
1275 if (r < 0)
1276 return r;
1277 }
1278
1279 if (ret_luks_home) {
1280 r = luks_validate_home_record(setup->crypt_device, h, volume_key, cache, &luks_home);
1281 if (r < 0)
1282 return r;
1283 }
1284
1285 n = sym_crypt_get_device_name(setup->crypt_device);
1286 if (!n)
1287 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine backing device for DM %s.", setup->dm_name);
1288
1289 if (!setup->loop) {
1290 r = loop_device_open_from_path(n, O_RDWR, LOCK_UN, &setup->loop);
1291 if (r < 0)
1292 return log_error_errno(r, "Failed to open loopback device %s: %m", n);
1293 }
1294
1295 if (ioctl(setup->loop->fd, LOOP_GET_STATUS64, &info) < 0) {
1296 _cleanup_free_ char *sysfs = NULL;
1297
1298 if (!IN_SET(errno, ENOTTY, EINVAL))
1299 return log_error_errno(errno, "Failed to get block device metrics of %s: %m", n);
1300
1301 if (fstat(setup->loop->fd, &st) < 0)
1302 return log_error_errno(r, "Failed to stat block device %s: %m", n);
1303 assert(S_ISBLK(st.st_mode));
1304
1305 if (asprintf(&sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev)) < 0)
1306 return log_oom();
1307
1308 if (access(sysfs, F_OK) < 0) {
1309 if (errno != ENOENT)
1310 return log_error_errno(errno, "Failed to determine whether %s exists: %m", sysfs);
1311
1312 offset = 0;
1313 } else {
1314 _cleanup_free_ char *buffer = NULL;
1315
1316 if (asprintf(&sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/start", DEVNUM_FORMAT_VAL(st.st_rdev)) < 0)
1317 return log_oom();
1318
1319 r = read_one_line_file(sysfs, &buffer);
1320 if (r < 0)
1321 return log_error_errno(r, "Failed to read partition start offset: %m");
1322
1323 r = safe_atou64(buffer, &offset);
1324 if (r < 0)
1325 return log_error_errno(r, "Failed to parse partition start offset: %m");
1326
1327 if (offset > UINT64_MAX / 512U)
1328 return log_error_errno(SYNTHETIC_ERRNO(E2BIG), "Offset too large for 64 byte range, refusing.");
1329
1330 offset *= 512U;
1331 }
1332
1333 size = setup->loop->device_size;
1334 } else {
1335 #if HAVE_VALGRIND_MEMCHECK_H
1336 VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
1337 #endif
1338
1339 offset = info.lo_offset;
1340 size = info.lo_sizelimit;
1341 }
1342
1343 found_partition_uuid = found_fs_uuid = SD_ID128_NULL;
1344
1345 log_info("Discovered used loopback device %s.", setup->loop->node);
1346
1347 if (setup->root_fd < 0) {
1348 setup->root_fd = open(user_record_home_directory(h), O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1349 if (setup->root_fd < 0)
1350 return log_error_errno(errno, "Failed to open home directory: %m");
1351 }
1352 } else {
1353 _cleanup_free_ char *fstype = NULL, *subdir = NULL;
1354 const char *ip;
1355
1356 /* When we aren't reopening the home directory we are allocating it fresh, hence the relevant
1357 * objects can't be allocated yet. */
1358 assert(setup->root_fd < 0);
1359 assert(!setup->crypt_device);
1360 assert(!setup->loop);
1361
1362 ip = force_image_path ?: user_record_image_path(h);
1363
1364 subdir = path_join(HOME_RUNTIME_WORK_DIR, user_record_user_name_and_realm(h));
1365 if (!subdir)
1366 return log_oom();
1367
1368 r = luks_validate(setup->image_fd, user_record_user_name_and_realm(h), h->partition_uuid, &found_partition_uuid, &offset, &size);
1369 if (r < 0)
1370 return log_error_errno(r, "Failed to validate disk label: %m");
1371
1372 /* Everything before this point left the image untouched. We are now starting to make
1373 * changes, hence mark the image dirty */
1374 if (run_mark_dirty(setup->image_fd, true) > 0)
1375 setup->do_mark_clean = true;
1376
1377 if (!user_record_luks_discard(h)) {
1378 r = run_fallocate(setup->image_fd, &st);
1379 if (r < 0)
1380 return r;
1381 }
1382
1383 r = loop_device_make(
1384 setup->image_fd,
1385 O_RDWR,
1386 offset,
1387 size,
1388 h->luks_sector_size == UINT64_MAX ? UINT32_MAX : user_record_luks_sector_size(h), /* if sector size is not specified, select UINT32_MAX, i.e. auto-probe */
1389 /* loop_flags= */ 0,
1390 LOCK_UN,
1391 &setup->loop);
1392 if (r == -ENOENT) {
1393 log_error_errno(r, "Loopback block device support is not available on this system.");
1394 return -ENOLINK; /* make recognizable */
1395 }
1396 if (r < 0)
1397 return log_error_errno(r, "Failed to allocate loopback context: %m");
1398
1399 log_info("Setting up loopback device %s completed.", setup->loop->node ?: ip);
1400
1401 r = luks_setup(h,
1402 setup->loop->node ?: ip,
1403 setup->dm_name,
1404 h->luks_uuid,
1405 h->luks_cipher,
1406 h->luks_cipher_mode,
1407 h->luks_volume_key_size,
1408 h->password,
1409 cache,
1410 user_record_luks_discard(h) || user_record_luks_offline_discard(h),
1411 &setup->crypt_device,
1412 &found_luks_uuid,
1413 &volume_key,
1414 &volume_key_size,
1415 &setup->key_serial);
1416 if (r < 0)
1417 return r;
1418
1419 setup->undo_dm = true;
1420
1421 if (ret_luks_home) {
1422 r = luks_validate_home_record(setup->crypt_device, h, volume_key, cache, &luks_home);
1423 if (r < 0)
1424 return r;
1425 }
1426
1427 r = fs_validate(setup->dm_node, h->file_system_uuid, &fstype, &found_fs_uuid);
1428 if (r < 0)
1429 return r;
1430
1431 r = run_fsck(setup->dm_node, fstype);
1432 if (r < 0)
1433 return r;
1434
1435 r = home_unshare_and_mount(setup->dm_node, fstype, user_record_luks_discard(h), user_record_mount_flags(h), h->luks_extra_mount_options);
1436 if (r < 0)
1437 return r;
1438
1439 setup->undo_mount = true;
1440
1441 setup->root_fd = open(subdir, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1442 if (setup->root_fd < 0)
1443 return log_error_errno(errno, "Failed to open home directory: %m");
1444
1445 if (user_record_luks_discard(h))
1446 (void) run_fitrim(setup->root_fd);
1447
1448 setup->do_offline_fallocate = !(setup->do_offline_fitrim = user_record_luks_offline_discard(h));
1449 }
1450
1451 if (!sd_id128_is_null(found_partition_uuid))
1452 setup->found_partition_uuid = found_partition_uuid;
1453 if (!sd_id128_is_null(found_luks_uuid))
1454 setup->found_luks_uuid = found_luks_uuid;
1455 if (!sd_id128_is_null(found_fs_uuid))
1456 setup->found_fs_uuid = found_fs_uuid;
1457
1458 setup->partition_offset = offset;
1459 setup->partition_size = size;
1460
1461 if (volume_key) {
1462 erase_and_free(setup->volume_key);
1463 setup->volume_key = TAKE_PTR(volume_key);
1464 setup->volume_key_size = volume_key_size;
1465 }
1466
1467 if (ret_luks_home)
1468 *ret_luks_home = TAKE_PTR(luks_home);
1469
1470 return 0;
1471 }
1472
/* Logs a one-line summary of the sizes involved: 'host_size', 'encrypted_size' and the total and free
 * space of the file system as reported in 'sfs' (block counts multiplied by the fragment size). */
static void print_size_summary(uint64_t host_size, uint64_t encrypted_size, const struct statfs *sfs) {
        uint64_t fragment_size, payload_bytes, free_bytes;

        assert(sfs);

        fragment_size = (uint64_t) sfs->f_frsize;
        payload_bytes = (uint64_t) sfs->f_blocks * fragment_size;
        free_bytes = (uint64_t) sfs->f_bfree * fragment_size;

        log_info("Image size is %s, file system size is %s, file system payload size is %s, file system free is %s.",
                 FORMAT_BYTES(host_size),
                 FORMAT_BYTES(encrypted_size),
                 FORMAT_BYTES(payload_bytes),
                 FORMAT_BYTES(free_bytes));
}
1482
1483 static int home_auto_grow_luks(
1484 UserRecord *h,
1485 HomeSetup *setup,
1486 PasswordCache *cache) {
1487
1488 struct statfs sfs;
1489
1490 assert(h);
1491 assert(setup);
1492
1493 if (!IN_SET(user_record_auto_resize_mode(h), AUTO_RESIZE_GROW, AUTO_RESIZE_SHRINK_AND_GROW))
1494 return 0;
1495
1496 assert(setup->root_fd >= 0);
1497
1498 if (fstatfs(setup->root_fd, &sfs) < 0)
1499 return log_error_errno(errno, "Failed to statfs home directory: %m");
1500
1501 if (!fs_can_online_shrink_and_grow(sfs.f_type)) {
1502 log_debug("Not auto-grow file system, since selected file system cannot do both online shrink and grow.");
1503 return 0;
1504 }
1505
1506 log_debug("Initiating auto-grow...");
1507
1508 return home_resize_luks(
1509 h,
1510 HOME_SETUP_ALREADY_ACTIVATED|
1511 HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES|
1512 HOME_SETUP_RESIZE_DONT_SHRINK|
1513 HOME_SETUP_RESIZE_DONT_UNDO,
1514 setup,
1515 cache,
1516 NULL);
1517 }
1518
1519 int home_activate_luks(
1520 UserRecord *h,
1521 HomeSetupFlags flags,
1522 HomeSetup *setup,
1523 PasswordCache *cache,
1524 UserRecord **ret_home) {
1525
1526 _cleanup_(user_record_unrefp) UserRecord *new_home = NULL, *luks_home_record = NULL;
1527 uint64_t host_size, encrypted_size;
1528 const char *hdo, *hd;
1529 struct statfs sfs;
1530 int r;
1531
1532 assert(h);
1533 assert(user_record_storage(h) == USER_LUKS);
1534 assert(setup);
1535 assert(ret_home);
1536
1537 r = dlopen_cryptsetup();
1538 if (r < 0)
1539 return r;
1540
1541 assert_se(hdo = user_record_home_directory(h));
1542 hd = strdupa_safe(hdo); /* copy the string out, since it might change later in the home record object */
1543
1544 r = home_get_state_luks(h, setup);
1545 if (r < 0)
1546 return r;
1547 if (r > 0)
1548 return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Device mapper device %s already exists, refusing.", setup->dm_node);
1549
1550 r = home_setup_luks(
1551 h,
1552 0,
1553 NULL,
1554 setup,
1555 cache,
1556 &luks_home_record);
1557 if (r < 0)
1558 return r;
1559
1560 r = home_auto_grow_luks(h, setup, cache);
1561 if (r < 0)
1562 return r;
1563
1564 r = block_get_size_by_fd(setup->loop->fd, &host_size);
1565 if (r < 0)
1566 return log_error_errno(r, "Failed to get loopback block device size: %m");
1567
1568 r = block_get_size_by_path(setup->dm_node, &encrypted_size);
1569 if (r < 0)
1570 return log_error_errno(r, "Failed to get LUKS block device size: %m");
1571
1572 r = home_refresh(
1573 h,
1574 flags,
1575 setup,
1576 luks_home_record,
1577 cache,
1578 &sfs,
1579 &new_home);
1580 if (r < 0)
1581 return r;
1582
1583 r = home_extend_embedded_identity(new_home, h, setup);
1584 if (r < 0)
1585 return r;
1586
1587 setup->root_fd = safe_close(setup->root_fd);
1588
1589 r = home_move_mount(user_record_user_name_and_realm(h), hd);
1590 if (r < 0)
1591 return r;
1592
1593 setup->undo_mount = false;
1594 setup->do_offline_fitrim = false;
1595
1596 loop_device_relinquish(setup->loop);
1597
1598 r = sym_crypt_deactivate_by_name(NULL, setup->dm_name, CRYPT_DEACTIVATE_DEFERRED);
1599 if (r < 0)
1600 log_warning_errno(r, "Failed to relinquish DM device, ignoring: %m");
1601
1602 setup->undo_dm = false;
1603 setup->do_offline_fallocate = false;
1604 setup->do_mark_clean = false;
1605 setup->do_drop_caches = false;
1606 TAKE_KEY_SERIAL(setup->key_serial); /* Leave key in kernel keyring */
1607
1608 log_info("Activation completed.");
1609
1610 print_size_summary(host_size, encrypted_size, &sfs);
1611
1612 *ret_home = TAKE_PTR(new_home);
1613 return 1;
1614 }
1615
1616 int home_deactivate_luks(UserRecord *h, HomeSetup *setup) {
1617 bool we_detached = false;
1618 int r;
1619
1620 assert(h);
1621 assert(setup);
1622
1623 /* Note that the DM device and loopback device are set to auto-detach, hence strictly speaking we
1624 * don't have to explicitly have to detach them. However, we do that nonetheless (in case of the DM
1625 * device), to avoid races: by explicitly detaching them we know when the detaching is complete. We
1626 * don't bother about the loopback device because unlike the DM device it doesn't have a fixed
1627 * name. */
1628
1629 if (!setup->crypt_device) {
1630 r = acquire_open_luks_device(h, setup, /* graceful= */ true);
1631 if (r < 0)
1632 return log_error_errno(r, "Failed to initialize cryptsetup context for %s: %m", setup->dm_name);
1633 if (r == 0)
1634 log_debug("LUKS device %s has already been detached.", setup->dm_name);
1635 }
1636
1637 if (setup->crypt_device) {
1638 log_info("Discovered used LUKS device %s.", setup->dm_node);
1639
1640 cryptsetup_enable_logging(setup->crypt_device);
1641
1642 r = sym_crypt_deactivate_by_name(setup->crypt_device, setup->dm_name, 0);
1643 if (ERRNO_IS_NEG_DEVICE_ABSENT(r) || r == -EINVAL)
1644 log_debug_errno(r, "LUKS device %s is already detached.", setup->dm_node);
1645 else if (r < 0)
1646 return log_info_errno(r, "LUKS device %s couldn't be deactivated: %m", setup->dm_node);
1647 else {
1648 log_info("LUKS device detaching completed.");
1649 we_detached = true;
1650 }
1651 }
1652
1653 (void) wait_for_block_device_gone(setup, USEC_PER_SEC * 30);
1654 setup->undo_dm = false;
1655
1656 if (user_record_luks_offline_discard(h))
1657 log_debug("Not allocating on logout.");
1658 else
1659 (void) run_fallocate_by_path(user_record_image_path(h));
1660
1661 run_mark_dirty_by_path(user_record_image_path(h), false);
1662 return we_detached;
1663 }
1664
1665 int home_trim_luks(UserRecord *h, HomeSetup *setup) {
1666 assert(h);
1667 assert(setup);
1668 assert(setup->root_fd >= 0);
1669
1670 if (!user_record_luks_offline_discard(h)) {
1671 log_debug("Not trimming on logout.");
1672 return 0;
1673 }
1674
1675 (void) run_fitrim(setup->root_fd);
1676 return 0;
1677 }
1678
1679 static struct crypt_pbkdf_type* build_good_pbkdf(struct crypt_pbkdf_type *buffer, UserRecord *hr) {
1680 assert(buffer);
1681 assert(hr);
1682
1683 bool benchmark = user_record_luks_pbkdf_force_iterations(hr) == UINT64_MAX;
1684
1685 *buffer = (struct crypt_pbkdf_type) {
1686 .hash = user_record_luks_pbkdf_hash_algorithm(hr),
1687 .type = user_record_luks_pbkdf_type(hr),
1688 .time_ms = benchmark ? user_record_luks_pbkdf_time_cost_usec(hr) / USEC_PER_MSEC : 0,
1689 .iterations = benchmark ? 0 : user_record_luks_pbkdf_force_iterations(hr),
1690 .max_memory_kb = user_record_luks_pbkdf_memory_cost(hr) / 1024,
1691 .parallel_threads = user_record_luks_pbkdf_parallel_threads(hr),
1692 .flags = benchmark ? 0 : CRYPT_PBKDF_NO_BENCHMARK,
1693 };
1694
1695 return buffer;
1696 }
1697
1698 static struct crypt_pbkdf_type* build_minimal_pbkdf(struct crypt_pbkdf_type *buffer, UserRecord *hr) {
1699 assert(buffer);
1700 assert(hr);
1701
1702 /* For PKCS#11 derived keys (which are generated randomly and are of high quality already) we use a
1703 * minimal PBKDF and CRYPT_PBKDF_NO_BENCHMARK flag to skip benchmark. */
1704 *buffer = (struct crypt_pbkdf_type) {
1705 .hash = user_record_luks_pbkdf_hash_algorithm(hr),
1706 .type = CRYPT_KDF_PBKDF2,
1707 .iterations = 1000, /* recommended minimum count for pbkdf2
1708 * according to NIST SP 800-132, ch. 5.2 */
1709 .flags = CRYPT_PBKDF_NO_BENCHMARK
1710 };
1711
1712 return buffer;
1713 }
1714
1715 static int luks_format(
1716 const char *node,
1717 const char *dm_name,
1718 sd_id128_t uuid,
1719 const char *label,
1720 const PasswordCache *cache,
1721 char **effective_passwords,
1722 bool discard,
1723 UserRecord *hr,
1724 struct crypt_device **ret) {
1725
1726 _cleanup_(user_record_unrefp) UserRecord *reduced = NULL;
1727 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
1728 _cleanup_(erase_and_freep) void *volume_key = NULL;
1729 struct crypt_pbkdf_type good_pbkdf, minimal_pbkdf;
1730 _cleanup_free_ char *text = NULL;
1731 size_t volume_key_size;
1732 int slot = 0, r;
1733
1734 assert(node);
1735 assert(dm_name);
1736 assert(hr);
1737 assert(ret);
1738
1739 r = sym_crypt_init(&cd, node);
1740 if (r < 0)
1741 return log_error_errno(r, "Failed to allocate libcryptsetup context: %m");
1742
1743 cryptsetup_enable_logging(cd);
1744
1745 /* Normally we'd, just leave volume key generation to libcryptsetup. However, we can't, since we
1746 * can't extract the volume key from the library again, but we need it in order to encrypt the JSON
1747 * record. Hence, let's generate it on our own, so that we can keep track of it. */
1748
1749 volume_key_size = user_record_luks_volume_key_size(hr);
1750 volume_key = malloc(volume_key_size);
1751 if (!volume_key)
1752 return log_oom();
1753
1754 r = crypto_random_bytes(volume_key, volume_key_size);
1755 if (r < 0)
1756 return log_error_errno(r, "Failed to generate volume key: %m");
1757
1758 #if HAVE_CRYPT_SET_METADATA_SIZE
1759 /* Increase the metadata space to 4M, the largest LUKS2 supports */
1760 r = sym_crypt_set_metadata_size(cd, 4096U*1024U, 0);
1761 if (r < 0)
1762 return log_error_errno(r, "Failed to change LUKS2 metadata size: %m");
1763 #endif
1764
1765 build_good_pbkdf(&good_pbkdf, hr);
1766 build_minimal_pbkdf(&minimal_pbkdf, hr);
1767
1768 r = sym_crypt_format(
1769 cd,
1770 CRYPT_LUKS2,
1771 user_record_luks_cipher(hr),
1772 user_record_luks_cipher_mode(hr),
1773 SD_ID128_TO_UUID_STRING(uuid),
1774 volume_key,
1775 volume_key_size,
1776 &(struct crypt_params_luks2) {
1777 .label = label,
1778 .subsystem = "systemd-home",
1779 .sector_size = user_record_luks_sector_size(hr),
1780 .pbkdf = &good_pbkdf,
1781 });
1782 if (r < 0)
1783 return log_error_errno(r, "Failed to format LUKS image: %m");
1784
1785 log_info("LUKS formatting completed.");
1786
1787 STRV_FOREACH(pp, effective_passwords) {
1788
1789 if (password_cache_contains(cache, *pp)) { /* is this a fido2 or pkcs11 password? */
1790 log_debug("Using minimal PBKDF for slot %i", slot);
1791 r = sym_crypt_set_pbkdf_type(cd, &minimal_pbkdf);
1792 } else {
1793 log_debug("Using good PBKDF for slot %i", slot);
1794 r = sym_crypt_set_pbkdf_type(cd, &good_pbkdf);
1795 }
1796 if (r < 0)
1797 return log_error_errno(r, "Failed to tweak PBKDF for slot %i: %m", slot);
1798
1799 r = sym_crypt_keyslot_add_by_volume_key(
1800 cd,
1801 slot,
1802 volume_key,
1803 volume_key_size,
1804 *pp,
1805 strlen(*pp));
1806 if (r < 0)
1807 return log_error_errno(r, "Failed to set up LUKS password for slot %i: %m", slot);
1808
1809 log_info("Writing password to LUKS keyslot %i completed.", slot);
1810 slot++;
1811 }
1812
1813 r = sym_crypt_activate_by_volume_key(
1814 cd,
1815 dm_name,
1816 volume_key,
1817 volume_key_size,
1818 discard ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0);
1819 if (r < 0)
1820 return log_error_errno(r, "Failed to activate LUKS superblock: %m");
1821
1822 log_info("LUKS activation by volume key succeeded.");
1823
1824 r = user_record_clone(hr, USER_RECORD_EXTRACT_EMBEDDED|USER_RECORD_PERMISSIVE, &reduced);
1825 if (r < 0)
1826 return log_error_errno(r, "Failed to prepare home record for LUKS: %m");
1827
1828 r = format_luks_token_text(cd, reduced, volume_key, &text);
1829 if (r < 0)
1830 return r;
1831
1832 r = sym_crypt_token_json_set(cd, CRYPT_ANY_TOKEN, text);
1833 if (r < 0)
1834 return log_error_errno(r, "Failed to set LUKS JSON token: %m");
1835
1836 log_info("Writing user record as LUKS token completed.");
1837
1838 if (ret)
1839 *ret = TAKE_PTR(cd);
1840
1841 return 0;
1842 }
1843
1844 static int make_partition_table(
1845 int fd,
1846 uint32_t sector_size,
1847 const char *label,
1848 sd_id128_t uuid,
1849 uint64_t *ret_offset,
1850 uint64_t *ret_size,
1851 sd_id128_t *ret_disk_uuid) {
1852
1853 _cleanup_(fdisk_unref_partitionp) struct fdisk_partition *p = NULL, *q = NULL;
1854 _cleanup_(fdisk_unref_parttypep) struct fdisk_parttype *t = NULL;
1855 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
1856 _cleanup_free_ char *disk_uuid_as_string = NULL;
1857 uint64_t offset, size, first_lba, start, last_lba, end;
1858 sd_id128_t disk_uuid;
1859 int r;
1860
1861 assert(fd >= 0);
1862 assert(label);
1863 assert(ret_offset);
1864 assert(ret_size);
1865
1866 t = fdisk_new_parttype();
1867 if (!t)
1868 return log_oom();
1869
1870 r = fdisk_parttype_set_typestr(t, SD_GPT_USER_HOME_STR);
1871 if (r < 0)
1872 return log_error_errno(r, "Failed to initialize partition type: %m");
1873
1874 r = fdisk_new_context_at(fd, /* path= */ NULL, /* read_only= */ false, sector_size, &c);
1875 if (r < 0)
1876 return log_error_errno(r, "Failed to open device: %m");
1877
1878 r = fdisk_create_disklabel(c, "gpt");
1879 if (r < 0)
1880 return log_error_errno(r, "Failed to create GPT disk label: %m");
1881
1882 p = fdisk_new_partition();
1883 if (!p)
1884 return log_oom();
1885
1886 r = fdisk_partition_set_type(p, t);
1887 if (r < 0)
1888 return log_error_errno(r, "Failed to set partition type: %m");
1889
1890 r = fdisk_partition_partno_follow_default(p, 1);
1891 if (r < 0)
1892 return log_error_errno(r, "Failed to place partition at first free partition index: %m");
1893
1894 first_lba = fdisk_get_first_lba(c); /* Boundary where usable space starts */
1895 assert(first_lba <= UINT64_MAX/512);
1896 start = DISK_SIZE_ROUND_UP(first_lba * 512); /* Round up to multiple of 4K */
1897
1898 log_debug("Starting partition at offset %" PRIu64, start);
1899
1900 if (start == UINT64_MAX)
1901 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Overflow while rounding up start LBA.");
1902
1903 last_lba = fdisk_get_last_lba(c); /* One sector before boundary where usable space ends */
1904 assert(last_lba < UINT64_MAX/512);
1905 end = DISK_SIZE_ROUND_DOWN((last_lba + 1) * 512); /* Round down to multiple of 4K */
1906
1907 if (end <= start)
1908 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Resulting partition size zero or negative.");
1909
1910 r = fdisk_partition_set_start(p, start / 512);
1911 if (r < 0)
1912 return log_error_errno(r, "Failed to place partition at offset %" PRIu64 ": %m", start);
1913
1914 r = fdisk_partition_set_size(p, (end - start) / 512);
1915 if (r < 0)
1916 return log_error_errno(r, "Failed to end partition at offset %" PRIu64 ": %m", end);
1917
1918 r = fdisk_partition_set_name(p, label);
1919 if (r < 0)
1920 return log_error_errno(r, "Failed to set partition name: %m");
1921
1922 r = fdisk_partition_set_uuid(p, SD_ID128_TO_UUID_STRING(uuid));
1923 if (r < 0)
1924 return log_error_errno(r, "Failed to set partition UUID: %m");
1925
1926 r = fdisk_add_partition(c, p, NULL);
1927 if (r < 0)
1928 return log_error_errno(r, "Failed to add partition: %m");
1929
1930 r = fdisk_write_disklabel(c);
1931 if (r < 0)
1932 return log_error_errno(r, "Failed to write disk label: %m");
1933
1934 r = fdisk_get_disklabel_id(c, &disk_uuid_as_string);
1935 if (r < 0)
1936 return log_error_errno(r, "Failed to determine disk label UUID: %m");
1937
1938 r = sd_id128_from_string(disk_uuid_as_string, &disk_uuid);
1939 if (r < 0)
1940 return log_error_errno(r, "Failed to parse disk label UUID: %m");
1941
1942 r = fdisk_get_partition(c, 0, &q);
1943 if (r < 0)
1944 return log_error_errno(r, "Failed to read created partition metadata: %m");
1945
1946 assert(fdisk_partition_has_start(q));
1947 offset = fdisk_partition_get_start(q);
1948 if (offset > UINT64_MAX / 512U)
1949 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Partition offset too large.");
1950
1951 assert(fdisk_partition_has_size(q));
1952 size = fdisk_partition_get_size(q);
1953 if (size > UINT64_MAX / 512U)
1954 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Partition size too large.");
1955
1956 *ret_offset = offset * 512U;
1957 *ret_size = size * 512U;
1958 *ret_disk_uuid = disk_uuid;
1959
1960 return 0;
1961 }
1962
/* Returns true if 'host_size' is at least the minimal size minimal_size_by_fs_name() reports for
 * 'fstype'. Returns false if that function reports UINT64_MAX for the file system type. */
static bool supported_fs_size(const char *fstype, uint64_t host_size) {
        uint64_t minimum = minimal_size_by_fs_name(fstype);

        return minimum != UINT64_MAX && host_size >= minimum;
}
1972
1973 static int wait_for_devlink(const char *path) {
1974 _cleanup_close_ int inotify_fd = -EBADF;
1975 usec_t until;
1976 int r;
1977
1978 /* let's wait for a device link to show up in /dev, with a timeout. This is good to do since we
1979 * return a /dev/disk/by-uuid/… link to our callers and they likely want to access it right-away,
1980 * hence let's wait until udev has caught up with our changes, and wait for the symlink to be
1981 * created. */
1982
1983 until = usec_add(now(CLOCK_MONOTONIC), 45 * USEC_PER_SEC);
1984
1985 for (;;) {
1986 _cleanup_free_ char *dn = NULL;
1987 usec_t w;
1988
1989 if (laccess(path, F_OK) < 0) {
1990 if (errno != ENOENT)
1991 return log_error_errno(errno, "Failed to determine whether %s exists: %m", path);
1992 } else
1993 return 0; /* Found it */
1994
1995 if (inotify_fd < 0) {
1996 /* We need to wait for the device symlink to show up, let's create an inotify watch for it */
1997 inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1998 if (inotify_fd < 0)
1999 return log_error_errno(errno, "Failed to allocate inotify fd: %m");
2000 }
2001
2002 r = path_extract_directory(path, &dn);
2003 if (r < 0)
2004 return log_error_errno(r, "Failed to extract directory from device node path '%s': %m", path);
2005 for (;;) {
2006 _cleanup_free_ char *ndn = NULL;
2007
2008 log_info("Watching %s", dn);
2009
2010 if (inotify_add_watch(inotify_fd, dn, IN_CREATE|IN_MOVED_TO|IN_ONLYDIR|IN_DELETE_SELF|IN_MOVE_SELF) < 0) {
2011 if (errno != ENOENT)
2012 return log_error_errno(errno, "Failed to add watch on %s: %m", dn);
2013 } else
2014 break;
2015
2016 r = path_extract_directory(dn, &ndn);
2017 if (r == -EADDRNOTAVAIL) /* Arrived at the top? */
2018 break;
2019 if (r < 0)
2020 return log_error_errno(r, "Failed to extract directory from device node path '%s': %m", dn);
2021
2022 free_and_replace(dn, ndn);
2023 }
2024
2025 w = now(CLOCK_MONOTONIC);
2026 if (w >= until)
2027 return log_error_errno(SYNTHETIC_ERRNO(ETIMEDOUT), "Device link %s still hasn't shown up, giving up.", path);
2028
2029 r = fd_wait_for_event(inotify_fd, POLLIN, until - w);
2030 if (ERRNO_IS_NEG_TRANSIENT(r))
2031 continue;
2032 if (r < 0)
2033 return log_error_errno(r, "Failed to watch inotify: %m");
2034
2035 (void) flush_fd(inotify_fd);
2036 }
2037 }
2038
2039 static int calculate_initial_image_size(UserRecord *h, int image_fd, const char *fstype, uint64_t *ret) {
2040 uint64_t upper_boundary, lower_boundary;
2041 struct statfs sfs;
2042
2043 assert(h);
2044 assert(image_fd >= 0);
2045 assert(ret);
2046
2047 if (fstatfs(image_fd, &sfs) < 0)
2048 return log_error_errno(errno, "statfs() on image failed: %m");
2049
2050 upper_boundary = DISK_SIZE_ROUND_DOWN((uint64_t) sfs.f_bsize * sfs.f_bavail);
2051
2052 if (h->disk_size != UINT64_MAX)
2053 *ret = MIN(DISK_SIZE_ROUND_DOWN(h->disk_size), upper_boundary);
2054 else if (h->disk_size_relative == UINT64_MAX) {
2055
2056 if (upper_boundary > UINT64_MAX / USER_DISK_SIZE_DEFAULT_PERCENT)
2057 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Disk size too large.");
2058
2059 *ret = DISK_SIZE_ROUND_DOWN(upper_boundary * USER_DISK_SIZE_DEFAULT_PERCENT / 100);
2060
2061 log_info("Sizing home to %u%% of available disk space, which is %s.",
2062 USER_DISK_SIZE_DEFAULT_PERCENT,
2063 FORMAT_BYTES(*ret));
2064 } else {
2065 *ret = DISK_SIZE_ROUND_DOWN((uint64_t) ((double) upper_boundary * (double) CLAMP(h->disk_size_relative, 0U, UINT32_MAX) / (double) UINT32_MAX));
2066
2067 log_info("Sizing home to %" PRIu64 ".%01" PRIu64 "%% of available disk space, which is %s.",
2068 (h->disk_size_relative * 100) / UINT32_MAX,
2069 ((h->disk_size_relative * 1000) / UINT32_MAX) % 10,
2070 FORMAT_BYTES(*ret));
2071 }
2072
2073 lower_boundary = minimal_size_by_fs_name(fstype);
2074 if (lower_boundary != UINT64_MAX) {
2075 assert(GPT_LUKS2_OVERHEAD < UINT64_MAX - lower_boundary);
2076 lower_boundary += GPT_LUKS2_OVERHEAD;
2077 }
2078 if (lower_boundary == UINT64_MAX || lower_boundary < USER_DISK_SIZE_MIN)
2079 lower_boundary = USER_DISK_SIZE_MIN;
2080
2081 if (*ret < lower_boundary)
2082 *ret = lower_boundary;
2083
2084 return 0;
2085 }
2086
2087 static int home_truncate(
2088 UserRecord *h,
2089 int fd,
2090 uint64_t size) {
2091
2092 bool trunc;
2093 int r;
2094
2095 assert(h);
2096 assert(fd >= 0);
2097
2098 trunc = user_record_luks_discard(h);
2099 if (!trunc) {
2100 r = fallocate(fd, 0, 0, size);
2101 if (r < 0 && ERRNO_IS_NOT_SUPPORTED(errno)) {
2102 /* Some file systems do not support fallocate(), let's gracefully degrade
2103 * (ZFS, reiserfs, …) and fall back to truncation */
2104 log_notice_errno(errno, "Backing file system does not support fallocate(), falling back to ftruncate(), i.e. implicitly using non-discard mode.");
2105 trunc = true;
2106 }
2107 }
2108
2109 if (trunc)
2110 r = ftruncate(fd, size);
2111
2112 if (r < 0) {
2113 if (ERRNO_IS_DISK_SPACE(errno)) {
2114 log_debug_errno(errno, "Not enough disk space to allocate home of size %s.", FORMAT_BYTES(size));
2115 return -ENOSPC; /* make recognizable */
2116 }
2117
2118 return log_error_errno(errno, "Failed to truncate home image: %m");
2119 }
2120
2121 return !trunc; /* Return == 0 if we managed to truncate, > 0 if we managed to allocate */
2122 }
2123
2124 int home_create_luks(
2125 UserRecord *h,
2126 HomeSetup *setup,
2127 const PasswordCache *cache,
2128 char **effective_passwords,
2129 UserRecord **ret_home) {
2130
2131 _cleanup_free_ char *subdir = NULL, *disk_uuid_path = NULL;
2132 uint64_t encrypted_size,
2133 host_size = 0, partition_offset = 0, partition_size = 0; /* Unnecessary initialization to appease gcc */
2134 _cleanup_(user_record_unrefp) UserRecord *new_home = NULL;
2135 sd_id128_t partition_uuid, fs_uuid, luks_uuid, disk_uuid;
2136 _cleanup_close_ int mount_fd = -EBADF;
2137 const char *fstype, *ip;
2138 struct statfs sfs;
2139 int r;
2140 _cleanup_strv_free_ char **extra_mkfs_options = NULL;
2141
2142 assert(h);
2143 assert(h->storage < 0 || h->storage == USER_LUKS);
2144 assert(setup);
2145 assert(!setup->temporary_image_path);
2146 assert(setup->image_fd < 0);
2147 assert(ret_home);
2148
2149 r = dlopen_cryptsetup();
2150 if (r < 0)
2151 return r;
2152
2153 assert_se(ip = user_record_image_path(h));
2154
2155 fstype = user_record_file_system_type(h);
2156 if (!supported_fstype(fstype))
2157 return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "Unsupported file system type: %s", fstype);
2158
2159 r = mkfs_exists(fstype);
2160 if (r < 0)
2161 return log_error_errno(r, "Failed to check if mkfs binary for %s exists: %m", fstype);
2162 if (r == 0) {
2163 if (h->file_system_type || streq(fstype, "ext4") || !supported_fstype("ext4"))
2164 return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "mkfs binary for file system type %s does not exist.", fstype);
2165
2166 /* If the record does not explicitly declare a file system to use, and the compiled-in
2167 * default does not actually exist, than do an automatic fallback onto ext4, as the baseline
2168 * fs of Linux. We won't search for a working fs type here beyond ext4, i.e. nothing fancier
2169 * than a single, conservative fallback to baseline. This should be useful in minimal
2170 * environments where mkfs.btrfs or so are not made available, but mkfs.ext4 as Linux' most
2171 * boring, most basic fs is. */
2172 log_info("Formatting tool for compiled-in default file system %s not available, falling back to ext4 instead.", fstype);
2173 fstype = "ext4";
2174 }
2175
2176 if (sd_id128_is_null(h->partition_uuid)) {
2177 r = sd_id128_randomize(&partition_uuid);
2178 if (r < 0)
2179 return log_error_errno(r, "Failed to acquire partition UUID: %m");
2180 } else
2181 partition_uuid = h->partition_uuid;
2182
2183 if (sd_id128_is_null(h->luks_uuid)) {
2184 r = sd_id128_randomize(&luks_uuid);
2185 if (r < 0)
2186 return log_error_errno(r, "Failed to acquire LUKS UUID: %m");
2187 } else
2188 luks_uuid = h->luks_uuid;
2189
2190 if (sd_id128_is_null(h->file_system_uuid)) {
2191 r = sd_id128_randomize(&fs_uuid);
2192 if (r < 0)
2193 return log_error_errno(r, "Failed to acquire file system UUID: %m");
2194 } else
2195 fs_uuid = h->file_system_uuid;
2196
2197 r = make_dm_names(h, setup);
2198 if (r < 0)
2199 return r;
2200
2201 r = access(setup->dm_node, F_OK);
2202 if (r < 0) {
2203 if (errno != ENOENT)
2204 return log_error_errno(errno, "Failed to determine whether %s exists: %m", setup->dm_node);
2205 } else
2206 return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Device mapper device %s already exists, refusing.", setup->dm_node);
2207
2208 if (path_startswith(ip, "/dev/")) {
2209 _cleanup_free_ char *sysfs = NULL;
2210 uint64_t block_device_size;
2211 struct stat st;
2212
2213 /* Let's place the home directory on a real device, i.e. a USB stick or such */
2214
2215 setup->image_fd = open_image_file(h, ip, &st);
2216 if (setup->image_fd < 0)
2217 return setup->image_fd;
2218
2219 if (!S_ISBLK(st.st_mode))
2220 return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Device is not a block device, refusing.");
2221
2222 if (asprintf(&sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev)) < 0)
2223 return log_oom();
2224 if (access(sysfs, F_OK) < 0) {
2225 if (errno != ENOENT)
2226 return log_error_errno(errno, "Failed to check whether %s exists: %m", sysfs);
2227 } else
2228 return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Operating on partitions is currently not supported, sorry. Please specify a top-level block device.");
2229
2230 if (flock(setup->image_fd, LOCK_EX) < 0) /* make sure udev doesn't read from it while we operate on the device */
2231 return log_error_errno(errno, "Failed to lock block device %s: %m", ip);
2232
2233 r = blockdev_get_device_size(setup->image_fd, &block_device_size);
2234 if (r < 0)
2235 return log_error_errno(r, "Failed to read block device size: %m");
2236
2237 if (h->disk_size == UINT64_MAX) {
2238
2239 /* If a relative disk size is requested, apply it relative to the block device size */
2240 if (h->disk_size_relative < UINT32_MAX)
2241 host_size = CLAMP(DISK_SIZE_ROUND_DOWN(block_device_size * h->disk_size_relative / UINT32_MAX),
2242 USER_DISK_SIZE_MIN, USER_DISK_SIZE_MAX);
2243 else
2244 host_size = block_device_size; /* Otherwise, take the full device */
2245
2246 } else if (h->disk_size > block_device_size)
2247 return log_error_errno(SYNTHETIC_ERRNO(EMSGSIZE), "Selected disk size larger than backing block device, refusing.");
2248 else
2249 host_size = DISK_SIZE_ROUND_DOWN(h->disk_size);
2250
2251 if (!supported_fs_size(fstype, LESS_BY(host_size, GPT_LUKS2_OVERHEAD)))
2252 return log_error_errno(SYNTHETIC_ERRNO(ERANGE),
2253 "Selected file system size too small for %s.", fstype);
2254
2255 /* After creation we should reference this partition by its UUID instead of the block
2256 * device. That's preferable since the user might have specified a device node such as
2257 * /dev/sdb to us, which might look very different when replugged. */
2258 if (asprintf(&disk_uuid_path, "/dev/disk/by-uuid/" SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(luks_uuid)) < 0)
2259 return log_oom();
2260
2261 if (user_record_luks_discard(h) || user_record_luks_offline_discard(h)) {
2262 /* If we want online or offline discard, discard once before we start using things. */
2263
2264 if (ioctl(setup->image_fd, BLKDISCARD, (uint64_t[]) { 0, block_device_size }) < 0)
2265 log_full_errno(errno == EOPNOTSUPP ? LOG_DEBUG : LOG_WARNING, errno,
2266 "Failed to issue full-device BLKDISCARD on device, ignoring: %m");
2267 else
2268 log_info("Full device discard completed.");
2269 }
2270 } else {
2271 _cleanup_free_ char *t = NULL;
2272
2273 r = mkdir_parents(ip, 0755);
2274 if (r < 0)
2275 return log_error_errno(r, "Failed to create parent directory of %s: %m", ip);
2276
2277 r = tempfn_random(ip, "homework", &t);
2278 if (r < 0)
2279 return log_error_errno(r, "Failed to derive temporary file name for %s: %m", ip);
2280
2281 setup->image_fd = open(t, O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0600);
2282 if (setup->image_fd < 0)
2283 return log_error_errno(errno, "Failed to create home image %s: %m", t);
2284
2285 setup->temporary_image_path = TAKE_PTR(t);
2286
2287 r = chattr_full(setup->image_fd, NULL, FS_NOCOW_FL|FS_NOCOMP_FL, FS_NOCOW_FL|FS_NOCOMP_FL, NULL, NULL, CHATTR_FALLBACK_BITWISE);
2288 if (r < 0 && r != -ENOANO) /* ENOANO → some bits didn't work; which we skip logging about because chattr_full() already debug logs about those flags */
2289 log_full_errno(ERRNO_IS_NOT_SUPPORTED(r) ? LOG_DEBUG : LOG_WARNING, r,
2290 "Failed to set file attributes on %s, ignoring: %m", setup->temporary_image_path);
2291
2292 r = calculate_initial_image_size(h, setup->image_fd, fstype, &host_size);
2293 if (r < 0)
2294 return r;
2295
2296 r = resize_image_loop(h, setup, 0, host_size, &host_size);
2297 if (r < 0)
2298 return r;
2299
2300 log_info("Allocating image file completed.");
2301 }
2302
2303 r = make_partition_table(
2304 setup->image_fd,
2305 user_record_luks_sector_size(h),
2306 user_record_user_name_and_realm(h),
2307 partition_uuid,
2308 &partition_offset,
2309 &partition_size,
2310 &disk_uuid);
2311 if (r < 0)
2312 return r;
2313
2314 log_info("Writing of partition table completed.");
2315
2316 r = loop_device_make(
2317 setup->image_fd,
2318 O_RDWR,
2319 partition_offset,
2320 partition_size,
2321 user_record_luks_sector_size(h),
2322 0,
2323 LOCK_EX,
2324 &setup->loop);
2325 if (r < 0) {
2326 if (r == -ENOENT) { /* this means /dev/loop-control doesn't exist, i.e. we are in a container
2327 * or similar and loopback bock devices are not available, return a
2328 * recognizable error in this case. */
2329 log_error_errno(r, "Loopback block device support is not available on this system.");
2330 return -ENOLINK; /* Make recognizable */
2331 }
2332
2333 return log_error_errno(r, "Failed to set up loopback device for %s: %m", setup->temporary_image_path);
2334 }
2335
2336 log_info("Setting up loopback device %s completed.", setup->loop->node ?: ip);
2337
2338 r = luks_format(setup->loop->node,
2339 setup->dm_name,
2340 luks_uuid,
2341 user_record_user_name_and_realm(h),
2342 cache,
2343 effective_passwords,
2344 user_record_luks_discard(h) || user_record_luks_offline_discard(h),
2345 h,
2346 &setup->crypt_device);
2347 if (r < 0)
2348 return r;
2349
2350 setup->undo_dm = true;
2351
2352 r = block_get_size_by_path(setup->dm_node, &encrypted_size);
2353 if (r < 0)
2354 return log_error_errno(r, "Failed to get encrypted block device size: %m");
2355
2356 log_info("Setting up LUKS device %s completed.", setup->dm_node);
2357
2358 r = mkfs_options_from_env("HOME", fstype, &extra_mkfs_options);
2359 if (r < 0)
2360 return log_error_errno(r, "Failed to determine mkfs command line options for '%s': %m", fstype);
2361
2362 r = make_filesystem(setup->dm_node,
2363 fstype,
2364 user_record_user_name_and_realm(h),
2365 /* root = */ NULL,
2366 fs_uuid,
2367 user_record_luks_discard(h),
2368 /* quiet = */ true,
2369 /* sector_size = */ 0,
2370 extra_mkfs_options);
2371 if (r < 0)
2372 return r;
2373
2374 log_info("Formatting file system completed.");
2375
2376 r = home_unshare_and_mount(setup->dm_node, fstype, user_record_luks_discard(h), user_record_mount_flags(h), h->luks_extra_mount_options);
2377 if (r < 0)
2378 return r;
2379
2380 setup->undo_mount = true;
2381
2382 subdir = path_join(HOME_RUNTIME_WORK_DIR, user_record_user_name_and_realm(h));
2383 if (!subdir)
2384 return log_oom();
2385
2386 /* Prefer using a btrfs subvolume if we can, fall back to directory otherwise */
2387 r = btrfs_subvol_make_fallback(AT_FDCWD, subdir, 0700);
2388 if (r < 0)
2389 return log_error_errno(r, "Failed to create user directory in mounted image file: %m");
2390
2391 setup->root_fd = open(subdir, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
2392 if (setup->root_fd < 0)
2393 return log_error_errno(errno, "Failed to open user directory in mounted image file: %m");
2394
2395 (void) home_shift_uid(setup->root_fd, NULL, UID_NOBODY, h->uid, &mount_fd);
2396
2397 if (mount_fd >= 0) {
2398 /* If we have established a new mount, then we can use that as new root fd to our home directory. */
2399 safe_close(setup->root_fd);
2400
2401 setup->root_fd = fd_reopen(mount_fd, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
2402 if (setup->root_fd < 0)
2403 return log_error_errno(setup->root_fd, "Unable to convert mount fd into proper directory fd: %m");
2404
2405 mount_fd = safe_close(mount_fd);
2406 }
2407
2408 r = home_populate(h, setup->root_fd);
2409 if (r < 0)
2410 return r;
2411
2412 r = home_sync_and_statfs(setup->root_fd, &sfs);
2413 if (r < 0)
2414 return r;
2415
2416 r = user_record_clone(h, USER_RECORD_LOAD_MASK_SECRET|USER_RECORD_LOG|USER_RECORD_PERMISSIVE, &new_home);
2417 if (r < 0)
2418 return log_error_errno(r, "Failed to clone record: %m");
2419
2420 r = user_record_add_binding(
2421 new_home,
2422 USER_LUKS,
2423 disk_uuid_path ?: ip,
2424 partition_uuid,
2425 luks_uuid,
2426 fs_uuid,
2427 sym_crypt_get_cipher(setup->crypt_device),
2428 sym_crypt_get_cipher_mode(setup->crypt_device),
2429 luks_volume_key_size_convert(setup->crypt_device),
2430 fstype,
2431 NULL,
2432 h->uid,
2433 (gid_t) h->uid);
2434 if (r < 0)
2435 return log_error_errno(r, "Failed to add binding to record: %m");
2436
2437 if (user_record_luks_offline_discard(h)) {
2438 r = run_fitrim(setup->root_fd);
2439 if (r < 0)
2440 return r;
2441 }
2442
2443 setup->root_fd = safe_close(setup->root_fd);
2444
2445 r = home_setup_undo_mount(setup, LOG_ERR);
2446 if (r < 0)
2447 return r;
2448
2449 r = home_setup_undo_dm(setup, LOG_ERR);
2450 if (r < 0)
2451 return r;
2452
2453 setup->loop = loop_device_unref(setup->loop);
2454
2455 if (!user_record_luks_offline_discard(h)) {
2456 r= run_fallocate(setup->image_fd, NULL /* refresh stat() data */);
2457 if (r < 0)
2458 return r;
2459 }
2460
2461 /* Sync everything to disk before we move things into place under the final name. */
2462 if (fsync(setup->image_fd) < 0)
2463 return log_error_errno(r, "Failed to synchronize image to disk: %m");
2464
2465 if (disk_uuid_path)
2466 /* Reread partition table if this is a block device */
2467 (void) ioctl(setup->image_fd, BLKRRPART, 0);
2468 else {
2469 assert(setup->temporary_image_path);
2470
2471 if (rename(setup->temporary_image_path, ip) < 0)
2472 return log_error_errno(errno, "Failed to rename image file: %m");
2473
2474 setup->temporary_image_path = mfree(setup->temporary_image_path);
2475
2476 /* If we operate on a file, sync the containing directory too. */
2477 r = fsync_directory_of_file(setup->image_fd);
2478 if (r < 0)
2479 return log_error_errno(r, "Failed to synchronize directory of image file to disk: %m");
2480
2481 log_info("Moved image file into place.");
2482 }
2483
2484 /* Let's close the image fd now. If we are operating on a real block device this will release the BSD
2485 * lock that ensures udev doesn't interfere with what we are doing */
2486 setup->image_fd = safe_close(setup->image_fd);
2487
2488 if (disk_uuid_path)
2489 (void) wait_for_devlink(disk_uuid_path);
2490
2491 log_info("Creation completed.");
2492
2493 print_size_summary(host_size, encrypted_size, &sfs);
2494
2495 log_debug("GPT + LUKS2 overhead is %" PRIu64 " (expected %" PRIu64 ")", host_size - encrypted_size, GPT_LUKS2_OVERHEAD);
2496
2497 *ret_home = TAKE_PTR(new_home);
2498 return 0;
2499 }
2500
2501 int home_get_state_luks(UserRecord *h, HomeSetup *setup) {
2502 int r;
2503
2504 assert(h);
2505 assert(setup);
2506
2507 r = make_dm_names(h, setup);
2508 if (r < 0)
2509 return r;
2510
2511 r = access(setup->dm_node, F_OK);
2512 if (r < 0 && errno != ENOENT)
2513 return log_error_errno(errno, "Failed to determine whether %s exists: %m", setup->dm_node);
2514
2515 return r >= 0;
2516 }
2517
/* Non-negative results of can_resize_fs(): how the file system may be resized */
enum {
        CAN_RESIZE_ONLINE  = 0, /* may be resized while mounted */
        CAN_RESIZE_OFFLINE = 1, /* needs to be resized offline */
};
2522
2523 static int can_resize_fs(int fd, uint64_t old_size, uint64_t new_size) {
2524 struct statfs sfs;
2525
2526 assert(fd >= 0);
2527
2528 /* Filter out bogus requests early */
2529 if (old_size == 0 || old_size == UINT64_MAX ||
2530 new_size == 0 || new_size == UINT64_MAX)
2531 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid resize parameters.");
2532
2533 if ((old_size & 511) != 0 || (new_size & 511) != 0)
2534 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Resize parameters not multiple of 512.");
2535
2536 if (fstatfs(fd, &sfs) < 0)
2537 return log_error_errno(errno, "Failed to fstatfs() file system: %m");
2538
2539 if (is_fs_type(&sfs, BTRFS_SUPER_MAGIC)) {
2540
2541 if (new_size < BTRFS_MINIMAL_SIZE)
2542 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "New file system size too small for btrfs (needs to be 256M at least.");
2543
2544 /* btrfs can grow and shrink online */
2545
2546 } else if (is_fs_type(&sfs, XFS_SB_MAGIC)) {
2547
2548 if (new_size < XFS_MINIMAL_SIZE)
2549 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "New file system size too small for xfs (needs to be 14M at least).");
2550
2551 /* XFS can grow, but not shrink */
2552 if (new_size < old_size)
2553 return log_error_errno(SYNTHETIC_ERRNO(EMSGSIZE), "Shrinking this type of file system is not supported.");
2554
2555 } else if (is_fs_type(&sfs, EXT4_SUPER_MAGIC)) {
2556
2557 if (new_size < EXT4_MINIMAL_SIZE)
2558 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "New file system size too small for ext4 (needs to be 1M at least).");
2559
2560 /* ext4 can grow online, and shrink offline */
2561 if (new_size < old_size)
2562 return CAN_RESIZE_OFFLINE;
2563
2564 } else
2565 return log_error_errno(SYNTHETIC_ERRNO(ESOCKTNOSUPPORT), "Resizing this type of file system is not supported.");
2566
2567 return CAN_RESIZE_ONLINE;
2568 }
2569
2570 static int ext4_offline_resize_fs(
2571 HomeSetup *setup,
2572 uint64_t new_size,
2573 bool discard,
2574 unsigned long flags,
2575 const char *extra_mount_options) {
2576
2577 _cleanup_free_ char *size_str = NULL;
2578 bool re_open = false, re_mount = false;
2579 pid_t resize_pid, fsck_pid;
2580 int r, exit_status;
2581
2582 assert(setup);
2583 assert(setup->dm_node);
2584
2585 /* First, unmount the file system */
2586 if (setup->root_fd >= 0) {
2587 setup->root_fd = safe_close(setup->root_fd);
2588 re_open = true;
2589 }
2590
2591 if (setup->undo_mount) {
2592 r = home_setup_undo_mount(setup, LOG_ERR);
2593 if (r < 0)
2594 return r;
2595
2596 re_mount = true;
2597 }
2598
2599 log_info("Temporary unmounting of file system completed.");
2600
2601 /* resize2fs requires that the file system is force checked first, do so. */
2602 r = safe_fork("(e2fsck)",
2603 FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_STDOUT_TO_STDERR|FORK_CLOSE_ALL_FDS,
2604 &fsck_pid);
2605 if (r < 0)
2606 return r;
2607 if (r == 0) {
2608 /* Child */
2609 execlp("e2fsck" ,"e2fsck", "-fp", setup->dm_node, NULL);
2610 log_open();
2611 log_error_errno(errno, "Failed to execute e2fsck: %m");
2612 _exit(EXIT_FAILURE);
2613 }
2614
2615 exit_status = wait_for_terminate_and_check("e2fsck", fsck_pid, WAIT_LOG_ABNORMAL);
2616 if (exit_status < 0)
2617 return exit_status;
2618 if ((exit_status & ~FSCK_ERROR_CORRECTED) != 0) {
2619 log_warning("e2fsck failed with exit status %i.", exit_status);
2620
2621 if ((exit_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0)
2622 return log_error_errno(SYNTHETIC_ERRNO(EIO), "File system is corrupted, refusing.");
2623
2624 log_warning("Ignoring fsck error.");
2625 }
2626
2627 log_info("Forced file system check completed.");
2628
2629 /* We use 512 sectors here, because resize2fs doesn't do byte sizes */
2630 if (asprintf(&size_str, "%" PRIu64 "s", new_size / 512) < 0)
2631 return log_oom();
2632
2633 /* Resize the thing */
2634 r = safe_fork("(e2resize)",
2635 FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_WAIT|FORK_STDOUT_TO_STDERR|FORK_CLOSE_ALL_FDS,
2636 &resize_pid);
2637 if (r < 0)
2638 return r;
2639 if (r == 0) {
2640 /* Child */
2641 execlp("resize2fs" ,"resize2fs", setup->dm_node, size_str, NULL);
2642 log_open();
2643 log_error_errno(errno, "Failed to execute resize2fs: %m");
2644 _exit(EXIT_FAILURE);
2645 }
2646
2647 log_info("Offline file system resize completed.");
2648
2649 /* Re-establish mounts and reopen the directory */
2650 if (re_mount) {
2651 r = home_mount_node(setup->dm_node, "ext4", discard, flags, extra_mount_options);
2652 if (r < 0)
2653 return r;
2654
2655 setup->undo_mount = true;
2656 }
2657
2658 if (re_open) {
2659 setup->root_fd = open(HOME_RUNTIME_WORK_DIR, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
2660 if (setup->root_fd < 0)
2661 return log_error_errno(errno, "Failed to reopen file system: %m");
2662 }
2663
2664 log_info("File system mounted again.");
2665
2666 return 0;
2667 }
2668
2669 static int prepare_resize_partition(
2670 int fd,
2671 uint64_t partition_offset,
2672 uint64_t old_partition_size,
2673 sd_id128_t *ret_disk_uuid,
2674 struct fdisk_table **ret_table,
2675 struct fdisk_partition **ret_partition) {
2676
2677 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
2678 _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL;
2679 _cleanup_free_ char *disk_uuid_as_string = NULL;
2680 struct fdisk_partition *found = NULL;
2681 sd_id128_t disk_uuid;
2682 size_t n_partitions;
2683 int r;
2684
2685 assert(fd >= 0);
2686 assert(ret_disk_uuid);
2687 assert(ret_table);
2688
2689 assert((partition_offset & 511) == 0);
2690 assert((old_partition_size & 511) == 0);
2691 assert(UINT64_MAX - old_partition_size >= partition_offset);
2692
2693 if (partition_offset == 0) {
2694 /* If the offset is at the beginning we assume no partition table, let's exit early. */
2695 log_debug("Not rewriting partition table, operating on naked device.");
2696 *ret_disk_uuid = SD_ID128_NULL;
2697 *ret_table = NULL;
2698 *ret_partition = NULL;
2699 return 0;
2700 }
2701
2702 r = fdisk_new_context_at(fd, /* path= */ NULL, /* read_only= */ false, UINT32_MAX, &c);
2703 if (r < 0)
2704 return log_error_errno(r, "Failed to open device: %m");
2705
2706 if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
2707 return log_error_errno(SYNTHETIC_ERRNO(ENOMEDIUM), "Disk has no GPT partition table.");
2708
2709 r = fdisk_get_disklabel_id(c, &disk_uuid_as_string);
2710 if (r < 0)
2711 return log_error_errno(r, "Failed to acquire disk UUID: %m");
2712
2713 r = sd_id128_from_string(disk_uuid_as_string, &disk_uuid);
2714 if (r < 0)
2715 return log_error_errno(r, "Failed parse disk UUID: %m");
2716
2717 r = fdisk_get_partitions(c, &t);
2718 if (r < 0)
2719 return log_error_errno(r, "Failed to acquire partition table: %m");
2720
2721 n_partitions = fdisk_table_get_nents(t);
2722 for (size_t i = 0; i < n_partitions; i++) {
2723 struct fdisk_partition *p;
2724
2725 p = fdisk_table_get_partition(t, i);
2726 if (!p)
2727 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");
2728
2729 if (fdisk_partition_is_used(p) <= 0)
2730 continue;
2731 if (fdisk_partition_has_start(p) <= 0 || fdisk_partition_has_size(p) <= 0 || fdisk_partition_has_end(p) <= 0)
2732 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found partition without a size.");
2733
2734 if (fdisk_partition_get_start(p) == partition_offset / 512U &&
2735 fdisk_partition_get_size(p) == old_partition_size / 512U) {
2736
2737 if (found)
2738 return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ), "Partition found twice, refusing.");
2739
2740 found = p;
2741 } else if (fdisk_partition_get_end(p) > partition_offset / 512U)
2742 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Can't extend, not last partition in image.");
2743 }
2744
2745 if (!found)
2746 return log_error_errno(SYNTHETIC_ERRNO(ENOPKG), "Failed to find matching partition to resize.");
2747
2748 *ret_disk_uuid = disk_uuid;
2749 *ret_table = TAKE_PTR(t);
2750 *ret_partition = found;
2751
2752 return 1;
2753 }
2754
2755 static int get_maximum_partition_size(
2756 int fd,
2757 struct fdisk_partition *p,
2758 uint64_t *ret_maximum_partition_size) {
2759
2760 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
2761 uint64_t start_lba, start, last_lba, end;
2762 int r;
2763
2764 assert(fd >= 0);
2765 assert(p);
2766 assert(ret_maximum_partition_size);
2767
2768 r = fdisk_new_context_at(fd, /* path= */ NULL, /* read_only= */ true, /* sector_size= */ UINT32_MAX, &c);
2769 if (r < 0)
2770 return log_error_errno(r, "Failed to create fdisk context: %m");
2771
2772 start_lba = fdisk_partition_get_start(p);
2773 assert(start_lba <= UINT64_MAX/512);
2774 start = start_lba * 512;
2775
2776 last_lba = fdisk_get_last_lba(c); /* One sector before boundary where usable space ends */
2777 assert(last_lba < UINT64_MAX/512);
2778 end = DISK_SIZE_ROUND_DOWN((last_lba + 1) * 512); /* Round down to multiple of 4K */
2779
2780 if (start > end)
2781 return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Last LBA is before partition start.");
2782
2783 *ret_maximum_partition_size = DISK_SIZE_ROUND_DOWN(end - start);
2784
2785 return 1;
2786 }
2787
2788 static int ask_cb(struct fdisk_context *c, struct fdisk_ask *ask, void *userdata) {
2789 char *result;
2790
2791 assert(c);
2792
2793 switch (fdisk_ask_get_type(ask)) {
2794
2795 case FDISK_ASKTYPE_STRING:
2796 result = new(char, 37);
2797 if (!result)
2798 return log_oom();
2799
2800 fdisk_ask_string_set_result(ask, sd_id128_to_uuid_string(*(sd_id128_t*) userdata, result));
2801 break;
2802
2803 default:
2804 log_debug("Unexpected question from libfdisk, ignoring.");
2805 }
2806
2807 return 0;
2808 }
2809
2810 static int apply_resize_partition(
2811 int fd,
2812 sd_id128_t disk_uuids,
2813 struct fdisk_table *t,
2814 struct fdisk_partition *p,
2815 size_t new_partition_size) {
2816
2817 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
2818 _cleanup_free_ void *two_zero_lbas = NULL;
2819 uint32_t ssz;
2820 ssize_t n;
2821 int r;
2822
2823 assert(fd >= 0);
2824 assert(!t == !p);
2825
2826 if (!t) /* no partition table to apply, exit early */
2827 return 0;
2828
2829 assert(p);
2830
2831 /* Before writing our partition patch the final size in */
2832 r = fdisk_partition_size_explicit(p, 1);
2833 if (r < 0)
2834 return log_error_errno(r, "Failed to enable explicit partition size: %m");
2835
2836 r = fdisk_partition_set_size(p, new_partition_size / 512U);
2837 if (r < 0)
2838 return log_error_errno(r, "Failed to change partition size: %m");
2839
2840 r = probe_sector_size(fd, &ssz);
2841 if (r < 0)
2842 return log_error_errno(r, "Failed to determine current sector size: %m");
2843
2844 two_zero_lbas = malloc0(ssz * 2);
2845 if (!two_zero_lbas)
2846 return log_oom();
2847
2848 /* libfdisk appears to get confused by the existing PMBR. Let's explicitly flush it out. */
2849 n = pwrite(fd, two_zero_lbas, ssz * 2, 0);
2850 if (n < 0)
2851 return log_error_errno(errno, "Failed to wipe partition table: %m");
2852 if ((size_t) n != ssz * 2)
2853 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short write while wiping partition table.");
2854
2855 r = fdisk_new_context_at(fd, /* path= */ NULL, /* read_only= */ false, ssz, &c);
2856 if (r < 0)
2857 return log_error_errno(r, "Failed to open device: %m");
2858
2859 r = fdisk_create_disklabel(c, "gpt");
2860 if (r < 0)
2861 return log_error_errno(r, "Failed to create GPT disk label: %m");
2862
2863 r = fdisk_apply_table(c, t);
2864 if (r < 0)
2865 return log_error_errno(r, "Failed to apply partition table: %m");
2866
2867 r = fdisk_set_ask(c, ask_cb, &disk_uuids);
2868 if (r < 0)
2869 return log_error_errno(r, "Failed to set libfdisk query function: %m");
2870
2871 r = fdisk_set_disklabel_id(c);
2872 if (r < 0)
2873 return log_error_errno(r, "Failed to change disklabel ID: %m");
2874
2875 r = fdisk_write_disklabel(c);
2876 if (r < 0)
2877 return log_error_errno(r, "Failed to write disk label: %m");
2878
2879 return 1;
2880 }
2881
/* Amount of free space (16M) we always leave in a home file system when shrinking it, so that it
 * remains possible to safely log in and update the user record while doing so. */
#define HOME_MIN_FREE (16U * 1024U * 1024U)
2884
2885 static int get_smallest_fs_size(int fd, uint64_t *ret) {
2886 uint64_t minsz, needed;
2887 struct statfs sfs;
2888
2889 assert(fd >= 0);
2890 assert(ret);
2891
2892 /* Determines the minimal disk size we might be able to shrink the file system referenced by the fd to. */
2893
2894 if (syncfs(fd) < 0) /* let's sync before we query the size, so that the values returned are accurate */
2895 return log_error_errno(errno, "Failed to synchronize home file system: %m");
2896
2897 if (fstatfs(fd, &sfs) < 0)
2898 return log_error_errno(errno, "Failed to statfs() home file system: %m");
2899
2900 /* Let's determine the minimal file system size of the used fstype */
2901 minsz = minimal_size_by_fs_magic(sfs.f_type);
2902 if (minsz == UINT64_MAX)
2903 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Don't know minimum file system size of file system type '%s' of home directory.", fs_type_to_string(sfs.f_type));
2904
2905 if (minsz < USER_DISK_SIZE_MIN)
2906 minsz = USER_DISK_SIZE_MIN;
2907
2908 if (sfs.f_bfree > sfs.f_blocks)
2909 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Detected amount of free blocks is greater than the total amount of file system blocks. Refusing.");
2910
2911 /* Calculate how much disk space is currently in use. */
2912 needed = sfs.f_blocks - sfs.f_bfree;
2913 if (needed > UINT64_MAX / sfs.f_bsize)
2914 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "File system size out of range.");
2915
2916 needed *= sfs.f_bsize;
2917
2918 /* Add some safety margin of free space we'll always keep */
2919 if (needed > UINT64_MAX - HOME_MIN_FREE) /* Check for overflow */
2920 needed = UINT64_MAX;
2921 else
2922 needed += HOME_MIN_FREE;
2923
2924 *ret = DISK_SIZE_ROUND_UP(MAX(needed, minsz));
2925 return 0;
2926 }
2927
2928 static int get_largest_image_size(int fd, const struct stat *st, uint64_t *ret) {
2929 uint64_t used, avail, sum;
2930 struct statfs sfs;
2931 int r;
2932
2933 assert(fd >= 0);
2934 assert(st);
2935 assert(ret);
2936
2937 /* Determines the maximum file size we might be able to grow the image file referenced by the fd to. */
2938
2939 r = stat_verify_regular(st);
2940 if (r < 0)
2941 return log_error_errno(r, "Image file is not a regular file, refusing: %m");
2942
2943 if (syncfs(fd) < 0)
2944 return log_error_errno(errno, "Failed to synchronize file system backing image file: %m");
2945
2946 if (fstatfs(fd, &sfs) < 0)
2947 return log_error_errno(errno, "Failed to statfs() image file: %m");
2948
2949 used = (uint64_t) st->st_blocks * 512;
2950 avail = (uint64_t) sfs.f_bsize * sfs.f_bavail;
2951
2952 if (avail > UINT64_MAX - used)
2953 sum = UINT64_MAX;
2954 else
2955 sum = avail + used;
2956
2957 *ret = DISK_SIZE_ROUND_DOWN(MIN(sum, USER_DISK_SIZE_MAX));
2958 return 0;
2959 }
2960
2961 static int resize_fs_loop(
2962 UserRecord *h,
2963 HomeSetup *setup,
2964 int resize_type,
2965 uint64_t old_fs_size,
2966 uint64_t new_fs_size,
2967 uint64_t *ret_fs_size) {
2968
2969 uint64_t current_fs_size;
2970 unsigned n_iterations = 0;
2971 int r;
2972
2973 assert(h);
2974 assert(setup);
2975 assert(setup->root_fd >= 0);
2976
2977 /* A bisection loop trying to find the closest size to what the user asked for. (Well, we bisect like
2978 * this only when we *shrink* the fs — if we grow the fs there's no need to bisect.) */
2979
2980 current_fs_size = old_fs_size;
2981 for (uint64_t lower_boundary = new_fs_size, upper_boundary = old_fs_size, try_fs_size = new_fs_size;;) {
2982 bool worked;
2983
2984 n_iterations++;
2985
2986 /* Now resize the file system */
2987 if (resize_type == CAN_RESIZE_ONLINE) {
2988 r = resize_fs(setup->root_fd, try_fs_size, NULL);
2989 if (r < 0) {
2990 if (!ERRNO_IS_DISK_SPACE(r) || new_fs_size > old_fs_size) /* Not a disk space issue? Not trying to shrink? */
2991 return log_error_errno(r, "Failed to resize file system: %m");
2992
2993 log_debug_errno(r, "Shrinking from %s to %s didn't work, not enough space for contained data.", FORMAT_BYTES(current_fs_size), FORMAT_BYTES(try_fs_size));
2994 worked = false;
2995 } else {
2996 log_debug("Successfully resized from %s to %s.", FORMAT_BYTES(current_fs_size), FORMAT_BYTES(try_fs_size));
2997 current_fs_size = try_fs_size;
2998 worked = true;
2999 }
3000
3001 /* If we hit a disk space issue and are shrinking the fs, then maybe it helps to
3002 * increase the image size. */
3003 } else {
3004 r = ext4_offline_resize_fs(setup, try_fs_size, user_record_luks_discard(h), user_record_mount_flags(h), h->luks_extra_mount_options);
3005 if (r < 0)
3006 return r;
3007
3008 /* For now, when we fail to shrink an ext4 image we'll not try again via the
3009 * bisection logic. We might add that later, but given this involves shelling out
3010 * multiple programs, it's a bit too cumbersome for my taste. */
3011
3012 worked = true;
3013 current_fs_size = try_fs_size;
3014 }
3015
3016 if (new_fs_size > old_fs_size) /* If we are growing we are done after one iteration */
3017 break;
3018
3019 /* If we are shrinking then let's adjust our bisection boundaries and try again. */
3020 if (worked)
3021 upper_boundary = MIN(upper_boundary, try_fs_size);
3022 else
3023 lower_boundary = MAX(lower_boundary, try_fs_size);
3024
3025 /* OK, this attempt to shrink didn't work. Let's try between the old size and what worked. */
3026 if (lower_boundary >= upper_boundary) {
3027 log_debug("Image can't be shrunk further (range to try is empty).");
3028 break;
3029 }
3030
3031 /* Let's find a new value to try half-way between the lower boundary and the upper boundary
3032 * to try now. */
3033 try_fs_size = DISK_SIZE_ROUND_DOWN(lower_boundary + (upper_boundary - lower_boundary) / 2);
3034 if (try_fs_size <= lower_boundary || try_fs_size >= upper_boundary) {
3035 log_debug("Image can't be shrunk further (remaining range to try too small).");
3036 break;
3037 }
3038 }
3039
3040 log_debug("Bisection loop completed after %u iterations.", n_iterations);
3041
3042 if (ret_fs_size)
3043 *ret_fs_size = current_fs_size;
3044
3045 return 0;
3046 }
3047
3048 static int resize_image_loop(
3049 UserRecord *h,
3050 HomeSetup *setup,
3051 uint64_t old_image_size,
3052 uint64_t new_image_size,
3053 uint64_t *ret_image_size) {
3054
3055 uint64_t current_image_size;
3056 unsigned n_iterations = 0;
3057 int r;
3058
3059 assert(h);
3060 assert(setup);
3061 assert(setup->image_fd >= 0);
3062
3063 /* A bisection loop trying to find the closest size to what the user asked for. (Well, we bisect like
3064 * this only when we *grow* the image — if we shrink the image then there's no need to bisect.) */
3065
3066 current_image_size = old_image_size;
3067 for (uint64_t lower_boundary = old_image_size, upper_boundary = new_image_size, try_image_size = new_image_size;;) {
3068 bool worked;
3069
3070 n_iterations++;
3071
3072 r = home_truncate(h, setup->image_fd, try_image_size);
3073 if (r < 0) {
3074 if (!ERRNO_IS_DISK_SPACE(r) || new_image_size < old_image_size) /* Not a disk space issue? Not trying to grow? */
3075 return r;
3076
3077 log_debug_errno(r, "Growing from %s to %s didn't work, not enough space on backing disk.", FORMAT_BYTES(current_image_size), FORMAT_BYTES(try_image_size));
3078 worked = false;
3079 } else if (r > 0) { /* Success: allocation worked */
3080 log_debug("Resizing from %s to %s via allocation worked successfully.", FORMAT_BYTES(current_image_size), FORMAT_BYTES(try_image_size));
3081 current_image_size = try_image_size;
3082 worked = true;
3083 } else { /* Success, but through truncation, not allocation. */
3084 log_debug("Resizing from %s to %s via truncation worked successfully.", FORMAT_BYTES(old_image_size), FORMAT_BYTES(try_image_size));
3085 current_image_size = try_image_size;
3086 break; /* there's no point in the bisection logic if this was plain truncation and
3087 * not allocation, let's exit immediately. */
3088 }
3089
3090 if (new_image_size < old_image_size) /* If we are shrinking we are done after one iteration */
3091 break;
3092
3093 /* If we are growing then let's adjust our bisection boundaries and try again */
3094 if (worked)
3095 lower_boundary = MAX(lower_boundary, try_image_size);
3096 else
3097 upper_boundary = MIN(upper_boundary, try_image_size);
3098
3099 if (lower_boundary >= upper_boundary) {
3100 log_debug("Image can't be grown further (range to try is empty).");
3101 break;
3102 }
3103
3104 try_image_size = DISK_SIZE_ROUND_DOWN(lower_boundary + (upper_boundary - lower_boundary) / 2);
3105 if (try_image_size <= lower_boundary || try_image_size >= upper_boundary) {
3106 log_debug("Image can't be grown further (remaining range to try too small).");
3107 break;
3108 }
3109 }
3110
3111 log_debug("Bisection loop completed after %u iterations.", n_iterations);
3112
3113 if (ret_image_size)
3114 *ret_image_size = current_image_size;
3115
3116 return 0;
3117 }
3118
/* Resizes a LUKS home directory: the backing image file (or, on block devices, only the partition), the
 * partition table entry, the LUKS mapping and the file system inside it.
 *
 * When growing, the outer layers are enlarged first (image file, loop device, partition, LUKS) and the
 * file system last. When shrinking, the file system is reduced first and the outer layers follow.
 *
 * Returns a negative value on failure. Returns 0 on success, and also on every "skipping operation" early
 * exit below. Note that *ret_home is only written when the function runs to completion, not on those
 * early exits. */
3119 int home_resize_luks(
3120 UserRecord *h,
3121 HomeSetupFlags flags,
3122 HomeSetup *setup,
3123 PasswordCache *cache,
3124 UserRecord **ret_home) {
3125
3126 uint64_t old_image_size, new_image_size, old_fs_size, new_fs_size, crypto_offset, crypto_offset_bytes,
3127 new_partition_size, smallest_fs_size, resized_fs_size;
3128 _cleanup_(user_record_unrefp) UserRecord *header_home = NULL, *embedded_home = NULL, *new_home = NULL;
3129 _cleanup_(fdisk_unref_tablep) struct fdisk_table *table = NULL;
3130 struct fdisk_partition *partition = NULL;
3131 _cleanup_close_ int opened_image_fd = -EBADF;
3132 _cleanup_free_ char *whole_disk = NULL;
3133 int r, resize_type, image_fd = -EBADF, reconciled = USER_RECONCILE_IDENTICAL;
3134 sd_id128_t disk_uuid;
3135 const char *ip, *ipo;
3136 struct statfs sfs;
3137 struct stat st;
3138 enum {
3139 INTENTION_DONT_KNOW = 0, /* These happen to match the return codes of CMP() */
3140 INTENTION_SHRINK = -1,
3141 INTENTION_GROW = 1,
3142 } intention = INTENTION_DONT_KNOW;
3143
3144 assert(h);
3145 assert(user_record_storage(h) == USER_LUKS);
3146 assert(setup);
3147
3148 r = dlopen_cryptsetup();
3149 if (r < 0)
3150 return r;
3151
3152 assert_se(ipo = user_record_image_path(h));
3153 ip = strdupa_safe(ipo); /* copy out since original might change later in home record object */
3154
3155 if (setup->image_fd < 0) {
3156 setup->image_fd = open_image_file(h, NULL, &st);
3157 if (setup->image_fd < 0)
3158 return setup->image_fd;
3159 } else {
3160 if (fstat(setup->image_fd, &st) < 0)
3161 return log_error_errno(errno, "Failed to stat image file %s: %m", ip);
3162 }
3163
/* image_fd is the descriptor the size and partition table operations below work on. For partition devices
 * it is switched to the whole disk further down, while setup->image_fd itself is left untouched. */
3164 image_fd = setup->image_fd;
3165
3166 if (S_ISBLK(st.st_mode)) {
3167 dev_t parent;
3168
3169 r = block_get_whole_disk(st.st_rdev, &parent);
3170 if (r < 0)
3171 return log_error_errno(r, "Failed to acquire whole block device for %s: %m", ip);
3172 if (r > 0) {
3173 /* If we shall resize a file system on a partition device, then let's figure out the
3174 * whole disk device and operate on that instead, since we need to rewrite the
3175 * partition table to resize the partition. */
3176
3177 log_info("Operating on partition device %s, using parent device.", ip);
3178
3179 opened_image_fd = r = device_open_from_devnum(S_IFBLK, parent, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK, &whole_disk);
3180 if (r < 0)
3181 return log_error_errno(r, "Failed to open whole block device for %s: %m", ip);
3182
3183 image_fd = opened_image_fd;
3184
3185 if (fstat(image_fd, &st) < 0)
3186 return log_error_errno(errno, "Failed to stat whole block device %s: %m", whole_disk);
3187 } else
3188 log_info("Operating on whole block device %s.", ip);
3189
3190 r = blockdev_get_device_size(image_fd, &old_image_size);
3191 if (r < 0)
3192 return log_error_errno(r, "Failed to determine size of original block device: %m");
3193
3194 if (flock(image_fd, LOCK_EX) < 0) /* make sure udev doesn't read from it while we operate on the device */
3195 return log_error_errno(errno, "Failed to lock block device %s: %m", ip);
3196
3197 new_image_size = old_image_size; /* we can't resize physical block devices */
3198 } else {
3199 r = stat_verify_regular(&st);
3200 if (r < 0)
3201 return log_error_errno(r, "Image %s is not a block device nor regular file: %m", ip);
3202
3203 old_image_size = st.st_size;
3204
3205 /* Note an asymmetry here: when we operate on loopback files the specified disk size we get we
3206 * apply onto the loopback file as a whole. When we operate on block devices we instead apply
3207 * to the partition itself only. */
3208
3209 if (FLAGS_SET(flags, HOME_SETUP_RESIZE_MINIMIZE)) {
3210 new_image_size = 0;
3211 intention = INTENTION_SHRINK;
3212 } else {
3213 uint64_t new_image_size_rounded;
3214
3215 new_image_size_rounded = DISK_SIZE_ROUND_DOWN(h->disk_size);
3216
3217 if (old_image_size >= new_image_size_rounded && old_image_size <= h->disk_size) {
3218 /* If exact match, or a match after we rounded down, don't do a thing */
3219 log_info("Image size already matching, skipping operation.");
3220 return 0;
3221 }
3222
3223 new_image_size = new_image_size_rounded;
3224 intention = CMP(new_image_size, old_image_size); /* Is this a shrink */
3225 }
3226 }
3227
/* NOTE(review): the code below relies on setup->root_fd, setup->partition_offset, setup->partition_size,
 * setup->crypt_device, setup->dm_name and setup->loop being valid once this call has succeeded —
 * presumably home_setup_luks() provides them; confirm against its implementation. */
3228 r = home_setup_luks(
3229 h,
3230 flags,
3231 whole_disk,
3232 setup,
3233 cache,
3234 FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES) ? NULL : &header_home);
3235 if (r < 0)
3236 return r;
3237
3238 if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
3239 reconciled = home_load_embedded_identity(h, setup->root_fd, header_home, USER_RECONCILE_REQUIRE_NEWER_OR_EQUAL, cache, &embedded_home, &new_home);
3240 if (reconciled < 0)
3241 return reconciled;
3242 }
3243
3244 r = home_maybe_shift_uid(h, flags, setup);
3245 if (r < 0)
3246 return r;
3247
3248 log_info("offset = %" PRIu64 ", size = %" PRIu64 ", image = %" PRIu64, setup->partition_offset, setup->partition_size, old_image_size);
3249
/* Overflow-safe check that offset + size of the existing partition lies within the backing storage */
3250 if ((UINT64_MAX - setup->partition_offset) < setup->partition_size ||
3251 setup->partition_offset + setup->partition_size > old_image_size)
3252 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Old partition doesn't fit in backing storage, refusing.");
3253
3254 /* Get target partition information in here for new_partition_size calculation */
3255 r = prepare_resize_partition(
3256 image_fd,
3257 setup->partition_offset,
3258 setup->partition_size,
3259 &disk_uuid,
3260 &table,
3261 &partition);
3262 if (r < 0)
3263 return r;
3264
3265 if (S_ISREG(st.st_mode)) {
3266 uint64_t partition_table_extra, largest_size;
3267
/* Everything in the image file that is not the partition itself. This amount is kept constant across the
 * resize: the new partition size is derived as the new image size minus this value. */
3268 partition_table_extra = old_image_size - setup->partition_size;
3269
3270 r = get_largest_image_size(setup->image_fd, &st, &largest_size);
3271 if (r < 0)
3272 return r;
3273 if (new_image_size > largest_size)
3274 new_image_size = largest_size;
3275
3276 if (new_image_size < partition_table_extra)
3277 new_image_size = partition_table_extra;
3278
3279 new_partition_size = DISK_SIZE_ROUND_DOWN(new_image_size - partition_table_extra);
3280 } else {
3281 assert(S_ISBLK(st.st_mode));
3282
3283 if (FLAGS_SET(flags, HOME_SETUP_RESIZE_MINIMIZE)) {
3284 new_partition_size = 0;
3285 intention = INTENTION_SHRINK;
3286 } else {
3287 uint64_t new_partition_size_rounded = DISK_SIZE_ROUND_DOWN(h->disk_size);
3288
3289 if (h->disk_size == UINT64_MAX && partition) {
3290 r = get_maximum_partition_size(image_fd, partition, &new_partition_size_rounded);
3291 if (r < 0)
3292 return r;
3293 }
3294
3295 if (setup->partition_size >= new_partition_size_rounded &&
3296 setup->partition_size <= h->disk_size) {
3297 log_info("Partition size already matching, skipping operation.");
3298 return 0;
3299 }
3300
3301 new_partition_size = new_partition_size_rounded;
3302 intention = CMP(new_partition_size, setup->partition_size);
3303 }
3304 }
3305
3306 if ((UINT64_MAX - setup->partition_offset) < new_partition_size ||
3307 setup->partition_offset + new_partition_size > new_image_size)
3308 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "New partition doesn't fit into backing storage, refusing.");
3309
/* The data offset is multiplied by 512 below, i.e. it is treated as a count of 512-byte sectors */
3310 crypto_offset = sym_crypt_get_data_offset(setup->crypt_device);
3311 if (crypto_offset > UINT64_MAX/512U)
3312 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS2 data offset out of range, refusing.");
3313 crypto_offset_bytes = (uint64_t) crypto_offset * 512U;
3314 if (setup->partition_size <= crypto_offset_bytes)
3315 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Weird, old crypto payload offset doesn't actually fit in partition size?");
3316
3317 /* Make sure at least the LUKS header fit in */
3318 if (new_partition_size <= crypto_offset_bytes) {
3319 uint64_t add;
3320
3321 add = DISK_SIZE_ROUND_UP(crypto_offset_bytes) - new_partition_size;
3322 new_partition_size += add;
3323 if (S_ISREG(st.st_mode))
3324 new_image_size += add;
3325 }
3326
3327 old_fs_size = setup->partition_size - crypto_offset_bytes;
3328 new_fs_size = DISK_SIZE_ROUND_DOWN(new_partition_size - crypto_offset_bytes);
3329
3330 r = get_smallest_fs_size(setup->root_fd, &smallest_fs_size);
3331 if (r < 0)
3332 return r;
3333
/* Never go below what the file system needs; enlarge all affected layers by the same amount */
3334 if (new_fs_size < smallest_fs_size) {
3335 uint64_t add;
3336
3337 add = DISK_SIZE_ROUND_UP(smallest_fs_size) - new_fs_size;
3338 new_fs_size += add;
3339 new_partition_size += add;
3340 if (S_ISREG(st.st_mode))
3341 new_image_size += add;
3342 }
3343
3344 if (new_fs_size == old_fs_size) {
3345 log_info("New file system size identical to old file system size, skipping operation.");
3346 return 0;
3347 }
3348
3349 if (FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_GROW) && new_fs_size > old_fs_size) {
3350 log_info("New file system size would be larger than old, but shrinking requested, skipping operation.");
3351 return 0;
3352 }
3353
3354 if (FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SHRINK) && new_fs_size < old_fs_size) {
3355 log_info("New file system size would be smaller than old, but growing requested, skipping operation.");
3356 return 0;
3357 }
3358
/* The size adjustments above (LUKS header, minimal file system size) may have flipped the direction of the
 * operation relative to what was originally requested. If so, do nothing. */
3359 if (CMP(new_fs_size, old_fs_size) != intention) {
3360 if (intention < 0)
3361 log_info("Shrink operation would enlarge file system, skipping operation.");
3362 else {
3363 assert(intention > 0);
3364 log_info("Grow operation would shrink file system, skipping operation.");
3365 }
3366 return 0;
3367 }
3368
3369 /* Before we start doing anything, let's figure out if we actually can */
3370 resize_type = can_resize_fs(setup->root_fd, old_fs_size, new_fs_size);
3371 if (resize_type < 0)
3372 return resize_type;
3373 if (resize_type == CAN_RESIZE_OFFLINE && FLAGS_SET(flags, HOME_SETUP_ALREADY_ACTIVATED))
3374 return log_error_errno(SYNTHETIC_ERRNO(ETXTBSY), "File systems of this type can only be resized offline, but is currently online.");
3375
3376 log_info("Ready to resize image size %s %s %s, partition size %s %s %s, file system size %s %s %s.",
3377 FORMAT_BYTES(old_image_size),
3378 special_glyph(SPECIAL_GLYPH_ARROW_RIGHT),
3379 FORMAT_BYTES(new_image_size),
3380 FORMAT_BYTES(setup->partition_size),
3381 special_glyph(SPECIAL_GLYPH_ARROW_RIGHT),
3382 FORMAT_BYTES(new_partition_size),
3383 FORMAT_BYTES(old_fs_size),
3384 special_glyph(SPECIAL_GLYPH_ARROW_RIGHT),
3385 FORMAT_BYTES(new_fs_size));
3386
/* Growing, first half: enlarge image file, loop device, partition table entry and LUKS mapping, in that
 * order. The file system itself is grown afterwards via resize_fs_loop(). */
3387 if (new_fs_size > old_fs_size) { /* → Grow */
3388
3389 if (S_ISREG(st.st_mode)) {
3390 uint64_t resized_image_size;
3391
3392 /* Grow file size */
3393 r = resize_image_loop(h, setup, old_image_size, new_image_size, &resized_image_size);
3394 if (r < 0)
3395 return r;
3396
3397 if (resized_image_size == old_image_size) {
3398 log_info("Couldn't change image size.");
3399 return 0;
3400 }
3401
3402 assert(resized_image_size > old_image_size);
3403
3404 log_info("Growing of image file from %s to %s completed.", FORMAT_BYTES(old_image_size), FORMAT_BYTES(resized_image_size));
3405
3406 if (resized_image_size < new_image_size) {
3407 uint64_t sub;
3408
3409 /* If the growing we managed to do is smaller than what we wanted we need to
3410 * adjust the partition/file system sizes we are going for, too */
3411 sub = new_image_size - resized_image_size;
3412 assert(new_partition_size >= sub);
3413 new_partition_size -= sub;
3414 assert(new_fs_size >= sub);
3415 new_fs_size -= sub;
3416 }
3417
3418 new_image_size = resized_image_size;
3419 } else {
3420 assert(S_ISBLK(st.st_mode));
3421 assert(new_image_size == old_image_size);
3422 }
3423
3424 /* Make sure loopback device sees the new bigger size */
3425 r = loop_device_refresh_size(setup->loop, UINT64_MAX, new_partition_size);
3426 if (r == -ENOTTY)
3427 log_debug_errno(r, "Device is not a loopback device, not refreshing size.");
3428 else if (r < 0)
3429 return log_error_errno(r, "Failed to refresh loopback device size: %m");
3430 else
3431 log_info("Refreshing loop device size completed.");
3432
3433 r = apply_resize_partition(image_fd, disk_uuid, table, partition, new_partition_size);
3434 if (r < 0)
3435 return r;
3436 if (r > 0)
3437 log_info("Growing of partition completed.");
3438
3439 if (S_ISBLK(st.st_mode) && ioctl(image_fd, BLKRRPART, 0) < 0)
3440 log_debug_errno(errno, "BLKRRPART failed on block device, ignoring: %m");
3441
3442 /* Tell LUKS about the new bigger size too */
/* (the size is handed over in units of 512 bytes, hence the division) */
3443 r = sym_crypt_resize(setup->crypt_device, setup->dm_name, new_fs_size / 512U);
3444 if (r < 0)
3445 return log_error_errno(r, "Failed to grow LUKS device: %m");
3446
3447 log_info("LUKS device growing completed.");
3448 } else {
3449 /* → Shrink */
3450
/* When shrinking, the identity is stored before the file system is resized; when growing it is stored
 * after the resize (see further down). */
3451 if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
3452 r = home_store_embedded_identity(new_home, setup->root_fd, embedded_home);
3453 if (r < 0)
3454 return r;
3455
3456 r = home_reconcile_blob_dirs(new_home, setup->root_fd, reconciled);
3457 if (r < 0)
3458 return r;
3459 }
3460
3461 if (S_ISREG(st.st_mode)) {
3462 if (user_record_luks_discard(h))
3463 /* Before we shrink, let's trim the file system, so that we need less space on disk during the shrinking */
3464 (void) run_fitrim(setup->root_fd);
3465 else {
3466 /* If discard is off, let's ensure all backing blocks are allocated, so that our resize operation doesn't fail half-way */
3467 r = run_fallocate(image_fd, &st);
3468 if (r < 0)
3469 return r;
3470 }
3471 }
3472 }
3473
3474 /* Now try to resize the file system. The requested size might not always be possible, in which case
3475 * we'll try to get as close as we can get. The result is returned in 'resized_fs_size' */
3476 r = resize_fs_loop(h, setup, resize_type, old_fs_size, new_fs_size, &resized_fs_size);
3477 if (r < 0)
3478 return r;
3479
3480 if (resized_fs_size == old_fs_size) {
3481 log_info("Couldn't change file system size.");
3482 return 0;
3483 }
3484
3485 log_info("File system resizing from %s to %s completed.", FORMAT_BYTES(old_fs_size), FORMAT_BYTES(resized_fs_size));
3486
3487 if (resized_fs_size > new_fs_size) {
3488 uint64_t add;
3489
3490 /* If the shrinking we managed to do is larger than what we wanted we need to adjust the partition/image sizes. */
3491 add = resized_fs_size - new_fs_size;
3492 new_partition_size += add;
3493 if (S_ISREG(st.st_mode))
3494 new_image_size += add;
3495 }
3496
3497 new_fs_size = resized_fs_size;
3498
3499 /* Immediately sync afterwards */
3500 r = home_sync_and_statfs(setup->root_fd, NULL);
3501 if (r < 0)
3502 return r;
3503
/* Shrinking, second half: now that the file system is smaller, reduce LUKS mapping, loop device, image
 * file and partition table entry, in that order. */
3504 if (new_fs_size < old_fs_size) { /* → Shrink */
3505
3506 /* Shrink the LUKS device now, matching the new file system size */
3507 r = sym_crypt_resize(setup->crypt_device, setup->dm_name, new_fs_size / 512);
3508 if (r < 0)
3509 return log_error_errno(r, "Failed to shrink LUKS device: %m");
3510
3511 log_info("LUKS device shrinking completed.");
3512
3513 /* Refresh the loop devices size */
3514 r = loop_device_refresh_size(setup->loop, UINT64_MAX, new_partition_size);
3515 if (r == -ENOTTY)
3516 log_debug_errno(r, "Device is not a loopback device, not refreshing size.");
3517 else if (r < 0)
3518 return log_error_errno(r, "Failed to refresh loopback device size: %m");
3519 else
3520 log_info("Refreshing loop device size completed.");
3521
3522 if (S_ISREG(st.st_mode)) {
3523 /* Shrink the image file */
3524 if (ftruncate(image_fd, new_image_size) < 0)
3525 return log_error_errno(errno, "Failed to shrink image file %s: %m", ip);
3526
3527 log_info("Shrinking of image file completed.");
3528 } else {
3529 assert(S_ISBLK(st.st_mode));
3530 assert(new_image_size == old_image_size);
3531 }
3532
3533 r = apply_resize_partition(image_fd, disk_uuid, table, partition, new_partition_size);
3534 if (r < 0)
3535 return r;
3536 if (r > 0)
3537 log_info("Shrinking of partition completed.");
3538
3539 if (S_ISBLK(st.st_mode) && ioctl(image_fd, BLKRRPART, 0) < 0)
3540 log_debug_errno(errno, "BLKRRPART failed on block device, ignoring: %m");
3541
3542 } else { /* → Grow */
3543 if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
3544 r = home_store_embedded_identity(new_home, setup->root_fd, embedded_home);
3545 if (r < 0)
3546 return r;
3547
3548 r = home_reconcile_blob_dirs(new_home, setup->root_fd, reconciled);
3549 if (r < 0)
3550 return r;
3551 }
3552 }
3553
3554 if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
3555 r = home_store_header_identity_luks(new_home, setup, header_home);
3556 if (r < 0)
3557 return r;
3558
3559 r = home_extend_embedded_identity(new_home, h, setup);
3560 if (r < 0)
3561 return r;
3562 }
3563
/* Trimming is best-effort here: the result of run_fitrim() is deliberately ignored */
3564 if (user_record_luks_discard(h))
3565 (void) run_fitrim(setup->root_fd);
3566
3567 r = home_sync_and_statfs(setup->root_fd, &sfs);
3568 if (r < 0)
3569 return r;
3570
3571 if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_UNDO)) {
3572 r = home_setup_done(setup);
3573 if (r < 0)
3574 return r;
3575 }
3576
3577 log_info("Resizing completed.");
3578
3579 print_size_summary(new_image_size, new_fs_size, &sfs);
3580
3581 if (ret_home)
3582 *ret_home = TAKE_PTR(new_home);
3583
3584 return 0;
3585 }
3586
3587 int home_passwd_luks(
3588 UserRecord *h,
3589 HomeSetupFlags flags,
3590 HomeSetup *setup,
3591 const PasswordCache *cache, /* the passwords acquired via PKCS#11/FIDO2 security tokens */
3592 char **effective_passwords /* new passwords */) {
3593
3594 size_t volume_key_size, max_key_slots, n_effective;
3595 _cleanup_(erase_and_freep) void *volume_key = NULL;
3596 struct crypt_pbkdf_type good_pbkdf, minimal_pbkdf;
3597 const char *type;
3598 int r;
3599
3600 assert(h);
3601 assert(user_record_storage(h) == USER_LUKS);
3602 assert(setup);
3603
3604 r = dlopen_cryptsetup();
3605 if (r < 0)
3606 return r;
3607
3608 type = sym_crypt_get_type(setup->crypt_device);
3609 if (!type)
3610 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine crypto device type.");
3611
3612 r = sym_crypt_keyslot_max(type);
3613 if (r <= 0)
3614 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine number of key slots.");
3615 max_key_slots = r;
3616
3617 r = sym_crypt_get_volume_key_size(setup->crypt_device);
3618 if (r <= 0)
3619 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine volume key size.");
3620 volume_key_size = (size_t) r;
3621
3622 volume_key = malloc(volume_key_size);
3623 if (!volume_key)
3624 return log_oom();
3625
3626 r = -ENOKEY;
3627 char **list;
3628 FOREACH_ARGUMENT(list,
3629 cache ? cache->keyring_passswords : NULL,
3630 cache ? cache->pkcs11_passwords : NULL,
3631 cache ? cache->fido2_passwords : NULL,
3632 h->password) {
3633
3634 r = luks_try_passwords(h, setup->crypt_device, list, volume_key, &volume_key_size, NULL);
3635 if (r != -ENOKEY)
3636 break;
3637 }
3638 if (r == -ENOKEY)
3639 return log_error_errno(SYNTHETIC_ERRNO(ENOKEY), "Failed to unlock LUKS superblock with supplied passwords.");
3640 if (r < 0)
3641 return log_error_errno(r, "Failed to unlock LUKS superblock: %m");
3642
3643 n_effective = strv_length(effective_passwords);
3644
3645 build_good_pbkdf(&good_pbkdf, h);
3646 build_minimal_pbkdf(&minimal_pbkdf, h);
3647
3648 for (size_t i = 0; i < max_key_slots; i++) {
3649 r = sym_crypt_keyslot_destroy(setup->crypt_device, i);
3650 if (r < 0 && !IN_SET(r, -ENOENT, -EINVAL)) /* Returns EINVAL or ENOENT if there's no key in this slot already */
3651 return log_error_errno(r, "Failed to destroy LUKS password: %m");
3652
3653 if (i >= n_effective) {
3654 if (r >= 0)
3655 log_info("Destroyed LUKS key slot %zu.", i);
3656 continue;
3657 }
3658
3659 if (password_cache_contains(cache, effective_passwords[i])) { /* Is this a FIDO2 or PKCS#11 password? */
3660 log_debug("Using minimal PBKDF for slot %zu", i);
3661 r = sym_crypt_set_pbkdf_type(setup->crypt_device, &minimal_pbkdf);
3662 } else {
3663 log_debug("Using good PBKDF for slot %zu", i);
3664 r = sym_crypt_set_pbkdf_type(setup->crypt_device, &good_pbkdf);
3665 }
3666 if (r < 0)
3667 return log_error_errno(r, "Failed to tweak PBKDF for slot %zu: %m", i);
3668
3669 r = sym_crypt_keyslot_add_by_volume_key(
3670 setup->crypt_device,
3671 i,
3672 volume_key,
3673 volume_key_size,
3674 effective_passwords[i],
3675 strlen(effective_passwords[i]));
3676 if (r < 0)
3677 return log_error_errno(r, "Failed to set up LUKS password: %m");
3678
3679 log_info("Updated LUKS key slot %zu.", i);
3680
3681 /* If we changed the password, then make sure to update the copy in the keyring, so that
3682 * auto-rebalance continues to work. We only do this if we operate on an active home dir. */
3683 if (i == 0 && FLAGS_SET(flags, HOME_SETUP_ALREADY_ACTIVATED))
3684 upload_to_keyring(h, effective_passwords[i], NULL);
3685 }
3686
3687 return 1;
3688 }
3689
3690 int home_lock_luks(UserRecord *h, HomeSetup *setup) {
3691 const char *p;
3692 int r;
3693
3694 assert(h);
3695 assert(setup);
3696 assert(setup->root_fd < 0);
3697 assert(!setup->crypt_device);
3698
3699 r = acquire_open_luks_device(h, setup, /* graceful= */ false);
3700 if (r < 0)
3701 return r;
3702
3703 log_info("Discovered used LUKS device %s.", setup->dm_node);
3704
3705 assert_se(p = user_record_home_directory(h));
3706 r = syncfs_path(AT_FDCWD, p);
3707 if (r < 0) /* Snake oil, but let's better be safe than sorry */
3708 return log_error_errno(r, "Failed to synchronize file system %s: %m", p);
3709
3710 log_info("File system synchronized.");
3711
3712 /* Note that we don't invoke FIFREEZE here, it appears libcryptsetup/device-mapper already does that on its own for us */
3713
3714 r = sym_crypt_suspend(setup->crypt_device, setup->dm_name);
3715 if (r < 0)
3716 return log_error_errno(r, "Failed to suspend cryptsetup device: %s: %m", setup->dm_node);
3717
3718 log_info("LUKS device suspended.");
3719 return 0;
3720 }
3721
3722 static int luks_try_resume(
3723 struct crypt_device *cd,
3724 const char *dm_name,
3725 char **password) {
3726
3727 int r;
3728
3729 assert(cd);
3730 assert(dm_name);
3731
3732 STRV_FOREACH(pp, password) {
3733 r = sym_crypt_resume_by_passphrase(
3734 cd,
3735 dm_name,
3736 CRYPT_ANY_SLOT,
3737 *pp,
3738 strlen(*pp));
3739 if (r >= 0) {
3740 log_info("Resumed LUKS device %s.", dm_name);
3741 return 0;
3742 }
3743
3744 log_debug_errno(r, "Password %zu didn't work for resuming device: %m", (size_t) (pp - password));
3745 }
3746
3747 return -ENOKEY;
3748 }
3749
3750 int home_unlock_luks(UserRecord *h, HomeSetup *setup, const PasswordCache *cache) {
3751 int r;
3752
3753 assert(h);
3754 assert(setup);
3755 assert(!setup->crypt_device);
3756
3757 r = acquire_open_luks_device(h, setup, /* graceful= */ false);
3758 if (r < 0)
3759 return r;
3760
3761 log_info("Discovered used LUKS device %s.", setup->dm_node);
3762
3763 r = -ENOKEY;
3764 char **list;
3765 FOREACH_ARGUMENT(list,
3766 cache ? cache->pkcs11_passwords : NULL,
3767 cache ? cache->fido2_passwords : NULL,
3768 h->password) {
3769
3770 r = luks_try_resume(setup->crypt_device, setup->dm_name, list);
3771 if (r != -ENOKEY)
3772 break;
3773 }
3774 if (r == -ENOKEY)
3775 return log_error_errno(r, "No valid password for LUKS superblock.");
3776 if (r < 0)
3777 return log_error_errno(r, "Failed to resume LUKS superblock: %m");
3778
3779 log_info("LUKS device resumed.");
3780 return 0;
3781 }
3782
3783 static int device_is_gone(HomeSetup *setup) {
3784 _cleanup_(sd_device_unrefp) sd_device *d = NULL;
3785 struct stat st;
3786 int r;
3787
3788 assert(setup);
3789
3790 if (!setup->dm_node)
3791 return true;
3792
3793 if (stat(setup->dm_node, &st) < 0) {
3794 if (errno != ENOENT)
3795 return log_error_errno(errno, "Failed to stat block device node %s: %m", setup->dm_node);
3796
3797 return true;
3798 }
3799
3800 r = sd_device_new_from_stat_rdev(&d, &st);
3801 if (r < 0) {
3802 if (r != -ENODEV)
3803 return log_error_errno(errno, "Failed to allocate device object from block device node %s: %m", setup->dm_node);
3804
3805 return true;
3806 }
3807
3808 return false;
3809 }
3810
3811 static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
3812 HomeSetup *setup = ASSERT_PTR(userdata);
3813 int r;
3814
3815 if (!device_for_action(device, SD_DEVICE_REMOVE))
3816 return 0;
3817
3818 /* We don't really care for the device object passed to us, we just check if the device node still
3819 * exists */
3820
3821 r = device_is_gone(setup);
3822 if (r < 0)
3823 return r;
3824 if (r > 0) /* Yay! we are done! */
3825 (void) sd_event_exit(sd_device_monitor_get_event(monitor), 0);
3826
3827 return 0;
3828 }
3829
3830 int wait_for_block_device_gone(HomeSetup *setup, usec_t timeout_usec) {
3831 _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *m = NULL;
3832 _cleanup_(sd_event_unrefp) sd_event *event = NULL;
3833 int r;
3834
3835 assert(setup);
3836
3837 /* So here's the thing: we enable "deferred deactivation" on our dm-crypt volumes. This means they
3838 * are automatically torn down once not used anymore (i.e. once unmounted). Which is great. It also
3839 * means that when we deactivate a home directory and try to tear down the volume that backs it, it
3840 * possibly is already torn down or in the process of being torn down, since we race against the
3841 * automatic tearing down. Which is fine, we handle errors from that. However, we lose the ability to
3842 * naturally wait for the tear down operation to complete: if we are not the ones who tear down the
3843 * device we are also not the ones who naturally block on that operation. Hence let's add some code
3844 * to actively wait for the device to go away, via sd-device. We'll call this whenever tearing down a
3845 * LUKS device, to ensure the device is really really gone before we proceed. Net effect: "homectl
3846 * deactivate foo && homectl activate foo" will work reliably, i.e. deactivation immediately followed
3847 * by activation will work. Also, by the time deactivation completes we can guarantee that all data
3848 * is sync'ed down to the lowest block layer as all higher levels are fully and entirely
3849 * destructed. */
3850
3851 if (!setup->dm_name)
3852 return 0;
3853
3854 assert(setup->dm_node);
3855 log_debug("Waiting until %s disappears.", setup->dm_node);
3856
3857 r = sd_event_new(&event);
3858 if (r < 0)
3859 return log_error_errno(r, "Failed to allocate event loop: %m");
3860
3861 r = sd_device_monitor_new(&m);
3862 if (r < 0)
3863 return log_error_errno(r, "Failed to allocate device monitor: %m");
3864
3865 r = sd_device_monitor_filter_add_match_subsystem_devtype(m, "block", "disk");
3866 if (r < 0)
3867 return log_error_errno(r, "Failed to configure device monitor match: %m");
3868
3869 r = sd_device_monitor_attach_event(m, event);
3870 if (r < 0)
3871 return log_error_errno(r, "Failed to attach device monitor to event loop: %m");
3872
3873 r = sd_device_monitor_start(m, device_monitor_handler, setup);
3874 if (r < 0)
3875 return log_error_errno(r, "Failed to start device monitor: %m");
3876
3877 r = device_is_gone(setup);
3878 if (r < 0)
3879 return r;
3880 if (r > 0) {
3881 log_debug("%s has already disappeared before entering wait loop.", setup->dm_node);
3882 return 0; /* gone already */
3883 }
3884
3885 if (timeout_usec != USEC_INFINITY) {
3886 r = sd_event_add_time_relative(event, NULL, CLOCK_MONOTONIC, timeout_usec, 0, NULL, NULL);
3887 if (r < 0)
3888 return log_error_errno(r, "Failed to add timer event: %m");
3889 }
3890
3891 r = sd_event_loop(event);
3892 if (r < 0)
3893 return log_error_errno(r, "Failed to run event loop: %m");
3894
3895 r = device_is_gone(setup);
3896 if (r < 0)
3897 return r;
3898 if (r == 0)
3899 return log_error_errno(r, "Device %s still around.", setup->dm_node);
3900
3901 log_debug("Successfully waited until device %s disappeared.", setup->dm_node);
3902 return 0;
3903 }
3904
3905 int home_auto_shrink_luks(UserRecord *h, HomeSetup *setup, PasswordCache *cache) {
3906 struct statfs sfs;
3907 int r;
3908
3909 assert(h);
3910 assert(user_record_storage(h) == USER_LUKS);
3911 assert(setup);
3912 assert(setup->root_fd >= 0);
3913
3914 if (user_record_auto_resize_mode(h) != AUTO_RESIZE_SHRINK_AND_GROW)
3915 return 0;
3916
3917 if (fstatfs(setup->root_fd, &sfs) < 0)
3918 return log_error_errno(errno, "Failed to statfs home directory: %m");
3919
3920 if (!fs_can_online_shrink_and_grow(sfs.f_type)) {
3921 log_debug("Not auto-shrinking file system, since selected file system cannot do both online shrink and grow.");
3922 return 0;
3923 }
3924
3925 r = home_resize_luks(
3926 h,
3927 HOME_SETUP_ALREADY_ACTIVATED|
3928 HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES|
3929 HOME_SETUP_RESIZE_MINIMIZE|
3930 HOME_SETUP_RESIZE_DONT_GROW|
3931 HOME_SETUP_RESIZE_DONT_UNDO,
3932 setup,
3933 cache,
3934 NULL);
3935 if (r < 0)
3936 return r;
3937
3938 return 1;
3939 }