]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/home/homework-luks.c
tree-wide: Use fdisk_new_context_at() more
[thirdparty/systemd.git] / src / home / homework-luks.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <linux/loop.h>
4 #include <poll.h>
5 #include <sys/file.h>
6 #include <sys/ioctl.h>
7 #include <sys/xattr.h>
8
9 #if HAVE_VALGRIND_MEMCHECK_H
10 #include <valgrind/memcheck.h>
11 #endif
12
13 #include "sd-daemon.h"
14 #include "sd-device.h"
15 #include "sd-event.h"
16 #include "sd-id128.h"
17
18 #include "blkid-util.h"
19 #include "blockdev-util.h"
20 #include "btrfs-util.h"
21 #include "chattr-util.h"
22 #include "device-util.h"
23 #include "devnum-util.h"
24 #include "dm-util.h"
25 #include "env-util.h"
26 #include "errno-util.h"
27 #include "fd-util.h"
28 #include "fdisk-util.h"
29 #include "fileio.h"
30 #include "filesystems.h"
31 #include "fs-util.h"
32 #include "fsck-util.h"
33 #include "glyph-util.h"
34 #include "gpt.h"
35 #include "home-util.h"
36 #include "homework-luks.h"
37 #include "homework-mount.h"
38 #include "io-util.h"
39 #include "keyring-util.h"
40 #include "memory-util.h"
41 #include "missing_magic.h"
42 #include "mkdir.h"
43 #include "mkfs-util.h"
44 #include "mount-util.h"
45 #include "openssl-util.h"
46 #include "parse-util.h"
47 #include "path-util.h"
48 #include "process-util.h"
49 #include "random-util.h"
50 #include "resize-fs.h"
51 #include "strv.h"
52 #include "sync-util.h"
53 #include "tmpfile-util.h"
54 #include "udev-util.h"
55 #include "user-util.h"
56
/* Truncates a byte count to a multiple of 4K. Newer hardware generally prefers 4K sectors, hence we align
 * our partitions to that granularity as well. The price is at most 3.5K of wasted space per partition,
 * which is acceptable. */
#define DISK_SIZE_ROUND_DOWN(x) ((x) & ~(UINT64_C(4096) - 1))
61
/* Rounds a byte count up to the next multiple of 4K. If that would overflow, UINT64_MAX is returned
 * instead. The argument is evaluated exactly once. */
#define DISK_SIZE_ROUND_UP(x)                                                   \
        ({                                                                      \
                uint64_t _v = (x);                                              \
                uint64_t _mask = UINT64_C(4095);                                \
                _v <= UINT64_MAX - _mask ? (_v + _mask) & ~_mask : UINT64_MAX;  \
        })
68
/* The difference between the size of the image on disk and the size of the file system inside it, i.e.
 * the space the GPT and LUKS2 envelope cost us: 18 MiB. (As measured on cryptsetup 2.4.1) */
#define GPT_LUKS2_OVERHEAD (UINT64_C(18) * UINT64_C(1024) * UINT64_C(1024))
72
73 static int resize_image_loop(UserRecord *h, HomeSetup *setup, uint64_t old_image_size, uint64_t new_image_size, uint64_t *ret_image_size);
74
int run_mark_dirty(int fd, bool b) {
        const char marker = '1';
        bool changed;
        int r;

        /* Adds (b == true) or drops (b == false) the 'user.home-dirty' xattr on the specified file. The
         * xattr is how we detect that a home directory was not properly unmounted.
         *
         * Returns > 0 if the xattr was actually added/removed, 0 if the file already was in the requested
         * state, and a negative errno-style value on failure. */

        assert(fd >= 0);

        r = fd_verify_regular(fd);
        if (r < 0)
                return r;

        if (!b) {
                /* Flush the image contents first, and only then drop the flag */
                r = fsync_full(fd);
                if (r < 0)
                        return log_debug_errno(r, "Failed to synchronize image before marking it clean: %m");

                if (fremovexattr(fd, "user.home-dirty") >= 0)
                        changed = true;
                else if (ERRNO_IS_XATTR_ABSENT(errno))
                        changed = false; /* Was clean already */
                else
                        return log_debug_errno(errno, "Could not mark home directory as clean: %m");
        } else {
                if (fsetxattr(fd, "user.home-dirty", &marker, 1, XATTR_CREATE) >= 0)
                        changed = true;
                else if (errno == EEXIST)
                        changed = false; /* Was dirty already */
                else
                        return log_debug_errno(errno, "Could not mark home directory as dirty: %m");
        }

        /* Make sure the new state of the flag hits the disk */
        r = fsync_full(fd);
        if (r < 0)
                return log_debug_errno(r, "Failed to synchronize dirty flag to disk: %m");

        return changed;
}
109
110 int run_mark_dirty_by_path(const char *path, bool b) {
111 _cleanup_close_ int fd = -EBADF;
112
113 assert(path);
114
115 fd = open(path, O_RDWR|O_CLOEXEC|O_NOCTTY);
116 if (fd < 0)
117 return log_debug_errno(errno, "Failed to open %s to mark dirty or clean: %m", path);
118
119 return run_mark_dirty(fd, b);
120 }
121
122 static int probe_file_system_by_fd(
123 int fd,
124 char **ret_fstype,
125 sd_id128_t *ret_uuid) {
126
127 _cleanup_(blkid_free_probep) blkid_probe b = NULL;
128 _cleanup_free_ char *s = NULL;
129 const char *fstype = NULL, *uuid = NULL;
130 sd_id128_t id;
131 int r;
132
133 assert(fd >= 0);
134 assert(ret_fstype);
135 assert(ret_uuid);
136
137 b = blkid_new_probe();
138 if (!b)
139 return -ENOMEM;
140
141 errno = 0;
142 r = blkid_probe_set_device(b, fd, 0, 0);
143 if (r != 0)
144 return errno_or_else(ENOMEM);
145
146 (void) blkid_probe_enable_superblocks(b, 1);
147 (void) blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_UUID);
148
149 errno = 0;
150 r = blkid_do_safeprobe(b);
151 if (r == _BLKID_SAFEPROBE_ERROR)
152 return errno_or_else(EIO);
153 if (IN_SET(r, _BLKID_SAFEPROBE_AMBIGUOUS, _BLKID_SAFEPROBE_NOT_FOUND))
154 return -ENOPKG;
155
156 assert(r == _BLKID_SAFEPROBE_FOUND);
157
158 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
159 if (!fstype)
160 return -ENOPKG;
161
162 (void) blkid_probe_lookup_value(b, "UUID", &uuid, NULL);
163 if (!uuid)
164 return -ENOPKG;
165
166 r = sd_id128_from_string(uuid, &id);
167 if (r < 0)
168 return r;
169
170 s = strdup(fstype);
171 if (!s)
172 return -ENOMEM;
173
174 *ret_fstype = TAKE_PTR(s);
175 *ret_uuid = id;
176
177 return 0;
178 }
179
180 static int probe_file_system_by_path(const char *path, char **ret_fstype, sd_id128_t *ret_uuid) {
181 _cleanup_close_ int fd = -EBADF;
182
183 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
184 if (fd < 0)
185 return negative_errno();
186
187 return probe_file_system_by_fd(fd, ret_fstype, ret_uuid);
188 }
189
190 static int block_get_size_by_fd(int fd, uint64_t *ret) {
191 struct stat st;
192
193 assert(fd >= 0);
194 assert(ret);
195
196 if (fstat(fd, &st) < 0)
197 return -errno;
198
199 if (!S_ISBLK(st.st_mode))
200 return -ENOTBLK;
201
202 return RET_NERRNO(ioctl(fd, BLKGETSIZE64, ret));
203 }
204
205 static int block_get_size_by_path(const char *path, uint64_t *ret) {
206 _cleanup_close_ int fd = -EBADF;
207
208 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
209 if (fd < 0)
210 return -errno;
211
212 return block_get_size_by_fd(fd, ret);
213 }
214
215 static int run_fsck(const char *node, const char *fstype) {
216 int r, exit_status;
217 pid_t fsck_pid;
218
219 assert(node);
220 assert(fstype);
221
222 r = fsck_exists_for_fstype(fstype);
223 if (r < 0)
224 return log_error_errno(r, "Failed to check if fsck for file system %s exists: %m", fstype);
225 if (r == 0) {
226 log_warning("No fsck for file system %s installed, ignoring.", fstype);
227 return 0;
228 }
229
230 r = safe_fork("(fsck)",
231 FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_STDOUT_TO_STDERR|FORK_CLOSE_ALL_FDS,
232 &fsck_pid);
233 if (r < 0)
234 return r;
235 if (r == 0) {
236 /* Child */
237 execlp("fsck", "fsck", "-aTl", node, NULL);
238 log_open();
239 log_error_errno(errno, "Failed to execute fsck: %m");
240 _exit(FSCK_OPERATIONAL_ERROR);
241 }
242
243 exit_status = wait_for_terminate_and_check("fsck", fsck_pid, WAIT_LOG_ABNORMAL);
244 if (exit_status < 0)
245 return exit_status;
246 if ((exit_status & ~FSCK_ERROR_CORRECTED) != 0) {
247 log_warning("fsck failed with exit status %i.", exit_status);
248
249 if ((exit_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0)
250 return log_error_errno(SYNTHETIC_ERRNO(EIO), "File system is corrupted, refusing.");
251
252 log_warning("Ignoring fsck error.");
253 }
254
255 log_info("File system check completed.");
256
257 return 1;
258 }
259
260 DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(key_serial_t, keyring_unlink, -1);
261
262 static int upload_to_keyring(
263 UserRecord *h,
264 const char *password,
265 key_serial_t *ret_key_serial) {
266
267 _cleanup_free_ char *name = NULL;
268 key_serial_t serial;
269
270 assert(h);
271 assert(password);
272
273 /* If auto-shrink-on-logout is turned on, we need to keep the key we used to unlock the LUKS volume
274 * around, since we'll need it when automatically resizing (since we can't ask the user there
275 * again). We do this by uploading it into the kernel keyring, specifically the "session" one. This
276 * is done under the assumption systemd-homed gets its private per-session keyring (i.e. default
277 * service behaviour, given that KeyringMode=private is the default). It will survive between our
278 * systemd-homework invocations that way.
279 *
280 * If auto-shrink-on-logout is disabled we'll skip this step, to be frugal with sensitive data. */
281
282 if (user_record_auto_resize_mode(h) != AUTO_RESIZE_SHRINK_AND_GROW) { /* Won't need it */
283 if (ret_key_serial)
284 *ret_key_serial = -1;
285 return 0;
286 }
287
288 name = strjoin("homework-user-", h->user_name);
289 if (!name)
290 return -ENOMEM;
291
292 serial = add_key("user", name, password, strlen(password), KEY_SPEC_SESSION_KEYRING);
293 if (serial == -1)
294 return -errno;
295
296 if (ret_key_serial)
297 *ret_key_serial = serial;
298
299 return 1;
300 }
301
302 static int luks_try_passwords(
303 UserRecord *h,
304 struct crypt_device *cd,
305 char **passwords,
306 void *volume_key,
307 size_t *volume_key_size,
308 key_serial_t *ret_key_serial) {
309
310 int r;
311
312 assert(h);
313 assert(cd);
314
315 STRV_FOREACH(pp, passwords) {
316 size_t vks = *volume_key_size;
317
318 r = sym_crypt_volume_key_get(
319 cd,
320 CRYPT_ANY_SLOT,
321 volume_key,
322 &vks,
323 *pp,
324 strlen(*pp));
325 if (r >= 0) {
326 if (ret_key_serial) {
327 /* If ret_key_serial is non-NULL, let's try to upload the password that
328 * worked, and return its serial. */
329 r = upload_to_keyring(h, *pp, ret_key_serial);
330 if (r < 0) {
331 log_debug_errno(r, "Failed to upload LUKS password to kernel keyring, ignoring: %m");
332 *ret_key_serial = -1;
333 }
334 }
335
336 *volume_key_size = vks;
337 return 0;
338 }
339
340 log_debug_errno(r, "Password %zu didn't work for unlocking LUKS superblock: %m", (size_t) (pp - passwords));
341 }
342
343 return -ENOKEY;
344 }
345
346 static int luks_setup(
347 UserRecord *h,
348 const char *node,
349 const char *dm_name,
350 sd_id128_t uuid,
351 const char *cipher,
352 const char *cipher_mode,
353 uint64_t volume_key_size,
354 char **passwords,
355 const PasswordCache *cache,
356 bool discard,
357 struct crypt_device **ret,
358 sd_id128_t *ret_found_uuid,
359 void **ret_volume_key,
360 size_t *ret_volume_key_size,
361 key_serial_t *ret_key_serial) {
362
363 _cleanup_(keyring_unlinkp) key_serial_t key_serial = -1;
364 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
365 _cleanup_(erase_and_freep) void *vk = NULL;
366 sd_id128_t p;
367 size_t vks;
368 char **list;
369 int r;
370
371 assert(h);
372 assert(node);
373 assert(dm_name);
374 assert(ret);
375
376 r = sym_crypt_init(&cd, node);
377 if (r < 0)
378 return log_error_errno(r, "Failed to allocate libcryptsetup context: %m");
379
380 cryptsetup_enable_logging(cd);
381
382 r = sym_crypt_load(cd, CRYPT_LUKS2, NULL);
383 if (r < 0)
384 return log_error_errno(r, "Failed to load LUKS superblock: %m");
385
386 r = sym_crypt_get_volume_key_size(cd);
387 if (r <= 0)
388 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine LUKS volume key size");
389 vks = (size_t) r;
390
391 if (!sd_id128_is_null(uuid) || ret_found_uuid) {
392 const char *s;
393
394 s = sym_crypt_get_uuid(cd);
395 if (!s)
396 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has no UUID.");
397
398 r = sd_id128_from_string(s, &p);
399 if (r < 0)
400 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has invalid UUID.");
401
402 /* Check that the UUID matches, if specified */
403 if (!sd_id128_is_null(uuid) &&
404 !sd_id128_equal(uuid, p))
405 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has wrong UUID.");
406 }
407
408 if (cipher && !streq_ptr(cipher, sym_crypt_get_cipher(cd)))
409 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock declares wrong cipher.");
410
411 if (cipher_mode && !streq_ptr(cipher_mode, sym_crypt_get_cipher_mode(cd)))
412 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock declares wrong cipher mode.");
413
414 if (volume_key_size != UINT64_MAX && vks != volume_key_size)
415 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock declares wrong volume key size.");
416
417 vk = malloc(vks);
418 if (!vk)
419 return log_oom();
420
421 r = -ENOKEY;
422 FOREACH_POINTER(list,
423 cache ? cache->keyring_passswords : NULL,
424 cache ? cache->pkcs11_passwords : NULL,
425 cache ? cache->fido2_passwords : NULL,
426 passwords) {
427 r = luks_try_passwords(h, cd, list, vk, &vks, ret_key_serial ? &key_serial : NULL);
428 if (r != -ENOKEY)
429 break;
430 }
431 if (r == -ENOKEY)
432 return log_error_errno(r, "No valid password for LUKS superblock.");
433 if (r < 0)
434 return log_error_errno(r, "Failed to unlock LUKS superblock: %m");
435
436 r = sym_crypt_activate_by_volume_key(
437 cd,
438 dm_name,
439 vk, vks,
440 discard ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0);
441 if (r < 0)
442 return log_error_errno(r, "Failed to unlock LUKS superblock: %m");
443
444 log_info("Setting up LUKS device /dev/mapper/%s completed.", dm_name);
445
446 *ret = TAKE_PTR(cd);
447
448 if (ret_found_uuid) /* Return the UUID actually found if the caller wants to know */
449 *ret_found_uuid = p;
450 if (ret_volume_key)
451 *ret_volume_key = TAKE_PTR(vk);
452 if (ret_volume_key_size)
453 *ret_volume_key_size = vks;
454 if (ret_key_serial)
455 *ret_key_serial = TAKE_KEY_SERIAL(key_serial);
456
457 return 0;
458 }
459
460 static int make_dm_names(UserRecord *h, HomeSetup *setup) {
461 assert(h);
462 assert(h->user_name);
463 assert(setup);
464
465 if (!setup->dm_name) {
466 setup->dm_name = strjoin("home-", h->user_name);
467 if (!setup->dm_name)
468 return log_oom();
469 }
470
471 if (!setup->dm_node) {
472 setup->dm_node = path_join("/dev/mapper/", setup->dm_name);
473 if (!setup->dm_node)
474 return log_oom();
475 }
476
477 return 0;
478 }
479
480 static int acquire_open_luks_device(
481 UserRecord *h,
482 HomeSetup *setup,
483 bool graceful) {
484
485 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
486 int r;
487
488 assert(h);
489 assert(setup);
490 assert(!setup->crypt_device);
491
492 r = dlopen_cryptsetup();
493 if (r < 0)
494 return r;
495
496 r = make_dm_names(h, setup);
497 if (r < 0)
498 return r;
499
500 r = sym_crypt_init_by_name(&cd, setup->dm_name);
501 if (r < 0) {
502 if ((ERRNO_IS_DEVICE_ABSENT(r) || r == -EINVAL) && graceful)
503 return 0;
504 return log_error_errno(r, "Failed to initialize cryptsetup context for %s: %m", setup->dm_name);
505 }
506
507 cryptsetup_enable_logging(cd);
508
509 setup->crypt_device = TAKE_PTR(cd);
510 return 1;
511 }
512
513 static int luks_open(
514 UserRecord *h,
515 HomeSetup *setup,
516 const PasswordCache *cache,
517 sd_id128_t *ret_found_uuid,
518 void **ret_volume_key,
519 size_t *ret_volume_key_size) {
520
521 _cleanup_(erase_and_freep) void *vk = NULL;
522 sd_id128_t p;
523 char **list;
524 size_t vks;
525 int r;
526
527 assert(h);
528 assert(setup);
529 assert(!setup->crypt_device);
530
531 /* Opens a LUKS device that is already set up. Re-validates the password while doing so (which also
532 * provides us with the volume key, which we want). */
533
534 r = acquire_open_luks_device(h, setup, /* graceful= */ false);
535 if (r < 0)
536 return r;
537
538 r = sym_crypt_load(setup->crypt_device, CRYPT_LUKS2, NULL);
539 if (r < 0)
540 return log_error_errno(r, "Failed to load LUKS superblock: %m");
541
542 r = sym_crypt_get_volume_key_size(setup->crypt_device);
543 if (r <= 0)
544 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine LUKS volume key size");
545 vks = (size_t) r;
546
547 if (ret_found_uuid) {
548 const char *s;
549
550 s = sym_crypt_get_uuid(setup->crypt_device);
551 if (!s)
552 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has no UUID.");
553
554 r = sd_id128_from_string(s, &p);
555 if (r < 0)
556 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has invalid UUID.");
557 }
558
559 vk = malloc(vks);
560 if (!vk)
561 return log_oom();
562
563 r = -ENOKEY;
564 FOREACH_POINTER(list,
565 cache ? cache->keyring_passswords : NULL,
566 cache ? cache->pkcs11_passwords : NULL,
567 cache ? cache->fido2_passwords : NULL,
568 h->password) {
569 r = luks_try_passwords(h, setup->crypt_device, list, vk, &vks, NULL);
570 if (r != -ENOKEY)
571 break;
572 }
573 if (r == -ENOKEY)
574 return log_error_errno(r, "No valid password for LUKS superblock.");
575 if (r < 0)
576 return log_error_errno(r, "Failed to unlock LUKS superblock: %m");
577
578 log_info("Discovered used LUKS device /dev/mapper/%s, and validated password.", setup->dm_name);
579
580 /* This is needed so that crypt_resize() can operate correctly for pre-existing LUKS devices. We need
581 * to tell libcryptsetup the volume key explicitly, so that it is in the kernel keyring. */
582 r = sym_crypt_activate_by_volume_key(setup->crypt_device, NULL, vk, vks, CRYPT_ACTIVATE_KEYRING_KEY);
583 if (r < 0)
584 return log_error_errno(r, "Failed to upload volume key again: %m");
585
586 log_info("Successfully re-activated LUKS device.");
587
588 if (ret_found_uuid)
589 *ret_found_uuid = p;
590 if (ret_volume_key)
591 *ret_volume_key = TAKE_PTR(vk);
592 if (ret_volume_key_size)
593 *ret_volume_key_size = vks;
594
595 return 0;
596 }
597
598 static int fs_validate(
599 const char *dm_node,
600 sd_id128_t uuid,
601 char **ret_fstype,
602 sd_id128_t *ret_found_uuid) {
603
604 _cleanup_free_ char *fstype = NULL;
605 sd_id128_t u = SD_ID128_NULL; /* avoid false maybe-unitialized warning */
606 int r;
607
608 assert(dm_node);
609 assert(ret_fstype);
610
611 r = probe_file_system_by_path(dm_node, &fstype, &u);
612 if (r < 0)
613 return log_error_errno(r, "Failed to probe file system: %m");
614
615 /* Limit the set of supported file systems a bit, as protection against little tested kernel file
616 * systems. Also, we only support the resize ioctls for these file systems. */
617 if (!supported_fstype(fstype))
618 return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "Image contains unsupported file system: %s", strna(fstype));
619
620 if (!sd_id128_is_null(uuid) &&
621 !sd_id128_equal(uuid, u))
622 return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "File system has wrong UUID.");
623
624 log_info("Probing file system completed (found %s).", fstype);
625
626 *ret_fstype = TAKE_PTR(fstype);
627
628 if (ret_found_uuid) /* Return the UUID actually found if the caller wants to know */
629 *ret_found_uuid = u;
630
631 return 0;
632 }
633
634 static int luks_validate(
635 int fd,
636 const char *label,
637 sd_id128_t partition_uuid,
638 sd_id128_t *ret_partition_uuid,
639 uint64_t *ret_offset,
640 uint64_t *ret_size) {
641
642 _cleanup_(blkid_free_probep) blkid_probe b = NULL;
643 sd_id128_t found_partition_uuid = SD_ID128_NULL;
644 const char *fstype = NULL, *pttype = NULL;
645 blkid_loff_t offset = 0, size = 0;
646 blkid_partlist pl;
647 bool found = false;
648 int r, n;
649
650 assert(fd >= 0);
651 assert(label);
652 assert(ret_offset);
653 assert(ret_size);
654
655 b = blkid_new_probe();
656 if (!b)
657 return -ENOMEM;
658
659 errno = 0;
660 r = blkid_probe_set_device(b, fd, 0, 0);
661 if (r != 0)
662 return errno_or_else(ENOMEM);
663
664 (void) blkid_probe_enable_superblocks(b, 1);
665 (void) blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
666 (void) blkid_probe_enable_partitions(b, 1);
667 (void) blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
668
669 errno = 0;
670 r = blkid_do_safeprobe(b);
671 if (r == _BLKID_SAFEPROBE_ERROR)
672 return errno_or_else(EIO);
673 if (IN_SET(r, _BLKID_SAFEPROBE_AMBIGUOUS, _BLKID_SAFEPROBE_NOT_FOUND))
674 return -ENOPKG;
675
676 assert(r == _BLKID_SAFEPROBE_FOUND);
677
678 (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
679 if (streq_ptr(fstype, "crypto_LUKS")) {
680 /* Directly a LUKS image */
681 *ret_offset = 0;
682 *ret_size = UINT64_MAX; /* full disk */
683 *ret_partition_uuid = SD_ID128_NULL;
684 return 0;
685 } else if (fstype)
686 return -ENOPKG;
687
688 (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
689 if (!streq_ptr(pttype, "gpt"))
690 return -ENOPKG;
691
692 errno = 0;
693 pl = blkid_probe_get_partitions(b);
694 if (!pl)
695 return errno_or_else(ENOMEM);
696
697 errno = 0;
698 n = blkid_partlist_numof_partitions(pl);
699 if (n < 0)
700 return errno_or_else(EIO);
701
702 for (int i = 0; i < n; i++) {
703 sd_id128_t id = SD_ID128_NULL;
704 blkid_partition pp;
705
706 errno = 0;
707 pp = blkid_partlist_get_partition(pl, i);
708 if (!pp)
709 return errno_or_else(EIO);
710
711 if (sd_id128_string_equal(blkid_partition_get_type_string(pp), SD_GPT_USER_HOME) <= 0)
712 continue;
713
714 if (!streq_ptr(blkid_partition_get_name(pp), label))
715 continue;
716
717
718 r = blkid_partition_get_uuid_id128(pp, &id);
719 if (r < 0)
720 log_debug_errno(r, "Failed to read partition UUID, ignoring: %m");
721 else if (!sd_id128_is_null(partition_uuid) && !sd_id128_equal(id, partition_uuid))
722 continue;
723
724 if (found)
725 return -ENOPKG;
726
727 offset = blkid_partition_get_start(pp);
728 size = blkid_partition_get_size(pp);
729 found_partition_uuid = id;
730
731 found = true;
732 }
733
734 if (!found)
735 return -ENOPKG;
736
737 if (offset < 0)
738 return -EINVAL;
739 if ((uint64_t) offset > UINT64_MAX / 512U)
740 return -EINVAL;
741 if (size <= 0)
742 return -EINVAL;
743 if ((uint64_t) size > UINT64_MAX / 512U)
744 return -EINVAL;
745
746 *ret_offset = offset * 512U;
747 *ret_size = size * 512U;
748 *ret_partition_uuid = found_partition_uuid;
749
750 return 0;
751 }
752
753 static int crypt_device_to_evp_cipher(struct crypt_device *cd, const EVP_CIPHER **ret) {
754 _cleanup_free_ char *cipher_name = NULL;
755 const char *cipher, *cipher_mode, *e;
756 size_t key_size, key_bits;
757 const EVP_CIPHER *cc;
758 int r;
759
760 assert(cd);
761
762 /* Let's find the right OpenSSL EVP_CIPHER object that matches the encryption settings of the LUKS
763 * device */
764
765 cipher = sym_crypt_get_cipher(cd);
766 if (!cipher)
767 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot get cipher from LUKS device.");
768
769 cipher_mode = sym_crypt_get_cipher_mode(cd);
770 if (!cipher_mode)
771 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot get cipher mode from LUKS device.");
772
773 e = strchr(cipher_mode, '-');
774 if (e)
775 cipher_mode = strndupa_safe(cipher_mode, e - cipher_mode);
776
777 r = sym_crypt_get_volume_key_size(cd);
778 if (r <= 0)
779 return log_error_errno(r < 0 ? r : SYNTHETIC_ERRNO(EINVAL), "Cannot get volume key size from LUKS device.");
780
781 key_size = r;
782 key_bits = key_size * 8;
783 if (streq(cipher_mode, "xts"))
784 key_bits /= 2;
785
786 if (asprintf(&cipher_name, "%s-%zu-%s", cipher, key_bits, cipher_mode) < 0)
787 return log_oom();
788
789 cc = EVP_get_cipherbyname(cipher_name);
790 if (!cc)
791 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Selected cipher mode '%s' not supported, can't encrypt JSON record.", cipher_name);
792
793 /* Verify that our key length calculations match what OpenSSL thinks */
794 r = EVP_CIPHER_key_length(cc);
795 if (r < 0 || (uint64_t) r != key_size)
796 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Key size of selected cipher doesn't meet our expectations.");
797
798 *ret = cc;
799 return 0;
800 }
801
802 static int luks_validate_home_record(
803 struct crypt_device *cd,
804 UserRecord *h,
805 const void *volume_key,
806 PasswordCache *cache,
807 UserRecord **ret_luks_home_record) {
808
809 int r;
810
811 assert(cd);
812 assert(h);
813
814 for (int token = 0; token < sym_crypt_token_max(CRYPT_LUKS2); token++) {
815 _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *rr = NULL;
816 _cleanup_(EVP_CIPHER_CTX_freep) EVP_CIPHER_CTX *context = NULL;
817 _cleanup_(user_record_unrefp) UserRecord *lhr = NULL;
818 _cleanup_free_ void *encrypted = NULL, *iv = NULL;
819 size_t decrypted_size, encrypted_size, iv_size;
820 int decrypted_size_out1, decrypted_size_out2;
821 _cleanup_free_ char *decrypted = NULL;
822 const char *text, *type;
823 crypt_token_info state;
824 JsonVariant *jr, *jiv;
825 unsigned line, column;
826 const EVP_CIPHER *cc;
827
828 state = sym_crypt_token_status(cd, token, &type);
829 if (state == CRYPT_TOKEN_INACTIVE) /* First unconfigured token, give up */
830 break;
831 if (IN_SET(state, CRYPT_TOKEN_INTERNAL, CRYPT_TOKEN_INTERNAL_UNKNOWN, CRYPT_TOKEN_EXTERNAL))
832 continue;
833 if (state != CRYPT_TOKEN_EXTERNAL_UNKNOWN)
834 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected token state of token %i: %i", token, (int) state);
835
836 if (!streq(type, "systemd-homed"))
837 continue;
838
839 r = sym_crypt_token_json_get(cd, token, &text);
840 if (r < 0)
841 return log_error_errno(r, "Failed to read LUKS token %i: %m", token);
842
843 r = json_parse(text, JSON_PARSE_SENSITIVE, &v, &line, &column);
844 if (r < 0)
845 return log_error_errno(r, "Failed to parse LUKS token JSON data %u:%u: %m", line, column);
846
847 jr = json_variant_by_key(v, "record");
848 if (!jr)
849 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS token lacks 'record' field.");
850 jiv = json_variant_by_key(v, "iv");
851 if (!jiv)
852 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS token lacks 'iv' field.");
853
854 r = json_variant_unbase64(jr, &encrypted, &encrypted_size);
855 if (r < 0)
856 return log_error_errno(r, "Failed to base64 decode record: %m");
857
858 r = json_variant_unbase64(jiv, &iv, &iv_size);
859 if (r < 0)
860 return log_error_errno(r, "Failed to base64 decode IV: %m");
861
862 r = crypt_device_to_evp_cipher(cd, &cc);
863 if (r < 0)
864 return r;
865 if (iv_size > INT_MAX || EVP_CIPHER_iv_length(cc) != (int) iv_size)
866 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "IV size doesn't match.");
867
868 context = EVP_CIPHER_CTX_new();
869 if (!context)
870 return log_oom();
871
872 if (EVP_DecryptInit_ex(context, cc, NULL, volume_key, iv) != 1)
873 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to initialize decryption context.");
874
875 decrypted_size = encrypted_size + EVP_CIPHER_key_length(cc) * 2;
876 decrypted = new(char, decrypted_size);
877 if (!decrypted)
878 return log_oom();
879
880 if (EVP_DecryptUpdate(context, (uint8_t*) decrypted, &decrypted_size_out1, encrypted, encrypted_size) != 1)
881 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to decrypt JSON record.");
882
883 assert((size_t) decrypted_size_out1 <= decrypted_size);
884
885 if (EVP_DecryptFinal_ex(context, (uint8_t*) decrypted + decrypted_size_out1, &decrypted_size_out2) != 1)
886 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to finish decryption of JSON record.");
887
888 assert((size_t) decrypted_size_out1 + (size_t) decrypted_size_out2 < decrypted_size);
889 decrypted_size = (size_t) decrypted_size_out1 + (size_t) decrypted_size_out2;
890
891 if (memchr(decrypted, 0, decrypted_size))
892 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Inner NUL byte in JSON record, refusing.");
893
894 decrypted[decrypted_size] = 0;
895
896 r = json_parse(decrypted, JSON_PARSE_SENSITIVE, &rr, NULL, NULL);
897 if (r < 0)
898 return log_error_errno(r, "Failed to parse decrypted JSON record, refusing.");
899
900 lhr = user_record_new();
901 if (!lhr)
902 return log_oom();
903
904 r = user_record_load(lhr, rr, USER_RECORD_LOAD_EMBEDDED|USER_RECORD_PERMISSIVE);
905 if (r < 0)
906 return log_error_errno(r, "Failed to parse user record: %m");
907
908 if (!user_record_compatible(h, lhr))
909 return log_error_errno(SYNTHETIC_ERRNO(EREMCHG), "LUKS home record not compatible with host record, refusing.");
910
911 r = user_record_authenticate(lhr, h, cache, /* strict_verify= */ true);
912 if (r < 0)
913 return r;
914 assert(r > 0); /* Insist that a password was verified */
915
916 *ret_luks_home_record = TAKE_PTR(lhr);
917 return 0;
918 }
919
920 return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Couldn't find home record in LUKS2 header, refusing.");
921 }
922
923 static int format_luks_token_text(
924 struct crypt_device *cd,
925 UserRecord *hr,
926 const void *volume_key,
927 char **ret) {
928
929 int r, encrypted_size_out1 = 0, encrypted_size_out2 = 0, iv_size, key_size;
930 _cleanup_(EVP_CIPHER_CTX_freep) EVP_CIPHER_CTX *context = NULL;
931 _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
932 _cleanup_free_ void *iv = NULL, *encrypted = NULL;
933 size_t text_length, encrypted_size;
934 _cleanup_free_ char *text = NULL;
935 const EVP_CIPHER *cc;
936
937 assert(cd);
938 assert(hr);
939 assert(volume_key);
940 assert(ret);
941
942 r = crypt_device_to_evp_cipher(cd, &cc);
943 if (r < 0)
944 return r;
945
946 key_size = EVP_CIPHER_key_length(cc);
947 iv_size = EVP_CIPHER_iv_length(cc);
948
949 if (iv_size > 0) {
950 iv = malloc(iv_size);
951 if (!iv)
952 return log_oom();
953
954 r = crypto_random_bytes(iv, iv_size);
955 if (r < 0)
956 return log_error_errno(r, "Failed to generate IV: %m");
957 }
958
959 context = EVP_CIPHER_CTX_new();
960 if (!context)
961 return log_oom();
962
963 if (EVP_EncryptInit_ex(context, cc, NULL, volume_key, iv) != 1)
964 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to initialize encryption context.");
965
966 r = json_variant_format(hr->json, 0, &text);
967 if (r < 0)
968 return log_error_errno(r, "Failed to format user record for LUKS: %m");
969
970 text_length = strlen(text);
971 encrypted_size = text_length + 2*key_size - 1;
972
973 encrypted = malloc(encrypted_size);
974 if (!encrypted)
975 return log_oom();
976
977 if (EVP_EncryptUpdate(context, encrypted, &encrypted_size_out1, (uint8_t*) text, text_length) != 1)
978 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to encrypt JSON record.");
979
980 assert((size_t) encrypted_size_out1 <= encrypted_size);
981
982 if (EVP_EncryptFinal_ex(context, (uint8_t*) encrypted + encrypted_size_out1, &encrypted_size_out2) != 1)
983 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to finish encryption of JSON record. ");
984
985 assert((size_t) encrypted_size_out1 + (size_t) encrypted_size_out2 <= encrypted_size);
986
987 r = json_build(&v,
988 JSON_BUILD_OBJECT(
989 JSON_BUILD_PAIR("type", JSON_BUILD_CONST_STRING("systemd-homed")),
990 JSON_BUILD_PAIR("keyslots", JSON_BUILD_EMPTY_ARRAY),
991 JSON_BUILD_PAIR("record", JSON_BUILD_BASE64(encrypted, encrypted_size_out1 + encrypted_size_out2)),
992 JSON_BUILD_PAIR("iv", JSON_BUILD_BASE64(iv, iv_size))));
993 if (r < 0)
994 return log_error_errno(r, "Failed to prepare LUKS JSON token object: %m");
995
996 r = json_variant_format(v, 0, ret);
997 if (r < 0)
998 return log_error_errno(r, "Failed to format encrypted user record for LUKS: %m");
999
1000 return 0;
1001 }
1002
1003 int home_store_header_identity_luks(
1004 UserRecord *h,
1005 HomeSetup *setup,
1006 UserRecord *old_home) {
1007
1008 _cleanup_(user_record_unrefp) UserRecord *header_home = NULL;
1009 _cleanup_free_ char *text = NULL;
1010 int r;
1011
1012 assert(h);
1013
1014 if (!setup->crypt_device)
1015 return 0;
1016
1017 assert(setup->volume_key);
1018
1019 /* Let's store the user's identity record in the LUKS2 "token" header data fields, in an encrypted
1020 * fashion. Why that? If we'd rely on the record being embedded in the payload file system itself we
1021 * would have to mount the file system before we can validate the JSON record, its signatures and
1022 * whether it matches what we are looking for. However, kernel file system implementations are
1023 * generally not ready to be used on untrusted media. Hence let's store the record independently of
1024 * the file system, so that we can validate it first, and only then mount the file system. To keep
1025 * things simple we use the same encryption settings for this record as for the file system itself. */
1026
1027 r = user_record_clone(h, USER_RECORD_EXTRACT_EMBEDDED|USER_RECORD_PERMISSIVE, &header_home);
1028 if (r < 0)
1029 return log_error_errno(r, "Failed to determine new header record: %m");
1030
1031 if (old_home && user_record_equal(old_home, header_home)) {
1032 log_debug("Not updating header home record.");
1033 return 0;
1034 }
1035
1036 r = format_luks_token_text(setup->crypt_device, header_home, setup->volume_key, &text);
1037 if (r < 0)
1038 return r;
1039
1040 for (int token = 0; token < sym_crypt_token_max(CRYPT_LUKS2); token++) {
1041 crypt_token_info state;
1042 const char *type;
1043
1044 state = sym_crypt_token_status(setup->crypt_device, token, &type);
1045 if (state == CRYPT_TOKEN_INACTIVE) /* First unconfigured token, we are done */
1046 break;
1047 if (IN_SET(state, CRYPT_TOKEN_INTERNAL, CRYPT_TOKEN_INTERNAL_UNKNOWN, CRYPT_TOKEN_EXTERNAL))
1048 continue; /* Not ours */
1049 if (state != CRYPT_TOKEN_EXTERNAL_UNKNOWN)
1050 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected token state of token %i: %i", token, (int) state);
1051
1052 if (!streq(type, "systemd-homed"))
1053 continue;
1054
1055 r = sym_crypt_token_json_set(setup->crypt_device, token, text);
1056 if (r < 0)
1057 return log_error_errno(r, "Failed to set JSON token for slot %i: %m", token);
1058
1059 /* Now, let's free the text so that for all further matching tokens we all crypt_json_token_set()
1060 * with a NULL text in order to invalidate the tokens. */
1061 text = mfree(text);
1062 }
1063
1064 if (text)
1065 return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Didn't find any record token to update.");
1066
1067 log_info("Wrote LUKS header user record.");
1068
1069 return 1;
1070 }
1071
/* Issues FITRIM over the whole file system referenced by root_fd. Returns 1 if the ioctl succeeded, 0 if
 * the file system reports that it does not support trimming, and a negative error otherwise. */
int run_fitrim(int root_fd) {
        struct fstrim_range trim = {
                .len = UINT64_MAX, /* cover everything */
        };

        /* If discarding is on, discard everything right after mounting, so that the discard setting takes
         * effect on activation. (Also, optionally, trim on logout) */

        assert(root_fd >= 0);

        if (ioctl(root_fd, FITRIM, &trim) >= 0) {
                log_info("Discarded unused %s.", FORMAT_BYTES(trim.len));
                return 1;
        }

        if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EBADF)
                return log_warning_errno(errno, "Failed to invoke FITRIM, ignoring: %m");

        log_debug_errno(errno, "File system does not support FITRIM, not trimming.");
        return 0;
}
1094
1095 int run_fallocate(int backing_fd, const struct stat *st) {
1096 struct stat stbuf;
1097
1098 assert(backing_fd >= 0);
1099
1100 /* If discarding is off, let's allocate the whole image before mounting, so that the setting takes
1101 * effect on activation */
1102
1103 if (!st) {
1104 if (fstat(backing_fd, &stbuf) < 0)
1105 return log_error_errno(errno, "Failed to fstat(): %m");
1106
1107 st = &stbuf;
1108 }
1109
1110 if (!S_ISREG(st->st_mode))
1111 return 0;
1112
1113 if (st->st_blocks >= DIV_ROUND_UP(st->st_size, 512)) {
1114 log_info("Backing file is fully allocated already.");
1115 return 0;
1116 }
1117
1118 if (fallocate(backing_fd, FALLOC_FL_KEEP_SIZE, 0, st->st_size) < 0) {
1119
1120 if (ERRNO_IS_NOT_SUPPORTED(errno)) {
1121 log_debug_errno(errno, "fallocate() not supported on file system, ignoring.");
1122 return 0;
1123 }
1124
1125 if (ERRNO_IS_DISK_SPACE(errno)) {
1126 log_debug_errno(errno, "Not enough disk space to fully allocate home.");
1127 return -ENOSPC; /* make recognizable */
1128 }
1129
1130 return log_error_errno(errno, "Failed to allocate backing file blocks: %m");
1131 }
1132
1133 log_info("Allocated additional %s.",
1134 FORMAT_BYTES((DIV_ROUND_UP(st->st_size, 512) - st->st_blocks) * 512));
1135 return 1;
1136 }
1137
1138 int run_fallocate_by_path(const char *backing_path) {
1139 _cleanup_close_ int backing_fd = -EBADF;
1140
1141 backing_fd = open(backing_path, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
1142 if (backing_fd < 0)
1143 return log_error_errno(errno, "Failed to open '%s' for fallocate(): %m", backing_path);
1144
1145 return run_fallocate(backing_fd, NULL);
1146 }
1147
/* Optionally takes an exclusive, non-blocking BSD lock on the image file and passes the fd to our parent.
 * 'ip' is the image path, used for log messages only. Returns 0 on success or if locking is not
 * requested, -EADDRINUSE if somebody else holds the lock, and another negative error otherwise. */
static int lock_image_fd(int image_fd, const char *ip) {
        int r;

        /* If the $SYSTEMD_LUKS_LOCK environment variable is set we'll take an exclusive BSD lock on the
         * image file, and send it to our parent. homed will keep it open to ensure no other instance of
         * homed (across the network or such) will also mount the file. */

        assert(image_fd >= 0);
        assert(ip);

        r = getenv_bool("SYSTEMD_LUKS_LOCK");
        if (r < 0 && r != -ENXIO)
                return log_error_errno(r, "Failed to parse $SYSTEMD_LUKS_LOCK environment variable: %m");
        if (r <= 0) /* -ENXIO or explicitly false: no locking requested */
                return 0;

        if (flock(image_fd, LOCK_EX|LOCK_NB) < 0) {

                if (errno != EAGAIN)
                        log_error_errno(errno, "Failed to lock image file '%s': %m", ip);
                else
                        log_error_errno(errno, "Image file '%s' already locked, can't use.", ip);

                return errno == EAGAIN ? -EADDRINUSE : -errno; /* Make error recognizable */
        }

        log_info("Successfully locked image file '%s'.", ip);

        /* Now send it to our parent to keep safe while the home dir is active */
        r = sd_pid_notify_with_fds(0, false, "SYSTEMD_LUKS_LOCK_FD=1", &image_fd, 1);
        if (r < 0)
                log_warning_errno(r, "Failed to send LUKS lock fd to parent, ignoring: %m");

        return 0;
}
1185
1186 static int open_image_file(
1187 UserRecord *h,
1188 const char *force_image_path,
1189 struct stat *ret_stat) {
1190
1191 _cleanup_close_ int image_fd = -EBADF;
1192 struct stat st;
1193 const char *ip;
1194 int r;
1195
1196 assert(h || force_image_path);
1197
1198 ip = force_image_path ?: user_record_image_path(h);
1199
1200 image_fd = open(ip, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
1201 if (image_fd < 0)
1202 return log_error_errno(errno, "Failed to open image file %s: %m", ip);
1203
1204 if (fstat(image_fd, &st) < 0)
1205 return log_error_errno(errno, "Failed to fstat() image file: %m");
1206 if (!S_ISREG(st.st_mode) && !S_ISBLK(st.st_mode))
1207 return log_error_errno(
1208 S_ISDIR(st.st_mode) ? SYNTHETIC_ERRNO(EISDIR) : SYNTHETIC_ERRNO(EBADFD),
1209 "Image file %s is not a regular file or block device: %m", ip);
1210
1211 /* Locking block devices doesn't really make sense, as this might interfere with
1212 * udev's workings, and these locks aren't network propagated anyway, hence not what
1213 * we are after here. */
1214 if (S_ISREG(st.st_mode)) {
1215 r = lock_image_fd(image_fd, ip);
1216 if (r < 0)
1217 return r;
1218 }
1219
1220 if (ret_stat)
1221 *ret_stat = st;
1222
1223 return TAKE_FD(image_fd);
1224 }
1225
1226 int home_setup_luks(
1227 UserRecord *h,
1228 HomeSetupFlags flags,
1229 const char *force_image_path,
1230 HomeSetup *setup,
1231 PasswordCache *cache,
1232 UserRecord **ret_luks_home) {
1233
1234 sd_id128_t found_partition_uuid, found_fs_uuid = SD_ID128_NULL, found_luks_uuid = SD_ID128_NULL;
1235 _cleanup_(user_record_unrefp) UserRecord *luks_home = NULL;
1236 _cleanup_(erase_and_freep) void *volume_key = NULL;
1237 size_t volume_key_size = 0;
1238 uint64_t offset, size;
1239 struct stat st;
1240 int r;
1241
1242 assert(h);
1243 assert(setup);
1244 assert(user_record_storage(h) == USER_LUKS);
1245
1246 r = dlopen_cryptsetup();
1247 if (r < 0)
1248 return r;
1249
1250 r = make_dm_names(h, setup);
1251 if (r < 0)
1252 return r;
1253
1254 /* Reuse the image fd if it has already been opened by an earlier step */
1255 if (setup->image_fd < 0) {
1256 setup->image_fd = open_image_file(h, force_image_path, &st);
1257 if (setup->image_fd < 0)
1258 return setup->image_fd;
1259 } else if (fstat(setup->image_fd, &st) < 0)
1260 return log_error_errno(errno, "Failed to stat image: %m");
1261
1262 if (FLAGS_SET(flags, HOME_SETUP_ALREADY_ACTIVATED)) {
1263 struct loop_info64 info;
1264 const char *n;
1265
1266 if (!setup->crypt_device) {
1267 r = luks_open(h,
1268 setup,
1269 cache,
1270 &found_luks_uuid,
1271 &volume_key,
1272 &volume_key_size);
1273 if (r < 0)
1274 return r;
1275 }
1276
1277 if (ret_luks_home) {
1278 r = luks_validate_home_record(setup->crypt_device, h, volume_key, cache, &luks_home);
1279 if (r < 0)
1280 return r;
1281 }
1282
1283 n = sym_crypt_get_device_name(setup->crypt_device);
1284 if (!n)
1285 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine backing device for DM %s.", setup->dm_name);
1286
1287 if (!setup->loop) {
1288 r = loop_device_open_from_path(n, O_RDWR, LOCK_UN, &setup->loop);
1289 if (r < 0)
1290 return log_error_errno(r, "Failed to open loopback device %s: %m", n);
1291 }
1292
1293 if (ioctl(setup->loop->fd, LOOP_GET_STATUS64, &info) < 0) {
1294 _cleanup_free_ char *sysfs = NULL;
1295
1296 if (!IN_SET(errno, ENOTTY, EINVAL))
1297 return log_error_errno(errno, "Failed to get block device metrics of %s: %m", n);
1298
1299 if (ioctl(setup->loop->fd, BLKGETSIZE64, &size) < 0)
1300 return log_error_errno(r, "Failed to read block device size of %s: %m", n);
1301
1302 if (fstat(setup->loop->fd, &st) < 0)
1303 return log_error_errno(r, "Failed to stat block device %s: %m", n);
1304 assert(S_ISBLK(st.st_mode));
1305
1306 if (asprintf(&sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev)) < 0)
1307 return log_oom();
1308
1309 if (access(sysfs, F_OK) < 0) {
1310 if (errno != ENOENT)
1311 return log_error_errno(errno, "Failed to determine whether %s exists: %m", sysfs);
1312
1313 offset = 0;
1314 } else {
1315 _cleanup_free_ char *buffer = NULL;
1316
1317 if (asprintf(&sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/start", DEVNUM_FORMAT_VAL(st.st_rdev)) < 0)
1318 return log_oom();
1319
1320 r = read_one_line_file(sysfs, &buffer);
1321 if (r < 0)
1322 return log_error_errno(r, "Failed to read partition start offset: %m");
1323
1324 r = safe_atou64(buffer, &offset);
1325 if (r < 0)
1326 return log_error_errno(r, "Failed to parse partition start offset: %m");
1327
1328 if (offset > UINT64_MAX / 512U)
1329 return log_error_errno(SYNTHETIC_ERRNO(E2BIG), "Offset too large for 64 byte range, refusing.");
1330
1331 offset *= 512U;
1332 }
1333 } else {
1334 #if HAVE_VALGRIND_MEMCHECK_H
1335 VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
1336 #endif
1337
1338 offset = info.lo_offset;
1339 size = info.lo_sizelimit;
1340 }
1341
1342 found_partition_uuid = found_fs_uuid = SD_ID128_NULL;
1343
1344 log_info("Discovered used loopback device %s.", setup->loop->node);
1345
1346 if (setup->root_fd < 0) {
1347 setup->root_fd = open(user_record_home_directory(h), O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1348 if (setup->root_fd < 0)
1349 return log_error_errno(errno, "Failed to open home directory: %m");
1350 }
1351 } else {
1352 _cleanup_free_ char *fstype = NULL, *subdir = NULL;
1353 const char *ip;
1354
1355 /* When we aren't reopening the home directory we are allocating it fresh, hence the relevant
1356 * objects can't be allocated yet. */
1357 assert(setup->root_fd < 0);
1358 assert(!setup->crypt_device);
1359 assert(!setup->loop);
1360
1361 ip = force_image_path ?: user_record_image_path(h);
1362
1363 subdir = path_join(HOME_RUNTIME_WORK_DIR, user_record_user_name_and_realm(h));
1364 if (!subdir)
1365 return log_oom();
1366
1367 r = luks_validate(setup->image_fd, user_record_user_name_and_realm(h), h->partition_uuid, &found_partition_uuid, &offset, &size);
1368 if (r < 0)
1369 return log_error_errno(r, "Failed to validate disk label: %m");
1370
1371 /* Everything before this point left the image untouched. We are now starting to make
1372 * changes, hence mark the image dirty */
1373 if (run_mark_dirty(setup->image_fd, true) > 0)
1374 setup->do_mark_clean = true;
1375
1376 if (!user_record_luks_discard(h)) {
1377 r = run_fallocate(setup->image_fd, &st);
1378 if (r < 0)
1379 return r;
1380 }
1381
1382 r = loop_device_make(
1383 setup->image_fd,
1384 O_RDWR,
1385 offset,
1386 size,
1387 h->luks_sector_size == UINT64_MAX ? UINT32_MAX : user_record_luks_sector_size(h), /* if sector size is not specified, select UINT32_MAX, i.e. auto-probe */
1388 /* loop_flags= */ 0,
1389 LOCK_UN,
1390 &setup->loop);
1391 if (r == -ENOENT) {
1392 log_error_errno(r, "Loopback block device support is not available on this system.");
1393 return -ENOLINK; /* make recognizable */
1394 }
1395 if (r < 0)
1396 return log_error_errno(r, "Failed to allocate loopback context: %m");
1397
1398 log_info("Setting up loopback device %s completed.", setup->loop->node ?: ip);
1399
1400 r = luks_setup(h,
1401 setup->loop->node ?: ip,
1402 setup->dm_name,
1403 h->luks_uuid,
1404 h->luks_cipher,
1405 h->luks_cipher_mode,
1406 h->luks_volume_key_size,
1407 h->password,
1408 cache,
1409 user_record_luks_discard(h) || user_record_luks_offline_discard(h),
1410 &setup->crypt_device,
1411 &found_luks_uuid,
1412 &volume_key,
1413 &volume_key_size,
1414 &setup->key_serial);
1415 if (r < 0)
1416 return r;
1417
1418 setup->undo_dm = true;
1419
1420 if (ret_luks_home) {
1421 r = luks_validate_home_record(setup->crypt_device, h, volume_key, cache, &luks_home);
1422 if (r < 0)
1423 return r;
1424 }
1425
1426 r = fs_validate(setup->dm_node, h->file_system_uuid, &fstype, &found_fs_uuid);
1427 if (r < 0)
1428 return r;
1429
1430 r = run_fsck(setup->dm_node, fstype);
1431 if (r < 0)
1432 return r;
1433
1434 r = home_unshare_and_mount(setup->dm_node, fstype, user_record_luks_discard(h), user_record_mount_flags(h), h->luks_extra_mount_options);
1435 if (r < 0)
1436 return r;
1437
1438 setup->undo_mount = true;
1439
1440 setup->root_fd = open(subdir, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1441 if (setup->root_fd < 0)
1442 return log_error_errno(errno, "Failed to open home directory: %m");
1443
1444 if (user_record_luks_discard(h))
1445 (void) run_fitrim(setup->root_fd);
1446
1447 setup->do_offline_fallocate = !(setup->do_offline_fitrim = user_record_luks_offline_discard(h));
1448 }
1449
1450 if (!sd_id128_is_null(found_partition_uuid))
1451 setup->found_partition_uuid = found_partition_uuid;
1452 if (!sd_id128_is_null(found_luks_uuid))
1453 setup->found_luks_uuid = found_luks_uuid;
1454 if (!sd_id128_is_null(found_fs_uuid))
1455 setup->found_fs_uuid = found_fs_uuid;
1456
1457 setup->partition_offset = offset;
1458 setup->partition_size = size;
1459
1460 if (volume_key) {
1461 erase_and_free(setup->volume_key);
1462 setup->volume_key = TAKE_PTR(volume_key);
1463 setup->volume_key_size = volume_key_size;
1464 }
1465
1466 if (ret_luks_home)
1467 *ret_luks_home = TAKE_PTR(luks_home);
1468
1469 return 0;
1470 }
1471
/* Logs a one-line overview of the image size, the encrypted volume size and the payload/free space
 * computed from the statfs() data in 'sfs'. */
static void print_size_summary(uint64_t host_size, uint64_t encrypted_size, const struct statfs *sfs) {
        assert(sfs);

        uint64_t fs_payload = (uint64_t) sfs->f_blocks * (uint64_t) sfs->f_frsize;
        uint64_t fs_free = (uint64_t) sfs->f_bfree * (uint64_t) sfs->f_frsize;

        log_info("Image size is %s, file system size is %s, file system payload size is %s, file system free is %s.",
                 FORMAT_BYTES(host_size),
                 FORMAT_BYTES(encrypted_size),
                 FORMAT_BYTES(fs_payload),
                 FORMAT_BYTES(fs_free));
}
1481
1482 static int home_auto_grow_luks(
1483 UserRecord *h,
1484 HomeSetup *setup,
1485 PasswordCache *cache) {
1486
1487 struct statfs sfs;
1488
1489 assert(h);
1490 assert(setup);
1491
1492 if (!IN_SET(user_record_auto_resize_mode(h), AUTO_RESIZE_GROW, AUTO_RESIZE_SHRINK_AND_GROW))
1493 return 0;
1494
1495 assert(setup->root_fd >= 0);
1496
1497 if (fstatfs(setup->root_fd, &sfs) < 0)
1498 return log_error_errno(errno, "Failed to statfs home directory: %m");
1499
1500 if (!fs_can_online_shrink_and_grow(sfs.f_type)) {
1501 log_debug("Not auto-grow file system, since selected file system cannot do both online shrink and grow.");
1502 return 0;
1503 }
1504
1505 log_debug("Initiating auto-grow...");
1506
1507 return home_resize_luks(
1508 h,
1509 HOME_SETUP_ALREADY_ACTIVATED|
1510 HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES|
1511 HOME_SETUP_RESIZE_DONT_SHRINK|
1512 HOME_SETUP_RESIZE_DONT_UNDO,
1513 setup,
1514 cache,
1515 NULL);
1516 }
1517
1518 int home_activate_luks(
1519 UserRecord *h,
1520 HomeSetupFlags flags,
1521 HomeSetup *setup,
1522 PasswordCache *cache,
1523 UserRecord **ret_home) {
1524
1525 _cleanup_(user_record_unrefp) UserRecord *new_home = NULL, *luks_home_record = NULL;
1526 uint64_t host_size, encrypted_size;
1527 const char *hdo, *hd;
1528 struct statfs sfs;
1529 int r;
1530
1531 assert(h);
1532 assert(user_record_storage(h) == USER_LUKS);
1533 assert(setup);
1534 assert(ret_home);
1535
1536 r = dlopen_cryptsetup();
1537 if (r < 0)
1538 return r;
1539
1540 assert_se(hdo = user_record_home_directory(h));
1541 hd = strdupa_safe(hdo); /* copy the string out, since it might change later in the home record object */
1542
1543 r = home_get_state_luks(h, setup);
1544 if (r < 0)
1545 return r;
1546 if (r > 0)
1547 return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Device mapper device %s already exists, refusing.", setup->dm_node);
1548
1549 r = home_setup_luks(
1550 h,
1551 0,
1552 NULL,
1553 setup,
1554 cache,
1555 &luks_home_record);
1556 if (r < 0)
1557 return r;
1558
1559 r = home_auto_grow_luks(h, setup, cache);
1560 if (r < 0)
1561 return r;
1562
1563 r = block_get_size_by_fd(setup->loop->fd, &host_size);
1564 if (r < 0)
1565 return log_error_errno(r, "Failed to get loopback block device size: %m");
1566
1567 r = block_get_size_by_path(setup->dm_node, &encrypted_size);
1568 if (r < 0)
1569 return log_error_errno(r, "Failed to get LUKS block device size: %m");
1570
1571 r = home_refresh(
1572 h,
1573 flags,
1574 setup,
1575 luks_home_record,
1576 cache,
1577 &sfs,
1578 &new_home);
1579 if (r < 0)
1580 return r;
1581
1582 r = home_extend_embedded_identity(new_home, h, setup);
1583 if (r < 0)
1584 return r;
1585
1586 setup->root_fd = safe_close(setup->root_fd);
1587
1588 r = home_move_mount(user_record_user_name_and_realm(h), hd);
1589 if (r < 0)
1590 return r;
1591
1592 setup->undo_mount = false;
1593 setup->do_offline_fitrim = false;
1594
1595 loop_device_relinquish(setup->loop);
1596
1597 r = sym_crypt_deactivate_by_name(NULL, setup->dm_name, CRYPT_DEACTIVATE_DEFERRED);
1598 if (r < 0)
1599 log_warning_errno(r, "Failed to relinquish DM device, ignoring: %m");
1600
1601 setup->undo_dm = false;
1602 setup->do_offline_fallocate = false;
1603 setup->do_mark_clean = false;
1604 setup->do_drop_caches = false;
1605 TAKE_KEY_SERIAL(setup->key_serial); /* Leave key in kernel keyring */
1606
1607 log_info("Activation completed.");
1608
1609 print_size_summary(host_size, encrypted_size, &sfs);
1610
1611 *ret_home = TAKE_PTR(new_home);
1612 return 1;
1613 }
1614
1615 int home_deactivate_luks(UserRecord *h, HomeSetup *setup) {
1616 bool we_detached = false;
1617 int r;
1618
1619 assert(h);
1620 assert(setup);
1621
1622 /* Note that the DM device and loopback device are set to auto-detach, hence strictly speaking we
1623 * don't have to explicitly have to detach them. However, we do that nonetheless (in case of the DM
1624 * device), to avoid races: by explicitly detaching them we know when the detaching is complete. We
1625 * don't bother about the loopback device because unlike the DM device it doesn't have a fixed
1626 * name. */
1627
1628 if (!setup->crypt_device) {
1629 r = acquire_open_luks_device(h, setup, /* graceful= */ true);
1630 if (r < 0)
1631 return log_error_errno(r, "Failed to initialize cryptsetup context for %s: %m", setup->dm_name);
1632 if (r == 0)
1633 log_debug("LUKS device %s has already been detached.", setup->dm_name);
1634 }
1635
1636 if (setup->crypt_device) {
1637 log_info("Discovered used LUKS device %s.", setup->dm_node);
1638
1639 cryptsetup_enable_logging(setup->crypt_device);
1640
1641 r = sym_crypt_deactivate_by_name(setup->crypt_device, setup->dm_name, 0);
1642 if (r < 0) {
1643 if (ERRNO_IS_DEVICE_ABSENT(r) || r == -EINVAL)
1644 log_debug_errno(r, "LUKS device %s is already detached.", setup->dm_node);
1645 else
1646 return log_info_errno(r, "LUKS device %s couldn't be deactivated: %m", setup->dm_node);
1647 } else {
1648 log_info("LUKS device detaching completed.");
1649 we_detached = true;
1650 }
1651 }
1652
1653 (void) wait_for_block_device_gone(setup, USEC_PER_SEC * 30);
1654 setup->undo_dm = false;
1655
1656 if (user_record_luks_offline_discard(h))
1657 log_debug("Not allocating on logout.");
1658 else
1659 (void) run_fallocate_by_path(user_record_image_path(h));
1660
1661 run_mark_dirty_by_path(user_record_image_path(h), false);
1662 return we_detached;
1663 }
1664
1665 int home_trim_luks(UserRecord *h, HomeSetup *setup) {
1666 assert(h);
1667 assert(setup);
1668 assert(setup->root_fd >= 0);
1669
1670 if (!user_record_luks_offline_discard(h)) {
1671 log_debug("Not trimming on logout.");
1672 return 0;
1673 }
1674
1675 (void) run_fitrim(setup->root_fd);
1676 return 0;
1677 }
1678
1679 static struct crypt_pbkdf_type* build_good_pbkdf(struct crypt_pbkdf_type *buffer, UserRecord *hr) {
1680 assert(buffer);
1681 assert(hr);
1682
1683 bool benchmark = user_record_luks_pbkdf_force_iterations(hr) == UINT64_MAX;
1684
1685 *buffer = (struct crypt_pbkdf_type) {
1686 .hash = user_record_luks_pbkdf_hash_algorithm(hr),
1687 .type = user_record_luks_pbkdf_type(hr),
1688 .time_ms = benchmark ? user_record_luks_pbkdf_time_cost_usec(hr) / USEC_PER_MSEC : 0,
1689 .iterations = benchmark ? 0 : user_record_luks_pbkdf_force_iterations(hr),
1690 .max_memory_kb = user_record_luks_pbkdf_memory_cost(hr) / 1024,
1691 .parallel_threads = user_record_luks_pbkdf_parallel_threads(hr),
1692 .flags = benchmark ? 0 : CRYPT_PBKDF_NO_BENCHMARK,
1693 };
1694
1695 return buffer;
1696 }
1697
1698 static struct crypt_pbkdf_type* build_minimal_pbkdf(struct crypt_pbkdf_type *buffer, UserRecord *hr) {
1699 assert(buffer);
1700 assert(hr);
1701
1702 /* For PKCS#11 derived keys (which are generated randomly and are of high quality already) we use a
1703 * minimal PBKDF */
1704 *buffer = (struct crypt_pbkdf_type) {
1705 .hash = user_record_luks_pbkdf_hash_algorithm(hr),
1706 .type = CRYPT_KDF_PBKDF2,
1707 .iterations = 1,
1708 .time_ms = 1,
1709 };
1710
1711 return buffer;
1712 }
1713
1714 static int luks_format(
1715 const char *node,
1716 const char *dm_name,
1717 sd_id128_t uuid,
1718 const char *label,
1719 const PasswordCache *cache,
1720 char **effective_passwords,
1721 bool discard,
1722 UserRecord *hr,
1723 struct crypt_device **ret) {
1724
1725 _cleanup_(user_record_unrefp) UserRecord *reduced = NULL;
1726 _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
1727 _cleanup_(erase_and_freep) void *volume_key = NULL;
1728 struct crypt_pbkdf_type good_pbkdf, minimal_pbkdf;
1729 _cleanup_free_ char *text = NULL;
1730 size_t volume_key_size;
1731 int slot = 0, r;
1732
1733 assert(node);
1734 assert(dm_name);
1735 assert(hr);
1736 assert(ret);
1737
1738 r = sym_crypt_init(&cd, node);
1739 if (r < 0)
1740 return log_error_errno(r, "Failed to allocate libcryptsetup context: %m");
1741
1742 cryptsetup_enable_logging(cd);
1743
1744 /* Normally we'd, just leave volume key generation to libcryptsetup. However, we can't, since we
1745 * can't extract the volume key from the library again, but we need it in order to encrypt the JSON
1746 * record. Hence, let's generate it on our own, so that we can keep track of it. */
1747
1748 volume_key_size = user_record_luks_volume_key_size(hr);
1749 volume_key = malloc(volume_key_size);
1750 if (!volume_key)
1751 return log_oom();
1752
1753 r = crypto_random_bytes(volume_key, volume_key_size);
1754 if (r < 0)
1755 return log_error_errno(r, "Failed to generate volume key: %m");
1756
1757 #if HAVE_CRYPT_SET_METADATA_SIZE
1758 /* Increase the metadata space to 4M, the largest LUKS2 supports */
1759 r = sym_crypt_set_metadata_size(cd, 4096U*1024U, 0);
1760 if (r < 0)
1761 return log_error_errno(r, "Failed to change LUKS2 metadata size: %m");
1762 #endif
1763
1764 build_good_pbkdf(&good_pbkdf, hr);
1765 build_minimal_pbkdf(&minimal_pbkdf, hr);
1766
1767 r = sym_crypt_format(
1768 cd,
1769 CRYPT_LUKS2,
1770 user_record_luks_cipher(hr),
1771 user_record_luks_cipher_mode(hr),
1772 SD_ID128_TO_UUID_STRING(uuid),
1773 volume_key,
1774 volume_key_size,
1775 &(struct crypt_params_luks2) {
1776 .label = label,
1777 .subsystem = "systemd-home",
1778 .sector_size = user_record_luks_sector_size(hr),
1779 .pbkdf = &good_pbkdf,
1780 });
1781 if (r < 0)
1782 return log_error_errno(r, "Failed to format LUKS image: %m");
1783
1784 log_info("LUKS formatting completed.");
1785
1786 STRV_FOREACH(pp, effective_passwords) {
1787
1788 if (password_cache_contains(cache, *pp)) { /* is this a fido2 or pkcs11 password? */
1789 log_debug("Using minimal PBKDF for slot %i", slot);
1790 r = sym_crypt_set_pbkdf_type(cd, &minimal_pbkdf);
1791 } else {
1792 log_debug("Using good PBKDF for slot %i", slot);
1793 r = sym_crypt_set_pbkdf_type(cd, &good_pbkdf);
1794 }
1795 if (r < 0)
1796 return log_error_errno(r, "Failed to tweak PBKDF for slot %i: %m", slot);
1797
1798 r = sym_crypt_keyslot_add_by_volume_key(
1799 cd,
1800 slot,
1801 volume_key,
1802 volume_key_size,
1803 *pp,
1804 strlen(*pp));
1805 if (r < 0)
1806 return log_error_errno(r, "Failed to set up LUKS password for slot %i: %m", slot);
1807
1808 log_info("Writing password to LUKS keyslot %i completed.", slot);
1809 slot++;
1810 }
1811
1812 r = sym_crypt_activate_by_volume_key(
1813 cd,
1814 dm_name,
1815 volume_key,
1816 volume_key_size,
1817 discard ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0);
1818 if (r < 0)
1819 return log_error_errno(r, "Failed to activate LUKS superblock: %m");
1820
1821 log_info("LUKS activation by volume key succeeded.");
1822
1823 r = user_record_clone(hr, USER_RECORD_EXTRACT_EMBEDDED|USER_RECORD_PERMISSIVE, &reduced);
1824 if (r < 0)
1825 return log_error_errno(r, "Failed to prepare home record for LUKS: %m");
1826
1827 r = format_luks_token_text(cd, reduced, volume_key, &text);
1828 if (r < 0)
1829 return r;
1830
1831 r = sym_crypt_token_json_set(cd, CRYPT_ANY_TOKEN, text);
1832 if (r < 0)
1833 return log_error_errno(r, "Failed to set LUKS JSON token: %m");
1834
1835 log_info("Writing user record as LUKS token completed.");
1836
1837 if (ret)
1838 *ret = TAKE_PTR(cd);
1839
1840 return 0;
1841 }
1842
1843 static int make_partition_table(
1844 int fd,
1845 uint32_t sector_size,
1846 const char *label,
1847 sd_id128_t uuid,
1848 uint64_t *ret_offset,
1849 uint64_t *ret_size,
1850 sd_id128_t *ret_disk_uuid) {
1851
1852 _cleanup_(fdisk_unref_partitionp) struct fdisk_partition *p = NULL, *q = NULL;
1853 _cleanup_(fdisk_unref_parttypep) struct fdisk_parttype *t = NULL;
1854 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
1855 _cleanup_free_ char *disk_uuid_as_string = NULL;
1856 uint64_t offset, size, first_lba, start, last_lba, end;
1857 sd_id128_t disk_uuid;
1858 int r;
1859
1860 assert(fd >= 0);
1861 assert(label);
1862 assert(ret_offset);
1863 assert(ret_size);
1864
1865 t = fdisk_new_parttype();
1866 if (!t)
1867 return log_oom();
1868
1869 r = fdisk_parttype_set_typestr(t, SD_GPT_USER_HOME_STR);
1870 if (r < 0)
1871 return log_error_errno(r, "Failed to initialize partition type: %m");
1872
1873 r = fdisk_new_context_at(fd, /* path= */ NULL, /* read_only= */ false, sector_size, &c);
1874 if (r < 0)
1875 return log_error_errno(r, "Failed to open device: %m");
1876
1877 r = fdisk_create_disklabel(c, "gpt");
1878 if (r < 0)
1879 return log_error_errno(r, "Failed to create GPT disk label: %m");
1880
1881 p = fdisk_new_partition();
1882 if (!p)
1883 return log_oom();
1884
1885 r = fdisk_partition_set_type(p, t);
1886 if (r < 0)
1887 return log_error_errno(r, "Failed to set partition type: %m");
1888
1889 r = fdisk_partition_partno_follow_default(p, 1);
1890 if (r < 0)
1891 return log_error_errno(r, "Failed to place partition at first free partition index: %m");
1892
1893 first_lba = fdisk_get_first_lba(c); /* Boundary where usable space starts */
1894 assert(first_lba <= UINT64_MAX/512);
1895 start = DISK_SIZE_ROUND_UP(first_lba * 512); /* Round up to multiple of 4K */
1896
1897 log_debug("Starting partition at offset %" PRIu64, start);
1898
1899 if (start == UINT64_MAX)
1900 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Overflow while rounding up start LBA.");
1901
1902 last_lba = fdisk_get_last_lba(c); /* One sector before boundary where usable space ends */
1903 assert(last_lba < UINT64_MAX/512);
1904 end = DISK_SIZE_ROUND_DOWN((last_lba + 1) * 512); /* Round down to multiple of 4K */
1905
1906 if (end <= start)
1907 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Resulting partition size zero or negative.");
1908
1909 r = fdisk_partition_set_start(p, start / 512);
1910 if (r < 0)
1911 return log_error_errno(r, "Failed to place partition at offset %" PRIu64 ": %m", start);
1912
1913 r = fdisk_partition_set_size(p, (end - start) / 512);
1914 if (r < 0)
1915 return log_error_errno(r, "Failed to end partition at offset %" PRIu64 ": %m", end);
1916
1917 r = fdisk_partition_set_name(p, label);
1918 if (r < 0)
1919 return log_error_errno(r, "Failed to set partition name: %m");
1920
1921 r = fdisk_partition_set_uuid(p, SD_ID128_TO_UUID_STRING(uuid));
1922 if (r < 0)
1923 return log_error_errno(r, "Failed to set partition UUID: %m");
1924
1925 r = fdisk_add_partition(c, p, NULL);
1926 if (r < 0)
1927 return log_error_errno(r, "Failed to add partition: %m");
1928
1929 r = fdisk_write_disklabel(c);
1930 if (r < 0)
1931 return log_error_errno(r, "Failed to write disk label: %m");
1932
1933 r = fdisk_get_disklabel_id(c, &disk_uuid_as_string);
1934 if (r < 0)
1935 return log_error_errno(r, "Failed to determine disk label UUID: %m");
1936
1937 r = sd_id128_from_string(disk_uuid_as_string, &disk_uuid);
1938 if (r < 0)
1939 return log_error_errno(r, "Failed to parse disk label UUID: %m");
1940
1941 r = fdisk_get_partition(c, 0, &q);
1942 if (r < 0)
1943 return log_error_errno(r, "Failed to read created partition metadata: %m");
1944
1945 assert(fdisk_partition_has_start(q));
1946 offset = fdisk_partition_get_start(q);
1947 if (offset > UINT64_MAX / 512U)
1948 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Partition offset too large.");
1949
1950 assert(fdisk_partition_has_size(q));
1951 size = fdisk_partition_get_size(q);
1952 if (size > UINT64_MAX / 512U)
1953 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Partition size too large.");
1954
1955 *ret_offset = offset * 512U;
1956 *ret_size = size * 512U;
1957 *ret_disk_uuid = disk_uuid;
1958
1959 return 0;
1960 }
1961
/* Returns true if host_size is at least the minimal size minimal_size_by_fs_name() reports for fstype.
 * Returns false if that lookup yields UINT64_MAX. */
static bool supported_fs_size(const char *fstype, uint64_t host_size) {
        uint64_t minimum = minimal_size_by_fs_name(fstype);

        return minimum != UINT64_MAX && host_size >= minimum;
}
1971
1972 static int wait_for_devlink(const char *path) {
1973 _cleanup_close_ int inotify_fd = -EBADF;
1974 usec_t until;
1975 int r;
1976
1977 /* let's wait for a device link to show up in /dev, with a timeout. This is good to do since we
1978 * return a /dev/disk/by-uuid/… link to our callers and they likely want to access it right-away,
1979 * hence let's wait until udev has caught up with our changes, and wait for the symlink to be
1980 * created. */
1981
1982 until = usec_add(now(CLOCK_MONOTONIC), 45 * USEC_PER_SEC);
1983
1984 for (;;) {
1985 _cleanup_free_ char *dn = NULL;
1986 usec_t w;
1987
1988 if (laccess(path, F_OK) < 0) {
1989 if (errno != ENOENT)
1990 return log_error_errno(errno, "Failed to determine whether %s exists: %m", path);
1991 } else
1992 return 0; /* Found it */
1993
1994 if (inotify_fd < 0) {
1995 /* We need to wait for the device symlink to show up, let's create an inotify watch for it */
1996 inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1997 if (inotify_fd < 0)
1998 return log_error_errno(errno, "Failed to allocate inotify fd: %m");
1999 }
2000
2001 r = path_extract_directory(path, &dn);
2002 if (r < 0)
2003 return log_error_errno(r, "Failed to extract directory from device node path '%s': %m", path);
2004 for (;;) {
2005 _cleanup_free_ char *ndn = NULL;
2006
2007 log_info("Watching %s", dn);
2008
2009 if (inotify_add_watch(inotify_fd, dn, IN_CREATE|IN_MOVED_TO|IN_ONLYDIR|IN_DELETE_SELF|IN_MOVE_SELF) < 0) {
2010 if (errno != ENOENT)
2011 return log_error_errno(errno, "Failed to add watch on %s: %m", dn);
2012 } else
2013 break;
2014
2015 r = path_extract_directory(dn, &ndn);
2016 if (r == -EADDRNOTAVAIL) /* Arrived at the top? */
2017 break;
2018 if (r < 0)
2019 return log_error_errno(r, "Failed to extract directory from device node path '%s': %m", dn);
2020
2021 free_and_replace(dn, ndn);
2022 }
2023
2024 w = now(CLOCK_MONOTONIC);
2025 if (w >= until)
2026 return log_error_errno(SYNTHETIC_ERRNO(ETIMEDOUT), "Device link %s still hasn't shown up, giving up.", path);
2027
2028 r = fd_wait_for_event(inotify_fd, POLLIN, until - w);
2029 if (r < 0) {
2030 if (ERRNO_IS_TRANSIENT(r))
2031 continue;
2032 return log_error_errno(r, "Failed to watch inotify: %m");
2033 }
2034
2035 (void) flush_fd(inotify_fd);
2036 }
2037 }
2038
2039 static int calculate_initial_image_size(UserRecord *h, int image_fd, const char *fstype, uint64_t *ret) {
2040 uint64_t upper_boundary, lower_boundary;
2041 struct statfs sfs;
2042
2043 assert(h);
2044 assert(image_fd >= 0);
2045 assert(ret);
2046
2047 if (fstatfs(image_fd, &sfs) < 0)
2048 return log_error_errno(errno, "statfs() on image failed: %m");
2049
2050 upper_boundary = DISK_SIZE_ROUND_DOWN((uint64_t) sfs.f_bsize * sfs.f_bavail);
2051
2052 if (h->disk_size != UINT64_MAX)
2053 *ret = MIN(DISK_SIZE_ROUND_DOWN(h->disk_size), upper_boundary);
2054 else if (h->disk_size_relative == UINT64_MAX) {
2055
2056 if (upper_boundary > UINT64_MAX / USER_DISK_SIZE_DEFAULT_PERCENT)
2057 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Disk size too large.");
2058
2059 *ret = DISK_SIZE_ROUND_DOWN(upper_boundary * USER_DISK_SIZE_DEFAULT_PERCENT / 100);
2060
2061 log_info("Sizing home to %u%% of available disk space, which is %s.",
2062 USER_DISK_SIZE_DEFAULT_PERCENT,
2063 FORMAT_BYTES(*ret));
2064 } else {
2065 *ret = DISK_SIZE_ROUND_DOWN((uint64_t) ((double) upper_boundary * (double) CLAMP(h->disk_size_relative, 0U, UINT32_MAX) / (double) UINT32_MAX));
2066
2067 log_info("Sizing home to %" PRIu64 ".%01" PRIu64 "%% of available disk space, which is %s.",
2068 (h->disk_size_relative * 100) / UINT32_MAX,
2069 ((h->disk_size_relative * 1000) / UINT32_MAX) % 10,
2070 FORMAT_BYTES(*ret));
2071 }
2072
2073 lower_boundary = minimal_size_by_fs_name(fstype);
2074 if (lower_boundary != UINT64_MAX) {
2075 assert(GPT_LUKS2_OVERHEAD < UINT64_MAX - lower_boundary);
2076 lower_boundary += GPT_LUKS2_OVERHEAD;
2077 }
2078 if (lower_boundary == UINT64_MAX || lower_boundary < USER_DISK_SIZE_MIN)
2079 lower_boundary = USER_DISK_SIZE_MIN;
2080
2081 if (*ret < lower_boundary)
2082 *ret = lower_boundary;
2083
2084 return 0;
2085 }
2086
2087 static int home_truncate(
2088 UserRecord *h,
2089 int fd,
2090 uint64_t size) {
2091
2092 bool trunc;
2093 int r;
2094
2095 assert(h);
2096 assert(fd >= 0);
2097
2098 trunc = user_record_luks_discard(h);
2099 if (!trunc) {
2100 r = fallocate(fd, 0, 0, size);
2101 if (r < 0 && ERRNO_IS_NOT_SUPPORTED(errno)) {
2102 /* Some file systems do not support fallocate(), let's gracefully degrade
2103 * (ZFS, reiserfs, …) and fall back to truncation */
2104 log_notice_errno(errno, "Backing file system does not support fallocate(), falling back to ftruncate(), i.e. implicitly using non-discard mode.");
2105 trunc = true;
2106 }
2107 }
2108
2109 if (trunc)
2110 r = ftruncate(fd, size);
2111
2112 if (r < 0) {
2113 if (ERRNO_IS_DISK_SPACE(errno)) {
2114 log_debug_errno(errno, "Not enough disk space to allocate home of size %s.", FORMAT_BYTES(size));
2115 return -ENOSPC; /* make recognizable */
2116 }
2117
2118 return log_error_errno(errno, "Failed to truncate home image: %m");
2119 }
2120
2121 return !trunc; /* Return == 0 if we managed to truncate, > 0 if we managed to allocate */
2122 }
2123
2124 int home_create_luks(
2125 UserRecord *h,
2126 HomeSetup *setup,
2127 const PasswordCache *cache,
2128 char **effective_passwords,
2129 UserRecord **ret_home) {
2130
2131 _cleanup_free_ char *subdir = NULL, *disk_uuid_path = NULL;
2132 uint64_t encrypted_size,
2133 host_size = 0, partition_offset = 0, partition_size = 0; /* Unnecessary initialization to appease gcc */
2134 _cleanup_(user_record_unrefp) UserRecord *new_home = NULL;
2135 sd_id128_t partition_uuid, fs_uuid, luks_uuid, disk_uuid;
2136 _cleanup_close_ int mount_fd = -EBADF;
2137 const char *fstype, *ip;
2138 struct statfs sfs;
2139 int r;
2140 _cleanup_strv_free_ char **extra_mkfs_options = NULL;
2141
2142 assert(h);
2143 assert(h->storage < 0 || h->storage == USER_LUKS);
2144 assert(setup);
2145 assert(!setup->temporary_image_path);
2146 assert(setup->image_fd < 0);
2147 assert(ret_home);
2148
2149 r = dlopen_cryptsetup();
2150 if (r < 0)
2151 return r;
2152
2153 assert_se(ip = user_record_image_path(h));
2154
2155 fstype = user_record_file_system_type(h);
2156 if (!supported_fstype(fstype))
2157 return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "Unsupported file system type: %s", fstype);
2158
2159 r = mkfs_exists(fstype);
2160 if (r < 0)
2161 return log_error_errno(r, "Failed to check if mkfs binary for %s exists: %m", fstype);
2162 if (r == 0) {
2163 if (h->file_system_type || streq(fstype, "ext4") || !supported_fstype("ext4"))
2164 return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "mkfs binary for file system type %s does not exist.", fstype);
2165
2166 /* If the record does not explicitly declare a file system to use, and the compiled-in
2167 * default does not actually exist, than do an automatic fallback onto ext4, as the baseline
2168 * fs of Linux. We won't search for a working fs type here beyond ext4, i.e. nothing fancier
2169 * than a single, conservative fallback to baseline. This should be useful in minimal
2170 * environments where mkfs.btrfs or so are not made available, but mkfs.ext4 as Linux' most
2171 * boring, most basic fs is. */
2172 log_info("Formatting tool for compiled-in default file system %s not available, falling back to ext4 instead.", fstype);
2173 fstype = "ext4";
2174 }
2175
2176 if (sd_id128_is_null(h->partition_uuid)) {
2177 r = sd_id128_randomize(&partition_uuid);
2178 if (r < 0)
2179 return log_error_errno(r, "Failed to acquire partition UUID: %m");
2180 } else
2181 partition_uuid = h->partition_uuid;
2182
2183 if (sd_id128_is_null(h->luks_uuid)) {
2184 r = sd_id128_randomize(&luks_uuid);
2185 if (r < 0)
2186 return log_error_errno(r, "Failed to acquire LUKS UUID: %m");
2187 } else
2188 luks_uuid = h->luks_uuid;
2189
2190 if (sd_id128_is_null(h->file_system_uuid)) {
2191 r = sd_id128_randomize(&fs_uuid);
2192 if (r < 0)
2193 return log_error_errno(r, "Failed to acquire file system UUID: %m");
2194 } else
2195 fs_uuid = h->file_system_uuid;
2196
2197 r = make_dm_names(h, setup);
2198 if (r < 0)
2199 return r;
2200
2201 r = access(setup->dm_node, F_OK);
2202 if (r < 0) {
2203 if (errno != ENOENT)
2204 return log_error_errno(errno, "Failed to determine whether %s exists: %m", setup->dm_node);
2205 } else
2206 return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Device mapper device %s already exists, refusing.", setup->dm_node);
2207
2208 if (path_startswith(ip, "/dev/")) {
2209 _cleanup_free_ char *sysfs = NULL;
2210 uint64_t block_device_size;
2211 struct stat st;
2212
2213 /* Let's place the home directory on a real device, i.e. a USB stick or such */
2214
2215 setup->image_fd = open_image_file(h, ip, &st);
2216 if (setup->image_fd < 0)
2217 return setup->image_fd;
2218
2219 if (!S_ISBLK(st.st_mode))
2220 return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Device is not a block device, refusing.");
2221
2222 if (asprintf(&sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev)) < 0)
2223 return log_oom();
2224 if (access(sysfs, F_OK) < 0) {
2225 if (errno != ENOENT)
2226 return log_error_errno(errno, "Failed to check whether %s exists: %m", sysfs);
2227 } else
2228 return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Operating on partitions is currently not supported, sorry. Please specify a top-level block device.");
2229
2230 if (flock(setup->image_fd, LOCK_EX) < 0) /* make sure udev doesn't read from it while we operate on the device */
2231 return log_error_errno(errno, "Failed to lock block device %s: %m", ip);
2232
2233 if (ioctl(setup->image_fd, BLKGETSIZE64, &block_device_size) < 0)
2234 return log_error_errno(errno, "Failed to read block device size: %m");
2235
2236 if (h->disk_size == UINT64_MAX) {
2237
2238 /* If a relative disk size is requested, apply it relative to the block device size */
2239 if (h->disk_size_relative < UINT32_MAX)
2240 host_size = CLAMP(DISK_SIZE_ROUND_DOWN(block_device_size * h->disk_size_relative / UINT32_MAX),
2241 USER_DISK_SIZE_MIN, USER_DISK_SIZE_MAX);
2242 else
2243 host_size = block_device_size; /* Otherwise, take the full device */
2244
2245 } else if (h->disk_size > block_device_size)
2246 return log_error_errno(SYNTHETIC_ERRNO(EMSGSIZE), "Selected disk size larger than backing block device, refusing.");
2247 else
2248 host_size = DISK_SIZE_ROUND_DOWN(h->disk_size);
2249
2250 if (!supported_fs_size(fstype, LESS_BY(host_size, GPT_LUKS2_OVERHEAD)))
2251 return log_error_errno(SYNTHETIC_ERRNO(ERANGE),
2252 "Selected file system size too small for %s.", fstype);
2253
2254 /* After creation we should reference this partition by its UUID instead of the block
2255 * device. That's preferable since the user might have specified a device node such as
2256 * /dev/sdb to us, which might look very different when replugged. */
2257 if (asprintf(&disk_uuid_path, "/dev/disk/by-uuid/" SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(luks_uuid)) < 0)
2258 return log_oom();
2259
2260 if (user_record_luks_discard(h) || user_record_luks_offline_discard(h)) {
2261 /* If we want online or offline discard, discard once before we start using things. */
2262
2263 if (ioctl(setup->image_fd, BLKDISCARD, (uint64_t[]) { 0, block_device_size }) < 0)
2264 log_full_errno(errno == EOPNOTSUPP ? LOG_DEBUG : LOG_WARNING, errno,
2265 "Failed to issue full-device BLKDISCARD on device, ignoring: %m");
2266 else
2267 log_info("Full device discard completed.");
2268 }
2269 } else {
2270 _cleanup_free_ char *t = NULL;
2271
2272 r = mkdir_parents(ip, 0755);
2273 if (r < 0)
2274 return log_error_errno(r, "Failed to create parent directory of %s: %m", ip);
2275
2276 r = tempfn_random(ip, "homework", &t);
2277 if (r < 0)
2278 return log_error_errno(r, "Failed to derive temporary file name for %s: %m", ip);
2279
2280 setup->image_fd = open(t, O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0600);
2281 if (setup->image_fd < 0)
2282 return log_error_errno(errno, "Failed to create home image %s: %m", t);
2283
2284 setup->temporary_image_path = TAKE_PTR(t);
2285
2286 r = chattr_full(setup->image_fd, NULL, FS_NOCOW_FL|FS_NOCOMP_FL, FS_NOCOW_FL|FS_NOCOMP_FL, NULL, NULL, CHATTR_FALLBACK_BITWISE);
2287 if (r < 0 && r != -ENOANO) /* ENOANO → some bits didn't work; which we skip logging about because chattr_full() already debug logs about those flags */
2288 log_full_errno(ERRNO_IS_NOT_SUPPORTED(r) ? LOG_DEBUG : LOG_WARNING, r,
2289 "Failed to set file attributes on %s, ignoring: %m", setup->temporary_image_path);
2290
2291 r = calculate_initial_image_size(h, setup->image_fd, fstype, &host_size);
2292 if (r < 0)
2293 return r;
2294
2295 r = resize_image_loop(h, setup, 0, host_size, &host_size);
2296 if (r < 0)
2297 return r;
2298
2299 log_info("Allocating image file completed.");
2300 }
2301
2302 r = make_partition_table(
2303 setup->image_fd,
2304 user_record_luks_sector_size(h),
2305 user_record_user_name_and_realm(h),
2306 partition_uuid,
2307 &partition_offset,
2308 &partition_size,
2309 &disk_uuid);
2310 if (r < 0)
2311 return r;
2312
2313 log_info("Writing of partition table completed.");
2314
2315 r = loop_device_make(
2316 setup->image_fd,
2317 O_RDWR,
2318 partition_offset,
2319 partition_size,
2320 user_record_luks_sector_size(h),
2321 0,
2322 LOCK_EX,
2323 &setup->loop);
2324 if (r < 0) {
2325 if (r == -ENOENT) { /* this means /dev/loop-control doesn't exist, i.e. we are in a container
2326 * or similar and loopback bock devices are not available, return a
2327 * recognizable error in this case. */
2328 log_error_errno(r, "Loopback block device support is not available on this system.");
2329 return -ENOLINK; /* Make recognizable */
2330 }
2331
2332 return log_error_errno(r, "Failed to set up loopback device for %s: %m", setup->temporary_image_path);
2333 }
2334
2335 log_info("Setting up loopback device %s completed.", setup->loop->node ?: ip);
2336
2337 r = luks_format(setup->loop->node,
2338 setup->dm_name,
2339 luks_uuid,
2340 user_record_user_name_and_realm(h),
2341 cache,
2342 effective_passwords,
2343 user_record_luks_discard(h) || user_record_luks_offline_discard(h),
2344 h,
2345 &setup->crypt_device);
2346 if (r < 0)
2347 return r;
2348
2349 setup->undo_dm = true;
2350
2351 r = block_get_size_by_path(setup->dm_node, &encrypted_size);
2352 if (r < 0)
2353 return log_error_errno(r, "Failed to get encrypted block device size: %m");
2354
2355 log_info("Setting up LUKS device %s completed.", setup->dm_node);
2356
2357 r = mkfs_options_from_env("HOME", fstype, &extra_mkfs_options);
2358 if (r < 0)
2359 return log_error_errno(r, "Failed to determine mkfs command line options for '%s': %m", fstype);
2360
2361 r = make_filesystem(setup->dm_node,
2362 fstype,
2363 user_record_user_name_and_realm(h),
2364 /* root = */ NULL,
2365 fs_uuid,
2366 user_record_luks_discard(h),
2367 /* quiet = */ true,
2368 /* sector_size = */ 0,
2369 extra_mkfs_options);
2370 if (r < 0)
2371 return r;
2372
2373 log_info("Formatting file system completed.");
2374
2375 r = home_unshare_and_mount(setup->dm_node, fstype, user_record_luks_discard(h), user_record_mount_flags(h), h->luks_extra_mount_options);
2376 if (r < 0)
2377 return r;
2378
2379 setup->undo_mount = true;
2380
2381 subdir = path_join(HOME_RUNTIME_WORK_DIR, user_record_user_name_and_realm(h));
2382 if (!subdir)
2383 return log_oom();
2384
2385 /* Prefer using a btrfs subvolume if we can, fall back to directory otherwise */
2386 r = btrfs_subvol_make_fallback(subdir, 0700);
2387 if (r < 0)
2388 return log_error_errno(r, "Failed to create user directory in mounted image file: %m");
2389
2390 setup->root_fd = open(subdir, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
2391 if (setup->root_fd < 0)
2392 return log_error_errno(errno, "Failed to open user directory in mounted image file: %m");
2393
2394 (void) home_shift_uid(setup->root_fd, NULL, UID_NOBODY, h->uid, &mount_fd);
2395
2396 if (mount_fd >= 0) {
2397 /* If we have established a new mount, then we can use that as new root fd to our home directory. */
2398 safe_close(setup->root_fd);
2399
2400 setup->root_fd = fd_reopen(mount_fd, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
2401 if (setup->root_fd < 0)
2402 return log_error_errno(setup->root_fd, "Unable to convert mount fd into proper directory fd: %m");
2403
2404 mount_fd = safe_close(mount_fd);
2405 }
2406
2407 r = home_populate(h, setup->root_fd);
2408 if (r < 0)
2409 return r;
2410
2411 r = home_sync_and_statfs(setup->root_fd, &sfs);
2412 if (r < 0)
2413 return r;
2414
2415 r = user_record_clone(h, USER_RECORD_LOAD_MASK_SECRET|USER_RECORD_LOG|USER_RECORD_PERMISSIVE, &new_home);
2416 if (r < 0)
2417 return log_error_errno(r, "Failed to clone record: %m");
2418
2419 r = user_record_add_binding(
2420 new_home,
2421 USER_LUKS,
2422 disk_uuid_path ?: ip,
2423 partition_uuid,
2424 luks_uuid,
2425 fs_uuid,
2426 sym_crypt_get_cipher(setup->crypt_device),
2427 sym_crypt_get_cipher_mode(setup->crypt_device),
2428 luks_volume_key_size_convert(setup->crypt_device),
2429 fstype,
2430 NULL,
2431 h->uid,
2432 (gid_t) h->uid);
2433 if (r < 0)
2434 return log_error_errno(r, "Failed to add binding to record: %m");
2435
2436 if (user_record_luks_offline_discard(h)) {
2437 r = run_fitrim(setup->root_fd);
2438 if (r < 0)
2439 return r;
2440 }
2441
2442 setup->root_fd = safe_close(setup->root_fd);
2443
2444 r = home_setup_undo_mount(setup, LOG_ERR);
2445 if (r < 0)
2446 return r;
2447
2448 r = home_setup_undo_dm(setup, LOG_ERR);
2449 if (r < 0)
2450 return r;
2451
2452 setup->loop = loop_device_unref(setup->loop);
2453
2454 if (!user_record_luks_offline_discard(h)) {
2455 r= run_fallocate(setup->image_fd, NULL /* refresh stat() data */);
2456 if (r < 0)
2457 return r;
2458 }
2459
2460 /* Sync everything to disk before we move things into place under the final name. */
2461 if (fsync(setup->image_fd) < 0)
2462 return log_error_errno(r, "Failed to synchronize image to disk: %m");
2463
2464 if (disk_uuid_path)
2465 /* Reread partition table if this is a block device */
2466 (void) ioctl(setup->image_fd, BLKRRPART, 0);
2467 else {
2468 assert(setup->temporary_image_path);
2469
2470 if (rename(setup->temporary_image_path, ip) < 0)
2471 return log_error_errno(errno, "Failed to rename image file: %m");
2472
2473 setup->temporary_image_path = mfree(setup->temporary_image_path);
2474
2475 /* If we operate on a file, sync the containing directory too. */
2476 r = fsync_directory_of_file(setup->image_fd);
2477 if (r < 0)
2478 return log_error_errno(r, "Failed to synchronize directory of image file to disk: %m");
2479
2480 log_info("Moved image file into place.");
2481 }
2482
2483 /* Let's close the image fd now. If we are operating on a real block device this will release the BSD
2484 * lock that ensures udev doesn't interfere with what we are doing */
2485 setup->image_fd = safe_close(setup->image_fd);
2486
2487 if (disk_uuid_path)
2488 (void) wait_for_devlink(disk_uuid_path);
2489
2490 log_info("Creation completed.");
2491
2492 print_size_summary(host_size, encrypted_size, &sfs);
2493
2494 log_debug("GPT + LUKS2 overhead is %" PRIu64 " (expected %" PRIu64 ")", host_size - encrypted_size, GPT_LUKS2_OVERHEAD);
2495
2496 *ret_home = TAKE_PTR(new_home);
2497 return 0;
2498 }
2499
2500 int home_get_state_luks(UserRecord *h, HomeSetup *setup) {
2501 int r;
2502
2503 assert(h);
2504 assert(setup);
2505
2506 r = make_dm_names(h, setup);
2507 if (r < 0)
2508 return r;
2509
2510 r = access(setup->dm_node, F_OK);
2511 if (r < 0 && errno != ENOENT)
2512 return log_error_errno(errno, "Failed to determine whether %s exists: %m", setup->dm_node);
2513
2514 return r >= 0;
2515 }
2516
2517 enum {
2518 CAN_RESIZE_ONLINE,
2519 CAN_RESIZE_OFFLINE,
2520 };
2521
2522 static int can_resize_fs(int fd, uint64_t old_size, uint64_t new_size) {
2523 struct statfs sfs;
2524
2525 assert(fd >= 0);
2526
2527 /* Filter out bogus requests early */
2528 if (old_size == 0 || old_size == UINT64_MAX ||
2529 new_size == 0 || new_size == UINT64_MAX)
2530 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid resize parameters.");
2531
2532 if ((old_size & 511) != 0 || (new_size & 511) != 0)
2533 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Resize parameters not multiple of 512.");
2534
2535 if (fstatfs(fd, &sfs) < 0)
2536 return log_error_errno(errno, "Failed to fstatfs() file system: %m");
2537
2538 if (is_fs_type(&sfs, BTRFS_SUPER_MAGIC)) {
2539
2540 if (new_size < BTRFS_MINIMAL_SIZE)
2541 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "New file system size too small for btrfs (needs to be 256M at least.");
2542
2543 /* btrfs can grow and shrink online */
2544
2545 } else if (is_fs_type(&sfs, XFS_SB_MAGIC)) {
2546
2547 if (new_size < XFS_MINIMAL_SIZE)
2548 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "New file system size too small for xfs (needs to be 14M at least).");
2549
2550 /* XFS can grow, but not shrink */
2551 if (new_size < old_size)
2552 return log_error_errno(SYNTHETIC_ERRNO(EMSGSIZE), "Shrinking this type of file system is not supported.");
2553
2554 } else if (is_fs_type(&sfs, EXT4_SUPER_MAGIC)) {
2555
2556 if (new_size < EXT4_MINIMAL_SIZE)
2557 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "New file system size too small for ext4 (needs to be 1M at least).");
2558
2559 /* ext4 can grow online, and shrink offline */
2560 if (new_size < old_size)
2561 return CAN_RESIZE_OFFLINE;
2562
2563 } else
2564 return log_error_errno(SYNTHETIC_ERRNO(ESOCKTNOSUPPORT), "Resizing this type of file system is not supported.");
2565
2566 return CAN_RESIZE_ONLINE;
2567 }
2568
2569 static int ext4_offline_resize_fs(
2570 HomeSetup *setup,
2571 uint64_t new_size,
2572 bool discard,
2573 unsigned long flags,
2574 const char *extra_mount_options) {
2575
2576 _cleanup_free_ char *size_str = NULL;
2577 bool re_open = false, re_mount = false;
2578 pid_t resize_pid, fsck_pid;
2579 int r, exit_status;
2580
2581 assert(setup);
2582 assert(setup->dm_node);
2583
2584 /* First, unmount the file system */
2585 if (setup->root_fd >= 0) {
2586 setup->root_fd = safe_close(setup->root_fd);
2587 re_open = true;
2588 }
2589
2590 if (setup->undo_mount) {
2591 r = home_setup_undo_mount(setup, LOG_ERR);
2592 if (r < 0)
2593 return r;
2594
2595 re_mount = true;
2596 }
2597
2598 log_info("Temporary unmounting of file system completed.");
2599
2600 /* resize2fs requires that the file system is force checked first, do so. */
2601 r = safe_fork("(e2fsck)",
2602 FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_STDOUT_TO_STDERR|FORK_CLOSE_ALL_FDS,
2603 &fsck_pid);
2604 if (r < 0)
2605 return r;
2606 if (r == 0) {
2607 /* Child */
2608 execlp("e2fsck" ,"e2fsck", "-fp", setup->dm_node, NULL);
2609 log_open();
2610 log_error_errno(errno, "Failed to execute e2fsck: %m");
2611 _exit(EXIT_FAILURE);
2612 }
2613
2614 exit_status = wait_for_terminate_and_check("e2fsck", fsck_pid, WAIT_LOG_ABNORMAL);
2615 if (exit_status < 0)
2616 return exit_status;
2617 if ((exit_status & ~FSCK_ERROR_CORRECTED) != 0) {
2618 log_warning("e2fsck failed with exit status %i.", exit_status);
2619
2620 if ((exit_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0)
2621 return log_error_errno(SYNTHETIC_ERRNO(EIO), "File system is corrupted, refusing.");
2622
2623 log_warning("Ignoring fsck error.");
2624 }
2625
2626 log_info("Forced file system check completed.");
2627
2628 /* We use 512 sectors here, because resize2fs doesn't do byte sizes */
2629 if (asprintf(&size_str, "%" PRIu64 "s", new_size / 512) < 0)
2630 return log_oom();
2631
2632 /* Resize the thing */
2633 r = safe_fork("(e2resize)",
2634 FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_WAIT|FORK_STDOUT_TO_STDERR|FORK_CLOSE_ALL_FDS,
2635 &resize_pid);
2636 if (r < 0)
2637 return r;
2638 if (r == 0) {
2639 /* Child */
2640 execlp("resize2fs" ,"resize2fs", setup->dm_node, size_str, NULL);
2641 log_open();
2642 log_error_errno(errno, "Failed to execute resize2fs: %m");
2643 _exit(EXIT_FAILURE);
2644 }
2645
2646 log_info("Offline file system resize completed.");
2647
2648 /* Re-establish mounts and reopen the directory */
2649 if (re_mount) {
2650 r = home_mount_node(setup->dm_node, "ext4", discard, flags, extra_mount_options);
2651 if (r < 0)
2652 return r;
2653
2654 setup->undo_mount = true;
2655 }
2656
2657 if (re_open) {
2658 setup->root_fd = open(HOME_RUNTIME_WORK_DIR, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
2659 if (setup->root_fd < 0)
2660 return log_error_errno(errno, "Failed to reopen file system: %m");
2661 }
2662
2663 log_info("File system mounted again.");
2664
2665 return 0;
2666 }
2667
2668 static int prepare_resize_partition(
2669 int fd,
2670 uint64_t partition_offset,
2671 uint64_t old_partition_size,
2672 sd_id128_t *ret_disk_uuid,
2673 struct fdisk_table **ret_table,
2674 struct fdisk_partition **ret_partition) {
2675
2676 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
2677 _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL;
2678 _cleanup_free_ char *disk_uuid_as_string = NULL;
2679 struct fdisk_partition *found = NULL;
2680 sd_id128_t disk_uuid;
2681 size_t n_partitions;
2682 int r;
2683
2684 assert(fd >= 0);
2685 assert(ret_disk_uuid);
2686 assert(ret_table);
2687
2688 assert((partition_offset & 511) == 0);
2689 assert((old_partition_size & 511) == 0);
2690 assert(UINT64_MAX - old_partition_size >= partition_offset);
2691
2692 if (partition_offset == 0) {
2693 /* If the offset is at the beginning we assume no partition table, let's exit early. */
2694 log_debug("Not rewriting partition table, operating on naked device.");
2695 *ret_disk_uuid = SD_ID128_NULL;
2696 *ret_table = NULL;
2697 *ret_partition = NULL;
2698 return 0;
2699 }
2700
2701 r = fdisk_new_context_at(fd, /* path= */ NULL, /* read_only= */ false, UINT32_MAX, &c);
2702 if (r < 0)
2703 return log_error_errno(r, "Failed to open device: %m");
2704
2705 if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
2706 return log_error_errno(SYNTHETIC_ERRNO(ENOMEDIUM), "Disk has no GPT partition table.");
2707
2708 r = fdisk_get_disklabel_id(c, &disk_uuid_as_string);
2709 if (r < 0)
2710 return log_error_errno(r, "Failed to acquire disk UUID: %m");
2711
2712 r = sd_id128_from_string(disk_uuid_as_string, &disk_uuid);
2713 if (r < 0)
2714 return log_error_errno(r, "Failed parse disk UUID: %m");
2715
2716 r = fdisk_get_partitions(c, &t);
2717 if (r < 0)
2718 return log_error_errno(r, "Failed to acquire partition table: %m");
2719
2720 n_partitions = fdisk_table_get_nents(t);
2721 for (size_t i = 0; i < n_partitions; i++) {
2722 struct fdisk_partition *p;
2723
2724 p = fdisk_table_get_partition(t, i);
2725 if (!p)
2726 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");
2727
2728 if (fdisk_partition_is_used(p) <= 0)
2729 continue;
2730 if (fdisk_partition_has_start(p) <= 0 || fdisk_partition_has_size(p) <= 0 || fdisk_partition_has_end(p) <= 0)
2731 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found partition without a size.");
2732
2733 if (fdisk_partition_get_start(p) == partition_offset / 512U &&
2734 fdisk_partition_get_size(p) == old_partition_size / 512U) {
2735
2736 if (found)
2737 return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ), "Partition found twice, refusing.");
2738
2739 found = p;
2740 } else if (fdisk_partition_get_end(p) > partition_offset / 512U)
2741 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Can't extend, not last partition in image.");
2742 }
2743
2744 if (!found)
2745 return log_error_errno(SYNTHETIC_ERRNO(ENOPKG), "Failed to find matching partition to resize.");
2746
2747 *ret_disk_uuid = disk_uuid;
2748 *ret_table = TAKE_PTR(t);
2749 *ret_partition = found;
2750
2751 return 1;
2752 }
2753
2754 static int get_maximum_partition_size(
2755 int fd,
2756 struct fdisk_partition *p,
2757 uint64_t *ret_maximum_partition_size) {
2758
2759 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
2760 uint64_t start_lba, start, last_lba, end;
2761 int r;
2762
2763 assert(fd >= 0);
2764 assert(p);
2765 assert(ret_maximum_partition_size);
2766
2767 r = fdisk_new_context_at(fd, /* path= */ NULL, /* read_only= */ true, /* sector_size= */ UINT32_MAX, &c);
2768 if (r < 0)
2769 return log_error_errno(r, "Failed to create fdisk context: %m");
2770
2771 start_lba = fdisk_partition_get_start(p);
2772 assert(start_lba <= UINT64_MAX/512);
2773 start = start_lba * 512;
2774
2775 last_lba = fdisk_get_last_lba(c); /* One sector before boundary where usable space ends */
2776 assert(last_lba < UINT64_MAX/512);
2777 end = DISK_SIZE_ROUND_DOWN((last_lba + 1) * 512); /* Round down to multiple of 4K */
2778
2779 if (start > end)
2780 return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Last LBA is before partition start.");
2781
2782 *ret_maximum_partition_size = DISK_SIZE_ROUND_DOWN(end - start);
2783
2784 return 1;
2785 }
2786
2787 static int ask_cb(struct fdisk_context *c, struct fdisk_ask *ask, void *userdata) {
2788 char *result;
2789
2790 assert(c);
2791
2792 switch (fdisk_ask_get_type(ask)) {
2793
2794 case FDISK_ASKTYPE_STRING:
2795 result = new(char, 37);
2796 if (!result)
2797 return log_oom();
2798
2799 fdisk_ask_string_set_result(ask, sd_id128_to_uuid_string(*(sd_id128_t*) userdata, result));
2800 break;
2801
2802 default:
2803 log_debug("Unexpected question from libfdisk, ignoring.");
2804 }
2805
2806 return 0;
2807 }
2808
2809 static int apply_resize_partition(
2810 int fd,
2811 sd_id128_t disk_uuids,
2812 struct fdisk_table *t,
2813 struct fdisk_partition *p,
2814 size_t new_partition_size) {
2815
2816 _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
2817 _cleanup_free_ void *two_zero_lbas = NULL;
2818 uint32_t ssz;
2819 ssize_t n;
2820 int r;
2821
2822 assert(fd >= 0);
2823 assert(!t == !p);
2824
2825 if (!t) /* no partition table to apply, exit early */
2826 return 0;
2827
2828 assert(p);
2829
2830 /* Before writing our partition patch the final size in */
2831 r = fdisk_partition_size_explicit(p, 1);
2832 if (r < 0)
2833 return log_error_errno(r, "Failed to enable explicit partition size: %m");
2834
2835 r = fdisk_partition_set_size(p, new_partition_size / 512U);
2836 if (r < 0)
2837 return log_error_errno(r, "Failed to change partition size: %m");
2838
2839 r = probe_sector_size(fd, &ssz);
2840 if (r < 0)
2841 return log_error_errno(r, "Failed to determine current sector size: %m");
2842
2843 two_zero_lbas = malloc0(ssz * 2);
2844 if (!two_zero_lbas)
2845 return log_oom();
2846
2847 /* libfdisk appears to get confused by the existing PMBR. Let's explicitly flush it out. */
2848 n = pwrite(fd, two_zero_lbas, ssz * 2, 0);
2849 if (n < 0)
2850 return log_error_errno(errno, "Failed to wipe partition table: %m");
2851 if ((size_t) n != ssz * 2)
2852 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short write while wiping partition table.");
2853
2854 r = fdisk_new_context_at(fd, /* path= */ NULL, /* read_only= */ false, ssz, &c);
2855 if (r < 0)
2856 return log_error_errno(r, "Failed to open device: %m");
2857
2858 r = fdisk_create_disklabel(c, "gpt");
2859 if (r < 0)
2860 return log_error_errno(r, "Failed to create GPT disk label: %m");
2861
2862 r = fdisk_apply_table(c, t);
2863 if (r < 0)
2864 return log_error_errno(r, "Failed to apply partition table: %m");
2865
2866 r = fdisk_set_ask(c, ask_cb, &disk_uuids);
2867 if (r < 0)
2868 return log_error_errno(r, "Failed to set libfdisk query function: %m");
2869
2870 r = fdisk_set_disklabel_id(c);
2871 if (r < 0)
2872 return log_error_errno(r, "Failed to change disklabel ID: %m");
2873
2874 r = fdisk_write_disklabel(c);
2875 if (r < 0)
2876 return log_error_errno(r, "Failed to write disk label: %m");
2877
2878 return 1;
2879 }
2880
2881 /* Always keep at least 16M free, so that we can safely log in and update the user record while doing so */
2882 #define HOME_MIN_FREE (16U*1024U*1024U)
2883
2884 static int get_smallest_fs_size(int fd, uint64_t *ret) {
2885 uint64_t minsz, needed;
2886 struct statfs sfs;
2887
2888 assert(fd >= 0);
2889 assert(ret);
2890
2891 /* Determines the minimal disk size we might be able to shrink the file system referenced by the fd to. */
2892
2893 if (syncfs(fd) < 0) /* let's sync before we query the size, so that the values returned are accurate */
2894 return log_error_errno(errno, "Failed to synchronize home file system: %m");
2895
2896 if (fstatfs(fd, &sfs) < 0)
2897 return log_error_errno(errno, "Failed to statfs() home file system: %m");
2898
2899 /* Let's determine the minimal file system size of the used fstype */
2900 minsz = minimal_size_by_fs_magic(sfs.f_type);
2901 if (minsz == UINT64_MAX)
2902 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Don't know minimum file system size of file system type '%s' of home directory.", fs_type_to_string(sfs.f_type));
2903
2904 if (minsz < USER_DISK_SIZE_MIN)
2905 minsz = USER_DISK_SIZE_MIN;
2906
2907 if (sfs.f_bfree > sfs.f_blocks)
2908 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Detected amount of free blocks is greater than the total amount of file system blocks. Refusing.");
2909
2910 /* Calculate how much disk space is currently in use. */
2911 needed = sfs.f_blocks - sfs.f_bfree;
2912 if (needed > UINT64_MAX / sfs.f_bsize)
2913 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "File system size out of range.");
2914
2915 needed *= sfs.f_bsize;
2916
2917 /* Add some safety margin of free space we'll always keep */
2918 if (needed > UINT64_MAX - HOME_MIN_FREE) /* Check for overflow */
2919 needed = UINT64_MAX;
2920 else
2921 needed += HOME_MIN_FREE;
2922
2923 *ret = DISK_SIZE_ROUND_UP(MAX(needed, minsz));
2924 return 0;
2925 }
2926
2927 static int get_largest_image_size(int fd, const struct stat *st, uint64_t *ret) {
2928 uint64_t used, avail, sum;
2929 struct statfs sfs;
2930 int r;
2931
2932 assert(fd >= 0);
2933 assert(st);
2934 assert(ret);
2935
2936 /* Determines the maximum file size we might be able to grow the image file referenced by the fd to. */
2937
2938 r = stat_verify_regular(st);
2939 if (r < 0)
2940 return log_error_errno(r, "Image file is not a regular file, refusing: %m");
2941
2942 if (syncfs(fd) < 0)
2943 return log_error_errno(errno, "Failed to synchronize file system backing image file: %m");
2944
2945 if (fstatfs(fd, &sfs) < 0)
2946 return log_error_errno(errno, "Failed to statfs() image file: %m");
2947
2948 used = (uint64_t) st->st_blocks * 512;
2949 avail = (uint64_t) sfs.f_bsize * sfs.f_bavail;
2950
2951 if (avail > UINT64_MAX - used)
2952 sum = UINT64_MAX;
2953 else
2954 sum = avail + used;
2955
2956 *ret = DISK_SIZE_ROUND_DOWN(MIN(sum, USER_DISK_SIZE_MAX));
2957 return 0;
2958 }
2959
2960 static int resize_fs_loop(
2961 UserRecord *h,
2962 HomeSetup *setup,
2963 int resize_type,
2964 uint64_t old_fs_size,
2965 uint64_t new_fs_size,
2966 uint64_t *ret_fs_size) {
2967
2968 uint64_t current_fs_size;
2969 unsigned n_iterations = 0;
2970 int r;
2971
2972 assert(h);
2973 assert(setup);
2974 assert(setup->root_fd >= 0);
2975
2976 /* A bisection loop trying to find the closest size to what the user asked for. (Well, we bisect like
2977 * this only when we *shrink* the fs — if we grow the fs there's no need to bisect.) */
2978
2979 current_fs_size = old_fs_size;
2980 for (uint64_t lower_boundary = new_fs_size, upper_boundary = old_fs_size, try_fs_size = new_fs_size;;) {
2981 bool worked;
2982
2983 n_iterations++;
2984
2985 /* Now resize the file system */
2986 if (resize_type == CAN_RESIZE_ONLINE) {
2987 r = resize_fs(setup->root_fd, try_fs_size, NULL);
2988 if (r < 0) {
2989 if (!ERRNO_IS_DISK_SPACE(r) || new_fs_size > old_fs_size) /* Not a disk space issue? Not trying to shrink? */
2990 return log_error_errno(r, "Failed to resize file system: %m");
2991
2992 log_debug_errno(r, "Shrinking from %s to %s didn't work, not enough space for contained data.", FORMAT_BYTES(current_fs_size), FORMAT_BYTES(try_fs_size));
2993 worked = false;
2994 } else {
2995 log_debug("Successfully resized from %s to %s.", FORMAT_BYTES(current_fs_size), FORMAT_BYTES(try_fs_size));
2996 current_fs_size = try_fs_size;
2997 worked = true;
2998 }
2999
3000 /* If we hit a disk space issue and are shrinking the fs, then maybe it helps to
3001 * increase the image size. */
3002 } else {
3003 r = ext4_offline_resize_fs(setup, try_fs_size, user_record_luks_discard(h), user_record_mount_flags(h), h->luks_extra_mount_options);
3004 if (r < 0)
3005 return r;
3006
3007 /* For now, when we fail to shrink an ext4 image we'll not try again via the
3008 * bisection logic. We might add that later, but given this involves shelling out
3009 * multiple programs, it's a bit too cumbersome for my taste. */
3010
3011 worked = true;
3012 current_fs_size = try_fs_size;
3013 }
3014
3015 if (new_fs_size > old_fs_size) /* If we are growing we are done after one iteration */
3016 break;
3017
3018 /* If we are shrinking then let's adjust our bisection boundaries and try again. */
3019 if (worked)
3020 upper_boundary = MIN(upper_boundary, try_fs_size);
3021 else
3022 lower_boundary = MAX(lower_boundary, try_fs_size);
3023
3024 /* OK, this attempt to shrink didn't work. Let's try between the old size and what worked. */
3025 if (lower_boundary >= upper_boundary) {
3026 log_debug("Image can't be shrunk further (range to try is empty).");
3027 break;
3028 }
3029
3030 /* Let's find a new value to try half-way between the lower boundary and the upper boundary
3031 * to try now. */
3032 try_fs_size = DISK_SIZE_ROUND_DOWN(lower_boundary + (upper_boundary - lower_boundary) / 2);
3033 if (try_fs_size <= lower_boundary || try_fs_size >= upper_boundary) {
3034 log_debug("Image can't be shrunk further (remaining range to try too small).");
3035 break;
3036 }
3037 }
3038
3039 log_debug("Bisection loop completed after %u iterations.", n_iterations);
3040
3041 if (ret_fs_size)
3042 *ret_fs_size = current_fs_size;
3043
3044 return 0;
3045 }
3046
3047 static int resize_image_loop(
3048 UserRecord *h,
3049 HomeSetup *setup,
3050 uint64_t old_image_size,
3051 uint64_t new_image_size,
3052 uint64_t *ret_image_size) {
3053
3054 uint64_t current_image_size;
3055 unsigned n_iterations = 0;
3056 int r;
3057
3058 assert(h);
3059 assert(setup);
3060 assert(setup->image_fd >= 0);
3061
3062 /* A bisection loop trying to find the closest size to what the user asked for. (Well, we bisect like
3063 * this only when we *grow* the image — if we shrink the image then there's no need to bisect.) */
3064
3065 current_image_size = old_image_size;
3066 for (uint64_t lower_boundary = old_image_size, upper_boundary = new_image_size, try_image_size = new_image_size;;) {
3067 bool worked;
3068
3069 n_iterations++;
3070
3071 r = home_truncate(h, setup->image_fd, try_image_size);
3072 if (r < 0) {
3073 if (!ERRNO_IS_DISK_SPACE(r) || new_image_size < old_image_size) /* Not a disk space issue? Not trying to grow? */
3074 return r;
3075
3076 log_debug_errno(r, "Growing from %s to %s didn't work, not enough space on backing disk.", FORMAT_BYTES(current_image_size), FORMAT_BYTES(try_image_size));
3077 worked = false;
3078 } else if (r > 0) { /* Success: allocation worked */
3079 log_debug("Resizing from %s to %s via allocation worked successfully.", FORMAT_BYTES(current_image_size), FORMAT_BYTES(try_image_size));
3080 current_image_size = try_image_size;
3081 worked = true;
3082 } else { /* Success, but through truncation, not allocation. */
3083 log_debug("Resizing from %s to %s via truncation worked successfully.", FORMAT_BYTES(old_image_size), FORMAT_BYTES(try_image_size));
3084 current_image_size = try_image_size;
3085 break; /* there's no point in the bisection logic if this was plain truncation and
3086 * not allocation, let's exit immediately. */
3087 }
3088
3089 if (new_image_size < old_image_size) /* If we are shrinking we are done after one iteration */
3090 break;
3091
3092 /* If we are growing then let's adjust our bisection boundaries and try again */
3093 if (worked)
3094 lower_boundary = MAX(lower_boundary, try_image_size);
3095 else
3096 upper_boundary = MIN(upper_boundary, try_image_size);
3097
3098 if (lower_boundary >= upper_boundary) {
3099 log_debug("Image can't be grown further (range to try is empty).");
3100 break;
3101 }
3102
3103 try_image_size = DISK_SIZE_ROUND_DOWN(lower_boundary + (upper_boundary - lower_boundary) / 2);
3104 if (try_image_size <= lower_boundary || try_image_size >= upper_boundary) {
3105 log_debug("Image can't be grown further (remaining range to try too small).");
3106 break;
3107 }
3108 }
3109
3110 log_debug("Bisection loop completed after %u iterations.", n_iterations);
3111
3112 if (ret_image_size)
3113 *ret_image_size = current_image_size;
3114
3115 return 0;
3116 }
3117
/* Resizes a LUKS home directory: the backing image file (if it is a regular file), the partition inside
 * it, the LUKS device and the file system on top.
 *
 * The target size is h->disk_size, or "as small as possible" if HOME_SETUP_RESIZE_MINIMIZE is set in
 * 'flags'. For regular image files the size applies to the image file as a whole; for block devices the
 * device itself is never resized and the size applies to the partition only.
 *
 * When growing, the layers are enlarged from the outside in (image file, loop device, partition table,
 * LUKS device, file system). When shrinking, they are reduced from the inside out (file system, LUKS
 * device, loop device, image file, partition table).
 *
 * Returns 0 both when the resize was carried out and when it was skipped because nothing needed to be (or
 * could be) done; returns a negative errno-style error on failure. *ret_home (if non-NULL) is only set on
 * the path where the resize was actually carried out; the early "skipping operation" returns leave it
 * untouched. */
3118 int home_resize_luks(
3119 UserRecord *h,
3120 HomeSetupFlags flags,
3121 HomeSetup *setup,
3122 PasswordCache *cache,
3123 UserRecord **ret_home) {
3124
3125 uint64_t old_image_size, new_image_size, old_fs_size, new_fs_size, crypto_offset, crypto_offset_bytes,
3126 new_partition_size, smallest_fs_size, resized_fs_size;
3127 _cleanup_(user_record_unrefp) UserRecord *header_home = NULL, *embedded_home = NULL, *new_home = NULL;
3128 _cleanup_(fdisk_unref_tablep) struct fdisk_table *table = NULL;
3129 struct fdisk_partition *partition = NULL;
3130 _cleanup_close_ int opened_image_fd = -EBADF;
3131 _cleanup_free_ char *whole_disk = NULL;
3132 int r, resize_type, image_fd = -EBADF;
3133 sd_id128_t disk_uuid;
3134 const char *ip, *ipo;
3135 struct statfs sfs;
3136 struct stat st;
3137 enum {
3138 INTENTION_DONT_KNOW = 0, /* These happen to match the return codes of CMP() */
3139 INTENTION_SHRINK = -1,
3140 INTENTION_GROW = 1,
3141 } intention = INTENTION_DONT_KNOW;
3142
3143 assert(h);
3144 assert(user_record_storage(h) == USER_LUKS);
3145 assert(setup);
3146
3147 r = dlopen_cryptsetup();
3148 if (r < 0)
3149 return r;
3150
3151 assert_se(ipo = user_record_image_path(h));
3152 ip = strdupa_safe(ipo); /* copy out since original might change later in home record object */
3153
/* Use the image fd the caller already set up, or open the image ourselves. Either way 'st' describes the
 * image afterwards. */
3154 if (setup->image_fd < 0) {
3155 setup->image_fd = open_image_file(h, NULL, &st);
3156 if (setup->image_fd < 0)
3157 return setup->image_fd;
3158 } else {
3159 if (fstat(setup->image_fd, &st) < 0)
3160 return log_error_errno(errno, "Failed to stat image file %s: %m", ip);
3161 }
3162
3163 image_fd = setup->image_fd;
3164
3165 if (S_ISBLK(st.st_mode)) {
3166 dev_t parent;
3167
3168 r = block_get_whole_disk(st.st_rdev, &parent);
3169 if (r < 0)
3170 return log_error_errno(r, "Failed to acquire whole block device for %s: %m", ip);
3171 if (r > 0) {
3172 /* If we shall resize a file system on a partition device, then let's figure out the
3173 * whole disk device and operate on that instead, since we need to rewrite the
3174 * partition table to resize the partition. */
3175
3176 log_info("Operating on partition device %s, using parent device.", ip);
3177
3178 opened_image_fd = r = device_open_from_devnum(S_IFBLK, parent, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK, &whole_disk);
3179 if (r < 0)
3180 return log_error_errno(r, "Failed to open whole block device for %s: %m", ip);
3181
3182 image_fd = opened_image_fd;
3183
/* From here on 'st' describes the whole disk rather than the partition device. */
3184 if (fstat(image_fd, &st) < 0)
3185 return log_error_errno(errno, "Failed to stat whole block device %s: %m", whole_disk);
3186 } else
3187 log_info("Operating on whole block device %s.", ip);
3188
3189 if (ioctl(image_fd, BLKGETSIZE64, &old_image_size) < 0)
3190 return log_error_errno(errno, "Failed to determine size of original block device: %m");
3191
3192 if (flock(image_fd, LOCK_EX) < 0) /* make sure udev doesn't read from it while we operate on the device */
3193 return log_error_errno(errno, "Failed to lock block device %s: %m", ip);
3194
3195 new_image_size = old_image_size; /* we can't resize physical block devices */
3196 } else {
3197 r = stat_verify_regular(&st);
3198 if (r < 0)
3199 return log_error_errno(r, "Image %s is not a block device nor regular file: %m", ip);
3200
3201 old_image_size = st.st_size;
3202
3203 /* Note an asymmetry here: when we operate on loopback files the specified disk size we get we
3204 * apply onto the loopback file as a whole. When we operate on block devices we instead apply
3205 * to the partition itself only. */
3206
3207 if (FLAGS_SET(flags, HOME_SETUP_RESIZE_MINIMIZE)) {
3208 new_image_size = 0;
3209 intention = INTENTION_SHRINK;
3210 } else {
3211 uint64_t new_image_size_rounded;
3212
3213 new_image_size_rounded = DISK_SIZE_ROUND_DOWN(h->disk_size);
3214
3215 if (old_image_size >= new_image_size_rounded && old_image_size <= h->disk_size) {
3216 /* If exact match, or a match after we rounded down, don't do a thing */
3217 log_info("Image size already matching, skipping operation.");
3218 return 0;
3219 }
3220
3221 new_image_size = new_image_size_rounded;
3222 intention = CMP(new_image_size, old_image_size); /* Is this a shrink */
3223 }
3224 }
3225
/* NOTE(review): the code below reads setup->root_fd, setup->crypt_device, setup->dm_name, setup->loop and
 * setup->partition_offset/partition_size; presumably home_setup_luks() is what makes these valid — confirm
 * against its implementation. */
3226 r = home_setup_luks(
3227 h,
3228 flags,
3229 whole_disk,
3230 setup,
3231 cache,
3232 FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES) ? NULL : &header_home);
3233 if (r < 0)
3234 return r;
3235
3236 if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
3237 r = home_load_embedded_identity(h, setup->root_fd, header_home, USER_RECONCILE_REQUIRE_NEWER_OR_EQUAL, cache, &embedded_home, &new_home);
3238 if (r < 0)
3239 return r;
3240 }
3241
3242 r = home_maybe_shift_uid(h, flags, setup);
3243 if (r < 0)
3244 return r;
3245
3246 log_info("offset = %" PRIu64 ", size = %" PRIu64 ", image = %" PRIu64, setup->partition_offset, setup->partition_size, old_image_size);
3247
/* Sanity check (overflow-safe): the existing partition must lie entirely within the existing image. */
3248 if ((UINT64_MAX - setup->partition_offset) < setup->partition_size ||
3249 setup->partition_offset + setup->partition_size > old_image_size)
3250 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Old partition doesn't fit in backing storage, refusing.");
3251
3252 /* Get target partition information in here for new_partition_size calculation */
3253 r = prepare_resize_partition(
3254 image_fd,
3255 setup->partition_offset,
3256 setup->partition_size,
3257 &disk_uuid,
3258 &table,
3259 &partition);
3260 if (r < 0)
3261 return r;
3262
3263 if (S_ISREG(st.st_mode)) {
3264 uint64_t partition_table_extra, largest_size;
3265
/* Everything in the image file that is not the partition itself; this amount is kept as-is, only the
 * partition is resized. */
3266 partition_table_extra = old_image_size - setup->partition_size;
3267
3268 r = get_largest_image_size(setup->image_fd, &st, &largest_size);
3269 if (r < 0)
3270 return r;
3271 if (new_image_size > largest_size)
3272 new_image_size = largest_size;
3273
3274 if (new_image_size < partition_table_extra)
3275 new_image_size = partition_table_extra;
3276
3277 new_partition_size = DISK_SIZE_ROUND_DOWN(new_image_size - partition_table_extra);
3278 } else {
3279 assert(S_ISBLK(st.st_mode));
3280
3281 if (FLAGS_SET(flags, HOME_SETUP_RESIZE_MINIMIZE)) {
3282 new_partition_size = 0;
3283 intention = INTENTION_SHRINK;
3284 } else {
3285 uint64_t new_partition_size_rounded = DISK_SIZE_ROUND_DOWN(h->disk_size);
3286
/* A disk size of UINT64_MAX is replaced by whatever get_maximum_partition_size() reports for this
 * partition. */
3287 if (h->disk_size == UINT64_MAX && partition) {
3288 r = get_maximum_partition_size(image_fd, partition, &new_partition_size_rounded);
3289 if (r < 0)
3290 return r;
3291 }
3292
3293 if (setup->partition_size >= new_partition_size_rounded &&
3294 setup->partition_size <= h->disk_size) {
3295 log_info("Partition size already matching, skipping operation.");
3296 return 0;
3297 }
3298
3299 new_partition_size = new_partition_size_rounded;
3300 intention = CMP(new_partition_size, setup->partition_size);
3301 }
3302 }
3303
3304 if ((UINT64_MAX - setup->partition_offset) < new_partition_size ||
3305 setup->partition_offset + new_partition_size > new_image_size)
3306 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "New partition doesn't fit into backing storage, refusing.");
3307
/* The data offset libcryptsetup reports is treated as a count of 512 byte units here and converted to
 * bytes, with an overflow check first. */
3308 crypto_offset = sym_crypt_get_data_offset(setup->crypt_device);
3309 if (crypto_offset > UINT64_MAX/512U)
3310 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS2 data offset out of range, refusing.");
3311 crypto_offset_bytes = (uint64_t) crypto_offset * 512U;
3312 if (setup->partition_size <= crypto_offset_bytes)
3313 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Weird, old crypto payload offset doesn't actually fit in partition size?");
3314
3315 /* Make sure at least the LUKS header fit in */
3316 if (new_partition_size <= crypto_offset_bytes) {
3317 uint64_t add;
3318
3319 add = DISK_SIZE_ROUND_UP(crypto_offset_bytes) - new_partition_size;
3320 new_partition_size += add;
3321 if (S_ISREG(st.st_mode))
3322 new_image_size += add;
3323 }
3324
/* The file system occupies whatever part of the partition follows the LUKS data offset. */
3325 old_fs_size = setup->partition_size - crypto_offset_bytes;
3326 new_fs_size = DISK_SIZE_ROUND_DOWN(new_partition_size - crypto_offset_bytes);
3327
3328 r = get_smallest_fs_size(setup->root_fd, &smallest_fs_size);
3329 if (r < 0)
3330 return r;
3331
/* Never go below the smallest size the file system can take; bump all enclosing layers by the same
 * amount. */
3332 if (new_fs_size < smallest_fs_size) {
3333 uint64_t add;
3334
3335 add = DISK_SIZE_ROUND_UP(smallest_fs_size) - new_fs_size;
3336 new_fs_size += add;
3337 new_partition_size += add;
3338 if (S_ISREG(st.st_mode))
3339 new_image_size += add;
3340 }
3341
3342 if (new_fs_size == old_fs_size) {
3343 log_info("New file system size identical to old file system size, skipping operation.");
3344 return 0;
3345 }
3346
3347 if (FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_GROW) && new_fs_size > old_fs_size) {
3348 log_info("New file system size would be larger than old, but shrinking requested, skipping operation.");
3349 return 0;
3350 }
3351
3352 if (FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SHRINK) && new_fs_size < old_fs_size) {
3353 log_info("New file system size would be smaller than old, but growing requested, skipping operation.");
3354 return 0;
3355 }
3356
/* The clamping and rounding above may have flipped the direction of the resize relative to what was
 * originally asked for (as recorded in 'intention'); if so, do nothing. */
3357 if (CMP(new_fs_size, old_fs_size) != intention) {
3358 if (intention < 0)
3359 log_info("Shrink operation would enlarge file system, skipping operation.");
3360 else {
3361 assert(intention > 0);
3362 log_info("Grow operation would shrink file system, skipping operation.");
3363 }
3364 return 0;
3365 }
3366
3367 /* Before we start doing anything, let's figure out if we actually can */
3368 resize_type = can_resize_fs(setup->root_fd, old_fs_size, new_fs_size);
3369 if (resize_type < 0)
3370 return resize_type;
3371 if (resize_type == CAN_RESIZE_OFFLINE && FLAGS_SET(flags, HOME_SETUP_ALREADY_ACTIVATED))
3372 return log_error_errno(SYNTHETIC_ERRNO(ETXTBSY), "File systems of this type can only be resized offline, but is currently online.");
3373
3374 log_info("Ready to resize image size %s %s %s, partition size %s %s %s, file system size %s %s %s.",
3375 FORMAT_BYTES(old_image_size),
3376 special_glyph(SPECIAL_GLYPH_ARROW_RIGHT),
3377 FORMAT_BYTES(new_image_size),
3378 FORMAT_BYTES(setup->partition_size),
3379 special_glyph(SPECIAL_GLYPH_ARROW_RIGHT),
3380 FORMAT_BYTES(new_partition_size),
3381 FORMAT_BYTES(old_fs_size),
3382 special_glyph(SPECIAL_GLYPH_ARROW_RIGHT),
3383 FORMAT_BYTES(new_fs_size));
3384
/* Growing, part 1: enlarge everything *around* the file system first (image file, loop device,
 * partition table, LUKS device). The file system itself is grown further down via resize_fs_loop(). */
3385 if (new_fs_size > old_fs_size) { /* → Grow */
3386
3387 if (S_ISREG(st.st_mode)) {
3388 uint64_t resized_image_size;
3389
3390 /* Grow file size */
3391 r = resize_image_loop(h, setup, old_image_size, new_image_size, &resized_image_size);
3392 if (r < 0)
3393 return r;
3394
3395 if (resized_image_size == old_image_size) {
3396 log_info("Couldn't change image size.");
3397 return 0;
3398 }
3399
3400 assert(resized_image_size > old_image_size);
3401
3402 log_info("Growing of image file from %s to %s completed.", FORMAT_BYTES(old_image_size), FORMAT_BYTES(resized_image_size));
3403
3404 if (resized_image_size < new_image_size) {
3405 uint64_t sub;
3406
3407 /* If the growing we managed to do is smaller than what we wanted we need to
3408 * adjust the partition/file system sizes we are going for, too */
3409 sub = new_image_size - resized_image_size;
3410 assert(new_partition_size >= sub);
3411 new_partition_size -= sub;
3412 assert(new_fs_size >= sub);
3413 new_fs_size -= sub;
3414 }
3415
3416 new_image_size = resized_image_size;
3417 } else {
3418 assert(S_ISBLK(st.st_mode));
3419 assert(new_image_size == old_image_size);
3420 }
3421
3422 /* Make sure loopback device sees the new bigger size */
3423 r = loop_device_refresh_size(setup->loop, UINT64_MAX, new_partition_size);
3424 if (r == -ENOTTY)
3425 log_debug_errno(r, "Device is not a loopback device, not refreshing size.");
3426 else if (r < 0)
3427 return log_error_errno(r, "Failed to refresh loopback device size: %m");
3428 else
3429 log_info("Refreshing loop device size completed.");
3430
3431 r = apply_resize_partition(image_fd, disk_uuid, table, partition, new_partition_size);
3432 if (r < 0)
3433 return r;
3434 if (r > 0)
3435 log_info("Growing of partition completed.");
3436
3437 if (S_ISBLK(st.st_mode) && ioctl(image_fd, BLKRRPART, 0) < 0)
3438 log_debug_errno(errno, "BLKRRPART failed on block device, ignoring: %m");
3439
3440 /* Tell LUKS about the new bigger size too */
3441 r = sym_crypt_resize(setup->crypt_device, setup->dm_name, new_fs_size / 512U);
3442 if (r < 0)
3443 return log_error_errno(r, "Failed to grow LUKS device: %m");
3444
3445 log_info("LUKS device growing completed.");
3446 } else {
3447 /* → Shrink */
3448
/* When shrinking, the embedded identity is written *before* the file system is made smaller; when
 * growing it is written afterwards (see further down). */
3449 if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
3450 r = home_store_embedded_identity(new_home, setup->root_fd, h->uid, embedded_home);
3451 if (r < 0)
3452 return r;
3453 }
3454
3455 if (S_ISREG(st.st_mode)) {
3456 if (user_record_luks_discard(h))
3457 /* Before we shrink, let's trim the file system, so that we need less space on disk during the shrinking */
3458 (void) run_fitrim(setup->root_fd);
3459 else {
3460 /* If discard is off, let's ensure all backing blocks are allocated, so that our resize operation doesn't fail half-way */
3461 r = run_fallocate(image_fd, &st);
3462 if (r < 0)
3463 return r;
3464 }
3465 }
3466 }
3467
3468 /* Now try to resize the file system. The requested size might not always be possible, in which case
3469 * we'll try to get as close as we can get. The result is returned in 'resized_fs_size' */
3470 r = resize_fs_loop(h, setup, resize_type, old_fs_size, new_fs_size, &resized_fs_size);
3471 if (r < 0)
3472 return r;
3473
3474 if (resized_fs_size == old_fs_size) {
3475 log_info("Couldn't change file system size.");
3476 return 0;
3477 }
3478
3479 log_info("File system resizing from %s to %s completed.", FORMAT_BYTES(old_fs_size), FORMAT_BYTES(resized_fs_size));
3480
3481 if (resized_fs_size > new_fs_size) {
3482 uint64_t add;
3483
3484 /* If the shrinking we managed to do is larger than what we wanted we need to adjust the partition/image sizes. */
3485 add = resized_fs_size - new_fs_size;
3486 new_partition_size += add;
3487 if (S_ISREG(st.st_mode))
3488 new_image_size += add;
3489 }
3490
3491 new_fs_size = resized_fs_size;
3492
3493 /* Immediately sync afterwards */
3494 r = home_sync_and_statfs(setup->root_fd, NULL);
3495 if (r < 0)
3496 return r;
3497
/* Shrinking, part 2: now that the file system is smaller, reduce the layers around it from the inside
 * out: LUKS device, loop device, image file, partition table. */
3498 if (new_fs_size < old_fs_size) { /* → Shrink */
3499
3500 /* Shrink the LUKS device now, matching the new file system size */
3501 r = sym_crypt_resize(setup->crypt_device, setup->dm_name, new_fs_size / 512);
3502 if (r < 0)
3503 return log_error_errno(r, "Failed to shrink LUKS device: %m");
3504
3505 log_info("LUKS device shrinking completed.");
3506
3507 /* Refresh the loop devices size */
3508 r = loop_device_refresh_size(setup->loop, UINT64_MAX, new_partition_size);
3509 if (r == -ENOTTY)
3510 log_debug_errno(r, "Device is not a loopback device, not refreshing size.");
3511 else if (r < 0)
3512 return log_error_errno(r, "Failed to refresh loopback device size: %m");
3513 else
3514 log_info("Refreshing loop device size completed.");
3515
3516 if (S_ISREG(st.st_mode)) {
3517 /* Shrink the image file */
3518 if (ftruncate(image_fd, new_image_size) < 0)
3519 return log_error_errno(errno, "Failed to shrink image file %s: %m", ip);
3520
3521 log_info("Shrinking of image file completed.");
3522 } else {
3523 assert(S_ISBLK(st.st_mode));
3524 assert(new_image_size == old_image_size);
3525 }
3526
3527 r = apply_resize_partition(image_fd, disk_uuid, table, partition, new_partition_size);
3528 if (r < 0)
3529 return r;
3530 if (r > 0)
3531 log_info("Shrinking of partition completed.");
3532
3533 if (S_ISBLK(st.st_mode) && ioctl(image_fd, BLKRRPART, 0) < 0)
3534 log_debug_errno(errno, "BLKRRPART failed on block device, ignoring: %m");
3535
3536 } else { /* → Grow */
3537 if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
3538 r = home_store_embedded_identity(new_home, setup->root_fd, h->uid, embedded_home);
3539 if (r < 0)
3540 return r;
3541 }
3542 }
3543
3544 if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
3545 r = home_store_header_identity_luks(new_home, setup, header_home);
3546 if (r < 0)
3547 return r;
3548
3549 r = home_extend_embedded_identity(new_home, h, setup);
3550 if (r < 0)
3551 return r;
3552 }
3553
/* Best-effort trim after the resize; failures are deliberately ignored. */
3554 if (user_record_luks_discard(h))
3555 (void) run_fitrim(setup->root_fd);
3556
3557 r = home_sync_and_statfs(setup->root_fd, &sfs);
3558 if (r < 0)
3559 return r;
3560
3561 if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_UNDO)) {
3562 r = home_setup_done(setup);
3563 if (r < 0)
3564 return r;
3565 }
3566
3567 log_info("Resizing completed.");
3568
3569 print_size_summary(new_image_size, new_fs_size, &sfs);
3570
3571 if (ret_home)
3572 *ret_home = TAKE_PTR(new_home);
3573
3574 return 0;
3575 }
3576
3577 int home_passwd_luks(
3578 UserRecord *h,
3579 HomeSetupFlags flags,
3580 HomeSetup *setup,
3581 const PasswordCache *cache, /* the passwords acquired via PKCS#11/FIDO2 security tokens */
3582 char **effective_passwords /* new passwords */) {
3583
3584 size_t volume_key_size, max_key_slots, n_effective;
3585 _cleanup_(erase_and_freep) void *volume_key = NULL;
3586 struct crypt_pbkdf_type good_pbkdf, minimal_pbkdf;
3587 const char *type;
3588 char **list;
3589 int r;
3590
3591 assert(h);
3592 assert(user_record_storage(h) == USER_LUKS);
3593 assert(setup);
3594
3595 r = dlopen_cryptsetup();
3596 if (r < 0)
3597 return r;
3598
3599 type = sym_crypt_get_type(setup->crypt_device);
3600 if (!type)
3601 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine crypto device type.");
3602
3603 r = sym_crypt_keyslot_max(type);
3604 if (r <= 0)
3605 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine number of key slots.");
3606 max_key_slots = r;
3607
3608 r = sym_crypt_get_volume_key_size(setup->crypt_device);
3609 if (r <= 0)
3610 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine volume key size.");
3611 volume_key_size = (size_t) r;
3612
3613 volume_key = malloc(volume_key_size);
3614 if (!volume_key)
3615 return log_oom();
3616
3617 r = -ENOKEY;
3618 FOREACH_POINTER(list,
3619 cache ? cache->keyring_passswords : NULL,
3620 cache ? cache->pkcs11_passwords : NULL,
3621 cache ? cache->fido2_passwords : NULL,
3622 h->password) {
3623
3624 r = luks_try_passwords(h, setup->crypt_device, list, volume_key, &volume_key_size, NULL);
3625 if (r != -ENOKEY)
3626 break;
3627 }
3628 if (r == -ENOKEY)
3629 return log_error_errno(SYNTHETIC_ERRNO(ENOKEY), "Failed to unlock LUKS superblock with supplied passwords.");
3630 if (r < 0)
3631 return log_error_errno(r, "Failed to unlock LUKS superblock: %m");
3632
3633 n_effective = strv_length(effective_passwords);
3634
3635 build_good_pbkdf(&good_pbkdf, h);
3636 build_minimal_pbkdf(&minimal_pbkdf, h);
3637
3638 for (size_t i = 0; i < max_key_slots; i++) {
3639 r = sym_crypt_keyslot_destroy(setup->crypt_device, i);
3640 if (r < 0 && !IN_SET(r, -ENOENT, -EINVAL)) /* Returns EINVAL or ENOENT if there's no key in this slot already */
3641 return log_error_errno(r, "Failed to destroy LUKS password: %m");
3642
3643 if (i >= n_effective) {
3644 if (r >= 0)
3645 log_info("Destroyed LUKS key slot %zu.", i);
3646 continue;
3647 }
3648
3649 if (password_cache_contains(cache, effective_passwords[i])) { /* Is this a FIDO2 or PKCS#11 password? */
3650 log_debug("Using minimal PBKDF for slot %zu", i);
3651 r = sym_crypt_set_pbkdf_type(setup->crypt_device, &minimal_pbkdf);
3652 } else {
3653 log_debug("Using good PBKDF for slot %zu", i);
3654 r = sym_crypt_set_pbkdf_type(setup->crypt_device, &good_pbkdf);
3655 }
3656 if (r < 0)
3657 return log_error_errno(r, "Failed to tweak PBKDF for slot %zu: %m", i);
3658
3659 r = sym_crypt_keyslot_add_by_volume_key(
3660 setup->crypt_device,
3661 i,
3662 volume_key,
3663 volume_key_size,
3664 effective_passwords[i],
3665 strlen(effective_passwords[i]));
3666 if (r < 0)
3667 return log_error_errno(r, "Failed to set up LUKS password: %m");
3668
3669 log_info("Updated LUKS key slot %zu.", i);
3670
3671 /* If we changed the password, then make sure to update the copy in the keyring, so that
3672 * auto-rebalance continues to work. We only do this if we operate on an active home dir. */
3673 if (i == 0 && FLAGS_SET(flags, HOME_SETUP_ALREADY_ACTIVATED))
3674 upload_to_keyring(h, effective_passwords[i], NULL);
3675 }
3676
3677 return 1;
3678 }
3679
3680 int home_lock_luks(UserRecord *h, HomeSetup *setup) {
3681 const char *p;
3682 int r;
3683
3684 assert(h);
3685 assert(setup);
3686 assert(setup->root_fd < 0);
3687 assert(!setup->crypt_device);
3688
3689 r = acquire_open_luks_device(h, setup, /* graceful= */ false);
3690 if (r < 0)
3691 return r;
3692
3693 log_info("Discovered used LUKS device %s.", setup->dm_node);
3694
3695 assert_se(p = user_record_home_directory(h));
3696 r = syncfs_path(AT_FDCWD, p);
3697 if (r < 0) /* Snake oil, but let's better be safe than sorry */
3698 return log_error_errno(r, "Failed to synchronize file system %s: %m", p);
3699
3700 log_info("File system synchronized.");
3701
3702 /* Note that we don't invoke FIFREEZE here, it appears libcryptsetup/device-mapper already does that on its own for us */
3703
3704 r = sym_crypt_suspend(setup->crypt_device, setup->dm_name);
3705 if (r < 0)
3706 return log_error_errno(r, "Failed to suspend cryptsetup device: %s: %m", setup->dm_node);
3707
3708 log_info("LUKS device suspended.");
3709 return 0;
3710 }
3711
3712 static int luks_try_resume(
3713 struct crypt_device *cd,
3714 const char *dm_name,
3715 char **password) {
3716
3717 int r;
3718
3719 assert(cd);
3720 assert(dm_name);
3721
3722 STRV_FOREACH(pp, password) {
3723 r = sym_crypt_resume_by_passphrase(
3724 cd,
3725 dm_name,
3726 CRYPT_ANY_SLOT,
3727 *pp,
3728 strlen(*pp));
3729 if (r >= 0) {
3730 log_info("Resumed LUKS device %s.", dm_name);
3731 return 0;
3732 }
3733
3734 log_debug_errno(r, "Password %zu didn't work for resuming device: %m", (size_t) (pp - password));
3735 }
3736
3737 return -ENOKEY;
3738 }
3739
3740 int home_unlock_luks(UserRecord *h, HomeSetup *setup, const PasswordCache *cache) {
3741 char **list;
3742 int r;
3743
3744 assert(h);
3745 assert(setup);
3746 assert(!setup->crypt_device);
3747
3748 r = acquire_open_luks_device(h, setup, /* graceful= */ false);
3749 if (r < 0)
3750 return r;
3751
3752 log_info("Discovered used LUKS device %s.", setup->dm_node);
3753
3754 r = -ENOKEY;
3755 FOREACH_POINTER(list,
3756 cache ? cache->pkcs11_passwords : NULL,
3757 cache ? cache->fido2_passwords : NULL,
3758 h->password) {
3759 r = luks_try_resume(setup->crypt_device, setup->dm_name, list);
3760 if (r != -ENOKEY)
3761 break;
3762 }
3763 if (r == -ENOKEY)
3764 return log_error_errno(r, "No valid password for LUKS superblock.");
3765 if (r < 0)
3766 return log_error_errno(r, "Failed to resume LUKS superblock: %m");
3767
3768 log_info("LUKS device resumed.");
3769 return 0;
3770 }
3771
3772 static int device_is_gone(HomeSetup *setup) {
3773 _cleanup_(sd_device_unrefp) sd_device *d = NULL;
3774 struct stat st;
3775 int r;
3776
3777 assert(setup);
3778
3779 if (!setup->dm_node)
3780 return true;
3781
3782 if (stat(setup->dm_node, &st) < 0) {
3783 if (errno != ENOENT)
3784 return log_error_errno(errno, "Failed to stat block device node %s: %m", setup->dm_node);
3785
3786 return true;
3787 }
3788
3789 r = sd_device_new_from_stat_rdev(&d, &st);
3790 if (r < 0) {
3791 if (r != -ENODEV)
3792 return log_error_errno(errno, "Failed to allocate device object from block device node %s: %m", setup->dm_node);
3793
3794 return true;
3795 }
3796
3797 return false;
3798 }
3799
3800 static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
3801 HomeSetup *setup = ASSERT_PTR(userdata);
3802 int r;
3803
3804 if (!device_for_action(device, SD_DEVICE_REMOVE))
3805 return 0;
3806
3807 /* We don't really care for the device object passed to us, we just check if the device node still
3808 * exists */
3809
3810 r = device_is_gone(setup);
3811 if (r < 0)
3812 return r;
3813 if (r > 0) /* Yay! we are done! */
3814 (void) sd_event_exit(sd_device_monitor_get_event(monitor), 0);
3815
3816 return 0;
3817 }
3818
3819 int wait_for_block_device_gone(HomeSetup *setup, usec_t timeout_usec) {
3820 _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *m = NULL;
3821 _cleanup_(sd_event_unrefp) sd_event *event = NULL;
3822 int r;
3823
3824 assert(setup);
3825
3826 /* So here's the thing: we enable "deferred deactivation" on our dm-crypt volumes. This means they
3827 * are automatically torn down once not used anymore (i.e. once unmounted). Which is great. It also
3828 * means that when we deactivate a home directory and try to tear down the volume that backs it, it
3829 * possibly is already torn down or in the process of being torn down, since we race against the
3830 * automatic tearing down. Which is fine, we handle errors from that. However, we lose the ability to
3831 * naturally wait for the tear down operation to complete: if we are not the ones who tear down the
3832 * device we are also not the ones who naturally block on that operation. Hence let's add some code
3833 * to actively wait for the device to go away, via sd-device. We'll call this whenever tearing down a
3834 * LUKS device, to ensure the device is really really gone before we proceed. Net effect: "homectl
3835 * deactivate foo && homectl activate foo" will work reliably, i.e. deactivation immediately followed
3836 * by activation will work. Also, by the time deactivation completes we can guarantee that all data
3837 * is sync'ed down to the lowest block layer as all higher levels are fully and entirely
3838 * destructed. */
3839
3840 if (!setup->dm_name)
3841 return 0;
3842
3843 assert(setup->dm_node);
3844 log_debug("Waiting until %s disappears.", setup->dm_node);
3845
3846 r = sd_event_new(&event);
3847 if (r < 0)
3848 return log_error_errno(r, "Failed to allocate event loop: %m");
3849
3850 r = sd_device_monitor_new(&m);
3851 if (r < 0)
3852 return log_error_errno(r, "Failed to allocate device monitor: %m");
3853
3854 r = sd_device_monitor_filter_add_match_subsystem_devtype(m, "block", "disk");
3855 if (r < 0)
3856 return log_error_errno(r, "Failed to configure device monitor match: %m");
3857
3858 r = sd_device_monitor_attach_event(m, event);
3859 if (r < 0)
3860 return log_error_errno(r, "Failed to attach device monitor to event loop: %m");
3861
3862 r = sd_device_monitor_start(m, device_monitor_handler, setup);
3863 if (r < 0)
3864 return log_error_errno(r, "Failed to start device monitor: %m");
3865
3866 r = device_is_gone(setup);
3867 if (r < 0)
3868 return r;
3869 if (r > 0) {
3870 log_debug("%s has already disappeared before entering wait loop.", setup->dm_node);
3871 return 0; /* gone already */
3872 }
3873
3874 if (timeout_usec != USEC_INFINITY) {
3875 r = sd_event_add_time_relative(event, NULL, CLOCK_MONOTONIC, timeout_usec, 0, NULL, NULL);
3876 if (r < 0)
3877 return log_error_errno(r, "Failed to add timer event: %m");
3878 }
3879
3880 r = sd_event_loop(event);
3881 if (r < 0)
3882 return log_error_errno(r, "Failed to run event loop: %m");
3883
3884 r = device_is_gone(setup);
3885 if (r < 0)
3886 return r;
3887 if (r == 0)
3888 return log_error_errno(r, "Device %s still around.", setup->dm_node);
3889
3890 log_debug("Successfully waited until device %s disappeared.", setup->dm_node);
3891 return 0;
3892 }
3893
3894 int home_auto_shrink_luks(UserRecord *h, HomeSetup *setup, PasswordCache *cache) {
3895 struct statfs sfs;
3896 int r;
3897
3898 assert(h);
3899 assert(user_record_storage(h) == USER_LUKS);
3900 assert(setup);
3901 assert(setup->root_fd >= 0);
3902
3903 if (user_record_auto_resize_mode(h) != AUTO_RESIZE_SHRINK_AND_GROW)
3904 return 0;
3905
3906 if (fstatfs(setup->root_fd, &sfs) < 0)
3907 return log_error_errno(errno, "Failed to statfs home directory: %m");
3908
3909 if (!fs_can_online_shrink_and_grow(sfs.f_type)) {
3910 log_debug("Not auto-shrinking file system, since selected file system cannot do both online shrink and grow.");
3911 return 0;
3912 }
3913
3914 r = home_resize_luks(
3915 h,
3916 HOME_SETUP_ALREADY_ACTIVATED|
3917 HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES|
3918 HOME_SETUP_RESIZE_MINIMIZE|
3919 HOME_SETUP_RESIZE_DONT_GROW|
3920 HOME_SETUP_RESIZE_DONT_UNDO,
3921 setup,
3922 cache,
3923 NULL);
3924 if (r < 0)
3925 return r;
3926
3927 return 1;
3928 }