]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/home/homed-manager.c
5f345b3d407664db18d440d0d7ee626a9fbb1fa4
[thirdparty/systemd.git] / src / home / homed-manager.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <grp.h>
4 #include <linux/fs.h>
5 #include <linux/magic.h>
6 #include <math.h>
7 #include <openssl/pem.h>
8 #include <pwd.h>
9 #include <sys/ioctl.h>
10 #include <sys/quota.h>
11 #include <sys/stat.h>
12
13 #include "sd-id128.h"
14
15 #include "btrfs-util.h"
16 #include "bus-common-errors.h"
17 #include "bus-error.h"
18 #include "bus-log-control-api.h"
19 #include "bus-polkit.h"
20 #include "clean-ipc.h"
21 #include "common-signal.h"
22 #include "conf-files.h"
23 #include "device-util.h"
24 #include "dirent-util.h"
25 #include "fd-util.h"
26 #include "fileio.h"
27 #include "format-util.h"
28 #include "fs-util.h"
29 #include "glyph-util.h"
30 #include "gpt.h"
31 #include "home-util.h"
32 #include "homed-conf.h"
33 #include "homed-home-bus.h"
34 #include "homed-home.h"
35 #include "homed-manager-bus.h"
36 #include "homed-manager.h"
37 #include "homed-varlink.h"
38 #include "io-util.h"
39 #include "mkdir.h"
40 #include "openssl-util.h"
41 #include "process-util.h"
42 #include "quota-util.h"
43 #include "random-util.h"
44 #include "resize-fs.h"
45 #include "rm-rf.h"
46 #include "socket-util.h"
47 #include "sort-util.h"
48 #include "stat-util.h"
49 #include "strv.h"
50 #include "sync-util.h"
51 #include "tmpfile-util.h"
52 #include "udev-util.h"
53 #include "user-record-sign.h"
54 #include "user-record-util.h"
55 #include "user-record.h"
56 #include "user-util.h"
57 #include "varlink-io.systemd.UserDatabase.h"
58
/* Where to look for private/public keys that are used to sign the user records, expressed as a list of
 * directories in which every entry is terminated by an embedded NUL byte. We are not using
 * CONF_PATHS_NULSTR() here since we want to insert /var/lib/systemd/home/ in the middle. And we insert that
 * since we want to auto-generate a persistent private/public key pair if we need to. */
#define KEY_PATHS_NULSTR                        \
        "/etc/systemd/home/\0"                  \
        "/run/systemd/home/\0"                  \
        "/var/lib/systemd/home/\0"              \
        "/usr/local/lib/systemd/home/\0"        \
        "/usr/lib/systemd/home/\0"
68
69 static bool uid_is_home(uid_t uid) {
70 return uid >= HOME_UID_MIN && uid <= HOME_UID_MAX;
71 }
/* Takes a value generated randomly or by hashing and turns it into a UID in the right range: the value is
 * first converted to uid_t, then reduced modulo the number of UIDs in the inclusive
 * HOME_UID_MIN…HOME_UID_MAX range, and finally shifted up by HOME_UID_MIN. */

#define UID_CLAMP_INTO_HOME_RANGE(rnd) (((uid_t) (rnd) % (HOME_UID_MAX - HOME_UID_MIN + 1)) + HOME_UID_MIN)
75
/* Hash operation tables for the four Home lookup tables that manager_new() allocates. The first is looked
 * up by UID (stored as pointer value) and the second by user name; the remaining two are — judging by
 * their names — keyed by worker PID and by sysfs path. All four register home_free() as the destructor
 * for their Home values. */
DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_uid_hash_ops, void, trivial_hash_func, trivial_compare_func, Home, home_free);
DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_name_hash_ops, char, string_hash_func, string_compare_func, Home, home_free);
DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_worker_pid_hash_ops, void, trivial_hash_func, trivial_compare_func, Home, home_free);
DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_sysfs_hash_ops, char, path_hash_func, path_compare, Home, home_free);
80
81 static int on_home_inotify(sd_event_source *s, const struct inotify_event *event, void *userdata);
82 static int manager_gc_images(Manager *m);
83 static int manager_gc_blob(Manager *m);
84 static int manager_enumerate_images(Manager *m);
85 static int manager_assess_image(Manager *m, int dir_fd, const char *dir_path, const char *dentry_name);
86 static void manager_revalidate_image(Manager *m, Home *h);
87
88 static void manager_watch_home(Manager *m) {
89 struct statfs sfs;
90 int r;
91
92 assert(m);
93
94 m->inotify_event_source = sd_event_source_disable_unref(m->inotify_event_source);
95 m->scan_slash_home = false;
96
97 if (statfs(get_home_root(), &sfs) < 0) {
98 log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
99 "Failed to statfs() %s directory, disabling automatic scanning.", get_home_root());
100 return;
101 }
102
103 if (is_network_fs(&sfs)) {
104 log_info("%s is a network file system, disabling automatic scanning.", get_home_root());
105 return;
106 }
107
108 if (is_fs_type(&sfs, AUTOFS_SUPER_MAGIC)) {
109 log_info("%s is on autofs, disabling automatic scanning.", get_home_root());
110 return;
111 }
112
113 m->scan_slash_home = true;
114
115 r = sd_event_add_inotify(m->event, &m->inotify_event_source, get_home_root(),
116 IN_CREATE|IN_CLOSE_WRITE|IN_DELETE_SELF|IN_MOVE_SELF|IN_ONLYDIR|IN_MOVED_TO|IN_MOVED_FROM|IN_DELETE,
117 on_home_inotify, m);
118 if (r < 0)
119 log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
120 "Failed to create inotify watch on %s, ignoring.", get_home_root());
121
122 (void) sd_event_source_set_description(m->inotify_event_source, "home-inotify");
123
124 log_info("Watching %s.", get_home_root());
125 }
126
127 static int on_home_inotify(sd_event_source *s, const struct inotify_event *event, void *userdata) {
128 _cleanup_free_ char *j = NULL;
129 Manager *m = ASSERT_PTR(userdata);
130 const char *e, *n;
131
132 assert(event);
133
134 if ((event->mask & (IN_Q_OVERFLOW|IN_MOVE_SELF|IN_DELETE_SELF|IN_IGNORED|IN_UNMOUNT)) != 0) {
135
136 if (FLAGS_SET(event->mask, IN_Q_OVERFLOW))
137 log_debug("%s inotify queue overflow, rescanning.", get_home_root());
138 else if (FLAGS_SET(event->mask, IN_MOVE_SELF))
139 log_info("%s moved or renamed, recreating watch and rescanning.", get_home_root());
140 else if (FLAGS_SET(event->mask, IN_DELETE_SELF))
141 log_info("%s deleted, recreating watch and rescanning.", get_home_root());
142 else if (FLAGS_SET(event->mask, IN_UNMOUNT))
143 log_info("%s unmounted, recreating watch and rescanning.", get_home_root());
144 else if (FLAGS_SET(event->mask, IN_IGNORED))
145 log_info("%s watch invalidated, recreating watch and rescanning.", get_home_root());
146
147 manager_watch_home(m);
148 (void) manager_gc_images(m);
149 (void) manager_enumerate_images(m);
150 (void) bus_manager_emit_auto_login_changed(m);
151 return 0;
152 }
153
154 /* For the other inotify events, let's ignore all events for file names that don't match our
155 * expectations */
156 if (isempty(event->name))
157 return 0;
158 e = endswith(event->name, FLAGS_SET(event->mask, IN_ISDIR) ? ".homedir" : ".home");
159 if (!e)
160 return 0;
161
162 n = strndupa_safe(event->name, e - event->name);
163 if (!suitable_user_name(n))
164 return 0;
165
166 j = path_join(get_home_root(), event->name);
167 if (!j)
168 return log_oom();
169
170 if ((event->mask & (IN_CREATE|IN_CLOSE_WRITE|IN_MOVED_TO)) != 0) {
171 if (FLAGS_SET(event->mask, IN_CREATE))
172 log_debug("%s has been created, having a look.", j);
173 else if (FLAGS_SET(event->mask, IN_CLOSE_WRITE))
174 log_debug("%s has been modified, having a look.", j);
175 else if (FLAGS_SET(event->mask, IN_MOVED_TO))
176 log_debug("%s has been moved in, having a look.", j);
177
178 (void) manager_assess_image(m, -1, get_home_root(), event->name);
179 (void) bus_manager_emit_auto_login_changed(m);
180 }
181
182 if ((event->mask & (IN_DELETE | IN_CLOSE_WRITE | IN_MOVED_FROM)) != 0) {
183 Home *h;
184
185 if (FLAGS_SET(event->mask, IN_DELETE))
186 log_debug("%s has been deleted, revalidating.", j);
187 else if (FLAGS_SET(event->mask, IN_CLOSE_WRITE))
188 log_debug("%s has been closed after writing, revalidating.", j);
189 else if (FLAGS_SET(event->mask, IN_MOVED_FROM))
190 log_debug("%s has been moved away, revalidating.", j);
191
192 h = hashmap_get(m->homes_by_name, n);
193 if (h) {
194 manager_revalidate_image(m, h);
195 (void) bus_manager_emit_auto_login_changed(m);
196 }
197 }
198
199 return 0;
200 }
201
202 int manager_new(Manager **ret) {
203 _cleanup_(manager_freep) Manager *m = NULL;
204 int r;
205
206 assert(ret);
207
208 m = new(Manager, 1);
209 if (!m)
210 return -ENOMEM;
211
212 *m = (Manager) {
213 .default_storage = _USER_STORAGE_INVALID,
214 .rebalance_interval_usec = 2 * USEC_PER_MINUTE, /* initially, rebalance every 2min */
215 };
216
217 r = manager_parse_config_file(m);
218 if (r < 0)
219 return r;
220
221 r = sd_event_default(&m->event);
222 if (r < 0)
223 return r;
224
225 r = sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
226 if (r < 0)
227 return r;
228
229 r = sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
230 if (r < 0)
231 return r;
232
233 r = sd_event_add_memory_pressure(m->event, NULL, NULL, NULL);
234 if (r < 0)
235 log_full_errno(ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r) || (r == -EHOSTDOWN) ? LOG_DEBUG : LOG_WARNING, r,
236 "Failed to allocate memory pressure watch, ignoring: %m");
237
238 r = sd_event_add_signal(m->event, NULL, SIGRTMIN+18, sigrtmin18_handler, NULL);
239 if (r < 0)
240 return r;
241
242 (void) sd_event_set_watchdog(m->event, true);
243
244 m->homes_by_uid = hashmap_new(&homes_by_uid_hash_ops);
245 if (!m->homes_by_uid)
246 return -ENOMEM;
247
248 m->homes_by_name = hashmap_new(&homes_by_name_hash_ops);
249 if (!m->homes_by_name)
250 return -ENOMEM;
251
252 m->homes_by_worker_pid = hashmap_new(&homes_by_worker_pid_hash_ops);
253 if (!m->homes_by_worker_pid)
254 return -ENOMEM;
255
256 m->homes_by_sysfs = hashmap_new(&homes_by_sysfs_hash_ops);
257 if (!m->homes_by_sysfs)
258 return -ENOMEM;
259
260 *ret = TAKE_PTR(m);
261 return 0;
262 }
263
264 Manager* manager_free(Manager *m) {
265 Home *h;
266
267 assert(m);
268
269 HASHMAP_FOREACH(h, m->homes_by_worker_pid)
270 (void) home_wait_for_worker(h);
271
272 m->bus = sd_bus_flush_close_unref(m->bus);
273 m->polkit_registry = hashmap_free(m->polkit_registry);
274
275 m->device_monitor = sd_device_monitor_unref(m->device_monitor);
276
277 m->inotify_event_source = sd_event_source_unref(m->inotify_event_source);
278 m->notify_socket_event_source = sd_event_source_unref(m->notify_socket_event_source);
279 m->deferred_rescan_event_source = sd_event_source_unref(m->deferred_rescan_event_source);
280 m->deferred_gc_event_source = sd_event_source_unref(m->deferred_gc_event_source);
281 m->deferred_auto_login_event_source = sd_event_source_unref(m->deferred_auto_login_event_source);
282 m->rebalance_event_source = sd_event_source_unref(m->rebalance_event_source);
283
284 m->event = sd_event_unref(m->event);
285
286 m->homes_by_uid = hashmap_free(m->homes_by_uid);
287 m->homes_by_name = hashmap_free(m->homes_by_name);
288 m->homes_by_worker_pid = hashmap_free(m->homes_by_worker_pid);
289 m->homes_by_sysfs = hashmap_free(m->homes_by_sysfs);
290
291 if (m->private_key)
292 EVP_PKEY_free(m->private_key);
293
294 hashmap_free(m->public_keys);
295
296 varlink_server_unref(m->varlink_server);
297 free(m->userdb_service);
298
299 free(m->default_file_system_type);
300
301 return mfree(m);
302 }
303
304 int manager_verify_user_record(Manager *m, UserRecord *hr) {
305 EVP_PKEY *pkey;
306 int r;
307
308 assert(m);
309 assert(hr);
310
311 if (!m->private_key && hashmap_isempty(m->public_keys)) {
312 r = user_record_has_signature(hr);
313 if (r < 0)
314 return r;
315
316 return r ? -ENOKEY : USER_RECORD_UNSIGNED;
317 }
318
319 /* Is it our own? */
320 if (m->private_key) {
321 r = user_record_verify(hr, m->private_key);
322 switch (r) {
323
324 case USER_RECORD_FOREIGN:
325 /* This record is not signed by this key, but let's see below */
326 break;
327
328 case USER_RECORD_SIGNED: /* Signed by us, but also by others, let's propagate that */
329 case USER_RECORD_SIGNED_EXCLUSIVE: /* Signed by us, and nothing else, ditto */
330 case USER_RECORD_UNSIGNED: /* Not signed at all, ditto */
331 default:
332 return r;
333 }
334 }
335
336 HASHMAP_FOREACH(pkey, m->public_keys) {
337 r = user_record_verify(hr, pkey);
338 switch (r) {
339
340 case USER_RECORD_FOREIGN:
341 /* This record is not signed by this key, but let's see our other keys */
342 break;
343
344 case USER_RECORD_SIGNED: /* It's signed by this key we are happy with, but which is not our own. */
345 case USER_RECORD_SIGNED_EXCLUSIVE:
346 return USER_RECORD_FOREIGN;
347
348 case USER_RECORD_UNSIGNED: /* It's not signed at all */
349 default:
350 return r;
351 }
352 }
353
354 return -ENOKEY;
355 }
356
357 static int manager_add_home_by_record(
358 Manager *m,
359 const char *name,
360 int dir_fd,
361 const char *fname) {
362
363 _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
364 _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
365 unsigned line, column;
366 int r, is_signed;
367 struct stat st;
368 Home *h;
369
370 assert(m);
371 assert(name);
372 assert(fname);
373
374 if (fstatat(dir_fd, fname, &st, 0) < 0)
375 return log_error_errno(errno, "Failed to stat identity record %s: %m", fname);
376
377 if (!S_ISREG(st.st_mode)) {
378 log_debug("Identity record file %s is not a regular file, ignoring.", fname);
379 return 0;
380 }
381
382 if (st.st_size == 0)
383 goto unlink_this_file;
384
385 r = json_parse_file_at(NULL, dir_fd, fname, JSON_PARSE_SENSITIVE, &v, &line, &column);
386 if (r < 0)
387 return log_error_errno(r, "Failed to parse identity record at %s:%u%u: %m", fname, line, column);
388
389 if (json_variant_is_blank_object(v))
390 goto unlink_this_file;
391
392 hr = user_record_new();
393 if (!hr)
394 return log_oom();
395
396 r = user_record_load(hr, v, USER_RECORD_LOAD_REFUSE_SECRET|USER_RECORD_LOG|USER_RECORD_PERMISSIVE);
397 if (r < 0)
398 return r;
399
400 if (!streq_ptr(hr->user_name, name))
401 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
402 "Identity's user name %s does not match file name %s, refusing.",
403 hr->user_name, name);
404
405 is_signed = manager_verify_user_record(m, hr);
406 switch (is_signed) {
407
408 case -ENOKEY:
409 return log_warning_errno(is_signed, "User record %s is not signed by any accepted key, ignoring.", fname);
410 case USER_RECORD_UNSIGNED:
411 return log_warning_errno(SYNTHETIC_ERRNO(EPERM), "User record %s is not signed at all, ignoring.", fname);
412 case USER_RECORD_SIGNED:
413 log_info("User record %s is signed by us (and others), accepting.", fname);
414 break;
415 case USER_RECORD_SIGNED_EXCLUSIVE:
416 log_info("User record %s is signed only by us, accepting.", fname);
417 break;
418 case USER_RECORD_FOREIGN:
419 log_info("User record %s is signed by registered key from others, accepting.", fname);
420 break;
421 default:
422 assert(is_signed < 0);
423 return log_error_errno(is_signed, "Failed to verify signature of user record in %s: %m", fname);
424 }
425
426 h = hashmap_get(m->homes_by_name, name);
427 if (h) {
428 r = home_set_record(h, hr);
429 if (r < 0)
430 return log_error_errno(r, "Failed to update home record for %s: %m", name);
431
432 /* If we acquired a record now for a previously unallocated entry, then reset the state. This
433 * makes sure home_get_state() will check for the availability of the image file dynamically
434 * in order to detect to distinguish HOME_INACTIVE and HOME_ABSENT. */
435 if (h->state == HOME_UNFIXATED)
436 h->state = _HOME_STATE_INVALID;
437 } else {
438 r = home_new(m, hr, NULL, &h);
439 if (r < 0)
440 return log_error_errno(r, "Failed to allocate new home object: %m");
441
442 log_info("Added registered home for user %s.", hr->user_name);
443 }
444
445 /* Only entries we exclusively signed are writable to us, hence remember the result */
446 h->signed_locally = is_signed == USER_RECORD_SIGNED_EXCLUSIVE;
447
448 return 1;
449
450 unlink_this_file:
451 /* If this is an empty file, then let's just remove it. An empty file is not useful in any case, and
452 * apparently xfs likes to leave empty files around when not unmounted cleanly (see
453 * https://github.com/systemd/systemd/issues/15178 for example). Note that we don't delete non-empty
454 * files even if they are invalid, because that's just too risky, we might delete data the user still
455 * needs. But empty files are never useful, hence let's just remove them. */
456
457 if (unlinkat(dir_fd, fname, 0) < 0)
458 return log_error_errno(errno, "Failed to remove empty user record file %s: %m", fname);
459
460 log_notice("Discovered empty user record file %s/%s, removed automatically.", home_record_dir(), fname);
461 return 0;
462 }
463
464 static int manager_enumerate_records(Manager *m) {
465 _cleanup_closedir_ DIR *d = NULL;
466
467 assert(m);
468
469 d = opendir(home_record_dir());
470 if (!d)
471 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
472 "Failed to open %s: %m", home_record_dir());
473
474 FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read record directory: %m")) {
475 _cleanup_free_ char *n = NULL;
476 const char *e;
477
478 if (!dirent_is_file(de))
479 continue;
480
481 e = endswith(de->d_name, ".identity");
482 if (!e)
483 continue;
484
485 n = strndup(de->d_name, e - de->d_name);
486 if (!n)
487 return log_oom();
488
489 if (!suitable_user_name(n))
490 continue;
491
492 (void) manager_add_home_by_record(m, n, dirfd(d), de->d_name);
493 }
494
495 return 0;
496 }
497
498 static int search_quota(uid_t uid, const char *exclude_quota_path) {
499 struct stat exclude_st = {};
500 dev_t previous_devno = 0;
501 int r;
502
503 /* Checks whether the specified UID owns any files on the files system, but ignore any file system
504 * backing the specified file. The file is used when operating on home directories, where it's OK if
505 * the UID of them already owns files. */
506
507 if (exclude_quota_path && stat(exclude_quota_path, &exclude_st) < 0) {
508 if (errno != ENOENT)
509 return log_warning_errno(errno, "Failed to stat %s, ignoring: %m", exclude_quota_path);
510 }
511
512 /* Check a few usual suspects where regular users might own files. Note that this is by no means
513 * comprehensive, but should cover most cases. Note that in an ideal world every user would be
514 * registered in NSS and avoid our own UID range, but for all other cases, it's a good idea to be
515 * paranoid and check quota if we can. */
516 FOREACH_STRING(where, get_home_root(), "/tmp/", "/var/", "/var/mail/", "/var/tmp/", "/var/spool/") {
517 struct dqblk req;
518 struct stat st;
519
520 if (stat(where, &st) < 0) {
521 log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
522 "Failed to stat %s, ignoring: %m", where);
523 continue;
524 }
525
526 if (major(st.st_dev) == 0) {
527 log_debug("Directory %s is not on a real block device, not checking quota for UID use.", where);
528 continue;
529 }
530
531 if (st.st_dev == exclude_st.st_dev) { /* If an exclude path is specified, then ignore quota
532 * reported on the same block device as that path. */
533 log_debug("Directory %s is where the home directory is located, not checking quota for UID use.", where);
534 continue;
535 }
536
537 if (st.st_dev == previous_devno) { /* Does this directory have the same devno as the previous
538 * one we tested? If so, there's no point in testing this
539 * again. */
540 log_debug("Directory %s is on same device as previous tested directory, not checking quota for UID use a second time.", where);
541 continue;
542 }
543
544 previous_devno = st.st_dev;
545
546 r = quotactl_devnum(QCMD_FIXED(Q_GETQUOTA, USRQUOTA), st.st_dev, uid, &req);
547 if (r < 0) {
548 if (ERRNO_IS_NOT_SUPPORTED(r))
549 log_debug_errno(r, "No UID quota support on %s, ignoring.", where);
550 else if (ERRNO_IS_PRIVILEGE(r))
551 log_debug_errno(r, "UID quota support for %s prohibited, ignoring.", where);
552 else
553 log_warning_errno(r, "Failed to query quota on %s, ignoring: %m", where);
554
555 continue;
556 }
557
558 if ((FLAGS_SET(req.dqb_valid, QIF_SPACE) && req.dqb_curspace > 0) ||
559 (FLAGS_SET(req.dqb_valid, QIF_INODES) && req.dqb_curinodes > 0)) {
560 log_debug_errno(errno, "Quota reports UID " UID_FMT " occupies disk space on %s.", uid, where);
561 return 1;
562 }
563 }
564
565 return 0;
566 }
567
568 static int manager_acquire_uid(
569 Manager *m,
570 uid_t start_uid,
571 const char *user_name,
572 const char *exclude_quota_path,
573 uid_t *ret) {
574
575 static const uint8_t hash_key[] = {
576 0xa3, 0xb8, 0x82, 0x69, 0x9a, 0x71, 0xf7, 0xa9,
577 0xe0, 0x7c, 0xf6, 0xf1, 0x21, 0x69, 0xd2, 0x1e
578 };
579
580 enum {
581 PHASE_SUGGESTED,
582 PHASE_HASHED,
583 PHASE_RANDOM
584 } phase = PHASE_SUGGESTED;
585
586 unsigned n_tries = 100;
587 int r;
588
589 assert(m);
590 assert(ret);
591
592 for (;;) {
593 _cleanup_free_ struct passwd *pw = NULL;
594 _cleanup_free_ struct group *gr = NULL;
595 uid_t candidate;
596 Home *other;
597
598 if (--n_tries <= 0)
599 return -EBUSY;
600
601 switch (phase) {
602
603 case PHASE_SUGGESTED:
604 phase = PHASE_HASHED;
605
606 if (!uid_is_home(start_uid))
607 continue;
608
609 candidate = start_uid;
610 break;
611
612 case PHASE_HASHED:
613 phase = PHASE_RANDOM;
614
615 if (!user_name)
616 continue;
617
618 candidate = UID_CLAMP_INTO_HOME_RANGE(siphash24(user_name, strlen(user_name), hash_key));
619 break;
620
621 case PHASE_RANDOM:
622 random_bytes(&candidate, sizeof(candidate));
623 candidate = UID_CLAMP_INTO_HOME_RANGE(candidate);
624 break;
625
626 default:
627 assert_not_reached();
628 }
629
630 other = hashmap_get(m->homes_by_uid, UID_TO_PTR(candidate));
631 if (other) {
632 log_debug("Candidate UID " UID_FMT " already used by another home directory (%s), let's try another.",
633 candidate, other->user_name);
634 continue;
635 }
636
637 r = getpwuid_malloc(candidate, &pw);
638 if (r >= 0) {
639 log_debug("Candidate UID " UID_FMT " already registered by another user in NSS (%s), let's try another.",
640 candidate, pw->pw_name);
641 continue;
642 }
643 if (r != -ESRCH) {
644 log_debug_errno(r, "Failed to check if an NSS user is already registered for candidate UID " UID_FMT ", assuming there might be: %m", candidate);
645 continue;
646 }
647
648 r = getgrgid_malloc((gid_t) candidate, &gr);
649 if (r >= 0) {
650 log_debug("Candidate UID " UID_FMT " already registered by another group in NSS (%s), let's try another.",
651 candidate, gr->gr_name);
652 continue;
653 }
654 if (r != -ESRCH) {
655 log_debug_errno(r, "Failed to check if an NSS group is already registered for candidate UID " UID_FMT ", assuming there might be: %m", candidate);
656 continue;
657 }
658
659 r = search_ipc(candidate, (gid_t) candidate);
660 if (r < 0)
661 continue;
662 if (r > 0) {
663 log_debug_errno(r, "Candidate UID " UID_FMT " already owns IPC objects, let's try another: %m",
664 candidate);
665 continue;
666 }
667
668 r = search_quota(candidate, exclude_quota_path);
669 if (r != 0)
670 continue;
671
672 *ret = candidate;
673 return 0;
674 }
675 }
676
677 static int manager_add_home_by_image(
678 Manager *m,
679 const char *user_name,
680 const char *realm,
681 const char *image_path,
682 const char *sysfs,
683 UserStorage storage,
684 uid_t start_uid) {
685
686 _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
687 uid_t uid;
688 Home *h;
689 int r;
690
691 assert(m);
692
693 assert(m);
694 assert(user_name);
695 assert(image_path);
696 assert(storage >= 0);
697 assert(storage < _USER_STORAGE_MAX);
698
699 h = hashmap_get(m->homes_by_name, user_name);
700 if (h) {
701 bool same;
702
703 if (h->state != HOME_UNFIXATED) {
704 log_debug("Found an image for user %s which already has a record, skipping.", user_name);
705 return 0; /* ignore images that synthesize a user we already have a record for */
706 }
707
708 same = user_record_storage(h->record) == storage;
709 if (same) {
710 if (h->sysfs && sysfs)
711 same = path_equal(h->sysfs, sysfs);
712 else if (!!h->sysfs != !!sysfs)
713 same = false;
714 else {
715 const char *p;
716
717 p = user_record_image_path(h->record);
718 same = p && path_equal(p, image_path);
719 }
720 }
721
722 if (!same) {
723 log_debug("Found multiple images for user '%s', ignoring image '%s'.", user_name, image_path);
724 return 0;
725 }
726 } else {
727 /* Check NSS, in case there's another user or group by this name */
728 if (getpwnam_malloc(user_name, /* ret= */ NULL) >= 0 || getgrnam_malloc(user_name, /* ret= */ NULL) >= 0) {
729 log_debug("Found an existing user or group by name '%s', ignoring image '%s'.", user_name, image_path);
730 return 0;
731 }
732 }
733
734 if (h && uid_is_valid(h->uid))
735 uid = h->uid;
736 else {
737 r = manager_acquire_uid(m, start_uid, user_name,
738 IN_SET(storage, USER_SUBVOLUME, USER_DIRECTORY, USER_FSCRYPT) ? image_path : NULL,
739 &uid);
740 if (r < 0)
741 return log_warning_errno(r, "Failed to acquire unused UID for %s: %m", user_name);
742 }
743
744 hr = user_record_new();
745 if (!hr)
746 return log_oom();
747
748 r = user_record_synthesize(hr, user_name, realm, image_path, storage, uid, (gid_t) uid);
749 if (r < 0)
750 return log_error_errno(r, "Failed to synthesize home record for %s (image %s): %m", user_name, image_path);
751
752 if (h) {
753 r = home_set_record(h, hr);
754 if (r < 0)
755 return log_error_errno(r, "Failed to update home record for %s: %m", user_name);
756 } else {
757 r = home_new(m, hr, sysfs, &h);
758 if (r < 0)
759 return log_error_errno(r, "Failed to allocate new home object: %m");
760
761 h->state = HOME_UNFIXATED;
762
763 log_info("Discovered new home for user %s through image %s.", user_name, image_path);
764 }
765
766 return 1;
767 }
768
769 int manager_augment_record_with_uid(
770 Manager *m,
771 UserRecord *hr) {
772
773 const char *exclude_quota_path = NULL;
774 uid_t start_uid = UID_INVALID, uid;
775 int r;
776
777 assert(m);
778 assert(hr);
779
780 if (uid_is_valid(hr->uid))
781 return 0;
782
783 if (IN_SET(hr->storage, USER_CLASSIC, USER_SUBVOLUME, USER_DIRECTORY, USER_FSCRYPT)) {
784 const char * ip;
785
786 ip = user_record_image_path(hr);
787 if (ip) {
788 struct stat st;
789
790 if (stat(ip, &st) < 0) {
791 if (errno != ENOENT)
792 log_warning_errno(errno, "Failed to stat(%s): %m", ip);
793 } else if (uid_is_home(st.st_uid)) {
794 start_uid = st.st_uid;
795 exclude_quota_path = ip;
796 }
797 }
798 }
799
800 r = manager_acquire_uid(m, start_uid, hr->user_name, exclude_quota_path, &uid);
801 if (r < 0)
802 return r;
803
804 log_debug("Acquired new UID " UID_FMT " for %s.", uid, hr->user_name);
805
806 r = user_record_add_binding(
807 hr,
808 _USER_STORAGE_INVALID,
809 NULL,
810 SD_ID128_NULL,
811 SD_ID128_NULL,
812 SD_ID128_NULL,
813 NULL,
814 NULL,
815 UINT64_MAX,
816 NULL,
817 NULL,
818 uid,
819 (gid_t) uid);
820 if (r < 0)
821 return r;
822
823 return 1;
824 }
825
826 static int manager_assess_image(
827 Manager *m,
828 int dir_fd,
829 const char *dir_path,
830 const char *dentry_name) {
831
832 char *luks_suffix, *directory_suffix;
833 _cleanup_free_ char *path = NULL;
834 struct stat st;
835 int r;
836
837 assert(m);
838 assert(dir_path);
839 assert(dentry_name);
840
841 luks_suffix = endswith(dentry_name, ".home");
842 if (luks_suffix)
843 directory_suffix = NULL;
844 else
845 directory_suffix = endswith(dentry_name, ".homedir");
846
847 /* Early filter out: by name */
848 if (!luks_suffix && !directory_suffix)
849 return 0;
850
851 path = path_join(dir_path, dentry_name);
852 if (!path)
853 return log_oom();
854
855 /* Follow symlinks here, to allow people to link in stuff to make them available locally. */
856 if (dir_fd >= 0)
857 r = fstatat(dir_fd, dentry_name, &st, 0);
858 else
859 r = stat(path, &st);
860 if (r < 0)
861 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
862 "Failed to stat() directory entry '%s', ignoring: %m", dentry_name);
863
864 if (S_ISREG(st.st_mode)) {
865 _cleanup_free_ char *n = NULL, *user_name = NULL, *realm = NULL;
866
867 if (!luks_suffix)
868 return 0;
869
870 n = strndup(dentry_name, luks_suffix - dentry_name);
871 if (!n)
872 return log_oom();
873
874 r = split_user_name_realm(n, &user_name, &realm);
875 if (r == -EINVAL) /* Not the right format: ignore */
876 return 0;
877 if (r < 0)
878 return log_error_errno(r, "Failed to split image name into user name/realm: %m");
879
880 return manager_add_home_by_image(m, user_name, realm, path, NULL, USER_LUKS, UID_INVALID);
881 }
882
883 if (S_ISDIR(st.st_mode)) {
884 _cleanup_free_ char *n = NULL, *user_name = NULL, *realm = NULL;
885 _cleanup_close_ int fd = -EBADF;
886 UserStorage storage;
887
888 if (!directory_suffix)
889 return 0;
890
891 n = strndup(dentry_name, directory_suffix - dentry_name);
892 if (!n)
893 return log_oom();
894
895 r = split_user_name_realm(n, &user_name, &realm);
896 if (r == -EINVAL) /* Not the right format: ignore */
897 return 0;
898 if (r < 0)
899 return log_error_errno(r, "Failed to split image name into user name/realm: %m");
900
901 if (dir_fd >= 0)
902 fd = openat(dir_fd, dentry_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
903 else
904 fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
905 if (fd < 0)
906 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
907 "Failed to open directory '%s', ignoring: %m", path);
908
909 if (fstat(fd, &st) < 0)
910 return log_warning_errno(errno, "Failed to fstat() %s, ignoring: %m", path);
911
912 assert(S_ISDIR(st.st_mode)); /* Must hold, we used O_DIRECTORY above */
913
914 r = btrfs_is_subvol_fd(fd);
915 if (r < 0)
916 return log_warning_errno(errno, "Failed to determine whether %s is a btrfs subvolume: %m", path);
917 if (r > 0)
918 storage = USER_SUBVOLUME;
919 else {
920 struct fscrypt_policy policy;
921
922 if (ioctl(fd, FS_IOC_GET_ENCRYPTION_POLICY, &policy) < 0) {
923
924 if (errno == ENODATA)
925 log_debug_errno(errno, "Determined %s is not fscrypt encrypted.", path);
926 else if (ERRNO_IS_NOT_SUPPORTED(errno))
927 log_debug_errno(errno, "Determined %s is not fscrypt encrypted because kernel or file system doesn't support it.", path);
928 else
929 log_debug_errno(errno, "FS_IOC_GET_ENCRYPTION_POLICY failed with unexpected error code on %s, ignoring: %m", path);
930
931 storage = USER_DIRECTORY;
932 } else
933 storage = USER_FSCRYPT;
934 }
935
936 return manager_add_home_by_image(m, user_name, realm, path, NULL, storage, st.st_uid);
937 }
938
939 return 0;
940 }
941
942 int manager_enumerate_images(Manager *m) {
943 _cleanup_closedir_ DIR *d = NULL;
944
945 assert(m);
946
947 if (!m->scan_slash_home)
948 return 0;
949
950 d = opendir(get_home_root());
951 if (!d)
952 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
953 "Failed to open %s: %m", get_home_root());
954
955 FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read %s directory: %m", get_home_root()))
956 (void) manager_assess_image(m, dirfd(d), get_home_root(), de->d_name);
957
958 return 0;
959 }
960
961 static int manager_connect_bus(Manager *m) {
962 _cleanup_free_ char *b = NULL;
963 const char *suffix, *busname;
964 int r;
965
966 assert(m);
967 assert(!m->bus);
968
969 r = sd_bus_default_system(&m->bus);
970 if (r < 0)
971 return log_error_errno(r, "Failed to connect to system bus: %m");
972
973 r = bus_add_implementation(m->bus, &manager_object, m);
974 if (r < 0)
975 return r;
976
977 r = bus_log_control_api_register(m->bus);
978 if (r < 0)
979 return r;
980
981 suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
982 if (suffix) {
983 b = strjoin("org.freedesktop.home1.", suffix);
984 if (!b)
985 return log_oom();
986 busname = b;
987 } else
988 busname = "org.freedesktop.home1";
989
990 r = sd_bus_request_name_async(m->bus, NULL, busname, 0, NULL, NULL);
991 if (r < 0)
992 return log_error_errno(r, "Failed to request name: %m");
993
994 r = sd_bus_attach_event(m->bus, m->event, SD_EVENT_PRIORITY_NORMAL);
995 if (r < 0)
996 return log_error_errno(r, "Failed to attach bus to event loop: %m");
997
998 (void) sd_bus_set_exit_on_disconnect(m->bus, true);
999
1000 return 0;
1001 }
1002
1003 static int manager_bind_varlink(Manager *m) {
1004 _cleanup_free_ char *p = NULL;
1005 const char *suffix, *socket_path;
1006 int r;
1007
1008 assert(m);
1009 assert(!m->varlink_server);
1010
1011 r = varlink_server_new(&m->varlink_server, VARLINK_SERVER_ACCOUNT_UID|VARLINK_SERVER_INHERIT_USERDATA|VARLINK_SERVER_INPUT_SENSITIVE);
1012 if (r < 0)
1013 return log_error_errno(r, "Failed to allocate varlink server object: %m");
1014
1015 varlink_server_set_userdata(m->varlink_server, m);
1016
1017 r = varlink_server_add_interface(m->varlink_server, &vl_interface_io_systemd_UserDatabase);
1018 if (r < 0)
1019 return log_error_errno(r, "Failed to add UserDatabase interface to varlink server: %m");
1020
1021 r = varlink_server_bind_method_many(
1022 m->varlink_server,
1023 "io.systemd.UserDatabase.GetUserRecord", vl_method_get_user_record,
1024 "io.systemd.UserDatabase.GetGroupRecord", vl_method_get_group_record,
1025 "io.systemd.UserDatabase.GetMemberships", vl_method_get_memberships);
1026 if (r < 0)
1027 return log_error_errno(r, "Failed to register varlink methods: %m");
1028
1029 (void) mkdir_p("/run/systemd/userdb", 0755);
1030
1031 /* To make things easier to debug, when working from a homed managed home directory, let's optionally
1032 * use a different varlink socket name */
1033 suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
1034 if (suffix) {
1035 p = strjoin("/run/systemd/userdb/io.systemd.Home.", suffix);
1036 if (!p)
1037 return log_oom();
1038 socket_path = p;
1039 } else
1040 socket_path = "/run/systemd/userdb/io.systemd.Home";
1041
1042 r = varlink_server_listen_address(m->varlink_server, socket_path, 0666);
1043 if (r < 0)
1044 return log_error_errno(r, "Failed to bind to varlink socket: %m");
1045
1046 r = varlink_server_attach_event(m->varlink_server, m->event, SD_EVENT_PRIORITY_NORMAL);
1047 if (r < 0)
1048 return log_error_errno(r, "Failed to attach varlink connection to event loop: %m");
1049
1050 assert(!m->userdb_service);
1051 r = path_extract_filename(socket_path, &m->userdb_service);
1052 if (r < 0)
1053 return log_error_errno(r, "Failed to extract filename from socket path '%s': %m", socket_path);
1054
1055 /* Avoid recursion */
1056 if (setenv("SYSTEMD_BYPASS_USERDB", m->userdb_service, 1) < 0)
1057 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to set $SYSTEMD_BYPASS_USERDB: %m");
1058
1059 return 0;
1060 }
1061
1062 static ssize_t read_datagram(
1063 int fd,
1064 struct ucred *ret_sender,
1065 void **ret,
1066 int *ret_passed_fd) {
1067
1068 CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))) control;
1069 _cleanup_free_ void *buffer = NULL;
1070 _cleanup_close_ int passed_fd = -EBADF;
1071 struct ucred *sender = NULL;
1072 struct cmsghdr *cmsg;
1073 struct msghdr mh;
1074 struct iovec iov;
1075 ssize_t n, m;
1076
1077 assert(fd >= 0);
1078 assert(ret_sender);
1079 assert(ret);
1080 assert(ret_passed_fd);
1081
1082 n = next_datagram_size_fd(fd);
1083 if (n < 0)
1084 return n;
1085
1086 buffer = malloc(n + 2);
1087 if (!buffer)
1088 return -ENOMEM;
1089
1090 /* Pass one extra byte, as a size check */
1091 iov = IOVEC_MAKE(buffer, n + 1);
1092
1093 mh = (struct msghdr) {
1094 .msg_iov = &iov,
1095 .msg_iovlen = 1,
1096 .msg_control = &control,
1097 .msg_controllen = sizeof(control),
1098 };
1099
1100 m = recvmsg_safe(fd, &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1101 if (m < 0)
1102 return m;
1103
1104 /* Ensure the size matches what we determined before */
1105 if (m != n) {
1106 cmsg_close_all(&mh);
1107 return -EMSGSIZE;
1108 }
1109
1110 CMSG_FOREACH(cmsg, &mh) {
1111 if (cmsg->cmsg_level == SOL_SOCKET &&
1112 cmsg->cmsg_type == SCM_CREDENTIALS &&
1113 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
1114 assert(!sender);
1115 sender = CMSG_TYPED_DATA(cmsg, struct ucred);
1116 }
1117
1118 if (cmsg->cmsg_level == SOL_SOCKET &&
1119 cmsg->cmsg_type == SCM_RIGHTS) {
1120
1121 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
1122 cmsg_close_all(&mh);
1123 return -EMSGSIZE;
1124 }
1125
1126 assert(passed_fd < 0);
1127 passed_fd = *CMSG_TYPED_DATA(cmsg, int);
1128 }
1129 }
1130
1131 if (sender)
1132 *ret_sender = *sender;
1133 else
1134 *ret_sender = (struct ucred) UCRED_INVALID;
1135
1136 *ret_passed_fd = TAKE_FD(passed_fd);
1137
1138 /* For safety reasons: let's always NUL terminate. */
1139 ((char*) buffer)[n] = 0;
1140 *ret = TAKE_PTR(buffer);
1141
1142 return 0;
1143 }
1144
1145 static int on_notify_socket(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1146 _cleanup_strv_free_ char **l = NULL;
1147 _cleanup_free_ void *datagram = NULL;
1148 _cleanup_close_ int passed_fd = -EBADF;
1149 struct ucred sender = UCRED_INVALID;
1150 Manager *m = ASSERT_PTR(userdata);
1151 ssize_t n;
1152 Home *h;
1153
1154 assert(s);
1155
1156 n = read_datagram(fd, &sender, &datagram, &passed_fd);
1157 if (n < 0) {
1158 if (ERRNO_IS_TRANSIENT(n))
1159 return 0;
1160 return log_error_errno(n, "Failed to read notify datagram: %m");
1161 }
1162
1163 if (sender.pid <= 0) {
1164 log_warning("Received notify datagram without valid sender PID, ignoring.");
1165 return 0;
1166 }
1167
1168 h = hashmap_get(m->homes_by_worker_pid, PID_TO_PTR(sender.pid));
1169 if (!h) {
1170 log_warning("Received notify datagram of unknown process, ignoring.");
1171 return 0;
1172 }
1173
1174 l = strv_split(datagram, "\n");
1175 if (!l)
1176 return log_oom();
1177
1178 home_process_notify(h, l, TAKE_FD(passed_fd));
1179 return 0;
1180 }
1181
1182 static int manager_listen_notify(Manager *m) {
1183 _cleanup_close_ int fd = -EBADF;
1184 union sockaddr_union sa = {
1185 .un.sun_family = AF_UNIX,
1186 .un.sun_path = "/run/systemd/home/notify",
1187 };
1188 const char *suffix;
1189 int r;
1190
1191 assert(m);
1192 assert(!m->notify_socket_event_source);
1193
1194 suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
1195 if (suffix) {
1196 _cleanup_free_ char *unix_path = NULL;
1197
1198 unix_path = strjoin("/run/systemd/home/notify.", suffix);
1199 if (!unix_path)
1200 return log_oom();
1201 r = sockaddr_un_set_path(&sa.un, unix_path);
1202 if (r < 0)
1203 return log_error_errno(r, "Socket path %s does not fit in sockaddr_un: %m", unix_path);
1204 }
1205
1206 fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1207 if (fd < 0)
1208 return log_error_errno(errno, "Failed to create listening socket: %m");
1209
1210 (void) mkdir_parents(sa.un.sun_path, 0755);
1211 (void) sockaddr_un_unlink(&sa.un);
1212
1213 if (bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0)
1214 return log_error_errno(errno, "Failed to bind to socket: %m");
1215
1216 r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
1217 if (r < 0)
1218 return r;
1219
1220 r = sd_event_add_io(m->event, &m->notify_socket_event_source, fd, EPOLLIN, on_notify_socket, m);
1221 if (r < 0)
1222 return log_error_errno(r, "Failed to allocate event source for notify socket: %m");
1223
1224 (void) sd_event_source_set_description(m->notify_socket_event_source, "notify-socket");
1225
1226 /* Make sure we process sd_notify() before SIGCHLD for any worker, so that we always know the error
1227 * number of a client before it exits. */
1228 r = sd_event_source_set_priority(m->notify_socket_event_source, SD_EVENT_PRIORITY_NORMAL - 5);
1229 if (r < 0)
1230 return log_error_errno(r, "Failed to alter priority of NOTIFY_SOCKET event source: %m");
1231
1232 r = sd_event_source_set_io_fd_own(m->notify_socket_event_source, true);
1233 if (r < 0)
1234 return log_error_errno(r, "Failed to pass ownership of notify socket: %m");
1235
1236 return TAKE_FD(fd);
1237 }
1238
1239 static int manager_add_device(Manager *m, sd_device *d) {
1240 _cleanup_free_ char *user_name = NULL, *realm = NULL, *node = NULL;
1241 const char *tabletype, *parttype, *partname, *partuuid, *sysfs;
1242 sd_id128_t id;
1243 int r;
1244
1245 assert(m);
1246 assert(d);
1247
1248 r = sd_device_get_syspath(d, &sysfs);
1249 if (r < 0)
1250 return log_error_errno(r, "Failed to acquire sysfs path of device: %m");
1251
1252 r = sd_device_get_property_value(d, "ID_PART_TABLE_TYPE", &tabletype);
1253 if (r == -ENOENT)
1254 return 0;
1255 if (r < 0)
1256 return log_error_errno(r, "Failed to acquire ID_PART_TABLE_TYPE device property, ignoring: %m");
1257
1258 if (!streq(tabletype, "gpt")) {
1259 log_debug("Found partition (%s) on non-GPT table, ignoring.", sysfs);
1260 return 0;
1261 }
1262
1263 r = sd_device_get_property_value(d, "ID_PART_ENTRY_TYPE", &parttype);
1264 if (r == -ENOENT)
1265 return 0;
1266 if (r < 0)
1267 return log_error_errno(r, "Failed to acquire ID_PART_ENTRY_TYPE device property, ignoring: %m");
1268 if (sd_id128_string_equal(parttype, SD_GPT_USER_HOME) <= 0) {
1269 log_debug("Found partition (%s) we don't care about, ignoring.", sysfs);
1270 return 0;
1271 }
1272
1273 r = sd_device_get_property_value(d, "ID_PART_ENTRY_NAME", &partname);
1274 if (r < 0)
1275 return log_warning_errno(r, "Failed to acquire ID_PART_ENTRY_NAME device property, ignoring: %m");
1276
1277 r = split_user_name_realm(partname, &user_name, &realm);
1278 if (r == -EINVAL)
1279 return log_warning_errno(r, "Found partition with correct partition type but a non-parsable partition name '%s', ignoring.", partname);
1280 if (r < 0)
1281 return log_error_errno(r, "Failed to validate partition name '%s': %m", partname);
1282
1283 r = sd_device_get_property_value(d, "ID_FS_UUID", &partuuid);
1284 if (r < 0)
1285 return log_warning_errno(r, "Failed to acquire ID_FS_UUID device property, ignoring: %m");
1286
1287 r = sd_id128_from_string(partuuid, &id);
1288 if (r < 0)
1289 return log_warning_errno(r, "Failed to parse ID_FS_UUID field '%s', ignoring: %m", partuuid);
1290
1291 if (asprintf(&node, "/dev/disk/by-uuid/" SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(id)) < 0)
1292 return log_oom();
1293
1294 return manager_add_home_by_image(m, user_name, realm, node, sysfs, USER_LUKS, UID_INVALID);
1295 }
1296
1297 static int manager_on_device(sd_device_monitor *monitor, sd_device *d, void *userdata) {
1298 Manager *m = ASSERT_PTR(userdata);
1299 int r;
1300
1301 assert(d);
1302
1303 if (device_for_action(d, SD_DEVICE_REMOVE)) {
1304 const char *sysfs;
1305 Home *h;
1306
1307 r = sd_device_get_syspath(d, &sysfs);
1308 if (r < 0) {
1309 log_warning_errno(r, "Failed to acquire sysfs path from device: %m");
1310 return 0;
1311 }
1312
1313 log_info("block device %s has been removed.", sysfs);
1314
1315 /* Let's see if we previously synthesized a home record from this device, if so, let's just
1316 * revalidate that. Otherwise let's revalidate them all, but asynchronously. */
1317 h = hashmap_get(m->homes_by_sysfs, sysfs);
1318 if (h)
1319 manager_revalidate_image(m, h);
1320 else
1321 manager_enqueue_gc(m, NULL);
1322 } else
1323 (void) manager_add_device(m, d);
1324
1325 (void) bus_manager_emit_auto_login_changed(m);
1326 return 0;
1327 }
1328
1329 static int manager_watch_devices(Manager *m) {
1330 int r;
1331
1332 assert(m);
1333 assert(!m->device_monitor);
1334
1335 r = sd_device_monitor_new(&m->device_monitor);
1336 if (r < 0)
1337 return log_error_errno(r, "Failed to allocate device monitor: %m");
1338
1339 r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_monitor, "block", NULL);
1340 if (r < 0)
1341 return log_error_errno(r, "Failed to configure device monitor match: %m");
1342
1343 r = sd_device_monitor_attach_event(m->device_monitor, m->event);
1344 if (r < 0)
1345 return log_error_errno(r, "Failed to attach device monitor to event loop: %m");
1346
1347 r = sd_device_monitor_start(m->device_monitor, manager_on_device, m);
1348 if (r < 0)
1349 return log_error_errno(r, "Failed to start device monitor: %m");
1350
1351 return 0;
1352 }
1353
1354 static int manager_enumerate_devices(Manager *m) {
1355 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
1356 int r;
1357
1358 assert(m);
1359
1360 r = sd_device_enumerator_new(&e);
1361 if (r < 0)
1362 return r;
1363
1364 r = sd_device_enumerator_add_match_subsystem(e, "block", true);
1365 if (r < 0)
1366 return r;
1367
1368 FOREACH_DEVICE(e, d)
1369 (void) manager_add_device(m, d);
1370
1371 return 0;
1372 }
1373
1374 static int manager_load_key_pair(Manager *m) {
1375 _cleanup_fclose_ FILE *f = NULL;
1376 struct stat st;
1377 int r;
1378
1379 assert(m);
1380
1381 if (m->private_key) {
1382 EVP_PKEY_free(m->private_key);
1383 m->private_key = NULL;
1384 }
1385
1386 r = search_and_fopen_nulstr("local.private", "re", NULL, KEY_PATHS_NULSTR, &f, NULL);
1387 if (r == -ENOENT)
1388 return 0;
1389 if (r < 0)
1390 return log_error_errno(r, "Failed to read private key file: %m");
1391
1392 if (fstat(fileno(f), &st) < 0)
1393 return log_error_errno(errno, "Failed to stat private key file: %m");
1394
1395 r = stat_verify_regular(&st);
1396 if (r < 0)
1397 return log_error_errno(r, "Private key file is not regular: %m");
1398
1399 if (st.st_uid != 0 || (st.st_mode & 0077) != 0)
1400 return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Private key file is readable by more than the root user");
1401
1402 m->private_key = PEM_read_PrivateKey(f, NULL, NULL, NULL);
1403 if (!m->private_key)
1404 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to load private key pair");
1405
1406 log_info("Successfully loaded private key pair.");
1407
1408 return 1;
1409 }
1410
1411 static int manager_generate_key_pair(Manager *m) {
1412 _cleanup_(EVP_PKEY_CTX_freep) EVP_PKEY_CTX *ctx = NULL;
1413 _cleanup_(unlink_and_freep) char *temp_public = NULL, *temp_private = NULL;
1414 _cleanup_fclose_ FILE *fpublic = NULL, *fprivate = NULL;
1415 int r;
1416
1417 if (m->private_key) {
1418 EVP_PKEY_free(m->private_key);
1419 m->private_key = NULL;
1420 }
1421
1422 ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_ED25519, NULL);
1423 if (!ctx)
1424 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to allocate Ed25519 key generation context.");
1425
1426 if (EVP_PKEY_keygen_init(ctx) <= 0)
1427 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to initialize Ed25519 key generation context.");
1428
1429 log_info("Generating key pair for signing local user identity records.");
1430
1431 if (EVP_PKEY_keygen(ctx, &m->private_key) <= 0)
1432 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to generate Ed25519 key pair");
1433
1434 log_info("Successfully created Ed25519 key pair.");
1435
1436 (void) mkdir_p("/var/lib/systemd/home", 0755);
1437
1438 /* Write out public key (note that we only do that as a help to the user, we don't make use of this ever */
1439 r = fopen_temporary("/var/lib/systemd/home/local.public", &fpublic, &temp_public);
1440 if (r < 0)
1441 return log_error_errno(errno, "Failed to open key file for writing: %m");
1442
1443 if (PEM_write_PUBKEY(fpublic, m->private_key) <= 0)
1444 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to write public key.");
1445
1446 r = fflush_sync_and_check(fpublic);
1447 if (r < 0)
1448 return log_error_errno(r, "Failed to write private key: %m");
1449
1450 fpublic = safe_fclose(fpublic);
1451
1452 /* Write out the private key (this actually writes out both private and public, OpenSSL is confusing) */
1453 r = fopen_temporary("/var/lib/systemd/home/local.private", &fprivate, &temp_private);
1454 if (r < 0)
1455 return log_error_errno(errno, "Failed to open key file for writing: %m");
1456
1457 if (PEM_write_PrivateKey(fprivate, m->private_key, NULL, NULL, 0, NULL, 0) <= 0)
1458 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to write private key pair.");
1459
1460 r = fflush_sync_and_check(fprivate);
1461 if (r < 0)
1462 return log_error_errno(r, "Failed to write private key: %m");
1463
1464 fprivate = safe_fclose(fprivate);
1465
1466 /* Both are written now, move them into place */
1467
1468 if (rename(temp_public, "/var/lib/systemd/home/local.public") < 0)
1469 return log_error_errno(errno, "Failed to move public key file into place: %m");
1470 temp_public = mfree(temp_public);
1471
1472 r = RET_NERRNO(rename(temp_private, "/var/lib/systemd/home/local.private"));
1473 if (r < 0) {
1474 (void) unlink("/var/lib/systemd/home/local.public"); /* try to remove the file we already created */
1475 return log_error_errno(r, "Failed to move private key file into place: %m");
1476 }
1477 temp_private = mfree(temp_private);
1478
1479 r = fsync_path_at(AT_FDCWD, "/var/lib/systemd/home/");
1480 if (r < 0)
1481 log_warning_errno(r, "Failed to sync /var/lib/systemd/home/, ignoring: %m");
1482
1483 return 1;
1484 }
1485
1486 int manager_acquire_key_pair(Manager *m) {
1487 int r;
1488
1489 assert(m);
1490
1491 /* Already there? */
1492 if (m->private_key)
1493 return 1;
1494
1495 /* First try to load key off disk */
1496 r = manager_load_key_pair(m);
1497 if (r != 0)
1498 return r;
1499
1500 /* Didn't work, generate a new one */
1501 return manager_generate_key_pair(m);
1502 }
1503
1504 int manager_sign_user_record(Manager *m, UserRecord *u, UserRecord **ret, sd_bus_error *error) {
1505 int r;
1506
1507 assert(m);
1508 assert(u);
1509 assert(ret);
1510
1511 r = manager_acquire_key_pair(m);
1512 if (r < 0)
1513 return r;
1514 if (r == 0)
1515 return sd_bus_error_set(error, BUS_ERROR_NO_PRIVATE_KEY, "Can't sign without local key.");
1516
1517 return user_record_sign(u, m->private_key, ret);
1518 }
1519
1520 DEFINE_PRIVATE_HASH_OPS_FULL(public_key_hash_ops, char, string_hash_func, string_compare_func, free, EVP_PKEY, EVP_PKEY_free);
1521
1522 static int manager_load_public_key_one(Manager *m, const char *path) {
1523 _cleanup_(EVP_PKEY_freep) EVP_PKEY *pkey = NULL;
1524 _cleanup_fclose_ FILE *f = NULL;
1525 _cleanup_free_ char *fn = NULL;
1526 struct stat st;
1527 int r;
1528
1529 assert(m);
1530
1531 r = path_extract_filename(path, &fn);
1532 if (r < 0)
1533 return log_error_errno(r, "Failed to extract filename of path '%s': %m", path);
1534
1535 if (streq(fn, "local.public")) /* we already loaded the private key, which includes the public one */
1536 return 0;
1537
1538 f = fopen(path, "re");
1539 if (!f) {
1540 if (errno == ENOENT)
1541 return 0;
1542
1543 return log_error_errno(errno, "Failed to open public key %s: %m", path);
1544 }
1545
1546 if (fstat(fileno(f), &st) < 0)
1547 return log_error_errno(errno, "Failed to stat public key %s: %m", path);
1548
1549 r = stat_verify_regular(&st);
1550 if (r < 0)
1551 return log_error_errno(r, "Public key file %s is not a regular file: %m", path);
1552
1553 if (st.st_uid != 0 || (st.st_mode & 0022) != 0)
1554 return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Public key file %s is writable by more than the root user, refusing.", path);
1555
1556 r = hashmap_ensure_allocated(&m->public_keys, &public_key_hash_ops);
1557 if (r < 0)
1558 return log_oom();
1559
1560 pkey = PEM_read_PUBKEY(f, &pkey, NULL, NULL);
1561 if (!pkey)
1562 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to parse public key file %s.", path);
1563
1564 r = hashmap_put(m->public_keys, fn, pkey);
1565 if (r < 0)
1566 return log_error_errno(r, "Failed to add public key to set: %m");
1567
1568 TAKE_PTR(fn);
1569 TAKE_PTR(pkey);
1570
1571 return 0;
1572 }
1573
1574 static int manager_load_public_keys(Manager *m) {
1575 _cleanup_strv_free_ char **files = NULL;
1576 int r;
1577
1578 assert(m);
1579
1580 m->public_keys = hashmap_free(m->public_keys);
1581
1582 r = conf_files_list_nulstr(
1583 &files,
1584 ".public",
1585 NULL,
1586 CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED,
1587 KEY_PATHS_NULSTR);
1588 if (r < 0)
1589 return log_error_errno(r, "Failed to assemble list of public key directories: %m");
1590
1591 STRV_FOREACH(i, files)
1592 (void) manager_load_public_key_one(m, *i);
1593
1594 return 0;
1595 }
1596
1597 int manager_startup(Manager *m) {
1598 int r;
1599
1600 assert(m);
1601
1602 r = manager_listen_notify(m);
1603 if (r < 0)
1604 return r;
1605
1606 r = manager_connect_bus(m);
1607 if (r < 0)
1608 return r;
1609
1610 r = manager_bind_varlink(m);
1611 if (r < 0)
1612 return r;
1613
1614 r = manager_load_key_pair(m); /* only try to load it, don't generate any */
1615 if (r < 0)
1616 return r;
1617
1618 r = manager_load_public_keys(m);
1619 if (r < 0)
1620 return r;
1621
1622 manager_watch_home(m);
1623 (void) manager_watch_devices(m);
1624
1625 (void) manager_enumerate_records(m);
1626 (void) manager_enumerate_images(m);
1627 (void) manager_enumerate_devices(m);
1628
1629 /* Let's clean up home directories whose devices got removed while we were not running */
1630 (void) manager_enqueue_gc(m, NULL);
1631
1632 /* Let's clean up blob directories for home dirs that no longer exist */
1633 (void) manager_gc_blob(m);
1634
1635 return 0;
1636 }
1637
1638 void manager_revalidate_image(Manager *m, Home *h) {
1639 int r;
1640
1641 assert(m);
1642 assert(h);
1643
1644 /* Frees an automatically discovered image, if it's synthetic and its image disappeared. Unmounts any
1645 * image if it's mounted but its image vanished. */
1646
1647 if (h->current_operation || !ordered_set_isempty(h->pending_operations))
1648 return;
1649
1650 if (h->state == HOME_UNFIXATED) {
1651 r = user_record_test_image_path(h->record);
1652 if (r < 0)
1653 log_warning_errno(r, "Can't determine if image of %s exists, freeing unfixated user: %m", h->user_name);
1654 else if (r == USER_TEST_ABSENT)
1655 log_info("Image for %s disappeared, freeing unfixated user.", h->user_name);
1656 else
1657 return;
1658
1659 home_free(h);
1660
1661 } else if (h->state < 0) {
1662
1663 r = user_record_test_home_directory(h->record);
1664 if (r < 0) {
1665 log_warning_errno(r, "Unable to determine state of home directory, ignoring: %m");
1666 return;
1667 }
1668
1669 if (r == USER_TEST_MOUNTED) {
1670 r = user_record_test_image_path(h->record);
1671 if (r < 0) {
1672 log_warning_errno(r, "Unable to determine state of image path, ignoring: %m");
1673 return;
1674 }
1675
1676 if (r == USER_TEST_ABSENT) {
1677 _cleanup_(operation_unrefp) Operation *o = NULL;
1678
1679 log_notice("Backing image disappeared while home directory %s was mounted, unmounting it forcibly.", h->user_name);
1680 /* Wowza, the thing is mounted, but the device is gone? Act on it. */
1681
1682 r = home_killall(h);
1683 if (r < 0)
1684 log_warning_errno(r, "Failed to kill processes of user %s, ignoring: %m", h->user_name);
1685
1686 /* We enqueue the operation here, after all the home directory might
1687 * currently already run some operation, and we can deactivate it only after
1688 * that's complete. */
1689 o = operation_new(OPERATION_DEACTIVATE_FORCE, NULL);
1690 if (!o) {
1691 log_oom();
1692 return;
1693 }
1694
1695 r = home_schedule_operation(h, o, NULL);
1696 if (r < 0)
1697 log_warning_errno(r, "Failed to enqueue forced home directory %s deactivation, ignoring: %m", h->user_name);
1698 }
1699 }
1700 }
1701 }
1702
1703 int manager_gc_images(Manager *m) {
1704 Home *h;
1705
1706 assert_se(m);
1707
1708 if (m->gc_focus) {
1709 /* Focus on a specific home */
1710
1711 h = TAKE_PTR(m->gc_focus);
1712 manager_revalidate_image(m, h);
1713 } else {
1714 /* Gc all */
1715
1716 HASHMAP_FOREACH(h, m->homes_by_name)
1717 manager_revalidate_image(m, h);
1718 }
1719
1720 return 0;
1721 }
1722
1723 static int manager_gc_blob(Manager *m) {
1724 _cleanup_closedir_ DIR *d = NULL;
1725 int r;
1726
1727 assert(m);
1728
1729 d = opendir(home_system_blob_dir());
1730 if (!d) {
1731 if (errno == ENOENT)
1732 return 0;
1733 return log_error_errno(errno, "Failed to open %s: %m", home_system_blob_dir());
1734 }
1735
1736 FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read system blob directory: %m"))
1737 if (!hashmap_contains(m->homes_by_name, de->d_name)) {
1738 r = rm_rf_at(dirfd(d), de->d_name, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
1739 if (r < 0)
1740 log_warning_errno(r, "Failed to delete blob dir for missing user '%s', ignoring: %m", de->d_name);
1741 }
1742
1743 return 0;
1744 }
1745
1746 static int on_deferred_rescan(sd_event_source *s, void *userdata) {
1747 Manager *m = ASSERT_PTR(userdata);
1748
1749 m->deferred_rescan_event_source = sd_event_source_disable_unref(m->deferred_rescan_event_source);
1750
1751 manager_enumerate_devices(m);
1752 manager_enumerate_images(m);
1753 return 0;
1754 }
1755
1756 int manager_enqueue_rescan(Manager *m) {
1757 int r;
1758
1759 assert(m);
1760
1761 if (m->deferred_rescan_event_source)
1762 return 0;
1763
1764 if (!m->event)
1765 return 0;
1766
1767 if (IN_SET(sd_event_get_state(m->event), SD_EVENT_FINISHED, SD_EVENT_EXITING))
1768 return 0;
1769
1770 r = sd_event_add_defer(m->event, &m->deferred_rescan_event_source, on_deferred_rescan, m);
1771 if (r < 0)
1772 return log_error_errno(r, "Failed to allocate rescan event source: %m");
1773
1774 r = sd_event_source_set_priority(m->deferred_rescan_event_source, SD_EVENT_PRIORITY_IDLE+1);
1775 if (r < 0)
1776 log_warning_errno(r, "Failed to tweak priority of event source, ignoring: %m");
1777
1778 (void) sd_event_source_set_description(m->deferred_rescan_event_source, "deferred-rescan");
1779 return 1;
1780 }
1781
1782 static int on_deferred_gc(sd_event_source *s, void *userdata) {
1783 Manager *m = ASSERT_PTR(userdata);
1784
1785 m->deferred_gc_event_source = sd_event_source_disable_unref(m->deferred_gc_event_source);
1786
1787 manager_gc_images(m);
1788 return 0;
1789 }
1790
1791 int manager_enqueue_gc(Manager *m, Home *focus) {
1792 int r;
1793
1794 assert(m);
1795
1796 /* This enqueues a request to GC dead homes. It may be called with focus=NULL in which case all homes
1797 * will be scanned, or with the parameter set, in which case only that home is checked. */
1798
1799 if (!m->event)
1800 return 0;
1801
1802 if (IN_SET(sd_event_get_state(m->event), SD_EVENT_FINISHED, SD_EVENT_EXITING))
1803 return 0;
1804
1805 /* If a focus home is specified, then remember to focus just on this home. Otherwise invalidate any
1806 * focus that might be set to look at all homes. */
1807
1808 if (m->deferred_gc_event_source) {
1809 if (m->gc_focus != focus) /* not the same focus, then look at everything */
1810 m->gc_focus = NULL;
1811
1812 return 0;
1813 } else
1814 m->gc_focus = focus; /* start focused */
1815
1816 r = sd_event_add_defer(m->event, &m->deferred_gc_event_source, on_deferred_gc, m);
1817 if (r < 0)
1818 return log_error_errno(r, "Failed to allocate GC event source: %m");
1819
1820 r = sd_event_source_set_priority(m->deferred_gc_event_source, SD_EVENT_PRIORITY_IDLE);
1821 if (r < 0)
1822 log_warning_errno(r, "Failed to tweak priority of event source, ignoring: %m");
1823
1824 (void) sd_event_source_set_description(m->deferred_gc_event_source, "deferred-gc");
1825 return 1;
1826 }
1827
1828 static bool manager_shall_rebalance(Manager *m) {
1829 Home *h;
1830
1831 assert(m);
1832
1833 if (IN_SET(m->rebalance_state, REBALANCE_PENDING, REBALANCE_SHRINKING, REBALANCE_GROWING))
1834 return true;
1835
1836 HASHMAP_FOREACH(h, m->homes_by_name)
1837 if (home_shall_rebalance(h))
1838 return true;
1839
1840 return false;
1841 }
1842
1843 static int home_cmp(Home *const*a, Home *const*b) {
1844 int r;
1845
1846 assert(a);
1847 assert(*a);
1848 assert(b);
1849 assert(*b);
1850
1851 /* Order user records by their weight (and by their name, to make things stable). We put the records
1852 * with the highest weight last, since we distribute space from the beginning and round down, hence
1853 * later entries tend to get slightly more than earlier entries. */
1854
1855 r = CMP(user_record_rebalance_weight((*a)->record), user_record_rebalance_weight((*b)->record));
1856 if (r != 0)
1857 return r;
1858
1859 return strcmp((*a)->user_name, (*b)->user_name);
1860 }
1861
1862 static int manager_rebalance_calculate(Manager *m) {
1863 uint64_t weight_sum, free_sum, usage_sum = 0, min_free = UINT64_MAX;
1864 _cleanup_free_ Home **array = NULL;
1865 bool relevant = false;
1866 struct statfs sfs;
1867 int c = 0, r;
1868 Home *h;
1869
1870 assert(m);
1871
1872 if (statfs(get_home_root(), &sfs) < 0)
1873 return log_error_errno(errno, "Failed to statfs() /home: %m");
1874
1875 free_sum = (uint64_t) sfs.f_bsize * sfs.f_bavail; /* This much free space is available on the
1876 * underlying pool directory */
1877
1878 weight_sum = REBALANCE_WEIGHT_BACKING; /* Grant the underlying pool directory a fixed weight of 20
1879 * (home dirs get 100 by default, i.e. 5x more). This weight
1880 * is not configurable, the per-home weights are. */
1881
1882 HASHMAP_FOREACH(h, m->homes_by_name) {
1883 statfs_f_type_t fstype;
1884 h->rebalance_pending = false; /* First, reset the flag, we only want it to be true for the
1885 * homes that qualify for rebalancing */
1886
1887 if (!home_shall_rebalance(h)) /* Only look at actual candidates */
1888 continue;
1889
1890 if (home_is_busy(h))
1891 return -EBUSY; /* Let's not rebalance if there's a busy home directory. */
1892
1893 r = home_get_disk_status(
1894 h,
1895 &h->rebalance_size,
1896 &h->rebalance_usage,
1897 &h->rebalance_free,
1898 NULL,
1899 NULL,
1900 &fstype,
1901 NULL);
1902 if (r < 0) {
1903 log_warning_errno(r, "Failed to get free space of home '%s', ignoring.", h->user_name);
1904 continue;
1905 }
1906
1907 if (h->rebalance_free > UINT64_MAX - free_sum)
1908 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Rebalance free overflow");
1909 free_sum += h->rebalance_free;
1910
1911 if (h->rebalance_usage > UINT64_MAX - usage_sum)
1912 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Rebalance usage overflow");
1913 usage_sum += h->rebalance_usage;
1914
1915 h->rebalance_weight = user_record_rebalance_weight(h->record);
1916 if (h->rebalance_weight > UINT64_MAX - weight_sum)
1917 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Rebalance weight overflow");
1918 weight_sum += h->rebalance_weight;
1919
1920 h->rebalance_min = minimal_size_by_fs_magic(fstype);
1921
1922 if (!GREEDY_REALLOC(array, c+1))
1923 return log_oom();
1924
1925 array[c++] = h;
1926 }
1927
1928 if (c == 0) {
1929 log_debug("No homes to rebalance.");
1930 return 0;
1931 }
1932
1933 assert(weight_sum > 0);
1934
1935 log_debug("Disk space usage by all home directories to rebalance: %s — available disk space: %s",
1936 FORMAT_BYTES(usage_sum), FORMAT_BYTES(free_sum));
1937
1938 /* Bring the home directories in a well-defined order, so that we distribute space in a reproducible
1939 * way for the same parameters. */
1940 typesafe_qsort(array, c, home_cmp);
1941
1942 for (int i = 0; i < c; i++) {
1943 uint64_t new_free;
1944 double d;
1945
1946 h = array[i];
1947
1948 assert(h->rebalance_free <= free_sum);
1949 assert(h->rebalance_usage <= usage_sum);
1950 assert(h->rebalance_weight <= weight_sum);
1951
1952 d = ((double) (free_sum / 4096) * (double) h->rebalance_weight) / (double) weight_sum; /* Calculate new space for this home in units of 4K */
1953
1954 /* Convert from units of 4K back to bytes */
1955 if (d >= (double) (UINT64_MAX/4096))
1956 new_free = UINT64_MAX;
1957 else
1958 new_free = (uint64_t) d * 4096;
1959
1960 /* Subtract the weight and assigned space from the sums now, to distribute the rounding noise
1961 * to the remaining home dirs */
1962 free_sum = LESS_BY(free_sum, new_free);
1963 weight_sum = LESS_BY(weight_sum, h->rebalance_weight);
1964
1965 /* Keep track of home directory with the least amount of space left: we want to schedule the
1966 * next rebalance more quickly if this is low */
1967 if (new_free < min_free)
1968 min_free = h->rebalance_size;
1969
1970 if (new_free > UINT64_MAX - h->rebalance_usage)
1971 h->rebalance_goal = UINT64_MAX-1; /* maximum size */
1972 else {
1973 h->rebalance_goal = h->rebalance_usage + new_free;
1974
1975 if (h->rebalance_min != UINT64_MAX && h->rebalance_goal < h->rebalance_min)
1976 h->rebalance_goal = h->rebalance_min;
1977 }
1978
1979 /* Skip over this home if the state doesn't match the operation */
1980 if ((m->rebalance_state == REBALANCE_SHRINKING && h->rebalance_goal > h->rebalance_size) ||
1981 (m->rebalance_state == REBALANCE_GROWING && h->rebalance_goal < h->rebalance_size))
1982 h->rebalance_pending = false;
1983 else {
1984 log_debug("Rebalancing home directory '%s' %s %s %s.", h->user_name,
1985 FORMAT_BYTES(h->rebalance_size),
1986 special_glyph(SPECIAL_GLYPH_ARROW_RIGHT),
1987 FORMAT_BYTES(h->rebalance_goal));
1988 h->rebalance_pending = true;
1989 }
1990
1991 if ((fabs((double) h->rebalance_size - (double) h->rebalance_goal) * 100 / (double) h->rebalance_size) >= 5.0)
1992 relevant = true;
1993 }
1994
1995 /* Scale next rebalancing interval based on the least amount of space of any of the home
1996 * directories. We pick a time in the range 1min … 15min, scaled by log2(min_free), so that:
1997 * 10M → ~0.7min, 100M → ~2.7min, 1G → ~4.6min, 10G → ~6.5min, 100G ~8.4 */
1998 m->rebalance_interval_usec = (usec_t) CLAMP((LESS_BY(log2(min_free), 22)*15*USEC_PER_MINUTE)/26,
1999 1 * USEC_PER_MINUTE,
2000 15 * USEC_PER_MINUTE);
2001
2002
2003 log_debug("Rebalancing interval set to %s.", FORMAT_TIMESPAN(m->rebalance_interval_usec, USEC_PER_MSEC));
2004
2005 /* Let's suppress small resizes, growing/shrinking file systems isn't free after all */
2006 if (!relevant) {
2007 log_debug("Skipping rebalancing, since all calculated size changes are below ±5%%.");
2008 return 0;
2009 }
2010
2011 return c;
2012 }
2013
2014 static int manager_rebalance_apply(Manager *m) {
2015 int c = 0, r;
2016 Home *h;
2017
2018 assert(m);
2019
2020 HASHMAP_FOREACH(h, m->homes_by_name) {
2021 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
2022
2023 if (!h->rebalance_pending)
2024 continue;
2025
2026 h->rebalance_pending = false;
2027
2028 r = home_resize(h, h->rebalance_goal, /* secret= */ NULL, &error);
2029 if (r < 0)
2030 log_warning_errno(r, "Failed to resize home '%s' for rebalancing, ignoring: %s",
2031 h->user_name, bus_error_message(&error, r));
2032 else
2033 c++;
2034 }
2035
2036 return c;
2037 }
2038
2039 static void manager_rebalance_reply_messages(Manager *m) {
2040 int r;
2041
2042 assert(m);
2043
2044 for (;;) {
2045 _cleanup_(sd_bus_message_unrefp) sd_bus_message *msg =
2046 set_steal_first(m->rebalance_pending_method_calls);
2047
2048 if (!msg)
2049 break;
2050
2051 r = sd_bus_reply_method_return(msg, NULL);
2052 if (r < 0)
2053 log_debug_errno(r, "Failed to reply to rebalance method call, ignoring: %m");
2054 }
2055 }
2056
2057 static int manager_rebalance_now(Manager *m) {
2058 RebalanceState busy_state; /* the state to revert to when operation fails if busy */
2059 int r;
2060
2061 assert(m);
2062
2063 log_debug("Rebalancing now...");
2064
2065 /* We maintain a simple state engine here to keep track of what we are doing. We'll first shrink all
2066 * homes that shall be shrunk and then grow all homes that shall be grown, so that they can take up
2067 * the space now freed. */
2068
2069 for (;;) {
2070 switch (m->rebalance_state) {
2071
2072 case REBALANCE_IDLE:
2073 case REBALANCE_PENDING:
2074 case REBALANCE_WAITING:
2075 /* First shrink large home dirs */
2076 m->rebalance_state = REBALANCE_SHRINKING;
2077 busy_state = REBALANCE_PENDING;
2078
2079 /* We are initiating the next rebalancing cycle now, let's make the queued methods
2080 * calls the pending ones, and flush out any pending ones (which shouldn't exist at
2081 * this time anyway) */
2082 set_clear(m->rebalance_pending_method_calls);
2083 SWAP_TWO(m->rebalance_pending_method_calls, m->rebalance_queued_method_calls);
2084
2085 log_debug("Shrinking phase..");
2086 break;
2087
2088 case REBALANCE_SHRINKING:
2089 /* Then grow small home dirs */
2090 m->rebalance_state = REBALANCE_GROWING;
2091 busy_state = REBALANCE_SHRINKING;
2092 log_debug("Growing phase..");
2093 break;
2094
2095 case REBALANCE_GROWING:
2096 /* Finally, we are done */
2097 log_info("Rebalancing complete.");
2098 m->rebalance_state = REBALANCE_IDLE;
2099 r = 0;
2100 goto finish;
2101
2102 case REBALANCE_OFF:
2103 default:
2104 assert_not_reached();
2105 }
2106
2107 r = manager_rebalance_calculate(m);
2108 if (r == -EBUSY) {
2109 /* Calculations failed because one home directory is currently busy. Revert to a state that
2110 * tells us what to do next. */
2111 log_debug("Can't enter phase, busy.");
2112 m->rebalance_state = busy_state;
2113 return r;
2114 }
2115 if (r < 0)
2116 goto finish;
2117 if (r == 0)
2118 continue; /* got to next step immediately, if there's nothing to do */
2119
2120 r = manager_rebalance_apply(m);
2121 if (r < 0)
2122 goto finish;
2123 if (r > 0)
2124 break; /* At least one resize operation is now pending, we are done for now */
2125
2126 /* If there was nothing to apply, go for next state right-away */
2127 }
2128
2129 return 0;
2130
2131 finish:
2132 /* Reset state and schedule next rebalance */
2133 m->rebalance_state = REBALANCE_IDLE;
2134 manager_rebalance_reply_messages(m);
2135 (void) manager_schedule_rebalance(m, /* immediately= */ false);
2136 return r;
2137 }
2138
2139 static int on_rebalance_timer(sd_event_source *s, usec_t t, void *userdata) {
2140 Manager *m = ASSERT_PTR(userdata);
2141
2142 assert(s);
2143 assert(IN_SET(m->rebalance_state, REBALANCE_WAITING, REBALANCE_PENDING, REBALANCE_SHRINKING, REBALANCE_GROWING));
2144
2145 (void) manager_rebalance_now(m);
2146 return 0;
2147 }
2148
2149 int manager_schedule_rebalance(Manager *m, bool immediately) {
2150 int r;
2151
2152 assert(m);
2153
2154 /* Check if there are any records where rebalancing is requested */
2155 if (!manager_shall_rebalance(m)) {
2156 log_debug("Not scheduling rebalancing, not needed.");
2157 r = 0; /* report that we didn't schedule anything because nothing needed it */
2158 goto turn_off;
2159 }
2160
2161 if (immediately) {
2162 /* If we are told to rebalance immediately, then mark a rebalance as pending (even if we area
2163 * already running one) */
2164
2165 if (m->rebalance_event_source) {
2166 r = sd_event_source_set_time(m->rebalance_event_source, 0);
2167 if (r < 0) {
2168 log_error_errno(r, "Failed to schedule immediate rebalancing: %m");
2169 goto turn_off;
2170 }
2171
2172 r = sd_event_source_set_enabled(m->rebalance_event_source, SD_EVENT_ONESHOT);
2173 if (r < 0) {
2174 log_error_errno(r, "Failed to enable rebalancing event source: %m");
2175 goto turn_off;
2176 }
2177 } else {
2178 r = sd_event_add_time(m->event, &m->rebalance_event_source, CLOCK_MONOTONIC, 0, USEC_PER_SEC, on_rebalance_timer, m);
2179 if (r < 0) {
2180 log_error_errno(r, "Failed to allocate rebalance event source: %m");
2181 goto turn_off;
2182 }
2183
2184 r = sd_event_source_set_priority(m->rebalance_event_source, SD_EVENT_PRIORITY_IDLE + 10);
2185 if (r < 0) {
2186 log_error_errno(r, "Failed to set rebalance event source priority: %m");
2187 goto turn_off;
2188 }
2189
2190 (void) sd_event_source_set_description(m->rebalance_event_source, "rebalance");
2191
2192 }
2193
2194 if (!IN_SET(m->rebalance_state, REBALANCE_PENDING, REBALANCE_SHRINKING, REBALANCE_GROWING))
2195 m->rebalance_state = REBALANCE_PENDING;
2196
2197 log_debug("Scheduled immediate rebalancing...");
2198 return 1; /* report that we scheduled something */
2199 }
2200
2201 /* If we are told to schedule a rebalancing eventually, then do so only if we are not executing
2202 * anything yet. Also if we have something scheduled already, leave it in place */
2203 if (!IN_SET(m->rebalance_state, REBALANCE_OFF, REBALANCE_IDLE))
2204 return 1; /* report that there's already something scheduled */
2205
2206 if (m->rebalance_event_source) {
2207 r = sd_event_source_set_time_relative(m->rebalance_event_source, m->rebalance_interval_usec);
2208 if (r < 0) {
2209 log_error_errno(r, "Failed to schedule immediate rebalancing: %m");
2210 goto turn_off;
2211 }
2212
2213 r = sd_event_source_set_enabled(m->rebalance_event_source, SD_EVENT_ONESHOT);
2214 if (r < 0) {
2215 log_error_errno(r, "Failed to enable rebalancing event source: %m");
2216 goto turn_off;
2217 }
2218 } else {
2219 r = sd_event_add_time_relative(m->event, &m->rebalance_event_source, CLOCK_MONOTONIC, m->rebalance_interval_usec, USEC_PER_SEC, on_rebalance_timer, m);
2220 if (r < 0) {
2221 log_error_errno(r, "Failed to allocate rebalance event source: %m");
2222 goto turn_off;
2223 }
2224
2225 r = sd_event_source_set_priority(m->rebalance_event_source, SD_EVENT_PRIORITY_IDLE + 10);
2226 if (r < 0) {
2227 log_error_errno(r, "Failed to set rebalance event source priority: %m");
2228 goto turn_off;
2229 }
2230
2231 (void) sd_event_source_set_description(m->rebalance_event_source, "rebalance");
2232 }
2233
2234 m->rebalance_state = REBALANCE_WAITING; /* We managed to enqueue a timer event, we now wait until it fires */
2235 log_debug("Scheduled rebalancing in %s...", FORMAT_TIMESPAN(m->rebalance_interval_usec, 0));
2236 return 1; /* report that we scheduled something */
2237
2238 turn_off:
2239 m->rebalance_event_source = sd_event_source_disable_unref(m->rebalance_event_source);
2240 m->rebalance_state = REBALANCE_OFF;
2241 manager_rebalance_reply_messages(m);
2242 return r;
2243 }
2244
2245 int manager_reschedule_rebalance(Manager *m) {
2246 int r;
2247
2248 assert(m);
2249
2250 /* If a rebalance is pending reschedules it so it gets executed immediately */
2251
2252 if (!IN_SET(m->rebalance_state, REBALANCE_PENDING, REBALANCE_SHRINKING, REBALANCE_GROWING))
2253 return 0;
2254
2255 r = manager_schedule_rebalance(m, /* immediately= */ true);
2256 if (r < 0)
2257 return r;
2258
2259 return 1;
2260 }