]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/home/homed-manager.c
tree-wide: use ASSERT_PTR more
[thirdparty/systemd.git] / src / home / homed-manager.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
70a5db58
LP
2
3#include <grp.h>
4#include <linux/fs.h>
5#include <linux/magic.h>
d357b80d 6#include <math.h>
70a5db58
LP
7#include <openssl/pem.h>
8#include <pwd.h>
9#include <sys/ioctl.h>
10#include <sys/quota.h>
11#include <sys/stat.h>
12
e1614484
ZJS
13#include "sd-id128.h"
14
70a5db58
LP
15#include "btrfs-util.h"
16#include "bus-common-errors.h"
17#include "bus-error.h"
ac9f55ed 18#include "bus-log-control-api.h"
70a5db58
LP
19#include "bus-polkit.h"
20#include "clean-ipc.h"
21#include "conf-files.h"
22#include "device-util.h"
23#include "dirent-util.h"
24#include "fd-util.h"
25#include "fileio.h"
26#include "format-util.h"
27#include "fs-util.h"
e2341b6b 28#include "glyph-util.h"
70a5db58
LP
29#include "gpt.h"
30#include "home-util.h"
c76dd733 31#include "homed-conf.h"
70a5db58
LP
32#include "homed-home-bus.h"
33#include "homed-home.h"
34#include "homed-manager-bus.h"
35#include "homed-manager.h"
36#include "homed-varlink.h"
37#include "io-util.h"
38#include "mkdir.h"
39#include "process-util.h"
40#include "quota-util.h"
41#include "random-util.h"
d357b80d 42#include "resize-fs.h"
70a5db58 43#include "socket-util.h"
d357b80d 44#include "sort-util.h"
70a5db58
LP
45#include "stat-util.h"
46#include "strv.h"
bf819d3a 47#include "sync-util.h"
70a5db58
LP
48#include "tmpfile-util.h"
49#include "udev-util.h"
50#include "user-record-sign.h"
51#include "user-record-util.h"
52#include "user-record.h"
53#include "user-util.h"
54
/* Directories searched for the private/public keys used to sign user records, as a NUL-separated string
 * list. CONF_PATHS_NULSTR() is not used because /var/lib/systemd/home/ has to sit in the middle of the
 * list: that is where a persistent key pair is auto-generated if one is needed. */
#define KEY_PATHS_NULSTR                        \
        "/etc/systemd/home/\0"                  \
        "/run/systemd/home/\0"                  \
        "/var/lib/systemd/home/\0"              \
        "/usr/local/lib/systemd/home/\0"        \
        "/usr/lib/systemd/home/\0"
64
65static bool uid_is_home(uid_t uid) {
66 return uid >= HOME_UID_MIN && uid <= HOME_UID_MAX;
67}
/* Takes a value generated randomly or by hashing and turns it into a UID in the right range. The value is
 * converted to uid_t first and only then reduced modulo the size of the home UID range. */
#define UID_CLAMP_INTO_HOME_RANGE(rnd)                                          \
        (HOME_UID_MIN + ((uid_t) (rnd) % (HOME_UID_MAX - HOME_UID_MIN + 1)))
71
72DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_uid_hash_ops, void, trivial_hash_func, trivial_compare_func, Home, home_free);
73DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_name_hash_ops, char, string_hash_func, string_compare_func, Home, home_free);
74DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_worker_pid_hash_ops, void, trivial_hash_func, trivial_compare_func, Home, home_free);
75DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_sysfs_hash_ops, char, path_hash_func, path_compare, Home, home_free);
76
77static int on_home_inotify(sd_event_source *s, const struct inotify_event *event, void *userdata);
78static int manager_gc_images(Manager *m);
79static int manager_enumerate_images(Manager *m);
80static int manager_assess_image(Manager *m, int dir_fd, const char *dir_path, const char *dentry_name);
81static void manager_revalidate_image(Manager *m, Home *h);
82
83static void manager_watch_home(Manager *m) {
84 struct statfs sfs;
85 int r;
86
87 assert(m);
88
cf536638 89 m->inotify_event_source = sd_event_source_disable_unref(m->inotify_event_source);
70a5db58
LP
90 m->scan_slash_home = false;
91
2700fecd 92 if (statfs(get_home_root(), &sfs) < 0) {
70a5db58 93 log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
2700fecd 94 "Failed to statfs() %s directory, disabling automatic scanning.", get_home_root());
70a5db58
LP
95 return;
96 }
97
98 if (is_network_fs(&sfs)) {
2700fecd 99 log_info("%s is a network file system, disabling automatic scanning.", get_home_root());
70a5db58
LP
100 return;
101 }
102
103 if (is_fs_type(&sfs, AUTOFS_SUPER_MAGIC)) {
2700fecd 104 log_info("%s is on autofs, disabling automatic scanning.", get_home_root());
70a5db58
LP
105 return;
106 }
107
108 m->scan_slash_home = true;
109
2700fecd 110 r = sd_event_add_inotify(m->event, &m->inotify_event_source, get_home_root(),
23d24b76
ZJS
111 IN_CREATE|IN_CLOSE_WRITE|IN_DELETE_SELF|IN_MOVE_SELF|IN_ONLYDIR|IN_MOVED_TO|IN_MOVED_FROM|IN_DELETE,
112 on_home_inotify, m);
70a5db58
LP
113 if (r < 0)
114 log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
2700fecd 115 "Failed to create inotify watch on %s, ignoring.", get_home_root());
70a5db58
LP
116
117 (void) sd_event_source_set_description(m->inotify_event_source, "home-inotify");
2700fecd
LP
118
119 log_info("Watching %s.", get_home_root());
70a5db58
LP
120}
121
122static int on_home_inotify(sd_event_source *s, const struct inotify_event *event, void *userdata) {
2700fecd 123 _cleanup_free_ char *j = NULL;
99534007 124 Manager *m = ASSERT_PTR(userdata);
70a5db58
LP
125 const char *e, *n;
126
70a5db58
LP
127 assert(event);
128
129 if ((event->mask & (IN_Q_OVERFLOW|IN_MOVE_SELF|IN_DELETE_SELF|IN_IGNORED|IN_UNMOUNT)) != 0) {
130
131 if (FLAGS_SET(event->mask, IN_Q_OVERFLOW))
2700fecd 132 log_debug("%s inotify queue overflow, rescanning.", get_home_root());
70a5db58 133 else if (FLAGS_SET(event->mask, IN_MOVE_SELF))
2700fecd 134 log_info("%s moved or renamed, recreating watch and rescanning.", get_home_root());
70a5db58 135 else if (FLAGS_SET(event->mask, IN_DELETE_SELF))
2700fecd 136 log_info("%s deleted, recreating watch and rescanning.", get_home_root());
70a5db58 137 else if (FLAGS_SET(event->mask, IN_UNMOUNT))
2700fecd 138 log_info("%s unmounted, recreating watch and rescanning.", get_home_root());
70a5db58 139 else if (FLAGS_SET(event->mask, IN_IGNORED))
2700fecd 140 log_info("%s watch invalidated, recreating watch and rescanning.", get_home_root());
70a5db58
LP
141
142 manager_watch_home(m);
143 (void) manager_gc_images(m);
144 (void) manager_enumerate_images(m);
145 (void) bus_manager_emit_auto_login_changed(m);
146 return 0;
147 }
148
149 /* For the other inotify events, let's ignore all events for file names that don't match our
150 * expectations */
151 if (isempty(event->name))
152 return 0;
153 e = endswith(event->name, FLAGS_SET(event->mask, IN_ISDIR) ? ".homedir" : ".home");
154 if (!e)
155 return 0;
156
2f82562b 157 n = strndupa_safe(event->name, e - event->name);
70a5db58
LP
158 if (!suitable_user_name(n))
159 return 0;
160
2700fecd
LP
161 j = path_join(get_home_root(), event->name);
162 if (!j)
163 return log_oom();
164
70a5db58
LP
165 if ((event->mask & (IN_CREATE|IN_CLOSE_WRITE|IN_MOVED_TO)) != 0) {
166 if (FLAGS_SET(event->mask, IN_CREATE))
2700fecd 167 log_debug("%s has been created, having a look.", j);
70a5db58 168 else if (FLAGS_SET(event->mask, IN_CLOSE_WRITE))
2700fecd 169 log_debug("%s has been modified, having a look.", j);
70a5db58 170 else if (FLAGS_SET(event->mask, IN_MOVED_TO))
2700fecd 171 log_debug("%s has been moved in, having a look.", j);
70a5db58 172
2700fecd 173 (void) manager_assess_image(m, -1, get_home_root(), event->name);
70a5db58
LP
174 (void) bus_manager_emit_auto_login_changed(m);
175 }
176
755b35b1 177 if ((event->mask & (IN_DELETE | IN_CLOSE_WRITE | IN_MOVED_FROM)) != 0) {
70a5db58
LP
178 Home *h;
179
180 if (FLAGS_SET(event->mask, IN_DELETE))
2700fecd 181 log_debug("%s has been deleted, revalidating.", j);
70a5db58 182 else if (FLAGS_SET(event->mask, IN_CLOSE_WRITE))
2700fecd 183 log_debug("%s has been closed after writing, revalidating.", j);
70a5db58 184 else if (FLAGS_SET(event->mask, IN_MOVED_FROM))
2700fecd 185 log_debug("%s has been moved away, revalidating.", j);
70a5db58
LP
186
187 h = hashmap_get(m->homes_by_name, n);
188 if (h) {
189 manager_revalidate_image(m, h);
190 (void) bus_manager_emit_auto_login_changed(m);
191 }
192 }
193
194 return 0;
195}
196
197int manager_new(Manager **ret) {
198 _cleanup_(manager_freep) Manager *m = NULL;
199 int r;
200
201 assert(ret);
202
c76dd733 203 m = new(Manager, 1);
70a5db58
LP
204 if (!m)
205 return -ENOMEM;
206
c76dd733
LP
207 *m = (Manager) {
208 .default_storage = _USER_STORAGE_INVALID,
d357b80d 209 .rebalance_interval_usec = 2 * USEC_PER_MINUTE, /* initially, rebalance every 2min */
c76dd733
LP
210 };
211
212 r = manager_parse_config_file(m);
213 if (r < 0)
214 return r;
215
70a5db58
LP
216 r = sd_event_default(&m->event);
217 if (r < 0)
218 return r;
219
220 r = sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
221 if (r < 0)
222 return r;
223
224 r = sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
225 if (r < 0)
226 return r;
227
228 (void) sd_event_set_watchdog(m->event, true);
229
230 m->homes_by_uid = hashmap_new(&homes_by_uid_hash_ops);
231 if (!m->homes_by_uid)
232 return -ENOMEM;
233
234 m->homes_by_name = hashmap_new(&homes_by_name_hash_ops);
235 if (!m->homes_by_name)
236 return -ENOMEM;
237
238 m->homes_by_worker_pid = hashmap_new(&homes_by_worker_pid_hash_ops);
239 if (!m->homes_by_worker_pid)
240 return -ENOMEM;
241
242 m->homes_by_sysfs = hashmap_new(&homes_by_sysfs_hash_ops);
243 if (!m->homes_by_sysfs)
244 return -ENOMEM;
245
246 *ret = TAKE_PTR(m);
247 return 0;
248}
249
250Manager* manager_free(Manager *m) {
9796a9fb
LP
251 Home *h;
252
70a5db58
LP
253 assert(m);
254
9796a9fb
LP
255 HASHMAP_FOREACH(h, m->homes_by_worker_pid)
256 (void) home_wait_for_worker(h);
257
76fc1577
YW
258 m->bus = sd_bus_flush_close_unref(m->bus);
259 m->polkit_registry = bus_verify_polkit_async_registry_free(m->polkit_registry);
70a5db58 260
70a5db58
LP
261 m->device_monitor = sd_device_monitor_unref(m->device_monitor);
262
f76e5644
ZJS
263 m->inotify_event_source = sd_event_source_unref(m->inotify_event_source);
264 m->notify_socket_event_source = sd_event_source_unref(m->notify_socket_event_source);
70a5db58
LP
265 m->deferred_rescan_event_source = sd_event_source_unref(m->deferred_rescan_event_source);
266 m->deferred_gc_event_source = sd_event_source_unref(m->deferred_gc_event_source);
267 m->deferred_auto_login_event_source = sd_event_source_unref(m->deferred_auto_login_event_source);
d357b80d 268 m->rebalance_event_source = sd_event_source_unref(m->rebalance_event_source);
70a5db58 269
76fc1577 270 m->event = sd_event_unref(m->event);
f76e5644 271
76fc1577
YW
272 m->homes_by_uid = hashmap_free(m->homes_by_uid);
273 m->homes_by_name = hashmap_free(m->homes_by_name);
274 m->homes_by_worker_pid = hashmap_free(m->homes_by_worker_pid);
275 m->homes_by_sysfs = hashmap_free(m->homes_by_sysfs);
f76e5644 276
70a5db58
LP
277 if (m->private_key)
278 EVP_PKEY_free(m->private_key);
279
280 hashmap_free(m->public_keys);
281
282 varlink_server_unref(m->varlink_server);
cc9886bc 283 free(m->userdb_service);
70a5db58 284
c76dd733
LP
285 free(m->default_file_system_type);
286
70a5db58
LP
287 return mfree(m);
288}
289
290int manager_verify_user_record(Manager *m, UserRecord *hr) {
291 EVP_PKEY *pkey;
70a5db58
LP
292 int r;
293
294 assert(m);
295 assert(hr);
296
297 if (!m->private_key && hashmap_isempty(m->public_keys)) {
298 r = user_record_has_signature(hr);
299 if (r < 0)
300 return r;
301
302 return r ? -ENOKEY : USER_RECORD_UNSIGNED;
303 }
304
305 /* Is it our own? */
306 if (m->private_key) {
307 r = user_record_verify(hr, m->private_key);
308 switch (r) {
309
310 case USER_RECORD_FOREIGN:
311 /* This record is not signed by this key, but let's see below */
312 break;
313
314 case USER_RECORD_SIGNED: /* Signed by us, but also by others, let's propagate that */
315 case USER_RECORD_SIGNED_EXCLUSIVE: /* Signed by us, and nothing else, ditto */
316 case USER_RECORD_UNSIGNED: /* Not signed at all, ditto */
317 default:
318 return r;
319 }
320 }
321
90e74a66 322 HASHMAP_FOREACH(pkey, m->public_keys) {
70a5db58
LP
323 r = user_record_verify(hr, pkey);
324 switch (r) {
325
326 case USER_RECORD_FOREIGN:
327 /* This record is not signed by this key, but let's see our other keys */
328 break;
329
330 case USER_RECORD_SIGNED: /* It's signed by this key we are happy with, but which is not our own. */
331 case USER_RECORD_SIGNED_EXCLUSIVE:
332 return USER_RECORD_FOREIGN;
333
334 case USER_RECORD_UNSIGNED: /* It's not signed at all */
335 default:
336 return r;
337 }
338 }
339
340 return -ENOKEY;
341}
342
343static int manager_add_home_by_record(
344 Manager *m,
345 const char *name,
346 int dir_fd,
347 const char *fname) {
348
349 _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
852640f8 350 _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
70a5db58
LP
351 unsigned line, column;
352 int r, is_signed;
20f4a308 353 struct stat st;
70a5db58
LP
354 Home *h;
355
356 assert(m);
357 assert(name);
358 assert(fname);
359
20f4a308
LP
360 if (fstatat(dir_fd, fname, &st, 0) < 0)
361 return log_error_errno(errno, "Failed to stat identity record %s: %m", fname);
362
363 if (!S_ISREG(st.st_mode)) {
364 log_debug("Identity record file %s is not a regular file, ignoring.", fname);
365 return 0;
366 }
367
368 if (st.st_size == 0)
369 goto unlink_this_file;
370
70a5db58
LP
371 r = json_parse_file_at(NULL, dir_fd, fname, JSON_PARSE_SENSITIVE, &v, &line, &column);
372 if (r < 0)
373 return log_error_errno(r, "Failed to parse identity record at %s:%u%u: %m", fname, line, column);
374
20f4a308
LP
375 if (json_variant_is_blank_object(v))
376 goto unlink_this_file;
377
70a5db58
LP
378 hr = user_record_new();
379 if (!hr)
380 return log_oom();
381
bfc0cc1a 382 r = user_record_load(hr, v, USER_RECORD_LOAD_REFUSE_SECRET|USER_RECORD_LOG|USER_RECORD_PERMISSIVE);
70a5db58
LP
383 if (r < 0)
384 return r;
385
386 if (!streq_ptr(hr->user_name, name))
23d24b76
ZJS
387 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
388 "Identity's user name %s does not match file name %s, refusing.",
389 hr->user_name, name);
70a5db58
LP
390
391 is_signed = manager_verify_user_record(m, hr);
392 switch (is_signed) {
393
394 case -ENOKEY:
395 return log_warning_errno(is_signed, "User record %s is not signed by any accepted key, ignoring.", fname);
396 case USER_RECORD_UNSIGNED:
397 return log_warning_errno(SYNTHETIC_ERRNO(EPERM), "User record %s is not signed at all, ignoring.", fname);
398 case USER_RECORD_SIGNED:
399 log_info("User record %s is signed by us (and others), accepting.", fname);
400 break;
401 case USER_RECORD_SIGNED_EXCLUSIVE:
402 log_info("User record %s is signed only by us, accepting.", fname);
403 break;
404 case USER_RECORD_FOREIGN:
405 log_info("User record %s is signed by registered key from others, accepting.", fname);
406 break;
407 default:
408 assert(is_signed < 0);
409 return log_error_errno(is_signed, "Failed to verify signature of user record in %s: %m", fname);
410 }
411
412 h = hashmap_get(m->homes_by_name, name);
413 if (h) {
414 r = home_set_record(h, hr);
415 if (r < 0)
416 return log_error_errno(r, "Failed to update home record for %s: %m", name);
417
418 /* If we acquired a record now for a previously unallocated entry, then reset the state. This
419 * makes sure home_get_state() will check for the availability of the image file dynamically
162392b7 420 * in order to detect to distinguish HOME_INACTIVE and HOME_ABSENT. */
70a5db58
LP
421 if (h->state == HOME_UNFIXATED)
422 h->state = _HOME_STATE_INVALID;
423 } else {
424 r = home_new(m, hr, NULL, &h);
425 if (r < 0)
426 return log_error_errno(r, "Failed to allocate new home object: %m");
427
428 log_info("Added registered home for user %s.", hr->user_name);
429 }
430
431 /* Only entries we exclusively signed are writable to us, hence remember the result */
432 h->signed_locally = is_signed == USER_RECORD_SIGNED_EXCLUSIVE;
433
434 return 1;
20f4a308
LP
435
436unlink_this_file:
437 /* If this is an empty file, then let's just remove it. An empty file is not useful in any case, and
438 * apparently xfs likes to leave empty files around when not unmounted cleanly (see
439 * https://github.com/systemd/systemd/issues/15178 for example). Note that we don't delete non-empty
440 * files even if they are invalid, because that's just too risky, we might delete data the user still
441 * needs. But empty files are never useful, hence let's just remove them. */
442
443 if (unlinkat(dir_fd, fname, 0) < 0)
444 return log_error_errno(errno, "Failed to remove empty user record file %s: %m", fname);
445
005daeed 446 log_notice("Discovered empty user record file %s/%s, removed automatically.", home_record_dir(), fname);
20f4a308 447 return 0;
70a5db58
LP
448}
449
450static int manager_enumerate_records(Manager *m) {
451 _cleanup_closedir_ DIR *d = NULL;
70a5db58
LP
452
453 assert(m);
454
005daeed 455 d = opendir(home_record_dir());
70a5db58
LP
456 if (!d)
457 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
005daeed 458 "Failed to open %s: %m", home_record_dir());
70a5db58
LP
459
460 FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read record directory: %m")) {
461 _cleanup_free_ char *n = NULL;
462 const char *e;
463
464 if (!dirent_is_file(de))
465 continue;
466
467 e = endswith(de->d_name, ".identity");
468 if (!e)
469 continue;
470
471 n = strndup(de->d_name, e - de->d_name);
472 if (!n)
473 return log_oom();
474
475 if (!suitable_user_name(n))
476 continue;
477
478 (void) manager_add_home_by_record(m, n, dirfd(d), de->d_name);
479 }
480
481 return 0;
482}
483
484static int search_quota(uid_t uid, const char *exclude_quota_path) {
485 struct stat exclude_st = {};
486 dev_t previous_devno = 0;
70a5db58
LP
487 int r;
488
489 /* Checks whether the specified UID owns any files on the files system, but ignore any file system
490 * backing the specified file. The file is used when operating on home directories, where it's OK if
491 * the UID of them already owns files. */
492
493 if (exclude_quota_path && stat(exclude_quota_path, &exclude_st) < 0) {
494 if (errno != ENOENT)
495 return log_warning_errno(errno, "Failed to stat %s, ignoring: %m", exclude_quota_path);
496 }
497
498 /* Check a few usual suspects where regular users might own files. Note that this is by no means
499 * comprehensive, but should cover most cases. Note that in an ideal world every user would be
500 * registered in NSS and avoid our own UID range, but for all other cases, it's a good idea to be
501 * paranoid and check quota if we can. */
2700fecd 502 FOREACH_STRING(where, get_home_root(), "/tmp/", "/var/", "/var/mail/", "/var/tmp/", "/var/spool/") {
70a5db58
LP
503 struct dqblk req;
504 struct stat st;
505
506 if (stat(where, &st) < 0) {
507 log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
508 "Failed to stat %s, ignoring: %m", where);
509 continue;
510 }
511
512 if (major(st.st_dev) == 0) {
513 log_debug("Directory %s is not on a real block device, not checking quota for UID use.", where);
514 continue;
515 }
516
517 if (st.st_dev == exclude_st.st_dev) { /* If an exclude path is specified, then ignore quota
518 * reported on the same block device as that path. */
519 log_debug("Directory %s is where the home directory is located, not checking quota for UID use.", where);
520 continue;
521 }
522
523 if (st.st_dev == previous_devno) { /* Does this directory have the same devno as the previous
524 * one we tested? If so, there's no point in testing this
525 * again. */
526 log_debug("Directory %s is on same device as previous tested directory, not checking quota for UID use a second time.", where);
527 continue;
528 }
529
530 previous_devno = st.st_dev;
531
7176f06c 532 r = quotactl_devnum(QCMD_FIXED(Q_GETQUOTA, USRQUOTA), st.st_dev, uid, &req);
70a5db58
LP
533 if (r < 0) {
534 if (ERRNO_IS_NOT_SUPPORTED(r))
535 log_debug_errno(r, "No UID quota support on %s, ignoring.", where);
5e5e11b8
LP
536 else if (ERRNO_IS_PRIVILEGE(r))
537 log_debug_errno(r, "UID quota support for %s prohibited, ignoring.", where);
70a5db58 538 else
1a53adb3 539 log_warning_errno(r, "Failed to query quota on %s, ignoring: %m", where);
70a5db58
LP
540
541 continue;
542 }
543
544 if ((FLAGS_SET(req.dqb_valid, QIF_SPACE) && req.dqb_curspace > 0) ||
545 (FLAGS_SET(req.dqb_valid, QIF_INODES) && req.dqb_curinodes > 0)) {
546 log_debug_errno(errno, "Quota reports UID " UID_FMT " occupies disk space on %s.", uid, where);
547 return 1;
548 }
549 }
550
551 return 0;
552}
553
554static int manager_acquire_uid(
555 Manager *m,
556 uid_t start_uid,
557 const char *user_name,
558 const char *exclude_quota_path,
559 uid_t *ret) {
560
561 static const uint8_t hash_key[] = {
562 0xa3, 0xb8, 0x82, 0x69, 0x9a, 0x71, 0xf7, 0xa9,
563 0xe0, 0x7c, 0xf6, 0xf1, 0x21, 0x69, 0xd2, 0x1e
564 };
565
566 enum {
567 PHASE_SUGGESTED,
568 PHASE_HASHED,
569 PHASE_RANDOM
570 } phase = PHASE_SUGGESTED;
571
572 unsigned n_tries = 100;
573 int r;
574
575 assert(m);
576 assert(ret);
577
578 for (;;) {
579 struct passwd *pw;
580 struct group *gr;
581 uid_t candidate;
582 Home *other;
583
584 if (--n_tries <= 0)
585 return -EBUSY;
586
587 switch (phase) {
588
589 case PHASE_SUGGESTED:
590 phase = PHASE_HASHED;
591
592 if (!uid_is_home(start_uid))
593 continue;
594
595 candidate = start_uid;
596 break;
597
598 case PHASE_HASHED:
599 phase = PHASE_RANDOM;
600
601 if (!user_name)
602 continue;
603
604 candidate = UID_CLAMP_INTO_HOME_RANGE(siphash24(user_name, strlen(user_name), hash_key));
605 break;
606
607 case PHASE_RANDOM:
608 random_bytes(&candidate, sizeof(candidate));
609 candidate = UID_CLAMP_INTO_HOME_RANGE(candidate);
610 break;
611
612 default:
04499a70 613 assert_not_reached();
70a5db58
LP
614 }
615
616 other = hashmap_get(m->homes_by_uid, UID_TO_PTR(candidate));
617 if (other) {
23d24b76
ZJS
618 log_debug("Candidate UID " UID_FMT " already used by another home directory (%s), let's try another.",
619 candidate, other->user_name);
70a5db58
LP
620 continue;
621 }
622
623 pw = getpwuid(candidate);
624 if (pw) {
23d24b76
ZJS
625 log_debug("Candidate UID " UID_FMT " already registered by another user in NSS (%s), let's try another.",
626 candidate, pw->pw_name);
70a5db58
LP
627 continue;
628 }
629
630 gr = getgrgid((gid_t) candidate);
631 if (gr) {
23d24b76
ZJS
632 log_debug("Candidate UID " UID_FMT " already registered by another group in NSS (%s), let's try another.",
633 candidate, gr->gr_name);
70a5db58
LP
634 continue;
635 }
636
637 r = search_ipc(candidate, (gid_t) candidate);
638 if (r < 0)
639 continue;
640 if (r > 0) {
23d24b76
ZJS
641 log_debug_errno(r, "Candidate UID " UID_FMT " already owns IPC objects, let's try another: %m",
642 candidate);
70a5db58
LP
643 continue;
644 }
645
646 r = search_quota(candidate, exclude_quota_path);
647 if (r != 0)
648 continue;
649
650 *ret = candidate;
651 return 0;
652 }
653}
654
655static int manager_add_home_by_image(
656 Manager *m,
657 const char *user_name,
658 const char *realm,
659 const char *image_path,
660 const char *sysfs,
661 UserStorage storage,
662 uid_t start_uid) {
663
664 _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
665 uid_t uid;
666 Home *h;
667 int r;
668
669 assert(m);
670
671 assert(m);
672 assert(user_name);
673 assert(image_path);
674 assert(storage >= 0);
675 assert(storage < _USER_STORAGE_MAX);
676
677 h = hashmap_get(m->homes_by_name, user_name);
678 if (h) {
679 bool same;
680
681 if (h->state != HOME_UNFIXATED) {
682 log_debug("Found an image for user %s which already has a record, skipping.", user_name);
683 return 0; /* ignore images that synthesize a user we already have a record for */
684 }
685
686 same = user_record_storage(h->record) == storage;
687 if (same) {
688 if (h->sysfs && sysfs)
689 same = path_equal(h->sysfs, sysfs);
690 else if (!!h->sysfs != !!sysfs)
691 same = false;
692 else {
693 const char *p;
694
695 p = user_record_image_path(h->record);
696 same = p && path_equal(p, image_path);
697 }
698 }
699
700 if (!same) {
80ace4f2 701 log_debug("Found multiple images for user '%s', ignoring image '%s'.", user_name, image_path);
70a5db58
LP
702 return 0;
703 }
704 } else {
705 /* Check NSS, in case there's another user or group by this name */
706 if (getpwnam(user_name) || getgrnam(user_name)) {
707 log_debug("Found an existing user or group by name '%s', ignoring image '%s'.", user_name, image_path);
708 return 0;
709 }
710 }
711
712 if (h && uid_is_valid(h->uid))
713 uid = h->uid;
714 else {
23d24b76
ZJS
715 r = manager_acquire_uid(m, start_uid, user_name,
716 IN_SET(storage, USER_SUBVOLUME, USER_DIRECTORY, USER_FSCRYPT) ? image_path : NULL,
717 &uid);
70a5db58
LP
718 if (r < 0)
719 return log_warning_errno(r, "Failed to acquire unused UID for %s: %m", user_name);
720 }
721
722 hr = user_record_new();
723 if (!hr)
724 return log_oom();
725
726 r = user_record_synthesize(hr, user_name, realm, image_path, storage, uid, (gid_t) uid);
727 if (r < 0)
728 return log_error_errno(r, "Failed to synthesize home record for %s (image %s): %m", user_name, image_path);
729
730 if (h) {
731 r = home_set_record(h, hr);
732 if (r < 0)
733 return log_error_errno(r, "Failed to update home record for %s: %m", user_name);
734 } else {
735 r = home_new(m, hr, sysfs, &h);
736 if (r < 0)
737 return log_error_errno(r, "Failed to allocate new home object: %m");
738
739 h->state = HOME_UNFIXATED;
740
741 log_info("Discovered new home for user %s through image %s.", user_name, image_path);
742 }
743
744 return 1;
745}
746
747int manager_augment_record_with_uid(
748 Manager *m,
749 UserRecord *hr) {
750
751 const char *exclude_quota_path = NULL;
752 uid_t start_uid = UID_INVALID, uid;
753 int r;
754
755 assert(m);
756 assert(hr);
757
758 if (uid_is_valid(hr->uid))
759 return 0;
760
761 if (IN_SET(hr->storage, USER_CLASSIC, USER_SUBVOLUME, USER_DIRECTORY, USER_FSCRYPT)) {
762 const char * ip;
763
764 ip = user_record_image_path(hr);
765 if (ip) {
766 struct stat st;
767
768 if (stat(ip, &st) < 0) {
769 if (errno != ENOENT)
770 log_warning_errno(errno, "Failed to stat(%s): %m", ip);
771 } else if (uid_is_home(st.st_uid)) {
772 start_uid = st.st_uid;
773 exclude_quota_path = ip;
774 }
775 }
776 }
777
778 r = manager_acquire_uid(m, start_uid, hr->user_name, exclude_quota_path, &uid);
779 if (r < 0)
780 return r;
781
782 log_debug("Acquired new UID " UID_FMT " for %s.", uid, hr->user_name);
783
784 r = user_record_add_binding(
785 hr,
786 _USER_STORAGE_INVALID,
787 NULL,
788 SD_ID128_NULL,
789 SD_ID128_NULL,
790 SD_ID128_NULL,
791 NULL,
792 NULL,
793 UINT64_MAX,
794 NULL,
795 NULL,
796 uid,
797 (gid_t) uid);
798 if (r < 0)
799 return r;
800
801 return 1;
802}
803
804static int manager_assess_image(
805 Manager *m,
806 int dir_fd,
807 const char *dir_path,
808 const char *dentry_name) {
809
810 char *luks_suffix, *directory_suffix;
811 _cleanup_free_ char *path = NULL;
812 struct stat st;
813 int r;
814
815 assert(m);
816 assert(dir_path);
817 assert(dentry_name);
818
819 luks_suffix = endswith(dentry_name, ".home");
820 if (luks_suffix)
821 directory_suffix = NULL;
822 else
823 directory_suffix = endswith(dentry_name, ".homedir");
824
825 /* Early filter out: by name */
826 if (!luks_suffix && !directory_suffix)
827 return 0;
828
829 path = path_join(dir_path, dentry_name);
830 if (!path)
831 return log_oom();
832
833 /* Follow symlinks here, to allow people to link in stuff to make them available locally. */
834 if (dir_fd >= 0)
835 r = fstatat(dir_fd, dentry_name, &st, 0);
836 else
837 r = stat(path, &st);
838 if (r < 0)
839 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
80ace4f2 840 "Failed to stat() directory entry '%s', ignoring: %m", dentry_name);
70a5db58
LP
841
842 if (S_ISREG(st.st_mode)) {
843 _cleanup_free_ char *n = NULL, *user_name = NULL, *realm = NULL;
844
845 if (!luks_suffix)
846 return 0;
847
848 n = strndup(dentry_name, luks_suffix - dentry_name);
849 if (!n)
850 return log_oom();
851
852 r = split_user_name_realm(n, &user_name, &realm);
853 if (r == -EINVAL) /* Not the right format: ignore */
854 return 0;
855 if (r < 0)
856 return log_error_errno(r, "Failed to split image name into user name/realm: %m");
857
858 return manager_add_home_by_image(m, user_name, realm, path, NULL, USER_LUKS, UID_INVALID);
859 }
860
861 if (S_ISDIR(st.st_mode)) {
862 _cleanup_free_ char *n = NULL, *user_name = NULL, *realm = NULL;
863 _cleanup_close_ int fd = -1;
864 UserStorage storage;
865
866 if (!directory_suffix)
867 return 0;
868
869 n = strndup(dentry_name, directory_suffix - dentry_name);
870 if (!n)
871 return log_oom();
872
873 r = split_user_name_realm(n, &user_name, &realm);
874 if (r == -EINVAL) /* Not the right format: ignore */
875 return 0;
876 if (r < 0)
877 return log_error_errno(r, "Failed to split image name into user name/realm: %m");
878
879 if (dir_fd >= 0)
880 fd = openat(dir_fd, dentry_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
881 else
882 fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
883 if (fd < 0)
884 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
885 "Failed to open directory '%s', ignoring: %m", path);
886
887 if (fstat(fd, &st) < 0)
888 return log_warning_errno(errno, "Failed to fstat() %s, ignoring: %m", path);
889
890 assert(S_ISDIR(st.st_mode)); /* Must hold, we used O_DIRECTORY above */
891
892 r = btrfs_is_subvol_fd(fd);
893 if (r < 0)
894 return log_warning_errno(errno, "Failed to determine whether %s is a btrfs subvolume: %m", path);
895 if (r > 0)
896 storage = USER_SUBVOLUME;
897 else {
898 struct fscrypt_policy policy;
899
900 if (ioctl(fd, FS_IOC_GET_ENCRYPTION_POLICY, &policy) < 0) {
901
902 if (errno == ENODATA)
903 log_debug_errno(errno, "Determined %s is not fscrypt encrypted.", path);
904 else if (ERRNO_IS_NOT_SUPPORTED(errno))
80ace4f2 905 log_debug_errno(errno, "Determined %s is not fscrypt encrypted because kernel or file system doesn't support it.", path);
70a5db58
LP
906 else
907 log_debug_errno(errno, "FS_IOC_GET_ENCRYPTION_POLICY failed with unexpected error code on %s, ignoring: %m", path);
908
909 storage = USER_DIRECTORY;
910 } else
911 storage = USER_FSCRYPT;
912 }
913
914 return manager_add_home_by_image(m, user_name, realm, path, NULL, storage, st.st_uid);
915 }
916
917 return 0;
918}
919
920int manager_enumerate_images(Manager *m) {
921 _cleanup_closedir_ DIR *d = NULL;
70a5db58
LP
922
923 assert(m);
924
925 if (!m->scan_slash_home)
926 return 0;
927
2700fecd 928 d = opendir(get_home_root());
70a5db58
LP
929 if (!d)
930 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
2700fecd 931 "Failed to open %s: %m", get_home_root());
70a5db58 932
2700fecd
LP
933 FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read %s directory: %m", get_home_root()))
934 (void) manager_assess_image(m, dirfd(d), get_home_root(), de->d_name);
70a5db58
LP
935
936 return 0;
937}
938
939static int manager_connect_bus(Manager *m) {
1d3b68f6 940 _cleanup_free_ char *b = NULL;
cc9886bc 941 const char *suffix, *busname;
70a5db58
LP
942 int r;
943
944 assert(m);
945 assert(!m->bus);
946
947 r = sd_bus_default_system(&m->bus);
948 if (r < 0)
949 return log_error_errno(r, "Failed to connect to system bus: %m");
950
cfd508a9 951 r = bus_add_implementation(m->bus, &manager_object, m);
ac9f55ed
LP
952 if (r < 0)
953 return r;
954
c42234ab
LP
955 r = bus_log_control_api_register(m->bus);
956 if (r < 0)
957 return r;
958
cc9886bc 959 suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
1d3b68f6
AZ
960 if (suffix) {
961 b = strjoin("org.freedesktop.home1.", suffix);
962 if (!b)
963 return log_oom();
964 busname = b;
965 } else
cc9886bc
LP
966 busname = "org.freedesktop.home1";
967
968 r = sd_bus_request_name_async(m->bus, NULL, busname, 0, NULL, NULL);
70a5db58
LP
969 if (r < 0)
970 return log_error_errno(r, "Failed to request name: %m");
971
972 r = sd_bus_attach_event(m->bus, m->event, 0);
973 if (r < 0)
974 return log_error_errno(r, "Failed to attach bus to event loop: %m");
975
976 (void) sd_bus_set_exit_on_disconnect(m->bus, true);
977
978 return 0;
979}
980
981static int manager_bind_varlink(Manager *m) {
1d3b68f6 982 _cleanup_free_ char *p = NULL;
cc9886bc 983 const char *suffix, *socket_path;
70a5db58
LP
984 int r;
985
986 assert(m);
987 assert(!m->varlink_server);
988
9807fdc1 989 r = varlink_server_new(&m->varlink_server, VARLINK_SERVER_ACCOUNT_UID|VARLINK_SERVER_INHERIT_USERDATA);
70a5db58
LP
990 if (r < 0)
991 return log_error_errno(r, "Failed to allocate varlink server object: %m");
992
993 varlink_server_set_userdata(m->varlink_server, m);
994
995 r = varlink_server_bind_method_many(
996 m->varlink_server,
997 "io.systemd.UserDatabase.GetUserRecord", vl_method_get_user_record,
998 "io.systemd.UserDatabase.GetGroupRecord", vl_method_get_group_record,
999 "io.systemd.UserDatabase.GetMemberships", vl_method_get_memberships);
1000 if (r < 0)
1001 return log_error_errno(r, "Failed to register varlink methods: %m");
1002
1003 (void) mkdir_p("/run/systemd/userdb", 0755);
1004
cc9886bc
LP
1005 /* To make things easier to debug, when working from a homed managed home directory, let's optionally
1006 * use a different varlink socket name */
1007 suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
1d3b68f6
AZ
1008 if (suffix) {
1009 p = strjoin("/run/systemd/userdb/io.systemd.Home.", suffix);
1010 if (!p)
1011 return log_oom();
1012 socket_path = p;
1013 } else
cc9886bc
LP
1014 socket_path = "/run/systemd/userdb/io.systemd.Home";
1015
1016 r = varlink_server_listen_address(m->varlink_server, socket_path, 0666);
70a5db58
LP
1017 if (r < 0)
1018 return log_error_errno(r, "Failed to bind to varlink socket: %m");
1019
1020 r = varlink_server_attach_event(m->varlink_server, m->event, SD_EVENT_PRIORITY_NORMAL);
1021 if (r < 0)
1022 return log_error_errno(r, "Failed to attach varlink connection to event loop: %m");
1023
cc9886bc
LP
1024 assert(!m->userdb_service);
1025 m->userdb_service = strdup(basename(socket_path));
1026 if (!m->userdb_service)
1027 return log_oom();
1028
1029 /* Avoid recursion */
1030 if (setenv("SYSTEMD_BYPASS_USERDB", m->userdb_service, 1) < 0)
1031 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to set $SYSTEMD_BYPASS_USERDB: %m");
1032
70a5db58
LP
1033 return 0;
1034}
1035
2aaf565a
LP
1036static ssize_t read_datagram(
1037 int fd,
1038 struct ucred *ret_sender,
1039 void **ret,
1040 int *ret_passed_fd) {
1041
1042 CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))) control;
70a5db58 1043 _cleanup_free_ void *buffer = NULL;
2aaf565a
LP
1044 _cleanup_close_ int passed_fd = -1;
1045 struct ucred *sender = NULL;
1046 struct cmsghdr *cmsg;
1047 struct msghdr mh;
1048 struct iovec iov;
70a5db58
LP
1049 ssize_t n, m;
1050
1051 assert(fd >= 0);
1052 assert(ret_sender);
1053 assert(ret);
2aaf565a 1054 assert(ret_passed_fd);
70a5db58
LP
1055
1056 n = next_datagram_size_fd(fd);
1057 if (n < 0)
1058 return n;
1059
1060 buffer = malloc(n + 2);
1061 if (!buffer)
1062 return -ENOMEM;
1063
2aaf565a
LP
1064 /* Pass one extra byte, as a size check */
1065 iov = IOVEC_MAKE(buffer, n + 1);
70a5db58 1066
2aaf565a
LP
1067 mh = (struct msghdr) {
1068 .msg_iov = &iov,
1069 .msg_iovlen = 1,
1070 .msg_control = &control,
1071 .msg_controllen = sizeof(control),
1072 };
70a5db58 1073
2aaf565a
LP
1074 m = recvmsg_safe(fd, &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1075 if (m < 0)
1076 return m;
70a5db58 1077
2aaf565a
LP
1078 /* Ensure the size matches what we determined before */
1079 if (m != n) {
70a5db58 1080 cmsg_close_all(&mh);
2aaf565a
LP
1081 return -EMSGSIZE;
1082 }
70a5db58 1083
2aaf565a
LP
1084 CMSG_FOREACH(cmsg, &mh) {
1085 if (cmsg->cmsg_level == SOL_SOCKET &&
1086 cmsg->cmsg_type == SCM_CREDENTIALS &&
1087 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
1088 assert(!sender);
1089 sender = (struct ucred*) CMSG_DATA(cmsg);
1090 }
70a5db58 1091
2aaf565a
LP
1092 if (cmsg->cmsg_level == SOL_SOCKET &&
1093 cmsg->cmsg_type == SCM_RIGHTS) {
70a5db58 1094
2aaf565a
LP
1095 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
1096 cmsg_close_all(&mh);
1097 return -EMSGSIZE;
70a5db58
LP
1098 }
1099
2aaf565a
LP
1100 assert(passed_fd < 0);
1101 passed_fd = *(int*) CMSG_DATA(cmsg);
1102 }
70a5db58
LP
1103 }
1104
2aaf565a
LP
1105 if (sender)
1106 *ret_sender = *sender;
1107 else
1108 *ret_sender = (struct ucred) UCRED_INVALID;
1109
1110 *ret_passed_fd = TAKE_FD(passed_fd);
1111
70a5db58
LP
1112 /* For safety reasons: let's always NUL terminate. */
1113 ((char*) buffer)[n] = 0;
1114 *ret = TAKE_PTR(buffer);
1115
1116 return 0;
1117}
1118
1119static int on_notify_socket(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1120 _cleanup_strv_free_ char **l = NULL;
1121 _cleanup_free_ void *datagram = NULL;
2aaf565a
LP
1122 _cleanup_close_ int passed_fd = -1;
1123 struct ucred sender = UCRED_INVALID;
99534007 1124 Manager *m = ASSERT_PTR(userdata);
70a5db58
LP
1125 ssize_t n;
1126 Home *h;
1127
1128 assert(s);
70a5db58 1129
2aaf565a 1130 n = read_datagram(fd, &sender, &datagram, &passed_fd);
8add30a0
YW
1131 if (n < 0) {
1132 if (ERRNO_IS_TRANSIENT(n))
1133 return 0;
70a5db58 1134 return log_error_errno(n, "Failed to read notify datagram: %m");
8add30a0 1135 }
70a5db58
LP
1136
1137 if (sender.pid <= 0) {
1138 log_warning("Received notify datagram without valid sender PID, ignoring.");
1139 return 0;
1140 }
1141
1142 h = hashmap_get(m->homes_by_worker_pid, PID_TO_PTR(sender.pid));
1143 if (!h) {
162392b7 1144 log_warning("Received notify datagram of unknown process, ignoring.");
70a5db58
LP
1145 return 0;
1146 }
1147
1148 l = strv_split(datagram, "\n");
1149 if (!l)
1150 return log_oom();
1151
2aaf565a 1152 home_process_notify(h, l, TAKE_FD(passed_fd));
70a5db58
LP
1153 return 0;
1154}
1155
1156static int manager_listen_notify(Manager *m) {
1157 _cleanup_close_ int fd = -1;
425d925f
ZJS
1158 union sockaddr_union sa = {
1159 .un.sun_family = AF_UNIX,
1160 .un.sun_path = "/run/systemd/home/notify",
1161 };
cc9886bc 1162 const char *suffix;
70a5db58
LP
1163 int r;
1164
1165 assert(m);
1166 assert(!m->notify_socket_event_source);
1167
cc9886bc
LP
1168 suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
1169 if (suffix) {
1d3b68f6 1170 _cleanup_free_ char *unix_path = NULL;
cc9886bc 1171
1d3b68f6
AZ
1172 unix_path = strjoin("/run/systemd/home/notify.", suffix);
1173 if (!unix_path)
1174 return log_oom();
cc9886bc
LP
1175 r = sockaddr_un_set_path(&sa.un, unix_path);
1176 if (r < 0)
1177 return log_error_errno(r, "Socket path %s does not fit in sockaddr_un: %m", unix_path);
1178 }
1179
70a5db58
LP
1180 fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1181 if (fd < 0)
1182 return log_error_errno(errno, "Failed to create listening socket: %m");
1183
70a5db58
LP
1184 (void) mkdir_parents(sa.un.sun_path, 0755);
1185 (void) sockaddr_un_unlink(&sa.un);
1186
1187 if (bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0)
1188 return log_error_errno(errno, "Failed to bind to socket: %m");
1189
1190 r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
1191 if (r < 0)
1192 return r;
1193
1194 r = sd_event_add_io(m->event, &m->notify_socket_event_source, fd, EPOLLIN, on_notify_socket, m);
1195 if (r < 0)
1196 return log_error_errno(r, "Failed to allocate event source for notify socket: %m");
1197
1198 (void) sd_event_source_set_description(m->notify_socket_event_source, "notify-socket");
1199
1200 /* Make sure we process sd_notify() before SIGCHLD for any worker, so that we always know the error
1201 * number of a client before it exits. */
1202 r = sd_event_source_set_priority(m->notify_socket_event_source, SD_EVENT_PRIORITY_NORMAL - 5);
1203 if (r < 0)
1204 return log_error_errno(r, "Failed to alter priority of NOTIFY_SOCKET event source: %m");
1205
1206 r = sd_event_source_set_io_fd_own(m->notify_socket_event_source, true);
1207 if (r < 0)
1208 return log_error_errno(r, "Failed to pass ownership of notify socket: %m");
1209
1210 return TAKE_FD(fd);
1211}
1212
1213static int manager_add_device(Manager *m, sd_device *d) {
1214 _cleanup_free_ char *user_name = NULL, *realm = NULL, *node = NULL;
1215 const char *tabletype, *parttype, *partname, *partuuid, *sysfs;
1216 sd_id128_t id;
1217 int r;
1218
1219 assert(m);
1220 assert(d);
1221
1222 r = sd_device_get_syspath(d, &sysfs);
1223 if (r < 0)
1224 return log_error_errno(r, "Failed to acquire sysfs path of device: %m");
1225
1226 r = sd_device_get_property_value(d, "ID_PART_TABLE_TYPE", &tabletype);
1227 if (r == -ENOENT)
1228 return 0;
1229 if (r < 0)
1230 return log_error_errno(r, "Failed to acquire ID_PART_TABLE_TYPE device property, ignoring: %m");
1231
1232 if (!streq(tabletype, "gpt")) {
1233 log_debug("Found partition (%s) on non-GPT table, ignoring.", sysfs);
1234 return 0;
1235 }
1236
1237 r = sd_device_get_property_value(d, "ID_PART_ENTRY_TYPE", &parttype);
1238 if (r == -ENOENT)
1239 return 0;
1240 if (r < 0)
1241 return log_error_errno(r, "Failed to acquire ID_PART_ENTRY_TYPE device property, ignoring: %m");
e1614484 1242 if (sd_id128_string_equal(parttype, GPT_USER_HOME) <= 0) {
70a5db58
LP
1243 log_debug("Found partition (%s) we don't care about, ignoring.", sysfs);
1244 return 0;
1245 }
1246
1247 r = sd_device_get_property_value(d, "ID_PART_ENTRY_NAME", &partname);
1248 if (r < 0)
1249 return log_warning_errno(r, "Failed to acquire ID_PART_ENTRY_NAME device property, ignoring: %m");
1250
1251 r = split_user_name_realm(partname, &user_name, &realm);
1252 if (r == -EINVAL)
1253 return log_warning_errno(r, "Found partition with correct partition type but a non-parsable partition name '%s', ignoring.", partname);
1254 if (r < 0)
1255 return log_error_errno(r, "Failed to validate partition name '%s': %m", partname);
1256
1257 r = sd_device_get_property_value(d, "ID_FS_UUID", &partuuid);
1258 if (r < 0)
1259 return log_warning_errno(r, "Failed to acquire ID_FS_UUID device property, ignoring: %m");
1260
1261 r = sd_id128_from_string(partuuid, &id);
1262 if (r < 0)
1263 return log_warning_errno(r, "Failed to parse ID_FS_UUID field '%s', ignoring: %m", partuuid);
1264
1265 if (asprintf(&node, "/dev/disk/by-uuid/" SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(id)) < 0)
1266 return log_oom();
1267
1268 return manager_add_home_by_image(m, user_name, realm, node, sysfs, USER_LUKS, UID_INVALID);
1269}
1270
1271static int manager_on_device(sd_device_monitor *monitor, sd_device *d, void *userdata) {
99534007 1272 Manager *m = ASSERT_PTR(userdata);
70a5db58
LP
1273 int r;
1274
70a5db58
LP
1275 assert(d);
1276
a1130022 1277 if (device_for_action(d, SD_DEVICE_REMOVE)) {
70a5db58
LP
1278 const char *sysfs;
1279 Home *h;
1280
1281 r = sd_device_get_syspath(d, &sysfs);
1282 if (r < 0) {
1283 log_warning_errno(r, "Failed to acquire sysfs path from device: %m");
1284 return 0;
1285 }
1286
1287 log_info("block device %s has been removed.", sysfs);
1288
1289 /* Let's see if we previously synthesized a home record from this device, if so, let's just
1290 * revalidate that. Otherwise let's revalidate them all, but asynchronously. */
1291 h = hashmap_get(m->homes_by_sysfs, sysfs);
1292 if (h)
1293 manager_revalidate_image(m, h);
1294 else
1295 manager_enqueue_gc(m, NULL);
1296 } else
1297 (void) manager_add_device(m, d);
1298
1299 (void) bus_manager_emit_auto_login_changed(m);
1300 return 0;
1301}
1302
1303static int manager_watch_devices(Manager *m) {
1304 int r;
1305
1306 assert(m);
1307 assert(!m->device_monitor);
1308
1309 r = sd_device_monitor_new(&m->device_monitor);
1310 if (r < 0)
1311 return log_error_errno(r, "Failed to allocate device monitor: %m");
1312
1313 r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_monitor, "block", NULL);
1314 if (r < 0)
1315 return log_error_errno(r, "Failed to configure device monitor match: %m");
1316
1317 r = sd_device_monitor_attach_event(m->device_monitor, m->event);
1318 if (r < 0)
1319 return log_error_errno(r, "Failed to attach device monitor to event loop: %m");
1320
1321 r = sd_device_monitor_start(m->device_monitor, manager_on_device, m);
1322 if (r < 0)
1323 return log_error_errno(r, "Failed to start device monitor: %m");
1324
1325 return 0;
1326}
1327
1328static int manager_enumerate_devices(Manager *m) {
1329 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
1330 sd_device *d;
1331 int r;
1332
1333 assert(m);
1334
1335 r = sd_device_enumerator_new(&e);
1336 if (r < 0)
1337 return r;
1338
1339 r = sd_device_enumerator_add_match_subsystem(e, "block", true);
1340 if (r < 0)
1341 return r;
1342
1343 FOREACH_DEVICE(e, d)
1344 (void) manager_add_device(m, d);
1345
1346 return 0;
1347}
1348
1349static int manager_load_key_pair(Manager *m) {
1350 _cleanup_(fclosep) FILE *f = NULL;
1351 struct stat st;
1352 int r;
1353
1354 assert(m);
1355
1356 if (m->private_key) {
1357 EVP_PKEY_free(m->private_key);
1358 m->private_key = NULL;
1359 }
1360
2708160c 1361 r = search_and_fopen_nulstr("local.private", "re", NULL, KEY_PATHS_NULSTR, &f, NULL);
70a5db58
LP
1362 if (r == -ENOENT)
1363 return 0;
1364 if (r < 0)
1365 return log_error_errno(r, "Failed to read private key file: %m");
1366
1367 if (fstat(fileno(f), &st) < 0)
1368 return log_error_errno(errno, "Failed to stat private key file: %m");
1369
1370 r = stat_verify_regular(&st);
1371 if (r < 0)
1372 return log_error_errno(r, "Private key file is not regular: %m");
1373
1374 if (st.st_uid != 0 || (st.st_mode & 0077) != 0)
1375 return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Private key file is readable by more than the root user");
1376
1377 m->private_key = PEM_read_PrivateKey(f, NULL, NULL, NULL);
1378 if (!m->private_key)
1379 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to load private key pair");
1380
1381 log_info("Successfully loaded private key pair.");
1382
1383 return 1;
1384}
1385
fd421c4a 1386DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(EVP_PKEY_CTX*, EVP_PKEY_CTX_free, NULL);
70a5db58
LP
1387
1388static int manager_generate_key_pair(Manager *m) {
1389 _cleanup_(EVP_PKEY_CTX_freep) EVP_PKEY_CTX *ctx = NULL;
1390 _cleanup_(unlink_and_freep) char *temp_public = NULL, *temp_private = NULL;
1391 _cleanup_fclose_ FILE *fpublic = NULL, *fprivate = NULL;
1392 int r;
1393
1394 if (m->private_key) {
1395 EVP_PKEY_free(m->private_key);
1396 m->private_key = NULL;
1397 }
1398
1399 ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_ED25519, NULL);
1400 if (!ctx)
1401 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to allocate Ed25519 key generation context.");
1402
1403 if (EVP_PKEY_keygen_init(ctx) <= 0)
1404 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to initialize Ed25519 key generation context.");
1405
1406 log_info("Generating key pair for signing local user identity records.");
1407
1408 if (EVP_PKEY_keygen(ctx, &m->private_key) <= 0)
1409 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to generate Ed25519 key pair");
1410
1411 log_info("Successfully created Ed25519 key pair.");
1412
1413 (void) mkdir_p("/var/lib/systemd/home", 0755);
1414
1415 /* Write out public key (note that we only do that as a help to the user, we don't make use of this ever */
1416 r = fopen_temporary("/var/lib/systemd/home/local.public", &fpublic, &temp_public);
1417 if (r < 0)
80ace4f2 1418 return log_error_errno(errno, "Failed to open key file for writing: %m");
70a5db58
LP
1419
1420 if (PEM_write_PUBKEY(fpublic, m->private_key) <= 0)
1421 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to write public key.");
1422
fa3709c5 1423 r = fflush_sync_and_check(fpublic);
70a5db58
LP
1424 if (r < 0)
1425 return log_error_errno(r, "Failed to write private key: %m");
1426
1427 fpublic = safe_fclose(fpublic);
1428
1429 /* Write out the private key (this actually writes out both private and public, OpenSSL is confusing) */
1430 r = fopen_temporary("/var/lib/systemd/home/local.private", &fprivate, &temp_private);
1431 if (r < 0)
80ace4f2 1432 return log_error_errno(errno, "Failed to open key file for writing: %m");
70a5db58
LP
1433
1434 if (PEM_write_PrivateKey(fprivate, m->private_key, NULL, NULL, 0, NULL, 0) <= 0)
1435 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to write private key pair.");
1436
fa3709c5 1437 r = fflush_sync_and_check(fprivate);
70a5db58
LP
1438 if (r < 0)
1439 return log_error_errno(r, "Failed to write private key: %m");
1440
1441 fprivate = safe_fclose(fprivate);
1442
1443 /* Both are written now, move them into place */
1444
1445 if (rename(temp_public, "/var/lib/systemd/home/local.public") < 0)
1446 return log_error_errno(errno, "Failed to move public key file into place: %m");
1447 temp_public = mfree(temp_public);
1448
1449 if (rename(temp_private, "/var/lib/systemd/home/local.private") < 0) {
1450 (void) unlink_noerrno("/var/lib/systemd/home/local.public"); /* try to remove the file we already created */
e8dd54ab 1451 return log_error_errno(errno, "Failed to move private key file into place: %m");
70a5db58
LP
1452 }
1453 temp_private = mfree(temp_private);
1454
fa3709c5
LP
1455 r = fsync_path_at(AT_FDCWD, "/var/lib/systemd/home/");
1456 if (r < 0)
1457 log_warning_errno(r, "Failed to sync /var/lib/systemd/home/, ignoring: %m");
1458
70a5db58
LP
1459 return 1;
1460}
1461
1462int manager_acquire_key_pair(Manager *m) {
1463 int r;
1464
1465 assert(m);
1466
1467 /* Already there? */
1468 if (m->private_key)
1469 return 1;
1470
1471 /* First try to load key off disk */
1472 r = manager_load_key_pair(m);
1473 if (r != 0)
1474 return r;
1475
1476 /* Didn't work, generate a new one */
1477 return manager_generate_key_pair(m);
1478}
1479
1480int manager_sign_user_record(Manager *m, UserRecord *u, UserRecord **ret, sd_bus_error *error) {
1481 int r;
1482
1483 assert(m);
1484 assert(u);
1485 assert(ret);
1486
1487 r = manager_acquire_key_pair(m);
1488 if (r < 0)
1489 return r;
1490 if (r == 0)
1b09b81c 1491 return sd_bus_error_set(error, BUS_ERROR_NO_PRIVATE_KEY, "Can't sign without local key.");
70a5db58
LP
1492
1493 return user_record_sign(u, m->private_key, ret);
1494}
1495
1496DEFINE_PRIVATE_HASH_OPS_FULL(public_key_hash_ops, char, string_hash_func, string_compare_func, free, EVP_PKEY, EVP_PKEY_free);
fd421c4a 1497DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(EVP_PKEY*, EVP_PKEY_free, NULL);
70a5db58
LP
1498
1499static int manager_load_public_key_one(Manager *m, const char *path) {
1500 _cleanup_(EVP_PKEY_freep) EVP_PKEY *pkey = NULL;
1501 _cleanup_fclose_ FILE *f = NULL;
1502 _cleanup_free_ char *fn = NULL;
1503 struct stat st;
1504 int r;
1505
1506 assert(m);
1507
1508 if (streq(basename(path), "local.public")) /* we already loaded the private key, which includes the public one */
1509 return 0;
1510
1511 f = fopen(path, "re");
1512 if (!f) {
1513 if (errno == ENOENT)
1514 return 0;
1515
1516 return log_error_errno(errno, "Failed to open public key %s: %m", path);
1517 }
1518
1519 if (fstat(fileno(f), &st) < 0)
1520 return log_error_errno(errno, "Failed to stat public key %s: %m", path);
1521
1522 r = stat_verify_regular(&st);
1523 if (r < 0)
1524 return log_error_errno(r, "Public key file %s is not a regular file: %m", path);
1525
1526 if (st.st_uid != 0 || (st.st_mode & 0022) != 0)
1527 return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Public key file %s is writable by more than the root user, refusing.", path);
1528
1529 r = hashmap_ensure_allocated(&m->public_keys, &public_key_hash_ops);
1530 if (r < 0)
1531 return log_oom();
1532
1533 pkey = PEM_read_PUBKEY(f, &pkey, NULL, NULL);
1534 if (!pkey)
1535 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to parse public key file %s.", path);
1536
1537 fn = strdup(basename(path));
1538 if (!fn)
1539 return log_oom();
1540
1541 r = hashmap_put(m->public_keys, fn, pkey);
1542 if (r < 0)
1543 return log_error_errno(r, "Failed to add public key to set: %m");
1544
1545 TAKE_PTR(fn);
1546 TAKE_PTR(pkey);
1547
1548 return 0;
1549}
1550
1551static int manager_load_public_keys(Manager *m) {
1552 _cleanup_strv_free_ char **files = NULL;
70a5db58
LP
1553 int r;
1554
1555 assert(m);
1556
1557 m->public_keys = hashmap_free(m->public_keys);
1558
1559 r = conf_files_list_nulstr(
1560 &files,
1561 ".public",
1562 NULL,
1563 CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED,
1564 KEY_PATHS_NULSTR);
1565 if (r < 0)
1566 return log_error_errno(r, "Failed to assemble list of public key directories: %m");
1567
1568 STRV_FOREACH(i, files)
1569 (void) manager_load_public_key_one(m, *i);
1570
1571 return 0;
1572}
1573
1574int manager_startup(Manager *m) {
1575 int r;
1576
1577 assert(m);
1578
1579 r = manager_listen_notify(m);
1580 if (r < 0)
1581 return r;
1582
1583 r = manager_connect_bus(m);
1584 if (r < 0)
1585 return r;
1586
1587 r = manager_bind_varlink(m);
1588 if (r < 0)
1589 return r;
1590
1591 r = manager_load_key_pair(m); /* only try to load it, don't generate any */
1592 if (r < 0)
1593 return r;
1594
1595 r = manager_load_public_keys(m);
1596 if (r < 0)
1597 return r;
1598
1599 manager_watch_home(m);
1600 (void) manager_watch_devices(m);
1601
1602 (void) manager_enumerate_records(m);
1603 (void) manager_enumerate_images(m);
1604 (void) manager_enumerate_devices(m);
1605
1606 /* Let's clean up home directories whose devices got removed while we were not running */
1607 (void) manager_enqueue_gc(m, NULL);
1608
1609 return 0;
1610}
1611
1612void manager_revalidate_image(Manager *m, Home *h) {
1613 int r;
1614
1615 assert(m);
1616 assert(h);
1617
1618 /* Frees an automatically discovered image, if it's synthetic and its image disappeared. Unmounts any
1619 * image if it's mounted but it's image vanished. */
1620
1621 if (h->current_operation || !ordered_set_isempty(h->pending_operations))
1622 return;
1623
1624 if (h->state == HOME_UNFIXATED) {
1625 r = user_record_test_image_path(h->record);
1626 if (r < 0)
1627 log_warning_errno(r, "Can't determine if image of %s exists, freeing unfixated user: %m", h->user_name);
1628 else if (r == USER_TEST_ABSENT)
1629 log_info("Image for %s disappeared, freeing unfixated user.", h->user_name);
1630 else
1631 return;
1632
1633 home_free(h);
1634
1635 } else if (h->state < 0) {
1636
1637 r = user_record_test_home_directory(h->record);
1638 if (r < 0) {
1639 log_warning_errno(r, "Unable to determine state of home directory, ignoring: %m");
1640 return;
1641 }
1642
1643 if (r == USER_TEST_MOUNTED) {
1644 r = user_record_test_image_path(h->record);
1645 if (r < 0) {
1646 log_warning_errno(r, "Unable to determine state of image path, ignoring: %m");
1647 return;
1648 }
1649
1650 if (r == USER_TEST_ABSENT) {
1651 _cleanup_(operation_unrefp) Operation *o = NULL;
1652
1653 log_notice("Backing image disappeared while home directory %s was mounted, unmounting it forcibly.", h->user_name);
1654 /* Wowza, the thing is mounted, but the device is gone? Act on it. */
1655
1656 r = home_killall(h);
1657 if (r < 0)
1658 log_warning_errno(r, "Failed to kill processes of user %s, ignoring: %m", h->user_name);
1659
1660 /* We enqueue the operation here, after all the home directory might
1661 * currently already run some operation, and we can deactivate it only after
1662 * that's complete. */
1663 o = operation_new(OPERATION_DEACTIVATE_FORCE, NULL);
1664 if (!o) {
1665 log_oom();
1666 return;
1667 }
1668
1669 r = home_schedule_operation(h, o, NULL);
1670 if (r < 0)
1671 log_warning_errno(r, "Failed to enqueue forced home directory %s deactivation, ignoring: %m", h->user_name);
1672 }
1673 }
1674 }
1675}
1676
1677int manager_gc_images(Manager *m) {
1678 Home *h;
1679
1680 assert_se(m);
1681
1682 if (m->gc_focus) {
1683 /* Focus on a specific home */
1684
1685 h = TAKE_PTR(m->gc_focus);
1686 manager_revalidate_image(m, h);
1687 } else {
1688 /* Gc all */
70a5db58 1689
90e74a66 1690 HASHMAP_FOREACH(h, m->homes_by_name)
70a5db58
LP
1691 manager_revalidate_image(m, h);
1692 }
1693
1694 return 0;
1695}
1696
1697static int on_deferred_rescan(sd_event_source *s, void *userdata) {
99534007 1698 Manager *m = ASSERT_PTR(userdata);
70a5db58 1699
cf536638 1700 m->deferred_rescan_event_source = sd_event_source_disable_unref(m->deferred_rescan_event_source);
70a5db58
LP
1701
1702 manager_enumerate_devices(m);
1703 manager_enumerate_images(m);
1704 return 0;
1705}
1706
1707int manager_enqueue_rescan(Manager *m) {
1708 int r;
1709
1710 assert(m);
1711
1712 if (m->deferred_rescan_event_source)
1713 return 0;
1714
1715 if (!m->event)
1716 return 0;
1717
1718 if (IN_SET(sd_event_get_state(m->event), SD_EVENT_FINISHED, SD_EVENT_EXITING))
1719 return 0;
1720
1721 r = sd_event_add_defer(m->event, &m->deferred_rescan_event_source, on_deferred_rescan, m);
1722 if (r < 0)
1723 return log_error_errno(r, "Failed to allocate rescan event source: %m");
1724
1725 r = sd_event_source_set_priority(m->deferred_rescan_event_source, SD_EVENT_PRIORITY_IDLE+1);
1726 if (r < 0)
1727 log_warning_errno(r, "Failed to tweak priority of event source, ignoring: %m");
1728
1729 (void) sd_event_source_set_description(m->deferred_rescan_event_source, "deferred-rescan");
1730 return 1;
1731}
1732
1733static int on_deferred_gc(sd_event_source *s, void *userdata) {
99534007 1734 Manager *m = ASSERT_PTR(userdata);
70a5db58 1735
cf536638 1736 m->deferred_gc_event_source = sd_event_source_disable_unref(m->deferred_gc_event_source);
70a5db58
LP
1737
1738 manager_gc_images(m);
1739 return 0;
1740}
1741
1742int manager_enqueue_gc(Manager *m, Home *focus) {
1743 int r;
1744
1745 assert(m);
1746
1747 /* This enqueues a request to GC dead homes. It may be called with focus=NULL in which case all homes
1748 * will be scanned, or with the parameter set, in which case only that home is checked. */
1749
1750 if (!m->event)
1751 return 0;
1752
1753 if (IN_SET(sd_event_get_state(m->event), SD_EVENT_FINISHED, SD_EVENT_EXITING))
1754 return 0;
1755
1756 /* If a focus home is specified, then remember to focus just on this home. Otherwise invalidate any
1757 * focus that might be set to look at all homes. */
1758
1759 if (m->deferred_gc_event_source) {
1760 if (m->gc_focus != focus) /* not the same focus, then look at everything */
1761 m->gc_focus = NULL;
1762
1763 return 0;
1764 } else
162392b7 1765 m->gc_focus = focus; /* start focused */
70a5db58
LP
1766
1767 r = sd_event_add_defer(m->event, &m->deferred_gc_event_source, on_deferred_gc, m);
1768 if (r < 0)
80ace4f2 1769 return log_error_errno(r, "Failed to allocate GC event source: %m");
70a5db58
LP
1770
1771 r = sd_event_source_set_priority(m->deferred_gc_event_source, SD_EVENT_PRIORITY_IDLE);
1772 if (r < 0)
1773 log_warning_errno(r, "Failed to tweak priority of event source, ignoring: %m");
1774
1775 (void) sd_event_source_set_description(m->deferred_gc_event_source, "deferred-gc");
1776 return 1;
1777}
d357b80d
LP
1778
1779static bool manager_shall_rebalance(Manager *m) {
1780 Home *h;
1781
1782 assert(m);
1783
1784 if (IN_SET(m->rebalance_state, REBALANCE_PENDING, REBALANCE_SHRINKING, REBALANCE_GROWING))
1785 return true;
1786
1787 HASHMAP_FOREACH(h, m->homes_by_name)
1788 if (home_shall_rebalance(h))
1789 return true;
1790
1791 return false;
1792}
1793
1794static int home_cmp(Home *const*a, Home *const*b) {
1795 int r;
1796
1797 assert(a);
1798 assert(*a);
1799 assert(b);
1800 assert(*b);
1801
1802 /* Order user records by their weight (and by their name, to make things stable). We put the records
a6f44d61 1803 * with the highest weight last, since we distribute space from the beginning and round down, hence
d357b80d
LP
1804 * later entries tend to get slightly more than earlier entries. */
1805
1806 r = CMP(user_record_rebalance_weight((*a)->record), user_record_rebalance_weight((*b)->record));
1807 if (r != 0)
1808 return r;
1809
1810 return strcmp((*a)->user_name, (*b)->user_name);
1811}
1812
1813static int manager_rebalance_calculate(Manager *m) {
1814 uint64_t weight_sum, free_sum, usage_sum = 0, min_free = UINT64_MAX;
1815 _cleanup_free_ Home **array = NULL;
1816 bool relevant = false;
1817 struct statfs sfs;
1818 int c = 0, r;
1819 Home *h;
1820
1821 assert(m);
1822
1823 if (statfs(get_home_root(), &sfs) < 0)
1824 return log_error_errno(errno, "Failed to statfs() /home: %m");
1825
1826 free_sum = (uint64_t) sfs.f_bsize * sfs.f_bavail; /* This much free space is available on the
1827 * underlying pool directory */
1828
1829 weight_sum = REBALANCE_WEIGHT_BACKING; /* Grant the underlying pool directory a fixed weight of 20
1830 * (home dirs get 100 by default, i.e. 5x more). This weight
1831 * is not configurable, the per-home weights are. */
1832
1833 HASHMAP_FOREACH(h, m->homes_by_name) {
1834 statfs_f_type_t fstype;
1835 h->rebalance_pending = false; /* First, reset the flag, we only want it to be true for the
1836 * homes that qualify for rebalancing */
1837
1838 if (!home_shall_rebalance(h)) /* Only look at actual candidates */
1839 continue;
1840
1841 if (home_is_busy(h))
1842 return -EBUSY; /* Let's not rebalance if there's a busy home directory. */
1843
1844 r = home_get_disk_status(
1845 h,
1846 &h->rebalance_size,
1847 &h->rebalance_usage,
1848 &h->rebalance_free,
1849 NULL,
1850 NULL,
1851 &fstype,
1852 NULL);
1853 if (r < 0) {
1854 log_warning_errno(r, "Failed to get free space of home '%s', ignoring.", h->user_name);
1855 continue;
1856 }
1857
1858 if (h->rebalance_free > UINT64_MAX - free_sum)
1859 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Rebalance free overflow");
1860 free_sum += h->rebalance_free;
1861
1862 if (h->rebalance_usage > UINT64_MAX - usage_sum)
1863 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Rebalance usage overflow");
1864 usage_sum += h->rebalance_usage;
1865
1866 h->rebalance_weight = user_record_rebalance_weight(h->record);
1867 if (h->rebalance_weight > UINT64_MAX - weight_sum)
1868 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Rebalance weight overflow");
1869 weight_sum += h->rebalance_weight;
1870
1871 h->rebalance_min = minimal_size_by_fs_magic(fstype);
1872
1873 if (!GREEDY_REALLOC(array, c+1))
1874 return log_oom();
1875
1876 array[c++] = h;
1877 }
1878
1879 if (c == 0) {
1880 log_debug("No homes to rebalance.");
1881 return 0;
1882 }
1883
1884 assert(weight_sum > 0);
1885
1886 log_debug("Disk space usage by all home directories to rebalance: %s — available disk space: %s",
1887 FORMAT_BYTES(usage_sum), FORMAT_BYTES(free_sum));
1888
1889 /* Bring the home directories in a well-defined order, so that we distribute space in a reproducible
1890 * way for the same parameters. */
1891 typesafe_qsort(array, c, home_cmp);
1892
1893 for (int i = 0; i < c; i++) {
1894 uint64_t new_free;
1895 double d;
1896
1897 h = array[i];
1898
1899 assert(h->rebalance_free <= free_sum);
1900 assert(h->rebalance_usage <= usage_sum);
1901 assert(h->rebalance_weight <= weight_sum);
1902
1903 d = ((double) (free_sum / 4096) * (double) h->rebalance_weight) / (double) weight_sum; /* Calculate new space for this home in units of 4K */
1904
1905 /* Convert from units of 4K back to bytes */
1906 if (d >= (double) (UINT64_MAX/4096))
1907 new_free = UINT64_MAX;
1908 else
1909 new_free = (uint64_t) d * 4096;
1910
1911 /* Subtract the weight and assigned space from the sums now, to distribute the rounding noise
1912 * to the remaining home dirs */
1913 free_sum = LESS_BY(free_sum, new_free);
1914 weight_sum = LESS_BY(weight_sum, h->rebalance_weight);
1915
1916 /* Keep track of home directory with the least amount of space left: we want to schedule the
1917 * next rebalance more quickly if this is low */
1918 if (new_free < min_free)
1919 min_free = h->rebalance_size;
1920
1921 if (new_free > UINT64_MAX - h->rebalance_usage)
1922 h->rebalance_goal = UINT64_MAX-1; /* maximum size */
1923 else {
1924 h->rebalance_goal = h->rebalance_usage + new_free;
1925
1926 if (h->rebalance_min != UINT64_MAX && h->rebalance_goal < h->rebalance_min)
1927 h->rebalance_goal = h->rebalance_min;
1928 }
1929
1930 /* Skip over this home if the state doesn't match the operation */
1931 if ((m->rebalance_state == REBALANCE_SHRINKING && h->rebalance_goal > h->rebalance_size) ||
1932 (m->rebalance_state == REBALANCE_GROWING && h->rebalance_goal < h->rebalance_size))
1933 h->rebalance_pending = false;
1934 else {
e2341b6b
DT
1935 log_debug("Rebalancing home directory '%s' %s %s %s.", h->user_name,
1936 FORMAT_BYTES(h->rebalance_size),
1937 special_glyph(SPECIAL_GLYPH_ARROW_RIGHT),
1938 FORMAT_BYTES(h->rebalance_goal));
d357b80d
LP
1939 h->rebalance_pending = true;
1940 }
1941
1942 if ((fabs((double) h->rebalance_size - (double) h->rebalance_goal) * 100 / (double) h->rebalance_size) >= 5.0)
1943 relevant = true;
1944 }
1945
1946 /* Scale next rebalancing interval based on the least amount of space of any of the home
1947 * directories. We pick a time in the range 1min … 15min, scaled by log2(min_free), so that:
1948 * 10M → ~0.7min, 100M → ~2.7min, 1G → ~4.6min, 10G → ~6.5min, 100G ~8.4 */
1949 m->rebalance_interval_usec = (usec_t) CLAMP((LESS_BY(log2(min_free), 22)*15*USEC_PER_MINUTE)/26,
1950 1 * USEC_PER_MINUTE,
1951 15 * USEC_PER_MINUTE);
1952
1953
1954 log_debug("Rebalancing interval set to %s.", FORMAT_TIMESPAN(m->rebalance_interval_usec, USEC_PER_MSEC));
1955
1956 /* Let's suppress small resizes, growing/shrinking file systems isn't free after all */
1957 if (!relevant) {
1958 log_debug("Skipping rebalancing, since all calculated size changes are below ±5%%.");
1959 return 0;
1960 }
1961
1962 return c;
1963}
1964
1965static int manager_rebalance_apply(Manager *m) {
1966 int c = 0, r;
1967 Home *h;
1968
1969 assert(m);
1970
1971 HASHMAP_FOREACH(h, m->homes_by_name) {
1972 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
1973
1974 if (!h->rebalance_pending)
1975 continue;
1976
1977 h->rebalance_pending = false;
1978
1979 r = home_resize(h, h->rebalance_goal, /* secret= */ NULL, /* automatic= */ true, &error);
1980 if (r < 0)
1981 log_warning_errno(r, "Failed to resize home '%s' for rebalancing, ignoring: %s",
1982 h->user_name, bus_error_message(&error, r));
1983 else
1984 c++;
1985 }
1986
1987 return c;
1988}
1989
49505916
LP
1990static void manager_rebalance_reply_messages(Manager *m) {
1991 int r;
1992
1993 assert(m);
1994
1995 for (;;) {
1996 _cleanup_(sd_bus_message_unrefp) sd_bus_message *msg =
1997 set_steal_first(m->rebalance_pending_method_calls);
1998
1999 if (!msg)
2000 break;
2001
2002 r = sd_bus_reply_method_return(msg, NULL);
2003 if (r < 0)
2004 log_debug_errno(r, "Failed to reply to rebalance method call, ignoring: %m");
2005 }
2006}
2007
d357b80d
LP
2008static int manager_rebalance_now(Manager *m) {
2009 RebalanceState busy_state; /* the state to revert to when operation fails if busy */
2010 int r;
2011
2012 assert(m);
2013
2014 log_debug("Rebalancing now...");
2015
2016 /* We maintain a simple state engine here to keep track of what we are doing. We'll first shrink all
a6f44d61 2017 * homes that shall be shrunk and then grow all homes that shall be grown, so that they can take up
d357b80d
LP
2018 * the space now freed. */
2019
2020 for (;;) {
2021 switch (m->rebalance_state) {
2022
2023 case REBALANCE_IDLE:
2024 case REBALANCE_PENDING:
2025 case REBALANCE_WAITING:
2026 /* First shrink large home dirs */
2027 m->rebalance_state = REBALANCE_SHRINKING;
2028 busy_state = REBALANCE_PENDING;
49505916
LP
2029
2030 /* We are initiating the next rebalancing cycle now, let's make the queued methods
2031 * calls the pending ones, and flush out any pending ones (which shouldn't exist at
2032 * this time anyway) */
2033 set_clear(m->rebalance_pending_method_calls);
2034 SWAP_TWO(m->rebalance_pending_method_calls, m->rebalance_queued_method_calls);
2035
d357b80d
LP
2036 log_debug("Shrinking phase..");
2037 break;
2038
2039 case REBALANCE_SHRINKING:
2040 /* Then grow small home dirs */
2041 m->rebalance_state = REBALANCE_GROWING;
2042 busy_state = REBALANCE_SHRINKING;
2043 log_debug("Growing phase..");
2044 break;
2045
2046 case REBALANCE_GROWING:
2047 /* Finally, we are done */
2048 log_info("Rebalancing complete.");
2049 m->rebalance_state = REBALANCE_IDLE;
2050 r = 0;
2051 goto finish;
2052
2053 case REBALANCE_OFF:
2054 default:
2055 assert_not_reached();
2056 }
2057
2058 r = manager_rebalance_calculate(m);
2059 if (r == -EBUSY) {
2060 /* Calculations failed because one home directory is currently busy. Revert to a state that
2061 * tells us what to do next. */
2062 log_debug("Can't enter phase, busy.");
2063 m->rebalance_state = busy_state;
2064 return r;
2065 }
2066 if (r < 0)
2067 goto finish;
2068 if (r == 0)
2069 continue; /* got to next step immediately, if there's nothing to do */
2070
2071 r = manager_rebalance_apply(m);
2072 if (r < 0)
2073 goto finish;
2074 if (r > 0)
2075 break; /* At least one resize operation is now pending, we are done for now */
2076
2077 /* If there was nothing to apply, go for next state right-away */
2078 }
2079
2080 return 0;
2081
2082finish:
2083 /* Reset state and schedule next rebalance */
2084 m->rebalance_state = REBALANCE_IDLE;
49505916 2085 manager_rebalance_reply_messages(m);
d357b80d
LP
2086 (void) manager_schedule_rebalance(m, /* immediately= */ false);
2087 return r;
2088}
2089
2090static int on_rebalance_timer(sd_event_source *s, usec_t t, void *userdata) {
99534007 2091 Manager *m = ASSERT_PTR(userdata);
d357b80d
LP
2092
2093 assert(s);
d357b80d
LP
2094 assert(IN_SET(m->rebalance_state, REBALANCE_WAITING, REBALANCE_PENDING, REBALANCE_SHRINKING, REBALANCE_GROWING));
2095
2096 (void) manager_rebalance_now(m);
2097 return 0;
2098}
2099
2100int manager_schedule_rebalance(Manager *m, bool immediately) {
2101 int r;
2102
2103 assert(m);
2104
2105 /* Check if there are any records where rebalancing is requested */
2106 if (!manager_shall_rebalance(m)) {
2107 log_debug("Not scheduling rebalancing, not needed.");
49505916 2108 r = 0; /* report that we didn't schedule anything because nothing needed it */
d357b80d
LP
2109 goto turn_off;
2110 }
2111
2112 if (immediately) {
2113 /* If we are told to rebalance immediately, then mark a rebalance as pending (even if we area
2114 * already running one) */
2115
2116 if (m->rebalance_event_source) {
2117 r = sd_event_source_set_time(m->rebalance_event_source, 0);
2118 if (r < 0) {
2119 log_error_errno(r, "Failed to schedule immediate rebalancing: %m");
2120 goto turn_off;
2121 }
2122
2123 r = sd_event_source_set_enabled(m->rebalance_event_source, SD_EVENT_ONESHOT);
2124 if (r < 0) {
2125 log_error_errno(r, "Failed to enable rebalancing event source: %m");
2126 goto turn_off;
2127 }
2128 } else {
2129 r = sd_event_add_time(m->event, &m->rebalance_event_source, CLOCK_MONOTONIC, 0, USEC_PER_SEC, on_rebalance_timer, m);
2130 if (r < 0) {
2131 log_error_errno(r, "Failed to allocate rebalance event source: %m");
2132 goto turn_off;
2133 }
2134
2135 r = sd_event_source_set_priority(m->rebalance_event_source, SD_EVENT_PRIORITY_IDLE + 10);
2136 if (r < 0) {
2137 log_error_errno(r, "Failed to set rebalance event source priority: %m");
2138 goto turn_off;
2139 }
2140
2141 (void) sd_event_source_set_description(m->rebalance_event_source, "rebalance");
2142
2143 }
2144
2145 if (!IN_SET(m->rebalance_state, REBALANCE_PENDING, REBALANCE_SHRINKING, REBALANCE_GROWING))
2146 m->rebalance_state = REBALANCE_PENDING;
2147
2148 log_debug("Scheduled immediate rebalancing...");
49505916 2149 return 1; /* report that we scheduled something */
d357b80d
LP
2150 }
2151
2152 /* If we are told to schedule a rebalancing eventually, then do so only if we are not executing
2153 * anything yet. Also if we have something scheduled already, leave it in place */
2154 if (!IN_SET(m->rebalance_state, REBALANCE_OFF, REBALANCE_IDLE))
49505916 2155 return 1; /* report that there's already something scheduled */
d357b80d
LP
2156
2157 if (m->rebalance_event_source) {
2158 r = sd_event_source_set_time_relative(m->rebalance_event_source, m->rebalance_interval_usec);
2159 if (r < 0) {
2160 log_error_errno(r, "Failed to schedule immediate rebalancing: %m");
2161 goto turn_off;
2162 }
2163
2164 r = sd_event_source_set_enabled(m->rebalance_event_source, SD_EVENT_ONESHOT);
2165 if (r < 0) {
2166 log_error_errno(r, "Failed to enable rebalancing event source: %m");
2167 goto turn_off;
2168 }
2169 } else {
2170 r = sd_event_add_time_relative(m->event, &m->rebalance_event_source, CLOCK_MONOTONIC, m->rebalance_interval_usec, USEC_PER_SEC, on_rebalance_timer, m);
2171 if (r < 0) {
2172 log_error_errno(r, "Failed to allocate rebalance event source: %m");
2173 goto turn_off;
2174 }
2175
2176 r = sd_event_source_set_priority(m->rebalance_event_source, SD_EVENT_PRIORITY_IDLE + 10);
2177 if (r < 0) {
2178 log_error_errno(r, "Failed to set rebalance event source priority: %m");
2179 goto turn_off;
2180 }
2181
2182 (void) sd_event_source_set_description(m->rebalance_event_source, "rebalance");
2183 }
2184
2185 m->rebalance_state = REBALANCE_WAITING; /* We managed to enqueue a timer event, we now wait until it fires */
2186 log_debug("Scheduled rebalancing in %s...", FORMAT_TIMESPAN(m->rebalance_interval_usec, 0));
49505916 2187 return 1; /* report that we scheduled something */
d357b80d
LP
2188
2189turn_off:
2190 m->rebalance_event_source = sd_event_source_disable_unref(m->rebalance_event_source);
2191 m->rebalance_state = REBALANCE_OFF;
49505916 2192 manager_rebalance_reply_messages(m);
d357b80d
LP
2193 return r;
2194}
2195
2196int manager_reschedule_rebalance(Manager *m) {
2197 int r;
2198
2199 assert(m);
2200
2201 /* If a rebalance is pending reschedules it so it gets executed immediately */
2202
2203 if (!IN_SET(m->rebalance_state, REBALANCE_PENDING, REBALANCE_SHRINKING, REBALANCE_GROWING))
2204 return 0;
2205
2206 r = manager_schedule_rebalance(m, /* immediately= */ true);
2207 if (r < 0)
2208 return r;
2209
2210 return 1;
2211}