]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/home/homed-manager.c
device-util: Declare iterator variables inline
[thirdparty/systemd.git] / src / home / homed-manager.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
70a5db58
LP
2
3#include <grp.h>
4#include <linux/fs.h>
5#include <linux/magic.h>
d357b80d 6#include <math.h>
70a5db58
LP
7#include <openssl/pem.h>
8#include <pwd.h>
9#include <sys/ioctl.h>
10#include <sys/quota.h>
11#include <sys/stat.h>
12
e1614484
ZJS
13#include "sd-id128.h"
14
70a5db58
LP
15#include "btrfs-util.h"
16#include "bus-common-errors.h"
17#include "bus-error.h"
ac9f55ed 18#include "bus-log-control-api.h"
70a5db58
LP
19#include "bus-polkit.h"
20#include "clean-ipc.h"
11d78c31 21#include "common-signal.h"
70a5db58
LP
22#include "conf-files.h"
23#include "device-util.h"
24#include "dirent-util.h"
25#include "fd-util.h"
26#include "fileio.h"
27#include "format-util.h"
28#include "fs-util.h"
e2341b6b 29#include "glyph-util.h"
70a5db58
LP
30#include "gpt.h"
31#include "home-util.h"
c76dd733 32#include "homed-conf.h"
70a5db58
LP
33#include "homed-home-bus.h"
34#include "homed-home.h"
35#include "homed-manager-bus.h"
36#include "homed-manager.h"
37#include "homed-varlink.h"
38#include "io-util.h"
39#include "mkdir.h"
40#include "process-util.h"
41#include "quota-util.h"
42#include "random-util.h"
d357b80d 43#include "resize-fs.h"
70a5db58 44#include "socket-util.h"
d357b80d 45#include "sort-util.h"
70a5db58
LP
46#include "stat-util.h"
47#include "strv.h"
bf819d3a 48#include "sync-util.h"
70a5db58
LP
49#include "tmpfile-util.h"
50#include "udev-util.h"
51#include "user-record-sign.h"
52#include "user-record-util.h"
53#include "user-record.h"
54#include "user-util.h"
55
/* Directories searched for the private/public keys used to sign user records, as a NUL-separated string
 * list. CONF_PATHS_NULSTR() is deliberately not used here: /var/lib/systemd/home/ has to be inserted in
 * the middle of the list, because we want to auto-generate a persistent private/public key pair if we
 * need to. */
#define KEY_PATHS_NULSTR                        \
        "/etc/systemd/home/\0"                  \
        "/run/systemd/home/\0"                  \
        "/var/lib/systemd/home/\0"              \
        "/usr/local/lib/systemd/home/\0"        \
        "/usr/lib/systemd/home/\0"
65
66static bool uid_is_home(uid_t uid) {
67 return uid >= HOME_UID_MIN && uid <= HOME_UID_MAX;
68}
/* Maps an arbitrary value (generated randomly or by hashing) onto a UID inside the
 * HOME_UID_MIN…HOME_UID_MAX range. */
#define UID_CLAMP_INTO_HOME_RANGE(rnd) \
        (((uid_t) (rnd) % (HOME_UID_MAX - HOME_UID_MIN + 1)) + HOME_UID_MIN)
72
73DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_uid_hash_ops, void, trivial_hash_func, trivial_compare_func, Home, home_free);
74DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_name_hash_ops, char, string_hash_func, string_compare_func, Home, home_free);
75DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_worker_pid_hash_ops, void, trivial_hash_func, trivial_compare_func, Home, home_free);
76DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_sysfs_hash_ops, char, path_hash_func, path_compare, Home, home_free);
77
78static int on_home_inotify(sd_event_source *s, const struct inotify_event *event, void *userdata);
79static int manager_gc_images(Manager *m);
80static int manager_enumerate_images(Manager *m);
81static int manager_assess_image(Manager *m, int dir_fd, const char *dir_path, const char *dentry_name);
82static void manager_revalidate_image(Manager *m, Home *h);
83
84static void manager_watch_home(Manager *m) {
85 struct statfs sfs;
86 int r;
87
88 assert(m);
89
cf536638 90 m->inotify_event_source = sd_event_source_disable_unref(m->inotify_event_source);
70a5db58
LP
91 m->scan_slash_home = false;
92
2700fecd 93 if (statfs(get_home_root(), &sfs) < 0) {
70a5db58 94 log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
2700fecd 95 "Failed to statfs() %s directory, disabling automatic scanning.", get_home_root());
70a5db58
LP
96 return;
97 }
98
99 if (is_network_fs(&sfs)) {
2700fecd 100 log_info("%s is a network file system, disabling automatic scanning.", get_home_root());
70a5db58
LP
101 return;
102 }
103
104 if (is_fs_type(&sfs, AUTOFS_SUPER_MAGIC)) {
2700fecd 105 log_info("%s is on autofs, disabling automatic scanning.", get_home_root());
70a5db58
LP
106 return;
107 }
108
109 m->scan_slash_home = true;
110
2700fecd 111 r = sd_event_add_inotify(m->event, &m->inotify_event_source, get_home_root(),
23d24b76
ZJS
112 IN_CREATE|IN_CLOSE_WRITE|IN_DELETE_SELF|IN_MOVE_SELF|IN_ONLYDIR|IN_MOVED_TO|IN_MOVED_FROM|IN_DELETE,
113 on_home_inotify, m);
70a5db58
LP
114 if (r < 0)
115 log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
2700fecd 116 "Failed to create inotify watch on %s, ignoring.", get_home_root());
70a5db58
LP
117
118 (void) sd_event_source_set_description(m->inotify_event_source, "home-inotify");
2700fecd
LP
119
120 log_info("Watching %s.", get_home_root());
70a5db58
LP
121}
122
123static int on_home_inotify(sd_event_source *s, const struct inotify_event *event, void *userdata) {
2700fecd 124 _cleanup_free_ char *j = NULL;
99534007 125 Manager *m = ASSERT_PTR(userdata);
70a5db58
LP
126 const char *e, *n;
127
70a5db58
LP
128 assert(event);
129
130 if ((event->mask & (IN_Q_OVERFLOW|IN_MOVE_SELF|IN_DELETE_SELF|IN_IGNORED|IN_UNMOUNT)) != 0) {
131
132 if (FLAGS_SET(event->mask, IN_Q_OVERFLOW))
2700fecd 133 log_debug("%s inotify queue overflow, rescanning.", get_home_root());
70a5db58 134 else if (FLAGS_SET(event->mask, IN_MOVE_SELF))
2700fecd 135 log_info("%s moved or renamed, recreating watch and rescanning.", get_home_root());
70a5db58 136 else if (FLAGS_SET(event->mask, IN_DELETE_SELF))
2700fecd 137 log_info("%s deleted, recreating watch and rescanning.", get_home_root());
70a5db58 138 else if (FLAGS_SET(event->mask, IN_UNMOUNT))
2700fecd 139 log_info("%s unmounted, recreating watch and rescanning.", get_home_root());
70a5db58 140 else if (FLAGS_SET(event->mask, IN_IGNORED))
2700fecd 141 log_info("%s watch invalidated, recreating watch and rescanning.", get_home_root());
70a5db58
LP
142
143 manager_watch_home(m);
144 (void) manager_gc_images(m);
145 (void) manager_enumerate_images(m);
146 (void) bus_manager_emit_auto_login_changed(m);
147 return 0;
148 }
149
150 /* For the other inotify events, let's ignore all events for file names that don't match our
151 * expectations */
152 if (isempty(event->name))
153 return 0;
154 e = endswith(event->name, FLAGS_SET(event->mask, IN_ISDIR) ? ".homedir" : ".home");
155 if (!e)
156 return 0;
157
2f82562b 158 n = strndupa_safe(event->name, e - event->name);
70a5db58
LP
159 if (!suitable_user_name(n))
160 return 0;
161
2700fecd
LP
162 j = path_join(get_home_root(), event->name);
163 if (!j)
164 return log_oom();
165
70a5db58
LP
166 if ((event->mask & (IN_CREATE|IN_CLOSE_WRITE|IN_MOVED_TO)) != 0) {
167 if (FLAGS_SET(event->mask, IN_CREATE))
2700fecd 168 log_debug("%s has been created, having a look.", j);
70a5db58 169 else if (FLAGS_SET(event->mask, IN_CLOSE_WRITE))
2700fecd 170 log_debug("%s has been modified, having a look.", j);
70a5db58 171 else if (FLAGS_SET(event->mask, IN_MOVED_TO))
2700fecd 172 log_debug("%s has been moved in, having a look.", j);
70a5db58 173
2700fecd 174 (void) manager_assess_image(m, -1, get_home_root(), event->name);
70a5db58
LP
175 (void) bus_manager_emit_auto_login_changed(m);
176 }
177
755b35b1 178 if ((event->mask & (IN_DELETE | IN_CLOSE_WRITE | IN_MOVED_FROM)) != 0) {
70a5db58
LP
179 Home *h;
180
181 if (FLAGS_SET(event->mask, IN_DELETE))
2700fecd 182 log_debug("%s has been deleted, revalidating.", j);
70a5db58 183 else if (FLAGS_SET(event->mask, IN_CLOSE_WRITE))
2700fecd 184 log_debug("%s has been closed after writing, revalidating.", j);
70a5db58 185 else if (FLAGS_SET(event->mask, IN_MOVED_FROM))
2700fecd 186 log_debug("%s has been moved away, revalidating.", j);
70a5db58
LP
187
188 h = hashmap_get(m->homes_by_name, n);
189 if (h) {
190 manager_revalidate_image(m, h);
191 (void) bus_manager_emit_auto_login_changed(m);
192 }
193 }
194
195 return 0;
196}
197
198int manager_new(Manager **ret) {
199 _cleanup_(manager_freep) Manager *m = NULL;
200 int r;
201
202 assert(ret);
203
c76dd733 204 m = new(Manager, 1);
70a5db58
LP
205 if (!m)
206 return -ENOMEM;
207
c76dd733
LP
208 *m = (Manager) {
209 .default_storage = _USER_STORAGE_INVALID,
d357b80d 210 .rebalance_interval_usec = 2 * USEC_PER_MINUTE, /* initially, rebalance every 2min */
c76dd733
LP
211 };
212
213 r = manager_parse_config_file(m);
214 if (r < 0)
215 return r;
216
70a5db58
LP
217 r = sd_event_default(&m->event);
218 if (r < 0)
219 return r;
220
221 r = sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
222 if (r < 0)
223 return r;
224
225 r = sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
226 if (r < 0)
227 return r;
228
11d78c31
LP
229 r = sd_event_add_memory_pressure(m->event, NULL, NULL, NULL);
230 if (r < 0)
231 log_full_errno(ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r) || (r == -EHOSTDOWN) ? LOG_DEBUG : LOG_WARNING, r,
232 "Failed to allocate memory pressure watch, ignoring: %m");
233
234 r = sd_event_add_signal(m->event, NULL, SIGRTMIN+18, sigrtmin18_handler, NULL);
235 if (r < 0)
236 return r;
237
70a5db58
LP
238 (void) sd_event_set_watchdog(m->event, true);
239
240 m->homes_by_uid = hashmap_new(&homes_by_uid_hash_ops);
241 if (!m->homes_by_uid)
242 return -ENOMEM;
243
244 m->homes_by_name = hashmap_new(&homes_by_name_hash_ops);
245 if (!m->homes_by_name)
246 return -ENOMEM;
247
248 m->homes_by_worker_pid = hashmap_new(&homes_by_worker_pid_hash_ops);
249 if (!m->homes_by_worker_pid)
250 return -ENOMEM;
251
252 m->homes_by_sysfs = hashmap_new(&homes_by_sysfs_hash_ops);
253 if (!m->homes_by_sysfs)
254 return -ENOMEM;
255
256 *ret = TAKE_PTR(m);
257 return 0;
258}
259
260Manager* manager_free(Manager *m) {
9796a9fb
LP
261 Home *h;
262
70a5db58
LP
263 assert(m);
264
9796a9fb
LP
265 HASHMAP_FOREACH(h, m->homes_by_worker_pid)
266 (void) home_wait_for_worker(h);
267
76fc1577
YW
268 m->bus = sd_bus_flush_close_unref(m->bus);
269 m->polkit_registry = bus_verify_polkit_async_registry_free(m->polkit_registry);
70a5db58 270
70a5db58
LP
271 m->device_monitor = sd_device_monitor_unref(m->device_monitor);
272
f76e5644
ZJS
273 m->inotify_event_source = sd_event_source_unref(m->inotify_event_source);
274 m->notify_socket_event_source = sd_event_source_unref(m->notify_socket_event_source);
70a5db58
LP
275 m->deferred_rescan_event_source = sd_event_source_unref(m->deferred_rescan_event_source);
276 m->deferred_gc_event_source = sd_event_source_unref(m->deferred_gc_event_source);
277 m->deferred_auto_login_event_source = sd_event_source_unref(m->deferred_auto_login_event_source);
d357b80d 278 m->rebalance_event_source = sd_event_source_unref(m->rebalance_event_source);
70a5db58 279
76fc1577 280 m->event = sd_event_unref(m->event);
f76e5644 281
76fc1577
YW
282 m->homes_by_uid = hashmap_free(m->homes_by_uid);
283 m->homes_by_name = hashmap_free(m->homes_by_name);
284 m->homes_by_worker_pid = hashmap_free(m->homes_by_worker_pid);
285 m->homes_by_sysfs = hashmap_free(m->homes_by_sysfs);
f76e5644 286
70a5db58
LP
287 if (m->private_key)
288 EVP_PKEY_free(m->private_key);
289
290 hashmap_free(m->public_keys);
291
292 varlink_server_unref(m->varlink_server);
cc9886bc 293 free(m->userdb_service);
70a5db58 294
c76dd733
LP
295 free(m->default_file_system_type);
296
70a5db58
LP
297 return mfree(m);
298}
299
300int manager_verify_user_record(Manager *m, UserRecord *hr) {
301 EVP_PKEY *pkey;
70a5db58
LP
302 int r;
303
304 assert(m);
305 assert(hr);
306
307 if (!m->private_key && hashmap_isempty(m->public_keys)) {
308 r = user_record_has_signature(hr);
309 if (r < 0)
310 return r;
311
312 return r ? -ENOKEY : USER_RECORD_UNSIGNED;
313 }
314
315 /* Is it our own? */
316 if (m->private_key) {
317 r = user_record_verify(hr, m->private_key);
318 switch (r) {
319
320 case USER_RECORD_FOREIGN:
321 /* This record is not signed by this key, but let's see below */
322 break;
323
324 case USER_RECORD_SIGNED: /* Signed by us, but also by others, let's propagate that */
325 case USER_RECORD_SIGNED_EXCLUSIVE: /* Signed by us, and nothing else, ditto */
326 case USER_RECORD_UNSIGNED: /* Not signed at all, ditto */
327 default:
328 return r;
329 }
330 }
331
90e74a66 332 HASHMAP_FOREACH(pkey, m->public_keys) {
70a5db58
LP
333 r = user_record_verify(hr, pkey);
334 switch (r) {
335
336 case USER_RECORD_FOREIGN:
337 /* This record is not signed by this key, but let's see our other keys */
338 break;
339
340 case USER_RECORD_SIGNED: /* It's signed by this key we are happy with, but which is not our own. */
341 case USER_RECORD_SIGNED_EXCLUSIVE:
342 return USER_RECORD_FOREIGN;
343
344 case USER_RECORD_UNSIGNED: /* It's not signed at all */
345 default:
346 return r;
347 }
348 }
349
350 return -ENOKEY;
351}
352
353static int manager_add_home_by_record(
354 Manager *m,
355 const char *name,
356 int dir_fd,
357 const char *fname) {
358
359 _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
852640f8 360 _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
70a5db58
LP
361 unsigned line, column;
362 int r, is_signed;
20f4a308 363 struct stat st;
70a5db58
LP
364 Home *h;
365
366 assert(m);
367 assert(name);
368 assert(fname);
369
20f4a308
LP
370 if (fstatat(dir_fd, fname, &st, 0) < 0)
371 return log_error_errno(errno, "Failed to stat identity record %s: %m", fname);
372
373 if (!S_ISREG(st.st_mode)) {
374 log_debug("Identity record file %s is not a regular file, ignoring.", fname);
375 return 0;
376 }
377
378 if (st.st_size == 0)
379 goto unlink_this_file;
380
70a5db58
LP
381 r = json_parse_file_at(NULL, dir_fd, fname, JSON_PARSE_SENSITIVE, &v, &line, &column);
382 if (r < 0)
383 return log_error_errno(r, "Failed to parse identity record at %s:%u%u: %m", fname, line, column);
384
20f4a308
LP
385 if (json_variant_is_blank_object(v))
386 goto unlink_this_file;
387
70a5db58
LP
388 hr = user_record_new();
389 if (!hr)
390 return log_oom();
391
bfc0cc1a 392 r = user_record_load(hr, v, USER_RECORD_LOAD_REFUSE_SECRET|USER_RECORD_LOG|USER_RECORD_PERMISSIVE);
70a5db58
LP
393 if (r < 0)
394 return r;
395
396 if (!streq_ptr(hr->user_name, name))
23d24b76
ZJS
397 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
398 "Identity's user name %s does not match file name %s, refusing.",
399 hr->user_name, name);
70a5db58
LP
400
401 is_signed = manager_verify_user_record(m, hr);
402 switch (is_signed) {
403
404 case -ENOKEY:
405 return log_warning_errno(is_signed, "User record %s is not signed by any accepted key, ignoring.", fname);
406 case USER_RECORD_UNSIGNED:
407 return log_warning_errno(SYNTHETIC_ERRNO(EPERM), "User record %s is not signed at all, ignoring.", fname);
408 case USER_RECORD_SIGNED:
409 log_info("User record %s is signed by us (and others), accepting.", fname);
410 break;
411 case USER_RECORD_SIGNED_EXCLUSIVE:
412 log_info("User record %s is signed only by us, accepting.", fname);
413 break;
414 case USER_RECORD_FOREIGN:
415 log_info("User record %s is signed by registered key from others, accepting.", fname);
416 break;
417 default:
418 assert(is_signed < 0);
419 return log_error_errno(is_signed, "Failed to verify signature of user record in %s: %m", fname);
420 }
421
422 h = hashmap_get(m->homes_by_name, name);
423 if (h) {
424 r = home_set_record(h, hr);
425 if (r < 0)
426 return log_error_errno(r, "Failed to update home record for %s: %m", name);
427
428 /* If we acquired a record now for a previously unallocated entry, then reset the state. This
429 * makes sure home_get_state() will check for the availability of the image file dynamically
162392b7 430 * in order to detect to distinguish HOME_INACTIVE and HOME_ABSENT. */
70a5db58
LP
431 if (h->state == HOME_UNFIXATED)
432 h->state = _HOME_STATE_INVALID;
433 } else {
434 r = home_new(m, hr, NULL, &h);
435 if (r < 0)
436 return log_error_errno(r, "Failed to allocate new home object: %m");
437
438 log_info("Added registered home for user %s.", hr->user_name);
439 }
440
441 /* Only entries we exclusively signed are writable to us, hence remember the result */
442 h->signed_locally = is_signed == USER_RECORD_SIGNED_EXCLUSIVE;
443
444 return 1;
20f4a308
LP
445
446unlink_this_file:
447 /* If this is an empty file, then let's just remove it. An empty file is not useful in any case, and
448 * apparently xfs likes to leave empty files around when not unmounted cleanly (see
449 * https://github.com/systemd/systemd/issues/15178 for example). Note that we don't delete non-empty
450 * files even if they are invalid, because that's just too risky, we might delete data the user still
451 * needs. But empty files are never useful, hence let's just remove them. */
452
453 if (unlinkat(dir_fd, fname, 0) < 0)
454 return log_error_errno(errno, "Failed to remove empty user record file %s: %m", fname);
455
005daeed 456 log_notice("Discovered empty user record file %s/%s, removed automatically.", home_record_dir(), fname);
20f4a308 457 return 0;
70a5db58
LP
458}
459
460static int manager_enumerate_records(Manager *m) {
461 _cleanup_closedir_ DIR *d = NULL;
70a5db58
LP
462
463 assert(m);
464
005daeed 465 d = opendir(home_record_dir());
70a5db58
LP
466 if (!d)
467 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
005daeed 468 "Failed to open %s: %m", home_record_dir());
70a5db58
LP
469
470 FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read record directory: %m")) {
471 _cleanup_free_ char *n = NULL;
472 const char *e;
473
474 if (!dirent_is_file(de))
475 continue;
476
477 e = endswith(de->d_name, ".identity");
478 if (!e)
479 continue;
480
481 n = strndup(de->d_name, e - de->d_name);
482 if (!n)
483 return log_oom();
484
485 if (!suitable_user_name(n))
486 continue;
487
488 (void) manager_add_home_by_record(m, n, dirfd(d), de->d_name);
489 }
490
491 return 0;
492}
493
494static int search_quota(uid_t uid, const char *exclude_quota_path) {
495 struct stat exclude_st = {};
496 dev_t previous_devno = 0;
70a5db58
LP
497 int r;
498
499 /* Checks whether the specified UID owns any files on the files system, but ignore any file system
500 * backing the specified file. The file is used when operating on home directories, where it's OK if
501 * the UID of them already owns files. */
502
503 if (exclude_quota_path && stat(exclude_quota_path, &exclude_st) < 0) {
504 if (errno != ENOENT)
505 return log_warning_errno(errno, "Failed to stat %s, ignoring: %m", exclude_quota_path);
506 }
507
508 /* Check a few usual suspects where regular users might own files. Note that this is by no means
509 * comprehensive, but should cover most cases. Note that in an ideal world every user would be
510 * registered in NSS and avoid our own UID range, but for all other cases, it's a good idea to be
511 * paranoid and check quota if we can. */
2700fecd 512 FOREACH_STRING(where, get_home_root(), "/tmp/", "/var/", "/var/mail/", "/var/tmp/", "/var/spool/") {
70a5db58
LP
513 struct dqblk req;
514 struct stat st;
515
516 if (stat(where, &st) < 0) {
517 log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
518 "Failed to stat %s, ignoring: %m", where);
519 continue;
520 }
521
522 if (major(st.st_dev) == 0) {
523 log_debug("Directory %s is not on a real block device, not checking quota for UID use.", where);
524 continue;
525 }
526
527 if (st.st_dev == exclude_st.st_dev) { /* If an exclude path is specified, then ignore quota
528 * reported on the same block device as that path. */
529 log_debug("Directory %s is where the home directory is located, not checking quota for UID use.", where);
530 continue;
531 }
532
533 if (st.st_dev == previous_devno) { /* Does this directory have the same devno as the previous
534 * one we tested? If so, there's no point in testing this
535 * again. */
536 log_debug("Directory %s is on same device as previous tested directory, not checking quota for UID use a second time.", where);
537 continue;
538 }
539
540 previous_devno = st.st_dev;
541
7176f06c 542 r = quotactl_devnum(QCMD_FIXED(Q_GETQUOTA, USRQUOTA), st.st_dev, uid, &req);
70a5db58
LP
543 if (r < 0) {
544 if (ERRNO_IS_NOT_SUPPORTED(r))
545 log_debug_errno(r, "No UID quota support on %s, ignoring.", where);
5e5e11b8
LP
546 else if (ERRNO_IS_PRIVILEGE(r))
547 log_debug_errno(r, "UID quota support for %s prohibited, ignoring.", where);
70a5db58 548 else
1a53adb3 549 log_warning_errno(r, "Failed to query quota on %s, ignoring: %m", where);
70a5db58
LP
550
551 continue;
552 }
553
554 if ((FLAGS_SET(req.dqb_valid, QIF_SPACE) && req.dqb_curspace > 0) ||
555 (FLAGS_SET(req.dqb_valid, QIF_INODES) && req.dqb_curinodes > 0)) {
556 log_debug_errno(errno, "Quota reports UID " UID_FMT " occupies disk space on %s.", uid, where);
557 return 1;
558 }
559 }
560
561 return 0;
562}
563
564static int manager_acquire_uid(
565 Manager *m,
566 uid_t start_uid,
567 const char *user_name,
568 const char *exclude_quota_path,
569 uid_t *ret) {
570
571 static const uint8_t hash_key[] = {
572 0xa3, 0xb8, 0x82, 0x69, 0x9a, 0x71, 0xf7, 0xa9,
573 0xe0, 0x7c, 0xf6, 0xf1, 0x21, 0x69, 0xd2, 0x1e
574 };
575
576 enum {
577 PHASE_SUGGESTED,
578 PHASE_HASHED,
579 PHASE_RANDOM
580 } phase = PHASE_SUGGESTED;
581
582 unsigned n_tries = 100;
583 int r;
584
585 assert(m);
586 assert(ret);
587
588 for (;;) {
589 struct passwd *pw;
590 struct group *gr;
591 uid_t candidate;
592 Home *other;
593
594 if (--n_tries <= 0)
595 return -EBUSY;
596
597 switch (phase) {
598
599 case PHASE_SUGGESTED:
600 phase = PHASE_HASHED;
601
602 if (!uid_is_home(start_uid))
603 continue;
604
605 candidate = start_uid;
606 break;
607
608 case PHASE_HASHED:
609 phase = PHASE_RANDOM;
610
611 if (!user_name)
612 continue;
613
614 candidate = UID_CLAMP_INTO_HOME_RANGE(siphash24(user_name, strlen(user_name), hash_key));
615 break;
616
617 case PHASE_RANDOM:
618 random_bytes(&candidate, sizeof(candidate));
619 candidate = UID_CLAMP_INTO_HOME_RANGE(candidate);
620 break;
621
622 default:
04499a70 623 assert_not_reached();
70a5db58
LP
624 }
625
626 other = hashmap_get(m->homes_by_uid, UID_TO_PTR(candidate));
627 if (other) {
23d24b76
ZJS
628 log_debug("Candidate UID " UID_FMT " already used by another home directory (%s), let's try another.",
629 candidate, other->user_name);
70a5db58
LP
630 continue;
631 }
632
633 pw = getpwuid(candidate);
634 if (pw) {
23d24b76
ZJS
635 log_debug("Candidate UID " UID_FMT " already registered by another user in NSS (%s), let's try another.",
636 candidate, pw->pw_name);
70a5db58
LP
637 continue;
638 }
639
640 gr = getgrgid((gid_t) candidate);
641 if (gr) {
23d24b76
ZJS
642 log_debug("Candidate UID " UID_FMT " already registered by another group in NSS (%s), let's try another.",
643 candidate, gr->gr_name);
70a5db58
LP
644 continue;
645 }
646
647 r = search_ipc(candidate, (gid_t) candidate);
648 if (r < 0)
649 continue;
650 if (r > 0) {
23d24b76
ZJS
651 log_debug_errno(r, "Candidate UID " UID_FMT " already owns IPC objects, let's try another: %m",
652 candidate);
70a5db58
LP
653 continue;
654 }
655
656 r = search_quota(candidate, exclude_quota_path);
657 if (r != 0)
658 continue;
659
660 *ret = candidate;
661 return 0;
662 }
663}
664
665static int manager_add_home_by_image(
666 Manager *m,
667 const char *user_name,
668 const char *realm,
669 const char *image_path,
670 const char *sysfs,
671 UserStorage storage,
672 uid_t start_uid) {
673
674 _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
675 uid_t uid;
676 Home *h;
677 int r;
678
679 assert(m);
680
681 assert(m);
682 assert(user_name);
683 assert(image_path);
684 assert(storage >= 0);
685 assert(storage < _USER_STORAGE_MAX);
686
687 h = hashmap_get(m->homes_by_name, user_name);
688 if (h) {
689 bool same;
690
691 if (h->state != HOME_UNFIXATED) {
692 log_debug("Found an image for user %s which already has a record, skipping.", user_name);
693 return 0; /* ignore images that synthesize a user we already have a record for */
694 }
695
696 same = user_record_storage(h->record) == storage;
697 if (same) {
698 if (h->sysfs && sysfs)
699 same = path_equal(h->sysfs, sysfs);
700 else if (!!h->sysfs != !!sysfs)
701 same = false;
702 else {
703 const char *p;
704
705 p = user_record_image_path(h->record);
706 same = p && path_equal(p, image_path);
707 }
708 }
709
710 if (!same) {
80ace4f2 711 log_debug("Found multiple images for user '%s', ignoring image '%s'.", user_name, image_path);
70a5db58
LP
712 return 0;
713 }
714 } else {
715 /* Check NSS, in case there's another user or group by this name */
716 if (getpwnam(user_name) || getgrnam(user_name)) {
717 log_debug("Found an existing user or group by name '%s', ignoring image '%s'.", user_name, image_path);
718 return 0;
719 }
720 }
721
722 if (h && uid_is_valid(h->uid))
723 uid = h->uid;
724 else {
23d24b76
ZJS
725 r = manager_acquire_uid(m, start_uid, user_name,
726 IN_SET(storage, USER_SUBVOLUME, USER_DIRECTORY, USER_FSCRYPT) ? image_path : NULL,
727 &uid);
70a5db58
LP
728 if (r < 0)
729 return log_warning_errno(r, "Failed to acquire unused UID for %s: %m", user_name);
730 }
731
732 hr = user_record_new();
733 if (!hr)
734 return log_oom();
735
736 r = user_record_synthesize(hr, user_name, realm, image_path, storage, uid, (gid_t) uid);
737 if (r < 0)
738 return log_error_errno(r, "Failed to synthesize home record for %s (image %s): %m", user_name, image_path);
739
740 if (h) {
741 r = home_set_record(h, hr);
742 if (r < 0)
743 return log_error_errno(r, "Failed to update home record for %s: %m", user_name);
744 } else {
745 r = home_new(m, hr, sysfs, &h);
746 if (r < 0)
747 return log_error_errno(r, "Failed to allocate new home object: %m");
748
749 h->state = HOME_UNFIXATED;
750
751 log_info("Discovered new home for user %s through image %s.", user_name, image_path);
752 }
753
754 return 1;
755}
756
757int manager_augment_record_with_uid(
758 Manager *m,
759 UserRecord *hr) {
760
761 const char *exclude_quota_path = NULL;
762 uid_t start_uid = UID_INVALID, uid;
763 int r;
764
765 assert(m);
766 assert(hr);
767
768 if (uid_is_valid(hr->uid))
769 return 0;
770
771 if (IN_SET(hr->storage, USER_CLASSIC, USER_SUBVOLUME, USER_DIRECTORY, USER_FSCRYPT)) {
772 const char * ip;
773
774 ip = user_record_image_path(hr);
775 if (ip) {
776 struct stat st;
777
778 if (stat(ip, &st) < 0) {
779 if (errno != ENOENT)
780 log_warning_errno(errno, "Failed to stat(%s): %m", ip);
781 } else if (uid_is_home(st.st_uid)) {
782 start_uid = st.st_uid;
783 exclude_quota_path = ip;
784 }
785 }
786 }
787
788 r = manager_acquire_uid(m, start_uid, hr->user_name, exclude_quota_path, &uid);
789 if (r < 0)
790 return r;
791
792 log_debug("Acquired new UID " UID_FMT " for %s.", uid, hr->user_name);
793
794 r = user_record_add_binding(
795 hr,
796 _USER_STORAGE_INVALID,
797 NULL,
798 SD_ID128_NULL,
799 SD_ID128_NULL,
800 SD_ID128_NULL,
801 NULL,
802 NULL,
803 UINT64_MAX,
804 NULL,
805 NULL,
806 uid,
807 (gid_t) uid);
808 if (r < 0)
809 return r;
810
811 return 1;
812}
813
814static int manager_assess_image(
815 Manager *m,
816 int dir_fd,
817 const char *dir_path,
818 const char *dentry_name) {
819
820 char *luks_suffix, *directory_suffix;
821 _cleanup_free_ char *path = NULL;
822 struct stat st;
823 int r;
824
825 assert(m);
826 assert(dir_path);
827 assert(dentry_name);
828
829 luks_suffix = endswith(dentry_name, ".home");
830 if (luks_suffix)
831 directory_suffix = NULL;
832 else
833 directory_suffix = endswith(dentry_name, ".homedir");
834
835 /* Early filter out: by name */
836 if (!luks_suffix && !directory_suffix)
837 return 0;
838
839 path = path_join(dir_path, dentry_name);
840 if (!path)
841 return log_oom();
842
843 /* Follow symlinks here, to allow people to link in stuff to make them available locally. */
844 if (dir_fd >= 0)
845 r = fstatat(dir_fd, dentry_name, &st, 0);
846 else
847 r = stat(path, &st);
848 if (r < 0)
849 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
80ace4f2 850 "Failed to stat() directory entry '%s', ignoring: %m", dentry_name);
70a5db58
LP
851
852 if (S_ISREG(st.st_mode)) {
853 _cleanup_free_ char *n = NULL, *user_name = NULL, *realm = NULL;
854
855 if (!luks_suffix)
856 return 0;
857
858 n = strndup(dentry_name, luks_suffix - dentry_name);
859 if (!n)
860 return log_oom();
861
862 r = split_user_name_realm(n, &user_name, &realm);
863 if (r == -EINVAL) /* Not the right format: ignore */
864 return 0;
865 if (r < 0)
866 return log_error_errno(r, "Failed to split image name into user name/realm: %m");
867
868 return manager_add_home_by_image(m, user_name, realm, path, NULL, USER_LUKS, UID_INVALID);
869 }
870
871 if (S_ISDIR(st.st_mode)) {
872 _cleanup_free_ char *n = NULL, *user_name = NULL, *realm = NULL;
254d1313 873 _cleanup_close_ int fd = -EBADF;
70a5db58
LP
874 UserStorage storage;
875
876 if (!directory_suffix)
877 return 0;
878
879 n = strndup(dentry_name, directory_suffix - dentry_name);
880 if (!n)
881 return log_oom();
882
883 r = split_user_name_realm(n, &user_name, &realm);
884 if (r == -EINVAL) /* Not the right format: ignore */
885 return 0;
886 if (r < 0)
887 return log_error_errno(r, "Failed to split image name into user name/realm: %m");
888
889 if (dir_fd >= 0)
890 fd = openat(dir_fd, dentry_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
891 else
892 fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
893 if (fd < 0)
894 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
895 "Failed to open directory '%s', ignoring: %m", path);
896
897 if (fstat(fd, &st) < 0)
898 return log_warning_errno(errno, "Failed to fstat() %s, ignoring: %m", path);
899
900 assert(S_ISDIR(st.st_mode)); /* Must hold, we used O_DIRECTORY above */
901
902 r = btrfs_is_subvol_fd(fd);
903 if (r < 0)
904 return log_warning_errno(errno, "Failed to determine whether %s is a btrfs subvolume: %m", path);
905 if (r > 0)
906 storage = USER_SUBVOLUME;
907 else {
908 struct fscrypt_policy policy;
909
910 if (ioctl(fd, FS_IOC_GET_ENCRYPTION_POLICY, &policy) < 0) {
911
912 if (errno == ENODATA)
913 log_debug_errno(errno, "Determined %s is not fscrypt encrypted.", path);
914 else if (ERRNO_IS_NOT_SUPPORTED(errno))
80ace4f2 915 log_debug_errno(errno, "Determined %s is not fscrypt encrypted because kernel or file system doesn't support it.", path);
70a5db58
LP
916 else
917 log_debug_errno(errno, "FS_IOC_GET_ENCRYPTION_POLICY failed with unexpected error code on %s, ignoring: %m", path);
918
919 storage = USER_DIRECTORY;
920 } else
921 storage = USER_FSCRYPT;
922 }
923
924 return manager_add_home_by_image(m, user_name, realm, path, NULL, storage, st.st_uid);
925 }
926
927 return 0;
928}
929
930int manager_enumerate_images(Manager *m) {
931 _cleanup_closedir_ DIR *d = NULL;
70a5db58
LP
932
933 assert(m);
934
935 if (!m->scan_slash_home)
936 return 0;
937
2700fecd 938 d = opendir(get_home_root());
70a5db58
LP
939 if (!d)
940 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
2700fecd 941 "Failed to open %s: %m", get_home_root());
70a5db58 942
2700fecd
LP
943 FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read %s directory: %m", get_home_root()))
944 (void) manager_assess_image(m, dirfd(d), get_home_root(), de->d_name);
70a5db58
LP
945
946 return 0;
947}
948
949static int manager_connect_bus(Manager *m) {
1d3b68f6 950 _cleanup_free_ char *b = NULL;
cc9886bc 951 const char *suffix, *busname;
70a5db58
LP
952 int r;
953
954 assert(m);
955 assert(!m->bus);
956
957 r = sd_bus_default_system(&m->bus);
958 if (r < 0)
959 return log_error_errno(r, "Failed to connect to system bus: %m");
960
cfd508a9 961 r = bus_add_implementation(m->bus, &manager_object, m);
ac9f55ed
LP
962 if (r < 0)
963 return r;
964
c42234ab
LP
965 r = bus_log_control_api_register(m->bus);
966 if (r < 0)
967 return r;
968
cc9886bc 969 suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
1d3b68f6
AZ
970 if (suffix) {
971 b = strjoin("org.freedesktop.home1.", suffix);
972 if (!b)
973 return log_oom();
974 busname = b;
975 } else
cc9886bc
LP
976 busname = "org.freedesktop.home1";
977
978 r = sd_bus_request_name_async(m->bus, NULL, busname, 0, NULL, NULL);
70a5db58
LP
979 if (r < 0)
980 return log_error_errno(r, "Failed to request name: %m");
981
982 r = sd_bus_attach_event(m->bus, m->event, 0);
983 if (r < 0)
984 return log_error_errno(r, "Failed to attach bus to event loop: %m");
985
986 (void) sd_bus_set_exit_on_disconnect(m->bus, true);
987
988 return 0;
989}
990
991static int manager_bind_varlink(Manager *m) {
1d3b68f6 992 _cleanup_free_ char *p = NULL;
cc9886bc 993 const char *suffix, *socket_path;
70a5db58
LP
994 int r;
995
996 assert(m);
997 assert(!m->varlink_server);
998
9807fdc1 999 r = varlink_server_new(&m->varlink_server, VARLINK_SERVER_ACCOUNT_UID|VARLINK_SERVER_INHERIT_USERDATA);
70a5db58
LP
1000 if (r < 0)
1001 return log_error_errno(r, "Failed to allocate varlink server object: %m");
1002
1003 varlink_server_set_userdata(m->varlink_server, m);
1004
1005 r = varlink_server_bind_method_many(
1006 m->varlink_server,
1007 "io.systemd.UserDatabase.GetUserRecord", vl_method_get_user_record,
1008 "io.systemd.UserDatabase.GetGroupRecord", vl_method_get_group_record,
1009 "io.systemd.UserDatabase.GetMemberships", vl_method_get_memberships);
1010 if (r < 0)
1011 return log_error_errno(r, "Failed to register varlink methods: %m");
1012
1013 (void) mkdir_p("/run/systemd/userdb", 0755);
1014
cc9886bc
LP
1015 /* To make things easier to debug, when working from a homed managed home directory, let's optionally
1016 * use a different varlink socket name */
1017 suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
1d3b68f6
AZ
1018 if (suffix) {
1019 p = strjoin("/run/systemd/userdb/io.systemd.Home.", suffix);
1020 if (!p)
1021 return log_oom();
1022 socket_path = p;
1023 } else
cc9886bc
LP
1024 socket_path = "/run/systemd/userdb/io.systemd.Home";
1025
1026 r = varlink_server_listen_address(m->varlink_server, socket_path, 0666);
70a5db58
LP
1027 if (r < 0)
1028 return log_error_errno(r, "Failed to bind to varlink socket: %m");
1029
1030 r = varlink_server_attach_event(m->varlink_server, m->event, SD_EVENT_PRIORITY_NORMAL);
1031 if (r < 0)
1032 return log_error_errno(r, "Failed to attach varlink connection to event loop: %m");
1033
cc9886bc 1034 assert(!m->userdb_service);
c96c9fc7
LP
1035 r = path_extract_filename(socket_path, &m->userdb_service);
1036 if (r < 0)
1037 return log_error_errno(r, "Failed to extra filename from socket path '%s': %m", socket_path);
cc9886bc
LP
1038
1039 /* Avoid recursion */
1040 if (setenv("SYSTEMD_BYPASS_USERDB", m->userdb_service, 1) < 0)
1041 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to set $SYSTEMD_BYPASS_USERDB: %m");
1042
70a5db58
LP
1043 return 0;
1044}
1045
2aaf565a
LP
1046static ssize_t read_datagram(
1047 int fd,
1048 struct ucred *ret_sender,
1049 void **ret,
1050 int *ret_passed_fd) {
1051
1052 CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))) control;
70a5db58 1053 _cleanup_free_ void *buffer = NULL;
254d1313 1054 _cleanup_close_ int passed_fd = -EBADF;
2aaf565a
LP
1055 struct ucred *sender = NULL;
1056 struct cmsghdr *cmsg;
1057 struct msghdr mh;
1058 struct iovec iov;
70a5db58
LP
1059 ssize_t n, m;
1060
1061 assert(fd >= 0);
1062 assert(ret_sender);
1063 assert(ret);
2aaf565a 1064 assert(ret_passed_fd);
70a5db58
LP
1065
1066 n = next_datagram_size_fd(fd);
1067 if (n < 0)
1068 return n;
1069
1070 buffer = malloc(n + 2);
1071 if (!buffer)
1072 return -ENOMEM;
1073
2aaf565a
LP
1074 /* Pass one extra byte, as a size check */
1075 iov = IOVEC_MAKE(buffer, n + 1);
70a5db58 1076
2aaf565a
LP
1077 mh = (struct msghdr) {
1078 .msg_iov = &iov,
1079 .msg_iovlen = 1,
1080 .msg_control = &control,
1081 .msg_controllen = sizeof(control),
1082 };
70a5db58 1083
2aaf565a
LP
1084 m = recvmsg_safe(fd, &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1085 if (m < 0)
1086 return m;
70a5db58 1087
2aaf565a
LP
1088 /* Ensure the size matches what we determined before */
1089 if (m != n) {
70a5db58 1090 cmsg_close_all(&mh);
2aaf565a
LP
1091 return -EMSGSIZE;
1092 }
70a5db58 1093
2aaf565a
LP
1094 CMSG_FOREACH(cmsg, &mh) {
1095 if (cmsg->cmsg_level == SOL_SOCKET &&
1096 cmsg->cmsg_type == SCM_CREDENTIALS &&
1097 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
1098 assert(!sender);
b1d02191 1099 sender = CMSG_TYPED_DATA(cmsg, struct ucred);
2aaf565a 1100 }
70a5db58 1101
2aaf565a
LP
1102 if (cmsg->cmsg_level == SOL_SOCKET &&
1103 cmsg->cmsg_type == SCM_RIGHTS) {
70a5db58 1104
2aaf565a
LP
1105 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
1106 cmsg_close_all(&mh);
1107 return -EMSGSIZE;
70a5db58
LP
1108 }
1109
2aaf565a 1110 assert(passed_fd < 0);
b1d02191 1111 passed_fd = *CMSG_TYPED_DATA(cmsg, int);
2aaf565a 1112 }
70a5db58
LP
1113 }
1114
2aaf565a
LP
1115 if (sender)
1116 *ret_sender = *sender;
1117 else
1118 *ret_sender = (struct ucred) UCRED_INVALID;
1119
1120 *ret_passed_fd = TAKE_FD(passed_fd);
1121
70a5db58
LP
1122 /* For safety reasons: let's always NUL terminate. */
1123 ((char*) buffer)[n] = 0;
1124 *ret = TAKE_PTR(buffer);
1125
1126 return 0;
1127}
1128
1129static int on_notify_socket(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1130 _cleanup_strv_free_ char **l = NULL;
1131 _cleanup_free_ void *datagram = NULL;
254d1313 1132 _cleanup_close_ int passed_fd = -EBADF;
2aaf565a 1133 struct ucred sender = UCRED_INVALID;
99534007 1134 Manager *m = ASSERT_PTR(userdata);
70a5db58
LP
1135 ssize_t n;
1136 Home *h;
1137
1138 assert(s);
70a5db58 1139
2aaf565a 1140 n = read_datagram(fd, &sender, &datagram, &passed_fd);
8add30a0
YW
1141 if (n < 0) {
1142 if (ERRNO_IS_TRANSIENT(n))
1143 return 0;
70a5db58 1144 return log_error_errno(n, "Failed to read notify datagram: %m");
8add30a0 1145 }
70a5db58
LP
1146
1147 if (sender.pid <= 0) {
1148 log_warning("Received notify datagram without valid sender PID, ignoring.");
1149 return 0;
1150 }
1151
1152 h = hashmap_get(m->homes_by_worker_pid, PID_TO_PTR(sender.pid));
1153 if (!h) {
162392b7 1154 log_warning("Received notify datagram of unknown process, ignoring.");
70a5db58
LP
1155 return 0;
1156 }
1157
1158 l = strv_split(datagram, "\n");
1159 if (!l)
1160 return log_oom();
1161
2aaf565a 1162 home_process_notify(h, l, TAKE_FD(passed_fd));
70a5db58
LP
1163 return 0;
1164}
1165
1166static int manager_listen_notify(Manager *m) {
254d1313 1167 _cleanup_close_ int fd = -EBADF;
425d925f
ZJS
1168 union sockaddr_union sa = {
1169 .un.sun_family = AF_UNIX,
1170 .un.sun_path = "/run/systemd/home/notify",
1171 };
cc9886bc 1172 const char *suffix;
70a5db58
LP
1173 int r;
1174
1175 assert(m);
1176 assert(!m->notify_socket_event_source);
1177
cc9886bc
LP
1178 suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
1179 if (suffix) {
1d3b68f6 1180 _cleanup_free_ char *unix_path = NULL;
cc9886bc 1181
1d3b68f6
AZ
1182 unix_path = strjoin("/run/systemd/home/notify.", suffix);
1183 if (!unix_path)
1184 return log_oom();
cc9886bc
LP
1185 r = sockaddr_un_set_path(&sa.un, unix_path);
1186 if (r < 0)
1187 return log_error_errno(r, "Socket path %s does not fit in sockaddr_un: %m", unix_path);
1188 }
1189
70a5db58
LP
1190 fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1191 if (fd < 0)
1192 return log_error_errno(errno, "Failed to create listening socket: %m");
1193
70a5db58
LP
1194 (void) mkdir_parents(sa.un.sun_path, 0755);
1195 (void) sockaddr_un_unlink(&sa.un);
1196
1197 if (bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0)
1198 return log_error_errno(errno, "Failed to bind to socket: %m");
1199
1200 r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
1201 if (r < 0)
1202 return r;
1203
1204 r = sd_event_add_io(m->event, &m->notify_socket_event_source, fd, EPOLLIN, on_notify_socket, m);
1205 if (r < 0)
1206 return log_error_errno(r, "Failed to allocate event source for notify socket: %m");
1207
1208 (void) sd_event_source_set_description(m->notify_socket_event_source, "notify-socket");
1209
1210 /* Make sure we process sd_notify() before SIGCHLD for any worker, so that we always know the error
1211 * number of a client before it exits. */
1212 r = sd_event_source_set_priority(m->notify_socket_event_source, SD_EVENT_PRIORITY_NORMAL - 5);
1213 if (r < 0)
1214 return log_error_errno(r, "Failed to alter priority of NOTIFY_SOCKET event source: %m");
1215
1216 r = sd_event_source_set_io_fd_own(m->notify_socket_event_source, true);
1217 if (r < 0)
1218 return log_error_errno(r, "Failed to pass ownership of notify socket: %m");
1219
1220 return TAKE_FD(fd);
1221}
1222
1223static int manager_add_device(Manager *m, sd_device *d) {
1224 _cleanup_free_ char *user_name = NULL, *realm = NULL, *node = NULL;
1225 const char *tabletype, *parttype, *partname, *partuuid, *sysfs;
1226 sd_id128_t id;
1227 int r;
1228
1229 assert(m);
1230 assert(d);
1231
1232 r = sd_device_get_syspath(d, &sysfs);
1233 if (r < 0)
1234 return log_error_errno(r, "Failed to acquire sysfs path of device: %m");
1235
1236 r = sd_device_get_property_value(d, "ID_PART_TABLE_TYPE", &tabletype);
1237 if (r == -ENOENT)
1238 return 0;
1239 if (r < 0)
1240 return log_error_errno(r, "Failed to acquire ID_PART_TABLE_TYPE device property, ignoring: %m");
1241
1242 if (!streq(tabletype, "gpt")) {
1243 log_debug("Found partition (%s) on non-GPT table, ignoring.", sysfs);
1244 return 0;
1245 }
1246
1247 r = sd_device_get_property_value(d, "ID_PART_ENTRY_TYPE", &parttype);
1248 if (r == -ENOENT)
1249 return 0;
1250 if (r < 0)
1251 return log_error_errno(r, "Failed to acquire ID_PART_ENTRY_TYPE device property, ignoring: %m");
92e72028 1252 if (sd_id128_string_equal(parttype, SD_GPT_USER_HOME) <= 0) {
70a5db58
LP
1253 log_debug("Found partition (%s) we don't care about, ignoring.", sysfs);
1254 return 0;
1255 }
1256
1257 r = sd_device_get_property_value(d, "ID_PART_ENTRY_NAME", &partname);
1258 if (r < 0)
1259 return log_warning_errno(r, "Failed to acquire ID_PART_ENTRY_NAME device property, ignoring: %m");
1260
1261 r = split_user_name_realm(partname, &user_name, &realm);
1262 if (r == -EINVAL)
1263 return log_warning_errno(r, "Found partition with correct partition type but a non-parsable partition name '%s', ignoring.", partname);
1264 if (r < 0)
1265 return log_error_errno(r, "Failed to validate partition name '%s': %m", partname);
1266
1267 r = sd_device_get_property_value(d, "ID_FS_UUID", &partuuid);
1268 if (r < 0)
1269 return log_warning_errno(r, "Failed to acquire ID_FS_UUID device property, ignoring: %m");
1270
1271 r = sd_id128_from_string(partuuid, &id);
1272 if (r < 0)
1273 return log_warning_errno(r, "Failed to parse ID_FS_UUID field '%s', ignoring: %m", partuuid);
1274
1275 if (asprintf(&node, "/dev/disk/by-uuid/" SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(id)) < 0)
1276 return log_oom();
1277
1278 return manager_add_home_by_image(m, user_name, realm, node, sysfs, USER_LUKS, UID_INVALID);
1279}
1280
1281static int manager_on_device(sd_device_monitor *monitor, sd_device *d, void *userdata) {
99534007 1282 Manager *m = ASSERT_PTR(userdata);
70a5db58
LP
1283 int r;
1284
70a5db58
LP
1285 assert(d);
1286
a1130022 1287 if (device_for_action(d, SD_DEVICE_REMOVE)) {
70a5db58
LP
1288 const char *sysfs;
1289 Home *h;
1290
1291 r = sd_device_get_syspath(d, &sysfs);
1292 if (r < 0) {
1293 log_warning_errno(r, "Failed to acquire sysfs path from device: %m");
1294 return 0;
1295 }
1296
1297 log_info("block device %s has been removed.", sysfs);
1298
1299 /* Let's see if we previously synthesized a home record from this device, if so, let's just
1300 * revalidate that. Otherwise let's revalidate them all, but asynchronously. */
1301 h = hashmap_get(m->homes_by_sysfs, sysfs);
1302 if (h)
1303 manager_revalidate_image(m, h);
1304 else
1305 manager_enqueue_gc(m, NULL);
1306 } else
1307 (void) manager_add_device(m, d);
1308
1309 (void) bus_manager_emit_auto_login_changed(m);
1310 return 0;
1311}
1312
1313static int manager_watch_devices(Manager *m) {
1314 int r;
1315
1316 assert(m);
1317 assert(!m->device_monitor);
1318
1319 r = sd_device_monitor_new(&m->device_monitor);
1320 if (r < 0)
1321 return log_error_errno(r, "Failed to allocate device monitor: %m");
1322
1323 r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_monitor, "block", NULL);
1324 if (r < 0)
1325 return log_error_errno(r, "Failed to configure device monitor match: %m");
1326
1327 r = sd_device_monitor_attach_event(m->device_monitor, m->event);
1328 if (r < 0)
1329 return log_error_errno(r, "Failed to attach device monitor to event loop: %m");
1330
1331 r = sd_device_monitor_start(m->device_monitor, manager_on_device, m);
1332 if (r < 0)
1333 return log_error_errno(r, "Failed to start device monitor: %m");
1334
1335 return 0;
1336}
1337
1338static int manager_enumerate_devices(Manager *m) {
1339 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
70a5db58
LP
1340 int r;
1341
1342 assert(m);
1343
1344 r = sd_device_enumerator_new(&e);
1345 if (r < 0)
1346 return r;
1347
1348 r = sd_device_enumerator_add_match_subsystem(e, "block", true);
1349 if (r < 0)
1350 return r;
1351
1352 FOREACH_DEVICE(e, d)
1353 (void) manager_add_device(m, d);
1354
1355 return 0;
1356}
1357
1358static int manager_load_key_pair(Manager *m) {
5d2a48da 1359 _cleanup_fclose_ FILE *f = NULL;
70a5db58
LP
1360 struct stat st;
1361 int r;
1362
1363 assert(m);
1364
1365 if (m->private_key) {
1366 EVP_PKEY_free(m->private_key);
1367 m->private_key = NULL;
1368 }
1369
2708160c 1370 r = search_and_fopen_nulstr("local.private", "re", NULL, KEY_PATHS_NULSTR, &f, NULL);
70a5db58
LP
1371 if (r == -ENOENT)
1372 return 0;
1373 if (r < 0)
1374 return log_error_errno(r, "Failed to read private key file: %m");
1375
1376 if (fstat(fileno(f), &st) < 0)
1377 return log_error_errno(errno, "Failed to stat private key file: %m");
1378
1379 r = stat_verify_regular(&st);
1380 if (r < 0)
1381 return log_error_errno(r, "Private key file is not regular: %m");
1382
1383 if (st.st_uid != 0 || (st.st_mode & 0077) != 0)
1384 return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Private key file is readable by more than the root user");
1385
1386 m->private_key = PEM_read_PrivateKey(f, NULL, NULL, NULL);
1387 if (!m->private_key)
1388 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to load private key pair");
1389
1390 log_info("Successfully loaded private key pair.");
1391
1392 return 1;
1393}
1394
fd421c4a 1395DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(EVP_PKEY_CTX*, EVP_PKEY_CTX_free, NULL);
70a5db58
LP
1396
1397static int manager_generate_key_pair(Manager *m) {
1398 _cleanup_(EVP_PKEY_CTX_freep) EVP_PKEY_CTX *ctx = NULL;
1399 _cleanup_(unlink_and_freep) char *temp_public = NULL, *temp_private = NULL;
1400 _cleanup_fclose_ FILE *fpublic = NULL, *fprivate = NULL;
1401 int r;
1402
1403 if (m->private_key) {
1404 EVP_PKEY_free(m->private_key);
1405 m->private_key = NULL;
1406 }
1407
1408 ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_ED25519, NULL);
1409 if (!ctx)
1410 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to allocate Ed25519 key generation context.");
1411
1412 if (EVP_PKEY_keygen_init(ctx) <= 0)
1413 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to initialize Ed25519 key generation context.");
1414
1415 log_info("Generating key pair for signing local user identity records.");
1416
1417 if (EVP_PKEY_keygen(ctx, &m->private_key) <= 0)
1418 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to generate Ed25519 key pair");
1419
1420 log_info("Successfully created Ed25519 key pair.");
1421
1422 (void) mkdir_p("/var/lib/systemd/home", 0755);
1423
1424 /* Write out public key (note that we only do that as a help to the user, we don't make use of this ever */
1425 r = fopen_temporary("/var/lib/systemd/home/local.public", &fpublic, &temp_public);
1426 if (r < 0)
80ace4f2 1427 return log_error_errno(errno, "Failed to open key file for writing: %m");
70a5db58
LP
1428
1429 if (PEM_write_PUBKEY(fpublic, m->private_key) <= 0)
1430 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to write public key.");
1431
fa3709c5 1432 r = fflush_sync_and_check(fpublic);
70a5db58
LP
1433 if (r < 0)
1434 return log_error_errno(r, "Failed to write private key: %m");
1435
1436 fpublic = safe_fclose(fpublic);
1437
1438 /* Write out the private key (this actually writes out both private and public, OpenSSL is confusing) */
1439 r = fopen_temporary("/var/lib/systemd/home/local.private", &fprivate, &temp_private);
1440 if (r < 0)
80ace4f2 1441 return log_error_errno(errno, "Failed to open key file for writing: %m");
70a5db58
LP
1442
1443 if (PEM_write_PrivateKey(fprivate, m->private_key, NULL, NULL, 0, NULL, 0) <= 0)
1444 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to write private key pair.");
1445
fa3709c5 1446 r = fflush_sync_and_check(fprivate);
70a5db58
LP
1447 if (r < 0)
1448 return log_error_errno(r, "Failed to write private key: %m");
1449
1450 fprivate = safe_fclose(fprivate);
1451
1452 /* Both are written now, move them into place */
1453
1454 if (rename(temp_public, "/var/lib/systemd/home/local.public") < 0)
1455 return log_error_errno(errno, "Failed to move public key file into place: %m");
1456 temp_public = mfree(temp_public);
1457
39eb3ffa
DDM
1458 r = RET_NERRNO(rename(temp_private, "/var/lib/systemd/home/local.private"));
1459 if (r < 0) {
1460 (void) unlink("/var/lib/systemd/home/local.public"); /* try to remove the file we already created */
1461 return log_error_errno(r, "Failed to move private key file into place: %m");
70a5db58
LP
1462 }
1463 temp_private = mfree(temp_private);
1464
fa3709c5
LP
1465 r = fsync_path_at(AT_FDCWD, "/var/lib/systemd/home/");
1466 if (r < 0)
1467 log_warning_errno(r, "Failed to sync /var/lib/systemd/home/, ignoring: %m");
1468
70a5db58
LP
1469 return 1;
1470}
1471
1472int manager_acquire_key_pair(Manager *m) {
1473 int r;
1474
1475 assert(m);
1476
1477 /* Already there? */
1478 if (m->private_key)
1479 return 1;
1480
1481 /* First try to load key off disk */
1482 r = manager_load_key_pair(m);
1483 if (r != 0)
1484 return r;
1485
1486 /* Didn't work, generate a new one */
1487 return manager_generate_key_pair(m);
1488}
1489
1490int manager_sign_user_record(Manager *m, UserRecord *u, UserRecord **ret, sd_bus_error *error) {
1491 int r;
1492
1493 assert(m);
1494 assert(u);
1495 assert(ret);
1496
1497 r = manager_acquire_key_pair(m);
1498 if (r < 0)
1499 return r;
1500 if (r == 0)
1b09b81c 1501 return sd_bus_error_set(error, BUS_ERROR_NO_PRIVATE_KEY, "Can't sign without local key.");
70a5db58
LP
1502
1503 return user_record_sign(u, m->private_key, ret);
1504}
1505
1506DEFINE_PRIVATE_HASH_OPS_FULL(public_key_hash_ops, char, string_hash_func, string_compare_func, free, EVP_PKEY, EVP_PKEY_free);
fd421c4a 1507DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(EVP_PKEY*, EVP_PKEY_free, NULL);
70a5db58
LP
1508
1509static int manager_load_public_key_one(Manager *m, const char *path) {
1510 _cleanup_(EVP_PKEY_freep) EVP_PKEY *pkey = NULL;
1511 _cleanup_fclose_ FILE *f = NULL;
1512 _cleanup_free_ char *fn = NULL;
1513 struct stat st;
1514 int r;
1515
1516 assert(m);
1517
c96c9fc7
LP
1518 r = path_extract_filename(path, &fn);
1519 if (r < 0)
1520 return log_error_errno(r, "Failed to extract filename of path '%s': %m", path);
1521
1522 if (streq(fn, "local.public")) /* we already loaded the private key, which includes the public one */
70a5db58
LP
1523 return 0;
1524
1525 f = fopen(path, "re");
1526 if (!f) {
1527 if (errno == ENOENT)
1528 return 0;
1529
1530 return log_error_errno(errno, "Failed to open public key %s: %m", path);
1531 }
1532
1533 if (fstat(fileno(f), &st) < 0)
1534 return log_error_errno(errno, "Failed to stat public key %s: %m", path);
1535
1536 r = stat_verify_regular(&st);
1537 if (r < 0)
1538 return log_error_errno(r, "Public key file %s is not a regular file: %m", path);
1539
1540 if (st.st_uid != 0 || (st.st_mode & 0022) != 0)
1541 return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Public key file %s is writable by more than the root user, refusing.", path);
1542
1543 r = hashmap_ensure_allocated(&m->public_keys, &public_key_hash_ops);
1544 if (r < 0)
1545 return log_oom();
1546
1547 pkey = PEM_read_PUBKEY(f, &pkey, NULL, NULL);
1548 if (!pkey)
1549 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to parse public key file %s.", path);
1550
70a5db58
LP
1551 r = hashmap_put(m->public_keys, fn, pkey);
1552 if (r < 0)
1553 return log_error_errno(r, "Failed to add public key to set: %m");
1554
1555 TAKE_PTR(fn);
1556 TAKE_PTR(pkey);
1557
1558 return 0;
1559}
1560
1561static int manager_load_public_keys(Manager *m) {
1562 _cleanup_strv_free_ char **files = NULL;
70a5db58
LP
1563 int r;
1564
1565 assert(m);
1566
1567 m->public_keys = hashmap_free(m->public_keys);
1568
1569 r = conf_files_list_nulstr(
1570 &files,
1571 ".public",
1572 NULL,
1573 CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED,
1574 KEY_PATHS_NULSTR);
1575 if (r < 0)
1576 return log_error_errno(r, "Failed to assemble list of public key directories: %m");
1577
1578 STRV_FOREACH(i, files)
1579 (void) manager_load_public_key_one(m, *i);
1580
1581 return 0;
1582}
1583
1584int manager_startup(Manager *m) {
1585 int r;
1586
1587 assert(m);
1588
1589 r = manager_listen_notify(m);
1590 if (r < 0)
1591 return r;
1592
1593 r = manager_connect_bus(m);
1594 if (r < 0)
1595 return r;
1596
1597 r = manager_bind_varlink(m);
1598 if (r < 0)
1599 return r;
1600
1601 r = manager_load_key_pair(m); /* only try to load it, don't generate any */
1602 if (r < 0)
1603 return r;
1604
1605 r = manager_load_public_keys(m);
1606 if (r < 0)
1607 return r;
1608
1609 manager_watch_home(m);
1610 (void) manager_watch_devices(m);
1611
1612 (void) manager_enumerate_records(m);
1613 (void) manager_enumerate_images(m);
1614 (void) manager_enumerate_devices(m);
1615
1616 /* Let's clean up home directories whose devices got removed while we were not running */
1617 (void) manager_enqueue_gc(m, NULL);
1618
1619 return 0;
1620}
1621
1622void manager_revalidate_image(Manager *m, Home *h) {
1623 int r;
1624
1625 assert(m);
1626 assert(h);
1627
1628 /* Frees an automatically discovered image, if it's synthetic and its image disappeared. Unmounts any
18fe76eb 1629 * image if it's mounted but its image vanished. */
70a5db58
LP
1630
1631 if (h->current_operation || !ordered_set_isempty(h->pending_operations))
1632 return;
1633
1634 if (h->state == HOME_UNFIXATED) {
1635 r = user_record_test_image_path(h->record);
1636 if (r < 0)
1637 log_warning_errno(r, "Can't determine if image of %s exists, freeing unfixated user: %m", h->user_name);
1638 else if (r == USER_TEST_ABSENT)
1639 log_info("Image for %s disappeared, freeing unfixated user.", h->user_name);
1640 else
1641 return;
1642
1643 home_free(h);
1644
1645 } else if (h->state < 0) {
1646
1647 r = user_record_test_home_directory(h->record);
1648 if (r < 0) {
1649 log_warning_errno(r, "Unable to determine state of home directory, ignoring: %m");
1650 return;
1651 }
1652
1653 if (r == USER_TEST_MOUNTED) {
1654 r = user_record_test_image_path(h->record);
1655 if (r < 0) {
1656 log_warning_errno(r, "Unable to determine state of image path, ignoring: %m");
1657 return;
1658 }
1659
1660 if (r == USER_TEST_ABSENT) {
1661 _cleanup_(operation_unrefp) Operation *o = NULL;
1662
1663 log_notice("Backing image disappeared while home directory %s was mounted, unmounting it forcibly.", h->user_name);
1664 /* Wowza, the thing is mounted, but the device is gone? Act on it. */
1665
1666 r = home_killall(h);
1667 if (r < 0)
1668 log_warning_errno(r, "Failed to kill processes of user %s, ignoring: %m", h->user_name);
1669
1670 /* We enqueue the operation here, after all the home directory might
1671 * currently already run some operation, and we can deactivate it only after
1672 * that's complete. */
1673 o = operation_new(OPERATION_DEACTIVATE_FORCE, NULL);
1674 if (!o) {
1675 log_oom();
1676 return;
1677 }
1678
1679 r = home_schedule_operation(h, o, NULL);
1680 if (r < 0)
1681 log_warning_errno(r, "Failed to enqueue forced home directory %s deactivation, ignoring: %m", h->user_name);
1682 }
1683 }
1684 }
1685}
1686
1687int manager_gc_images(Manager *m) {
1688 Home *h;
1689
1690 assert_se(m);
1691
1692 if (m->gc_focus) {
1693 /* Focus on a specific home */
1694
1695 h = TAKE_PTR(m->gc_focus);
1696 manager_revalidate_image(m, h);
1697 } else {
1698 /* Gc all */
70a5db58 1699
90e74a66 1700 HASHMAP_FOREACH(h, m->homes_by_name)
70a5db58
LP
1701 manager_revalidate_image(m, h);
1702 }
1703
1704 return 0;
1705}
1706
1707static int on_deferred_rescan(sd_event_source *s, void *userdata) {
99534007 1708 Manager *m = ASSERT_PTR(userdata);
70a5db58 1709
cf536638 1710 m->deferred_rescan_event_source = sd_event_source_disable_unref(m->deferred_rescan_event_source);
70a5db58
LP
1711
1712 manager_enumerate_devices(m);
1713 manager_enumerate_images(m);
1714 return 0;
1715}
1716
1717int manager_enqueue_rescan(Manager *m) {
1718 int r;
1719
1720 assert(m);
1721
1722 if (m->deferred_rescan_event_source)
1723 return 0;
1724
1725 if (!m->event)
1726 return 0;
1727
1728 if (IN_SET(sd_event_get_state(m->event), SD_EVENT_FINISHED, SD_EVENT_EXITING))
1729 return 0;
1730
1731 r = sd_event_add_defer(m->event, &m->deferred_rescan_event_source, on_deferred_rescan, m);
1732 if (r < 0)
1733 return log_error_errno(r, "Failed to allocate rescan event source: %m");
1734
1735 r = sd_event_source_set_priority(m->deferred_rescan_event_source, SD_EVENT_PRIORITY_IDLE+1);
1736 if (r < 0)
1737 log_warning_errno(r, "Failed to tweak priority of event source, ignoring: %m");
1738
1739 (void) sd_event_source_set_description(m->deferred_rescan_event_source, "deferred-rescan");
1740 return 1;
1741}
1742
1743static int on_deferred_gc(sd_event_source *s, void *userdata) {
99534007 1744 Manager *m = ASSERT_PTR(userdata);
70a5db58 1745
cf536638 1746 m->deferred_gc_event_source = sd_event_source_disable_unref(m->deferred_gc_event_source);
70a5db58
LP
1747
1748 manager_gc_images(m);
1749 return 0;
1750}
1751
1752int manager_enqueue_gc(Manager *m, Home *focus) {
1753 int r;
1754
1755 assert(m);
1756
1757 /* This enqueues a request to GC dead homes. It may be called with focus=NULL in which case all homes
1758 * will be scanned, or with the parameter set, in which case only that home is checked. */
1759
1760 if (!m->event)
1761 return 0;
1762
1763 if (IN_SET(sd_event_get_state(m->event), SD_EVENT_FINISHED, SD_EVENT_EXITING))
1764 return 0;
1765
1766 /* If a focus home is specified, then remember to focus just on this home. Otherwise invalidate any
1767 * focus that might be set to look at all homes. */
1768
1769 if (m->deferred_gc_event_source) {
1770 if (m->gc_focus != focus) /* not the same focus, then look at everything */
1771 m->gc_focus = NULL;
1772
1773 return 0;
1774 } else
162392b7 1775 m->gc_focus = focus; /* start focused */
70a5db58
LP
1776
1777 r = sd_event_add_defer(m->event, &m->deferred_gc_event_source, on_deferred_gc, m);
1778 if (r < 0)
80ace4f2 1779 return log_error_errno(r, "Failed to allocate GC event source: %m");
70a5db58
LP
1780
1781 r = sd_event_source_set_priority(m->deferred_gc_event_source, SD_EVENT_PRIORITY_IDLE);
1782 if (r < 0)
1783 log_warning_errno(r, "Failed to tweak priority of event source, ignoring: %m");
1784
1785 (void) sd_event_source_set_description(m->deferred_gc_event_source, "deferred-gc");
1786 return 1;
1787}
d357b80d
LP
1788
1789static bool manager_shall_rebalance(Manager *m) {
1790 Home *h;
1791
1792 assert(m);
1793
1794 if (IN_SET(m->rebalance_state, REBALANCE_PENDING, REBALANCE_SHRINKING, REBALANCE_GROWING))
1795 return true;
1796
1797 HASHMAP_FOREACH(h, m->homes_by_name)
1798 if (home_shall_rebalance(h))
1799 return true;
1800
1801 return false;
1802}
1803
1804static int home_cmp(Home *const*a, Home *const*b) {
1805 int r;
1806
1807 assert(a);
1808 assert(*a);
1809 assert(b);
1810 assert(*b);
1811
1812 /* Order user records by their weight (and by their name, to make things stable). We put the records
a6f44d61 1813 * with the highest weight last, since we distribute space from the beginning and round down, hence
d357b80d
LP
1814 * later entries tend to get slightly more than earlier entries. */
1815
1816 r = CMP(user_record_rebalance_weight((*a)->record), user_record_rebalance_weight((*b)->record));
1817 if (r != 0)
1818 return r;
1819
1820 return strcmp((*a)->user_name, (*b)->user_name);
1821}
1822
1823static int manager_rebalance_calculate(Manager *m) {
1824 uint64_t weight_sum, free_sum, usage_sum = 0, min_free = UINT64_MAX;
1825 _cleanup_free_ Home **array = NULL;
1826 bool relevant = false;
1827 struct statfs sfs;
1828 int c = 0, r;
1829 Home *h;
1830
1831 assert(m);
1832
1833 if (statfs(get_home_root(), &sfs) < 0)
1834 return log_error_errno(errno, "Failed to statfs() /home: %m");
1835
1836 free_sum = (uint64_t) sfs.f_bsize * sfs.f_bavail; /* This much free space is available on the
1837 * underlying pool directory */
1838
1839 weight_sum = REBALANCE_WEIGHT_BACKING; /* Grant the underlying pool directory a fixed weight of 20
1840 * (home dirs get 100 by default, i.e. 5x more). This weight
1841 * is not configurable, the per-home weights are. */
1842
1843 HASHMAP_FOREACH(h, m->homes_by_name) {
1844 statfs_f_type_t fstype;
1845 h->rebalance_pending = false; /* First, reset the flag, we only want it to be true for the
1846 * homes that qualify for rebalancing */
1847
1848 if (!home_shall_rebalance(h)) /* Only look at actual candidates */
1849 continue;
1850
1851 if (home_is_busy(h))
1852 return -EBUSY; /* Let's not rebalance if there's a busy home directory. */
1853
1854 r = home_get_disk_status(
1855 h,
1856 &h->rebalance_size,
1857 &h->rebalance_usage,
1858 &h->rebalance_free,
1859 NULL,
1860 NULL,
1861 &fstype,
1862 NULL);
1863 if (r < 0) {
1864 log_warning_errno(r, "Failed to get free space of home '%s', ignoring.", h->user_name);
1865 continue;
1866 }
1867
1868 if (h->rebalance_free > UINT64_MAX - free_sum)
1869 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Rebalance free overflow");
1870 free_sum += h->rebalance_free;
1871
1872 if (h->rebalance_usage > UINT64_MAX - usage_sum)
1873 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Rebalance usage overflow");
1874 usage_sum += h->rebalance_usage;
1875
1876 h->rebalance_weight = user_record_rebalance_weight(h->record);
1877 if (h->rebalance_weight > UINT64_MAX - weight_sum)
1878 return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Rebalance weight overflow");
1879 weight_sum += h->rebalance_weight;
1880
1881 h->rebalance_min = minimal_size_by_fs_magic(fstype);
1882
1883 if (!GREEDY_REALLOC(array, c+1))
1884 return log_oom();
1885
1886 array[c++] = h;
1887 }
1888
1889 if (c == 0) {
1890 log_debug("No homes to rebalance.");
1891 return 0;
1892 }
1893
1894 assert(weight_sum > 0);
1895
1896 log_debug("Disk space usage by all home directories to rebalance: %s — available disk space: %s",
1897 FORMAT_BYTES(usage_sum), FORMAT_BYTES(free_sum));
1898
1899 /* Bring the home directories in a well-defined order, so that we distribute space in a reproducible
1900 * way for the same parameters. */
1901 typesafe_qsort(array, c, home_cmp);
1902
1903 for (int i = 0; i < c; i++) {
1904 uint64_t new_free;
1905 double d;
1906
1907 h = array[i];
1908
1909 assert(h->rebalance_free <= free_sum);
1910 assert(h->rebalance_usage <= usage_sum);
1911 assert(h->rebalance_weight <= weight_sum);
1912
1913 d = ((double) (free_sum / 4096) * (double) h->rebalance_weight) / (double) weight_sum; /* Calculate new space for this home in units of 4K */
1914
1915 /* Convert from units of 4K back to bytes */
1916 if (d >= (double) (UINT64_MAX/4096))
1917 new_free = UINT64_MAX;
1918 else
1919 new_free = (uint64_t) d * 4096;
1920
1921 /* Subtract the weight and assigned space from the sums now, to distribute the rounding noise
1922 * to the remaining home dirs */
1923 free_sum = LESS_BY(free_sum, new_free);
1924 weight_sum = LESS_BY(weight_sum, h->rebalance_weight);
1925
1926 /* Keep track of home directory with the least amount of space left: we want to schedule the
1927 * next rebalance more quickly if this is low */
1928 if (new_free < min_free)
1929 min_free = h->rebalance_size;
1930
1931 if (new_free > UINT64_MAX - h->rebalance_usage)
1932 h->rebalance_goal = UINT64_MAX-1; /* maximum size */
1933 else {
1934 h->rebalance_goal = h->rebalance_usage + new_free;
1935
1936 if (h->rebalance_min != UINT64_MAX && h->rebalance_goal < h->rebalance_min)
1937 h->rebalance_goal = h->rebalance_min;
1938 }
1939
1940 /* Skip over this home if the state doesn't match the operation */
1941 if ((m->rebalance_state == REBALANCE_SHRINKING && h->rebalance_goal > h->rebalance_size) ||
1942 (m->rebalance_state == REBALANCE_GROWING && h->rebalance_goal < h->rebalance_size))
1943 h->rebalance_pending = false;
1944 else {
e2341b6b
DT
1945 log_debug("Rebalancing home directory '%s' %s %s %s.", h->user_name,
1946 FORMAT_BYTES(h->rebalance_size),
1947 special_glyph(SPECIAL_GLYPH_ARROW_RIGHT),
1948 FORMAT_BYTES(h->rebalance_goal));
d357b80d
LP
1949 h->rebalance_pending = true;
1950 }
1951
1952 if ((fabs((double) h->rebalance_size - (double) h->rebalance_goal) * 100 / (double) h->rebalance_size) >= 5.0)
1953 relevant = true;
1954 }
1955
1956 /* Scale next rebalancing interval based on the least amount of space of any of the home
1957 * directories. We pick a time in the range 1min … 15min, scaled by log2(min_free), so that:
1958 * 10M → ~0.7min, 100M → ~2.7min, 1G → ~4.6min, 10G → ~6.5min, 100G ~8.4 */
1959 m->rebalance_interval_usec = (usec_t) CLAMP((LESS_BY(log2(min_free), 22)*15*USEC_PER_MINUTE)/26,
1960 1 * USEC_PER_MINUTE,
1961 15 * USEC_PER_MINUTE);
1962
1963
1964 log_debug("Rebalancing interval set to %s.", FORMAT_TIMESPAN(m->rebalance_interval_usec, USEC_PER_MSEC));
1965
1966 /* Let's suppress small resizes, growing/shrinking file systems isn't free after all */
1967 if (!relevant) {
1968 log_debug("Skipping rebalancing, since all calculated size changes are below ±5%%.");
1969 return 0;
1970 }
1971
1972 return c;
1973}
1974
1975static int manager_rebalance_apply(Manager *m) {
1976 int c = 0, r;
1977 Home *h;
1978
1979 assert(m);
1980
1981 HASHMAP_FOREACH(h, m->homes_by_name) {
1982 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
1983
1984 if (!h->rebalance_pending)
1985 continue;
1986
1987 h->rebalance_pending = false;
1988
1989 r = home_resize(h, h->rebalance_goal, /* secret= */ NULL, /* automatic= */ true, &error);
1990 if (r < 0)
1991 log_warning_errno(r, "Failed to resize home '%s' for rebalancing, ignoring: %s",
1992 h->user_name, bus_error_message(&error, r));
1993 else
1994 c++;
1995 }
1996
1997 return c;
1998}
1999
49505916
LP
2000static void manager_rebalance_reply_messages(Manager *m) {
2001 int r;
2002
2003 assert(m);
2004
2005 for (;;) {
2006 _cleanup_(sd_bus_message_unrefp) sd_bus_message *msg =
2007 set_steal_first(m->rebalance_pending_method_calls);
2008
2009 if (!msg)
2010 break;
2011
2012 r = sd_bus_reply_method_return(msg, NULL);
2013 if (r < 0)
2014 log_debug_errno(r, "Failed to reply to rebalance method call, ignoring: %m");
2015 }
2016}
2017
d357b80d
LP
2018static int manager_rebalance_now(Manager *m) {
2019 RebalanceState busy_state; /* the state to revert to when operation fails if busy */
2020 int r;
2021
2022 assert(m);
2023
2024 log_debug("Rebalancing now...");
2025
2026 /* We maintain a simple state engine here to keep track of what we are doing. We'll first shrink all
a6f44d61 2027 * homes that shall be shrunk and then grow all homes that shall be grown, so that they can take up
d357b80d
LP
2028 * the space now freed. */
2029
2030 for (;;) {
2031 switch (m->rebalance_state) {
2032
2033 case REBALANCE_IDLE:
2034 case REBALANCE_PENDING:
2035 case REBALANCE_WAITING:
2036 /* First shrink large home dirs */
2037 m->rebalance_state = REBALANCE_SHRINKING;
2038 busy_state = REBALANCE_PENDING;
49505916
LP
2039
2040 /* We are initiating the next rebalancing cycle now, let's make the queued methods
2041 * calls the pending ones, and flush out any pending ones (which shouldn't exist at
2042 * this time anyway) */
2043 set_clear(m->rebalance_pending_method_calls);
2044 SWAP_TWO(m->rebalance_pending_method_calls, m->rebalance_queued_method_calls);
2045
d357b80d
LP
2046 log_debug("Shrinking phase..");
2047 break;
2048
2049 case REBALANCE_SHRINKING:
2050 /* Then grow small home dirs */
2051 m->rebalance_state = REBALANCE_GROWING;
2052 busy_state = REBALANCE_SHRINKING;
2053 log_debug("Growing phase..");
2054 break;
2055
2056 case REBALANCE_GROWING:
2057 /* Finally, we are done */
2058 log_info("Rebalancing complete.");
2059 m->rebalance_state = REBALANCE_IDLE;
2060 r = 0;
2061 goto finish;
2062
2063 case REBALANCE_OFF:
2064 default:
2065 assert_not_reached();
2066 }
2067
2068 r = manager_rebalance_calculate(m);
2069 if (r == -EBUSY) {
2070 /* Calculations failed because one home directory is currently busy. Revert to a state that
2071 * tells us what to do next. */
2072 log_debug("Can't enter phase, busy.");
2073 m->rebalance_state = busy_state;
2074 return r;
2075 }
2076 if (r < 0)
2077 goto finish;
2078 if (r == 0)
2079 continue; /* got to next step immediately, if there's nothing to do */
2080
2081 r = manager_rebalance_apply(m);
2082 if (r < 0)
2083 goto finish;
2084 if (r > 0)
2085 break; /* At least one resize operation is now pending, we are done for now */
2086
2087 /* If there was nothing to apply, go for next state right-away */
2088 }
2089
2090 return 0;
2091
2092finish:
2093 /* Reset state and schedule next rebalance */
2094 m->rebalance_state = REBALANCE_IDLE;
49505916 2095 manager_rebalance_reply_messages(m);
d357b80d
LP
2096 (void) manager_schedule_rebalance(m, /* immediately= */ false);
2097 return r;
2098}
2099
2100static int on_rebalance_timer(sd_event_source *s, usec_t t, void *userdata) {
99534007 2101 Manager *m = ASSERT_PTR(userdata);
d357b80d
LP
2102
2103 assert(s);
d357b80d
LP
2104 assert(IN_SET(m->rebalance_state, REBALANCE_WAITING, REBALANCE_PENDING, REBALANCE_SHRINKING, REBALANCE_GROWING));
2105
2106 (void) manager_rebalance_now(m);
2107 return 0;
2108}
2109
2110int manager_schedule_rebalance(Manager *m, bool immediately) {
2111 int r;
2112
2113 assert(m);
2114
2115 /* Check if there are any records where rebalancing is requested */
2116 if (!manager_shall_rebalance(m)) {
2117 log_debug("Not scheduling rebalancing, not needed.");
49505916 2118 r = 0; /* report that we didn't schedule anything because nothing needed it */
d357b80d
LP
2119 goto turn_off;
2120 }
2121
2122 if (immediately) {
2123 /* If we are told to rebalance immediately, then mark a rebalance as pending (even if we area
2124 * already running one) */
2125
2126 if (m->rebalance_event_source) {
2127 r = sd_event_source_set_time(m->rebalance_event_source, 0);
2128 if (r < 0) {
2129 log_error_errno(r, "Failed to schedule immediate rebalancing: %m");
2130 goto turn_off;
2131 }
2132
2133 r = sd_event_source_set_enabled(m->rebalance_event_source, SD_EVENT_ONESHOT);
2134 if (r < 0) {
2135 log_error_errno(r, "Failed to enable rebalancing event source: %m");
2136 goto turn_off;
2137 }
2138 } else {
2139 r = sd_event_add_time(m->event, &m->rebalance_event_source, CLOCK_MONOTONIC, 0, USEC_PER_SEC, on_rebalance_timer, m);
2140 if (r < 0) {
2141 log_error_errno(r, "Failed to allocate rebalance event source: %m");
2142 goto turn_off;
2143 }
2144
2145 r = sd_event_source_set_priority(m->rebalance_event_source, SD_EVENT_PRIORITY_IDLE + 10);
2146 if (r < 0) {
2147 log_error_errno(r, "Failed to set rebalance event source priority: %m");
2148 goto turn_off;
2149 }
2150
2151 (void) sd_event_source_set_description(m->rebalance_event_source, "rebalance");
2152
2153 }
2154
2155 if (!IN_SET(m->rebalance_state, REBALANCE_PENDING, REBALANCE_SHRINKING, REBALANCE_GROWING))
2156 m->rebalance_state = REBALANCE_PENDING;
2157
2158 log_debug("Scheduled immediate rebalancing...");
49505916 2159 return 1; /* report that we scheduled something */
d357b80d
LP
2160 }
2161
2162 /* If we are told to schedule a rebalancing eventually, then do so only if we are not executing
2163 * anything yet. Also if we have something scheduled already, leave it in place */
2164 if (!IN_SET(m->rebalance_state, REBALANCE_OFF, REBALANCE_IDLE))
49505916 2165 return 1; /* report that there's already something scheduled */
d357b80d
LP
2166
2167 if (m->rebalance_event_source) {
2168 r = sd_event_source_set_time_relative(m->rebalance_event_source, m->rebalance_interval_usec);
2169 if (r < 0) {
2170 log_error_errno(r, "Failed to schedule immediate rebalancing: %m");
2171 goto turn_off;
2172 }
2173
2174 r = sd_event_source_set_enabled(m->rebalance_event_source, SD_EVENT_ONESHOT);
2175 if (r < 0) {
2176 log_error_errno(r, "Failed to enable rebalancing event source: %m");
2177 goto turn_off;
2178 }
2179 } else {
2180 r = sd_event_add_time_relative(m->event, &m->rebalance_event_source, CLOCK_MONOTONIC, m->rebalance_interval_usec, USEC_PER_SEC, on_rebalance_timer, m);
2181 if (r < 0) {
2182 log_error_errno(r, "Failed to allocate rebalance event source: %m");
2183 goto turn_off;
2184 }
2185
2186 r = sd_event_source_set_priority(m->rebalance_event_source, SD_EVENT_PRIORITY_IDLE + 10);
2187 if (r < 0) {
2188 log_error_errno(r, "Failed to set rebalance event source priority: %m");
2189 goto turn_off;
2190 }
2191
2192 (void) sd_event_source_set_description(m->rebalance_event_source, "rebalance");
2193 }
2194
2195 m->rebalance_state = REBALANCE_WAITING; /* We managed to enqueue a timer event, we now wait until it fires */
2196 log_debug("Scheduled rebalancing in %s...", FORMAT_TIMESPAN(m->rebalance_interval_usec, 0));
49505916 2197 return 1; /* report that we scheduled something */
d357b80d
LP
2198
2199turn_off:
2200 m->rebalance_event_source = sd_event_source_disable_unref(m->rebalance_event_source);
2201 m->rebalance_state = REBALANCE_OFF;
49505916 2202 manager_rebalance_reply_messages(m);
d357b80d
LP
2203 return r;
2204}
2205
2206int manager_reschedule_rebalance(Manager *m) {
2207 int r;
2208
2209 assert(m);
2210
2211 /* If a rebalance is pending reschedules it so it gets executed immediately */
2212
2213 if (!IN_SET(m->rebalance_state, REBALANCE_PENDING, REBALANCE_SHRINKING, REBALANCE_GROWING))
2214 return 0;
2215
2216 r = manager_schedule_rebalance(m, /* immediately= */ true);
2217 if (r < 0)
2218 return r;
2219
2220 return 1;
2221}