#define JOBS_IN_PROGRESS_PERIOD_USEC (USEC_PER_SEC / 3)
#define JOBS_IN_PROGRESS_PERIOD_DIVISOR 3
+/* If there are more than 1K bus messages queued across our API and direct busses, then let's not add more on top
+ * until the queue gets more empty. */
+#define MANAGER_BUS_BUSY_THRESHOLD 1024LU
+
+/* How many units and jobs to process from the bus queue before returning to the event loop. */
+#define MANAGER_BUS_MESSAGE_BUDGET 100U
+
static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
return 0;
}
+/* Evaluates to signum if it is not above SIGRTMAX, and to -1 otherwise. The argument is fully parenthesized so
+ * that arbitrary expressions (not just SIGRTMIN+n) are compared as a whole. Note that the argument is evaluated
+ * twice, hence only pass expressions without side effects. */
+#define RTSIG_IF_AVAILABLE(signum) ((signum) <= SIGRTMAX ? (signum) : -1)
+
static int manager_setup_signals(Manager *m) {
struct sigaction sa = {
.sa_handler = SIG_DFL,
/* .. one free signal here ... */
-#if !defined(__hppa64__) && !defined(__hppa__)
- /* Apparently Linux on hppa has fewer RT
- * signals (SIGRTMAX is SIGRTMIN+25 there),
- * hence let's not try to make use of them
- * here. Since these commands are accessible
- * by different means and only really a safety
- * net, the missing functionality on hppa
- * shouldn't matter. */
-
- SIGRTMIN+26, /* systemd: set log target to journal-or-kmsg */
- SIGRTMIN+27, /* systemd: set log target to console */
- SIGRTMIN+28, /* systemd: set log target to kmsg */
- SIGRTMIN+29, /* systemd: set log target to syslog-or-kmsg (obsolete) */
+ /* Apparently Linux on hppa had fewer RT signals until v3.18,
+ * SIGRTMAX was SIGRTMIN+25, and then SIGRTMIN was lowered,
+ * see commit v3.17-7614-g1f25df2eff.
+ *
+ * We cannot unconditionally make use of those signals here,
+ * so let's use a runtime check. Since these commands are
+ * accessible by different means and only really a safety
+ * net, the missing functionality on hppa shouldn't matter.
+ */
+
+ RTSIG_IF_AVAILABLE(SIGRTMIN+26), /* systemd: set log target to journal-or-kmsg */
+ RTSIG_IF_AVAILABLE(SIGRTMIN+27), /* systemd: set log target to console */
+ RTSIG_IF_AVAILABLE(SIGRTMIN+28), /* systemd: set log target to kmsg */
+ RTSIG_IF_AVAILABLE(SIGRTMIN+29), /* systemd: set log target to syslog-or-kmsg (obsolete) */
/* ... one free signal here SIGRTMIN+30 ... */
-#endif
-1);
assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
}
int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
- Manager *m;
+ _cleanup_(manager_freep) Manager *m = NULL;
int r;
assert(_m);
m->unit_file_scope = scope;
m->exit_code = _MANAGER_EXIT_CODE_INVALID;
m->default_timer_accuracy_usec = USEC_PER_MINUTE;
+ m->default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
m->default_tasks_accounting = true;
m->default_tasks_max = UINT64_MAX;
m->default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
r = manager_default_environment(m);
if (r < 0)
- goto fail;
+ return r;
r = hashmap_ensure_allocated(&m->units, &string_hash_ops);
if (r < 0)
- goto fail;
+ return r;
r = hashmap_ensure_allocated(&m->jobs, NULL);
if (r < 0)
- goto fail;
+ return r;
- r = hashmap_ensure_allocated(&m->cgroup_unit, &string_hash_ops);
+ r = hashmap_ensure_allocated(&m->cgroup_unit, &path_hash_ops);
if (r < 0)
- goto fail;
+ return r;
r = hashmap_ensure_allocated(&m->watch_bus, &string_hash_ops);
if (r < 0)
- goto fail;
+ return r;
- r = sd_event_default(&m->event);
+ r = manager_setup_prefix(m);
if (r < 0)
- goto fail;
+ return r;
- r = manager_setup_run_queue(m);
- if (r < 0)
- goto fail;
+ m->udev = udev_new();
+ if (!m->udev)
+ return -ENOMEM;
- r = manager_setup_signals(m);
+ r = sd_event_default(&m->event);
if (r < 0)
- goto fail;
+ return r;
- r = manager_setup_cgroup(m);
+ r = manager_setup_run_queue(m);
if (r < 0)
- goto fail;
+ return r;
- r = manager_setup_time_change(m);
- if (r < 0)
- goto fail;
+ if (test_run_flags == MANAGER_TEST_RUN_MINIMAL) {
+ m->cgroup_root = strdup("");
+ if (!m->cgroup_root)
+ return -ENOMEM;
+ } else {
+ r = manager_setup_signals(m);
+ if (r < 0)
+ return r;
- r = manager_setup_sigchld_event_source(m);
- if (r < 0)
- goto fail;
+ r = manager_setup_cgroup(m);
+ if (r < 0)
+ return r;
- m->udev = udev_new();
- if (!m->udev) {
- r = -ENOMEM;
- goto fail;
- }
+ r = manager_setup_time_change(m);
+ if (r < 0)
+ return r;
- r = manager_setup_prefix(m);
- if (r < 0)
- goto fail;
+ r = manager_setup_sigchld_event_source(m);
+ if (r < 0)
+ return r;
+ }
if (MANAGER_IS_SYSTEM(m) && test_run_flags == 0) {
r = mkdir_label("/run/systemd/units", 0755);
if (r < 0 && r != -EEXIST)
- goto fail;
+ return r;
}
m->taint_usr =
/* Note that we do not set up the notify fd here. We do that after deserialization,
* since they might have gotten serialized across the reexec. */
- *_m = m;
- return 0;
+ *_m = TAKE_PTR(m);
-fail:
- manager_free(m);
- return r;
+ return 0;
}
static int manager_setup_notify(Manager *m) {
if (r < 0)
return log_error_errno(errno, "SO_PASSCRED failed: %m");
- m->notify_fd = fd;
- fd = -1;
+ m->notify_fd = TAKE_FD(fd);
log_debug("Using notification socket %s", m->notify_socket);
}
return 0;
}
-static int manager_connect_bus(Manager *m, bool reexecuting) {
- bool try_bus_connect;
- Unit *u = NULL;
-
- assert(m);
-
- if (m->test_run_flags)
- return 0;
-
- u = manager_get_unit(m, SPECIAL_DBUS_SERVICE);
-
- try_bus_connect =
- (u && SERVICE(u)->deserialized_state == SERVICE_RUNNING) &&
- (reexecuting ||
- (MANAGER_IS_USER(m) && getenv("DBUS_SESSION_BUS_ADDRESS")));
-
- /* Try to connect to the buses, if possible. */
- return bus_init(m, try_bus_connect);
-}
-
static unsigned manager_dispatch_cleanup_queue(Manager *m) {
Unit *u;
unsigned n = 0;
if (u->in_cleanup_queue)
goto bad;
- if (unit_check_gc(u))
+ if (!unit_may_gc(u))
goto good;
u->gc_marker = gc_marker + GC_OFFSET_IN_PATH;
is_bad = false;
}
+ if (u->refs_by_target) {
+ const UnitRef *ref;
+
+ LIST_FOREACH(refs_by_target, ref, u->refs_by_target) {
+ unit_gc_sweep(ref->source, gc_marker);
+
+ if (ref->source->gc_marker == gc_marker + GC_OFFSET_GOOD)
+ goto good;
+
+ if (ref->source->gc_marker != gc_marker + GC_OFFSET_BAD)
+ is_bad = false;
+ }
+ }
+
if (is_bad)
goto bad;
n++;
- if (job_check_gc(j))
+ if (!job_may_gc(j))
continue;
log_unit_debug(j->unit, "Collecting job.");
bus_done(m);
+ exec_runtime_vacuum(m);
+ hashmap_free(m->exec_runtime_by_id);
+
dynamic_user_vacuum(m, false);
hashmap_free(m->dynamic_users);
sd_event_source_unref(m->jobs_in_progress_event_source);
sd_event_source_unref(m->run_queue_event_source);
sd_event_source_unref(m->user_lookup_event_source);
+ sd_event_source_unref(m->sync_bus_names_event_source);
safe_close(m->signal_fd);
safe_close(m->notify_fd);
set_free_free(m->unit_path_cache);
- m->unit_path_cache = set_new(&string_hash_ops);
+ m->unit_path_cache = set_new(&path_hash_ops);
if (!m->unit_path_cache) {
r = -ENOMEM;
goto fail;
}
}
+static bool manager_dbus_is_running(Manager *m, bool deserialized) {
+        Unit *socket_unit, *service_unit;
+
+        assert(m);
+
+        /* Tells the caller whether the D-Bus instance we are supposed to expose our APIs on is up. Both the socket
+         * unit and the service unit are looked at. With 'deserialized' set, the deserialized state of each unit is
+         * consulted instead of its current state. */
+
+        /* In test runs we always report the bus as not running */
+        if (m->test_run_flags != 0)
+                return false;
+
+        /* User instance with the session bus address already in the environment: D-Bus is run somewhere outside
+         * of our own logic, so let's just use it. */
+        if (MANAGER_IS_USER(m) && getenv("DBUS_SESSION_BUS_ADDRESS"))
+                return true;
+
+        /* First the socket unit: it must be loaded and in the running state */
+        socket_unit = manager_get_unit(m, SPECIAL_DBUS_SOCKET);
+        if (!socket_unit)
+                return false;
+
+        if (deserialized) {
+                if (SOCKET(socket_unit)->deserialized_state != SOCKET_RUNNING)
+                        return false;
+        } else if (SOCKET(socket_unit)->state != SOCKET_RUNNING)
+                return false;
+
+        /* Then the service unit: it must be loaded and either running or reloading */
+        service_unit = manager_get_unit(m, SPECIAL_DBUS_SERVICE);
+        if (!service_unit)
+                return false;
+
+        return IN_SET((deserialized ? SERVICE(service_unit)->deserialized_state : SERVICE(service_unit)->state),
+                      SERVICE_RUNNING, SERVICE_RELOAD);
+}
+
int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
int r;
/* This shouldn't fail, except if things are really broken. */
return r;
- /* Let's connect to the bus now. */
- (void) manager_connect_bus(m, !!serialization);
+ /* Let's set up our private bus connection now, unconditionally */
+ (void) bus_init_private(m);
+
+ /* If we are in --user mode also connect to the system bus now */
+ if (MANAGER_IS_USER(m))
+ (void) bus_init_system(m);
+
+ /* Let's connect to the bus now, but only if the unit is supposed to be up */
+ if (manager_dbus_is_running(m, !!serialization)) {
+ (void) bus_init_api(m);
+
+ if (MANAGER_IS_SYSTEM(m))
+ (void) bus_init_system(m);
+ }
+ /* Now that we are connected to all possible busses, let's deserialize who is tracking us. */
(void) bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed);
m->deserialized_subscribed = strv_free(m->deserialized_subscribed);
/* Release any dynamic users no longer referenced */
dynamic_user_vacuum(m, true);
+ exec_runtime_vacuum(m);
+
/* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
manager_vacuum_uid_refs(m);
manager_vacuum_gid_refs(m);
return hashmap_get(m->units, name);
}
+static int manager_dispatch_target_deps_queue(Manager *m) {
+        /* The dependency types whose entries are handed to unit_add_default_target_dependency() as target */
+        static const UnitDependency target_dep_types[] = {
+                UNIT_REQUIRED_BY,
+                UNIT_REQUISITE_OF,
+                UNIT_WANTED_BY,
+                UNIT_BOUND_BY
+        };
+
+        int r = 0;
+
+        assert(m);
+
+        /* Drains the target deps queue: each queued unit is taken off the queue, and then for every unit found
+         * in one of the dependency sets listed above unit_add_default_target_dependency() is invoked. Returns
+         * the first negative result immediately (leaving any remaining units queued), otherwise the result of
+         * the last call, or 0 if no call was made. */
+
+        for (;;) {
+                Unit *unit;
+                unsigned idx;
+
+                unit = m->target_deps_queue;
+                if (!unit)
+                        break;
+
+                assert(unit->in_target_deps_queue);
+
+                /* Dequeue first, then process */
+                LIST_REMOVE(target_deps_queue, unit->manager->target_deps_queue, unit);
+                unit->in_target_deps_queue = false;
+
+                for (idx = 0; idx < ELEMENTSOF(target_dep_types); idx++) {
+                        Unit *target;
+                        Iterator it;
+                        void *value;
+
+                        HASHMAP_FOREACH_KEY(value, target, unit->dependencies[target_dep_types[idx]], it) {
+                                r = unit_add_default_target_dependency(unit, target);
+                                if (r < 0)
+                                        return r;
+                        }
+                }
+        }
+
+        return r;
+}
+
unsigned manager_dispatch_load_queue(Manager *m) {
Unit *u;
unsigned n = 0;
}
m->dispatching_load_queue = false;
+
+ /* Dispatch the units waiting for their target dependencies to be added now, as all targets that we know about
+ * should be loaded and have aliases resolved */
+ (void) manager_dispatch_target_deps_queue(m);
+
return n;
}
sd_bus_error *e,
Unit **_ret) {
+ _cleanup_(unit_freep) Unit *cleanup_ret = NULL;
Unit *ret;
UnitType t;
int r;
return 1;
}
- ret = unit_new(m, unit_vtable[t]->object_size);
+ ret = cleanup_ret = unit_new(m, unit_vtable[t]->object_size);
if (!ret)
return -ENOMEM;
if (path) {
ret->fragment_path = strdup(path);
- if (!ret->fragment_path) {
- unit_free(ret);
+ if (!ret->fragment_path)
return -ENOMEM;
- }
}
r = unit_add_name(ret, name);
- if (r < 0) {
- unit_free(ret);
+ if (r < 0)
return r;
- }
unit_add_to_load_queue(ret);
unit_add_to_dbus_queue(ret);
unit_add_to_gc_queue(ret);
*_ret = ret;
+ cleanup_ret = NULL;
return 0;
}
f = safe_fclose(f);
- *ret = dump;
- dump = NULL;
+ *ret = TAKE_PTR(dump);
return 0;
}
}
static unsigned manager_dispatch_dbus_queue(Manager *m) {
- Job *j;
+ unsigned n = 0, budget;
Unit *u;
- unsigned n = 0;
+ Job *j;
assert(m);
if (m->dispatching_dbus_queue)
return 0;
+ /* Anything to do at all? */
+ if (!m->dbus_unit_queue && !m->dbus_job_queue && !m->send_reloading_done && !m->queued_message)
+ return 0;
+
+ /* Do we have overly many messages queued at the moment? If so, let's not enqueue more on top, let's sit this
+ * cycle out, and process things in a later cycle when the queues got a bit emptier. */
+ if (manager_bus_n_queued_write(m) > MANAGER_BUS_BUSY_THRESHOLD)
+ return 0;
+
+ /* Only process a certain number of units/jobs per event loop iteration. Even if the bus queue wasn't overly
+ * full before this call we shouldn't increase it in size too wildly in one step, and we shouldn't monopolize
+ * CPU time with generating these messages. Note the difference in counting of this "budget" and the
+ * "threshold" above: the "budget" is decreased only once per generated message, regardless how many
+ * busses/direct connections it is enqueued on, while the "threshold" is applied to each queued instance of bus
+ * message, i.e. if the same message is enqueued to five busses/direct connections it will be counted five
+ * times. This difference in counting ("references" vs. "instances") is primarily a result of the fact that
+ * it's easier to implement it this way, however it also reflects the thinking that the "threshold" should put
+ * a limit on used queue memory, i.e. space, while the "budget" should put a limit on time. Also note that
+ * the "threshold" is currently chosen much higher than the "budget". */
+ budget = MANAGER_BUS_MESSAGE_BUDGET;
+
m->dispatching_dbus_queue = true;
- while ((u = m->dbus_unit_queue)) {
+ while (budget > 0 && (u = m->dbus_unit_queue)) {
+
assert(u->in_dbus_queue);
bus_unit_send_change_signal(u);
- n++;
+ n++, budget--;
}
- while ((j = m->dbus_job_queue)) {
+ while (budget > 0 && (j = m->dbus_job_queue)) {
assert(j->in_dbus_queue);
bus_job_send_change_signal(j);
- n++;
+ n++, budget--;
}
m->dispatching_dbus_queue = false;
- if (m->send_reloading_done) {
+ if (budget > 0 && m->send_reloading_done) {
m->send_reloading_done = false;
-
bus_manager_send_reloading(m, false);
+ n++, budget--;
}
- if (m->queued_message)
+ if (budget > 0 && m->queued_message) {
bus_send_queued_message(m);
+ n++;
+ }
return n;
}
if (waitid(P_ALL, 0, &si, WEXITED|WNOHANG|WNOWAIT) < 0) {
- if (errno == ECHILD)
- goto turn_off;
+ if (errno != ECHILD)
+ log_error_errno(errno, "Failed to peek for child with waitid(), ignoring: %m");
- log_error_errno(errno, "Failed to peek for child with waitid(), ignoring: %m");
- return 0;
+ goto turn_off;
}
if (si.si_pid <= 0)
case SIGCHLD:
r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_ON);
if (r < 0)
- log_warning_errno(r, "Failed to enable SIGCHLD even source, ignoring: %m");
+ log_warning_errno(r, "Failed to enable SIGCHLD event source, ignoring: %m");
break;
case SIGTERM:
if (MANAGER_IS_SYSTEM(m)) {
- /* This is for compatibility with the
- * original sysvinit */
+ /* This is for compatibility with the original sysvinit */
r = verify_run_space_and_log("Refusing to reexecute");
if (r >= 0)
m->exit_code = MANAGER_REEXECUTE;
break;
case SIGWINCH:
+ /* This is a nop on non-init */
if (MANAGER_IS_SYSTEM(m))
manager_start_target(m, SPECIAL_KBREQUEST_TARGET, JOB_REPLACE);
- /* This is a nop on non-init */
break;
case SIGPWR:
+ /* This is a nop on non-init */
if (MANAGER_IS_SYSTEM(m))
manager_start_target(m, SPECIAL_SIGPWR_TARGET, JOB_REPLACE);
- /* This is a nop on non-init */
break;
- case SIGUSR1: {
- Unit *u;
-
- u = manager_get_unit(m, SPECIAL_DBUS_SERVICE);
-
- if (!u || UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u))) {
+ case SIGUSR1:
+ if (manager_dbus_is_running(m, false)) {
log_info("Trying to reconnect to bus...");
- bus_init(m, true);
- }
- if (!u || !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u))) {
- log_info("Loading D-Bus service...");
+ (void) bus_init_api(m);
+
+ if (MANAGER_IS_SYSTEM(m))
+ (void) bus_init_system(m);
+ } else {
+ log_info("Starting D-Bus service...");
manager_start_target(m, SPECIAL_DBUS_SERVICE, JOB_REPLACE);
}
break;
- }
case SIGUSR2: {
_cleanup_free_ char *dump = NULL;
}
if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
-
if (!IN_SET(errno, EPIPE, EAGAIN, ENOENT, ECONNREFUSED, ECONNRESET, ECONNABORTED))
log_error_errno(errno, "connect() failed: %m");
return;
manager_serialize_uid_refs(m, f);
manager_serialize_gid_refs(m, f);
+ r = exec_runtime_serialize(m, f, fds);
+ if (r < 0)
+ return r;
+
(void) fputc('\n', f);
HASHMAP_FOREACH_KEY(u, t, m->units, i) {
manager_deserialize_uid_refs_one(m, val);
else if ((val = startswith(l, "destroy-ipc-gid=")))
manager_deserialize_gid_refs_one(m, val);
+ else if ((val = startswith(l, "exec-runtime=")))
+ exec_runtime_deserialize_one(m, val, fds);
else if ((val = startswith(l, "subscribed="))) {
if (strv_extend(&m->deserialized_subscribed, val) < 0)
manager_clear_jobs_and_units(m);
lookup_paths_flush_generator(&m->lookup_paths);
lookup_paths_free(&m->lookup_paths);
+ exec_runtime_vacuum(m);
dynamic_user_vacuum(m, false);
m->uid_refs = hashmap_free(m->uid_refs);
m->gid_refs = hashmap_free(m->gid_refs);
manager_vacuum_uid_refs(m);
manager_vacuum_gid_refs(m);
- /* It might be safe to log to the journal now. */
- manager_recheck_journal(m);
-
- /* Sync current state of bus names with our set of listening units */
- if (m->api_bus)
- manager_sync_bus_names(m, m->api_bus);
+ exec_runtime_vacuum(m);
assert(m->n_reloading > 0);
m->n_reloading--;
+ /* It might be safe to log to the journal now and connect to dbus */
+ manager_recheck_journal(m);
+ manager_recheck_dbus(m);
+
+ /* Sync current state of bus names with our set of listening units */
+ q = manager_enqueue_sync_bus_names(m);
+ if (q < 0 && r >= 0)
+ r = q;
+
m->send_reloading_done = true;
return r;
return 0;
}
+void manager_recheck_dbus(Manager *m) {
+        assert(m);
+
+        /* Brings our bus connections in line with the state of the dbus service and socket: if they are running
+         * we connect to the API bus, otherwise we invoke bus_done_api(). In user mode that is all we do. In
+         * system mode the same is applied to the system bus (which will most likely just reuse the connection of
+         * the API bus), since the system bus runs as a service of the system instance after all, while in the
+         * user instance we can assume it is already there. */
+
+        /* Don't check while we are reloading… */
+        if (MANAGER_IS_RELOADING(m))
+                return;
+
+        if (!manager_dbus_is_running(m, false)) {
+                (void) bus_done_api(m);
+
+                if (MANAGER_IS_SYSTEM(m))
+                        (void) bus_done_system(m);
+
+                return;
+        }
+
+        (void) bus_init_api(m);
+
+        if (MANAGER_IS_SYSTEM(m))
+                (void) bus_init_system(m);
+}
+
static bool manager_journal_is_running(Manager *m) {
Unit *u;
assert(m);
+ if (m->test_run_flags != 0)
+ return false;
+
/* If we are the user manager we can safely assume that the journal is up */
if (!MANAGER_IS_SYSTEM(m))
return true;
u = manager_get_unit(m, SPECIAL_JOURNALD_SERVICE);
if (!u)
return false;
- if (SERVICE(u)->state != SERVICE_RUNNING)
+ if (!IN_SET(SERVICE(u)->state, SERVICE_RELOAD, SERVICE_RUNNING))
return false;
return true;
if (getpid_cached() != 1)
return;
- if (manager_journal_is_running(m)) {
-
- /* The journal is fully and entirely up? If so, let's permit logging to it, if that's configured. */
- log_set_prohibit_ipc(false);
- log_open();
- } else {
+ /* Don't check this while we are reloading, things might still change */
+ if (MANAGER_IS_RELOADING(m))
+ return;
- /* If the journal is down, don't ever log to it, otherwise we might end up deadlocking ourselves as we
- * might trigger an activation ourselves we can't fulfill */
- log_set_prohibit_ipc(true);
- log_close_journal();
- }
+ /* The journal is fully and entirely up? If so, let's permit logging to it, if that's configured. If the
+ * journal is down, don't ever log to it, otherwise we might end up deadlocking ourselves as we might trigger
+ * an activation ourselves we can't fulfill. */
+ log_set_prohibit_ipc(!manager_journal_is_running(m));
+ log_open();
}
void manager_set_show_status(Manager *m, ShowStatus mode) {
return hashmap_get(m->units_requiring_mounts_for, streq(p, "/") ? "" : p);
}
-void manager_set_exec_params(Manager *m, ExecParameters *p) {
- assert(m);
- assert(p);
-
- p->environment = m->environment;
- p->confirm_spawn = manager_get_confirm_spawn(m);
- p->cgroup_supported = m->cgroup_supported;
- p->prefix = m->prefix;
-
- SET_FLAG(p->flags, EXEC_PASS_LOG_UNIT|EXEC_CHOWN_DIRECTORIES, MANAGER_IS_SYSTEM(m));
-}
-
int manager_update_failed_units(Manager *m, Unit *u, bool failed) {
unsigned size;
int r;