]> git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/core/manager.c
tree-wide: use TAKE_PTR() and TAKE_FD() macros
[thirdparty/systemd.git] / src / core / manager.c
index 8ae70b648c6c23e51003ab33efd4448ac44c5201..ff6761e1616baa0dbd69c23cf69a96b155329ad0 100644 (file)
 #define JOBS_IN_PROGRESS_PERIOD_USEC (USEC_PER_SEC / 3)
 #define JOBS_IN_PROGRESS_PERIOD_DIVISOR 3
 
+/* If there are more than 1K bus messages queue across our API and direct busses, then let's not add more on top until
+ * the queue gets more empty. */
+#define MANAGER_BUS_BUSY_THRESHOLD 1024LU
+
+/* How many units and jobs to process of the bus queue before returning to the event loop. */
+#define MANAGER_BUS_MESSAGE_BUDGET 100U
+
 static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
 static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
 static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
@@ -419,6 +426,8 @@ static int enable_special_signals(Manager *m) {
         return 0;
 }
 
+#define RTSIG_IF_AVAILABLE(signum) (signum <= SIGRTMAX ? signum : -1)
+
 static int manager_setup_signals(Manager *m) {
         struct sigaction sa = {
                 .sa_handler = SIG_DFL,
@@ -472,22 +481,22 @@ static int manager_setup_signals(Manager *m) {
 
                         /* .. one free signal here ... */
 
-#if !defined(__hppa64__) && !defined(__hppa__)
-                        /* Apparently Linux on hppa has fewer RT
-                         * signals (SIGRTMAX is SIGRTMIN+25 there),
-                         * hence let's not try to make use of them
-                         * here. Since these commands are accessible
-                         * by different means and only really a safety
-                         * net, the missing functionality on hppa
-                         * shouldn't matter. */
-
-                        SIGRTMIN+26, /* systemd: set log target to journal-or-kmsg */
-                        SIGRTMIN+27, /* systemd: set log target to console */
-                        SIGRTMIN+28, /* systemd: set log target to kmsg */
-                        SIGRTMIN+29, /* systemd: set log target to syslog-or-kmsg (obsolete) */
+                        /* Apparently Linux on hppa had fewer RT signals until v3.18,
+                         * SIGRTMAX was SIGRTMIN+25, and then SIGRTMIN was lowered,
+                         * see commit v3.17-7614-g1f25df2eff.
+                         *
+                         * We cannot unconditionally make use of those signals here,
+                         * so let's use a runtime check. Since these commands are
+                         * accessible by different means and only really a safety
+                         * net, the missing functionality on hppa shouldn't matter.
+                         */
+
+                        RTSIG_IF_AVAILABLE(SIGRTMIN+26), /* systemd: set log target to journal-or-kmsg */
+                        RTSIG_IF_AVAILABLE(SIGRTMIN+27), /* systemd: set log target to console */
+                        RTSIG_IF_AVAILABLE(SIGRTMIN+28), /* systemd: set log target to kmsg */
+                        RTSIG_IF_AVAILABLE(SIGRTMIN+29), /* systemd: set log target to syslog-or-kmsg (obsolete) */
 
                         /* ... one free signal here SIGRTMIN+30 ... */
-#endif
                         -1);
         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
 
@@ -660,7 +669,7 @@ static int manager_setup_sigchld_event_source(Manager *m) {
 }
 
 int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
-        Manager *m;
+        _cleanup_(manager_freep) Manager *m = NULL;
         int r;
 
         assert(_m);
@@ -673,6 +682,7 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
         m->unit_file_scope = scope;
         m->exit_code = _MANAGER_EXIT_CODE_INVALID;
         m->default_timer_accuracy_usec = USEC_PER_MINUTE;
+        m->default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
         m->default_tasks_accounting = true;
         m->default_tasks_max = UINT64_MAX;
         m->default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
@@ -721,62 +731,66 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
 
         r = manager_default_environment(m);
         if (r < 0)
-                goto fail;
+                return r;
 
         r = hashmap_ensure_allocated(&m->units, &string_hash_ops);
         if (r < 0)
-                goto fail;
+                return r;
 
         r = hashmap_ensure_allocated(&m->jobs, NULL);
         if (r < 0)
-                goto fail;
+                return r;
 
-        r = hashmap_ensure_allocated(&m->cgroup_unit, &string_hash_ops);
+        r = hashmap_ensure_allocated(&m->cgroup_unit, &path_hash_ops);
         if (r < 0)
-                goto fail;
+                return r;
 
         r = hashmap_ensure_allocated(&m->watch_bus, &string_hash_ops);
         if (r < 0)
-                goto fail;
+                return r;
 
-        r = sd_event_default(&m->event);
+        r = manager_setup_prefix(m);
         if (r < 0)
-                goto fail;
+                return r;
 
-        r = manager_setup_run_queue(m);
-        if (r < 0)
-                goto fail;
+        m->udev = udev_new();
+        if (!m->udev)
+                return -ENOMEM;
 
-        r = manager_setup_signals(m);
+        r = sd_event_default(&m->event);
         if (r < 0)
-                goto fail;
+                return r;
 
-        r = manager_setup_cgroup(m);
+        r = manager_setup_run_queue(m);
         if (r < 0)
-                goto fail;
+                return r;
 
-        r = manager_setup_time_change(m);
-        if (r < 0)
-                goto fail;
+        if (test_run_flags == MANAGER_TEST_RUN_MINIMAL) {
+                m->cgroup_root = strdup("");
+                if (!m->cgroup_root)
+                        return -ENOMEM;
+        } else {
+                r = manager_setup_signals(m);
+                if (r < 0)
+                        return r;
 
-        r = manager_setup_sigchld_event_source(m);
-        if (r < 0)
-                goto fail;
+                r = manager_setup_cgroup(m);
+                if (r < 0)
+                        return r;
 
-        m->udev = udev_new();
-        if (!m->udev) {
-                r = -ENOMEM;
-                goto fail;
-        }
+                r = manager_setup_time_change(m);
+                if (r < 0)
+                        return r;
 
-        r = manager_setup_prefix(m);
-        if (r < 0)
-                goto fail;
+                r = manager_setup_sigchld_event_source(m);
+                if (r < 0)
+                        return r;
+        }
 
         if (MANAGER_IS_SYSTEM(m) && test_run_flags == 0) {
                 r = mkdir_label("/run/systemd/units", 0755);
                 if (r < 0 && r != -EEXIST)
-                        goto fail;
+                        return r;
         }
 
         m->taint_usr =
@@ -786,12 +800,9 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
         /* Note that we do not set up the notify fd here. We do that after deserialization,
          * since they might have gotten serialized across the reexec. */
 
-        *_m = m;
-        return 0;
+        *_m = TAKE_PTR(m);
 
-fail:
-        manager_free(m);
-        return r;
+        return 0;
 }
 
 static int manager_setup_notify(Manager *m) {
@@ -833,8 +844,7 @@ static int manager_setup_notify(Manager *m) {
                 if (r < 0)
                         return log_error_errno(errno, "SO_PASSCRED failed: %m");
 
-                m->notify_fd = fd;
-                fd = -1;
+                m->notify_fd = TAKE_FD(fd);
 
                 log_debug("Using notification socket %s", m->notify_socket);
         }
@@ -1037,7 +1047,7 @@ static void unit_gc_sweep(Unit *u, unsigned gc_marker) {
         if (u->in_cleanup_queue)
                 goto bad;
 
-        if (unit_check_gc(u))
+        if (!unit_may_gc(u))
                 goto good;
 
         u->gc_marker = gc_marker + GC_OFFSET_IN_PATH;
@@ -1054,6 +1064,20 @@ static void unit_gc_sweep(Unit *u, unsigned gc_marker) {
                         is_bad = false;
         }
 
+        if (u->refs_by_target) {
+                const UnitRef *ref;
+
+                LIST_FOREACH(refs_by_target, ref, u->refs_by_target) {
+                        unit_gc_sweep(ref->source, gc_marker);
+
+                        if (ref->source->gc_marker == gc_marker + GC_OFFSET_GOOD)
+                                goto good;
+
+                        if (ref->source->gc_marker != gc_marker + GC_OFFSET_BAD)
+                                is_bad = false;
+                }
+        }
+
         if (is_bad)
                 goto bad;
 
@@ -1124,7 +1148,7 @@ static unsigned manager_dispatch_gc_job_queue(Manager *m) {
 
                 n++;
 
-                if (job_check_gc(j))
+                if (!job_may_gc(j))
                         continue;
 
                 log_unit_debug(j->unit, "Collecting job.");
@@ -1203,6 +1227,7 @@ Manager* manager_free(Manager *m) {
         sd_event_source_unref(m->jobs_in_progress_event_source);
         sd_event_source_unref(m->run_queue_event_source);
         sd_event_source_unref(m->user_lookup_event_source);
+        sd_event_source_unref(m->sync_bus_names_event_source);
 
         safe_close(m->signal_fd);
         safe_close(m->notify_fd);
@@ -1294,7 +1319,7 @@ static void manager_build_unit_path_cache(Manager *m) {
 
         set_free_free(m->unit_path_cache);
 
-        m->unit_path_cache = set_new(&string_hash_ops);
+        m->unit_path_cache = set_new(&path_hash_ops);
         if (!m->unit_path_cache) {
                 r = -ENOMEM;
                 goto fail;
@@ -1645,6 +1670,42 @@ Unit *manager_get_unit(Manager *m, const char *name) {
         return hashmap_get(m->units, name);
 }
 
+static int manager_dispatch_target_deps_queue(Manager *m) {
+        Unit *u;
+        unsigned k;
+        int r = 0;
+
+        static const UnitDependency deps[] = {
+                UNIT_REQUIRED_BY,
+                UNIT_REQUISITE_OF,
+                UNIT_WANTED_BY,
+                UNIT_BOUND_BY
+        };
+
+        assert(m);
+
+        while ((u = m->target_deps_queue)) {
+                assert(u->in_target_deps_queue);
+
+                LIST_REMOVE(target_deps_queue, u->manager->target_deps_queue, u);
+                u->in_target_deps_queue = false;
+
+                for (k = 0; k < ELEMENTSOF(deps); k++) {
+                        Unit *target;
+                        Iterator i;
+                        void *v;
+
+                        HASHMAP_FOREACH_KEY(v, target, u->dependencies[deps[k]], i) {
+                                r = unit_add_default_target_dependency(u, target);
+                                if (r < 0)
+                                        return r;
+                        }
+                }
+        }
+
+        return r;
+}
+
 unsigned manager_dispatch_load_queue(Manager *m) {
         Unit *u;
         unsigned n = 0;
@@ -1668,6 +1729,11 @@ unsigned manager_dispatch_load_queue(Manager *m) {
         }
 
         m->dispatching_load_queue = false;
+
+        /* Dispatch the units waiting for their target dependencies to be added now, as all targets that we know about
+         * should be loaded and have aliases resolved */
+        (void) manager_dispatch_target_deps_queue(m);
+
         return n;
 }
 
@@ -1678,6 +1744,7 @@ int manager_load_unit_prepare(
                 sd_bus_error *e,
                 Unit **_ret) {
 
+        _cleanup_(unit_freep) Unit *cleanup_ret = NULL;
         Unit *ret;
         UnitType t;
         int r;
@@ -1710,29 +1777,26 @@ int manager_load_unit_prepare(
                 return 1;
         }
 
-        ret = unit_new(m, unit_vtable[t]->object_size);
+        ret = cleanup_ret = unit_new(m, unit_vtable[t]->object_size);
         if (!ret)
                 return -ENOMEM;
 
         if (path) {
                 ret->fragment_path = strdup(path);
-                if (!ret->fragment_path) {
-                        unit_free(ret);
+                if (!ret->fragment_path)
                         return -ENOMEM;
-                }
         }
 
         r = unit_add_name(ret, name);
-        if (r < 0) {
-                unit_free(ret);
+        if (r < 0)
                 return r;
-        }
 
         unit_add_to_load_queue(ret);
         unit_add_to_dbus_queue(ret);
         unit_add_to_gc_queue(ret);
 
         *_ret = ret;
+        cleanup_ret = NULL;
 
         return 0;
 }
@@ -1830,8 +1894,7 @@ int manager_get_dump_string(Manager *m, char **ret) {
 
         f = safe_fclose(f);
 
-        *ret = dump;
-        dump = NULL;
+        *ret = TAKE_PTR(dump);
 
         return 0;
 }
@@ -1870,41 +1933,65 @@ static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) {
 }
 
 static unsigned manager_dispatch_dbus_queue(Manager *m) {
-        Job *j;
+        unsigned n = 0, budget;
         Unit *u;
-        unsigned n = 0;
+        Job *j;
 
         assert(m);
 
         if (m->dispatching_dbus_queue)
                 return 0;
 
+        /* Anything to do at all? */
+        if (!m->dbus_unit_queue && !m->dbus_job_queue && !m->send_reloading_done && !m->queued_message)
+                return 0;
+
+        /* Do we have overly many messages queued at the moment? If so, let's not enqueue more on top, let's sit this
+         * cycle out, and process things in a later cycle when the queues got a bit emptier. */
+        if (manager_bus_n_queued_write(m) > MANAGER_BUS_BUSY_THRESHOLD)
+                return 0;
+
+        /* Only process a certain number of units/jobs per event loop iteration. Even if the bus queue wasn't overly
+         * full before this call we shouldn't increase it in size too wildly in one step, and we shouldn't monopolize
+         * CPU time with generating these messages. Note the difference in counting of this "budget" and the
+         * "threshold" above: the "budget" is decreased only once per generated message, regardless how many
+         * busses/direct connections it is enqueued on, while the "threshold" is applied to each queued instance of bus
+         * message, i.e. if the same message is enqueued to five busses/direct connections it will be counted five
+         * times. This difference in counting ("references" vs. "instances") is primarily a result of the fact that
+         * it's easier to implement it this way, however it also reflects the thinking that the "threshold" should put
+         * a limit on used queue memory, i.e. space, while the "budget" should put a limit on time. Also note that
+         * the "threshold" is currently chosen much higher than the "budget". */
+        budget = MANAGER_BUS_MESSAGE_BUDGET;
+
         m->dispatching_dbus_queue = true;
 
-        while ((u = m->dbus_unit_queue)) {
+        while (budget > 0 && (u = m->dbus_unit_queue)) {
+
                 assert(u->in_dbus_queue);
 
                 bus_unit_send_change_signal(u);
-                n++;
+                n++, budget--;
         }
 
-        while ((j = m->dbus_job_queue)) {
+        while (budget > 0 && (j = m->dbus_job_queue)) {
                 assert(j->in_dbus_queue);
 
                 bus_job_send_change_signal(j);
-                n++;
+                n++, budget--;
         }
 
         m->dispatching_dbus_queue = false;
 
-        if (m->send_reloading_done) {
+        if (budget > 0 && m->send_reloading_done) {
                 m->send_reloading_done = false;
-
                 bus_manager_send_reloading(m, false);
+                n++, budget--;
         }
 
-        if (m->queued_message)
+        if (budget > 0 && m->queued_message) {
                 bus_send_queued_message(m);
+                n++;
+        }
 
         return n;
 }
@@ -2146,11 +2233,10 @@ static int manager_dispatch_sigchld(sd_event_source *source, void *userdata) {
 
         if (waitid(P_ALL, 0, &si, WEXITED|WNOHANG|WNOWAIT) < 0) {
 
-                if (errno == ECHILD)
-                        goto turn_off;
+                if (errno != ECHILD)
+                        log_error_errno(errno, "Failed to peek for child with waitid(), ignoring: %m");
 
-                log_error_errno(errno, "Failed to peek for child with waitid(), ignoring: %m");
-                return 0;
+                goto turn_off;
         }
 
         if (si.si_pid <= 0)
@@ -2283,14 +2369,13 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
         case SIGCHLD:
                 r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_ON);
                 if (r < 0)
-                        log_warning_errno(r, "Failed to enable SIGCHLD even source, ignoring: %m");
+                        log_warning_errno(r, "Failed to enable SIGCHLD event source, ignoring: %m");
 
                 break;
 
         case SIGTERM:
                 if (MANAGER_IS_SYSTEM(m)) {
-                        /* This is for compatibility with the
-                         * original sysvinit */
+                        /* This is for compatibility with the original sysvinit */
                         r = verify_run_space_and_log("Refusing to reexecute");
                         if (r >= 0)
                                 m->exit_code = MANAGER_REEXECUTE;
@@ -2307,21 +2392,20 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
                 break;
 
         case SIGWINCH:
+                /* This is a nop on non-init */
                 if (MANAGER_IS_SYSTEM(m))
                         manager_start_target(m, SPECIAL_KBREQUEST_TARGET, JOB_REPLACE);
 
-                /* This is a nop on non-init */
                 break;
 
         case SIGPWR:
+                /* This is a nop on non-init */
                 if (MANAGER_IS_SYSTEM(m))
                         manager_start_target(m, SPECIAL_SIGPWR_TARGET, JOB_REPLACE);
 
-                /* This is a nop on non-init */
                 break;
 
         case SIGUSR1:
-
                 if (manager_dbus_is_running(m, false)) {
                         log_info("Trying to reconnect to bus...");
 
@@ -2710,7 +2794,6 @@ void manager_send_unit_plymouth(Manager *m, Unit *u) {
         }
 
         if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
-
                 if (!IN_SET(errno, EPIPE, EAGAIN, ENOENT, ECONNREFUSED, ECONNRESET, ECONNABORTED))
                         log_error_errno(errno, "connect() failed: %m");
                 return;
@@ -3177,16 +3260,17 @@ int manager_reload(Manager *m) {
 
         exec_runtime_vacuum(m);
 
+        assert(m->n_reloading > 0);
+        m->n_reloading--;
+
         /* It might be safe to log to the journal now and connect to dbus */
         manager_recheck_journal(m);
         manager_recheck_dbus(m);
 
         /* Sync current state of bus names with our set of listening units */
-        if (m->api_bus)
-                manager_sync_bus_names(m, m->api_bus);
-
-        assert(m->n_reloading > 0);
-        m->n_reloading--;
+        q = manager_enqueue_sync_bus_names(m);
+        if (q < 0 && r >= 0)
+                r = q;
 
         m->send_reloading_done = true;
 
@@ -3551,6 +3635,9 @@ void manager_recheck_dbus(Manager *m) {
          * connection of the API bus). That's because the system bus after all runs as service of the system instance,
          * while in the user instance we can assume it's already there. */
 
+        if (MANAGER_IS_RELOADING(m))
+                return; /* don't check while we are reloading… */
+
         if (manager_dbus_is_running(m, false)) {
                 (void) bus_init_api(m);
 
@@ -3569,6 +3656,9 @@ static bool manager_journal_is_running(Manager *m) {
 
         assert(m);
 
+        if (m->test_run_flags != 0)
+                return false;
+
         /* If we are the user manager we can safely assume that the journal is up */
         if (!MANAGER_IS_SYSTEM(m))
                 return true;
@@ -3584,7 +3674,7 @@ static bool manager_journal_is_running(Manager *m) {
         u = manager_get_unit(m, SPECIAL_JOURNALD_SERVICE);
         if (!u)
                 return false;
-        if (SERVICE(u)->state != SERVICE_RUNNING)
+        if (!IN_SET(SERVICE(u)->state, SERVICE_RELOAD, SERVICE_RUNNING))
                 return false;
 
         return true;
@@ -3598,16 +3688,14 @@ void manager_recheck_journal(Manager *m) {
         if (getpid_cached() != 1)
                 return;
 
-        if (manager_journal_is_running(m)) {
-
-                /* The journal is fully and entirely up? If so, let's permit logging to it, if that's configured. */
-                log_set_prohibit_ipc(false);
-        } else {
+        /* Don't check this while we are reloading, things might still change */
+        if (MANAGER_IS_RELOADING(m))
+                return;
 
-                /* If the journal is down, don't ever log to it, otherwise we might end up deadlocking ourselves as we
-                 * might trigger an activation ourselves we can't fulfill */
-                log_set_prohibit_ipc(true);
-        }
+        /* The journal is fully and entirely up? If so, let's permit logging to it, if that's configured. If the
+         * journal is down, don't ever log to it, otherwise we might end up deadlocking ourselves as we might trigger
+         * an activation ourselves we can't fulfill. */
+        log_set_prohibit_ipc(!manager_journal_is_running(m));
         log_open();
 }