#include "dropin.h"
#include "escape.h"
#include "execute.h"
+#include "fd-util.h"
#include "fileio-label.h"
#include "format-util.h"
#include "id128-util.h"
+#include "io-util.h"
#include "load-dropin.h"
#include "load-fragment.h"
#include "log.h"
u->ref_gid = GID_INVALID;
u->cpu_usage_last = NSEC_INFINITY;
+ u->ip_accounting_ingress_map_fd = -1;
+ u->ip_accounting_egress_map_fd = -1;
+ u->ipv4_allow_map_fd = -1;
+ u->ipv6_allow_map_fd = -1;
+ u->ipv4_deny_map_fd = -1;
+ u->ipv6_deny_map_fd = -1;
+
RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst);
RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16);
cc->cpu_accounting = u->manager->default_cpu_accounting;
cc->io_accounting = u->manager->default_io_accounting;
+ cc->ip_accounting = u->manager->default_ip_accounting;
cc->blockio_accounting = u->manager->default_blockio_accounting;
cc->memory_accounting = u->manager->default_memory_accounting;
cc->tasks_accounting = u->manager->default_tasks_accounting;
+ cc->ip_accounting = u->manager->default_ip_accounting;
if (u->type != UNIT_SLICE)
cc->tasks_max = u->manager->default_tasks_max;
}
ec = unit_get_exec_context(u);
- if (ec)
+ if (ec) {
exec_context_init(ec);
+ ec->keyring_mode = MANAGER_IS_SYSTEM(u->manager) ?
+ EXEC_KEYRING_PRIVATE : EXEC_KEYRING_INHERIT;
+ }
+
kc = unit_get_kill_context(u);
if (kc)
kill_context_init(kc);
}
int unit_set_description(Unit *u, const char *description) {
- char *s;
+ int r;
assert(u);
- if (isempty(description))
- s = NULL;
- else {
- s = strdup(description);
- if (!s)
- return -ENOMEM;
- }
-
- free(u->description);
- u->description = s;
+ r = free_and_strdup(&u->description, empty_to_null(description));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ unit_add_to_dbus_queue(u);
- unit_add_to_dbus_queue(u);
return 0;
}
if (u->in_gc_queue)
LIST_REMOVE(gc_queue, u->manager->gc_unit_queue, u);
- if (u->in_cgroup_queue)
- LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u);
+ if (u->in_cgroup_realize_queue)
+ LIST_REMOVE(cgroup_realize_queue, u->manager->cgroup_realize_queue, u);
+
+ if (u->in_cgroup_empty_queue)
+ LIST_REMOVE(cgroup_empty_queue, u->manager->cgroup_empty_queue, u);
unit_release_cgroup(u);
while (u->refs)
unit_ref_unset(u->refs);
+ safe_close(u->ip_accounting_ingress_map_fd);
+ safe_close(u->ip_accounting_egress_map_fd);
+
+ safe_close(u->ipv4_allow_map_fd);
+ safe_close(u->ipv6_allow_map_fd);
+ safe_close(u->ipv4_deny_map_fd);
+ safe_close(u->ipv6_deny_map_fd);
+
+ bpf_program_unref(u->ip_bpf_ingress);
+ bpf_program_unref(u->ip_bpf_egress);
+
free(u);
}
if (!unit_type_may_alias(u->type)) /* Merging only applies to unit names that support aliases */
return -EEXIST;
- if (other->load_state != UNIT_STUB &&
- other->load_state != UNIT_NOT_FOUND)
+ if (!IN_SET(other->load_state, UNIT_STUB, UNIT_NOT_FOUND))
return -EEXIST;
if (other->job)
}
int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
+ ExecDirectoryType dt;
+ char **dp;
int r;
assert(u);
return r;
}
+ for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
+ if (!u->manager->prefix[dt])
+ continue;
+
+ STRV_FOREACH(dp, c->directories[dt].paths) {
+ _cleanup_free_ char *p;
+
+ p = strjoin(u->manager->prefix[dt], "/", *dp);
+ if (!p)
+ return -ENOMEM;
+
+ r = unit_require_mounts_for(u, p);
+ if (r < 0)
+ return r;
+ }
+ }
+
if (!MANAGER_IS_SYSTEM(u->manager))
return 0;
log_struct(LOG_INFO,
LOG_MESSAGE("%s", buf),
LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
mid,
NULL);
}
unit_add_to_dbus_queue(u);
+ if (!UNIT_VTABLE(u)->reload) {
+ /* Unit doesn't have a reload function, but we need to propagate the reload anyway */
+ unit_notify(u, unit_active_state(u), unit_active_state(u), true);
+ return 0;
+ }
+
return UNIT_VTABLE(u)->reload(u);
}
bool unit_can_reload(Unit *u) {
assert(u);
- if (!UNIT_VTABLE(u)->reload)
- return false;
+ if (UNIT_VTABLE(u)->can_reload)
+ return UNIT_VTABLE(u)->can_reload(u);
- if (!UNIT_VTABLE(u)->can_reload)
+ if (!set_isempty(u->dependencies[UNIT_PROPAGATES_RELOAD_TO]))
return true;
- return UNIT_VTABLE(u)->can_reload(u);
+ return UNIT_VTABLE(u)->reload;
}
static void unit_check_unneeded(Unit *u) {
UNIT_VTABLE(other)->trigger_notify(other, u);
}
+static int unit_log_resources(Unit *u) {
+
+ struct iovec iovec[1 + _CGROUP_IP_ACCOUNTING_METRIC_MAX + 4];
+ size_t n_message_parts = 0, n_iovec = 0;
+ char* message_parts[3 + 1], *t;
+ nsec_t nsec = NSEC_INFINITY;
+ CGroupIPAccountingMetric m;
+ size_t i;
+ int r;
+ const char* const ip_fields[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+ [CGROUP_IP_INGRESS_BYTES] = "IP_METRIC_INGRESS_BYTES",
+ [CGROUP_IP_INGRESS_PACKETS] = "IP_METRIC_INGRESS_PACKETS",
+ [CGROUP_IP_EGRESS_BYTES] = "IP_METRIC_EGRESS_BYTES",
+ [CGROUP_IP_EGRESS_PACKETS] = "IP_METRIC_EGRESS_PACKETS",
+ };
+
+ assert(u);
+
+ /* Invoked whenever a unit enters failed or dead state. Logs information about consumed resources if resource
+ * accounting was enabled for a unit. It does this in two ways: a friendly human readable string with reduced
+ * information and the complete data in structured fields. */
+
+ (void) unit_get_cpu_usage(u, &nsec);
+ if (nsec != NSEC_INFINITY) {
+ char buf[FORMAT_TIMESPAN_MAX] = "";
+
+ /* Format the CPU time for inclusion in the structured log message */
+ if (asprintf(&t, "CPU_USAGE_NSEC=%" PRIu64, nsec) < 0) {
+ r = log_oom();
+ goto finish;
+ }
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+
+ /* Format the CPU time for inclusion in the human language message string */
+ format_timespan(buf, sizeof(buf), nsec / NSEC_PER_USEC, USEC_PER_MSEC);
+ t = strjoin(n_message_parts > 0 ? "consumed " : "Consumed ", buf, " CPU time");
+ if (!t) {
+ r = log_oom();
+ goto finish;
+ }
+
+ message_parts[n_message_parts++] = t;
+ }
+
+ for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
+ char buf[FORMAT_BYTES_MAX] = "";
+ uint64_t value = UINT64_MAX;
+
+ assert(ip_fields[m]);
+
+ (void) unit_get_ip_accounting(u, m, &value);
+ if (value == UINT64_MAX)
+ continue;
+
+ /* Format IP accounting data for inclusion in the structured log message */
+ if (asprintf(&t, "%s=%" PRIu64, ip_fields[m], value) < 0) {
+ r = log_oom();
+ goto finish;
+ }
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+
+ /* Format the IP accounting data for inclusion in the human language message string, but only for the
+ * bytes counters (and not for the packets counters) */
+ if (m == CGROUP_IP_INGRESS_BYTES)
+ t = strjoin(n_message_parts > 0 ? "received " : "Received ",
+ format_bytes(buf, sizeof(buf), value),
+ " IP traffic");
+ else if (m == CGROUP_IP_EGRESS_BYTES)
+ t = strjoin(n_message_parts > 0 ? "sent " : "Sent ",
+ format_bytes(buf, sizeof(buf), value),
+ " IP traffic");
+ else
+ continue;
+ if (!t) {
+ r = log_oom();
+ goto finish;
+ }
+
+ message_parts[n_message_parts++] = t;
+ }
+
+ /* Is there any accounting data available at all? */
+ if (n_iovec == 0) {
+ r = 0;
+ goto finish;
+ }
+
+ if (n_message_parts == 0)
+ t = strjoina("MESSAGE=", u->id, ": Completed");
+ else {
+ _cleanup_free_ char *joined;
+
+ message_parts[n_message_parts] = NULL;
+
+ joined = strv_join(message_parts, ", ");
+ if (!joined) {
+ r = log_oom();
+ goto finish;
+ }
+
+ t = strjoina("MESSAGE=", u->id, ": ", joined);
+ }
+
+ /* The following four fields we allocate on the stack or are static strings, we hence don't want to free them,
+ * and hence don't increase n_iovec for them */
+ iovec[n_iovec] = IOVEC_MAKE_STRING(t);
+ iovec[n_iovec + 1] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_UNIT_RESOURCES_STR);
+
+ t = strjoina(u->manager->unit_log_field, u->id);
+ iovec[n_iovec + 2] = IOVEC_MAKE_STRING(t);
+
+ t = strjoina(u->manager->invocation_log_field, u->invocation_id_string);
+ iovec[n_iovec + 3] = IOVEC_MAKE_STRING(t);
+
+ log_struct_iovec(LOG_INFO, iovec, n_iovec + 4);
+ r = 0;
+
+finish:
+ for (i = 0; i < n_message_parts; i++)
+ free(message_parts[i]);
+
+ for (i = 0; i < n_iovec; i++)
+ free(iovec[i].iov_base);
+
+ return r;
+
+}
+
void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_success) {
Manager *m;
bool unexpected;
if (u->job->state == JOB_RUNNING) {
if (ns == UNIT_ACTIVE)
job_finish_and_invalidate(u->job, reload_success ? JOB_DONE : JOB_FAILED, true, false);
- else if (ns != UNIT_ACTIVATING && ns != UNIT_RELOADING) {
+ else if (!IN_SET(ns, UNIT_ACTIVATING, UNIT_RELOADING)) {
unexpected = true;
if (UNIT_IS_INACTIVE_OR_FAILED(ns))
check_unneeded_dependencies(u);
if (ns != os && ns == UNIT_FAILED) {
- log_unit_notice(u, "Unit entered failed state.");
+ log_unit_debug(u, "Unit entered failed state.");
unit_start_on_failure(u);
}
}
manager_send_unit_plymouth(m, u);
} else {
+ /* We don't care about D-Bus going down here, since we'll get an asynchronous notification for it
+ * anyway. */
- /* We don't care about D-Bus here, since we'll get an
- * asynchronous notification for it anyway. */
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns) &&
+ !UNIT_IS_INACTIVE_OR_FAILED(os)
+ && !MANAGER_IS_RELOADING(m)) {
- if (u->type == UNIT_SERVICE &&
- UNIT_IS_INACTIVE_OR_FAILED(ns) &&
- !UNIT_IS_INACTIVE_OR_FAILED(os) &&
- !MANAGER_IS_RELOADING(m)) {
+ /* This unit just stopped/failed. */
+ if (u->type == UNIT_SERVICE) {
- /* Hmm, if there was no start record written
- * write it now, so that we always have a nice
- * pair */
- if (!u->in_audit) {
- manager_send_unit_audit(m, u, AUDIT_SERVICE_START, ns == UNIT_INACTIVE);
+ /* Hmm, if there was no start record written
+ * write it now, so that we always have a nice
+ * pair */
+ if (!u->in_audit) {
+ manager_send_unit_audit(m, u, AUDIT_SERVICE_START, ns == UNIT_INACTIVE);
- if (ns == UNIT_INACTIVE)
- manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, true);
- } else
- /* Write audit record if we have just finished shutting down */
- manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, ns == UNIT_INACTIVE);
+ if (ns == UNIT_INACTIVE)
+ manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, true);
+ } else
+ /* Write audit record if we have just finished shutting down */
+ manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, ns == UNIT_INACTIVE);
+
+ u->in_audit = false;
+ }
- u->in_audit = false;
+ /* Write a log message about consumed resources */
+ unit_log_resources(u);
}
}
return r;
}
+static const char *ip_accounting_metric_field[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+ [CGROUP_IP_INGRESS_BYTES] = "ip-accounting-ingress-bytes",
+ [CGROUP_IP_INGRESS_PACKETS] = "ip-accounting-ingress-packets",
+ [CGROUP_IP_EGRESS_BYTES] = "ip-accounting-egress-bytes",
+ [CGROUP_IP_EGRESS_PACKETS] = "ip-accounting-egress-packets",
+};
+
int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
+ CGroupIPAccountingMetric m;
int r;
assert(u);
unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized));
(void) unit_serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask);
(void) unit_serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
+ unit_serialize_item_format(u, f, "cgroup-bpf-realized", "%i", u->cgroup_bpf_state);
if (uid_is_valid(u->ref_uid))
unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid);
bus_track_serialize(u->bus_track, f, "ref");
+ for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
+ uint64_t v;
+
+ r = unit_get_ip_accounting(u, m, &v);
+ if (r >= 0)
+ unit_serialize_item_format(u, f, ip_accounting_metric_field[m], "%" PRIu64, v);
+ }
+
if (serialize_jobs) {
if (u->job) {
fprintf(f, "job\n");
for (;;) {
char line[LINE_MAX], *l, *v;
+ CGroupIPAccountingMetric m;
size_t k;
if (!fgets(line, sizeof(line), f)) {
log_unit_debug(u, "Failed to parse cgroup-enabled-mask %s, ignoring.", v);
continue;
+ } else if (streq(l, "cgroup-bpf-realized")) {
+ int i;
+
+ r = safe_atoi(v, &i);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse cgroup BPF state %s, ignoring.", v);
+ else
+ u->cgroup_bpf_state =
+ i < 0 ? UNIT_CGROUP_BPF_INVALIDATED :
+ i > 0 ? UNIT_CGROUP_BPF_ON :
+ UNIT_CGROUP_BPF_OFF;
+
+ continue;
+
} else if (streq(l, "ref-uid")) {
uid_t uid;
continue;
}
+ /* Check if this is an IP accounting metric serialization field */
+ for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++)
+ if (streq(l, ip_accounting_metric_field[m]))
+ break;
+ if (m < _CGROUP_IP_ACCOUNTING_METRIC_MAX) {
+ uint64_t c;
+
+ r = safe_atou64(v, &c);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse IP accounting value %s, ignoring.", v);
+ else
+ u->ip_accounting_extra[m] = c;
+ continue;
+ }
+
if (unit_can_serialize(u)) {
if (rt) {
r = exec_runtime_deserialize_item(u, rt, l, v, fds);
if (!dual_timestamp_is_set(&u->state_change_timestamp))
dual_timestamp_get(&u->state_change_timestamp);
+ /* Let's make sure that everything that is deserialized also gets any potential new cgroup settings applied
+ * after we are done. For that we invalidate anything already realized, so that we can realize it again. */
+ unit_invalidate_cgroup(u, _CGROUP_MASK_ALL);
+ unit_invalidate_cgroup_bpf(u);
+
return 0;
}
return true;
if (u->job &&
- (u->job->type == JOB_START ||
- u->job->type == JOB_RELOAD_OR_START ||
- u->job->type == JOB_RESTART))
+ IN_SET(u->job->type, JOB_START, JOB_RELOAD_OR_START, JOB_RESTART))
return true;
return false;
* them. */
if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0 ||
- (detect_container() == 0 && !unit_cgroup_delegate(u)))
+ (detect_container() == 0 && !UNIT_CGROUP_BOOL(u, delegate)))
wait_for_exit = true;
if (send_sighup) {
log_struct(LOG_NOTICE,
"MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
LOG_UNIT_MESSAGE(u, "Directory %s to mount over is not empty, mounting anyway.", where),
"WHERE=%s", where,
NULL);
log_struct(LOG_ERR,
"MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
LOG_UNIT_MESSAGE(u, "Mount on symlink %s not allowed.", where),
"WHERE=%s", where,
NULL);
return 0;
}
-void unit_set_exec_params(Unit *s, ExecParameters *p) {
- CGroupContext *c;
+void unit_set_exec_params(Unit *u, ExecParameters *p) {
+ assert(u);
+ assert(p);
- assert(s);
- assert(s);
+ p->cgroup_path = u->cgroup_path;
+ SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, UNIT_CGROUP_BOOL(u, delegate));
+}
- p->cgroup_path = s->cgroup_path;
+int unit_fork_helper_process(Unit *u, pid_t *ret) {
+ pid_t pid;
+ int r;
+
+ assert(u);
+ assert(ret);
- c = unit_get_cgroup_context(s);
- SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, c && c->delegate);
+ /* Forks off a helper process and makes sure it is a member of the unit's cgroup. Returns == 0 in the child,
+ * and > 0 in the parent. The pid parameter is always filled in with the child's PID. */
+
+ (void) unit_realize_cgroup(u);
+
+ pid = fork();
+ if (pid < 0)
+ return -errno;
+
+ if (pid == 0) {
+
+ (void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE, -1);
+ (void) ignore_signals(SIGPIPE, -1);
+
+ log_close();
+ log_open();
+
+ if (u->cgroup_path) {
+ r = cg_attach_everywhere(u->manager->cgroup_supported, u->cgroup_path, 0, NULL, NULL);
+ if (r < 0) {
+ log_unit_error_errno(u, r, "Failed to join unit cgroup %s: %m", u->cgroup_path);
+ _exit(EXIT_CGROUP);
+ }
+ }
+
+ *ret = getpid_cached();
+ return 0;
+ }
+
+ *ret = pid;
+ return 1;
}