From: Yaping Li <202858510+YapingLi04@users.noreply.github.com> Date: Tue, 21 Oct 2025 01:25:10 +0000 (-0700) Subject: [metrics] Add basic system wide and per unit metrics X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bb1ef2edf7d62de35291702635067ee85f09bad5;p=thirdparty%2Fsystemd.git [metrics] Add basic system wide and per unit metrics This commit adds some basic metrics and integration tests. System wide metrics: - units_by_type_total: target/device/automount etc. - units_by_state_total: active/reloading/inactive etc. Two per unit metrics which show the current state of a unit: - unit_active_state - unit_load_state A metric for service state: - nrestarts Here are some sample outputs: units_by_type_total: { "name" : "io.systemd.Manager.units_by_type_total", "value" : 52, "fields" : { "type" : "target" } } { "name" : "io.systemd.Manager.units_by_type_total", "value" : 82, "fields" : { "type" : "device" } } { "name" : "io.systemd.Manager.units_by_type_total", "value" : 2, "fields" : { "type" : "automount" } } units_by_state_total: { "name" : "io.systemd.Manager.units_by_state_total", "value" : 216, "fields" : { "state" : "active" } } { "name" : "io.systemd.Manager.units_by_state_total", "value" : 0, "fields" : { "state" : "reloading" } } { "name" : "io.systemd.Manager.units_by_state_total", "value" : 120, "fields" : { "state" : "inactive" } } unit_active_state: { "name" : "io.systemd.Manager.unit_active_state", "object" : "multi-user.target", "value" : "active" } { "name" : "io.systemd.Manager.unit_active_state", "object" : "systemd-sysusers.service", "value" : "inactive" } unit_load_state: { "name" : "io.systemd.Manager.unit_load_state", "object" : "multi-user.target", "value" : "loaded" } nrestarts: { "name" : "io.systemd.Manager.nrestarts", "object" : "user@0.service", "value" : 0 } { "name" : "io.systemd.Manager.nrestarts", "object" : "user-runtime-dir@0.service", "value" : 0 } --- diff --git a/src/core/manager.h b/src/core/manager.h 
index c764f411086..2df606005db 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -469,6 +469,8 @@ typedef struct Manager {
          * systemd-oomd to report changes in ManagedOOM settings (systemd client - oomd server). */
         sd_varlink *managed_oom_varlink;
 
+        sd_varlink_server *metrics_varlink_server;
+
         /* Reference to RestrictFileSystems= BPF program */
         struct restrict_fs_bpf *restrict_fs;
 
diff --git a/src/core/meson.build b/src/core/meson.build
index 4f20cae2ee3..e703cc37289 100644
--- a/src/core/meson.build
+++ b/src/core/meson.build
@@ -69,6 +69,7 @@ libcore_sources = files(
         'varlink-dynamic-user.c',
         'varlink-execute.c',
         'varlink-manager.c',
+        'varlink-metrics.c',
         'varlink-unit.c',
 )
 
diff --git a/src/core/varlink-metrics.c b/src/core/varlink-metrics.c
new file mode 100644
index 00000000000..c64d850367b
--- /dev/null
+++ b/src/core/varlink-metrics.c
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "hashmap.h"
+#include "manager.h"
+#include "metrics.h"
+#include "service.h"
+#include "unit-def.h"
+#include "unit.h"
+#include "varlink-metrics.h"
+
+/* Sends each unit's active state as a string metric, skipping alias keys; returns 0 or the first send error. */
+static int unit_active_state_build_json(MetricFamilyContext *context, void *userdata) {
+        Manager *manager = ASSERT_PTR(userdata);
+        Unit *unit;
+        char *key;
+        int r;
+
+        assert(context);
+
+        HASHMAP_FOREACH_KEY(unit, key, manager->units) {
+                /* ignore aliases */
+                if (key != unit->id)
+                        continue;
+
+                r = metric_build_send_string(
+                                context,
+                                unit->id,
+                                unit_active_state_to_string(unit_active_state(unit)),
+                                /* field_pairs= */ NULL);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Sends each unit's load state as a string metric, skipping alias keys; returns 0 or the first send error. */
+static int unit_load_state_build_json(MetricFamilyContext *context, void *userdata) {
+        Manager *manager = ASSERT_PTR(userdata);
+        Unit *unit;
+        char *key;
+        int r;
+
+        assert(context);
+
+        HASHMAP_FOREACH_KEY(unit, key, manager->units) {
+                /* ignore aliases */
+                if (key != unit->id)
+                        continue;
+
+                r = metric_build_send_string(
+                                context,
+                                unit->id,
+                                unit_load_state_to_string(unit->load_state),
+                                /* field_pairs= */ NULL);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Sends n_restarts of every unit on the UNIT_SERVICE list as an unsigned metric; returns 0 or the first send error. */
+static int nrestarts_build_json(MetricFamilyContext *context, void *userdata) {
+        Manager *manager = ASSERT_PTR(userdata);
+        int r;
+
+        assert(context);
+
+        LIST_FOREACH(units_by_type, unit, manager->units_by_type[UNIT_SERVICE]) {
+                r = metric_build_send_unsigned(
+                                context, unit->id, SERVICE(unit)->n_restarts, /* field_pairs= */ NULL);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Sends, for every unit type, the length of that type's unit list, tagged with a "type" field; returns 0 or the first send error. */
+static int units_by_type_total_build_json(MetricFamilyContext *context, void *userdata) {
+        Manager *manager = ASSERT_PTR(userdata);
+        int r;
+
+        assert(context);
+
+        for (UnitType type = 0; type < _UNIT_TYPE_MAX; type++) {
+                uint64_t counter = 0;
+
+                LIST_FOREACH(units_by_type, _u, manager->units_by_type[type])
+                        counter++;
+
+                r = metric_build_send_unsigned(
+                                context,
+                                /* object= */ NULL,
+                                counter,
+                                STRV_MAKE("type", unit_type_to_string(type)));
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Sends, for every active state, the number of units currently in it, tagged with a "state" field; returns 0 or the first send error. */
+static int units_by_state_total_build_json(MetricFamilyContext *context, void *userdata) {
+        Manager *manager = ASSERT_PTR(userdata);
+        uint64_t counters[_UNIT_ACTIVE_STATE_MAX] = {};
+        Unit *unit;
+        char *key;
+        int r;
+
+        assert(context);
+
+        /* TODO need a rework probably with state counter */
+        HASHMAP_FOREACH_KEY(unit, key, manager->units) {
+                /* ignore aliases */
+                if (key != unit->id)
+                        continue;
+
+                counters[unit_active_state(unit)]++;
+        }
+
+        for (UnitActiveState state = 0; state < _UNIT_ACTIVE_STATE_MAX; state++) {
+                r = metric_build_send_unsigned(
+                                context,
+                                /* object= */ NULL,
+                                counters[state],
+                                STRV_MAKE("state", unit_active_state_to_string(state)));
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Metric families handed to metrics_method_describe()/metrics_method_list(); the table ends with an empty entry. */
+const MetricFamily metric_family_table[] = {
+        /* Keep metrics ordered alphabetically */
+        {
+                .name = METRIC_IO_SYSTEMD_MANAGER_PREFIX "nrestarts",
+                .description = "Per unit metric: number of restarts",
+                .type = METRIC_FAMILY_TYPE_COUNTER,
+                .generate_cb = nrestarts_build_json,
+        },
+        {
+                .name = METRIC_IO_SYSTEMD_MANAGER_PREFIX "unit_active_state",
+                .description = "Per unit metric: active state",
+                .type = METRIC_FAMILY_TYPE_STRING,
+                .generate_cb = unit_active_state_build_json,
+        },
+        {
+                .name = METRIC_IO_SYSTEMD_MANAGER_PREFIX "unit_load_state",
+                .description = "Per unit metric: load state",
+                .type = METRIC_FAMILY_TYPE_STRING,
+                .generate_cb = unit_load_state_build_json,
+        },
+        {
+                .name = METRIC_IO_SYSTEMD_MANAGER_PREFIX "units_by_state_total",
+                .description = "Total number of units of different state",
+                .type = METRIC_FAMILY_TYPE_GAUGE,
+                .generate_cb = units_by_state_total_build_json,
+        },
+        {
+                .name = METRIC_IO_SYSTEMD_MANAGER_PREFIX "units_by_type_total",
+                .description = "Total number of units of different types",
+                .type = METRIC_FAMILY_TYPE_GAUGE,
+                .generate_cb = units_by_type_total_build_json,
+        },
+        {}
+};
+
+/* Forwards the call unchanged to metrics_method_describe() together with metric_family_table. */
+int vl_method_describe(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
+        return metrics_method_describe(metric_family_table, link, parameters, flags, userdata);
+}
+
+/* Forwards the call unchanged to metrics_method_list() together with metric_family_table. */
+int vl_method_list(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
+        return metrics_method_list(metric_family_table, link, parameters, flags, userdata);
+}
diff --git a/src/core/varlink-metrics.h b/src/core/varlink-metrics.h
new file mode 100644
index 00000000000..92e7d81cf86
--- /dev/null
+++ b/src/core/varlink-metrics.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-varlink.h"
+#include "sd-json.h"
+
+#define METRIC_IO_SYSTEMD_MANAGER_PREFIX "io.systemd.Manager."
+
+int vl_method_list(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata);
+int vl_method_describe(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata);
diff --git a/src/core/varlink.c b/src/core/varlink.c
index 605673ef197..b5b00cdf740 100644
--- a/src/core/varlink.c
+++ b/src/core/varlink.c
@@ -5,6 +5,7 @@
 #include "constants.h"
 #include "errno-util.h"
 #include "manager.h"
+#include "metrics.h"
 #include "path-util.h"
 #include "pidref.h"
 #include "string-util.h"
@@ -18,6 +19,7 @@
 #include "varlink-io.systemd.UserDatabase.h"
 #include "varlink-io.systemd.service.h"
 #include "varlink-manager.h"
+#include "varlink-metrics.h"
 #include "varlink-serialize.h"
 #include "varlink-unit.h"
 #include "varlink-util.h"
@@ -423,8 +425,28 @@ int manager_setup_varlink_server(Manager *m) {
         return 1;
 }
 
+/* Sets up m->metrics_varlink_server through metrics_setup_varlink_server(), adding SD_VARLINK_SERVER_ACCOUNT_UID
+ * for the system manager. Returns 0 on success (never a positive value) or a negative error. */
+static int manager_setup_varlink_metrics_server(Manager *m) {
+        sd_varlink_server_flags_t flags = SD_VARLINK_SERVER_INHERIT_USERDATA;
+        int r;
+
+        assert(m);
+
+        if (MANAGER_IS_SYSTEM(m))
+                flags |= SD_VARLINK_SERVER_ACCOUNT_UID;
+
+        r = metrics_setup_varlink_server(
+                        &m->metrics_varlink_server, flags, m->event, vl_method_list, vl_method_describe, m);
+        if (r < 0)
+                return r;
+
+        return 0;
+}
+
 static int manager_varlink_init_system(Manager *m) {
+        _cleanup_free_ char *metrics_address = NULL;
         int r;
 
         assert(m);
 
@@ -433,16 +455,31 @@ static int manager_varlink_init_system(Manager *m) {
                 return log_error_errno(r, "Failed to set up varlink server: %m");
         bool fresh = r > 0;
 
+        r = manager_setup_varlink_metrics_server(m);
+        if (r < 0)
+                return log_error_errno(r, "Failed to set up metrics varlink server: %m");
+        bool metrics_fresh = r > 0;
+
+        r = runtime_directory_generic(m->runtime_scope, "systemd/report/io.systemd.Manager", &metrics_address);
+        if (r < 0)
+                return log_error_errno(r, "Failed to determine metrics varlink socket path: %m");
+
         if (!MANAGER_IS_TEST_RUN(m)) {
                 FOREACH_STRING(address,
                                "/run/systemd/userdb/io.systemd.DynamicUser",
                                VARLINK_PATH_MANAGED_OOM_SYSTEM,
-                               "/run/systemd/io.systemd.Manager") {
+                               "/run/systemd/io.systemd.Manager",
+                               metrics_address) {
+                        /* The metrics address belongs to its own server; choose server and freshness flag per address. */
+                        bool is_metrics = streq(address, metrics_address);
+                        sd_varlink_server *server = is_metrics ? m->metrics_varlink_server : m->varlink_server;
+                        bool server_fresh = is_metrics ? metrics_fresh : fresh;
+
                         /* We might have got sockets through deserialization. Do not bind to them twice. */
-                        if (!fresh && varlink_server_contains_socket(m->varlink_server, address))
+                        if (!server_fresh && varlink_server_contains_socket(server, address))
                                 continue;
 
-                        r = sd_varlink_server_listen_address(m->varlink_server, address, 0666 | SD_VARLINK_SERVER_MODE_MKDIR_0755);
+                        r = sd_varlink_server_listen_address(server, address, 0666 | SD_VARLINK_SERVER_MODE_MKDIR_0755);
                         if (r < 0)
                                 return log_error_errno(r, "Failed to bind to varlink socket '%s': %m", address);
                 }
@@ -479,6 +516,10 @@ static int manager_varlink_init_user(Manager *m) {
                         return log_error_errno(r, "Failed to bind to varlink socket '%s': %m", address);
         }
 
+        r = manager_setup_varlink_metrics_server(m);
+        if (r < 0)
+                return log_error_errno(r, "Failed to set up metrics varlink server: %m");
+
         return manager_varlink_managed_oom_connect(m);
 }
 
@@ -497,6 +538,7 @@ void manager_varlink_done(Manager *m) {
 
         m->varlink_server = sd_varlink_server_unref(m->varlink_server);
         m->managed_oom_varlink = sd_varlink_close_unref(m->managed_oom_varlink);
+        m->metrics_varlink_server = sd_varlink_server_unref(m->metrics_varlink_server);
 }
 
 void manager_varlink_send_pending_reload_message(Manager *m) {
diff --git a/test/units/TEST-74-AUX-UTILS.varlinkctl.sh b/test/units/TEST-74-AUX-UTILS.varlinkctl.sh
index c7607ce7c45..f321b46fc22 100755
--- a/test/units/TEST-74-AUX-UTILS.varlinkctl.sh
+++ b/test/units/TEST-74-AUX-UTILS.varlinkctl.sh
@@ -210,6 +210,18 @@ varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List '{"cgroup":
 invocation_id="$(systemctl show -P InvocationID systemd-journald.service)"
 varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List "{\"invocationID\": \"$invocation_id\"}"
 
+# test io.systemd.Metrics
+varlinkctl info /run/systemd/report/io.systemd.Manager
+
+varlinkctl list-methods /run/systemd/report/io.systemd.Manager
+varlinkctl list-methods -j /run/systemd/report/io.systemd.Manager io.systemd.Metrics | jq .
+
+varlinkctl introspect /run/systemd/report/io.systemd.Manager
+varlinkctl introspect -j /run/systemd/report/io.systemd.Manager io.systemd.Metrics | jq .
+
+varlinkctl --more call /run/systemd/report/io.systemd.Manager io.systemd.Metrics.List {}
+varlinkctl --more call /run/systemd/report/io.systemd.Manager io.systemd.Metrics.Describe {}
+
 # test io.systemd.Manager in user manager
 testuser_uid=$(id -u testuser)
 systemd-run --wait --pipe --user --machine testuser@ \