]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
[metrics] Add basic system wide and per unit metrics
author Yaping Li <202858510+YapingLi04@users.noreply.github.com>
Tue, 21 Oct 2025 01:25:10 +0000 (18:25 -0700)
committer Yaping Li <202858510+YapingLi04@users.noreply.github.com>
Mon, 2 Feb 2026 16:03:56 +0000 (08:03 -0800)
This commit adds some basic metrics and integration tests.

System-wide metrics:
- units_by_type_total: target/device/automount etc.
- units_by_state_total: active/reloading/inactive etc.

Two per-unit metrics which show the current state of a unit:
- unit_active_state
- unit_load_state

A metric for service state:
- nrestarts

Here are some sample outputs:

units_by_type_total:

{
        "name" : "io.systemd.Manager.units_by_type_total",
        "value" : 52,
        "fields" : {
                "type" : "target"
        }
}
{
        "name" : "io.systemd.Manager.units_by_type_total",
        "value" : 82,
        "fields" : {
                "type" : "device"
        }
}
{
        "name" : "io.systemd.Manager.units_by_type_total",
        "value" : 2,
        "fields" : {
                "type" : "automount"
        }
}

units_by_state_total:

{
        "name" : "io.systemd.Manager.units_by_state_total",
        "value" : 216,
        "fields" : {
                "state" : "active"
        }
}
{
        "name" : "io.systemd.Manager.units_by_state_total",
        "value" : 0,
        "fields" : {
                "state" : "reloading"
        }
}
{
        "name" : "io.systemd.Manager.units_by_state_total",
        "value" : 120,
        "fields" : {
                "state" : "inactive"
        }
}

unit_active_state:

{
        "name" : "io.systemd.Manager.unit_active_state",
        "object" : "multi-user.target",
        "value" : "active"
}
{
        "name" : "io.systemd.Manager.unit_active_state",
        "object" : "systemd-sysusers.service",
        "value" : "inactive"
}

unit_load_state:

{
        "name" : "io.systemd.Manager.unit_load_state",
        "object" : "multi-user.target",
        "value" : "loaded"
}

nrestarts:

{
        "name" : "io.systemd.Manager.nrestarts",
        "object" : "user@0.service",
        "value" : 0
}
{
        "name" : "io.systemd.Manager.nrestarts",
        "object" : "user-runtime-dir@0.service",
        "value" : 0
}

src/core/manager.h
src/core/meson.build
src/core/varlink-metrics.c [new file with mode: 0644]
src/core/varlink-metrics.h [new file with mode: 0644]
src/core/varlink.c
test/units/TEST-74-AUX-UTILS.varlinkctl.sh

index c764f411086eaac28ecefe7d0c92857faa3f8273..2df606005dbb12d589b883463030fb2c36f5d3ea 100644 (file)
@@ -469,6 +469,8 @@ typedef struct Manager {
          * systemd-oomd to report changes in ManagedOOM settings (systemd client - oomd server). */
         sd_varlink *managed_oom_varlink;
 
+        sd_varlink_server *metrics_varlink_server;
+
         /* Reference to RestrictFileSystems= BPF program */
         struct restrict_fs_bpf *restrict_fs;
 
index 4f20cae2ee303dfaa8cef6fd4cc363934f0bdfc5..e703cc3728970f9cbc535ccfb400b566277efb67 100644 (file)
@@ -69,6 +69,7 @@ libcore_sources = files(
         'varlink-dynamic-user.c',
         'varlink-execute.c',
         'varlink-manager.c',
+        'varlink-metrics.c',
         'varlink-unit.c',
 )
 
diff --git a/src/core/varlink-metrics.c b/src/core/varlink-metrics.c
new file mode 100644 (file)
index 0000000..c64d850
--- /dev/null
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "hashmap.h"
+#include "manager.h"
+#include "metrics.h"
+#include "service.h"
+#include "unit-def.h"
+#include "unit.h"
+#include "varlink-metrics.h"
+
+/* Generator callback for the "unit_active_state" metric family: walks the manager's unit hashmap
+ * and sends one string metric per unit, with the unit id as object and the textual active state as
+ * value. Returns 0 on success, or the first negative value returned by metric_build_send_string(). */
+static int unit_active_state_build_json(MetricFamilyContext *context, void *userdata) {
+        Manager *m = ASSERT_PTR(userdata);
+        Unit *u;
+        char *name;
+
+        assert(context);
+
+        HASHMAP_FOREACH_KEY(u, name, m->units) {
+                int r;
+
+                /* Entries whose key is not the unit's own id string are aliases; skip them so that
+                 * every unit is reported exactly once. */
+                if (name != u->id)
+                        continue;
+
+                const char *state = unit_active_state_to_string(unit_active_state(u));
+
+                r = metric_build_send_string(context, u->id, state, /* field_pairs= */ NULL);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Generator callback for the "unit_load_state" metric family: for every unit in the manager's
+ * unit hashmap, sends a string metric whose object is the unit id and whose value is the textual
+ * form of unit->load_state. Stops at, and returns, the first negative result of
+ * metric_build_send_string(); returns 0 otherwise. */
+static int unit_load_state_build_json(MetricFamilyContext *context, void *userdata) {
+        Manager *m = ASSERT_PTR(userdata);
+        Unit *u;
+        char *name;
+
+        assert(context);
+
+        HASHMAP_FOREACH_KEY(u, name, m->units) {
+                /* Only handle the entry keyed by the unit's own id; all other keys are aliases. */
+                if (name == u->id) {
+                        const char *load_state = unit_load_state_to_string(u->load_state);
+                        int r;
+
+                        r = metric_build_send_string(context, u->id, load_state, /* field_pairs= */ NULL);
+                        if (r < 0)
+                                return r;
+                }
+        }
+
+        return 0;
+}
+
+/* Generator callback for the "nrestarts" metric family: iterates the manager's list of service
+ * units and sends each service's n_restarts value as an unsigned metric, using the unit id as
+ * object. Returns 0 on success, or the first negative value returned by
+ * metric_build_send_unsigned(). */
+static int nrestarts_build_json(MetricFamilyContext *context, void *userdata) {
+        Manager *m = ASSERT_PTR(userdata);
+
+        assert(context);
+
+        LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_SERVICE]) {
+                int r;
+
+                r = metric_build_send_unsigned(
+                                context,
+                                u->id,
+                                SERVICE(u)->n_restarts,
+                                /* field_pairs= */ NULL);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Generator callback for the "units_by_type_total" metric family: for each unit type, counts the
+ * entries of the manager's per-type unit list and sends the count as an unsigned metric without
+ * an object, labelled with a "type" field holding the type name. Types with no units are reported
+ * with a value of 0. Returns 0 on success, or the first negative value returned by
+ * metric_build_send_unsigned(). */
+static int units_by_type_total_build_json(MetricFamilyContext *context, void *userdata) {
+        Manager *m = ASSERT_PTR(userdata);
+
+        assert(context);
+
+        for (UnitType t = 0; t < _UNIT_TYPE_MAX; t++) {
+                uint64_t n_units = 0;
+                int r;
+
+                /* The list carries no length, hence count by walking it. */
+                LIST_FOREACH(units_by_type, _u, m->units_by_type[t])
+                        n_units++;
+
+                r = metric_build_send_unsigned(
+                                context, /* object= */ NULL, n_units, STRV_MAKE("type", unit_type_to_string(t)));
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Generator callback for the "units_by_state_total" metric family: tallies the units in the
+ * manager's unit hashmap by their current active state, then sends one unsigned metric per
+ * UnitActiveState value, without an object and labelled with a "state" field holding the state
+ * name. States that no unit is in are still reported, with a value of 0. Returns 0 on success, or
+ * the first negative value returned by metric_build_send_unsigned(). */
+static int units_by_state_total_build_json(MetricFamilyContext *context, void *userdata) {
+        Manager *manager = ASSERT_PTR(userdata);
+        /* These are tallies indexed by state, not states themselves, so they must be plain
+         * integers rather than UnitActiveState enum values. */
+        uint64_t counters[_UNIT_ACTIVE_STATE_MAX] = {};
+        Unit *unit;
+        char *key;
+        int r;
+
+        assert(context);
+
+        /* TODO need a rework probably with state counter */
+        HASHMAP_FOREACH_KEY(unit, key, manager->units) {
+                /* ignore aliases: only the entry keyed by the unit's own id is counted */
+                if (key != unit->id)
+                        continue;
+
+                counters[unit_active_state(unit)]++;
+        }
+
+        for (UnitActiveState state = 0; state < _UNIT_ACTIVE_STATE_MAX; state++) {
+                r = metric_build_send_unsigned(
+                                context,
+                                /* object= */ NULL,
+                                counters[state],
+                                STRV_MAKE("state", unit_active_state_to_string(state)));
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Table of the metric families implemented in this file. Each entry gives the full metric name
+ * (manager prefix + short name), a human-readable description, the metric type and the callback
+ * that generates the metric values. The trailing empty entry presumably serves as the
+ * end-of-table marker for the consumers of this table — TODO confirm against metrics.c. */
+const MetricFamily metric_family_table[] = {
+        /* Keep metrics ordered alphabetically */
+        {
+         .name = METRIC_IO_SYSTEMD_MANAGER_PREFIX "nrestarts",
+         .description = "Per unit metric: number of restarts",
+         .type = METRIC_FAMILY_TYPE_COUNTER,
+         .generate_cb = nrestarts_build_json,
+        },
+        {
+         .name = METRIC_IO_SYSTEMD_MANAGER_PREFIX "unit_active_state",
+         .description = "Per unit metric: active state",
+         .type = METRIC_FAMILY_TYPE_STRING,
+         .generate_cb = unit_active_state_build_json,
+        },
+        {
+         .name = METRIC_IO_SYSTEMD_MANAGER_PREFIX "unit_load_state",
+         .description = "Per unit metric: load state",
+         .type = METRIC_FAMILY_TYPE_STRING,
+         .generate_cb = unit_load_state_build_json,
+        },
+        {
+         .name = METRIC_IO_SYSTEMD_MANAGER_PREFIX "units_by_state_total",
+         .description = "Total number of units of different state",
+         .type = METRIC_FAMILY_TYPE_GAUGE,
+         .generate_cb = units_by_state_total_build_json,
+        },
+        {
+         .name = METRIC_IO_SYSTEMD_MANAGER_PREFIX "units_by_type_total",
+         .description = "Total number of units of different types",
+         .type = METRIC_FAMILY_TYPE_GAUGE,
+         .generate_cb = units_by_type_total_build_json,
+        },
+        {}
+};
+
+/* Varlink method handler: forwards the call unchanged to metrics_method_describe(), supplying
+ * this file's metric_family_table as the set of metric families. Returns whatever
+ * metrics_method_describe() returns. */
+int vl_method_describe(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
+        return metrics_method_describe(metric_family_table, link, parameters, flags, userdata);
+}
+
+/* Varlink method handler: forwards the call unchanged to metrics_method_list(), supplying this
+ * file's metric_family_table as the set of metric families. Returns whatever
+ * metrics_method_list() returns. */
+int vl_method_list(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
+        return metrics_method_list(metric_family_table, link, parameters, flags, userdata);
+}
diff --git a/src/core/varlink-metrics.h b/src/core/varlink-metrics.h
new file mode 100644 (file)
index 0000000..92e7d81
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-varlink.h"
+#include "sd-json.h"
+
+/* Prefix prepended to the short name of each metric family defined in varlink-metrics.c. */
+#define METRIC_IO_SYSTEMD_MANAGER_PREFIX "io.systemd.Manager."
+
+/* Varlink method handlers for the manager's metrics: both forward to the generic metrics helpers
+ * with the metric family table from varlink-metrics.c. */
+int vl_method_list(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata);
+int vl_method_describe(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata);
index 605673ef1977d1f95238b5ccc49d363ba1aca6f0..b5b00cdf74044549b809792a2b71619dcf58b5bb 100644 (file)
@@ -5,6 +5,7 @@
 #include "constants.h"
 #include "errno-util.h"
 #include "manager.h"
+#include "metrics.h"
 #include "path-util.h"
 #include "pidref.h"
 #include "string-util.h"
@@ -18,6 +19,7 @@
 #include "varlink-io.systemd.UserDatabase.h"
 #include "varlink-io.systemd.service.h"
 #include "varlink-manager.h"
+#include "varlink-metrics.h"
 #include "varlink-serialize.h"
 #include "varlink-unit.h"
 #include "varlink-util.h"
@@ -423,8 +425,26 @@ int manager_setup_varlink_server(Manager *m) {
         return 1;
 }
 
+/* Sets up m->metrics_varlink_server via metrics_setup_varlink_server(), registering
+ * vl_method_list() and vl_method_describe() as handlers and passing the manager along with its
+ * event loop. Returns 0 on success (any non-negative result of the helper is collapsed to 0) and
+ * the helper's negative result on failure.
+ *
+ * NOTE(review): because this never returns > 0, a caller testing "r > 0" to detect a freshly
+ * created server will never see one — confirm this is intended. */
+static int manager_setup_varlink_metrics_server(Manager *m) {
+        int r;
+
+        assert(m);
+
+        /* SD_VARLINK_SERVER_ACCOUNT_UID is requested only for the system manager. */
+        sd_varlink_server_flags_t flags =
+                SD_VARLINK_SERVER_INHERIT_USERDATA |
+                (MANAGER_IS_SYSTEM(m) ? SD_VARLINK_SERVER_ACCOUNT_UID : 0);
+
+        r = metrics_setup_varlink_server(
+                        &m->metrics_varlink_server,
+                        flags,
+                        m->event,
+                        vl_method_list,
+                        vl_method_describe,
+                        m);
+
+        return r < 0 ? r : 0;
+}
+
 static int manager_varlink_init_system(Manager *m) {
         int r;
+        _cleanup_free_ char *metrics_address = NULL;
 
         assert(m);
 
@@ -433,16 +453,29 @@ static int manager_varlink_init_system(Manager *m) {
                 return log_error_errno(r, "Failed to set up varlink server: %m");
         bool fresh = r > 0;
 
+        r = manager_setup_varlink_metrics_server(m);
+        if (r < 0)
+                return log_error_errno(r, "Failed to set up metrics varlink server: %m");
+        bool metrics_fresh = r > 0;
+
+        r = runtime_directory_generic(m->runtime_scope, "systemd/report/io.systemd.Manager", &metrics_address);
+        if (r < 0)
+                return r;
+
         if (!MANAGER_IS_TEST_RUN(m)) {
                 FOREACH_STRING(address,
                                "/run/systemd/userdb/io.systemd.DynamicUser",
                                VARLINK_PATH_MANAGED_OOM_SYSTEM,
-                               "/run/systemd/io.systemd.Manager") {
+                               "/run/systemd/io.systemd.Manager",
+                               metrics_address) {
+
+                        sd_varlink_server *server = streq(address, metrics_address) ? m->metrics_varlink_server : m->varlink_server;
+                        fresh = streq(address, metrics_address) ? metrics_fresh : fresh;
                         /* We might have got sockets through deserialization. Do not bind to them twice. */
-                        if (!fresh && varlink_server_contains_socket(m->varlink_server, address))
+                        if (!fresh && varlink_server_contains_socket(server, address))
                                 continue;
 
-                        r = sd_varlink_server_listen_address(m->varlink_server, address, 0666 | SD_VARLINK_SERVER_MODE_MKDIR_0755);
+                        r = sd_varlink_server_listen_address(server, address, 0666 | SD_VARLINK_SERVER_MODE_MKDIR_0755);
                         if (r < 0)
                                 return log_error_errno(r, "Failed to bind to varlink socket '%s': %m", address);
                 }
@@ -479,6 +512,10 @@ static int manager_varlink_init_user(Manager *m) {
                         return log_error_errno(r, "Failed to bind to varlink socket '%s': %m", address);
         }
 
+        r = manager_setup_varlink_metrics_server(m);
+        if (r < 0)
+                return log_error_errno(r, "Failed to set up metrics varlink server: %m");
+
         return manager_varlink_managed_oom_connect(m);
 }
 
@@ -497,6 +534,7 @@ void manager_varlink_done(Manager *m) {
 
         m->varlink_server = sd_varlink_server_unref(m->varlink_server);
         m->managed_oom_varlink = sd_varlink_close_unref(m->managed_oom_varlink);
+        m->metrics_varlink_server = sd_varlink_server_unref(m->metrics_varlink_server);
 }
 
 void manager_varlink_send_pending_reload_message(Manager *m) {
index c7607ce7c4501f5b080461e843edfd52a42e3a51..f321b46fc220a4cb8c801debd3b82dd9f69de88c 100755 (executable)
@@ -210,6 +210,18 @@ varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List '{"cgroup":
 invocation_id="$(systemctl show -P InvocationID systemd-journald.service)"
 varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List "{\"invocationID\": \"$invocation_id\"}"
 
+# test io.systemd.Metrics
+varlinkctl info /run/systemd/report/io.systemd.Manager
+
+varlinkctl list-methods /run/systemd/report/io.systemd.Manager
+varlinkctl list-methods -j /run/systemd/report/io.systemd.Manager io.systemd.Metrics | jq .
+
+varlinkctl introspect /run/systemd/report/io.systemd.Manager
+varlinkctl introspect -j /run/systemd/report/io.systemd.Manager io.systemd.Metrics | jq .
+
+varlinkctl --more call /run/systemd/report/io.systemd.Manager io.systemd.Metrics.List {}
+varlinkctl --more call /run/systemd/report/io.systemd.Manager io.systemd.Metrics.Describe {}
+
 # test io.systemd.Manager in user manager
 testuser_uid=$(id -u testuser)
 systemd-run --wait --pipe --user --machine testuser@ \