From 8c65fe4fa11d9476558aa6c54d9e26db8cc80f32 Mon Sep 17 00:00:00 2001
From: Yaping Li <202858510+YapingLi04@users.noreply.github.com>
Date: Thu, 2 Apr 2026 22:01:15 -0700
Subject: [PATCH] report: add cgroup metrics in a separate varlink service

Add CpuUsage, MemoryUsage, IOReadBytes, IOReadOperations, and
TasksCurrent in a standalone socket-activated varlink service.

The new systemd-report-cgroup service listens at
/run/systemd/report/io.systemd.CGroup and exposes:
- io.systemd.CGroup.CpuUsage
- io.systemd.CGroup.IOReadBytes
- io.systemd.CGroup.IOReadOperations
- io.systemd.CGroup.MemoryUsage (with type=current/available/peak)
- io.systemd.CGroup.TasksCurrent
---
 src/report/meson.build                  |   7 +
 src/report/report-cgroup-server.c       | 131 +++++++
 src/report/report-cgroup.c              | 495 ++++++++++++++++++++++++
 src/report/report-cgroup.h              |  20 +
 test/units/TEST-74-AUX-UTILS.report.sh  |   7 +
 units/meson.build                       |   2 +
 units/systemd-report-cgroup.socket      |  25 ++
 units/systemd-report-cgroup@.service.in |  42 ++
 8 files changed, 729 insertions(+)
 create mode 100644 src/report/report-cgroup-server.c
 create mode 100644 src/report/report-cgroup.c
 create mode 100644 src/report/report-cgroup.h
 create mode 100644 units/systemd-report-cgroup.socket
 create mode 100644 units/systemd-report-cgroup@.service.in

diff --git a/src/report/meson.build b/src/report/meson.build
index 26d1bbfdc3e..36227bed9f5 100644
--- a/src/report/meson.build
+++ b/src/report/meson.build
@@ -15,4 +15,11 @@ executables += [
                         'report-basic.c',
                 ),
         },
+        libexec_template + {
+                'name' : 'systemd-report-cgroup',
+                'sources' : files(
+                        'report-cgroup.c',
+                        'report-cgroup-server.c',
+                ),
+        },
 ]
diff --git a/src/report/report-cgroup-server.c b/src/report/report-cgroup-server.c
new file mode 100644
index 00000000000..eef2ec05fcb
--- /dev/null
+++ b/src/report/report-cgroup-server.c
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+
+#include "sd-varlink.h"
+
+#include "alloc-util.h"
+#include "ansi-color.h"
+#include "build.h"
+#include "log.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "report-cgroup.h"
+#include "varlink-io.systemd.Metrics.h"
+#include "varlink-util.h"
+
+static int vl_server(void) {
+        _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *vs = NULL;
+        _cleanup_(cgroup_context_freep) CGroupContext *ctx = NULL;
+        int r;
+
+        ctx = new0(CGroupContext, 1);
+        if (!ctx)
+                return log_oom();
+
+        r = varlink_server_new(&vs, SD_VARLINK_SERVER_INHERIT_USERDATA, ctx);
+        if (r < 0)
+                return log_error_errno(r, "Failed to allocate Varlink server: %m");
+
+        r = sd_varlink_server_add_interface(vs, &vl_interface_io_systemd_Metrics);
+        if (r < 0)
+                return log_error_errno(r, "Failed to add Varlink interface: %m");
+
+        r = sd_varlink_server_bind_method_many(
+                        vs,
+                        "io.systemd.Metrics.List", vl_method_list_metrics,
+                        "io.systemd.Metrics.Describe", vl_method_describe_metrics);
+        if (r < 0)
+                return log_error_errno(r, "Failed to bind Varlink methods: %m");
+
+        r = sd_varlink_server_loop_auto(vs);
+        if (r < 0)
+                return log_error_errno(r, "Failed to run Varlink event loop: %m");
+
+        return 0;
+}
+
+static int help(void) {
+        _cleanup_free_ char *url = NULL;
+        int r;
+
+        r = terminal_urlify_man("systemd-report-cgroup", "8", &url);
+        if (r < 0)
+                return log_oom();
+
+        printf("%s [OPTIONS...]\n"
+               "\n%sReport cgroup metrics.%s\n"
+               "\n%sOptions:%s\n"
+               " -h --help Show this help\n"
+               " --version Show package version\n"
+               "\nSee the %s for details.\n",
+               program_invocation_short_name,
+               ansi_highlight(),
+               ansi_normal(),
+               ansi_underline(),
+               ansi_normal(),
+               url);
+
+        return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+        enum {
+                ARG_VERSION = 0x100,
+        };
+
+        static const struct option options[] = {
+                { "help", no_argument, NULL, 'h' },
+                { "version", no_argument, NULL, ARG_VERSION },
+                {}
+        };
+
+        int c, r;
+
+        assert(argc >= 0);
+        assert(argv);
+
+        while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+                switch (c) {
+
+                case 'h':
+                        return help();
+
+                case ARG_VERSION:
+                        return version();
+
+                case '?':
+                        return -EINVAL;
+
+                default:
+                        assert_not_reached();
+                }
+
+        if (optind < argc)
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                       "This program takes no arguments.");
+
+        r = sd_varlink_invocation(SD_VARLINK_ALLOW_ACCEPT);
+        if (r < 0)
+                return log_error_errno(r, "Failed to check if invoked in Varlink mode: %m");
+        if (r == 0)
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                       "This program can only run as a Varlink service.");
+
+        return 1;
+}
+
+static int run(int argc, char *argv[]) {
+        int r;
+
+        log_setup();
+
+        r = parse_argv(argc, argv);
+        if (r <= 0)
+                return r;
+
+        return vl_server();
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/report/report-cgroup.c b/src/report/report-cgroup.c
new file mode 100644
index 00000000000..476b074ac05
--- /dev/null
+++ b/src/report/report-cgroup.c
@@ -0,0 +1,495 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-json.h"
+#include "sd-varlink.h"
+
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "metrics.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "report-cgroup.h"
+#include "string-util.h"
+#include "time-util.h"
+
+typedef struct CGroupInfo {
+        char *unit;
+        char *path;
+        uint64_t io_rbytes;
+        uint64_t io_rios;
+        int io_stat_cached; /* 0 = not attempted, > 0 = cached, < 0 = -errno */
+} CGroupInfo;
+
+static CGroupInfo *cgroup_info_free(CGroupInfo *info) {
+        if (!info)
+                return NULL;
+        free(info->unit);
+        free(info->path);
+        return mfree(info);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(CGroupInfo*, cgroup_info_free);
+
+static void cgroup_info_array_free(CGroupInfo **infos, size_t n) {
+        FOREACH_ARRAY(i, infos, n)
+                cgroup_info_free(*i);
+        free(infos);
+}
+
+static void cgroup_context_flush(CGroupContext *ctx) {
+        assert(ctx);
+        cgroup_info_array_free(ctx->cgroups, ctx->n_cgroups);
+        ctx->cgroups = NULL;
+        ctx->n_cgroups = 0;
+        ctx->cache_populated = false;
+}
+
+CGroupContext *cgroup_context_free(CGroupContext *ctx) {
+        if (!ctx)
+                return NULL;
+        cgroup_context_flush(ctx);
+        return mfree(ctx);
+}
+
+static int walk_cgroups_recursive(const char *path, CGroupInfo ***infos, size_t *n_infos) {
+        _cleanup_closedir_ DIR *d = NULL;
+        int r;
+
+        assert(path);
+        assert(infos);
+        assert(n_infos);
+
+        /* Collect any unit cgroup we encounter */
+        _cleanup_free_ char *name = NULL;
+        r = cg_path_get_unit(path, &name);
+        if (r >= 0) {
+                _cleanup_(cgroup_info_freep) CGroupInfo *info = new(CGroupInfo, 1);
+                if (!info)
+                        return log_oom();
+
+                *info = (CGroupInfo) {
+                        .unit = TAKE_PTR(name),
+                        .path = strdup(path),
+                };
+                if (!info->path)
+                        return log_oom();
+
+                if (!GREEDY_REALLOC(*infos, *n_infos + 1))
+                        return log_oom();
+
+                (*infos)[(*n_infos)++] = TAKE_PTR(info);
+                return 0; /* Unit cgroups are leaf nodes for our purposes */
+        }
+
+        /* Stop at delegation boundaries — don't descend into delegated subtrees */
+        r = cg_is_delegated(path);
+        if (r == -ENOENT)
+                return 0;
+        if (r < 0)
+                return log_debug_errno(r, "Failed to check delegation for '%s': %m", path);
+        if (r > 0)
+                return 0;
+
+        r = cg_enumerate_subgroups(path, &d);
+        if (r == -ENOENT)
+                return 0;
+        if (r < 0)
+                return log_debug_errno(r, "Failed to enumerate cgroup '%s': %m", path);
+
+        for (;;) {
+                _cleanup_free_ char *fn = NULL, *child = NULL;
+
+                r = cg_read_subgroup(d, &fn);
+                if (r < 0)
+                        return log_debug_errno(r, "Failed to read subgroup from '%s': %m", path);
+                if (r == 0)
+                        break;
+
+                child = path_join(empty_to_root(path), fn);
+                if (!child)
+                        return log_oom();
+
+                path_simplify(child);
+
+                r = walk_cgroups_recursive(child, infos, n_infos);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+static int walk_cgroups(CGroupContext *ctx, CGroupInfo ***ret, size_t *ret_n) {
+        int r;
+
+        assert(ctx);
+        assert(ret);
+        assert(ret_n);
+
+        /* Return cached result if available */
+        if (ctx->cache_populated) {
+                *ret = ctx->cgroups;
+                *ret_n = ctx->n_cgroups;
+                return 0;
+        }
+
+        CGroupInfo **infos = NULL;
+        size_t n_infos = 0;
+        CLEANUP_ARRAY(infos, n_infos, cgroup_info_array_free);
+
+        r = walk_cgroups_recursive("", &infos, &n_infos);
+        if (r < 0)
+                return r;
+
+        ctx->cgroups = TAKE_PTR(infos);
+        ctx->n_cgroups = TAKE_GENERIC(n_infos, size_t, 0);
+        ctx->cache_populated = true;
+
+        *ret = ctx->cgroups;
+        *ret_n = ctx->n_cgroups;
+        return 0;
+}
+
+static int cpu_usage_build_json(MetricFamilyContext *context, void *userdata) {
+        CGroupContext *ctx = ASSERT_PTR(userdata);
+        CGroupInfo **cgroups;
+        size_t n_cgroups;
+        int r;
+
+        assert(context);
+
+        r = walk_cgroups(ctx, &cgroups, &n_cgroups);
+        if (r < 0)
+                return 0; /* Skip metric on failure */
+
+        FOREACH_ARRAY(c, cgroups, n_cgroups) {
+                uint64_t us;
+
+                r = cg_get_keyed_attribute_uint64((*c)->path, "cpu.stat", "usage_usec", &us);
+                if (r < 0)
+                        continue;
+
+                r = metric_build_send_unsigned(
+                                context,
+                                (*c)->unit,
+                                us * NSEC_PER_USEC,
+                                /* fields= */ NULL);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+static int memory_usage_build_json(MetricFamilyContext *context, void *userdata) {
+        CGroupContext *ctx = ASSERT_PTR(userdata);
+        CGroupInfo **cgroups;
+        size_t n_cgroups;
+        int r;
+
+        assert(context);
+
+        r = walk_cgroups(ctx, &cgroups, &n_cgroups);
+        if (r < 0)
+                return 0;
+
+        FOREACH_ARRAY(c, cgroups, n_cgroups) {
+                uint64_t current = 0, limit = UINT64_MAX;
+
+                r = cg_get_attribute_as_uint64((*c)->path, "memory.current", &current);
+                if (r >= 0) {
+                        /* Walk up the cgroup tree to find the tightest memory limit */
+                        _cleanup_free_ char *path_buf = strdup((*c)->path);
+                        if (!path_buf)
+                                return log_oom();
+
+                        for (char *p = path_buf;;) {
+                                uint64_t high, max;
+
+                                r = cg_get_attribute_as_uint64(p, "memory.max", &max);
+                                if (r >= 0 && max < limit)
+                                        limit = max;
+
+                                r = cg_get_attribute_as_uint64(p, "memory.high", &high);
+                                if (r >= 0 && high < limit)
+                                        limit = high;
+
+                                /* Move to parent */
+                                const char *e;
+                                r = path_find_last_component(p, /* accept_dot_dot= */ false, &e, NULL);
+                                if (r <= 0)
+                                        break;
+                                p[e - p] = '\0';
+                        }
+
+                        if (limit != UINT64_MAX && limit > current) {
+                                _cleanup_(sd_json_variant_unrefp) sd_json_variant *fields = NULL;
+                                r = sd_json_buildo(&fields, SD_JSON_BUILD_PAIR_STRING("type", "available"));
+                                if (r < 0)
+                                        return r;
+
+                                r = metric_build_send_unsigned(
+                                                context,
+                                                (*c)->unit,
+                                                limit - current,
+                                                fields);
+                                if (r < 0)
+                                        return r;
+                        }
+
+                        _cleanup_(sd_json_variant_unrefp) sd_json_variant *fields = NULL;
+                        r = sd_json_buildo(&fields, SD_JSON_BUILD_PAIR_STRING("type", "current"));
+                        if (r < 0)
+                                return r;
+
+                        r = metric_build_send_unsigned(
+                                        context,
+                                        (*c)->unit,
+                                        current,
+                                        fields);
+                        if (r < 0)
+                                return r;
+                }
+
+                uint64_t val;
+                r = cg_get_attribute_as_uint64((*c)->path, "memory.peak", &val);
+                if (r >= 0) {
+                        _cleanup_(sd_json_variant_unrefp) sd_json_variant *fields = NULL;
+                        r = sd_json_buildo(&fields, SD_JSON_BUILD_PAIR_STRING("type", "peak"));
+                        if (r < 0)
+                                return r;
+
+                        r = metric_build_send_unsigned(
+                                        context,
+                                        (*c)->unit,
+                                        val,
+                                        fields);
+                        if (r < 0)
+                                return r;
+                }
+        }
+
+        return 0;
+}
+
+/* Parse io.stat for a cgroup once, summing both rbytes= and rios= fields in a
+ * single pass to avoid reading the file twice. */
+static int io_stat_parse(const char *cgroup_path, uint64_t *ret_rbytes, uint64_t *ret_rios) {
+        _cleanup_free_ char *path = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        uint64_t rbytes = 0, rios = 0;
+        int r;
+
+        r = cg_get_path(cgroup_path, "io.stat", &path);
+        if (r < 0)
+                return r;
+
+        f = fopen(path, "re");
+        if (!f)
+                return -errno;
+
+        for (;;) {
+                _cleanup_free_ char *line = NULL;
+                const char *p;
+
+                r = read_line(f, LONG_LINE_MAX, &line);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        break;
+
+                p = line;
+                p += strcspn(p, WHITESPACE);
+                p += strspn(p, WHITESPACE);
+
+                for (;;) {
+                        _cleanup_free_ char *word = NULL;
+
+                        r = extract_first_word(&p, &word, NULL, EXTRACT_RETAIN_ESCAPE);
+                        if (r < 0)
+                                return r;
+                        if (r == 0)
+                                break;
+
+                        const char *v;
+                        uint64_t val;
+
+                        v = startswith(word, "rbytes=");
+                        if (v && safe_atou64(v, &val) >= 0) {
+                                rbytes += val;
+                                continue;
+                        }
+
+                        v = startswith(word, "rios=");
+                        if (v && safe_atou64(v, &val) >= 0)
+                                rios += val;
+                }
+        }
+
+        *ret_rbytes = rbytes;
+        *ret_rios = rios;
+        return 0;
+}
+
+static int ensure_io_stat_cached(CGroupInfo *info) {
+        int r;
+
+        assert(info);
+
+        if (info->io_stat_cached > 0)
+                return 0;
+        if (info->io_stat_cached < 0)
+                return info->io_stat_cached;
+
+        r = io_stat_parse(info->path, &info->io_rbytes, &info->io_rios);
+        if (r < 0) {
+                if (r != -ENOENT)
+                        log_debug_errno(r, "Failed to parse IO stats for '%s': %m", info->path);
+                info->io_stat_cached = r;
+                return r;
+        }
+
+        info->io_stat_cached = 1;
+        return 0;
+}
+
+static int io_read_bytes_build_json(MetricFamilyContext *context, void *userdata) {
+        CGroupContext *ctx = ASSERT_PTR(userdata);
+        CGroupInfo **cgroups;
+        size_t n_cgroups;
+        int r;
+
+        assert(context);
+
+        r = walk_cgroups(ctx, &cgroups, &n_cgroups);
+        if (r < 0)
+                return 0;
+
+        FOREACH_ARRAY(c, cgroups, n_cgroups) {
+                if (ensure_io_stat_cached(*c) < 0)
+                        continue;
+
+                r = metric_build_send_unsigned(
+                                context,
+                                (*c)->unit,
+                                (*c)->io_rbytes,
+                                /* fields= */ NULL);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+static int io_read_operations_build_json(MetricFamilyContext *context, void *userdata) {
+        CGroupContext *ctx = ASSERT_PTR(userdata);
+        CGroupInfo **cgroups;
+        size_t n_cgroups;
+        int r;
+
+        assert(context);
+
+        r = walk_cgroups(ctx, &cgroups, &n_cgroups);
+        if (r < 0)
+                return 0;
+
+        FOREACH_ARRAY(c, cgroups, n_cgroups) {
+                if (ensure_io_stat_cached(*c) < 0)
+                        continue;
+
+                r = metric_build_send_unsigned(
+                                context,
+                                (*c)->unit,
+                                (*c)->io_rios,
+                                /* fields= */ NULL);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+static int tasks_current_build_json(MetricFamilyContext *context, void *userdata) {
+        CGroupContext *ctx = ASSERT_PTR(userdata);
+        CGroupInfo **cgroups;
+        size_t n_cgroups;
+        int r;
+
+        assert(context);
+
+        r = walk_cgroups(ctx, &cgroups, &n_cgroups);
+        if (r < 0)
+                return 0;
+
+        FOREACH_ARRAY(c, cgroups, n_cgroups) {
+                uint64_t val;
+
+                r = cg_get_attribute_as_uint64((*c)->path, "pids.current", &val);
+                if (r < 0)
+                        continue;
+
+                r = metric_build_send_unsigned(
+                                context,
+                                (*c)->unit,
+                                val,
+                                /* fields= */ NULL);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+static const MetricFamily cgroup_metric_family_table[] = {
+        /* Keep metrics ordered alphabetically */
+        {
+                .name = METRIC_IO_SYSTEMD_CGROUP_PREFIX "CpuUsage",
+                .description = "Per unit metric: CPU usage in nanoseconds",
+                .type = METRIC_FAMILY_TYPE_COUNTER,
+                .generate = cpu_usage_build_json,
+        },
+        {
+                .name = METRIC_IO_SYSTEMD_CGROUP_PREFIX "IOReadBytes",
+                .description = "Per unit metric: IO bytes read",
+                .type = METRIC_FAMILY_TYPE_COUNTER,
+                .generate = io_read_bytes_build_json,
+        },
+        {
+                .name = METRIC_IO_SYSTEMD_CGROUP_PREFIX "IOReadOperations",
+                .description = "Per unit metric: IO read operations",
+                .type = METRIC_FAMILY_TYPE_COUNTER,
+                .generate = io_read_operations_build_json,
+        },
+        {
+                .name = METRIC_IO_SYSTEMD_CGROUP_PREFIX "MemoryUsage",
+                .description = "Per unit metric: memory usage in bytes",
+                .type = METRIC_FAMILY_TYPE_GAUGE,
+                .generate = memory_usage_build_json,
+        },
+        {
+                .name = METRIC_IO_SYSTEMD_CGROUP_PREFIX "TasksCurrent",
+                .description = "Per unit metric: current number of tasks",
+                .type = METRIC_FAMILY_TYPE_GAUGE,
+                .generate = tasks_current_build_json,
+        },
+        {}
+};
+
+int vl_method_describe_metrics(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
+        return metrics_method_describe(cgroup_metric_family_table, link, parameters, flags, userdata);
+}
+
+int vl_method_list_metrics(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) {
+        CGroupContext *ctx = ASSERT_PTR(userdata);
+        int r;
+
+        r = metrics_method_list(cgroup_metric_family_table, link, parameters, flags, userdata);
+
+        cgroup_context_flush(ctx);
+
+        return r;
+}
diff --git a/src/report/report-cgroup.h b/src/report/report-cgroup.h
new file mode 100644
index 00000000000..dae8411df58
--- /dev/null
+++ b/src/report/report-cgroup.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "shared-forward.h"
+
+#define METRIC_IO_SYSTEMD_CGROUP_PREFIX "io.systemd.CGroup."
+
+typedef struct CGroupInfo CGroupInfo;
+
+typedef struct CGroupContext {
+        CGroupInfo **cgroups;
+        size_t n_cgroups;
+        bool cache_populated;
+} CGroupContext;
+
+CGroupContext *cgroup_context_free(CGroupContext *ctx);
+DEFINE_TRIVIAL_CLEANUP_FUNC(CGroupContext*, cgroup_context_free);
+
+int vl_method_list_metrics(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata);
+int vl_method_describe_metrics(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata);
diff --git a/test/units/TEST-74-AUX-UTILS.report.sh b/test/units/TEST-74-AUX-UTILS.report.sh
index 0b9006e0590..f92f1ed7507 100755
--- a/test/units/TEST-74-AUX-UTILS.report.sh
+++ b/test/units/TEST-74-AUX-UTILS.report.sh
@@ -30,6 +30,13 @@ REPORT=/usr/lib/systemd/systemd-report
 "$REPORT" describe-metrics io.systemd piff
 "$REPORT" describe-metrics piff
 
+# test io.systemd.CGroup Metrics
+systemctl start systemd-report-cgroup.socket
+varlinkctl info /run/systemd/report/io.systemd.CGroup
+varlinkctl list-methods /run/systemd/report/io.systemd.CGroup
+varlinkctl --more call /run/systemd/report/io.systemd.CGroup io.systemd.Metrics.List {}
+varlinkctl --more call /run/systemd/report/io.systemd.CGroup io.systemd.Metrics.Describe {}
+
 # test io.systemd.Network Metrics
 varlinkctl info /run/systemd/report/io.systemd.Network
 varlinkctl list-methods /run/systemd/report/io.systemd.Network
diff --git a/units/meson.build b/units/meson.build
index 02c2db074c2..a7a3e6c5d61 100644
--- a/units/meson.build
+++ b/units/meson.build
@@ -720,6 +720,8 @@ units = [
           'file' : 'systemd-repart@.service',
           'conditions' : ['ENABLE_REPART'],
         },
+        { 'file' : 'systemd-report-cgroup.socket' },
+        { 'file' : 'systemd-report-cgroup@.service.in' },
         {
           'file' : 'systemd-resolved.service.in',
           'conditions' : ['ENABLE_RESOLVE'],
diff --git a/units/systemd-report-cgroup.socket b/units/systemd-report-cgroup.socket
new file mode 100644
index 00000000000..39a867cd40c
--- /dev/null
+++ b/units/systemd-report-cgroup.socket
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=CGroup Report Varlink Socket
+DefaultDependencies=no
+Before=sockets.target shutdown.target
+Conflicts=shutdown.target
+
+[Socket]
+ListenStream=/run/systemd/report/io.systemd.CGroup
+FileDescriptorName=varlink
+SocketMode=0666
+Accept=yes
+MaxConnectionsPerSource=16
+RemoveOnStop=yes
+
+[Install]
+WantedBy=sockets.target
diff --git a/units/systemd-report-cgroup@.service.in b/units/systemd-report-cgroup@.service.in
new file mode 100644
index 00000000000..6f18c647dd2
--- /dev/null
+++ b/units/systemd-report-cgroup@.service.in
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=CGroup Report Service
+DefaultDependencies=no
+Conflicts=shutdown.target
+Before=shutdown.target
+
+[Service]
+CapabilityBoundingSet=
+DeviceAllow=
+DynamicUser=yes
+IPAddressDeny=any
+LockPersonality=yes
+MemoryDenyWriteExecute=yes
+PrivateDevices=yes
+PrivateIPC=yes
+PrivateNetwork=yes
+PrivateTmp=disconnected
+ProtectControlGroups=yes
+ProtectHome=yes
+ProtectHostname=yes
+ProtectKernelLogs=yes
+ProtectKernelModules=yes
+ProtectKernelTunables=yes
+ProtectSystem=strict
+RestrictAddressFamilies=AF_UNIX
+RestrictNamespaces=yes
+RestrictRealtime=yes
+RestrictSUIDSGID=yes
+RuntimeMaxSec=1min
+SystemCallArchitectures=native
+SystemCallErrorNumber=EPERM
+SystemCallFilter=@system-service
+ExecStart={{LIBEXECDIR}}/systemd-report-cgroup
-- 
2.47.3