From: Florian Forster Date: Mon, 8 Jan 2024 16:33:43 +0000 (+0100) Subject: cpu plugin: Implement `usage_count()`. X-Git-Tag: 6.0.0-rc0~5^2~22 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d9643d94e128782aa4d9f9626124979484fc6397;p=thirdparty%2Fcollectd.git cpu plugin: Implement `usage_count()`. This one is done in a second aggregation loop, because we require a CPU-level aggregate rate to be available to properly scale the counter. --- diff --git a/src/cpu.c b/src/cpu.c index 4472e044b..dc1d4e0e1 100644 --- a/src/cpu.c +++ b/src/cpu.c @@ -336,9 +336,15 @@ static int init(void) { } /* int init */ typedef struct { - value_to_rate_state_t conv; gauge_t rate; bool has_value; + value_to_rate_state_t conv; + + /* count is a scaled counter, so that all states in sum increase by 1000000 + * per second. */ + derive_t count; + bool has_count; + rate_to_value_state_t to_count; } usage_state_t; typedef struct { @@ -409,6 +415,9 @@ __attribute__((unused)) static int usage_record(usage_t *u, size_t cpu, status = value_to_rate(&us->rate, (value_t){.derive = count}, DS_TYPE_DERIVE, u->time, &us->conv); + if (status == EAGAIN) { + return 0; + } if (status != 0) { return status; } @@ -422,6 +431,7 @@ static void usage_finalize(usage_t *u) { return; } + gauge_t global_rate = 0; size_t cpu_num = u->states_num / STATE_MAX; for (size_t cpu = 0; cpu < cpu_num; cpu++) { size_t active_index = (cpu * STATE_MAX) + STATE_ACTIVE; @@ -430,6 +440,8 @@ static void usage_finalize(usage_t *u) { active->rate = 0; active->has_value = false; + gauge_t cpu_rate = 0; + for (state_t s = 0; s < STATE_ACTIVE; s++) { size_t index = (cpu * STATE_MAX) + s; usage_state_t *us = u->states + index; @@ -438,21 +450,71 @@ static void usage_finalize(usage_t *u) { continue; } + // aggregate by cpu + cpu_rate += us->rate; + + // aggregate by state u->global[s].rate += us->rate; u->global[s].has_value = true; + // global aggregate + global_rate += us->rate; + if (s != STATE_IDLE) { active->rate += us->rate; active->has_value = true; } } + /* With cpu_rate available, calculate a counter for each state that is + * normalized to microseconds. I.e. all states of one CPU sum up to 1000000 + * us per second. */ + for (state_t s = 0; s < STATE_MAX; s++) { + size_t index = (cpu * STATE_MAX) + s; + usage_state_t *us = u->states + index; + + us->count = -1; + if (!us->has_value) { + /* Ensure that us->to_count is initialized. */ + rate_to_value(&(value_t){0}, 0.0, &us->to_count, DS_TYPE_DERIVE, + u->time); + continue; + } + + gauge_t rate = 1000000.0 * us->rate / cpu_rate; + value_t v = {0}; + int status = + rate_to_value(&v, rate, &us->to_count, DS_TYPE_DERIVE, u->time); + if (status == 0) { + us->count = v.derive; + us->has_count = true; + } + } + if (active->has_value) { u->global[STATE_ACTIVE].rate += active->rate; u->global[STATE_ACTIVE].has_value = true; } } + for (state_t s = 0; s < STATE_MAX; s++) { + usage_state_t *us = &u->global[s]; + + us->count = -1; + if (!us->has_value) { + continue; + } + + gauge_t rate = CDTIME_T_TO_DOUBLE(u->interval) * us->rate / global_rate; + value_t v = {0}; + int status = + rate_to_value(&v, rate, &us->to_count, DS_TYPE_DERIVE, u->time); + if (status == 0) { + us->count = v.derive; + us->has_count = true; + } + } + u->finalized = true; } @@ -501,6 +563,19 @@ __attribute__((unused)) static gauge_t usage_global_ratio(usage_t *u, return usage_global_rate(u, state) / global_rate; } +__attribute__((unused)) static derive_t usage_count(usage_t *u, size_t cpu, + state_t state) { + usage_finalize(u); + + size_t index = (cpu * STATE_MAX) + state; + if (index >= u->states_num) { + return -1; + } + usage_state_t *us = u->states + index; + + return us->count; +} + /* Takes the zero-index number of a CPU and makes sure that the module-global * cpu_states buffer is large enough. Returne ENOMEM on erorr. */ static int cpu_states_alloc(size_t cpu_num) /* {{{ */ diff --git a/src/cpu_test.c b/src/cpu_test.c index df4c577bf..5b2cbb618 100644 --- a/src/cpu_test.c +++ b/src/cpu_test.c @@ -107,6 +107,84 @@ DEF_TEST(usage_ratio) { return 0; } +static bool expect_usage_count(derive_t want, derive_t got, size_t cpu, + state_t state) { + bool ok = true; + char msg[1024] = {0}; + snprintf(msg, sizeof(msg), "usage_count(cpu=%zu, state=\"%s\") = %" PRId64, + cpu, cpu_state_names[state], got); + + derive_t diff = got - want; + if (diff < -1 || diff > 1) { + snprintf(msg, sizeof(msg), + "usage_count(cpu=%zu, state=\"%s\") = %" PRId64 ", want %" PRId64, + cpu, cpu_state_names[state], got, want); + ok = false; + } + + LOG(ok, msg); + return ok; +} + +DEF_TEST(usage_count) { + int ret = 0; + usage_t usage = {0}; +#define CPU_NUM 2 + + cdtime_t t0 = TIME_T_TO_CDTIME_T(100); + usage_init(&usage, t0); + for (size_t cpu = 0; cpu < CPU_NUM; cpu++) { + for (state_t s = 0; s < STATE_ACTIVE; s++) { + usage_record(&usage, cpu, s, 1000); + } + } + usage_finalize(&usage); + + cdtime_t interval = TIME_T_TO_CDTIME_T(300); + cdtime_t t1 = t0 + interval; + usage_init(&usage, t1); + derive_t cpu_increment[CPU_NUM] = {0}; + for (size_t cpu = 0; cpu < CPU_NUM; cpu++) { + for (state_t s = 0; s < STATE_ACTIVE; s++) { + derive_t increment = ((derive_t)cpu * STATE_ACTIVE) + ((derive_t)s); + cpu_increment[cpu] += increment; + usage_record(&usage, cpu, s, 1000 + increment); + } + } + + gauge_t sum_time = 0; + for (size_t cpu = 0; cpu < CPU_NUM; cpu++) { + derive_t active_increment = 0; + for (state_t s = 0; s < STATE_ACTIVE; s++) { + derive_t increment = ((derive_t)cpu * STATE_ACTIVE) + ((derive_t)s); + if (s != STATE_IDLE) { + active_increment += increment; + } + + gauge_t want_time = 1000000.0 * CDTIME_T_TO_DOUBLE(interval) * + ((gauge_t)increment) / ((gauge_t)cpu_increment[cpu]); + sum_time += want_time; + + bool ok = expect_usage_count((derive_t)want_time, + usage_count(&usage, cpu, s), cpu, s); + ret = ret || !ok; + } + + gauge_t want_active_time = 1000000.0 * CDTIME_T_TO_DOUBLE(interval) * + ((gauge_t)active_increment) / + ((gauge_t)cpu_increment[cpu]); + bool ok = expect_usage_count((derive_t)want_active_time, + usage_count(&usage, cpu, STATE_ACTIVE), cpu, + STATE_ACTIVE); + ret = ret || !ok; + } + EXPECT_EQ_DOUBLE(CPU_NUM * 1000000.0 * CDTIME_T_TO_DOUBLE(interval), + sum_time); + + usage_reset(&usage); + return ret; +} + DEF_TEST(usage_active_rate) { usage_t usage = {0}; @@ -242,6 +320,7 @@ DEF_TEST(usage_global_ratio) { int main(void) { RUN_TEST(usage_rate); RUN_TEST(usage_ratio); + RUN_TEST(usage_count); RUN_TEST(usage_active_rate); RUN_TEST(usage_global_rate); RUN_TEST(usage_global_ratio);