git.ipfire.org Git - thirdparty/collectd.git/commitdiff
cpu plugin: Implement `usage_count()`.
author Florian Forster <octo@collectd.org>
Mon, 8 Jan 2024 16:33:43 +0000 (17:33 +0100)
committer Florian Forster <octo@collectd.org>
Mon, 22 Jan 2024 15:07:57 +0000 (16:07 +0100)
This is done in a second aggregation loop because a CPU-level aggregate rate
must be available to properly scale the counter.
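In effect, each state's rate is divided by the CPU-level aggregate rate and
scaled to 1000000, so that the per-state counters of one CPU together advance
by one million "microseconds" per second of wall-clock time; the scaled rate
is then converted back into a counter via rate_to_value(). A minimal sketch of
that scaling step (the standalone typedefs, the helper name and the example
numbers are illustrative only, not part of the patch):

    #include <stdint.h>

    typedef double gauge_t;   /* stand-ins for the collectd typedefs */
    typedef int64_t derive_t;

    /* Scale one state's rate so that all states of one CPU sum to 1000000/s. */
    static gauge_t scaled_state_rate(gauge_t state_rate, gauge_t cpu_rate) {
      return 1000000.0 * state_rate / cpu_rate;
    }

    /* Example: a state accounting for 25 % of the CPU's total rate yields a
     * scaled rate of 250000/s, so its counter grows by 2500000 over a 10 s
     * interval once fed through rate_to_value(). */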

src/cpu.c
src/cpu_test.c

index 4472e044b00a62a172010371d59670209f17d683..dc1d4e0e12490230c003cf437131c2eac420cb59 100644 (file)
--- a/src/cpu.c
+++ b/src/cpu.c
@@ -336,9 +336,15 @@ static int init(void) {
 } /* int init */
 
 typedef struct {
-  value_to_rate_state_t conv;
   gauge_t rate;
   bool has_value;
+  value_to_rate_state_t conv;
+
+  /* count is a scaled counter: summed over all states, the counters increase
+   * by 1000000 per second. */
+  derive_t count;
+  bool has_count;
+  rate_to_value_state_t to_count;
 } usage_state_t;
 
 typedef struct {
@@ -409,6 +415,9 @@ __attribute__((unused)) static int usage_record(usage_t *u, size_t cpu,
 
   status = value_to_rate(&us->rate, (value_t){.derive = count}, DS_TYPE_DERIVE,
                          u->time, &us->conv);
+  if (status == EAGAIN) {
+    return 0;
+  }
   if (status != 0) {
     return status;
   }
@@ -422,6 +431,7 @@ static void usage_finalize(usage_t *u) {
     return;
   }
 
+  gauge_t global_rate = 0;
   size_t cpu_num = u->states_num / STATE_MAX;
   for (size_t cpu = 0; cpu < cpu_num; cpu++) {
     size_t active_index = (cpu * STATE_MAX) + STATE_ACTIVE;
@@ -430,6 +440,8 @@ static void usage_finalize(usage_t *u) {
     active->rate = 0;
     active->has_value = false;
 
+    gauge_t cpu_rate = 0;
+
     for (state_t s = 0; s < STATE_ACTIVE; s++) {
       size_t index = (cpu * STATE_MAX) + s;
       usage_state_t *us = u->states + index;
@@ -438,21 +450,71 @@ static void usage_finalize(usage_t *u) {
         continue;
       }
 
+      // aggregate by cpu
+      cpu_rate += us->rate;
+
+      // aggregate by state
       u->global[s].rate += us->rate;
       u->global[s].has_value = true;
 
+      // global aggregate
+      global_rate += us->rate;
+
       if (s != STATE_IDLE) {
         active->rate += us->rate;
         active->has_value = true;
       }
     }
 
+    /* With cpu_rate available, calculate a counter for each state that is
+     * normalized to microseconds. I.e. all states of one CPU sum up to 1000000
+     * us per second. */
+    for (state_t s = 0; s < STATE_MAX; s++) {
+      size_t index = (cpu * STATE_MAX) + s;
+      usage_state_t *us = u->states + index;
+
+      us->count = -1;
+      if (!us->has_value) {
+        /* Ensure that us->to_count is initialized. */
+        rate_to_value(&(value_t){0}, 0.0, &us->to_count, DS_TYPE_DERIVE,
+                      u->time);
+        continue;
+      }
+
+      gauge_t rate = 1000000.0 * us->rate / cpu_rate;
+      value_t v = {0};
+      int status =
+          rate_to_value(&v, rate, &us->to_count, DS_TYPE_DERIVE, u->time);
+      if (status == 0) {
+        us->count = v.derive;
+        us->has_count = true;
+      }
+    }
+
     if (active->has_value) {
       u->global[STATE_ACTIVE].rate += active->rate;
       u->global[STATE_ACTIVE].has_value = true;
     }
   }
 
+  for (state_t s = 0; s < STATE_MAX; s++) {
+    usage_state_t *us = &u->global[s];
+
+    us->count = -1;
+    if (!us->has_value) {
+      continue;
+    }
+
+    gauge_t rate = CDTIME_T_TO_DOUBLE(u->interval) * us->rate / global_rate;
+    value_t v = {0};
+    int status =
+        rate_to_value(&v, rate, &us->to_count, DS_TYPE_DERIVE, u->time);
+    if (status == 0) {
+      us->count = v.derive;
+      us->has_count = true;
+    }
+  }
+
   u->finalized = true;
 }
 
@@ -501,6 +563,19 @@ __attribute__((unused)) static gauge_t usage_global_ratio(usage_t *u,
   return usage_global_rate(u, state) / global_rate;
 }
 
+__attribute__((unused)) static derive_t usage_count(usage_t *u, size_t cpu,
+                                                    state_t state) {
+  usage_finalize(u);
+
+  size_t index = (cpu * STATE_MAX) + state;
+  if (index >= u->states_num) {
+    return -1;
+  }
+  usage_state_t *us = u->states + index;
+
+  return us->count;
+}
+
 /* Takes the zero-indexed number of a CPU and makes sure that the module-global
  * cpu_states buffer is large enough. Returns ENOMEM on error. */
 static int cpu_states_alloc(size_t cpu_num) /* {{{ */
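For context, a caller of the new accessor would look roughly like the sketch
below (hedged; only usage_count(), STATE_IDLE and the -1 "no value" return are
taken from the patch, the surrounding variable names are hypothetical):

    /* Hypothetical caller: read the scaled idle-time counter of CPU 0. */
    derive_t idle_us = usage_count(&usage, 0, STATE_IDLE);
    if (idle_us >= 0) {
      /* valid counter in "microseconds"; dispatch or log it here */
    }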
index df4c577bf28f6941c508bf1d0684528f2551125d..5b2cbb618fbb79dd336d0dde6eaa21d991549a98 100644 (file)
--- a/src/cpu_test.c
+++ b/src/cpu_test.c
@@ -107,6 +107,84 @@ DEF_TEST(usage_ratio) {
   return 0;
 }
 
+static bool expect_usage_count(derive_t want, derive_t got, size_t cpu,
+                               state_t state) {
+  bool ok = true;
+  char msg[1024] = {0};
+  snprintf(msg, sizeof(msg), "usage_count(cpu=%zu, state=\"%s\") = %" PRId64,
+           cpu, cpu_state_names[state], got);
+
+  derive_t diff = got - want;
+  if (diff < -1 || diff > 1) {
+    snprintf(msg, sizeof(msg),
+             "usage_count(cpu=%zu, state=\"%s\") = %" PRId64 ", want %" PRId64,
+             cpu, cpu_state_names[state], got, want);
+    ok = false;
+  }
+
+  LOG(ok, msg);
+  return ok;
+}
+
+DEF_TEST(usage_count) {
+  int ret = 0;
+  usage_t usage = {0};
+#define CPU_NUM 2
+
+  cdtime_t t0 = TIME_T_TO_CDTIME_T(100);
+  usage_init(&usage, t0);
+  for (size_t cpu = 0; cpu < CPU_NUM; cpu++) {
+    for (state_t s = 0; s < STATE_ACTIVE; s++) {
+      usage_record(&usage, cpu, s, 1000);
+    }
+  }
+  usage_finalize(&usage);
+
+  cdtime_t interval = TIME_T_TO_CDTIME_T(300);
+  cdtime_t t1 = t0 + interval;
+  usage_init(&usage, t1);
+  derive_t cpu_increment[CPU_NUM] = {0};
+  for (size_t cpu = 0; cpu < CPU_NUM; cpu++) {
+    for (state_t s = 0; s < STATE_ACTIVE; s++) {
+      derive_t increment = ((derive_t)cpu * STATE_ACTIVE) + ((derive_t)s);
+      cpu_increment[cpu] += increment;
+      usage_record(&usage, cpu, s, 1000 + increment);
+    }
+  }
+
+  gauge_t sum_time = 0;
+  for (size_t cpu = 0; cpu < CPU_NUM; cpu++) {
+    derive_t active_increment = 0;
+    for (state_t s = 0; s < STATE_ACTIVE; s++) {
+      derive_t increment = ((derive_t)cpu * STATE_ACTIVE) + ((derive_t)s);
+      if (s != STATE_IDLE) {
+        active_increment += increment;
+      }
+
+      gauge_t want_time = 1000000.0 * CDTIME_T_TO_DOUBLE(interval) *
+                          ((gauge_t)increment) / ((gauge_t)cpu_increment[cpu]);
+      sum_time += want_time;
+
+      bool ok = expect_usage_count((derive_t)want_time,
+                                   usage_count(&usage, cpu, s), cpu, s);
+      ret = ret || !ok;
+    }
+
+    gauge_t want_active_time = 1000000.0 * CDTIME_T_TO_DOUBLE(interval) *
+                               ((gauge_t)active_increment) /
+                               ((gauge_t)cpu_increment[cpu]);
+    bool ok = expect_usage_count((derive_t)want_active_time,
+                                 usage_count(&usage, cpu, STATE_ACTIVE), cpu,
+                                 STATE_ACTIVE);
+    ret = ret || !ok;
+  }
+  EXPECT_EQ_DOUBLE(CPU_NUM * 1000000.0 * CDTIME_T_TO_DOUBLE(interval),
+                   sum_time);
+
+  usage_reset(&usage);
+  return ret;
+}
+
 DEF_TEST(usage_active_rate) {
   usage_t usage = {0};
 
@@ -242,6 +320,7 @@ DEF_TEST(usage_global_ratio) {
 int main(void) {
   RUN_TEST(usage_rate);
   RUN_TEST(usage_ratio);
+  RUN_TEST(usage_count);
   RUN_TEST(usage_active_rate);
   RUN_TEST(usage_global_rate);
   RUN_TEST(usage_global_ratio);