.type = METRIC_TYPE_GAUGE,
};
metric_family_t fam_counter = {
- .help = "Total time HW frequency has been throttled (in microseconds)",
- .name = METRIC_PREFIX "throttled_usecs_total",
- .type = METRIC_TYPE_COUNTER,
+ .help = "Total time HW frequency has been throttled (in seconds)",
+ .name = METRIC_PREFIX "throttled_seconds_total",
+ .type = METRIC_TYPE_COUNTER_FP,
};
metric_t metric = {0};
break;
}
if (config.output & OUTPUT_BASE) {
- /* cannot convert microsecs to secs as counters are integers */
- metric.value.counter = throttle.throttleTime;
+ /* times are in microseconds:
+ * https://spec.oneapi.io/level-zero/latest/sysman/api.html#zes-freq-throttle-time-t
+ */
+ metric.value.counter_fp = throttle.throttleTime / 1e6;
metric_family_metric_append(&fam_counter, metric);
reported_base = true;
}
zes_freq_throttle_time_t *old = &gpu->throttle[i];
if (old->timestamp && throttle.timestamp > old->timestamp &&
(config.output & OUTPUT_RATIO)) {
- /* micro seconds => throttle ratio */
+ /* throttle time & timestamp are both in microsecs */
metric.value.gauge = (throttle.throttleTime - old->throttleTime) /
(double)(throttle.timestamp - old->timestamp);
metric_family_metric_append(&fam_ratio, metric);
.type = METRIC_TYPE_UP_DOWN_FP,
};
metric_family_t fam_energy = {
- .help = "Total energy consumption since boot (in microjoules)",
- .name = METRIC_PREFIX "energy_ujoules_total",
- .type = METRIC_TYPE_COUNTER,
+ .help = "Total energy consumption since boot (in joules)",
+ .name = METRIC_PREFIX "energy_joules_total",
+ .type = METRIC_TYPE_COUNTER_FP,
};
metric_t metric = {0};
}
metric_set_subdev(&metric, props.onSubdevice, props.subdeviceId);
if (config.output & OUTPUT_BASE) {
- metric.value.counter = counter.energy;
+ metric.value.counter_fp = counter.energy / 1e6;
metric_family_metric_append(&fam_energy, metric);
reported_base = true;
}
.type = METRIC_TYPE_GAUGE,
};
metric_family_t fam_counter = {
- .help = "GPU engine / group execution time (activity) total (in "
- "microseconds)",
- .name = METRIC_PREFIX "engine_use_usecs_total",
- .type = METRIC_TYPE_COUNTER,
+ .help = "GPU engine / group execution (use / activity) time total (in "
+ "seconds)",
+ .name = METRIC_PREFIX "engine_use_seconds_total",
+ .type = METRIC_TYPE_COUNTER_FP,
};
metric_t metric = {0};
metric_set_subdev(&metric, props.onSubdevice, props.subdeviceId);
metric_label_set(&metric, "type", vname);
if (config.output & OUTPUT_BASE) {
- metric.value.counter = stats.activeTime;
+ /* Intel L0 backend provides times in microsecs:
+ * https://spec.oneapi.io/level-zero/latest/sysman/api.html#zes-engine-stats-t
+ */
+ metric.value.counter_fp = stats.activeTime / 1e6;
metric_family_metric_append(&fam_counter, metric);
reported_base = true;
}
#define SYSMAN_UNIT_TEST_BUILD 1
#include "gpu_sysman.c" /* test this */
+#include "testing.h"
/* include metric functions + their dependencies directly, instead of
* building & linking libcommon.a (like normal collectd builds do)?
{"temperature_ratio", true, false, TEMP_RATIO_INIT, TEMP_RATIO_INC, 0, 0.0},
/* while counters increase, per-time incremented value should stay same */
- {"energy_ujoules_total", true, false, COUNTER_START, COUNTER_INC, 0, 0.0},
+ {"energy_joules_total", true, false, COUNTER_START / 1e6, COUNTER_INC / 1e6,
+ 0, 0.0},
{"engine_ratio/all", true, false, COUNTER_RATIO, 0, 0, 0.0},
- {"engine_use_usecs_total/all", true, false, COUNTER_START, COUNTER_INC, 0,
- 0.0},
+ {"engine_use_seconds_total/all", true, false, COUNTER_START / 1e6,
+ COUNTER_INC / 1e6, 0, 0.0},
{"fabric_port_bytes_total/healthy/off/read", true, false, 2 * COUNTER_START,
2 * COUNTER_INC, 0, 0.0},
{"fabric_port_bytes_total/healthy/off/write", true, false, COUNTER_START,
{"power_ratio", true, false, COUNTER_INC / POWER_LIMIT / TIME_INC, 0, 0,
0.0},
{"power_watts", true, false, COUNTER_RATIO, 0, 0, 0.0},
- {"throttled_usecs_total/gpu", true, false, COUNTER_START, COUNTER_INC, 0,
- 0.0},
+ {"throttled_seconds_total/gpu", true, false, COUNTER_START / 1e6,
+ COUNTER_INC / 1e6, 0, 0.0},
{"throttled_ratio/gpu", true, false, COUNTER_RATIO, 0, 0, 0.0},
};
+/* Compare two doubles with the EXPECT_EQ_DOUBLE() macro (presumably
+ * provided by testing.h).
+ *
+ * Returns 0 when the macro accepts the values as equal. On a mismatch the
+ * macro itself returns -1 from this function (see the note in the body),
+ * so the statements following it are skipped.
+ */
+static int expect_double_eq(double expect, double actual) {
+ /* Workaround for the "unused-variable" warning on testing.h: touch
+  * fail_count__ here so that it counts as used */
+ fail_count__++;
+ /* macro returns -1 on non-equality, continues if equal */
+ EXPECT_EQ_DOUBLE(expect, actual);
+ /* reached only when the values compared equal: undo the increment */
+ fail_count__--;
+ return 0;
+}
+}
+
/* VALIDATE: reset tracked metrics values and return count of how many
* metrics were not set since last reset.
*
incrounds += multisampled / config.samples;
}
double expected = metric->value_init + incrounds * metric->value_inc;
- if (last != expected) {
+ if (expect_double_eq(expected, last) != 0) {
fprintf(
stderr,
"ERROR: expected %g, but got value %g for metric '%s' on round %d\n",