#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
+#include <time.h>
#include <level_zero/ze_api.h>
#include <level_zero/zes_api.h>
static uint32_t gpu_count;
static struct {
bool gpuinfo;
+ bool logmetrics;
gpu_disable_t disabled;
output_t output;
uint32_t samples;
#define KEY_DISABLE_TEMP "DisableTemperature"
#define KEY_DISABLE_THROTTLE "DisableThrottleTime"
-#define KEY_METRICS_OUTPUT "MetricsOutput"
#define KEY_LOG_GPU_INFO "LogGpuInfo"
+#define KEY_LOG_METRICS "LogMetrics"
+#define KEY_METRICS_OUTPUT "MetricsOutput"
#define KEY_SAMPLES "Samples"
#define MAX_SAMPLES 64
}
/* Add device labels to all metrics in given metric family and submit family to
- * collectd. Resets metric family after dispatch */
+ * collectd, and log the metric if metric logging is enabled.
+ * Resets metric family after dispatch */
static void gpu_submit(gpu_device_t *gpu, metric_family_t *fam) {
- metric_t *m = fam->metric.ptr;
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ const char *pci_bdf = gpu->pci_bdf;
+ /* logmetrics readability: skip common BDF address prefix */
+ if (strncmp("0000:", pci_bdf, 5) == 0) {
+ pci_bdf += 5;
+ }
+
for (size_t i = 0; i < fam->metric.num; i++) {
- metric_label_set(m + i, "pci_bdf", gpu->pci_bdf);
+ metric_t *m = fam->metric.ptr + i;
+
+ /* When metric logging is enabled, log this metric's value here, before
+ * the per-device labels are attached to it below. */
+ if (config.logmetrics) {
+ /* Describe the metric by the first of these labels that it carries;
+ * the "<type>" placeholder is logged when it carries none of them. */
+ static const char *const labels[] = {"direction", "location", "type"};
+ const char *type = "<type>";
+ for (size_t j = 0; j < STATIC_ARRAY_SIZE(labels); j++) {
+ const char *l = metric_label_get(m, labels[j]);
+ if (l != NULL) {
+ type = l;
+ break;
+ }
+ }
+ /* "i" is a size_t, so it must be printed with %zu; time_t has no
+ * portable printf conversion, so the seconds are cast to long for %ld.
+ * The sub-second part is shown in milliseconds. */
+ INFO("[%7ld.%03ld] %s: %s / %s [%zu]: %.3f", (long)ts.tv_sec,
+ ts.tv_nsec / 1000000, pci_bdf, fam->name, type, i,
+ fam->type == METRIC_TYPE_COUNTER ? m->value.counter
+ : m->value.gauge);
+ }
+
+ /* add extra per-metric labels */
+ metric_label_set(m, "pci_bdf", gpu->pci_bdf);
if (gpu->dev_file) {
- metric_label_set(m + i, "dev_file", gpu->dev_file);
+ metric_label_set(m, "dev_file", gpu->dev_file);
}
if (gpu->pci_dev) {
- metric_label_set(m + i, "pci_dev", gpu->pci_dev);
+ metric_label_set(m, "pci_dev", gpu->pci_dev);
}
}
+
int status = plugin_dispatch_metric_family(fam);
if (status != 0) {
ERROR(PLUGIN_NAME ": gpu_submit(%s, %s) failed: %s", gpu->pci_bdf,
config.disabled.throttle = IS_TRUE(value);
} else if (strcasecmp(key, KEY_LOG_GPU_INFO) == 0) {
config.gpuinfo = IS_TRUE(value);
+ } else if (strcasecmp(key, KEY_LOG_METRICS) == 0) {
+ config.logmetrics = IS_TRUE(value);
} else if (strcasecmp(key, KEY_METRICS_OUTPUT) == 0) {
config.output = 0;
static const char delim[] = ",:/ ";
void module_register(void) {
/* NOTE: key strings *must* be static */
static const char *config_keys[] = {
- KEY_DISABLE_ENGINE, KEY_DISABLE_ENGINE_SINGLE,
- KEY_DISABLE_FABRIC, KEY_DISABLE_FREQ,
- KEY_DISABLE_MEM, KEY_DISABLE_MEMBW,
- KEY_DISABLE_POWER, KEY_DISABLE_RAS,
- KEY_DISABLE_RAS_SEPARATE, KEY_DISABLE_TEMP,
- KEY_DISABLE_THROTTLE, KEY_METRICS_OUTPUT,
- KEY_LOG_GPU_INFO, KEY_SAMPLES};
+ KEY_DISABLE_ENGINE, KEY_DISABLE_ENGINE_SINGLE, KEY_DISABLE_FABRIC,
+ KEY_DISABLE_FREQ, KEY_DISABLE_MEM, KEY_DISABLE_MEMBW,
+ KEY_DISABLE_POWER, KEY_DISABLE_RAS, KEY_DISABLE_RAS_SEPARATE,
+ KEY_DISABLE_TEMP, KEY_DISABLE_THROTTLE, KEY_METRICS_OUTPUT,
+ KEY_LOG_GPU_INFO, KEY_LOG_METRICS, KEY_SAMPLES};
const int config_keys_num = STATIC_ARRAY_SIZE(config_keys);
plugin_register_config(PLUGIN_NAME, gpu_config_parse, config_keys,