--- /dev/null
+/**
+ * collectd - src/gpu_sysman.c
+ *
+ * Copyright(c) 2020-2022 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * - Eero Tamminen <eero.t.tamminen@intel.com>
+ *
+ * See: https://spec.oneapi.com/level-zero/latest/sysman/PROG.html
+ *
+ * Error handling:
+ * - All allocation checking is done with asserts, so plugin will abort
+ * if any allocation fails
+ * - All Sysman API call errors are logged
+ * - Sysman errors do not cause plugin initialization failure if even
+ * one GPU device is available with PCI ID
+ * - Sysman errors in metrics queries cause just given metric to be
+ * disabled (for given GPU)
+ */
+#include <assert.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <level_zero/ze_api.h>
+#include <level_zero/zes_api.h>
+
+/* whether to add "dev_file" label to metrics for Kubernetes Intel GPU plugin,
+ * needs (POSIX.1-2001) basename() + glob() and (POSIX.1-2008) getline()
+ * functions.
+ */
+#define ADD_DEV_FILE 1
+#if ADD_DEV_FILE
+#include <glob.h>
+#include <libgen.h>
+#endif
+
+#include "collectd.h"
+#include "plugin.h"
+#include "utils/common/common.h"
+
+#define PLUGIN_NAME "gpu_sysman" /* used as prefix in plugin log messages */
+#define METRIC_PREFIX "collectd_" PLUGIN_NAME "_" /* metric family name prefix */
+
+/* collectd plugin API callback finished OK */
+#define RET_OK 0
+/* plugin specific callback error return values, all of them negative.
+ *
+ * NOTE(review): value -8 is not assigned to any error in the code
+ * visible here
+ */
+#define RET_NO_METRICS -1 /* every metric is disabled in config */
+#define RET_INVALID_CONFIG -2
+#define RET_ZE_INIT_FAIL -3 /* zeInit() failed */
+#define RET_NO_DRIVERS -4 /* zeDriverGet() reported zero drivers */
+#define RET_ZE_DRIVER_GET_FAIL -5 /* zeDriverGet() failed */
+#define RET_ZE_DEVICE_GET_FAIL -6 /* zeDeviceGet() failed */
+#define RET_ZE_DEVICE_PROPS_FAIL -7 /* zeDeviceGetProperties() failed */
+#define RET_NO_GPUS -9 /* no (usable) GPU devices were found */
+
+/* GPU metrics to disable.
+ *
+ * One flag per metric group, a set flag means that the metric is not
+ * reported. Global values are in 'config.disabled', and they are copied
+ * to each GPU struct in gpu_config_init().
+ */
+typedef struct {
+ bool all; /* no metrics from whole GPU */
+ bool engine; /* listed as KEY_DISABLE_ENGINE */
+ bool engine_single; /* listed as KEY_DISABLE_ENGINE_SINGLE */
+ bool freq; /* listed as KEY_DISABLE_FREQ */
+ bool mem; /* listed as KEY_DISABLE_MEM */
+ bool membw; /* listed as KEY_DISABLE_MEMBW */
+ bool power; /* listed as KEY_DISABLE_POWER */
+ bool ras; /* listed as KEY_DISABLE_RAS */
+ bool ras_separate; /* gpu_ras() reports only error totals, not categories */
+ bool temp; /* listed as KEY_DISABLE_TEMP */
+ bool throttle; /* listed as KEY_DISABLE_THROTTLE */
+} gpu_disable_t;
+
+/* handles for the GPU devices discovered by Sysman library.
+ *
+ * One struct per GPU, stored in the 'gpus' array. Strings and arrays
+ * pointed by the members are heap allocated, and released in
+ * gpu_config_free().
+ */
+typedef struct {
+ char *pci_bdf; /* PCI address in BDF notation, "pci_bdf" metric label */
+ char *dev_file; /* matching DRM device file name, NULL if none was found */
+ /* number of types for metrics without allocs */
+ uint32_t ras_count;
+ uint32_t temp_count;
+ /* number of types for each counter metric */
+ uint32_t engine_count;
+ uint32_t membw_count;
+ uint32_t power_count;
+ uint32_t throttle_count;
+ /* number of types for each sampled metric */
+ uint32_t frequency_count;
+ uint32_t memory_count;
+ /* previous values for counters */
+ zes_engine_stats_t *engine;
+ zes_mem_bandwidth_t *membw;
+ zes_power_energy_counter_t *power;
+ zes_freq_throttle_time_t *throttle;
+ /* types * samples sized array of values, used for aggregate outputs */
+ zes_freq_state_t **frequency;
+ zes_mem_state_t **memory;
+ /* GPU specific disable flags */
+ gpu_disable_t disabled;
+ zes_device_handle_t handle; /* Sysman handle used for all device queries */
+ /* report counter */
+ uint64_t counter; /* zeroed in gpu_config_init() */
+} gpu_device_t;
+
+/* Which variants of the metrics are reported.
+ *
+ * Raw and derived values are separate bits, so OUTPUT_BOTH equals
+ * (OUTPUT_RAW | OUTPUT_DERIVED) and the value can be tested with a mask.
+ * OUTPUT_TYPES is the number of valid values.
+ */
+typedef enum {
+  OUTPUT_UNSET = 0,
+  OUTPUT_RAW = 1,
+  OUTPUT_DERIVED = 2,
+  OUTPUT_BOTH = 3, /* OUTPUT_RAW | OUTPUT_DERIVED */
+  OUTPUT_TYPES
+} output_t;
+
+/* names for the output_t values, indexed with the enum value */
+static const char *metrics_output[OUTPUT_TYPES] = {
+    [OUTPUT_UNSET] = "unset",
+    [OUTPUT_RAW] = "raw",
+    [OUTPUT_DERIVED] = "derived",
+    [OUTPUT_BOTH] = "both",
+};
+
+static gpu_device_t *gpus; /* GPU array, allocated in gpu_fetch(), NULL when freed */
+static uint32_t gpu_count; /* number of items used in 'gpus', set in gpu_config_init() */
+static struct {
+ bool gpuinfo; /* log GPU and config details with INFO() */
+ gpu_disable_t disabled; /* global disable flags, copied to each GPU */
+ output_t output; /* OUTPUT_UNSET is changed to OUTPUT_BOTH in gpu_config_check() */
+ uint32_t samples; /* number of cached sample sets, zero is changed to 1 */
+} config;
+
+/* Sysman GPU plugin config options (defines to ease catching typos).
+ *
+ * The KEY_DISABLE_* names are listed together with their current values
+ * in gpu_config_check().
+ */
+#define KEY_DISABLE_ENGINE "DisableEngine"
+#define KEY_DISABLE_ENGINE_SINGLE "DisableEngineSingle"
+#define KEY_DISABLE_FREQ "DisableFrequency"
+#define KEY_DISABLE_MEM "DisableMemory"
+#define KEY_DISABLE_MEMBW "DisableMemoryBandwidth"
+#define KEY_DISABLE_POWER "DisablePower"
+#define KEY_DISABLE_RAS "DisableErrors"
+#define KEY_DISABLE_RAS_SEPARATE "DisableSeparateErrors"
+#define KEY_DISABLE_TEMP "DisableTemperature"
+#define KEY_DISABLE_THROTTLE "DisableThrottleTime"
+
+#define KEY_METRICS_OUTPUT "MetricsOutput" /* logged with 'config.output' name */
+#define KEY_LOG_GPU_INFO "LogGpuInfo"
+#define KEY_SAMPLES "Samples" /* logged with 'config.samples' value */
+#define MAX_SAMPLES 64 /* NOTE(review): presumably upper limit for "Samples", its use is not visible here */
+
+/* Release an array of arrays that was allocated with gpu_subarray_realloc().
+ *
+ * Number of the released subarrays is 'config.samples', so that value must
+ * be the same as it was when the array was allocated.
+ *
+ * Return false if there was nothing to free (NULL 'mem'), true otherwise
+ */
+static bool gpu_subarray_free(void **mem) {
+  if (mem == NULL) {
+    return false;
+  }
+  void **const end = mem + config.samples;
+  for (void **item = mem; item < end; item++) {
+    free(*item);
+    *item = NULL;
+  }
+  free(mem);
+  return true;
+}
+
+/* Allocate an array with 'config.samples' items, each of them pointing to
+ * a zeroed array of 'count' items that are 'size' bytes each.
+ *
+ * If 'mem' is already allocated, it and its subarrays are freed first.
+ * Returns the new array.
+ */
+static void **gpu_subarray_realloc(void **mem, int count, int size) {
+  /* return value tells just whether there was something to free */
+  (void)gpu_subarray_free(mem);
+
+  const uint32_t rows = config.samples;
+  void **table = smalloc(rows * sizeof(*table));
+  for (uint32_t row = 0; row < rows; row++) {
+    table[row] = scalloc(count, size);
+  }
+  return table;
+}
+
+/* Free GPU allocations and zero counters
+ *
+ * Releases everything owned by the items in the 'gpus' array, the array
+ * itself, and resets 'gpu_count', so that no stale item count is left
+ * behind for a NULL array.
+ *
+ * Return RET_OK for shutdown callback success, RET_NO_GPUS if there
+ * was nothing to free
+ */
+static int gpu_config_free(void) {
+/* free() accepts NULL, so members need no separate checks */
+#define FREE_GPU_ARRAY(i, member) \
+  do { \
+    free(gpus[i].member); \
+    gpus[i].member = NULL; \
+    gpus[i].member##_count = 0; \
+  } while (0)
+#define FREE_GPU_SAMPLING_ARRAYS(i, member) \
+  do { \
+    gpu_subarray_free((void **)gpus[i].member); \
+    gpus[i].member = NULL; \
+    gpus[i].member##_count = 0; \
+  } while (0)
+  if (!gpus) {
+    /* gpu_init() should have failed with no GPUs, so no need for this */
+    WARNING(PLUGIN_NAME
+            ": gpu_config_free() (shutdown) called with no GPUs initialized");
+    return RET_NO_GPUS;
+  }
+  for (uint32_t i = 0; i < gpu_count; i++) {
+    /* free previous values for counters & zero their counts */
+    FREE_GPU_ARRAY(i, engine);
+    FREE_GPU_ARRAY(i, membw);
+    FREE_GPU_ARRAY(i, power);
+    FREE_GPU_ARRAY(i, throttle);
+    /* and similar for sampling arrays */
+    FREE_GPU_SAMPLING_ARRAYS(i, frequency);
+    FREE_GPU_SAMPLING_ARRAYS(i, memory);
+    /* zero rest of counters & free label strings */
+    gpus[i].ras_count = 0;
+    gpus[i].temp_count = 0;
+    free(gpus[i].pci_bdf);
+    gpus[i].pci_bdf = NULL;
+    free(gpus[i].dev_file);
+    gpus[i].dev_file = NULL;
+  }
+#undef FREE_GPU_SAMPLING_ARRAYS
+#undef FREE_GPU_ARRAY
+  free(gpus);
+  gpus = NULL;
+  /* array is gone, so there are no valid items either */
+  gpu_count = 0;
+  return RET_OK;
+}
+
+/* Validate the metrics configuration and log it when GPU info logging
+ * is enabled. Unset output type is changed to OUTPUT_BOTH.
+ *
+ * Return RET_OK if at least some metric is enabled, RET_NO_METRICS
+ * if all of them are disabled
+ */
+static int gpu_config_check(void) {
+  if (config.output == OUTPUT_UNSET) {
+    config.output = OUTPUT_BOTH;
+  }
+  assert(config.output < STATIC_ARRAY_SIZE(metrics_output));
+
+  const bool verbose = config.gpuinfo;
+  if (verbose) {
+    INFO("Sysman '" KEY_SAMPLES "': %d", config.samples);
+    INFO(KEY_METRICS_OUTPUT ": %s", metrics_output[config.output]);
+    INFO("Disabled metrics:");
+  }
+
+  /* option names paired with their current values */
+  const gpu_disable_t *off = &config.disabled;
+  const struct {
+    const char *key;
+    bool is_set;
+  } flags[] = {
+      {KEY_DISABLE_ENGINE, off->engine},
+      {KEY_DISABLE_ENGINE_SINGLE, off->engine_single},
+      {KEY_DISABLE_FREQ, off->freq},
+      {KEY_DISABLE_MEM, off->mem},
+      {KEY_DISABLE_MEMBW, off->membw},
+      {KEY_DISABLE_POWER, off->power},
+      {KEY_DISABLE_RAS, off->ras},
+      {KEY_DISABLE_RAS_SEPARATE, off->ras_separate},
+      {KEY_DISABLE_TEMP, off->temp},
+      {KEY_DISABLE_THROTTLE, off->throttle},
+  };
+  const unsigned int flag_count = STATIC_ARRAY_SIZE(flags);
+
+  unsigned int off_count = 0;
+  for (unsigned int idx = 0; idx < flag_count; idx++) {
+    if (!flags[idx].is_set) {
+      continue;
+    }
+    off_count++;
+    if (verbose) {
+      INFO("- %s", flags[idx].key);
+    }
+  }
+  if (off_count >= flag_count) {
+    ERROR(PLUGIN_NAME ": all metrics disabled");
+    return RET_NO_METRICS;
+  }
+  if (!verbose) {
+    return RET_OK;
+  }
+  if (off_count) {
+    INFO("=> %d disabled metrics", off_count);
+  } else {
+    INFO("- no disabled metrics");
+  }
+  return RET_OK;
+}
+
+/* Set GPU specific flags to initial global configuration values
+ * for each GPU. Allocations of metrics arrays are done when metrics
+ * are queried for the first time (not here), and re-allocated if
+ * number of types for given metric changes.
+ *
+ * 'count' is the number of valid items in the 'gpus' array. Zero
+ * 'config.samples' is changed to 1.
+ *
+ * Return RET_OK if config is OK, (negative) error value otherwise.
+ * On error, the GPU array and everything in it is freed
+ */
+static int gpu_config_init(unsigned int count) {
+  if (!config.samples) {
+    config.samples = 1;
+  }
+  if (gpu_config_check() != RET_OK) {
+    /* gpu_config_free() goes through 'gpu_count' items, so the count needs
+     * to be set before the call, otherwise label strings added to the
+     * items during GPU fetch would leak
+     */
+    gpu_count = count;
+    gpu_config_free();
+    gpu_count = 0;
+    return RET_NO_METRICS;
+  }
+  for (unsigned int i = 0; i < count; i++) {
+    gpus[i].disabled = config.disabled;
+    gpus[i].counter = 0;
+  }
+  gpu_count = count;
+  return RET_OK;
+}
+
+/* Log 'len' bytes of given UUID as hex digits (without dashes), preceded
+ * by the given prefix string:
+ * https://en.wikipedia.org/wiki/Universally_unique_identifier
+ */
+static void log_uuid(const char *prefix, const uint8_t *byte, int len) {
+  const int prefix_len = strlen(prefix);
+  /* prefix + 2 hex digits per byte + terminator */
+  char buf[prefix_len + 2 * len + 1];
+  sstrncpy(buf, prefix, sizeof(buf));
+
+  char *out = buf + prefix_len;
+  for (int i = 0; i < len; i++, out += 2) {
+    sprintf(out, "%02x", byte[i]);
+  }
+  INFO("%s", buf);
+}
+
+/* Log Sysman API provided info for given GPU if logging is enabled
+ * and on success, return GPU PCI ID as string in BDF notation:
+ * https://wiki.xen.org/wiki/Bus:Device.Function_(BDF)_Notation
+ *
+ * 'idx' is used only in the log messages. Returned string is heap
+ * allocated and owned by the caller. NULL is returned only when the PCI
+ * properties query fails, failures in the other queries are just logged.
+ */
+static char *gpu_info(int idx, zes_device_handle_t dev) {
+  /* Level-Zero structs have a 'pNext' extension pointer that needs to be
+   * NULL when it is not used, so structs given to the API are never left
+   * uninitialized (initializer zeroes also rest of the struct)
+   */
+  zes_pci_properties_t pci = {.pNext = NULL};
+  ze_result_t ret = zesDevicePciGetProperties(dev, &pci);
+  if (ret != ZE_RESULT_SUCCESS) {
+    ERROR(PLUGIN_NAME ": failed to get GPU %d PCI device properties => 0x%x",
+          idx, ret);
+    return NULL;
+  }
+  char buf[32];
+  const zes_pci_address_t *addr = &pci.address;
+  snprintf(buf, sizeof(buf), "%04x:%02x:%02x.%x", addr->domain, addr->bus,
+           addr->device, addr->function);
+
+  char *pci_bdf = strdup(buf);
+  assert(pci_bdf);
+  if (!config.gpuinfo) {
+    return pci_bdf;
+  }
+
+  INFO("Level-Zero Sysman API GPU %d info", idx);
+  INFO("==================================");
+
+  /* PCI properties query is known to have succeeded at this point */
+  INFO("PCI info:");
+  INFO("- PCI B/D/F: %s", pci_bdf);
+  const zes_pci_speed_t *speed = &pci.maxSpeed;
+  INFO("- PCI gen: %d", speed->gen);
+  INFO("- PCI width: %d", speed->width);
+  double max = speed->maxBandwidth / (double)(1024 * 1024 * 1024);
+  INFO("- max BW: %.2f GiB/s (all lines)", max);
+
+  INFO("HW state:");
+  zes_device_state_t state = {.pNext = NULL};
+  /* Note: there's also zesDevicePciGetState() for PCI link status */
+  ret = zesDeviceGetState(dev, &state);
+  if (ret == ZE_RESULT_SUCCESS) {
+    INFO("- repaired: %s",
+         (state.repaired == ZES_REPAIR_STATUS_PERFORMED) ? "yes" : "no");
+    if (state.reset != 0) {
+      INFO("- device RESET required");
+      if (state.reset & ZES_RESET_REASON_FLAG_WEDGED) {
+        INFO(" - HW is wedged");
+      }
+      if (state.reset & ZES_RESET_REASON_FLAG_REPAIR) {
+        INFO(" - HW needs to complete repairs");
+      }
+    } else {
+      INFO("- no RESET required");
+    }
+  } else {
+    INFO("- unavailable");
+    WARNING(PLUGIN_NAME ": failed to get GPU %d device state => 0x%x", idx,
+            ret);
+  }
+
+  INFO("HW identification:");
+  zes_device_properties_t props = {.pNext = NULL};
+  ret = zesDeviceGetProperties(dev, &props);
+  if (ret == ZE_RESULT_SUCCESS) {
+    const ze_device_properties_t *core = &props.core;
+    INFO("- name: %s", core->name);
+    INFO("- vendor ID: 0x%x", core->vendorId);
+    INFO("- device ID: 0x%x", core->deviceId);
+    log_uuid("- UUID: 0x", core->uuid.id, sizeof(core->uuid.id));
+    INFO("- serial#: %s", props.serialNumber);
+    INFO("- board#: %s", props.boardNumber);
+    INFO("- brand: %s", props.brandName);
+    INFO("- model: %s", props.modelName);
+    INFO("- vendor: %s", props.vendorName);
+
+    INFO("UMD/KMD driver info:");
+    INFO("- version: %s", props.driverVersion);
+    /* value is 64-bit also when 'long' is not */
+    INFO("- max alloc: %" PRIu64 " MiB",
+         (uint64_t)(core->maxMemAllocSize / (1024 * 1024)));
+
+    INFO("HW info:");
+    INFO("- # sub devs: %u", props.numSubdevices);
+    INFO("- core clock: %u", core->coreClockRate);
+    INFO("- EUs: %u", core->numEUsPerSubslice * core->numSubslicesPerSlice *
+                          core->numSlices);
+  } else {
+    INFO("- unavailable");
+    WARNING(PLUGIN_NAME ": failed to get GPU %d device properties => 0x%x", idx,
+            ret);
+  }
+
+  /* HW info for all memories */
+  uint32_t mem_count = 0;
+  ze_device_handle_t mdev = (ze_device_handle_t)dev;
+  if (zeDeviceGetMemoryProperties(mdev, &mem_count, NULL) !=
+      ZE_RESULT_SUCCESS) {
+    WARNING(PLUGIN_NAME ": failed to get memory properties count");
+    return pci_bdf;
+  }
+  if (!mem_count) {
+    /* nothing to log, and zero sized allocation is avoided */
+    return pci_bdf;
+  }
+  /* zeroed allocation => 'pNext' members are NULL */
+  ze_device_memory_properties_t *mems = scalloc(mem_count, sizeof(*mems));
+  if (zeDeviceGetMemoryProperties(mdev, &mem_count, mems) !=
+      ZE_RESULT_SUCCESS) {
+    WARNING(PLUGIN_NAME ": failed to get %d memory properties", mem_count);
+    free(mems);
+    return pci_bdf;
+  }
+  for (uint32_t i = 0; i < mem_count; i++) {
+    const char *memname = mems[i].name;
+    if (!(memname && *memname)) {
+      memname = "Unknown";
+    }
+    INFO("Memory - %s:", memname);
+    INFO("- size: %" PRIu64 " MiB",
+         (uint64_t)(mems[i].totalSize / (1024 * 1024)));
+    INFO("- bus width: %u", mems[i].maxBusWidth);
+    INFO("- max clock: %u", mems[i].maxClockRate);
+  }
+  free(mems);
+  return pci_bdf;
+}
+
+/* Add (given) BDF string and device file name to GPU struct for metric labels.
+ *
+ * Ownership of the 'pci_bdf' string moves to the GPU struct. Device file
+ * name is set only if a matching one is found, it missing is not an error.
+ *
+ * Return false if (required) BDF string is missing, true otherwise.
+ */
+static bool add_gpu_labels(gpu_device_t *gpu, char *pci_bdf) {
+  assert(gpu);
+  if (!pci_bdf) {
+    return false;
+  }
+  gpu->pci_bdf = pci_bdf;
+  /*
+   * scan devfs and sysfs to find primary GPU device file node matching
+   * given BDF, and if one is found, use that as device file name.
+   *
+   * NOTE: scanning can log only INFO messages, because ERRORs and WARNINGs
+   * would FAIL unit test that are run as part of build, if build environment
+   * has no GPU access.
+   */
+#if ADD_DEV_FILE
+#define BDF_LINE "PCI_SLOT_NAME="
+#define DEVFS_GLOB "/dev/dri/card*"
+  glob_t devfs;
+  if (glob(DEVFS_GLOB, 0, NULL, &devfs) != 0) {
+    INFO(PLUGIN_NAME ": device <-> BDF mapping, no matches for: " DEVFS_GLOB);
+    globfree(&devfs);
+    return true;
+  }
+  const size_t prefix_size = strlen(BDF_LINE);
+  for (size_t i = 0; i < devfs.gl_pathc; i++) {
+    char path[PATH_MAX];
+    char *dev_file = basename(devfs.gl_pathv[i]);
+
+    snprintf(path, sizeof(path), "/sys/class/drm/%s/device/uevent", dev_file);
+    FILE *fp = fopen(path, "r");
+    if (!fp) {
+      INFO(PLUGIN_NAME ": device <-> BDF mapping, file missing: %s", path);
+      continue;
+    }
+    ssize_t nread;
+    size_t len = 0;
+    char *line = NULL;
+    while ((nread = getline(&line, &len, fp)) > 0) {
+      if (strncmp(line, BDF_LINE, prefix_size) != 0) {
+        continue;
+      }
+      /* remove newline, but only if there is one; last line of a file
+       * can be without it, and then last char is part of the BDF
+       */
+      if (line[nread - 1] == '\n') {
+        line[nread - 1] = '\0';
+      }
+      if (strcmp(line + prefix_size, pci_bdf) == 0) {
+        INFO(PLUGIN_NAME ": %s <-> %s", dev_file, pci_bdf);
+        gpu->dev_file = strdup(dev_file);
+        break;
+      }
+    }
+    /* getline() buffer needs to be freed regardless of the result */
+    free(line);
+    fclose(fp);
+    if (gpu->dev_file) {
+      break;
+    }
+  }
+  globfree(&devfs);
+#undef DEVFS_GLOB
+#undef BDF_LINE
+#endif
+  return true;
+}
+
+/* Count GPU devices of all the given drivers, and store their total
+ * number to 'scan_count'. Must be called before the GPU array is allocated.
+ *
+ * Return RET_OK for success, or (negative) error value if any of the device
+ * count queries fails, or if no devices are found
+ */
+static int gpu_scan(ze_driver_handle_t *drivers, uint32_t driver_count,
+                    uint32_t *scan_count) {
+  assert(!gpus);
+  *scan_count = 0;
+
+  uint32_t drv_idx = 0;
+  while (drv_idx < driver_count) {
+    uint32_t found = 0;
+    ze_result_t ret = zeDeviceGet(drivers[drv_idx], &found, NULL);
+    if (ret != ZE_RESULT_SUCCESS) {
+      ERROR(PLUGIN_NAME ": failed to get device count for driver %d", drv_idx);
+      return RET_ZE_DEVICE_GET_FAIL;
+    }
+    if (config.gpuinfo) {
+      INFO("driver %d: %d devices", drv_idx, found);
+    }
+    *scan_count += found;
+    drv_idx++;
+  }
+
+  if (*scan_count == 0) {
+    ERROR(PLUGIN_NAME ": scan for GPU devices failed");
+    return RET_NO_GPUS;
+  }
+  if (config.gpuinfo) {
+    INFO("scan: %d GPUs in total from %d L0 drivers", *scan_count,
+         driver_count);
+  }
+  return RET_OK;
+}
+
+/* Allocate 'scan_count' GPU structs to 'gpus' and fetch Sysman handle & name
+ * for them.
+ *
+ * Counts of still found & ignored GPUs are set to 'scan_count' and
+ * 'scan_ignored' arguments before returning.
+ *
+ * Return RET_OK for success if at least one GPU device info fetch succeeded,
+ * otherwise (negative) error value for last error encountered. On error,
+ * the allocated GPU array is freed
+ */
+static int gpu_fetch(ze_driver_handle_t *drivers, uint32_t driver_count,
+                     uint32_t *scan_count, uint32_t *scan_ignored) {
+  assert(!gpus);
+  assert(*scan_count > 0);
+  gpus = scalloc(*scan_count, sizeof(*gpus));
+
+  uint32_t ignored = 0, count = 0;
+  int retval = RET_NO_GPUS;
+
+  for (uint32_t drv_idx = 0; drv_idx < driver_count; drv_idx++) {
+    uint32_t dev_count = 0;
+    if (zeDeviceGet(drivers[drv_idx], &dev_count, NULL) != ZE_RESULT_SUCCESS) {
+      ERROR(PLUGIN_NAME ": failed to get device count for driver %d", drv_idx);
+      retval = RET_ZE_DEVICE_GET_FAIL;
+      continue;
+    }
+    if (!dev_count) {
+      /* driver has no devices: nothing to fetch, and zero sized
+       * allocation is avoided
+       */
+      continue;
+    }
+    ze_device_handle_t *devs = scalloc(dev_count, sizeof(*devs));
+    if (zeDeviceGet(drivers[drv_idx], &dev_count, devs) != ZE_RESULT_SUCCESS) {
+      ERROR(PLUGIN_NAME ": failed to get %d devices for driver %d", dev_count,
+            drv_idx);
+      free(devs);
+      retval = RET_ZE_DEVICE_GET_FAIL;
+      continue;
+    }
+    /* Get all GPU devices for the driver */
+    for (uint32_t dev_idx = 0; dev_idx < dev_count; dev_idx++) {
+      /* 'pNext' extension pointer needs to be NULL when it is not used,
+       * initializer zeroes also rest of the struct
+       */
+      ze_device_properties_t props = {.pNext = NULL};
+      if (zeDeviceGetProperties(devs[dev_idx], &props) != ZE_RESULT_SUCCESS) {
+        ERROR(PLUGIN_NAME ": failed to get driver %d device %d properties",
+              drv_idx, dev_idx);
+        retval = RET_ZE_DEVICE_PROPS_FAIL;
+        continue;
+      }
+      assert(ZE_DEVICE_TYPE_GPU == props.type);
+      if (count >= *scan_count) {
+        /* more devices than there is space allocated based on the scan */
+        ignored++;
+        continue;
+      }
+      gpus[count].handle = (zes_device_handle_t)devs[dev_idx];
+      if (!add_gpu_labels(&(gpus[count]), gpu_info(count, devs[dev_idx]))) {
+        /* no PCI ID for the device => array slot is reused for next one */
+        ignored++;
+        continue;
+      }
+      count++;
+    }
+    free(devs);
+  }
+  if (count > 0) {
+    retval = RET_OK;
+    if (config.gpuinfo) {
+      INFO("fetch: %d/%d GPUs in total from %d L0 drivers", count, *scan_count,
+           driver_count);
+    }
+  } else {
+    ERROR(PLUGIN_NAME ": fetch for GPU devices failed");
+    gpu_config_free();
+  }
+  *scan_ignored = ignored;
+  *scan_count = count;
+  return retval;
+}
+
+/* Initialize Level-Zero with Sysman enabled, find the GPU devices its
+ * drivers provide, and set up the per-GPU configuration for them.
+ * Extra calls (GPU array already allocated) are skipped.
+ *
+ * Return RET_OK for success, (negative) error value otherwise
+ */
+static int gpu_init(void) {
+  if (gpus) {
+    NOTICE(PLUGIN_NAME ": skipping extra gpu_init() call");
+    return RET_OK;
+  }
+  setenv("ZES_ENABLE_SYSMAN", "1", 1);
+  if (zeInit(ZE_INIT_FLAG_GPU_ONLY) != ZE_RESULT_SUCCESS) {
+    ERROR(PLUGIN_NAME ": Level Zero API init failed");
+    return RET_ZE_INIT_FAIL;
+  }
+
+  /* Discover all the drivers */
+  uint32_t driver_count = 0;
+  if (zeDriverGet(&driver_count, NULL) != ZE_RESULT_SUCCESS) {
+    ERROR(PLUGIN_NAME ": failed to get L0 GPU drivers count");
+    return RET_ZE_DRIVER_GET_FAIL;
+  }
+  if (driver_count == 0) {
+    ERROR(PLUGIN_NAME ": no drivers found with Level-Zero Sysman API");
+    return RET_NO_DRIVERS;
+  }
+  ze_driver_handle_t *drivers = scalloc(driver_count, sizeof(*drivers));
+
+  /* driver handles are needed only for the scan & fetch steps below,
+   * so they have a single release point after those
+   */
+  int status;
+  uint32_t count = 0, scanned = 0, ignored = 0;
+  if (zeDriverGet(&driver_count, drivers) != ZE_RESULT_SUCCESS) {
+    ERROR(PLUGIN_NAME ": failed to get %d L0 drivers", driver_count);
+    status = RET_ZE_DRIVER_GET_FAIL;
+  } else {
+    /* scan number of Sysman provided GPUs... */
+    status = gpu_scan(drivers, driver_count, &count);
+    if (status >= 0) {
+      scanned = count;
+      if (count) {
+        /* ...and allocate & fetch data for them */
+        status = gpu_fetch(drivers, driver_count, &count, &ignored);
+      }
+    }
+  }
+  free(drivers);
+  if (status < 0) {
+    return status;
+  }
+
+  if (scanned > count) {
+    WARNING(PLUGIN_NAME ": %d GPUs disappeared after first scan",
+            scanned - count);
+  }
+  if (ignored) {
+    WARNING(PLUGIN_NAME ": %d GPUs appeared after first scan (are ignored)",
+            ignored);
+  }
+  if (!count) {
+    ERROR(PLUGIN_NAME ": no GPU devices found with Level-Zero Sysman API");
+    return RET_NO_GPUS;
+  }
+  return gpu_config_init(count);
+}
+
+/* Label every metric in the given family with the GPU PCI address (and
+ * device file name when there is one), and dispatch the family to collectd.
+ * Dispatch failure is logged. Metrics in the family are reset afterwards
+ */
+static void gpu_submit(gpu_device_t *gpu, metric_family_t *fam) {
+  metric_t *base = fam->metric.ptr;
+  for (size_t idx = 0; idx < fam->metric.num; idx++) {
+    metric_t *item = base + idx;
+    metric_label_set(item, "pci_bdf", gpu->pci_bdf);
+    if (gpu->dev_file != NULL) {
+      metric_label_set(item, "dev_file", gpu->dev_file);
+    }
+  }
+
+  const int err = plugin_dispatch_metric_family(fam);
+  if (err) {
+    ERROR(PLUGIN_NAME ": gpu_submit(%s, %s) failed: %s", gpu->pci_bdf,
+          fam->name, strerror(err));
+  }
+  metric_family_metric_reset(fam);
+}
+
+/* Submit a single RAS counter value as its own metric family.
+ *
+ * Family name changes between RAS metrics, so each of them needs to be
+ * submitted and reset separately. "type" and "sub_dev" labels are added
+ * only when corresponding argument is non-NULL
+ */
+static void ras_submit(gpu_device_t *gpu, const char *name, const char *help,
+                       const char *type, const char *subdev, double value) {
+  /*
+   * Name & help strings come in as const (they are string literals), but
+   * the family members are not const, hence the casts.
+   *
+   * Assigning string literals directly to the members does the same thing
+   * implicitly, GCC just does not warn about it unless "-Wwrite-strings"
+   * warning is enabled, and that is not part of even "-Wall -Wextra".
+   *
+   * Casts are safe as long as metric_family_free() is not called for
+   * these families (which is the case).
+   */
+  metric_family_t fam = {
+      .name = (char *)name,
+      .help = (char *)help,
+      .type = METRIC_TYPE_COUNTER,
+  };
+
+  metric_t m = {0};
+  m.value.counter = value;
+  if (type != NULL) {
+    metric_label_set(&m, "type", type);
+  }
+  if (subdev != NULL) {
+    metric_label_set(&m, "sub_dev", subdev);
+  }
+
+  metric_family_metric_append(&fam, m);
+  /* family has the metric now, local template is not needed any more */
+  metric_reset(&m);
+  gpu_submit(gpu, &fam);
+}
+
+/* Report error set types, return true for success */
+static bool gpu_ras(gpu_device_t *gpu) {
+ uint32_t i, ras_count = 0;
+ zes_device_handle_t dev = gpu->handle;
+ if ((zesDeviceEnumRasErrorSets(dev, &ras_count, NULL) != ZE_RESULT_SUCCESS)) {
+ ERROR(PLUGIN_NAME ": failed to get RAS error sets count");
+ return false;
+ }
+ zes_ras_handle_t *ras;
+ ras = scalloc(ras_count, sizeof(*ras));
+ if (zesDeviceEnumRasErrorSets(dev, &ras_count, ras) != ZE_RESULT_SUCCESS) {
+ ERROR(PLUGIN_NAME ": failed to get %d RAS error sets", ras_count);
+ free(ras);
+ return false;
+ }
+ if (gpu->ras_count != ras_count) {
+ INFO(PLUGIN_NAME ": Sysman reports %d RAS error sets", ras_count);
+ gpu->ras_count = ras_count;
+ }
+
+ bool ok = false;
+ for (i = 0; i < ras_count; i++) {
+ zes_ras_properties_t props;
+ if (zesRasGetProperties(ras[i], &props) != ZE_RESULT_SUCCESS) {
+ ERROR(PLUGIN_NAME ": failed to get RAS set %d properties", i);
+ ok = false;
+ break;
+ }
+ const char *type;
+ switch (props.type) {
+ case ZES_RAS_ERROR_TYPE_CORRECTABLE:
+ type = "correctable";
+ break;
+ case ZES_RAS_ERROR_TYPE_UNCORRECTABLE:
+ type = "uncorrectable";
+ break;
+ default:
+ type = "unknown";
+ }
+ char buf[8];
+ const char *subdev = NULL;
+ if (props.onSubdevice) {
+ snprintf(buf, sizeof(buf), "%d", props.subdeviceId);
+ subdev = buf;
+ }
+ zes_ras_state_t values;
+ const bool clear = false;
+ if (zesRasGetState(ras[i], clear, &values) != ZE_RESULT_SUCCESS) {
+ ERROR(PLUGIN_NAME ": failed to get RAS set %d (%s) state", i, type);
+ ok = false;
+ break;
+ }
+
+ bool correctable;
+ uint64_t value, total = 0;
+ const char *catname, *help;
+ for (int cat_idx = 0; cat_idx < ZES_MAX_RAS_ERROR_CATEGORY_COUNT;
+ cat_idx++) {
+ value = values.category[cat_idx];
+ total += value;
+ if (gpu->disabled.ras_separate) {
+ continue;
+ }
+ correctable = true;
+ switch (cat_idx) {
+ // categories which are not correctable, see:
+ // https://spec.oneapi.io/level-zero/latest/sysman/PROG.html#querying-ras-errors
+ case ZES_RAS_ERROR_CAT_RESET:
+ help = "Total number of GPU reset attempts by the driver";
+ catname = METRIC_PREFIX "resets_total";
+ correctable = false;
+ break;
+ case ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS:
+ help = "Total number of non-correctable HW exceptions generated by the "
+ "way workloads have programmed the HW";
+ catname = METRIC_PREFIX "programming_errors_total";
+ correctable = false;
+ break;
+ case ZES_RAS_ERROR_CAT_DRIVER_ERRORS:
+ help = "total number of non-correctable low level driver communication "
+ "errors";
+ catname = METRIC_PREFIX "driver_errors_total";
+ correctable = false;
+ break;
+ // categories which can have both correctable and uncorrectable errors
+ case ZES_RAS_ERROR_CAT_COMPUTE_ERRORS:
+ help = "Total number of errors occurrend in the accelerator HW";
+ catname = METRIC_PREFIX "compute_errors_total";
+ break;
+ case ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS:
+ help = "Total number of errors occurred in the fixed-function "
+ "accelerator HW";
+ catname = METRIC_PREFIX "fixed_function_errors_total";
+ break;
+ case ZES_RAS_ERROR_CAT_CACHE_ERRORS:
+ help = "Total number of ECC errors that have occurred in the on-chip "
+ "caches";
+ catname = METRIC_PREFIX "cache_errors_total";
+ break;
+ case ZES_RAS_ERROR_CAT_DISPLAY_ERRORS:
+ help = "Total number of ECC errors that have occurred in the display";
+ catname = METRIC_PREFIX "display_errors_total";
+ break;
+ default:
+ help = "Total number of errors in unsupported categories";
+ catname = METRIC_PREFIX "unknown_errors_total";
+ }
+ if (correctable) {
+ ras_submit(gpu, catname, help, type, subdev, value);
+ } else if (props.type == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) {
+ ras_submit(gpu, catname, help, NULL, subdev, value);
+ }
+ }
+ catname = METRIC_PREFIX "all_errors_total";
+ help = "Total number of errors in all categories";
+ ras_submit(gpu, catname, help, type, subdev, total);
+ ok = true;
+ }
+ free(ras);
+ return ok;
+}
+
+/* Add "sub_dev" label with given sub-device ID to the metric, if 'onsub'
+ * tells the metric source to be on a sub-device. Otherwise metric is not
+ * changed
+ */
+static void metric_set_subdev(metric_t *m, bool onsub, uint32_t subid) {
+  if (!onsub) {
+    return;
+  }
+  /* large enough for any 32-bit ID (10 digits + terminator) */
+  char buf[11];
+  snprintf(buf, sizeof(buf), "%" PRIu32, subid);
+  metric_label_set(m, "sub_dev", buf);
+}
+
+/* Set "type", "location" and (when memory is on a sub-device) "sub_dev"
+ * labels for the metric, based on the properties of the given memory module.
+ *
+ * Return false (with metric unchanged) if properties query fails, true
+ * otherwise
+ */
+static bool set_mem_labels(zes_mem_handle_t mem, metric_t *metric) {
+  /* 'pNext' extension pointer needs to be NULL when it is not used,
+   * initializer zeroes also rest of the struct
+   */
+  zes_mem_properties_t props = {.pNext = NULL};
+  if (zesMemoryGetProperties(mem, &props) != ZE_RESULT_SUCCESS) {
+    return false;
+  }
+  const char *location;
+  switch (props.location) {
+  case ZES_MEM_LOC_SYSTEM:
+    location = "system";
+    break;
+  case ZES_MEM_LOC_DEVICE:
+    location = "device";
+    break;
+  default:
+    location = "unknown";
+    break;
+  }
+  const char *type;
+  switch (props.type) {
+  case ZES_MEM_TYPE_HBM:
+    type = "HBM";
+    break;
+  case ZES_MEM_TYPE_DDR:
+    type = "DDR";
+    break;
+  case ZES_MEM_TYPE_DDR3:
+    type = "DDR3";
+    break;
+  case ZES_MEM_TYPE_DDR4:
+    type = "DDR4";
+    break;
+  case ZES_MEM_TYPE_DDR5:
+    type = "DDR5";
+    break;
+  case ZES_MEM_TYPE_LPDDR:
+    type = "LPDDR";
+    break;
+  case ZES_MEM_TYPE_LPDDR3:
+    type = "LPDDR3";
+    break;
+  case ZES_MEM_TYPE_LPDDR4:
+    type = "LPDDR4";
+    break;
+  case ZES_MEM_TYPE_LPDDR5:
+    type = "LPDDR5";
+    break;
+  case ZES_MEM_TYPE_SRAM:
+    type = "SRAM";
+    break;
+  case ZES_MEM_TYPE_L1:
+    type = "L1";
+    break;
+  case ZES_MEM_TYPE_L3:
+    type = "L3";
+    break;
+  case ZES_MEM_TYPE_GRF:
+    type = "GRF";
+    break;
+  case ZES_MEM_TYPE_SLM:
+    type = "SLM";
+    break;
+  default:
+    type = "unknown";
+    break;
+  }
+  metric_label_set(metric, "type", type);
+  metric_label_set(metric, "location", location);
+  metric_set_subdev(metric, props.onSubdevice, props.subdeviceId);
+  return true;
+}
+
+/* Report memory usage for memory modules, return true for success.
+ *
+ * State of each module is stored to 'cache_idx' slot of the sample cache.
+ * Metrics are reported only when 'cache_idx' is zero, other values just
+ * update the cache. With a single sample, the used amount and ratio are
+ * reported as such, with several samples, their min & max values are
+ * reported with "function" label.
+ *
+ * False is returned if any of the Sysman queries fails, module size is
+ * zero, or if there are no memory modules. Nothing is submitted then.
+ *
+ * See gpu_read() on 'cache_idx' usage.
+ */
+static bool gpu_mems(gpu_device_t *gpu, unsigned int cache_idx) {
+  uint32_t mem_count = 0;
+  zes_device_handle_t dev = gpu->handle;
+  if ((zesDeviceEnumMemoryModules(dev, &mem_count, NULL) !=
+       ZE_RESULT_SUCCESS)) {
+    ERROR(PLUGIN_NAME ": failed to get memory modules count");
+    return false;
+  }
+  zes_mem_handle_t *mems = scalloc(mem_count, sizeof(*mems));
+  if (zesDeviceEnumMemoryModules(dev, &mem_count, mems) != ZE_RESULT_SUCCESS) {
+    ERROR(PLUGIN_NAME ": failed to get %d memory modules", mem_count);
+    free(mems);
+    return false;
+  }
+
+  if (gpu->memory_count != mem_count) {
+    /* module count changed => sample cache needs to be re-allocated */
+    INFO(PLUGIN_NAME ": Sysman reports %d memory modules", mem_count);
+    gpu->memory = (zes_mem_state_t **)gpu_subarray_realloc(
+        (void **)gpu->memory, mem_count, sizeof(gpu->memory[0][0]));
+    gpu->memory_count = mem_count;
+    assert(gpu->memory);
+  }
+
+  metric_family_t fam_bytes = {
+      .help = "Memory usage (in bytes)",
+      .name = METRIC_PREFIX "memory_used_bytes",
+      .type = METRIC_TYPE_GAUGE,
+  };
+  metric_family_t fam_ratio = {
+      .help = "Memory usage ratio (0-1)",
+      .name = METRIC_PREFIX "memory_usage_ratio",
+      .type = METRIC_TYPE_GAUGE,
+  };
+  /* template for the metrics appended to above families.
+   *
+   * NOTE(review): "sub_dev" label is only ever added to the template, so
+   * it looks like it could carry over from a sub-device module to a later
+   * module that is not on a sub-device -- TODO confirm
+   */
+  metric_t metric = {0};
+
+  bool ok = false;
+  for (uint32_t i = 0; i < mem_count; i++) {
+    /* fetch memory samples */
+    if (zesMemoryGetState(mems[i], &(gpu->memory[cache_idx][i])) !=
+        ZE_RESULT_SUCCESS) {
+      ERROR(PLUGIN_NAME ": failed to get memory module %d state", i);
+      ok = false;
+      break;
+    }
+    ok = true;
+    if (cache_idx > 0) {
+      /* only the cache is updated */
+      continue;
+    }
+    const uint64_t mem_size = gpu->memory[0][i].size;
+    if (!mem_size) {
+      /* size is used as a divisor below */
+      ERROR(PLUGIN_NAME ": invalid (zero) memory module %d size", i);
+      ok = false;
+      break;
+    }
+    /* process samples */
+    if (!set_mem_labels(mems[i], &metric)) {
+      ERROR(PLUGIN_NAME ": failed to get memory module %d properties", i);
+      ok = false;
+      break;
+    }
+    double mem_used;
+    if (config.samples < 2) {
+      const uint64_t mem_free = gpu->memory[0][i].free;
+      /* Sysman reports just memory size & free amounts => calculate used */
+      mem_used = mem_size - mem_free;
+      metric.value.gauge = mem_used;
+      metric_family_metric_append(&fam_bytes, metric);
+      metric.value.gauge = mem_used / mem_size;
+      metric_family_metric_append(&fam_ratio, metric);
+    } else {
+      /* find min & max values for memory free from
+       * (the configured number of) samples
+       */
+      uint64_t free_min = UINT64_MAX; /* any sample is at most this */
+      uint64_t free_max = 0, mem_free;
+      for (uint32_t j = 0; j < config.samples; j++) {
+        mem_free = gpu->memory[j][i].free;
+        if (mem_free < free_min) {
+          free_min = mem_free;
+        }
+        if (mem_free > free_max) {
+          free_max = mem_free;
+        }
+      }
+      /* most memory free => smallest used amount of memory */
+      mem_used = mem_size - free_max;
+      metric.value.gauge = mem_used;
+      metric_label_set(&metric, "function", "min");
+      metric_family_metric_append(&fam_bytes, metric);
+      metric.value.gauge = mem_used / mem_size;
+      metric_family_metric_append(&fam_ratio, metric);
+
+      /* least memory free => largest used amount of memory */
+      mem_used = mem_size - free_min;
+      metric.value.gauge = mem_used;
+      metric_label_set(&metric, "function", "max");
+      metric_family_metric_append(&fam_bytes, metric);
+      metric.value.gauge = mem_used / mem_size;
+      metric_family_metric_append(&fam_ratio, metric);
+    }
+  }
+  /* template labels are released on all paths, appended metrics do not
+   * depend on the template (it was reset before submit also earlier)
+   */
+  metric_reset(&metric);
+  if (ok && cache_idx == 0) {
+    gpu_submit(gpu, &fam_bytes);
+    gpu_submit(gpu, &fam_ratio);
+  } else {
+    /* nothing is submitted => drop metrics that were appended before
+     * a failure, so that they are not leaked
+     */
+    metric_family_metric_reset(&fam_bytes);
+    metric_family_metric_reset(&fam_ratio);
+  }
+  free(mems);
+  return ok;
+}
+
+/* Report memory modules bandwidth usage, return true for success.
+ *
+ * Enumerates memory modules of the given GPU and queries the bandwidth
+ * counters for each of them. Depending on 'config.output':
+ * - OUTPUT_RAW: read & write byte counters are reported as-is
+ * - OUTPUT_DERIVED: read & write bandwidth usage ratios are calculated
+ *   against the previous sample cached in 'gpu->membw'. They are reported
+ *   only when cached sample has non-zero maxBandwidth and its timestamp
+ *   is older than the new one
+ *
+ * Returns true only when at least one module was processed and all Sysman
+ * calls succeeded. Nothing is submitted otherwise.
+ */
+static bool gpu_mems_bw(gpu_device_t *gpu) {
+  uint32_t i, mem_count = 0;
+  zes_device_handle_t dev = gpu->handle;
+  if ((zesDeviceEnumMemoryModules(dev, &mem_count, NULL) !=
+       ZE_RESULT_SUCCESS)) {
+    ERROR(PLUGIN_NAME ": failed to get memory (BW) modules count");
+    return false;
+  }
+  zes_mem_handle_t *mems;
+  mems = scalloc(mem_count, sizeof(*mems));
+  if (zesDeviceEnumMemoryModules(dev, &mem_count, mems) != ZE_RESULT_SUCCESS) {
+    ERROR(PLUGIN_NAME ": failed to get %d memory (BW) modules", mem_count);
+    free(mems);
+    return false;
+  }
+
+  /* module count changed => cached samples do not match modules any more */
+  if (gpu->membw_count != mem_count) {
+    INFO(PLUGIN_NAME ": Sysman reports %d memory (BW) modules", mem_count);
+    free(gpu->membw);
+    gpu->membw = scalloc(mem_count, sizeof(*gpu->membw));
+    gpu->membw_count = mem_count;
+  }
+
+  metric_family_t fam_ratio = {
+      .help = "Average memory bandwidth usage ratio (0-1) over query interval",
+      .name = METRIC_PREFIX "memory_bw_ratio",
+      .type = METRIC_TYPE_GAUGE,
+  };
+  metric_family_t fam_counter = {
+      .help = "Memory bandwidth usage total (in bytes)",
+      .name = METRIC_PREFIX "memory_bw_bytes_total",
+      .type = METRIC_TYPE_COUNTER,
+  };
+  metric_t metric = {0};
+
+  bool reported_ratio = false, reported_counter = false, ok = false;
+  for (i = 0; i < mem_count; i++) {
+    ze_result_t ret;
+    zes_mem_bandwidth_t bw;
+    if (ret = zesMemoryGetBandwidth(mems[i], &bw), ret != ZE_RESULT_SUCCESS) {
+      ERROR(PLUGIN_NAME ": failed to get memory module %d bandwidth => 0x%x", i,
+            ret);
+      ok = false;
+      break;
+    }
+    if (!set_mem_labels(mems[i], &metric)) {
+      ERROR(PLUGIN_NAME ": failed to get memory module %d properties", i);
+      ok = false;
+      break;
+    }
+    if (config.output & OUTPUT_RAW) {
+      metric.value.counter = bw.writeCounter;
+      metric_label_set(&metric, "direction", "write");
+      metric_family_metric_append(&fam_counter, metric);
+
+      metric.value.counter = bw.readCounter;
+      metric_label_set(&metric, "direction", "read");
+      metric_family_metric_append(&fam_counter, metric);
+      reported_counter = true;
+    }
+    zes_mem_bandwidth_t *old = &gpu->membw[i];
+    if (old->maxBandwidth && (config.output & OUTPUT_DERIVED) &&
+        bw.timestamp > old->timestamp) {
+      /* https://spec.oneapi.com/level-zero/latest/sysman/api.html#_CPPv419zes_mem_bandwidth_t
+       */
+      uint64_t writes = bw.writeCounter - old->writeCounter;
+      uint64_t reads = bw.readCounter - old->readCounter;
+      uint64_t timediff = bw.timestamp - old->timestamp;
+      /* Multiply as doubles: integer product of max bandwidth and time
+       * difference can wrap around the 64-bit range with high bandwidths
+       * and long query intervals, which would give bogus ratios (or
+       * a division by zero).
+       */
+      double factor = 1.0e6 / ((double)old->maxBandwidth * (double)timediff);
+
+      metric.value.gauge = factor * writes;
+      metric_label_set(&metric, "direction", "write");
+      metric_family_metric_append(&fam_ratio, metric);
+
+      metric.value.gauge = factor * reads;
+      metric_label_set(&metric, "direction", "read");
+      metric_family_metric_append(&fam_ratio, metric);
+      reported_ratio = true;
+    }
+    /* current sample is the reference for the next query */
+    *old = bw;
+    ok = true;
+  }
+  if (ok) {
+    metric_reset(&metric);
+    if (reported_ratio) {
+      gpu_submit(gpu, &fam_ratio);
+    }
+    if (reported_counter) {
+      gpu_submit(gpu, &fam_counter);
+    }
+  }
+  free(mems);
+  return ok;
+}
+
+/* Set "location" and subdevice labels of the given metric according to
+ * the properties of the given frequency domain.
+ *
+ * Returns false (leaving the metric untouched) if the properties query
+ * fails, true otherwise.
+ */
+static bool set_freq_labels(zes_freq_handle_t freq, metric_t *metric) {
+  zes_freq_properties_t domain;
+  ze_result_t res = zesFrequencyGetProperties(freq, &domain);
+  if (res != ZE_RESULT_SUCCESS) {
+    return false;
+  }
+
+  /* anything else than the known domains is labeled as unknown */
+  const char *location = "unknown";
+  if (domain.type == ZES_FREQ_DOMAIN_GPU) {
+    location = "gpu";
+  } else if (domain.type == ZES_FREQ_DOMAIN_MEMORY) {
+    location = "memory";
+  }
+
+  metric_label_set(metric, "location", location);
+  metric_set_subdev(metric, domain.onSubdevice, domain.subdeviceId);
+  return true;
+}
+
+/* Report frequency domains request & actual frequency, return true for success
+ *
+ * Every call stores the current state of each frequency domain to
+ * gpu->frequency[cache_idx][domain]. Metrics are appended & submitted only
+ * when 'cache_idx' is zero:
+ * - with less than 2 configured samples, request & actual values are
+ * reported as such, skipping negative (= unsupported) values
+ * - otherwise min & max over all 'config.samples' cached states are
+ * reported for both, skipping a value type when even its max is negative
+ *
+ * Returns false if there are no frequency domains, if a Sysman call fails,
+ * or if some domain provides neither request nor actual frequency.
+ *
+ * See gpu_read() on 'cache_idx' usage.
+ */
+static bool gpu_freqs(gpu_device_t *gpu, unsigned int cache_idx) {
+ uint32_t i, freq_count = 0;
+ zes_device_handle_t dev = gpu->handle;
+ if ((zesDeviceEnumFrequencyDomains(dev, &freq_count, NULL) !=
+ ZE_RESULT_SUCCESS)) {
+ ERROR(PLUGIN_NAME ": failed to get frequency domains count");
+ return false;
+ }
+ zes_freq_handle_t *freqs;
+ freqs = scalloc(freq_count, sizeof(*freqs));
+ if (zesDeviceEnumFrequencyDomains(dev, &freq_count, freqs) !=
+ ZE_RESULT_SUCCESS) {
+ ERROR(PLUGIN_NAME ": failed to get %d frequency domains", freq_count);
+ free(freqs);
+ return false;
+ }
+
+ if (gpu->frequency_count != freq_count) { /* domain count changed => redo sample cache */
+ INFO(PLUGIN_NAME ": Sysman reports %d frequency domains", freq_count);
+ gpu->frequency = (zes_freq_state_t **)gpu_subarray_realloc(
+ (void **)gpu->frequency, freq_count, sizeof(gpu->frequency[0][0]));
+ gpu->frequency_count = freq_count;
+ assert(gpu->frequency);
+ }
+
+ metric_family_t fam = {
+ .help = "HW frequency (in MHz)",
+ .name = METRIC_PREFIX "frequency_mhz",
+ .type = METRIC_TYPE_GAUGE,
+ };
+ metric_t metric = {0};
+
+ bool reported = false, ok = false;
+ for (i = 0; i < freq_count; i++) {
+ /* fetch freq samples */
+ if (zesFrequencyGetState(freqs[i], &(gpu->frequency[cache_idx][i])) !=
+ ZE_RESULT_SUCCESS) {
+ ERROR(PLUGIN_NAME ": failed to get frequency domain %d state", i);
+ ok = false;
+ break;
+ }
+ ok = true;
+ if (cache_idx > 0) { /* more samples still to come, just cache this one */
+ continue;
+ }
+ /* process samples */
+ if (!set_freq_labels(freqs[i], &metric)) {
+ ERROR(PLUGIN_NAME ": failed to get frequency domain %d properties", i);
+ ok = false;
+ break;
+ }
+
+ bool freq_ok = false; /* whether anything got reported for this domain */
+ double value;
+
+ if (config.samples < 2) {
+ /* negative value = unsupported:
+ * https://spec.oneapi.com/level-zero/latest/sysman/api.html#_CPPv416zes_freq_state_t
+ */
+ value = gpu->frequency[0][i].request;
+ if (value >= 0) {
+ metric.value.gauge = value;
+ metric_label_set(&metric, "type", "request");
+ metric_family_metric_append(&fam, metric);
+ freq_ok = true;
+ }
+ value = gpu->frequency[0][i].actual;
+ if (value >= 0) {
+ metric.value.gauge = value;
+ metric_label_set(&metric, "type", "actual");
+ metric_family_metric_append(&fam, metric);
+ freq_ok = true;
+ }
+ } else {
+ /* find min & max values for actual frequency & its request
+ * from (the configured number of) samples
+ */
+ double req_min = 1.0e12, req_max = -1.0e12;
+ double act_min = 1.0e12, act_max = -1.0e12;
+ for (uint32_t j = 0; j < config.samples; j++) {
+ value = gpu->frequency[j][i].request;
+ if (value < req_min) {
+ req_min = value;
+ }
+ if (value > req_max) {
+ req_max = value;
+ }
+ value = gpu->frequency[j][i].actual;
+ if (value < act_min) {
+ act_min = value;
+ }
+ if (value > act_max) {
+ act_max = value;
+ }
+ }
+ if (req_max >= 0.0) { /* at least one sample had a non-negative request */
+ metric.value.gauge = req_min;
+ metric_label_set(&metric, "type", "request");
+ metric_label_set(&metric, "function", "min");
+ metric_family_metric_append(&fam, metric);
+
+ metric.value.gauge = req_max;
+ metric_label_set(&metric, "function", "max");
+ metric_family_metric_append(&fam, metric);
+ freq_ok = true;
+ }
+ if (act_max >= 0.0) { /* at least one sample had a non-negative actual value */
+ metric.value.gauge = act_min;
+ metric_label_set(&metric, "type", "actual");
+ metric_label_set(&metric, "function", "min");
+ metric_family_metric_append(&fam, metric);
+
+ metric.value.gauge = act_max;
+ metric_label_set(&metric, "function", "max");
+ metric_family_metric_append(&fam, metric);
+ freq_ok = true;
+ }
+ }
+ if (freq_ok) {
+ reported = true;
+ } else {
+ ERROR(PLUGIN_NAME ": neither requests nor actual frequencies supported "
+ "for domain %d",
+ i);
+ ok = false;
+ break;
+ }
+ }
+ if (reported) { /* submits what was appended even if a later domain failed */
+ metric_reset(&metric);
+ gpu_submit(gpu, &fam);
+ }
+ free(freqs);
+ return ok;
+}
+
+/* Report throttling time, return true for success
+ *
+ * For each frequency domain, depending on 'config.output':
+ * - OUTPUT_RAW: throttle time counter is reported as-is
+ * - OUTPUT_DERIVED: throttled time ratio is calculated against the previous
+ * sample cached in gpu->throttle, when that has a non-zero timestamp
+ * which is older than the new one
+ *
+ * Returns true only when at least one domain was processed and all Sysman
+ * calls succeeded. Nothing is submitted otherwise.
+ */
+static bool gpu_freqs_throttle(gpu_device_t *gpu) {
+ uint32_t i, freq_count = 0;
+ zes_device_handle_t dev = gpu->handle;
+ if ((zesDeviceEnumFrequencyDomains(dev, &freq_count, NULL) !=
+ ZE_RESULT_SUCCESS)) {
+ ERROR(PLUGIN_NAME ": failed to get frequency (throttling) domains count");
+ return false;
+ }
+ zes_freq_handle_t *freqs;
+ freqs = scalloc(freq_count, sizeof(*freqs));
+ if (zesDeviceEnumFrequencyDomains(dev, &freq_count, freqs) !=
+ ZE_RESULT_SUCCESS) {
+ ERROR(PLUGIN_NAME ": failed to get %d frequency (throttling) domains",
+ freq_count);
+ free(freqs);
+ return false;
+ }
+
+ if (gpu->throttle_count != freq_count) { /* domain count changed => redo sample cache */
+ INFO(PLUGIN_NAME ": Sysman reports %d frequency (throttling) domains",
+ freq_count);
+ if (gpu->throttle) {
+ free(gpu->throttle);
+ }
+ gpu->throttle = scalloc(freq_count, sizeof(*gpu->throttle));
+ gpu->throttle_count = freq_count;
+ }
+
+ metric_family_t fam_ratio = {
+ .help =
+ "Ratio (0-1) of HW frequency being throttled during query interval",
+ .name = METRIC_PREFIX "throttled_ratio",
+ .type = METRIC_TYPE_GAUGE,
+ };
+ metric_family_t fam_counter = {
+ .help = "Total time HW frequency has been throttled (in microseconds)",
+ .name = METRIC_PREFIX "throttled_usecs_total",
+ .type = METRIC_TYPE_COUNTER,
+ };
+ metric_t metric = {0};
+
+ bool reported_ratio = false, reported_counter = false, ok = false;
+ for (i = 0; i < freq_count; i++) {
+ ze_result_t ret;
+ zes_freq_throttle_time_t throttle;
+ if (ret = zesFrequencyGetThrottleTime(freqs[i], &throttle),
+ ret != ZE_RESULT_SUCCESS) {
+ ERROR(PLUGIN_NAME
+ ": failed to get frequency domain %d throttle time => 0x%x",
+ i, ret);
+ ok = false;
+ break;
+ }
+ if (!set_freq_labels(freqs[i], &metric)) {
+ ERROR(PLUGIN_NAME ": failed to get frequency domain %d properties", i);
+ ok = false;
+ break;
+ }
+ if (config.output & OUTPUT_RAW) {
+ /* cannot convert microsecs to secs as counters are integers */
+ metric.value.counter = throttle.throttleTime;
+ metric_family_metric_append(&fam_counter, metric);
+ reported_counter = true;
+ }
+ zes_freq_throttle_time_t *old = &gpu->throttle[i];
+ if (old->timestamp && (config.output & OUTPUT_DERIVED) &&
+ throttle.timestamp > old->timestamp) {
+ /* micro seconds => throttle ratio */
+ metric.value.gauge = (throttle.throttleTime - old->throttleTime) /
+ (double)(throttle.timestamp - old->timestamp);
+ metric_family_metric_append(&fam_ratio, metric);
+ reported_ratio = true;
+ }
+ *old = throttle; /* current sample is the reference for the next query */
+ ok = true;
+ }
+ if (ok) {
+ metric_reset(&metric);
+ if (reported_ratio) {
+ gpu_submit(gpu, &fam_ratio);
+ }
+ if (reported_counter) {
+ gpu_submit(gpu, &fam_counter);
+ }
+ }
+ free(freqs);
+ return ok;
+}
+
+/* Report relevant temperature sensor values, return true for success
+ *
+ * Sensor type is mapped to the "location" label value, with types not
+ * listed here labeled as "unknown".
+ *
+ * Returns true only when at least one sensor was processed and all Sysman
+ * calls succeeded. Nothing is submitted otherwise.
+ */
+static bool gpu_temps(gpu_device_t *gpu) {
+ uint32_t i, temp_count = 0;
+ zes_device_handle_t dev = gpu->handle;
+ if ((zesDeviceEnumTemperatureSensors(dev, &temp_count, NULL) !=
+ ZE_RESULT_SUCCESS)) {
+ ERROR(PLUGIN_NAME ": failed to get temperature sensors count");
+ return false;
+ }
+ zes_temp_handle_t *temps;
+ temps = scalloc(temp_count, sizeof(*temps));
+ if (zesDeviceEnumTemperatureSensors(dev, &temp_count, temps) !=
+ ZE_RESULT_SUCCESS) {
+ ERROR(PLUGIN_NAME ": failed to get %d temperature sensors", temp_count);
+ free(temps);
+ return false;
+ }
+ if (gpu->temp_count != temp_count) { /* count is tracked here only to log its changes */
+ INFO(PLUGIN_NAME ": Sysman reports %d temperature sensors", temp_count);
+ gpu->temp_count = temp_count;
+ }
+
+ metric_family_t fam = {
+ .help = "Temperature sensor value (in Celsius) when queried",
+ .name = METRIC_PREFIX "temperature_celsius",
+ .type = METRIC_TYPE_GAUGE,
+ };
+ metric_t metric = {0};
+
+ bool ok = false;
+ for (i = 0; i < temp_count; i++) {
+ zes_temp_properties_t props;
+ if (zesTemperatureGetProperties(temps[i], &props) != ZE_RESULT_SUCCESS) {
+ ERROR(PLUGIN_NAME ": failed to get temperature sensor %d properties", i);
+ ok = false;
+ break;
+ }
+ const char *type;
+ /*
+ * https://spec.oneapi.io/level-zero/latest/sysman/PROG.html#querying-temperature
+ */
+ switch (props.type) {
+ /* max temperatures */
+ case ZES_TEMP_SENSORS_GLOBAL:
+ type = "global-max";
+ break;
+ case ZES_TEMP_SENSORS_GPU:
+ type = "gpu-max";
+ break;
+ case ZES_TEMP_SENSORS_MEMORY:
+ type = "memory-max";
+ break;
+ /* min temperatures */
+ case ZES_TEMP_SENSORS_GLOBAL_MIN:
+ type = "global-min";
+ break;
+ case ZES_TEMP_SENSORS_GPU_MIN:
+ type = "gpu-min";
+ break;
+ case ZES_TEMP_SENSORS_MEMORY_MIN:
+ type = "memory-min";
+ break;
+ default:
+ type = "unknown";
+ }
+
+ double value;
+ if (zesTemperatureGetState(temps[i], &value) != ZE_RESULT_SUCCESS) {
+ ERROR(PLUGIN_NAME ": failed to get temperature sensor %d (%s) state", i,
+ type);
+ ok = false;
+ break;
+ }
+ metric.value.gauge = value;
+ metric_label_set(&metric, "location", type);
+ metric_set_subdev(&metric, props.onSubdevice, props.subdeviceId);
+ metric_family_metric_append(&fam, metric);
+ ok = true;
+ }
+ if (ok) {
+ metric_reset(&metric);
+ gpu_submit(gpu, &fam);
+ }
+ free(temps);
+ return ok;
+}
+
+/* Report power usage for relevant domains, return true for success
+ *
+ * For each power domain, depending on 'config.output':
+ * - OUTPUT_RAW: energy counter is reported as-is
+ * - OUTPUT_DERIVED: average power is calculated against the previous
+ * sample cached in gpu->power, when that has a non-zero timestamp
+ * which is older than the new one
+ *
+ * Returns true only when at least one domain was processed and all Sysman
+ * calls succeeded. Nothing is submitted otherwise.
+ */
+static bool gpu_powers(gpu_device_t *gpu) {
+ uint32_t i, power_count = 0;
+ zes_device_handle_t dev = gpu->handle;
+ if ((zesDeviceEnumPowerDomains(dev, &power_count, NULL) !=
+ ZE_RESULT_SUCCESS)) {
+ ERROR(PLUGIN_NAME ": failed to get power domains count");
+ return false;
+ }
+ zes_pwr_handle_t *powers;
+ powers = scalloc(power_count, sizeof(*powers));
+ if (zesDeviceEnumPowerDomains(dev, &power_count, powers) !=
+ ZE_RESULT_SUCCESS) {
+ ERROR(PLUGIN_NAME ": failed to get %d power domains", power_count);
+ free(powers);
+ return false;
+ }
+
+ if (gpu->power_count != power_count) { /* domain count changed => redo sample cache */
+ INFO(PLUGIN_NAME ": Sysman reports %d power domains", power_count);
+ if (gpu->power) {
+ free(gpu->power);
+ }
+ gpu->power = scalloc(power_count, sizeof(*gpu->power));
+ gpu->power_count = power_count;
+ }
+
+ metric_family_t fam_power = {
+ .help = "Average power usage (in Watts) over query interval",
+ .name = METRIC_PREFIX "power_watts",
+ .type = METRIC_TYPE_GAUGE,
+ };
+ metric_family_t fam_energy = {
+ .help = "Total energy consumption since boot (in microjoules)",
+ .name = METRIC_PREFIX "energy_ujoules_total",
+ .type = METRIC_TYPE_COUNTER,
+ };
+ metric_t metric = {0};
+
+ bool reported_power = false, reported_energy = false, ok = false;
+ for (i = 0; i < power_count; i++) {
+ zes_power_properties_t props;
+ if (zesPowerGetProperties(powers[i], &props) != ZE_RESULT_SUCCESS) {
+ ERROR(PLUGIN_NAME ": failed to get power domain %d properties", i);
+ ok = false;
+ break;
+ }
+ zes_power_energy_counter_t counter;
+ if (zesPowerGetEnergyCounter(powers[i], &counter) != ZE_RESULT_SUCCESS) {
+ ERROR(PLUGIN_NAME ": failed to get power domain %d energy counter", i);
+ ok = false;
+ break;
+ }
+ metric_set_subdev(&metric, props.onSubdevice, props.subdeviceId);
+ if (config.output & OUTPUT_RAW) {
+ metric.value.counter = counter.energy;
+ metric_family_metric_append(&fam_energy, metric);
+ reported_energy = true;
+ }
+ zes_power_energy_counter_t *old = &gpu->power[i];
+ if (old->timestamp && (config.output & OUTPUT_DERIVED) &&
+ counter.timestamp > old->timestamp) {
+ /* microJoules / microSeconds => watts */
+ metric.value.gauge = (double)(counter.energy - old->energy) /
+ (counter.timestamp - old->timestamp);
+ metric_family_metric_append(&fam_power, metric);
+ reported_power = true;
+ }
+ *old = counter; /* current sample is the reference for the next query */
+ ok = true;
+ }
+ if (ok) {
+ metric_reset(&metric);
+ if (reported_energy) {
+ gpu_submit(gpu, &fam_energy);
+ }
+ if (reported_power) {
+ gpu_submit(gpu, &fam_power);
+ }
+ }
+ free(powers);
+ return ok;
+}
+
+/* Report engine activity in relevant groups, return true for success
+ *
+ * Engine group type is mapped to the "type" label value. For groups
+ * covering multiple engines the type name is used as-is, for single
+ * engines a per-type running index is appended to it ("<type>-NNN").
+ * Single engines are skipped if 'gpu->disabled.engine_single' is set.
+ *
+ * Depending on 'config.output':
+ * - OUTPUT_RAW: engine active time counter is reported as-is
+ * - OUTPUT_DERIVED: utilization ratio is calculated against the previous
+ *   sample cached in 'gpu->engine', when that has a non-zero timestamp
+ *   which is older than the new one
+ *
+ * Returns false if no engine group got processed, or if a Sysman call
+ * fails, in which case nothing is submitted.
+ */
+static bool gpu_engines(gpu_device_t *gpu) {
+  uint32_t i, engine_count = 0;
+  zes_device_handle_t dev = gpu->handle;
+  if ((zesDeviceEnumEngineGroups(dev, &engine_count, NULL) !=
+       ZE_RESULT_SUCCESS)) {
+    ERROR(PLUGIN_NAME ": failed to get engine groups count");
+    return false;
+  }
+  zes_engine_handle_t *engines;
+  engines = scalloc(engine_count, sizeof(*engines));
+  if (zesDeviceEnumEngineGroups(dev, &engine_count, engines) !=
+      ZE_RESULT_SUCCESS) {
+    ERROR(PLUGIN_NAME ": failed to get %d engine groups", engine_count);
+    free(engines);
+    return false;
+  }
+
+  /* group count changed => cached samples do not match groups any more */
+  if (gpu->engine_count != engine_count) {
+    INFO(PLUGIN_NAME ": Sysman reports %d engine groups", engine_count);
+    free(gpu->engine);
+    gpu->engine = scalloc(engine_count, sizeof(*gpu->engine));
+    gpu->engine_count = engine_count;
+  }
+
+  metric_family_t fam_ratio = {
+      .help = "Average GPU engine / group utilization ratio (0-1) over query "
+              "interval",
+      .name = METRIC_PREFIX "engine_ratio",
+      .type = METRIC_TYPE_GAUGE,
+  };
+  metric_family_t fam_counter = {
+      .help = "GPU engine / group execution time (activity) total (in "
+              "microseconds)",
+      .name = METRIC_PREFIX "engine_use_usecs_total",
+      .type = METRIC_TYPE_COUNTER,
+  };
+  metric_t metric = {0};
+
+  /* per engine type count of single engines seen so far */
+  int type_idx[16] = {0};
+  bool reported_ratio = false, reported_counter = false, ok = false;
+  for (i = 0; i < engine_count; i++) {
+    zes_engine_properties_t props;
+    if (zesEngineGetProperties(engines[i], &props) != ZE_RESULT_SUCCESS) {
+      ERROR(PLUGIN_NAME ": failed to get engine group %d properties", i);
+      ok = false;
+      break;
+    }
+    bool all = false;
+    const char *type;
+    switch (props.type) {
+    case ZES_ENGINE_GROUP_ALL:
+      type = "all";
+      all = true;
+      break;
+      /* multiple engines */
+    case ZES_ENGINE_GROUP_COMPUTE_ALL:
+      type = "compute";
+      all = true;
+      break;
+    case ZES_ENGINE_GROUP_MEDIA_ALL:
+      type = "media";
+      all = true;
+      break;
+    case ZES_ENGINE_GROUP_COPY_ALL:
+      type = "copy";
+      all = true;
+      break;
+      /* individual engines */
+    case ZES_ENGINE_GROUP_COMPUTE_SINGLE:
+      type = "compute";
+      break;
+    case ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE:
+      type = "decode";
+      break;
+    case ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE:
+      type = "encode";
+      break;
+    case ZES_ENGINE_GROUP_COPY_SINGLE:
+      type = "copy";
+      break;
+    case ZES_ENGINE_GROUP_RENDER_SINGLE:
+      type = "render";
+      break;
+
+      /* Following defines require at least Level-Zero release v1.1 */
+    case ZES_ENGINE_GROUP_RENDER_ALL:
+      type = "render";
+      all = true;
+      break;
+    case ZES_ENGINE_GROUP_3D_ALL:
+      type = "3d";
+      all = true;
+      break;
+    case ZES_ENGINE_GROUP_3D_RENDER_COMPUTE_ALL:
+      type = "3d-render-compute";
+      all = true;
+      break;
+    case ZES_ENGINE_GROUP_MEDIA_ENHANCEMENT_SINGLE:
+      type = "enhance";
+      break;
+    case ZES_ENGINE_GROUP_3D_SINGLE:
+      type = "3d";
+      break;
+
+    default:
+      type = "unknown";
+    }
+    const char *vname;
+    char buf[32];
+    if (all) {
+      vname = type;
+    } else {
+      if (gpu->disabled.engine_single) {
+        continue;
+      }
+      /* bound is the array element count, not its size in bytes */
+      assert(props.type < sizeof(type_idx) / sizeof(type_idx[0]));
+      /* include engine index as there can be multiple engines of same type */
+      snprintf(buf, sizeof(buf), "%s-%03d", type, type_idx[props.type]);
+      type_idx[props.type]++;
+      vname = buf;
+    }
+    ze_result_t ret;
+    zes_engine_stats_t stats;
+    if (ret = zesEngineGetActivity(engines[i], &stats),
+        ret != ZE_RESULT_SUCCESS) {
+      ERROR(PLUGIN_NAME ": failed to get engine %d (%s) group activity => 0x%x",
+            i, vname, ret);
+      ok = false;
+      break;
+    }
+    metric_set_subdev(&metric, props.onSubdevice, props.subdeviceId);
+    metric_label_set(&metric, "type", vname);
+    if (config.output & OUTPUT_RAW) {
+      metric.value.counter = stats.activeTime;
+      metric_family_metric_append(&fam_counter, metric);
+      reported_counter = true;
+    }
+    zes_engine_stats_t *old = &gpu->engine[i];
+    if (old->timestamp && (config.output & OUTPUT_DERIVED) &&
+        stats.timestamp > old->timestamp) {
+      metric.value.gauge = (double)(stats.activeTime - old->activeTime) /
+                           (stats.timestamp - old->timestamp);
+      metric_family_metric_append(&fam_ratio, metric);
+      reported_ratio = true;
+    }
+    /* current sample is the reference for the next query */
+    *old = stats;
+    ok = true;
+  }
+  if (ok) {
+    metric_reset(&metric);
+    if (reported_ratio) {
+      gpu_submit(gpu, &fam_ratio);
+    }
+    if (reported_counter) {
+      gpu_submit(gpu, &fam_counter);
+    }
+  }
+  free(engines);
+  return ok;
+}
+
+static int gpu_read(void) {
+ /* Query metrics from all GPUs that have not been disabled.
+ *
+ * A metric type gets disabled for given GPU when its query function
+ * returns false, and GPU with all metric types disabled gets disabled
+ * completely. Return value starts as RET_NO_METRICS and is changed to
+ * RET_OK once some GPU gets processed without it being completely disabled.
+ *
+ * no metrics yet */
+ int retval = RET_NO_METRICS;
+ /* go through all GPUs */
+ for (uint32_t i = 0; i < gpu_count; i++) {
+ gpu_device_t *gpu = &gpus[i];
+ gpu_disable_t *disabled = &gpu->disabled;
+ if (disabled->all) {
+ continue;
+ }
+ if (!gpu->counter) { /* first read for this GPU */
+ INFO(PLUGIN_NAME ": GPU-%d queries:", i);
+ }
+ /* 'cache_idx' is high frequency sampling aggregation counter.
+ *
+ * Functions needing that should use gpu_subarray_realloc() to
+ * allocate 'config.samples' sized array of metric value arrays,
+ * and use 'cache_idx' as index to that array.
+ *
+ * 'cache_idx' goes down to zero, so that functions themselves
+ * need to care less about config.samples value. But when it
+ * does reach zero, function should process 'config.samples'
+ * amount of cached items and provide aggregated metrics of
+ * them to gpu_submit().
+ */
+ unsigned int cache_idx =
+ (config.samples - 1) - gpu->counter % config.samples;
+ /* get potentially high-frequency metrics data (aggregate metrics sent when
+ * counter=0)
+ */
+ if (!disabled->freq && !gpu_freqs(gpu, cache_idx)) {
+ WARNING(PLUGIN_NAME
+ ": GPU-%d frequency query fail / no domains => disabled",
+ i);
+ disabled->freq = true;
+ }
+ if (!disabled->mem && !gpu_mems(gpu, cache_idx)) {
+ WARNING(PLUGIN_NAME ": GPU-%d memory query fail / no modules => disabled",
+ i);
+ disabled->mem = true;
+ }
+ /* rest of the metrics are read only when the high frequency
+ * counter goes down to zero
+ */
+ gpu->counter++;
+ if (cache_idx > 0) {
+ if (!disabled->all) { /* always true here, GPUs with 'all' set were skipped above */
+ /* there are still valid counters at least for this GPU */
+ retval = RET_OK;
+ }
+ continue;
+ }
+
+ /* process lower frequency counters */
+ if (config.samples > 1 && gpu->counter <= config.samples) { /* first sampling round */
+ INFO(PLUGIN_NAME ": GPU-%d queries:", i);
+ }
+ /* get lower frequency metrics */
+ if (!disabled->engine && !gpu_engines(gpu)) {
+ WARNING(PLUGIN_NAME ": GPU-%d engine query fail / no groups => disabled",
+ i);
+ disabled->engine = true;
+ }
+ if (!disabled->membw && !gpu_mems_bw(gpu)) {
+ WARNING(PLUGIN_NAME ": GPU-%d mem BW query fail / no modules => disabled",
+ i);
+ gpu->disabled.membw = true;
+ }
+ if (!disabled->power && !gpu_powers(gpu)) {
+ WARNING(PLUGIN_NAME ": GPU-%d power query fail / no domains => disabled",
+ i);
+ disabled->power = true;
+ }
+ if (!disabled->ras && !gpu_ras(gpu)) {
+ WARNING(PLUGIN_NAME ": GPU-%d errors query fail / no sets => disabled",
+ i);
+ disabled->ras = true;
+ }
+ if (!disabled->temp && !gpu_temps(gpu)) {
+ WARNING(PLUGIN_NAME
+ ": GPU-%d temperature query fail / no sensors => disabled",
+ i);
+ disabled->temp = true;
+ }
+ if (!disabled->throttle && !gpu_freqs_throttle(gpu)) {
+ WARNING(PLUGIN_NAME
+ ": GPU-%d throttle time query fail / no domains => disabled",
+ i);
+ gpu->disabled.throttle = true;
+ }
+ if (disabled->engine && disabled->mem && disabled->freq &&
+ disabled->membw && disabled->power && disabled->ras && disabled->temp &&
+ disabled->throttle) {
+ /* all metrics missing -> disable use of that GPU */
+ ERROR(PLUGIN_NAME ": No metrics from GPU-%d, disabling its querying", i);
+ disabled->all = true;
+ } else {
+ retval = RET_OK;
+ }
+ }
+ return retval;
+}
+
+/* Parse a single plugin config key / value pair to 'config'.
+ *
+ * Key matching is case-insensitive. Returns RET_OK when the key is known
+ * and its value is valid, RET_INVALID_CONFIG (after logging an error)
+ * otherwise.
+ */
+static int gpu_config_parse(const char *key, const char *value) {
+  /* all metrics are enabled by default, but user can disable them */
+  if (strcasecmp(key, KEY_DISABLE_ENGINE) == 0) {
+    config.disabled.engine = IS_TRUE(value);
+  } else if (strcasecmp(key, KEY_DISABLE_ENGINE_SINGLE) == 0) {
+    config.disabled.engine_single = IS_TRUE(value);
+  } else if (strcasecmp(key, KEY_DISABLE_FREQ) == 0) {
+    config.disabled.freq = IS_TRUE(value);
+  } else if (strcasecmp(key, KEY_DISABLE_MEM) == 0) {
+    config.disabled.mem = IS_TRUE(value);
+  } else if (strcasecmp(key, KEY_DISABLE_MEMBW) == 0) {
+    config.disabled.membw = IS_TRUE(value);
+  } else if (strcasecmp(key, KEY_DISABLE_POWER) == 0) {
+    config.disabled.power = IS_TRUE(value);
+  } else if (strcasecmp(key, KEY_DISABLE_RAS) == 0) {
+    config.disabled.ras = IS_TRUE(value);
+  } else if (strcasecmp(key, KEY_DISABLE_RAS_SEPARATE) == 0) {
+    config.disabled.ras_separate = IS_TRUE(value);
+  } else if (strcasecmp(key, KEY_DISABLE_TEMP) == 0) {
+    config.disabled.temp = IS_TRUE(value);
+  } else if (strcasecmp(key, KEY_DISABLE_THROTTLE) == 0) {
+    config.disabled.throttle = IS_TRUE(value);
+  } else if (strcasecmp(key, KEY_LOG_GPU_INFO) == 0) {
+    config.gpuinfo = IS_TRUE(value);
+  } else if (strcasecmp(key, KEY_METRICS_OUTPUT) == 0) {
+    /* value is the index of the matching name in 'metrics_output' */
+    config.output = OUTPUT_UNSET;
+    for (unsigned i = 0; i < STATIC_ARRAY_SIZE(metrics_output); i++) {
+      if (strcasecmp(value, metrics_output[i]) == 0) {
+        config.output = i;
+        break;
+      }
+    }
+    if (config.output == OUTPUT_UNSET) {
+      ERROR(PLUGIN_NAME ": Invalid '%s' config key value '%s'", key, value);
+      return RET_INVALID_CONFIG;
+    }
+  } else if (strcasecmp(key, KEY_SAMPLES) == 0) {
+    /* because collectd converts config values to floating point strings,
+     * trailing characters cannot be used to check that value is integer,
+     * so simply just take the integer part.
+     *
+     * strtol() is used instead of atoi() because it has defined behavior
+     * also for out-of-range values (they get clamped and therefore fail
+     * the range check), and it tells whether anything got converted
+     */
+    char *end;
+    long samples = strtol(value, &end, 10);
+    if (end == value || samples < 1 || samples > MAX_SAMPLES) {
+      ERROR(PLUGIN_NAME ": Invalid " KEY_SAMPLES " value '%s'", value);
+      return RET_INVALID_CONFIG;
+    }
+    /* number of samples cannot be changed without freeing per-GPU
+     * metrics cache arrays & members, zeroing metric counters and
+     * GPU cache index counter. However, this parse function should
+     * be called only before gpu structures have been initialized, so
+     * just assert here
+     */
+    assert(gpus == NULL);
+    config.samples = (int)samples;
+  } else {
+    ERROR(PLUGIN_NAME ": Invalid '%s' config key", key);
+    return RET_INVALID_CONFIG;
+  }
+  return RET_OK;
+}
+
+/* Plugin entry point: registers the config keys handled by
+ * gpu_config_parse(), and the init, read & shutdown callbacks.
+ */
+void module_register(void) {
+  /* NOTE: key strings *must* be static */
+  static const char *keys[] = {
+      KEY_DISABLE_ENGINE,
+      KEY_DISABLE_ENGINE_SINGLE,
+      KEY_DISABLE_FREQ,
+      KEY_DISABLE_MEM,
+      KEY_DISABLE_MEMBW,
+      KEY_DISABLE_POWER,
+      KEY_DISABLE_RAS,
+      KEY_DISABLE_RAS_SEPARATE,
+      KEY_DISABLE_TEMP,
+      KEY_DISABLE_THROTTLE,
+      KEY_METRICS_OUTPUT,
+      KEY_LOG_GPU_INFO,
+      KEY_SAMPLES,
+  };
+  const int keys_count = STATIC_ARRAY_SIZE(keys);
+
+  plugin_register_config(PLUGIN_NAME, gpu_config_parse, keys, keys_count);
+  plugin_register_init(PLUGIN_NAME, gpu_init);
+  plugin_register_read(PLUGIN_NAME, gpu_read);
+  plugin_register_shutdown(PLUGIN_NAME, gpu_config_free);
+} /* void module_register */
--- /dev/null
+/**
+ * collectd - src/gpu_sysman_test.c
+ *
+ * Copyright(c) 2020-2022 Intel Corporation. All rights reserved.
+ *
+ * Licensed under the same terms and conditions as src/gpu_sysman.c.
+ *
+ * Authors:
+ * - Eero Tamminen <eero.t.tamminen@intel.com>
+ *
+ * Testing for gpu_sysman.c Sysman API and its error handling.
+ *
+ * See: https://spec.oneapi.com/level-zero/latest/sysman/PROG.html
+ *
+ * Building unit-tests:
+ * gcc -I. -Idaemon -I/path/to/level-zero -O3 -g --coverage -Werror \
+ * -Wall -Wextra -Wpedantic -Wcast-align=strict -Wformat-security \
+ * gpu_sysman_test.c -o test_plugin_gpu_sysman
+ *
+ * Running unit-tests:
+ * ./test_plugin_gpu_sysman
+ *
+ * Testing for memory leakage:
+ * valgrind --error-exitcode=1 --leak-check=full test_plugin_gpu_sysman
+ *
+ * Test coverage:
+ * ./test_plugin_gpu_sysman
+ * gcov gpu_sysman_test.*
+ * Untested lines:
+ * grep '###' gpu_sysman.c.gcov
+ *
+ * Note:
+ * - Code lines run coverage is best with code compiled using -O3 because
+ * it causes gcc to convert switch-cases to lookup tables. Builds without
+ * optimizations have significantly lower coverage due to each (trivial
+ * and build-time verifiable) switch-case being considered separately
+ *
+ *
+ * Mock up functionality details:
+ * - All functions return only a single property or metric item,
+ * until hitting earlier set call limit, after which they return error
+ * - All metric property functions report them coming from subdevice 0
+ * (as non-subdevice cases can be tested on more easily available real HW)
+ * - Except for device.prop.type, subdev type in metric property, and
+ * actual metric values in metric state structs, all other struct members
+ * are zeroed
+ * - Memory free metric is decreased, all other metric values are increased
+ * after each query
+ *
+ * Testing validates that:
+ * - All registered config variables work and invalid config values are rejected
+ * - All mocked up Sysman functions get called when no errors are returned and
+ * count of Sysman calls is always same for plugin init() and read() callbacks
+ * - Plugin dispatch API receives correct values for all metrics both in
+ * single-sampling and multi-sampling configurations
+ * - Single Sysman call failing during init or metrics queries causes logging
+ * of the failure, and in case of metric queries, disabling of the (only)
+ * relevant metric, and that working for all metrics and Sysman APIs they call
+ * - Plugin init, shutdown and re-init works without problems
+ */
+
+#define KERNEL_LINUX 1
+#define FP_LAYOUT_NEED_NOTHING 1
+#include "gpu_sysman.c" /* test this */
+
+/* Sysman call tracing: VERBOSE_CALLS is the bit checked when logging calls,
+ * rest are the per-phase bits based on which set_verbose() sets or clears
+ * that check bit */
+#define VERBOSE_CALLS 1
+#define VERBOSE_CALLS_INIT 2
+#define VERBOSE_CALLS_INIT_LIMIT 4
+#define VERBOSE_CALLS_METRICS 8
+#define VERBOSE_CALLS_METRICS_LIMIT 16
+#define VERBOSE_CALLS_METRICS_SAMPLED 32
+
+/* metric values tracing: VERBOSE_METRICS is the check bit, rest are
+ * the per-phase bits based on which set_verbose() sets or clears
+ * that check bit */
+#define VERBOSE_METRICS 64
+#define VERBOSE_METRICS_NORMAL 128
+#define VERBOSE_METRICS_LIMIT 256
+#define VERBOSE_METRICS_SAMPLED 512
+
+static struct {
+ /* Test state shared by set_verbose() and call_limit().
+ *
+ * bitmask of enabled verbosity areas (VERBOSE_* bits) */
+ unsigned int verbose;
+
+ /* to be able to count & limit Sysman API calls: call_limit() increases
+ * 'api_calls' on each call, and tells the mock function to fail when
+ * that count equals non-zero 'api_limit' */
+ unsigned int api_calls, api_limit;
+
+ /* to verify that all mocked Level-Zero/Sysman functions get called,
+ * bit index is the 'callbit' value given to call_limit() */
+ unsigned int callbits;
+
+ /* how many errors & warnings have been logged */
+ unsigned int warnings;
+
+ /* how many messages have been logged regardless of log level */
+ unsigned int messages;
+} globs;
+
+/* Update the VERBOSE_CALLS and VERBOSE_METRICS check bits in the verbosity
+ * mask: check bit gets set (and that gets logged to stderr) when any of
+ * the enabling bits in the corresponding mask argument are set in the
+ * verbosity mask, otherwise it gets cleared.
+ */
+static void set_verbose(unsigned int callmask, unsigned int metricmask) {
+  if ((globs.verbose & callmask) == 0) {
+    globs.verbose &= ~VERBOSE_CALLS;
+  } else {
+    globs.verbose |= VERBOSE_CALLS;
+    fprintf(stderr, "Enabling call tracing...\n\n");
+  }
+
+  /* checked only after the call tracing bit is updated */
+  if ((globs.verbose & metricmask) == 0) {
+    globs.verbose &= ~VERBOSE_METRICS;
+  } else {
+    fprintf(stderr, "Enabling metrics value tracing...\n\n");
+    globs.verbose |= VERBOSE_METRICS;
+  }
+}
+
+/* Account for a call of the mocked API function 'name': mark its
+ * 'callbit' in the 'callbits' call type tracking bitmask, increase
+ * the 'api_calls' API call counter, and log the call when call tracing
+ * is enabled.
+ *
+ * return true if given call should be failed, i.e. when a (non-zero)
+ * call limit is set and this call is the one matching it
+ */
+static bool call_limit(int callbit, const char *name) {
+  const unsigned int nth = ++globs.api_calls;
+  globs.callbits |= 1u << callbit;
+
+  if (globs.verbose & VERBOSE_CALLS) {
+    fprintf(stderr, "CALL %d: %s()\n", nth, name);
+  }
+
+  const bool limit_hit = (globs.api_limit != 0 && nth == globs.api_limit);
+  if (limit_hit) {
+    fprintf(stderr, "LIMIT @ %d: %s()\n", nth, name);
+  }
+  return limit_hit;
+}
+
+/* ------------------------------------------------------------------------- */
+/* mock up level-zero init/driver/device handling API, called during gpu_init()
+ */
+
+/* mock up handle values to set & check against: driver & device handles
+ * are what mocked zeDriverGet() & zeDeviceGet() provide, and what mocked
+ * functions taking such handle require.
+ * NOTE(review): VAL_HANDLE users are not visible here, presumably it is
+ * for the metric handles -- confirm */
+#define DRV_HANDLE ((ze_driver_handle_t)(0x123456))
+#define DEV_HANDLE ((ze_device_handle_t)(0xecced))
+#define VAL_HANDLE 0xcaffa
+
+/* Mock: fails with "device lost" at call limit, rejects init flags
+ * other than zero or ZE_INIT_FLAG_GPU_ONLY, succeeds otherwise.
+ */
+ze_result_t zeInit(ze_init_flags_t flags) {
+  const bool limit_hit = call_limit(0, "zeInit");
+  if (limit_hit) {
+    return ZE_RESULT_ERROR_DEVICE_LOST;
+  }
+  const bool flags_ok = (flags == 0 || flags == ZE_INIT_FLAG_GPU_ONLY);
+  return flags_ok ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_INVALID_ENUMERATION;
+}
+
+/* Mock: fails with "device lost" at call limit. Otherwise reports
+ * one driver when called with zero count, and provides DRV_HANDLE
+ * when called with count of one.
+ */
+ze_result_t zeDriverGet(uint32_t *count, ze_driver_handle_t *handles) {
+  if (call_limit(1, "zeDriverGet")) {
+    return ZE_RESULT_ERROR_DEVICE_LOST;
+  }
+  if (count == NULL) {
+    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+  }
+  switch (*count) {
+  case 0:
+    /* count query */
+    *count = 1;
+    return ZE_RESULT_SUCCESS;
+  case 1:
+    break;
+  default:
+    return ZE_RESULT_ERROR_INVALID_SIZE;
+  }
+  if (handles == NULL) {
+    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+  }
+  handles[0] = DRV_HANDLE;
+  return ZE_RESULT_SUCCESS;
+}
+
+/* Mock: fails with "device lost" at call limit, and requires DRV_HANDLE
+ * as driver. Otherwise reports one device when called with zero count,
+ * and provides DEV_HANDLE when called with count of one.
+ */
+ze_result_t zeDeviceGet(ze_driver_handle_t drv, uint32_t *count,
+                        ze_device_handle_t *handles) {
+  if (call_limit(2, "zeDeviceGet")) {
+    return ZE_RESULT_ERROR_DEVICE_LOST;
+  }
+  if (drv != DRV_HANDLE) {
+    return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
+  }
+  if (count == NULL) {
+    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+  }
+  switch (*count) {
+  case 0:
+    /* count query */
+    *count = 1;
+    return ZE_RESULT_SUCCESS;
+  case 1:
+    break;
+  default:
+    return ZE_RESULT_ERROR_INVALID_SIZE;
+  }
+  if (handles == NULL) {
+    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+  }
+  handles[0] = DEV_HANDLE;
+  return ZE_RESULT_SUCCESS;
+}
+
+/* Mock: fails with "device lost" at call limit, and requires DEV_HANDLE
+ * as device. Otherwise zeroes given properties struct and sets its
+ * device type to GPU.
+ */
+ze_result_t zeDeviceGetProperties(ze_device_handle_t dev,
+                                  ze_device_properties_t *props) {
+  ze_result_t result = ZE_RESULT_SUCCESS;
+  if (call_limit(3, "zeDeviceGetProperties")) {
+    result = ZE_RESULT_ERROR_DEVICE_LOST;
+  } else if (dev != DEV_HANDLE) {
+    result = ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
+  } else if (props == NULL) {
+    result = ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+  } else {
+    memset(props, 0, sizeof(*props));
+    props->type = ZE_DEVICE_TYPE_GPU;
+  }
+  return result;
+}
+
+/* Mock: fails with "device lost" at call limit, and requires DEV_HANDLE
+ * as device. Otherwise reports one properties item when called with
+ * zero count, and zeroes given item when called with count of one.
+ */
+ze_result_t zeDeviceGetMemoryProperties(ze_device_handle_t dev, uint32_t *count,
+                                        ze_device_memory_properties_t *props) {
+  if (call_limit(4, "zeDeviceGetMemoryProperties")) {
+    return ZE_RESULT_ERROR_DEVICE_LOST;
+  }
+  if (dev != DEV_HANDLE) {
+    return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
+  }
+  if (count == NULL) {
+    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+  }
+  switch (*count) {
+  case 0:
+    /* count query */
+    *count = 1;
+    return ZE_RESULT_SUCCESS;
+  case 1:
+    break;
+  default:
+    return ZE_RESULT_ERROR_INVALID_SIZE;
+  }
+  if (props == NULL) {
+    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+  }
+  memset(props, 0, sizeof(*props));
+  return ZE_RESULT_SUCCESS;
+}
+
+/* mock up level-zero sysman device handling API, called during gpu_init() */
+
+/* Mock zesDeviceGetProperties(): for DEV_HANDLE, returns an all-zero
+ * properties struct; otherwise returns the matching error code.
+ */
+ze_result_t zesDeviceGetProperties(zes_device_handle_t dev,
+                                   zes_device_properties_t *props) {
+  if (call_limit(5, "zesDeviceGetProperties")) {
+    return ZE_RESULT_ERROR_DEVICE_LOST;
+  }
+  if (dev != DEV_HANDLE) {
+    return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
+  }
+  if (props == NULL) {
+    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+  }
+  memset(props, 0, sizeof(*props));
+  return ZE_RESULT_SUCCESS;
+}
+
+/* Mock zesDevicePciGetProperties(): for DEV_HANDLE, returns an all-zero PCI
+ * properties struct; otherwise returns the matching error code.
+ */
+ze_result_t zesDevicePciGetProperties(zes_device_handle_t dev,
+                                      zes_pci_properties_t *props) {
+  ze_result_t result = ZE_RESULT_SUCCESS;
+  if (call_limit(6, "zesDevicePciGetProperties")) {
+    result = ZE_RESULT_ERROR_DEVICE_LOST;
+  } else if (dev != DEV_HANDLE) {
+    result = ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
+  } else if (props == NULL) {
+    result = ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+  } else {
+    memset(props, 0, sizeof(*props));
+  }
+  return result;
+}
+
+/* Mock zesDeviceGetState(): for DEV_HANDLE, returns an all-zero device
+ * state; otherwise returns the matching error code.
+ */
+ze_result_t zesDeviceGetState(zes_device_handle_t dev,
+                              zes_device_state_t *state) {
+  if (call_limit(7, "zesDeviceGetState")) {
+    return ZE_RESULT_ERROR_DEVICE_LOST;
+  }
+  if (dev != DEV_HANDLE) {
+    return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
+  }
+  if (state == NULL) {
+    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+  }
+  memset(state, 0, sizeof(*state));
+  return ZE_RESULT_SUCCESS;
+}
+
+/* number of mocked init-time functions above (call_limit() indices 0-7),
+ * and a mask with one bit set for each of them
+ */
+#define INIT_CALL_FUNCS 8
+#define INIT_CALL_BITS (((uint64_t)1 << INIT_CALL_FUNCS) - 1)
+
+/* ------------------------------------------------------------------------- */
+/* mock up Sysman API metrics querying functions */
+
+/* start values and per-query increments for the mocked counters and their
+ * timestamps
+ */
+#define COUNTER_START 100000 // 100ms
+#define COUNTER_INC 20000 // 20ms
+#define TIME_START 5000000 // 5s in us
+#define TIME_INC 1000000 // 1s in us
+#define COUNTER_MAX TIME_INC
+
+/* counter increment per time increment, i.e. the ratio that should get
+ * reported as result of the above
+ */
+#define COUNTER_RATIO ((double)COUNTER_INC / TIME_INC)
+
+/* frequency start value and increment */
+#define FREQ_INIT 300
+#define FREQ_INC 50
+
+/* memory size, initially used amount, and per-query change */
+#define MEMORY_SIZE (1024 * 1024 * 1024)
+#define MEMORY_INIT (MEMORY_SIZE / 2) // so that both free & used get same value
+#define MEMORY_INC (MEMORY_SIZE / 64)
+
+/* RAS error count start value and increment */
+#define RAS_INIT 0
+#define RAS_INC 1
+
+/* temperature start value and increment */
+#define TEMP_INIT 10
+#define TEMP_INC 5
+
+/* Generates three mock functions for one metric type: the handle
+ * enumeration function, the properties getter and the state getter. They
+ * pass 'callbit', 'callbit + 1' and 'callbit + 2' respectively to
+ * call_limit().
+ *
+ * Arguments: call bit, metric enumeration function name, its handle type,
+ * corresponding zes*GetProperties() function name, its property struct type,
+ * corresponding zes*GetState() function name, its state struct type, global
+ * variable for initial state values, two increment operations for the global
+ * state variable members (or void)
+ *
+ * The enumeration function reports a single handle (VAL_HANDLE), the
+ * properties getter returns a struct with only 'onSubdevice' set, and the
+ * state getter returns a copy of the global state variable before applying
+ * the two increment operations to it.
+ */
+#define ADD_METRIC(callbit, getname, handletype, propname, proptype, \
+ statename, statetype, statevar, stateinc1, stateinc2) \
+ ze_result_t getname(zes_device_handle_t dev, uint32_t *count, \
+ handletype *handles) { \
+ if (call_limit(callbit, #getname)) \
+ return ZE_RESULT_ERROR_NOT_AVAILABLE; \
+ if (dev != DEV_HANDLE) \
+ return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; \
+ if (!count) \
+ return ZE_RESULT_ERROR_INVALID_NULL_POINTER; \
+ if (!*count) { \
+ *count = 1; \
+ return ZE_RESULT_SUCCESS; \
+ } \
+ if (*count != 1) \
+ return ZE_RESULT_ERROR_INVALID_SIZE; \
+ if (!handles) \
+ return ZE_RESULT_ERROR_INVALID_NULL_POINTER; \
+ handles[0] = (handletype)VAL_HANDLE; \
+ return ZE_RESULT_SUCCESS; \
+ } \
+ ze_result_t propname(handletype handle, proptype *prop) { \
+ proptype value = {.onSubdevice = true}; \
+ if (call_limit(callbit + 1, #propname)) \
+ return ZE_RESULT_ERROR_NOT_AVAILABLE; \
+ if (handle != (handletype)VAL_HANDLE) \
+ return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; \
+ if (!prop) \
+ return ZE_RESULT_ERROR_INVALID_NULL_POINTER; \
+ *prop = value; \
+ return ZE_RESULT_SUCCESS; \
+ } \
+ ze_result_t statename(handletype handle, statetype *state) { \
+ if (call_limit(callbit + 2, #statename)) \
+ return ZE_RESULT_ERROR_NOT_AVAILABLE; \
+ if (handle != (handletype)VAL_HANDLE) \
+ return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; \
+ if (!state) \
+ return ZE_RESULT_ERROR_INVALID_NULL_POINTER; \
+ *state = statevar; \
+ stateinc1; \
+ stateinc2; \
+ return ZE_RESULT_SUCCESS; \
+ }
+
+/* engine mocks (call indices 0-2): each activity query returns the current
+ * engine_stats, then advances activeTime by COUNTER_INC and timestamp by
+ * TIME_INC
+ */
+static zes_engine_stats_t engine_stats = {.activeTime = COUNTER_START,
+ .timestamp = TIME_START};
+
+ADD_METRIC(0, zesDeviceEnumEngineGroups, zes_engine_handle_t,
+ zesEngineGetProperties, zes_engine_properties_t,
+ zesEngineGetActivity, zes_engine_stats_t, engine_stats,
+ engine_stats.activeTime += COUNTER_INC,
+ engine_stats.timestamp += TIME_INC)
+
+/* frequency mocks (call indices 3-5): each state query returns the current
+ * freq_state, then raises the requested frequency by 2 * FREQ_INC and the
+ * actual one by FREQ_INC
+ */
+static zes_freq_state_t freq_state = {.request = FREQ_INIT,
+ .actual = FREQ_INIT};
+
+ADD_METRIC(3, zesDeviceEnumFrequencyDomains, zes_freq_handle_t,
+ zesFrequencyGetProperties, zes_freq_properties_t,
+ zesFrequencyGetState, zes_freq_state_t, freq_state,
+ freq_state.request += 2 * FREQ_INC, freq_state.actual += FREQ_INC)
+
+/* memory mocks (call indices 6-8): each state query returns the current
+ * mem_state, then reduces the free amount by MEMORY_INC and XORs the health
+ * member with ZES_MEM_HEALTH_OK
+ */
+static zes_mem_state_t mem_state = {.free = MEMORY_SIZE - MEMORY_INIT,
+ .size = MEMORY_SIZE};
+
+ADD_METRIC(6, zesDeviceEnumMemoryModules, zes_mem_handle_t,
+ zesMemoryGetProperties, zes_mem_properties_t, zesMemoryGetState,
+ zes_mem_state_t, mem_state, mem_state.free -= MEMORY_INC,
+ mem_state.health ^= ZES_MEM_HEALTH_OK)
+
+/* power mocks (call indices 9-11): each energy counter query returns the
+ * current power_counter, then advances energy by COUNTER_INC and timestamp
+ * by TIME_INC
+ */
+static zes_power_energy_counter_t power_counter = {.energy = COUNTER_START,
+ .timestamp = TIME_START};
+
+ADD_METRIC(9, zesDeviceEnumPowerDomains, zes_pwr_handle_t,
+ zesPowerGetProperties, zes_power_properties_t,
+ zesPowerGetEnergyCounter, zes_power_energy_counter_t, power_counter,
+ power_counter.energy += COUNTER_INC,
+ power_counter.timestamp += TIME_INC)
+
+/* 'dummy' fills the ADD_METRIC() state variable / increment slots that a
+ * given metric type has no use for
+ */
+static int dummy;
+static double temperature = TEMP_INIT;
+
+/* temperature mocks (call indices 12-14): each state query returns the
+ * current temperature and then raises it by TEMP_INC
+ */
+ADD_METRIC(12, zesDeviceEnumTemperatureSensors, zes_temp_handle_t,
+ zesTemperatureGetProperties, zes_temp_properties_t,
+ zesTemperatureGetState, double, temperature, temperature += TEMP_INC,
+ dummy = 0)
+
+/* RAS mocks (call indices 15-17): the generated state function is only a
+ * placeholder named zesRasGetDummy(); it takes call index 17, the same one
+ * that the hand-written zesRasGetState() uses
+ */
+ADD_METRIC(15, zesDeviceEnumRasErrorSets, zes_ras_handle_t, zesRasGetProperties,
+ zes_ras_properties_t, zesRasGetDummy, int,
+ dummy, // dummy as state API differs from others
+ dummy = 0, dummy = 0)
+
+/* Mock zesRasGetState(): for VAL_HANDLE, zeroes *state and reports a running
+ * error count (starting from RAS_INIT, growing by RAS_INC per successful
+ * call) in the compute errors category. Clearing the counters is not
+ * supported.
+ */
+ze_result_t zesRasGetState(zes_ras_handle_t handle, ze_bool_t clear,
+                           zes_ras_state_t *state) {
+  static uint64_t errors = RAS_INIT;
+
+  if (call_limit(17, "zesRasGetState")) {
+    return ZE_RESULT_ERROR_NOT_AVAILABLE;
+  }
+  if (handle != (zes_ras_handle_t)VAL_HANDLE) {
+    return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
+  }
+  if (clear) {
+    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
+  }
+  if (state == NULL) {
+    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+  }
+  memset(state, 0, sizeof(*state));
+  /* props default to zeroes i.e. correctable error type,
+   * so this needs to be a correctable category
+   */
+  state->category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS] = errors;
+  errors += RAS_INC;
+  return ZE_RESULT_SUCCESS;
+}
+
+/* Mock zesFrequencyGetThrottleTime(): for VAL_HANDLE, returns the current
+ * throttle counter and then advances its timestamp by TIME_INC and its
+ * throttle time by COUNTER_INC for the next call.
+ */
+ze_result_t zesFrequencyGetThrottleTime(zes_freq_handle_t handle,
+                                        zes_freq_throttle_time_t *state) {
+  static zes_freq_throttle_time_t current = {.throttleTime = COUNTER_START,
+                                             .timestamp = TIME_START};
+
+  if (call_limit(18, "zesFrequencyGetThrottleTime")) {
+    return ZE_RESULT_ERROR_NOT_AVAILABLE;
+  }
+  if (handle != (zes_freq_handle_t)VAL_HANDLE) {
+    return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
+  }
+  if (state == NULL) {
+    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+  }
+  *state = current;
+  current.throttleTime += COUNTER_INC;
+  current.timestamp += TIME_INC;
+  return ZE_RESULT_SUCCESS;
+}
+
+/* Mock zesMemoryGetBandwidth(): for VAL_HANDLE, returns the current
+ * bandwidth counters and then advances them for the next call; the read
+ * counter starts at, and grows by, twice the write counter's values.
+ */
+ze_result_t zesMemoryGetBandwidth(zes_mem_handle_t handle,
+                                  zes_mem_bandwidth_t *state) {
+  static zes_mem_bandwidth_t current = {.readCounter = 2 * COUNTER_START,
+                                        .writeCounter = COUNTER_START,
+                                        .maxBandwidth = COUNTER_MAX,
+                                        .timestamp = TIME_START};
+
+  if (call_limit(19, "zesMemoryGetBandwidth")) {
+    return ZE_RESULT_ERROR_NOT_AVAILABLE;
+  }
+  if (handle != (zes_mem_handle_t)VAL_HANDLE) {
+    return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
+  }
+  if (state == NULL) {
+    return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+  }
+  *state = current;
+  current.readCounter += 2 * COUNTER_INC;
+  current.writeCounter += COUNTER_INC;
+  current.timestamp += TIME_INC;
+  return ZE_RESULT_SUCCESS;
+}
+
+/* number of call_limit() indices used by the mocked metric query functions
+ * above (0-19), and a mask with one bit set for each of them
+ */
+#define QUERY_CALL_FUNCS 20
+#define QUERY_CALL_BITS (((uint64_t)1 << QUERY_CALL_FUNCS) - 1)
+
+/* ------------------------------------------------------------------------- */
+/* mock up metrics reporting and validation */
+
+/* one entry of the metrics validation table: what is expected for a metric
+ * and what has been recorded for it since the last reset
+ */
+typedef struct {
+ /* metric name that reported (composed) metric names are matched against */
+ const char *name;
+ /* present also when multisampling */
+ const bool multipresent;
+ /* metric values are multisampled and present only when multisampling */
+ const bool multisampled;
+ /* expected first value, and expected change per round */
+ const double value_init;
+ const double value_inc;
+ /* how many times the metric has been reported since last reset */
+ unsigned int count;
+ /* last reported value */
+ double last;
+} metrics_validation_t;
+
+/* initially used share of memory, and its change per query */
+#define RATIO_INIT ((double)MEMORY_INIT / MEMORY_SIZE)
+#define RATIO_INC ((double)MEMORY_INC / MEMORY_SIZE)
+
+/* Expected metrics. Columns: name, multipresent, multisampled, value_init,
+ * value_inc, count, last.
+ *
+ * Entry order matters: plugin_dispatch_metric_family() takes the first
+ * entry whose name is a substring of the reported metric name, so the
+ * ".../min" and ".../max" entries need to come before the entry that is
+ * their prefix.
+ */
+static metrics_validation_t valid_metrics[] = {
+ {"all_errors_total", true, false, RAS_INIT, RAS_INC, 0, 0.0},
+ {"frequency_mhz/actual/gpu/min", true, true, FREQ_INIT, FREQ_INC, 0, 0.0},
+ {"frequency_mhz/actual/gpu/max", true, true, FREQ_INIT, FREQ_INC, 0, 0.0},
+ {"frequency_mhz/actual/gpu", false, false, FREQ_INIT, FREQ_INC, 0, 0.0},
+ {"frequency_mhz/request/gpu/min", true, true, FREQ_INIT, 2 * FREQ_INC, 0,
+ 0.0},
+ {"frequency_mhz/request/gpu/max", true, true, FREQ_INIT, 2 * FREQ_INC, 0,
+ 0.0},
+ {"frequency_mhz/request/gpu", false, false, FREQ_INIT, 2 * FREQ_INC, 0,
+ 0.0},
+ {"memory_used_bytes/HBM/system/min", true, true, MEMORY_INIT, +MEMORY_INC,
+ 0, 0.0},
+ {"memory_used_bytes/HBM/system/max", true, true, MEMORY_INIT, +MEMORY_INC,
+ 0, 0.0},
+ {"memory_used_bytes/HBM/system", false, false, MEMORY_INIT, +MEMORY_INC, 0,
+ 0.0},
+ {"memory_usage_ratio/HBM/system/min", true, true, RATIO_INIT, +RATIO_INC, 0,
+ 0.0},
+ {"memory_usage_ratio/HBM/system/max", true, true, RATIO_INIT, +RATIO_INC, 0,
+ 0.0},
+ {"memory_usage_ratio/HBM/system", false, false, RATIO_INIT, +RATIO_INC, 0,
+ 0.0},
+ {"temperature_celsius", true, false, TEMP_INIT, TEMP_INC, 0, 0.0},
+
+ /* while counters increase, per-time incremented value should stay same */
+ {"engine_use_usecs_total/all", true, false, COUNTER_START, COUNTER_INC, 0,
+ 0.0},
+ {"engine_ratio/all", true, false, COUNTER_RATIO, 0, 0, 0.0},
+ {"throttled_usecs_total/gpu", true, false, COUNTER_START, COUNTER_INC, 0,
+ 0.0},
+ {"throttled_ratio/gpu", true, false, COUNTER_RATIO, 0, 0, 0.0},
+ {"memory_bw_bytes_total/HBM/system/read", true, false, 2 * COUNTER_START,
+ 2 * COUNTER_INC, 0, 0.0},
+ {"memory_bw_bytes_total/HBM/system/write", true, false, COUNTER_START,
+ COUNTER_INC, 0, 0.0},
+ {"memory_bw_ratio/HBM/system/read", true, false, 2 * COUNTER_RATIO, 0, 0,
+ 0.0},
+ {"memory_bw_ratio/HBM/system/write", true, false, COUNTER_RATIO, 0, 0, 0.0},
+ {"energy_ujoules_total", true, false, COUNTER_START, COUNTER_INC, 0, 0.0},
+ {"power_watts", true, false, COUNTER_RATIO, 0, 0, 0.0},
+};
+
+/* VALIDATE: clears the recorded state (count + last value) of every
+ * reported metric and returns how many metrics were either missing or had a
+ * wrong value.
+ *
+ * A metric that was not reported since the last reset counts as missing
+ * only when it was expected in the current mode: 'multipresent' ones when
+ * 'multisampled' is non-zero, non-'multisampled' ones when it is zero.
+ *
+ * When 'base_rounds' is non-zero, the last reported value is also compared
+ * to the value expected for that round; a mismatch is logged and counted.
+ * The round is 'base_rounds' - 1, increased based on 'multisampled',
+ * 'config.samples' and the metric's 'multisampled' flag.
+ *
+ * Aborts if a metric not marked 'multipresent' gets reported while
+ * multisampling, as that means the validation table is wrong.
+ */
+static int validate_and_reset_saved_metrics(unsigned int base_rounds,
+                                            unsigned int multisampled) {
+  assert(config.samples > 0);
+  int missing = 0;
+  int wrong = 0;
+  metrics_validation_t *const end =
+      valid_metrics + STATIC_ARRAY_SIZE(valid_metrics);
+
+  for (metrics_validation_t *metric = valid_metrics; metric < end; metric++) {
+    if (metric->count == 0) {
+      /* unreported metric is an error only if it is expected in this mode */
+      const bool missed =
+          multisampled ? metric->multipresent : !metric->multisampled;
+      if (missed) {
+        fprintf(stderr, "expected metric type '%s' not reported\n",
+                metric->name);
+        missing++;
+      }
+      continue;
+    }
+    /* verify metrics array above is correctly filled */
+    if (multisampled && !metric->multipresent) {
+      fprintf(stderr, "%s: %s / %s = %g (%d)\n", metric->name,
+              metric->multipresent ? "multipresent" : "-",
+              metric->multisampled ? "multisampled" : "-", metric->last,
+              metric->count);
+      abort();
+    }
+
+    /* take the recorded value and reset the entry for the next round */
+    const double last = metric->last;
+    metric->count = 0;
+    metric->last = 0.0;
+    if (base_rounds == 0) {
+      /* no metric value checking requested */
+      continue;
+    }
+
+    int incrounds = base_rounds - 1;
+    if (!multisampled || !metric->multisampled) {
+      /* other metrics are sampled only at sample intervals */
+      incrounds += multisampled / config.samples;
+    } else {
+      /* min for increasing metrics, and max for decreasing ones, is the
+       * first value in given multisample round
+       */
+      const bool rising_min =
+          metric->value_inc > 0.0 && strstr(metric->name, "/min");
+      const bool falling_max =
+          metric->value_inc < 0.0 && strstr(metric->name, "/max");
+      if (rising_min || falling_max) {
+        incrounds += multisampled - config.samples + 1;
+      } else {
+        /* for all others, it's the last value sampled */
+        incrounds += multisampled;
+      }
+    }
+
+    const double expected = metric->value_init + incrounds * metric->value_inc;
+    if (last == expected) {
+      if (globs.verbose & VERBOSE_METRICS) {
+        fprintf(stderr, "round %d metric value verified for '%s' (%.2f)\n",
+                incrounds, metric->name, expected);
+      }
+    } else {
+      fprintf(
+          stderr,
+          "ERROR: expected %g, but got value %g for metric '%s' on round %d\n",
+          expected, last, metric->name, incrounds);
+      wrong++;
+    }
+  }
+  if (missing && (globs.verbose & VERBOSE_METRICS)) {
+    fprintf(stderr, "%d metric(s) missing\n", missing);
+  }
+  return missing + wrong;
+}
+
+/* qsort() comparator for label pairs: orders them by name in reverse
+ * (descending) order, so that the 'type' label comes first
+ */
+static int cmp_labels(const void *a, const void *b) {
+  const label_pair_t *first = a;
+  const label_pair_t *second = b;
+  return strcmp(second->name, first->name);
+}
+
+/* Constructs a metric name into 'buf' (of 'bufsize' bytes) from the metric
+ * family 'name' followed by "/<value>" for each label value of 'metric'.
+ *
+ * Labels are first sorted in place (reverse name order) to get a stable
+ * result, and values of the "pci_bdf" and "sub_dev" labels are left out.
+ * Asserts if the metric has no labels or the result would not fit.
+ */
+static void compose_name(char *buf, size_t bufsize, const char *name,
+                         metric_t *metric) {
+  label_pair_t *label = metric->label.ptr;
+  size_t num = metric->label.num;
+  assert(num && label);
+
+  /* guarantee stable label ordering i.e. names */
+  qsort(label, num, sizeof(*label), cmp_labels);
+
+  /* compose names (metric family + metric label values) */
+  size_t len = strlen(name);
+  assert(len < bufsize);
+  sstrncpy(buf, name, bufsize);
+  for (size_t i = 0; i < num; i++) {
+    const char *key = label[i].name;
+    const char *value = label[i].value;
+    assert(key && value);
+    if (strcmp(key, "pci_bdf") == 0 || strcmp(key, "sub_dev") == 0) {
+      /* do not add device PCI ID / sub device IDs to metric name */
+      continue;
+    }
+    int added = snprintf(buf + len, bufsize - len, "/%s", value);
+    assert(added >= 0);
+    len += (size_t)added;
+    /* snprintf() returns the untruncated length, so this has to be checked
+     * on every round: once 'len' reaches 'bufsize', 'bufsize - len' on the
+     * next round would wrap around and allow writing past the buffer
+     */
+    assert(len < bufsize);
+  }
+}
+
+/* Returns the counter or gauge member of 'value' as double, based on
+ * 'type'. Any other metric type is a test failure and terminates the
+ * program.
+ */
+static double get_value(metric_type_t type, value_t value) {
+  switch (type) {
+  case METRIC_TYPE_COUNTER:
+    return value.counter;
+  case METRIC_TYPE_GAUGE:
+    return value.gauge;
+  default:
+    break;
+  }
+  assert(0);
+  /* not reached when asserts are enabled; guarantees that control never
+   * falls off the end of this non-void function when they are not
+   */
+  abort();
+}
+
+/* Mock metric dispatch: composes a name for each metric in the family and
+ * records its value (and bumps its count) in the first validation table
+ * entry whose name is a substring of the composed name.
+ *
+ * Returns 0 right away on the first metric whose name contains "errors" but
+ * not "all_errors". Otherwise asserts that at least one metric of the
+ * family matched the validation table, and returns 0.
+ */
+int plugin_dispatch_metric_family(metric_family_t const *fam) {
+  assert(fam && fam->name && fam->metric.num && fam->metric.ptr);
+
+  const size_t total = STATIC_ARRAY_SIZE(valid_metrics);
+  bool found = false;
+
+  for (size_t m = 0; m < fam->metric.num; m++) {
+    metric_t *current = &fam->metric.ptr[m];
+    char name[128];
+
+    double value = get_value(fam->type, current->value);
+    compose_name(name, sizeof(name), fam->name, current);
+    if (globs.verbose & VERBOSE_METRICS) {
+      fprintf(stderr, "METRIC: %s: %.2f\n", name, value);
+    }
+    /* for now, ignore other errors than for all_errors */
+    if (strstr(name, "errors") != NULL && strstr(name, "all_errors") == NULL) {
+      return 0;
+    }
+    /* find first table entry that matches the composed name */
+    size_t v = 0;
+    while (v < total && strstr(name, valid_metrics[v].name) == NULL) {
+      v++;
+    }
+    if (v < total) {
+      valid_metrics[v].last = value;
+      valid_metrics[v].count++;
+      found = true;
+    }
+  }
+  assert(found);
+  return 0;
+}
+
+#define MAX_LABELS 8
+
+/* Mock label setter: sets label 'name' of metric 'm' to a copy of 'value',
+ * adding the label if the metric does not have it yet. Always returns 0.
+ *
+ * Uses just one large enough (MAX_LABELS) label array, allocated when the
+ * first label is set, instead of increasing it one-by-one like the real
+ * collectd metrics code does. Removing a label (NULL 'value') is not
+ * supported.
+ */
+int metric_label_set(metric_t *m, char const *name, char const *value) {
+  assert(m && name);
+  label_pair_t *pair = m->label.ptr;
+  if (m->label.num == 0) {
+    assert(!pair);
+    pair = scalloc(MAX_LABELS, sizeof(*pair));
+    m->label.ptr = pair;
+  } else {
+    assert(m->label.num < MAX_LABELS);
+    assert(pair);
+  }
+
+  /* stop at the label with given name, or at the first unused slot */
+  int slot = 0;
+  while (slot < MAX_LABELS && pair[slot].name != NULL &&
+         strcmp(name, pair[slot].name) != 0) {
+    slot++;
+  }
+  if (slot < MAX_LABELS && pair[slot].name == NULL) {
+    /* not found -> new label */
+    pair[slot].name = strdup(name);
+    m->label.num++;
+  }
+  assert(value); /* removing label with NULL 'value' is not supported */
+  free(pair[slot].value);
+  pair[slot].value = strdup(value);
+  return 0;
+}
+
+/* Mock metric reset: frees all label names and values of 'm' and the label
+ * array itself, leaving the metric without labels. Asserts that the number
+ * of labels found in the array matches the metric's label count. Always
+ * returns 0.
+ */
+int metric_reset(metric_t *m) {
+  assert(m);
+  label_pair_t *pair = m->label.ptr;
+  if (m->label.num == 0) {
+    assert(!pair);
+    return 0;
+  }
+  assert(pair);
+
+  /* used slots are the ones before the first slot without a name */
+  size_t freed = 0;
+  while (freed < MAX_LABELS && pair[freed].name != NULL) {
+    free(pair[freed].name);
+    free(pair[freed].value);
+    pair[freed].name = NULL;
+    pair[freed].value = NULL;
+    freed++;
+  }
+  assert(freed == m->label.num);
+
+  free(pair);
+  m->label.ptr = NULL;
+  m->label.num = 0;
+  return 0;
+}
+
+#define MAX_METRICS 8
+
+/* Mock metric append: adds a copy of metric 'm', including copies of its
+ * labels, to the end of the family's metric array, and points the copy's
+ * 'family' member to 'fam'. Always returns 0.
+ *
+ * Uses just one large enough (MAX_METRICS) metrics array, allocated when
+ * the first metric is appended, instead of increasing it one-by-one like
+ * the real collectd metrics code does.
+ */
+int metric_family_metric_append(metric_family_t *fam, metric_t m) {
+  assert(fam);
+  size_t num = fam->metric.num;
+  metric_t *metric = fam->metric.ptr;
+  if (num) {
+    assert(num < MAX_METRICS);
+    assert(metric);
+  } else {
+    assert(!metric);
+    metric = scalloc(MAX_METRICS, sizeof(*metric));
+    fam->metric.ptr = metric;
+  }
+  /* copy metric and pointers to its labels */
+  metric[num] = m;
+  /* 'm' is a by-value parameter, so the family pointer needs to be set to
+   * the stored copy for it to have any effect
+   */
+  metric[num].family = fam;
+  label_pair_t *src = m.label.ptr;
+  if (src) {
+    assert(m.label.num <= MAX_LABELS);
+    /* alloc max size as labels can be added also to family metrics copies */
+    label_pair_t *dst = scalloc(MAX_LABELS, sizeof(*src));
+    metric[num].label.ptr = dst;
+    for (size_t i = 0; i < m.label.num; i++) {
+      dst[i].name = strdup(src[i].name);
+      dst[i].value = strdup(src[i].value);
+    }
+  }
+  fam->metric.num++;
+  return 0;
+}
+
+/* Mock family reset: frees the labels of every metric in the family and
+ * then the metric array itself, leaving the family empty. Always returns 0.
+ */
+int metric_family_metric_reset(metric_family_t *fam) {
+  for (size_t m = 0; m < fam->metric.num; m++) {
+    metric_t *current = &fam->metric.ptr[m];
+    label_pair_t *labels = current->label.ptr;
+    for (size_t l = 0; l < current->label.num; l++) {
+      free(labels[l].name);
+      free(labels[l].value);
+    }
+    free(labels);
+    current->label.num = 0;
+    current->label.ptr = NULL;
+  }
+  free(fam->metric.ptr);
+  fam->metric.num = 0;
+  fam->metric.ptr = NULL;
+  return 0;
+}
+
+/* ------------------------------------------------------------------------- */
+/* mock up of collectd plugin API */
+
+/* what the plugin has registered through the mocked plugin_register_*()
+ * functions
+ */
+static struct {
+ /* copy of the name given at config registration */
+ char *name;
+ /* copies of the registered config keys, and their count */
+ char **keys;
+ unsigned int key_count;
+ /* registered plugin callbacks */
+ int (*config)(const char *key, const char *val);
+ plugin_init_cb init;
+ int (*read)(void);
+ plugin_shutdown_cb shutdown;
+} registry;
+
+/* Mock config registration: stores the config callback and copies of the
+ * plugin name and its config keys into the test registry. Always returns 0.
+ */
+int plugin_register_config(const char *name,
+                           int (*callback)(const char *key, const char *val),
+                           const char **keys, int keys_num) {
+  assert(name && callback && keys && keys_num > 0);
+
+  char **copies = scalloc(keys_num, sizeof(*copies));
+  for (int k = 0; k < keys_num; k++) {
+    assert(keys[k]);
+    copies[k] = strdup(keys[k]);
+  }
+
+  registry.name = strdup(name);
+  registry.config = callback;
+  registry.keys = copies;
+  registry.key_count = keys_num;
+  return 0;
+}
+/* Mock init registration: stores the init callback to the test registry
+ * after asserting that 'name' matches the name given at config
+ * registration. Always returns 0.
+ */
+int plugin_register_init(const char *name, plugin_init_cb callback) {
+  assert(name != NULL && callback != NULL);
+  assert(strcmp(registry.name, name) == 0);
+  registry.init = callback;
+  return 0;
+}
+/* Mock read registration: stores the read callback to the test registry
+ * after asserting that 'name' matches the name given at config
+ * registration. Always returns 0.
+ */
+int plugin_register_read(const char *name, int (*callback)(void)) {
+  assert(name != NULL && callback != NULL);
+  assert(strcmp(registry.name, name) == 0);
+  registry.read = callback;
+  return 0;
+}
+/* Mock shutdown registration: stores the shutdown callback to the test
+ * registry after asserting that 'name' matches the name given at config
+ * registration. Always returns 0.
+ */
+int plugin_register_shutdown(const char *name, plugin_shutdown_cb callback) {
+  assert(name != NULL && callback != NULL);
+  assert(strcmp(registry.name, name) == 0);
+  registry.shutdown = callback;
+  return 0;
+}
+
+/* ------------------------------------------------------------------------- */
+/* helper code partially copied from collectd (initially Copyright Florian
+ * Forster) */
+
+/* Log level names, indexed by log level: plugin_log() uses the level as an
+ * index to this table, and plugin_register() asserts that each entry's
+ * level equals its index. Levels below LOG_ERR get just a placeholder name.
+ */
+static const struct {
+ int level;
+ const char *name;
+} log_levels[] = {{0, "???"},
+ {1, "???"},
+ {2, "???"},
+ {LOG_ERR, "ERROR"},
+ {LOG_WARNING, "WARN"},
+ {LOG_NOTICE, "NOTICE"},
+ {LOG_INFO, "INFO"},
+ {LOG_DEBUG, "DEBUG"}};
+
+/* Mock logging (half based on daemon/plugin.c): counts every message, and
+ * additionally the ones at LOG_WARNING level or below as warnings, then
+ * prints the formatted message to stderr followed by its level name.
+ */
+void plugin_log(int level, const char *format, ...) {
+  assert(level >= LOG_ERR && level < (int)STATIC_ARRAY_SIZE(log_levels));
+
+  globs.messages++;
+  if (level <= LOG_WARNING) {
+    globs.warnings++;
+  }
+
+  char text[1024];
+  va_list args;
+  va_start(args, format);
+  vsnprintf(text, sizeof(text), format, args);
+  va_end(args);
+
+  fprintf(stderr, "%s (%s)\n", text, log_levels[level].name);
+}
+
+/* Safe strncpy() wrapper from utils/common/common.c: copies at most 'n'
+ * bytes from 'src' to 'dest' and always terminates the result within those
+ * 'n' bytes. With 'n' of zero, nothing is written. Returns 'dest'.
+ */
+char *sstrncpy(char *dest, const char *src, size_t n) {
+  if (n == 0) {
+    /* no room even for the terminator, 'dest[n - 1]' would be out of bounds */
+    return dest;
+  }
+  strncpy(dest, src, n);
+  dest[n - 1] = '\0';
+  return dest;
+}
+/* calloc() wrapper which asserts that the allocation succeeded */
+void *scalloc(size_t nmemb, size_t size) {
+  void *mem = calloc(nmemb, size);
+  assert(mem != NULL);
+  return mem;
+}
+/* malloc() wrapper which asserts that the allocation succeeded */
+void *smalloc(size_t size) {
+  void *mem = malloc(size);
+  assert(mem != NULL);
+  return mem;
+}
+
+/* ------------------------------------------------------------------------- */
+/* TEST: plugin setup & teardown */
+
+/* Verifies that the log level table is indexed by level, lets the plugin
+ * register itself, and asserts that it registered all of the config, init,
+ * read and shutdown callbacks.
+ */
+static void plugin_register(void) {
+  const int levels = (int)STATIC_ARRAY_SIZE(log_levels);
+  for (int level = 0; level < levels; level++) {
+    /* verify log levels match expected */
+    assert(log_levels[level].level == level);
+  }
+
+  module_register();
+
+  assert(registry.config);
+  assert(registry.init);
+  assert(registry.read);
+  assert(registry.shutdown);
+}
+
+/* Frees the test registry allocations (copies of the config keys and the
+ * plugin name) after config checks are done. The key count is zeroed too,
+ * so that nothing iterates over the freed key array afterwards.
+ */
+static void plugin_register_free(void) {
+  for (unsigned int i = 0; i < registry.key_count; i++) {
+    free(registry.keys[i]);
+  }
+  free(registry.keys);
+  registry.keys = NULL;
+  registry.key_count = 0;
+  free(registry.name);
+  registry.name = NULL;
+}
+
+/* ------------------------------------------------------------------------- */
+
+/* TEST: config keys. Returns the number of failures, i.e. 0 for success.
+ *
+ * 'check_nonbool' enables checks for the non-boolean config keys.
+ *
+ * Every registered key starting with "Disable" or "Log" is set to "true".
+ * With 'enable_metrics', "Disable*" keys are then set to "false" (enabling
+ * querying of all metrics), and without 'enable_logs', "Log*" keys are then
+ * set to "false".
+ */
+static int test_config_keys(bool check_nonbool, bool enable_metrics,
+                            bool enable_logs) {
+  /* tests for non-bool config keys */
+  const struct {
+    const char *key;
+    const char *value;
+    bool success;
+  } nonbool[] = {
+      {"MetricsOutput", "derived", true},
+      {"MetricsOutput", "raW", true},
+      {"MetricsOutput", "Foobar", false},
+      {"MetricsOutput", "1", false},
+      {"Foobar", "Foobar", false},
+      {"Samples", "999", false},
+      {"Samples", "-1", false},
+      {"Samples", "8", true},
+      /* set back to default */
+      {"MetricsOutput", "Both", true},
+      {"Samples", "1", true},
+  };
+  /* key prefixes for bool options, and whether to set them back to false */
+  const struct {
+    bool set_false;
+    const char *prefix;
+  } bool_checks[] = {{enable_metrics, "Disable"}, {!enable_logs, "Log"}};
+  int fails = 0;
+
+  if (check_nonbool) {
+    for (unsigned int t = 0; t < STATIC_ARRAY_SIZE(nonbool); t++) {
+      int ret = registry.config(nonbool[t].key, nonbool[t].value);
+      const bool succeeded = (ret == 0);
+      if (succeeded != nonbool[t].success) {
+        fprintf(stderr, "ERROR: unexpected config %s with '%s'='%s'\n",
+                ret ? "fail" : "success", nonbool[t].key, nonbool[t].value);
+        fails++;
+      }
+    }
+  }
+
+  /* make sure that also bool values work */
+  for (unsigned int k = 0; k < registry.key_count; k++) {
+    const char *key = registry.keys[k];
+    for (unsigned int c = 0; c < STATIC_ARRAY_SIZE(bool_checks); c++) {
+      const char *prefix = bool_checks[c].prefix;
+      if (strncmp(key, prefix, strlen(prefix)) != 0) {
+        continue;
+      }
+      int ret = registry.config(key, "true");
+      if (bool_checks[c].set_false) {
+        ret += registry.config(key, "false");
+      }
+      if (ret != 0) {
+        fprintf(stderr, "ERROR: unexpected '%s' bool config set fail\n", key);
+        fails++;
+      }
+    }
+  }
+  return fails;
+}
+
+/* ------------------------------------------------------------------------- */
+
+/*
+ * Sets all GPU metrics Disable* flags in 'disabled' to 'value' and returns
+ * the count of flags that had to be changed.
+ *
+ * Bits of the changed flags are added to '*mask' (which is not cleared
+ * first), and '*all' is set to the bitmask covering every flag.
+ */
+static int get_reset_disabled(gpu_disable_t *disabled, bool value, int *mask,
+                              int *all) {
+  struct {
+    const char *name;
+    bool *flag;
+  } flags[] = {
+      {"engine", &disabled->engine}, {"frequency", &disabled->freq},
+      {"memory", &disabled->mem},    {"membw", &disabled->membw},
+      {"power", &disabled->power},   {"errors", &disabled->ras},
+      {"temperature", &disabled->temp}, {"throttle", &disabled->throttle}};
+  const int total = (int)STATIC_ARRAY_SIZE(flags);
+  int changed = 0;
+
+  *all = 0;
+  for (int i = 0; i < total; i++) {
+    const int bit = 1 << i;
+    *all |= bit;
+    if (*flags[i].flag == value) {
+      continue;
+    }
+    if (globs.verbose & VERBOSE_METRICS) {
+      fprintf(stderr, "=> %s: %s\n", value ? "DISABLED" : "ENABLED",
+              flags[i].name);
+    }
+    *flags[i].flag = value;
+    *mask |= bit;
+    changed++;
+  }
+  return changed;
+}
+
+/* TEST: metrics queries error handling. Returns the number of failures,
+ * i.e. 0 for success.
+ *
+ * For each API call limit value from 'limit' down to 1, does a metrics read
+ * and checks that the read itself still succeeds, that warnings got logged,
+ * and that exactly one metric type got disabled. After those rounds, every
+ * metric type should have been disabled at least once. Finally checks that
+ * a read fails when all metrics are disabled.
+ */
+static int test_query_errors(unsigned int limit) {
+  assert(gpu_count == 1);
+  gpu_disable_t *disabled = &(gpus[0].disabled);
+
+  /* enable all metrics */
+  int all = 0, mask = 0;
+  get_reset_disabled(disabled, false, &mask, &all);
+
+  int fails = 0;
+  mask = 0;
+  while (limit > 0) {
+    globs.warnings = 0;
+    globs.api_calls = 0;
+    globs.api_limit = limit;
+
+    if (registry.read() != 0) {
+      fprintf(stderr,
+              "ERROR: metrics query failed completely with single call fail\n");
+      fails++;
+    }
+    /* there were logged call failures? */
+    if (globs.warnings == 0) {
+      fprintf(stderr, "ERROR: no errors/warnings reported when call %d fails\n",
+              limit);
+      fails++;
+    }
+    /* enable all metrics again & check that exactly one metric type got
+     * disabled? */
+    int count = get_reset_disabled(disabled, false, &mask, &all);
+    if (count != 1) {
+      fprintf(stderr, "ERROR: %d metric types disabled instead of 1\n", count);
+      fails++;
+    }
+    limit--;
+  }
+  if (mask != all) {
+    fprintf(stderr,
+            "ERROR: all metric types were not disabled, expected %x, got %x\n",
+            all, mask);
+    fails++;
+  }
+
+  /* disable all metrics & check read fail */
+  globs.warnings = 0;
+  get_reset_disabled(disabled, true, &mask, &all);
+  /* NOTE(review): read is done twice and only the result of the second one
+   * is checked -- confirm that the first call is intentional */
+  registry.read();
+  if (registry.read() == 0) {
+    fprintf(
+        stderr,
+        "ERROR: metrics query succceeded although all metrics were disabled\n");
+    fails++;
+  }
+  globs.warnings = globs.api_limit = 0;
+  return fails;
+}
+
+/* Changes sampling rate to the given 'samples' value (a string with a
+ * positive integer), which implies a plugin reset: plugin is shut down,
+ * reconfigured and initialized again. Each step is asserted to succeed.
+ *
+ * Plugin calls are done outside of the assert() expressions, so that they
+ * still happen if asserts get compiled out with NDEBUG.
+ */
+static void change_sampling_reset(const char *samples) {
+  int ret;
+  fprintf(stderr, "Setting 'Samples' to '%s' and reseting plugin\n", samples);
+  ret = registry.shutdown();
+  assert(ret == 0);
+  assert(atoi(samples) > 0);
+  ret = registry.config("Samples", samples);
+  assert(ret == 0);
+  ret = registry.init();
+  assert(ret == 0);
+  (void)ret; /* otherwise set-but-unused when asserts are compiled out */
+}
+
+/* TEST: metrics queries with multiple samples, return number of fails.
+ *
+ * Does first 'samples' reads to prime the counter metrics and to record the
+ * API call counts, and then 2 * 'samples' more reads during which the API
+ * call counts are compared to the recorded ones, and the reported metrics
+ * are validated on every round that is a multiple of 'samples'
+ * ('prev_rounds' is passed on to the validation as the base round count).
+ * At the end, plugin is reset back to a single sample.
+ *
+ * Plugin calls are done outside of the assert() expressions, so that they
+ * still happen if asserts get compiled out with NDEBUG.
+ */
+static int test_multisampled_queries(unsigned int prev_rounds,
+                                     unsigned int samples) {
+  assert(samples > 1);
+  int ret;
+  /* first 'samples' rounds to prime counter metrics & count API calls */
+  if (globs.verbose & VERBOSE_METRICS) {
+    fprintf(stderr, "METRIC: first %d multisample rounds for query priming:\n",
+            samples);
+  }
+  unsigned int i, calls_sampled = 0;
+  for (i = 1; i <= samples; i++) {
+    globs.api_calls = 0;
+    ret = registry.read();
+    assert(ret == 0);
+    assert(globs.warnings == 0);
+    if (!calls_sampled) {
+      /* call count of the first round */
+      calls_sampled = globs.api_calls;
+    }
+  }
+  /* call count of the last priming round */
+  unsigned int calls_all = globs.api_calls;
+  fprintf(stderr,
+          "expect %d API calls for %dx multisampled metrics, >= %d for all\n",
+          calls_sampled, samples, calls_all);
+
+  /* additional 2x 'samples' rounds to verify the results */
+  if (globs.verbose & VERBOSE_METRICS) {
+    fprintf(stderr,
+            "METRIC: additional %d+%d multisample rounds for verification:\n",
+            samples, samples);
+  }
+  int fails = 0;
+  for (/* i = samples + 1 */; i <= 3 * samples; i++) {
+    globs.api_calls = 0;
+    ret = registry.read();
+    assert(ret == 0);
+    assert(globs.warnings == 0);
+    /* verify same amount of calls on every run, separately for
+     * the case when only sampled metrics are read, and when all are
+     */
+    if (i % samples > 0) {
+      if (calls_sampled != globs.api_calls) {
+        fprintf(stderr, "ERROR: expected %d API calls, got %d\n", calls_sampled,
+                globs.api_calls);
+        fails++;
+      }
+      continue;
+    }
+    /* number of calls may differ on multisampled rounds, so just
+     * check that at least expected number of them is done
+     */
+    if (calls_all < calls_sampled || calls_all > globs.api_calls) {
+      fprintf(stderr, "ERROR: expected >= %d (and > %d) API calls, got %d\n",
+              calls_all, calls_sampled, globs.api_calls);
+      fails++;
+    }
+    fails += validate_and_reset_saved_metrics(prev_rounds, i);
+  }
+  /* back to single sample */
+  ret = registry.shutdown();
+  assert(ret == 0);
+  ret = registry.config("Samples", "1");
+  assert(ret == 0);
+  ret = registry.init();
+  assert(ret == 0);
+  (void)ret; /* otherwise set-but-unused when asserts are compiled out */
+  return fails;
+}
+
+/* TEST: error handling for Sysman calls during plugin init. Returns the
+ * number of failures, i.e. 0 for success.
+ *
+ * For each API call limit value from 'limit' down to 1, checks that plugin
+ * init fails and that warnings got logged. If init succeeds regardless,
+ * plugin is shut down again before the next round.
+ */
+static int test_init_errors(unsigned int limit) {
+  int fails = 0;
+  while (limit > 0) {
+    globs.warnings = 0;
+    globs.api_calls = 0;
+    globs.api_limit = limit;
+
+    const bool init_ok = (registry.init() == 0);
+    if (init_ok) {
+      fprintf(stderr, "ERROR: metrics init succeeded despite call %d failing\n",
+              limit);
+      fails++;
+      if (registry.shutdown() != 0) {
+        fprintf(stderr, "ERROR: plugin shutdown failed after init succeeded\n");
+        fails++;
+      }
+    }
+    if (globs.warnings == 0) {
+      fprintf(stderr, "ERROR: no errors/warnings reported when call %d fails\n",
+              limit);
+      fails++;
+    }
+    limit--;
+  }
+  globs.warnings = globs.api_limit = 0;
+  return fails;
+}
+
+/* ------------------------------------------------------------------------- */
+/* options parsing & main */
+
+/* Parses command line options: each recognized option adds its bit to
+ * 'globs.verbose'. On the first unrecognized option, usage is printed to
+ * stderr and the program exits with status 1.
+ */
+static void parse_options(int argc, const char **argv) {
+  static const struct {
+    const char *opt;
+    unsigned int bit;
+    const char *desc;
+  } opts[] = {{"-ci", VERBOSE_CALLS_INIT, "Trace calls during metric inits"},
+              {"-cil", VERBOSE_CALLS_INIT_LIMIT,
+               "Trace calls during N call-limited init runs"},
+              {"-cm", VERBOSE_CALLS_METRICS,
+               "Trace calls during normal metric query runs"},
+              {"-cms", VERBOSE_CALLS_METRICS_SAMPLED,
+               "Trace calls during N sampled metric runs"},
+              {"-cml", VERBOSE_CALLS_METRICS_LIMIT,
+               "Trace calls during N call-limited metric runs"},
+              {"-mn", VERBOSE_METRICS_NORMAL,
+               "Log metric values in normal (samples=1) runs"},
+              {"-ms", VERBOSE_METRICS_SAMPLED,
+               "Log metric values in N sampled (samples>1) runs"},
+              {"-ml", VERBOSE_METRICS_LIMIT,
+               "Log metric values in N call-limited runs"}};
+  const int count = STATIC_ARRAY_SIZE(opts);
+
+  for (int i = 1; i < argc; i++) {
+    int j;
+    for (j = 0; j < count; j++) {
+      if (strcmp(argv[i], opts[j].opt) == 0) {
+        globs.verbose |= opts[j].bit;
+        break;
+      }
+    }
+    if (j < count) {
+      continue;
+    }
+    /* unknown option -> usage. strrchr() returns NULL when the program
+     * name has no directory part, and otherwise points to the '/' itself
+     */
+    const char *slash = strrchr(argv[0], '/');
+    const char *progname = slash ? slash + 1 : argv[0];
+    fprintf(stderr, "\nUsage: %s [options]\n\nOptions:\n", progname);
+    for (int k = 0; k < count; k++) {
+      fprintf(stderr, "\t%s\t%s\n", opts[k].opt, opts[k].desc);
+    }
+    fprintf(stderr, "\n\t(Only Sysman API calls are traced.)\n");
+    exit(1);
+  }
+}
+
+/**
+ * Test program entry point.
+ *
+ * Runs the plugin config, init, metrics query and error handling checks in
+ * a fixed sequence through the 'registry' callbacks (config / init / read /
+ * shutdown), printing progress and PASS messages to stderr.  The steps
+ * depend on state left by the earlier ones (plugin config, 'globs'
+ * counters, the 'api_calls' count), so their order matters.
+ *
+ * Command line arguments are handled by parse_options(), which exits with
+ * an usage message on unknown options.
+ *
+ * NOTE: every test step is executed inside assert(), so this program must
+ * not be built with NDEBUG defined; that would remove the calls themselves,
+ * not just their checks.  Any failing check aborts the program.
+ *
+ * Returns 0 when all checks have passed.
+ */
+int main(int argc, const char **argv) {
+  parse_options(argc, argv);
+
+  /* presumably sets up the 'registry' callbacks used by all steps below */
+  plugin_register();
+
+  /* config & minimal init checks */
+
+  set_verbose(VERBOSE_CALLS_INIT, 0);
+
+  fprintf(stderr, "Default plugin config + 2*init + shutdown...\n");
+  assert(registry.init() == 0);
+  /* 2nd init call should be no-op with log message about that,
+   * so reset message count first to see only messages from that call */
+  globs.messages = 0;
+  assert(registry.init() == 0);
+  assert(globs.messages > 0);
+  assert(registry.shutdown() == 0);
+  fprintf(stderr, "default init/shutdown: PASS\n\n");
+
+  /* check misc config options, enable all metrics & extra plugin logging;
+   * this is expected to produce warnings */
+  fprintf(stderr, "Misc config options checks...\n");
+  globs.warnings = 0;
+  assert(test_config_keys(true, true, true) == 0);
+  assert(globs.warnings > 0);
+  /* more coverage by disabling only some of metrics at init,
+   * which should not cause any warnings */
+  globs.warnings = 0;
+  assert(registry.config("DisablePower", "true") == 0);
+  assert(registry.init() == 0);
+  assert(registry.shutdown() == 0);
+  assert(globs.warnings == 0);
+  fprintf(stderr, "misc config: PASS\n\n");
+
+  /* init should fail (with warnings) when every metric is disabled */
+  globs.warnings = 0;
+  fprintf(stderr, "All metrics & logs disabled + init/shutdown...\n");
+  assert(test_config_keys(false, false, false) == 0);
+  assert(registry.init() != 0);
+  assert(globs.warnings > 0);
+  /* undefined whether shutdown() returns fail or success after failed init,
+   * so its return value is intentionally not checked */
+  registry.shutdown();
+  fprintf(stderr, "metrics disabled init/shutdown: PASS\n\n");
+
+  /* config tests done, re-enable metrics */
+  globs.warnings = 0;
+  assert(test_config_keys(false, true, false) == 0);
+  plugin_register_free();
+
+  /* full init checks */
+
+  /* make sure all Sysman functions are called at init */
+  assert(registry.config("LogGpuInfo", "true") == 0);
+  /* neither re-enabling metrics nor the option above may have warned */
+  assert(globs.warnings == 0);
+
+  fprintf(stderr,
+          "Check whether init with GPU info does all Sysman calls...\n");
+  globs.warnings = globs.api_calls = globs.callbits = 0;
+  assert(registry.init() == 0);
+  /* all Sysman metric init functions got called? */
+  assert(globs.callbits == INIT_CALL_BITS);
+  fprintf(stderr, "%d calls to all %d Sysman metric init functions\n",
+          globs.api_calls, INIT_CALL_FUNCS);
+  assert(registry.shutdown() == 0);
+  assert(globs.warnings == 0);
+  fprintf(stderr, "full init: PASS\n\n");
+
+  /* skip Sysman functions whose failure isn't fatal for init */
+  assert(registry.config("LogGpuInfo", "false") == 0);
+
+  /* count relevant API calls */
+  globs.warnings = globs.api_calls = 0;
+  fprintf(stderr, "No init errors/warnings with GPU info disabled...\n");
+  assert(registry.init() == 0);
+  assert(registry.shutdown() == 0);
+  assert(globs.warnings == 0);
+  fprintf(stderr, "init warnings: PASS\n\n");
+
+  set_verbose(VERBOSE_CALLS_INIT_LIMIT, 0);
+
+  /* API call count from the init + shutdown round above, given to the
+   * init error test as the number of calls to go through */
+  unsigned int api_calls = globs.api_calls;
+
+  /* api_calls is unsigned, hence %u */
+  fprintf(stderr,
+          "Error handling for each of %u relevant init Sysman calls...\n",
+          api_calls);
+  assert(test_init_errors(api_calls) == 0);
+  /* undefined whether shutdown() returns fail or success after failed init,
+   * so its return value is intentionally not checked */
+  registry.shutdown();
+  fprintf(stderr, "init error handling: PASS\n\n");
+
+  /* metrics query & value checks */
+
+  assert(registry.config("DisableSeparateErrors", "false") == 0);
+  set_verbose(VERBOSE_CALLS_METRICS, VERBOSE_METRICS_NORMAL);
+  assert(registry.init() == 0);
+
+  fprintf(stderr, "Query all metrics for the first time, with separate errors "
+                  "enabled...\n");
+  globs.warnings = globs.api_calls = globs.callbits = 0;
+  assert(registry.read() == 0);
+  /* all Sysman metric query functions got successfully called? */
+  assert(globs.callbits == QUERY_CALL_BITS);
+  assert(globs.warnings == 0);
+  fprintf(stderr, "%d calls to all %d Sysman metric query functions\n",
+          globs.api_calls, QUERY_CALL_FUNCS);
+  /* per-time counters do not report on first round, so validation is
+   * expected to return non-zero here, unlike on the later rounds */
+  assert(validate_and_reset_saved_metrics(1, 0) > 0);
+  fprintf(stderr, "metrics query round 1: PASS\n\n");
+
+  /* remember call count of round 1 and restart counting for round 2 */
+  api_calls = globs.api_calls;
+  globs.api_calls = 0;
+
+  fprintf(stderr, "Another query for per-timediff metric values + validation "
+                  "for all values...\n");
+  assert(registry.read() == 0);
+  /* make sure second round also succeeds without warnings */
+  assert(globs.warnings == 0);
+  /* second round may make additional calls, but never fewer of them */
+  assert(globs.api_calls >= api_calls);
+  /* make sure metrics values were correct and all metric types were now
+   * reported */
+  assert(validate_and_reset_saved_metrics(2, 0) == 0);
+  fprintf(stderr, "metrics query round 2: PASS\n\n");
+
+  /* just report total count of errors (should not affect calls) */
+  assert(registry.config("DisableSeparateErrors", "true") == 0);
+
+  /* remember call count of round 2 and restart counting for round 3 */
+  api_calls = globs.api_calls;
+  globs.api_calls = 0;
+
+  fprintf(stderr, "One more query to verify increment handling, with only "
+                  "error totals...\n");
+  assert(registry.read() == 0);
+  assert(globs.warnings == 0);
+  /* from second round onwards, call count should stay the same */
+  assert(globs.api_calls == api_calls);
+  /* make sure metrics values were correct and all metric types were reported */
+  assert(validate_and_reset_saved_metrics(3, 0) == 0);
+  fprintf(stderr, "metrics query round 3: PASS\n\n");
+
+  /* queries with metrics sampling enabled */
+
+  set_verbose(VERBOSE_CALLS_METRICS_SAMPLED, VERBOSE_METRICS_SAMPLED);
+  fprintf(stderr, "Check metrics with >1 'Samples' sampling factor...\n");
+  change_sampling_reset("8");
+  assert(test_multisampled_queries(3, 8) == 0);
+  fprintf(stderr, "metrics sampling: PASS\n\n");
+
+  /* metrics error handling checks */
+
+  set_verbose(VERBOSE_CALLS_METRICS_LIMIT, VERBOSE_METRICS_LIMIT);
+  /* api_calls still holds the per-read call count from the rounds above;
+   * it is unsigned, hence %u */
+  fprintf(stderr,
+          "Test error handling separately for each of the %u query calls...\n",
+          api_calls);
+  /* disable multisampling & do one query round to guarantee
+   * that all L0 calls are done on every read */
+  change_sampling_reset("1");
+  assert(registry.read() == 0);
+  assert(test_query_errors(api_calls) == 0);
+  assert(registry.shutdown() == 0);
+  fprintf(stderr, "metrics query error handling: PASS\n\n");
+
+  fprintf(stderr, "=> SUCCESS, all tests PASSed!\n");
+  return 0;
+}